diff --git "a/weights/checkpoint_epoch_5_metadata.json" "b/weights/checkpoint_epoch_5_metadata.json" new file mode 100644--- /dev/null +++ "b/weights/checkpoint_epoch_5_metadata.json" @@ -0,0 +1,452 @@ +{ + "epoch": 4, + "optimizer_state_dict": { + "state": { + "0": { + "step": "tensor(5008.)", + "exp_avg": "tensor([[ 9.8365e-05, -3.9530e-05, 3.2748e-06, ..., 1.5921e-05,\n 3.7428e-05, -1.2425e-06],\n [ 1.2975e-04, -7.8826e-05, -5.1688e-05, ..., 8.4008e-05,\n -5.7494e-05, 4.5213e-05],\n [ 1.7282e-04, -2.2142e-04, -4.6699e-05, ..., 1.0785e-04,\n 6.8168e-05, 9.4126e-05],\n ...,\n [-3.2087e-05, 8.4764e-05, -3.5756e-05, ..., 9.9390e-05,\n -1.1274e-05, 6.8323e-06],\n [-3.9967e-09, 4.1533e-09, -5.4748e-10, ..., 1.8102e-09,\n 1.3380e-09, -4.0356e-09],\n [-1.4352e-04, 1.0789e-04, 3.1936e-05, ..., -4.3365e-05,\n 1.0860e-05, 1.5493e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.5664e-08, 6.8376e-08, 8.3553e-09, ..., 1.8848e-08, 1.9269e-08,\n 5.1668e-09],\n [1.2595e-07, 9.0480e-08, 3.5098e-08, ..., 6.3703e-08, 2.8370e-08,\n 4.4021e-08],\n [8.6265e-08, 1.1261e-07, 3.7418e-08, ..., 3.2862e-08, 2.1234e-08,\n 2.3994e-08],\n ...,\n [2.2266e-07, 1.3423e-07, 2.5279e-08, ..., 2.8063e-08, 2.2872e-08,\n 1.5268e-08],\n [9.3641e-11, 3.2087e-10, 4.3172e-11, ..., 9.9154e-11, 9.0230e-11,\n 6.9915e-11],\n [1.5499e-07, 9.2860e-08, 1.4243e-08, ..., 1.9437e-08, 3.4265e-08,\n 1.6971e-08]], device='cuda:0')" + }, + "1": { + "step": "tensor(5008.)", + "exp_avg": "tensor([ 1.0100e-03, 4.3379e-03, 3.8078e-03, -6.8617e-04, -2.3703e-05,\n -1.6201e-03, -6.1781e-04, 1.3134e-03, -1.3918e-03, -5.8697e-04,\n 9.8567e-05, -2.0752e-03, 5.6052e-45, -1.2030e-03, -2.4545e-04,\n -2.3046e-03, 1.6347e-03, 2.1588e-03, -3.1311e-04, 5.6052e-45,\n 8.9469e-04, 3.0753e-03, -1.2493e-04, -1.3703e-03, 5.6052e-45,\n -1.6960e-03, -5.1683e-04, 7.4499e-04, 1.0409e-03, -1.6920e-03,\n 2.1064e-03, 4.6151e-03, 3.3147e-20, 5.6052e-45, 1.7645e-04,\n -3.7787e-04, -8.6520e-04, -6.4018e-04, 1.3967e-04, -1.4665e-03,\n 9.5046e-04, 1.4998e-03, -2.7317e-04, -2.1101e-03, 3.5007e-04,\n -4.1675e-03, -1.9003e-03, 6.9841e-04, 1.5728e-04, -2.1979e-05,\n -1.0592e-03, 5.6052e-45, -2.1912e-39, 5.6052e-45, -2.9434e-03,\n -2.0338e-03, 3.7159e-04, -9.1424e-04, -1.6186e-18, -9.4037e-04,\n -1.7348e-03, -8.5306e-04, 4.5903e-04, -9.8386e-04, 2.2164e-03,\n 4.1495e-04, -6.5090e-03, 5.6052e-45, 1.7661e-04, -7.5188e-04,\n -5.0380e-04, 5.6052e-45, -7.0589e-05, -1.6036e-03, -3.4066e-04,\n 2.7951e-03, 2.1685e-03, -3.1408e-03, -6.2691e-03, 1.9335e-03,\n 1.1323e-03, -1.5969e-03, -1.0306e-04, 2.8081e-04, 3.3097e-04,\n 1.6313e-03, 8.5051e-05, -1.2763e-03, 9.9258e-04, 9.0070e-04,\n 5.6052e-45, 1.9071e-03, -2.1552e-04, 1.6449e-03, 3.0969e-04,\n -8.7793e-04, 3.1738e-03, 2.1918e-04, 7.9928e-04, -4.3610e-03,\n -5.8235e-06, -1.3455e-03, -2.5091e-03, 2.0328e-03, 1.2953e-03,\n -1.2261e-04, 8.3383e-04, -2.0773e-03, 1.2066e-03, 2.8728e-04,\n 2.4194e-03, 2.5031e-03, 5.6052e-45, 4.1482e-03, -6.7992e-04,\n -4.1152e-04, -1.8527e-03, 4.2581e-04, 1.1210e-44, 2.4570e-08,\n 5.6052e-45, -3.1715e-04, 5.6052e-45, 5.8409e-04, -7.2493e-06,\n -1.2793e-03, -4.5642e-03, 5.6052e-45, 6.4749e-04, -5.5242e-04,\n 5.4763e-04, -6.0022e-04, 6.4328e-04, 1.7319e-27, -1.6059e-04,\n 3.0520e-03, -2.7362e-03, -1.0627e-03, -9.6242e-04, -1.7206e-03,\n -2.7262e-03, 8.4477e-04, 2.0411e-03, 2.1349e-03, 8.2899e-04,\n 1.4842e-03, -1.0239e-17, 1.3936e-03, -3.2412e-04, 2.1297e-03,\n 4.0267e-09, -5.8235e-04, -3.7435e-04, 5.6052e-45, 1.3363e-03,\n 7.1109e-04, 9.3358e-11, 4.5252e-04, 3.4241e-03, 1.3508e-03,\n 1.7445e-03, 9.2164e-04, -1.5054e-03, 1.4935e-03, -3.3573e-05,\n -5.2704e-04, -6.4840e-04, 1.1677e-03, 1.4617e-04, 2.6853e-11,\n 5.6052e-45, 2.9181e-03, -1.4769e-03, -3.5561e-03, -1.1103e-03,\n 1.9168e-03, 3.3116e-04, 2.1760e-03, 5.6052e-45, 2.8412e-03,\n -6.4284e-04, 4.4530e-04, 6.9378e-04, 5.6052e-45, -1.6709e-03,\n -1.2339e-03, 1.5068e-03, 1.1144e-04, -1.3644e-03, 5.6052e-45,\n 5.6052e-45, -5.9326e-04, 5.2712e-04, 3.1336e-04, 1.1670e-04,\n -7.9362e-05, -7.7011e-04, 1.7574e-03, 5.6052e-45, -1.1470e-03,\n 3.8509e-04, 5.5671e-04, -1.2742e-03, 6.5528e-04, -5.7701e-04,\n 5.6052e-45, -2.8140e-03, -2.8507e-04, -8.3316e-05, 3.9338e-04,\n 2.1675e-04, 8.2024e-04, 5.5736e-04, 3.1526e-03, -9.2956e-04,\n 2.4685e-03, 6.5738e-04, 3.6477e-03, 1.6885e-03, 1.1099e-03,\n 5.6318e-05, -7.2135e-04, -4.7470e-04, -6.0446e-04, -1.3518e-03,\n -1.6788e-03, -2.8578e-03, -1.3504e-03, 4.8137e-04, 1.4038e-03,\n 1.4515e-03, 1.3761e-03, 1.3620e-03, 5.6052e-45, -1.0080e-03,\n -5.3508e-04, 2.4061e-03, 1.5772e-03, 2.6878e-04, 1.5413e-03,\n 3.9742e-04, 2.0431e-03, 1.7856e-03, 2.3080e-03, 5.3345e-04,\n -1.8303e-03, 7.7386e-04, -8.1739e-04, 1.1217e-03, -5.1945e-04,\n -2.2008e-03, -2.5892e-03, 8.7250e-04, -1.4888e-04, -5.6389e-04,\n -1.4506e-03, 5.6052e-45, -8.0094e-04, -8.1137e-04, -6.8996e-04,\n 1.5796e-03, -6.0676e-04, -1.2699e-03, 1.7975e-04, 2.0091e-03,\n 2.7672e-03, 4.2234e-03, 5.6052e-45, -3.0253e-04, -2.5950e-03,\n -1.3132e-03, 6.0431e-04, 7.2586e-04, -1.6223e-03, -8.1025e-04,\n -2.2194e-12, -1.6881e-03, 5.6052e-45, 6.5257e-04, 3.4828e-04,\n -2.5354e-03, -3.0247e-03, 2.7225e-03, -7.6471e-04, -1.8036e-04,\n 3.2882e-04, -4.6440e-05, -2.6052e-03, -1.9783e-03, -4.0878e-04,\n -5.7524e-04, 2.3200e-03, 1.6883e-03, -6.1475e-04, 3.5326e-03,\n -1.6054e-03, -1.0662e-03, -4.6572e-03, 1.1825e-03, 3.1807e-03,\n 3.5462e-03, 1.3842e-04, -1.8833e-03, 1.8477e-03, -3.8198e-04,\n 4.2530e-04, -2.6637e-03, 5.6052e-45, 5.6052e-45, -1.9224e-03,\n -2.5075e-03, -7.6981e-04, -1.0257e-03, 1.4294e-03, 3.0726e-05,\n 3.2058e-03, 1.1552e-03, 9.2449e-04, -7.9117e-04, -6.2546e-04,\n -3.7968e-03, -7.8026e-04, 1.3298e-03, -4.2407e-04, 1.0376e-03,\n 1.9453e-03, -1.2831e-03, 1.5317e-03, -1.8528e-03, -3.5110e-03,\n -6.1829e-04, -6.2017e-04, -9.1996e-05, 1.6102e-03, 5.6052e-45,\n 1.0226e-03, -1.6305e-03, 8.3258e-04, -8.5450e-04, 9.0009e-04,\n 2.6298e-04, 2.9721e-03, 1.5471e-03, -1.7813e-03, 8.4583e-04,\n 5.0357e-04, 5.6052e-45, 2.6301e-03, -5.4768e-03, -1.8239e-04,\n -3.3442e-05, -9.8636e-04, -2.4847e-03, -1.8759e-03, -6.0489e-04,\n -1.3040e-03, -3.7023e-04, 1.5317e-03, 5.2927e-03, -1.5579e-03,\n -1.3381e-03, -1.9522e-03, -5.6323e-04, 1.3188e-03, -7.4051e-03,\n -1.7235e-03, -1.3227e-04, 1.0739e-03, -1.5837e-03, -1.7710e-03,\n 2.0112e-04, -3.7127e-03, 7.0203e-04, -2.3372e-04, 1.9161e-03,\n 1.6225e-03, -1.2839e-03, -3.2253e-04, 5.6052e-45, 4.9533e-03,\n -4.1573e-04, 5.6052e-45, -7.7195e-04, 1.4882e-03, 9.1290e-04,\n 7.2711e-05, 1.5004e-03, -3.6042e-05, -1.7492e-03, -1.3944e-03,\n -1.4387e-03, 1.9572e-03, 2.8628e-03, -1.7983e-34, 5.6052e-45,\n -2.1790e-03, -6.0765e-04, -1.5476e-03, -1.0954e-03, 3.1369e-04,\n 1.9816e-03, 1.6125e-03, -5.0271e-04, 3.4762e-03, 1.0068e-03,\n 6.9574e-04, 6.8149e-04, 5.6052e-45, 9.7559e-04, -6.7203e-05,\n 5.6052e-45, 1.5239e-04, 5.6052e-45, -2.9132e-03, -2.3410e-04,\n -7.3499e-04, -1.7396e-03, 5.6052e-45, -4.4139e-04, -7.6219e-04,\n 1.7692e-03, 6.7959e-04, -9.4247e-04, 2.6277e-05, -1.3771e-04,\n -1.6579e-04, 4.3898e-03, -1.1718e-04, -1.3533e-03, 5.6052e-45,\n -1.4949e-03, 5.1438e-04, 7.7745e-04, 5.6052e-45, 1.8780e-03,\n 2.3024e-04, 5.4866e-05, -1.6454e-04, -3.7131e-03, -1.8146e-03,\n 2.1925e-04, -9.1816e-04, 5.9034e-03, -1.2992e-03, 6.2794e-04,\n 9.2899e-04, -3.4211e-03, 5.6052e-45, 1.0105e-03, -3.6787e-04,\n 5.6052e-45, 3.2439e-04, -6.1816e-04, 7.5364e-04, -1.5462e-03,\n 3.0423e-08, -1.3518e-03, 6.2005e-04, 3.6658e-04, 5.6052e-45,\n 1.4886e-03, 2.1781e-03, 5.6052e-45, 5.8315e-04, 4.6295e-04,\n 1.3821e-03, 4.3556e-04, 5.6052e-45, 7.9198e-04, -5.5912e-04,\n 1.3256e-03, 1.1152e-03, -3.2480e-04, -8.2101e-04, 2.3535e-03,\n -4.5450e-04, 5.6052e-45, 2.9865e-04, 5.6052e-45, -1.8530e-04,\n 3.8849e-04, -2.4578e-04, -9.3907e-04, 1.4681e-03, 1.7482e-03,\n -1.9137e-03, 2.8748e-03, -2.6741e-04, 6.6680e-04, -1.7837e-03,\n 1.7951e-17, -1.6469e-03, 5.9936e-04, 1.7286e-03, 1.9009e-04,\n -5.1925e-04, 1.2606e-03, -2.4088e-04, 1.6288e-03, 5.2623e-04,\n 1.1008e-03, 5.6052e-45, 1.6066e-03, 6.6441e-04, 9.7904e-04,\n 1.3164e-03, 1.9549e-03, 1.5221e-03, 9.9715e-04, 7.2656e-04,\n 8.0884e-04, 3.7079e-03, 1.1459e-03, -1.7992e-03, 5.6052e-45,\n 5.6052e-45, 2.9923e-03, -2.7719e-03, 2.6777e-04, -5.8328e-04,\n -3.0689e-04, 3.0124e-03, 1.6872e-03, 9.2770e-04, 2.9581e-05,\n 1.7367e-03, 1.3459e-03, 2.9990e-03, 4.6529e-04, 1.8867e-03,\n 4.7433e-06, 1.7393e-05, -3.1907e-03, 5.2344e-04, 2.5924e-43,\n -1.2843e-04, 1.2594e-04, 5.6052e-45, 5.6052e-45, -2.3324e-03,\n 1.9488e-03, 2.9439e-04, -1.8544e-04, 1.0520e-03, 2.9453e-04,\n -5.0162e-03, 2.0348e-03, 4.0897e-04, -4.4827e-04, 1.9506e-03,\n -1.3328e-03, -2.6488e-03, -1.1633e-03, -3.6916e-04, -3.5345e-03,\n -1.9869e-04, 7.4746e-04, -2.0632e-03, 4.4074e-04, 7.8711e-04,\n -2.2224e-04, -1.0304e-03, -1.9457e-03, -2.9624e-04, -4.3302e-04,\n -4.3016e-04, 1.5030e-03, -6.5417e-04, 3.5795e-03, -1.1246e-03,\n -7.0597e-04, 6.3308e-04, -2.0187e-04, 8.3933e-05, -3.2152e-05,\n -5.6052e-45, 5.6052e-45, 8.9804e-04, 6.2419e-03, -4.5985e-03,\n -2.6252e-03, 5.6052e-45, -7.8141e-04, -1.0365e-03, 1.8883e-03,\n -1.5764e-03, 2.0519e-03, -7.0695e-04, -2.1459e-04, 1.5949e-04,\n -2.7336e-03, 3.4447e-04, 1.5788e-03, 1.2167e-04, 3.1616e-03,\n 1.0144e-03, 6.5112e-04, -8.3236e-04, 2.8446e-03, 9.1909e-05,\n -1.1754e-03, 4.0495e-04, 9.0915e-04, 4.7872e-04, -1.7729e-03,\n -8.6823e-04, 1.1698e-03, -1.6438e-03, 2.4684e-03, 1.4474e-03,\n 6.7911e-04, 3.4398e-03, 1.4354e-03, 3.3163e-04, 1.1224e-03,\n 1.2695e-03, 7.6487e-04, -8.0364e-04, -1.7695e-03, 7.4380e-29,\n 1.0577e-06, 7.2533e-04, -1.8876e-03, -5.4142e-04, 3.6442e-04,\n -1.4422e-03, 1.3930e-04, -2.0757e-03, -1.4255e-03, -4.7789e-04,\n 1.4461e-03, -5.6052e-45, -1.7851e-03, 1.9392e-03, 5.4515e-04,\n 4.4558e-05, 1.4193e-04, 5.6929e-04, -7.6766e-08, -2.0821e-03],\n device='cuda:0')", + "exp_avg_sq": "tensor([1.5471e-05, 6.4076e-05, 3.3357e-05, 2.5068e-05, 4.5139e-08, 6.0154e-06,\n 4.2440e-05, 4.3535e-05, 3.0885e-05, 4.1081e-05, 4.0602e-05, 8.2067e-05,\n 1.5077e-07, 3.6482e-05, 4.7473e-05, 4.8319e-05, 7.2269e-05, 4.8481e-05,\n 2.3484e-05, 3.9256e-08, 3.9267e-05, 3.0448e-05, 3.0843e-05, 4.5522e-05,\n 1.4329e-07, 5.9980e-05, 4.2434e-05, 5.7961e-05, 4.8435e-05, 4.8824e-05,\n 5.2305e-05, 4.7203e-05, 2.8849e-07, 2.5308e-08, 4.3962e-05, 3.8334e-05,\n 4.6049e-05, 2.9303e-05, 2.8096e-05, 4.7343e-05, 4.5750e-05, 2.5210e-05,\n 4.2427e-05, 3.5225e-05, 3.5363e-05, 5.0365e-05, 4.1504e-05, 7.0064e-05,\n 7.0876e-05, 2.8045e-05, 4.8813e-05, 1.5033e-08, 2.4393e-08, 2.2829e-08,\n 5.6857e-05, 9.4807e-05, 3.3830e-05, 3.3389e-05, 2.9407e-07, 5.2148e-05,\n 4.4678e-05, 3.8734e-05, 7.6580e-05, 2.4787e-05, 4.9735e-05, 5.0104e-05,\n 4.3969e-05, 2.7941e-08, 6.2892e-05, 4.4560e-05, 1.1054e-05, 1.1413e-07,\n 5.1395e-05, 3.7822e-05, 1.4224e-05, 4.8789e-05, 5.3800e-05, 5.8071e-05,\n 3.4194e-05, 5.2288e-05, 1.2852e-04, 4.0109e-05, 5.5946e-05, 3.6911e-05,\n 4.4675e-05, 4.2686e-05, 4.5438e-05, 2.4481e-05, 4.3064e-05, 4.2094e-05,\n 1.7817e-07, 5.5198e-05, 3.8448e-05, 3.1626e-05, 7.4515e-05, 1.7346e-05,\n 4.5794e-05, 3.7357e-05, 3.6593e-05, 3.0351e-05, 2.6467e-05, 9.7158e-06,\n 5.7541e-05, 6.1389e-05, 9.9899e-05, 2.7195e-05, 3.9837e-05, 6.0316e-05,\n 1.4775e-05, 2.2642e-05, 6.7569e-05, 6.8426e-05, 1.2046e-08, 3.1963e-05,\n 5.7610e-06, 3.2757e-05, 4.4288e-05, 6.8741e-05, 5.3711e-09, 5.4133e-08,\n 1.9538e-08, 4.0175e-05, 1.5558e-08, 8.3613e-05, 4.1608e-05, 5.9230e-05,\n 5.4852e-05, 2.9267e-09, 1.8832e-05, 5.6011e-05, 3.5929e-05, 4.3218e-05,\n 4.9762e-05, 5.2210e-09, 5.1671e-05, 4.2105e-05, 2.6792e-05, 3.6953e-05,\n 3.3827e-05, 3.7374e-05, 8.2348e-05, 4.6328e-05, 5.2392e-05, 4.5294e-05,\n 5.0083e-05, 4.7611e-05, 2.3040e-07, 4.6107e-05, 4.4986e-05, 3.5930e-05,\n 1.9773e-07, 2.7350e-05, 3.9997e-05, 1.0602e-07, 3.6958e-05, 5.3994e-05,\n 2.1000e-07, 6.4021e-05, 3.5006e-05, 4.9037e-05, 2.5777e-05, 4.2837e-05,\n 1.0786e-04, 4.4976e-05, 4.2360e-05, 3.1971e-05, 7.5261e-05, 2.7413e-05,\n 4.5072e-05, 2.4950e-08, 3.8450e-08, 4.4696e-05, 6.3628e-05, 3.3728e-05,\n 3.9320e-05, 6.2878e-05, 2.0522e-05, 4.9315e-05, 6.9780e-09, 2.3722e-05,\n 4.4038e-05, 5.4736e-05, 8.3897e-06, 5.1343e-08, 4.7321e-05, 4.4493e-05,\n 6.6636e-05, 2.7097e-05, 3.6464e-05, 1.4909e-08, 1.7082e-07, 4.1329e-05,\n 2.0306e-05, 5.1909e-05, 4.3613e-05, 4.4031e-05, 3.0118e-05, 5.3227e-05,\n 2.6925e-08, 3.7160e-05, 3.1823e-05, 2.0221e-05, 6.2383e-05, 2.0509e-05,\n 9.0029e-05, 1.3280e-09, 3.8402e-05, 4.6113e-05, 3.4872e-05, 4.4204e-05,\n 2.3849e-05, 4.7509e-05, 4.8742e-05, 4.4992e-05, 2.8454e-05, 3.7261e-05,\n 3.2369e-05, 5.6639e-05, 5.2394e-05, 2.8891e-05, 2.9369e-05, 4.8403e-05,\n 3.3956e-05, 3.8979e-05, 4.0484e-05, 4.4008e-05, 6.2775e-05, 6.7298e-05,\n 2.8964e-05, 5.9275e-05, 4.7100e-05, 8.2170e-05, 5.0334e-05, 3.3011e-08,\n 4.9641e-05, 2.3867e-05, 3.7830e-05, 2.8722e-05, 1.0081e-04, 4.6842e-05,\n 3.1110e-05, 4.7810e-05, 4.1731e-05, 4.5337e-05, 6.2854e-05, 2.8358e-05,\n 2.3485e-05, 5.5194e-05, 2.9655e-05, 3.7237e-05, 4.5966e-05, 4.0572e-05,\n 3.2543e-05, 3.8830e-05, 4.6753e-05, 3.9609e-05, 1.4224e-07, 3.3866e-05,\n 2.6186e-05, 1.2490e-05, 4.3658e-05, 5.7079e-05, 5.3233e-05, 4.5710e-05,\n 4.3161e-05, 4.0960e-05, 3.4566e-05, 2.1084e-08, 4.9863e-05, 4.6781e-05,\n 3.2154e-05, 6.2497e-05, 3.4876e-05, 2.3246e-05, 5.5890e-05, 3.6793e-08,\n 4.3361e-05, 1.0397e-07, 2.9562e-05, 1.9245e-05, 4.3136e-05, 3.5374e-05,\n 6.5276e-05, 5.2394e-05, 4.6613e-05, 4.8715e-05, 8.4422e-08, 5.9592e-05,\n 6.1927e-05, 2.9575e-05, 8.6889e-06, 4.5818e-05, 5.5537e-05, 4.7967e-05,\n 3.6788e-05, 7.1107e-05, 7.6536e-05, 4.7009e-05, 4.1536e-05, 9.1709e-05,\n 4.6170e-05, 8.7964e-06, 6.4300e-06, 3.6398e-05, 4.1899e-05, 7.6441e-06,\n 6.7151e-05, 4.3765e-09, 8.6220e-08, 4.3169e-05, 6.2967e-05, 2.9643e-05,\n 4.4238e-05, 5.4220e-05, 5.1227e-05, 4.2333e-05, 4.8903e-05, 3.6519e-05,\n 3.1887e-05, 5.2849e-05, 5.8537e-05, 7.3943e-05, 6.1819e-05, 2.8871e-05,\n 1.6279e-05, 4.5091e-05, 3.6271e-05, 9.5212e-06, 3.2089e-05, 5.0530e-05,\n 9.5944e-06, 2.7550e-05, 5.0638e-05, 6.1794e-05, 1.0286e-11, 4.1570e-05,\n 2.8491e-05, 6.6689e-05, 2.6466e-05, 3.8255e-05, 7.6978e-06, 4.7571e-05,\n 3.3616e-05, 3.6401e-05, 4.0317e-05, 5.9655e-05, 1.5053e-07, 9.6399e-06,\n 4.6553e-05, 4.6551e-05, 3.3759e-05, 4.7260e-05, 3.6972e-05, 3.5313e-05,\n 7.1451e-05, 4.1493e-05, 4.0888e-05, 3.6120e-05, 5.9170e-05, 6.0958e-06,\n 2.2157e-05, 2.7656e-05, 4.7594e-05, 6.9860e-05, 5.6604e-05, 1.1206e-04,\n 1.5597e-05, 5.3454e-05, 6.0671e-05, 5.0776e-05, 5.4602e-05, 3.1172e-05,\n 3.7242e-05, 1.1501e-05, 3.9346e-05, 6.8974e-05, 9.4734e-05, 1.4374e-05,\n 6.5530e-08, 4.7837e-05, 2.3755e-05, 3.5754e-07, 3.8114e-05, 7.8584e-05,\n 4.5895e-05, 3.3532e-05, 4.4310e-05, 5.2872e-05, 2.7890e-05, 2.7272e-05,\n 6.0952e-05, 5.5481e-05, 3.6020e-05, 3.5536e-08, 1.1791e-07, 4.1462e-05,\n 4.5635e-05, 4.2247e-05, 7.1493e-05, 3.6246e-05, 4.1408e-05, 2.8786e-05,\n 1.6079e-05, 3.4269e-05, 3.9221e-05, 6.7002e-05, 3.0166e-05, 4.3424e-08,\n 1.1647e-05, 1.2419e-05, 1.2509e-08, 1.0394e-05, 2.1824e-08, 5.7851e-05,\n 5.1559e-05, 1.8121e-05, 3.6099e-05, 5.8922e-08, 2.0794e-05, 4.8529e-05,\n 2.9892e-05, 4.9308e-05, 5.5888e-05, 6.1415e-05, 3.9056e-05, 4.7653e-06,\n 3.4928e-05, 3.4899e-06, 2.4447e-05, 2.9617e-08, 2.7710e-05, 4.9795e-05,\n 4.3736e-05, 3.7848e-08, 2.6230e-05, 3.9066e-05, 6.2294e-05, 4.7004e-05,\n 4.0768e-05, 2.8438e-05, 5.5390e-05, 2.2288e-05, 4.5816e-05, 5.9187e-05,\n 3.3778e-05, 7.5553e-06, 4.2178e-05, 1.2774e-08, 3.8121e-05, 2.9007e-05,\n 4.2383e-07, 2.5315e-05, 3.3929e-05, 3.9244e-06, 3.8300e-05, 2.4240e-08,\n 2.6116e-05, 4.8966e-06, 3.6977e-05, 5.7217e-07, 4.0459e-05, 7.1547e-05,\n 3.7200e-09, 3.5580e-05, 3.4433e-06, 4.5802e-05, 3.5310e-05, 3.0382e-08,\n 5.4145e-05, 2.7690e-05, 8.0106e-05, 5.6639e-05, 3.4566e-05, 2.0206e-05,\n 2.8322e-05, 4.0039e-05, 2.1118e-07, 5.2947e-05, 6.7049e-08, 9.7945e-06,\n 7.0130e-06, 8.3251e-06, 4.1676e-05, 4.6595e-05, 8.8474e-05, 3.1435e-05,\n 4.7031e-05, 3.4636e-05, 5.1517e-05, 3.6346e-05, 2.8132e-08, 4.4653e-05,\n 4.3771e-05, 8.1040e-05, 7.1904e-06, 2.2715e-05, 3.5739e-05, 2.8812e-05,\n 4.3698e-05, 1.6248e-05, 3.5188e-05, 2.0875e-07, 1.6840e-05, 5.9010e-05,\n 3.9746e-05, 7.2859e-05, 4.8329e-05, 3.8923e-05, 5.6740e-05, 4.1708e-05,\n 4.3219e-05, 9.2567e-05, 3.2320e-05, 4.8412e-05, 1.5668e-07, 1.7292e-07,\n 5.3705e-05, 1.0544e-04, 5.9495e-05, 3.7916e-05, 4.7539e-05, 4.7210e-05,\n 4.0458e-05, 5.2002e-05, 3.9407e-05, 7.7961e-06, 3.7438e-05, 5.1940e-05,\n 3.3141e-05, 3.2486e-05, 8.9524e-08, 3.2571e-05, 5.2228e-05, 2.6336e-05,\n 1.5742e-07, 3.4957e-05, 4.4431e-05, 1.2752e-08, 3.8367e-08, 4.4677e-05,\n 4.8358e-05, 5.2858e-05, 2.7801e-05, 5.3029e-05, 2.4797e-05, 4.5741e-05,\n 3.7552e-05, 2.6059e-05, 3.0109e-05, 4.6073e-05, 4.2974e-05, 3.5412e-05,\n 6.0530e-05, 5.9085e-05, 4.8788e-05, 2.7272e-05, 4.1195e-05, 1.2267e-04,\n 3.3755e-05, 4.0985e-05, 2.1409e-05, 4.0675e-05, 4.6180e-05, 8.2109e-05,\n 4.9221e-06, 3.6088e-05, 5.9531e-05, 2.8684e-05, 5.2169e-05, 4.0875e-05,\n 5.0955e-05, 3.6219e-05, 4.3339e-05, 5.8242e-05, 3.9667e-06, 2.0001e-08,\n 2.2892e-07, 3.6641e-05, 3.8901e-05, 5.2510e-05, 4.1900e-05, 4.3570e-07,\n 4.0382e-05, 4.4372e-05, 2.2940e-05, 4.3935e-05, 3.2343e-05, 3.3750e-05,\n 5.9446e-05, 4.8774e-05, 3.8940e-05, 7.4539e-05, 5.2468e-05, 6.5181e-05,\n 4.0822e-05, 4.8215e-05, 4.4684e-05, 4.8099e-05, 6.4226e-05, 7.8774e-05,\n 6.1564e-05, 2.7928e-05, 3.8759e-05, 3.8469e-05, 4.5286e-05, 3.3635e-05,\n 3.5771e-05, 4.2228e-05, 9.0291e-05, 7.3985e-05, 5.3636e-05, 5.5798e-05,\n 4.1357e-05, 1.8346e-05, 2.2589e-05, 4.3875e-05, 4.2595e-05, 4.3447e-05,\n 2.5827e-05, 1.0024e-08, 2.8626e-07, 5.0575e-06, 4.7845e-05, 3.9193e-05,\n 4.3923e-05, 4.9661e-05, 3.8185e-05, 3.1916e-05, 3.2082e-05, 6.3147e-05,\n 2.9611e-05, 7.3380e-08, 4.7759e-06, 5.4644e-05, 3.8585e-05, 7.2799e-05,\n 4.7572e-05, 4.0752e-05, 6.6774e-08, 3.1248e-05], device='cuda:0')" + }, + "2": { + "step": "tensor(5008.)", + "exp_avg": "tensor([[ 4.2789e-06, -3.0149e-05, -3.8667e-05, ..., 6.3569e-06,\n -3.7737e-09, 2.0149e-05],\n [ 6.1900e-06, -2.6529e-05, 1.1074e-05, ..., -4.3185e-05,\n 3.6260e-09, -4.1065e-05],\n [ 2.0111e-05, 4.3233e-06, -1.1692e-05, ..., -1.3265e-06,\n -1.6374e-09, -7.2251e-05],\n ...,\n [ 1.2849e-05, 3.8253e-05, -1.0706e-04, ..., -3.2491e-05,\n -6.1890e-09, -8.1976e-05],\n [-1.1560e-05, 2.0703e-05, 3.6330e-05, ..., -2.6455e-05,\n -5.8501e-09, -1.7039e-04],\n [-1.0429e-05, 5.4415e-05, 1.6383e-04, ..., 2.3547e-05,\n 7.9381e-10, 3.9757e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.1180e-09, 8.2080e-09, 8.6843e-09, ..., 8.8746e-09, 5.5765e-11,\n 7.3709e-09],\n [4.7196e-09, 2.4112e-08, 1.4612e-08, ..., 1.8684e-08, 3.5202e-11,\n 1.2070e-08],\n [3.0408e-09, 1.6331e-08, 1.5068e-08, ..., 1.2581e-08, 8.3955e-11,\n 1.8929e-08],\n ...,\n [4.4023e-09, 1.6173e-08, 1.8797e-08, ..., 1.6050e-08, 7.5485e-11,\n 2.2187e-08],\n [4.8252e-09, 1.7487e-08, 1.6493e-08, ..., 1.7246e-08, 6.3169e-11,\n 4.8849e-08],\n [4.7145e-09, 1.8057e-08, 2.0528e-08, ..., 2.4369e-08, 1.2699e-10,\n 1.1611e-08]], device='cuda:0')" + }, + "3": { + "step": "tensor(2504.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 6.5050e-06, -1.5764e-06, -1.3506e-07, ..., -4.3790e-07,\n 1.0749e-06, 8.4761e-07],\n [ 4.0638e-44, -3.2230e-44, -5.6052e-45, ..., 1.1210e-44,\n 3.9236e-44, 5.6052e-45],\n ...,\n [-2.7930e-05, 5.2181e-06, 1.1321e-05, ..., -7.3311e-06,\n -1.8518e-05, -1.8514e-05],\n [-7.0764e-06, 1.3086e-05, 2.6873e-06, ..., 1.7399e-06,\n -6.6194e-06, -3.4864e-06],\n [-1.8533e-08, -1.6899e-09, -2.6012e-08, ..., 1.5782e-08,\n 4.2157e-09, -1.0098e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.6708e-10, 2.1343e-10, 1.7283e-12, ..., 2.2451e-16, 8.0663e-12,\n 6.8884e-12],\n [1.0155e-09, 3.5066e-09, 2.6989e-10, ..., 3.7253e-10, 3.7959e-10,\n 3.9067e-10],\n [1.3857e-11, 2.0897e-11, 7.7137e-12, ..., 1.3363e-12, 1.8957e-12,\n 5.7241e-12],\n ...,\n [6.3381e-09, 9.0081e-09, 1.2811e-09, ..., 1.5458e-09, 2.8394e-09,\n 2.6534e-09],\n [5.0212e-09, 3.8868e-09, 9.6041e-10, ..., 1.7011e-09, 1.1332e-09,\n 1.0099e-09],\n [5.6227e-11, 5.1122e-11, 2.7827e-11, ..., 1.8283e-12, 6.6583e-12,\n 8.0268e-12]], device='cuda:0')" + }, + "4": { + "step": "tensor(2504.)", + "exp_avg": "tensor([ 5.6052e-45, 7.9874e-05, 9.6970e-43, ..., -2.6811e-04,\n -1.5187e-04, 5.9570e-07], device='cuda:0')", + "exp_avg_sq": "tensor([3.7229e-08, 5.4027e-07, 7.1286e-09, ..., 2.3510e-06, 1.7445e-06,\n 6.1113e-09], device='cuda:0')" + }, + "5": { + "step": "tensor(2504.)", + "exp_avg": "tensor([[ 5.6052e-45, -9.5282e-07, -2.2421e-44, ..., -5.3672e-06,\n 1.4303e-06, 1.4593e-09],\n [ 5.6052e-45, 1.1093e-06, -2.3822e-44, ..., 2.6065e-06,\n -3.9888e-06, 1.4066e-10],\n [ 5.6052e-45, -1.1360e-06, 2.9427e-44, ..., -6.3705e-06,\n -5.0734e-07, 1.0122e-09],\n ...,\n [-5.6052e-45, -3.5286e-09, 5.4651e-44, ..., -8.6115e-07,\n 2.9393e-06, 2.3031e-09],\n [ 5.6052e-45, 5.6537e-06, 5.6052e-45, ..., -3.2853e-06,\n -9.1894e-07, 9.9749e-10],\n [-5.6052e-45, 7.1426e-07, 2.2421e-44, ..., 4.6902e-06,\n -4.2619e-06, -2.9763e-09]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.2588e-13, 8.0932e-11, 1.2150e-12, ..., 3.2871e-10, 1.9746e-10,\n 4.3390e-11],\n [3.5978e-12, 1.8005e-10, 3.2323e-12, ..., 2.7853e-10, 2.8891e-10,\n 8.4262e-11],\n [2.4029e-12, 1.3067e-10, 1.9927e-11, ..., 5.7953e-10, 3.3359e-10,\n 5.1162e-11],\n ...,\n [5.8343e-13, 1.2083e-10, 4.8009e-11, ..., 7.8236e-10, 3.5566e-10,\n 7.8672e-11],\n [1.2049e-11, 3.9570e-10, 4.5527e-12, ..., 1.0374e-09, 4.5623e-10,\n 9.0805e-11],\n [3.7272e-13, 1.3071e-10, 4.6136e-12, ..., 9.2254e-10, 3.5708e-10,\n 4.4778e-11]], device='cuda:0')" + }, + "15": { + "step": "tensor(2504.)", + "exp_avg": "tensor([5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.3121e-05], device='cuda:0')" + }, + "16": { + "step": "tensor(2504.)", + "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.4604e-08, 8.3762e-08, 3.0760e-08], device='cuda:0')" + }, + "17": { + "step": "tensor(2504.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([1.0446e-04, 7.3763e-06, 4.3291e-06, 8.2829e-06, 6.5167e-06],\n device='cuda:0')" + }, + "19": { + "step": "tensor(2504.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.1509e-14, 3.9211e-13, 6.9879e-14, ..., 1.5807e-14, 1.8716e-13,\n 1.0091e-13],\n [4.3964e-11, 4.4169e-11, 3.3803e-14, ..., 8.1305e-12, 1.6260e-12,\n 3.4645e-12],\n [4.5848e-10, 5.2492e-10, 5.2173e-14, ..., 4.2327e-11, 5.7284e-11,\n 1.7387e-11],\n ...,\n [1.3774e-11, 6.2094e-12, 1.1894e-12, ..., 3.9597e-13, 3.6764e-12,\n 4.5228e-13],\n [1.7424e-12, 1.4222e-12, 3.6278e-14, ..., 3.2481e-14, 1.8038e-13,\n 2.0890e-14],\n [1.5746e-09, 1.8440e-09, 2.8113e-13, ..., 1.4220e-10, 2.0210e-10,\n 6.9021e-11]], device='cuda:0')" + }, + "20": { + "step": "tensor(2504.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([6.2944e-11, 4.0958e-08, 2.8493e-07, 7.1843e-09, 4.8972e-08, 1.9028e-07,\n 2.1526e-09, 2.9251e-07, 2.2852e-08, 5.3104e-10, 1.3194e-06, 4.2043e-09,\n 1.3313e-09, 3.0221e-08, 3.1236e-08, 2.4767e-07, 8.3586e-08, 8.3206e-09,\n 4.1425e-07, 1.2442e-07, 1.2459e-07, 1.1979e-08, 4.2682e-08, 8.6941e-08,\n 5.1852e-08, 8.4256e-09, 3.8480e-09, 5.7498e-07, 3.8638e-10, 1.2156e-08,\n 2.3425e-08, 1.1495e-07, 1.9044e-07, 7.2045e-09, 1.0103e-08, 2.7294e-08,\n 5.3535e-10, 3.4550e-08, 2.0808e-07, 1.0964e-09, 4.4234e-09, 1.4070e-08,\n 3.7733e-07, 6.4537e-08, 4.4611e-09, 3.4947e-10, 1.5487e-09, 1.2382e-08,\n 3.0406e-09, 6.7533e-11, 2.3559e-10, 2.7345e-09, 1.4618e-08, 7.8860e-08,\n 1.0330e-09, 1.1704e-07, 2.0602e-07, 9.2605e-07, 1.0542e-07, 1.9798e-10,\n 1.5107e-06, 5.1156e-11, 5.0641e-09, 7.8229e-08, 2.2932e-07, 3.3659e-07,\n 5.4807e-09, 5.7875e-09, 1.2160e-06, 1.1369e-08, 1.7563e-07, 4.8266e-08,\n 9.0201e-08, 1.5705e-08, 2.3731e-10, 2.2609e-08, 1.6363e-07, 2.6635e-09,\n 2.2916e-12, 9.2094e-11, 2.4144e-10, 3.5959e-09, 3.7951e-07, 1.5983e-07,\n 2.4730e-08, 5.9653e-09, 1.2932e-07, 2.2169e-08, 3.4262e-08, 8.5584e-07,\n 4.9158e-09, 3.2035e-08, 5.9044e-09, 2.4493e-07, 3.8451e-08, 6.9542e-09,\n 3.2337e-11, 9.6115e-09, 3.1050e-07, 6.1676e-08, 1.6803e-10, 2.2802e-07,\n 1.8376e-09, 1.1451e-10, 9.7054e-08, 1.7144e-08, 3.1495e-10, 4.1418e-09,\n 1.2880e-08, 1.6875e-08, 1.6142e-10, 3.4645e-09, 1.3664e-09, 3.8951e-10,\n 1.9713e-08, 8.3387e-09, 2.4385e-07, 6.7469e-09, 1.4747e-07, 6.4572e-08,\n 3.1071e-09, 5.5429e-08, 8.3802e-08, 6.4990e-09, 1.1008e-07, 1.2545e-08,\n 4.8781e-07, 8.3148e-09, 1.9251e-08, 1.3294e-09, 5.6816e-10, 8.9081e-09,\n 3.1326e-09, 4.1369e-08, 8.1907e-09, 1.3614e-07, 4.9259e-10, 3.1582e-09,\n 8.6024e-08, 1.2168e-08, 2.5826e-10, 7.0613e-10, 1.2711e-06, 3.2392e-06,\n 7.8820e-08, 3.4514e-10, 1.0364e-07, 8.8152e-08, 6.8407e-08, 1.8208e-09,\n 3.4275e-10, 6.1538e-08, 2.2458e-07, 6.5488e-08, 1.1072e-07, 3.3207e-08,\n 6.3740e-10, 3.1896e-09, 4.9460e-09, 3.3921e-07, 1.7344e-09, 5.0832e-08,\n 6.3747e-08, 1.6188e-09, 1.6423e-09, 2.3161e-06, 2.1296e-07, 5.2220e-10,\n 6.5832e-07, 4.7859e-11, 6.1705e-08, 3.3256e-09, 8.1809e-08, 1.7659e-08,\n 3.2816e-08, 4.5427e-07, 7.6673e-08, 3.5976e-08, 7.8306e-09, 2.0474e-07,\n 3.1516e-08, 7.5639e-08, 1.9129e-06, 2.4427e-08, 1.2307e-09, 1.9205e-11,\n 3.1831e-09, 2.9404e-07, 1.5204e-08, 6.4266e-08, 3.2246e-07, 1.2291e-07,\n 2.5757e-09, 1.4638e-09, 2.3656e-07, 4.1147e-08, 4.2504e-10, 9.2114e-08,\n 6.7926e-10, 7.8771e-08, 1.7408e-07, 9.7666e-09, 4.3017e-08, 1.2156e-09,\n 2.0221e-08, 6.2318e-07, 9.6644e-09, 7.0683e-08, 2.1837e-10, 5.3516e-07,\n 1.0144e-07, 6.2384e-10, 4.8834e-09, 4.0165e-10, 3.7621e-07, 3.7809e-09,\n 2.6809e-08, 1.5916e-07, 8.2601e-09, 7.0449e-08, 5.6668e-08, 4.3850e-08,\n 2.5415e-09, 1.4341e-07, 1.0845e-08, 5.0242e-08, 5.8482e-07, 5.2523e-09,\n 2.2636e-10, 1.8405e-09, 2.2206e-08, 3.2338e-08, 1.2661e-09, 4.2055e-08,\n 1.7565e-08, 2.0179e-07, 2.4357e-08, 1.4054e-07, 1.4883e-08, 2.4768e-09,\n 8.9290e-09, 1.5006e-09, 4.0633e-07, 2.6903e-08, 3.7546e-09, 7.9998e-08,\n 2.9122e-09, 8.4035e-09, 2.6431e-07, 7.2188e-09, 9.1326e-08, 5.2122e-10,\n 5.8531e-08, 3.9832e-09, 5.0394e-10, 9.6826e-07], device='cuda:0')" + }, + "21": { + "step": "tensor(2504.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([4.7327e-12, 4.6902e-12, 6.5231e-10, 4.0715e-12, 2.0034e-11, 2.2924e-11,\n 1.0866e-11, 7.6030e-11, 4.8903e-13, 2.6232e-12, 8.1450e-10, 1.6119e-12,\n 2.3751e-13, 1.1175e-11, 6.1200e-11, 2.0981e-10, 9.0258e-11, 6.4702e-13,\n 1.5381e-09, 7.7164e-10, 6.9532e-10, 9.1417e-13, 1.4724e-12, 5.3274e-11,\n 4.3988e-10, 6.2814e-12, 1.0025e-11, 5.5462e-10, 1.8212e-13, 1.3887e-11,\n 4.7173e-12, 9.0395e-11, 6.6209e-10, 1.7745e-12, 8.3497e-12, 2.0514e-10,\n 2.9401e-12, 1.6593e-11, 4.0718e-10, 2.4947e-14, 1.9873e-12, 8.6204e-13,\n 4.1929e-10, 2.3159e-11, 3.5733e-12, 2.9215e-13, 2.0928e-12, 1.7151e-12,\n 1.0005e-12, 3.0663e-12, 3.1011e-13, 3.4026e-12, 2.3174e-11, 1.7966e-11,\n 1.5417e-14, 2.0947e-11, 1.1674e-11, 7.0589e-10, 5.1817e-11, 5.7061e-13,\n 3.2949e-09, 2.1022e-14, 1.4713e-13, 9.6662e-12, 4.4809e-10, 2.7335e-10,\n 3.6666e-12, 5.9145e-12, 4.2916e-09, 1.6058e-12, 3.1986e-10, 1.0115e-12,\n 1.2480e-11, 8.8802e-11, 9.7551e-13, 7.4531e-12, 1.6667e-10, 1.3153e-11,\n 2.8133e-12, 2.4982e-14, 1.3004e-12, 8.4495e-12, 9.4228e-11, 4.6883e-10,\n 2.3232e-11, 3.2398e-11, 1.7471e-11, 1.3498e-12, 8.4390e-12, 2.3335e-09,\n 1.1383e-11, 1.0063e-11, 2.8971e-12, 2.9613e-10, 5.3666e-12, 1.9430e-11,\n 5.9025e-14, 2.6773e-11, 5.3992e-10, 1.0917e-10, 3.8849e-13, 6.0639e-10,\n 2.8531e-13, 8.3677e-13, 2.7382e-11, 2.9964e-11, 1.1820e-12, 4.1353e-12,\n 2.8738e-12, 2.4424e-12, 4.0107e-14, 4.4632e-12, 7.4551e-12, 1.0580e-12,\n 4.8235e-12, 5.4668e-13, 6.4466e-10, 1.5862e-11, 2.0297e-10, 9.1397e-12,\n 8.1565e-12, 5.4636e-11, 6.8973e-11, 4.5762e-12, 2.2474e-10, 1.0053e-13,\n 5.0595e-10, 1.2316e-13, 5.8061e-12, 4.6577e-13, 4.3606e-13, 4.8768e-13,\n 6.6649e-13, 1.6442e-11, 1.7118e-12, 3.5128e-10, 8.0253e-13, 4.1634e-13,\n 5.2229e-11, 1.8337e-12, 7.7593e-13, 3.2173e-12, 5.6178e-09, 6.3765e-09,\n 1.2222e-11, 1.4552e-14, 1.1195e-10, 9.6051e-11, 1.6145e-10, 8.2741e-14,\n 6.3705e-13, 1.2370e-10, 2.3704e-10, 6.7335e-12, 2.0814e-11, 6.1241e-11,\n 4.8146e-15, 1.0386e-11, 2.3076e-12, 2.3371e-11, 3.1228e-13, 8.3313e-11,\n 3.1689e-12, 9.5868e-12, 3.0412e-12, 5.4178e-09, 1.2486e-10, 3.2589e-13,\n 8.6288e-10, 6.1913e-15, 6.7880e-12, 5.1660e-12, 5.5164e-10, 2.3537e-11,\n 1.4477e-11, 2.6630e-10, 2.4638e-11, 1.7592e-12, 1.1320e-11, 2.3196e-10,\n 6.0463e-13, 2.1951e-11, 5.0084e-09, 8.3006e-12, 4.5005e-13, 3.7942e-13,\n 1.4429e-13, 2.3585e-10, 3.4518e-11, 6.4076e-11, 2.0118e-10, 1.6262e-11,\n 1.0023e-12, 1.2389e-13, 1.3874e-10, 8.2211e-13, 6.7424e-14, 5.4224e-12,\n 9.8565e-13, 2.5517e-11, 4.6320e-11, 8.4768e-13, 4.4994e-12, 1.0563e-12,\n 1.3591e-12, 1.4759e-09, 1.0199e-11, 6.1815e-12, 3.4351e-12, 8.6390e-10,\n 3.2058e-11, 4.3923e-13, 3.6788e-13, 5.7062e-12, 3.6787e-11, 1.0877e-13,\n 9.4608e-13, 2.6590e-10, 3.8658e-11, 1.1256e-11, 1.0408e-10, 2.0665e-10,\n 3.2796e-13, 1.1583e-10, 5.8434e-12, 1.6796e-11, 1.1791e-10, 2.0854e-12,\n 6.6988e-14, 1.8011e-13, 2.6559e-12, 5.7292e-11, 1.8793e-13, 8.2889e-11,\n 2.0164e-12, 2.3593e-10, 2.4612e-10, 9.3090e-11, 1.4899e-11, 5.3082e-14,\n 1.3935e-12, 2.7556e-13, 1.5027e-10, 6.0682e-12, 1.4176e-12, 3.8776e-11,\n 3.3851e-13, 2.7566e-13, 1.3945e-09, 6.5608e-12, 9.6834e-12, 8.5278e-13,\n 4.2119e-11, 6.8411e-12, 9.0604e-13, 2.7084e-09], device='cuda:0')" + }, + "22": { + "step": "tensor(2504.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.1409e-13, 5.7443e-11, 4.5522e-10, 6.3895e-12, 7.0120e-11, 2.6330e-10,\n 3.1203e-12, 3.9074e-10, 3.3766e-11, 4.5250e-12, 1.5956e-09, 2.2330e-11,\n 5.7384e-12, 5.2531e-11, 6.4562e-11, 2.5515e-10, 1.3148e-10, 1.4909e-11,\n 4.3940e-10, 2.3513e-10, 2.0697e-10, 2.0104e-11, 4.4236e-11, 1.7140e-10,\n 1.6493e-10, 1.1935e-11, 3.4158e-12, 6.5180e-10, 7.2762e-14, 2.3320e-11,\n 1.0108e-11, 1.5858e-10, 3.0472e-10, 2.2035e-11, 1.7756e-11, 1.1878e-10,\n 1.5433e-12, 2.2792e-11, 3.2522e-10, 2.0838e-13, 1.7459e-11, 2.2010e-11,\n 5.7280e-10, 9.6802e-11, 2.4715e-11, 2.3012e-12, 5.9818e-13, 2.5003e-12,\n 5.8924e-12, 2.4668e-13, 1.4222e-13, 4.7430e-12, 2.9597e-11, 7.1645e-11,\n 4.4420e-14, 1.2704e-10, 2.7938e-10, 1.2510e-09, 1.0839e-10, 2.6135e-12,\n 2.0255e-09, 7.1877e-13, 2.7884e-12, 9.9944e-11, 3.3521e-10, 4.7294e-10,\n 1.1126e-11, 1.6947e-11, 1.6564e-09, 2.0040e-11, 2.5162e-10, 4.9967e-11,\n 1.1481e-10, 8.1408e-11, 5.6783e-13, 1.5763e-11, 2.3973e-10, 9.4529e-12,\n 6.1354e-14, 6.0421e-13, 8.7568e-12, 2.9867e-12, 5.1621e-10, 2.9056e-10,\n 4.3973e-11, 4.2434e-11, 1.4061e-10, 3.4313e-11, 5.5743e-11, 1.0146e-09,\n 3.2940e-11, 5.1378e-11, 6.6347e-13, 3.4605e-10, 5.6343e-11, 7.3162e-12,\n 2.6545e-12, 9.6334e-12, 4.4960e-10, 1.1993e-10, 2.2237e-12, 3.3599e-10,\n 3.9153e-12, 1.4242e-12, 1.0296e-10, 7.0314e-11, 7.2907e-13, 2.7117e-11,\n 2.0261e-11, 2.7544e-11, 9.2933e-13, 1.2272e-12, 1.3450e-11, 3.7930e-13,\n 2.7263e-11, 1.4465e-11, 3.7118e-10, 4.7473e-12, 2.1362e-10, 9.3693e-11,\n 2.5969e-12, 7.1624e-11, 1.2945e-10, 1.4841e-11, 2.0333e-10, 8.7521e-13,\n 5.5078e-10, 1.4722e-12, 3.1821e-11, 2.1323e-13, 1.2696e-12, 1.2024e-12,\n 1.0331e-12, 5.2295e-11, 1.2377e-11, 2.2845e-10, 3.9602e-12, 1.8523e-12,\n 1.2796e-10, 1.8058e-11, 6.8399e-13, 8.1296e-14, 1.5822e-09, 4.0812e-09,\n 7.5280e-11, 5.4419e-13, 1.7030e-10, 1.3622e-10, 1.1260e-10, 3.6727e-12,\n 3.3935e-12, 1.4459e-10, 2.3488e-10, 9.5533e-11, 1.5726e-10, 5.1609e-11,\n 2.1277e-15, 2.4486e-12, 8.7045e-12, 4.1051e-10, 1.1839e-13, 8.2287e-11,\n 9.1205e-11, 2.7370e-12, 1.3914e-12, 3.0651e-09, 3.0473e-10, 1.2966e-12,\n 7.8538e-10, 1.8582e-13, 6.2972e-11, 1.8314e-12, 1.8376e-10, 3.5436e-11,\n 5.5618e-11, 5.3602e-10, 6.3746e-11, 3.6607e-11, 2.9572e-12, 2.9496e-10,\n 3.5601e-11, 7.9502e-11, 2.5547e-09, 1.9248e-11, 3.3755e-12, 1.1371e-13,\n 6.2147e-12, 3.1708e-10, 6.5384e-11, 1.0266e-10, 4.4556e-10, 1.3925e-10,\n 4.4919e-12, 2.9688e-12, 2.4932e-10, 5.7843e-11, 5.7983e-14, 1.2996e-10,\n 4.3464e-12, 6.8339e-11, 1.8162e-10, 1.7105e-11, 4.1759e-11, 1.1233e-11,\n 1.9804e-11, 8.6820e-10, 8.4014e-12, 1.0199e-10, 3.9726e-13, 7.3100e-10,\n 1.0587e-10, 2.1193e-15, 3.9993e-13, 1.0474e-12, 5.0715e-10, 1.6442e-12,\n 4.0330e-11, 2.3436e-10, 1.2259e-11, 1.0417e-10, 1.1436e-10, 9.4523e-11,\n 5.4876e-12, 1.6571e-10, 3.2169e-11, 5.7975e-11, 7.8132e-10, 1.7501e-11,\n 6.7234e-13, 7.9488e-13, 2.8908e-11, 5.7201e-11, 2.1158e-13, 9.9792e-11,\n 6.7415e-12, 3.1088e-10, 1.1282e-10, 2.0366e-10, 5.8284e-11, 1.7188e-14,\n 1.3532e-11, 3.4446e-12, 4.7243e-10, 5.0888e-11, 1.5175e-11, 1.2038e-10,\n 5.0011e-12, 1.4184e-11, 4.4540e-10, 1.4895e-11, 1.3031e-10, 3.0054e-13,\n 5.2213e-11, 5.4198e-12, 3.0348e-13, 1.3685e-09], device='cuda:0')" + }, + "23": { + "step": "tensor(2504.)", + "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.0080e-09, 1.0689e-09, 7.6922e-13, ..., 7.5358e-11, 1.3537e-10,\n 2.5077e-11],\n [7.9168e-10, 8.9608e-10, 5.6093e-15, ..., 4.9939e-11, 1.0455e-10,\n 2.1752e-11],\n [2.3635e-10, 2.7787e-10, 5.8448e-13, ..., 2.1280e-11, 2.6194e-11,\n 8.3350e-12],\n ...,\n [2.3290e-11, 1.4620e-11, 1.1367e-13, ..., 1.4435e-12, 4.0226e-12,\n 6.0904e-13],\n [7.9757e-11, 8.4841e-11, 1.0972e-13, ..., 8.1598e-12, 9.9326e-12,\n 3.9424e-12],\n [7.1743e-11, 1.0432e-10, 9.1855e-14, ..., 9.8559e-12, 9.3286e-12,\n 5.4949e-12]], device='cuda:0')" + }, + "24": { + "step": "tensor(2504.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([5.3686e-07, 4.2068e-07, 1.3797e-07, 8.8219e-08, 8.3893e-09, 5.5624e-07,\n 4.6981e-09, 4.9822e-07, 4.8816e-07, 4.4509e-10, 6.0659e-07, 4.1396e-08,\n 5.5211e-08, 3.8804e-09, 6.0245e-10, 3.5314e-09, 8.5535e-12, 6.2099e-09,\n 1.2178e-07, 1.4445e-07, 3.2688e-08, 6.0177e-08, 7.2295e-08, 7.5157e-10,\n 4.3310e-08, 4.5846e-08, 1.7553e-08, 1.1367e-07, 1.4742e-08, 6.2125e-10,\n 7.4153e-09, 4.1897e-08, 6.7236e-08, 8.5715e-08, 5.1870e-11, 3.3608e-11,\n 4.5090e-08, 6.2973e-10, 4.6970e-08, 3.6415e-08, 9.7488e-11, 2.2894e-07,\n 5.8287e-07, 1.5222e-10, 1.5586e-08, 5.2700e-10, 7.4863e-08, 4.4223e-10,\n 1.6394e-08, 1.5150e-07, 8.1615e-10, 1.2052e-08, 2.4714e-10, 7.1423e-08,\n 4.0534e-09, 1.6776e-09, 1.0988e-06, 9.5886e-07, 3.3259e-09, 1.2131e-10,\n 1.4334e-06, 9.2984e-09, 1.1097e-07, 3.8760e-07, 3.0834e-08, 6.6689e-07,\n 1.5611e-09, 5.9951e-09, 7.0580e-08, 1.1627e-08, 1.4039e-07, 2.6273e-08,\n 2.3562e-08, 9.0000e-09, 5.3112e-09, 1.5004e-08, 7.3193e-08, 3.3824e-11,\n 4.6340e-08, 1.3835e-08, 6.8856e-08, 1.5858e-07, 5.9450e-09, 2.6985e-10,\n 8.0108e-10, 4.1366e-08, 1.6656e-07, 9.5074e-10, 2.5419e-07, 1.5571e-08,\n 5.6861e-09, 4.1813e-10, 7.5575e-09, 1.4262e-07, 3.1456e-07, 1.6693e-07,\n 1.0841e-09, 8.1457e-10, 1.3719e-07, 1.0708e-08, 7.0364e-08, 8.0327e-10,\n 1.7853e-08, 1.7319e-10, 9.6810e-09, 3.3223e-10, 6.4972e-08, 2.0889e-11,\n 7.6403e-08, 1.9701e-08, 3.6531e-09, 3.5443e-09, 3.3951e-08, 3.6629e-10,\n 8.4415e-08, 7.9144e-08, 5.9722e-09, 7.8870e-07, 2.7038e-11, 2.9590e-07,\n 1.8047e-08, 2.6878e-09, 4.7884e-08, 4.2561e-10, 8.7155e-10, 8.2358e-10,\n 5.1034e-07, 3.0715e-08, 1.4845e-10, 2.3301e-09, 1.0272e-08, 1.6358e-08,\n 3.9015e-08, 6.0066e-08, 1.9420e-09, 4.8374e-08, 1.0799e-07, 9.7419e-09,\n 2.3424e-10, 5.1317e-09, 4.4934e-08, 1.0740e-07, 1.3670e-07, 2.6989e-06,\n 1.8952e-09, 4.9763e-11, 7.8463e-08, 1.0316e-08, 5.4656e-10, 7.1564e-09,\n 1.7548e-08, 4.6498e-09, 7.9830e-09, 5.0672e-08, 4.0945e-09, 4.0132e-09,\n 1.9210e-08, 4.3744e-07, 3.7564e-09, 1.4376e-06, 2.2249e-08, 2.2922e-09,\n 9.9568e-11, 3.2868e-07, 8.2408e-08, 1.6410e-07, 1.6515e-09, 3.1250e-08,\n 5.7352e-07, 1.3040e-09, 3.0069e-07, 7.5690e-08, 4.4520e-08, 7.8953e-09,\n 6.4737e-08, 4.5494e-07, 1.6323e-07, 7.4679e-08, 1.3436e-08, 2.0235e-07,\n 5.0427e-07, 4.8973e-08, 5.6939e-07, 5.6151e-11, 6.4317e-08, 3.4683e-11,\n 8.6546e-08, 4.3469e-09, 1.3756e-08, 1.5197e-09, 3.4355e-07, 2.2272e-10,\n 5.4862e-08, 1.2437e-09, 2.0589e-08, 2.5701e-08, 1.6638e-08, 5.0881e-07,\n 2.7797e-08, 1.2584e-09, 4.8109e-08, 2.1821e-09, 2.7728e-09, 1.0471e-08,\n 1.9656e-08, 9.4476e-08, 9.1364e-09, 8.6568e-09, 6.7832e-09, 1.2682e-09,\n 3.1742e-09, 9.8418e-08, 1.7571e-10, 3.5521e-07, 3.0616e-08, 1.6478e-08,\n 1.5487e-07, 3.4155e-08, 3.3293e-07, 9.4788e-08, 3.3928e-10, 1.3263e-07,\n 3.4717e-09, 1.6771e-10, 1.4867e-08, 4.2947e-09, 7.0717e-08, 2.7624e-07,\n 1.1147e-10, 2.4346e-09, 3.9263e-08, 2.5557e-08, 4.2723e-10, 3.3077e-09,\n 7.9928e-09, 1.5997e-07, 1.8381e-08, 1.1952e-08, 4.3148e-08, 1.1137e-08,\n 2.3597e-07, 2.1530e-08, 2.5659e-07, 8.9658e-08, 9.1094e-09, 4.1341e-09,\n 8.9937e-09, 9.0299e-08, 1.4734e-07, 1.9092e-09, 1.5539e-07, 1.5721e-09,\n 1.2254e-07, 9.4017e-09, 5.2547e-08, 6.5910e-08], device='cuda:0')" + }, + "25": { + "step": "tensor(2504.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([5.0435e-10, 4.3119e-10, 8.5839e-10, 1.4747e-12, 1.2036e-11, 1.3834e-09,\n 1.3896e-11, 5.2806e-10, 2.2821e-10, 7.7004e-13, 1.8052e-10, 5.9359e-10,\n 6.7752e-11, 2.5892e-13, 6.0555e-13, 5.6135e-12, 1.7185e-12, 4.3783e-13,\n 4.1856e-11, 4.5078e-10, 2.6728e-10, 7.0767e-12, 6.3812e-12, 8.7493e-12,\n 4.2512e-10, 1.5368e-11, 8.2464e-12, 1.5093e-11, 1.1909e-11, 9.6577e-13,\n 2.3794e-13, 4.5802e-11, 8.6759e-12, 1.2498e-10, 2.7282e-13, 4.1902e-13,\n 1.7684e-11, 3.9138e-15, 2.6984e-11, 2.2745e-11, 3.2090e-13, 1.0836e-09,\n 3.0768e-09, 1.2145e-12, 1.2672e-10, 8.4836e-14, 3.2210e-11, 1.6430e-14,\n 4.3180e-12, 3.6392e-11, 1.4250e-13, 1.1352e-12, 3.1284e-14, 7.5079e-11,\n 2.9858e-12, 1.0132e-12, 5.2927e-10, 1.1976e-09, 1.4619e-12, 2.9598e-13,\n 1.9802e-09, 2.1054e-12, 3.3514e-10, 1.8852e-10, 1.1086e-11, 9.9000e-10,\n 1.2987e-12, 7.9423e-12, 1.0379e-12, 3.3569e-12, 4.1109e-11, 2.8252e-12,\n 3.7188e-12, 8.6005e-12, 5.6657e-12, 1.8702e-12, 2.6427e-11, 8.3437e-13,\n 1.2805e-11, 2.3993e-12, 1.8815e-10, 2.5362e-11, 1.6626e-11, 4.3741e-12,\n 7.9229e-13, 5.3473e-10, 7.9722e-11, 4.9889e-13, 3.4775e-10, 1.9231e-12,\n 1.1011e-12, 4.4943e-13, 2.0748e-13, 7.1150e-11, 9.0945e-10, 4.2152e-11,\n 9.3916e-13, 6.7558e-13, 7.7640e-11, 6.8863e-12, 1.1379e-10, 1.0043e-12,\n 7.8794e-12, 8.4380e-13, 3.8728e-13, 7.9135e-14, 5.9204e-11, 2.2353e-13,\n 2.0764e-11, 2.2153e-12, 2.2055e-13, 4.5656e-11, 9.1554e-11, 3.3855e-13,\n 6.0112e-11, 2.3982e-10, 5.9189e-13, 1.8246e-09, 2.9463e-14, 4.6353e-10,\n 6.9370e-13, 2.8738e-13, 2.1166e-11, 6.9889e-15, 8.0873e-13, 3.8022e-14,\n 6.4294e-10, 8.9135e-12, 5.4774e-16, 2.8602e-13, 4.8217e-13, 1.3507e-12,\n 1.7935e-11, 9.0722e-11, 9.1284e-13, 1.1163e-11, 3.9820e-10, 1.8164e-12,\n 2.5298e-13, 1.5718e-11, 4.2240e-12, 2.2698e-11, 2.2098e-11, 6.5769e-09,\n 3.7512e-12, 6.7722e-15, 5.1381e-11, 1.3576e-12, 1.8884e-13, 5.0813e-12,\n 3.9056e-11, 5.8724e-13, 7.1367e-14, 1.9596e-11, 9.1853e-13, 1.2064e-11,\n 3.8661e-12, 4.0001e-10, 5.4448e-13, 1.3661e-09, 5.3053e-12, 8.4339e-14,\n 2.0049e-12, 9.8057e-10, 1.6933e-10, 2.3671e-12, 3.7841e-12, 4.4246e-12,\n 1.6358e-09, 2.9345e-14, 4.6893e-10, 7.7117e-12, 4.1440e-11, 2.4439e-12,\n 3.0135e-11, 4.5558e-10, 7.0249e-11, 1.7728e-11, 3.0275e-12, 1.4779e-10,\n 3.5344e-10, 1.9889e-12, 1.9566e-10, 1.0710e-13, 3.8950e-11, 6.4325e-13,\n 6.4348e-11, 5.6687e-12, 1.8171e-11, 2.6900e-12, 9.8758e-11, 1.8328e-12,\n 5.1773e-12, 5.3102e-13, 1.1264e-12, 2.6615e-12, 9.4789e-12, 1.4616e-09,\n 4.5382e-12, 1.2172e-12, 7.8921e-12, 1.2430e-13, 5.3676e-13, 2.6154e-11,\n 4.8211e-11, 2.0223e-11, 7.7092e-12, 3.8639e-13, 1.3581e-12, 2.9121e-12,\n 2.5633e-12, 1.1855e-10, 2.1597e-13, 1.1991e-09, 1.3880e-12, 1.5668e-12,\n 5.0596e-11, 4.5093e-12, 6.5705e-11, 8.4558e-12, 2.4525e-12, 2.8185e-10,\n 2.3895e-12, 4.3631e-13, 4.9156e-11, 1.8711e-13, 1.3145e-12, 5.0186e-10,\n 6.2745e-14, 1.9107e-13, 4.7478e-12, 2.0541e-11, 4.4012e-14, 9.2595e-13,\n 2.4170e-13, 5.3392e-10, 1.0302e-11, 3.6885e-13, 5.3778e-10, 8.9606e-13,\n 3.4440e-10, 6.3033e-12, 1.2719e-10, 1.2652e-10, 3.8478e-12, 1.6366e-13,\n 8.9500e-14, 1.3899e-10, 1.6416e-10, 1.5804e-11, 4.1666e-11, 7.7304e-13,\n 2.0978e-10, 2.6258e-11, 6.0698e-12, 2.7372e-11], device='cuda:0')" + }, + "26": { + "step": "tensor(2504.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([6.8853e-10, 5.2495e-10, 2.9373e-10, 9.7211e-11, 2.3630e-11, 7.1744e-10,\n 4.5670e-12, 6.7997e-10, 6.1199e-10, 2.2455e-13, 7.5928e-10, 1.7659e-10,\n 1.0212e-10, 5.4638e-12, 2.4343e-13, 2.0946e-12, 7.7522e-14, 8.5141e-12,\n 1.4679e-10, 2.9810e-10, 1.1687e-10, 7.6657e-11, 8.1505e-11, 3.0259e-12,\n 1.6927e-10, 5.5883e-11, 4.4183e-11, 1.2549e-10, 1.8503e-11, 3.4715e-13,\n 2.1900e-12, 8.9626e-11, 8.5724e-11, 1.9617e-10, 2.9498e-14, 2.8452e-13,\n 5.9689e-11, 1.5007e-13, 6.2077e-11, 4.3347e-11, 1.6926e-12, 3.2629e-10,\n 8.8420e-10, 4.3529e-13, 8.7762e-11, 8.4715e-13, 6.4738e-11, 6.5941e-13,\n 2.0321e-11, 1.9065e-10, 1.3652e-12, 1.6246e-11, 3.7314e-13, 5.6044e-11,\n 5.5225e-12, 3.9146e-13, 1.3816e-09, 1.2119e-09, 4.8689e-13, 2.1464e-12,\n 1.7843e-09, 1.3792e-11, 1.3771e-10, 5.0075e-10, 3.8461e-11, 8.5470e-10,\n 5.0729e-13, 1.0166e-11, 8.9616e-11, 1.5715e-11, 1.7590e-10, 2.6324e-11,\n 3.8032e-11, 3.4104e-11, 7.0162e-12, 4.6708e-12, 9.2722e-11, 4.0142e-12,\n 7.3472e-11, 1.8233e-11, 9.9961e-11, 1.9866e-10, 5.7121e-12, 1.5458e-12,\n 5.5637e-12, 1.2216e-10, 2.0931e-10, 1.1602e-12, 3.4338e-10, 2.2995e-11,\n 8.2682e-12, 3.6683e-12, 3.6716e-12, 1.7596e-10, 3.9034e-10, 2.2700e-10,\n 1.0119e-11, 8.7103e-14, 2.0999e-10, 3.6535e-11, 1.0993e-10, 1.8254e-13,\n 3.0209e-11, 1.0647e-12, 5.2890e-12, 4.8152e-12, 8.4789e-11, 1.0216e-14,\n 1.1351e-10, 2.5462e-11, 5.0593e-12, 4.0700e-11, 6.3936e-11, 2.8509e-13,\n 1.1649e-10, 1.1471e-10, 8.5312e-12, 1.0552e-09, 1.4247e-13, 3.5786e-10,\n 1.3799e-11, 3.1033e-12, 5.9451e-11, 3.0225e-13, 7.8379e-12, 6.6302e-15,\n 6.1638e-10, 2.5647e-11, 3.8894e-15, 1.0637e-12, 1.3981e-11, 8.4046e-12,\n 3.0878e-11, 1.0548e-10, 1.3453e-12, 6.2698e-11, 2.0099e-10, 1.1399e-11,\n 2.4706e-13, 6.1336e-12, 5.8131e-11, 1.1114e-10, 2.0928e-10, 3.5046e-09,\n 7.1391e-13, 6.5134e-14, 1.1041e-10, 1.3774e-11, 2.2423e-14, 6.0567e-12,\n 7.1022e-11, 7.2145e-12, 5.4531e-12, 6.4006e-11, 5.4302e-12, 2.3525e-11,\n 4.8291e-12, 5.4270e-10, 4.3502e-12, 1.8089e-09, 9.4142e-12, 3.3962e-12,\n 3.2211e-13, 4.7215e-10, 9.6427e-11, 2.0885e-10, 2.4475e-12, 3.9924e-11,\n 7.2382e-10, 1.8905e-12, 3.6365e-10, 8.5414e-11, 1.0577e-10, 1.0093e-11,\n 9.0067e-11, 5.8256e-10, 1.7962e-10, 1.0047e-10, 1.0853e-11, 2.5140e-10,\n 6.2677e-10, 5.3729e-11, 7.1707e-10, 3.1931e-12, 8.0301e-11, 2.4620e-13,\n 1.1415e-10, 1.9736e-12, 4.4298e-11, 1.0854e-12, 4.3140e-10, 1.9378e-13,\n 6.9698e-11, 1.6189e-12, 1.5456e-11, 3.3567e-11, 1.3505e-11, 6.3817e-10,\n 3.6310e-11, 3.3233e-13, 4.0493e-11, 3.2131e-12, 1.5940e-12, 4.4423e-11,\n 1.6213e-11, 1.1922e-10, 1.3611e-11, 1.1474e-11, 1.5842e-11, 6.1645e-12,\n 9.4578e-13, 1.1317e-10, 2.5903e-13, 5.1765e-10, 3.9553e-11, 1.5062e-11,\n 1.9735e-10, 4.3070e-11, 4.1810e-10, 1.2179e-10, 9.1741e-13, 2.2931e-10,\n 2.0469e-12, 1.6694e-13, 6.4336e-11, 4.0745e-12, 9.0984e-11, 3.9892e-10,\n 3.3587e-14, 5.2941e-13, 5.9034e-11, 3.1613e-11, 7.4107e-14, 1.5948e-11,\n 1.8282e-12, 2.9865e-10, 3.4904e-11, 1.5730e-11, 1.6852e-10, 2.9973e-12,\n 2.9558e-10, 2.9607e-11, 2.9978e-10, 1.6448e-10, 2.6840e-11, 5.6896e-12,\n 1.2197e-11, 1.1447e-10, 1.9938e-10, 2.5783e-11, 1.9918e-10, 2.4989e-13,\n 1.4331e-10, 8.5346e-12, 6.6371e-11, 1.4699e-10], device='cuda:0')" + }, + "27": { + "step": "tensor(2504.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.7614e-12, 2.9950e-14, 6.0509e-13, ..., 4.6730e-14, 1.6853e-13,\n 8.2678e-15],\n [1.0360e-09, 1.0252e-09, 7.1307e-13, ..., 7.2263e-11, 1.3886e-10,\n 2.9059e-11],\n [8.3023e-11, 9.7354e-11, 3.9590e-14, ..., 7.6795e-12, 1.1391e-11,\n 4.7825e-12],\n ...,\n [8.5872e-10, 9.1487e-10, 6.2725e-13, ..., 4.9956e-11, 1.1841e-10,\n 2.0504e-11],\n [1.1936e-11, 1.6449e-11, 2.3556e-13, ..., 1.3390e-12, 1.0480e-12,\n 3.3467e-13],\n [1.5111e-09, 1.5657e-09, 4.2978e-13, ..., 1.1640e-10, 1.9363e-10,\n 4.6866e-11]], device='cuda:0')" + }, + "28": { + "step": "tensor(2504.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.0088e-10, 5.6202e-07, 5.7997e-08, 1.1961e-07, 2.2876e-08, 1.8974e-09,\n 5.8516e-08, 2.8985e-07, 1.6941e-07, 2.1078e-09, 6.6933e-09, 4.0223e-09,\n 9.0646e-09, 8.7126e-09, 1.7250e-09, 8.4865e-09, 3.0100e-10, 3.2343e-08,\n 2.5607e-08, 2.0676e-07, 3.5716e-09, 9.1180e-08, 5.6524e-07, 1.0526e-07,\n 1.5313e-07, 1.0884e-09, 3.2553e-09, 6.0824e-07, 1.3756e-11, 9.9465e-10,\n 3.1952e-09, 1.1756e-07, 6.7376e-08, 4.8288e-08, 1.0462e-08, 2.7889e-08,\n 1.9061e-08, 6.8813e-09, 4.6243e-08, 9.4420e-09, 8.6681e-09, 4.3960e-08,\n 4.0367e-07, 3.8439e-08, 6.8592e-09, 3.5914e-08, 1.3674e-08, 3.0033e-08,\n 6.2763e-10, 6.8729e-08, 4.1542e-08, 8.8730e-07, 6.9463e-09, 1.6261e-08,\n 2.6611e-09, 2.3449e-07, 2.0741e-06, 4.5043e-07, 3.4063e-08, 4.8276e-08,\n 3.9206e-07, 6.8467e-09, 7.7641e-08, 8.3000e-07, 1.8894e-08, 3.0337e-07,\n 2.0563e-08, 1.9559e-08, 7.3986e-07, 1.5117e-07, 1.0640e-10, 9.5022e-09,\n 6.4674e-09, 9.1840e-09, 7.1767e-11, 2.9824e-09, 2.8875e-10, 1.1372e-09,\n 5.9950e-08, 5.0930e-09, 5.4216e-10, 1.5510e-07, 2.3375e-08, 1.1429e-07,\n 2.3852e-10, 1.8419e-08, 1.2831e-07, 2.1163e-09, 4.5914e-07, 2.7480e-09,\n 9.1995e-08, 3.7891e-08, 1.2961e-07, 1.9041e-08, 8.6467e-08, 9.5874e-07,\n 1.4626e-09, 6.6027e-10, 5.7265e-08, 2.3413e-08, 6.8251e-10, 9.8989e-09,\n 4.4475e-09, 5.2453e-08, 1.1583e-09, 1.6277e-08, 7.2876e-08, 2.8551e-11,\n 1.1092e-08, 5.5289e-10, 4.3713e-11, 7.8193e-09, 6.2485e-11, 4.4751e-09,\n 7.5470e-08, 1.3007e-07, 8.0439e-08, 6.1431e-07, 6.0999e-10, 3.8277e-09,\n 2.5393e-07, 5.0585e-10, 9.7987e-10, 7.9880e-11, 1.0798e-09, 9.2064e-10,\n 6.5502e-08, 1.0185e-08, 1.1969e-08, 4.9515e-08, 2.3931e-08, 3.8925e-09,\n 9.7436e-09, 3.0252e-09, 6.5110e-09, 1.8678e-11, 1.6610e-08, 1.3876e-08,\n 5.2873e-08, 9.7140e-07, 6.7460e-08, 3.8882e-10, 3.3698e-08, 1.6534e-06,\n 4.2644e-08, 7.3150e-09, 4.5175e-09, 5.8717e-09, 4.6812e-09, 1.1790e-08,\n 2.2968e-09, 1.6014e-09, 1.2585e-09, 1.2206e-07, 1.0329e-09, 5.0613e-10,\n 5.1888e-10, 2.3481e-07, 6.1887e-09, 3.9800e-08, 3.6128e-08, 3.1646e-11,\n 9.9545e-10, 3.7613e-07, 2.3290e-09, 2.4357e-07, 2.2391e-10, 4.7313e-08,\n 7.0219e-07, 1.1232e-08, 3.6064e-07, 7.9252e-09, 1.6951e-08, 6.2660e-11,\n 8.6220e-10, 9.2329e-09, 3.6588e-08, 4.8087e-11, 3.0154e-07, 6.4297e-09,\n 1.4520e-07, 1.0319e-07, 5.0743e-09, 3.5111e-09, 4.5470e-10, 1.8702e-08,\n 5.2128e-09, 2.1422e-09, 7.6332e-09, 3.3749e-08, 1.8013e-07, 1.0709e-11,\n 5.9593e-08, 9.8958e-08, 7.7988e-08, 2.4422e-06, 7.8202e-10, 4.2524e-08,\n 7.5936e-09, 6.2826e-08, 2.1594e-08, 7.0417e-08, 2.2193e-08, 1.1831e-08,\n 2.4146e-08, 2.1747e-08, 1.2607e-10, 4.6344e-08, 1.1024e-10, 5.3024e-07,\n 4.0335e-09, 9.9194e-10, 8.1661e-10, 5.3442e-07, 1.6619e-07, 3.9734e-08,\n 2.9395e-08, 4.5921e-07, 1.5107e-06, 4.1276e-10, 1.2687e-09, 1.3612e-07,\n 6.0697e-10, 8.9449e-08, 6.9324e-09, 1.8854e-09, 4.2517e-07, 3.4301e-07,\n 3.7984e-10, 1.4393e-09, 3.1783e-07, 1.7634e-08, 2.4932e-08, 1.3325e-08,\n 1.6520e-08, 2.9571e-10, 1.0070e-08, 3.3166e-10, 5.0573e-08, 8.3477e-09,\n 2.4491e-10, 4.2091e-08, 3.5093e-07, 1.7345e-08, 5.4041e-08, 1.3921e-08,\n 4.0492e-07, 3.8851e-09, 1.6282e-07, 2.8430e-08, 4.9138e-09, 2.1640e-11,\n 1.2990e-08, 3.8670e-07, 7.3095e-09, 8.0196e-07], device='cuda:0')" + }, + "29": { + "step": "tensor(2504.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.9710e-12, 9.4406e-10, 2.2831e-11, 2.5432e-12, 3.8308e-12, 9.8559e-12,\n 1.3337e-12, 1.0782e-10, 2.9673e-11, 6.1189e-12, 2.3286e-11, 1.1657e-12,\n 5.7418e-12, 4.9102e-11, 6.2403e-12, 1.4723e-11, 1.3855e-13, 5.5172e-11,\n 1.0955e-12, 3.3234e-10, 6.5985e-12, 2.3403e-11, 2.5954e-09, 4.3241e-11,\n 4.4185e-10, 1.8133e-13, 7.9672e-13, 3.9879e-10, 4.8711e-13, 2.6432e-14,\n 8.6399e-13, 9.2614e-11, 1.4945e-11, 8.2441e-11, 1.1782e-12, 2.5566e-11,\n 1.7255e-12, 1.1956e-12, 4.7014e-11, 8.2382e-13, 1.8193e-11, 9.8472e-12,\n 5.3455e-10, 4.5207e-11, 1.4266e-11, 1.0939e-10, 7.1458e-13, 1.2293e-11,\n 1.0182e-12, 8.2674e-11, 2.1895e-11, 2.6311e-09, 4.0510e-12, 1.1857e-12,\n 6.1389e-12, 1.8737e-10, 4.5707e-09, 2.0289e-10, 1.2760e-11, 2.7842e-11,\n 5.4099e-11, 1.2983e-11, 2.0971e-11, 4.1338e-10, 9.5309e-13, 1.4582e-10,\n 1.8651e-11, 1.1533e-10, 2.7703e-09, 2.4089e-10, 1.2422e-12, 1.9377e-11,\n 2.4925e-12, 2.3818e-11, 9.2394e-12, 4.0847e-13, 3.8255e-12, 1.3474e-13,\n 1.5902e-11, 4.2963e-12, 1.4014e-14, 9.7508e-11, 1.1550e-12, 1.0089e-09,\n 3.2650e-13, 2.2195e-10, 1.7998e-11, 2.3386e-12, 9.4269e-10, 8.1543e-12,\n 2.3973e-10, 2.8940e-12, 3.6197e-11, 1.2412e-12, 1.7862e-11, 1.0171e-09,\n 4.6226e-11, 1.8597e-13, 3.9677e-11, 1.5488e-11, 5.2089e-12, 6.9187e-13,\n 1.9874e-12, 7.5633e-12, 1.5509e-12, 5.6191e-11, 1.5490e-10, 3.9108e-14,\n 2.4422e-12, 2.3292e-12, 1.2070e-12, 2.0414e-11, 1.2256e-11, 2.7085e-13,\n 3.4926e-10, 1.5595e-10, 5.5133e-11, 1.3727e-09, 3.0384e-14, 1.1114e-12,\n 1.4477e-10, 1.2805e-12, 1.5749e-13, 3.2320e-13, 5.2146e-13, 1.1600e-13,\n 1.2258e-12, 5.4588e-13, 6.9107e-13, 3.9924e-12, 4.3239e-12, 8.0944e-14,\n 3.1482e-13, 1.6901e-12, 2.3718e-12, 5.5534e-13, 8.8565e-12, 1.4175e-11,\n 1.0875e-10, 3.0782e-09, 1.0056e-11, 2.4354e-12, 6.4314e-12, 7.2695e-10,\n 7.8711e-12, 8.8226e-12, 4.6482e-13, 1.3445e-13, 5.5222e-13, 4.0130e-12,\n 7.5548e-13, 2.3647e-13, 1.7695e-12, 4.0563e-10, 2.4307e-13, 1.1554e-12,\n 4.7431e-14, 1.5472e-10, 2.3075e-14, 2.7735e-12, 1.4886e-11, 3.2658e-13,\n 9.0693e-12, 1.3411e-09, 1.3464e-13, 2.2442e-11, 2.2144e-12, 2.5281e-11,\n 9.4095e-10, 1.7922e-11, 7.6362e-10, 1.5951e-13, 2.0607e-11, 3.6633e-13,\n 6.3896e-12, 1.5432e-11, 4.6134e-12, 1.3763e-12, 6.6505e-11, 2.0658e-13,\n 1.5244e-11, 1.0796e-10, 2.8101e-11, 2.1654e-11, 2.5720e-12, 1.1450e-11,\n 1.3190e-11, 1.0544e-12, 7.9193e-12, 1.9331e-11, 8.6766e-11, 2.7630e-12,\n 1.1183e-11, 7.8073e-11, 7.3585e-12, 7.0814e-09, 1.4615e-14, 3.0436e-12,\n 3.1391e-13, 9.2663e-12, 1.3037e-12, 9.3825e-11, 1.7407e-12, 2.9584e-11,\n 6.4383e-12, 3.7713e-13, 8.4747e-13, 8.4030e-12, 8.7577e-13, 1.0861e-09,\n 2.6807e-12, 2.9815e-13, 6.1490e-13, 1.8006e-09, 6.7273e-12, 6.3638e-12,\n 3.2082e-12, 1.0572e-09, 2.8872e-09, 4.1711e-12, 5.8750e-13, 3.9520e-10,\n 7.8761e-14, 3.5010e-11, 3.5017e-12, 1.2824e-13, 8.0309e-11, 2.5235e-10,\n 1.3155e-14, 1.2216e-12, 5.6777e-11, 1.3546e-11, 1.2340e-10, 3.4857e-10,\n 1.2782e-12, 1.7397e-12, 1.3765e-11, 1.1201e-12, 9.1406e-11, 7.4333e-14,\n 3.7240e-12, 5.3370e-11, 4.0692e-10, 1.2463e-11, 4.7632e-11, 2.2436e-12,\n 3.5613e-10, 1.2888e-13, 1.2093e-10, 6.7890e-11, 6.8570e-13, 1.3585e-14,\n 1.4589e-12, 2.5690e-11, 5.6167e-13, 1.5675e-09], device='cuda:0')" + }, + "30": { + "step": "tensor(2504.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([6.5679e-13, 7.7381e-10, 9.0448e-11, 1.3735e-10, 3.5862e-11, 4.5921e-12,\n 8.0079e-11, 3.9836e-10, 2.3432e-10, 9.6894e-12, 6.6087e-12, 1.0699e-11,\n 3.1390e-11, 5.8652e-11, 1.3711e-11, 5.0782e-12, 5.1728e-13, 5.9256e-11,\n 3.0085e-11, 3.0562e-10, 2.4109e-12, 1.3251e-10, 5.6276e-10, 1.8348e-10,\n 2.5434e-10, 2.1142e-12, 7.7268e-12, 6.8749e-10, 1.4255e-14, 1.6703e-12,\n 3.0235e-12, 1.7734e-10, 9.8564e-11, 1.5420e-10, 2.4571e-12, 6.9425e-11,\n 2.9868e-11, 6.5999e-12, 8.1334e-11, 5.3777e-12, 3.1582e-11, 6.7408e-11,\n 5.7774e-10, 7.1165e-11, 3.6987e-11, 6.7668e-11, 1.0853e-11, 7.4708e-12,\n 3.6274e-13, 8.3502e-11, 6.2363e-11, 1.2755e-09, 1.4369e-11, 1.1377e-11,\n 7.7674e-12, 2.5199e-10, 2.7800e-09, 6.1350e-10, 3.4378e-11, 7.1678e-11,\n 5.1757e-10, 2.1137e-11, 7.6707e-11, 1.0074e-09, 2.8085e-11, 4.2094e-10,\n 3.2346e-11, 6.1959e-11, 1.0509e-09, 2.2785e-10, 1.1474e-13, 7.1254e-12,\n 1.4053e-11, 5.1673e-11, 1.1743e-11, 1.0792e-12, 1.2579e-12, 5.1990e-14,\n 9.0432e-11, 1.2573e-11, 3.5529e-13, 1.9520e-10, 3.1279e-11, 2.6276e-10,\n 1.9369e-12, 7.1856e-11, 1.4625e-10, 1.1868e-12, 6.8045e-10, 2.1481e-12,\n 1.6736e-10, 5.4331e-11, 1.3532e-10, 2.6175e-11, 1.2255e-10, 1.1656e-09,\n 3.0877e-11, 5.4327e-14, 9.9611e-11, 5.0540e-11, 3.0042e-12, 1.4887e-11,\n 1.3138e-11, 6.5783e-11, 5.7234e-13, 7.9803e-11, 1.2267e-10, 7.0184e-13,\n 3.1508e-11, 4.9815e-13, 5.4377e-13, 2.2890e-11, 1.1884e-11, 6.3850e-12,\n 8.3169e-11, 1.9813e-10, 1.2588e-10, 7.4695e-10, 1.1827e-12, 4.8535e-12,\n 2.7133e-10, 2.1033e-13, 1.6013e-12, 1.7214e-13, 6.7176e-12, 1.9083e-12,\n 6.9897e-11, 5.5233e-12, 6.0543e-12, 4.3349e-11, 3.6149e-11, 9.2443e-13,\n 8.1247e-12, 4.6240e-13, 6.4831e-12, 3.2149e-13, 3.5912e-11, 2.1996e-11,\n 8.9428e-11, 1.3574e-09, 9.4391e-11, 2.3494e-13, 6.6239e-11, 2.0772e-09,\n 3.7437e-11, 3.6852e-12, 9.1226e-12, 9.0568e-12, 8.4015e-12, 1.9864e-11,\n 1.7468e-13, 3.9628e-12, 5.8007e-13, 1.9640e-10, 1.2333e-12, 1.7536e-13,\n 5.7312e-15, 3.2776e-10, 4.3842e-12, 4.6730e-11, 1.6835e-11, 2.6751e-14,\n 3.7513e-12, 4.5616e-10, 4.2308e-12, 3.2334e-10, 1.5940e-12, 7.6432e-11,\n 8.0760e-10, 1.2757e-11, 3.7132e-10, 4.9911e-12, 5.4491e-11, 1.1084e-13,\n 3.2323e-12, 4.5166e-12, 2.9912e-11, 3.9109e-13, 3.3580e-10, 9.3944e-12,\n 1.7154e-10, 9.2195e-11, 7.1063e-12, 1.0369e-11, 8.5345e-13, 3.4441e-11,\n 2.7791e-11, 6.4111e-13, 3.6267e-11, 5.5786e-11, 2.5089e-10, 4.0897e-15,\n 8.7275e-11, 1.4315e-10, 7.5719e-11, 3.2605e-09, 6.4383e-14, 6.1137e-11,\n 1.1526e-11, 5.4864e-11, 1.7207e-11, 1.1596e-10, 1.8082e-11, 3.6122e-11,\n 2.4778e-11, 3.0927e-11, 6.1455e-13, 6.7318e-11, 1.0794e-12, 6.8525e-10,\n 9.7989e-13, 1.7113e-13, 1.7918e-13, 6.8613e-10, 2.2165e-10, 3.6994e-11,\n 4.2355e-11, 6.5241e-10, 2.0297e-09, 2.8797e-12, 2.4042e-12, 1.9467e-10,\n 1.0748e-12, 1.0485e-10, 2.6338e-11, 8.4466e-13, 5.6319e-10, 4.4623e-10,\n 4.0933e-13, 3.4687e-12, 3.9820e-10, 2.9952e-11, 3.9655e-11, 8.0860e-11,\n 7.1977e-12, 2.4045e-12, 4.5367e-11, 1.2348e-13, 1.3264e-10, 3.7663e-12,\n 3.2964e-13, 8.1226e-11, 3.9202e-10, 3.7618e-11, 7.8824e-11, 2.2974e-11,\n 5.6112e-10, 6.6549e-12, 2.4103e-10, 4.4465e-11, 7.2934e-12, 1.9672e-14,\n 1.5412e-11, 5.0994e-10, 1.0617e-11, 1.1123e-09], device='cuda:0')" + }, + "31": { + "step": "tensor(2504.)", + "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.3868e-10, 3.5277e-10, 1.7833e-13, ..., 2.4725e-11, 4.0442e-11,\n 1.1753e-11],\n [4.5666e-12, 5.8997e-12, 1.0605e-14, ..., 4.2893e-13, 5.3468e-13,\n 1.1553e-13],\n [2.9938e-10, 3.2953e-10, 1.3024e-13, ..., 2.6344e-11, 3.1763e-11,\n 1.1461e-11],\n ...,\n [2.1508e-09, 2.4148e-09, 4.8157e-14, ..., 1.9527e-10, 2.4156e-10,\n 1.0970e-10],\n [1.5896e-10, 1.8201e-10, 3.8877e-13, ..., 1.2758e-11, 2.1918e-11,\n 6.4550e-12],\n [1.8318e-12, 1.9308e-12, 3.3408e-16, ..., 1.2721e-13, 1.5415e-13,\n 3.5101e-14]], device='cuda:0')" + }, + "32": { + "step": "tensor(2504.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.7689e-07, 2.1301e-09, 1.7006e-07, 6.8511e-09, 1.4572e-10, 2.3300e-09,\n 3.4235e-08, 3.3181e-07, 1.2361e-07, 6.9997e-09, 1.2275e-06, 9.5959e-09,\n 6.2678e-11, 5.9557e-11, 9.4955e-10, 2.2901e-08, 1.4273e-09, 1.1109e-07,\n 1.8432e-07, 6.9435e-08, 1.2492e-07, 3.3760e-09, 1.1177e-08, 4.8930e-07,\n 3.2402e-09, 4.3214e-08, 1.5368e-07, 2.7718e-09, 2.0918e-10, 5.9036e-08,\n 2.3743e-09, 2.0714e-07, 5.3659e-08, 4.4498e-08, 1.3157e-08, 1.3911e-07,\n 3.2991e-09, 1.7628e-08, 1.2649e-07, 9.4211e-09, 4.3875e-09, 3.6157e-08,\n 1.2163e-07, 3.0624e-09, 2.2139e-10, 1.1405e-09, 5.7367e-08, 9.2561e-10,\n 1.1788e-07, 1.0728e-07, 1.8157e-08, 1.0860e-07, 1.4352e-09, 5.3840e-08,\n 2.0206e-08, 1.5541e-07, 1.2260e-07, 4.5834e-07, 2.4438e-08, 8.6141e-08,\n 1.6400e-06, 1.1769e-09, 1.6341e-09, 1.5545e-07, 1.9545e-09, 3.2462e-07,\n 3.9040e-09, 3.2320e-09, 2.6659e-10, 6.2289e-08, 9.0254e-08, 1.3103e-07,\n 1.0821e-07, 2.9382e-08, 7.3722e-10, 7.6106e-10, 1.4101e-08, 1.0166e-08,\n 1.2457e-07, 4.1495e-09, 7.0022e-11, 5.3620e-07, 4.5311e-07, 1.3329e-10,\n 4.0047e-08, 1.1707e-08, 1.8637e-07, 5.6575e-08, 5.4173e-07, 2.2877e-09,\n 1.4209e-07, 6.5365e-08, 8.1955e-09, 2.6639e-09, 2.3266e-08, 4.0086e-07,\n 6.5208e-08, 8.3853e-10, 3.0124e-07, 2.2706e-08, 1.5952e-07, 4.2562e-10,\n 7.5400e-09, 3.7087e-08, 6.4499e-09, 3.2535e-08, 1.6391e-08, 2.0321e-10,\n 2.2004e-08, 1.3501e-09, 8.7207e-08, 8.4147e-13, 8.4525e-10, 4.0203e-10,\n 2.6617e-07, 4.2455e-08, 1.0028e-07, 2.2562e-07, 4.2438e-09, 5.2592e-07,\n 2.3477e-08, 2.1335e-08, 5.8049e-08, 2.6807e-10, 4.9142e-09, 1.9779e-09,\n 1.9835e-09, 3.0510e-08, 6.0014e-09, 2.3709e-09, 1.4575e-09, 4.1408e-08,\n 1.6591e-09, 2.7766e-11, 7.3035e-10, 4.3209e-09, 1.4383e-09, 5.9245e-09,\n 3.0718e-08, 5.1275e-07, 9.9189e-08, 5.9470e-09, 6.4859e-07, 3.5432e-06,\n 2.6454e-07, 9.0689e-09, 3.2400e-09, 1.3025e-07, 7.3879e-11, 1.2350e-08,\n 1.4295e-08, 5.8398e-10, 4.4776e-09, 1.2196e-07, 1.1221e-07, 4.3443e-08,\n 1.4423e-08, 2.9068e-07, 3.5344e-09, 2.0073e-08, 1.7907e-09, 2.9031e-10,\n 5.5040e-08, 1.2304e-07, 1.3222e-09, 8.1816e-08, 8.9917e-08, 2.9421e-09,\n 3.3353e-07, 1.0235e-09, 4.7488e-07, 1.2536e-07, 1.0978e-07, 2.4378e-10,\n 4.5166e-09, 1.2571e-08, 4.9911e-08, 1.9865e-07, 2.9978e-07, 6.1409e-10,\n 8.0195e-07, 6.4143e-10, 6.0315e-07, 6.3970e-09, 3.8708e-09, 3.9120e-08,\n 1.9069e-09, 3.4524e-09, 4.7023e-08, 1.0374e-08, 1.8108e-08, 5.9597e-09,\n 2.6453e-07, 1.2550e-07, 1.6378e-08, 2.3742e-06, 8.0659e-09, 6.9345e-10,\n 4.5639e-09, 6.0783e-09, 3.7714e-07, 1.9243e-09, 7.5443e-08, 9.5690e-10,\n 3.7070e-09, 7.2260e-08, 2.2219e-09, 2.5608e-07, 2.2523e-09, 9.2244e-07,\n 6.3234e-10, 8.8009e-08, 1.9293e-09, 2.3631e-07, 1.3611e-08, 4.1970e-09,\n 1.8281e-10, 9.8015e-09, 1.5772e-06, 1.3026e-07, 2.0937e-08, 1.3947e-09,\n 4.8839e-10, 2.4050e-08, 1.1630e-08, 3.1900e-08, 4.1069e-07, 1.7410e-08,\n 5.4455e-09, 4.4014e-09, 3.0622e-09, 1.4207e-08, 1.2031e-09, 7.0658e-08,\n 1.9009e-08, 9.7195e-08, 9.4809e-09, 5.3922e-10, 5.3477e-08, 1.4727e-07,\n 2.1984e-07, 1.5655e-09, 1.3082e-07, 9.1706e-08, 8.5733e-10, 1.3082e-09,\n 1.4644e-07, 4.1890e-08, 2.0546e-08, 2.2402e-09, 1.1325e-07, 4.1172e-10,\n 9.7228e-08, 1.3873e-06, 1.0239e-07, 9.2072e-10], device='cuda:0')" + }, + "33": { + "step": "tensor(2504.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([9.1658e-12, 7.5568e-12, 6.0718e-11, 4.4676e-12, 1.5958e-11, 1.0221e-11,\n 1.6237e-12, 3.0589e-10, 1.4266e-11, 9.6176e-12, 1.8478e-09, 4.1747e-12,\n 3.1966e-12, 1.8259e-13, 1.9633e-11, 6.4977e-13, 1.2315e-12, 3.1479e-10,\n 1.0136e-10, 4.3208e-11, 7.9330e-11, 1.2194e-11, 5.6938e-13, 9.4440e-10,\n 8.8215e-13, 1.2225e-11, 8.8909e-11, 1.6386e-12, 1.8472e-12, 8.2121e-11,\n 4.1051e-13, 3.9837e-10, 6.9143e-12, 5.1325e-11, 1.9046e-12, 7.9278e-10,\n 1.6389e-12, 3.1072e-12, 7.1747e-11, 2.8024e-13, 1.9233e-12, 6.5629e-12,\n 4.4897e-11, 5.9773e-11, 2.5731e-13, 8.9844e-14, 1.9447e-11, 1.5463e-14,\n 3.9316e-10, 3.4381e-11, 1.1144e-11, 4.9555e-12, 6.1958e-12, 1.3808e-11,\n 2.0668e-11, 3.2343e-11, 1.5183e-11, 1.9844e-10, 5.6252e-12, 4.3079e-11,\n 2.0602e-09, 3.5620e-13, 5.6171e-12, 1.1852e-11, 2.8418e-13, 1.8793e-10,\n 2.0571e-13, 3.3884e-13, 8.4672e-12, 1.5259e-11, 4.5646e-11, 5.9623e-12,\n 9.8090e-12, 7.8130e-11, 4.9064e-12, 1.9562e-13, 1.9043e-13, 1.5580e-12,\n 3.4689e-11, 7.7429e-11, 1.1907e-14, 1.5726e-09, 3.5719e-10, 2.9413e-12,\n 1.8653e-11, 1.2340e-11, 8.6397e-11, 2.2027e-11, 1.8884e-09, 1.5945e-12,\n 6.6815e-10, 4.4302e-10, 1.8103e-13, 1.4544e-11, 2.0026e-12, 9.3006e-11,\n 2.3057e-10, 1.7031e-12, 1.2766e-10, 1.0476e-11, 1.8030e-10, 1.2846e-12,\n 4.7761e-12, 4.7016e-12, 1.5393e-12, 2.8692e-10, 8.3647e-13, 1.8811e-12,\n 3.7373e-12, 5.6756e-13, 3.7110e-11, 1.7020e-13, 3.8145e-12, 2.2349e-12,\n 4.2566e-10, 2.1265e-10, 8.7967e-11, 7.8359e-11, 8.4368e-13, 7.1794e-10,\n 8.8877e-13, 7.2256e-13, 1.0733e-10, 7.0406e-14, 3.2650e-12, 4.1094e-13,\n 1.7750e-12, 6.0770e-12, 6.4893e-12, 3.7302e-12, 7.9213e-12, 2.2039e-11,\n 9.7275e-13, 2.4235e-14, 4.5300e-13, 8.3844e-13, 3.6585e-12, 1.9565e-12,\n 1.5567e-11, 4.2887e-10, 2.2588e-10, 2.0261e-12, 7.2695e-10, 1.4149e-08,\n 2.7067e-10, 2.8143e-12, 4.0177e-13, 8.4849e-11, 1.6720e-13, 2.0685e-11,\n 5.8138e-11, 7.9790e-13, 6.7495e-12, 9.5245e-11, 1.8282e-11, 2.8355e-10,\n 1.0712e-12, 1.2118e-10, 1.0530e-12, 5.6948e-11, 4.5642e-13, 5.8432e-13,\n 1.5080e-12, 1.2471e-10, 7.4980e-13, 7.5286e-13, 2.2084e-10, 2.4471e-13,\n 4.5389e-10, 7.9563e-13, 1.0475e-09, 3.8762e-11, 1.4746e-10, 1.3297e-13,\n 1.0687e-12, 5.1743e-12, 1.2997e-11, 7.1352e-11, 4.3967e-11, 2.3017e-12,\n 5.6027e-10, 1.3313e-12, 1.7177e-10, 1.6134e-11, 3.5865e-13, 1.2523e-11,\n 8.4405e-13, 5.2634e-12, 1.0850e-10, 4.5901e-13, 9.7215e-13, 1.4606e-12,\n 1.1408e-09, 1.7599e-10, 6.8856e-13, 5.8594e-09, 2.0819e-13, 2.3890e-12,\n 4.9548e-13, 7.5096e-13, 2.9711e-10, 9.1248e-12, 1.5162e-11, 3.6567e-12,\n 3.3916e-12, 1.5227e-11, 2.1532e-11, 1.3240e-10, 1.1906e-12, 2.8964e-09,\n 1.5856e-12, 2.4586e-11, 1.7727e-13, 3.4278e-10, 7.1443e-12, 7.5123e-12,\n 9.4749e-13, 6.2220e-13, 2.4371e-09, 3.5860e-10, 3.9040e-11, 3.4752e-13,\n 3.2208e-14, 4.2130e-11, 5.2296e-12, 6.3054e-12, 8.2424e-11, 2.0428e-12,\n 2.7616e-13, 1.0450e-12, 1.3572e-11, 4.3055e-13, 2.3325e-13, 1.1522e-10,\n 3.3149e-12, 2.9320e-11, 2.8645e-12, 1.0498e-13, 3.5062e-10, 1.5022e-10,\n 4.2587e-10, 2.8009e-13, 1.1990e-11, 4.4317e-11, 3.0120e-13, 5.2069e-14,\n 5.8538e-11, 1.9985e-11, 1.5424e-12, 2.9524e-12, 2.8859e-11, 1.3274e-13,\n 1.2234e-10, 1.0601e-09, 4.8970e-11, 9.6804e-12], device='cuda:0')" + }, + "34": { + "step": "tensor(2504.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.1566e-10, 2.8212e-12, 2.4293e-10, 5.9258e-12, 1.6155e-11, 3.7153e-12,\n 4.9620e-11, 4.7397e-10, 1.7145e-10, 1.5791e-11, 1.4669e-09, 3.9229e-11,\n 1.1098e-12, 2.0715e-13, 2.2620e-11, 2.2317e-11, 3.7589e-12, 1.8144e-10,\n 2.0522e-10, 1.3879e-10, 1.9598e-10, 5.0059e-12, 9.2479e-12, 6.8151e-10,\n 1.5921e-11, 6.4076e-11, 2.0472e-10, 8.4098e-13, 6.2242e-13, 9.0866e-11,\n 1.9371e-12, 2.7960e-10, 7.9157e-11, 1.2826e-10, 1.1477e-12, 2.6588e-10,\n 1.1554e-11, 7.7878e-12, 1.8311e-10, 1.2032e-12, 1.2748e-11, 5.5886e-11,\n 2.0293e-10, 2.8652e-11, 2.4839e-12, 2.7791e-12, 5.1275e-11, 1.6703e-13,\n 1.7536e-10, 1.3936e-10, 2.7866e-11, 1.4962e-10, 5.2446e-12, 4.1062e-11,\n 3.4156e-11, 1.7560e-10, 1.6468e-10, 6.2014e-10, 2.7506e-11, 1.1162e-10,\n 2.1607e-09, 2.0198e-12, 1.4404e-12, 1.8983e-10, 3.2871e-12, 4.4843e-10,\n 4.6255e-14, 5.9091e-12, 8.9571e-13, 9.1273e-11, 1.2710e-10, 1.3948e-10,\n 1.4309e-10, 8.7281e-11, 5.7073e-12, 1.0934e-13, 2.1060e-11, 7.1013e-12,\n 1.6902e-10, 3.9655e-11, 1.2756e-13, 6.2004e-10, 6.1283e-10, 1.1562e-12,\n 3.9931e-11, 2.7767e-11, 2.1733e-10, 8.3401e-11, 8.0003e-10, 1.6333e-12,\n 2.5872e-10, 1.5579e-10, 4.0413e-12, 4.2102e-12, 3.5445e-11, 5.0956e-10,\n 1.2279e-10, 4.3031e-12, 4.1771e-10, 5.7419e-11, 2.3959e-10, 4.8653e-13,\n 3.5288e-11, 4.5053e-11, 3.5007e-12, 1.4409e-10, 2.4390e-11, 5.6915e-13,\n 4.3083e-11, 2.9558e-13, 1.2726e-10, 2.8932e-14, 7.5001e-12, 1.0134e-12,\n 3.2236e-10, 9.2639e-11, 1.5362e-10, 2.8778e-10, 7.9579e-12, 7.1382e-10,\n 1.7992e-11, 2.2729e-11, 9.7846e-11, 1.3266e-12, 2.2260e-11, 1.4590e-13,\n 7.0792e-13, 1.5890e-11, 2.6197e-12, 1.2544e-12, 2.8176e-12, 1.9563e-11,\n 1.6467e-12, 1.4248e-12, 2.1271e-12, 7.4441e-12, 1.3897e-12, 1.0740e-11,\n 4.9326e-11, 6.9738e-10, 1.5928e-10, 4.2583e-12, 8.3384e-10, 4.4006e-09,\n 2.6299e-10, 1.1527e-11, 1.3776e-11, 1.8955e-10, 2.0874e-13, 1.7893e-11,\n 7.2091e-11, 3.3727e-12, 2.2458e-12, 1.7885e-10, 1.5827e-10, 6.9356e-11,\n 6.1370e-12, 3.9681e-10, 6.5842e-12, 2.2490e-11, 1.4302e-13, 3.4415e-13,\n 7.5886e-11, 1.7059e-10, 1.9379e-13, 1.1163e-10, 1.5185e-10, 6.0452e-12,\n 3.6279e-10, 2.5302e-13, 5.1594e-10, 1.3987e-10, 1.8785e-10, 3.1753e-13,\n 9.4874e-12, 1.7932e-11, 4.3516e-11, 2.4004e-10, 3.3678e-10, 8.6916e-13,\n 9.5185e-10, 1.0451e-13, 8.0676e-10, 1.8003e-11, 7.3789e-12, 6.2054e-11,\n 1.0379e-11, 2.2401e-12, 1.2009e-10, 1.6724e-11, 2.7033e-11, 8.5844e-12,\n 4.4014e-10, 1.7819e-10, 1.2440e-11, 3.1381e-09, 2.8966e-13, 1.3077e-12,\n 7.8178e-12, 3.7958e-12, 4.0608e-10, 1.4411e-11, 7.0129e-11, 1.2615e-11,\n 3.0637e-12, 1.0508e-10, 9.8164e-12, 3.5079e-10, 6.7813e-12, 1.1614e-09,\n 6.3732e-14, 9.3752e-11, 8.4683e-13, 3.2628e-10, 1.8365e-11, 2.5060e-12,\n 3.1677e-13, 1.5200e-11, 2.0950e-09, 2.1909e-10, 5.7341e-11, 8.4057e-12,\n 1.4602e-12, 3.6646e-11, 3.8245e-11, 2.6581e-11, 5.5188e-10, 3.4752e-11,\n 9.5658e-12, 9.6695e-13, 3.9124e-12, 2.1950e-11, 1.1600e-13, 1.3351e-10,\n 8.8109e-12, 1.4408e-10, 3.6425e-11, 1.2949e-12, 1.7876e-10, 1.3750e-10,\n 3.2166e-10, 3.2710e-12, 1.4673e-10, 1.4186e-10, 6.8110e-12, 2.7083e-12,\n 2.0617e-10, 6.4867e-11, 3.0121e-11, 1.3738e-11, 1.6194e-10, 3.8547e-14,\n 1.0944e-10, 1.8292e-09, 1.4473e-10, 2.8571e-12], device='cuda:0')" + }, + "35": { + "step": "tensor(2504.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.7814e-11, 2.1030e-11, 1.0970e-14, ..., 1.5926e-12, 1.8348e-12,\n 7.8504e-13],\n [9.1286e-13, 5.2039e-13, 5.3265e-14, ..., 2.2477e-14, 2.6228e-13,\n 1.2107e-13],\n [6.3202e-10, 7.2591e-10, 8.9526e-13, ..., 5.1283e-11, 8.7455e-11,\n 2.2736e-11],\n ...,\n [2.1155e-09, 2.1772e-09, 2.3828e-12, ..., 1.4333e-10, 2.5639e-10,\n 7.0505e-11],\n [5.8605e-14, 2.0223e-15, 2.2159e-14, ..., 1.1119e-14, 1.2913e-14,\n 3.7693e-15],\n [1.1854e-09, 1.3500e-09, 8.6191e-13, ..., 1.0102e-10, 1.6207e-10,\n 4.2182e-11]], device='cuda:0')" + }, + "36": { + "step": "tensor(2504.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.0302e-08, 3.3750e-11, 3.7092e-07, 1.6737e-06, 4.0887e-08, 8.7252e-10,\n 2.4468e-07, 1.4391e-07, 1.0242e-07, 2.3632e-10, 1.3892e-08, 7.5826e-11,\n 7.8771e-08, 4.7002e-09, 2.4413e-09, 3.1388e-08, 4.2148e-09, 3.9611e-08,\n 1.5058e-09, 3.3873e-08, 8.6142e-09, 1.9350e-10, 2.5646e-07, 5.1933e-07,\n 4.1168e-08, 6.2099e-09, 6.2734e-08, 5.5516e-08, 2.3219e-09, 1.3286e-08,\n 2.0114e-08, 3.1027e-08, 6.2282e-08, 5.0042e-08, 5.5554e-10, 4.2458e-08,\n 1.4733e-07, 2.0174e-08, 1.1088e-07, 8.4537e-10, 2.2594e-07, 1.3151e-09,\n 1.4304e-07, 1.7240e-07, 3.5233e-09, 4.4410e-10, 1.4193e-08, 5.6881e-09,\n 1.7590e-09, 2.1931e-07, 4.5039e-08, 2.1417e-07, 6.7893e-11, 2.7237e-08,\n 8.3768e-09, 2.5008e-09, 5.9538e-07, 8.8946e-09, 5.3725e-08, 9.1800e-11,\n 2.8094e-10, 6.4151e-10, 8.1664e-08, 4.0998e-08, 3.1874e-08, 1.9312e-07,\n 3.9088e-08, 1.4142e-09, 4.6141e-08, 1.8484e-07, 3.7573e-08, 2.2651e-08,\n 1.5144e-08, 2.2692e-10, 4.0700e-11, 1.9509e-08, 3.4592e-08, 4.0240e-09,\n 3.3996e-07, 3.4428e-11, 4.7450e-10, 3.7419e-08, 5.1085e-07, 1.0833e-10,\n 4.1392e-08, 1.2444e-09, 4.4067e-08, 5.2452e-09, 6.5984e-08, 4.6019e-07,\n 6.7153e-08, 1.6355e-08, 2.7615e-09, 4.6462e-08, 3.0721e-07, 1.6210e-06,\n 6.2840e-10, 8.5248e-10, 1.0167e-08, 2.9468e-08, 5.7269e-08, 1.1027e-08,\n 1.4825e-08, 5.1406e-08, 3.1462e-07, 7.5232e-10, 6.5401e-11, 1.8642e-08,\n 2.2831e-08, 1.7658e-07, 1.7623e-07, 2.3359e-10, 1.4379e-10, 1.8488e-09,\n 2.5129e-09, 5.7805e-08, 5.5761e-08, 1.5034e-08, 6.5726e-08, 2.9831e-09,\n 5.1192e-07, 1.8173e-07, 2.2215e-10, 1.6629e-08, 5.0108e-08, 1.4544e-09,\n 5.0795e-10, 1.2510e-08, 4.2908e-10, 2.9512e-08, 4.1423e-09, 1.7424e-08,\n 1.5027e-07, 2.2929e-08, 1.5845e-08, 1.4233e-08, 6.2495e-08, 6.6001e-09,\n 1.2478e-08, 1.5121e-10, 2.3202e-09, 3.3489e-07, 9.6527e-08, 1.0477e-08,\n 3.3950e-07, 1.8364e-10, 6.1542e-11, 7.6382e-08, 1.0579e-08, 3.4128e-09,\n 2.1139e-09, 2.3357e-08, 1.2571e-08, 5.2124e-10, 2.2591e-09, 3.3824e-09,\n 2.1242e-09, 5.7275e-07, 1.7011e-07, 5.8219e-07, 1.2997e-08, 1.7895e-08,\n 6.5430e-07, 1.8770e-11, 1.2428e-08, 1.8292e-06, 2.1003e-07, 3.9088e-08,\n 4.9314e-07, 6.8053e-10, 1.2098e-08, 1.0730e-07, 2.1776e-08, 4.6697e-12,\n 1.4447e-08, 5.6106e-07, 3.2333e-09, 7.0391e-10, 8.3487e-07, 2.4714e-07,\n 2.9469e-10, 2.5755e-09, 3.1713e-10, 1.5187e-08, 6.0498e-08, 3.4226e-11,\n 2.1398e-09, 4.8670e-07, 2.5721e-09, 7.0428e-11, 5.4187e-10, 9.7526e-08,\n 1.1428e-07, 9.5257e-08, 1.2478e-07, 1.4003e-08, 1.5658e-10, 3.9547e-07,\n 5.2096e-08, 2.1427e-08, 2.9569e-08, 1.3638e-08, 1.1697e-08, 2.3817e-09,\n 1.4845e-09, 2.1313e-09, 1.3356e-09, 8.0343e-09, 5.0864e-10, 1.2054e-07,\n 4.2705e-08, 3.3833e-09, 3.8782e-09, 1.7466e-08, 1.0719e-06, 1.4194e-09,\n 6.4398e-09, 6.5840e-10, 3.0889e-07, 4.5888e-07, 5.1783e-11, 2.3944e-08,\n 1.7743e-08, 6.2225e-08, 1.0675e-10, 1.4920e-09, 2.0553e-07, 2.3366e-07,\n 6.5279e-11, 1.4355e-10, 3.3749e-11, 3.5910e-09, 3.3809e-08, 1.0087e-09,\n 7.7837e-09, 9.0071e-08, 1.5305e-08, 2.1640e-11, 1.2472e-07, 1.5581e-09,\n 1.9833e-07, 1.5326e-07, 4.2078e-07, 3.1313e-07, 7.4669e-09, 6.5079e-10,\n 1.0102e-09, 1.0976e-07, 2.5002e-07, 3.2018e-09, 3.1772e-07, 8.0870e-10,\n 5.2799e-08, 1.0841e-06, 1.2156e-11, 6.9428e-07], device='cuda:0')" + }, + "37": { + "step": "tensor(2504.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.9106e-11, 4.6640e-12, 8.7353e-10, 3.1080e-09, 7.3713e-12, 9.1688e-12,\n 1.1341e-10, 4.7751e-11, 1.4666e-11, 4.9601e-14, 3.7744e-11, 1.7212e-12,\n 1.4387e-10, 8.3409e-12, 1.9431e-11, 6.8902e-13, 2.8577e-11, 1.6188e-11,\n 1.1421e-12, 2.6088e-11, 3.5513e-12, 1.3969e-12, 8.7674e-11, 2.5687e-09,\n 1.0106e-10, 1.7759e-12, 5.1805e-11, 2.0812e-12, 6.3685e-12, 1.5455e-12,\n 1.8367e-11, 6.2765e-11, 6.9375e-12, 6.6555e-11, 1.3575e-13, 2.9621e-10,\n 3.7547e-10, 8.7622e-12, 1.8750e-10, 3.2538e-12, 6.5894e-10, 5.0011e-13,\n 1.1749e-10, 5.9941e-10, 1.8909e-12, 7.7731e-12, 1.0720e-12, 1.7619e-13,\n 5.2404e-13, 1.7149e-10, 4.3894e-11, 5.9725e-11, 2.5800e-13, 2.0358e-12,\n 1.0549e-12, 3.8819e-13, 1.5499e-10, 3.7518e-11, 4.5412e-12, 1.3577e-12,\n 8.4191e-12, 7.4626e-13, 4.8212e-11, 1.2120e-12, 5.4669e-12, 5.1885e-11,\n 4.6278e-11, 2.5893e-13, 2.7365e-12, 4.0911e-10, 1.6522e-11, 1.0810e-12,\n 4.1381e-12, 1.2014e-12, 1.8911e-13, 1.2490e-12, 9.1295e-12, 1.1160e-12,\n 4.1552e-10, 3.4074e-14, 4.5563e-13, 3.2311e-12, 5.3593e-10, 3.0603e-12,\n 6.9695e-11, 1.4843e-13, 1.4470e-11, 9.8901e-13, 2.5732e-11, 2.7215e-10,\n 6.1113e-11, 2.8795e-12, 2.3419e-12, 2.0572e-12, 4.8826e-10, 6.8503e-09,\n 3.5276e-12, 1.9781e-12, 3.6546e-12, 7.7271e-11, 8.3056e-12, 7.3702e-13,\n 4.0490e-11, 1.6591e-11, 2.4150e-10, 8.5028e-14, 1.8871e-13, 6.5281e-11,\n 4.6597e-12, 2.2811e-10, 6.9129e-10, 2.2461e-13, 5.6502e-14, 1.8538e-13,\n 3.3908e-12, 2.1689e-11, 4.3060e-11, 1.9534e-12, 2.3395e-11, 1.4098e-11,\n 3.7999e-10, 3.9399e-10, 5.3722e-14, 1.8996e-11, 3.7024e-10, 1.4581e-13,\n 1.3017e-12, 1.6078e-12, 2.4166e-13, 6.3214e-12, 3.4444e-13, 8.0737e-12,\n 8.3655e-11, 4.5418e-12, 1.8590e-12, 7.0969e-13, 6.6725e-11, 3.5662e-13,\n 2.3683e-11, 5.1470e-12, 2.7509e-13, 2.1612e-10, 2.3167e-11, 6.2384e-11,\n 9.5425e-10, 2.4695e-13, 2.6147e-13, 7.2213e-11, 2.1563e-11, 6.1932e-13,\n 8.4223e-13, 2.3259e-11, 5.3116e-13, 2.4902e-13, 1.2273e-13, 1.1559e-10,\n 3.5264e-13, 2.1465e-09, 7.6639e-11, 1.7384e-10, 1.6691e-12, 1.6857e-11,\n 2.7592e-09, 1.1082e-12, 8.5470e-13, 3.3876e-09, 5.1703e-10, 8.9571e-11,\n 5.4601e-10, 3.9197e-12, 1.4684e-14, 4.9661e-11, 1.6387e-11, 3.6926e-12,\n 4.3680e-13, 5.9702e-10, 2.6808e-12, 1.6819e-12, 9.4095e-10, 4.9446e-10,\n 4.4897e-12, 5.6050e-12, 1.4502e-11, 1.1895e-11, 1.1919e-11, 3.1173e-13,\n 5.1240e-13, 7.3655e-10, 7.9120e-13, 3.9973e-13, 3.8397e-12, 1.4691e-11,\n 3.8053e-10, 1.6168e-10, 4.6370e-11, 6.0786e-11, 1.1048e-14, 2.1334e-09,\n 5.9988e-11, 7.1531e-13, 3.0458e-12, 4.4214e-13, 1.3454e-12, 2.7744e-11,\n 2.0195e-13, 3.8845e-13, 1.7189e-11, 4.9349e-14, 5.5858e-12, 4.4931e-11,\n 2.9484e-11, 2.8571e-13, 1.4979e-13, 5.0414e-12, 1.4385e-09, 4.5008e-14,\n 1.1457e-13, 4.8052e-13, 3.2598e-11, 1.0470e-09, 6.4398e-13, 7.1056e-12,\n 1.3477e-11, 1.1744e-10, 2.3261e-12, 2.4310e-12, 1.3126e-11, 8.9170e-11,\n 1.4115e-12, 8.1451e-13, 5.4231e-12, 8.3207e-13, 7.2478e-11, 6.6668e-13,\n 2.5290e-13, 1.4534e-10, 1.6281e-11, 9.8175e-14, 4.3817e-10, 3.7537e-13,\n 3.4856e-11, 2.0738e-10, 5.0475e-10, 9.5968e-10, 2.7823e-12, 3.2181e-12,\n 6.0374e-12, 7.4388e-11, 8.5566e-10, 1.9588e-12, 7.5794e-10, 5.8365e-14,\n 2.8668e-11, 7.8858e-10, 3.8265e-13, 4.5981e-09], device='cuda:0')" + }, + "38": { + "step": "tensor(2504.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.0666e-11, 1.4355e-14, 5.8703e-10, 2.0940e-09, 6.4499e-11, 2.1052e-12,\n 3.5251e-10, 2.2652e-10, 1.5330e-10, 1.7120e-13, 1.0500e-11, 6.3984e-13,\n 1.6181e-10, 3.1219e-11, 2.1619e-11, 3.0226e-11, 2.4829e-11, 6.5524e-11,\n 1.3275e-12, 8.3768e-11, 2.8438e-11, 7.9818e-13, 3.1956e-10, 7.8548e-10,\n 1.2591e-10, 7.5734e-12, 1.1080e-10, 5.6894e-11, 2.4173e-12, 2.2665e-11,\n 3.1131e-11, 8.0741e-11, 9.4031e-11, 1.5011e-10, 1.2632e-12, 1.4158e-10,\n 2.5931e-10, 8.1747e-12, 1.8514e-10, 4.8349e-12, 3.1721e-10, 2.7200e-12,\n 2.6607e-10, 2.4070e-10, 1.8087e-11, 1.2980e-11, 1.0007e-11, 6.1326e-13,\n 1.7515e-13, 2.8544e-10, 7.0214e-11, 3.1083e-10, 2.3524e-13, 2.0312e-11,\n 4.3612e-12, 1.7642e-12, 8.2980e-10, 1.4559e-11, 6.5866e-11, 1.2010e-12,\n 2.7159e-15, 5.9942e-12, 8.0132e-11, 5.4007e-11, 4.9091e-11, 2.8479e-10,\n 5.2911e-11, 2.8711e-12, 6.8497e-11, 2.9228e-10, 5.8947e-11, 1.9381e-11,\n 3.8098e-11, 3.9327e-13, 1.2041e-13, 1.5444e-11, 5.6387e-11, 9.9002e-13,\n 4.6753e-10, 1.4267e-13, 1.0355e-12, 4.6801e-11, 7.3245e-10, 7.3901e-13,\n 5.0630e-11, 3.4051e-12, 3.9877e-11, 9.6111e-12, 1.0994e-10, 5.9342e-10,\n 1.1381e-10, 2.9169e-11, 8.4922e-13, 6.9577e-11, 4.5385e-10, 2.1069e-09,\n 1.6920e-12, 5.3344e-12, 2.9784e-11, 8.4042e-11, 8.7315e-11, 1.8043e-11,\n 6.9571e-11, 5.7272e-11, 3.7771e-10, 6.1058e-12, 2.2870e-13, 7.2082e-11,\n 3.9863e-11, 2.6457e-10, 2.9438e-10, 4.3625e-12, 1.3810e-12, 2.9335e-12,\n 1.1143e-12, 9.2805e-11, 9.2073e-11, 2.5932e-11, 1.0007e-10, 4.9762e-12,\n 6.0250e-10, 2.1781e-10, 5.1303e-13, 3.5052e-11, 1.6323e-10, 3.2324e-12,\n 1.2444e-14, 1.0214e-12, 7.3955e-16, 1.9375e-11, 7.4361e-12, 2.8335e-11,\n 1.6505e-10, 3.8769e-11, 2.5593e-11, 2.2225e-11, 1.2144e-10, 2.7870e-12,\n 2.4527e-11, 2.8528e-13, 3.8984e-12, 3.8762e-10, 1.5848e-10, 1.4740e-11,\n 3.4509e-10, 4.3909e-12, 3.9923e-14, 1.2178e-10, 2.7355e-11, 2.0345e-12,\n 7.8911e-12, 7.2075e-11, 1.0937e-11, 8.6083e-13, 3.8281e-12, 3.5734e-11,\n 1.0423e-13, 8.3601e-10, 2.4994e-10, 7.3667e-10, 4.3012e-12, 3.2161e-11,\n 9.8154e-10, 1.2645e-14, 2.0657e-11, 2.5527e-09, 3.3457e-10, 8.0458e-11,\n 6.2412e-10, 5.7430e-12, 9.6237e-12, 1.0934e-10, 7.0662e-11, 8.2141e-12,\n 2.3465e-11, 7.1586e-10, 9.9600e-13, 3.5319e-13, 1.0096e-09, 3.7299e-10,\n 3.8569e-13, 1.9116e-12, 4.6294e-13, 2.1899e-11, 9.1983e-11, 1.5348e-13,\n 3.5261e-12, 5.6798e-10, 1.2876e-11, 1.5590e-13, 1.9038e-13, 1.1020e-10,\n 2.1655e-10, 1.4504e-10, 1.3244e-10, 2.1918e-11, 5.7416e-14, 6.1521e-10,\n 9.7123e-11, 1.8223e-11, 2.2094e-11, 2.2107e-11, 9.1852e-12, 3.0863e-11,\n 5.8494e-14, 3.7140e-12, 8.8617e-12, 1.3552e-11, 1.0087e-12, 1.9903e-10,\n 4.1532e-11, 4.5461e-12, 7.1550e-12, 4.1019e-11, 1.5023e-09, 3.6736e-13,\n 1.0467e-11, 1.1749e-12, 4.3426e-10, 6.9269e-10, 4.2022e-14, 4.3376e-11,\n 1.1108e-11, 9.7729e-11, 6.5192e-13, 8.8132e-13, 2.8648e-10, 3.2154e-10,\n 1.6335e-13, 3.4386e-13, 4.7869e-14, 6.1721e-12, 5.4281e-11, 1.0630e-11,\n 2.9958e-12, 1.7688e-10, 4.8657e-11, 3.7968e-15, 2.4053e-10, 1.4479e-13,\n 2.8443e-10, 2.4587e-10, 5.0842e-10, 4.3863e-10, 1.5371e-11, 1.8488e-12,\n 2.3880e-12, 1.6830e-10, 4.2134e-10, 8.3794e-12, 5.1705e-10, 7.3644e-13,\n 5.9975e-11, 1.5068e-09, 7.6909e-14, 1.1161e-09], device='cuda:0')" + }, + "39": { + "step": "tensor(2504.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.9731e-13, 1.4911e-11, 3.0222e-12, ..., 1.1798e-14, 1.4404e-12,\n 4.6471e-11],\n [5.8929e-12, 8.3959e-13, 2.3957e-13, ..., 5.8394e-12, 7.9410e-15,\n 6.6600e-11],\n [9.5389e-14, 2.4071e-11, 6.0747e-13, ..., 4.0387e-13, 1.5031e-12,\n 3.7412e-11],\n ...,\n [1.1491e-10, 1.2390e-11, 2.0024e-10, ..., 8.0737e-12, 2.5410e-11,\n 2.0414e-09],\n [3.5168e-11, 3.3809e-11, 3.8462e-10, ..., 5.5748e-12, 7.4866e-12,\n 6.2463e-10],\n [1.0998e-10, 2.6366e-09, 3.3278e-08, ..., 7.6446e-10, 2.3730e-11,\n 1.8487e-08]], device='cuda:0')" + }, + "40": { + "step": "tensor(2504.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.8445e-11, 4.5690e-11, 1.2304e-11, 4.6142e-12, 3.7957e-11, 1.0584e-11,\n 4.2366e-12, 1.3825e-11, 3.6597e-12, 9.9173e-12, 3.3563e-11, 8.3257e-12,\n 3.0827e-13, 3.9027e-12, 1.2434e-12, 6.3205e-13, 5.0960e-11, 1.7224e-11,\n 8.6111e-12, 4.6243e-11, 4.0339e-12, 7.6690e-11, 1.0562e-11, 5.7889e-12,\n 1.8365e-11, 3.2598e-11, 2.2816e-13, 8.8165e-13, 2.3344e-11, 3.9777e-11,\n 2.8732e-11, 3.1565e-11, 1.7349e-11, 3.9295e-11, 6.2067e-11, 1.8776e-10,\n 5.2068e-11, 7.9412e-12, 5.9564e-11, 3.9233e-11, 1.5926e-11, 2.4561e-11,\n 3.7635e-11, 1.9990e-11, 3.8885e-11, 1.6207e-11, 2.9656e-11, 1.2639e-11,\n 8.3663e-13, 1.0896e-10, 1.4269e-11, 2.2488e-11, 1.7157e-12, 1.0409e-10,\n 2.9201e-12, 1.8096e-11, 4.4285e-11, 9.2939e-13, 5.4480e-11, 1.1204e-11,\n 1.8193e-10, 1.0971e-10, 3.7951e-12, 1.2687e-11, 8.2389e-13, 2.9959e-12,\n 1.1393e-11, 5.9114e-12, 1.5945e-11, 6.8923e-11, 2.8043e-11, 1.4368e-11,\n 7.5271e-12, 2.0952e-11, 7.5111e-11, 1.1125e-10, 2.5540e-11, 6.2674e-12,\n 4.1729e-11, 4.2376e-12, 8.3120e-11, 8.9673e-12, 2.2142e-12, 1.5238e-12,\n 1.8192e-12, 6.7500e-12, 1.6074e-10, 1.9652e-11, 7.0175e-13, 4.0152e-11,\n 5.1074e-13, 2.3992e-11, 3.2051e-12, 2.1324e-10, 3.6773e-13, 1.5809e-11,\n 3.4424e-11, 2.5380e-13, 7.5467e-13, 2.0472e-11, 5.9851e-11, 1.6935e-13,\n 8.9484e-11, 5.4597e-12, 1.0151e-11, 9.6093e-12, 1.3509e-11, 8.2607e-11,\n 5.5215e-11, 3.0599e-11, 7.5763e-12, 2.1283e-11, 1.3403e-11, 1.6192e-11,\n 9.0111e-12, 2.0618e-11, 1.7264e-11, 3.0943e-11, 5.0123e-11, 2.0711e-10,\n 1.4591e-12, 1.9376e-11, 3.5321e-11, 1.0260e-12, 6.1593e-13, 7.6893e-13,\n 2.7610e-11, 1.1961e-10, 1.1672e-12, 2.0288e-12, 1.0752e-11, 9.4745e-11,\n 1.1476e-13, 6.1082e-11, 1.4483e-11, 2.4694e-11, 5.2862e-11, 1.7193e-12,\n 1.5672e-11, 3.1480e-13, 1.7583e-11, 2.7726e-12, 2.9965e-12, 4.4435e-12,\n 7.2108e-13, 3.4710e-14, 1.0001e-11, 6.2904e-12, 2.2632e-12, 3.2061e-12,\n 5.9500e-13, 2.6488e-12, 1.9204e-11, 3.5873e-11, 2.5969e-12, 2.5407e-12,\n 7.7188e-12, 2.7560e-12, 1.3047e-11, 2.0333e-12, 5.5319e-11, 3.9469e-13,\n 5.2322e-13, 7.6869e-11, 2.7191e-11, 2.2094e-10, 2.2890e-10, 3.3340e-11,\n 4.5599e-11, 2.5701e-10, 7.3678e-12, 9.0680e-12, 1.4532e-11, 8.9184e-11,\n 7.2181e-12, 6.9636e-11, 1.6346e-12, 3.3576e-11, 3.0603e-11, 3.8131e-10,\n 8.8528e-11, 1.4555e-10, 1.0190e-11, 2.2210e-11, 2.7582e-10, 5.5226e-12,\n 7.1383e-14, 2.5845e-13, 9.2961e-11, 2.3152e-11, 8.8791e-11, 4.1457e-11,\n 2.2268e-11, 3.6546e-12, 1.4031e-10, 1.3777e-10, 7.1926e-11, 2.1619e-11,\n 7.7640e-11, 5.1436e-13, 2.1532e-11, 5.1043e-11, 3.1440e-13, 6.8617e-13,\n 1.2377e-11, 1.0343e-10, 7.2206e-11, 1.1224e-11, 2.1024e-12, 9.9098e-14,\n 1.2698e-10, 6.3218e-11, 1.7086e-12, 1.1088e-12, 3.5421e-11, 1.1392e-11,\n 1.5401e-11, 6.4886e-11, 5.8014e-11, 1.4118e-10, 2.1144e-12, 2.4044e-11,\n 5.5175e-13, 1.6360e-10, 9.8629e-12, 5.6166e-12, 2.8602e-12, 1.9982e-12,\n 1.4575e-11, 2.3340e-12, 3.0435e-11, 3.3835e-12, 1.0824e-12, 1.7689e-11,\n 9.3472e-12, 1.5034e-12, 1.5460e-12, 8.5509e-12, 7.1027e-12, 3.0553e-12,\n 2.5339e-12, 3.4054e-12, 4.0199e-12, 5.5102e-12, 1.8063e-11, 4.1839e-12,\n 4.4189e-12, 2.6021e-12, 1.5577e-12, 7.2740e-12, 1.9725e-11, 2.6071e-11,\n 2.1799e-12, 1.7113e-11, 2.9612e-12, 2.2504e-11, 4.4298e-26, 3.6876e-27,\n 1.0330e-26, 2.2834e-27, 2.2301e-28, 6.2035e-27, 3.1224e-27, 9.4983e-27,\n 7.1831e-29, 6.8845e-27, 1.4416e-27, 2.9607e-27, 1.3270e-26, 5.1083e-28,\n 3.0093e-28, 7.8842e-28, 1.9288e-27, 6.4853e-28, 2.4041e-26, 4.7192e-27,\n 4.5278e-28, 4.1584e-29, 1.0800e-27, 7.3017e-27, 6.9928e-28, 1.6272e-28,\n 2.5336e-28, 1.7909e-27, 3.8330e-29, 1.2949e-27, 5.7817e-27, 5.4600e-27,\n 3.3597e-27, 1.8749e-28, 1.0995e-27, 1.2765e-27, 3.8979e-27, 8.6191e-28,\n 2.7429e-27, 6.0789e-27, 3.8574e-27, 2.3749e-27, 8.2301e-28, 6.8656e-27,\n 6.7877e-27, 1.1373e-27, 5.2892e-28, 1.0651e-27, 6.9428e-27, 2.0923e-27,\n 7.8981e-28, 1.8473e-28, 2.5733e-27, 2.8689e-27, 6.7642e-27, 7.1338e-27,\n 5.0876e-27, 2.9600e-26, 3.8613e-27, 2.9765e-27, 1.0562e-26, 5.4635e-27,\n 6.2141e-27, 6.0324e-27, 1.4984e-27, 1.9194e-27, 3.1045e-28, 1.0328e-27,\n 3.7754e-27, 1.0208e-27, 5.1697e-27, 3.3857e-28, 8.6840e-29, 3.2288e-27,\n 8.4259e-28, 2.7945e-27, 1.7856e-27, 1.6234e-27, 4.9682e-27, 2.0886e-27,\n 1.9925e-27, 1.5727e-26, 9.0760e-28, 1.2913e-27, 2.7147e-27, 1.7083e-28,\n 1.5948e-27, 4.5352e-29, 1.5364e-27, 1.2889e-28, 6.2966e-28, 3.8620e-27,\n 2.0735e-28, 1.6941e-27, 5.3781e-28, 1.4396e-27, 2.2743e-27, 1.1027e-27,\n 8.8123e-28, 1.8916e-27, 1.8181e-27, 3.9940e-28, 6.4652e-27, 7.1489e-27,\n 1.0229e-27, 2.5973e-27, 1.7774e-27, 7.6970e-27, 3.0670e-27, 1.0302e-27,\n 3.0115e-28, 1.6470e-27, 4.0125e-27, 3.9188e-28, 1.7458e-27, 1.2055e-28,\n 6.0684e-28, 1.7020e-27, 2.8030e-28, 8.9023e-28, 1.1109e-27, 1.3828e-27,\n 5.0247e-27, 2.0112e-27, 8.0456e-27, 1.1369e-26, 5.8366e-27, 1.6162e-26,\n 3.1443e-28, 3.1283e-28, 3.1391e-27, 1.1555e-27, 1.2138e-28, 3.0439e-29,\n 1.1407e-27, 2.9132e-28, 1.3458e-27, 8.4937e-28, 4.4286e-27, 5.7711e-28,\n 1.0751e-27, 1.1764e-26, 1.1551e-27, 2.0180e-28, 5.1382e-28, 1.5839e-28,\n 1.5603e-28, 1.9186e-27, 3.2880e-27, 3.6439e-27, 2.0451e-28, 5.2065e-27,\n 5.3871e-28, 5.0227e-28, 1.2566e-26, 2.5033e-27, 4.0543e-28, 1.6966e-28,\n 1.1083e-27, 4.8145e-28, 2.4861e-27, 7.8620e-29, 1.7235e-27, 3.9536e-28,\n 6.4949e-27, 8.0718e-28, 1.5081e-27, 5.6844e-27, 2.2838e-27, 1.1237e-27,\n 4.7290e-27, 1.8078e-27, 3.0546e-27, 2.9478e-28, 2.8470e-26, 3.9359e-28,\n 6.0268e-27, 9.4530e-28, 1.2598e-27, 3.2577e-27, 1.1672e-28, 7.4367e-28,\n 3.1512e-27, 2.1438e-27, 1.9209e-27, 1.7235e-27, 3.3175e-27, 6.2730e-28,\n 2.4048e-27, 2.5941e-27, 2.6347e-27, 2.4640e-27, 5.4071e-27, 9.1867e-27,\n 1.4708e-27, 4.0538e-28, 4.6063e-27, 1.2791e-27, 1.1049e-28, 2.9881e-28,\n 3.4959e-27, 7.9634e-27, 3.2116e-27, 1.4052e-27, 3.8736e-28, 6.7746e-29,\n 2.6998e-27, 4.5755e-27, 6.8478e-28, 2.6634e-28, 7.5642e-28, 1.0366e-27,\n 1.3033e-27, 7.5141e-28, 2.7042e-27, 3.7953e-27, 7.7750e-27, 7.7454e-28,\n 8.1356e-28, 2.1841e-27, 2.7625e-27, 1.7453e-27, 1.6939e-28, 5.2093e-27,\n 1.6677e-28, 7.6178e-28, 2.7748e-27, 2.6589e-28, 3.3530e-27, 5.1311e-27,\n 7.9975e-28, 5.1788e-27, 5.0979e-27, 2.9929e-27, 2.4584e-27, 3.2418e-27,\n 7.6405e-29, 2.2410e-27, 6.9318e-28, 1.8942e-27, 2.8340e-28, 9.4624e-28,\n 1.8141e-28, 1.1661e-26, 1.8724e-27, 3.0065e-27, 3.6100e-28, 1.0794e-27,\n 3.1505e-27, 1.2102e-26, 2.8867e-27, 5.1599e-27, 6.2198e-27, 4.8290e-27,\n 7.7503e-28, 1.0293e-27, 1.8257e-07, 6.7960e-09, 4.0021e-08, 2.6300e-09,\n 1.0349e-08, 3.4457e-09, 7.0517e-10, 1.4468e-08, 7.9884e-09, 3.2888e-09,\n 1.1420e-07, 2.2272e-09, 6.6522e-09, 8.8840e-08, 1.3069e-08, 1.7734e-08,\n 2.9491e-09, 1.0938e-10, 7.4890e-09, 1.0524e-08, 2.2153e-08, 1.6035e-08,\n 9.6620e-09, 2.3879e-09, 2.3480e-10, 1.9238e-09, 5.5088e-10, 1.3830e-08,\n 1.2517e-07, 3.5807e-09, 4.0011e-08, 9.4479e-09, 8.9704e-09, 2.2684e-08,\n 6.6525e-11, 5.4755e-08, 2.8035e-08, 8.8254e-09, 5.7324e-08, 1.3771e-09,\n 2.2493e-09, 4.3437e-09, 4.0795e-09, 1.1077e-08, 6.3049e-09, 1.8943e-10,\n 1.3475e-09, 1.4397e-08, 2.3793e-09, 2.7565e-08, 9.4513e-08, 1.9906e-08,\n 2.0923e-09, 4.5627e-08, 5.8397e-09, 3.1789e-08, 2.9983e-08, 4.8630e-09,\n 4.3121e-08, 3.6204e-09, 1.5586e-09, 2.7788e-11, 4.5083e-10, 7.4469e-10,\n 1.8532e-08, 1.1148e-08, 8.5084e-09, 3.5472e-09, 1.1310e-08, 1.6788e-09,\n 2.3210e-08, 5.0127e-08, 3.0577e-08, 7.6957e-10, 1.3265e-09, 4.0145e-08,\n 2.8897e-08, 7.7497e-08, 3.4835e-08, 2.5727e-09, 1.4448e-09, 2.9820e-09,\n 1.4239e-08, 5.4341e-09, 1.3474e-08, 2.4830e-09, 1.6364e-08, 1.8954e-09,\n 4.8276e-09, 5.2488e-08, 7.2980e-08, 5.9436e-09, 2.5806e-08, 1.7687e-08,\n 1.0449e-08, 2.6058e-08, 2.5626e-08, 1.7218e-09, 1.0784e-08, 8.7391e-09,\n 7.2676e-09, 6.9728e-08, 9.5269e-09, 5.7947e-08, 2.9987e-08, 5.5564e-09,\n 1.1562e-08, 1.3561e-09, 2.8653e-09, 2.7808e-10, 3.1064e-08, 3.2316e-09,\n 3.0336e-08, 4.2380e-09, 7.3550e-08, 1.8649e-08, 1.2019e-08, 6.3883e-08,\n 1.0273e-08, 6.3109e-09, 7.4716e-10, 1.0011e-08, 1.5020e-08, 3.6520e-09,\n 3.6917e-09, 1.5398e-08, 3.0610e-09, 1.6016e-09, 3.7103e-08, 1.5058e-09,\n 2.1000e-09, 2.3955e-09, 2.6065e-09, 1.0403e-08, 1.4476e-08, 6.5160e-09,\n 6.4377e-09, 4.2389e-10, 1.3592e-08, 9.4861e-09, 1.6439e-09, 8.3458e-09,\n 2.9254e-09, 1.8297e-09, 1.8917e-08, 4.3743e-09, 4.6635e-11, 1.2601e-08,\n 9.4681e-09, 2.9250e-09, 2.6130e-08, 1.4818e-08, 4.9048e-09, 2.0157e-09,\n 4.9450e-08, 1.7606e-09, 1.7930e-09, 2.0122e-08, 7.4958e-09, 1.8178e-08,\n 1.7439e-08, 6.2312e-09, 2.6831e-08, 1.6110e-08, 1.3396e-09, 2.5756e-09,\n 3.4276e-11, 2.9881e-09, 1.8941e-08, 2.7437e-11, 9.7655e-08, 4.9982e-11,\n 5.8998e-09, 2.3515e-09, 5.3464e-08, 2.0641e-08, 9.1801e-09, 1.9344e-08,\n 4.9638e-08, 7.5586e-09, 8.5443e-10, 3.5393e-09, 5.8825e-09, 3.1606e-09,\n 1.0374e-09, 2.2689e-09, 4.0608e-09, 9.2402e-10, 4.3191e-08, 2.5969e-10,\n 9.4036e-09, 1.9460e-09, 6.8855e-08, 3.3709e-08, 1.4092e-09, 4.7627e-09,\n 2.6444e-08, 1.3187e-09, 1.3232e-09, 3.1236e-08, 7.4074e-09, 1.5284e-08,\n 4.5129e-09, 2.0788e-08, 2.3204e-09, 5.2703e-09, 2.4803e-08, 1.1642e-09,\n 2.0275e-08, 9.5437e-09, 3.3648e-09, 1.5209e-08, 7.3286e-08, 1.4908e-09,\n 6.8126e-09, 1.2676e-08, 5.8018e-11, 1.0556e-08, 9.6674e-09, 2.2873e-08,\n 9.1972e-10, 1.5755e-08, 3.3299e-09, 5.2459e-09, 1.4843e-08, 5.8991e-11,\n 2.3675e-08, 3.9754e-09, 3.2948e-08, 1.1154e-08, 1.5365e-08, 2.7786e-09,\n 7.2416e-09, 2.6159e-09, 4.5356e-08, 9.0610e-09, 6.1695e-09, 4.3162e-08,\n 3.8935e-08, 3.7232e-08, 7.8856e-09, 3.7742e-08, 1.3708e-09, 1.6102e-10,\n 3.6067e-09, 1.7015e-09, 1.0473e-09, 2.3627e-09, 2.9678e-08, 9.7925e-10,\n 2.5705e-09, 1.3454e-08, 4.9677e-09, 1.8411e-09, 7.7007e-10, 3.4654e-08],\n device='cuda:0')" + }, + "41": { + "step": "tensor(2504.)", + "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.6805e-09, 2.6935e-09, 8.6822e-10, ..., 1.3889e-08, 4.7952e-10,\n 5.4115e-09],\n [1.7360e-09, 7.1055e-10, 4.0918e-10, ..., 3.2650e-09, 1.5510e-10,\n 2.8352e-09],\n [4.0306e-10, 2.3823e-11, 9.9820e-11, ..., 1.2808e-10, 3.2007e-11,\n 1.3410e-10],\n ...,\n [1.4730e-09, 1.0638e-10, 3.9696e-10, ..., 6.5487e-10, 9.1545e-11,\n 3.3389e-10],\n [7.6147e-10, 1.2654e-10, 1.8910e-10, ..., 6.4245e-10, 4.7878e-11,\n 1.5506e-10],\n [1.8481e-09, 5.5388e-10, 4.2802e-10, ..., 2.3956e-09, 1.6200e-10,\n 1.7236e-09]], device='cuda:0')" + }, + "42": { + "step": "tensor(2504.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.6356e-07, 4.1745e-08, 3.6905e-10, 1.6084e-07, 4.9675e-08, 4.2794e-09,\n 5.8247e-09, 8.9685e-09, 1.0155e-09, 2.2171e-08, 2.8505e-08, 3.2584e-08,\n 4.8535e-08, 9.7040e-09, 5.3580e-08, 2.4110e-08, 1.0241e-07, 1.5493e-08,\n 1.5761e-09, 1.2498e-07, 1.5878e-08, 1.0906e-08, 3.2746e-08, 1.3021e-07,\n 7.1160e-09, 1.7760e-07, 3.1790e-08, 3.3070e-08, 1.7294e-09, 1.0288e-08,\n 1.4806e-07, 3.7451e-09, 8.6772e-08, 2.8317e-09, 5.7345e-08, 1.1773e-09,\n 2.2102e-08, 4.6871e-08, 2.8809e-08, 2.7215e-08, 1.8149e-09, 3.8130e-08,\n 1.3120e-07, 3.5282e-09, 6.1680e-08, 4.5836e-09, 3.5190e-09, 1.3165e-09,\n 3.9941e-08, 1.2474e-08, 1.5211e-08, 8.3938e-09, 6.4426e-09, 1.7333e-08,\n 2.6548e-07, 2.7868e-09, 7.7244e-09, 3.8978e-09, 8.3892e-10, 6.6708e-08,\n 2.0755e-08, 7.5354e-10, 4.3558e-08, 1.7932e-07, 7.3456e-09, 1.1150e-08,\n 8.0295e-08, 1.2563e-09, 2.5087e-07, 2.5676e-09, 3.6016e-08, 2.5313e-08,\n 1.7133e-08, 8.9540e-10, 6.0379e-09, 7.8135e-09, 4.2351e-08, 1.4453e-08,\n 2.3105e-08, 4.4590e-08, 4.8358e-09, 4.6214e-09, 4.1514e-07, 1.3923e-08,\n 9.2541e-10, 1.7322e-09, 3.5439e-09, 1.8610e-09, 3.6306e-08, 2.8596e-09,\n 9.6575e-10, 7.2436e-08, 4.9007e-09, 1.3839e-07, 3.2844e-08, 4.3750e-09,\n 1.0148e-07, 3.6671e-08, 1.6765e-07, 1.2538e-09, 1.9067e-08, 1.6993e-07,\n 2.2314e-07, 3.7309e-07, 4.6335e-09, 1.6249e-08, 9.7254e-10, 5.9373e-08,\n 2.9039e-08, 1.4688e-09, 2.6046e-09, 2.8234e-08, 3.1853e-09, 4.8978e-09,\n 4.3044e-09, 2.3170e-08, 2.4138e-09, 1.8787e-08, 2.1158e-09, 1.6908e-08,\n 1.3670e-07, 7.9502e-10, 3.3666e-07, 1.2665e-08, 2.4471e-07, 2.2477e-09,\n 8.0142e-08, 9.8402e-08, 1.1382e-07, 9.4490e-09, 1.2081e-07, 9.1894e-08,\n 1.4720e-08, 1.1077e-09, 1.4721e-09, 1.3013e-07, 1.1369e-07, 7.9147e-08,\n 1.4023e-08, 2.1946e-09, 2.3842e-08, 1.5705e-07, 4.0764e-08, 6.0965e-08,\n 3.1880e-08, 7.1102e-10, 4.0028e-08, 2.8771e-08, 2.7132e-09, 1.1627e-09,\n 6.3122e-10, 1.0528e-07, 4.0575e-09, 1.2721e-08, 8.2799e-08, 1.7402e-07,\n 5.5701e-08, 1.2698e-07, 1.4575e-08, 1.0752e-07, 1.1452e-07, 9.7641e-08,\n 4.4686e-08, 1.5430e-07, 7.8346e-10, 1.9440e-07, 2.1345e-08, 2.4608e-08,\n 1.4057e-08, 1.9585e-09, 3.9984e-09, 9.2030e-10, 2.0521e-08, 5.9726e-09,\n 2.8954e-08, 9.1192e-08, 4.8898e-08, 1.6063e-09, 4.6251e-08, 1.0308e-09,\n 2.1357e-09, 6.1592e-08, 1.0956e-09, 2.5455e-08, 1.5811e-09, 3.9624e-08,\n 6.5704e-08, 1.8195e-08, 2.9499e-08, 6.3464e-08, 1.1494e-07, 7.0772e-08,\n 6.9460e-08, 1.2684e-09, 9.6670e-09, 1.4533e-09, 2.0054e-09, 1.6402e-08,\n 7.4209e-10, 2.1710e-08, 6.6311e-08, 7.5062e-08, 9.7206e-09, 2.8757e-08,\n 9.1605e-09, 1.5101e-07, 1.5863e-09, 1.0966e-07, 4.4548e-09, 1.0195e-08,\n 3.1293e-09, 2.6443e-08, 2.8720e-09, 1.5363e-08, 1.1495e-07, 1.8310e-08,\n 9.3906e-09, 1.3074e-09, 3.0670e-09, 8.8801e-09, 5.2095e-08, 2.3117e-09,\n 1.0612e-09, 2.3933e-08, 6.0671e-08, 3.2904e-07, 3.6327e-08, 7.6687e-09,\n 6.3760e-09, 1.8363e-07, 6.4194e-08, 1.2242e-08, 2.0392e-07, 6.0020e-09,\n 9.1140e-09, 2.2145e-08, 9.4260e-09, 5.4933e-08, 9.6007e-09, 1.6675e-10,\n 8.7680e-08, 5.6320e-09, 7.9842e-08, 1.0164e-08, 1.5690e-09, 2.9341e-09,\n 9.8147e-08, 3.7451e-09, 1.4204e-07, 1.6550e-07, 9.2561e-08, 1.6822e-08,\n 3.5002e-08, 2.8969e-09, 4.2748e-09, 2.9824e-08], device='cuda:0')" + }, + "43": { + "step": "tensor(2504.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.2470e-08, 2.0408e-07, 2.5502e-08, ..., 5.2823e-07, 4.9773e-07,\n 2.3015e-07],\n [2.1664e-09, 1.3939e-08, 1.6482e-09, ..., 3.6723e-08, 3.5791e-08,\n 1.5418e-08],\n [1.5119e-09, 8.7714e-09, 1.1845e-09, ..., 2.2845e-08, 2.0315e-08,\n 1.0640e-08],\n [2.3710e-09, 1.5885e-08, 1.8863e-09, ..., 4.0679e-08, 3.9441e-08,\n 1.6849e-08],\n [2.3389e-09, 1.3381e-08, 1.8587e-09, ..., 3.4721e-08, 3.0689e-08,\n 1.6453e-08]], device='cuda:0')" + }, + "44": { + "step": "tensor(2504.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([1.1607e-05, 8.1958e-07, 4.8098e-07, 9.2031e-07, 7.2403e-07],\n device='cuda:0')" + }, + "45": { + "step": "tensor(2504.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.2524e-08, 2.0418e-07, 2.5541e-08, ..., 5.2858e-07, 4.9773e-07,\n 2.3050e-07],\n [2.1674e-09, 1.3940e-08, 1.6489e-09, ..., 3.6730e-08, 3.5791e-08,\n 1.5425e-08],\n [1.5170e-09, 8.7802e-09, 1.1882e-09, ..., 2.2879e-08, 2.0315e-08,\n 1.0673e-08],\n [2.3722e-09, 1.5887e-08, 1.8872e-09, ..., 4.0687e-08, 3.9441e-08,\n 1.6857e-08],\n [2.3477e-09, 1.3396e-08, 1.8652e-09, ..., 3.4778e-08, 3.0689e-08,\n 1.6511e-08]], device='cuda:0')" + }, + "46": { + "step": "tensor(2504.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([1.1607e-05, 8.1960e-07, 4.8107e-07, 9.2033e-07, 7.2418e-07],\n device='cuda:0')" + }, + "47": { + "step": "tensor(2504.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.2470e-08, 2.0408e-07, 2.5502e-08, ..., 5.2823e-07, 4.9773e-07,\n 2.3015e-07],\n [2.1664e-09, 1.3939e-08, 1.6482e-09, ..., 3.6723e-08, 3.5791e-08,\n 1.5418e-08],\n [1.5119e-09, 8.7714e-09, 1.1845e-09, ..., 2.2845e-08, 2.0315e-08,\n 1.0640e-08],\n [2.3710e-09, 1.5885e-08, 1.8863e-09, ..., 4.0679e-08, 3.9441e-08,\n 1.6849e-08],\n [2.3389e-09, 1.3381e-08, 1.8587e-09, ..., 3.4721e-08, 3.0689e-08,\n 1.6453e-08]], device='cuda:0')" + }, + "48": { + "step": "tensor(2504.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([1.1607e-05, 8.1958e-07, 4.8098e-07, 9.2031e-07, 7.2403e-07],\n device='cuda:0')" + } + }, + "param_groups": [ + { + "lr": 0.005000500000000001, + "name": "scale_256", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 0, + 1, + 2 + ] + }, + { + "lr": 0.005000500000000001, + "name": "scale_512", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 3, + 4, + 5 + ] + }, + { + "lr": 0.005000500000000001, + "name": "scale_768", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 6, + 7, + 8 + ] + }, + { + "lr": 0.005000500000000001, + "name": "scale_1024", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 9, + 10, + 11 + ] + }, + { + "lr": 0.005000500000000001, + "name": "scale_1280", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 12, + 13, + 14 + ] + }, + { + "lr": 0.0025005, + "name": "fusion", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.005, + "params": [ + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48 + ] + } + ] + }, + "scheduler_state_dict": { + "T_0": 10, + "T_i": 10, + "T_mult": 2, + "eta_min": 1e-06, + "T_cur": 5, + "base_lrs": [ + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.005 + ], + "last_epoch": 5, + "_step_count": 0, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.0025005 + ] + }, + "metrics": { + "val_acc": 74.24 + }, + "train_config": { + "name": "david_training", + "run_id": "20251012_032356", + "dataset_name": "AbstractPhil/imagenet-clip-features-orderly", + "model_variant": "clip_vit_b16", + "num_classes": 1000, + "preset": "high_accuracy", + "custom_config_path": null, + "num_classes_override": null, + "use_belly_override": null, + "belly_expand_override": null, + "progressive_training_override": true, + "num_epochs": 20, + "batch_size": 1024, + "learning_rate": 0.01, + "weight_decay": 1e-05, + "warmup_epochs": 3, + "use_rose_loss": true, + "rose_initial_weight": 0.1, + "rose_max_weight": 0.5, + "rose_weight_schedule": "adaptive", + "use_cayley_loss": false, + "cayley_weight": 0.001, + "scale_loss_balance": null, + "use_mixed_precision": false, + "gradient_clip": 5.0, + "scheduler_type": "cosine_restarts", + "min_lr": 1e-06, + "freeze_strategy": "performance", + "freeze_threshold": 70.0, + "unfreeze_on_plateau": true, + "patience": 10, + "track_gradients": true, + "gradient_scale_threshold": 1e-07, + "gradient_scale_multiplier": 5.0, + "log_interval": 50, + "val_interval": 1, + "save_interval": 5, + "log_fusion_weights": true, + "log_loss_components": true, + "save_format": "safetensors", + "hf_repo": "AbstractPhil/gated-david", + "upload_to_hub": true, + "base_dir": "./david_training", + "num_workers": 10, + "pin_memory": true, + "prefetch_factor": 4, + "persistent_workers": true + } +} \ No newline at end of file