diff --git "a/weights/David-partial_shared-hierarchical_tree/20251012_191456/best_model_acc73.04_metadata.json" "b/weights/David-partial_shared-hierarchical_tree/20251012_191456/best_model_acc73.04_metadata.json" new file mode 100644--- /dev/null +++ "b/weights/David-partial_shared-hierarchical_tree/20251012_191456/best_model_acc73.04_metadata.json" @@ -0,0 +1,659 @@ +{ + "epoch": 5, + "optimizer_state_dict": { + "state": { + "0": { + "step": "tensor(15018.)", + "exp_avg": "tensor([[ 3.7653e-05, -5.3344e-05, -8.8738e-05, ..., -4.4149e-05,\n 3.3037e-05, -3.5802e-05],\n [ 8.1040e-06, 2.7052e-05, -2.7348e-05, ..., 5.3478e-05,\n 2.3965e-05, 1.6371e-05],\n [-2.9278e-05, -3.5376e-05, 1.5035e-05, ..., 3.7661e-05,\n -2.0267e-05, -1.9297e-06],\n ...,\n [ 1.6699e-06, -5.8201e-05, 2.3838e-05, ..., -8.9005e-06,\n -3.7286e-05, -1.9936e-05],\n [-2.5500e-05, 1.8177e-05, -7.1431e-05, ..., 2.1149e-05,\n 2.6302e-05, 4.3782e-05],\n [-1.3369e-05, 2.3896e-05, -1.2478e-05, ..., -1.1112e-05,\n -1.4316e-05, 3.2616e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.1141e-09, 7.2652e-08, 1.7875e-08, ..., 4.8585e-08, 6.7099e-09,\n 8.8976e-09],\n [4.2214e-09, 4.2806e-09, 4.2194e-09, ..., 2.2223e-08, 2.5118e-09,\n 3.9230e-09],\n [1.5488e-08, 9.0759e-08, 3.3185e-08, ..., 1.1356e-08, 8.7699e-09,\n 1.0279e-08],\n ...,\n [1.6279e-08, 6.4781e-08, 1.8117e-08, ..., 5.0632e-08, 9.9102e-09,\n 1.1763e-08],\n [1.7151e-08, 6.7539e-08, 3.4644e-08, ..., 2.8894e-08, 8.2838e-09,\n 1.6256e-08],\n [2.0795e-08, 1.9217e-08, 1.0305e-08, ..., 5.5054e-08, 9.9552e-09,\n 1.2785e-08]], device='cuda:0')" + }, + "1": { + "step": "tensor(15018.)", + "exp_avg": "tensor([ 4.7614e-04, 1.4485e-03, 4.1589e-04, -2.1950e-03, 6.1094e-04,\n 7.2924e-04, 6.4161e-04, 5.4346e-04, 1.5857e-05, 8.7380e-04,\n 6.0867e-04, 1.0320e-04, -1.7630e-04, 3.3140e-04, -6.2103e-05,\n 2.3554e-03, 7.9500e-04, 1.6939e-03, 2.4867e-04, 4.6922e-04,\n -1.1655e-03, 4.2645e-04, -1.8445e-03, 6.2139e-04, -6.8024e-04,\n 1.4976e-03, -8.0699e-04, 4.9919e-04, 3.3405e-04, 1.3768e-04,\n -8.0652e-04, 1.4134e-03, -1.3317e-04, 1.7648e-03, 1.6244e-03,\n -4.5679e-05, -9.6327e-05, -2.3139e-03, -8.9711e-04, 8.9681e-04,\n -2.4897e-04, -1.9424e-03, -1.0108e-04, 1.3287e-03, -6.2411e-04,\n -2.5502e-05, 1.8232e-03, -1.0691e-03, -1.1599e-03, 1.5780e-04,\n 4.0012e-04, -5.9582e-04, 3.6818e-04, 1.2414e-04, -2.3417e-04,\n 2.9090e-04, 2.0177e-03, -3.3796e-04, -3.7895e-04, -7.5977e-04,\n 9.4745e-04, -3.4404e-04, -1.1976e-03, -3.7122e-04, 1.4083e-03,\n 1.2692e-03, 1.2134e-03, 4.6650e-04, -3.7871e-04, 1.0239e-03,\n -1.2007e-04, -5.3297e-04, -6.3577e-04, -6.4135e-04, 8.6877e-04,\n -1.3604e-04, 1.0472e-03, -4.0208e-05, -6.0037e-04, 2.2021e-04,\n 1.7326e-03, 5.3781e-04, 1.5185e-04, 4.1026e-04, -4.9575e-05,\n 1.3938e-03, -7.4882e-04, 6.4873e-04, -8.1377e-04, -2.9207e-04,\n 5.2121e-04, 6.9024e-04, 2.2749e-04, 9.1077e-04, -4.2820e-04,\n -9.5497e-04, -1.2966e-03, -1.3913e-03, -1.6141e-04, -8.1345e-04,\n -1.0276e-04, -2.2473e-25, -8.7286e-04, 4.1890e-05, -1.2530e-03,\n -2.5811e-03, 3.2439e-04, 1.5252e-03, 2.9175e-04, -1.4379e-04,\n -2.6422e-05, -6.5442e-04, -1.2884e-04, -5.1088e-04, -1.8042e-04,\n -6.1371e-04, 6.9921e-05, 1.3535e-03, -2.4392e-04, -7.2634e-04,\n 4.2207e-04, -4.7035e-04, 1.8743e-04, -6.2390e-04, -1.0379e-03,\n -6.9377e-04, 1.1780e-03, 8.8476e-04, 3.4359e-04, -7.7854e-05,\n -1.0719e-03, -3.5543e-04, -1.1075e-03, -1.1061e-04, -1.3660e-04,\n -3.9739e-04, -1.2562e-03, -2.1941e-04, -8.8610e-05, -1.3135e-04,\n -1.2655e-04, 1.2476e-03, 1.4595e-04, 8.6776e-06, 9.1526e-05,\n 6.4534e-05, 1.7324e-04, -8.0463e-04, 2.2796e-03, -1.9535e-03,\n 3.5100e-04, 1.1488e-05, 5.2581e-04, -5.8844e-04, -6.1268e-05,\n -2.9151e-04, -5.3180e-04, -1.2729e-05, 1.4188e-03, 3.8592e-04,\n -1.0613e-03, -6.5199e-04, -9.8434e-04, 2.0901e-03, -1.0081e-05,\n -3.1513e-04, 5.5188e-05, -8.7688e-05, -1.0412e-03, -1.0492e-03,\n 4.8167e-04, -1.1351e-03, -1.2050e-03, -2.8170e-04, 1.1162e-03,\n -1.6492e-03, 1.2370e-03, 2.1258e-03, 4.8343e-04, 9.4716e-04,\n 6.1843e-04, 1.6762e-03, -7.2892e-04, -5.0542e-04, -6.5664e-04,\n -1.8946e-04, 9.8525e-04, -2.0150e-03, 3.0186e-04, -8.0632e-04,\n -1.2984e-03, -5.7310e-04, -1.1886e-03, -4.3795e-04, -2.2285e-03,\n -4.5324e-04, 1.1989e-03, -7.6424e-05, 1.7362e-04, -1.0572e-03,\n -3.4597e-04, -1.5327e-03, 7.3210e-05, -9.7292e-04, -5.2228e-04,\n 2.7842e-04, 2.8461e-04, 1.3657e-03, -7.2938e-04, 1.0194e-03,\n -1.0133e-03, -1.4384e-03, 1.5991e-04, 4.4035e-04, 1.4242e-03,\n 1.3251e-03, -9.0389e-04, -1.1089e-04, 5.0050e-04, -1.2537e-03,\n 5.5355e-04, 1.2522e-03, 1.0047e-03, -6.4600e-04, 7.5975e-04,\n 1.6618e-03, 7.2272e-04, -1.6846e-04, -1.7778e-04, -1.0165e-03,\n -6.4431e-04, 1.3600e-03, 2.5154e-03, 3.2286e-04, -1.0679e-03,\n 4.3596e-04, 3.0253e-04, -5.7556e-04, 4.1633e-04, -7.1297e-04,\n 1.0133e-03, 4.6415e-04, 1.4862e-03, -8.6510e-04, 2.1770e-03,\n -1.5623e-03, 3.1182e-04, -5.8279e-04, 1.6427e-03, 1.4738e-03,\n 7.2691e-04, -1.6953e-03, -3.2094e-04, -1.8737e-03, 7.7740e-04,\n 1.7869e-03, -1.1300e-03, -2.3816e-03, 6.2127e-04, 8.1958e-04,\n -1.1243e-03, 1.0244e-03, 3.2337e-04, 7.8605e-05, 2.7280e-03,\n 4.4967e-04, -1.0389e-05, 8.7559e-04, 6.1842e-04, 4.1135e-04,\n 1.8205e-03, 2.1426e-03, -1.6496e-03, -9.0394e-04, 1.0577e-03,\n 4.4092e-04, -1.9006e-04, -7.2816e-04, -9.5134e-04, -2.3410e-04,\n -4.6174e-04, 2.6298e-03, 7.6039e-04, -5.9779e-05, -2.1776e-04,\n -7.0188e-04, 9.3297e-05, 4.2653e-05, 6.5554e-04, -1.2851e-03,\n -9.0391e-04, 1.9052e-03, -2.5997e-04, -1.8204e-03, 1.8105e-04,\n 1.1089e-03, -8.3899e-04, 1.7513e-03, 4.3004e-04, 4.5705e-05,\n 4.2970e-04, -8.5128e-04, 4.6672e-04, -4.5735e-04, 2.4818e-03,\n -2.0025e-03, -8.0177e-04, 1.0292e-03, 7.1372e-04, 9.3386e-04,\n -8.2978e-04, -2.6559e-03, 7.0751e-05, 9.5763e-05, 1.3890e-03,\n -2.3664e-04, -3.2161e-04, 4.5088e-04, -1.2856e-03, -5.7543e-04,\n -1.0586e-03, 2.5142e-04, 4.8893e-04, -1.6118e-04, -5.7232e-04,\n 8.8895e-04, -1.6701e-04, -4.6152e-04, 8.1582e-04, -3.9971e-04,\n 5.3752e-05, -1.0487e-03, -2.4758e-04, 1.8219e-03, 1.2218e-03,\n 1.5689e-03, 1.3251e-03, -2.1546e-03, 1.2162e-03, -2.0817e-03,\n 3.2843e-04, 1.9774e-04, -1.5038e-04, -1.9614e-03, 8.6921e-04,\n -6.4444e-05, -4.9319e-04, 3.2569e-04, 7.0793e-04, 3.6096e-04,\n 6.7958e-04, -3.8210e-05, -1.7061e-03, -2.1830e-04, -7.8948e-04,\n 1.0709e-03, 1.4625e-04, -6.8055e-05, -2.2999e-04, -6.6172e-04,\n -4.7070e-05, 4.7038e-04, 1.0019e-03, -1.0146e-03, -4.9279e-04,\n -6.6330e-04, -1.7998e-05, 9.0128e-04, 3.4021e-04, -4.3900e-04,\n -5.3126e-04, 9.8810e-04, -2.3703e-04, -5.7368e-05, 5.1632e-04,\n 3.5283e-04, -1.0097e-04, 5.6052e-45, 1.7286e-03, -1.3205e-03,\n -1.9483e-04, 2.8688e-04, -9.1162e-04, 3.0389e-03, 5.6052e-45,\n -2.3094e-03, 5.7771e-04, -9.4857e-04, 4.9334e-04, 6.5847e-04,\n -2.6668e-04, 6.3197e-04, -1.0888e-03, -4.0949e-04, 4.4329e-04,\n -1.6831e-04, -5.2801e-04, -3.8421e-04, 5.7433e-05, 5.3305e-04,\n 5.3499e-04, 1.0978e-03, -7.7959e-04, 5.3467e-04, 3.6400e-05,\n -3.3615e-04, 5.6052e-45, 8.2954e-05, -3.2897e-04, -2.9377e-03,\n 6.2567e-04, -5.5722e-04, -1.4261e-03, -5.0761e-04, -4.0209e-04,\n 1.4086e-03, -1.8124e-05, -9.2130e-04, -2.6683e-03, -2.2663e-04,\n 5.5913e-04, -7.8434e-04, 1.2175e-03, -8.0170e-04, 6.3361e-04,\n 6.8326e-04, 2.7815e-05, -6.2484e-04, -3.0481e-04, 3.5902e-04,\n -1.7959e-04, -1.4746e-03, 1.4049e-04, 9.0614e-04, -8.9254e-04,\n 1.0220e-04, -7.5375e-04, -1.3483e-03, -2.3971e-03, 8.1120e-05,\n 1.0030e-03, -4.0408e-04, -1.0214e-03, 1.3008e-03, -7.0978e-04,\n -1.5674e-03, 1.8257e-04, -1.4801e-04, -8.5220e-04, -7.9622e-04,\n -5.8777e-04, 5.9472e-04, -1.0795e-03, 1.5102e-03, -7.7120e-04,\n 1.1412e-03, 9.1863e-04, 9.5414e-04, 6.1453e-04, -2.1872e-04,\n -2.2250e-03, 3.3111e-04, -1.2233e-03, -8.1937e-04, 1.0976e-03,\n -1.6167e-03, 2.6082e-04, -5.8541e-04, -6.4312e-04, -1.3650e-03,\n -6.4699e-04, -5.9038e-04, -1.8717e-03, -7.8083e-04, 7.1175e-04,\n 1.9224e-03, 3.7584e-04, -8.5836e-04, -1.1918e-03, -1.9587e-03,\n -2.1482e-03, 8.8695e-04, -4.1880e-04, 8.8659e-04, 5.0168e-04,\n -8.5337e-04, -6.0043e-04, -3.8342e-04, 4.3700e-05, -9.2669e-04,\n 2.7365e-04, -4.1564e-04, -3.2003e-04, 1.4613e-03, 1.7305e-05,\n 9.6698e-04, 1.4432e-03, -4.6874e-04, -1.6216e-03, -2.1767e-03,\n -1.1990e-04, -2.0095e-03, -1.2909e-03, 1.0365e-03, 1.5852e-03,\n -1.2843e-03, -5.9718e-05, -5.1200e-04, -1.3376e-04, 2.3802e-03,\n 5.6675e-04, 2.5137e-04, -1.7628e-03, -3.6704e-04, 1.4518e-03,\n 9.1754e-04, -4.2458e-04, 1.1256e-04, 2.0021e-03, 5.2054e-04,\n -1.5062e-03, -1.0469e-03, 1.3965e-03, 2.2164e-04, 2.5185e-03,\n -1.3631e-03, 6.6139e-04, -1.5504e-03, -2.1014e-04, -1.6969e-04,\n -1.1144e-03, 5.4068e-04, 1.4415e-03, -1.1322e-04, 9.6505e-04,\n 1.0878e-03, -8.5577e-04, -2.3372e-03, 1.5962e-03, 1.6050e-03,\n -1.2061e-03, 1.3400e-03, 3.8362e-04, 1.8284e-03, 4.0110e-05,\n 8.9081e-04, -1.4408e-03, 1.2668e-03, 1.3145e-04, 2.6215e-04,\n 6.3142e-05, -9.5614e-04, 1.8761e-03, -7.9202e-04, -1.2483e-04,\n 5.6052e-45, -1.5320e-03, 4.0647e-04, -2.0568e-03, 4.5602e-04,\n 1.8740e-03, 2.6514e-04, -6.1595e-04, 2.5596e-04, -1.4372e-03,\n 1.6596e-04, 1.9800e-03, 5.2688e-04, 2.1301e-04, 2.3088e-05,\n -1.3974e-03, -3.3819e-04, -2.3640e-03, -2.6246e-04, 5.8801e-04,\n -7.4488e-04, 1.7055e-03, -1.4142e-03, 8.2726e-04, 1.6888e-03,\n 7.3181e-04, 4.4627e-04, 1.5090e-04, -1.1496e-03, -9.5484e-04,\n 1.0762e-03, -7.6169e-04, 1.1469e-03, -1.2941e-03, 5.6052e-45,\n 2.4981e-03, -7.2153e-04, -1.3090e-04, -5.9208e-05, -7.3869e-04,\n 3.1025e-04, -7.4463e-04, -5.3946e-04, -7.1903e-04, -1.2872e-03,\n -1.8564e-03, 3.3704e-04, -1.4849e-04, -4.3992e-04, 5.0322e-05,\n 7.2806e-04, -1.4296e-04, -1.4528e-03, 1.5324e-03, -1.3710e-03,\n -1.0517e-03, -7.8205e-04, -5.6158e-04, 1.8623e-03, 1.2626e-03,\n 7.8090e-04, 3.1594e-04, 2.6138e-05, 5.3242e-04, 9.2046e-04,\n 1.1867e-03, -1.2285e-04, 5.6052e-45, -3.2691e-04, 1.2146e-03,\n 2.1480e-05, -9.2568e-04, -1.4047e-03, -4.5893e-05, 1.1313e-03,\n 8.5169e-04, 1.4651e-03, -5.7709e-04, 5.3279e-04, -1.4126e-04,\n -1.1514e-03, -3.0831e-04, -3.1487e-04, 1.5548e-03, 1.0012e-03,\n -3.2433e-04, -1.1749e-03, -4.4469e-05, -6.1673e-04, -5.8698e-04,\n 2.0989e-03, -1.3322e-03, -2.3431e-04, -2.6215e-03, -1.0095e-03,\n 7.6560e-04, -4.1478e-04, 1.4150e-04, 1.9199e-03, 7.7997e-04,\n -5.2648e-04, 1.2331e-04, 9.4596e-04, -9.4032e-04, -2.1435e-04,\n -1.6730e-03, -8.2660e-05, 9.9817e-04, 4.6288e-04, -1.2821e-03,\n 4.7451e-04, 9.0966e-04, 8.7362e-04, -1.7448e-04, 3.2148e-04,\n -1.2128e-05, -4.0124e-04, -5.7524e-04, -2.3473e-03, -7.9127e-05,\n -8.5111e-04, -1.0304e-03, -2.3425e-04, 1.7887e-03, 3.1122e-04,\n -1.6338e-03, -2.5408e-04, 7.5638e-04, 6.4130e-04, 1.8479e-03,\n -3.3676e-04, -2.4545e-04, 2.0521e-03, 9.2926e-04, 1.3264e-03,\n -5.3654e-04, 1.3716e-04, -8.5670e-04, 3.0856e-04, -1.7565e-03,\n 3.5369e-04, -8.8421e-04, 2.3793e-03, 1.1787e-03, 4.4550e-04,\n 3.4538e-04, 2.6753e-03, 2.0042e-03, -1.0757e-05, -6.0117e-04,\n -4.6844e-04, 2.0058e-04, 3.1586e-04, -1.7988e-03, 1.2011e-03,\n 2.5001e-04, -9.8673e-04, 1.7523e-03, 2.0474e-03, 2.9616e-04,\n 1.3653e-04, 7.5218e-04, -5.8659e-04, -2.0686e-05, -1.1847e-03,\n -4.1520e-04, 2.8715e-03, 1.3349e-03, -3.5902e-05, 2.4056e-05,\n -8.0807e-04, 7.6291e-05, -3.3697e-04, 1.5613e-03, 2.7256e-04,\n 2.0084e-03, 4.5095e-04, 2.1836e-03, -1.0624e-03, 2.6422e-04,\n -1.0673e-03, -4.3653e-04, 3.7310e-04, -1.8077e-04, 3.1312e-04,\n -7.2981e-04, 6.8201e-04, 1.3027e-03, 1.8121e-03, 3.2071e-04,\n 1.8415e-04, -7.4169e-04, -1.0527e-03, -2.5350e-03, 2.6748e-04,\n -2.2627e-03, 1.2125e-04, -6.9094e-04, 7.2703e-04, -1.0426e-03,\n -4.5038e-04, 1.6376e-03, 5.8209e-05, -9.0707e-04, 3.7652e-04,\n 9.5212e-04, -2.3868e-04, 5.0004e-04, 1.7633e-03, -7.6181e-04,\n 2.0791e-05, 6.9244e-04, 3.8163e-04], device='cuda:0')", + "exp_avg_sq": "tensor([1.2118e-05, 5.8491e-06, 1.0882e-05, 1.8631e-05, 2.3197e-05, 9.2517e-06,\n 7.4356e-06, 1.4738e-05, 1.0896e-05, 8.7817e-06, 1.1466e-05, 7.5857e-06,\n 1.3914e-05, 1.9986e-05, 2.7827e-05, 1.5038e-05, 1.9730e-05, 1.1441e-05,\n 1.5791e-05, 6.3458e-06, 2.0246e-05, 7.0160e-06, 1.8668e-05, 1.6775e-05,\n 1.3820e-05, 2.0986e-05, 1.1881e-05, 1.4417e-05, 1.5387e-05, 1.5522e-05,\n 2.1164e-05, 4.7246e-06, 8.0487e-06, 2.2431e-05, 2.3427e-05, 2.9194e-05,\n 8.7091e-06, 1.6848e-05, 1.9440e-05, 1.9989e-05, 1.6082e-05, 2.6809e-05,\n 1.3400e-05, 2.2839e-05, 4.1013e-06, 1.8181e-05, 1.3152e-05, 1.9473e-05,\n 5.5162e-06, 2.0416e-05, 2.1029e-05, 1.3802e-05, 1.2194e-05, 8.0833e-06,\n 3.7862e-06, 2.1880e-05, 1.1998e-05, 1.2030e-05, 7.3725e-06, 2.7002e-05,\n 2.0146e-05, 1.8558e-05, 1.0492e-05, 1.1646e-05, 2.0381e-05, 2.1651e-05,\n 1.8595e-05, 4.1616e-06, 1.5028e-05, 7.6444e-06, 2.1289e-05, 1.5904e-05,\n 1.7719e-05, 2.2711e-05, 1.0040e-05, 1.4467e-05, 2.2090e-05, 1.0221e-05,\n 2.4154e-05, 8.2460e-06, 1.6608e-05, 9.1370e-06, 1.3284e-05, 1.3452e-05,\n 1.9712e-05, 1.8601e-05, 2.1255e-05, 2.4520e-05, 1.3426e-05, 1.6140e-05,\n 2.8125e-05, 2.2400e-05, 1.2403e-05, 1.9194e-05, 1.5941e-05, 2.0617e-05,\n 3.1445e-05, 2.3966e-05, 8.9328e-06, 1.9163e-05, 1.1431e-05, 1.0405e-09,\n 1.3330e-05, 2.8211e-05, 1.2294e-05, 2.9522e-05, 1.3363e-05, 1.0247e-05,\n 1.0575e-05, 1.2135e-05, 2.0058e-05, 2.6054e-05, 1.8346e-05, 1.9089e-05,\n 1.0889e-05, 1.8728e-05, 9.2768e-06, 1.6436e-05, 2.7454e-06, 8.8982e-06,\n 9.9448e-06, 1.6038e-05, 1.8287e-05, 1.9651e-05, 1.1341e-05, 6.6137e-06,\n 9.7514e-06, 1.5735e-05, 1.1263e-06, 7.8457e-06, 1.1040e-05, 7.1858e-06,\n 1.6453e-05, 8.5899e-06, 1.6346e-05, 1.2096e-05, 1.2862e-05, 1.7156e-05,\n 2.3939e-05, 1.6792e-05, 2.4682e-05, 1.5512e-05, 4.6362e-06, 1.3533e-05,\n 4.5309e-07, 2.5410e-05, 1.8181e-05, 3.9332e-05, 1.5697e-05, 1.9780e-05,\n 2.3850e-05, 1.0715e-05, 1.6191e-05, 1.5037e-05, 9.4639e-06, 2.2471e-05,\n 1.3847e-05, 4.9763e-07, 1.7661e-05, 9.3894e-06, 1.0180e-05, 1.5859e-05,\n 1.5316e-05, 2.8835e-05, 1.2568e-05, 9.1939e-06, 1.5592e-05, 1.4979e-05,\n 1.3941e-05, 1.9138e-05, 1.5455e-05, 1.6041e-05, 8.9970e-06, 1.0635e-05,\n 1.0961e-05, 2.4790e-05, 9.7606e-06, 2.2843e-05, 7.3842e-06, 1.9943e-05,\n 1.7192e-05, 1.1966e-05, 1.9751e-05, 1.0852e-05, 1.7248e-05, 1.8457e-05,\n 7.5761e-06, 1.1440e-05, 2.0928e-05, 1.4987e-05, 5.1570e-06, 1.4819e-05,\n 2.0624e-05, 3.1825e-05, 2.1323e-05, 1.1005e-05, 5.0044e-06, 5.7393e-06,\n 2.1236e-05, 2.3158e-05, 1.4042e-05, 1.2504e-05, 1.6760e-05, 5.2418e-06,\n 5.3438e-06, 9.0318e-06, 5.0343e-06, 2.6952e-05, 1.3537e-05, 2.2708e-05,\n 1.0353e-05, 1.4107e-05, 9.4220e-06, 2.9052e-05, 9.1972e-06, 2.1372e-05,\n 1.5228e-05, 2.3768e-05, 7.6339e-06, 2.3826e-05, 6.0364e-06, 2.0502e-05,\n 1.3595e-05, 1.4567e-05, 1.4153e-05, 1.5272e-05, 1.4327e-05, 1.7479e-05,\n 1.2903e-05, 1.8647e-05, 2.5456e-05, 1.6133e-05, 2.3326e-05, 8.3425e-06,\n 1.5735e-05, 1.3505e-05, 5.2402e-06, 2.3378e-05, 2.4596e-05, 9.2454e-06,\n 1.3327e-05, 1.1611e-05, 1.2664e-05, 1.2567e-05, 1.4825e-05, 2.1705e-05,\n 1.9980e-05, 2.8244e-05, 1.2674e-05, 2.2746e-05, 1.1572e-05, 2.4201e-05,\n 1.6248e-05, 1.3761e-05, 1.7429e-05, 1.6183e-05, 9.9151e-06, 1.8265e-05,\n 7.3024e-06, 1.9757e-05, 1.3277e-05, 2.2168e-05, 2.1995e-05, 2.0963e-05,\n 1.2120e-05, 1.7103e-05, 1.8188e-05, 1.4160e-05, 1.2102e-05, 7.8379e-06,\n 1.7218e-05, 3.2946e-05, 2.3644e-05, 1.5049e-05, 8.0048e-06, 2.4728e-05,\n 1.7832e-05, 8.3665e-06, 1.1303e-05, 5.6234e-06, 1.6951e-05, 1.7002e-05,\n 9.3555e-06, 2.0758e-05, 2.0234e-05, 1.1646e-05, 1.3233e-05, 3.0081e-06,\n 1.3641e-05, 7.2423e-06, 1.2074e-05, 2.1467e-05, 1.3544e-05, 2.2481e-05,\n 1.5885e-05, 2.1137e-05, 1.6899e-05, 1.3656e-05, 1.9038e-05, 1.5845e-05,\n 2.0621e-05, 1.1100e-05, 2.1208e-05, 7.8176e-06, 1.8928e-05, 2.1562e-05,\n 1.7413e-05, 1.4005e-05, 1.9370e-05, 8.6284e-06, 2.1821e-05, 2.0290e-05,\n 1.1661e-05, 8.6062e-06, 1.7638e-05, 1.8082e-05, 9.2315e-06, 1.0017e-05,\n 1.7406e-05, 1.7917e-05, 1.7890e-05, 1.9522e-05, 1.7942e-05, 1.0512e-05,\n 1.1457e-05, 1.0684e-05, 9.7428e-06, 1.2429e-05, 1.9290e-05, 1.7191e-05,\n 9.5494e-06, 2.4724e-05, 1.8282e-05, 1.9695e-05, 1.5676e-05, 2.0856e-05,\n 2.0657e-05, 2.6656e-05, 1.7600e-05, 1.9775e-05, 3.6340e-06, 1.2107e-05,\n 1.0550e-05, 2.6321e-05, 1.3221e-05, 1.7565e-05, 2.1292e-05, 9.0723e-06,\n 1.0811e-05, 2.1791e-05, 1.1121e-05, 8.5360e-06, 5.1777e-06, 3.7574e-08,\n 1.2179e-05, 1.2797e-05, 2.1873e-05, 1.7925e-05, 1.9623e-05, 1.1140e-05,\n 5.3509e-06, 5.1179e-06, 1.8823e-05, 1.0658e-05, 1.6206e-05, 2.6894e-05,\n 2.1227e-05, 8.6312e-06, 6.4807e-06, 1.0549e-05, 1.2929e-05, 1.5056e-05,\n 1.0794e-05, 1.3125e-05, 4.4420e-06, 1.4084e-05, 1.8797e-05, 3.0470e-12,\n 1.9367e-05, 1.0606e-05, 7.5690e-06, 2.0865e-05, 1.4662e-05, 2.1048e-05,\n 7.7981e-15, 3.1000e-05, 9.0904e-06, 1.1136e-05, 7.6262e-06, 1.0737e-05,\n 1.0278e-05, 1.8779e-05, 8.9847e-06, 2.0854e-05, 1.1714e-05, 2.0639e-05,\n 1.5429e-05, 2.3161e-05, 8.6439e-06, 1.1607e-05, 1.9287e-05, 1.0320e-05,\n 7.7904e-06, 1.2159e-05, 1.8304e-05, 2.0475e-05, 2.1061e-13, 9.4248e-06,\n 1.7213e-05, 1.6515e-05, 1.3087e-05, 1.5244e-05, 1.4400e-05, 1.4159e-05,\n 7.9645e-06, 9.6525e-06, 8.3596e-06, 9.5995e-06, 2.8042e-05, 1.1223e-05,\n 1.1726e-05, 2.2253e-06, 6.7136e-06, 1.9626e-05, 9.4247e-06, 1.9105e-05,\n 3.9588e-07, 1.1367e-05, 1.4608e-05, 6.1419e-06, 1.4689e-05, 1.0740e-05,\n 6.0112e-06, 5.2209e-06, 9.5682e-06, 4.1035e-07, 1.7014e-05, 1.5017e-05,\n 1.9868e-05, 2.5487e-05, 1.0876e-05, 2.1464e-05, 2.3511e-05, 1.9605e-05,\n 1.9345e-05, 1.9907e-05, 2.1425e-05, 3.2027e-05, 1.1554e-05, 2.1149e-05,\n 1.1881e-05, 1.3304e-05, 1.5340e-05, 1.1111e-05, 1.7064e-05, 1.7123e-05,\n 1.0605e-05, 8.3598e-06, 8.8444e-06, 7.8648e-06, 2.6236e-05, 2.0525e-05,\n 2.2338e-05, 1.5794e-05, 2.3031e-05, 2.7417e-05, 1.7782e-05, 8.6242e-06,\n 2.1507e-05, 1.1302e-05, 1.4015e-05, 1.2067e-05, 1.9672e-05, 2.6533e-05,\n 2.8727e-06, 1.2010e-05, 1.6154e-05, 1.2629e-05, 2.2949e-05, 1.3700e-05,\n 1.9840e-05, 1.8927e-05, 1.1225e-05, 2.1203e-05, 7.6035e-06, 1.9032e-05,\n 1.9927e-05, 1.4247e-05, 1.2569e-05, 5.3918e-06, 1.3633e-05, 1.5640e-05,\n 7.3380e-06, 1.4530e-05, 2.1799e-05, 9.7018e-06, 1.4428e-05, 1.9807e-05,\n 1.1638e-05, 1.6449e-05, 1.5376e-05, 9.0705e-06, 1.2055e-05, 2.4172e-05,\n 1.4443e-05, 8.1863e-06, 1.1156e-05, 1.4420e-05, 9.8290e-06, 2.7958e-05,\n 1.1320e-05, 1.1341e-05, 7.7889e-06, 2.6264e-05, 1.5501e-05, 7.8533e-06,\n 9.8826e-06, 1.2875e-05, 2.6203e-05, 6.6297e-06, 1.8668e-05, 1.0652e-05,\n 2.3664e-05, 1.8935e-05, 1.3111e-05, 2.1346e-05, 3.8709e-06, 1.9373e-05,\n 1.6161e-05, 1.8049e-05, 2.4189e-05, 2.5083e-05, 2.0269e-05, 3.7992e-06,\n 2.3215e-05, 1.4736e-05, 1.4680e-05, 3.3972e-05, 1.6832e-05, 2.2229e-05,\n 1.7378e-05, 1.7318e-05, 1.3675e-05, 7.5312e-06, 1.0157e-05, 1.5605e-05,\n 3.4681e-05, 1.9289e-05, 1.4712e-05, 6.9875e-06, 1.0729e-05, 1.6927e-05,\n 1.0879e-05, 8.3306e-06, 1.0904e-05, 3.3038e-12, 1.9907e-05, 1.7518e-05,\n 1.8136e-05, 1.0361e-05, 1.9321e-05, 1.5844e-05, 2.1589e-05, 1.3680e-05,\n 1.7086e-05, 1.4748e-05, 2.4677e-05, 1.3094e-05, 1.5677e-05, 1.1633e-05,\n 8.1164e-06, 1.0042e-05, 2.1518e-05, 1.6616e-05, 1.2955e-05, 1.3363e-05,\n 2.3853e-05, 7.2655e-06, 1.8083e-05, 2.6402e-05, 1.0539e-05, 1.5963e-05,\n 1.6530e-06, 1.8027e-05, 1.6460e-05, 1.4168e-05, 2.1156e-05, 1.8540e-05,\n 1.1847e-05, 8.6441e-12, 1.8216e-05, 1.6631e-05, 7.5488e-06, 1.0391e-05,\n 6.6077e-06, 2.4110e-05, 6.9659e-06, 1.1683e-05, 2.2358e-05, 1.6113e-05,\n 2.8895e-05, 1.5839e-05, 2.5241e-05, 1.1646e-05, 3.1275e-06, 9.5185e-06,\n 2.1913e-05, 2.0855e-05, 2.3853e-05, 1.2679e-05, 1.2798e-05, 1.4217e-05,\n 2.0637e-05, 2.0362e-05, 1.6057e-05, 2.0412e-05, 3.1806e-05, 8.5776e-06,\n 1.3077e-05, 1.2201e-05, 2.2335e-05, 1.3708e-05, 3.3788e-14, 7.6366e-06,\n 1.8642e-05, 4.0269e-06, 1.7002e-05, 9.7112e-06, 1.4323e-05, 2.3661e-05,\n 7.6245e-06, 2.5507e-06, 2.2174e-05, 1.5582e-05, 9.1748e-06, 1.1934e-05,\n 6.9595e-06, 1.6553e-05, 1.1197e-05, 3.9306e-05, 1.3304e-05, 1.2041e-05,\n 1.6838e-05, 2.5473e-05, 9.0291e-06, 3.1624e-05, 2.4736e-05, 2.1011e-05,\n 2.1311e-05, 2.2032e-05, 8.5883e-06, 1.6817e-05, 5.5388e-06, 1.5704e-05,\n 1.5913e-05, 1.2320e-05, 2.1549e-05, 1.8087e-05, 1.4054e-05, 5.1909e-06,\n 8.8922e-06, 1.4992e-05, 2.1408e-05, 1.8400e-05, 1.2866e-05, 6.9283e-06,\n 8.9607e-06, 2.2821e-05, 9.7795e-06, 1.2852e-05, 3.1701e-06, 4.3052e-06,\n 1.4948e-05, 2.1089e-05, 1.4397e-05, 3.0428e-05, 2.7653e-05, 1.5057e-05,\n 2.6972e-05, 3.0831e-05, 2.4405e-05, 3.6085e-06, 1.4524e-05, 1.5074e-05,\n 1.0751e-05, 1.2689e-05, 1.3499e-05, 2.7427e-05, 1.1111e-05, 7.8372e-06,\n 4.1762e-06, 4.9005e-07, 1.6849e-05, 2.0357e-05, 2.1932e-05, 2.3950e-05,\n 1.5386e-05, 2.9488e-05, 2.3768e-05, 8.7231e-06, 1.8790e-05, 1.4964e-05,\n 2.6713e-05, 1.2342e-05, 1.5047e-05, 2.4560e-05, 2.0204e-05, 1.2475e-05,\n 4.9419e-06, 1.1148e-05, 1.1094e-05, 1.2443e-05, 8.4325e-06, 1.3554e-05,\n 1.9157e-05, 1.1212e-05, 6.2615e-06, 2.0987e-05, 2.2029e-05, 9.8988e-06,\n 2.4817e-05, 1.8114e-05, 1.8159e-05, 1.6442e-05, 2.1139e-05, 1.4251e-05,\n 7.6134e-06, 1.7663e-05, 3.0034e-05, 3.0816e-05, 1.0142e-05, 1.9452e-05,\n 1.1780e-05, 2.2419e-05, 2.1836e-05, 2.1116e-05, 2.3709e-05, 7.8665e-06,\n 1.5772e-07, 2.2178e-05, 1.0034e-05, 1.2852e-05, 2.4067e-05, 1.8763e-05,\n 7.0997e-06, 1.2723e-05, 9.8661e-06, 1.2556e-05, 2.0661e-05, 1.9870e-05,\n 2.2321e-05, 3.1359e-06, 1.1003e-05, 8.5328e-06, 5.9717e-06, 1.9960e-05,\n 1.9013e-05, 1.4881e-05, 1.3437e-05, 9.8426e-06, 1.5855e-05, 1.8461e-05,\n 1.0004e-05, 2.9310e-05, 3.9497e-06, 2.0613e-05, 1.7068e-05, 2.2442e-05],\n device='cuda:0')" + }, + "2": { + "step": "tensor(15018.)", + "exp_avg": "tensor([[ 1.6727e-07, 2.0565e-06, 2.3408e-06, ..., -4.0864e-06,\n 1.2815e-06, -5.2858e-06],\n [-5.7942e-06, -2.1709e-07, 2.2252e-06, ..., 2.4327e-06,\n -4.6574e-06, 1.7700e-05],\n [ 3.6977e-07, 6.3866e-06, 5.1463e-07, ..., 2.3069e-06,\n -1.6334e-05, 9.6721e-06],\n ...,\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 1.3496e-09, 1.5741e-06, 1.1238e-07, ..., 5.6531e-06,\n 2.1826e-05, 7.4464e-06],\n [ 3.9884e-07, -9.9473e-07, -4.0141e-06, ..., 4.5126e-07,\n -2.6524e-06, -2.0973e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.1767e-10, 4.1017e-10, 5.3639e-10, ..., 3.5477e-10, 2.7503e-10,\n 1.3473e-09],\n [9.8309e-10, 3.9998e-10, 3.5478e-10, ..., 1.7235e-10, 8.0700e-10,\n 1.3501e-09],\n [6.8875e-11, 3.8048e-11, 1.5427e-10, ..., 2.6758e-10, 3.0792e-09,\n 2.1057e-09],\n ...,\n [4.9673e-15, 1.6494e-14, 3.0348e-15, ..., 6.3619e-16, 2.3248e-15,\n 1.4062e-17],\n [2.9927e-11, 4.2445e-12, 7.6880e-11, ..., 5.1636e-10, 2.4470e-09,\n 2.3221e-09],\n [1.5596e-11, 8.8179e-11, 1.7908e-10, ..., 7.6643e-11, 1.5228e-10,\n 3.7914e-09]], device='cuda:0')" + }, + "3": { + "step": "tensor(15018.)", + "exp_avg": "tensor([ 3.5826e-05, -4.9009e-06, -1.1044e-05, -2.6069e-05, 5.0720e-05,\n -2.6530e-05, 7.0891e-07, -2.9158e-05, 1.3162e-05, -1.0063e-05,\n -1.0221e-04, 2.1676e-05, 3.6501e-05, 5.6052e-45, -1.1019e-05,\n 3.7976e-05, -4.4132e-06, 5.6052e-45, 1.3474e-05, -1.8157e-05,\n 6.8541e-06, 1.3969e-05, -2.0577e-05, -3.2776e-05, -1.9515e-05,\n 5.6052e-45, 8.6758e-06, -7.0689e-05, 6.8655e-05, -1.7593e-05,\n 3.1415e-06, -5.2848e-05, -5.6052e-45, -1.6760e-05, 4.6515e-05,\n -8.2501e-06, 5.6052e-45, 1.4551e-05, -3.9174e-05, -1.7348e-05,\n -1.7774e-05, 1.1826e-05, -5.3320e-05, 2.5652e-05, 5.6052e-45,\n 3.5949e-05, 6.0958e-05, 9.0023e-06, 1.4548e-05, -8.7103e-06,\n 5.6052e-45, 2.5505e-05, 1.7385e-05, 5.6052e-45, 5.6052e-45,\n 4.7043e-05, 3.1585e-06, -3.1340e-05, 1.7172e-06, 2.9085e-05,\n 4.2202e-05, 5.6052e-45, -4.0613e-05, -8.6231e-06, 1.6319e-05,\n 5.5677e-05, -2.3533e-05, -1.4206e-05, -4.9066e-05, -1.0201e-05,\n 4.7129e-06, 1.0718e-06, -2.8697e-05, -3.6246e-05, 1.3444e-05,\n 1.6978e-05, 5.6052e-45, -1.1687e-05, -3.0005e-05, 5.6052e-45,\n -4.3456e-05, -5.1357e-06, 5.1101e-05, -8.5840e-05, -1.3506e-05,\n 1.8148e-05, -6.8336e-05, -2.2802e-05, 4.6554e-05, 4.6979e-05,\n -1.2823e-05, 2.9057e-05, -3.2911e-05, -1.4318e-05, -5.6706e-06,\n -3.1279e-05, -2.6359e-05, 6.5789e-05, 1.0359e-04, -7.2250e-05,\n 4.0966e-05, -3.3021e-05, 5.6052e-45, 5.6052e-45, -1.0745e-05,\n 1.9459e-05, 2.8815e-05, -1.9353e-05, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -6.4983e-07, -8.9839e-05, -2.9161e-05, -6.1854e-06,\n -4.2703e-05, 5.9979e-05, -4.0420e-05, -2.7330e-05, 5.6052e-45,\n 1.3861e-06, 2.2716e-05, 2.9388e-05, -4.1544e-06, 1.4196e-05,\n 5.6052e-45, 8.5223e-06, 6.5284e-06, 4.3810e-05, 3.8318e-06,\n 6.1242e-05, -3.0774e-06, 3.4456e-06, 5.6052e-45, -1.3429e-05,\n -4.7518e-07, 5.6052e-45, -2.2595e-05, -2.8828e-06, -5.1220e-05,\n 5.6052e-45, 1.6823e-05, -5.9735e-06, 3.1815e-05, 5.6052e-45,\n 5.8718e-06, 5.6052e-45, 1.3773e-05, 5.6052e-45, -3.4663e-06,\n 2.9406e-05, -4.2892e-05, -1.3486e-06, 2.7188e-05, 5.6052e-45,\n -2.9069e-06, -1.4801e-05, -1.3787e-05, 5.6052e-45, 2.0355e-05,\n -2.3650e-05, 1.7486e-05, 4.0968e-05, -2.4170e-05, 1.6436e-05,\n -1.8699e-05, 2.8113e-05, -7.2935e-06, 8.2956e-05, -2.7687e-05,\n 2.1260e-06, -1.1925e-04, 3.6921e-05, 2.6633e-05, -1.4639e-05,\n 1.5423e-05, 5.6052e-45, 3.9311e-06, 4.2979e-06, -3.5346e-06,\n -3.7539e-05, -8.6637e-07, 5.6052e-45, 6.4697e-07, -5.1926e-05,\n 5.6052e-45, 2.4442e-05, -1.0288e-05, 5.6052e-45, 5.6052e-45,\n 4.8761e-06, 2.1708e-05, -3.8793e-05, 3.9624e-05, 4.1041e-05,\n 2.9391e-05, -1.1071e-06, -6.7506e-05, 6.4249e-05, 5.7471e-06,\n 5.6052e-45, -1.0587e-04, -8.2502e-07, -2.1624e-05, -1.5613e-05,\n 6.6582e-05, 2.1222e-05, 1.6302e-06, -5.1031e-06, 5.6052e-45,\n 5.4538e-06, 4.0614e-05, -1.0474e-04, 1.9710e-05, -1.8771e-05,\n -2.1946e-05, -6.3217e-06, -5.7706e-05, 2.0383e-05, -2.5432e-05,\n -1.2442e-05, -2.4550e-05, 1.5374e-05, -1.5987e-05, -7.0799e-06,\n 4.7488e-06, 3.1533e-05, -3.1225e-06, 2.8372e-05, 3.2015e-05,\n 7.1331e-06, 2.4486e-05, 4.8293e-05, 6.1252e-05, 3.7506e-05,\n -3.6495e-05, 8.1633e-06, -1.1681e-04, -5.9995e-05, -5.9779e-05,\n 2.7433e-06, 6.5859e-05, -2.8549e-05, 1.0505e-06, -2.9428e-05,\n 9.6722e-06, 2.9551e-05, 2.1203e-05, 6.0432e-06, 5.6052e-45,\n 7.9762e-06, 6.4456e-06, 9.0625e-06, -5.4282e-05, -7.6654e-05,\n 3.3932e-05, 5.6052e-45, 6.1062e-06, -3.7646e-05, -5.7865e-05,\n 8.2061e-33, -1.8622e-06, 1.2069e-05, 3.8853e-05, 3.1078e-05,\n -4.4797e-05, -4.1786e-05, 1.1478e-05, 6.8527e-06, 1.5616e-05,\n -4.2186e-06, -1.6790e-05, -2.6182e-05, -1.8460e-05, 5.6052e-45,\n 1.9076e-05, 5.6052e-45, 2.8629e-06, -3.2258e-05, 6.6547e-06,\n 5.6052e-45, 1.0750e-05, 5.6052e-45, -3.5559e-05, 1.3961e-05,\n 1.8471e-05, -1.2084e-05, -1.3723e-05, -1.5698e-05, 3.7406e-05,\n -6.9021e-05, 5.7961e-05, 4.1036e-05, -9.2576e-05, 6.4028e-05,\n -1.4338e-05, -3.1892e-05, 4.4774e-06, -2.2782e-05, 1.4176e-05,\n 9.0121e-06, -1.6651e-06, -5.6052e-45, -2.5196e-05, 1.7210e-05,\n 5.6052e-45, -1.2323e-05, 1.5770e-06, 4.1400e-05, 4.3860e-06,\n 4.3183e-06, 1.1615e-05, 1.0527e-05, -8.2458e-06, -5.1930e-05,\n 1.3413e-05, 1.1291e-05, 4.8950e-05, 4.5275e-05, -8.6425e-06,\n 2.6446e-06, -2.4026e-05, 4.5711e-05, 3.3270e-05, -1.8163e-06,\n -1.0644e-04, -4.9222e-06, -5.1235e-06, -3.3706e-05, 6.0063e-05,\n -2.1220e-05, -2.4019e-05, -3.7362e-05, 2.4784e-05, -1.4365e-05,\n -2.2904e-05, -1.4719e-05, 8.9346e-06, 5.5050e-05, -2.1067e-05,\n -3.1365e-06, 6.3876e-05, 5.6052e-45, 9.7641e-05, 3.0432e-05,\n -9.6259e-06, 5.6052e-45, -2.5940e-05, 6.4887e-06, -7.5313e-05,\n 3.0285e-05, -4.2205e-05, -7.0309e-05, -5.4848e-06, -2.4659e-05,\n 4.1225e-05, -4.1749e-05, 4.5952e-05, -2.3938e-05, 6.9087e-05,\n -2.6639e-05, 1.0570e-04, 1.5565e-05, 2.7606e-05, -2.2334e-05,\n -4.2284e-05, 2.1634e-05, -8.0075e-06, -7.7532e-06, 2.0936e-05,\n -1.0219e-05, 2.8477e-05, 1.1085e-06, 1.7695e-06, 5.6052e-45,\n 2.2163e-05, 9.4280e-06, 4.1740e-05, 5.6052e-45, 5.6052e-45,\n 2.6508e-05, 7.1019e-06, 5.6052e-45, 2.3933e-05, 2.7365e-05,\n 4.0416e-05, 1.6227e-05, -1.9414e-06, -2.0017e-05, 2.8450e-05,\n -9.5246e-06, 5.6052e-45, 5.4847e-05, 3.8749e-06, 5.6052e-45,\n -3.5485e-05, 6.1422e-05, 5.6052e-45, -4.1981e-05, -3.7675e-05,\n 2.5756e-06, 1.0664e-05, 9.6318e-06, 2.8694e-05, 4.4070e-05,\n 4.2988e-05, -2.2823e-05, 2.0625e-05, -1.8526e-05, -2.5950e-05,\n 2.8335e-06, -1.0970e-05, -6.0098e-06, -1.8907e-05, 1.1638e-05,\n 2.4760e-05, -2.8968e-05, 5.6052e-45, -7.7710e-05, -1.6970e-05,\n -8.9077e-05, -3.1777e-05, 2.2991e-05, 2.9451e-05, -2.7620e-05,\n -4.2728e-05, -2.0912e-05, 2.1130e-06, -1.0553e-05, -3.1732e-05,\n 2.7421e-06, 2.6042e-05, -4.9951e-05, 1.4943e-06, -3.4710e-05,\n 8.7420e-06, 5.6052e-45, -1.2392e-05, -1.0768e-05, -4.0295e-05,\n 5.6052e-45, 5.6052e-45, 3.9974e-05, -1.3947e-05, 3.5587e-05,\n -3.5981e-07, -1.0947e-05, -1.1752e-05, -2.1535e-05, -8.1079e-05,\n -1.6105e-05, -2.5785e-05, -1.8807e-05, 3.1542e-05, -1.0610e-05,\n 4.4369e-07, 1.1371e-05, 5.6052e-45, -1.9967e-06, 2.7984e-05,\n -6.8228e-06, 5.6992e-05, -3.4135e-05, -4.0731e-05, -2.4270e-06,\n 2.3483e-05, 1.9941e-05, -6.2426e-06, -3.6477e-05, -5.6052e-45,\n -4.4116e-06, -4.4489e-05, 1.1476e-04, -1.2316e-05, 3.8710e-06,\n 7.9790e-06, 3.9368e-05, 1.1534e-05, 6.4399e-06, 5.6052e-45,\n -1.5344e-05, 2.4256e-05, 5.6052e-45, 1.4836e-05, 2.7856e-05,\n -3.0015e-05, -4.3028e-05, 2.1402e-05, -7.9267e-06, -4.5159e-05,\n 4.9833e-05, -4.2222e-05, 5.6052e-45, -3.2694e-05, -3.5179e-05,\n -2.1628e-05, -4.3067e-05, -4.2519e-05, 2.4030e-05, 5.6052e-45,\n 5.6052e-45, 1.7218e-05, -6.2964e-05, 5.6052e-45, -2.2317e-05,\n 5.6052e-45, -2.5854e-05, -2.3435e-05, 3.5161e-05, 5.6052e-45,\n 2.5585e-05, -6.0507e-05], device='cuda:0')", + "exp_avg_sq": "tensor([1.5322e-08, 2.3775e-08, 2.3855e-08, 1.2264e-08, 2.0566e-08, 1.8191e-08,\n 1.8993e-08, 1.8187e-08, 3.2414e-08, 1.3852e-08, 2.8287e-08, 2.9240e-08,\n 1.7166e-08, 2.0043e-13, 6.2043e-09, 3.1855e-08, 2.3238e-08, 2.3432e-19,\n 1.6600e-08, 1.0345e-08, 2.6725e-08, 6.5434e-09, 8.1379e-09, 1.8110e-08,\n 1.6802e-08, 8.2298e-11, 2.2572e-08, 2.1217e-08, 1.6002e-08, 1.8671e-08,\n 1.3521e-08, 1.9660e-08, 1.9955e-12, 1.7069e-08, 2.5788e-08, 1.3687e-08,\n 1.2083e-11, 1.5889e-08, 1.6413e-08, 1.5450e-08, 2.0149e-08, 1.6638e-08,\n 1.2847e-08, 1.8619e-08, 4.3563e-11, 1.1738e-08, 3.4569e-08, 1.9617e-08,\n 1.7149e-08, 1.6697e-08, 1.1417e-12, 1.7865e-08, 1.7694e-08, 2.4151e-11,\n 2.0699e-11, 1.0201e-08, 2.0646e-08, 1.6756e-08, 3.6714e-08, 1.2247e-08,\n 2.4563e-08, 2.5087e-12, 2.2238e-08, 1.4416e-08, 1.6263e-08, 1.8849e-08,\n 1.0508e-08, 2.3951e-08, 2.2483e-08, 1.3911e-08, 1.3037e-08, 2.7798e-08,\n 1.6061e-08, 2.1523e-08, 1.6821e-08, 1.7380e-08, 1.0016e-11, 1.7953e-08,\n 2.3599e-08, 2.4098e-15, 2.8817e-08, 2.0627e-08, 1.8605e-08, 8.2464e-09,\n 2.0226e-08, 7.3742e-09, 2.2502e-08, 1.2363e-08, 1.5649e-08, 2.3490e-08,\n 1.5603e-08, 1.6657e-08, 1.9819e-08, 1.0438e-08, 1.2844e-08, 1.8811e-08,\n 1.4873e-08, 1.9513e-08, 1.5897e-08, 2.0926e-08, 1.3483e-08, 1.7046e-08,\n 6.7126e-12, 7.1554e-12, 2.9773e-08, 1.6531e-08, 1.7831e-08, 1.4304e-08,\n 1.8442e-11, 1.3481e-14, 3.2928e-11, 2.2494e-08, 1.9907e-08, 1.7203e-08,\n 1.9719e-08, 1.2766e-08, 2.1233e-08, 1.3285e-08, 2.5377e-08, 7.9629e-11,\n 3.6888e-08, 2.8955e-08, 1.8882e-08, 1.9286e-08, 2.4174e-08, 1.7486e-11,\n 1.8956e-08, 1.3171e-08, 2.5247e-08, 2.6648e-08, 1.7319e-08, 1.1007e-08,\n 1.4097e-08, 5.3512e-11, 1.7060e-08, 1.4894e-08, 5.2917e-12, 1.5280e-08,\n 1.7141e-08, 1.7160e-08, 2.8601e-11, 1.9412e-08, 1.6062e-08, 2.0961e-08,\n 3.3000e-13, 1.4157e-08, 1.5216e-10, 1.9561e-08, 2.9255e-11, 2.0254e-08,\n 1.7018e-08, 1.3014e-08, 1.5813e-08, 2.8445e-08, 2.2419e-12, 1.5713e-08,\n 1.1100e-08, 2.2306e-08, 1.2942e-19, 1.3644e-08, 1.4374e-08, 1.7274e-08,\n 2.2656e-08, 1.1091e-08, 2.3748e-08, 1.1948e-08, 2.5417e-08, 1.9556e-08,\n 1.6970e-08, 1.1012e-08, 3.5001e-08, 1.4823e-08, 2.3017e-08, 2.1628e-08,\n 2.2354e-08, 2.1665e-08, 2.4034e-13, 1.4148e-08, 1.5337e-08, 1.3358e-08,\n 2.6386e-08, 1.4131e-08, 2.6855e-11, 1.7840e-08, 1.8100e-08, 1.6338e-12,\n 2.2193e-08, 1.3082e-08, 2.0852e-11, 6.8228e-11, 1.2549e-08, 2.2678e-08,\n 1.8451e-08, 3.0118e-08, 2.1814e-08, 1.9145e-08, 7.3967e-09, 1.5047e-08,\n 1.8659e-08, 1.6574e-08, 3.7581e-12, 2.2814e-08, 9.2347e-09, 1.4776e-08,\n 2.2218e-08, 1.9674e-08, 1.1736e-08, 2.0828e-08, 1.9790e-08, 1.7016e-12,\n 1.9099e-08, 2.5505e-08, 2.7242e-08, 1.9090e-08, 1.8460e-08, 2.2452e-08,\n 2.0807e-08, 1.1451e-08, 2.0908e-08, 1.4817e-08, 1.4449e-08, 1.3155e-08,\n 1.5245e-08, 1.5964e-08, 1.5742e-08, 1.5561e-08, 1.8640e-08, 1.8536e-08,\n 2.3663e-08, 4.3928e-09, 1.2836e-08, 1.1944e-08, 2.1038e-08, 2.4659e-08,\n 1.9295e-08, 1.4578e-08, 2.2247e-08, 2.1828e-08, 2.1092e-08, 1.7153e-08,\n 2.2577e-08, 2.8490e-08, 1.4856e-08, 6.8274e-09, 1.7200e-08, 1.3263e-08,\n 1.8081e-08, 1.3620e-08, 1.2609e-08, 3.2127e-11, 2.1821e-08, 8.7287e-09,\n 8.2289e-09, 3.3109e-08, 1.4500e-08, 1.9521e-08, 3.7755e-11, 1.9419e-08,\n 2.5512e-08, 1.8959e-08, 5.6087e-11, 2.4954e-08, 1.4213e-08, 1.4611e-08,\n 1.6688e-08, 1.9337e-08, 2.0790e-08, 3.2074e-08, 5.1637e-09, 2.5505e-08,\n 3.5946e-08, 1.5672e-08, 2.6755e-08, 1.4990e-08, 6.3691e-15, 1.1897e-08,\n 3.0021e-11, 2.2288e-08, 1.7225e-08, 1.1945e-08, 4.0175e-11, 1.2415e-08,\n 8.2875e-11, 1.6375e-08, 9.2549e-09, 1.9666e-08, 9.4435e-09, 2.7600e-08,\n 1.8374e-08, 1.5507e-08, 2.0550e-08, 3.2770e-08, 1.4856e-08, 2.1788e-08,\n 2.2115e-08, 1.6909e-08, 9.5083e-09, 1.4918e-08, 1.8967e-08, 1.9919e-08,\n 1.5272e-08, 1.1684e-08, 3.4410e-12, 1.2379e-08, 1.8390e-08, 1.1359e-11,\n 1.2000e-08, 1.9161e-08, 2.2861e-08, 1.5290e-08, 1.8375e-08, 2.7670e-08,\n 1.6823e-08, 1.1680e-08, 1.5609e-08, 2.2057e-08, 1.9881e-08, 2.4911e-08,\n 9.9303e-09, 1.5018e-08, 1.7348e-08, 1.7749e-08, 2.2569e-08, 3.1518e-08,\n 1.5245e-08, 1.2668e-08, 8.1045e-09, 1.9437e-08, 2.3180e-08, 1.3636e-08,\n 2.4499e-08, 1.3292e-08, 1.0252e-08, 1.7358e-08, 2.0112e-08, 2.0762e-08,\n 1.9087e-08, 1.7995e-08, 2.3033e-08, 1.4390e-08, 2.1179e-08, 1.8580e-08,\n 1.2700e-11, 1.8773e-08, 2.4893e-08, 2.4111e-08, 9.4206e-14, 1.6236e-08,\n 2.5319e-08, 1.7543e-08, 1.2476e-08, 1.7449e-08, 2.2485e-08, 2.0952e-08,\n 1.8889e-08, 1.7219e-08, 1.2850e-08, 1.9210e-08, 2.7314e-08, 2.0548e-08,\n 1.4597e-08, 2.1986e-08, 1.7893e-08, 1.5231e-08, 2.5949e-08, 1.7924e-08,\n 1.7212e-08, 1.5254e-08, 1.7227e-08, 2.1744e-08, 9.9507e-09, 1.7124e-08,\n 1.7028e-08, 1.8864e-08, 7.4104e-12, 1.0491e-08, 1.2509e-08, 2.8374e-08,\n 3.6064e-11, 1.5394e-11, 2.4441e-08, 1.1914e-08, 2.4592e-14, 2.2133e-08,\n 1.8048e-08, 2.2196e-08, 1.0643e-08, 1.5908e-08, 2.0965e-08, 2.4180e-08,\n 1.3305e-08, 9.2504e-11, 1.6232e-08, 3.0011e-08, 7.9045e-11, 1.8331e-08,\n 1.6050e-08, 1.1920e-11, 1.4952e-08, 1.3796e-08, 2.3538e-08, 1.5133e-08,\n 1.5461e-08, 1.7607e-08, 2.1954e-08, 2.6453e-08, 2.5656e-08, 1.6547e-08,\n 1.6012e-08, 2.5666e-08, 1.2151e-08, 2.8913e-08, 2.6792e-08, 2.2604e-08,\n 1.9477e-08, 1.3265e-08, 2.6497e-08, 2.3725e-11, 1.7351e-08, 3.2736e-08,\n 2.9507e-08, 1.3593e-08, 1.4668e-08, 3.3119e-08, 9.5708e-09, 2.2473e-08,\n 1.9095e-08, 1.6522e-08, 2.4301e-08, 1.5809e-08, 1.2392e-08, 1.0018e-08,\n 9.5210e-09, 2.2996e-08, 1.8763e-08, 1.8203e-08, 3.6159e-11, 2.0258e-08,\n 2.2990e-08, 2.3036e-08, 1.0258e-11, 2.7958e-11, 1.8533e-08, 1.3015e-08,\n 2.5065e-08, 1.9628e-08, 1.8101e-08, 1.6582e-08, 2.2644e-08, 3.2529e-08,\n 1.5901e-08, 3.1725e-08, 1.8448e-08, 1.9328e-08, 1.4240e-08, 5.6519e-11,\n 1.8330e-08, 5.1578e-12, 2.3408e-08, 1.4645e-08, 1.4976e-08, 2.3752e-08,\n 9.7350e-09, 1.0845e-08, 1.1386e-08, 1.5874e-08, 9.3968e-09, 1.9752e-08,\n 2.1276e-08, 8.6190e-12, 2.1915e-08, 1.9917e-08, 3.0622e-08, 9.0445e-09,\n 1.9451e-08, 1.3972e-08, 1.1085e-08, 1.1538e-08, 2.0987e-08, 9.3759e-14,\n 1.6943e-08, 1.3670e-08, 7.5476e-11, 1.2445e-08, 2.5097e-08, 1.7312e-08,\n 1.5627e-08, 1.9230e-08, 1.2513e-08, 8.4207e-09, 1.1724e-08, 2.0500e-08,\n 1.2641e-13, 1.6815e-08, 2.5929e-08, 2.1073e-08, 2.0890e-08, 2.5071e-08,\n 1.2777e-08, 3.5905e-11, 2.2014e-11, 9.3535e-09, 2.6404e-08, 2.4292e-12,\n 2.1080e-08, 5.9114e-11, 1.9067e-08, 2.4424e-08, 1.6924e-08, 1.0401e-11,\n 1.6464e-08, 8.9623e-09], device='cuda:0')" + }, + "4": { + "step": "tensor(15018.)", + "exp_avg": "tensor([[-9.4243e-06, 2.2649e-06, -6.7840e-06, ..., -5.6052e-45,\n 2.0193e-06, 6.6338e-06],\n [-2.8741e-06, 7.0767e-06, 1.5454e-06, ..., -5.6052e-45,\n 2.1669e-06, 1.3377e-05],\n [ 6.7195e-08, 1.9978e-05, -5.8684e-06, ..., -5.6052e-45,\n 1.2226e-06, 2.3581e-06],\n ...,\n [-3.8855e-06, 1.8198e-05, -7.0203e-06, ..., 5.6052e-45,\n -2.6944e-06, 4.0669e-06],\n [ 2.6637e-07, 2.7276e-06, 3.8313e-07, ..., -5.6052e-45,\n 2.1878e-06, -9.7169e-06],\n [ 9.2036e-06, -1.5127e-05, 2.0010e-07, ..., -5.6052e-45,\n 4.3959e-08, -7.1639e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.7149e-10, 7.3872e-10, 6.9620e-10, ..., 2.2547e-16, 6.1569e-11,\n 1.6865e-10],\n [2.0590e-10, 1.2582e-09, 7.8482e-10, ..., 2.1051e-15, 2.2176e-10,\n 3.9653e-10],\n [1.6321e-10, 1.5617e-09, 7.6312e-10, ..., 2.1301e-16, 1.3875e-10,\n 1.6460e-10],\n ...,\n [2.5179e-10, 1.3658e-09, 1.0030e-09, ..., 2.9126e-15, 2.1110e-10,\n 1.5971e-10],\n [2.6679e-10, 1.4653e-09, 9.8746e-10, ..., 3.0563e-15, 2.5337e-10,\n 1.6668e-10],\n [2.3088e-10, 1.7979e-09, 9.4527e-10, ..., 1.1385e-16, 1.5870e-10,\n 5.8150e-10]], device='cuda:0')" + }, + "5": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[-3.7891e-06, -4.0765e-09, 2.1434e-05, ..., 1.3180e-05,\n 1.3397e-06, 5.5336e-06],\n [ 1.9768e-06, 6.2460e-08, 7.2278e-06, ..., 3.3557e-06,\n 1.2757e-05, -5.3477e-07],\n [-2.3695e-06, -5.3200e-09, 1.8014e-06, ..., -8.0994e-07,\n 2.8527e-05, 3.9380e-06],\n ...,\n [-8.5611e-06, -7.5617e-09, -3.5962e-06, ..., -3.7374e-06,\n 2.0408e-07, -9.8332e-06],\n [-5.1191e-06, 4.1020e-09, -1.0239e-05, ..., 1.5417e-05,\n 1.7419e-05, 2.5446e-06],\n [-5.2929e-06, 3.8860e-06, 1.5806e-05, ..., 7.0849e-06,\n -1.1317e-05, -8.2526e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.7402e-10, 9.1581e-12, 6.1135e-09, ..., 5.0045e-10, 6.4788e-09,\n 2.1709e-09],\n [1.2142e-10, 2.3996e-10, 2.8867e-09, ..., 4.8418e-09, 5.0657e-09,\n 2.0268e-09],\n [7.3330e-10, 5.2991e-12, 4.8937e-10, ..., 2.1499e-09, 1.7996e-08,\n 9.6880e-09],\n ...,\n [2.6391e-09, 2.9752e-10, 2.6794e-09, ..., 8.4037e-10, 2.1192e-09,\n 2.0548e-09],\n [2.8610e-09, 2.1060e-11, 1.8065e-09, ..., 4.1858e-09, 2.7083e-08,\n 2.9754e-09],\n [3.4848e-10, 2.3380e-09, 1.2216e-08, ..., 3.2461e-09, 1.1171e-08,\n 2.5961e-09]], device='cuda:0')" + }, + "6": { + "step": "tensor(10012.)", + "exp_avg": "tensor([-3.8260e-05, 1.0032e-05, -8.7159e-05, ..., -3.1401e-05,\n 1.3292e-04, 1.0390e-04], device='cuda:0')", + "exp_avg_sq": "tensor([1.5066e-07, 1.2021e-07, 1.7543e-07, ..., 1.2480e-07, 1.5251e-07,\n 1.7748e-07], device='cuda:0')" + }, + "7": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[-9.6931e-06, 1.4164e-06, -3.2093e-06, ..., 2.1011e-05,\n -2.9477e-06, 7.8448e-06],\n [ 4.5616e-06, -5.9881e-06, 2.8341e-06, ..., -1.4447e-05,\n -7.2106e-07, 2.7824e-05],\n [-4.2409e-06, 2.6835e-06, 1.9509e-06, ..., 6.3852e-06,\n -6.2855e-06, 1.4574e-05],\n ...,\n [-6.8557e-06, 5.5656e-06, -8.4873e-06, ..., 1.3602e-06,\n 1.1233e-05, -7.6355e-06],\n [ 1.2236e-05, 3.5236e-06, 3.2771e-06, ..., 1.2395e-05,\n -8.3300e-06, 2.7537e-06],\n [ 3.5747e-06, 1.4855e-05, 9.5560e-06, ..., 1.0772e-05,\n 2.1741e-07, 1.1398e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.6817e-10, 5.2892e-10, 6.1596e-10, ..., 7.3305e-10, 5.9167e-10,\n 8.0347e-10],\n [8.4369e-10, 5.8288e-10, 5.7147e-10, ..., 1.0668e-09, 9.2013e-10,\n 1.6039e-09],\n [6.8367e-10, 1.0223e-09, 4.9124e-10, ..., 1.4527e-09, 6.2468e-10,\n 3.5394e-09],\n ...,\n [1.1137e-09, 6.1441e-10, 2.2149e-09, ..., 1.4410e-09, 1.6179e-09,\n 1.8494e-09],\n [1.2620e-09, 1.2697e-09, 1.3828e-09, ..., 1.1755e-09, 1.2963e-09,\n 1.5870e-09],\n [1.4221e-09, 5.1345e-10, 9.6160e-10, ..., 1.4938e-09, 8.9371e-10,\n 1.5683e-09]], device='cuda:0')" + }, + "14": { + "step": "tensor(10012.)", + "exp_avg": "tensor(-0.0003, device='cuda:0')", + "exp_avg_sq": "tensor(5.0576e-06, device='cuda:0')" + }, + "15": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[-6.0471e-14, 2.0927e-14, -4.9376e-14, ..., 1.0734e-14,\n -5.9543e-14, -4.4784e-14],\n [-1.1839e-13, -2.7139e-14, 6.8820e-13, ..., 8.3465e-13,\n 8.1906e-13, -1.9885e-14],\n [-1.2997e-14, -4.5683e-14, 1.8364e-14, ..., 7.2951e-15,\n -2.0664e-14, 3.1671e-14],\n ...,\n [ 1.2785e-13, -1.9637e-13, -5.7476e-13, ..., 2.1581e-12,\n 3.2514e-12, 1.4653e-12],\n [ 8.0074e-15, 5.1343e-14, 1.0381e-13, ..., -7.7170e-14,\n -7.6059e-13, -2.8035e-14],\n [ 6.8578e-14, 1.8998e-15, -4.6637e-14, ..., -9.4817e-15,\n -2.1195e-14, -3.2164e-15]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3811e-18, 4.3024e-18, 3.7220e-17, ..., 6.9281e-17, 2.7056e-17,\n 6.8454e-17],\n [2.3299e-18, 8.9125e-19, 1.2907e-17, ..., 6.5552e-18, 1.8511e-18,\n 5.1561e-17],\n [1.1146e-16, 2.6707e-18, 2.9632e-15, ..., 7.8661e-16, 1.4246e-15,\n 9.6367e-16],\n ...,\n [1.4932e-16, 3.2250e-17, 1.2399e-16, ..., 3.3560e-16, 1.7130e-15,\n 9.4162e-16],\n [6.5357e-19, 2.6845e-19, 6.9741e-18, ..., 2.4200e-17, 4.1349e-18,\n 1.8214e-17],\n [1.4071e-17, 3.3297e-18, 2.8182e-16, ..., 7.6713e-16, 2.2518e-16,\n 8.7014e-16]], device='cuda:0')" + }, + "16": { + "step": "tensor(10012.)", + "exp_avg": "tensor([-1.1533e-13, 2.7893e-12, -4.0624e-14, -1.1127e-13, 6.4543e-13,\n 4.0183e-13, -8.8787e-14, -2.4280e-12, 1.4884e-13, -3.1281e-13,\n 3.5998e-13, -5.3538e-13, 3.6293e-12, -1.3202e-13, 2.5376e-13,\n -2.3740e-13, -1.0716e-12, -4.1893e-13, -3.4302e-14, -4.9193e-14,\n -4.8963e-12, -2.5497e-12, 2.4288e-14, 9.9220e-13, -8.2198e-14,\n 1.2033e-12, -1.8036e-12, -4.0132e-13, 1.0493e-13, 1.0437e-12,\n 5.2219e-13, 6.6710e-13, 1.0269e-12, -2.2573e-12, -3.3978e-13,\n 3.2826e-13, 2.1819e-13, -1.7948e-13, -5.1156e-13, 1.2252e-13,\n 4.8935e-13, 2.8740e-12, 4.4853e-12, -1.8285e-13, -5.3047e-12,\n -1.9600e-12, 9.7916e-13, 2.5506e-13, 7.9508e-13, 2.2503e-14,\n 9.0864e-12, -4.4240e-13, 8.0770e-14, -6.5124e-13, -6.6661e-13,\n -1.1286e-11, 2.5391e-13, -1.4529e-12, -3.5625e-12, 2.9148e-13,\n -1.0610e-13, 1.0786e-11, -5.8761e-13, -8.0803e-14], device='cuda:0')", + "exp_avg_sq": "tensor([4.2893e-14, 2.1718e-14, 2.2001e-12, 8.0286e-14, 7.5705e-14, 5.7959e-14,\n 3.6123e-13, 2.2638e-13, 7.7467e-14, 1.1518e-12, 3.5228e-14, 1.4277e-15,\n 3.3832e-13, 1.0488e-13, 5.0320e-13, 1.2643e-12, 7.9193e-14, 2.4204e-13,\n 4.7233e-13, 4.0790e-13, 1.3611e-13, 4.6497e-13, 3.3322e-13, 1.3318e-12,\n 3.2309e-13, 1.4412e-13, 8.3314e-13, 8.9564e-13, 1.2142e-16, 1.1650e-13,\n 1.1335e-12, 4.0983e-16, 5.4903e-13, 1.6795e-13, 6.7719e-13, 1.5889e-12,\n 6.3299e-14, 1.0390e-12, 3.9012e-13, 9.2812e-14, 7.5697e-14, 1.9420e-12,\n 1.9568e-12, 5.0743e-13, 4.0253e-13, 6.1022e-14, 4.1533e-13, 1.4761e-14,\n 2.6273e-13, 2.5558e-12, 2.5713e-12, 1.1092e-13, 3.7441e-14, 6.0246e-13,\n 7.3480e-14, 1.0073e-12, 1.0674e-12, 9.8017e-15, 9.6295e-13, 1.5351e-12,\n 2.3573e-14, 9.2989e-13, 6.7906e-15, 6.8956e-13], device='cuda:0')" + }, + "17": { + "step": "tensor(10012.)", + "exp_avg": "tensor([ 6.9852e-16, 3.8195e-12, 3.0469e-15, 3.8132e-14, -3.9961e-14,\n 1.0272e-14, 1.8820e-14, -5.1942e-13, -1.3044e-13, -5.6299e-13,\n -1.6073e-14, -3.9743e-14, 5.1390e-12, -7.0774e-15, -1.0716e-13,\n 1.6724e-14, 5.4621e-13, 5.4192e-14, 2.1119e-14, 2.5518e-12,\n -4.5575e-12, 6.1799e-13, 8.0764e-15, 4.3331e-12, -2.5001e-14,\n 5.3576e-13, -1.5987e-13, 2.7587e-12, -2.7672e-17, 1.9649e-12,\n 3.9407e-12, 7.6580e-14, 3.5048e-12, -1.7767e-13, 2.5491e-14,\n -1.0343e-13, -1.6013e-16, -2.8960e-14, 9.8979e-15, -3.3671e-15,\n -3.7327e-15, 6.8394e-12, 6.4058e-12, 5.7316e-14, -2.2344e-12,\n -4.0787e-13, -2.5020e-13, 5.2394e-15, -7.1215e-14, -4.5582e-14,\n 1.3621e-11, 1.9217e-14, 1.1390e-15, 6.5407e-14, 1.2040e-14,\n -9.2258e-12, -1.1577e-13, -1.0517e-12, -2.7243e-12, 2.1092e-12,\n 1.5625e-14, 1.3451e-11, -1.0006e-12, 3.1859e-14], device='cuda:0')", + "exp_avg_sq": "tensor([6.9201e-16, 7.4433e-17, 1.5875e-14, 5.8202e-16, 8.2892e-16, 1.0069e-15,\n 2.0831e-15, 8.4683e-16, 6.9993e-16, 8.3799e-15, 4.0630e-16, 7.3455e-17,\n 1.8051e-15, 8.2673e-16, 3.1553e-15, 2.0725e-14, 2.1226e-16, 1.6198e-15,\n 3.0345e-15, 3.2321e-15, 6.0658e-16, 2.8016e-15, 2.7494e-15, 1.0179e-14,\n 2.9146e-15, 6.6385e-16, 4.0881e-15, 5.0965e-15, 1.3994e-17, 4.1475e-16,\n 8.1036e-15, 5.4995e-17, 3.2790e-15, 6.1074e-16, 4.7511e-15, 1.8365e-14,\n 6.7927e-16, 1.1285e-14, 4.1640e-15, 1.0958e-15, 8.6223e-16, 1.1225e-14,\n 1.5030e-14, 4.5299e-15, 1.8226e-15, 1.6273e-16, 1.7881e-15, 3.9452e-16,\n 1.8934e-15, 3.1975e-14, 1.8540e-14, 1.5695e-15, 4.5333e-16, 6.3894e-15,\n 7.8633e-16, 6.6135e-15, 1.1406e-14, 1.5418e-17, 6.9437e-15, 8.2392e-15,\n 5.9622e-16, 3.6864e-15, 2.8882e-18, 5.3077e-15], device='cuda:0')" + }, + "18": { + "step": "tensor(10012.)", + "exp_avg": "tensor([-6.4890e-16, 4.1913e-12, 4.3489e-14, -5.9360e-14, 4.6039e-14,\n -9.8886e-15, -4.4895e-14, -9.9266e-13, 1.7073e-13, 4.1210e-13,\n 1.7379e-14, 4.8259e-14, 3.6481e-12, 6.6470e-15, 1.1029e-13,\n -2.7153e-14, 9.0912e-13, -4.3367e-14, -1.6610e-14, 1.8439e-12,\n -4.2187e-12, 1.7667e-13, -2.5481e-15, 3.0907e-12, 3.5204e-14,\n 1.9078e-12, -4.0215e-13, 1.7088e-12, 1.3420e-15, 2.2669e-12,\n 2.7229e-12, -7.5995e-14, 2.1534e-12, -5.5760e-14, -2.9811e-14,\n 1.6069e-13, 9.1952e-16, 3.0605e-14, 2.1650e-15, 3.6204e-15,\n 1.8349e-15, 4.2659e-12, 5.1603e-12, -9.4846e-14, -1.7855e-12,\n -4.2003e-13, 2.8562e-13, -1.0299e-14, 7.3708e-14, 5.2499e-14,\n 1.0270e-11, -2.5442e-14, -1.2894e-15, -7.2953e-14, -1.3341e-14,\n -7.0843e-12, 1.0971e-13, -1.0484e-12, -1.9348e-12, 2.2251e-12,\n -1.1588e-14, 1.0278e-11, -6.2614e-13, -2.8388e-14], device='cuda:0')", + "exp_avg_sq": "tensor([9.4020e-16, 1.2171e-16, 2.4860e-14, 1.0188e-15, 1.2270e-15, 1.2021e-15,\n 4.2467e-15, 1.5231e-15, 9.3393e-16, 8.8028e-15, 6.8086e-16, 1.1653e-16,\n 2.7623e-15, 1.5562e-15, 6.0496e-15, 1.5583e-14, 3.6631e-16, 2.8532e-15,\n 4.9944e-15, 2.0031e-15, 1.0298e-15, 3.1672e-15, 4.0779e-15, 1.0834e-14,\n 4.3958e-15, 8.1044e-16, 6.4984e-15, 7.4607e-15, 2.3679e-17, 4.6106e-16,\n 9.4652e-15, 8.6126e-17, 4.7715e-15, 9.1540e-16, 8.1446e-15, 1.8703e-14,\n 1.0795e-15, 1.2822e-14, 5.1385e-15, 1.6406e-15, 1.3000e-15, 1.6087e-14,\n 1.5049e-14, 6.7309e-15, 3.1978e-15, 2.3203e-16, 4.4688e-15, 5.7557e-16,\n 3.3041e-15, 2.9638e-14, 2.2206e-14, 1.9155e-15, 7.8184e-16, 8.2163e-15,\n 1.3031e-15, 8.7835e-15, 1.3299e-14, 3.2421e-17, 8.1578e-15, 1.3885e-14,\n 7.3921e-16, 8.1387e-15, 8.8629e-18, 8.5113e-15], device='cuda:0')" + }, + "19": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[ 1.0644e-12, -2.7717e-11, 2.8907e-12, -3.0773e-12, 1.8004e-12,\n 3.9002e-13, 2.7406e-12, 2.1514e-12, 1.9003e-12, 2.7328e-12,\n 1.6379e-12, 3.6564e-13, -2.3478e-11, -4.3998e-14, 3.7848e-12,\n 4.0422e-12, -4.2194e-12, 3.5469e-13, 1.5194e-12, -2.1521e-11,\n 1.5164e-11, -4.3080e-12, 2.6625e-12, -2.1583e-11, 4.2755e-12,\n -2.6840e-12, 3.4964e-13, -1.1992e-11, -1.4838e-12, -1.7701e-11,\n -1.2238e-11, 2.2707e-12, -1.4684e-11, 1.2760e-12, 3.3144e-12,\n 2.2964e-12, 2.3283e-12, 2.8545e-12, 2.0573e-12, -8.6611e-13,\n 2.3661e-12, -2.2567e-11, -2.8133e-11, 1.0329e-12, 8.7105e-12,\n 3.4638e-12, 3.4462e-13, 7.1309e-13, 1.3948e-13, 9.8570e-13,\n -4.3572e-11, 1.4890e-12, 1.1592e-12, 2.8403e-12, -6.2667e-14,\n 2.8247e-11, 3.3534e-12, 9.0375e-12, 1.2355e-11, -6.3377e-12,\n 4.2737e-12, -3.2035e-11, 1.2528e-11, 1.7790e-12],\n [-1.0271e-12, 2.6235e-11, -2.7219e-12, 3.2552e-12, -1.5995e-12,\n -3.0984e-13, -2.6049e-12, -3.0106e-12, -1.8666e-12, -5.6664e-12,\n -1.6764e-12, -3.1809e-13, 2.1726e-11, 2.5986e-13, -3.7032e-12,\n -3.9210e-12, 3.3891e-12, -2.5049e-13, -1.3004e-12, 2.1196e-11,\n -1.5863e-11, 4.1620e-12, -2.6729e-12, 2.0162e-11, -4.2073e-12,\n 3.1635e-12, -1.0485e-12, 1.1746e-11, 1.5795e-12, 1.7044e-11,\n 1.1794e-11, -2.0594e-12, 1.3211e-11, -1.8854e-12, -3.3251e-12,\n -2.3607e-12, -2.2565e-12, -2.6490e-12, -1.9554e-12, 1.0482e-12,\n -2.2505e-12, 2.2210e-11, 2.8235e-11, -1.0648e-12, -9.2236e-12,\n -3.7745e-12, -2.3834e-14, -4.4690e-13, 8.2282e-14, -7.4853e-13,\n 4.3797e-11, -1.3200e-12, -1.0390e-12, -2.6021e-12, 2.2933e-13,\n -3.0957e-11, -3.1564e-12, -9.0533e-12, -1.2198e-11, 5.2172e-12,\n -4.1647e-12, 3.1395e-11, -1.2809e-11, -1.7525e-12]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3493e-13, 1.8066e-14, 1.9446e-13, 9.6674e-15, 7.1379e-14, 3.4792e-13,\n 2.4685e-14, 8.9013e-14, 5.4719e-16, 2.7479e-13, 3.1575e-14, 5.3294e-14,\n 4.9379e-14, 3.8207e-14, 9.1347e-14, 9.6621e-13, 7.6779e-14, 2.9030e-14,\n 2.7380e-15, 1.3894e-12, 7.3206e-15, 4.1186e-13, 1.5910e-13, 4.5484e-13,\n 1.8118e-13, 1.4400e-13, 8.9453e-14, 1.4093e-13, 2.2365e-15, 3.8499e-13,\n 5.7906e-14, 8.7073e-14, 5.2004e-14, 1.7609e-13, 1.0691e-13, 6.7899e-13,\n 3.5623e-14, 4.8438e-13, 1.6730e-13, 1.4745e-13, 1.2648e-13, 1.1882e-13,\n 5.8814e-13, 2.0353e-13, 3.4674e-14, 1.7264e-13, 5.2192e-15, 2.0889e-13,\n 8.4594e-14, 4.9010e-13, 2.3869e-13, 2.2556e-13, 1.0490e-13, 3.1136e-13,\n 1.3188e-13, 5.9813e-14, 2.7864e-13, 1.2437e-14, 2.1907e-13, 6.0327e-14,\n 2.4668e-13, 8.6717e-15, 4.8481e-14, 1.2791e-13],\n [1.3493e-13, 1.8066e-14, 1.9446e-13, 9.6674e-15, 7.1379e-14, 3.4792e-13,\n 2.4685e-14, 8.9013e-14, 5.4719e-16, 2.7479e-13, 3.1575e-14, 5.3294e-14,\n 4.9379e-14, 3.8207e-14, 9.1347e-14, 9.6621e-13, 7.6779e-14, 2.9030e-14,\n 2.7380e-15, 1.3894e-12, 7.3206e-15, 4.1186e-13, 1.5910e-13, 4.5484e-13,\n 1.8118e-13, 1.4400e-13, 8.9453e-14, 1.4093e-13, 2.2364e-15, 3.8499e-13,\n 5.7906e-14, 8.7073e-14, 5.2004e-14, 1.7609e-13, 1.0691e-13, 6.7899e-13,\n 3.5623e-14, 4.8438e-13, 1.6730e-13, 1.4745e-13, 1.2648e-13, 1.1882e-13,\n 5.8814e-13, 2.0353e-13, 3.4674e-14, 1.7264e-13, 5.2192e-15, 2.0889e-13,\n 8.4594e-14, 4.9010e-13, 2.3869e-13, 2.2556e-13, 1.0490e-13, 3.1136e-13,\n 1.3188e-13, 5.9813e-14, 2.7864e-13, 1.2437e-14, 2.1907e-13, 6.0327e-14,\n 2.4668e-13, 8.6716e-15, 4.8481e-14, 1.2791e-13]], device='cuda:0')" + }, + "20": { + "step": "tensor(10012.)", + "exp_avg": "tensor([-1.0569e-11, 9.8210e-12], device='cuda:0')", + "exp_avg_sq": "tensor([1.6727e-12, 1.6727e-12], device='cuda:0')" + }, + "21": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[ 6.7460e-18, 3.1406e-18, 1.3546e-18, ..., -6.2340e-18,\n -8.0428e-18, -5.6103e-18],\n [ 5.8783e-17, 7.2828e-18, 1.4545e-18, ..., -4.7865e-18,\n -1.1088e-18, -3.1810e-18],\n [-4.1864e-18, 9.0142e-18, 1.4480e-17, ..., 1.4004e-17,\n -2.9125e-18, 2.1869e-17],\n ...,\n [ 1.0592e-16, -2.1083e-17, -1.0886e-18, ..., -5.5425e-17,\n 5.0136e-17, -1.0878e-16],\n [-3.8590e-17, -1.7185e-17, -5.9360e-17, ..., -1.0387e-16,\n -4.7449e-17, -4.4572e-17],\n [-6.6238e-18, 2.9710e-18, 6.7629e-19, ..., 4.3248e-18,\n -4.9968e-18, 4.6497e-18]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.5888e-20, 2.9064e-22, 8.2040e-21, ..., 5.5817e-21, 5.9640e-19,\n 2.3879e-19],\n [1.8875e-19, 1.6610e-20, 6.3901e-19, ..., 4.5719e-19, 7.4250e-18,\n 1.8438e-20],\n [4.5272e-18, 5.0431e-19, 5.8351e-21, ..., 5.8941e-18, 5.3156e-17,\n 1.9283e-17],\n ...,\n [3.8919e-18, 1.7154e-19, 1.6660e-18, ..., 5.0184e-17, 8.0182e-17,\n 2.8188e-16],\n [1.9962e-19, 2.1698e-19, 3.3977e-18, ..., 9.0138e-18, 3.8835e-17,\n 1.2627e-17],\n [7.6357e-20, 1.2967e-20, 1.9681e-19, ..., 4.4459e-18, 7.6643e-18,\n 2.2502e-18]], device='cuda:0')" + }, + "22": { + "step": "tensor(10012.)", + "exp_avg": "tensor([-6.9853e-17, 3.8272e-17, 3.5744e-16, -5.8824e-17, -1.0291e-15,\n -3.6569e-16, 2.9324e-17, 1.7251e-16, 6.3040e-17, 2.9880e-16,\n -3.0503e-16, 2.2998e-16, -3.6289e-18, -1.4033e-16, 4.9862e-16,\n 7.0535e-16, 4.0837e-16, -2.7915e-16, 2.6215e-16, -1.1820e-16,\n 7.5010e-17, 4.4795e-16, 3.2739e-16, 3.1632e-16, -4.5566e-16,\n 4.8786e-16, 1.7075e-16, 5.2307e-16, 2.9349e-16, -2.8678e-16,\n -7.4340e-16, -1.6820e-16, 1.0864e-16, 9.5577e-17, -1.8602e-16,\n -3.3439e-16, -1.3962e-16, -1.1111e-16, -4.6077e-16, -1.7946e-15,\n 1.5740e-16, 4.1226e-16, 4.2935e-16, 2.5528e-16, 1.4426e-16,\n -2.2179e-16, -8.0583e-17, 5.4536e-16, -1.1280e-16, 2.3578e-17,\n 1.6706e-17, -2.3444e-16, 2.1176e-16, -6.1627e-16, 3.5163e-16,\n 1.3551e-16, 1.2079e-16, 7.7802e-16, -4.1815e-17, -3.0471e-16,\n 4.0386e-17, -5.1180e-16, -3.5758e-16, -4.1375e-20], device='cuda:0')", + "exp_avg_sq": "tensor([3.2556e-17, 7.5478e-16, 6.7500e-15, 2.2947e-16, 1.3024e-15, 2.0906e-15,\n 6.7220e-17, 1.9351e-15, 6.2945e-16, 1.4569e-16, 1.8675e-16, 1.3551e-15,\n 5.3975e-16, 1.9197e-14, 9.3265e-15, 1.6698e-14, 4.3034e-15, 5.5234e-17,\n 1.4838e-14, 1.6778e-15, 2.0080e-16, 5.1038e-14, 2.4591e-16, 1.6094e-14,\n 2.4282e-15, 1.7279e-15, 5.2375e-15, 5.2421e-15, 2.7330e-15, 1.2295e-15,\n 1.6565e-15, 2.2593e-16, 9.5054e-15, 1.1089e-14, 1.4549e-15, 1.4233e-17,\n 7.9639e-15, 9.9521e-16, 2.9217e-15, 1.7943e-14, 1.5557e-14, 3.6102e-15,\n 4.7835e-15, 2.4721e-15, 1.1385e-15, 8.2020e-16, 4.9841e-15, 1.5978e-15,\n 4.8783e-17, 3.9408e-15, 2.0962e-15, 4.3669e-15, 1.4390e-15, 1.3706e-15,\n 1.7304e-14, 1.1971e-15, 7.8069e-16, 9.9788e-21, 8.7966e-15, 5.8803e-15,\n 2.9146e-15, 1.6043e-14, 1.3428e-14, 1.4831e-15], device='cuda:0')" + }, + "23": { + "step": "tensor(10012.)", + "exp_avg": "tensor([-3.0464e-17, -6.9250e-17, 1.7510e-17, -6.1552e-17, -1.5840e-16,\n 1.5421e-16, 9.9629e-18, -5.2460e-17, 2.6324e-18, 1.2448e-17,\n 3.8355e-19, -1.7524e-18, -2.3735e-18, 1.3340e-16, 5.6074e-16,\n 3.7013e-16, 3.8165e-16, -1.9897e-20, -3.2454e-18, 9.0856e-17,\n 1.0970e-16, -3.7696e-17, 1.1008e-17, -2.3637e-17, 6.2562e-17,\n 1.8054e-16, -6.5977e-17, 2.0512e-16, 4.5835e-18, -8.0666e-17,\n -6.7541e-17, -7.1485e-17, -2.5622e-18, 3.8273e-16, 4.3053e-17,\n 1.8567e-17, -9.2440e-17, 2.5737e-16, 4.4823e-17, -5.2567e-16,\n -1.8317e-17, 1.1521e-16, 3.8459e-17, -3.5723e-18, -1.0257e-17,\n -1.1661e-16, 1.9164e-18, 1.0048e-16, -1.6078e-17, 2.5635e-19,\n 5.9664e-18, 7.8736e-18, -1.1718e-17, 1.0265e-16, 1.8015e-16,\n -8.3361e-19, -8.6081e-18, 5.6518e-17, 1.0632e-17, 2.8901e-16,\n -8.3485e-20, -1.0636e-16, 4.0668e-16, -8.8404e-18], device='cuda:0')", + "exp_avg_sq": "tensor([1.8247e-20, 3.4056e-18, 2.1404e-17, 5.9670e-19, 7.9530e-18, 1.4075e-17,\n 4.2162e-21, 1.4537e-17, 1.2500e-18, 1.5469e-19, 2.0033e-18, 3.1050e-18,\n 1.0612e-18, 7.3466e-17, 1.2325e-16, 7.4751e-17, 3.7047e-17, 5.5202e-19,\n 1.1287e-16, 1.0557e-17, 9.9225e-19, 3.2445e-16, 2.3910e-19, 2.9792e-17,\n 1.7107e-17, 1.1040e-17, 4.0249e-17, 6.8096e-17, 1.9258e-17, 8.6925e-18,\n 2.5610e-17, 4.2383e-18, 5.8659e-17, 9.5763e-17, 1.0234e-17, 2.1907e-21,\n 8.7142e-18, 4.5864e-18, 1.6099e-17, 8.4835e-17, 1.2847e-16, 1.4722e-17,\n 3.0022e-17, 1.5100e-17, 1.3020e-18, 1.4010e-17, 3.3672e-17, 1.6765e-17,\n 5.2850e-19, 1.4340e-17, 7.2040e-18, 1.4523e-17, 5.7798e-18, 2.1273e-17,\n 9.2943e-17, 6.1535e-18, 2.7274e-18, 3.0902e-24, 9.4683e-17, 3.4391e-17,\n 8.0161e-18, 1.3212e-16, 4.2096e-17, 4.9128e-18], device='cuda:0')" + }, + "24": { + "step": "tensor(10012.)", + "exp_avg": "tensor([-6.7285e-17, -2.9259e-17, 7.2560e-17, -3.7344e-17, -2.0685e-16,\n -4.3307e-18, -1.6045e-17, 4.7606e-17, -3.1224e-18, -1.5490e-17,\n 9.7654e-19, 4.1781e-18, -4.2324e-18, 9.5241e-17, 2.9364e-16,\n 3.6218e-16, 2.0209e-16, -1.2686e-19, 8.3740e-18, 4.6045e-17,\n 9.9413e-17, 1.3835e-17, -2.7323e-17, 1.5695e-17, -8.7083e-17,\n 1.4727e-16, 8.4756e-17, 1.2108e-16, 7.2156e-18, -7.0711e-17,\n -1.2525e-16, -1.6544e-17, 2.5919e-18, 1.9268e-16, -7.6781e-18,\n -3.4321e-17, -4.1750e-17, 8.6236e-17, -2.7729e-17, -5.0648e-16,\n 2.1374e-17, 8.8180e-17, 5.1942e-17, 8.3376e-18, -2.4204e-17,\n 6.3491e-17, -3.8130e-18, 1.0277e-16, 1.3620e-17, -1.1497e-18,\n -6.4541e-19, -2.2322e-17, -1.2014e-17, -1.4133e-18, 1.2530e-16,\n 4.2257e-20, -1.6922e-17, 1.0031e-16, 4.4236e-18, 2.3466e-18,\n -5.2698e-19, -4.8938e-17, 3.4706e-18, -2.4891e-17], device='cuda:0')", + "exp_avg_sq": "tensor([1.1512e-20, 3.4215e-18, 4.7693e-17, 7.6913e-19, 1.8265e-17, 2.3036e-17,\n 6.7144e-21, 1.9383e-17, 3.3503e-18, 2.1341e-19, 2.4823e-18, 7.5210e-18,\n 2.2125e-18, 1.7013e-16, 9.9533e-17, 1.4856e-16, 4.2281e-17, 1.1761e-18,\n 9.3065e-17, 1.6541e-17, 2.6801e-18, 3.2696e-16, 8.5256e-19, 1.0039e-16,\n 2.8903e-17, 2.0023e-17, 5.1215e-17, 4.1264e-17, 1.5224e-17, 1.1861e-17,\n 1.9248e-17, 2.6782e-18, 6.5693e-17, 9.7879e-17, 1.6497e-17, 2.1261e-20,\n 6.8807e-17, 1.0089e-17, 3.1392e-17, 1.6439e-16, 1.0758e-16, 2.1160e-17,\n 3.7023e-17, 1.6595e-17, 4.1561e-18, 6.4195e-18, 2.7575e-17, 1.3681e-17,\n 9.9025e-19, 2.6093e-17, 1.2659e-17, 2.8401e-17, 7.7580e-18, 1.6070e-17,\n 1.6334e-16, 7.9865e-18, 3.1014e-18, 3.6783e-24, 5.4025e-17, 4.7404e-17,\n 1.3357e-17, 1.4932e-16, 1.1065e-16, 6.7452e-18], device='cuda:0')" + }, + "25": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[-5.7136e-16, -1.9043e-15, -3.9934e-16, -1.1763e-15, -9.2629e-16,\n 6.8514e-16, 2.7980e-17, -5.7837e-16, 1.0740e-17, 4.4056e-17,\n 3.3785e-16, -4.2694e-17, -1.4814e-16, 3.4193e-16, 2.9649e-15,\n 1.4605e-15, 2.1542e-15, 3.3069e-16, -1.2812e-16, 7.9305e-16,\n 1.0629e-15, 1.3351e-16, -1.0992e-16, -2.0661e-16, 4.2620e-16,\n 9.3706e-16, -2.4534e-17, -1.2042e-15, -9.6554e-16, -4.8634e-16,\n -5.2242e-16, -6.7728e-16, -5.9502e-17, 1.7765e-15, 2.3143e-16,\n -8.5433e-17, -2.3671e-17, 1.8854e-15, 2.3762e-16, -2.5757e-15,\n 6.1695e-17, -1.9033e-15, -9.9695e-16, 7.9861e-17, -4.4330e-16,\n -5.3928e-16, -1.0361e-17, -1.2197e-15, -2.8481e-16, -3.7270e-17,\n -2.4846e-16, 1.5096e-16, -1.4090e-15, 1.2139e-15, 7.8894e-16,\n 6.7874e-17, -5.5114e-16, -1.1937e-15, -8.0673e-16, 1.3298e-15,\n -1.9028e-16, -4.7592e-16, 1.4215e-15, -4.3310e-16],\n [ 5.7134e-16, 1.9043e-15, 3.9932e-16, 1.1763e-15, 9.2631e-16,\n -6.8497e-16, -2.7999e-17, 5.7847e-16, -1.0773e-17, -4.4086e-17,\n -3.3791e-16, 4.2684e-17, 1.4813e-16, -3.4183e-16, -2.9648e-15,\n -1.4604e-15, -2.1542e-15, -3.3064e-16, 1.2809e-16, -7.9291e-16,\n -1.0628e-15, -1.3353e-16, 1.0990e-16, 2.0658e-16, -4.2601e-16,\n -9.3696e-16, 2.4649e-17, 1.2042e-15, 9.6553e-16, 4.8644e-16,\n 5.2251e-16, 6.7754e-16, 5.9491e-17, -1.7764e-15, -2.3138e-16,\n 8.5406e-17, 2.3847e-17, -1.8853e-15, -2.3742e-16, 2.5761e-15,\n -6.1711e-17, 1.9032e-15, 9.9694e-16, -7.9876e-17, 4.4328e-16,\n 5.3937e-16, 1.0340e-17, 1.2197e-15, 2.8493e-16, 3.7245e-17,\n 2.4846e-16, -1.5098e-16, 1.4090e-15, -1.2137e-15, -7.8878e-16,\n -6.7898e-17, 5.5113e-16, 1.1937e-15, 8.0671e-16, -1.3297e-15,\n 1.9025e-16, 4.7603e-16, -1.4214e-15, 4.3308e-16]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.1346e-15, 3.1305e-15, 7.6832e-16, 7.4195e-16, 3.9014e-16, 1.1352e-16,\n 3.2365e-15, 2.4870e-15, 5.1440e-17, 3.9169e-16, 2.5863e-19, 2.2536e-16,\n 2.0603e-17, 6.7136e-16, 6.5193e-15, 1.3219e-15, 2.4477e-15, 1.6176e-17,\n 9.2543e-15, 1.7623e-15, 1.2546e-16, 5.8718e-15, 6.2530e-17, 5.3946e-16,\n 1.0060e-15, 8.4315e-17, 1.3911e-16, 7.7088e-16, 3.2823e-15, 5.4424e-16,\n 7.4634e-15, 8.8010e-16, 2.1925e-15, 3.8427e-15, 7.4796e-16, 3.6329e-17,\n 2.7712e-17, 3.1935e-16, 7.5265e-16, 3.5171e-15, 2.6486e-15, 4.4573e-16,\n 1.5605e-15, 5.6161e-16, 6.0327e-16, 2.9568e-16, 4.5501e-15, 5.4953e-17,\n 6.0903e-16, 3.1919e-15, 6.0601e-16, 7.1446e-16, 1.1816e-15, 8.1802e-15,\n 2.2133e-15, 1.3097e-15, 5.4494e-16, 3.2570e-16, 1.9029e-14, 5.9243e-16,\n 1.4672e-15, 2.0796e-15, 3.9139e-16, 2.0924e-15],\n [2.1346e-15, 3.1305e-15, 7.6832e-16, 7.4195e-16, 3.9014e-16, 1.1352e-16,\n 3.2365e-15, 2.4870e-15, 5.1440e-17, 3.9169e-16, 2.5863e-19, 2.2536e-16,\n 2.0603e-17, 6.7136e-16, 6.5193e-15, 1.3219e-15, 2.4477e-15, 1.6176e-17,\n 9.2543e-15, 1.7623e-15, 1.2546e-16, 5.8718e-15, 6.2530e-17, 5.3946e-16,\n 1.0060e-15, 8.4315e-17, 1.3911e-16, 7.7088e-16, 3.2823e-15, 5.4424e-16,\n 7.4634e-15, 8.8010e-16, 2.1925e-15, 3.8427e-15, 7.4796e-16, 3.6329e-17,\n 2.7712e-17, 3.1935e-16, 7.5265e-16, 3.5171e-15, 2.6486e-15, 4.4573e-16,\n 1.5605e-15, 5.6161e-16, 6.0327e-16, 2.9568e-16, 4.5501e-15, 5.4953e-17,\n 6.0903e-16, 3.1919e-15, 6.0601e-16, 7.1446e-16, 1.1816e-15, 8.1802e-15,\n 2.2133e-15, 1.3097e-15, 5.4494e-16, 3.2570e-16, 1.9029e-14, 5.9243e-16,\n 1.4672e-15, 2.0796e-15, 3.9139e-16, 2.0924e-15]], device='cuda:0')" + }, + "26": { + "step": "tensor(10012.)", + "exp_avg": "tensor([-3.6163e-16, 3.6170e-16], device='cuda:0')", + "exp_avg_sq": "tensor([1.4335e-14, 1.4335e-14], device='cuda:0')" + }, + "27": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[-2.5204e-14, 3.4704e-14, -5.6211e-15, ..., 1.7797e-14,\n 3.7520e-14, -2.7264e-14],\n [-4.2949e-14, 4.6157e-14, -7.0961e-15, ..., -1.5333e-14,\n -5.3122e-14, -2.4553e-14],\n [-2.7767e-14, 3.6270e-14, -5.6400e-14, ..., -8.4140e-15,\n 6.5663e-14, -2.1596e-14],\n ...,\n [-2.8089e-14, 4.2068e-14, -5.2308e-14, ..., 2.3932e-14,\n -5.5932e-14, 3.7863e-14],\n [-3.5779e-13, -2.2784e-13, 6.1288e-13, ..., -6.1276e-13,\n -6.6585e-13, 2.6354e-13],\n [-6.0517e-15, 3.9284e-14, -3.2724e-14, ..., -9.7258e-14,\n -2.1441e-14, 1.1005e-14]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.0374e-18, 6.3347e-19, 1.4484e-16, ..., 1.0935e-16, 3.8091e-16,\n 5.4716e-17],\n [1.3867e-17, 1.3837e-18, 3.8954e-17, ..., 1.6249e-16, 3.2435e-16,\n 4.8428e-17],\n [6.6638e-17, 3.1123e-17, 7.4031e-17, ..., 7.9458e-16, 9.0677e-16,\n 2.8607e-16],\n ...,\n [7.0778e-17, 3.5206e-19, 3.9682e-17, ..., 3.8165e-16, 1.2553e-17,\n 7.1599e-17],\n [4.0367e-17, 1.6566e-18, 1.8764e-16, ..., 3.6970e-16, 3.0857e-16,\n 1.2482e-16],\n [8.4597e-17, 3.1993e-17, 7.3856e-17, ..., 1.1849e-15, 5.7379e-16,\n 5.9958e-16]], device='cuda:0')" + }, + "28": { + "step": "tensor(10012.)", + "exp_avg": "tensor([-3.6169e-14, 1.0289e-13, -6.0299e-13, -1.8981e-13, 5.4141e-13,\n -4.0647e-13, 2.0130e-14, 1.8814e-12, -1.1430e-13, 4.5350e-14,\n 2.4320e-13, -9.5235e-13, 2.0156e-14, 2.2495e-13, -1.7217e-14,\n 2.6653e-12, 5.3248e-14, -2.9409e-13, -7.9461e-13, -1.0039e-12,\n 3.3317e-13, 1.6545e-13, 1.0854e-12, 6.1772e-12, -3.0234e-12,\n -5.5455e-13, -2.5480e-13, 7.8075e-14, -7.0159e-14, 7.2959e-14,\n -4.6891e-12, 9.3317e-14, 1.5942e-12, -3.7310e-13, -1.8459e-13,\n 1.6147e-13, -1.9749e-14, -9.9291e-14, -2.2485e-13, 2.3470e-13,\n -8.1161e-13, -2.1130e-13, -3.9155e-13, 1.1839e-13, -3.4691e-12,\n 3.7379e-13, -2.7442e-14, 3.0354e-13, -5.1112e-13, -9.3044e-13,\n -2.2422e-13, -1.4909e-12, 1.5636e-13, -1.7662e-13, 1.4754e-12,\n -4.0153e-13, -1.2163e-13, -2.3388e-14, 5.8687e-13, 2.1665e-13,\n 1.9773e-12, 2.2084e-13, 1.6568e-12, -1.8374e-13], device='cuda:0')", + "exp_avg_sq": "tensor([1.0609e-13, 1.7296e-13, 3.7814e-13, 5.4069e-14, 2.9513e-14, 1.8963e-15,\n 1.2010e-14, 8.3913e-14, 1.5384e-13, 2.1733e-14, 2.9034e-13, 7.1064e-14,\n 2.2839e-14, 3.7428e-15, 1.8268e-14, 4.1524e-13, 2.6002e-15, 1.7195e-14,\n 1.4272e-13, 1.3323e-14, 1.3361e-14, 9.1183e-14, 5.3397e-13, 5.1000e-14,\n 3.1502e-14, 1.9748e-14, 7.7149e-15, 2.1414e-13, 4.9377e-14, 2.9689e-14,\n 1.0781e-14, 3.7313e-14, 1.6215e-14, 1.4935e-15, 3.1934e-14, 1.5184e-13,\n 3.4939e-14, 8.4940e-14, 4.7416e-14, 1.2044e-13, 3.1667e-14, 4.0529e-13,\n 1.2754e-13, 6.3950e-15, 4.3242e-13, 2.2195e-13, 5.8192e-14, 2.6172e-13,\n 3.7369e-14, 2.7662e-14, 1.9809e-13, 1.5117e-13, 5.8123e-15, 1.5893e-13,\n 6.8583e-13, 1.0717e-14, 2.5019e-14, 3.0170e-16, 7.2068e-14, 8.2023e-16,\n 1.0035e-13, 1.2380e-13, 3.2694e-13, 5.9382e-13], device='cuda:0')" + }, + "29": { + "step": "tensor(10012.)", + "exp_avg": "tensor([-5.3272e-14, -4.3366e-14, 3.7305e-14, 4.4716e-15, 1.2619e-15,\n -1.8236e-13, 6.6722e-15, 5.5679e-12, -1.7362e-14, -9.9091e-16,\n -2.1610e-14, 1.8129e-12, 5.6494e-15, 1.0912e-13, 3.1676e-15,\n 6.9304e-12, 5.5440e-14, 2.1469e-17, 3.1641e-12, 2.0154e-12,\n 4.1950e-14, 3.7158e-12, 5.0224e-12, 8.7634e-12, -1.6320e-12,\n 1.4068e-12, 2.5177e-14, -5.8981e-14, 2.6077e-12, -2.4457e-14,\n -1.2476e-12, -3.3973e-14, 3.2149e-12, -8.6120e-14, -8.5382e-16,\n -3.6520e-14, 2.1322e-12, -1.3518e-14, -2.4497e-15, -3.3430e-14,\n 2.3932e-12, 4.8853e-15, 1.9798e-12, 5.8800e-14, -5.5601e-13,\n -7.7868e-14, 3.5488e-12, 4.9541e-12, 2.7225e-12, 1.2942e-12,\n 3.6162e-15, 1.9994e-12, 5.6805e-14, 4.1279e-12, 5.6785e-12,\n 5.0421e-14, 4.9135e-15, 1.2597e-14, 3.6953e-12, 5.3937e-14,\n 5.4972e-12, -1.0984e-13, 4.9303e-12, -6.5067e-15], device='cuda:0')", + "exp_avg_sq": "tensor([7.2346e-16, 1.0866e-15, 2.5419e-15, 2.0253e-16, 9.5319e-17, 4.1152e-17,\n 3.4620e-17, 6.2158e-16, 7.0231e-16, 8.1045e-17, 1.3543e-15, 4.1600e-16,\n 6.2860e-17, 4.8038e-20, 5.0103e-17, 3.7612e-15, 2.9689e-19, 3.2420e-17,\n 1.3542e-15, 2.4923e-16, 2.3428e-17, 5.9962e-16, 4.1064e-15, 4.0367e-16,\n 2.9750e-16, 2.2165e-16, 4.6924e-17, 1.0460e-15, 5.5817e-16, 7.1883e-17,\n 1.0837e-16, 1.1729e-16, 2.6316e-16, 1.1203e-16, 1.2148e-16, 9.9117e-16,\n 5.7909e-16, 3.2497e-16, 1.7608e-16, 5.4750e-16, 6.3472e-16, 2.1518e-15,\n 3.2276e-15, 3.8050e-18, 3.9800e-15, 1.5915e-15, 5.9779e-16, 2.1043e-15,\n 6.6478e-16, 4.0716e-16, 1.4950e-15, 1.7789e-15, 3.1159e-18, 1.2322e-15,\n 9.1119e-15, 7.9798e-17, 1.0895e-16, 1.1105e-17, 8.7478e-16, 6.0264e-18,\n 7.8302e-16, 5.4135e-16, 1.7516e-15, 7.9025e-15], device='cuda:0')" + }, + "30": { + "step": "tensor(10012.)", + "exp_avg": "tensor([ 6.4453e-14, 3.8851e-14, -6.6481e-14, -4.2238e-15, -1.1031e-15,\n 4.5435e-13, -7.1246e-15, 4.3611e-12, 1.8351e-14, 8.7884e-16,\n 1.0254e-14, 1.6974e-12, -6.9241e-15, -1.0611e-13, -2.9225e-15,\n 5.1528e-12, -5.9426e-14, -1.6849e-15, 2.4466e-12, 1.7817e-12,\n -4.9561e-14, 2.9722e-12, 3.9094e-12, 7.4138e-12, -8.0316e-13,\n 1.7573e-12, -4.1643e-14, 6.7680e-14, 2.5574e-12, 1.7007e-14,\n -8.5013e-13, 4.0285e-14, 3.7709e-12, 3.6154e-13, 9.1560e-16,\n 4.0108e-14, 2.2304e-12, 7.8622e-15, 4.8990e-15, 3.6120e-14,\n 2.0767e-12, -2.9466e-14, 2.1551e-12, -5.6434e-14, -3.5651e-13,\n 7.5494e-14, 3.0490e-12, 3.3416e-12, 2.3826e-12, 1.6304e-12,\n 9.4871e-15, 1.4830e-12, -6.3731e-14, 2.9174e-12, 4.5097e-12,\n -5.4124e-14, -6.9861e-15, -1.2491e-14, 3.5016e-12, -5.2337e-14,\n 4.5113e-12, 1.1961e-13, 4.0536e-12, 8.8727e-15], device='cuda:0')", + "exp_avg_sq": "tensor([9.0480e-16, 1.3828e-15, 3.1087e-15, 3.4748e-16, 1.4324e-16, 9.6283e-17,\n 5.2193e-17, 1.3377e-15, 1.1675e-15, 1.1710e-16, 2.1854e-15, 1.0068e-15,\n 9.8826e-17, 6.5945e-20, 8.4256e-17, 5.0461e-15, 5.1263e-19, 5.9858e-17,\n 2.1492e-15, 3.5978e-16, 2.2731e-17, 1.2135e-15, 6.4139e-15, 8.9807e-16,\n 5.5997e-16, 4.3243e-16, 4.1339e-17, 1.7691e-15, 8.6072e-16, 1.9251e-16,\n 2.2893e-16, 2.4300e-16, 3.6511e-16, 1.3417e-16, 1.8879e-16, 1.1213e-15,\n 7.1402e-16, 6.3034e-16, 2.9759e-16, 8.1179e-16, 6.8563e-16, 3.1643e-15,\n 2.0741e-15, 4.2932e-18, 5.4095e-15, 1.6900e-15, 1.0019e-15, 3.5493e-15,\n 7.4479e-16, 6.0237e-16, 1.5885e-15, 2.2527e-15, 4.0033e-18, 2.0668e-15,\n 8.3781e-15, 6.9199e-17, 1.7861e-16, 1.8347e-17, 1.1224e-15, 1.1853e-17,\n 1.5587e-15, 8.7898e-16, 4.1665e-15, 4.6755e-15], device='cuda:0')" + }, + "31": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[ 2.6720e-12, 2.0207e-12, 2.5402e-12, 2.5618e-12, 1.3836e-13,\n 1.4135e-12, 2.3829e-12, -2.1028e-11, 2.6700e-12, 3.0332e-12,\n -3.9196e-14, -6.7135e-12, 2.0921e-12, 2.3082e-12, 2.3616e-12,\n -2.4941e-11, 2.1248e-12, 3.0567e-12, -1.4250e-11, -1.5258e-11,\n 2.4709e-12, -1.1766e-11, -1.7974e-11, -3.0050e-11, 2.7626e-12,\n -8.4064e-12, 1.4771e-12, 2.7652e-12, -1.5662e-11, 3.1204e-12,\n 3.8425e-12, 2.2760e-12, -1.4992e-11, 1.7884e-12, 2.2968e-12,\n 3.3921e-12, -1.5455e-11, 3.0398e-12, 3.1744e-12, 2.3045e-12,\n -1.5826e-11, 2.8764e-12, -1.2837e-11, 2.8958e-12, 1.2510e-12,\n 3.3920e-12, -1.8748e-11, -1.8824e-11, -1.8393e-11, -8.3317e-12,\n 2.3304e-12, -9.8853e-12, 1.6677e-12, -1.9195e-11, -2.0799e-11,\n 2.8213e-12, 1.2077e-12, 2.2315e-12, -1.9689e-11, 1.0164e-12,\n -2.0928e-11, 2.4377e-12, -1.6206e-11, 1.8090e-12],\n [-2.4021e-12, -1.8869e-12, -2.2563e-12, -2.4372e-12, -1.3789e-14,\n -1.7381e-12, -2.0139e-12, 1.9166e-11, -2.4151e-12, -2.7885e-12,\n 2.6768e-13, 6.2014e-12, -1.9013e-12, -2.0931e-12, -2.1598e-12,\n 2.3579e-11, -1.8836e-12, -2.8752e-12, 1.1949e-11, 1.5104e-11,\n -2.2282e-12, 1.1690e-11, 1.5200e-11, 2.7849e-11, -3.1624e-12,\n 7.9605e-12, -1.2600e-12, -2.4731e-12, 1.3167e-11, -2.8593e-12,\n -4.7925e-12, -1.9921e-12, 1.3684e-11, -1.5884e-12, -2.1330e-12,\n -3.0669e-12, 1.4279e-11, -2.8469e-12, -2.9057e-12, -1.9264e-12,\n 1.4311e-11, -2.7140e-12, 1.1605e-11, -2.5754e-12, -2.7190e-12,\n -3.2124e-12, 1.6528e-11, 1.7066e-11, 1.6946e-11, 7.0475e-12,\n -1.9886e-12, 9.8612e-12, -1.5002e-12, 1.7830e-11, 1.9175e-11,\n -2.4784e-12, -1.0037e-12, -1.8877e-12, 1.7793e-11, -7.5550e-13,\n 1.8959e-11, -2.1126e-12, 1.4156e-11, -1.6553e-12]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.6058e-14, 5.1692e-14, 8.1956e-14, 3.1864e-14, 2.8879e-14, 4.2743e-15,\n 5.0346e-15, 6.5032e-15, 2.0356e-14, 1.5858e-14, 1.3179e-14, 3.9080e-15,\n 1.5060e-14, 5.1624e-14, 1.6014e-14, 5.6232e-14, 1.8274e-14, 5.7636e-15,\n 2.7551e-14, 5.0069e-14, 1.3498e-13, 3.1449e-15, 5.5891e-14, 3.0292e-15,\n 1.1054e-16, 1.4882e-14, 1.9072e-17, 2.2089e-14, 2.6537e-14, 1.6011e-15,\n 6.1990e-16, 7.1349e-16, 5.5241e-15, 8.7860e-14, 1.0603e-14, 8.0095e-14,\n 8.6179e-14, 3.3051e-15, 5.8936e-15, 3.9352e-14, 7.9252e-14, 4.7826e-14,\n 3.5164e-13, 6.5164e-14, 6.9615e-14, 1.0146e-13, 2.6607e-14, 3.3691e-14,\n 8.5106e-14, 2.8791e-14, 5.6809e-14, 7.8483e-14, 2.1037e-14, 3.7171e-14,\n 1.7241e-13, 8.8780e-16, 4.1788e-15, 3.3435e-14, 4.7011e-14, 1.1367e-14,\n 7.9485e-15, 1.0686e-15, 1.5470e-14, 3.4954e-13],\n [1.6058e-14, 5.1692e-14, 8.1956e-14, 3.1864e-14, 2.8879e-14, 4.2743e-15,\n 5.0346e-15, 6.5031e-15, 2.0356e-14, 1.5858e-14, 1.3179e-14, 3.9080e-15,\n 1.5060e-14, 5.1624e-14, 1.6014e-14, 5.6232e-14, 1.8274e-14, 5.7636e-15,\n 2.7551e-14, 5.0069e-14, 1.3498e-13, 3.1449e-15, 5.5891e-14, 3.0291e-15,\n 1.1048e-16, 1.4882e-14, 1.9071e-17, 2.2089e-14, 2.6537e-14, 1.6011e-15,\n 6.1985e-16, 7.1349e-16, 5.5241e-15, 8.7860e-14, 1.0603e-14, 8.0095e-14,\n 8.6179e-14, 3.3050e-15, 5.8936e-15, 3.9353e-14, 7.9253e-14, 4.7826e-14,\n 3.5164e-13, 6.5164e-14, 6.9615e-14, 1.0146e-13, 2.6607e-14, 3.3691e-14,\n 8.5106e-14, 2.8791e-14, 5.6809e-14, 7.8483e-14, 2.1037e-14, 3.7171e-14,\n 1.7241e-13, 8.8780e-16, 4.1788e-15, 3.3435e-14, 4.7011e-14, 1.1367e-14,\n 7.9485e-15, 1.0686e-15, 1.5470e-14, 3.4954e-13]], device='cuda:0')" + }, + "32": { + "step": "tensor(10012.)", + "exp_avg": "tensor([-1.3487e-11, 1.2078e-11], device='cuda:0')", + "exp_avg_sq": "tensor([3.6936e-13, 3.6937e-13], device='cuda:0')" + }, + "33": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[ 5.7934e-19, -1.7138e-19, 1.2886e-19, ..., -5.0986e-19,\n -1.2757e-19, -5.4766e-19],\n [ 8.4170e-20, -2.6625e-21, -3.9340e-22, ..., 8.2871e-22,\n 2.8282e-20, 2.6189e-21],\n [ 1.8901e-20, 6.8378e-20, -1.7573e-19, ..., 4.6638e-20,\n 8.2963e-20, 3.3176e-19],\n ...,\n [-6.3728e-20, 6.7987e-22, -2.5000e-20, ..., -2.3934e-20,\n -6.5642e-21, -2.7916e-20],\n [-2.4640e-20, 3.2354e-21, -1.8434e-20, ..., -1.9826e-20,\n -8.1959e-21, -1.2469e-20],\n [ 5.2499e-20, 3.2917e-20, 3.9399e-20, ..., 1.1308e-19,\n 1.0773e-19, 6.0176e-20]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.0804e-20, 1.2283e-17, 2.6776e-17, ..., 6.7426e-19, 1.6515e-17,\n 2.2687e-17],\n [1.3846e-19, 3.8189e-18, 2.0920e-18, ..., 7.8856e-18, 3.4072e-18,\n 7.0064e-18],\n [1.5663e-19, 3.6839e-18, 2.4685e-19, ..., 5.9571e-18, 9.5751e-18,\n 1.3673e-17],\n ...,\n [4.8517e-18, 6.7030e-17, 1.3550e-17, ..., 1.5019e-16, 5.2792e-17,\n 9.1324e-17],\n [1.2639e-21, 4.2060e-19, 1.9402e-19, ..., 7.2299e-19, 3.0245e-20,\n 1.7678e-19],\n [3.8059e-22, 1.3099e-20, 2.0664e-19, ..., 1.3435e-17, 2.4628e-18,\n 4.9536e-18]], device='cuda:0')" + }, + "34": { + "step": "tensor(10012.)", + "exp_avg": "tensor([ 1.1131e-18, 5.0481e-19, -9.2184e-19, 5.6620e-19, 1.8084e-19,\n 1.6042e-20, -3.1241e-19, 2.1210e-19, 4.8397e-19, -3.8416e-19,\n -8.1343e-19, -1.6239e-19, 2.1399e-19, 3.2727e-19, 4.2851e-19,\n 7.4688e-19, 3.2286e-20, -2.8029e-18, -1.5378e-18, -5.0781e-19,\n -1.4898e-19, -1.0285e-19, 1.5916e-19, -1.4715e-19, 9.4502e-20,\n -1.2318e-19, -1.1764e-19, 1.3701e-19, -7.6573e-19, 6.8974e-20,\n 2.3956e-20, -1.4674e-19, -4.8830e-20, -7.6297e-19, 3.8352e-18,\n -1.8463e-18, 4.2956e-19, 5.9136e-19, 3.7358e-19, 1.5341e-19,\n 1.8682e-18, -1.6293e-19, -8.3748e-20, -8.9230e-20, 2.4070e-19,\n -2.3322e-19, 3.6410e-19, -1.1695e-19, -1.4705e-19, 2.4005e-19,\n 1.4025e-18, -9.2040e-20, -4.5892e-19, -4.6706e-19, 1.1836e-19,\n 2.7325e-20, -2.6730e-19, -1.1237e-21, -2.8169e-19, 6.8745e-21,\n 6.7770e-20, -1.0719e-19, -1.5550e-19, -7.1153e-19], device='cuda:0')", + "exp_avg_sq": "tensor([2.7658e-14, 4.9374e-15, 8.3114e-15, 6.4086e-15, 7.5823e-15, 5.4417e-15,\n 1.4004e-18, 2.7126e-15, 4.3065e-15, 1.9898e-14, 3.8738e-15, 1.5638e-16,\n 3.1962e-15, 1.8707e-14, 1.2226e-15, 2.1753e-15, 1.1599e-14, 1.4400e-14,\n 1.4246e-14, 2.3575e-17, 8.0213e-15, 1.3492e-15, 8.2092e-16, 9.9222e-16,\n 1.0961e-14, 9.1651e-17, 2.2378e-14, 2.7371e-14, 2.5839e-16, 1.0082e-14,\n 4.8182e-15, 3.8992e-18, 9.7005e-17, 9.5460e-15, 1.9271e-14, 6.2950e-15,\n 1.3217e-14, 4.7193e-15, 1.2300e-14, 2.5332e-14, 3.1292e-14, 2.1878e-15,\n 6.5634e-16, 1.7504e-14, 7.6123e-17, 9.3595e-16, 7.8405e-17, 2.5242e-15,\n 8.1194e-16, 3.0354e-14, 3.2066e-14, 1.2354e-14, 4.9153e-15, 1.3477e-15,\n 9.1152e-16, 1.3472e-15, 6.2834e-16, 8.4538e-15, 7.8528e-16, 3.2457e-15,\n 4.7842e-15, 6.0542e-14, 2.9682e-16, 2.3881e-15], device='cuda:0')" + }, + "35": { + "step": "tensor(10012.)", + "exp_avg": "tensor([ 9.9661e-19, -2.1016e-20, -2.4377e-19, 4.0205e-19, -3.1805e-20,\n 9.8500e-19, -6.5469e-20, 6.4744e-19, -9.0810e-19, 6.6865e-19,\n 4.4663e-19, -3.6106e-21, -8.2045e-21, -8.7370e-20, 1.8621e-20,\n 7.8492e-19, -5.2640e-21, -1.1770e-18, -1.5340e-19, 1.8616e-19,\n 1.5734e-20, 2.0623e-20, -1.6969e-19, -2.1420e-20, -1.7828e-20,\n -2.3522e-20, 1.9879e-22, -1.2057e-21, 1.2050e-19, 7.4763e-19,\n 1.9539e-18, 1.7698e-19, -5.3080e-21, -7.4934e-19, 3.5521e-18,\n -6.7298e-19, 4.6366e-19, 3.5811e-19, 5.0365e-19, -6.3643e-20,\n 2.6267e-18, 3.6446e-21, 2.5966e-22, -4.1349e-21, 9.2425e-21,\n -1.3853e-20, 6.1613e-20, 8.0730e-22, -4.7147e-21, -4.4052e-20,\n 2.0272e-18, -1.2351e-20, -2.7078e-19, 9.2975e-20, 2.1795e-22,\n 4.0322e-21, 1.1797e-19, -9.8619e-21, 3.6168e-19, -1.0286e-21,\n -1.0216e-21, 6.1910e-20, -2.5887e-20, -5.0513e-19], device='cuda:0')", + "exp_avg_sq": "tensor([1.3915e-16, 4.3315e-17, 6.6466e-17, 4.4640e-17, 7.0000e-17, 3.5917e-17,\n 2.9480e-19, 2.3729e-17, 2.3552e-17, 1.9210e-16, 2.8271e-17, 1.1668e-19,\n 1.3998e-17, 6.3442e-17, 3.8330e-18, 1.0372e-17, 1.3845e-16, 1.0309e-16,\n 7.1771e-17, 1.9114e-18, 6.5267e-17, 6.7509e-18, 7.0050e-18, 2.4248e-18,\n 5.6034e-17, 8.2974e-21, 1.4699e-16, 1.7898e-16, 2.4275e-18, 6.8619e-17,\n 4.4690e-17, 1.9958e-19, 1.9863e-21, 6.6016e-17, 8.2040e-17, 5.1884e-17,\n 8.3145e-17, 4.1370e-17, 6.1343e-17, 1.6861e-16, 1.2681e-16, 9.9268e-18,\n 1.3049e-18, 7.6899e-17, 1.4045e-18, 1.9614e-18, 8.8194e-21, 6.0706e-18,\n 2.7408e-18, 1.9224e-16, 3.0583e-16, 1.0636e-16, 2.9545e-17, 1.7582e-17,\n 2.3876e-18, 4.0946e-18, 7.5410e-18, 7.4953e-17, 5.8458e-18, 1.4997e-17,\n 2.9075e-17, 6.4098e-16, 2.6911e-19, 1.5208e-17], device='cuda:0')" + }, + "36": { + "step": "tensor(10012.)", + "exp_avg": "tensor([ 5.8688e-19, 2.0744e-20, -2.6743e-19, 6.0979e-19, 2.6743e-20,\n 6.0045e-19, -1.8104e-19, 6.6152e-19, 5.6943e-19, 4.3572e-19,\n -1.1052e-20, 1.1716e-20, 7.3054e-21, 1.0498e-19, -1.6959e-20,\n 8.6361e-19, 6.0878e-21, -1.6662e-18, -5.4280e-19, 2.4321e-19,\n -1.1233e-20, -2.0476e-20, 3.2992e-19, 2.1884e-20, 1.4385e-20,\n 2.0964e-20, 6.1467e-21, 6.6375e-21, -3.3622e-20, 1.3602e-19,\n 8.9755e-19, 1.9043e-19, 1.0994e-22, -4.5822e-19, 3.4274e-18,\n -8.1503e-19, 6.0929e-19, 7.1427e-19, 4.9253e-19, 4.8168e-20,\n 2.1334e-18, -2.9425e-21, 2.3119e-22, 1.5791e-20, 3.6549e-19,\n 1.1382e-20, -5.9930e-20, -5.3505e-22, 4.9253e-21, 4.5402e-20,\n 1.7525e-18, 1.9240e-20, -5.7267e-20, 6.6913e-20, -9.8785e-23,\n -3.5523e-21, 1.5507e-19, 1.0302e-20, 2.5986e-19, 6.3628e-22,\n 1.0378e-21, -8.7557e-20, 2.0859e-20, -3.8077e-19], device='cuda:0')", + "exp_avg_sq": "tensor([2.5559e-16, 3.4847e-17, 9.0876e-17, 7.3771e-17, 6.2652e-17, 6.2941e-17,\n 6.0171e-19, 3.8278e-17, 5.1525e-17, 2.1362e-16, 4.4283e-17, 2.4362e-19,\n 1.8149e-17, 1.4209e-16, 4.9060e-18, 3.0582e-17, 7.8740e-17, 1.5278e-16,\n 1.4616e-16, 2.4439e-18, 6.0659e-17, 5.6190e-18, 1.3791e-17, 3.6975e-18,\n 7.4605e-17, 1.2181e-20, 1.7062e-16, 2.1519e-16, 5.7308e-18, 1.2619e-16,\n 5.5015e-17, 4.4050e-19, 4.8784e-21, 1.0073e-16, 1.8889e-16, 7.3651e-17,\n 1.4960e-16, 5.5247e-17, 1.2851e-16, 2.2260e-16, 3.0956e-16, 1.3295e-17,\n 2.3457e-18, 1.2678e-16, 2.8921e-18, 4.0494e-18, 1.7327e-20, 1.3508e-17,\n 3.6666e-18, 2.3019e-16, 3.2095e-16, 9.1462e-17, 5.4965e-17, 1.9085e-17,\n 4.6989e-18, 7.7810e-18, 1.0641e-17, 5.9273e-17, 1.0844e-17, 2.2177e-17,\n 3.1772e-17, 5.1922e-16, 5.5943e-19, 3.3337e-17], device='cuda:0')" + }, + "37": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[-2.9144e-18, 2.0982e-18, 1.1956e-18, -1.9959e-18, 6.5789e-19,\n -4.3934e-18, 8.2661e-19, -3.3738e-18, 3.9443e-18, -2.7240e-18,\n -2.8009e-18, 6.8903e-19, -1.9443e-19, -1.9012e-19, 4.4658e-19,\n -3.3404e-18, 1.7308e-18, 4.1371e-18, 3.9895e-19, -2.4124e-18,\n 3.3560e-19, 6.5701e-19, 8.2021e-19, -2.8923e-20, 5.3004e-19,\n -1.7862e-19, 1.2093e-18, -6.3706e-19, -1.2790e-18, -2.9044e-18,\n -7.5983e-18, -1.6905e-18, 5.5297e-19, 3.3316e-18, -1.1498e-17,\n 3.3316e-18, -1.9502e-18, -1.8090e-18, -1.5704e-18, 3.6749e-19,\n -8.1269e-18, -1.4327e-19, 4.8693e-19, 7.9681e-19, -2.1165e-19,\n 3.6449e-19, -1.7587e-19, 4.7946e-19, -3.1402e-20, -3.6133e-19,\n -7.5711e-18, 1.6378e-18, 1.6431e-18, -8.8147e-19, -1.6492e-19,\n -8.2240e-20, -1.0013e-18, 2.2056e-18, -2.3474e-18, 2.1456e-19,\n 1.3106e-19, 2.7217e-19, 2.0754e-19, 3.3600e-18],\n [ 2.8492e-18, -2.0762e-18, -1.2703e-18, 1.9397e-18, -6.5079e-19,\n 4.2757e-18, -8.1889e-19, 3.3195e-18, -3.9646e-18, 2.6210e-18,\n 2.7169e-18, -6.7017e-19, 1.9081e-19, 1.9802e-19, -4.3917e-19,\n 3.3196e-18, -1.7302e-18, -4.2453e-18, -4.7641e-19, 2.3712e-18,\n -3.2547e-19, -6.4795e-19, -8.8703e-19, 3.0016e-20, -5.1761e-19,\n 2.0289e-19, -1.1904e-18, 6.4159e-19, 1.2605e-18, 2.8710e-18,\n 7.5531e-18, 1.7043e-18, -5.3892e-19, -3.4235e-18, 1.1418e-17,\n -3.3954e-18, 1.8485e-18, 1.6478e-18, 1.4496e-18, -3.5476e-19,\n 8.0796e-18, 1.4778e-19, -4.7732e-19, -7.7567e-19, 1.6523e-19,\n -3.6112e-19, 1.9110e-19, -4.8251e-19, 4.3565e-20, 3.8197e-19,\n 7.4815e-18, -1.6245e-18, -1.7417e-18, 8.0404e-19, 1.7226e-19,\n 9.0845e-20, 9.6765e-19, -2.1947e-18, 2.2710e-18, -2.0774e-19,\n -1.2251e-19, -2.7090e-19, -1.9684e-19, -3.3731e-18]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.2044e-16, 5.5111e-15, 2.6097e-15, 2.0700e-15, 3.2575e-15, 5.7931e-16,\n 8.7195e-16, 6.0751e-16, 7.3680e-17, 4.4686e-15, 1.2756e-15, 5.1710e-16,\n 1.2927e-15, 9.7241e-16, 4.8126e-16, 1.3215e-17, 2.3827e-14, 1.3348e-15,\n 1.1990e-15, 3.3623e-15, 6.4004e-15, 9.6993e-15, 7.7568e-17, 3.0487e-15,\n 3.0518e-15, 1.5514e-16, 5.5691e-15, 5.8335e-15, 9.1814e-16, 5.1436e-16,\n 2.6553e-16, 3.4226e-16, 1.8789e-16, 1.4290e-15, 6.4980e-16, 2.5058e-15,\n 1.7562e-15, 2.7165e-15, 3.6266e-16, 3.2905e-15, 1.1704e-15, 1.0657e-15,\n 5.3056e-17, 1.7179e-15, 8.5521e-16, 4.1082e-16, 1.0679e-16, 2.5114e-16,\n 1.2440e-16, 3.5572e-15, 6.9128e-15, 5.5243e-15, 1.3592e-15, 5.6017e-15,\n 5.0000e-17, 6.9609e-16, 3.9454e-15, 1.0208e-14, 1.6883e-16, 1.2201e-15,\n 5.3977e-15, 9.5182e-15, 1.7154e-16, 1.0893e-15],\n [7.2044e-16, 5.5111e-15, 2.6097e-15, 2.0700e-15, 3.2575e-15, 5.7930e-16,\n 8.7195e-16, 6.0751e-16, 7.3681e-17, 4.4686e-15, 1.2756e-15, 5.1710e-16,\n 1.2927e-15, 9.7241e-16, 4.8126e-16, 1.3214e-17, 2.3827e-14, 1.3348e-15,\n 1.1990e-15, 3.3623e-15, 6.4004e-15, 9.6993e-15, 7.7568e-17, 3.0487e-15,\n 3.0518e-15, 1.5514e-16, 5.5691e-15, 5.8335e-15, 9.1814e-16, 5.1436e-16,\n 2.6553e-16, 3.4226e-16, 1.8789e-16, 1.4290e-15, 6.4980e-16, 2.5058e-15,\n 1.7562e-15, 2.7165e-15, 3.6266e-16, 3.2905e-15, 1.1704e-15, 1.0657e-15,\n 5.3056e-17, 1.7179e-15, 8.5521e-16, 4.1082e-16, 1.0679e-16, 2.5114e-16,\n 1.2440e-16, 3.5572e-15, 6.9128e-15, 5.5243e-15, 1.3592e-15, 5.6017e-15,\n 5.0000e-17, 6.9609e-16, 3.9454e-15, 1.0208e-14, 1.6883e-16, 1.2201e-15,\n 5.3977e-15, 9.5182e-15, 1.7154e-16, 1.0893e-15]], device='cuda:0')" + }, + "38": { + "step": "tensor(10012.)", + "exp_avg": "tensor([-1.8127e-18, 1.7502e-18], device='cuda:0')", + "exp_avg_sq": "tensor([2.1794e-14, 2.1794e-14], device='cuda:0')" + }, + "39": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[-5.6132e-21, 4.4211e-21, 2.4091e-20, ..., 1.4257e-20,\n -6.9126e-21, 1.3636e-20],\n [-1.2977e-20, 4.3374e-22, 1.5941e-21, ..., 1.0343e-22,\n -1.0384e-20, -6.7490e-21],\n [ 9.4049e-22, 5.8010e-23, 7.5760e-22, ..., 1.1957e-22,\n 5.3800e-23, -1.8125e-21],\n ...,\n [ 2.3185e-21, -1.6649e-21, -1.1668e-20, ..., -1.1707e-20,\n 1.0002e-21, -8.4010e-21],\n [ 6.5097e-22, -1.9631e-21, -5.7307e-21, ..., -9.9154e-21,\n 2.5900e-21, -8.0063e-21],\n [ 9.1269e-22, -4.6966e-22, -1.0008e-21, ..., -8.5433e-22,\n 2.9950e-22, -2.2507e-21]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.2734e-18, 2.2038e-18, 1.5667e-17, ..., 2.2533e-16, 2.8854e-16,\n 1.0028e-16],\n [1.8754e-19, 1.3454e-18, 1.3449e-17, ..., 1.6124e-17, 5.6246e-18,\n 2.3441e-17],\n [2.4395e-19, 7.9951e-21, 4.2887e-18, ..., 1.1904e-17, 1.8536e-17,\n 1.1030e-17],\n ...,\n [4.2543e-21, 1.0595e-20, 1.0144e-19, ..., 9.7203e-21, 3.8194e-19,\n 3.4246e-20],\n [1.0187e-20, 1.4764e-19, 1.0406e-18, ..., 1.2464e-19, 5.1245e-18,\n 4.5900e-19],\n [1.1551e-22, 3.1944e-20, 1.4040e-19, ..., 7.6019e-20, 4.0253e-19,\n 1.7348e-22]], device='cuda:0')" + }, + "40": { + "step": "tensor(10012.)", + "exp_avg": "tensor([-3.8246e-20, -4.2115e-20, 1.0419e-20, -8.1568e-21, 9.2073e-21,\n 7.9266e-20, 1.8662e-21, 1.1819e-20, -3.7885e-20, 3.1789e-21,\n 8.1330e-21, 1.3202e-20, 5.2351e-21, 9.7800e-21, -7.3046e-21,\n 5.0584e-20, 6.6648e-20, -8.8784e-21, -7.6630e-20, -5.2726e-20,\n -3.7934e-21, -8.9634e-21, 1.2701e-19, -8.8659e-21, -9.3213e-21,\n 3.3050e-21, -1.2216e-20, -2.8529e-21, -5.4059e-21, -1.8263e-21,\n 9.1103e-21, -8.2421e-20, -6.9700e-21, 1.8829e-20, -5.2701e-20,\n 5.1754e-21, 3.8493e-21, -6.2811e-21, 6.3470e-21, 7.7832e-21,\n 4.6670e-21, 3.7635e-21, 2.2829e-21, 4.1004e-20, -4.9086e-20,\n 2.7873e-21, 1.5884e-20, -1.2566e-21, 2.3190e-20, 4.0828e-21,\n -1.2059e-20, -6.2574e-20, 3.2947e-20, -7.6236e-21, -2.5033e-21,\n 5.1256e-20, -1.4372e-20, 3.3839e-21, -7.6202e-22, -9.2013e-20,\n 5.7446e-21, -9.4748e-21, 7.7125e-20, 6.4162e-21], device='cuda:0')", + "exp_avg_sq": "tensor([1.5094e-13, 9.6615e-15, 1.2947e-14, 1.1505e-16, 2.2570e-14, 6.4765e-14,\n 1.1865e-14, 1.5779e-14, 4.6385e-16, 8.2900e-14, 1.6586e-13, 4.2312e-15,\n 5.0908e-15, 1.3613e-13, 5.4986e-14, 1.5347e-15, 9.9865e-15, 6.7105e-14,\n 1.8974e-13, 6.1084e-16, 2.7253e-14, 7.6193e-15, 2.0449e-14, 2.0141e-14,\n 9.4062e-15, 1.7308e-14, 5.5124e-14, 1.0684e-13, 1.0340e-13, 1.2806e-15,\n 1.6169e-14, 8.5532e-15, 2.7740e-15, 6.5473e-14, 4.3097e-15, 8.5555e-15,\n 6.4888e-15, 5.5468e-14, 1.4047e-15, 1.3411e-14, 1.9056e-16, 3.0966e-14,\n 5.2948e-18, 4.0033e-14, 3.6339e-15, 4.5311e-14, 4.3456e-15, 3.2557e-14,\n 7.4900e-14, 4.9349e-16, 2.6624e-14, 1.2127e-14, 1.7738e-15, 4.2666e-14,\n 1.8817e-15, 5.0067e-14, 1.7971e-13, 5.7127e-14, 5.5728e-15, 2.6446e-15,\n 7.8529e-15, 2.3775e-16, 2.2987e-15, 1.2148e-16], device='cuda:0')" + }, + "41": { + "step": "tensor(10012.)", + "exp_avg": "tensor([ 6.4969e-20, -5.8673e-21, -8.9840e-23, 6.7857e-21, -2.9491e-21,\n 9.5604e-20, -3.6065e-23, -3.4610e-21, -1.8469e-20, -3.2374e-21,\n -4.2122e-21, 8.5127e-22, -3.0801e-22, -6.5896e-21, -8.2029e-22,\n 5.9479e-20, 9.6366e-20, 1.7621e-19, 7.1673e-20, -9.5632e-21,\n -1.0516e-21, 4.8475e-23, 1.0754e-19, -5.3290e-22, -2.4076e-22,\n 4.1511e-20, 4.0496e-20, -2.2398e-22, 1.1044e-19, 2.6271e-20,\n -2.2966e-21, -2.8639e-20, -1.1212e-21, 9.9387e-20, 2.0987e-20,\n 1.8147e-22, -5.8625e-22, 6.7725e-21, 1.5083e-21, -1.2072e-21,\n -1.8737e-20, 7.0744e-20, 1.8749e-20, 1.3432e-19, -4.9629e-20,\n -5.2610e-21, 6.4393e-20, -4.3809e-21, 8.3101e-20, 2.0049e-21,\n 8.0838e-20, -1.1886e-20, 6.8331e-20, 1.7000e-20, 4.2808e-22,\n 8.1468e-20, -1.3539e-21, -4.4641e-21, 9.4334e-21, -7.3445e-20,\n -4.0786e-23, 1.8466e-20, 7.6353e-20, 3.2608e-21], device='cuda:0')", + "exp_avg_sq": "tensor([9.1773e-16, 3.9810e-17, 1.5608e-16, 2.6837e-21, 2.4668e-16, 5.0629e-16,\n 1.5311e-16, 1.2371e-16, 3.6196e-19, 5.3544e-16, 2.4780e-15, 4.5276e-17,\n 5.0230e-17, 9.7226e-16, 3.9719e-16, 6.7802e-18, 3.9565e-17, 3.3184e-16,\n 1.0516e-15, 5.0226e-20, 3.2025e-16, 7.4816e-17, 1.1261e-16, 1.8136e-16,\n 1.0163e-16, 7.4651e-17, 3.4073e-16, 2.6663e-15, 5.9807e-16, 7.7712e-19,\n 1.2053e-16, 3.4586e-17, 4.2170e-17, 5.9936e-16, 1.4642e-17, 7.6922e-17,\n 3.7805e-17, 3.5423e-16, 2.1868e-18, 1.0675e-16, 3.8093e-19, 9.9130e-17,\n 1.1108e-18, 2.1077e-16, 5.3925e-17, 2.9648e-16, 1.4596e-17, 1.8771e-16,\n 4.4980e-16, 2.0919e-17, 1.1741e-16, 4.4418e-17, 4.1297e-18, 2.8447e-16,\n 1.6660e-17, 3.2348e-16, 1.3258e-15, 3.5020e-16, 1.5898e-17, 4.3617e-18,\n 5.6146e-17, 2.3552e-19, 5.2641e-18, 3.2906e-18], device='cuda:0')" + }, + "42": { + "step": "tensor(10012.)", + "exp_avg": "tensor([ 5.6927e-20, 3.6539e-21, 7.1358e-23, 1.1837e-20, 2.1770e-21,\n 1.2939e-19, 4.5204e-23, 3.0430e-21, -1.3175e-20, 1.1951e-21,\n 3.7171e-21, -7.2283e-22, 2.4631e-22, 5.1055e-21, 1.2434e-21,\n 7.0692e-20, 9.1481e-20, 1.1159e-19, 3.7748e-20, -1.4120e-20,\n 1.2134e-21, -6.2393e-23, 1.2937e-19, 6.4841e-22, 3.8602e-22,\n 5.5550e-20, 1.9662e-20, 9.3574e-24, 9.3859e-20, 2.4009e-20,\n 1.9826e-21, -3.2294e-20, 8.2856e-22, 7.3161e-20, 5.2027e-21,\n -1.8539e-22, 5.1881e-22, -5.4321e-21, 1.9908e-20, 1.0810e-21,\n -7.0513e-21, 6.6627e-20, 1.4059e-20, 1.2580e-19, -4.5240e-20,\n 4.5344e-21, 5.6621e-20, 4.3100e-21, 1.0916e-19, -1.6309e-21,\n 5.6115e-20, -1.9291e-20, 7.4214e-20, 2.9996e-20, -3.5692e-22,\n 8.9559e-20, 3.0205e-21, 4.1479e-21, 2.6146e-20, -6.3500e-20,\n 3.9154e-23, 1.7123e-20, 8.9290e-20, -2.3906e-21], device='cuda:0')", + "exp_avg_sq": "tensor([1.2773e-15, 6.4697e-17, 1.9417e-16, 4.8424e-21, 2.9553e-16, 4.5457e-16,\n 1.6275e-16, 2.0620e-16, 5.2622e-19, 9.2480e-16, 1.8653e-15, 6.6493e-17,\n 7.5279e-17, 1.3808e-15, 6.5688e-16, 8.9740e-18, 6.0491e-17, 6.1593e-16,\n 1.5581e-15, 7.8138e-20, 3.5835e-16, 1.1677e-16, 1.2422e-16, 2.5154e-16,\n 1.2687e-16, 1.2467e-16, 4.5729e-16, 1.2723e-15, 8.4804e-16, 9.1662e-19,\n 2.1215e-16, 4.2879e-17, 5.9884e-17, 4.9269e-16, 3.0517e-17, 1.2449e-16,\n 8.5615e-17, 5.8333e-16, 4.2070e-18, 1.7807e-16, 7.5198e-19, 2.2614e-16,\n 2.1150e-18, 3.1635e-16, 3.2837e-17, 5.4849e-16, 1.4855e-17, 3.8241e-16,\n 6.0302e-16, 2.4417e-17, 1.8272e-16, 7.7876e-17, 9.1518e-18, 2.8427e-16,\n 2.3283e-17, 3.7433e-16, 1.9077e-15, 6.2192e-16, 2.3675e-17, 9.3809e-18,\n 1.1508e-16, 4.2171e-19, 8.2417e-18, 7.0542e-18], device='cuda:0')" + }, + "43": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[ 2.2139e-19, -4.1769e-20, -1.9610e-20, 8.7561e-20, -8.3202e-20,\n 5.3030e-19, -4.4578e-20, -1.1961e-19, -2.6321e-19, 3.4739e-20,\n -3.2045e-20, -7.7217e-20, -3.3189e-20, -4.5152e-20, -6.4137e-20,\n 5.6970e-19, 7.9572e-19, 5.1357e-19, 2.3462e-19, -1.4679e-19,\n -8.1171e-20, -9.2629e-20, 8.5958e-19, -1.0542e-19, -8.0902e-20,\n 2.4841e-19, 1.6171e-19, -3.3071e-20, 4.4291e-19, 3.7055e-19,\n -7.7939e-20, -2.8419e-19, -3.2363e-20, 6.4791e-19, 1.6643e-19,\n 5.8416e-20, -7.1998e-20, 4.2118e-20, 9.8167e-21, -8.7711e-20,\n -2.7398e-19, 4.0672e-19, 2.4280e-19, 7.3721e-19, -5.6095e-19,\n -9.8729e-20, 7.6081e-19, -1.3953e-19, 3.4671e-19, -5.2788e-20,\n 5.4046e-19, -8.7039e-20, 6.5583e-19, 1.0818e-19, -1.4528e-20,\n 4.6588e-19, -1.3038e-19, -8.2876e-20, 1.0416e-19, -8.5050e-19,\n -9.1646e-20, 2.5293e-19, 8.8428e-19, -7.2589e-20],\n [-2.2127e-19, 4.1839e-20, 1.9590e-20, -8.7534e-20, 8.3185e-20,\n -5.3019e-19, 4.4557e-20, 1.1960e-19, 2.6327e-19, -3.4755e-20,\n 3.2026e-20, 7.7196e-20, 3.3175e-20, 4.5136e-20, 6.4115e-20,\n -5.6959e-19, -7.9561e-19, -5.1349e-19, -2.3454e-19, 1.4685e-19,\n 8.1159e-20, 9.2611e-20, -8.5944e-19, 1.0540e-19, 8.0882e-20,\n -2.4830e-19, -1.6161e-19, 3.3052e-20, -4.4283e-19, -3.7048e-19,\n 7.7924e-20, 2.8426e-19, 3.2344e-20, -6.4780e-19, -1.6633e-19,\n -5.8435e-20, 7.1987e-20, -4.2134e-20, -9.7133e-21, 8.7692e-20,\n 2.7403e-19, -4.0665e-19, -2.4280e-19, -7.3708e-19, 5.6099e-19,\n 9.8709e-20, -7.6071e-19, 1.3951e-19, -3.4664e-19, 5.2769e-20,\n -5.4038e-19, 8.7103e-20, -6.5574e-19, -1.0807e-19, 1.4508e-20,\n -4.6581e-19, 1.3036e-19, 8.2859e-20, -1.0412e-19, 8.5058e-19,\n 9.1632e-20, -2.5286e-19, -8.8417e-19, 7.2570e-20]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.0708e-14, 1.9408e-15, 2.8283e-14, 6.2240e-16, 1.8019e-14, 3.3760e-14,\n 2.3458e-14, 5.4388e-15, 2.3081e-14, 7.6450e-15, 8.2880e-14, 9.6972e-15,\n 2.8681e-15, 1.3336e-14, 9.1655e-15, 4.5099e-16, 7.9239e-15, 6.7662e-16,\n 9.8570e-15, 1.0977e-14, 1.9970e-14, 1.4021e-14, 2.9455e-14, 1.3475e-14,\n 9.8483e-15, 2.6317e-15, 3.8922e-15, 1.7300e-13, 1.1502e-14, 2.6700e-14,\n 6.5365e-15, 2.3728e-14, 2.2442e-14, 6.8346e-14, 5.7663e-16, 9.6032e-15,\n 2.2914e-15, 5.4948e-15, 2.0684e-15, 1.0344e-14, 5.1243e-15, 4.3423e-15,\n 1.5135e-15, 1.0001e-14, 3.4238e-17, 8.8972e-15, 2.9332e-14, 4.7825e-15,\n 8.6648e-15, 3.2669e-14, 1.0174e-14, 4.4618e-15, 1.7599e-16, 4.8853e-14,\n 1.0215e-17, 2.0759e-14, 1.9073e-14, 7.2594e-15, 1.2242e-14, 5.6964e-15,\n 8.4728e-15, 2.0632e-16, 7.9355e-15, 2.2811e-15],\n [1.0708e-14, 1.9408e-15, 2.8283e-14, 6.2240e-16, 1.8019e-14, 3.3760e-14,\n 2.3458e-14, 5.4388e-15, 2.3081e-14, 7.6450e-15, 8.2880e-14, 9.6972e-15,\n 2.8681e-15, 1.3336e-14, 9.1655e-15, 4.5099e-16, 7.9239e-15, 6.7662e-16,\n 9.8570e-15, 1.0977e-14, 1.9970e-14, 1.4021e-14, 2.9455e-14, 1.3475e-14,\n 9.8483e-15, 2.6317e-15, 3.8922e-15, 1.7300e-13, 1.1502e-14, 2.6700e-14,\n 6.5365e-15, 2.3728e-14, 2.2442e-14, 6.8346e-14, 5.7663e-16, 9.6032e-15,\n 2.2914e-15, 5.4948e-15, 2.0684e-15, 1.0344e-14, 5.1243e-15, 4.3423e-15,\n 1.5135e-15, 1.0001e-14, 3.4238e-17, 8.8972e-15, 2.9332e-14, 4.7825e-15,\n 8.6648e-15, 3.2669e-14, 1.0174e-14, 4.4618e-15, 1.7599e-16, 4.8853e-14,\n 1.0215e-17, 2.0759e-14, 1.9073e-14, 7.2594e-15, 1.2242e-14, 5.6964e-15,\n 8.4728e-15, 2.0632e-16, 7.9355e-15, 2.2811e-15]], device='cuda:0')" + }, + "44": { + "step": "tensor(10012.)", + "exp_avg": "tensor([ 4.2074e-19, -4.2073e-19], device='cuda:0')", + "exp_avg_sq": "tensor([1.3389e-13, 1.3389e-13], device='cuda:0')" + }, + "45": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[ 9.7495e-18, -1.0335e-18, 3.4998e-18, ..., 7.2049e-17,\n -1.3276e-18, 6.6958e-19],\n [ 2.0355e-18, 9.9444e-19, -4.2277e-19, ..., -1.0530e-17,\n 8.6735e-19, 4.5324e-19],\n [-3.7918e-18, -2.9222e-19, -2.9643e-19, ..., -1.5766e-17,\n 1.1760e-19, -9.7701e-19],\n ...,\n [-1.3479e-18, 2.3306e-19, -5.0649e-19, ..., -1.0585e-17,\n 3.7275e-20, 1.6886e-19],\n [-2.5260e-18, -1.9425e-19, -7.5443e-19, ..., -2.5727e-17,\n 3.1627e-19, -6.7078e-20],\n [-3.3267e-18, 9.1903e-22, 6.6754e-19, ..., -4.9453e-17,\n 7.7166e-20, -1.1858e-18]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.0979e-21, 1.0934e-19, 7.5665e-21, ..., 5.2391e-20, 1.0127e-19,\n 1.7481e-20],\n [3.0963e-20, 7.6025e-22, 6.0674e-21, ..., 3.2835e-21, 4.9451e-21,\n 5.3189e-22],\n [1.1031e-21, 5.0159e-22, 6.5406e-22, ..., 1.3620e-20, 3.7468e-21,\n 2.2649e-21],\n ...,\n [9.3957e-22, 5.3818e-21, 1.9168e-22, ..., 8.2449e-21, 1.7028e-21,\n 2.1657e-22],\n [1.8495e-21, 5.3370e-21, 1.4988e-21, ..., 3.8244e-20, 1.2561e-20,\n 1.1849e-22],\n [1.2143e-19, 1.0667e-20, 4.5380e-20, ..., 2.5466e-19, 1.3765e-22,\n 9.0688e-20]], device='cuda:0')" + }, + "46": { + "step": "tensor(10012.)", + "exp_avg": "tensor([ 1.3901e-16, -2.6667e-17, -2.8767e-17, 1.2102e-16, -4.0162e-17,\n 7.6033e-17, 1.4816e-16, -7.7348e-19, -1.1599e-16, -1.4401e-17,\n 8.7219e-17, -8.5330e-17, -2.6779e-17, -1.9534e-16, 8.6303e-17,\n 1.3843e-16, -1.2596e-16, -7.7206e-17, -1.7113e-17, -1.9810e-17,\n -1.4199e-17, 5.2170e-17, 2.9813e-17, 4.6547e-17, -1.3636e-16,\n 1.2807e-16, 4.2099e-17, -1.4103e-17, 1.1554e-17, -2.3212e-16,\n 1.8267e-16, -3.7862e-17, -5.7643e-17, 1.1746e-16, -1.2514e-17,\n 2.7574e-17, 1.6080e-16, -2.1574e-17, -2.5755e-17, -1.9344e-16,\n -4.5164e-17, 3.2412e-17, 3.5238e-18, 4.8811e-18, -1.3164e-16,\n 5.9107e-17, 1.5341e-16, -4.0605e-17, -4.9572e-17, -2.2485e-17,\n -2.1845e-17, 1.1487e-16, 9.2200e-17, -3.6072e-17, 9.2349e-17,\n 3.0851e-17, -1.3801e-17, -7.9904e-17, 8.0133e-18, -2.5852e-17,\n -3.8955e-17, -1.1415e-17, -5.1262e-17, -9.8114e-17], device='cuda:0')", + "exp_avg_sq": "tensor([3.4493e-17, 2.0083e-19, 7.6674e-19, 4.2560e-17, 1.8930e-18, 1.8858e-17,\n 5.0430e-17, 3.1798e-18, 1.4412e-16, 1.5576e-17, 2.8870e-17, 1.0721e-17,\n 1.2517e-16, 1.7351e-17, 1.9333e-16, 1.1709e-17, 4.4970e-17, 1.6734e-17,\n 1.0617e-19, 3.7120e-19, 1.0278e-19, 2.4484e-17, 1.3671e-16, 3.8684e-17,\n 5.6444e-18, 6.6298e-17, 2.2608e-18, 1.0127e-17, 2.3241e-18, 2.5741e-18,\n 7.1146e-17, 2.8706e-20, 8.2973e-21, 4.6739e-17, 5.5508e-18, 2.3519e-17,\n 3.2100e-17, 3.5221e-19, 1.7352e-18, 3.9423e-19, 1.5910e-18, 1.8302e-17,\n 3.7884e-18, 1.1725e-19, 7.9438e-17, 2.0229e-16, 4.7598e-17, 1.8526e-18,\n 1.2122e-17, 1.2780e-19, 2.0397e-17, 1.3023e-17, 8.9448e-18, 2.1687e-18,\n 4.8384e-18, 7.4626e-18, 5.4027e-19, 5.0826e-18, 1.7794e-19, 2.6021e-18,\n 1.9894e-19, 1.2105e-18, 3.5835e-18, 3.9135e-17], device='cuda:0')" + }, + "47": { + "step": "tensor(10012.)", + "exp_avg": "tensor([ 4.3755e-17, 7.3082e-18, -8.4506e-19, 2.5043e-17, 4.4358e-19,\n 1.3915e-17, 3.1606e-17, 1.8106e-18, -2.4722e-18, 5.2846e-20,\n 5.5128e-17, 5.3254e-18, 9.5225e-18, -1.6855e-17, -1.5844e-18,\n 2.8482e-17, -1.2256e-17, 9.2675e-19, -5.7060e-19, 8.4958e-19,\n 4.1948e-18, 1.8101e-17, -3.2720e-18, -3.1164e-18, -1.2959e-17,\n 1.6443e-16, 5.8940e-17, 3.3032e-19, 3.2265e-18, -3.3305e-17,\n 8.6654e-17, -8.6674e-20, 2.2776e-20, 9.8659e-17, -6.6139e-18,\n 8.7160e-18, 9.9632e-17, 6.1377e-19, -5.0518e-19, -2.7895e-17,\n 2.6811e-21, 1.0183e-16, -3.4107e-18, 1.2101e-17, -1.0297e-17,\n -2.6034e-18, 7.4934e-17, 5.9226e-18, 2.7365e-18, 7.5756e-19,\n 1.7869e-17, 6.2874e-17, 1.0837e-16, -2.5305e-19, 2.1935e-17,\n 4.3467e-18, -1.9096e-18, 4.5200e-18, 3.2510e-18, -7.3081e-18,\n -2.7350e-19, 3.4689e-19, 2.5382e-17, -4.2972e-18], device='cuda:0')", + "exp_avg_sq": "tensor([4.6856e-18, 1.1581e-18, 1.4822e-21, 4.7220e-18, 1.0610e-20, 1.3752e-18,\n 8.5938e-18, 3.4085e-19, 1.1338e-18, 3.7051e-20, 3.2052e-18, 5.6517e-19,\n 1.1924e-17, 6.7835e-19, 1.8429e-18, 1.8374e-19, 2.3563e-19, 1.4842e-19,\n 3.5778e-21, 2.6603e-22, 6.7676e-21, 4.2075e-18, 7.4479e-19, 2.2351e-19,\n 3.0417e-20, 2.9719e-18, 2.2362e-20, 6.7227e-20, 1.8881e-19, 8.1830e-21,\n 1.8517e-18, 1.1774e-20, 6.5961e-21, 4.9042e-18, 5.7916e-19, 1.1520e-18,\n 2.8153e-18, 3.4037e-22, 3.5979e-21, 9.5674e-22, 1.9796e-21, 2.1833e-18,\n 5.3232e-20, 9.8931e-20, 8.2054e-19, 4.5155e-18, 5.3372e-18, 3.6642e-21,\n 4.6213e-20, 5.5850e-19, 8.0009e-19, 1.5593e-19, 2.6314e-19, 1.1840e-20,\n 6.3877e-20, 1.3361e-18, 7.6806e-21, 4.9835e-20, 4.8919e-20, 8.0601e-20,\n 5.2892e-21, 4.5738e-22, 8.9306e-21, 3.0258e-19], device='cuda:0')" + }, + "48": { + "step": "tensor(10012.)", + "exp_avg": "tensor([ 7.5253e-17, 1.4063e-18, 5.6159e-18, 6.3476e-17, 5.8131e-18,\n 4.8745e-17, 7.5730e-17, 1.5411e-17, -2.2498e-17, -1.3332e-19,\n 6.2392e-17, 4.4401e-18, 1.4814e-17, -4.1732e-17, 8.4920e-19,\n 7.1700e-17, -1.4538e-17, -9.5865e-18, 1.0169e-17, -1.2149e-18,\n -2.5178e-18, 4.3718e-17, 1.7646e-18, 1.4855e-18, -1.4257e-17,\n 8.9855e-17, 4.9006e-17, -2.8654e-19, 2.3771e-17, -4.5786e-17,\n 8.7271e-17, 9.9374e-19, -7.6433e-20, 7.9534e-17, 1.2332e-17,\n 3.0150e-17, 8.5195e-17, 1.0590e-17, 1.0570e-18, -3.1593e-17,\n 3.4869e-19, 5.5845e-17, 2.0530e-17, -1.1990e-19, -2.2263e-17,\n 5.2790e-19, 8.4371e-17, 1.0822e-17, 4.9146e-18, 1.8372e-18,\n 1.0049e-17, 6.7642e-17, 7.0017e-17, 6.2910e-19, 5.3122e-17,\n 3.3119e-17, 6.6252e-18, 4.0085e-18, 1.8505e-17, 3.4464e-18,\n -6.3721e-19, -1.5002e-18, 1.6906e-17, -1.1268e-17], device='cuda:0')", + "exp_avg_sq": "tensor([5.4391e-18, 7.9887e-19, 1.5251e-21, 4.9588e-18, 1.3469e-20, 2.3496e-18,\n 4.7257e-18, 4.7270e-19, 2.4817e-18, 9.5589e-20, 2.5967e-18, 3.0178e-19,\n 1.6055e-17, 5.5809e-19, 2.5014e-18, 8.0727e-19, 5.5449e-19, 2.2657e-19,\n 7.5900e-21, 1.3575e-21, 2.6965e-20, 2.0925e-18, 1.2777e-18, 3.5310e-19,\n 8.5811e-20, 2.5730e-18, 9.8384e-20, 6.1800e-20, 3.0009e-19, 1.6633e-19,\n 2.9756e-18, 5.0426e-20, 3.4933e-21, 2.2699e-18, 5.0927e-19, 1.7369e-18,\n 3.3350e-18, 2.2493e-22, 9.1867e-21, 2.5717e-21, 6.2311e-21, 1.4554e-18,\n 1.0465e-19, 9.0994e-20, 1.7629e-18, 2.4512e-18, 3.3242e-18, 6.8572e-21,\n 7.7993e-20, 5.9316e-19, 1.8966e-18, 8.1124e-19, 8.4049e-19, 1.5186e-20,\n 5.0175e-19, 1.1346e-18, 1.3566e-20, 3.6851e-20, 5.9154e-20, 6.6369e-19,\n 7.7933e-21, 1.0753e-21, 2.2310e-20, 5.4070e-19], device='cuda:0')" + }, + "49": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[ 1.9647e-16, -4.1079e-17, -4.1871e-17, 9.9604e-17, 1.7526e-18,\n 8.1822e-17, 1.3811e-16, -6.8709e-18, 1.6499e-19, -2.9993e-18,\n 3.4322e-16, 3.6206e-16, 8.8668e-18, 9.8495e-17, -6.7351e-18,\n 1.2581e-16, 2.1304e-16, -2.3772e-17, -1.3179e-17, -7.4383e-17,\n -5.1290e-17, 1.1593e-16, -3.2551e-17, -2.0043e-17, 2.6236e-16,\n 8.4789e-16, 5.0381e-16, -7.1926e-17, 3.1426e-17, 1.9142e-16,\n 3.3635e-16, -2.7758e-17, -2.7002e-17, 5.4183e-16, -7.0204e-17,\n 3.8673e-17, 3.8820e-16, 3.3832e-18, -6.5170e-17, 2.6820e-16,\n -7.8611e-18, 7.9951e-16, -2.1578e-17, 5.1750e-18, 7.8912e-17,\n -6.2254e-18, 3.5397e-16, 1.3520e-16, 1.0411e-16, -6.3209e-17,\n 8.8172e-18, 3.7869e-16, 7.6476e-16, -6.4976e-17, 1.2278e-16,\n 3.3457e-17, -4.9731e-17, 3.2671e-16, 3.0214e-17, -1.0067e-16,\n -1.6947e-17, -8.1260e-17, 4.4525e-16, 6.7480e-17],\n [-1.9638e-16, 4.1096e-17, 4.1872e-17, -9.9643e-17, -1.7535e-18,\n -8.1825e-17, -1.3817e-16, 6.8280e-18, -1.6125e-19, 2.9941e-18,\n -3.4332e-16, -3.6207e-16, -8.9575e-18, -9.8486e-17, 6.7383e-18,\n -1.2588e-16, -2.1304e-16, 2.3769e-17, 1.3177e-17, 7.4388e-17,\n 5.1297e-17, -1.1592e-16, 3.2553e-17, 2.0047e-17, -2.6237e-16,\n -8.4792e-16, -5.0379e-16, 7.1931e-17, -3.1392e-17, -1.9142e-16,\n -3.3630e-16, 2.7782e-17, 2.7005e-17, -5.4192e-16, 7.0194e-17,\n -3.8744e-17, -3.8814e-16, -3.3762e-18, 6.5155e-17, -2.6820e-16,\n 7.8636e-18, -7.9963e-16, 2.1575e-17, -5.1591e-18, -7.8907e-17,\n 6.2202e-18, -3.5397e-16, -1.3519e-16, -1.0410e-16, 6.3220e-17,\n -8.8844e-18, -3.7867e-16, -7.6486e-16, 6.4979e-17, -1.2280e-16,\n -3.3530e-17, 4.9734e-17, -3.2672e-16, -3.0221e-17, 1.0065e-16,\n 1.6957e-17, 8.1264e-17, -4.4525e-16, -6.7475e-17]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.5251e-18, 1.8291e-18, 1.2497e-17, 5.2513e-18, 8.1510e-18, 1.1254e-17,\n 1.6154e-17, 2.7710e-17, 1.6776e-17, 6.7255e-18, 2.8478e-17, 2.3186e-17,\n 1.4552e-17, 5.3097e-18, 7.1690e-17, 3.1233e-19, 1.0232e-17, 4.6379e-18,\n 7.3020e-20, 8.5368e-18, 1.0585e-17, 7.5925e-17, 4.7154e-17, 5.7136e-18,\n 9.9465e-19, 3.5929e-17, 2.5730e-18, 2.6828e-17, 1.1870e-17, 1.7072e-19,\n 2.6760e-17, 1.1027e-18, 9.4456e-19, 7.8788e-17, 7.0298e-17, 3.2139e-17,\n 2.4793e-18, 2.6625e-17, 8.4326e-19, 5.4210e-19, 6.3240e-19, 7.4042e-17,\n 8.3295e-19, 2.8877e-19, 1.4177e-17, 1.9593e-16, 2.4174e-17, 2.2071e-18,\n 2.2870e-17, 4.0997e-18, 5.8425e-18, 7.4499e-18, 1.3329e-17, 1.4697e-18,\n 1.4413e-19, 1.2213e-17, 1.1593e-17, 1.2839e-17, 2.6948e-18, 1.4210e-18,\n 4.8989e-18, 9.5549e-17, 1.3203e-17, 1.3138e-17],\n [6.5251e-18, 1.8291e-18, 1.2497e-17, 5.2513e-18, 8.1510e-18, 1.1254e-17,\n 1.6154e-17, 2.7711e-17, 1.6776e-17, 6.7255e-18, 2.8478e-17, 2.3186e-17,\n 1.4552e-17, 5.3097e-18, 7.1690e-17, 3.1232e-19, 1.0232e-17, 4.6379e-18,\n 7.3020e-20, 8.5368e-18, 1.0585e-17, 7.5925e-17, 4.7154e-17, 5.7136e-18,\n 9.9465e-19, 3.5929e-17, 2.5730e-18, 2.6828e-17, 1.1870e-17, 1.7072e-19,\n 2.6760e-17, 1.1027e-18, 9.4456e-19, 7.8788e-17, 7.0298e-17, 3.2139e-17,\n 2.4793e-18, 2.6625e-17, 8.4326e-19, 5.4210e-19, 6.3240e-19, 7.4042e-17,\n 8.3295e-19, 2.8877e-19, 1.4177e-17, 1.9593e-16, 2.4174e-17, 2.2071e-18,\n 2.2870e-17, 4.0997e-18, 5.8425e-18, 7.4499e-18, 1.3329e-17, 1.4697e-18,\n 1.4413e-19, 1.2213e-17, 1.1593e-17, 1.2839e-17, 2.6948e-18, 1.4210e-18,\n 4.8989e-18, 9.5549e-17, 1.3203e-17, 1.3138e-17]], device='cuda:0')" + }, + "50": { + "step": "tensor(10012.)", + "exp_avg": "tensor([ 3.8925e-16, -3.8926e-16], device='cuda:0')", + "exp_avg_sq": "tensor([1.3451e-16, 1.3451e-16], device='cuda:0')" + }, + "51": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[ 1.5919e-14, 1.3749e-13, -8.8305e-14, ..., -1.1093e-14,\n -9.0648e-14, 9.6533e-14],\n [ 9.8754e-15, 3.9819e-14, -1.9072e-14, ..., -3.9013e-14,\n -4.3318e-15, 3.7623e-14],\n [-3.3768e-14, -3.0953e-14, 1.2772e-14, ..., 1.6751e-14,\n 6.4104e-14, 8.6519e-14],\n ...,\n [ 5.2918e-14, -8.6271e-14, -6.6000e-14, ..., 3.4079e-13,\n 1.0300e-13, 4.0481e-14],\n [ 3.9611e-14, -2.0374e-13, 4.1600e-14, ..., -9.5145e-13,\n 9.2220e-13, 8.5621e-13],\n [ 3.6335e-14, -1.3831e-14, 6.6951e-15, ..., 2.6970e-14,\n 1.1460e-14, 3.4981e-14]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.2142e-18, 8.8122e-18, 1.8258e-16, ..., 2.7843e-17, 2.6580e-17,\n 8.8043e-17],\n [3.9332e-19, 2.6405e-18, 3.0667e-18, ..., 1.5406e-18, 2.2343e-18,\n 2.5125e-18],\n [8.1107e-17, 1.0755e-17, 4.5409e-16, ..., 9.4237e-17, 1.0449e-16,\n 2.1451e-16],\n ...,\n [3.4592e-18, 9.8332e-18, 1.3152e-16, ..., 2.8789e-17, 2.9374e-17,\n 5.5028e-17],\n [4.8630e-17, 9.1627e-18, 7.2531e-17, ..., 8.3084e-17, 1.1661e-16,\n 2.1859e-16],\n [2.4862e-17, 1.9666e-17, 7.5765e-17, ..., 3.5641e-17, 4.6886e-17,\n 1.0909e-16]], device='cuda:0')" + }, + "52": { + "step": "tensor(10012.)", + "exp_avg": "tensor([ 2.1529e-13, -1.0389e-13, 1.1721e-13, 5.1161e-12, 3.3844e-13,\n 8.6226e-14, -1.2574e-13, -2.0696e-12, -5.8437e-14, -1.4378e-13,\n -4.6046e-12, 1.2740e-13, -1.4356e-12, 7.4418e-13, -7.3882e-12,\n -2.5051e-13, 1.9315e-13, 3.1080e-13, 3.7756e-14, -2.5467e-13,\n -3.1567e-14, -2.0087e-13, 3.3798e-13, 1.5451e-13, -2.1180e-14,\n -1.4782e-12, 1.1826e-13, -1.5297e-13, -8.9594e-13, 3.3632e-12,\n -6.1427e-13, 2.3924e-13, 1.5327e-12, 1.0109e-12, -1.7047e-13,\n -1.3788e-13, -1.5249e-12, 2.1146e-13, 2.7755e-13, -4.5750e-13,\n 8.6938e-13, 2.3864e-12, -1.4772e-13, -1.2783e-13, -1.0043e-12,\n 3.6105e-13, 6.3157e-13, 1.2257e-13, 6.8147e-13, -3.1164e-13,\n 3.9338e-12, 3.4589e-13, 2.9689e-14, 1.6199e-13, -8.5986e-13,\n -1.5882e-12, -8.0691e-13, -5.5331e-14, 2.1601e-12, 4.7250e-13,\n 9.1731e-13, 3.8824e-13, -1.4983e-12, 5.2662e-13], device='cuda:0')", + "exp_avg_sq": "tensor([1.1187e-13, 7.8224e-15, 4.6100e-13, 3.9227e-13, 2.4976e-13, 1.0222e-15,\n 1.6378e-13, 1.8545e-14, 9.3816e-15, 4.7420e-16, 2.8083e-14, 2.3258e-15,\n 9.6909e-14, 1.3340e-14, 4.4045e-13, 2.0681e-15, 4.1273e-14, 6.7740e-15,\n 1.1351e-12, 2.2722e-14, 2.2747e-15, 3.3309e-14, 1.7073e-14, 4.0435e-13,\n 1.5439e-14, 8.7045e-15, 1.8523e-13, 1.9491e-13, 6.7909e-14, 3.9323e-13,\n 2.0862e-14, 1.1492e-12, 2.3332e-13, 4.6654e-13, 4.4565e-13, 2.0158e-16,\n 2.9197e-14, 4.1629e-13, 1.6224e-14, 7.4160e-16, 2.5962e-15, 3.3315e-13,\n 1.7477e-13, 6.5592e-14, 1.7636e-13, 5.8775e-14, 1.3569e-13, 3.6524e-14,\n 2.6418e-14, 2.5338e-14, 1.2495e-13, 1.8673e-15, 1.9619e-13, 9.5473e-14,\n 1.0891e-12, 1.0927e-12, 5.1732e-15, 5.8758e-15, 4.1041e-13, 1.8712e-13,\n 1.7486e-14, 6.4358e-14, 1.1525e-13, 1.3930e-13], device='cuda:0')" + }, + "53": { + "step": "tensor(10012.)", + "exp_avg": "tensor([-9.7088e-13, 7.4218e-14, 1.6066e-14, 2.4704e-12, -5.5662e-14,\n 8.5165e-15, 7.3721e-14, -2.3287e-12, 1.2749e-14, -3.3179e-14,\n -4.5581e-12, 1.9894e-15, -1.4783e-12, -6.2023e-13, -1.0491e-11,\n -8.0623e-14, 2.7175e-15, -1.6334e-13, 7.9769e-15, -1.8831e-13,\n 2.0242e-14, -8.3085e-15, 9.4850e-15, 2.2131e-14, -5.9170e-15,\n -1.0629e-12, 1.1606e-14, 5.8730e-14, -1.5096e-12, 2.7102e-12,\n 5.5254e-15, -2.8598e-14, 2.3354e-13, -2.2780e-12, 4.7475e-14,\n 2.7941e-14, -1.5999e-12, -1.0603e-14, 2.6505e-13, -2.3637e-14,\n 1.5249e-13, 1.0037e-12, 2.7386e-14, 2.4450e-14, -2.6960e-12,\n -6.5735e-15, 9.0853e-13, 4.1189e-15, -2.4519e-14, 2.8716e-14,\n 2.5094e-12, 8.0774e-14, 1.8281e-14, 1.1012e-14, -5.5266e-12,\n -1.4836e-12, -4.3424e-13, -3.2737e-13, 1.3374e-12, -4.6819e-14,\n -4.8933e-13, -8.1301e-13, -2.6393e-12, -3.3017e-14], device='cuda:0')", + "exp_avg_sq": "tensor([9.5119e-16, 7.2967e-17, 2.0055e-15, 2.6558e-15, 1.0795e-15, 1.0146e-18,\n 6.1286e-16, 1.3840e-16, 4.2723e-17, 2.4989e-19, 2.6702e-16, 1.3247e-17,\n 1.1848e-15, 1.9130e-16, 3.5382e-15, 8.7254e-18, 1.9158e-16, 7.4988e-17,\n 9.7550e-15, 2.8854e-16, 1.4777e-17, 2.0427e-16, 8.9343e-17, 2.7191e-15,\n 8.1094e-17, 8.8031e-17, 9.8136e-16, 1.4484e-15, 8.6061e-16, 2.6896e-15,\n 6.6817e-17, 9.8503e-15, 1.4025e-15, 3.2107e-15, 2.7912e-15, 4.2288e-19,\n 3.1205e-16, 2.9650e-15, 1.9600e-16, 2.5686e-19, 1.2911e-17, 4.0407e-15,\n 6.9272e-16, 2.7608e-16, 1.3493e-15, 3.4809e-16, 1.0550e-15, 1.8515e-16,\n 1.1158e-16, 9.6963e-17, 9.2020e-16, 7.3013e-18, 1.7245e-15, 5.1792e-16,\n 1.0259e-14, 1.1457e-14, 4.6932e-17, 5.7997e-17, 3.6196e-15, 1.3152e-15,\n 1.4926e-16, 4.6006e-16, 7.7245e-16, 8.9844e-16], device='cuda:0')" + }, + "54": { + "step": "tensor(10012.)", + "exp_avg": "tensor([-7.8496e-13, -4.8749e-13, -1.2324e-14, 1.9357e-12, 8.7114e-14,\n -7.1136e-15, -7.3514e-14, -1.8152e-12, -9.0862e-15, 3.0257e-14,\n -4.3412e-12, -2.9264e-14, -1.5976e-12, -7.1551e-13, -7.4374e-12,\n 1.3174e-13, -5.7035e-15, -2.7565e-13, -6.2460e-15, -6.4258e-13,\n 4.6155e-14, 8.0047e-15, -7.9665e-15, -1.9825e-14, 4.7809e-15,\n -1.7652e-12, -1.3689e-14, -5.8115e-14, -1.4608e-12, 1.7425e-12,\n -4.1107e-15, 1.7443e-14, -1.3581e-13, -1.6461e-12, -3.3827e-14,\n -2.8332e-14, -1.8641e-12, 2.3184e-14, -3.2487e-13, 1.9241e-14,\n -4.0713e-13, 7.2061e-13, -5.4781e-14, -1.9071e-14, -1.4134e-12,\n 7.6686e-15, 3.2249e-13, -3.4254e-15, 2.0650e-14, -2.7285e-14,\n 1.6295e-12, -1.2564e-13, -1.7184e-14, -1.2035e-14, -3.3903e-12,\n -1.2484e-12, -1.4240e-12, -6.7530e-13, 5.1070e-13, 4.5699e-14,\n -3.3676e-13, -8.1585e-13, -1.9301e-12, 3.2105e-14], device='cuda:0')", + "exp_avg_sq": "tensor([1.3083e-15, 1.3659e-16, 4.1073e-15, 4.4282e-15, 2.2728e-15, 1.4369e-18,\n 1.4578e-15, 2.9823e-16, 7.0449e-17, 5.2837e-19, 4.1937e-16, 1.8582e-17,\n 1.1123e-15, 1.9781e-16, 4.6822e-15, 1.1761e-17, 3.5231e-16, 9.4346e-17,\n 9.9136e-15, 2.8152e-16, 3.6575e-17, 2.6247e-16, 1.1662e-16, 3.6346e-15,\n 1.1830e-16, 1.4797e-16, 1.5275e-15, 1.6444e-15, 7.9166e-16, 4.2610e-15,\n 1.4086e-16, 1.0414e-14, 2.7021e-15, 4.9100e-15, 3.7661e-15, 7.0664e-19,\n 4.0957e-16, 3.4527e-15, 2.2558e-16, 7.8413e-19, 1.9785e-17, 3.6433e-15,\n 1.5812e-15, 5.6323e-16, 2.2305e-15, 5.2010e-16, 1.7164e-15, 2.9123e-16,\n 2.2269e-16, 2.1211e-16, 1.7053e-15, 9.5268e-18, 1.5996e-15, 9.2171e-16,\n 1.1389e-14, 1.0927e-14, 7.0720e-17, 7.9609e-17, 4.2784e-15, 1.6509e-15,\n 2.6898e-16, 8.0307e-16, 1.3408e-15, 1.1634e-15], device='cuda:0')" + }, + "55": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[ 6.0685e-12, 1.3486e-13, -9.5504e-13, -7.4863e-12, -2.8928e-13,\n 5.9258e-13, -2.1336e-13, 1.0308e-11, -1.8440e-13, -9.5927e-13,\n 2.0489e-11, -2.5535e-13, 8.3400e-12, 5.2188e-12, 3.7515e-11,\n 3.2889e-14, -1.0585e-12, 1.9379e-12, 9.0184e-13, 1.9796e-12,\n -1.1672e-12, -8.7001e-13, -8.6215e-13, -8.2865e-13, -6.1458e-13,\n 8.3061e-12, -1.3756e-12, -1.7850e-12, 1.0131e-11, -9.2424e-12,\n -4.6122e-14, -7.5444e-13, -7.9399e-13, 7.1490e-12, -5.9864e-13,\n -5.3459e-13, 9.0451e-12, -3.6373e-13, -1.3462e-12, 8.6207e-13,\n 1.2131e-12, -4.7013e-12, -9.6964e-13, -6.5682e-13, 8.4097e-12,\n -1.4671e-12, -3.3288e-12, -7.4791e-13, -2.3087e-12, -1.1296e-12,\n -6.9221e-12, -1.4156e-12, -1.4917e-12, -1.4073e-12, 1.7195e-11,\n 4.8238e-12, 5.0259e-12, 2.8949e-12, -4.7313e-12, 4.2287e-13,\n 2.6152e-12, 4.3087e-12, 6.9829e-12, -1.3138e-12],\n [-5.3782e-12, 1.9477e-13, 8.7807e-13, 8.1040e-12, 3.2745e-13,\n -5.7281e-13, 5.6417e-14, -1.0800e-11, 1.1193e-13, 9.3377e-13,\n -2.1385e-11, 1.6115e-13, -9.2311e-12, -5.1020e-12, -3.6360e-11,\n -2.9594e-14, 1.0123e-12, -1.5993e-12, -9.5313e-13, -2.1504e-12,\n 1.1959e-12, 8.7386e-13, 7.3886e-13, 6.9618e-13, 7.5020e-13,\n -7.6643e-12, 1.2213e-12, 1.7458e-12, -1.0385e-11, 9.9604e-12,\n -1.1482e-13, 6.4200e-13, 1.0873e-12, -7.9405e-12, 6.4316e-13,\n 5.2379e-13, -8.9067e-12, 2.3019e-13, 1.7896e-12, -8.2124e-13,\n -1.3382e-12, 4.8508e-12, 1.0087e-12, 4.9298e-13, -8.2122e-12,\n 1.5217e-12, 3.3197e-12, 7.2799e-13, 2.2548e-12, 1.0663e-12,\n 7.5129e-12, 1.3455e-12, 1.5492e-12, 1.4219e-12, -1.6348e-11,\n -4.7269e-12, -5.0648e-12, -2.7638e-12, 5.9480e-12, -5.1796e-13,\n -2.9056e-12, -3.2213e-12, -7.8925e-12, 1.1130e-12]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.1173e-13, 1.8006e-14, 2.3637e-14, 3.4166e-14, 1.3433e-14, 1.5318e-13,\n 1.7991e-15, 7.6162e-15, 4.5038e-15, 1.9384e-14, 1.3086e-14, 7.0751e-14,\n 2.3437e-13, 1.5268e-13, 6.3526e-14, 6.6884e-14, 3.2892e-15, 8.9305e-14,\n 1.5403e-13, 1.9040e-13, 9.1230e-15, 7.0331e-14, 6.4773e-14, 6.8084e-14,\n 3.9431e-14, 2.6651e-14, 3.3503e-14, 1.1290e-13, 2.1167e-13, 4.9165e-14,\n 1.2418e-14, 1.2322e-13, 1.0054e-14, 4.9436e-14, 4.0803e-14, 3.6512e-16,\n 3.4528e-14, 1.0521e-13, 1.1640e-13, 6.7482e-16, 4.4545e-14, 2.2282e-13,\n 1.7278e-14, 2.4013e-14, 9.1867e-15, 2.1817e-14, 1.3618e-14, 2.0989e-14,\n 9.3458e-15, 8.4211e-15, 5.9211e-15, 1.0114e-13, 1.9484e-13, 8.3798e-15,\n 1.1573e-13, 2.0182e-13, 6.1027e-14, 4.5382e-14, 1.0223e-13, 8.3819e-14,\n 1.9276e-14, 2.7148e-14, 5.0327e-15, 7.3684e-14],\n [1.1173e-13, 1.8006e-14, 2.3637e-14, 3.4165e-14, 1.3433e-14, 1.5318e-13,\n 1.7991e-15, 7.6161e-15, 4.5038e-15, 1.9384e-14, 1.3086e-14, 7.0751e-14,\n 2.3437e-13, 1.5268e-13, 6.3526e-14, 6.6884e-14, 3.2892e-15, 8.9305e-14,\n 1.5403e-13, 1.9040e-13, 9.1230e-15, 7.0331e-14, 6.4773e-14, 6.8084e-14,\n 3.9431e-14, 2.6651e-14, 3.3503e-14, 1.1290e-13, 2.1167e-13, 4.9164e-14,\n 1.2418e-14, 1.2322e-13, 1.0054e-14, 4.9436e-14, 4.0803e-14, 3.6511e-16,\n 3.4528e-14, 1.0521e-13, 1.1640e-13, 6.7482e-16, 4.4545e-14, 2.2281e-13,\n 1.7278e-14, 2.4013e-14, 9.1865e-15, 2.1817e-14, 1.3618e-14, 2.0989e-14,\n 9.3458e-15, 8.4211e-15, 5.9210e-15, 1.0114e-13, 1.9484e-13, 8.3798e-15,\n 1.1573e-13, 2.0182e-13, 6.1027e-14, 4.5382e-14, 1.0223e-13, 8.3819e-14,\n 1.9276e-14, 2.7148e-14, 5.0325e-15, 7.3684e-14]], device='cuda:0')" + }, + "56": { + "step": "tensor(10012.)", + "exp_avg": "tensor([ 4.4220e-12, -4.2769e-12], device='cuda:0')", + "exp_avg_sq": "tensor([5.7069e-13, 5.7069e-13], device='cuda:0')" + }, + "57": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[ 9.3800e-18, 2.2563e-12, -3.6521e-16, -5.8198e-19, 2.1554e-12,\n 9.2823e-16, -7.5808e-13, -6.2605e-06],\n [ 5.7605e-22, -4.0417e-17, 1.5897e-20, 2.2128e-23, -5.8022e-17,\n -2.6669e-20, 3.5243e-17, 2.7337e-10],\n [ 2.9258e-18, 1.1774e-12, -2.1531e-16, -3.3459e-19, 1.1768e-12,\n 5.1137e-16, -4.5352e-13, -3.6993e-06],\n [-4.6038e-20, -1.8864e-14, 3.4694e-18, 5.3820e-21, -1.8872e-14,\n -8.2122e-18, 7.2954e-15, 5.9548e-08],\n [ 1.4528e-21, -9.0113e-15, 1.9494e-18, 2.9360e-21, -9.6338e-15,\n -4.2364e-18, 4.1460e-15, 3.3417e-08],\n [ 9.5474e-18, 2.5681e-12, -4.3039e-16, -6.8056e-19, 2.4835e-12,\n 1.0727e-15, -8.9849e-13, -7.3928e-06],\n [-7.9292e-21, -1.7356e-16, -6.3045e-20, -6.8902e-23, 2.2509e-17,\n 2.5975e-20, -1.5045e-16, -1.0802e-09],\n [ 9.7410e-21, -4.9144e-15, 1.1806e-18, 1.7472e-21, -5.4811e-15,\n -2.4339e-18, 2.5376e-15, 2.0264e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.4006e-12, 1.4118e-11, 6.7181e-13, 2.8120e-12, 5.0680e-12, 1.5680e-13,\n 2.1316e-12, 4.3410e-08],\n [3.3206e-14, 6.2868e-14, 2.5850e-15, 1.0016e-14, 5.2878e-14, 9.9035e-16,\n 1.0933e-14, 7.2472e-10],\n [7.0757e-13, 1.1552e-12, 5.1769e-14, 2.0262e-13, 4.3141e-13, 1.3912e-14,\n 1.7316e-13, 2.5617e-08],\n [1.2278e-12, 2.5334e-12, 1.1004e-13, 4.3397e-13, 9.2954e-13, 2.7821e-14,\n 4.1207e-13, 1.9751e-09],\n [3.7115e-12, 4.8501e-12, 2.5578e-13, 1.0367e-12, 1.8260e-12, 6.4365e-14,\n 8.6554e-13, 1.2785e-09],\n [4.0102e-12, 5.5451e-12, 2.5830e-13, 1.0708e-12, 2.6384e-12, 6.4736e-14,\n 8.6442e-13, 9.8183e-08],\n [3.6580e-12, 5.0685e-12, 3.0167e-13, 1.1954e-12, 1.9956e-12, 7.0675e-14,\n 9.9816e-13, 2.4122e-09],\n [1.0318e-11, 1.6694e-11, 7.5385e-13, 3.1094e-12, 5.5107e-12, 1.9923e-13,\n 2.4912e-12, 1.0248e-08]], device='cuda:0')" + }, + "58": { + "step": "tensor(10012.)", + "exp_avg": "tensor([-6.2584e-06, 2.7336e-10, -3.6995e-06, 5.9543e-08, 3.3421e-08,\n -7.3933e-06, -1.0802e-09, 2.0260e-08], device='cuda:0')", + "exp_avg_sq": "tensor([4.3660e-08, 7.2603e-10, 2.5636e-08, 2.0202e-09, 1.3688e-09, 9.8296e-08,\n 2.5137e-09, 1.0513e-08], device='cuda:0')" + }, + "59": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[ 3.6625e-06, -3.1111e-07, 3.8156e-06, -3.1008e-07, -3.1065e-07,\n 3.1888e-06, -3.1111e-07, -3.1108e-07],\n [-4.3909e-06, 3.7295e-07, -4.5745e-06, 3.7174e-07, 3.7241e-07,\n -3.8233e-06, 3.7295e-07, 3.7293e-07],\n [-5.3386e-08, 4.5324e-09, -5.5619e-08, 4.5187e-09, 4.5259e-09,\n -4.6505e-08, 4.5324e-09, 4.5324e-09],\n [ 7.8332e-07, -6.6504e-08, 8.1610e-07, -6.6303e-08, -6.6408e-08,\n 6.8234e-07, -6.6504e-08, -6.6504e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.5836e-08, 6.9360e-09, 9.4987e-08, 3.1676e-09, 2.5030e-09, 2.6953e-08,\n 1.7994e-08, 1.0577e-08],\n [4.5134e-09, 4.6900e-10, 8.1691e-09, 2.4522e-10, 1.6710e-10, 2.9760e-09,\n 1.0503e-09, 6.5516e-10],\n [1.3282e-08, 2.4374e-09, 2.9450e-08, 7.9415e-10, 7.7526e-10, 8.6241e-09,\n 5.4115e-09, 3.5443e-09],\n [1.5351e-08, 2.6011e-09, 3.3323e-08, 9.0492e-10, 8.8763e-10, 1.0511e-08,\n 5.3275e-09, 3.6263e-09]], device='cuda:0')" + }, + "60": { + "step": "tensor(10012.)", + "exp_avg": "tensor([ 2.7091e-06, -3.1353e-06, -3.0090e-08, 4.5583e-07], device='cuda:0')", + "exp_avg_sq": "tensor([7.6537e-07, 4.5675e-08, 2.1408e-07, 2.0815e-07], device='cuda:0')" + }, + "61": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[ 8.5222e-18, 2.1434e-16, 2.2206e-16, ..., -8.3594e-16,\n -6.2999e-15, -5.6208e-14],\n [-5.1666e-18, 2.5146e-16, -6.8133e-17, ..., -2.4529e-14,\n -2.2238e-14, -6.7808e-14],\n [-9.5617e-18, 2.0543e-16, -8.6451e-17, ..., -4.0218e-14,\n 4.3301e-14, 4.2516e-14],\n ...,\n [-8.7037e-17, 3.4408e-15, -2.6789e-16, ..., -4.5484e-13,\n 2.3322e-12, -5.5925e-12],\n [-6.8808e-18, 1.6572e-16, -3.7597e-17, ..., -3.2637e-14,\n -2.5369e-14, 5.3744e-14],\n [ 2.6369e-17, 3.8829e-15, 8.1164e-16, ..., -1.5950e-13,\n 7.2831e-13, -4.0511e-12]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.5121e-16, 5.8394e-16, 7.3826e-15, ..., 9.8008e-15, 6.3047e-15,\n 7.6599e-15],\n [5.6818e-17, 6.9742e-17, 9.6545e-16, ..., 9.8576e-16, 6.2613e-16,\n 8.5252e-16],\n [5.2193e-16, 5.1403e-16, 4.9996e-15, ..., 9.3701e-15, 3.9848e-15,\n 7.6185e-15],\n ...,\n [1.1100e-14, 8.9098e-15, 1.4303e-13, ..., 1.5188e-13, 1.0757e-13,\n 1.4998e-13],\n [4.1043e-16, 2.7796e-16, 4.1781e-15, ..., 7.5463e-15, 2.6022e-15,\n 5.4786e-15],\n [1.9230e-15, 1.6317e-15, 2.8601e-14, ..., 2.7912e-14, 1.7771e-14,\n 2.9507e-14]], device='cuda:0')" + }, + "62": { + "step": "tensor(10012.)", + "exp_avg": "tensor([-3.4118e-13, -1.0675e-12, 8.9346e-14, -2.2441e-12, 1.9914e-14,\n -3.9414e-11, 1.8585e-12, -5.4227e-11, -1.1452e-12, -4.1785e-11,\n -1.6022e-12, -3.3408e-11, -4.4581e-11, -1.0004e-12, -3.1113e-11,\n -4.3042e-11, -5.3031e-11, -4.1989e-11, 7.3117e-13, -2.1476e-12,\n -1.0127e-12, -2.3605e-12, -1.3990e-12, -3.1452e-12, -6.0114e-11,\n -1.9722e-12, -4.7876e-11, -4.2893e-11, -3.2022e-12, 1.3974e-13,\n 1.0587e-12, -3.3351e-11, -4.6490e-11, -5.1081e-11, -4.8032e-11,\n -5.1653e-11, -4.3153e-11, -2.4384e-11, -4.6678e-11, -2.7889e-11,\n 8.2083e-13, -1.0627e-12, -4.6660e-11, -1.0001e-12, -8.7567e-13,\n -3.7053e-13, -5.9201e-11, -1.3968e-13, 1.3703e-12, -1.3452e-12,\n 1.5107e-12, -4.7089e-11, -4.4175e-11, -3.8564e-13, -4.0838e-11,\n -3.0820e-11, -3.9394e-11, -4.9189e-11, 2.1349e-12, -5.0120e-11,\n -6.3699e-11, -2.3349e-11, -5.3182e-11, 6.8232e-13, -5.2614e-13,\n 1.2790e-12, -5.4808e-11, -4.4022e-11, -1.3763e-12, -2.8265e-12,\n -3.3642e-11, -5.0946e-11, 1.1918e-13, -9.1507e-13, -9.0556e-13,\n -4.3462e-11, -3.3427e-11, -1.6035e-12, -4.8165e-11, -3.6444e-11,\n -1.1401e-12, -1.4222e-12, -4.4854e-11, -2.7585e-11, -7.5123e-13,\n -3.2531e-11, -3.6397e-11, -5.9628e-11, -3.5146e-11, -6.4152e-11,\n -5.7373e-11, -3.4223e-11, 2.0065e-12, 5.2895e-13, -3.1864e-11,\n -4.4004e-11, 4.7419e-15, -2.3697e-12, -4.0361e-11, 1.7117e-12,\n 4.0805e-13, -3.6451e-11, -2.8754e-13, -3.9098e-11, -4.8621e-11,\n -3.7357e-11, -3.4341e-11, 1.2383e-12, -3.7992e-12, -3.2122e-11,\n 1.8402e-12, -3.3677e-11, -2.4000e-11, -3.0405e-12, -1.1662e-12,\n -4.1145e-11, -5.5966e-11, 9.9499e-13, -3.3569e-11, 1.4982e-12,\n -3.9588e-11, -4.6300e-11, -1.1462e-13, -4.1425e-11, -2.4190e-12,\n -2.4800e-11, -2.1041e-12, 9.6381e-13, -2.2242e-12, -5.9608e-11,\n -1.7367e-12, 2.2637e-12, 8.9787e-13, -5.3400e-11, -1.9386e-13,\n -3.1594e-11, -1.2775e-12, -4.2012e-11, 1.1727e-12, -2.4419e-12,\n -1.0393e-12, -4.1573e-11, -5.3512e-11, -1.4104e-12, 2.1787e-12,\n 1.8542e-12, -5.2425e-11, -4.5144e-11, 1.7731e-12, 1.4516e-12,\n -4.5691e-11, -5.9307e-11, -6.0424e-11, 4.5848e-13, -3.1207e-12,\n -3.6518e-11, 1.5242e-12, -7.2415e-16, -3.3989e-11, -4.9410e-11,\n -3.8730e-11, -2.4903e-12, -4.1927e-11, -5.0650e-11, -6.5371e-13,\n 4.8838e-13, -4.2614e-11, -3.2336e-11, -3.4263e-12, -4.6585e-11,\n -2.2037e-12, -5.2746e-13, -3.0792e-11, 3.1292e-13, -5.9238e-14,\n -3.1006e-12, 7.0181e-13, -3.3078e-11, -9.7340e-13, -3.3431e-11,\n -3.3413e-11, -4.0439e-11, 1.4441e-12, 5.1511e-14, -6.1103e-14,\n -4.5928e-11, -1.6244e-13, -1.1195e-12, -3.6791e-11, -5.4023e-11,\n 6.4586e-13, -3.7767e-11], device='cuda:0')", + "exp_avg_sq": "tensor([7.9388e-12, 9.6490e-13, 6.2470e-12, 1.7452e-11, 3.9493e-12, 7.9800e-12,\n 2.8428e-11, 9.7547e-11, 3.2766e-11, 6.3580e-11, 6.3078e-11, 1.7583e-11,\n 1.3020e-11, 1.9989e-11, 2.6746e-11, 2.7268e-11, 4.0098e-11, 2.2397e-11,\n 4.6957e-12, 1.1059e-12, 3.9719e-11, 1.2078e-11, 7.7800e-12, 3.6209e-11,\n 1.5934e-10, 6.7568e-13, 2.2939e-11, 2.9237e-11, 2.2272e-12, 2.4118e-11,\n 7.8197e-12, 4.7370e-11, 1.4960e-10, 4.4602e-11, 6.5183e-11, 8.6465e-11,\n 5.3426e-11, 1.9454e-12, 5.6313e-11, 1.1902e-11, 6.8004e-11, 8.6119e-13,\n 4.3315e-11, 1.0972e-12, 4.7608e-13, 5.8473e-11, 1.1208e-10, 3.6874e-12,\n 1.8540e-12, 1.6814e-11, 3.2099e-11, 4.6547e-11, 9.7710e-11, 3.4461e-12,\n 1.2274e-11, 1.0809e-11, 1.5291e-11, 5.2081e-11, 3.4089e-13, 1.6363e-10,\n 1.9804e-10, 2.9275e-13, 1.6740e-10, 1.6492e-11, 1.1409e-12, 6.9358e-11,\n 1.0191e-10, 2.2455e-11, 1.3419e-11, 7.8345e-12, 5.7417e-12, 1.8826e-10,\n 5.6454e-13, 2.0157e-11, 3.5963e-11, 2.1332e-11, 4.0174e-12, 3.1140e-11,\n 5.1547e-11, 3.5929e-11, 6.4228e-12, 3.2610e-12, 8.9964e-11, 6.5855e-12,\n 1.7329e-12, 1.3870e-11, 4.8552e-11, 1.1984e-10, 2.3499e-11, 2.0331e-10,\n 1.2444e-10, 2.7225e-11, 2.5884e-11, 1.1490e-11, 3.0065e-11, 7.0909e-11,\n 1.7191e-11, 3.6456e-12, 1.2471e-11, 5.1976e-11, 2.8960e-12, 1.2328e-11,\n 3.7717e-13, 8.1036e-12, 6.5207e-11, 4.9479e-11, 1.8654e-11, 4.2593e-12,\n 3.3722e-12, 9.1136e-12, 1.7252e-13, 2.9444e-11, 2.5736e-13, 2.5315e-12,\n 7.4188e-12, 2.5449e-11, 8.6019e-11, 2.7446e-11, 6.5613e-12, 2.2357e-11,\n 9.2919e-11, 6.6946e-11, 1.3245e-11, 5.3563e-11, 1.3080e-12, 1.3028e-11,\n 2.3633e-11, 3.0667e-12, 2.2351e-11, 1.0384e-10, 6.3228e-11, 3.6368e-11,\n 3.9197e-12, 1.9675e-10, 5.7626e-13, 1.7410e-11, 6.5390e-12, 1.4141e-11,\n 2.1537e-11, 1.5352e-12, 1.9630e-11, 4.6388e-11, 1.9033e-10, 2.6356e-12,\n 1.9341e-11, 1.3868e-11, 8.8353e-11, 4.7144e-12, 2.3552e-11, 4.4770e-12,\n 9.6150e-11, 2.2453e-10, 1.4542e-10, 2.0679e-11, 2.7477e-11, 2.0676e-12,\n 3.8897e-11, 1.7846e-11, 3.0596e-11, 1.4299e-10, 7.4459e-11, 4.3501e-11,\n 4.2677e-11, 3.4625e-11, 1.5725e-11, 1.0411e-12, 4.9134e-11, 3.9350e-12,\n 2.8874e-12, 1.0581e-11, 3.5777e-11, 9.8559e-13, 7.1173e-12, 1.4458e-11,\n 6.8183e-12, 1.6310e-11, 3.1172e-12, 2.8272e-11, 9.0239e-12, 9.0555e-12,\n 1.1706e-11, 9.1427e-11, 1.1652e-11, 8.5701e-12, 1.0996e-11, 2.1513e-10,\n 4.6320e-11, 3.3159e-12, 5.0551e-12, 1.3645e-10, 4.9460e-12, 2.6233e-11],\n device='cuda:0')" + }, + "63": { + "step": "tensor(10012.)", + "exp_avg": "tensor([[ 4.1393e-11, 4.0572e-11, 4.1033e-11, 4.2687e-11, 4.0761e-11,\n -2.0253e-10, 4.1105e-11, -1.9952e-10, 4.2386e-11, -2.2151e-10,\n 4.2359e-11, -1.9612e-10, -2.0124e-10, 4.1311e-11, -1.9659e-10,\n -2.0425e-10, -2.1918e-10, -2.1335e-10, 4.0957e-11, 4.0119e-11,\n 4.1773e-11, 4.1439e-11, 4.1443e-11, 4.2386e-11, -2.1521e-10,\n 4.3042e-11, -2.2271e-10, -1.9046e-10, 4.1265e-11, 4.1515e-11,\n 4.0394e-11, -1.6499e-10, -1.9171e-10, -2.5237e-10, -1.8045e-10,\n -1.6596e-10, -2.1341e-10, -1.9559e-10, -2.0540e-10, -2.0361e-10,\n 4.1658e-11, 4.1078e-11, -1.9197e-10, 4.0903e-11, 4.1638e-11,\n 4.1173e-11, -1.6920e-10, 3.9521e-11, 4.0994e-11, 4.0942e-11,\n 4.0385e-11, -1.9720e-10, -1.6082e-10, 4.2149e-11, -2.2777e-10,\n -2.1016e-10, -1.9812e-10, -1.7616e-10, 3.9638e-11, -1.4844e-10,\n -1.9953e-10, -2.1668e-10, -1.9921e-10, 4.0974e-11, 4.1234e-11,\n 4.0795e-11, -2.1766e-10, -2.0529e-10, 4.2568e-11, 4.2458e-11,\n -2.2983e-10, -1.8062e-10, 4.1267e-11, 4.1949e-11, 4.1766e-11,\n -2.0178e-10, -2.2275e-10, 4.2499e-11, -2.0898e-10, -1.8905e-10,\n 4.0863e-11, 4.0969e-11, -1.9433e-10, -2.3765e-10, 4.0740e-11,\n -2.0913e-10, -2.1339e-10, -2.2352e-10, -2.1244e-10, -2.1014e-10,\n -1.9335e-10, -2.0436e-10, 4.0283e-11, 4.0631e-11, -2.1283e-10,\n -1.9096e-10, 4.1256e-11, 4.0939e-11, -2.1991e-10, 4.0854e-11,\n 4.0638e-11, -2.1154e-10, 4.0898e-11, -2.1586e-10, -1.9754e-10,\n -2.1759e-10, -2.2738e-10, 4.1190e-11, 4.1719e-11, -2.3264e-10,\n 1.4385e-11, -2.2940e-10, -1.7247e-10, 4.1423e-11, 4.1653e-11,\n -1.8838e-10, -2.0003e-10, 4.0752e-11, -2.3423e-10, 4.1062e-11,\n -1.7988e-10, -1.8147e-10, 4.1561e-11, -1.8389e-10, 4.1546e-11,\n -1.9319e-10, 4.2498e-11, 4.2238e-11, 4.0544e-11, -2.0547e-10,\n 4.3702e-11, 4.1359e-11, 4.1313e-11, -1.7109e-10, 4.0042e-11,\n -2.2269e-10, 4.1000e-11, -2.5105e-10, 4.1298e-11, 4.0372e-11,\n 4.2229e-11, -2.0079e-10, -1.7119e-10, 4.0803e-11, 4.1307e-11,\n 4.1250e-11, -1.7235e-10, -2.1131e-10, 3.9278e-11, 4.0697e-11,\n -1.7302e-10, -1.7192e-10, -2.3075e-10, 3.8394e-11, 4.3275e-11,\n -2.2880e-10, 4.0379e-11, 4.1424e-11, -1.9311e-10, -1.7037e-10,\n -2.0933e-10, 4.1892e-11, -2.0077e-10, -2.0511e-10, 4.1139e-11,\n 4.1118e-11, -1.9670e-10, -2.3445e-10, 4.0620e-11, -1.7292e-10,\n 4.0994e-11, 4.0710e-11, -1.9906e-10, 4.0760e-11, 4.2142e-11,\n 4.1145e-11, 4.1047e-11, -1.7946e-10, 4.1572e-11, -1.9519e-10,\n -2.2784e-10, -1.7735e-10, 4.0628e-11, 4.1287e-11, 4.1016e-11,\n -1.9169e-10, 4.1663e-11, 4.2039e-11, -2.1915e-10, -1.9271e-10,\n 4.1004e-11, -2.3383e-10],\n [-7.2850e-13, -1.2896e-13, -5.7504e-13, -1.8939e-12, -3.3643e-13,\n 2.8811e-11, -2.5937e-13, 2.8428e-11, -1.7847e-12, 4.5966e-11,\n -1.9481e-12, 1.2546e-11, 3.1975e-11, -3.9576e-13, 3.3395e-11,\n 2.8985e-11, 3.6581e-11, 5.1801e-11, -2.4567e-13, 1.1224e-13,\n -1.5224e-12, -8.1981e-13, -8.8643e-13, -1.9482e-12, 3.8054e-11,\n -2.3743e-12, 4.0671e-11, 2.0554e-11, -9.9499e-13, -1.4639e-12,\n -7.3929e-13, 9.9986e-12, 3.7187e-11, 5.2910e-11, 1.8832e-11,\n 1.2034e-11, 3.8107e-11, 2.2676e-11, 2.4700e-11, 2.0256e-11,\n -1.1129e-12, -3.9349e-13, 2.8769e-11, -3.3676e-13, -1.4173e-12,\n -3.1019e-13, 1.2545e-11, 5.8115e-13, -3.7549e-13, -4.1496e-13,\n -2.1399e-13, 1.7103e-11, -4.8753e-12, -1.3979e-12, 3.0768e-11,\n 3.1950e-11, 2.4948e-11, 2.2950e-11, -3.5432e-13, 7.8813e-12,\n 3.8781e-11, 3.1463e-11, 3.2567e-11, -4.5969e-13, -4.7917e-13,\n -8.6157e-14, 4.7388e-11, 3.6391e-11, -2.1665e-12, -1.3385e-12,\n 3.8726e-11, 3.0734e-11, -8.8268e-13, -1.2883e-12, -1.7307e-12,\n 3.7547e-11, 4.0993e-11, -2.1634e-12, 4.1743e-11, 3.8142e-11,\n -6.4312e-13, -4.0001e-13, 3.1065e-11, 4.7345e-11, -1.0558e-12,\n 3.1800e-11, 4.4564e-11, 3.1244e-11, 3.0516e-11, 4.2315e-11,\n 2.8755e-11, 2.5378e-11, 1.6881e-13, -1.5034e-12, 5.0108e-11,\n 3.8950e-11, -7.5930e-13, -5.6920e-13, 2.9052e-11, -3.3725e-13,\n -4.0825e-13, 3.4215e-11, -6.5602e-13, 2.7516e-11, 2.4124e-11,\n 3.1650e-11, 4.3878e-11, -5.0002e-13, -9.7403e-13, 3.5536e-11,\n 1.6873e-12, 4.0501e-11, 1.8495e-11, -9.6497e-13, -1.1135e-12,\n 2.8110e-11, 4.2613e-11, -6.1016e-13, 4.2046e-11, -5.4221e-13,\n 1.7909e-11, 2.5891e-11, -8.8862e-13, 1.9535e-11, -1.0480e-12,\n 2.5446e-11, -1.7937e-12, -2.7427e-12, -1.4521e-15, 4.5763e-11,\n -3.3596e-12, -9.2058e-13, -3.4670e-13, 1.9455e-11, 1.2082e-13,\n 4.0894e-11, -4.8910e-13, 4.7514e-11, -8.0116e-13, -1.2766e-14,\n -1.2380e-12, 2.6278e-11, 2.5166e-11, -2.3662e-13, -6.4492e-13,\n -1.4917e-12, 2.4814e-11, 2.7152e-11, 4.4875e-13, -5.5538e-13,\n 2.4980e-11, 2.2846e-11, 4.9593e-11, 1.1397e-12, -2.3510e-12,\n 4.0698e-11, -1.8292e-13, -8.0676e-13, 2.6860e-11, 1.7307e-11,\n 3.8202e-11, -1.6399e-12, 2.2686e-11, 1.7704e-11, -6.0058e-13,\n -4.7095e-13, 1.3439e-11, 4.0128e-11, -2.4518e-13, 8.6657e-12,\n -6.5311e-13, -9.0543e-14, 2.3566e-11, -8.0375e-13, -1.5061e-12,\n -6.7867e-13, -6.4962e-13, 2.2236e-11, -8.1233e-13, 3.1385e-11,\n 3.9997e-11, 3.4625e-12, -2.6694e-13, -7.7721e-13, -2.9554e-13,\n 1.9777e-11, -1.1899e-12, -1.4746e-12, 2.2177e-11, 2.2209e-11,\n -6.2437e-13, 2.8540e-11],\n [-1.0920e-11, -1.1051e-11, -1.1127e-11, -1.0946e-11, -1.0931e-11,\n 3.7073e-11, -1.1078e-11, 3.4247e-11, -1.0938e-11, 3.6143e-11,\n -1.0590e-11, 4.0664e-11, 3.5315e-11, -1.0878e-11, 3.2083e-11,\n 3.5165e-11, 3.7102e-11, 3.0107e-11, -1.1207e-11, -1.1276e-11,\n -1.0431e-11, -1.1053e-11, -1.0961e-11, -1.0902e-11, 3.6647e-11,\n -1.0825e-11, 3.2291e-11, 3.7589e-11, -1.0750e-11, -1.0625e-11,\n -1.0264e-11, 3.3939e-11, 3.3663e-11, 3.9215e-11, 3.5418e-11,\n 3.4473e-11, 3.0904e-11, 3.4419e-11, 3.8448e-11, 3.2822e-11,\n -1.0609e-11, -1.1308e-11, 3.2337e-11, -1.1227e-11, -1.0596e-11,\n -1.0914e-11, 3.2659e-11, -1.1037e-11, -1.1126e-11, -1.0982e-11,\n -1.0878e-11, 3.8564e-11, 3.4058e-11, -1.0750e-11, 3.8354e-11,\n 3.2980e-11, 3.7301e-11, 3.5446e-11, -1.0167e-11, 3.0051e-11,\n 2.7618e-11, 4.3981e-11, 3.3333e-11, -1.0797e-11, -1.1272e-11,\n -1.0906e-11, 3.3214e-11, 3.4863e-11, -1.0678e-11, -1.0991e-11,\n 3.7157e-11, 2.9203e-11, -1.1072e-11, -1.0866e-11, -1.0633e-11,\n 2.8465e-11, 4.1511e-11, -1.0593e-11, 3.3183e-11, 2.7973e-11,\n -1.0906e-11, -1.0894e-11, 3.4149e-11, 4.1028e-11, -1.0376e-11,\n 3.7279e-11, 3.4253e-11, 3.7321e-11, 4.0453e-11, 3.3195e-11,\n 3.1947e-11, 4.0838e-11, -1.0857e-11, -9.9792e-12, 3.1439e-11,\n 2.7755e-11, -1.0996e-11, -1.0879e-11, 3.8757e-11, -1.0789e-11,\n -1.0780e-11, 3.7227e-11, -1.0871e-11, 3.8500e-11, 3.4848e-11,\n 3.5999e-11, 3.8350e-11, -1.0845e-11, -1.1161e-11, 4.4235e-11,\n -3.6168e-12, 4.0098e-11, 3.8667e-11, -1.1236e-11, -1.0808e-11,\n 3.5600e-11, 3.1634e-11, -1.0683e-11, 4.2784e-11, -1.1062e-11,\n 3.1626e-11, 2.8812e-11, -1.0924e-11, 3.2322e-11, -1.1199e-11,\n 3.1571e-11, -1.0839e-11, -1.0521e-11, -1.1163e-11, 2.8577e-11,\n -1.0437e-11, -1.0851e-11, -1.0949e-11, 3.3671e-11, -1.1008e-11,\n 3.7714e-11, -1.1031e-11, 3.9405e-11, -1.0727e-11, -1.1158e-11,\n -1.1020e-11, 3.2574e-11, 2.6132e-11, -1.1233e-11, -1.0821e-11,\n -1.0644e-11, 3.1262e-11, 3.9748e-11, -1.0637e-11, -1.0648e-11,\n 2.9328e-11, 3.5247e-11, 3.3365e-11, -1.0794e-11, -1.0654e-11,\n 4.0138e-11, -1.0903e-11, -1.0854e-11, 3.7447e-11, 3.2354e-11,\n 3.4491e-11, -1.0807e-11, 3.5012e-11, 4.1369e-11, -1.1047e-11,\n -1.0951e-11, 3.7445e-11, 3.7085e-11, -1.1217e-11, 3.7314e-11,\n -1.0842e-11, -1.1292e-11, 3.7734e-11, -1.0785e-11, -1.0638e-11,\n -1.0963e-11, -1.0623e-11, 3.0026e-11, -1.0898e-11, 3.4959e-11,\n 3.5198e-11, 3.7347e-11, -1.0940e-11, -1.1022e-11, -1.1088e-11,\n 3.8625e-11, -1.1013e-11, -1.0756e-11, 3.6509e-11, 3.6132e-11,\n -1.1057e-11, 4.1400e-11],\n [-3.0319e-11, -2.9891e-11, -2.9698e-11, -3.0148e-11, -2.9865e-11,\n 1.3761e-10, -3.0122e-11, 1.3875e-10, -3.0117e-11, 1.4205e-10,\n -3.0342e-11, 1.4265e-10, 1.3598e-10, -3.0366e-11, 1.3205e-10,\n 1.4141e-10, 1.4653e-10, 1.3173e-10, -2.9972e-11, -2.9351e-11,\n -3.0221e-11, -2.9915e-11, -2.9978e-11, -3.0117e-11, 1.4095e-10,\n -3.0276e-11, 1.4825e-10, 1.3489e-10, -3.0023e-11, -2.9843e-11,\n -2.9780e-11, 1.2348e-10, 1.2200e-10, 1.5970e-10, 1.2854e-10,\n 1.2049e-10, 1.4488e-10, 1.3784e-10, 1.4554e-10, 1.5131e-10,\n -3.0355e-11, -2.9923e-11, 1.3208e-10, -2.9840e-11, -3.0023e-11,\n -3.0358e-11, 1.2485e-10, -2.9480e-11, -2.9981e-11, -2.9952e-11,\n -2.9628e-11, 1.4340e-10, 1.3278e-10, -3.0378e-11, 1.6003e-10,\n 1.4607e-10, 1.3886e-10, 1.1826e-10, -2.9483e-11, 1.1192e-10,\n 1.3481e-10, 1.4552e-10, 1.3458e-10, -3.0077e-11, -2.9883e-11,\n -3.0190e-11, 1.3728e-10, 1.3544e-10, -3.0138e-11, -3.0440e-11,\n 1.5746e-10, 1.2158e-10, -2.9739e-11, -3.0128e-11, -2.9718e-11,\n 1.3657e-10, 1.4227e-10, -3.0139e-11, 1.3583e-10, 1.2528e-10,\n -2.9656e-11, -3.0078e-11, 1.3069e-10, 1.5204e-10, -2.9766e-11,\n 1.4157e-10, 1.3501e-10, 1.5521e-10, 1.4422e-10, 1.3458e-10,\n 1.3151e-10, 1.3980e-10, -2.9993e-11, -2.9450e-11, 1.3369e-10,\n 1.2472e-10, -3.0065e-11, -2.9860e-11, 1.5472e-10, -3.0124e-11,\n -2.9896e-11, 1.4155e-10, -2.9716e-11, 1.5229e-10, 1.3938e-10,\n 1.5098e-10, 1.4482e-10, -3.0185e-11, -2.9993e-11, 1.5548e-10,\n -1.2627e-11, 1.4992e-10, 1.1911e-10, -2.9732e-11, -3.0100e-11,\n 1.2786e-10, 1.2815e-10, -2.9862e-11, 1.4979e-10, -2.9736e-11,\n 1.3135e-10, 1.2685e-10, -3.0149e-11, 1.3307e-10, -2.9748e-11,\n 1.3664e-10, -3.0191e-11, -2.9528e-11, -2.9671e-11, 1.3070e-10,\n -3.0302e-11, -2.9915e-11, -3.0278e-11, 1.1926e-10, -2.9443e-11,\n 1.4496e-10, -2.9893e-11, 1.6477e-10, -3.0203e-11, -2.9552e-11,\n -3.0303e-11, 1.3985e-10, 1.2082e-10, -2.9676e-11, -3.0137e-11,\n -2.9475e-11, 1.1688e-10, 1.4576e-10, -2.9463e-11, -2.9862e-11,\n 1.2164e-10, 1.1633e-10, 1.4862e-10, -2.9205e-11, -3.0621e-11,\n 1.5080e-10, -2.9671e-11, -3.0101e-11, 1.3036e-10, 1.2311e-10,\n 1.3699e-10, -2.9822e-11, 1.4342e-10, 1.4752e-10, -2.9906e-11,\n -3.0018e-11, 1.4894e-10, 1.5830e-10, -2.9554e-11, 1.2884e-10,\n -2.9905e-11, -2.9671e-11, 1.4032e-10, -2.9532e-11, -3.0354e-11,\n -2.9822e-11, -3.0180e-11, 1.2887e-10, -3.0187e-11, 1.3263e-10,\n 1.5176e-10, 1.3856e-10, -2.9828e-11, -2.9981e-11, -3.0059e-11,\n 1.3566e-10, -2.9862e-11, -3.0187e-11, 1.5935e-10, 1.3450e-10,\n -2.9705e-11, 1.6518e-10]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.6240e-10, 3.1986e-10, 4.2088e-10, 3.2127e-10, 3.6471e-10, 9.9327e-10,\n 3.1630e-10, 1.2462e-09, 3.7475e-10, 1.3792e-09, 3.2517e-10, 1.1360e-09,\n 6.6116e-10, 4.2035e-10, 1.0092e-09, 1.0155e-09, 1.4030e-09, 1.0611e-09,\n 3.9779e-10, 3.4480e-10, 3.5635e-10, 3.9131e-10, 3.8235e-10, 4.2695e-10,\n 1.3410e-09, 3.0346e-10, 9.5226e-10, 1.9383e-09, 3.4698e-10, 3.1365e-10,\n 2.2157e-10, 9.8269e-10, 1.4742e-09, 1.3258e-09, 1.1802e-09, 1.0079e-09,\n 1.0637e-09, 6.3388e-10, 1.1132e-09, 1.5950e-09, 2.8644e-10, 3.2254e-10,\n 1.2678e-09, 3.8810e-10, 2.6197e-10, 2.6898e-10, 8.7593e-10, 3.8177e-10,\n 3.7339e-10, 3.5990e-10, 4.1785e-10, 1.3138e-09, 2.0990e-09, 2.7425e-10,\n 9.8267e-10, 7.5175e-10, 1.4293e-09, 1.0125e-09, 1.6457e-10, 1.1303e-09,\n 1.3434e-09, 8.5272e-11, 1.3809e-09, 3.7119e-10, 3.9539e-10, 3.4371e-10,\n 8.2775e-10, 1.1236e-09, 2.9967e-10, 3.5940e-10, 1.0281e-09, 1.8838e-09,\n 2.9040e-10, 3.6116e-10, 3.0977e-10, 1.2733e-09, 9.9834e-10, 2.7978e-10,\n 9.8404e-10, 1.0205e-09, 3.9020e-10, 4.1992e-10, 1.4214e-09, 7.0642e-10,\n 2.7260e-10, 7.7828e-10, 1.1607e-09, 1.3575e-09, 1.4744e-09, 1.1439e-09,\n 1.0735e-09, 9.8178e-10, 3.5648e-10, 3.1343e-10, 1.1719e-09, 7.5137e-10,\n 3.9308e-10, 4.1496e-10, 1.2423e-09, 3.3316e-10, 2.9111e-10, 1.1198e-09,\n 2.3387e-10, 7.2444e-10, 1.4853e-09, 1.2208e-09, 1.0553e-09, 3.1162e-10,\n 3.6780e-10, 8.6668e-10, 3.2868e-11, 1.6746e-09, 7.6221e-11, 3.7492e-10,\n 2.5364e-10, 1.3175e-09, 7.8326e-10, 2.8559e-10, 9.4682e-10, 4.5556e-10,\n 1.1236e-09, 9.7453e-10, 3.2948e-10, 9.0181e-10, 3.7191e-10, 9.9482e-10,\n 3.6076e-10, 2.9719e-10, 3.7915e-10, 1.3435e-09, 2.3270e-10, 4.0149e-10,\n 3.6239e-10, 1.0681e-09, 2.6222e-10, 1.0684e-09, 3.8124e-10, 1.0269e-09,\n 3.0360e-10, 3.9410e-10, 3.7670e-10, 1.5332e-09, 1.2230e-09, 3.9925e-10,\n 3.3186e-10, 3.4918e-10, 1.2199e-09, 9.4730e-10, 3.0099e-10, 2.7243e-10,\n 1.6815e-09, 1.4257e-09, 1.0808e-09, 3.8654e-10, 2.5828e-10, 6.8432e-10,\n 3.8222e-10, 2.8646e-10, 1.0156e-09, 9.4560e-10, 9.9199e-10, 4.2713e-10,\n 1.7413e-09, 1.1952e-09, 3.8969e-10, 3.0680e-10, 1.3831e-09, 9.6562e-10,\n 4.1477e-10, 9.8882e-10, 3.6076e-10, 3.4633e-10, 1.0532e-09, 3.6041e-10,\n 3.1212e-10, 3.8998e-10, 2.9335e-10, 6.8285e-10, 3.1543e-10, 1.1833e-09,\n 1.0048e-09, 1.0238e-09, 3.8729e-10, 4.0909e-10, 4.3247e-10, 1.5079e-09,\n 4.0167e-10, 2.1256e-10, 9.3984e-10, 1.3944e-09, 4.6882e-10, 1.4656e-09],\n [3.0450e-11, 3.6925e-11, 4.8576e-11, 3.7210e-11, 4.2244e-11, 1.1683e-10,\n 3.6584e-11, 1.4578e-10, 4.3213e-11, 1.6114e-10, 3.7611e-11, 1.3273e-10,\n 7.8186e-11, 4.8277e-11, 1.1892e-10, 1.1957e-10, 1.6486e-10, 1.2506e-10,\n 4.5650e-11, 3.9899e-11, 4.1148e-11, 4.5261e-11, 4.4011e-11, 4.9050e-11,\n 1.5656e-10, 3.5273e-11, 1.1232e-10, 2.2536e-10, 3.9953e-11, 3.6354e-11,\n 2.5967e-11, 1.1521e-10, 1.7244e-10, 1.5577e-10, 1.3852e-10, 1.1860e-10,\n 1.2580e-10, 7.4723e-11, 1.3044e-10, 1.8587e-10, 3.3367e-11, 3.7167e-11,\n 1.4907e-10, 4.4791e-11, 3.0277e-11, 3.1241e-11, 1.0351e-10, 4.4076e-11,\n 4.3005e-11, 4.1506e-11, 4.8128e-11, 1.5390e-10, 2.4548e-10, 3.1715e-11,\n 1.1589e-10, 8.8484e-11, 1.6679e-10, 1.1907e-10, 1.9082e-11, 1.3264e-10,\n 1.5807e-10, 1.0171e-11, 1.6118e-10, 4.2812e-11, 4.5467e-11, 3.9799e-11,\n 9.7766e-11, 1.3250e-10, 3.4789e-11, 4.1504e-11, 1.2028e-10, 2.2020e-10,\n 3.3525e-11, 4.1683e-11, 3.5814e-11, 1.4983e-10, 1.1753e-10, 3.2636e-11,\n 1.1626e-10, 1.1979e-10, 4.5127e-11, 4.8441e-11, 1.6679e-10, 8.3156e-11,\n 3.1748e-11, 9.1785e-11, 1.3570e-10, 1.5910e-10, 1.7262e-10, 1.3464e-10,\n 1.2626e-10, 1.1529e-10, 4.1120e-11, 3.6289e-11, 1.3662e-10, 8.8550e-11,\n 4.5279e-11, 4.7764e-11, 1.4566e-10, 3.8606e-11, 3.3763e-11, 1.3176e-10,\n 2.7047e-11, 8.5978e-11, 1.7357e-10, 1.4266e-10, 1.2398e-10, 3.6122e-11,\n 4.2504e-11, 1.0220e-10, 3.6597e-12, 1.9531e-10, 9.0104e-12, 4.3230e-11,\n 2.9626e-11, 1.5448e-10, 9.2662e-11, 3.3097e-11, 1.1157e-10, 5.2244e-11,\n 1.3133e-10, 1.1507e-10, 3.8226e-11, 1.0618e-10, 4.2880e-11, 1.1659e-10,\n 4.1614e-11, 3.4454e-11, 4.3708e-11, 1.5776e-10, 2.7131e-11, 4.6265e-11,\n 4.1821e-11, 1.2573e-10, 3.0373e-11, 1.2543e-10, 4.4152e-11, 1.2131e-10,\n 3.5186e-11, 4.5422e-11, 4.3460e-11, 1.7983e-10, 1.4340e-10, 4.5984e-11,\n 3.8429e-11, 4.0441e-11, 1.4357e-10, 1.1145e-10, 3.4848e-11, 3.1639e-11,\n 1.9711e-10, 1.6680e-10, 1.2735e-10, 4.4577e-11, 3.0001e-11, 8.0418e-11,\n 4.4097e-11, 3.3303e-11, 1.1903e-10, 1.1122e-10, 1.1652e-10, 4.9011e-11,\n 2.0434e-10, 1.4006e-10, 4.5039e-11, 3.5465e-11, 1.6143e-10, 1.1388e-10,\n 4.7846e-11, 1.1619e-10, 4.1547e-11, 4.0066e-11, 1.2266e-10, 4.1507e-11,\n 3.6129e-11, 4.4831e-11, 3.4001e-11, 8.0529e-11, 3.6482e-11, 1.3867e-10,\n 1.1845e-10, 1.2021e-10, 4.4636e-11, 4.7164e-11, 4.9791e-11, 1.7556e-10,\n 4.6308e-11, 2.4903e-11, 1.1048e-10, 1.6359e-10, 5.3892e-11, 1.7111e-10],\n [2.2763e-11, 2.7990e-11, 3.6965e-11, 2.7979e-11, 3.1851e-11, 8.4072e-11,\n 2.7583e-11, 1.0673e-10, 3.2871e-11, 1.1781e-10, 2.8360e-11, 9.7354e-11,\n 5.5883e-11, 3.6978e-11, 8.5617e-11, 8.6064e-11, 1.1984e-10, 9.0093e-11,\n 3.4973e-11, 3.0087e-11, 3.1130e-11, 3.4247e-11, 3.3575e-11, 3.7576e-11,\n 1.1517e-10, 2.6412e-11, 8.0775e-11, 1.6762e-10, 3.0422e-11, 2.7308e-11,\n 1.9159e-11, 8.3491e-11, 1.2619e-10, 1.1351e-10, 1.0083e-10, 8.5957e-11,\n 9.0244e-11, 5.3323e-11, 9.5366e-11, 1.3724e-10, 2.4849e-11, 2.8222e-11,\n 1.0813e-10, 3.4098e-11, 2.2836e-11, 2.3348e-11, 7.4350e-11, 3.3435e-11,\n 3.2734e-11, 3.1526e-11, 3.6690e-11, 1.1286e-10, 1.8084e-10, 2.3853e-11,\n 8.3422e-11, 6.3480e-11, 1.2323e-10, 8.6111e-11, 1.4404e-11, 9.6334e-11,\n 1.1446e-10, 7.1780e-12, 1.1871e-10, 3.2546e-11, 3.4788e-11, 2.9946e-11,\n 7.0201e-11, 9.5797e-11, 2.6067e-11, 3.1443e-11, 8.7729e-11, 1.6223e-10,\n 2.5436e-11, 3.1599e-11, 2.7019e-11, 1.0845e-10, 8.4908e-11, 2.4218e-11,\n 8.3486e-11, 8.6937e-11, 3.4171e-11, 3.6916e-11, 1.2178e-10, 5.9695e-11,\n 2.3628e-11, 6.5902e-11, 9.8908e-11, 1.1645e-10, 1.2627e-10, 9.7324e-11,\n 9.1444e-11, 8.3276e-11, 3.1211e-11, 2.7321e-11, 1.0011e-10, 6.3534e-11,\n 3.4535e-11, 3.6521e-11, 1.0630e-10, 2.9010e-11, 2.5340e-11, 9.5228e-11,\n 2.0424e-11, 6.1006e-11, 1.2748e-10, 1.0439e-10, 8.9855e-11, 2.7118e-11,\n 3.2178e-11, 7.3124e-11, 3.0707e-12, 1.4406e-10, 6.4122e-12, 3.2852e-11,\n 2.1990e-11, 1.1285e-10, 6.6371e-11, 2.4868e-11, 8.0165e-11, 4.0223e-11,\n 9.5942e-11, 8.2495e-11, 2.8681e-11, 7.6473e-11, 3.2634e-11, 8.4569e-11,\n 3.1594e-11, 2.5851e-11, 3.3248e-11, 1.1507e-10, 2.0126e-11, 3.5279e-11,\n 3.1715e-11, 9.0847e-11, 2.2830e-11, 9.1043e-11, 3.3317e-11, 8.7063e-11,\n 2.6404e-11, 3.4666e-11, 3.3007e-11, 1.3091e-10, 1.0431e-10, 3.5082e-11,\n 2.8956e-11, 3.0491e-11, 1.0424e-10, 8.0509e-11, 2.6214e-11, 2.3603e-11,\n 1.4407e-10, 1.2234e-10, 9.1962e-11, 3.3945e-11, 2.2385e-11, 5.8506e-11,\n 3.3523e-11, 2.4857e-11, 8.6303e-11, 8.0504e-11, 8.4160e-11, 3.7702e-11,\n 1.4945e-10, 1.0238e-10, 3.4127e-11, 2.6771e-11, 1.1862e-10, 8.1938e-11,\n 3.6452e-11, 8.4918e-11, 3.1622e-11, 3.0249e-11, 9.0274e-11, 3.1599e-11,\n 2.7188e-11, 3.4255e-11, 2.5529e-11, 5.7749e-11, 2.7489e-11, 1.0110e-10,\n 8.5040e-11, 8.6960e-11, 3.3995e-11, 3.5947e-11, 3.8066e-11, 1.2893e-10,\n 3.5278e-11, 1.8379e-11, 7.9802e-11, 1.1952e-10, 4.1402e-11, 1.2598e-10],\n [3.4956e-11, 4.2516e-11, 5.5783e-11, 4.2734e-11, 4.8396e-11, 1.3325e-10,\n 4.2083e-11, 1.6650e-10, 4.9768e-11, 1.8486e-10, 4.3260e-11, 1.5187e-10,\n 8.8349e-11, 5.5903e-11, 1.3491e-10, 1.3597e-10, 1.8703e-10, 1.4173e-10,\n 5.2971e-11, 4.5837e-11, 4.7419e-11, 5.1911e-11, 5.0818e-11, 5.6741e-11,\n 1.7909e-10, 4.0251e-11, 1.2720e-10, 2.5846e-10, 4.6163e-11, 4.1702e-11,\n 2.9321e-11, 1.3183e-10, 1.9702e-10, 1.7645e-10, 1.5747e-10, 1.3437e-10,\n 1.4172e-10, 8.5281e-11, 1.4845e-10, 2.1309e-10, 3.8015e-11, 4.2948e-11,\n 1.6911e-10, 5.1433e-11, 3.4896e-11, 3.5782e-11, 1.1672e-10, 5.0705e-11,\n 4.9667e-11, 4.7844e-11, 5.5500e-11, 1.7487e-10, 2.7918e-10, 3.6574e-11,\n 1.3119e-10, 1.0097e-10, 1.9037e-10, 1.3533e-10, 2.1788e-11, 1.5114e-10,\n 1.7922e-10, 1.1344e-11, 1.8432e-10, 4.9302e-11, 5.2517e-11, 4.5714e-11,\n 1.1043e-10, 1.4949e-10, 3.9809e-11, 4.7763e-11, 1.3773e-10, 2.5075e-10,\n 3.8572e-11, 4.8025e-11, 4.1225e-11, 1.6992e-10, 1.3332e-10, 3.7151e-11,\n 1.3122e-10, 1.3648e-10, 5.1742e-11, 5.5644e-11, 1.8927e-10, 9.4817e-11,\n 3.6212e-11, 1.0411e-10, 1.5575e-10, 1.8079e-10, 1.9682e-10, 1.5273e-10,\n 1.4327e-10, 1.3169e-10, 4.7401e-11, 4.1676e-11, 1.5737e-10, 1.0070e-10,\n 5.2187e-11, 5.5055e-11, 1.6572e-10, 4.4295e-11, 3.8689e-11, 1.4962e-10,\n 3.1089e-11, 9.6754e-11, 1.9832e-10, 1.6345e-10, 1.4105e-10, 4.1442e-11,\n 4.8842e-11, 1.1629e-10, 4.2789e-12, 2.2352e-10, 1.0234e-11, 4.9835e-11,\n 3.3597e-11, 1.7560e-10, 1.0441e-10, 3.7979e-11, 1.2678e-10, 6.0489e-11,\n 1.5060e-10, 1.3023e-10, 4.3772e-11, 1.2068e-10, 4.9386e-11, 1.3346e-10,\n 4.7959e-11, 3.9536e-11, 5.0374e-11, 1.7886e-10, 3.0937e-11, 5.3278e-11,\n 4.8179e-11, 1.4262e-10, 3.4887e-11, 1.4283e-10, 5.0572e-11, 1.3702e-10,\n 4.0408e-11, 5.2245e-11, 5.0044e-11, 2.0478e-10, 1.6356e-10, 5.3004e-11,\n 4.4082e-11, 4.6345e-11, 1.6235e-10, 1.2665e-10, 4.0053e-11, 3.6299e-11,\n 2.2413e-10, 1.9016e-10, 1.4415e-10, 5.1283e-11, 3.4398e-11, 9.1168e-11,\n 5.0739e-11, 3.8084e-11, 1.3628e-10, 1.2629e-10, 1.3301e-10, 5.6706e-11,\n 2.3152e-10, 1.5939e-10, 5.1703e-11, 4.0826e-11, 1.8497e-10, 1.2896e-10,\n 5.4975e-11, 1.3127e-10, 4.7994e-11, 4.6011e-11, 1.4123e-10, 4.7938e-11,\n 4.1535e-11, 5.1885e-11, 3.9024e-11, 9.1440e-11, 4.1990e-11, 1.5814e-10,\n 1.3452e-10, 1.3721e-10, 5.1432e-11, 5.4260e-11, 5.7355e-11, 2.0261e-10,\n 5.3299e-11, 2.8139e-11, 1.2585e-10, 1.8568e-10, 6.2101e-11, 1.9559e-10]],\n device='cuda:0')" + }, + "64": { + "step": "tensor(10012.)", + "exp_avg": "tensor([-2.5029e-10, 6.4875e-12, 6.7352e-11, 1.7895e-10], device='cuda:0')", + "exp_avg_sq": "tensor([4.3627e-08, 4.9574e-09, 4.0058e-09, 5.6584e-09], device='cuda:0')" + }, + "8": { + "step": "tensor(2503.)", + "exp_avg": "tensor([[ 1.2186e-16, 9.7067e-08, 9.2506e-07, ..., 2.3069e-07,\n 1.4220e-06, 1.1327e-06],\n [-6.7208e-08, -2.4891e-07, 1.4929e-06, ..., -1.3489e-06,\n 2.6980e-07, 7.4294e-07],\n [ 1.0750e-06, -5.1447e-07, 1.1431e-07, ..., 9.2025e-07,\n 8.1624e-08, 2.1337e-07],\n ...,\n [-1.1439e-07, 3.5827e-06, -5.0783e-06, ..., -4.6112e-07,\n 3.9866e-07, -4.2991e-06],\n [-1.8352e-06, -1.5650e-09, 1.7197e-07, ..., -7.5925e-06,\n 1.4064e-05, -4.9675e-06],\n [ 1.3651e-07, -1.8425e-07, 8.3030e-06, ..., 2.8102e-06,\n -2.1438e-06, 7.7433e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.7118e-12, 8.7255e-11, 2.3504e-10, ..., 3.6862e-11, 2.0880e-09,\n 4.5414e-10],\n [3.1182e-11, 1.9139e-11, 9.7417e-10, ..., 2.0720e-10, 6.9521e-10,\n 1.7392e-11],\n [5.2649e-10, 2.5893e-10, 3.3636e-10, ..., 1.6667e-10, 2.5750e-09,\n 1.0076e-10],\n ...,\n [7.1870e-10, 3.6720e-10, 4.4103e-09, ..., 2.3382e-09, 7.1606e-10,\n 1.5638e-09],\n [6.6837e-11, 2.2720e-12, 3.6812e-10, ..., 2.4747e-09, 4.4584e-09,\n 2.9345e-08],\n [1.7343e-10, 1.8967e-10, 2.2053e-09, ..., 6.3565e-10, 3.6666e-09,\n 3.5025e-10]], device='cuda:0')" + }, + "9": { + "step": "tensor(2503.)", + "exp_avg": "tensor([ 3.5794e-05, 3.9986e-05, 1.9286e-05, ..., 5.8102e-05,\n 7.3984e-05, -1.1346e-05], device='cuda:0')", + "exp_avg_sq": "tensor([2.9105e-08, 8.3767e-09, 4.6686e-08, ..., 8.2870e-08, 8.4512e-08,\n 7.9751e-08], device='cuda:0')" + }, + "10": { + "step": "tensor(2503.)", + "exp_avg": "tensor([[-9.1094e-07, -6.5322e-07, 2.6178e-06, ..., 2.8344e-06,\n -6.8462e-07, -1.1183e-07],\n [ 2.8794e-06, 1.4678e-07, -8.9782e-07, ..., 3.4828e-06,\n 4.2018e-06, -1.5007e-06],\n [ 6.6797e-07, -1.8484e-06, -2.3517e-06, ..., -7.8073e-06,\n 3.4013e-06, 2.7175e-06],\n ...,\n [ 7.8450e-07, 8.3828e-07, -5.0319e-06, ..., 1.1731e-06,\n 9.6889e-07, 6.3053e-07],\n [ 3.8873e-06, 2.5216e-06, -5.6357e-07, ..., 1.1990e-06,\n -1.3994e-06, 2.1578e-06],\n [ 5.1735e-06, 9.9764e-07, -1.5487e-07, ..., 3.8950e-07,\n -1.1941e-06, 6.8416e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.9459e-10, 3.2179e-11, 1.0185e-10, ..., 1.6207e-10, 2.2539e-10,\n 2.9037e-10],\n [3.1172e-10, 5.1435e-11, 1.3800e-10, ..., 2.4739e-10, 2.3552e-10,\n 2.6111e-10],\n [3.3131e-10, 1.0146e-10, 1.4110e-10, ..., 3.9303e-10, 3.9442e-10,\n 4.7648e-10],\n ...,\n [1.3462e-10, 6.3975e-11, 1.4747e-10, ..., 4.1815e-10, 3.0700e-10,\n 4.0808e-10],\n [5.2267e-10, 1.1614e-10, 1.4072e-10, ..., 5.9018e-10, 3.5383e-10,\n 4.7948e-10],\n [9.8473e-10, 7.3291e-11, 8.4069e-11, ..., 2.8401e-10, 2.8197e-10,\n 5.5267e-10]], device='cuda:0')" + } + }, + "param_groups": [ + { + "lr": 0.0034555695366224513, + "name": "shared", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 0, + 1 + ] + }, + { + "lr": 0.0034555695366224513, + "name": "scale_256", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 2, + 3, + 4 + ] + }, + { + "lr": 0.0034555695366224513, + "name": "scale_512", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 5, + 6, + 7 + ] + }, + { + "lr": 0.0034555695366224513, + "name": "scale_768", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 8, + 9, + 10 + ] + }, + { + "lr": 0.0034555695366224513, + "name": "scale_1024", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 11, + 12, + 13 + ] + }, + { + "lr": 0.001728112022559819, + "name": "fusion", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.005, + "params": [ + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64 + ] + } + ] + }, + "scheduler_state_dict": { + "T_0": 10, + "T_i": 10, + "T_mult": 2, + "eta_min": 1e-06, + "T_cur": 6, + "base_lrs": [ + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.005 + ], + "last_epoch": 6, + "_step_count": 0, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 0.0034555695366224513, + 0.0034555695366224513, + 0.0034555695366224513, + 0.0034555695366224513, + 0.0034555695366224513, + 0.001728112022559819 + ] + }, + "metrics": { + "best_val_acc": 73.041, + "best_epoch": 5, + "scale_accuracies": { + "256": 71.228, + "512": 72.954, + "768": 72.21 + }, + "training_history": { + "epochs": [ + 1, + 2, + 3, + 4, + 5, + 6 + ], + "train_loss": [ + 3.4310503170769358, + 2.1717353230040883, + 2.280789690497776, + 1.9685389901274355, + 1.842365700580385, + 2.1424214597039826 + ], + "train_acc": [ + 54.52540535308824, + 69.02296890257085, + 72.83585980594255, + 75.20327170462555, + 76.72934129586541, + 77.98713984984003 + ], + "val_acc": [ + 66.689, + 68.889, + 71.268, + 72.266, + 72.764, + 73.041 + ], + "scale_accs": { + "256": [ + 66.689, + 68.889, + 70.197, + 70.459, + 70.902, + 71.228 + ], + "512": [ + 70.734, + 72.095, + 72.613, + 72.954 + ], + "768": [ + 72.21 + ] + }, + "lr": [ + 0.00975530705321762, + 0.00904518046337755, + 0.00793913236883622, + 0.00654543046337755, + 0.005000500000000001, + 0.0034555695366224513 + ] + } + }, + "train_config": { + "name": "david_training", + "run_id": "20251012_191456", + "dataset_name": "AbstractPhil/imagenet-clip-features-orderly", + "model_variant": [ + "clip_vit_b32", + "clip_vit_laion_b32" + ], + "num_classes": 1000, + "preset": "balanced", + "custom_config_path": null, + "num_classes_override": null, + "use_belly_override": null, + "belly_expand_override": null, + "progressive_training_override": true, + "scale_warmup_epochs_override": { + "256": 0, + "512": 2, + "768": 5, + "1024": 8 + }, + "num_epochs": 10, + "batch_size": 1024, + "learning_rate": 0.01, + "weight_decay": 1e-05, + "warmup_epochs": 3, + "use_rose_loss": true, + "rose_initial_weight": 0.2, + "rose_max_weight": 0.8, + "rose_weight_schedule": "adaptive", + "use_cayley_loss": true, + "cayley_weight": 0.01, + "scale_loss_balance": null, + "use_mixed_precision": false, + "gradient_clip": 10.0, + "scheduler_type": "cosine_restarts", + "min_lr": 1e-06, + "freeze_strategy": "never", + "freeze_threshold": 90.0, + "unfreeze_on_plateau": true, + "patience": 10, + "track_gradients": true, + "gradient_scale_threshold": 1e-05, + "gradient_scale_multiplier": 10.0, + "log_interval": 50, + "val_interval": 1, + "save_interval": 5, + "log_fusion_weights": true, + "log_loss_components": true, + "save_format": "safetensors", + "hf_repo": "AbstractPhil/david-shared-space", + "upload_to_hub": true, + "base_dir": "./david_training", + "num_workers": 10, + "pin_memory": true, + "prefetch_factor": 4, + "persistent_workers": true + } +} \ No newline at end of file