diff --git "a/weights/best_model_metadata.json" "b/weights/best_model_metadata.json" --- "a/weights/best_model_metadata.json" +++ "b/weights/best_model_metadata.json" @@ -1,36 +1,196 @@ { - "epoch": 0, + "epoch": 1, "optimizer_state_dict": { "state": { "0": { - "step": "tensor(1252.)", - "exp_avg": "tensor([[-2.4113e-05, 1.2303e-05, -3.6856e-05, ..., -1.2292e-06,\n -1.0857e-06, 1.0312e-05],\n [ 4.2458e-05, -1.4308e-05, 1.6978e-05, ..., -7.4049e-06,\n -4.0597e-06, -4.5033e-05],\n [ 8.8562e-06, -1.5668e-05, 2.6869e-05, ..., 1.8806e-05,\n 2.2478e-06, -5.1486e-05],\n ...,\n [-1.1202e-05, 2.5152e-05, 1.9325e-05, ..., 1.9150e-05,\n -2.7548e-05, -1.8088e-06],\n [-1.9628e-29, -3.3782e-29, 1.6203e-28, ..., -1.5242e-28,\n 8.8350e-30, -3.3881e-29],\n [ 5.7273e-05, 2.4279e-05, 8.4247e-06, ..., 2.0174e-05,\n -3.2522e-05, -4.0931e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.1983e-08, 1.2085e-08, 6.4733e-09, ..., 9.7722e-09, 7.3066e-09,\n 6.2270e-09],\n [1.2734e-08, 1.1464e-08, 6.8485e-09, ..., 6.7487e-09, 8.0445e-09,\n 6.1970e-09],\n [1.8862e-08, 1.8788e-08, 2.0858e-08, ..., 1.2985e-08, 1.1754e-08,\n 1.0339e-08],\n ...,\n [1.6817e-08, 1.9303e-08, 2.2495e-08, ..., 1.1944e-08, 1.0047e-08,\n 1.0593e-08],\n [4.7075e-11, 2.4558e-10, 7.4629e-11, ..., 4.3751e-13, 2.3549e-10,\n 1.3877e-12],\n [1.4044e-08, 1.3762e-08, 1.4686e-08, ..., 1.0629e-08, 8.7803e-09,\n 8.0974e-09]], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([[-6.5487e-06, 2.5376e-05, -1.5614e-05, ..., 2.9876e-05,\n -3.7149e-05, 2.6294e-05],\n [ 1.2961e-05, -1.9146e-06, 3.7802e-05, ..., 3.0303e-05,\n 4.4067e-05, -7.5155e-06],\n [-3.5080e-05, -1.4147e-05, -2.7791e-06, ..., 2.4522e-05,\n 2.8205e-05, 1.9891e-05],\n ...,\n [ 3.4402e-06, -2.0476e-05, 4.0793e-05, ..., 2.4010e-05,\n -3.1729e-05, 7.9134e-06],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-4.1767e-05, -7.7922e-05, 5.1855e-05, ..., -2.1684e-05,\n -2.3477e-05, -6.0733e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3577e-08, 1.1693e-08, 6.7396e-09, ..., 8.5626e-09, 7.6407e-09,\n 6.8233e-09],\n [1.0395e-08, 1.0609e-08, 6.7780e-09, ..., 6.2896e-09, 7.6135e-09,\n 5.3988e-09],\n [1.6860e-08, 1.5571e-08, 1.8368e-08, ..., 1.1968e-08, 9.9803e-09,\n 8.9247e-09],\n ...,\n [1.5494e-08, 1.7835e-08, 2.1564e-08, ..., 1.2079e-08, 1.0224e-08,\n 1.0152e-08],\n [1.3452e-11, 7.0176e-11, 2.1326e-11, ..., 1.2502e-13, 6.7292e-11,\n 3.9654e-13],\n [1.2289e-08, 1.3047e-08, 1.4017e-08, ..., 1.0277e-08, 7.2716e-09,\n 6.7571e-09]], device='cuda:0')" }, "1": { - "step": "tensor(1252.)", - "exp_avg": "tensor([-1.4070e-03, 1.3628e-03, -4.4851e-04, ..., -1.1237e-04,\n 3.4341e-27, 1.0627e-03], device='cuda:0')", - "exp_avg_sq": "tensor([1.6147e-05, 1.5322e-05, 3.3518e-05, ..., 3.1431e-05, 1.5939e-06,\n 2.5554e-05], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([ 7.0269e-04, -4.3829e-04, 6.4416e-04, ..., 4.0348e-04,\n 5.6052e-45, 1.4129e-03], device='cuda:0')", + "exp_avg_sq": "tensor([1.7100e-05, 1.4015e-05, 2.7751e-05, ..., 2.6989e-05, 4.5548e-07,\n 2.1548e-05], device='cuda:0')" }, "2": { - "step": "tensor(1252.)", - "exp_avg": "tensor([[-2.8681e-05, -1.1480e-06, 2.1258e-06, ..., -9.8516e-05,\n 5.6052e-45, -6.6567e-06],\n [-3.1674e-05, -4.3587e-06, -2.9065e-05, ..., 2.7090e-06,\n -5.6052e-45, -5.9087e-06],\n [ 4.0137e-06, 9.8352e-06, 1.6296e-05, ..., 2.3547e-06,\n -5.6052e-45, -8.3019e-05],\n ...,\n [ 4.1613e-21, -5.6052e-45, 5.6052e-45, ..., 6.7035e-36,\n 5.6052e-45, 1.8286e-19],\n [ 1.6744e-05, 7.4089e-06, -9.6605e-08, ..., -3.2625e-07,\n -5.6052e-45, -1.4310e-06],\n [-3.2172e-06, 9.0451e-06, 3.7791e-05, ..., -7.6358e-06,\n 5.6052e-45, -1.6760e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.1020e-08, 7.6641e-10, 2.6561e-09, ..., 4.8833e-08, 3.7532e-11,\n 1.3521e-08],\n [1.0195e-08, 1.9460e-09, 1.0396e-08, ..., 4.8112e-09, 2.0824e-09,\n 1.1113e-08],\n [3.8271e-08, 5.8692e-09, 6.2402e-09, ..., 4.1616e-09, 1.4351e-08,\n 1.0014e-08],\n ...,\n [1.3482e-09, 5.6214e-11, 4.2657e-12, ..., 5.3734e-11, 1.4411e-09,\n 2.3426e-11],\n [6.5829e-10, 9.2445e-11, 1.0454e-09, ..., 7.6076e-10, 2.9071e-12,\n 3.9806e-10],\n [9.3767e-09, 1.8638e-08, 6.9640e-08, ..., 6.8160e-09, 2.5756e-10,\n 2.5711e-08]], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([[ 8.5754e-07, -1.2882e-06, 1.7601e-05, ..., 7.4367e-05,\n 5.6052e-45, 2.1378e-05],\n [ 1.2624e-05, -2.4106e-05, -4.0388e-05, ..., 2.1485e-07,\n -5.6052e-45, -3.7897e-06],\n [-1.4413e-06, -1.7281e-05, 4.5049e-06, ..., 7.7915e-07,\n -5.6052e-45, -1.8508e-05],\n ...,\n [ 5.6052e-45, 3.0647e-20, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 2.4805e-06, -9.2363e-10, 7.3470e-07, ..., 1.4883e-05,\n -5.6052e-45, 4.3061e-07],\n [-3.4704e-06, -2.2494e-05, 8.1961e-05, ..., -1.8339e-06,\n 5.6052e-45, 3.5895e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.6828e-08, 6.3362e-10, 4.2841e-09, ..., 2.7095e-08, 1.0725e-11,\n 6.9180e-09],\n [7.5494e-09, 2.3728e-09, 4.8358e-09, ..., 1.8989e-09, 5.9508e-10,\n 8.8431e-09],\n [1.2735e-08, 3.1009e-09, 4.3913e-09, ..., 3.6356e-09, 4.1009e-09,\n 6.4680e-09],\n ...,\n [3.8527e-10, 1.6064e-11, 1.2190e-12, ..., 1.5355e-11, 4.1181e-10,\n 6.6941e-12],\n [1.5946e-09, 5.7076e-11, 4.2836e-10, ..., 4.9555e-10, 8.3073e-13,\n 2.7276e-10],\n [9.3836e-09, 8.1502e-09, 4.3388e-08, ..., 5.0469e-09, 7.3599e-11,\n 1.2771e-08]], device='cuda:0')" }, "3": { - "step": "tensor(1252.)", - "exp_avg": "tensor([-3.8596e-04, -6.4597e-05, -3.1119e-04, -1.1543e-05, -2.9842e-09,\n 3.3450e-05, -1.0319e-03, 1.6386e-04, -6.4902e-04, -1.0547e-04,\n -3.4165e-04, 3.0180e-04, 4.6264e-04, 2.6911e-04, 5.6052e-45,\n -4.9884e-04, 2.6123e-10, -1.0737e-03, 2.7171e-12, -2.5432e-04,\n 4.0728e-10, 6.6772e-04, -1.1303e-04, -9.2099e-04, -5.1912e-04,\n -7.6469e-04, -8.3268e-04, -4.9024e-06, -1.1608e-04, -2.5133e-04,\n 4.8361e-04, -1.0219e-04, 1.8264e-04, -2.1795e-04, 4.2666e-05,\n 2.1247e-04, -3.1320e-04, -1.0890e-03, -1.4995e-03, 6.7879e-05,\n -6.2122e-04, 1.2857e-03, 8.2162e-04, 5.6052e-45, -2.6280e-04,\n -3.1861e-04, 5.6052e-45, 1.8476e-23, -4.1875e-04, 7.9739e-04,\n 1.2772e-04, 6.9094e-21, 7.6941e-04, -7.9875e-04, 3.4710e-04,\n 1.1583e-03, -1.2151e-04, -1.7504e-04, 3.3865e-04, -9.5325e-04,\n -3.3759e-04, 4.0985e-04, -4.6070e-04, -1.1303e-04, 5.4466e-04,\n 3.1754e-08, 1.6964e-04, 6.2881e-05, 2.2999e-04, -7.2908e-05,\n 6.6118e-05, 5.6052e-45, -2.1207e-04, -6.6345e-04, 2.7809e-11,\n 3.8840e-04, -2.7600e-04, 3.9010e-04, -1.5638e-05, -6.9559e-05,\n 9.4896e-11, -3.7856e-04, 6.5805e-05, -3.9131e-04, -1.7825e-04,\n -9.3003e-25, 1.9772e-04, -7.7822e-05, -5.3698e-05, 7.8196e-04,\n 1.3789e-04, 3.6065e-04, 4.4292e-04, -1.1434e-03, 5.1657e-09,\n -6.4051e-04, 2.8768e-04, -1.6908e-04, 6.0949e-05, -8.4681e-05,\n -5.6195e-05, -3.0043e-04, -1.9556e-05, 9.2461e-05, 1.6296e-04,\n -1.9154e-04, -5.8774e-05, -4.4841e-05, -6.3093e-05, -6.3617e-05,\n 5.6052e-45, 1.7717e-04, 1.9154e-04, 2.5361e-04, -2.2194e-04,\n 5.1403e-04, 7.9414e-05, -3.1656e-04, 1.8836e-04, -1.4659e-03,\n -6.4513e-04, 2.8946e-04, 7.8575e-04, -5.0618e-04, 4.8099e-04,\n 1.6465e-05, -3.8902e-04, -1.6629e-03, -1.5244e-04, -5.7997e-18,\n 5.6052e-45, -1.6772e-11, 1.0638e-03, 2.8985e-04, 1.3594e-04,\n -8.5026e-04, -5.5744e-04, -2.6582e-04, 1.3949e-04, -1.5192e-04,\n 2.2805e-04, -6.2003e-04, 4.9995e-20, 4.5681e-04, 1.2257e-03,\n 6.7579e-04, -2.8456e-04, 6.0929e-04, 1.4420e-04, 1.0458e-04,\n 5.6052e-45, -7.2052e-05, 2.4600e-13, 1.6104e-03, -5.8285e-04,\n 3.2160e-19, -1.9803e-04, -5.5236e-04, -2.1408e-04, 2.5637e-05,\n 3.7593e-19, -2.5148e-04, 1.2890e-04, 7.4731e-05, 6.8647e-05,\n 1.5939e-04, -1.3659e-04, -8.2627e-04, -6.5673e-04, -1.6686e-03,\n 6.0147e-05, 1.3706e-37, 5.6052e-45, -1.6358e-04, -6.1835e-04,\n -1.0902e-04, 2.2299e-04, 2.6664e-04, 1.3074e-04, 1.3940e-04,\n 7.8295e-05, -3.3395e-05, 2.7054e-06, -9.1530e-05, 2.7302e-11,\n 1.0139e-04, -5.8781e-05, 3.2014e-04, 2.4959e-04, -3.4389e-04,\n -4.5809e-04, -6.7405e-05, -6.7336e-05, -4.6532e-05, -4.9097e-04,\n -3.7814e-04, -3.4563e-04, 1.8672e-04, 5.6052e-45, -8.8233e-04,\n -1.0234e-03, -6.6298e-04, 2.8351e-04, 1.2371e-03, 5.1655e-05,\n -4.5863e-04, 5.6052e-45, -4.4614e-04, 1.0399e-17, 2.9844e-26,\n -4.8786e-04, -1.0409e-05, 5.6052e-45, 5.3506e-04, -1.8260e-04,\n 1.5808e-04, -3.4635e-04, -3.7648e-05, 1.0781e-03, -3.6383e-04,\n 1.2232e-06, 1.3399e-41, -6.1097e-04, -1.7850e-04, -1.3899e-04,\n 9.6908e-04, -1.8474e-04, 4.3856e-05, 4.8608e-04, 1.8409e-16,\n 2.8570e-04, 7.5947e-04, 1.3514e-04, -2.9787e-04, 7.9737e-04,\n 3.0791e-22, -4.3338e-04, 7.7948e-05, 1.1572e-03, -2.9663e-05,\n 2.7767e-04, 9.1880e-05, 6.9060e-12, -3.6414e-04, 7.3335e-04,\n 7.9991e-11, 1.3920e-04, -8.7617e-04, 6.7773e-05, -2.6058e-04,\n -3.7250e-04, 2.0789e-03, -9.8050e-05, -5.8542e-05, 5.8085e-04,\n 1.1043e-03, 7.4914e-04, -1.8742e-04, -3.3012e-04, -4.3293e-05,\n -1.4991e-03, -5.6566e-04, 1.3569e-04, -5.8456e-05, -3.1165e-11,\n 1.5312e-05, -3.3249e-04, 2.6319e-04, 1.7416e-04, -9.4923e-04,\n 7.6114e-04, -2.7845e-04, 1.2567e-04, -2.2400e-04, -4.8569e-05,\n -4.9704e-04, 1.4508e-11, -9.0531e-04, -8.6441e-05, 3.2276e-04,\n -1.2490e-04, -3.2848e-05, -2.8441e-04, 1.0020e-04, -9.2220e-04,\n 1.9025e-04, 5.6052e-45, -4.7272e-04, 2.2725e-04, -3.1091e-04,\n 5.7819e-04, 2.2189e-04, -1.0462e-03, 5.7439e-23, 3.6965e-11,\n 2.1151e-04, 2.4987e-04, -4.0784e-04, -5.2871e-04, -4.0987e-04,\n -1.6069e-04, 1.9486e-36, 9.6779e-04, 1.1285e-04, 6.6631e-05,\n -6.2609e-05, 1.5715e-06, 6.1327e-05, 4.5623e-04, -1.6076e-04,\n 2.1346e-04, -1.2745e-04, 2.2109e-34, 1.0750e-05, -3.6977e-04,\n 5.0222e-04, -1.2614e-05, 5.3735e-04, 3.8796e-18, 1.4643e-03,\n -1.9504e-04, -1.6178e-04, 1.5984e-04, -1.9731e-04, 5.6052e-45,\n 7.0703e-04, 3.2938e-04, -1.3328e-04, 4.6027e-04, 9.3428e-06,\n 2.7459e-04, 1.3209e-04, -4.2327e-04, 5.5632e-05, 9.4125e-05,\n 6.8775e-04, -5.2552e-04, -6.0187e-05, 5.6052e-45, -6.6453e-04,\n -2.1172e-04, 6.2049e-04, -7.3990e-05, 5.6052e-45, -4.1256e-07,\n -1.3968e-04, -6.0209e-04, 2.7984e-15, 1.1219e-04, 6.8601e-05,\n 2.5446e-04, 4.7863e-04, 7.2318e-05, -1.3480e-04, 2.8360e-04,\n 3.0080e-04, 1.6453e-22, -1.1584e-03, 9.4438e-04, -2.7145e-04,\n -1.3745e-04, 4.8567e-10, -7.0086e-04, 3.3479e-04, -2.7512e-04,\n 1.4524e-04, 5.6052e-45, 2.9174e-04, -1.3285e-03, 2.1839e-04,\n -2.6123e-04, -7.4107e-04, 4.6734e-05, 1.5150e-04, 1.4288e-33,\n 2.3867e-04, -8.5430e-04, 9.0980e-04, -1.1187e-18, 5.6053e-05,\n 5.6052e-45, 3.2515e-05, -4.9052e-04, 1.2196e-03, -7.8002e-05,\n 7.5157e-04, 3.9110e-04, 3.6360e-04, 1.1652e-04, 1.9398e-04,\n -6.5035e-04, 8.4460e-05, -5.6704e-04, -4.1735e-04, 8.8027e-31,\n -1.6961e-04, 6.2388e-05, 1.1136e-03, 5.0222e-19, -9.9475e-04,\n -4.3400e-06, 3.4889e-20, 7.6732e-04, 5.4371e-04, -9.4237e-04,\n 1.0224e-04, -1.5445e-04, 9.1598e-04, -3.4173e-04, -6.7797e-04,\n -1.5145e-03, -1.0239e-03, -8.1216e-04, 2.0897e-03, -8.0373e-05,\n -3.6215e-04, -3.0187e-04, 2.6271e-16, -8.5389e-05, -3.4533e-05,\n 4.1580e-13, 6.9543e-05, 5.7893e-04, 3.7912e-24, -7.6275e-04,\n 6.7632e-05, 2.7817e-04, 6.0587e-04, 3.2767e-04, 3.3246e-04,\n 4.3393e-04, 4.6700e-04, 4.2283e-04, -1.3321e-04, 2.2952e-17,\n -7.0638e-04, 1.1565e-04, 2.6401e-04, 2.7073e-34, 1.2617e-17,\n 2.1887e-05, -3.9295e-04, -1.9401e-04, -3.8431e-05, 7.1870e-04,\n -1.0271e-05, 5.6052e-45, 2.6096e-08, 8.9449e-10, -1.7706e-04,\n -4.8827e-04, -4.8844e-05, 6.1727e-05, 3.7427e-04, -8.3248e-04,\n 2.0339e-05, 1.8269e-07, 1.1896e-04, 5.6052e-45, -7.6055e-05,\n 2.2450e-04, 7.0869e-33, 1.7050e-04, 3.2938e-04, 8.2562e-04,\n -8.8984e-05, -4.0977e-04, 7.5647e-05, 7.6808e-06, 4.6035e-05,\n -2.5070e-04, 9.9478e-05, 8.6605e-04, 2.6146e-04, 8.7703e-05,\n 7.6940e-04, -7.0412e-04, 2.2187e-05, 1.3727e-10, 1.8023e-16,\n -6.9446e-05, -2.7991e-04, -1.8234e-04, -2.0222e-04, 3.7331e-04,\n -2.6971e-05, 1.1342e-05, -3.8569e-04, -8.8542e-04, -1.6218e-04,\n -8.6032e-04, 2.8483e-04, 4.5454e-04, -1.7828e-04, 6.2557e-04,\n 5.6052e-45, 2.4050e-04, -5.7822e-04, -2.7402e-04, -4.9296e-04,\n 8.8966e-26, -2.5190e-04, -2.7119e-06, 8.8683e-04, -3.2232e-04,\n 5.0496e-04, 2.0341e-25, -2.5610e-04, 1.2696e-20, 6.6937e-13,\n -2.0059e-04, 1.1103e-03], device='cuda:0')", - "exp_avg_sq": "tensor([1.1646e-05, 2.3670e-05, 2.5345e-05, 2.9567e-06, 2.1849e-05, 9.2360e-06,\n 2.1594e-06, 1.0790e-05, 1.6643e-05, 1.2774e-06, 2.1523e-05, 2.4080e-06,\n 9.7363e-06, 2.2485e-06, 1.6728e-16, 4.7788e-06, 3.9243e-05, 1.2890e-05,\n 2.6499e-06, 1.1512e-05, 4.7224e-07, 3.7279e-06, 1.9723e-05, 1.3217e-06,\n 2.4748e-06, 4.8854e-06, 9.7763e-06, 4.4047e-06, 4.5954e-07, 3.6725e-07,\n 1.8493e-05, 1.5636e-05, 1.1924e-05, 1.2911e-05, 1.6774e-06, 1.5390e-06,\n 4.6095e-06, 8.3565e-06, 2.5134e-05, 4.3719e-07, 7.7316e-06, 1.2557e-05,\n 1.6841e-05, 4.0266e-09, 2.1064e-05, 1.0284e-05, 7.0078e-05, 1.3973e-10,\n 5.6055e-06, 1.7833e-06, 5.3621e-06, 8.2307e-06, 3.5681e-06, 1.6176e-06,\n 4.5939e-06, 6.0652e-06, 4.3086e-06, 1.9387e-06, 2.2311e-06, 6.0973e-06,\n 4.8846e-06, 2.8962e-06, 4.2966e-06, 1.1502e-05, 5.1295e-06, 1.5926e-08,\n 1.2725e-05, 2.0638e-05, 3.7060e-06, 2.5318e-06, 6.0404e-06, 3.2327e-13,\n 4.3261e-06, 8.1095e-06, 6.3835e-08, 3.1915e-06, 3.3158e-06, 1.0501e-05,\n 6.4769e-06, 4.0831e-06, 3.6508e-06, 2.9187e-05, 3.5377e-06, 2.3106e-06,\n 6.8940e-07, 2.3701e-08, 2.7735e-06, 1.4114e-05, 6.4590e-07, 7.3511e-06,\n 5.9758e-06, 1.6247e-05, 2.0322e-05, 3.4890e-05, 1.0848e-06, 8.9366e-06,\n 6.3200e-06, 1.2089e-05, 1.9465e-05, 8.2843e-06, 3.7706e-06, 6.7491e-06,\n 4.1074e-06, 6.1613e-05, 2.0298e-06, 1.4419e-05, 4.7834e-06, 6.3506e-06,\n 9.0665e-06, 5.2078e-07, 4.9172e-10, 1.6951e-06, 1.6333e-07, 1.6519e-05,\n 1.1877e-05, 6.9821e-06, 2.8651e-06, 1.1825e-05, 3.2137e-06, 3.4336e-06,\n 5.4023e-06, 4.1989e-06, 6.4675e-06, 7.2818e-06, 6.3798e-06, 8.1663e-06,\n 2.2698e-06, 1.7991e-05, 2.3775e-06, 7.2642e-06, 6.6290e-09, 3.2273e-06,\n 3.3132e-06, 2.5987e-06, 5.1199e-06, 1.2651e-06, 1.4008e-05, 4.4884e-06,\n 1.1257e-05, 5.6817e-06, 1.2546e-05, 4.1528e-06, 1.5789e-06, 1.1687e-05,\n 1.5523e-05, 8.0060e-06, 1.6724e-05, 3.8743e-06, 9.9914e-07, 1.9877e-05,\n 1.3746e-05, 3.3975e-06, 1.4818e-05, 2.1632e-05, 3.9806e-06, 1.5638e-07,\n 1.1264e-06, 8.1645e-06, 4.8785e-06, 1.4422e-06, 2.3454e-06, 6.0834e-06,\n 1.9498e-06, 2.2906e-05, 4.6408e-06, 1.4572e-06, 5.0302e-06, 7.3065e-06,\n 5.5400e-06, 1.0866e-06, 1.6480e-06, 1.3108e-05, 1.5676e-06, 2.5121e-05,\n 4.2748e-06, 2.1308e-06, 8.2404e-06, 5.0418e-06, 1.5460e-06, 3.1646e-09,\n 3.2020e-06, 8.9805e-07, 1.2296e-05, 6.5087e-06, 6.9858e-07, 6.8873e-05,\n 1.4505e-06, 2.5442e-06, 1.6775e-05, 5.5311e-06, 3.0022e-05, 3.7115e-06,\n 1.1178e-05, 1.3421e-05, 1.5606e-05, 1.6320e-05, 8.1556e-06, 7.4544e-07,\n 2.4889e-14, 1.2518e-05, 3.7945e-06, 1.9490e-06, 2.3677e-06, 3.7896e-06,\n 1.3652e-05, 1.9451e-06, 3.0783e-05, 2.9028e-06, 7.5173e-07, 1.1916e-09,\n 9.7641e-06, 4.5169e-05, 5.0837e-05, 1.0936e-05, 1.5154e-05, 1.1459e-05,\n 4.5182e-06, 3.4192e-07, 7.9512e-06, 1.7833e-05, 2.9462e-06, 1.4721e-05,\n 6.4637e-06, 9.8000e-06, 9.7995e-06, 9.0646e-06, 7.1000e-06, 8.4800e-05,\n 6.9360e-06, 1.2830e-07, 3.9159e-06, 1.0761e-05, 9.3916e-06, 8.6678e-06,\n 4.1430e-06, 7.1366e-08, 8.3691e-06, 1.0147e-05, 6.8403e-06, 6.2446e-05,\n 4.3131e-06, 3.4965e-06, 1.8788e-06, 4.1807e-06, 1.6722e-06, 5.1601e-06,\n 1.6592e-06, 8.2866e-06, 9.7212e-06, 1.6950e-05, 9.6184e-06, 5.6595e-06,\n 5.6454e-06, 1.1820e-05, 2.6152e-06, 3.3204e-06, 2.0192e-06, 5.8559e-06,\n 5.1866e-06, 4.9788e-08, 1.7735e-06, 1.2741e-06, 5.9167e-06, 2.4006e-06,\n 2.8488e-08, 1.3118e-06, 2.4686e-05, 3.1495e-06, 1.0344e-05, 2.3382e-06,\n 4.0239e-06, 4.9698e-06, 1.1709e-06, 6.7946e-06, 2.2513e-07, 5.0791e-06,\n 1.8953e-05, 2.2942e-05, 2.4257e-06, 4.1847e-06, 2.4200e-06, 2.5326e-05,\n 7.3968e-06, 1.1775e-06, 1.3996e-05, 9.4291e-06, 2.1462e-06, 3.4174e-06,\n 2.5572e-06, 5.0429e-06, 2.0231e-05, 1.7184e-05, 2.2228e-05, 1.5027e-09,\n 9.7378e-06, 3.6731e-06, 8.8822e-06, 4.9464e-06, 3.2134e-06, 2.4772e-06,\n 8.0677e-09, 4.9477e-06, 7.3994e-06, 1.1766e-05, 2.0929e-05, 1.3230e-06,\n 3.3605e-06, 1.3452e-05, 5.0210e-06, 5.7463e-06, 4.2926e-06, 1.3303e-06,\n 1.2176e-05, 4.0081e-06, 1.2465e-05, 4.5045e-06, 1.0319e-06, 1.5232e-05,\n 2.3638e-05, 3.5644e-06, 1.8947e-05, 4.6043e-06, 1.8755e-05, 6.6672e-06,\n 8.9940e-07, 5.0290e-06, 9.9077e-06, 1.3982e-06, 8.1250e-06, 1.2093e-05,\n 1.0513e-05, 7.9980e-06, 1.2526e-05, 7.2681e-07, 3.7290e-06, 1.0661e-05,\n 6.0400e-06, 4.4480e-07, 6.6358e-16, 2.5566e-06, 1.5287e-05, 2.7499e-06,\n 2.8433e-05, 4.2938e-09, 1.5694e-05, 1.0354e-04, 3.3062e-06, 7.9529e-06,\n 1.2575e-06, 8.3170e-06, 3.4237e-06, 4.6308e-05, 3.8885e-06, 4.2888e-06,\n 1.6991e-05, 1.6232e-06, 2.8749e-06, 2.6993e-06, 4.7000e-06, 3.7375e-06,\n 6.7249e-06, 1.3522e-06, 1.2478e-05, 1.5731e-05, 4.9216e-06, 2.1875e-05,\n 2.2132e-09, 3.1859e-06, 5.4861e-07, 8.9420e-06, 5.2632e-06, 6.4448e-06,\n 2.6199e-06, 2.1502e-05, 6.1537e-08, 1.9619e-06, 5.3318e-06, 7.4461e-06,\n 5.4682e-06, 2.2604e-06, 8.1614e-07, 9.0555e-06, 9.4979e-06, 7.0575e-06,\n 1.7312e-06, 1.4187e-05, 5.4295e-06, 3.3405e-08, 1.0664e-05, 1.6029e-05,\n 8.1528e-06, 1.2573e-05, 2.7667e-05, 5.8580e-06, 2.0648e-05, 1.1382e-06,\n 8.0173e-07, 1.2610e-05, 1.3464e-11, 1.3931e-05, 2.2668e-06, 2.5045e-05,\n 9.7316e-06, 7.5790e-06, 1.5621e-05, 4.0114e-06, 6.9230e-06, 1.7080e-05,\n 4.7806e-06, 4.4751e-06, 1.8880e-05, 6.0009e-06, 3.5888e-05, 1.2507e-05,\n 8.5674e-06, 2.8642e-06, 2.5838e-05, 8.2105e-08, 2.7860e-06, 1.3863e-06,\n 3.5043e-05, 5.4335e-06, 2.9210e-06, 2.0725e-05, 1.2616e-05, 8.1148e-06,\n 1.6400e-06, 1.3607e-05, 1.3703e-06, 2.3122e-05, 1.7073e-05, 1.2227e-05,\n 1.6400e-06, 3.0134e-06, 7.9776e-06, 6.8222e-07, 3.5786e-06, 5.8251e-07,\n 2.5846e-05, 2.5413e-05, 6.5450e-06, 4.3046e-06, 7.9441e-07, 1.0544e-05,\n 1.0931e-05, 8.8036e-06, 3.1552e-11, 1.4390e-06, 3.4667e-06, 1.0020e-05,\n 5.2528e-06, 2.2529e-06, 3.1902e-06, 4.8070e-06, 1.1293e-05, 4.7041e-06,\n 3.4954e-06, 1.7057e-06, 5.2908e-07, 1.6021e-05, 4.0355e-06, 5.2321e-06,\n 1.0913e-07, 1.4736e-05, 1.1513e-05, 2.2386e-06, 1.4200e-05, 1.2978e-05,\n 3.6804e-07, 2.6959e-05, 2.0502e-05, 1.6718e-05, 1.5417e-05, 8.8681e-06,\n 1.0105e-06, 3.3234e-06, 5.7375e-06, 5.1494e-06, 8.6707e-08, 3.1666e-08,\n 7.6932e-06, 2.0583e-06, 3.2685e-06, 3.4579e-06, 4.9262e-06, 8.8453e-06,\n 2.8854e-07, 5.0771e-06, 5.9804e-06, 1.5919e-05, 2.2758e-05, 8.1069e-06,\n 3.4503e-06, 1.1633e-05, 1.7483e-06, 8.6523e-16, 3.1836e-06, 1.5804e-05,\n 5.7599e-06, 9.5871e-06, 5.1298e-06, 1.1172e-05, 2.9296e-06, 9.4495e-06,\n 1.3305e-06, 8.8723e-05, 2.4623e-08, 1.4337e-05, 9.8190e-06, 1.5151e-06,\n 3.2962e-07, 7.5590e-06], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([ 4.8291e-04, -2.5564e-04, -4.3181e-04, 2.1158e-04, -3.0085e-04,\n -4.3630e-04, 1.2255e-04, -1.7331e-04, 4.0720e-05, 3.0700e-04,\n -2.8976e-04, -2.0572e-04, -1.2855e-04, 8.4173e-05, 5.6052e-45,\n 1.3033e-04, 1.0698e-04, 4.2868e-04, 1.3082e-04, 5.9594e-05,\n 2.2269e-10, -2.5721e-04, -2.1352e-04, 2.9803e-04, 5.8206e-04,\n -1.6162e-04, 6.6934e-04, 3.3385e-04, -1.7816e-05, 1.6846e-04,\n -5.9458e-04, -8.4374e-05, -3.4366e-04, 1.1044e-04, 1.5403e-04,\n 1.8146e-04, -2.5653e-05, -2.4346e-04, 2.1387e-04, -3.1512e-04,\n 5.2261e-04, -2.2226e-04, -2.6270e-04, 4.8686e-04, -6.0873e-05,\n -1.1042e-04, 1.3970e-11, -3.0145e-05, 1.4523e-04, 2.3919e-04,\n -1.4231e-04, -7.1367e-06, 1.6169e-04, -1.5712e-04, -6.3746e-05,\n 3.6516e-04, -8.7714e-05, -1.1106e-04, 6.9032e-04, 7.0514e-05,\n -1.6933e-04, 2.7264e-05, -1.8100e-04, -1.2855e-04, -5.1260e-05,\n 2.2919e-04, 2.9277e-04, -2.7643e-04, 2.0755e-04, -3.0680e-05,\n 2.3027e-04, 5.6052e-45, 2.1486e-04, -1.1510e-05, 2.0711e-04,\n 1.7116e-05, -3.8606e-04, 5.1995e-05, 3.4637e-04, 7.9762e-05,\n -4.8078e-05, 2.8860e-04, -4.6137e-05, 2.5023e-04, 1.2823e-04,\n 7.1506e-04, -3.5781e-04, -7.2730e-04, -1.3969e-04, -2.4508e-04,\n 3.1036e-04, -1.1096e-03, -2.1758e-04, 8.5803e-05, -7.0037e-05,\n -1.3117e-04, -2.5449e-04, -4.8244e-04, -2.2335e-05, -1.1209e-04,\n -3.0958e-04, 2.7037e-04, -2.6344e-04, 1.3473e-04, -7.5282e-05,\n -2.3703e-04, -4.4581e-05, -2.2416e-05, 6.9121e-05, -7.5774e-05,\n 5.6052e-45, -3.8188e-04, 1.5103e-03, 8.5651e-05, 1.4353e-04,\n -2.5883e-04, 4.8807e-05, -1.9233e-04, 1.5809e-04, 4.6912e-04,\n 1.1051e-04, 4.2287e-04, -7.7485e-05, 1.7526e-04, -3.9805e-04,\n 6.3991e-05, 6.9549e-05, 6.7361e-05, -3.3535e-04, -1.7842e-05,\n 5.6052e-45, -3.6844e-04, -3.5981e-04, 1.2043e-04, -5.2232e-05,\n 2.4118e-04, 3.0183e-04, 5.0486e-04, -7.6292e-05, 3.7732e-04,\n -1.8886e-04, -6.0142e-05, -4.5446e-34, 1.3062e-04, -1.0633e-04,\n -2.0652e-05, -2.9445e-06, 2.3875e-04, 4.1429e-04, 3.3621e-04,\n -5.2464e-05, -1.6028e-04, 3.2129e-10, 2.7243e-04, -6.8243e-05,\n 5.6052e-45, 1.6547e-04, -2.5309e-04, 9.4012e-05, -3.2854e-04,\n 1.2941e-04, -1.0363e-04, 1.4340e-04, -4.6644e-05, 2.2398e-04,\n -7.3773e-05, 1.0335e-04, 1.5786e-04, 1.8143e-04, -9.0634e-05,\n 6.4002e-06, -1.1434e-10, 5.6052e-45, -4.1033e-04, 3.0408e-04,\n -2.1363e-04, 8.8088e-05, -1.2317e-05, -1.3269e-04, 3.9362e-05,\n -1.1127e-04, 2.6498e-04, -3.5545e-04, 7.3270e-05, 1.0562e-04,\n 3.1790e-04, 5.5133e-05, -1.8820e-04, -3.5068e-04, 1.6522e-04,\n 3.3838e-04, 3.2116e-04, -5.0605e-04, 4.2653e-05, 7.6346e-04,\n 1.3238e-05, -6.1535e-04, -3.5559e-05, 5.6052e-45, -2.0554e-05,\n -2.1058e-04, 2.9927e-04, -2.0218e-05, 7.8256e-06, 1.1005e-04,\n 1.2626e-04, 7.3845e-16, 6.1334e-05, 1.1194e-04, -1.0006e-04,\n 3.0287e-04, -4.1067e-04, 5.3052e-09, -1.1673e-04, 1.0101e-04,\n -8.9416e-05, -4.5582e-04, -4.6005e-04, 2.8221e-04, -5.5363e-05,\n -1.9462e-05, 3.5281e-15, -4.2685e-05, -1.4782e-04, -1.8144e-05,\n -3.4890e-04, 2.7212e-04, 1.1848e-04, -1.8938e-04, -1.0332e-05,\n -1.8632e-04, -3.7595e-04, 1.5851e-04, 9.6373e-05, -3.2823e-04,\n -1.5103e-04, 7.7956e-05, 7.7259e-06, 1.1799e-04, 8.2638e-04,\n 8.7609e-05, -4.6048e-04, -4.3501e-04, 9.8771e-05, -7.7503e-05,\n 6.0935e-04, 5.2890e-05, 1.8882e-04, 2.8896e-04, 4.2721e-04,\n 5.1860e-06, 4.1138e-04, -4.0457e-04, 1.1343e-04, 5.2320e-04,\n 1.1119e-04, -6.6602e-05, -2.8657e-05, -7.4795e-04, 5.4243e-05,\n -2.4777e-04, -4.4339e-05, -1.8332e-05, 2.7390e-05, -1.3787e-05,\n 2.0493e-04, -3.4339e-04, -3.2810e-04, -2.4531e-04, -3.2918e-04,\n -1.7741e-04, -3.0680e-05, -4.9926e-04, 1.7674e-04, -2.6172e-06,\n -1.5041e-04, -2.3448e-05, 1.5938e-04, -5.2957e-05, 4.7453e-06,\n 1.2788e-04, -1.1425e-05, 5.9837e-05, 4.0198e-04, 2.6523e-04,\n 3.3642e-04, -5.6052e-45, -4.4810e-04, 1.3778e-05, -1.1877e-05,\n -3.8714e-04, -1.1944e-04, 1.7534e-04, 3.7224e-05, 8.0351e-13,\n -2.9278e-04, 4.2262e-04, -1.6328e-04, 9.9815e-05, 1.9199e-04,\n 2.0700e-04, -4.5008e-05, 1.8056e-05, 3.3261e-04, -3.6401e-04,\n 2.8145e-05, -8.1369e-05, 3.7911e-05, -8.6648e-05, 2.7017e-04,\n 7.8279e-05, 2.8385e-04, 3.6892e-05, 1.0750e-04, 3.1548e-04,\n -1.6263e-05, 4.6995e-05, -2.8492e-04, -9.8753e-10, -3.2162e-04,\n -9.3303e-05, 3.0079e-04, -1.6723e-04, -3.4915e-04, -1.3853e-05,\n -5.9657e-04, -3.9228e-04, 1.2713e-04, -1.8899e-04, -5.1591e-04,\n 2.0431e-04, 3.7862e-05, 8.7674e-05, -3.3424e-04, -6.5328e-05,\n 1.7870e-04, -6.1974e-04, -1.4127e-04, 5.6052e-45, 1.5290e-05,\n 1.7084e-04, -2.5020e-04, 1.2879e-04, 1.6466e-31, -2.0900e-04,\n 3.9337e-04, -1.0026e-04, -7.7029e-07, -4.2284e-05, 6.6850e-06,\n -1.4822e-04, 6.6300e-05, 1.5027e-04, 1.1608e-04, 2.1946e-04,\n 1.8761e-05, -2.9173e-05, -3.0694e-05, 1.1066e-04, -2.6597e-06,\n 4.0898e-04, -9.4613e-05, 4.3921e-04, 1.1763e-04, -5.5133e-05,\n 2.8915e-04, 5.6052e-45, 1.5755e-04, 2.1505e-04, 2.8558e-04,\n -2.3842e-04, 6.5126e-05, 1.6964e-04, 1.5295e-04, 1.8430e-04,\n 6.4572e-05, 6.7611e-05, 3.0185e-04, -1.9374e-16, -2.9602e-05,\n 5.6052e-45, 1.8956e-04, -1.3602e-04, 1.9873e-04, -9.2877e-05,\n -2.0909e-04, -4.7458e-04, -2.0321e-04, -2.9955e-04, -1.9161e-04,\n -2.0671e-04, -2.3793e-04, 3.0047e-05, 1.8534e-04, -1.6973e-05,\n -5.3114e-05, -2.3462e-05, 1.3416e-04, 1.5063e-05, 1.4101e-05,\n 1.7726e-04, 4.3427e-11, 7.0808e-05, 3.6130e-04, -4.9985e-04,\n 2.7836e-04, -8.5321e-05, -1.2437e-05, 2.2005e-04, -8.3558e-05,\n 2.4206e-04, -1.5784e-03, -4.7367e-04, 4.2100e-04, -4.2546e-04,\n -6.9676e-05, 1.0802e-04, -2.0906e-04, -2.8870e-04, -9.8914e-05,\n 1.1694e-35, -2.4278e-04, 2.4333e-04, 8.2304e-05, 5.5331e-04,\n 3.0735e-04, 2.4192e-04, -1.4057e-04, 3.1486e-05, 7.3920e-04,\n -3.0131e-04, 6.4959e-04, 3.0990e-04, -5.0570e-05, 1.5650e-04,\n -1.8757e-04, 4.9364e-04, 5.5478e-05, 9.3269e-17, 4.6102e-05,\n -1.0786e-04, -5.3178e-04, 4.4132e-06, 3.4059e-05, 7.3637e-05,\n -6.6301e-04, 5.6052e-45, 7.6759e-12, 2.2653e-11, -3.7650e-04,\n 2.2187e-05, 2.0610e-04, 3.5358e-04, -1.2537e-04, -9.5501e-05,\n -7.8345e-04, 1.5879e-05, 3.7188e-04, 5.1549e-30, 2.3533e-04,\n 2.9753e-05, 6.4350e-12, 1.3436e-04, -3.1901e-04, 3.6983e-04,\n 4.0607e-05, 2.0178e-04, -1.0115e-04, 6.8562e-05, 1.7706e-04,\n -2.3557e-04, 3.0862e-05, -3.8017e-04, 3.5232e-04, 4.6563e-05,\n 8.4701e-05, -1.1805e-04, -1.7671e-04, -5.0819e-04, 2.6310e-06,\n -5.5245e-04, 2.8356e-05, -8.2160e-05, -1.6378e-05, -7.7622e-05,\n -2.2861e-04, 6.2837e-06, -3.8526e-04, 3.5571e-04, -5.0082e-04,\n 2.0735e-04, 2.4880e-05, 1.8058e-04, -1.0186e-04, 7.3934e-05,\n 5.6052e-45, -1.8872e-04, -2.4286e-04, 9.6475e-05, 5.0200e-04,\n 2.2723e-04, -4.3321e-04, 3.1933e-04, -1.0678e-04, -1.0349e-04,\n 4.4266e-04, 6.0360e-05, 7.9237e-04, -1.0873e-04, 6.8859e-15,\n -1.4409e-05, -1.1024e-04], device='cuda:0')", + "exp_avg_sq": "tensor([3.9896e-06, 7.5239e-06, 7.9639e-06, 1.6433e-06, 6.3922e-06, 3.4436e-06,\n 9.8297e-07, 4.0627e-06, 5.4213e-06, 8.6105e-07, 7.7692e-06, 1.4785e-06,\n 3.0740e-06, 1.5375e-06, 4.7801e-17, 2.2869e-06, 1.1345e-05, 4.9883e-06,\n 9.6034e-07, 3.7881e-06, 1.3495e-07, 1.9154e-06, 7.4161e-06, 9.0188e-07,\n 1.6585e-06, 2.3075e-06, 4.0416e-06, 2.4903e-06, 5.0178e-07, 4.2812e-07,\n 6.2065e-06, 5.8321e-06, 4.8366e-06, 5.3284e-06, 8.4287e-07, 1.0477e-06,\n 1.8842e-06, 3.6888e-06, 8.5693e-06, 6.6826e-07, 3.5616e-06, 5.1926e-06,\n 5.7897e-06, 2.5477e-07, 7.0840e-06, 4.2692e-06, 2.0025e-05, 9.6372e-08,\n 3.0595e-06, 1.1710e-06, 2.1831e-06, 2.3525e-06, 2.1864e-06, 1.1829e-06,\n 1.9764e-06, 2.8575e-06, 2.2550e-06, 1.2867e-06, 1.6381e-06, 2.8839e-06,\n 2.1710e-06, 1.9398e-06, 2.1995e-06, 4.2982e-06, 2.2450e-06, 1.7098e-07,\n 4.2707e-06, 6.4152e-06, 1.4496e-06, 1.0326e-06, 2.6124e-06, 9.2378e-14,\n 2.0461e-06, 3.0031e-06, 2.5085e-07, 1.7877e-06, 2.0705e-06, 4.2785e-06,\n 2.9079e-06, 1.9977e-06, 1.0710e-06, 8.6923e-06, 1.2193e-06, 1.6034e-06,\n 8.5171e-07, 4.4000e-07, 1.7231e-06, 5.9634e-06, 6.5855e-07, 3.4327e-06,\n 2.1731e-06, 5.8914e-06, 6.3589e-06, 1.0862e-05, 4.8623e-07, 3.9726e-06,\n 3.2038e-06, 5.0853e-06, 7.1915e-06, 2.9196e-06, 2.1784e-06, 2.7809e-06,\n 1.9895e-06, 1.8209e-05, 1.2181e-06, 4.6158e-06, 2.6063e-06, 2.3693e-06,\n 3.0493e-06, 5.5274e-07, 1.4051e-10, 1.5788e-06, 4.7022e-07, 5.7959e-06,\n 4.3986e-06, 2.9514e-06, 1.4703e-06, 4.8755e-06, 1.3748e-06, 1.6845e-06,\n 2.5174e-06, 2.1577e-06, 3.1676e-06, 3.2486e-06, 2.7285e-06, 2.6541e-06,\n 1.4464e-06, 5.7576e-06, 1.2252e-06, 2.2612e-06, 1.8943e-09, 1.2079e-06,\n 1.6734e-06, 1.1217e-06, 2.0025e-06, 1.1009e-06, 5.5733e-06, 2.2683e-06,\n 4.0396e-06, 2.9978e-06, 4.9764e-06, 1.9954e-06, 4.5119e-07, 5.0830e-06,\n 5.0591e-06, 3.1787e-06, 6.1178e-06, 1.8077e-06, 1.0388e-06, 6.5480e-06,\n 4.1302e-06, 2.0099e-06, 4.2343e-06, 8.2210e-06, 1.7033e-06, 4.4688e-08,\n 1.0941e-06, 2.9944e-06, 1.8909e-06, 9.8260e-07, 6.9394e-07, 2.3830e-06,\n 1.2400e-06, 6.7745e-06, 2.3055e-06, 7.6112e-07, 2.6990e-06, 3.3100e-06,\n 2.7546e-06, 9.8988e-07, 1.4851e-06, 3.7457e-06, 4.4796e-07, 7.9835e-06,\n 2.0622e-06, 1.1313e-06, 3.3808e-06, 2.2618e-06, 8.8259e-07, 3.0024e-07,\n 1.4767e-06, 6.9091e-07, 3.9290e-06, 2.0474e-06, 2.0032e-07, 2.0592e-05,\n 8.6954e-07, 1.7096e-06, 5.7384e-06, 2.6765e-06, 9.4508e-06, 2.0357e-06,\n 4.3685e-06, 4.8244e-06, 6.5279e-06, 5.3856e-06, 3.4892e-06, 5.1663e-07,\n 7.1123e-15, 5.1385e-06, 2.3259e-06, 1.1826e-06, 1.2718e-06, 2.2320e-06,\n 4.3948e-06, 1.3297e-06, 8.7965e-06, 2.1525e-06, 2.2698e-07, 1.0078e-07,\n 3.9990e-06, 1.3277e-05, 1.4527e-05, 3.7113e-06, 4.7110e-06, 4.0241e-06,\n 2.2627e-06, 4.5358e-07, 3.5602e-06, 6.1550e-06, 1.1840e-06, 4.2067e-06,\n 2.4423e-06, 3.6417e-06, 4.0634e-06, 4.0652e-06, 3.5629e-06, 2.4816e-05,\n 3.1304e-06, 3.7137e-08, 1.7859e-06, 4.0646e-06, 3.7811e-06, 3.1756e-06,\n 2.2575e-06, 3.9166e-07, 3.2261e-06, 3.4727e-06, 2.9968e-06, 1.8894e-05,\n 1.7357e-06, 2.1319e-06, 7.1791e-07, 1.8786e-06, 7.5709e-07, 1.5452e-06,\n 1.1327e-06, 3.0156e-06, 3.3402e-06, 5.9001e-06, 3.2954e-06, 2.8084e-06,\n 2.3975e-06, 4.7194e-06, 1.8270e-06, 1.9746e-06, 1.3654e-06, 2.1741e-06,\n 2.6169e-06, 3.3020e-07, 1.5069e-06, 1.0339e-06, 2.6893e-06, 1.5916e-06,\n 8.1606e-09, 6.9078e-07, 8.0266e-06, 1.9265e-06, 4.6486e-06, 1.6766e-06,\n 2.0741e-06, 2.4987e-06, 8.2676e-07, 3.6847e-06, 3.2538e-07, 2.0592e-06,\n 5.5140e-06, 7.7698e-06, 1.4958e-06, 1.4576e-06, 1.5332e-06, 7.5373e-06,\n 3.0060e-06, 1.0311e-06, 4.8499e-06, 3.8247e-06, 6.1329e-07, 2.0824e-06,\n 1.2495e-06, 2.5490e-06, 7.1466e-06, 5.6165e-06, 8.0272e-06, 8.8825e-08,\n 2.7826e-06, 2.2634e-06, 3.2044e-06, 2.1830e-06, 2.1512e-06, 1.4450e-06,\n 2.0576e-07, 1.7389e-06, 3.0312e-06, 4.8065e-06, 6.5037e-06, 9.5130e-07,\n 1.6794e-06, 4.7446e-06, 1.8124e-06, 2.6443e-06, 1.9910e-06, 6.7251e-07,\n 3.6012e-06, 1.7359e-06, 5.1801e-06, 2.2244e-06, 5.4730e-07, 5.6501e-06,\n 6.7547e-06, 1.9276e-06, 5.7703e-06, 2.1108e-06, 7.1595e-06, 2.4202e-06,\n 2.5887e-07, 2.5562e-06, 3.8686e-06, 7.5490e-07, 3.7486e-06, 3.9485e-06,\n 4.2257e-06, 3.5658e-06, 4.6477e-06, 6.8250e-07, 2.0143e-06, 4.0064e-06,\n 2.8325e-06, 5.9433e-07, 1.8962e-16, 1.6756e-06, 5.1432e-06, 1.6700e-06,\n 8.4100e-06, 1.2270e-09, 4.8930e-06, 3.0121e-05, 1.3328e-06, 2.2728e-06,\n 9.7845e-07, 3.5324e-06, 1.5843e-06, 1.3657e-05, 1.8971e-06, 2.1689e-06,\n 6.3947e-06, 9.3440e-07, 8.2979e-07, 1.7361e-06, 2.4282e-06, 1.7145e-06,\n 2.9677e-06, 4.3123e-07, 4.3169e-06, 6.0273e-06, 2.3104e-06, 6.8735e-06,\n 6.3245e-10, 2.0070e-06, 5.3693e-07, 3.4809e-06, 2.3608e-06, 2.8884e-06,\n 1.8522e-06, 6.4250e-06, 1.9642e-08, 1.2474e-06, 2.0066e-06, 2.6481e-06,\n 1.5626e-06, 1.0489e-06, 2.3322e-07, 3.2182e-06, 4.1447e-06, 3.2395e-06,\n 1.3825e-06, 5.3317e-06, 2.5010e-06, 2.6091e-07, 3.6701e-06, 6.1508e-06,\n 3.2606e-06, 4.3809e-06, 8.5496e-06, 2.9267e-06, 6.0393e-06, 7.4129e-07,\n 7.4056e-07, 4.4162e-06, 1.1795e-08, 5.4210e-06, 1.5646e-06, 7.1567e-06,\n 4.3184e-06, 2.9652e-06, 6.1824e-06, 2.1889e-06, 2.8854e-06, 6.6331e-06,\n 2.2245e-06, 2.3057e-06, 6.5047e-06, 2.3783e-06, 1.0951e-05, 5.0573e-06,\n 3.1538e-06, 1.3562e-06, 9.1222e-06, 2.8989e-07, 1.5558e-06, 8.6860e-07,\n 1.0014e-05, 2.3144e-06, 1.4919e-06, 6.0428e-06, 5.6467e-06, 2.9347e-06,\n 8.0428e-07, 4.6789e-06, 7.7919e-07, 8.5051e-06, 6.3417e-06, 4.8177e-06,\n 1.0312e-06, 1.5377e-06, 2.4740e-06, 5.7683e-07, 1.9854e-06, 4.5343e-07,\n 7.3856e-06, 7.3306e-06, 3.0349e-06, 2.0796e-06, 5.3854e-07, 3.4951e-06,\n 5.0104e-06, 3.2180e-06, 9.0164e-12, 4.1122e-07, 9.9064e-07, 4.5467e-06,\n 2.5611e-06, 8.9574e-07, 1.6890e-06, 2.6067e-06, 4.6340e-06, 2.2322e-06,\n 1.0704e-06, 1.1332e-06, 1.5119e-07, 5.3338e-06, 2.2163e-06, 1.4951e-06,\n 4.2938e-07, 5.1469e-06, 4.9264e-06, 1.0663e-06, 4.5091e-06, 4.8379e-06,\n 2.8278e-07, 8.3315e-06, 7.9743e-06, 5.0650e-06, 5.4380e-06, 3.4426e-06,\n 9.5527e-07, 1.5880e-06, 2.3695e-06, 2.5021e-06, 3.7635e-07, 1.8286e-07,\n 3.7947e-06, 1.5111e-06, 1.4507e-06, 1.8654e-06, 2.4062e-06, 3.8474e-06,\n 3.5159e-07, 2.7651e-06, 2.7836e-06, 6.1307e-06, 8.4544e-06, 3.4597e-06,\n 1.5840e-06, 4.7137e-06, 1.1930e-06, 2.4725e-16, 1.7560e-06, 6.4081e-06,\n 2.0739e-06, 4.1012e-06, 1.6733e-06, 4.2465e-06, 1.4267e-06, 4.0660e-06,\n 1.0331e-06, 2.6052e-05, 9.6603e-08, 4.8336e-06, 2.8538e-06, 4.3294e-07,\n 4.3020e-07, 3.2071e-06], device='cuda:0')" }, "4": { + "step": "tensor(2504.)", + "exp_avg": "tensor([[-4.7434e-07, 5.5272e-06, -2.1584e-05, ..., -1.4595e-17,\n 2.2460e-06, -1.8307e-05],\n [-9.1374e-06, 2.8713e-05, 4.3151e-05, ..., 7.6944e-18,\n -3.3722e-07, -5.5412e-05],\n [-3.6589e-05, 3.7703e-06, 1.7371e-05, ..., 4.0278e-19,\n -9.7063e-06, 7.0266e-06],\n ...,\n [-3.3800e-05, -5.3896e-06, -2.0663e-05, ..., 3.0230e-17,\n -4.1607e-06, 6.1071e-06],\n [-9.1887e-05, -2.6352e-05, 3.2071e-05, ..., -1.9443e-17,\n -1.3118e-05, -3.5037e-05],\n [ 1.0257e-05, 2.1846e-05, -4.8494e-05, ..., -3.1201e-18,\n -5.1401e-07, 3.9305e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.4190e-08, 8.3515e-09, 7.6031e-09, ..., 2.8315e-11, 9.6867e-10,\n 1.2486e-08],\n [2.6961e-08, 2.8445e-08, 2.8901e-08, ..., 4.1825e-12, 1.2572e-09,\n 3.0186e-08],\n [1.7654e-08, 2.1029e-08, 1.4906e-08, ..., 5.9217e-11, 2.5814e-09,\n 1.4448e-08],\n ...,\n [9.6995e-08, 1.8377e-08, 2.6906e-08, ..., 4.8164e-11, 6.2385e-09,\n 2.2655e-08],\n [2.8616e-08, 2.5150e-08, 2.9667e-08, ..., 2.8299e-11, 1.1291e-08,\n 2.6722e-08],\n [3.0176e-08, 2.6521e-08, 3.6977e-08, ..., 2.5931e-10, 1.0656e-09,\n 2.9561e-08]], device='cuda:0')" + }, + "5": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[ 3.0101e-06, 2.5657e-06, 4.0959e-06, ..., -6.9569e-07,\n 0.0000e+00, 3.3073e-07],\n [ 5.6511e-06, 8.4860e-07, -2.4813e-07, ..., -3.9613e-06,\n 0.0000e+00, 2.0307e-06],\n [ 6.1239e-07, -5.1405e-07, 3.1662e-06, ..., 4.7834e-06,\n 0.0000e+00, 1.1445e-05],\n ...,\n [-3.1599e-07, -1.0996e-06, 2.2961e-08, ..., -1.9261e-07,\n 0.0000e+00, 6.9726e-08],\n [-1.7232e-07, 2.9206e-07, 4.6680e-06, ..., 2.7917e-06,\n 0.0000e+00, 1.3129e-05],\n [-7.0413e-07, -4.6396e-06, -1.0318e-06, ..., 7.6837e-07,\n 0.0000e+00, -5.4133e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.1560e-10, 9.4684e-11, 6.5009e-11, ..., 9.2650e-11, 0.0000e+00,\n 7.3883e-11],\n [2.5016e-10, 3.4571e-11, 1.1843e-10, ..., 1.0969e-10, 0.0000e+00,\n 8.7191e-11],\n [1.7990e-10, 7.6386e-11, 2.0102e-10, ..., 3.0328e-10, 0.0000e+00,\n 4.6720e-10],\n ...,\n [3.0435e-10, 3.7310e-11, 6.5483e-11, ..., 4.5357e-11, 0.0000e+00,\n 5.3669e-11],\n [2.1396e-11, 3.7439e-11, 3.4767e-10, ..., 3.8044e-10, 0.0000e+00,\n 1.3308e-10],\n [1.5595e-10, 3.4240e-11, 8.3075e-11, ..., 6.8885e-10, 0.0000e+00,\n 1.1229e-10]], device='cuda:0')" + }, + "6": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-1.2679e-05, 1.8230e-05, 9.1510e-05, ..., -1.5824e-05,\n 5.8570e-05, -9.8966e-06], device='cuda:0')", + "exp_avg_sq": "tensor([3.2873e-08, 1.9230e-08, 2.6594e-08, ..., 3.9394e-08, 1.5814e-08,\n 1.3749e-08], device='cuda:0')" + }, + "7": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-4.1048e-06, -9.7021e-07, 2.0351e-06, ..., -1.1260e-07,\n 3.7870e-06, 1.1030e-07],\n [-4.0283e-06, 8.0687e-07, 3.4043e-06, ..., -1.3372e-06,\n -1.7005e-06, -1.7600e-07],\n [ 3.8263e-06, 1.5942e-06, -8.8412e-07, ..., 4.4283e-07,\n -1.4096e-07, 2.6081e-06],\n ...,\n [ 1.7094e-06, 5.9732e-08, 5.5631e-06, ..., 1.1461e-06,\n 4.7124e-06, -4.2240e-07],\n [ 2.2764e-06, -1.9859e-06, -1.4960e-06, ..., -1.7104e-07,\n -5.4290e-07, 1.2185e-06],\n [ 2.7370e-06, 1.0855e-06, 3.2234e-08, ..., -4.4717e-06,\n -1.3623e-06, -3.7549e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.5813e-11, 3.9480e-11, 5.4657e-11, ..., 1.5098e-10, 5.0947e-11,\n 9.6082e-11],\n [8.6957e-11, 6.1018e-11, 1.0126e-10, ..., 2.3654e-10, 5.3615e-11,\n 7.7247e-11],\n [1.1279e-10, 8.1611e-11, 7.4024e-11, ..., 1.7155e-10, 4.5052e-11,\n 7.5932e-11],\n ...,\n [1.5090e-10, 9.9205e-11, 1.4221e-10, ..., 1.5250e-10, 6.8065e-11,\n 1.0326e-10],\n [1.3339e-10, 1.1160e-10, 1.0131e-10, ..., 1.3085e-10, 7.5745e-11,\n 9.7685e-11],\n [1.1225e-10, 8.5538e-11, 9.5493e-11, ..., 1.1275e-10, 8.4256e-11,\n 1.4121e-10]], device='cuda:0')" + }, + "32": { + "step": "tensor(1252.)", + "exp_avg": "tensor([5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.0205e-05], device='cuda:0')" + }, + "33": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([9.2899e-08, 4.5158e-07, 1.3751e-07], device='cuda:0')" + }, + "34": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([5.5438e-04, 5.0262e-06, 6.7886e-06, 7.0446e-06, 8.0768e-06, 9.2780e-06,\n 7.3764e-06, 5.4626e-06, 6.8998e-06, 6.1315e-06], device='cuda:0')" + }, + "36": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.5617e-13, 3.8580e-13, 5.7321e-13, ..., 2.5057e-12, 0.0000e+00,\n 8.4567e-13],\n [1.5412e-13, 3.7260e-13, 1.1207e-12, ..., 1.2390e-12, 0.0000e+00,\n 4.9739e-13],\n [1.3831e-10, 3.2724e-11, 1.8183e-09, ..., 5.8295e-10, 0.0000e+00,\n 1.6266e-09],\n ...,\n [3.8547e-11, 1.6031e-12, 3.0360e-10, ..., 1.6942e-10, 0.0000e+00,\n 4.5586e-10],\n [1.0390e-12, 3.7387e-13, 1.0988e-13, ..., 4.0797e-12, 0.0000e+00,\n 2.1036e-12],\n [3.5053e-14, 6.5772e-13, 8.5922e-14, ..., 2.2030e-13, 0.0000e+00,\n 8.9740e-13]], device='cuda:0')" + }, + "37": { + "step": "tensor(1252.)", + "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.5205e-09, 5.2476e-10, 1.9788e-06, 2.6257e-09, 9.5513e-07, 3.2895e-07,\n 6.2935e-08, 7.2219e-08, 1.8250e-06, 6.6781e-09, 9.2785e-07, 1.1290e-07,\n 1.0008e-09, 2.0875e-08, 1.4738e-10, 1.0586e-08, 1.5102e-07, 1.7103e-07,\n 1.1058e-08, 1.3280e-08, 1.6198e-07, 7.4729e-09, 1.1264e-07, 6.7097e-08,\n 5.6708e-08, 5.6498e-07, 1.0739e-07, 9.3509e-08, 7.1160e-08, 1.4183e-06,\n 5.2107e-07, 3.2244e-07, 3.7562e-06, 6.1085e-09, 5.0327e-08, 1.1257e-08,\n 1.5649e-07, 6.9550e-09, 6.3168e-07, 1.6684e-08, 6.2554e-07, 1.0668e-08,\n 3.3229e-08, 5.9226e-07, 9.8328e-10, 3.5888e-08, 5.5521e-08, 5.5358e-08,\n 1.1306e-08, 3.9873e-07, 1.3778e-07, 1.4774e-07, 9.8972e-07, 6.4728e-08,\n 1.0997e-08, 7.9250e-08, 1.5804e-11, 3.8596e-08, 1.7001e-07, 1.6666e-06,\n 2.0548e-07, 1.8230e-08, 5.8109e-10, 6.2061e-08, 7.0521e-08, 9.3277e-07,\n 8.5117e-09, 6.2495e-08, 2.0511e-08, 1.4336e-07, 8.7757e-08, 7.6543e-08,\n 2.3618e-08, 3.4935e-07, 4.9107e-07, 2.7730e-07, 3.3645e-07, 8.1236e-08,\n 2.4295e-08, 6.7176e-10, 8.3200e-08, 3.2324e-08, 8.3135e-09, 4.6814e-08,\n 6.5912e-08, 2.0713e-10, 6.8789e-07, 1.1038e-07, 2.0139e-08, 9.0013e-10,\n 6.4538e-09, 6.5871e-08, 3.7654e-07, 5.8949e-07, 5.5232e-07, 4.1942e-08,\n 4.9454e-07, 2.0732e-07, 1.4283e-07, 2.0586e-08, 6.0639e-08, 2.5450e-09,\n 1.0838e-08, 6.4979e-07, 2.6111e-07, 4.5610e-07, 1.4863e-07, 1.4253e-09,\n 6.1144e-07, 5.1083e-08, 8.8646e-08, 1.2258e-08, 1.2984e-07, 6.3116e-08,\n 5.8429e-08, 2.7626e-06, 2.0175e-09, 9.3236e-08, 1.7960e-08, 8.6325e-07,\n 8.7920e-07, 1.1998e-07, 2.7457e-07, 1.0817e-08, 1.3559e-08, 2.3689e-07,\n 1.2796e-07, 1.4455e-07, 1.9353e-08, 4.5338e-07, 3.5780e-10, 1.0223e-08,\n 2.4646e-06, 2.2470e-07, 4.2334e-09, 4.8762e-08, 1.5270e-07, 6.0775e-08,\n 1.0001e-06, 1.9263e-06, 4.5023e-08, 1.3929e-07, 1.1827e-08, 1.9517e-09,\n 2.1555e-08, 9.7660e-08, 1.0275e-07, 1.9374e-07, 7.3002e-09, 4.1298e-07,\n 3.6959e-09, 1.0168e-08, 3.3923e-07, 1.5128e-07, 1.2772e-07, 7.3781e-07,\n 1.0652e-08, 5.6608e-07, 4.9001e-09, 2.6453e-08, 6.3476e-10, 4.5639e-07,\n 4.2358e-10, 3.4460e-08, 5.8766e-07, 3.3355e-07, 1.5710e-08, 2.0631e-07,\n 1.7561e-09, 1.0282e-08, 4.4072e-08, 2.4453e-07, 5.8039e-07, 2.0436e-07,\n 1.2344e-07, 4.2002e-08, 6.0749e-07, 7.3601e-07, 1.7637e-09, 1.3493e-10,\n 1.3402e-09, 2.1931e-07, 1.7672e-08, 6.6855e-09, 2.2969e-08, 8.5756e-07,\n 5.3387e-08, 1.0521e-09, 1.9118e-08, 3.1670e-08, 3.7381e-07, 2.2098e-09,\n 3.4964e-07, 6.9833e-07, 2.0766e-07, 5.6554e-08, 1.6939e-08, 1.9129e-07,\n 5.1738e-08, 2.1650e-07, 5.7837e-08, 3.4784e-09, 1.8107e-06, 3.5660e-09,\n 1.2412e-07, 2.3246e-10, 1.3995e-07, 4.6555e-07, 1.2239e-09, 3.1817e-08,\n 5.3514e-07, 3.2345e-08, 2.4670e-07, 1.6605e-06, 1.5308e-06, 1.6875e-07,\n 2.5189e-09, 7.6853e-08, 4.4765e-07, 9.2023e-10, 1.0565e-07, 1.8615e-06,\n 5.7766e-08, 1.0114e-07, 3.3489e-07, 4.3080e-08, 1.6483e-08, 1.5048e-07,\n 9.0664e-08, 7.5595e-08, 4.2811e-08, 1.7689e-07, 7.9431e-08, 4.2311e-08,\n 1.8373e-07, 2.9094e-08, 5.8919e-08, 7.3794e-08, 1.7942e-07, 3.1607e-09,\n 8.8820e-07, 3.0696e-08, 8.3345e-09, 3.0296e-07, 1.0395e-07, 1.5553e-09,\n 1.6195e-07, 1.0773e-06, 8.0162e-07, 1.1748e-06, 3.9445e-08, 1.6555e-07,\n 1.2588e-07, 5.4843e-07, 4.3440e-09, 5.4639e-10], device='cuda:0')" + }, + "38": { + "step": "tensor(1252.)", + "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.4157e-12, 3.9175e-15, 3.8014e-09, 1.2986e-11, 2.0221e-09, 6.3644e-10,\n 1.1092e-10, 1.6195e-10, 4.2003e-09, 1.6614e-11, 3.3201e-09, 1.9342e-10,\n 6.0522e-12, 2.6083e-11, 4.7363e-13, 3.8343e-11, 3.3533e-10, 4.7056e-10,\n 4.0119e-11, 2.8996e-11, 2.6340e-10, 1.8367e-11, 1.4920e-10, 1.1537e-10,\n 1.0618e-10, 7.0336e-10, 1.6471e-10, 1.6737e-10, 4.0719e-10, 4.1164e-09,\n 6.4315e-10, 5.6599e-10, 7.0984e-09, 8.9658e-12, 8.7550e-11, 4.2654e-11,\n 1.9901e-10, 2.0929e-11, 1.6008e-09, 2.3433e-11, 1.7631e-09, 3.6338e-11,\n 9.7853e-11, 7.8672e-10, 9.8603e-13, 3.9510e-11, 8.7327e-11, 7.9479e-11,\n 3.0895e-11, 9.4936e-10, 1.8199e-10, 3.8837e-10, 2.6778e-09, 9.1703e-11,\n 1.7982e-11, 1.1757e-10, 9.5042e-13, 4.9615e-11, 2.3878e-10, 4.8454e-09,\n 3.3764e-10, 2.8562e-11, 5.7210e-15, 1.3884e-10, 1.2678e-10, 2.5839e-09,\n 2.1543e-11, 8.6786e-11, 4.1759e-11, 1.8520e-10, 1.4771e-10, 1.3911e-10,\n 2.0522e-11, 7.0095e-10, 8.4641e-10, 4.2222e-10, 9.3172e-10, 1.2408e-10,\n 6.4008e-11, 1.7457e-13, 1.4692e-10, 4.6667e-11, 1.6082e-11, 5.8736e-11,\n 9.6476e-11, 6.1533e-13, 1.0480e-09, 2.6875e-10, 4.5418e-11, 1.6203e-13,\n 1.5658e-11, 1.1045e-10, 8.4748e-10, 1.6157e-09, 6.6712e-10, 5.2171e-11,\n 6.9246e-10, 3.0430e-10, 2.0328e-10, 9.0462e-11, 8.4162e-11, 3.6697e-12,\n 1.8055e-11, 1.2998e-09, 7.5954e-10, 8.8037e-10, 2.1494e-10, 4.8062e-12,\n 2.1931e-09, 9.3395e-11, 1.9854e-10, 2.5918e-11, 2.0794e-10, 7.9095e-11,\n 1.0910e-10, 7.1152e-09, 4.8052e-12, 9.4377e-11, 9.4015e-11, 2.7182e-09,\n 1.5178e-09, 1.6508e-10, 3.4776e-10, 9.2241e-11, 3.8677e-11, 5.0806e-10,\n 2.6096e-10, 3.4038e-10, 3.7559e-11, 1.4779e-09, 5.9446e-12, 2.0393e-11,\n 6.6907e-09, 2.0418e-10, 6.0045e-12, 6.6667e-11, 5.2233e-10, 8.6762e-11,\n 2.8422e-09, 6.1952e-09, 8.9529e-11, 2.9690e-10, 4.5075e-11, 8.2334e-12,\n 3.6488e-11, 1.4942e-10, 2.0272e-10, 3.6326e-10, 2.7427e-11, 1.4682e-09,\n 5.7553e-12, 1.1859e-11, 7.2607e-10, 3.0026e-10, 2.7933e-10, 1.3090e-09,\n 1.5469e-11, 6.5654e-10, 7.1626e-12, 3.9689e-11, 2.4035e-12, 7.5481e-10,\n 1.6289e-12, 7.8572e-11, 1.0086e-09, 5.2377e-10, 1.9441e-11, 4.3057e-10,\n 6.8905e-12, 3.4349e-11, 4.1038e-11, 1.4303e-09, 1.4340e-09, 6.0753e-10,\n 3.5909e-10, 9.7467e-11, 1.1975e-09, 1.8871e-09, 7.5118e-12, 1.1516e-13,\n 2.8150e-12, 6.0070e-10, 5.4452e-11, 1.2162e-11, 8.0961e-11, 1.1150e-09,\n 8.8337e-11, 2.2162e-12, 3.7477e-11, 5.6633e-11, 8.3693e-10, 1.4018e-11,\n 6.5256e-10, 2.5398e-09, 3.0423e-10, 1.6468e-10, 3.0411e-11, 3.2888e-10,\n 7.3046e-11, 4.0883e-10, 9.4185e-11, 2.5207e-11, 2.9326e-09, 1.2155e-11,\n 1.9035e-10, 4.4863e-13, 3.2079e-10, 1.0658e-09, 4.1982e-12, 3.5389e-11,\n 9.4323e-10, 4.3934e-11, 4.2428e-10, 3.0721e-09, 2.4557e-09, 3.1735e-10,\n 4.2134e-12, 1.2434e-10, 7.3293e-10, 8.1065e-13, 1.8698e-10, 3.1921e-09,\n 1.3736e-10, 1.8609e-10, 4.7737e-10, 9.8716e-11, 3.8515e-11, 2.4644e-10,\n 9.6719e-11, 1.7344e-10, 8.1548e-11, 4.8666e-10, 1.1880e-10, 7.7604e-11,\n 2.8153e-10, 6.4333e-11, 7.4233e-11, 1.0618e-10, 6.2621e-10, 6.7338e-12,\n 2.4572e-09, 4.9382e-11, 8.0881e-11, 3.2516e-10, 1.4567e-10, 1.1589e-11,\n 3.4705e-10, 3.3458e-09, 1.3086e-09, 4.1087e-09, 6.4370e-11, 3.4161e-10,\n 2.0284e-10, 9.4924e-10, 7.5082e-12, 2.0389e-12], device='cuda:0')" + }, + "39": { + "step": "tensor(1252.)", + "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([6.9625e-12, 2.6584e-13, 5.4413e-09, 1.1720e-11, 2.4811e-09, 8.3259e-10,\n 1.9729e-10, 2.1200e-10, 5.0888e-09, 3.5403e-11, 2.6697e-09, 3.2240e-10,\n 9.5191e-12, 6.3828e-11, 1.9153e-12, 4.8473e-11, 4.5348e-10, 5.1036e-10,\n 3.5939e-11, 4.6322e-11, 4.6564e-10, 2.5564e-11, 3.2743e-10, 2.0313e-10,\n 1.6881e-10, 1.5379e-09, 3.2514e-10, 2.5083e-10, 2.7322e-10, 3.9363e-09,\n 1.3524e-09, 8.9747e-10, 1.0097e-08, 1.5780e-11, 1.5113e-10, 4.4392e-11,\n 4.0343e-10, 3.8043e-11, 1.7132e-09, 5.5931e-11, 1.6136e-09, 4.0068e-11,\n 1.1101e-10, 1.7430e-09, 6.9943e-13, 8.1120e-11, 1.6293e-10, 1.7230e-10,\n 3.3755e-11, 1.1027e-09, 4.1043e-10, 3.4183e-10, 2.5918e-09, 1.8110e-10,\n 3.7334e-11, 2.4240e-10, 2.3300e-12, 1.0319e-10, 4.5769e-10, 4.5333e-09,\n 5.4628e-10, 5.4070e-11, 5.3420e-13, 2.0255e-10, 2.1867e-10, 2.3965e-09,\n 2.9215e-11, 1.7849e-10, 6.5387e-11, 4.1233e-10, 2.6592e-10, 2.3482e-10,\n 6.1665e-11, 1.0026e-09, 1.4129e-09, 7.6142e-10, 9.4839e-10, 2.4134e-10,\n 8.3165e-11, 1.5867e-12, 2.4586e-10, 8.5004e-11, 3.2816e-11, 1.4624e-10,\n 1.6686e-10, 2.7381e-12, 1.8644e-09, 3.3153e-10, 9.1590e-11, 1.7293e-13,\n 2.0175e-11, 1.8498e-10, 9.6604e-10, 1.5311e-09, 1.3791e-09, 9.6520e-11,\n 1.3164e-09, 6.0654e-10, 3.7163e-10, 1.0252e-10, 1.8143e-10, 8.8390e-12,\n 3.7042e-11, 1.7032e-09, 7.6663e-10, 1.0737e-09, 4.4356e-10, 8.4501e-12,\n 1.5182e-09, 1.5344e-10, 2.6895e-10, 4.6287e-11, 3.3242e-10, 1.4669e-10,\n 1.5943e-10, 7.4431e-09, 1.0765e-11, 2.5183e-10, 8.3451e-11, 2.4550e-09,\n 2.3553e-09, 3.4171e-10, 7.7141e-10, 9.5666e-11, 4.5866e-11, 7.0394e-10,\n 3.1980e-10, 4.0994e-10, 6.7436e-11, 1.2758e-09, 8.0501e-12, 3.4567e-11,\n 6.6127e-09, 5.9586e-10, 1.3141e-11, 1.5270e-10, 4.5395e-10, 1.4594e-10,\n 2.6189e-09, 5.1143e-09, 1.4644e-10, 4.2293e-10, 3.5195e-11, 1.0058e-11,\n 6.8708e-11, 2.8024e-10, 2.6205e-10, 5.4021e-10, 3.4839e-11, 1.1890e-09,\n 1.2374e-11, 2.5128e-11, 8.8740e-10, 3.7570e-10, 3.6628e-10, 2.0232e-09,\n 3.3047e-11, 1.5030e-09, 1.5486e-11, 7.3335e-11, 1.2151e-11, 1.3046e-09,\n 5.9214e-12, 1.0406e-10, 1.6584e-09, 8.6535e-10, 4.8357e-11, 5.6139e-10,\n 9.4154e-12, 3.5384e-11, 1.1043e-10, 7.5018e-10, 1.4876e-09, 6.3438e-10,\n 3.3111e-10, 1.3638e-10, 1.7152e-09, 2.0975e-09, 2.1112e-11, 4.1099e-13,\n 4.6604e-12, 5.9061e-10, 6.1238e-11, 2.3696e-11, 6.9740e-11, 2.3387e-09,\n 1.4756e-10, 4.6172e-12, 6.7654e-11, 9.5395e-11, 1.0171e-09, 1.8665e-11,\n 8.9227e-10, 1.9591e-09, 5.6243e-10, 1.7268e-10, 5.2882e-11, 4.6707e-10,\n 1.3986e-10, 6.2074e-10, 1.7294e-10, 2.8221e-11, 4.8531e-09, 2.0365e-11,\n 3.3428e-10, 5.8793e-13, 4.0158e-10, 1.2186e-09, 1.4407e-11, 1.0370e-10,\n 1.5123e-09, 7.7660e-11, 7.0503e-10, 4.3489e-09, 4.0868e-09, 4.5354e-10,\n 1.0561e-11, 2.0806e-10, 1.1926e-09, 3.0604e-12, 2.5689e-10, 5.0849e-09,\n 1.6846e-10, 2.8563e-10, 9.8041e-10, 1.0404e-10, 5.8027e-11, 4.2691e-10,\n 2.5815e-10, 2.2213e-10, 1.3868e-10, 4.7920e-10, 1.8562e-10, 1.2570e-10,\n 5.2652e-10, 9.2961e-11, 1.5038e-10, 2.1929e-10, 4.5148e-10, 1.4908e-11,\n 2.3761e-09, 9.2375e-11, 6.2388e-11, 8.7565e-10, 2.8528e-10, 1.5453e-11,\n 4.8870e-10, 2.7393e-09, 2.3294e-09, 3.3327e-09, 1.2180e-10, 4.2462e-10,\n 2.9872e-10, 1.4971e-09, 1.8397e-11, 1.9902e-12], device='cuda:0')" + }, + "40": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.5302e-16, 6.2628e-15, 4.2924e-13, ..., 1.7690e-13, 0.0000e+00,\n 1.7843e-12],\n [4.0761e-13, 4.7288e-14, 4.2443e-12, ..., 2.6116e-12, 0.0000e+00,\n 1.1343e-12],\n [1.3045e-10, 2.0125e-12, 1.7471e-09, ..., 2.7354e-10, 0.0000e+00,\n 1.2553e-09],\n ...,\n [3.0265e-11, 8.4253e-12, 5.3383e-10, ..., 3.0336e-10, 0.0000e+00,\n 2.7306e-10],\n [1.7447e-13, 3.8504e-13, 6.4586e-12, ..., 5.6001e-13, 0.0000e+00,\n 3.0927e-12],\n [3.2159e-13, 4.2120e-12, 2.8661e-11, ..., 3.3525e-12, 0.0000e+00,\n 2.8187e-11]], device='cuda:0')" + }, + "41": { + "step": "tensor(1252.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.4500e-11, 3.0960e-10, 5.8807e-07, 3.4283e-10, 1.2918e-07, 1.8692e-07,\n 9.7052e-08, 1.2508e-08, 1.9835e-06, 5.5876e-08, 4.5920e-07, 4.1178e-07,\n 1.2332e-09, 3.8563e-08, 8.0065e-10, 1.6007e-09, 1.8632e-07, 1.0440e-07,\n 2.2309e-08, 5.6634e-09, 3.2874e-08, 3.2752e-09, 2.0936e-07, 9.9332e-08,\n 2.6486e-07, 3.5583e-07, 1.1404e-07, 1.6544e-07, 6.4761e-08, 9.2388e-07,\n 9.7580e-07, 3.3532e-08, 3.9054e-06, 3.4369e-08, 4.1347e-08, 3.7331e-09,\n 1.4272e-07, 3.0301e-09, 9.3137e-08, 3.2977e-08, 3.1842e-07, 3.0186e-09,\n 7.6151e-10, 1.9364e-06, 4.8367e-10, 8.7125e-08, 2.0556e-07, 1.6527e-07,\n 1.8701e-09, 1.0829e-07, 1.6241e-07, 2.1101e-08, 6.8796e-07, 4.5137e-08,\n 2.2346e-08, 4.6204e-07, 1.4795e-08, 4.2186e-08, 8.5427e-07, 8.4059e-07,\n 1.7995e-07, 2.3648e-07, 5.6839e-09, 1.1424e-07, 1.4030e-07, 7.8387e-07,\n 4.7427e-09, 2.0615e-07, 4.3437e-08, 9.9338e-08, 3.6234e-08, 3.8659e-08,\n 1.0360e-07, 1.1112e-06, 3.3588e-07, 9.8226e-07, 2.0825e-07, 2.9627e-08,\n 3.1239e-08, 1.4278e-09, 9.6419e-08, 1.3819e-08, 8.8938e-09, 2.5157e-07,\n 7.3054e-08, 7.4006e-09, 1.4662e-06, 1.4752e-07, 4.2348e-08, 3.5063e-10,\n 2.6564e-09, 9.3940e-08, 1.6522e-07, 4.5484e-08, 1.5795e-06, 6.0359e-09,\n 1.4745e-06, 1.0199e-06, 1.7728e-07, 1.1725e-08, 3.9841e-08, 2.3938e-08,\n 2.6475e-09, 7.4751e-07, 8.7026e-09, 1.2386e-07, 9.8182e-08, 1.6555e-08,\n 1.0797e-07, 4.8907e-07, 1.2602e-07, 8.4716e-09, 5.2544e-07, 5.3712e-08,\n 4.1240e-08, 1.5781e-07, 1.4271e-08, 1.2202e-07, 1.5161e-08, 4.1434e-07,\n 1.1392e-06, 1.4457e-07, 1.2933e-07, 1.6854e-09, 2.9352e-08, 1.6609e-06,\n 1.2190e-08, 9.1147e-08, 3.4512e-08, 1.0923e-07, 1.2111e-09, 3.4709e-08,\n 1.6283e-06, 7.3864e-08, 3.2583e-09, 1.3790e-07, 8.7629e-08, 1.3515e-08,\n 4.5121e-07, 8.0147e-07, 1.4454e-08, 4.0128e-08, 5.8417e-07, 9.7694e-10,\n 2.1115e-08, 1.2766e-07, 4.0851e-08, 4.8071e-07, 8.7073e-09, 2.8140e-07,\n 2.6125e-08, 1.5874e-09, 4.2443e-07, 1.1387e-07, 2.9232e-08, 4.1343e-07,\n 1.4944e-07, 5.6797e-07, 8.2764e-10, 7.3083e-08, 3.2165e-09, 3.9211e-07,\n 3.5880e-10, 6.2788e-07, 8.3299e-08, 4.1736e-07, 4.2689e-08, 1.4791e-07,\n 3.0218e-09, 1.0683e-10, 3.5737e-08, 1.3549e-07, 1.2271e-07, 1.5255e-07,\n 5.6751e-08, 4.7979e-08, 6.7625e-07, 1.5238e-07, 1.2629e-09, 2.2444e-10,\n 1.4564e-08, 1.5476e-07, 1.8288e-09, 1.6791e-08, 8.6563e-09, 1.9039e-06,\n 1.8063e-07, 2.5896e-09, 3.5770e-08, 4.2600e-08, 5.9752e-07, 1.7094e-09,\n 2.5354e-07, 6.4618e-07, 4.2274e-07, 5.0062e-09, 5.9634e-09, 1.8653e-08,\n 1.8532e-07, 5.2283e-07, 2.6762e-07, 3.1563e-09, 1.1658e-06, 1.4536e-10,\n 7.7433e-08, 5.2290e-10, 4.4760e-08, 3.8129e-07, 1.0689e-08, 1.2044e-08,\n 7.1760e-07, 2.2552e-08, 2.7894e-07, 1.5366e-06, 5.1693e-06, 3.8285e-08,\n 6.3293e-09, 2.3702e-07, 1.1737e-07, 1.5678e-10, 9.2035e-08, 2.1233e-06,\n 2.9787e-08, 1.9468e-07, 1.7569e-07, 2.1984e-08, 1.2298e-08, 2.9168e-07,\n 3.6018e-07, 1.8169e-08, 3.5005e-07, 5.2962e-08, 1.6852e-07, 2.2558e-07,\n 2.0079e-07, 1.0210e-07, 5.8506e-08, 1.1377e-07, 3.4350e-08, 6.6163e-09,\n 5.9188e-07, 1.2391e-07, 3.8333e-09, 4.9244e-07, 5.0556e-08, 1.0141e-09,\n 4.6247e-07, 5.6265e-07, 1.4163e-06, 4.3693e-07, 1.3582e-08, 3.3684e-07,\n 3.0680e-08, 4.4998e-07, 5.3412e-09, 3.8550e-08], device='cuda:0')" + }, + "42": { + "step": "tensor(1252.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([4.7758e-13, 6.7627e-13, 1.0244e-09, 2.8893e-12, 2.2874e-10, 3.1590e-10,\n 1.6345e-10, 2.0454e-11, 3.5764e-09, 1.2008e-10, 7.3341e-10, 6.6727e-10,\n 3.6766e-12, 5.4483e-11, 2.4884e-12, 4.2326e-12, 3.5146e-10, 4.3603e-10,\n 5.8293e-11, 1.8885e-11, 7.7874e-11, 8.6643e-12, 5.4478e-10, 2.0369e-10,\n 9.4863e-10, 5.5546e-10, 2.3038e-10, 4.9937e-10, 2.5076e-10, 1.5294e-09,\n 1.8032e-09, 8.2423e-11, 7.8735e-09, 3.8330e-11, 6.8699e-11, 5.5716e-12,\n 2.2577e-10, 1.6134e-11, 1.2749e-10, 7.1089e-11, 4.1874e-10, 3.8218e-11,\n 1.0113e-12, 6.4663e-09, 1.9817e-12, 1.5347e-10, 5.6523e-10, 6.5425e-10,\n 5.6975e-12, 1.7205e-10, 3.1560e-10, 2.6428e-11, 1.2018e-09, 7.2751e-11,\n 4.7804e-11, 7.1777e-10, 5.3112e-11, 5.6978e-11, 2.0242e-09, 1.1161e-09,\n 2.6445e-10, 6.4692e-10, 1.5260e-11, 1.8200e-10, 3.5962e-10, 1.7148e-09,\n 1.3985e-11, 3.0361e-10, 8.0425e-11, 1.1110e-10, 5.9136e-11, 8.5164e-11,\n 1.6879e-10, 2.6885e-09, 4.6515e-10, 2.1264e-09, 4.1409e-10, 5.2063e-11,\n 6.1575e-11, 1.5220e-12, 1.1806e-10, 2.0703e-11, 2.7195e-11, 9.8999e-10,\n 9.4948e-11, 4.0241e-11, 3.9001e-09, 5.1563e-10, 1.0666e-10, 2.1048e-12,\n 1.2865e-11, 1.5340e-10, 2.1503e-10, 7.2810e-11, 4.2096e-09, 1.4299e-11,\n 3.7640e-09, 2.6111e-09, 3.3116e-10, 4.6603e-11, 7.7067e-11, 4.9500e-11,\n 8.1435e-12, 1.3909e-09, 3.1118e-11, 1.7159e-10, 1.1510e-10, 4.2622e-11,\n 1.0853e-10, 1.1302e-09, 2.8188e-10, 1.1287e-11, 8.0459e-10, 6.7604e-11,\n 1.0363e-10, 2.6460e-10, 5.2444e-11, 1.3670e-10, 5.6605e-11, 5.8429e-10,\n 2.1661e-09, 4.2879e-10, 1.4291e-10, 3.8703e-12, 8.9625e-11, 3.3754e-09,\n 2.3777e-11, 2.5283e-10, 7.9176e-11, 1.9232e-10, 6.9353e-12, 4.6387e-11,\n 3.0844e-09, 1.1973e-10, 8.3659e-12, 2.7960e-10, 2.3634e-10, 2.9958e-11,\n 7.2383e-10, 1.2712e-09, 2.9041e-11, 6.0597e-11, 1.1167e-09, 6.5230e-12,\n 5.4907e-11, 2.4928e-10, 5.7217e-11, 1.5603e-09, 3.6638e-11, 9.2969e-10,\n 5.1252e-11, 9.8327e-13, 8.7172e-10, 2.1687e-10, 5.4633e-11, 4.2906e-10,\n 2.3614e-10, 7.2816e-10, 1.2193e-12, 1.5464e-10, 3.7974e-11, 8.5350e-10,\n 2.0100e-12, 2.0278e-09, 1.4532e-10, 7.1479e-10, 1.0346e-10, 3.2136e-10,\n 6.9291e-12, 3.1105e-13, 4.9956e-11, 3.3686e-10, 1.5449e-10, 2.1240e-10,\n 1.2696e-10, 1.0608e-10, 1.4905e-09, 2.8398e-10, 4.4651e-12, 5.3673e-13,\n 2.3865e-11, 4.1056e-10, 1.7048e-12, 3.2531e-11, 2.1767e-11, 3.2969e-09,\n 2.2932e-10, 1.4121e-11, 1.5964e-10, 1.1336e-10, 1.8350e-09, 1.2668e-11,\n 3.7981e-10, 1.3961e-09, 7.4075e-10, 1.5013e-11, 5.7722e-12, 3.5599e-11,\n 2.9092e-10, 1.2402e-09, 6.2753e-10, 1.7736e-11, 1.9900e-09, 3.3760e-12,\n 8.5459e-11, 9.8909e-13, 5.7070e-11, 8.0964e-10, 4.3801e-11, 1.8781e-11,\n 2.1782e-09, 2.3474e-11, 3.7081e-10, 3.1376e-09, 1.4944e-08, 5.6278e-11,\n 1.1124e-11, 4.2123e-10, 1.7074e-10, 1.4187e-13, 9.1275e-11, 3.8016e-09,\n 7.8244e-11, 6.0852e-10, 3.1766e-10, 3.5844e-11, 3.2091e-11, 6.0993e-10,\n 6.2294e-10, 2.8173e-11, 4.4473e-10, 1.0925e-10, 3.5011e-10, 4.6640e-10,\n 3.2127e-10, 1.8803e-10, 9.5348e-11, 2.2374e-10, 5.2797e-11, 1.3258e-11,\n 1.2191e-09, 3.1201e-10, 3.7378e-11, 6.8411e-10, 5.2262e-11, 6.7156e-12,\n 1.2593e-09, 1.3536e-09, 3.3407e-09, 7.1341e-10, 2.1970e-11, 7.2621e-10,\n 4.6304e-11, 1.0235e-09, 1.3830e-11, 1.6391e-10], device='cuda:0')" + }, + "43": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.6445e-12, 1.2249e-12, 1.6986e-09, 4.7860e-12, 3.5241e-10, 4.6337e-10,\n 2.7442e-10, 4.4795e-11, 5.4882e-09, 1.6236e-10, 1.3027e-09, 1.0037e-09,\n 3.6731e-12, 1.2044e-10, 3.6955e-12, 1.2153e-11, 5.5990e-10, 3.3967e-10,\n 5.3713e-11, 2.5794e-11, 1.1227e-10, 1.5596e-11, 5.4659e-10, 2.9518e-10,\n 7.8614e-10, 9.1692e-10, 3.3578e-10, 4.2629e-10, 2.4423e-10, 2.5627e-09,\n 2.4175e-09, 1.1207e-10, 1.0882e-08, 8.3509e-11, 1.4087e-10, 1.8742e-11,\n 3.6708e-10, 2.5692e-11, 2.8237e-10, 9.8792e-11, 7.6604e-10, 2.3932e-11,\n 4.5759e-12, 5.4281e-09, 2.1528e-12, 2.0570e-10, 5.1990e-10, 5.0765e-10,\n 1.0298e-11, 3.1958e-10, 5.0515e-10, 5.3534e-11, 1.7806e-09, 1.1701e-10,\n 7.6047e-11, 1.2309e-09, 5.8852e-11, 1.0750e-10, 2.2041e-09, 2.4190e-09,\n 4.3760e-10, 5.5049e-10, 2.6427e-11, 3.3089e-10, 4.0586e-10, 1.9473e-09,\n 1.7345e-11, 5.5045e-10, 1.3033e-10, 2.9230e-10, 1.0948e-10, 1.1423e-10,\n 2.1780e-10, 3.0580e-09, 9.6768e-10, 2.4995e-09, 6.0191e-10, 9.4243e-11,\n 1.0678e-10, 3.2738e-12, 2.6670e-10, 4.2775e-11, 3.8174e-11, 7.2452e-10,\n 1.7887e-10, 3.6003e-11, 4.0476e-09, 4.6466e-10, 1.4902e-10, 2.9878e-12,\n 1.3651e-11, 2.4228e-10, 4.3454e-10, 1.3684e-10, 3.9621e-09, 1.5589e-11,\n 3.7399e-09, 2.5888e-09, 4.0079e-10, 7.4724e-11, 1.2032e-10, 8.6943e-11,\n 7.7332e-12, 1.9371e-09, 2.5914e-11, 3.2453e-10, 2.7778e-10, 6.3679e-11,\n 2.7851e-10, 1.1998e-09, 3.6876e-10, 2.8963e-11, 1.4010e-09, 1.2074e-10,\n 1.3386e-10, 4.5977e-10, 5.5935e-11, 3.0822e-10, 7.5750e-11, 1.1885e-09,\n 3.1113e-09, 4.5325e-10, 3.7331e-10, 2.7075e-11, 1.0793e-10, 4.2376e-09,\n 3.3019e-11, 2.8366e-10, 1.0479e-10, 3.1525e-10, 1.0598e-11, 1.1341e-10,\n 4.1847e-09, 1.9941e-10, 1.4394e-11, 3.6188e-10, 2.8055e-10, 3.3389e-11,\n 1.1318e-09, 2.0211e-09, 5.4310e-11, 1.2792e-10, 1.4403e-09, 1.1939e-11,\n 7.3864e-11, 3.7376e-10, 9.5369e-11, 1.2492e-09, 3.3421e-11, 8.1694e-10,\n 8.6153e-11, 1.9288e-12, 1.0883e-09, 2.6979e-10, 1.0039e-10, 1.0742e-09,\n 3.6303e-10, 1.3748e-09, 4.5390e-12, 2.1547e-10, 5.1819e-11, 1.1462e-09,\n 3.9891e-12, 1.7652e-09, 2.3859e-10, 1.0644e-09, 1.0574e-10, 3.7797e-10,\n 1.1437e-11, 8.5478e-13, 9.3218e-11, 4.1405e-10, 3.4413e-10, 4.5205e-10,\n 1.6082e-10, 1.4070e-10, 1.9017e-09, 4.2417e-10, 1.7604e-11, 1.1617e-12,\n 5.4046e-11, 4.2079e-10, 7.1078e-12, 5.8924e-11, 3.5249e-11, 5.2411e-09,\n 4.3736e-10, 1.3052e-11, 1.3156e-10, 1.4843e-10, 1.4738e-09, 2.2839e-11,\n 6.3863e-10, 1.7215e-09, 1.0293e-09, 1.4004e-11, 2.2158e-11, 6.4037e-11,\n 4.9465e-10, 1.4978e-09, 6.7643e-10, 2.8910e-11, 3.0409e-09, 7.2722e-12,\n 2.0067e-10, 2.1542e-12, 1.4275e-10, 9.4018e-10, 5.3982e-11, 3.8925e-11,\n 2.0367e-09, 5.1902e-11, 8.0363e-10, 4.1232e-09, 1.4020e-08, 1.0292e-10,\n 2.0971e-11, 5.5926e-10, 3.2990e-10, 7.5276e-13, 2.1162e-10, 5.8156e-09,\n 1.0059e-10, 5.7838e-10, 5.3634e-10, 4.9068e-11, 4.3483e-11, 8.1174e-10,\n 1.0043e-09, 5.8581e-11, 9.5466e-10, 1.6777e-10, 3.9938e-10, 6.6900e-10,\n 5.7567e-10, 3.2335e-10, 1.5514e-10, 3.5882e-10, 8.9085e-11, 2.0760e-11,\n 1.5311e-09, 3.7412e-10, 4.9570e-11, 1.3592e-09, 1.3662e-10, 1.6030e-11,\n 1.2789e-09, 1.4291e-09, 3.8967e-09, 1.2846e-09, 4.0862e-11, 8.1536e-10,\n 7.2967e-11, 1.3209e-09, 2.6654e-11, 1.3283e-10], device='cuda:0')" + }, + "44": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 0.0000e+00, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.2366e-14, 7.1849e-14, 2.8644e-13, ..., 5.1413e-14, 0.0000e+00,\n 1.1358e-12],\n [1.8429e-13, 2.2770e-14, 2.0824e-12, ..., 1.3564e-12, 0.0000e+00,\n 6.2287e-12],\n [3.5905e-10, 4.2827e-11, 2.7295e-09, ..., 1.7021e-09, 0.0000e+00,\n 4.4988e-09],\n ...,\n [9.5712e-11, 1.1608e-11, 6.5441e-10, ..., 3.0899e-10, 0.0000e+00,\n 3.3665e-10],\n [4.1620e-13, 2.5252e-13, 9.2285e-13, ..., 6.9060e-12, 0.0000e+00,\n 4.1653e-12],\n [9.1600e-14, 3.8876e-14, 3.4638e-12, ..., 2.6049e-12, 0.0000e+00,\n 1.7723e-13]], device='cuda:0')" + }, + "45": { + "step": "tensor(1252.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.7822e-10, 4.3962e-10, 2.2380e-06, 7.2782e-11, 7.4231e-07, 4.6350e-07,\n 1.1025e-07, 6.4957e-08, 2.2610e-06, 4.6852e-08, 5.9819e-08, 2.1331e-07,\n 2.5848e-09, 4.8413e-08, 1.0354e-08, 1.8771e-08, 1.6925e-07, 4.1632e-08,\n 5.8250e-09, 1.8614e-09, 1.5942e-07, 7.7077e-09, 2.1464e-07, 2.2812e-08,\n 1.9567e-07, 6.9815e-07, 8.5342e-08, 8.7704e-08, 3.1293e-08, 7.0103e-07,\n 2.4387e-08, 9.4520e-08, 3.6804e-06, 4.4702e-08, 2.4053e-08, 1.3793e-09,\n 2.4134e-07, 1.3956e-09, 1.4870e-07, 4.7371e-08, 1.1263e-07, 4.1976e-09,\n 1.2556e-09, 3.4910e-07, 1.1355e-10, 1.2997e-07, 5.7475e-08, 7.7207e-08,\n 1.4688e-08, 5.5853e-08, 4.6973e-07, 2.9783e-08, 1.8344e-07, 3.4029e-08,\n 1.8918e-08, 9.2629e-07, 5.4673e-09, 7.8669e-08, 9.8041e-08, 1.6858e-06,\n 1.7401e-07, 1.6473e-07, 8.7526e-09, 2.6315e-07, 1.5336e-07, 5.8494e-07,\n 1.6944e-09, 3.1845e-07, 2.3134e-08, 7.7322e-08, 7.8907e-08, 2.3187e-07,\n 9.4872e-09, 6.2340e-07, 6.6821e-07, 5.4466e-07, 3.8194e-08, 4.0183e-08,\n 1.1585e-08, 3.6347e-09, 1.6055e-07, 1.1375e-08, 1.7652e-08, 1.2683e-08,\n 1.5983e-07, 4.0654e-10, 4.9459e-07, 6.9810e-08, 3.7828e-08, 1.4971e-10,\n 1.0133e-09, 8.6661e-08, 2.5717e-07, 1.7454e-07, 5.1027e-07, 2.9203e-08,\n 8.1160e-07, 5.3754e-07, 2.5459e-08, 2.7982e-08, 9.0308e-08, 3.1885e-08,\n 5.7641e-08, 8.3784e-07, 1.2258e-07, 1.1459e-07, 1.8015e-07, 1.2470e-08,\n 1.2531e-07, 1.8878e-08, 4.8252e-09, 3.0474e-08, 3.0502e-08, 7.6278e-08,\n 1.2492e-08, 1.6953e-07, 2.0031e-08, 9.1512e-07, 8.5962e-09, 5.3044e-07,\n 9.8175e-07, 7.7765e-08, 6.9584e-07, 4.0787e-09, 3.4640e-08, 1.2769e-06,\n 6.9507e-08, 3.6511e-08, 3.7161e-08, 1.1397e-07, 3.1569e-09, 5.4550e-08,\n 2.8864e-06, 2.5569e-07, 3.9701e-09, 1.7313e-07, 1.0175e-07, 1.0821e-07,\n 9.3230e-07, 3.9580e-07, 8.0902e-08, 1.7947e-08, 2.5266e-07, 1.4815e-09,\n 1.0242e-09, 1.8366e-07, 1.3172e-07, 1.5720e-07, 2.5250e-09, 4.0184e-08,\n 5.3119e-08, 2.7187e-11, 2.0524e-07, 4.9719e-08, 9.4860e-09, 1.0410e-06,\n 6.2306e-08, 2.5967e-07, 7.3887e-10, 3.0867e-08, 7.0863e-09, 6.3195e-07,\n 1.1806e-09, 3.1385e-07, 1.0960e-06, 1.9992e-07, 3.1737e-08, 1.6833e-07,\n 2.6665e-09, 5.8074e-11, 1.4581e-07, 3.3608e-07, 8.1434e-07, 9.3510e-08,\n 1.1281e-07, 2.1190e-09, 2.9283e-07, 7.0428e-08, 5.9039e-09, 7.1441e-09,\n 1.7536e-08, 1.7571e-07, 7.8326e-10, 5.4185e-09, 2.4292e-08, 3.2018e-06,\n 5.0425e-08, 6.9466e-10, 8.0443e-09, 2.5542e-08, 5.1511e-07, 2.1792e-09,\n 3.8034e-07, 1.2212e-07, 6.6683e-08, 7.8117e-08, 6.7330e-09, 1.9359e-07,\n 1.3612e-07, 2.2249e-07, 2.0226e-07, 1.0008e-09, 1.0191e-06, 3.2110e-10,\n 1.3164e-07, 5.9543e-10, 7.0565e-08, 3.0861e-07, 7.8355e-09, 6.1556e-08,\n 2.1222e-07, 7.4815e-08, 7.0431e-07, 1.0200e-06, 1.8333e-06, 2.6011e-07,\n 3.3532e-08, 4.3626e-08, 1.2882e-06, 6.9494e-09, 1.2412e-07, 1.7991e-06,\n 1.0051e-08, 1.3913e-07, 1.6559e-06, 2.5578e-08, 3.7195e-08, 1.7340e-07,\n 2.7597e-07, 9.4673e-08, 1.0661e-07, 7.9912e-08, 1.6176e-07, 5.9674e-08,\n 1.8085e-07, 2.3706e-07, 2.6956e-08, 2.0045e-08, 1.2359e-07, 3.7067e-09,\n 1.1294e-06, 7.9139e-08, 4.8813e-09, 5.9585e-07, 1.8351e-07, 8.5000e-10,\n 2.2074e-07, 8.9261e-08, 1.9929e-06, 1.4112e-06, 3.0480e-08, 8.6920e-08,\n 3.5152e-08, 5.5037e-07, 1.1214e-08, 3.0625e-09], device='cuda:0')" + }, + "46": { + "step": "tensor(1252.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([7.2470e-13, 5.3485e-12, 4.9718e-09, 1.0009e-13, 1.2861e-09, 8.6632e-10,\n 1.9196e-10, 2.0632e-10, 4.9250e-09, 1.1343e-10, 1.1564e-10, 3.6552e-10,\n 4.9127e-12, 8.0061e-11, 2.3923e-11, 6.8734e-11, 3.3643e-10, 8.6356e-11,\n 1.4389e-11, 9.7967e-12, 2.4418e-10, 3.9699e-11, 6.1055e-10, 3.7170e-11,\n 6.6144e-10, 8.5142e-10, 1.7472e-10, 1.6607e-10, 1.6681e-10, 1.3529e-09,\n 7.7451e-11, 1.8343e-10, 7.5449e-09, 6.2109e-11, 3.8231e-11, 6.7371e-12,\n 3.9375e-10, 2.5595e-11, 2.2362e-10, 1.2142e-10, 1.6345e-10, 6.3411e-12,\n 2.9482e-12, 6.7447e-10, 2.1906e-13, 2.7818e-10, 7.6798e-11, 2.2797e-10,\n 4.8352e-11, 7.5894e-11, 8.3580e-10, 3.7373e-11, 2.1633e-10, 5.2001e-11,\n 9.7746e-11, 2.5812e-09, 1.7783e-11, 1.1992e-10, 1.5949e-10, 3.4143e-09,\n 2.4611e-10, 2.6931e-10, 2.8777e-11, 2.7663e-10, 3.0707e-10, 9.3445e-10,\n 3.7733e-12, 5.2034e-10, 3.4302e-11, 1.1316e-10, 1.4315e-10, 5.7199e-10,\n 1.1031e-11, 1.0752e-09, 1.6882e-09, 8.1056e-10, 5.2305e-11, 5.3914e-11,\n 1.5247e-11, 2.4763e-12, 2.2498e-10, 1.5741e-11, 3.4142e-11, 1.8224e-11,\n 4.0901e-10, 2.9509e-12, 6.8980e-10, 2.9667e-10, 8.4653e-11, 3.9098e-13,\n 1.4309e-13, 1.5941e-10, 4.6685e-10, 2.5482e-10, 6.2940e-10, 2.7321e-11,\n 1.5163e-09, 8.4336e-10, 5.0199e-11, 1.0830e-10, 1.8255e-10, 6.2676e-11,\n 1.3385e-10, 1.9740e-09, 2.2513e-10, 1.5329e-10, 2.6464e-10, 2.5308e-11,\n 1.1295e-10, 4.5210e-11, 1.5300e-11, 1.2202e-10, 1.0924e-10, 1.0265e-10,\n 1.5704e-11, 4.6926e-10, 5.7819e-11, 3.1370e-09, 3.5587e-11, 1.2408e-09,\n 1.7719e-09, 1.6966e-10, 1.6531e-09, 4.6239e-11, 1.0237e-10, 2.7849e-09,\n 6.2776e-11, 4.1654e-11, 8.2411e-11, 2.2394e-10, 9.8595e-12, 8.6640e-11,\n 1.0394e-08, 2.8953e-10, 1.0354e-11, 3.8732e-10, 3.8160e-10, 2.2133e-10,\n 2.2645e-09, 4.1176e-10, 1.6619e-10, 4.3052e-11, 4.0508e-10, 4.8288e-12,\n 3.4468e-12, 6.1923e-10, 2.1036e-10, 2.4739e-10, 7.0967e-12, 5.9943e-11,\n 1.7700e-10, 3.3839e-13, 2.6335e-10, 7.7264e-11, 1.4828e-11, 1.4848e-09,\n 8.4506e-11, 3.4337e-10, 7.4052e-13, 5.8367e-11, 1.5617e-11, 1.3250e-09,\n 1.2375e-11, 7.0201e-10, 2.6343e-09, 2.6205e-10, 6.7592e-11, 3.7208e-10,\n 1.1240e-11, 2.0303e-12, 2.8769e-10, 1.4783e-09, 1.8350e-09, 1.2964e-10,\n 3.1039e-10, 5.7327e-12, 3.9825e-10, 1.0810e-10, 3.8624e-11, 4.0534e-11,\n 4.3598e-11, 4.4263e-10, 2.6337e-12, 1.3595e-11, 1.1019e-10, 8.9206e-09,\n 6.0882e-11, 2.3838e-12, 1.2525e-11, 8.4632e-11, 1.5660e-09, 9.3568e-12,\n 5.9281e-10, 1.7346e-10, 1.4699e-10, 1.9416e-10, 1.0082e-11, 2.7228e-10,\n 1.8644e-10, 3.0982e-10, 3.9166e-10, 4.9989e-12, 1.1778e-09, 2.0979e-12,\n 2.1174e-10, 1.8027e-12, 1.0445e-10, 5.2898e-10, 5.7535e-11, 1.3479e-10,\n 3.2701e-10, 9.8005e-11, 1.5655e-09, 1.5691e-09, 2.8197e-09, 6.8945e-10,\n 7.7763e-11, 6.0101e-11, 2.5555e-09, 2.5001e-11, 1.8007e-10, 4.3475e-09,\n 2.4506e-11, 3.4930e-10, 4.5788e-09, 4.4223e-11, 1.7852e-10, 2.0406e-10,\n 4.5536e-10, 1.9099e-10, 1.9442e-10, 1.1055e-10, 4.1720e-10, 1.0492e-10,\n 2.5410e-10, 4.9390e-10, 4.2624e-11, 3.1611e-11, 2.4183e-10, 9.9917e-12,\n 3.6151e-09, 1.3068e-10, 2.0369e-11, 8.7565e-10, 2.0189e-10, 2.0843e-12,\n 4.2299e-10, 1.0378e-10, 5.0481e-09, 5.7914e-09, 4.5684e-11, 1.0009e-10,\n 5.9640e-11, 1.3588e-09, 2.7853e-11, 6.0281e-12], device='cuda:0')" + }, + "47": { + "step": "tensor(1252.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.8411e-12, 3.7422e-12, 6.1501e-09, 9.0648e-13, 1.8196e-09, 1.1343e-09,\n 3.1610e-10, 2.1773e-10, 6.0568e-09, 1.3680e-10, 1.8995e-10, 5.3274e-10,\n 1.1130e-11, 1.5981e-10, 4.5594e-11, 6.0320e-11, 4.8867e-10, 1.3550e-10,\n 2.1762e-11, 1.3088e-11, 4.6512e-10, 3.9230e-11, 5.5316e-10, 7.8453e-11,\n 6.1277e-10, 1.7551e-09, 2.6763e-10, 2.3784e-10, 1.3746e-10, 1.9862e-09,\n 6.9778e-11, 2.4795e-10, 1.0023e-08, 1.0764e-10, 7.4832e-11, 9.8143e-12,\n 5.6172e-10, 2.6403e-11, 4.5621e-10, 1.5368e-10, 2.8615e-10, 1.7626e-11,\n 5.3461e-12, 1.0621e-09, 5.6323e-13, 2.9149e-10, 1.6602e-10, 2.7095e-10,\n 5.4456e-11, 1.7333e-10, 1.4171e-09, 6.2775e-11, 4.5837e-10, 9.3126e-11,\n 7.8869e-11, 2.3446e-09, 2.6361e-11, 1.8121e-10, 2.5587e-10, 4.7339e-09,\n 4.0092e-10, 3.8160e-10, 3.4907e-11, 7.4397e-10, 4.4438e-10, 1.3785e-09,\n 8.3280e-12, 7.8685e-10, 7.1460e-11, 2.3979e-10, 2.5299e-10, 7.1679e-10,\n 2.0088e-11, 1.7633e-09, 1.9091e-09, 1.4147e-09, 1.2347e-10, 1.3174e-10,\n 3.9452e-11, 4.0321e-12, 4.8931e-10, 3.2512e-11, 5.4960e-11, 4.1590e-11,\n 3.6315e-10, 8.9155e-12, 1.4453e-09, 2.5343e-10, 1.3630e-10, 5.4409e-13,\n 1.4136e-12, 2.2076e-10, 6.3117e-10, 4.6141e-10, 1.3106e-09, 6.6336e-11,\n 2.1094e-09, 1.3700e-09, 6.7975e-11, 1.1569e-10, 2.8009e-10, 1.0128e-10,\n 1.9212e-10, 2.1462e-09, 3.6092e-10, 2.8683e-10, 5.1785e-10, 4.6920e-11,\n 2.8242e-10, 5.5369e-11, 1.5171e-11, 1.1689e-10, 8.3802e-11, 1.7344e-10,\n 4.4499e-11, 4.7693e-10, 7.9943e-11, 2.2314e-09, 4.2977e-11, 1.5121e-09,\n 2.7670e-09, 2.5618e-10, 1.7903e-09, 7.0186e-11, 1.2733e-10, 3.4119e-09,\n 1.6229e-10, 1.1107e-10, 1.2282e-10, 3.3876e-10, 1.6535e-11, 1.7465e-10,\n 7.3580e-09, 6.4095e-10, 1.6522e-11, 4.1842e-10, 3.0378e-10, 2.4732e-10,\n 2.3062e-09, 1.0851e-09, 2.3842e-10, 5.5610e-11, 6.2447e-10, 1.0406e-11,\n 4.8762e-12, 5.5664e-10, 2.7518e-10, 4.4490e-10, 1.5564e-11, 1.3197e-10,\n 1.8440e-10, 1.3077e-14, 5.3439e-10, 1.2036e-10, 2.9629e-11, 2.5747e-09,\n 1.6547e-10, 6.6792e-10, 1.0015e-12, 1.0835e-10, 3.0259e-11, 1.7671e-09,\n 1.5535e-11, 8.9524e-10, 3.0939e-09, 5.1128e-10, 7.5500e-11, 4.2195e-10,\n 1.6969e-11, 2.2606e-12, 3.3918e-10, 9.9845e-10, 2.0221e-09, 2.8622e-10,\n 2.8020e-10, 8.4099e-12, 8.3529e-10, 2.1504e-10, 3.9834e-11, 3.5319e-11,\n 7.0413e-11, 4.7174e-10, 3.5363e-12, 2.1594e-11, 9.7161e-11, 8.7371e-09,\n 1.2447e-10, 4.9374e-12, 2.8993e-11, 1.0062e-10, 1.2946e-09, 1.9195e-11,\n 9.4857e-10, 3.8607e-10, 1.7260e-10, 2.5397e-10, 2.2514e-11, 4.8734e-10,\n 3.5630e-10, 6.5545e-10, 5.0235e-10, 1.6209e-11, 2.7298e-09, 6.6706e-12,\n 3.0855e-10, 1.3488e-12, 2.1847e-10, 8.2380e-10, 4.9272e-11, 1.8937e-10,\n 5.9272e-10, 1.6740e-10, 1.9638e-09, 2.6743e-09, 5.1661e-09, 6.2150e-10,\n 1.2199e-10, 1.0347e-10, 3.2855e-09, 3.1234e-11, 2.9124e-10, 4.8010e-09,\n 3.7948e-11, 4.3074e-10, 4.5872e-09, 4.4771e-11, 1.3553e-10, 4.9994e-10,\n 8.0495e-10, 2.5657e-10, 3.1752e-10, 2.2855e-10, 3.6502e-10, 1.8279e-10,\n 5.0842e-10, 6.8130e-10, 7.8568e-11, 6.2396e-11, 2.8934e-10, 1.4746e-11,\n 2.8127e-09, 2.2328e-10, 3.6025e-11, 1.6845e-09, 4.3489e-10, 5.2344e-12,\n 6.0528e-10, 2.1946e-10, 5.4647e-09, 3.9838e-09, 9.9467e-11, 2.1404e-10,\n 7.9761e-11, 1.5300e-09, 4.1305e-11, 1.3643e-11], device='cuda:0')" + }, + "48": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.5531e-13, 4.6390e-14, 7.2510e-13, ..., 8.4498e-13, 0.0000e+00,\n 1.1458e-13],\n [5.1462e-14, 2.1132e-13, 2.6714e-12, ..., 3.6115e-12, 0.0000e+00,\n 6.3134e-12],\n [4.2688e-10, 8.3459e-12, 1.7847e-09, ..., 4.5341e-10, 0.0000e+00,\n 2.5601e-09],\n ...,\n [5.6349e-11, 2.3332e-12, 6.4918e-10, ..., 1.9218e-10, 0.0000e+00,\n 2.0660e-10],\n [7.0121e-12, 8.1777e-15, 2.1286e-13, ..., 8.9361e-13, 0.0000e+00,\n 9.1599e-13],\n [2.6331e-14, 1.5641e-14, 5.8247e-12, ..., 4.9471e-13, 0.0000e+00,\n 3.9035e-12]], device='cuda:0')" + }, + "49": { + "step": "tensor(1252.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.1833e-10, 1.9059e-09, 1.0224e-06, 1.0618e-09, 1.6677e-07, 5.3742e-07,\n 2.5208e-07, 5.6792e-08, 2.6834e-08, 5.4854e-08, 7.0797e-07, 2.0849e-07,\n 3.8925e-08, 1.3915e-07, 6.0414e-10, 3.9464e-08, 1.0493e-07, 4.9635e-08,\n 7.9301e-09, 1.7895e-08, 2.5655e-07, 4.9556e-09, 2.0942e-08, 7.8984e-08,\n 1.3229e-07, 6.9070e-08, 1.0948e-07, 2.2267e-07, 3.9730e-08, 4.0138e-07,\n 5.4401e-07, 6.3541e-07, 3.4504e-06, 1.0877e-08, 1.5046e-07, 2.9956e-09,\n 3.0807e-07, 5.0870e-09, 1.1365e-06, 3.2973e-08, 1.8146e-07, 4.2475e-09,\n 1.8805e-08, 7.4021e-07, 3.3700e-10, 3.7232e-08, 1.5870e-07, 1.0960e-07,\n 2.1539e-08, 2.0879e-07, 3.8528e-07, 1.2870e-07, 4.2039e-07, 3.4275e-08,\n 4.3575e-08, 9.6536e-07, 3.3405e-09, 1.4733e-07, 7.7006e-07, 9.5572e-07,\n 1.2220e-07, 1.0898e-07, 4.3357e-09, 7.5369e-07, 3.5981e-08, 1.0013e-06,\n 1.2166e-09, 2.5423e-07, 1.4312e-07, 1.4374e-07, 2.9543e-07, 4.7605e-07,\n 1.2706e-07, 3.5102e-07, 1.5511e-07, 5.9388e-07, 3.4037e-07, 1.3418e-07,\n 2.0661e-08, 7.9652e-10, 7.0736e-07, 5.1190e-08, 4.0809e-08, 2.7452e-07,\n 6.4623e-08, 9.4834e-09, 1.5329e-06, 4.7214e-08, 5.4048e-08, 2.2999e-09,\n 5.6987e-10, 5.8103e-08, 2.1761e-07, 4.7846e-08, 1.3356e-06, 4.1223e-08,\n 7.9052e-07, 1.0924e-06, 1.5747e-07, 9.5177e-08, 9.6253e-08, 4.0825e-08,\n 1.8209e-08, 3.1546e-07, 1.8230e-07, 1.2890e-07, 1.8624e-07, 1.4362e-08,\n 1.2106e-07, 1.4589e-07, 6.8944e-08, 1.7278e-08, 8.4535e-07, 2.1236e-08,\n 2.6463e-08, 8.4260e-07, 2.7270e-08, 1.7521e-07, 3.1853e-08, 5.3969e-08,\n 1.8239e-06, 1.5129e-07, 6.7972e-07, 1.2140e-08, 7.2223e-09, 2.1322e-06,\n 2.7827e-07, 1.4535e-08, 7.3210e-09, 3.1073e-07, 4.5182e-11, 6.3602e-08,\n 1.1174e-06, 7.0615e-07, 2.8712e-09, 7.1425e-08, 1.7269e-07, 5.8442e-08,\n 6.3197e-07, 4.6225e-07, 7.3896e-08, 6.7360e-08, 3.1401e-07, 4.8572e-10,\n 2.4054e-08, 3.6276e-08, 2.3277e-08, 3.8438e-07, 2.2381e-08, 3.6557e-08,\n 5.0654e-08, 6.6071e-08, 1.4935e-07, 1.1270e-07, 4.3305e-08, 5.1441e-07,\n 1.1440e-08, 1.6864e-06, 6.0739e-10, 3.1206e-08, 1.2656e-08, 4.3360e-08,\n 6.5124e-10, 7.8884e-08, 4.8748e-07, 3.1792e-07, 1.9543e-08, 9.0684e-08,\n 9.1522e-11, 1.5823e-08, 7.3504e-09, 1.3544e-07, 1.0870e-07, 1.3440e-07,\n 7.0785e-08, 1.7730e-08, 5.8003e-07, 3.3806e-07, 3.3361e-09, 8.5282e-10,\n 9.2109e-08, 2.4965e-07, 6.9916e-10, 5.3545e-08, 3.7317e-08, 7.3253e-07,\n 1.4774e-08, 3.7850e-09, 3.9765e-08, 1.7274e-08, 7.3699e-08, 8.5863e-10,\n 1.7438e-07, 5.7051e-08, 4.0328e-07, 4.9120e-08, 2.2512e-08, 1.6312e-07,\n 7.9262e-07, 7.6470e-08, 1.1224e-07, 2.3798e-09, 1.3540e-06, 3.0167e-10,\n 1.6765e-07, 6.8146e-10, 1.8443e-07, 1.8331e-07, 7.7284e-09, 1.1346e-07,\n 1.0695e-06, 6.3408e-08, 7.5411e-08, 9.0167e-07, 3.5745e-06, 4.5401e-08,\n 3.7260e-08, 3.7750e-08, 1.8856e-06, 2.0683e-09, 3.5004e-08, 3.5966e-06,\n 4.8595e-09, 2.7395e-07, 1.2885e-06, 2.2373e-08, 3.8550e-08, 5.6633e-07,\n 1.3789e-07, 4.1079e-08, 7.5170e-07, 1.6199e-07, 9.2727e-09, 5.9861e-08,\n 2.5748e-07, 2.5551e-07, 3.8266e-08, 4.7660e-08, 2.7831e-08, 1.7497e-08,\n 1.5822e-07, 1.1832e-07, 9.6653e-09, 1.6321e-06, 2.1811e-07, 2.0026e-09,\n 1.7894e-07, 7.3657e-07, 9.2054e-07, 8.0237e-07, 2.5349e-08, 8.7621e-08,\n 3.5018e-08, 3.3352e-07, 3.6205e-09, 9.2716e-10], device='cuda:0')" + }, + "50": { + "step": "tensor(1252.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.6905e-13, 7.2910e-12, 1.5155e-09, 1.6067e-11, 1.4434e-10, 1.2255e-09,\n 7.0053e-10, 1.2580e-10, 2.6235e-10, 9.4654e-11, 1.5125e-09, 3.7089e-10,\n 1.6932e-10, 2.5306e-10, 2.4741e-12, 1.4116e-10, 1.7504e-10, 9.8628e-11,\n 3.0490e-11, 3.2783e-11, 4.7852e-10, 8.0924e-12, 4.0363e-11, 1.1170e-10,\n 2.5066e-10, 1.4251e-10, 1.3989e-10, 9.7118e-10, 2.5911e-10, 4.8855e-10,\n 7.2456e-10, 1.4078e-09, 4.8700e-09, 1.2826e-11, 5.8534e-10, 4.1581e-12,\n 6.1651e-10, 3.8089e-11, 3.5873e-09, 4.5192e-11, 1.8683e-10, 1.2279e-11,\n 3.3646e-11, 1.1132e-09, 3.0171e-14, 4.3275e-11, 3.7943e-10, 2.3837e-10,\n 1.0933e-10, 3.4858e-10, 7.6420e-10, 2.4677e-10, 7.4998e-10, 4.3550e-11,\n 9.2647e-11, 2.2404e-09, 8.6699e-12, 4.2198e-10, 1.1057e-09, 1.2353e-09,\n 1.3085e-10, 1.5795e-10, 7.1820e-12, 1.3985e-09, 5.6212e-11, 2.9202e-09,\n 3.4097e-12, 3.2269e-10, 3.6299e-10, 1.7072e-10, 6.9807e-10, 1.1138e-09,\n 3.1177e-10, 4.7172e-10, 2.0328e-10, 9.0003e-10, 7.0837e-10, 1.9364e-10,\n 2.8883e-11, 2.4103e-12, 1.1895e-09, 1.4742e-10, 1.0916e-10, 9.9416e-10,\n 8.1885e-11, 2.5191e-11, 3.2665e-09, 5.2382e-11, 1.0302e-10, 3.5613e-12,\n 3.5952e-14, 1.0871e-10, 3.4934e-10, 9.2027e-11, 2.7776e-09, 5.9734e-11,\n 1.0518e-09, 2.3789e-09, 2.1994e-10, 5.5199e-10, 1.8208e-10, 7.1780e-11,\n 3.1180e-11, 5.5157e-10, 3.3081e-10, 1.8969e-10, 3.1221e-10, 4.3541e-11,\n 1.5900e-10, 1.7290e-10, 1.1027e-10, 2.3648e-11, 1.5029e-09, 1.9059e-11,\n 2.9193e-11, 1.2322e-09, 6.8483e-11, 3.0002e-10, 1.1459e-10, 8.9695e-11,\n 4.4962e-09, 2.1741e-10, 1.5631e-09, 1.0193e-10, 2.1470e-11, 6.0887e-09,\n 7.7667e-10, 2.7926e-11, 2.0440e-11, 7.9264e-10, 1.7660e-13, 1.3539e-10,\n 1.5569e-09, 1.2480e-09, 7.6222e-12, 1.0804e-10, 5.8446e-10, 6.7984e-11,\n 1.1404e-09, 6.0543e-10, 1.4714e-10, 1.1541e-10, 4.0932e-10, 3.4612e-12,\n 3.3890e-11, 5.5459e-11, 1.8387e-11, 1.2344e-09, 7.4442e-11, 5.9922e-11,\n 1.5009e-10, 1.5257e-10, 2.4164e-10, 1.9975e-10, 9.5529e-11, 1.0656e-09,\n 1.9492e-11, 3.9764e-09, 8.3201e-13, 4.3370e-11, 2.8432e-11, 9.9510e-11,\n 2.7417e-12, 1.3370e-10, 1.0196e-09, 5.6609e-10, 2.0421e-11, 1.3816e-10,\n 1.2007e-12, 3.8889e-11, 1.1650e-11, 3.0210e-10, 1.5791e-10, 2.1108e-10,\n 1.3247e-10, 2.5909e-11, 8.9319e-10, 5.4677e-10, 3.7238e-11, 3.1055e-12,\n 3.3295e-10, 7.0058e-10, 1.2629e-12, 1.0333e-10, 1.4545e-10, 8.9407e-10,\n 3.4076e-11, 8.3891e-12, 1.2428e-10, 3.0150e-11, 1.1227e-10, 4.3421e-12,\n 2.0763e-10, 8.5591e-11, 7.5009e-10, 9.0297e-11, 3.2866e-11, 2.3724e-10,\n 2.5953e-09, 1.1966e-10, 2.3486e-10, 1.4452e-11, 1.5982e-09, 4.3280e-12,\n 4.1226e-10, 5.7468e-13, 6.0109e-10, 2.0973e-10, 5.1485e-11, 3.3391e-10,\n 3.1920e-09, 9.6117e-11, 1.8347e-10, 9.3581e-10, 5.9203e-09, 7.7667e-11,\n 7.9610e-11, 3.4329e-11, 5.6625e-09, 6.5028e-12, 2.8417e-11, 7.9057e-09,\n 1.2669e-11, 9.1391e-10, 2.7608e-09, 3.6215e-11, 7.3023e-11, 1.4705e-09,\n 1.3585e-10, 5.8832e-11, 1.4617e-09, 3.2594e-10, 1.6061e-11, 7.9389e-11,\n 4.2372e-10, 6.2512e-10, 7.6596e-11, 7.9474e-11, 4.3523e-11, 4.1201e-11,\n 2.0376e-10, 2.4167e-10, 4.4176e-11, 3.6978e-09, 3.4152e-10, 1.8055e-11,\n 2.6974e-10, 1.6258e-09, 1.2714e-09, 1.6451e-09, 4.0357e-11, 1.4538e-10,\n 3.0051e-11, 4.6796e-10, 7.7214e-12, 5.1225e-12], device='cuda:0')" + }, + "51": { + "step": "tensor(1252.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.7178e-12, 8.5265e-12, 2.7230e-09, 9.0802e-12, 4.4355e-10, 1.3681e-09,\n 6.8832e-10, 1.7993e-10, 8.6404e-11, 1.6716e-10, 1.9648e-09, 5.0991e-10,\n 1.0670e-10, 4.0451e-10, 3.1395e-12, 1.2328e-10, 3.0426e-10, 1.4355e-10,\n 3.0835e-11, 5.6005e-11, 7.2453e-10, 1.5176e-11, 8.1496e-11, 2.3947e-10,\n 3.8240e-10, 1.9729e-10, 2.9935e-10, 5.9354e-10, 2.0248e-10, 1.0554e-09,\n 1.4364e-09, 1.6602e-09, 9.3225e-09, 2.7308e-11, 4.3885e-10, 9.4605e-12,\n 7.8873e-10, 4.3053e-11, 3.1078e-09, 9.8792e-11, 4.8653e-10, 1.5057e-11,\n 5.5194e-11, 2.0068e-09, 9.3192e-13, 9.1429e-11, 4.3268e-10, 3.2496e-10,\n 7.6510e-11, 5.8554e-10, 1.0578e-09, 2.9689e-10, 1.0971e-09, 9.2427e-11,\n 1.1893e-10, 2.4268e-09, 1.5338e-11, 3.7551e-10, 1.9740e-09, 2.4938e-09,\n 3.2297e-10, 2.6856e-10, 1.3809e-11, 1.9893e-09, 1.0802e-10, 2.5727e-09,\n 4.1349e-12, 6.5460e-10, 3.9878e-10, 3.8649e-10, 8.2354e-10, 1.2885e-09,\n 3.0501e-10, 9.4850e-10, 4.5367e-10, 1.5164e-09, 9.2259e-10, 3.7710e-10,\n 6.0742e-11, 2.3957e-12, 1.8626e-09, 1.4106e-10, 1.1968e-10, 7.6211e-10,\n 1.5652e-10, 3.2405e-11, 4.0715e-09, 1.2859e-10, 1.7743e-10, 2.7171e-12,\n 2.3741e-12, 1.5879e-10, 5.7714e-10, 1.3833e-10, 3.4349e-09, 1.0097e-10,\n 2.0439e-09, 2.8046e-09, 3.7170e-10, 3.1138e-10, 2.7889e-10, 1.2737e-10,\n 5.7255e-11, 8.6450e-10, 5.2544e-10, 3.4150e-10, 5.2092e-10, 5.5831e-11,\n 3.0370e-10, 3.8706e-10, 2.0374e-10, 4.8747e-11, 2.2136e-09, 5.1074e-11,\n 8.0188e-11, 2.3445e-09, 9.2418e-11, 4.5967e-10, 1.2323e-10, 1.5894e-10,\n 4.9430e-09, 4.3881e-10, 1.7555e-09, 1.2100e-10, 2.8193e-11, 5.4534e-09,\n 6.8518e-10, 4.5434e-11, 2.6544e-11, 8.4947e-10, 5.8973e-13, 1.9438e-10,\n 2.7977e-09, 1.7679e-09, 1.0930e-11, 1.9385e-10, 5.0925e-10, 1.4146e-10,\n 1.6126e-09, 1.1959e-09, 2.1198e-10, 2.0124e-10, 8.0479e-10, 1.4477e-11,\n 7.4819e-11, 1.0212e-10, 5.8684e-11, 1.0197e-09, 6.5932e-11, 1.0437e-10,\n 1.4809e-10, 1.5294e-10, 4.0620e-10, 2.7899e-10, 1.4049e-10, 1.4176e-09,\n 3.4189e-11, 4.4444e-09, 1.7086e-12, 9.8562e-11, 4.2098e-11, 1.3539e-10,\n 6.3049e-12, 2.4731e-10, 1.2886e-09, 8.2312e-10, 5.1710e-11, 2.8357e-10,\n 4.3008e-12, 5.2946e-11, 1.7800e-11, 3.9488e-10, 3.0107e-10, 3.9571e-10,\n 2.0675e-10, 5.4496e-11, 1.5194e-09, 9.3989e-10, 4.6521e-11, 4.2340e-12,\n 2.6005e-10, 7.2639e-10, 2.4738e-12, 1.5415e-10, 1.2594e-10, 2.0360e-09,\n 3.9123e-11, 1.5619e-11, 1.2692e-10, 5.1148e-11, 2.4046e-10, 1.3289e-11,\n 4.2778e-10, 1.6748e-10, 9.9478e-10, 1.5486e-10, 7.4036e-11, 4.2597e-10,\n 1.9826e-09, 2.3842e-10, 3.1979e-10, 2.6726e-11, 3.3673e-09, 1.0519e-11,\n 4.0055e-10, 8.0965e-13, 5.1333e-10, 4.5662e-10, 5.5969e-11, 3.4369e-10,\n 2.9001e-09, 1.3291e-10, 2.3321e-10, 2.5594e-09, 9.4386e-09, 1.4279e-10,\n 1.0247e-10, 8.6839e-11, 5.0045e-09, 1.0161e-11, 8.1889e-11, 9.6081e-09,\n 1.7302e-11, 7.3192e-10, 3.4326e-09, 4.9832e-11, 1.0961e-10, 1.5495e-09,\n 4.0267e-10, 1.1999e-10, 2.0568e-09, 4.3483e-10, 2.0238e-11, 1.6793e-10,\n 7.3553e-10, 6.9368e-10, 1.1577e-10, 1.5129e-10, 7.7977e-11, 4.8862e-11,\n 4.4279e-10, 3.4491e-10, 5.9811e-11, 4.2917e-09, 5.9998e-10, 2.5645e-11,\n 4.9311e-10, 1.9843e-09, 2.4717e-09, 2.2407e-09, 7.1018e-11, 2.3691e-10,\n 8.5265e-11, 9.1337e-10, 1.1109e-11, 3.8461e-12], device='cuda:0')" + }, + "52": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.2930e-13, 3.3677e-13, 6.2536e-13, ..., 4.0701e-14, 1.9296e-14,\n 3.0661e-12],\n [9.8088e-14, 2.8970e-13, 9.6089e-13, ..., 3.7956e-12, 2.2670e-13,\n 5.0259e-13],\n [5.9773e-13, 2.1491e-14, 3.1756e-13, ..., 1.8341e-12, 1.8942e-14,\n 2.6669e-14],\n ...,\n [8.3221e-10, 1.3545e-08, 1.8412e-08, ..., 3.1511e-08, 1.6084e-08,\n 7.9560e-09],\n [2.4794e-10, 4.1078e-09, 5.6095e-09, ..., 9.0654e-09, 5.1054e-09,\n 2.6476e-09],\n [1.0193e-10, 1.1980e-09, 1.7813e-09, ..., 2.9303e-09, 1.2655e-09,\n 6.2704e-10]], device='cuda:0')" + }, + "53": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.3805e-12, 2.7064e-12, 8.1324e-12, 9.8798e-13, 4.0378e-12, 6.4448e-12,\n 1.8372e-12, 7.8436e-12, 1.9193e-11, 6.4870e-11, 2.3451e-12, 9.7381e-13,\n 2.7040e-11, 7.3874e-12, 5.0047e-11, 2.9547e-14, 5.0341e-12, 1.1304e-12,\n 1.3282e-11, 3.6631e-11, 3.1298e-12, 8.1735e-15, 1.4013e-13, 7.7884e-13,\n 4.7357e-12, 3.2885e-12, 7.0766e-12, 9.9049e-12, 9.1632e-13, 7.5312e-12,\n 2.0696e-11, 2.3994e-12, 1.4298e-11, 2.2629e-11, 5.0797e-11, 3.1224e-11,\n 8.0226e-12, 8.5366e-12, 1.5683e-11, 3.5205e-12, 6.2587e-13, 1.0876e-11,\n 1.1373e-11, 5.4916e-14, 6.8664e-11, 3.2665e-11, 3.1085e-13, 9.7862e-14,\n 3.7047e-12, 1.1685e-11, 1.2746e-11, 7.9052e-12, 9.0455e-12, 4.1092e-12,\n 3.7447e-11, 1.3988e-11, 2.0786e-12, 3.8740e-11, 2.9684e-13, 7.3815e-12,\n 1.0934e-11, 2.7555e-12, 2.3017e-13, 7.1222e-11, 1.7930e-12, 4.2777e-11,\n 2.6527e-13, 4.4786e-11, 8.2884e-12, 8.4211e-11, 9.9418e-11, 2.4095e-12,\n 2.3981e-11, 6.6218e-11, 4.7202e-11, 4.3890e-12, 1.1675e-10, 4.4706e-11,\n 1.6385e-10, 4.4130e-13, 4.6774e-12, 1.4029e-11, 5.5540e-12, 1.8742e-11,\n 5.9249e-11, 2.1807e-12, 4.4125e-12, 1.4295e-11, 1.7046e-10, 4.0927e-12,\n 3.2325e-11, 2.3006e-13, 1.3414e-10, 1.2896e-10, 2.2121e-11, 1.8650e-11,\n 1.4741e-13, 2.3861e-11, 2.4508e-11, 1.8647e-11, 5.4359e-11, 1.3267e-13,\n 7.0374e-13, 2.7764e-13, 1.4327e-13, 2.2681e-12, 5.7716e-13, 3.3009e-12,\n 1.3414e-11, 4.2548e-11, 6.5330e-12, 3.4117e-12, 1.9561e-12, 1.0763e-11,\n 2.9615e-12, 4.1206e-11, 1.5385e-11, 3.0028e-11, 4.6874e-11, 1.5840e-11,\n 6.3584e-13, 1.5031e-13, 2.9929e-11, 4.0423e-12, 2.7707e-12, 1.5955e-12,\n 5.1955e-11, 8.3817e-12, 1.2892e-12, 4.7587e-11, 2.0306e-12, 5.7446e-12,\n 8.1333e-12, 3.1507e-11, 5.9600e-11, 5.4952e-13, 2.2838e-12, 4.5355e-12,\n 3.2540e-12, 8.5390e-14, 1.3468e-11, 3.7458e-12, 1.0957e-12, 4.9242e-14,\n 1.0857e-11, 1.7258e-11, 2.5736e-11, 1.1282e-11, 2.0005e-12, 8.2152e-12,\n 1.9262e-13, 2.7312e-11, 2.8379e-13, 8.2323e-13, 4.1002e-11, 2.0227e-11,\n 6.5927e-14, 4.0680e-14, 2.7177e-12, 1.8191e-11, 8.1059e-11, 5.0280e-12,\n 4.8703e-12, 2.5765e-12, 7.1337e-12, 8.4138e-12, 6.8731e-12, 1.3435e-11,\n 1.5574e-13, 1.6823e-12, 1.9154e-11, 3.8573e-12, 4.8624e-12, 2.8448e-11,\n 1.7878e-14, 2.0560e-11, 2.6943e-11, 2.4516e-11, 2.0808e-13, 1.3652e-12,\n 1.5618e-12, 9.9253e-12, 2.5418e-14, 6.4303e-13, 4.6334e-12, 5.4118e-11,\n 5.1712e-12, 8.4464e-13, 2.2737e-11, 2.5778e-12, 2.2218e-12, 1.8644e-11,\n 1.3129e-11, 1.5637e-11, 8.5005e-12, 2.4176e-11, 2.9179e-14, 1.7593e-12,\n 1.3120e-11, 1.4922e-11, 2.4809e-13, 3.8441e-11, 1.8365e-12, 2.2219e-11,\n 1.2066e-12, 9.4618e-13, 3.6978e-13, 7.2779e-12, 4.6827e-11, 1.5022e-12,\n 7.6516e-12, 1.2361e-11, 2.9612e-11, 3.1475e-12, 1.6360e-13, 3.5257e-12,\n 2.0567e-12, 1.9968e-12, 9.9869e-13, 2.2313e-12, 5.6911e-13, 1.5478e-10,\n 4.1095e-11, 2.5544e-11, 7.7141e-14, 8.4773e-13, 8.4603e-12, 2.7331e-11,\n 8.1049e-11, 2.5866e-11, 9.1206e-13, 4.9672e-12, 2.8738e-11, 2.5146e-13,\n 1.6463e-11, 1.2550e-11, 3.3951e-11, 7.4178e-12, 5.3552e-12, 1.1049e-12,\n 2.2133e-12, 9.8487e-13, 2.6168e-11, 6.7452e-12, 6.4937e-12, 4.5877e-11,\n 8.0871e-12, 7.7435e-14, 1.7692e-11, 1.7946e-11, 7.4812e-11, 1.5348e-11,\n 2.2026e-11, 1.2046e-11, 1.8886e-14, 9.5404e-12, 5.1681e-26, 1.2656e-27,\n 8.0507e-27, 1.4645e-27, 8.2712e-27, 2.4624e-27, 1.3296e-26, 3.2054e-28,\n 7.2611e-27, 1.1293e-26, 1.0567e-26, 2.7145e-28, 6.8606e-30, 5.7677e-28,\n 3.2438e-27, 2.4242e-28, 5.6323e-27, 4.3143e-28, 5.1368e-27, 1.2361e-27,\n 7.9424e-29, 3.9836e-27, 2.2530e-28, 4.6423e-27, 6.5725e-29, 3.9724e-27,\n 8.7763e-27, 3.7954e-28, 2.2908e-26, 7.8134e-27, 1.2710e-27, 1.8308e-27,\n 2.3449e-27, 1.4032e-26, 1.3099e-27, 1.0797e-27, 4.3889e-28, 2.3786e-27,\n 6.4068e-27, 1.2668e-27, 2.2466e-28, 2.6049e-27, 5.2390e-28, 1.1586e-27,\n 1.0950e-27, 9.8759e-27, 4.0423e-27, 1.3542e-27, 1.4519e-27, 9.0156e-28,\n 1.8049e-27, 5.9806e-27, 6.0322e-27, 7.5603e-27, 3.0791e-27, 7.0809e-28,\n 6.2953e-27, 1.0836e-26, 1.4852e-26, 6.4825e-27, 5.2357e-27, 3.6288e-27,\n 2.4965e-28, 2.2470e-27, 1.6932e-29, 8.7699e-27, 2.3221e-26, 7.2386e-27,\n 1.9557e-28, 1.1622e-27, 7.0664e-27, 2.3630e-27, 3.8323e-27, 2.3050e-28,\n 1.9741e-27, 1.0676e-27, 3.7560e-27, 1.0581e-26, 3.6450e-26, 1.3353e-26,\n 1.4464e-27, 1.0958e-26, 1.3274e-27, 1.7711e-26, 2.1670e-27, 2.0000e-27,\n 1.7805e-26, 6.1904e-27, 3.2077e-27, 4.1045e-28, 7.1691e-28, 3.9027e-27,\n 4.0934e-27, 8.6136e-28, 3.3280e-27, 2.3665e-28, 3.1419e-28, 1.0285e-26,\n 5.4030e-28, 3.0961e-27, 4.6359e-27, 3.2337e-28, 2.8248e-27, 2.6693e-27,\n 1.0714e-27, 1.4701e-27, 9.1743e-28, 1.5314e-26, 6.2206e-27, 1.1076e-27,\n 8.4715e-28, 4.5054e-27, 4.8786e-27, 9.1623e-27, 3.8589e-27, 1.8057e-26,\n 4.9588e-27, 6.7179e-27, 1.5622e-26, 1.9181e-26, 1.1606e-26, 6.1133e-27,\n 7.2123e-27, 3.5318e-27, 8.0869e-28, 7.2445e-28, 4.0231e-26, 4.7263e-29,\n 7.6419e-27, 4.5496e-28, 1.7874e-27, 2.1785e-27, 5.6086e-27, 1.8123e-27,\n 4.7531e-28, 5.3379e-31, 1.2205e-28, 7.1795e-28, 1.6115e-27, 4.1880e-27,\n 3.9828e-28, 9.4616e-27, 3.6250e-26, 4.0921e-27, 1.1708e-26, 1.5601e-27,\n 6.4014e-27, 1.7656e-26, 8.1355e-27, 5.4522e-28, 5.8711e-28, 2.3778e-26,\n 4.0940e-27, 4.6026e-27, 2.1346e-28, 2.9286e-27, 1.0138e-26, 3.3198e-27,\n 1.7396e-29, 4.5788e-27, 1.3476e-27, 1.3117e-27, 7.4461e-27, 6.8829e-28,\n 4.8115e-28, 4.1762e-27, 2.7617e-27, 1.7685e-28, 1.6260e-27, 1.8931e-26,\n 1.1181e-27, 3.8852e-26, 1.9767e-26, 2.7854e-28, 6.4402e-27, 2.0423e-26,\n 2.0209e-26, 7.6392e-28, 9.5860e-28, 1.6744e-27, 1.1754e-27, 5.0564e-27,\n 1.0015e-28, 5.0989e-27, 2.6084e-27, 6.5613e-28, 2.3808e-27, 4.4037e-29,\n 8.2445e-27, 5.0491e-29, 1.0140e-26, 5.6604e-27, 6.2445e-27, 1.4185e-26,\n 1.2930e-26, 4.6870e-28, 2.6421e-26, 2.3999e-27, 6.1156e-27, 1.3206e-27,\n 3.9070e-29, 2.2141e-27, 3.0468e-27, 5.0823e-27, 5.8683e-28, 1.8215e-27,\n 1.1972e-28, 2.7628e-27, 4.1555e-27, 1.8203e-28, 8.1477e-27, 1.5472e-28,\n 4.6718e-27, 1.7416e-27, 8.9128e-27, 4.0794e-27, 2.3235e-28, 1.8459e-27,\n 4.4075e-27, 1.2022e-27, 1.3331e-27, 2.4289e-27, 4.7527e-27, 2.7902e-27,\n 3.6761e-27, 1.0633e-28, 4.7637e-27, 5.1222e-27, 1.3682e-26, 1.3890e-26,\n 3.4540e-28, 2.5426e-27, 3.6268e-28, 2.4244e-27, 5.1891e-29, 6.6877e-28,\n 8.0262e-28, 4.2827e-27, 1.4736e-28, 2.1720e-28, 9.5469e-28, 1.0670e-26,\n 7.6173e-28, 2.1880e-27, 5.8210e-28, 7.2911e-27, 1.5042e-26, 1.5661e-30,\n 1.4317e-26, 1.0076e-26, 1.3788e-26, 6.4891e-29, 2.2694e-27, 1.5552e-26,\n 6.8510e-28, 3.0688e-28, 1.9741e-08, 2.9818e-08, 3.1371e-10, 2.5055e-07,\n 6.8171e-08, 1.7102e-08, 4.3110e-08, 1.2146e-07, 2.4175e-09, 3.0194e-08,\n 1.9971e-07, 4.7390e-10, 1.4042e-08, 5.0886e-08, 2.5256e-07, 5.7205e-09,\n 7.4770e-08, 2.5121e-07, 1.7260e-08, 2.8902e-08, 8.0456e-08, 1.4231e-10,\n 1.0581e-07, 3.2324e-07, 2.0374e-08, 3.5230e-07, 7.4534e-09, 5.6349e-08,\n 4.6325e-08, 2.1224e-07, 4.5310e-08, 4.9152e-10, 7.9918e-09, 5.2018e-10,\n 9.7323e-08, 1.3495e-07, 1.3520e-07, 2.0762e-08, 3.9543e-09, 1.3219e-07,\n 2.5071e-07, 1.2593e-07, 1.4904e-07, 1.1630e-07, 5.0517e-08, 3.3111e-10,\n 9.0524e-08, 2.5779e-08, 1.2357e-07, 8.8062e-08, 4.9287e-07, 9.5858e-08,\n 1.6336e-08, 7.5934e-10, 4.2182e-08, 4.2073e-08, 2.7014e-07, 2.3054e-08,\n 6.4615e-08, 6.5285e-08, 6.6946e-08, 2.5697e-08, 2.4719e-08, 1.8158e-08,\n 7.0205e-09, 5.6560e-10, 1.4686e-07, 5.4765e-08, 7.4472e-08, 9.6591e-08,\n 6.9832e-09, 2.2654e-09, 1.3796e-09, 3.2762e-07, 1.0606e-08, 4.4741e-07,\n 8.5709e-08, 2.4843e-07, 1.1399e-07, 1.1534e-08, 7.2232e-08, 8.4328e-07,\n 2.9504e-08, 1.8642e-07, 6.7726e-09, 2.5339e-07, 9.1557e-08, 5.8028e-09,\n 1.6255e-09, 1.0142e-07, 1.4751e-07, 6.8805e-08, 4.9797e-10, 1.4124e-07,\n 3.6170e-07, 1.7826e-08, 7.6169e-09, 1.4783e-07, 1.3918e-08, 1.3992e-08,\n 1.8731e-08, 8.8032e-08, 2.3569e-09, 1.1555e-08, 3.1221e-08, 3.1174e-07,\n 1.8865e-07, 2.6857e-08, 2.8579e-09, 3.3495e-08, 1.5101e-08, 5.0373e-08,\n 9.4129e-08, 5.2151e-09, 7.2276e-08, 1.0059e-07, 2.6601e-08, 1.2566e-08,\n 1.7757e-07, 3.4807e-09, 1.2390e-08, 6.8112e-07, 1.2461e-07, 4.3027e-07,\n 1.5525e-07, 5.1489e-08, 3.8283e-09, 6.2970e-09, 7.7801e-08, 8.7216e-08,\n 4.8663e-08, 6.8801e-08, 1.7221e-08, 2.9136e-10, 7.4458e-08, 2.7850e-08,\n 3.9992e-08, 1.8442e-07, 2.0811e-08, 4.0683e-09, 3.2918e-11, 1.5251e-07,\n 6.6011e-08, 1.5385e-09, 4.5343e-08, 3.6653e-08, 9.5636e-09, 3.2614e-07,\n 1.3928e-08, 2.5167e-10, 4.3062e-08, 4.8713e-07, 2.5298e-08, 2.0659e-08,\n 1.9318e-07, 5.7944e-09, 2.1956e-09, 3.6506e-08, 3.1759e-11, 6.1904e-08,\n 1.5583e-07, 2.1993e-07, 6.3584e-09, 4.1581e-08, 5.7952e-08, 1.0425e-07,\n 4.9477e-08, 2.4017e-08, 3.9915e-09, 2.6075e-07, 1.6078e-08, 3.3641e-07,\n 2.1281e-07, 1.6057e-08, 5.4207e-08, 1.4079e-08, 8.6897e-10, 1.7836e-08,\n 1.4545e-07, 5.6102e-10, 1.0116e-07, 2.8908e-07, 7.6346e-10, 6.8759e-08,\n 1.9102e-07, 1.7971e-07, 4.8584e-08, 2.8078e-08, 2.1932e-09, 1.8486e-07,\n 3.3701e-09, 6.2589e-08, 2.8368e-07, 2.9363e-07, 5.1111e-08, 8.6089e-08,\n 1.5358e-07, 3.7125e-07, 3.5849e-09, 1.6467e-07, 2.0462e-08, 1.0312e-08,\n 3.8234e-07, 2.6229e-08, 1.2117e-08, 4.6776e-08, 1.5164e-08, 2.7510e-08,\n 4.2749e-08, 1.4014e-08, 8.0728e-08, 6.0557e-08, 1.9434e-07, 1.5068e-08,\n 2.2478e-09, 6.1159e-07, 1.9252e-08, 3.6212e-07, 7.3469e-10, 3.9592e-08,\n 3.0981e-08, 1.3762e-07, 1.5711e-07, 9.2762e-08, 2.0082e-07, 7.1155e-09,\n 1.2734e-07, 1.6874e-08, 7.8473e-07, 5.0801e-09, 1.8532e-08, 7.7144e-08,\n 7.8833e-08, 4.6066e-08, 1.6102e-07, 5.8267e-08, 1.6340e-08, 1.4501e-08,\n 9.8361e-08, 5.8929e-10, 1.4328e-07, 5.4075e-09, 1.3292e-11, 2.6820e-08,\n 1.3641e-07, 1.8011e-09, 3.6048e-08, 3.5134e-09, 4.7454e-09, 3.1854e-08,\n 1.1403e-07, 8.2362e-08, 6.1980e-08, 2.6902e-07, 8.1296e-08, 2.3720e-08],\n device='cuda:0')" + }, + "54": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.4434e-09, 1.7839e-10, 1.2424e-10, ..., 4.1215e-10, 2.1877e-09,\n 2.8533e-09],\n [2.8695e-09, 1.3143e-10, 9.4008e-11, ..., 3.4807e-10, 1.2431e-09,\n 1.6278e-09],\n [5.2489e-08, 1.1901e-09, 1.0407e-09, ..., 1.7223e-09, 2.0985e-08,\n 2.6356e-08],\n ...,\n [1.6510e-08, 4.1339e-10, 3.0003e-10, ..., 7.5960e-10, 6.6014e-09,\n 8.7650e-09],\n [4.0952e-10, 3.1015e-11, 9.5972e-12, ..., 1.5631e-10, 1.6103e-10,\n 1.5033e-10],\n [9.2564e-10, 7.4983e-11, 5.8380e-11, ..., 2.5131e-10, 4.5530e-10,\n 4.7888e-10]], device='cuda:0')" + }, + "55": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([9.7427e-08, 5.2193e-08, 9.2730e-07, 4.9945e-07, 3.7437e-07, 5.1105e-10,\n 1.2446e-07, 1.2918e-07, 2.2319e-07, 2.2275e-08, 4.1187e-07, 5.4207e-09,\n 3.2647e-07, 1.9129e-10, 3.2127e-07, 7.8444e-10, 1.7226e-08, 1.5221e-07,\n 1.4243e-07, 2.4401e-07, 3.4944e-07, 5.3406e-08, 4.0863e-07, 1.8428e-07,\n 1.0291e-06, 2.7800e-07, 2.2149e-07, 5.1656e-08, 2.0655e-07, 4.9759e-07,\n 2.6058e-07, 9.0863e-07, 1.0113e-07, 1.1706e-08, 4.1967e-08, 4.5034e-08,\n 1.1552e-07, 3.8261e-07, 2.3706e-07, 4.8787e-08, 1.3213e-07, 3.7046e-07,\n 3.8959e-09, 1.4381e-07, 2.5622e-07, 9.0186e-08, 9.4469e-08, 2.9799e-07,\n 1.1134e-06, 5.5526e-08, 4.2286e-07, 2.1584e-07, 1.0534e-07, 8.9367e-08,\n 2.3222e-09, 8.1666e-07, 1.5420e-08, 3.4296e-07, 8.3493e-08, 8.0518e-07,\n 4.5733e-07, 3.1176e-07, 9.8574e-09, 1.6608e-07, 7.1366e-08, 7.8576e-08,\n 8.6404e-08, 8.1865e-07, 8.1955e-09, 3.8843e-07, 2.9373e-09, 3.1463e-07,\n 5.2072e-09, 3.2246e-07, 1.9626e-09, 8.6147e-08, 7.1818e-07, 4.1084e-08,\n 3.0371e-07, 5.7687e-07, 2.6702e-07, 1.8878e-07, 2.1761e-07, 3.0014e-07,\n 2.7184e-07, 3.8336e-07, 1.3160e-07, 1.8474e-07, 2.2536e-07, 1.2369e-08,\n 6.4542e-08, 8.1075e-08, 1.4338e-06, 3.9101e-08, 1.9254e-07, 2.0125e-10,\n 1.0323e-08, 3.2797e-09, 7.2564e-10, 2.2313e-08, 1.4333e-06, 1.0201e-08,\n 1.3574e-07, 1.1914e-06, 9.3816e-08, 2.4867e-08, 3.9060e-08, 1.0036e-08,\n 2.7437e-07, 6.5249e-07, 3.6990e-09, 1.9181e-07, 7.6413e-08, 7.4743e-08,\n 2.4378e-07, 5.7536e-08, 1.4273e-08, 8.0696e-09, 1.8574e-07, 1.4480e-07,\n 2.8497e-09, 2.0361e-07, 1.4403e-08, 1.4520e-06, 5.9560e-07, 1.4949e-06,\n 5.5694e-07, 3.5024e-08, 3.0969e-08, 8.6664e-08, 6.7288e-07, 1.1024e-08,\n 6.9186e-08, 3.1050e-07, 1.0874e-09, 4.3597e-09, 1.9325e-07, 4.5478e-07,\n 2.2255e-07, 5.3648e-08, 9.9846e-08, 2.6217e-07, 1.3643e-07, 1.2648e-07,\n 1.3721e-07, 3.0364e-07, 1.1325e-07, 1.7292e-07, 1.2241e-07, 7.9339e-07,\n 4.4155e-10, 9.0914e-08, 1.1112e-08, 9.1860e-09, 1.9179e-06, 8.3493e-07,\n 4.1598e-08, 7.7987e-07, 1.7706e-07, 1.0881e-06, 2.1953e-08, 2.2485e-08,\n 1.4442e-07, 6.1340e-10, 1.0600e-06, 1.7781e-07, 2.3401e-07, 3.7440e-08,\n 1.6731e-07, 7.6755e-07, 1.0512e-08, 1.8175e-07, 2.4728e-07, 6.7934e-08,\n 7.5378e-07, 1.8483e-07, 1.4735e-07, 5.1871e-09, 1.3245e-07, 3.9901e-08,\n 3.5192e-07, 1.6310e-07, 2.4625e-08, 5.0259e-08, 4.8644e-09, 9.7327e-08,\n 4.2051e-08, 6.2123e-07, 1.5360e-08, 5.8888e-08, 5.0887e-08, 1.1234e-08,\n 3.6767e-08, 3.9913e-07, 1.4469e-07, 1.2697e-07, 3.8717e-07, 2.1117e-07,\n 2.2724e-07, 1.0568e-07, 1.1303e-07, 4.7550e-07, 2.9045e-07, 7.8405e-09,\n 4.9996e-07, 1.7004e-08, 3.8506e-07, 5.0939e-07, 5.2427e-07, 5.0409e-07,\n 1.4442e-07, 1.0930e-07, 4.6672e-07, 1.8724e-08, 4.2709e-07, 2.0230e-07,\n 2.7237e-10, 2.9442e-07, 7.0228e-10, 2.1976e-07, 1.2667e-07, 3.1634e-06,\n 3.6989e-07, 1.1303e-06, 1.0309e-08, 5.1111e-08, 2.3843e-07, 1.8287e-08,\n 6.5854e-08, 3.6715e-09, 1.1337e-07, 2.8966e-07, 3.0414e-07, 4.5633e-08,\n 1.0813e-06, 1.4162e-07, 6.2555e-08, 6.8277e-09, 6.1479e-08, 7.9651e-10,\n 3.4042e-08, 2.5733e-07, 1.8850e-08, 1.7226e-08, 1.2391e-07, 7.7653e-07,\n 1.2969e-07, 4.0378e-07, 1.9765e-07, 9.6381e-08, 8.1458e-07, 1.8939e-08,\n 6.3938e-07, 2.9462e-07, 5.5584e-09, 1.6220e-08], device='cuda:0')" + }, + "56": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.3897e-06, 5.1840e-07, 1.4343e-06, ..., 6.2110e-07, 5.3174e-08,\n 8.1584e-09],\n [2.1533e-08, 5.6116e-09, 1.2739e-08, ..., 5.1202e-09, 5.0719e-10,\n 1.0081e-10],\n [2.9327e-08, 6.1882e-09, 1.7499e-08, ..., 7.7531e-09, 6.5603e-10,\n 9.4048e-11],\n ...,\n [2.3976e-08, 5.0555e-09, 1.4473e-08, ..., 6.1616e-09, 5.6814e-10,\n 1.1808e-10],\n [2.9702e-08, 6.4139e-09, 1.7854e-08, ..., 7.7272e-09, 5.5072e-10,\n 5.5245e-11],\n [2.6357e-08, 5.5610e-09, 1.6484e-08, ..., 7.0557e-09, 6.3659e-10,\n 1.6132e-10]], device='cuda:0')" + }, + "57": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([6.1598e-05, 5.5846e-07, 7.5429e-07, 7.8274e-07, 8.9742e-07, 1.0309e-06,\n 8.1960e-07, 6.0696e-07, 7.6664e-07, 6.8128e-07], device='cuda:0')" + }, + "58": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.3894e-06, 5.1831e-07, 1.4339e-06, ..., 6.2110e-07, 5.3053e-08,\n 8.0760e-09],\n [2.1529e-08, 5.6102e-09, 1.2733e-08, ..., 5.1202e-09, 5.0541e-10,\n 9.9605e-11],\n [2.9324e-08, 6.1871e-09, 1.7495e-08, ..., 7.7530e-09, 6.5469e-10,\n 9.3129e-11],\n ...,\n [2.3972e-08, 5.0539e-09, 1.4466e-08, ..., 6.1616e-09, 5.6614e-10,\n 1.1671e-10],\n [2.9701e-08, 6.4135e-09, 1.7852e-08, ..., 7.7272e-09, 5.5023e-10,\n 5.4919e-11],\n [2.6350e-08, 5.5581e-09, 1.6472e-08, ..., 7.0557e-09, 6.3303e-10,\n 1.5891e-10]], device='cuda:0')" + }, + "59": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([6.1598e-05, 5.5846e-07, 7.5429e-07, 7.8273e-07, 8.9742e-07, 1.0309e-06,\n 8.1960e-07, 6.0696e-07, 7.6664e-07, 6.8127e-07], device='cuda:0')" + }, + "60": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.3897e-06, 5.1840e-07, 1.4343e-06, ..., 6.2110e-07, 5.3174e-08,\n 8.1584e-09],\n [2.1533e-08, 5.6116e-09, 1.2739e-08, ..., 5.1202e-09, 5.0719e-10,\n 1.0081e-10],\n [2.9327e-08, 6.1882e-09, 1.7499e-08, ..., 7.7531e-09, 6.5603e-10,\n 9.4048e-11],\n ...,\n [2.3976e-08, 5.0555e-09, 1.4473e-08, ..., 6.1616e-09, 5.6814e-10,\n 1.1808e-10],\n [2.9702e-08, 6.4139e-09, 1.7854e-08, ..., 7.7272e-09, 5.5072e-10,\n 5.5245e-11],\n [2.6357e-08, 5.5610e-09, 1.6484e-08, ..., 7.0557e-09, 6.3659e-10,\n 1.6132e-10]], device='cuda:0')" + }, + "61": { "step": "tensor(1252.)", - "exp_avg": "tensor([[-5.8383e-05, -1.1615e-05, 4.0389e-05, ..., -1.3697e-14,\n 1.0380e-05, 4.2261e-05],\n [ 1.4788e-04, -5.2814e-05, -3.6503e-05, ..., -2.3298e-15,\n -2.8277e-07, 1.0042e-04],\n [-8.2803e-06, 1.6760e-05, -1.1653e-05, ..., -3.3608e-14,\n -1.1686e-05, -4.6015e-05],\n ...,\n [-5.1200e-05, -1.8818e-05, 6.5683e-05, ..., -4.6589e-14,\n -1.0937e-05, -2.0980e-05],\n [-7.7796e-05, 3.0357e-05, -1.1169e-04, ..., -1.1776e-14,\n -2.7905e-05, 2.9225e-05],\n [-4.8278e-05, 5.2970e-05, -3.7701e-05, ..., 4.5509e-15,\n -5.1824e-06, 2.2141e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.3002e-08, 1.6401e-08, 1.4088e-08, ..., 9.9086e-11, 3.0549e-10,\n 2.1370e-08],\n [4.2202e-08, 5.6608e-08, 5.1256e-08, ..., 1.4635e-11, 3.1188e-10,\n 5.3633e-08],\n [2.5887e-08, 3.9902e-08, 3.0881e-08, ..., 2.0723e-10, 8.7301e-10,\n 2.5643e-08],\n ...,\n [1.4298e-07, 3.6782e-08, 4.3443e-08, ..., 1.6855e-10, 2.2267e-09,\n 3.4967e-08],\n [4.6970e-08, 5.9847e-08, 6.4003e-08, ..., 9.9030e-11, 4.1500e-09,\n 4.1659e-08],\n [4.1386e-08, 5.1432e-08, 6.1879e-08, ..., 9.0746e-10, 1.9308e-10,\n 4.4062e-08]], device='cuda:0')" + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([6.1598e-05, 5.5846e-07, 7.5429e-07, 7.8274e-07, 8.9742e-07, 1.0309e-06,\n 8.1960e-07, 6.0696e-07, 7.6664e-07, 6.8128e-07], device='cuda:0')" } }, "param_groups": [ { - "lr": 0.00975530705321762, + "lr": 0.00904518046337755, "name": "shared", "betas": [ 0.9, @@ -52,7 +212,7 @@ ] }, { - "lr": 0.00975530705321762, + "lr": 0.00904518046337755, "name": "scale_256", "betas": [ 0.9, @@ -75,7 +235,7 @@ ] }, { - "lr": 0.00975530705321762, + "lr": 0.00904518046337755, "name": "scale_512", "betas": [ 0.9, @@ -98,7 +258,7 @@ ] }, { - "lr": 0.00975530705321762, + "lr": 0.00904518046337755, "name": "scale_768", "betas": [ 0.9, @@ -121,7 +281,7 @@ ] }, { - "lr": 0.00975530705321762, + "lr": 0.00904518046337755, "name": "scale_1024", "betas": [ 0.9, @@ -144,7 +304,7 @@ ] }, { - "lr": 0.00975530705321762, + "lr": 0.00904518046337755, "name": "scale_1280", "betas": [ 0.9, @@ -167,7 +327,7 @@ ] }, { - "lr": 0.00975530705321762, + "lr": 0.00904518046337755, "name": "scale_1536", "betas": [ 0.9, @@ -190,7 +350,7 @@ ] }, { - "lr": 0.00975530705321762, + "lr": 0.00904518046337755, "name": "scale_1792", "betas": [ 0.9, @@ -213,7 +373,7 @@ ] }, { - "lr": 0.00975530705321762, + "lr": 0.00904518046337755, "name": "scale_2048", "betas": [ 0.9, @@ -236,7 +396,7 @@ ] }, { - "lr": 0.00975530705321762, + "lr": 0.00904518046337755, "name": "scale_2304", "betas": [ 0.9, @@ -259,7 +419,7 @@ ] }, { - "lr": 0.00975530705321762, + "lr": 0.00904518046337755, "name": "scale_2560", "betas": [ 0.9, @@ -282,7 +442,7 @@ ] }, { - "lr": 0.004877665762479736, + "lr": 0.004522637977440181, "name": "fusion", "betas": [ 0.9, @@ -338,7 +498,7 @@ "T_i": 10, "T_mult": 2, "eta_min": 1e-06, - "T_cur": 1, + "T_cur": 2, "base_lrs": [ 0.01, 0.01, @@ -353,30 +513,31 @@ 0.01, 0.005 ], - "last_epoch": 1, + "last_epoch": 2, "_step_count": 0, "_is_initial": false, "_get_lr_called_within_step": false, "_last_lr": [ - 0.00975530705321762, - 0.00975530705321762, - 0.00975530705321762, - 0.00975530705321762, - 0.00975530705321762, - 0.00975530705321762, - 0.00975530705321762, - 0.00975530705321762, - 0.00975530705321762, - 0.00975530705321762, - 0.00975530705321762, - 0.004877665762479736 + 0.00904518046337755, + 0.00904518046337755, + 0.00904518046337755, + 0.00904518046337755, + 0.00904518046337755, + 0.00904518046337755, + 0.00904518046337755, + 0.00904518046337755, + 0.00904518046337755, + 0.00904518046337755, + 0.00904518046337755, + 0.004522637977440181 ] }, "metrics": { - "best_val_acc": 79.48, - "best_epoch": 0, + "best_val_acc": 80.816, + "best_epoch": 1, "scale_accuracies": { - "256": 79.48 + "256": 80.816, + "512": 80.742 } }, "train_config": {