diff --git "a/weights/best_model_metadata.json" "b/weights/best_model_metadata.json" --- "a/weights/best_model_metadata.json" +++ "b/weights/best_model_metadata.json" @@ -1,226 +1,36 @@ { - "epoch": 7, + "epoch": 0, "optimizer_state_dict": { "state": { "0": { - "step": "tensor(10016.)", - "exp_avg": "tensor([[-3.8364e-05, -1.3476e-05, 3.1047e-06, ..., 8.2673e-06,\n -4.9354e-06, -6.8424e-06],\n [-8.5129e-06, 9.0674e-06, 3.7758e-05, ..., -3.4903e-05,\n 3.4188e-05, 1.9100e-05],\n [-9.0262e-06, 1.0034e-05, 2.8853e-06, ..., -2.9239e-06,\n 2.5236e-06, 2.8129e-06],\n ...,\n [-1.0466e-04, 8.8769e-05, 5.9739e-05, ..., 5.4866e-05,\n 5.4056e-06, -4.7783e-05],\n [-6.0164e-06, -2.6155e-06, -4.3258e-06, ..., 1.2737e-05,\n -1.6833e-05, -2.8955e-05],\n [-1.4755e-05, -1.6626e-05, 2.7456e-05, ..., 6.3978e-06,\n 2.9087e-06, -5.8953e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.2978e-08, 1.4401e-08, 6.7127e-09, ..., 8.4977e-09, 7.0933e-09,\n 5.0371e-09],\n [1.0827e-08, 1.0735e-08, 1.1412e-08, ..., 8.8005e-09, 5.9493e-09,\n 5.5743e-09],\n [2.3069e-11, 3.4361e-11, 3.0287e-11, ..., 1.2249e-11, 2.3164e-11,\n 1.8305e-11],\n ...,\n [1.2206e-08, 9.9823e-09, 9.9576e-09, ..., 7.0460e-09, 7.0107e-09,\n 5.1338e-09],\n [1.4597e-08, 1.1402e-08, 9.1016e-09, ..., 9.4133e-09, 8.6268e-09,\n 6.2845e-09],\n [3.3572e-09, 4.9131e-09, 3.2775e-09, ..., 2.0941e-09, 2.5649e-09,\n 1.8902e-09]], device='cuda:0')" + "step": "tensor(1252.)", + "exp_avg": "tensor([[-2.4113e-05, 1.2303e-05, -3.6856e-05, ..., -1.2292e-06,\n -1.0857e-06, 1.0312e-05],\n [ 4.2458e-05, -1.4308e-05, 1.6978e-05, ..., -7.4049e-06,\n -4.0597e-06, -4.5033e-05],\n [ 8.8562e-06, -1.5668e-05, 2.6869e-05, ..., 1.8806e-05,\n 2.2478e-06, -5.1486e-05],\n ...,\n [-1.1202e-05, 2.5152e-05, 1.9325e-05, ..., 1.9150e-05,\n -2.7548e-05, -1.8088e-06],\n [-1.9628e-29, -3.3782e-29, 1.6203e-28, ..., -1.5242e-28,\n 8.8350e-30, -3.3881e-29],\n [ 5.7273e-05, 2.4279e-05, 8.4247e-06, ..., 2.0174e-05,\n -3.2522e-05, -4.0931e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.1983e-08, 1.2085e-08, 6.4733e-09, ..., 9.7722e-09, 7.3066e-09,\n 6.2270e-09],\n [1.2734e-08, 1.1464e-08, 6.8485e-09, ..., 6.7487e-09, 8.0445e-09,\n 6.1970e-09],\n [1.8862e-08, 1.8788e-08, 2.0858e-08, ..., 1.2985e-08, 1.1754e-08,\n 1.0339e-08],\n ...,\n [1.6817e-08, 1.9303e-08, 2.2495e-08, ..., 1.1944e-08, 1.0047e-08,\n 1.0593e-08],\n [4.7075e-11, 2.4558e-10, 7.4629e-11, ..., 4.3751e-13, 2.3549e-10,\n 1.3877e-12],\n [1.4044e-08, 1.3762e-08, 1.4686e-08, ..., 1.0629e-08, 8.7803e-09,\n 8.0974e-09]], device='cuda:0')" }, "1": { - "step": "tensor(10016.)", - "exp_avg": "tensor([ 0.0008, -0.0016, -0.0003, ..., 0.0017, 0.0007, -0.0007],\n device='cuda:0')", - "exp_avg_sq": "tensor([1.5524e-05, 1.5315e-05, 3.4876e-08, ..., 1.4972e-05, 1.6460e-05,\n 5.2708e-06], device='cuda:0')" + "step": "tensor(1252.)", + "exp_avg": "tensor([-1.4070e-03, 1.3628e-03, -4.4851e-04, ..., -1.1237e-04,\n 3.4341e-27, 1.0627e-03], device='cuda:0')", + "exp_avg_sq": "tensor([1.6147e-05, 1.5322e-05, 3.3518e-05, ..., 3.1431e-05, 1.5939e-06,\n 2.5554e-05], device='cuda:0')" }, "2": { - "step": "tensor(10016.)", - "exp_avg": "tensor([[ 5.3255e-06, -1.6899e-07, 2.3137e-07, ..., -4.1306e-06,\n 8.4843e-06, -5.9196e-07],\n [ 1.9912e-06, -4.8412e-07, -1.9002e-11, ..., 3.4193e-06,\n -4.4740e-06, 3.4573e-06],\n [ 1.2957e-07, -9.4937e-07, 3.4483e-24, ..., 3.4428e-06,\n 8.3832e-06, -1.5811e-07],\n ...,\n [ 0.0000e+00, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 0.0000e+00],\n [ 6.3993e-06, -1.4640e-06, -7.8217e-09, ..., -6.0730e-06,\n 8.3424e-07, -1.9380e-07],\n [-1.7300e-05, -1.4405e-05, 1.0443e-08, ..., 3.8763e-06,\n 1.2717e-05, -1.4340e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.2844e-09, 2.4626e-10, 1.2686e-12, ..., 1.9277e-09, 7.6745e-10,\n 4.3538e-10],\n [2.9799e-10, 1.4262e-11, 1.6687e-13, ..., 6.3613e-10, 8.2382e-10,\n 7.2145e-11],\n [3.6190e-10, 9.3170e-10, 2.1373e-17, ..., 8.7891e-10, 2.3802e-09,\n 4.5680e-11],\n ...,\n [0.0000e+00, 2.4396e-20, 0.0000e+00, ..., 7.1471e-22, 8.5359e-22,\n 0.0000e+00],\n [6.1055e-09, 4.2831e-10, 6.9929e-15, ..., 1.6332e-09, 9.0814e-10,\n 1.1672e-09],\n [1.2269e-09, 1.5687e-09, 9.8905e-14, ..., 7.1612e-10, 4.7768e-09,\n 2.7082e-10]], device='cuda:0')" + "step": "tensor(1252.)", + "exp_avg": "tensor([[-2.8681e-05, -1.1480e-06, 2.1258e-06, ..., -9.8516e-05,\n 5.6052e-45, -6.6567e-06],\n [-3.1674e-05, -4.3587e-06, -2.9065e-05, ..., 2.7090e-06,\n -5.6052e-45, -5.9087e-06],\n [ 4.0137e-06, 9.8352e-06, 1.6296e-05, ..., 2.3547e-06,\n -5.6052e-45, -8.3019e-05],\n ...,\n [ 4.1613e-21, -5.6052e-45, 5.6052e-45, ..., 6.7035e-36,\n 5.6052e-45, 1.8286e-19],\n [ 1.6744e-05, 7.4089e-06, -9.6605e-08, ..., -3.2625e-07,\n -5.6052e-45, -1.4310e-06],\n [-3.2172e-06, 9.0451e-06, 3.7791e-05, ..., -7.6358e-06,\n 5.6052e-45, -1.6760e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.1020e-08, 7.6641e-10, 2.6561e-09, ..., 4.8833e-08, 3.7532e-11,\n 1.3521e-08],\n [1.0195e-08, 1.9460e-09, 1.0396e-08, ..., 4.8112e-09, 2.0824e-09,\n 1.1113e-08],\n [3.8271e-08, 5.8692e-09, 6.2402e-09, ..., 4.1616e-09, 1.4351e-08,\n 1.0014e-08],\n ...,\n [1.3482e-09, 5.6214e-11, 4.2657e-12, ..., 5.3734e-11, 1.4411e-09,\n 2.3426e-11],\n [6.5829e-10, 9.2445e-11, 1.0454e-09, ..., 7.6076e-10, 2.9071e-12,\n 3.9806e-10],\n [9.3767e-09, 1.8638e-08, 6.9640e-08, ..., 6.8160e-09, 2.5756e-10,\n 2.5711e-08]], device='cuda:0')" }, "3": { - "step": "tensor(10016.)", - "exp_avg": "tensor([ 3.7474e-05, -2.5909e-05, -2.2788e-05, -5.2803e-05, -9.1339e-05,\n 5.6052e-45, 1.7497e-04, -1.0659e-04, -4.4055e-05, -2.9257e-04,\n -1.0046e-04, 8.2462e-05, 1.7308e-04, 2.7100e-05, 7.5895e-05,\n 1.1306e-04, 7.9223e-07, -4.2650e-07, 3.3759e-05, 1.1120e-05,\n 3.0818e-06, -3.5231e-05, -5.6767e-05, 1.7429e-04, -5.9369e-05,\n -3.2080e-05, 4.7558e-06, 5.6052e-45, -1.0221e-04, 2.0799e-04,\n 4.4067e-05, 8.4504e-05, -1.1517e-05, -9.4945e-05, 1.0674e-04,\n 1.2579e-04, 2.7191e-05, 6.2801e-05, -2.4365e-04, 3.1241e-05,\n 4.2876e-06, 1.9467e-04, 1.2885e-04, -1.0438e-04, 4.4906e-06,\n 4.7282e-06, 3.6985e-04, -4.7418e-05, -7.1090e-05, 1.1253e-04,\n 5.6052e-45, 2.0388e-04, -8.3835e-05, 1.4987e-04, -5.2094e-04,\n -1.7344e-04, -1.9630e-05, 1.1068e-04, 1.8978e-04, 8.1318e-04,\n -4.5100e-05, -7.2708e-04, -3.8147e-05, 5.2595e-05, -1.9674e-04,\n -5.5304e-05, 7.9815e-05, 8.6301e-05, 6.2654e-05, -4.0043e-05,\n 1.3737e-04, -5.6052e-45, 3.7792e-04, 2.0074e-04, 4.8354e-05,\n 3.4237e-05, -7.3087e-06, -3.0850e-06, -3.7568e-05, -1.1909e-04,\n 5.8603e-05, -1.2153e-04, -3.8055e-04, -2.1525e-09, 3.8730e-05,\n -1.7628e-04, -1.3873e-04, -1.9873e-05, 3.0238e-05, 1.1252e-03,\n -6.9745e-05, -6.0440e-05, -1.2854e-04, -1.7936e-04, 3.2520e-04,\n -1.7034e-04, 7.2230e-05, -1.0154e-05, 1.5434e-04, -1.0971e-04,\n -3.1525e-04, 1.9322e-04, 7.4935e-05, 3.2165e-05, -6.7979e-05,\n -6.9207e-05, -1.0404e-04, -6.0169e-05, 3.2101e-05, -1.0588e-05,\n 7.9935e-06, -6.3429e-05, -4.6723e-04, 8.1406e-06, 1.0212e-04,\n 5.7196e-05, -7.6126e-05, -2.2687e-05, -2.0974e-05, 5.6052e-45,\n 8.4059e-05, -3.0936e-05, -1.3927e-04, 5.6052e-45, 4.1840e-05,\n 1.4390e-04, 4.0278e-04, 1.1310e-05, 1.3457e-04, 2.2317e-05,\n -3.7295e-05, -1.7372e-05, 5.6052e-45, -2.0479e-05, -9.7780e-06,\n 1.7575e-04, -2.2412e-20, 1.2841e-04, 6.4591e-05, -2.3109e-06,\n 5.6052e-45, 5.6052e-45, -1.9053e-04, 4.3956e-05, 5.6052e-45,\n 5.6052e-45, -1.1408e-04, 4.8002e-05, 5.6052e-45, 1.6064e-05,\n 1.0096e-04, -1.4010e-04, -3.1632e-05, -1.5716e-04, -1.0005e-04,\n 1.9702e-05, 2.9009e-05, -2.2021e-05, 4.5412e-37, -3.4066e-05,\n 3.7808e-05, 7.3360e-06, -3.6048e-05, 2.6536e-05, -9.1145e-05,\n -3.0035e-04, -1.4694e-05, 5.6052e-45, 7.3027e-05, 2.0941e-04,\n 8.1479e-05, 1.8743e-04, -1.0615e-04, -5.1740e-05, -1.6549e-04,\n 7.8620e-05, 1.4244e-04, 1.2739e-04, 1.3617e-04, -4.8620e-05,\n 2.4989e-04, 2.0780e-04, -9.3203e-05, -3.4447e-05, 5.6052e-45,\n 5.6052e-45, -5.2229e-06, 1.3069e-04, 9.8836e-05, -1.5126e-05,\n -9.5430e-06, 3.3708e-05, -2.5848e-04, 9.5866e-06, 7.9112e-05,\n 7.3002e-05, -7.0136e-04, 5.7049e-05, 4.3128e-05, 5.6052e-45,\n 1.0689e-05, -4.1997e-05, -1.2632e-04, 3.0704e-05, -4.1776e-05,\n -1.1017e-04, 6.8577e-05, -2.8722e-05, -4.6277e-05, 1.6978e-11,\n -1.4947e-04, -1.1926e-13, 6.4637e-05, 1.5182e-04, 6.1284e-05,\n 4.5791e-05, -6.5710e-05, -5.1340e-05, 1.6468e-04, 1.9889e-04,\n 3.3209e-05, 2.0828e-04, 1.7292e-04, -2.9187e-05, 3.0183e-05,\n -6.4409e-34, -1.0193e-04, -1.0971e-04, -1.8080e-04, -8.1868e-05,\n 1.6951e-05, -1.4894e-04, 1.0401e-06, -6.1122e-05, 5.6052e-45,\n -9.2499e-05, 2.8671e-04, 5.6052e-45, 4.1693e-05, -1.4758e-04,\n -4.0646e-05, 1.0256e-14, 6.5936e-05, -9.1262e-05, 5.4439e-05,\n 1.4012e-04, -1.0155e-04, -1.2149e-04, -1.0469e-04, 1.2842e-24,\n 4.1380e-05, 4.1845e-05, 1.3698e-05, 7.2982e-05, -7.1545e-05,\n 3.2502e-04, 5.6052e-45, 1.0328e-04, 1.5411e-05, 1.0556e-04,\n 5.6052e-45, 1.9178e-04, 4.3852e-05, 1.7081e-05, 9.5203e-05,\n -6.2910e-05, -1.8572e-05, -1.3102e-04, -1.1151e-04, 6.6849e-05,\n 2.5052e-05, -1.1378e-05, 2.5219e-05, 1.8214e-04, 1.2052e-04,\n 1.7985e-04, -2.5372e-04, -7.5396e-38, 3.0836e-05, -8.2136e-05,\n 5.6052e-45, -4.4806e-05, -3.4166e-05, -4.4312e-05, -9.1255e-06,\n -9.4186e-05, 2.8922e-05, -4.4213e-05, -1.2766e-05, -1.1254e-03,\n -4.4953e-05, -2.2265e-05, -7.5447e-05, -2.7051e-05, -2.9195e-05,\n 6.4644e-05, -8.4602e-05, 4.4175e-06, -4.8198e-05, -6.8481e-05,\n 1.8627e-04, -1.0176e-04, 8.0720e-05, -2.7229e-05, 5.1226e-05,\n -6.2110e-06, -1.0442e-04, 1.0590e-04, 4.4195e-05, -5.0743e-05,\n 4.4378e-05, 5.6052e-45, -6.9047e-05, 1.8925e-05, -2.4075e-04,\n 5.6052e-45, 3.4305e-06, 8.3737e-05, 5.5612e-05, -4.8908e-05,\n -1.4996e-04, 1.0420e-04, 2.3898e-05, -3.8471e-05, 7.7233e-05,\n 5.6052e-45, -7.9199e-07, -1.7229e-04, 1.5682e-04, -8.7915e-05,\n 1.6391e-04, 6.0770e-05, 1.2649e-04, 7.3693e-05, 9.3841e-05,\n 1.8300e-05, 5.3588e-04, -9.5582e-07, 5.6052e-45, -6.8407e-07,\n -1.8892e-04, 5.6639e-05, 8.8544e-05, 4.2145e-05, -4.9273e-05,\n 6.5660e-05, -1.4829e-05, 3.3166e-05, -9.2450e-05, 8.1804e-05,\n 6.3471e-05, 8.1505e-05, -4.8264e-04, 5.6052e-45, 7.9683e-06,\n -7.1055e-05, -2.8901e-05, -1.2415e-04, -1.8975e-04, -1.8257e-04,\n -1.2187e-04, 1.2920e-04, -5.6052e-45, -1.0471e-05, -3.0797e-05,\n -9.9290e-07, 6.1683e-06, -1.3124e-04, 1.1093e-04, 2.0524e-05,\n -4.2598e-05, 1.1673e-04, -6.5521e-05, 9.6727e-05, 2.3162e-04,\n 8.4199e-05, -1.0368e-27, 3.6158e-05, 5.6052e-45, -2.2239e-04,\n 4.8779e-05, -1.2842e-04, -5.6800e-05, -1.6607e-04, 5.6052e-45,\n -3.2524e-04, 4.2380e-05, -9.4585e-06, 4.4088e-05, -8.9840e-06,\n 5.7611e-05, -1.2270e-04, 2.5844e-04, -3.0249e-05, -1.2416e-04,\n 3.8032e-04, 2.9527e-05, -9.5513e-32, 5.6052e-45, 1.8743e-07,\n -1.5223e-04, 1.5352e-04, -1.7078e-05, 1.9900e-06, 9.6229e-05,\n 6.9948e-05, -1.6215e-06, 3.4138e-05, 6.4825e-05, 4.0748e-05,\n 7.5443e-05, 5.6052e-45, 1.1671e-04, -2.0066e-04, 1.1433e-04,\n 1.3273e-05, 3.7310e-05, -7.3114e-05, 5.4635e-05, 6.3581e-05,\n -1.2196e-04, 4.5680e-05, -1.1242e-05, 5.6052e-45, 5.6052e-45,\n -3.6918e-04, -7.0595e-05, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -8.6405e-05, -1.6493e-05, 7.5069e-05, 7.8265e-05, -2.4708e-06,\n 6.7590e-06, -2.0680e-04, 1.9716e-05, 4.8196e-05, 5.6052e-45,\n 1.9091e-05, -4.1150e-05, 5.6836e-06, 5.6052e-45, 2.7705e-07,\n 2.4541e-04, 5.0446e-05, 4.5254e-05, -3.8148e-05, 1.1438e-04,\n -3.3162e-04, 6.5532e-06, -4.6599e-06, 3.3817e-05, 2.7348e-05,\n 6.9288e-05, -4.5841e-05, 7.8328e-05, 5.6052e-45, 1.0143e-04,\n -2.6951e-06, -8.7770e-05, 5.6052e-45, -8.8965e-05, 6.7286e-05,\n -2.6411e-05, 5.2008e-05, -2.4820e-05, 2.4043e-04, 1.1461e-04,\n 7.9416e-05, 5.6052e-45, -7.6610e-05, 3.2920e-04, 5.9224e-05,\n 5.6202e-05, 5.5689e-05, -1.5317e-04, -7.2605e-06, 2.3946e-04,\n -1.0598e-04, -5.1895e-05, -2.6058e-04, 4.6273e-05, -1.3385e-04,\n 1.3449e-04, -5.1627e-05, -2.4621e-05, -3.7537e-05, -3.0403e-04,\n -1.2660e-04, -2.2741e-05, -1.5932e-04, -3.0256e-05, 1.2421e-04,\n -2.4734e-05, -1.4942e-04, -3.9218e-04, 8.6893e-05, 2.0286e-05,\n 5.6052e-45, -2.7133e-05, 7.8028e-06, -1.0779e-05, 1.7382e-04,\n -9.5260e-05, 1.2349e-04, -1.1965e-04, 1.9779e-04, 7.3530e-05,\n 3.6511e-05, 1.7456e-06, 3.0982e-06, -8.8987e-05, 8.3209e-05,\n 7.9364e-05, -1.2708e-04, 7.4898e-05, 5.6052e-45, 3.1948e-05,\n -1.9904e-04, -9.6433e-05, -4.7694e-05, -1.4326e-04, -4.6416e-05,\n -1.3249e-05, 5.6052e-45, 1.5081e-04, 7.6333e-05, -1.0585e-04,\n -2.5661e-04, 2.2705e-05, -6.7788e-05, -1.0352e-04, 2.0606e-04,\n -8.0181e-05, 1.8501e-04, 5.6052e-45, 2.3160e-05, 1.4017e-04,\n -1.6922e-04, 1.2131e-04, -1.0269e-04, -1.0603e-04, 3.7132e-05,\n -5.6052e-45, 7.2416e-05, 2.3744e-05, -1.8627e-05, 1.8451e-04,\n -3.0995e-05, 1.2163e-04, 1.4370e-04, 3.8371e-05, 4.6026e-05,\n 1.1595e-04, -4.5487e-05, 2.4426e-05, 2.2720e-05, -3.8725e-04,\n 5.6052e-45, 2.3057e-04, -8.4919e-05, 1.3016e-05, -9.1268e-05,\n 1.4988e-04, -4.7190e-06, 5.6052e-45, -8.9964e-05, 5.6052e-45,\n -5.7480e-05, 4.2797e-05, 1.0508e-04, -9.2530e-05, -9.7313e-05,\n -1.3318e-04, -1.1859e-04, 1.0516e-05, 4.0587e-05, -5.8428e-05,\n -9.7832e-05, -2.1023e-34, -4.4569e-05, 1.8348e-05, 5.6052e-45,\n -4.1040e-04, -1.5722e-04, 5.7536e-05, 1.5682e-04, 6.4112e-05,\n 2.0213e-04, 5.6052e-45, 2.3251e-04, -1.3295e-04, -1.6964e-04,\n 3.2845e-05, -3.6446e-05, -9.8557e-05, -3.6059e-05, -2.2269e-05,\n 6.9333e-05, -4.9102e-05, 4.0964e-05, 1.0291e-04, 1.1579e-04,\n 1.4459e-04, 1.3863e-05, -7.5024e-05, 1.4225e-04, -2.5877e-05,\n -4.6410e-04, 4.7010e-06, -7.2781e-06, -9.4889e-05, 9.9664e-05,\n 5.6052e-45, 1.5030e-04, 5.8849e-14, 1.6863e-05, -1.4792e-04,\n -9.8487e-05, 3.9463e-05, 2.0241e-04, 6.8539e-05, 3.4838e-05,\n -4.2135e-05, 5.6052e-45, 3.0461e-05, 1.1163e-04, -3.5213e-05,\n 9.1043e-06, 3.1243e-05, 5.6052e-45, 5.6052e-45, 2.0790e-04,\n 1.3667e-04, 5.6052e-45, -6.5079e-06, -5.2222e-05, 6.8760e-05,\n 1.4838e-04, 5.6052e-45, -7.7176e-05, 1.5675e-04, 2.5943e-07,\n -3.9613e-05, -2.4000e-04, 1.4796e-04, 1.1213e-04, -2.1236e-04,\n 1.6539e-04, -3.4315e-04, -9.7903e-05, 1.4133e-04, 5.4181e-05,\n -7.5282e-06, -2.9038e-04, 8.9320e-05, 1.0306e-05, -6.6155e-05,\n 3.7929e-05, 1.4741e-05, -1.1783e-04, 5.3745e-06, 8.5559e-05,\n 8.3440e-05, 4.0035e-05, -7.6601e-05, 9.3492e-05, 1.1823e-04,\n 2.6748e-05, 5.6052e-45, 4.8860e-05, 1.1284e-04, -2.1432e-04,\n 3.0873e-05, -6.2293e-05, -3.6821e-05, 8.6805e-08, 5.6052e-45,\n 2.3848e-05, -1.0968e-05, -2.3404e-04, 5.6052e-45, 9.8068e-05,\n 3.9736e-16, 2.9361e-04, -9.1428e-06, -1.5857e-05, 5.0332e-05,\n 1.0863e-04, 9.6307e-05, -1.1663e-13, 3.0820e-05, -1.5276e-04,\n -1.5027e-04, 1.2671e-04, 3.5494e-05, 9.4803e-07, 9.4259e-05,\n 3.2283e-04, -5.7023e-05, 2.8357e-05, 5.9250e-05, -3.1347e-05,\n -4.6241e-05, 5.6052e-45, 1.6685e-04, -6.0417e-05, 5.6052e-45,\n -7.0032e-04, 8.6665e-06, -1.1253e-04, -1.4213e-04, 1.3461e-04,\n 7.3806e-05, 3.8439e-05, -1.3691e-05, 2.8398e-04, -6.9941e-05,\n 7.7936e-21, -1.0770e-04, 1.1481e-06, -1.3057e-04, 1.7645e-05,\n -1.3511e-04, 5.3797e-05, 8.6598e-05, -1.5759e-05, 4.5883e-05,\n 4.7141e-05, 5.1064e-05, 1.1980e-04, 2.0223e-05, 5.6052e-45,\n -8.0689e-05, -7.8850e-05, 8.8325e-05, -3.4881e-05, -7.5027e-05,\n -1.1659e-04, -4.5277e-05, 5.9617e-05, -1.4590e-04, -3.6468e-05,\n -1.1379e-04, 1.0076e-04, -9.1554e-25, 8.5094e-05, -5.6052e-45,\n -2.1708e-04, 3.3256e-04, 2.0105e-04, -1.2906e-04, 2.4895e-05,\n 5.5590e-05, -1.4726e-05, 1.9448e-05, 5.5896e-05, 7.6375e-05,\n -2.8172e-05, 4.3313e-05, 2.7125e-04, -1.5308e-05, 5.6052e-45,\n 5.6052e-45, 6.5026e-05, 7.1462e-05], device='cuda:0')", - "exp_avg_sq": "tensor([1.4809e-07, 5.3562e-08, 1.2369e-07, 1.4042e-07, 2.8977e-07, 1.1951e-09,\n 2.8646e-07, 1.5046e-07, 9.7810e-08, 2.4048e-07, 1.5430e-07, 2.9016e-08,\n 1.4914e-07, 5.7602e-08, 1.5057e-07, 1.7299e-07, 2.0440e-07, 9.5669e-08,\n 1.2589e-07, 1.2275e-07, 1.7512e-09, 5.9102e-08, 1.1480e-07, 1.8611e-07,\n 2.4326e-07, 2.5458e-07, 1.0522e-07, 2.7139e-12, 1.8559e-07, 1.9697e-07,\n 6.3321e-08, 1.8005e-07, 1.2879e-07, 1.8256e-07, 1.1744e-07, 1.5133e-07,\n 3.4899e-08, 1.4728e-07, 1.6711e-07, 1.5477e-07, 2.2341e-07, 1.4843e-07,\n 2.3888e-07, 2.5120e-07, 1.8273e-07, 1.0459e-07, 1.7928e-07, 2.3577e-07,\n 3.5583e-07, 2.3412e-07, 2.3278e-09, 3.9389e-07, 1.4211e-07, 1.3796e-07,\n 1.6871e-07, 9.6043e-08, 1.9263e-07, 9.8573e-08, 7.2102e-08, 2.0872e-07,\n 7.3045e-08, 1.8505e-07, 3.2981e-07, 1.4104e-07, 6.6323e-08, 1.6000e-07,\n 2.5554e-07, 1.4626e-07, 6.3728e-08, 5.6456e-08, 2.0954e-07, 1.0590e-09,\n 1.3314e-07, 4.6123e-07, 2.0665e-07, 1.9724e-07, 1.5788e-08, 3.3174e-07,\n 9.7434e-08, 1.7077e-07, 9.0404e-08, 1.7765e-07, 2.5914e-07, 2.0294e-11,\n 1.4407e-07, 2.0857e-07, 1.7013e-07, 2.7886e-07, 1.3621e-07, 4.5962e-07,\n 1.8710e-07, 9.6567e-08, 1.5582e-07, 2.0366e-07, 1.2351e-07, 2.2220e-07,\n 1.8338e-07, 9.4378e-08, 1.4410e-07, 2.0596e-07, 1.7246e-07, 1.6757e-07,\n 3.5293e-07, 1.3541e-07, 1.7087e-07, 1.8521e-07, 1.2113e-07, 1.4635e-07,\n 1.2659e-07, 1.0705e-07, 1.5605e-07, 2.1495e-07, 1.6679e-07, 7.0707e-08,\n 2.2594e-07, 1.7399e-07, 1.8529e-07, 1.2246e-07, 7.5719e-08, 5.5558e-10,\n 1.6793e-07, 2.4503e-07, 1.3104e-07, 1.3922e-09, 1.5180e-07, 1.9934e-07,\n 1.7523e-07, 7.9411e-08, 2.2349e-07, 1.0549e-07, 1.3589e-07, 1.4085e-07,\n 6.0816e-11, 2.2571e-07, 1.5427e-07, 1.6197e-07, 2.4695e-13, 1.8050e-07,\n 1.4465e-07, 9.1405e-08, 6.1315e-10, 3.3718e-10, 1.4173e-07, 6.8121e-08,\n 1.3063e-12, 1.7376e-09, 2.3297e-07, 1.8831e-07, 4.6685e-09, 1.0525e-07,\n 1.4468e-07, 2.2824e-07, 1.6124e-07, 1.7678e-07, 5.8041e-08, 1.8824e-07,\n 1.3442e-07, 1.7371e-07, 1.8866e-09, 9.6071e-08, 2.2448e-07, 1.8942e-07,\n 2.3931e-07, 3.3164e-07, 1.1660e-07, 1.6397e-07, 2.8528e-08, 2.4563e-15,\n 1.5491e-07, 1.5831e-07, 2.3273e-07, 2.2797e-07, 1.5850e-07, 2.0280e-07,\n 1.8884e-07, 1.6312e-07, 1.2891e-07, 2.2512e-07, 1.9646e-07, 2.7118e-07,\n 2.4101e-07, 1.6472e-07, 1.7548e-07, 1.3429e-07, 1.0404e-13, 1.4351e-18,\n 2.6350e-07, 1.8481e-07, 8.5454e-08, 1.1975e-07, 1.1051e-07, 1.2572e-07,\n 2.2321e-07, 2.4365e-07, 2.0825e-07, 1.0113e-07, 3.4817e-07, 2.1007e-07,\n 1.5876e-07, 1.5720e-20, 4.6090e-08, 1.0055e-07, 2.2258e-07, 1.7239e-07,\n 2.5511e-07, 1.5523e-07, 2.1144e-07, 1.2112e-07, 4.3896e-08, 1.1783e-11,\n 2.3340e-07, 1.6531e-11, 9.8818e-08, 2.8675e-07, 1.8815e-07, 1.6788e-07,\n 2.0192e-07, 1.0757e-07, 1.7227e-07, 1.5152e-07, 6.3967e-08, 2.5528e-07,\n 2.8424e-07, 1.2397e-07, 6.8504e-08, 2.3274e-09, 1.9031e-07, 2.9978e-07,\n 2.7926e-07, 3.5520e-08, 1.5241e-07, 1.5307e-07, 1.6333e-07, 3.5703e-07,\n 1.4727e-09, 2.3158e-07, 2.0889e-07, 2.7946e-13, 1.2287e-07, 1.6955e-07,\n 4.0564e-08, 7.6608e-10, 2.6543e-07, 1.8657e-07, 1.3354e-07, 2.2091e-07,\n 1.3212e-07, 1.9880e-07, 1.9620e-07, 9.7720e-10, 2.1341e-07, 3.2783e-07,\n 1.3251e-07, 2.1927e-07, 1.6701e-07, 2.7658e-07, 2.9877e-13, 9.9959e-08,\n 1.1611e-07, 3.0863e-07, 1.7593e-18, 1.7856e-07, 1.6803e-07, 1.0713e-07,\n 2.0010e-07, 1.0293e-07, 1.3139e-07, 1.1874e-07, 1.9976e-07, 1.6993e-07,\n 1.8943e-07, 2.2557e-07, 1.2230e-07, 1.8767e-07, 2.8126e-07, 1.6032e-07,\n 2.5584e-07, 1.5233e-12, 8.8068e-08, 2.8993e-07, 4.3434e-09, 3.1035e-07,\n 7.9826e-08, 1.9974e-07, 2.2433e-07, 1.3605e-07, 2.8673e-07, 2.2208e-07,\n 1.2896e-07, 3.3125e-07, 1.4767e-07, 1.3081e-07, 6.5789e-08, 1.0063e-07,\n 2.3508e-07, 1.0084e-07, 1.3508e-07, 2.1998e-07, 1.4302e-07, 9.0548e-08,\n 2.0406e-07, 1.8325e-07, 7.8441e-08, 1.0607e-07, 1.7408e-07, 9.5742e-09,\n 3.9255e-08, 9.0654e-08, 1.9034e-07, 1.7249e-07, 1.7241e-07, 2.5218e-13,\n 1.8106e-07, 1.3223e-07, 2.2102e-07, 5.0093e-13, 6.0583e-08, 3.2298e-07,\n 7.4601e-08, 1.2218e-07, 1.8283e-07, 4.2231e-08, 1.0220e-07, 1.8820e-07,\n 1.2139e-07, 1.8173e-09, 1.7386e-07, 1.8814e-07, 1.6116e-07, 2.2635e-07,\n 1.8703e-07, 8.2936e-08, 1.5183e-07, 1.3377e-07, 8.9932e-08, 2.3047e-07,\n 2.0780e-07, 1.3994e-07, 1.3650e-18, 9.1446e-10, 3.1558e-07, 1.6660e-07,\n 2.0574e-07, 8.8106e-08, 2.4673e-07, 1.6204e-07, 2.1428e-07, 6.8961e-08,\n 2.1736e-07, 1.8832e-07, 2.2161e-07, 1.7086e-07, 1.3208e-07, 3.7160e-10,\n 1.7763e-07, 3.6038e-07, 7.6418e-08, 7.5853e-08, 1.7920e-07, 1.2385e-07,\n 2.0204e-07, 2.2890e-07, 1.5411e-09, 2.1123e-07, 1.4330e-07, 7.4875e-08,\n 1.2468e-07, 2.2800e-07, 1.3208e-07, 9.4820e-08, 1.0029e-07, 6.1990e-08,\n 1.0797e-07, 1.5514e-07, 1.5894e-07, 1.2460e-07, 2.6829e-10, 8.5503e-08,\n 7.3169e-10, 1.0235e-07, 3.9009e-07, 1.8875e-07, 9.7732e-08, 1.8878e-07,\n 1.5291e-09, 2.8408e-07, 1.0081e-07, 2.3834e-07, 2.9607e-07, 1.0544e-07,\n 1.6894e-07, 2.7599e-07, 2.0321e-07, 4.8972e-08, 2.2138e-07, 2.6066e-07,\n 6.0532e-08, 6.4016e-10, 5.2589e-09, 7.6256e-08, 1.2474e-07, 2.6593e-07,\n 2.3882e-07, 3.5079e-07, 1.9598e-07, 2.7419e-07, 4.5484e-09, 2.2188e-07,\n 1.9309e-07, 2.3878e-07, 2.5643e-07, 9.4664e-15, 9.1738e-08, 2.0589e-07,\n 1.0446e-07, 1.8378e-07, 3.6574e-08, 2.4819e-07, 1.6093e-07, 1.5280e-07,\n 1.7148e-07, 2.4992e-07, 2.7664e-07, 6.4027e-13, 4.1998e-20, 1.2444e-07,\n 4.5114e-08, 3.5319e-10, 9.7100e-13, 2.0306e-12, 2.0254e-07, 1.7390e-07,\n 1.5652e-07, 1.7747e-07, 2.5239e-07, 1.5068e-09, 2.4101e-07, 2.0000e-07,\n 1.8969e-07, 5.2947e-12, 1.0422e-07, 1.3908e-07, 1.2609e-07, 6.4206e-12,\n 1.3719e-07, 3.3082e-07, 1.3176e-07, 1.4491e-07, 1.0191e-07, 8.8559e-08,\n 3.5982e-07, 1.0469e-07, 1.8288e-07, 2.2386e-07, 2.2816e-07, 1.7027e-07,\n 7.4808e-08, 3.6709e-07, 1.9898e-10, 1.3385e-07, 1.1690e-07, 5.5685e-08,\n 4.8479e-14, 1.6646e-07, 1.3645e-07, 2.1811e-07, 1.0650e-07, 1.1082e-07,\n 2.0369e-07, 1.1866e-07, 1.0021e-07, 1.9625e-11, 6.6214e-08, 2.8333e-07,\n 7.9334e-08, 7.3621e-10, 1.6413e-07, 1.3190e-07, 6.1824e-08, 2.6901e-07,\n 1.8390e-07, 1.9360e-07, 1.9760e-07, 2.0853e-07, 1.8747e-07, 1.8346e-07,\n 2.9362e-07, 2.2817e-07, 1.7454e-07, 1.7144e-07, 1.6438e-07, 1.1380e-07,\n 2.3401e-07, 8.4459e-08, 2.1455e-07, 6.9508e-08, 2.0218e-07, 1.7006e-07,\n 2.3791e-07, 6.9547e-08, 1.0506e-08, 4.2989e-07, 1.4303e-07, 1.5083e-07,\n 1.9921e-07, 1.0015e-07, 2.2723e-07, 2.0548e-07, 1.8972e-07, 2.3347e-07,\n 1.3212e-07, 2.3348e-07, 3.7146e-07, 2.4262e-07, 1.4616e-07, 1.2676e-07,\n 1.2886e-07, 8.5240e-08, 1.4551e-09, 1.7955e-07, 1.6442e-07, 1.0715e-07,\n 1.5307e-07, 1.4880e-07, 1.5188e-07, 2.3919e-07, 4.1696e-14, 3.0980e-07,\n 1.3125e-07, 1.4557e-07, 2.0156e-07, 2.1256e-07, 1.7547e-07, 1.9045e-07,\n 1.9957e-07, 9.9956e-08, 1.9703e-07, 4.8589e-09, 9.1403e-08, 1.7751e-07,\n 2.8721e-07, 2.3864e-07, 2.7629e-07, 3.6308e-07, 2.0811e-07, 1.7483e-11,\n 1.9782e-07, 2.1343e-07, 2.3949e-07, 1.0645e-07, 2.2983e-07, 2.7622e-07,\n 1.6365e-07, 1.6731e-07, 1.5270e-07, 1.7143e-07, 1.6124e-07, 1.9758e-07,\n 1.8006e-07, 1.5427e-07, 3.3092e-13, 2.1912e-07, 2.2450e-07, 1.2525e-07,\n 2.1626e-07, 1.8020e-07, 1.3849e-07, 6.9279e-12, 1.7860e-07, 3.3791e-10,\n 2.1192e-07, 1.1409e-07, 1.9021e-07, 7.0844e-08, 8.9275e-08, 1.4755e-07,\n 2.0280e-07, 8.0819e-08, 1.2362e-07, 1.3009e-07, 1.8655e-07, 8.7872e-13,\n 9.1461e-08, 4.3865e-08, 1.1665e-16, 2.5302e-07, 2.0168e-07, 1.5189e-07,\n 1.4246e-07, 9.9873e-08, 1.6885e-07, 1.3561e-13, 1.2131e-07, 1.6704e-07,\n 1.1862e-07, 1.2274e-07, 8.0800e-08, 1.4422e-07, 1.1253e-07, 8.0940e-08,\n 2.5411e-07, 1.0237e-07, 1.7913e-07, 1.6753e-07, 2.1196e-07, 2.2208e-07,\n 3.0935e-07, 5.6140e-08, 1.9505e-07, 1.6943e-07, 2.0934e-07, 1.3742e-07,\n 1.6249e-07, 2.6006e-08, 1.7153e-07, 6.1316e-13, 2.3191e-07, 7.1145e-10,\n 2.0024e-07, 3.2020e-07, 2.8513e-07, 2.6444e-07, 2.3171e-07, 2.7061e-07,\n 1.0526e-07, 2.0351e-07, 6.0835e-14, 1.9652e-07, 2.0964e-07, 6.5822e-08,\n 4.5658e-08, 3.7378e-08, 1.5971e-11, 4.9779e-13, 1.8444e-07, 2.1014e-07,\n 1.8844e-15, 1.3874e-07, 2.2254e-07, 2.6382e-07, 3.3486e-07, 2.0198e-13,\n 1.9377e-07, 9.0863e-08, 1.0790e-07, 1.3277e-07, 2.2078e-07, 2.0112e-07,\n 2.5338e-07, 8.7333e-08, 1.8202e-07, 2.1895e-07, 2.7672e-07, 2.1317e-07,\n 1.2997e-07, 1.5033e-07, 2.9542e-07, 8.7328e-08, 1.5978e-07, 7.7643e-08,\n 3.9461e-07, 9.5050e-08, 2.8794e-07, 1.3654e-07, 1.2360e-07, 3.0798e-07,\n 2.7339e-07, 6.9462e-08, 1.5040e-07, 7.1287e-08, 1.5398e-07, 1.7753e-14,\n 9.7976e-08, 1.5953e-07, 1.5514e-07, 2.1740e-07, 3.2047e-07, 9.3546e-08,\n 7.9852e-10, 1.9114e-12, 3.0060e-07, 1.5134e-07, 1.7525e-07, 4.5219e-09,\n 1.7657e-07, 8.3507e-10, 2.4174e-07, 1.2632e-07, 1.4676e-07, 2.2947e-07,\n 8.3879e-08, 1.3728e-07, 6.8539e-09, 7.8110e-08, 1.2553e-07, 1.1175e-07,\n 2.5791e-07, 1.5402e-07, 2.9852e-11, 1.0114e-07, 2.0173e-07, 1.8111e-07,\n 1.8335e-07, 2.7959e-07, 2.2375e-07, 1.4117e-07, 4.3191e-12, 5.7785e-07,\n 2.9245e-07, 3.4041e-14, 3.1984e-07, 1.1817e-07, 7.9270e-08, 1.9941e-07,\n 2.0532e-07, 6.6216e-08, 1.7210e-07, 2.1650e-07, 1.3466e-07, 2.3400e-07,\n 3.0379e-09, 3.5249e-07, 1.5727e-07, 2.3229e-07, 1.3672e-07, 9.1713e-08,\n 1.9974e-07, 2.8580e-07, 4.6761e-07, 2.5313e-07, 9.8172e-08, 1.5779e-07,\n 1.7261e-07, 2.7075e-07, 4.0786e-09, 8.3860e-08, 1.9244e-07, 2.1760e-07,\n 1.2863e-07, 1.2749e-07, 1.6237e-07, 1.9639e-07, 6.5227e-08, 1.3734e-07,\n 1.3192e-07, 1.6352e-07, 1.5059e-07, 2.7176e-09, 1.5964e-07, 8.1651e-09,\n 1.5567e-07, 1.2342e-07, 1.2830e-07, 1.4376e-07, 2.1527e-07, 2.8866e-07,\n 5.9370e-08, 1.1999e-07, 1.9855e-07, 2.0485e-07, 8.0475e-08, 1.2862e-07,\n 2.3374e-07, 3.6162e-08, 4.3235e-12, 6.5350e-19, 2.2002e-07, 1.3469e-07],\n device='cuda:0')" + "step": "tensor(1252.)", + "exp_avg": "tensor([-3.8596e-04, -6.4597e-05, -3.1119e-04, -1.1543e-05, -2.9842e-09,\n 3.3450e-05, -1.0319e-03, 1.6386e-04, -6.4902e-04, -1.0547e-04,\n -3.4165e-04, 3.0180e-04, 4.6264e-04, 2.6911e-04, 5.6052e-45,\n -4.9884e-04, 2.6123e-10, -1.0737e-03, 2.7171e-12, -2.5432e-04,\n 4.0728e-10, 6.6772e-04, -1.1303e-04, -9.2099e-04, -5.1912e-04,\n -7.6469e-04, -8.3268e-04, -4.9024e-06, -1.1608e-04, -2.5133e-04,\n 4.8361e-04, -1.0219e-04, 1.8264e-04, -2.1795e-04, 4.2666e-05,\n 2.1247e-04, -3.1320e-04, -1.0890e-03, -1.4995e-03, 6.7879e-05,\n -6.2122e-04, 1.2857e-03, 8.2162e-04, 5.6052e-45, -2.6280e-04,\n -3.1861e-04, 5.6052e-45, 1.8476e-23, -4.1875e-04, 7.9739e-04,\n 1.2772e-04, 6.9094e-21, 7.6941e-04, -7.9875e-04, 3.4710e-04,\n 1.1583e-03, -1.2151e-04, -1.7504e-04, 3.3865e-04, -9.5325e-04,\n -3.3759e-04, 4.0985e-04, -4.6070e-04, -1.1303e-04, 5.4466e-04,\n 3.1754e-08, 1.6964e-04, 6.2881e-05, 2.2999e-04, -7.2908e-05,\n 6.6118e-05, 5.6052e-45, -2.1207e-04, -6.6345e-04, 2.7809e-11,\n 3.8840e-04, -2.7600e-04, 3.9010e-04, -1.5638e-05, -6.9559e-05,\n 9.4896e-11, -3.7856e-04, 6.5805e-05, -3.9131e-04, -1.7825e-04,\n -9.3003e-25, 1.9772e-04, -7.7822e-05, -5.3698e-05, 7.8196e-04,\n 1.3789e-04, 3.6065e-04, 4.4292e-04, -1.1434e-03, 5.1657e-09,\n -6.4051e-04, 2.8768e-04, -1.6908e-04, 6.0949e-05, -8.4681e-05,\n -5.6195e-05, -3.0043e-04, -1.9556e-05, 9.2461e-05, 1.6296e-04,\n -1.9154e-04, -5.8774e-05, -4.4841e-05, -6.3093e-05, -6.3617e-05,\n 5.6052e-45, 1.7717e-04, 1.9154e-04, 2.5361e-04, -2.2194e-04,\n 5.1403e-04, 7.9414e-05, -3.1656e-04, 1.8836e-04, -1.4659e-03,\n -6.4513e-04, 2.8946e-04, 7.8575e-04, -5.0618e-04, 4.8099e-04,\n 1.6465e-05, -3.8902e-04, -1.6629e-03, -1.5244e-04, -5.7997e-18,\n 5.6052e-45, -1.6772e-11, 1.0638e-03, 2.8985e-04, 1.3594e-04,\n -8.5026e-04, -5.5744e-04, -2.6582e-04, 1.3949e-04, -1.5192e-04,\n 2.2805e-04, -6.2003e-04, 4.9995e-20, 4.5681e-04, 1.2257e-03,\n 6.7579e-04, -2.8456e-04, 6.0929e-04, 1.4420e-04, 1.0458e-04,\n 5.6052e-45, -7.2052e-05, 2.4600e-13, 1.6104e-03, -5.8285e-04,\n 3.2160e-19, -1.9803e-04, -5.5236e-04, -2.1408e-04, 2.5637e-05,\n 3.7593e-19, -2.5148e-04, 1.2890e-04, 7.4731e-05, 6.8647e-05,\n 1.5939e-04, -1.3659e-04, -8.2627e-04, -6.5673e-04, -1.6686e-03,\n 6.0147e-05, 1.3706e-37, 5.6052e-45, -1.6358e-04, -6.1835e-04,\n -1.0902e-04, 2.2299e-04, 2.6664e-04, 1.3074e-04, 1.3940e-04,\n 7.8295e-05, -3.3395e-05, 2.7054e-06, -9.1530e-05, 2.7302e-11,\n 1.0139e-04, -5.8781e-05, 3.2014e-04, 2.4959e-04, -3.4389e-04,\n -4.5809e-04, -6.7405e-05, -6.7336e-05, -4.6532e-05, -4.9097e-04,\n -3.7814e-04, -3.4563e-04, 1.8672e-04, 5.6052e-45, -8.8233e-04,\n -1.0234e-03, -6.6298e-04, 2.8351e-04, 1.2371e-03, 5.1655e-05,\n -4.5863e-04, 5.6052e-45, -4.4614e-04, 1.0399e-17, 2.9844e-26,\n -4.8786e-04, -1.0409e-05, 5.6052e-45, 5.3506e-04, -1.8260e-04,\n 1.5808e-04, -3.4635e-04, -3.7648e-05, 1.0781e-03, -3.6383e-04,\n 1.2232e-06, 1.3399e-41, -6.1097e-04, -1.7850e-04, -1.3899e-04,\n 9.6908e-04, -1.8474e-04, 4.3856e-05, 4.8608e-04, 1.8409e-16,\n 2.8570e-04, 7.5947e-04, 1.3514e-04, -2.9787e-04, 7.9737e-04,\n 3.0791e-22, -4.3338e-04, 7.7948e-05, 1.1572e-03, -2.9663e-05,\n 2.7767e-04, 9.1880e-05, 6.9060e-12, -3.6414e-04, 7.3335e-04,\n 7.9991e-11, 1.3920e-04, -8.7617e-04, 6.7773e-05, -2.6058e-04,\n -3.7250e-04, 2.0789e-03, -9.8050e-05, -5.8542e-05, 5.8085e-04,\n 1.1043e-03, 7.4914e-04, -1.8742e-04, -3.3012e-04, -4.3293e-05,\n -1.4991e-03, -5.6566e-04, 1.3569e-04, -5.8456e-05, -3.1165e-11,\n 1.5312e-05, -3.3249e-04, 2.6319e-04, 1.7416e-04, -9.4923e-04,\n 7.6114e-04, -2.7845e-04, 1.2567e-04, -2.2400e-04, -4.8569e-05,\n -4.9704e-04, 1.4508e-11, -9.0531e-04, -8.6441e-05, 3.2276e-04,\n -1.2490e-04, -3.2848e-05, -2.8441e-04, 1.0020e-04, -9.2220e-04,\n 1.9025e-04, 5.6052e-45, -4.7272e-04, 2.2725e-04, -3.1091e-04,\n 5.7819e-04, 2.2189e-04, -1.0462e-03, 5.7439e-23, 3.6965e-11,\n 2.1151e-04, 2.4987e-04, -4.0784e-04, -5.2871e-04, -4.0987e-04,\n -1.6069e-04, 1.9486e-36, 9.6779e-04, 1.1285e-04, 6.6631e-05,\n -6.2609e-05, 1.5715e-06, 6.1327e-05, 4.5623e-04, -1.6076e-04,\n 2.1346e-04, -1.2745e-04, 2.2109e-34, 1.0750e-05, -3.6977e-04,\n 5.0222e-04, -1.2614e-05, 5.3735e-04, 3.8796e-18, 1.4643e-03,\n -1.9504e-04, -1.6178e-04, 1.5984e-04, -1.9731e-04, 5.6052e-45,\n 7.0703e-04, 3.2938e-04, -1.3328e-04, 4.6027e-04, 9.3428e-06,\n 2.7459e-04, 1.3209e-04, -4.2327e-04, 5.5632e-05, 9.4125e-05,\n 6.8775e-04, -5.2552e-04, -6.0187e-05, 5.6052e-45, -6.6453e-04,\n -2.1172e-04, 6.2049e-04, -7.3990e-05, 5.6052e-45, -4.1256e-07,\n -1.3968e-04, -6.0209e-04, 2.7984e-15, 1.1219e-04, 6.8601e-05,\n 2.5446e-04, 4.7863e-04, 7.2318e-05, -1.3480e-04, 2.8360e-04,\n 3.0080e-04, 1.6453e-22, -1.1584e-03, 9.4438e-04, -2.7145e-04,\n -1.3745e-04, 4.8567e-10, -7.0086e-04, 3.3479e-04, -2.7512e-04,\n 1.4524e-04, 5.6052e-45, 2.9174e-04, -1.3285e-03, 2.1839e-04,\n -2.6123e-04, -7.4107e-04, 4.6734e-05, 1.5150e-04, 1.4288e-33,\n 2.3867e-04, -8.5430e-04, 9.0980e-04, -1.1187e-18, 5.6053e-05,\n 5.6052e-45, 3.2515e-05, -4.9052e-04, 1.2196e-03, -7.8002e-05,\n 7.5157e-04, 3.9110e-04, 3.6360e-04, 1.1652e-04, 1.9398e-04,\n -6.5035e-04, 8.4460e-05, -5.6704e-04, -4.1735e-04, 8.8027e-31,\n -1.6961e-04, 6.2388e-05, 1.1136e-03, 5.0222e-19, -9.9475e-04,\n -4.3400e-06, 3.4889e-20, 7.6732e-04, 5.4371e-04, -9.4237e-04,\n 1.0224e-04, -1.5445e-04, 9.1598e-04, -3.4173e-04, -6.7797e-04,\n -1.5145e-03, -1.0239e-03, -8.1216e-04, 2.0897e-03, -8.0373e-05,\n -3.6215e-04, -3.0187e-04, 2.6271e-16, -8.5389e-05, -3.4533e-05,\n 4.1580e-13, 6.9543e-05, 5.7893e-04, 3.7912e-24, -7.6275e-04,\n 6.7632e-05, 2.7817e-04, 6.0587e-04, 3.2767e-04, 3.3246e-04,\n 4.3393e-04, 4.6700e-04, 4.2283e-04, -1.3321e-04, 2.2952e-17,\n -7.0638e-04, 1.1565e-04, 2.6401e-04, 2.7073e-34, 1.2617e-17,\n 2.1887e-05, -3.9295e-04, -1.9401e-04, -3.8431e-05, 7.1870e-04,\n -1.0271e-05, 5.6052e-45, 2.6096e-08, 8.9449e-10, -1.7706e-04,\n -4.8827e-04, -4.8844e-05, 6.1727e-05, 3.7427e-04, -8.3248e-04,\n 2.0339e-05, 1.8269e-07, 1.1896e-04, 5.6052e-45, -7.6055e-05,\n 2.2450e-04, 7.0869e-33, 1.7050e-04, 3.2938e-04, 8.2562e-04,\n -8.8984e-05, -4.0977e-04, 7.5647e-05, 7.6808e-06, 4.6035e-05,\n -2.5070e-04, 9.9478e-05, 8.6605e-04, 2.6146e-04, 8.7703e-05,\n 7.6940e-04, -7.0412e-04, 2.2187e-05, 1.3727e-10, 1.8023e-16,\n -6.9446e-05, -2.7991e-04, -1.8234e-04, -2.0222e-04, 3.7331e-04,\n -2.6971e-05, 1.1342e-05, -3.8569e-04, -8.8542e-04, -1.6218e-04,\n -8.6032e-04, 2.8483e-04, 4.5454e-04, -1.7828e-04, 6.2557e-04,\n 5.6052e-45, 2.4050e-04, -5.7822e-04, -2.7402e-04, -4.9296e-04,\n 8.8966e-26, -2.5190e-04, -2.7119e-06, 8.8683e-04, -3.2232e-04,\n 5.0496e-04, 2.0341e-25, -2.5610e-04, 1.2696e-20, 6.6937e-13,\n -2.0059e-04, 1.1103e-03], device='cuda:0')", + "exp_avg_sq": "tensor([1.1646e-05, 2.3670e-05, 2.5345e-05, 2.9567e-06, 2.1849e-05, 9.2360e-06,\n 2.1594e-06, 1.0790e-05, 1.6643e-05, 1.2774e-06, 2.1523e-05, 2.4080e-06,\n 9.7363e-06, 2.2485e-06, 1.6728e-16, 4.7788e-06, 3.9243e-05, 1.2890e-05,\n 2.6499e-06, 1.1512e-05, 4.7224e-07, 3.7279e-06, 1.9723e-05, 1.3217e-06,\n 2.4748e-06, 4.8854e-06, 9.7763e-06, 4.4047e-06, 4.5954e-07, 3.6725e-07,\n 1.8493e-05, 1.5636e-05, 1.1924e-05, 1.2911e-05, 1.6774e-06, 1.5390e-06,\n 4.6095e-06, 8.3565e-06, 2.5134e-05, 4.3719e-07, 7.7316e-06, 1.2557e-05,\n 1.6841e-05, 4.0266e-09, 2.1064e-05, 1.0284e-05, 7.0078e-05, 1.3973e-10,\n 5.6055e-06, 1.7833e-06, 5.3621e-06, 8.2307e-06, 3.5681e-06, 1.6176e-06,\n 4.5939e-06, 6.0652e-06, 4.3086e-06, 1.9387e-06, 2.2311e-06, 6.0973e-06,\n 4.8846e-06, 2.8962e-06, 4.2966e-06, 1.1502e-05, 5.1295e-06, 1.5926e-08,\n 1.2725e-05, 2.0638e-05, 3.7060e-06, 2.5318e-06, 6.0404e-06, 3.2327e-13,\n 4.3261e-06, 8.1095e-06, 6.3835e-08, 3.1915e-06, 3.3158e-06, 1.0501e-05,\n 6.4769e-06, 4.0831e-06, 3.6508e-06, 2.9187e-05, 3.5377e-06, 2.3106e-06,\n 6.8940e-07, 2.3701e-08, 2.7735e-06, 1.4114e-05, 6.4590e-07, 7.3511e-06,\n 5.9758e-06, 1.6247e-05, 2.0322e-05, 3.4890e-05, 1.0848e-06, 8.9366e-06,\n 6.3200e-06, 1.2089e-05, 1.9465e-05, 8.2843e-06, 3.7706e-06, 6.7491e-06,\n 4.1074e-06, 6.1613e-05, 2.0298e-06, 1.4419e-05, 4.7834e-06, 6.3506e-06,\n 9.0665e-06, 5.2078e-07, 4.9172e-10, 1.6951e-06, 1.6333e-07, 1.6519e-05,\n 1.1877e-05, 6.9821e-06, 2.8651e-06, 1.1825e-05, 3.2137e-06, 3.4336e-06,\n 5.4023e-06, 4.1989e-06, 6.4675e-06, 7.2818e-06, 6.3798e-06, 8.1663e-06,\n 2.2698e-06, 1.7991e-05, 2.3775e-06, 7.2642e-06, 6.6290e-09, 3.2273e-06,\n 3.3132e-06, 2.5987e-06, 5.1199e-06, 1.2651e-06, 1.4008e-05, 4.4884e-06,\n 1.1257e-05, 5.6817e-06, 1.2546e-05, 4.1528e-06, 1.5789e-06, 1.1687e-05,\n 1.5523e-05, 8.0060e-06, 1.6724e-05, 3.8743e-06, 9.9914e-07, 1.9877e-05,\n 1.3746e-05, 3.3975e-06, 1.4818e-05, 2.1632e-05, 3.9806e-06, 1.5638e-07,\n 1.1264e-06, 8.1645e-06, 4.8785e-06, 1.4422e-06, 2.3454e-06, 6.0834e-06,\n 1.9498e-06, 2.2906e-05, 4.6408e-06, 1.4572e-06, 5.0302e-06, 7.3065e-06,\n 5.5400e-06, 1.0866e-06, 1.6480e-06, 1.3108e-05, 1.5676e-06, 2.5121e-05,\n 4.2748e-06, 2.1308e-06, 8.2404e-06, 5.0418e-06, 1.5460e-06, 3.1646e-09,\n 3.2020e-06, 8.9805e-07, 1.2296e-05, 6.5087e-06, 6.9858e-07, 6.8873e-05,\n 1.4505e-06, 2.5442e-06, 1.6775e-05, 5.5311e-06, 3.0022e-05, 3.7115e-06,\n 1.1178e-05, 1.3421e-05, 1.5606e-05, 1.6320e-05, 8.1556e-06, 7.4544e-07,\n 2.4889e-14, 1.2518e-05, 3.7945e-06, 1.9490e-06, 2.3677e-06, 3.7896e-06,\n 1.3652e-05, 1.9451e-06, 3.0783e-05, 2.9028e-06, 7.5173e-07, 1.1916e-09,\n 9.7641e-06, 4.5169e-05, 5.0837e-05, 1.0936e-05, 1.5154e-05, 1.1459e-05,\n 4.5182e-06, 3.4192e-07, 7.9512e-06, 1.7833e-05, 2.9462e-06, 1.4721e-05,\n 6.4637e-06, 9.8000e-06, 9.7995e-06, 9.0646e-06, 7.1000e-06, 8.4800e-05,\n 6.9360e-06, 1.2830e-07, 3.9159e-06, 1.0761e-05, 9.3916e-06, 8.6678e-06,\n 4.1430e-06, 7.1366e-08, 8.3691e-06, 1.0147e-05, 6.8403e-06, 6.2446e-05,\n 4.3131e-06, 3.4965e-06, 1.8788e-06, 4.1807e-06, 1.6722e-06, 5.1601e-06,\n 1.6592e-06, 8.2866e-06, 9.7212e-06, 1.6950e-05, 9.6184e-06, 5.6595e-06,\n 5.6454e-06, 1.1820e-05, 2.6152e-06, 3.3204e-06, 2.0192e-06, 5.8559e-06,\n 5.1866e-06, 4.9788e-08, 1.7735e-06, 1.2741e-06, 5.9167e-06, 2.4006e-06,\n 2.8488e-08, 1.3118e-06, 2.4686e-05, 3.1495e-06, 1.0344e-05, 2.3382e-06,\n 4.0239e-06, 4.9698e-06, 1.1709e-06, 6.7946e-06, 2.2513e-07, 5.0791e-06,\n 1.8953e-05, 2.2942e-05, 2.4257e-06, 4.1847e-06, 2.4200e-06, 2.5326e-05,\n 7.3968e-06, 1.1775e-06, 1.3996e-05, 9.4291e-06, 2.1462e-06, 3.4174e-06,\n 2.5572e-06, 5.0429e-06, 2.0231e-05, 1.7184e-05, 2.2228e-05, 1.5027e-09,\n 9.7378e-06, 3.6731e-06, 8.8822e-06, 4.9464e-06, 3.2134e-06, 2.4772e-06,\n 8.0677e-09, 4.9477e-06, 7.3994e-06, 1.1766e-05, 2.0929e-05, 1.3230e-06,\n 3.3605e-06, 1.3452e-05, 5.0210e-06, 5.7463e-06, 4.2926e-06, 1.3303e-06,\n 1.2176e-05, 4.0081e-06, 1.2465e-05, 4.5045e-06, 1.0319e-06, 1.5232e-05,\n 2.3638e-05, 3.5644e-06, 1.8947e-05, 4.6043e-06, 1.8755e-05, 6.6672e-06,\n 8.9940e-07, 5.0290e-06, 9.9077e-06, 1.3982e-06, 8.1250e-06, 1.2093e-05,\n 1.0513e-05, 7.9980e-06, 1.2526e-05, 7.2681e-07, 3.7290e-06, 1.0661e-05,\n 6.0400e-06, 4.4480e-07, 6.6358e-16, 2.5566e-06, 1.5287e-05, 2.7499e-06,\n 2.8433e-05, 4.2938e-09, 1.5694e-05, 1.0354e-04, 3.3062e-06, 7.9529e-06,\n 1.2575e-06, 8.3170e-06, 3.4237e-06, 4.6308e-05, 3.8885e-06, 4.2888e-06,\n 1.6991e-05, 1.6232e-06, 2.8749e-06, 2.6993e-06, 4.7000e-06, 3.7375e-06,\n 6.7249e-06, 1.3522e-06, 1.2478e-05, 1.5731e-05, 4.9216e-06, 2.1875e-05,\n 2.2132e-09, 3.1859e-06, 5.4861e-07, 8.9420e-06, 5.2632e-06, 6.4448e-06,\n 2.6199e-06, 2.1502e-05, 6.1537e-08, 1.9619e-06, 5.3318e-06, 7.4461e-06,\n 5.4682e-06, 2.2604e-06, 8.1614e-07, 9.0555e-06, 9.4979e-06, 7.0575e-06,\n 1.7312e-06, 1.4187e-05, 5.4295e-06, 3.3405e-08, 1.0664e-05, 1.6029e-05,\n 8.1528e-06, 1.2573e-05, 2.7667e-05, 5.8580e-06, 2.0648e-05, 1.1382e-06,\n 8.0173e-07, 1.2610e-05, 1.3464e-11, 1.3931e-05, 2.2668e-06, 2.5045e-05,\n 9.7316e-06, 7.5790e-06, 1.5621e-05, 4.0114e-06, 6.9230e-06, 1.7080e-05,\n 4.7806e-06, 4.4751e-06, 1.8880e-05, 6.0009e-06, 3.5888e-05, 1.2507e-05,\n 8.5674e-06, 2.8642e-06, 2.5838e-05, 8.2105e-08, 2.7860e-06, 1.3863e-06,\n 3.5043e-05, 5.4335e-06, 2.9210e-06, 2.0725e-05, 1.2616e-05, 8.1148e-06,\n 1.6400e-06, 1.3607e-05, 1.3703e-06, 2.3122e-05, 1.7073e-05, 1.2227e-05,\n 1.6400e-06, 3.0134e-06, 7.9776e-06, 6.8222e-07, 3.5786e-06, 5.8251e-07,\n 2.5846e-05, 2.5413e-05, 6.5450e-06, 4.3046e-06, 7.9441e-07, 1.0544e-05,\n 1.0931e-05, 8.8036e-06, 3.1552e-11, 1.4390e-06, 3.4667e-06, 1.0020e-05,\n 5.2528e-06, 2.2529e-06, 3.1902e-06, 4.8070e-06, 1.1293e-05, 4.7041e-06,\n 3.4954e-06, 1.7057e-06, 5.2908e-07, 1.6021e-05, 4.0355e-06, 5.2321e-06,\n 1.0913e-07, 1.4736e-05, 1.1513e-05, 2.2386e-06, 1.4200e-05, 1.2978e-05,\n 3.6804e-07, 2.6959e-05, 2.0502e-05, 1.6718e-05, 1.5417e-05, 8.8681e-06,\n 1.0105e-06, 3.3234e-06, 5.7375e-06, 5.1494e-06, 8.6707e-08, 3.1666e-08,\n 7.6932e-06, 2.0583e-06, 3.2685e-06, 3.4579e-06, 4.9262e-06, 8.8453e-06,\n 2.8854e-07, 5.0771e-06, 5.9804e-06, 1.5919e-05, 2.2758e-05, 8.1069e-06,\n 3.4503e-06, 1.1633e-05, 1.7483e-06, 8.6523e-16, 3.1836e-06, 1.5804e-05,\n 5.7599e-06, 9.5871e-06, 5.1298e-06, 1.1172e-05, 2.9296e-06, 9.4495e-06,\n 1.3305e-06, 8.8723e-05, 2.4623e-08, 1.4337e-05, 9.8190e-06, 1.5151e-06,\n 3.2962e-07, 7.5590e-06], device='cuda:0')" }, "4": { - "step": "tensor(10016.)", - "exp_avg": "tensor([[-2.6975e-06, 1.5395e-06, -1.5282e-06, ..., 5.6052e-45,\n 1.1719e-06, -6.7104e-06],\n [-5.5712e-06, 4.8760e-07, -1.6429e-07, ..., -5.6052e-45,\n 1.9619e-06, -5.0373e-06],\n [-1.2807e-05, 1.4625e-06, 7.8990e-06, ..., -5.6052e-45,\n -7.8433e-07, -8.1747e-06],\n ...,\n [ 1.8854e-05, 1.3059e-06, 9.1618e-06, ..., -5.6052e-45,\n -2.6502e-06, 7.3211e-07],\n [ 1.1358e-05, -1.9116e-07, 4.8463e-06, ..., -5.6052e-45,\n -4.4220e-06, -3.7886e-06],\n [-6.7504e-06, 3.6134e-06, 1.4244e-05, ..., -5.6052e-45,\n -2.6559e-06, -1.0057e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[7.2456e-10, 8.6068e-11, 2.3632e-10, ..., 1.8317e-21, 9.0231e-10,\n 2.6650e-10],\n [1.5471e-09, 1.2868e-10, 5.0562e-10, ..., 9.3613e-20, 2.1164e-09,\n 1.3094e-09],\n [2.3088e-09, 1.2389e-10, 1.5351e-09, ..., 4.0827e-20, 1.8813e-09,\n 1.4043e-09],\n ...,\n [2.1178e-09, 2.0759e-10, 6.6535e-10, ..., 3.5719e-20, 1.0170e-09,\n 7.9025e-10],\n [2.5359e-09, 1.4923e-10, 9.7226e-10, ..., 5.3233e-22, 1.7824e-09,\n 1.7403e-09],\n [2.1738e-09, 2.1503e-10, 1.4626e-09, ..., 3.4116e-20, 1.3393e-09,\n 1.3971e-09]], device='cuda:0')" - }, - "5": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[-2.4395e-07, 2.7648e-08, -4.8573e-12, ..., -9.1373e-08,\n 5.9608e-07, 7.9518e-10],\n [-5.0876e-07, 1.4896e-05, -1.3507e-28, ..., -8.1791e-06,\n -9.5034e-08, -4.0904e-07],\n [-2.1661e-08, 1.0536e-07, -3.1118e-09, ..., 2.9513e-07,\n 1.5693e-06, 8.9357e-07],\n ...,\n [ 3.5720e-07, -7.8371e-07, -1.0479e-13, ..., 1.0302e-07,\n -9.0400e-07, 1.9990e-06],\n [-1.4434e-08, -4.1162e-08, 5.6052e-45, ..., 8.0370e-07,\n 3.2088e-06, -7.5650e-11],\n [ 6.8861e-07, -1.3563e-07, 8.8461e-09, ..., 2.6236e-07,\n 6.6968e-07, -2.4157e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.1495e-12, 4.4413e-12, 5.7834e-14, ..., 2.8478e-11, 2.3378e-12,\n 8.5373e-14],\n [1.2743e-10, 8.1257e-11, 1.9737e-15, ..., 1.9776e-10, 5.8265e-11,\n 6.0332e-12],\n [3.5624e-12, 7.2792e-12, 2.1092e-15, ..., 2.7024e-12, 3.9935e-11,\n 6.9201e-12],\n ...,\n [2.9247e-10, 6.4284e-11, 2.1466e-14, ..., 9.0640e-12, 1.0657e-10,\n 1.3140e-09],\n [3.6158e-12, 4.3818e-11, 2.7424e-17, ..., 1.0225e-10, 1.2214e-10,\n 2.4083e-12],\n [3.9301e-11, 1.5697e-11, 1.3206e-12, ..., 1.1520e-10, 5.6408e-11,\n 3.8663e-10]], device='cuda:0')" - }, - "6": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-3.8873e-06, 1.6975e-05, 1.6755e-05, ..., -1.2486e-05,\n -1.0989e-05, -9.7811e-06], device='cuda:0')", - "exp_avg_sq": "tensor([1.0554e-09, 9.0830e-09, 3.2384e-09, ..., 7.9428e-09, 5.2209e-09,\n 5.1785e-09], device='cuda:0')" - }, - "7": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[ 2.7756e-08, 1.9014e-07, -1.1054e-06, ..., 1.8177e-07,\n 2.1218e-07, 7.5888e-08],\n [ 2.7838e-07, 1.0214e-06, 2.6512e-07, ..., 5.5672e-07,\n 4.9769e-07, -7.6893e-08],\n [ 1.0924e-07, 9.7946e-07, 4.6750e-07, ..., -5.7147e-07,\n -8.0540e-07, 6.8945e-08],\n ...,\n [ 1.0891e-07, -2.0722e-07, -8.2107e-08, ..., 1.5304e-08,\n 4.3948e-07, -1.5270e-08],\n [-2.3322e-07, 2.6663e-08, 8.8711e-07, ..., -4.8016e-07,\n 2.2297e-07, 2.7197e-07],\n [-1.3734e-07, 1.0376e-06, 1.3956e-06, ..., -4.7310e-07,\n 1.4130e-06, -1.8241e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.4645e-12, 4.1419e-12, 1.5407e-11, ..., 3.6958e-12, 2.9299e-12,\n 3.6576e-12],\n [2.2838e-12, 1.0900e-11, 1.1472e-11, ..., 6.0189e-12, 4.9076e-12,\n 4.4921e-12],\n [2.4727e-12, 8.6124e-12, 5.6095e-12, ..., 6.0315e-12, 4.2357e-12,\n 7.9425e-12],\n ...,\n [1.3285e-12, 8.6673e-12, 7.0194e-12, ..., 7.8043e-12, 4.2426e-12,\n 8.1603e-12],\n [1.8556e-12, 1.3032e-11, 2.8335e-11, ..., 7.8757e-12, 2.9173e-12,\n 7.5146e-12],\n [2.5404e-12, 7.1342e-12, 6.1080e-11, ..., 6.1033e-12, 4.6751e-12,\n 5.3402e-12]], device='cuda:0')" - }, - "14": { - "step": "tensor(8764.)", - "exp_avg": "tensor([5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([4.1037e-09], device='cuda:0')" - }, - "15": { - "step": "tensor(8764.)", - "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([5.2941e-12, 2.9969e-10, 2.2532e-10], device='cuda:0')" - }, - "16": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.1880e-07, 3.2849e-08, 3.5543e-08, 3.7967e-08], device='cuda:0')" - }, - "18": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.3989e-13, 2.1218e-13, 0.0000e+00, ..., 1.4000e-12, 5.0352e-13,\n 1.6162e-14],\n [7.2515e-14, 1.8641e-13, 0.0000e+00, ..., 1.7138e-13, 6.0262e-13,\n 1.6984e-13],\n [2.1794e-14, 7.6912e-14, 0.0000e+00, ..., 2.5703e-14, 1.6664e-13,\n 5.4574e-14],\n ...,\n [2.7615e-15, 1.4852e-14, 0.0000e+00, ..., 2.1181e-14, 2.9332e-13,\n 1.6525e-15],\n [5.3192e-13, 5.5920e-13, 0.0000e+00, ..., 6.7684e-13, 2.8369e-12,\n 1.7025e-13],\n [1.6615e-15, 1.2913e-15, 0.0000e+00, ..., 2.5557e-15, 2.5350e-14,\n 6.0043e-15]], device='cuda:0')" - }, - "19": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([4.8083e-10, 2.1924e-10, 5.0092e-11, 9.0939e-11, 3.3085e-11, 3.2060e-13,\n 5.6923e-12, 9.0288e-12, 2.3074e-11, 1.3062e-12, 7.3178e-11, 3.7215e-10,\n 1.5335e-10, 1.8576e-10, 2.8045e-11, 1.7490e-10, 4.1686e-10, 3.4555e-11,\n 2.3112e-10, 3.7861e-10, 4.5194e-12, 1.5907e-10, 3.4388e-11, 3.4001e-10,\n 6.8788e-11, 1.3921e-10, 6.2044e-11, 3.8595e-11, 1.0510e-11, 6.5109e-11,\n 1.9478e-10, 9.7615e-11, 3.5469e-12, 8.9538e-10, 4.6425e-10, 2.2571e-10,\n 2.3404e-11, 2.5811e-12, 1.7740e-10, 1.7289e-10, 8.4822e-10, 2.0981e-11,\n 3.7441e-12, 1.1857e-11, 2.9933e-11, 2.1747e-10, 9.3507e-11, 6.0953e-11,\n 5.9312e-11, 6.9530e-11, 2.8443e-12, 1.0012e-10, 2.5430e-10, 1.5514e-10,\n 3.2899e-11, 1.7156e-11, 1.1691e-10, 1.4892e-12, 4.4971e-11, 1.6168e-11,\n 5.0363e-11, 1.1127e-10, 7.1757e-11, 4.1588e-11, 4.0760e-12, 2.6543e-11,\n 9.6520e-11, 5.6778e-12, 1.2166e-10, 1.1451e-10, 1.0952e-12, 3.6968e-11,\n 9.7794e-11, 4.4457e-12, 3.9234e-11, 3.3614e-13, 2.6013e-11, 6.6427e-10,\n 1.3207e-11, 1.2207e-12, 2.0363e-10, 2.7256e-10, 4.4667e-14, 1.5199e-12,\n 4.8554e-11, 1.6865e-10, 5.0743e-12, 8.6311e-11, 1.1065e-11, 1.9191e-10,\n 3.0474e-11, 2.7835e-10, 3.5479e-10, 2.5106e-11, 3.5150e-11, 1.3992e-10,\n 9.2244e-12, 5.5007e-10, 1.8349e-10, 6.5722e-10, 1.8757e-10, 1.2997e-11,\n 2.5936e-12, 7.9370e-11, 6.5964e-12, 6.5521e-10, 1.6447e-11, 4.6600e-11,\n 1.0500e-10, 8.1011e-13, 8.4533e-10, 1.5660e-11, 2.5141e-10, 1.0677e-10,\n 3.6220e-11, 4.9825e-10, 1.2414e-11, 6.0397e-11, 1.6676e-10, 7.9788e-11,\n 8.7035e-11, 1.6105e-10, 1.5993e-11, 2.6883e-12, 1.0483e-12, 5.8563e-11,\n 1.3966e-09, 1.0890e-11, 9.1671e-11, 1.8181e-11, 2.4466e-12, 4.4913e-10,\n 3.7747e-11, 6.3220e-11, 2.5586e-10, 1.8270e-10, 1.5441e-10, 4.7342e-10,\n 2.0414e-10, 4.4910e-14, 1.8621e-12, 3.0212e-12, 1.6440e-10, 6.7466e-10,\n 3.7787e-10, 2.4826e-11, 2.5392e-11, 3.2779e-11, 2.0221e-10, 4.5792e-13,\n 1.0479e-11, 2.1052e-10, 7.2675e-11, 8.0937e-11, 1.8107e-10, 6.2292e-12,\n 5.6876e-11, 1.4943e-10, 2.9031e-11, 4.8402e-10, 2.2950e-10, 3.7660e-11,\n 8.9330e-11, 2.4254e-10, 1.5332e-11, 1.0970e-10, 1.0799e-09, 1.6408e-10,\n 1.1337e-10, 5.2278e-11, 7.5614e-12, 1.8037e-10, 7.3732e-10, 1.8545e-10,\n 2.4372e-10, 1.7753e-12, 2.4573e-11, 8.4850e-11, 9.6694e-11, 1.0367e-10,\n 1.2722e-10, 5.9403e-11, 1.1928e-11, 7.1622e-11, 1.3633e-10, 3.7830e-13,\n 3.5707e-11, 1.4665e-10, 3.7198e-11, 5.9115e-10, 4.4189e-10, 4.5828e-11,\n 3.0337e-11, 1.3141e-11, 8.7699e-14, 2.1176e-09, 2.4816e-11, 4.2727e-11,\n 4.7174e-11, 1.0343e-09, 1.0304e-14, 9.6587e-10, 1.8168e-10, 2.7642e-11,\n 1.7648e-10, 2.1684e-09, 1.9937e-10, 6.2220e-11, 9.0301e-13, 2.7988e-10,\n 2.2597e-11, 4.0845e-10, 6.3597e-11, 1.2958e-10, 9.9193e-13, 1.6500e-10,\n 1.2688e-09, 7.1829e-13, 1.3985e-11, 4.1018e-11, 3.0198e-11, 8.4412e-10,\n 1.0631e-10, 6.0696e-10, 8.4144e-11, 1.0218e-13, 2.1678e-12, 1.2419e-11,\n 4.6029e-12, 1.7722e-10, 1.5151e-12, 1.6194e-12, 3.0761e-10, 1.2030e-10,\n 9.3823e-11, 2.0904e-11, 2.5023e-11, 1.8940e-10, 2.3714e-11, 2.0837e-11,\n 2.4173e-10, 9.8378e-13, 5.3219e-11, 3.2633e-10, 5.6885e-11, 1.6296e-12,\n 3.0454e-12, 1.3694e-10, 1.5461e-11, 4.9253e-10, 2.5678e-10, 2.5801e-11,\n 2.2180e-10, 2.6189e-11, 7.4075e-10, 1.3098e-11], device='cuda:0')" - }, - "20": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.1863e-12, 1.0848e-12, 7.2952e-14, 4.7607e-13, 5.5851e-14, 4.6062e-15,\n 1.0128e-14, 4.7033e-14, 2.7928e-14, 1.6406e-15, 1.3479e-13, 6.7501e-13,\n 3.8863e-13, 8.7424e-13, 8.9993e-14, 4.1196e-13, 1.3512e-12, 5.9931e-14,\n 7.4762e-13, 1.0772e-12, 2.2323e-15, 6.6908e-13, 7.5652e-14, 1.3877e-12,\n 2.6972e-13, 2.7020e-13, 1.5691e-13, 1.2824e-13, 3.8140e-14, 1.1524e-13,\n 5.9485e-13, 2.8961e-13, 2.2589e-14, 2.4106e-12, 1.7988e-12, 5.6290e-13,\n 4.6465e-14, 4.7024e-17, 3.6862e-13, 3.1254e-13, 3.5879e-12, 5.0051e-14,\n 1.5929e-14, 5.2018e-14, 5.9800e-14, 4.8920e-13, 2.0368e-13, 1.2859e-13,\n 9.0711e-14, 1.0743e-13, 2.2286e-15, 2.7356e-13, 5.2797e-13, 3.3819e-13,\n 4.5065e-14, 8.0903e-14, 3.7823e-13, 3.1160e-15, 1.4842e-13, 4.5796e-14,\n 9.0932e-14, 8.3005e-13, 2.0127e-13, 9.9889e-14, 6.1682e-17, 4.2405e-14,\n 2.1074e-13, 3.0826e-15, 2.2464e-13, 3.7075e-13, 2.5772e-14, 8.4941e-14,\n 1.0858e-13, 1.4926e-14, 7.1757e-14, 2.1724e-14, 1.5168e-13, 3.3728e-12,\n 1.1560e-14, 3.4495e-18, 8.5291e-13, 7.0663e-13, 6.4939e-15, 7.5082e-15,\n 9.5507e-14, 3.7680e-13, 2.0221e-14, 4.0405e-13, 3.2428e-14, 3.5785e-13,\n 9.2308e-14, 6.3604e-13, 1.2897e-12, 3.1832e-14, 9.7246e-14, 4.9317e-13,\n 1.9973e-14, 1.1716e-12, 7.1408e-13, 1.8480e-12, 5.6217e-13, 6.0506e-14,\n 3.1716e-14, 1.4068e-13, 2.7516e-15, 1.5397e-12, 1.7193e-14, 1.0986e-13,\n 2.1863e-13, 1.1449e-15, 2.1576e-12, 4.8814e-14, 7.1250e-13, 2.2419e-13,\n 6.6092e-14, 2.0110e-12, 4.8908e-14, 1.2878e-13, 5.0281e-13, 1.9355e-13,\n 1.6014e-13, 3.3541e-13, 1.3440e-14, 2.5940e-16, 1.0698e-14, 9.0122e-14,\n 6.3153e-12, 4.1300e-14, 2.9279e-13, 2.7730e-14, 8.9651e-16, 1.3394e-12,\n 9.2371e-14, 1.8809e-13, 4.9115e-13, 4.1260e-13, 4.3371e-13, 1.2219e-12,\n 5.5845e-13, 7.5765e-15, 4.7751e-14, 1.7312e-14, 3.6123e-13, 1.9831e-12,\n 2.3744e-12, 9.8275e-14, 1.0200e-13, 7.6729e-14, 4.1889e-13, 1.1614e-16,\n 1.0490e-13, 4.6161e-13, 2.3464e-13, 9.2142e-14, 4.2477e-13, 3.7738e-14,\n 8.3936e-14, 2.5459e-13, 4.7757e-14, 1.9144e-12, 4.4753e-13, 4.2620e-14,\n 2.0812e-13, 8.0851e-13, 4.4555e-14, 1.4256e-13, 3.7121e-12, 3.1119e-13,\n 3.4692e-13, 1.3323e-13, 3.8750e-15, 7.1943e-13, 1.7761e-12, 6.9215e-13,\n 4.5876e-13, 6.8293e-15, 3.8402e-14, 3.7954e-13, 2.9618e-13, 2.7074e-13,\n 3.2855e-13, 1.1496e-13, 1.4484e-14, 1.0754e-13, 3.4128e-13, 2.0401e-19,\n 5.5008e-14, 3.2625e-13, 3.2759e-14, 1.4802e-12, 1.1534e-12, 5.7722e-14,\n 9.0220e-14, 7.1896e-14, 8.2666e-15, 8.2087e-12, 3.1593e-14, 6.8418e-14,\n 1.4277e-13, 2.8426e-12, 2.4488e-15, 2.5951e-12, 3.2784e-13, 3.7759e-14,\n 5.8702e-13, 5.5265e-12, 4.3184e-13, 1.5701e-13, 2.9600e-16, 4.3735e-13,\n 3.0952e-14, 1.0181e-12, 4.3183e-13, 3.9544e-13, 5.9281e-16, 2.8655e-13,\n 6.0367e-12, 2.4628e-16, 5.2506e-14, 4.9135e-14, 3.8571e-14, 3.7130e-12,\n 5.9155e-13, 1.4770e-12, 2.4876e-13, 2.1739e-15, 1.0537e-16, 3.6824e-14,\n 4.5591e-15, 4.9829e-13, 1.2246e-14, 1.1405e-15, 1.8145e-12, 5.3121e-13,\n 2.8396e-13, 4.8695e-14, 3.2005e-14, 5.5089e-13, 6.5083e-14, 2.0805e-14,\n 5.9854e-13, 2.2917e-16, 1.5968e-13, 8.4049e-13, 8.6261e-14, 2.7936e-16,\n 7.1730e-16, 2.6195e-13, 2.4386e-14, 1.3292e-12, 5.2176e-13, 3.6264e-14,\n 4.9681e-13, 5.1829e-14, 2.3723e-12, 6.8975e-14], device='cuda:0')" - }, - "21": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.0346e-12, 1.0386e-12, 1.4299e-13, 4.6737e-13, 7.3495e-14, 3.1220e-15,\n 1.4160e-14, 7.6912e-14, 4.6945e-14, 9.1998e-16, 2.2344e-13, 1.2611e-12,\n 4.4477e-13, 8.6670e-13, 1.5146e-13, 7.9895e-13, 1.8068e-12, 9.5532e-14,\n 6.6400e-13, 1.5677e-12, 2.5238e-15, 7.4915e-13, 1.7340e-13, 1.4638e-12,\n 3.6036e-13, 4.1727e-13, 2.8895e-13, 2.0275e-13, 7.8540e-14, 1.6799e-13,\n 8.7349e-13, 4.5447e-13, 3.5323e-14, 3.0816e-12, 1.9855e-12, 9.6966e-13,\n 3.3578e-14, 9.7601e-18, 7.8296e-13, 5.9465e-13, 2.7594e-12, 1.1642e-13,\n 3.8418e-14, 9.1891e-14, 1.0253e-13, 6.9909e-13, 4.3180e-13, 3.0097e-13,\n 1.4904e-13, 2.2432e-13, 3.6421e-15, 4.4265e-13, 8.2893e-13, 4.8624e-13,\n 8.2035e-14, 1.1098e-13, 5.4037e-13, 2.8096e-15, 2.5059e-13, 9.6237e-14,\n 1.2052e-13, 5.8458e-13, 3.2828e-13, 1.9025e-13, 1.0134e-15, 6.6339e-14,\n 4.5632e-13, 8.3299e-15, 5.4090e-13, 5.5494e-13, 2.8332e-14, 1.8765e-13,\n 3.3992e-13, 3.2211e-14, 9.3294e-14, 1.9910e-14, 1.6154e-13, 2.9355e-12,\n 1.7420e-14, 2.7683e-18, 9.8366e-13, 1.2273e-12, 7.1540e-15, 1.6243e-14,\n 2.4751e-13, 7.8533e-13, 3.5710e-14, 4.5697e-13, 3.5881e-14, 8.4454e-13,\n 1.5953e-13, 1.1980e-12, 1.1426e-12, 6.6583e-14, 1.5818e-13, 3.7792e-13,\n 2.6196e-14, 1.8783e-12, 8.6036e-13, 2.7526e-12, 8.2685e-13, 9.0316e-14,\n 3.6169e-14, 2.1635e-13, 4.0646e-15, 2.1625e-12, 2.9291e-14, 2.4386e-13,\n 2.9548e-13, 7.5974e-16, 2.8387e-12, 9.5291e-14, 1.1309e-12, 3.9339e-13,\n 8.6998e-14, 1.5739e-12, 6.7234e-14, 1.6287e-13, 7.6278e-13, 3.8574e-13,\n 2.4663e-13, 4.7505e-13, 2.4727e-14, 7.1534e-16, 1.9922e-14, 1.6800e-13,\n 5.6146e-12, 6.1117e-14, 4.6285e-13, 5.1195e-14, 1.0350e-15, 1.9451e-12,\n 1.9269e-13, 2.9951e-13, 8.0654e-13, 5.9626e-13, 6.8400e-13, 1.5966e-12,\n 9.0879e-13, 9.2402e-15, 3.6107e-14, 2.8318e-14, 5.2619e-13, 2.3081e-12,\n 1.7560e-12, 1.4713e-13, 1.4564e-13, 1.7131e-13, 6.6301e-13, 2.5928e-16,\n 9.6192e-14, 6.3335e-13, 3.6839e-13, 2.3216e-13, 5.0273e-13, 5.7377e-14,\n 1.4236e-13, 5.4175e-13, 7.6991e-14, 2.0515e-12, 7.5361e-13, 1.0478e-13,\n 2.1013e-13, 1.0908e-12, 8.2391e-14, 3.1331e-13, 3.8559e-12, 5.2301e-13,\n 5.1972e-13, 2.5538e-13, 1.0650e-14, 4.6971e-13, 2.5291e-12, 8.7902e-13,\n 7.3289e-13, 1.3953e-14, 5.7125e-14, 4.5360e-13, 2.2122e-13, 4.9562e-13,\n 3.3602e-13, 1.4843e-13, 2.4155e-14, 1.9567e-13, 5.7156e-13, 5.8648e-17,\n 1.0959e-13, 4.1848e-13, 9.9990e-14, 2.4267e-12, 1.4748e-12, 1.2100e-13,\n 1.6508e-13, 8.6730e-14, 1.0858e-14, 8.6890e-12, 5.0202e-14, 9.2253e-14,\n 2.2905e-13, 3.6447e-12, 6.1247e-15, 4.0213e-12, 5.8460e-13, 6.1369e-14,\n 8.2774e-13, 7.4974e-12, 5.9749e-13, 1.4624e-13, 7.4767e-16, 9.7302e-13,\n 3.7444e-14, 1.7615e-12, 3.6250e-13, 6.2599e-13, 7.0872e-16, 5.5039e-13,\n 4.2608e-12, 1.6602e-16, 8.8372e-14, 9.1536e-14, 5.6090e-14, 3.5398e-12,\n 5.5144e-13, 2.5823e-12, 4.3237e-13, 4.0026e-15, 7.2231e-17, 8.3670e-14,\n 1.1631e-14, 7.6171e-13, 2.4499e-14, 2.4634e-15, 1.3758e-12, 5.6871e-13,\n 4.3285e-13, 1.0467e-13, 6.4797e-14, 8.4969e-13, 1.3502e-13, 5.0514e-14,\n 1.0909e-12, 2.1657e-16, 2.5903e-13, 1.0211e-12, 1.6191e-13, 5.4005e-16,\n 6.6258e-16, 4.3620e-13, 2.4352e-14, 2.1206e-12, 8.1932e-13, 6.0103e-14,\n 6.6592e-13, 8.7936e-14, 2.4564e-12, 9.6114e-14], device='cuda:0')" - }, - "22": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.4376e-13, 5.4002e-13, 0.0000e+00, ..., 7.8632e-13, 1.2858e-12,\n 1.9674e-13],\n [6.5941e-14, 7.3594e-14, 0.0000e+00, ..., 1.2145e-13, 3.7461e-13,\n 2.3812e-16],\n [8.8531e-14, 4.8092e-14, 0.0000e+00, ..., 8.0875e-14, 1.7279e-13,\n 1.3916e-14],\n ...,\n [1.5122e-13, 1.8644e-13, 0.0000e+00, ..., 2.3028e-13, 4.3574e-13,\n 1.3861e-13],\n [4.6041e-13, 1.4618e-13, 0.0000e+00, ..., 1.4532e-13, 9.2209e-13,\n 8.7782e-14],\n [2.2536e-15, 3.1560e-15, 0.0000e+00, ..., 3.4031e-14, 5.4239e-14,\n 1.1071e-14]], device='cuda:0')" - }, - "23": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([5.6161e-10, 4.3227e-11, 9.5474e-11, 7.1851e-11, 2.4501e-11, 1.3142e-11,\n 5.6530e-11, 3.3634e-14, 2.2542e-11, 3.8234e-12, 4.2445e-10, 8.4045e-11,\n 1.1308e-10, 7.3367e-11, 2.1470e-11, 5.2986e-11, 2.2205e-10, 1.4525e-11,\n 4.2038e-11, 4.4778e-10, 1.7676e-12, 1.7000e-10, 7.4003e-11, 4.2753e-11,\n 5.7212e-12, 1.7831e-10, 5.6325e-11, 8.9553e-12, 6.2618e-12, 4.9875e-11,\n 2.4636e-10, 5.1296e-11, 6.2926e-12, 1.8025e-09, 1.7804e-10, 1.2476e-10,\n 2.1365e-11, 1.5484e-15, 1.6648e-10, 8.3962e-10, 1.0559e-10, 6.4457e-11,\n 1.1394e-11, 6.5550e-11, 3.8276e-10, 2.8399e-10, 1.6060e-10, 9.3970e-11,\n 2.3675e-11, 4.0784e-10, 4.5732e-12, 7.3716e-10, 5.1095e-10, 1.1653e-10,\n 1.1825e-11, 2.1363e-12, 2.8032e-10, 8.9126e-12, 1.2800e-10, 2.5808e-11,\n 6.0232e-11, 6.9071e-11, 3.6747e-10, 1.9504e-10, 8.8996e-12, 2.3026e-11,\n 1.3744e-10, 2.4511e-11, 1.9246e-10, 4.6272e-12, 6.7974e-15, 7.8964e-11,\n 1.4522e-10, 2.5511e-12, 6.1591e-11, 7.5270e-12, 2.1345e-11, 4.0966e-10,\n 6.0227e-12, 2.5745e-12, 3.2913e-11, 2.8864e-10, 8.2320e-15, 2.1392e-11,\n 5.9939e-12, 2.1743e-10, 8.7499e-11, 4.9029e-11, 8.6504e-11, 1.2671e-09,\n 1.6973e-11, 2.8569e-10, 3.6434e-10, 5.5102e-11, 6.9134e-10, 5.1656e-11,\n 6.1166e-11, 3.8835e-10, 1.3051e-10, 1.5218e-10, 3.3806e-10, 1.9032e-11,\n 3.6661e-12, 1.3658e-10, 5.0615e-12, 7.2902e-10, 4.1068e-11, 8.3125e-11,\n 2.5392e-11, 1.1742e-13, 5.9617e-10, 4.2658e-11, 5.9209e-10, 1.8074e-10,\n 1.9775e-11, 3.2893e-11, 3.9190e-11, 3.3304e-11, 2.5129e-11, 6.1074e-11,\n 1.8281e-10, 1.0691e-10, 1.8164e-11, 1.4489e-14, 1.0751e-13, 6.5788e-11,\n 3.6603e-10, 2.3733e-10, 1.9205e-11, 3.1888e-10, 2.5021e-13, 1.5759e-09,\n 7.1694e-11, 1.3327e-10, 2.2863e-10, 6.1386e-10, 1.9475e-10, 6.1884e-10,\n 1.6881e-10, 7.7007e-14, 2.2082e-12, 1.1530e-11, 7.4109e-11, 6.6730e-10,\n 1.1635e-10, 3.1508e-11, 2.7882e-11, 1.0807e-11, 6.6332e-10, 1.4243e-11,\n 1.1632e-11, 5.5410e-11, 6.0546e-11, 2.4098e-10, 8.2151e-11, 1.1003e-11,\n 5.8001e-11, 6.1600e-10, 1.7416e-11, 8.4813e-10, 2.1556e-10, 3.1337e-10,\n 3.2713e-11, 2.8516e-10, 2.1812e-10, 1.7972e-10, 1.7317e-09, 2.4360e-10,\n 2.6792e-10, 3.4648e-11, 1.1052e-11, 4.6603e-11, 3.2117e-10, 9.8802e-11,\n 1.8427e-10, 1.9736e-12, 1.6233e-11, 1.0828e-11, 2.1982e-11, 7.7385e-11,\n 1.1368e-11, 6.2707e-11, 4.1383e-11, 5.5466e-11, 4.2792e-10, 1.4544e-12,\n 4.3344e-11, 1.3271e-10, 2.5487e-11, 6.8795e-10, 7.2536e-11, 1.0364e-10,\n 1.1192e-10, 8.8444e-12, 3.7627e-14, 1.9682e-09, 2.3399e-11, 2.4257e-11,\n 1.4480e-10, 3.7703e-10, 1.5286e-14, 1.0703e-10, 4.0561e-10, 4.7991e-12,\n 2.4379e-10, 8.1139e-10, 1.2786e-10, 2.6263e-11, 9.8098e-14, 1.1089e-09,\n 2.2796e-11, 3.8031e-10, 1.1686e-11, 5.2253e-11, 8.8419e-12, 1.0547e-10,\n 1.3944e-10, 4.7949e-13, 1.0611e-11, 1.3735e-11, 4.5869e-11, 1.7822e-10,\n 3.3017e-11, 6.3182e-10, 1.1137e-10, 4.5495e-12, 1.4059e-12, 6.7907e-12,\n 2.5239e-11, 5.7834e-11, 1.7549e-12, 6.1629e-12, 1.0076e-10, 8.1597e-11,\n 3.2778e-10, 1.0270e-10, 2.4005e-11, 1.8305e-10, 2.7381e-11, 4.8027e-11,\n 1.7557e-10, 3.6470e-13, 2.2961e-10, 2.8489e-10, 9.8931e-11, 2.2501e-13,\n 7.8571e-15, 9.8167e-10, 4.7394e-12, 3.6075e-10, 1.2377e-10, 2.1284e-11,\n 1.6251e-10, 1.7002e-10, 3.9464e-10, 1.1129e-11], device='cuda:0')" - }, - "24": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.5741e-12, 1.3299e-13, 1.5081e-13, 3.2635e-13, 3.8171e-14, 5.1541e-14,\n 1.3252e-13, 4.4353e-16, 2.7118e-14, 2.6015e-15, 1.5137e-12, 2.0646e-13,\n 2.3519e-13, 1.7503e-13, 6.0142e-14, 1.4429e-13, 5.3479e-13, 2.9556e-14,\n 9.2242e-14, 9.9554e-13, 2.2482e-17, 5.8334e-13, 2.2659e-13, 1.1798e-13,\n 2.4113e-14, 3.2781e-13, 1.2571e-13, 3.1289e-14, 2.4566e-14, 1.2316e-13,\n 9.4295e-13, 9.4669e-14, 1.5561e-14, 7.1366e-12, 4.1003e-13, 2.6280e-13,\n 5.1942e-14, 4.5535e-16, 4.5841e-13, 1.8652e-12, 2.0474e-13, 2.1560e-13,\n 2.6016e-14, 5.4254e-13, 1.1425e-12, 5.5847e-13, 7.3085e-13, 2.4021e-13,\n 3.7779e-14, 1.5631e-12, 4.0199e-15, 2.2204e-12, 1.2715e-12, 1.9798e-13,\n 1.0101e-14, 7.1287e-15, 9.2013e-13, 5.7643e-15, 3.5708e-13, 6.4352e-14,\n 1.3516e-13, 1.9890e-13, 1.5839e-12, 5.3562e-13, 6.2962e-15, 3.2302e-14,\n 3.6655e-13, 2.7034e-14, 3.9645e-13, 2.3056e-14, 8.0561e-15, 1.5968e-13,\n 2.9614e-13, 1.5241e-14, 1.1043e-13, 5.1599e-14, 6.8279e-14, 1.0556e-12,\n 9.6541e-15, 2.7432e-15, 7.8249e-14, 7.5959e-13, 7.3131e-16, 1.7088e-13,\n 2.7660e-14, 5.2844e-13, 3.9323e-13, 1.1221e-13, 1.5677e-13, 4.4989e-12,\n 4.6520e-14, 6.8160e-13, 1.2749e-12, 8.1663e-14, 4.4533e-12, 9.9182e-14,\n 7.8924e-14, 7.8029e-13, 3.3947e-13, 3.9144e-13, 1.4321e-12, 1.4351e-13,\n 1.8386e-14, 3.5494e-13, 2.1177e-15, 1.3545e-12, 8.0279e-14, 2.0870e-13,\n 3.9803e-14, 5.0570e-15, 1.2974e-12, 1.6197e-13, 3.7026e-12, 3.6915e-13,\n 1.7165e-14, 5.7884e-14, 1.0865e-13, 5.0584e-14, 9.0613e-14, 2.0451e-13,\n 5.2500e-13, 2.7205e-13, 2.0260e-14, 6.4562e-17, 2.5326e-15, 1.1032e-13,\n 8.0540e-13, 1.4097e-12, 4.2859e-14, 1.0730e-12, 3.1884e-16, 5.1058e-12,\n 1.7891e-13, 3.1080e-13, 3.3916e-13, 2.2298e-12, 4.4204e-13, 1.7966e-12,\n 4.0205e-13, 5.7275e-16, 2.2669e-14, 4.2397e-14, 1.2574e-13, 2.0832e-12,\n 2.4540e-13, 1.0678e-13, 8.6615e-14, 3.0791e-14, 2.3118e-12, 2.0351e-14,\n 5.0283e-14, 9.2883e-14, 1.7646e-13, 8.2767e-13, 1.3580e-13, 5.3800e-14,\n 8.6036e-14, 1.8306e-12, 3.9080e-14, 2.6014e-12, 4.5287e-13, 1.2051e-12,\n 4.5932e-14, 9.6570e-13, 1.3264e-12, 4.0210e-13, 5.9366e-12, 4.6174e-13,\n 1.1593e-12, 1.2924e-13, 8.1987e-15, 6.6756e-14, 6.0528e-13, 3.0138e-13,\n 3.3995e-13, 6.4981e-15, 1.8667e-14, 2.5731e-14, 3.9180e-14, 1.3396e-13,\n 2.0648e-14, 1.4662e-13, 7.0068e-14, 9.5272e-14, 9.1656e-13, 9.3435e-18,\n 7.8506e-14, 3.2286e-13, 3.3633e-14, 1.8689e-12, 9.0656e-14, 2.6761e-13,\n 5.1341e-13, 3.1980e-14, 9.3005e-16, 7.7726e-12, 4.2815e-14, 3.7523e-14,\n 5.7624e-13, 5.4178e-13, 2.5545e-15, 1.8157e-13, 1.3856e-12, 5.6956e-15,\n 7.4717e-13, 1.5232e-12, 2.1306e-13, 4.1326e-14, 3.1950e-17, 3.5302e-12,\n 4.7603e-14, 8.5246e-13, 5.5461e-14, 1.1448e-13, 1.0290e-14, 2.0277e-13,\n 2.6122e-13, 1.9498e-16, 3.2601e-14, 1.6794e-14, 1.1167e-13, 5.1426e-13,\n 1.5060e-13, 2.0315e-12, 3.8676e-13, 1.7794e-14, 6.3973e-16, 7.1198e-14,\n 3.9876e-14, 1.6626e-13, 9.1161e-15, 1.0026e-14, 1.7269e-13, 3.2805e-13,\n 8.2647e-13, 3.3515e-13, 3.7615e-14, 4.0175e-13, 8.4270e-14, 7.2015e-14,\n 4.4593e-13, 1.5467e-16, 1.9413e-12, 6.9534e-13, 1.6198e-13, 1.5320e-16,\n 2.7901e-15, 4.6311e-12, 5.0063e-15, 1.1177e-12, 2.3615e-13, 3.1889e-14,\n 3.7217e-13, 3.5249e-13, 8.6055e-13, 4.1739e-14], device='cuda:0')" - }, - "25": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.2848e-12, 2.0145e-13, 2.7182e-13, 3.6866e-13, 5.7195e-14, 8.1881e-14,\n 1.6578e-13, 5.2063e-16, 4.9052e-14, 4.5343e-15, 1.3540e-12, 3.0457e-13,\n 3.4024e-13, 3.3993e-13, 1.0881e-13, 2.6499e-13, 9.6813e-13, 4.1149e-14,\n 1.3456e-13, 1.9354e-12, 3.5301e-16, 7.7490e-13, 3.6899e-13, 2.0000e-13,\n 3.0360e-14, 5.8218e-13, 2.6715e-13, 5.4961e-14, 4.3973e-14, 1.4768e-13,\n 1.1163e-12, 2.2830e-13, 3.8968e-14, 6.3540e-12, 7.7886e-13, 5.4299e-13,\n 4.0402e-14, 3.4432e-16, 7.4730e-13, 2.9517e-12, 3.6979e-13, 3.1148e-13,\n 6.4860e-14, 3.6069e-13, 1.2516e-12, 9.9289e-13, 7.3810e-13, 4.5370e-13,\n 6.1335e-14, 1.3141e-12, 4.0701e-15, 3.0102e-12, 1.6599e-12, 3.7077e-13,\n 2.1326e-14, 1.7376e-14, 1.2502e-12, 1.5950e-14, 5.4991e-13, 1.4384e-13,\n 1.5520e-13, 3.2403e-13, 1.5790e-12, 8.4724e-13, 1.0927e-14, 6.1290e-14,\n 6.1098e-13, 5.7018e-14, 8.1796e-13, 2.8477e-14, 6.6568e-15, 3.7675e-13,\n 4.6700e-13, 1.6093e-14, 1.7623e-13, 5.8193e-14, 1.2541e-13, 1.6981e-12,\n 1.5835e-14, 2.4315e-15, 1.7427e-13, 1.2932e-12, 1.9520e-15, 1.4469e-13,\n 3.6281e-14, 9.2486e-13, 4.3940e-13, 2.2168e-13, 2.6948e-13, 5.0919e-12,\n 8.3839e-14, 1.2380e-12, 1.1603e-12, 1.7131e-13, 2.9627e-12, 1.5924e-13,\n 1.9319e-13, 1.3726e-12, 5.8168e-13, 7.1146e-13, 1.4551e-12, 1.2790e-13,\n 3.4848e-14, 3.9708e-13, 6.1360e-15, 2.4905e-12, 1.0040e-13, 4.0239e-13,\n 7.2850e-14, 9.0241e-15, 2.0469e-12, 2.1921e-13, 2.5268e-12, 6.2263e-13,\n 5.2235e-14, 1.0688e-13, 2.0195e-13, 1.0084e-13, 1.2271e-13, 3.1580e-13,\n 5.4061e-13, 3.2570e-13, 4.0773e-14, 4.2275e-17, 2.6801e-15, 2.0108e-13,\n 1.5604e-12, 1.0799e-12, 9.7115e-14, 9.7118e-13, 4.2316e-16, 6.3899e-12,\n 3.2492e-13, 5.7249e-13, 7.2898e-13, 2.0564e-12, 8.6740e-13, 2.1206e-12,\n 7.2673e-13, 1.2464e-15, 2.8753e-14, 7.7283e-14, 2.3068e-13, 2.2455e-12,\n 4.9372e-13, 1.7021e-13, 1.4025e-13, 5.6600e-14, 2.2485e-12, 2.3012e-14,\n 8.1781e-14, 1.8354e-13, 2.9316e-13, 7.1367e-13, 2.3684e-13, 7.8334e-14,\n 1.5596e-13, 2.0869e-12, 5.1854e-14, 3.4577e-12, 7.1644e-13, 9.4383e-13,\n 7.7482e-14, 1.2741e-12, 1.0124e-12, 5.6466e-13, 6.0442e-12, 8.2715e-13,\n 1.1396e-12, 1.6939e-13, 2.1147e-14, 1.3230e-13, 1.1124e-12, 4.6373e-13,\n 5.9867e-13, 1.5254e-14, 3.6070e-14, 5.2658e-14, 5.0718e-14, 3.5496e-13,\n 3.0287e-14, 1.5566e-13, 1.1339e-13, 1.7169e-13, 1.7881e-12, 1.5738e-16,\n 1.4079e-13, 3.9336e-13, 6.6932e-14, 2.8589e-12, 2.4362e-13, 3.0134e-13,\n 5.3150e-13, 5.0812e-14, 2.4498e-15, 7.8964e-12, 5.5812e-14, 6.4281e-14,\n 6.7320e-13, 1.2936e-12, 6.1804e-15, 4.7539e-13, 1.2761e-12, 1.0765e-14,\n 1.0094e-12, 2.8487e-12, 4.2060e-13, 6.7722e-14, 5.6671e-17, 3.9975e-12,\n 4.4370e-14, 1.5972e-12, 8.2557e-14, 2.5470e-13, 1.3901e-14, 3.5368e-13,\n 4.6507e-13, 1.9142e-15, 6.4610e-14, 3.1136e-14, 9.3348e-14, 7.7936e-13,\n 1.7626e-13, 2.6465e-12, 5.2679e-13, 3.6566e-14, 5.0748e-16, 6.2826e-14,\n 5.2945e-14, 2.7116e-13, 1.9485e-14, 1.0799e-14, 4.5310e-13, 4.1576e-13,\n 1.4083e-12, 4.8409e-13, 5.5769e-14, 7.7413e-13, 1.5932e-13, 1.3654e-13,\n 7.7865e-13, 3.3939e-16, 1.0808e-12, 9.4754e-13, 2.8946e-13, 1.1526e-15,\n 5.7706e-15, 3.3403e-12, 8.0474e-15, 1.4640e-12, 3.7812e-13, 4.9095e-14,\n 5.0257e-13, 5.0790e-13, 1.2834e-12, 7.5381e-14], device='cuda:0')" - }, - "26": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.1017e-13, 1.5229e-13, 0.0000e+00, ..., 3.2235e-13, 5.1979e-13,\n 6.3495e-14],\n [1.0185e-14, 1.4160e-14, 0.0000e+00, ..., 3.9574e-16, 1.2757e-14,\n 4.0740e-16],\n [8.2325e-14, 6.4334e-14, 0.0000e+00, ..., 6.6219e-14, 7.1519e-14,\n 4.6518e-14],\n ...,\n [4.0995e-14, 3.3689e-14, 0.0000e+00, ..., 2.3765e-14, 2.3211e-13,\n 6.6987e-15],\n [2.4057e-13, 7.4449e-14, 0.0000e+00, ..., 6.4159e-14, 5.2583e-13,\n 1.1645e-13],\n [4.8650e-14, 8.5531e-15, 0.0000e+00, ..., 1.1801e-14, 1.0137e-13,\n 1.0658e-15]], device='cuda:0')" - }, - "27": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.8671e-10, 8.3738e-12, 6.6465e-11, 8.6363e-11, 2.6298e-11, 6.6289e-12,\n 8.7234e-11, 2.1230e-12, 3.9148e-11, 5.9748e-12, 1.9848e-10, 6.8695e-10,\n 1.1695e-10, 1.3753e-10, 1.4349e-10, 5.9395e-10, 6.6565e-11, 6.5992e-11,\n 2.2275e-10, 2.5898e-10, 8.3049e-13, 2.7825e-11, 5.6121e-11, 6.9581e-11,\n 6.4443e-11, 1.1947e-10, 9.4974e-11, 1.8654e-11, 2.7259e-12, 3.1049e-11,\n 1.6500e-10, 1.0901e-10, 1.7093e-11, 8.3842e-10, 3.0368e-10, 1.8613e-10,\n 5.5370e-12, 1.0494e-13, 7.8077e-11, 2.5212e-09, 3.7611e-10, 5.2877e-11,\n 1.6575e-11, 1.0374e-11, 4.3484e-10, 7.6576e-10, 1.1928e-10, 2.6425e-10,\n 3.2919e-11, 2.1154e-10, 4.0795e-12, 4.5037e-10, 1.1483e-10, 1.0345e-10,\n 1.6440e-11, 1.4341e-12, 1.9611e-10, 7.2776e-12, 1.3034e-10, 1.1406e-11,\n 6.6980e-11, 1.2823e-10, 3.2325e-10, 1.7228e-10, 4.8589e-12, 1.2022e-11,\n 1.2925e-10, 8.4143e-12, 8.1959e-11, 1.5573e-11, 2.8430e-13, 7.9427e-11,\n 3.3570e-10, 2.4656e-11, 6.7965e-11, 4.5434e-13, 4.4137e-12, 2.7420e-10,\n 1.7695e-11, 5.8429e-15, 1.6230e-10, 1.9532e-10, 2.7970e-13, 1.6580e-11,\n 1.1572e-10, 4.9427e-10, 3.9732e-11, 1.7066e-10, 3.3807e-11, 2.8191e-10,\n 1.9964e-10, 1.2280e-10, 3.1212e-10, 5.6950e-11, 1.0057e-10, 1.5414e-11,\n 5.2432e-11, 3.5896e-10, 8.7096e-11, 7.2493e-10, 6.4218e-10, 1.5713e-11,\n 2.7929e-13, 8.6248e-11, 4.8472e-12, 5.6870e-10, 4.5579e-11, 4.1625e-11,\n 1.2403e-10, 7.7346e-13, 6.1479e-11, 4.0118e-13, 4.0069e-10, 3.9249e-10,\n 3.0940e-11, 3.4312e-10, 2.9298e-11, 5.5960e-11, 8.2909e-11, 8.5157e-11,\n 2.9678e-11, 6.7524e-12, 1.2176e-11, 1.6153e-14, 1.5823e-12, 7.6211e-11,\n 6.4767e-10, 1.0697e-10, 2.7049e-10, 1.2299e-10, 6.7923e-13, 7.8049e-10,\n 1.2584e-10, 1.1631e-10, 2.8404e-10, 1.6573e-10, 1.8470e-10, 2.4784e-10,\n 1.6835e-10, 6.4861e-13, 4.9128e-12, 9.0519e-12, 6.6649e-11, 8.4813e-10,\n 1.6204e-10, 9.8991e-12, 1.5282e-10, 1.8461e-11, 1.5603e-10, 3.8597e-12,\n 7.5193e-12, 2.6440e-10, 1.9289e-10, 2.4138e-10, 5.1257e-11, 8.6413e-12,\n 1.5370e-10, 4.5501e-10, 7.9825e-11, 1.0058e-10, 1.2427e-10, 1.4267e-10,\n 7.7456e-11, 1.0342e-10, 9.6833e-11, 9.1142e-11, 6.5425e-10, 5.0391e-10,\n 7.3557e-11, 2.7753e-10, 2.7631e-11, 1.9939e-11, 4.1178e-10, 3.9178e-11,\n 2.9408e-10, 2.1874e-11, 8.4161e-12, 2.6553e-11, 2.8397e-11, 3.1576e-11,\n 3.7218e-11, 3.2035e-11, 1.1669e-11, 8.3128e-11, 1.2791e-09, 1.9540e-13,\n 6.3416e-11, 5.3177e-11, 3.7541e-11, 3.8366e-10, 1.1279e-10, 1.3832e-11,\n 5.7040e-11, 7.8642e-11, 5.9732e-13, 6.9680e-10, 3.6665e-11, 5.5837e-12,\n 1.7415e-10, 7.7274e-10, 6.4709e-16, 7.5565e-11, 3.7506e-10, 8.3471e-11,\n 6.9701e-11, 5.4417e-10, 1.3458e-10, 2.8995e-11, 2.8902e-14, 6.5262e-10,\n 1.3257e-11, 6.5393e-10, 4.0178e-13, 1.4501e-10, 2.0812e-12, 1.7423e-10,\n 2.4525e-10, 4.1691e-15, 2.0732e-11, 2.8828e-11, 2.4539e-11, 7.9807e-10,\n 4.2191e-11, 1.6740e-10, 3.7689e-11, 1.5508e-12, 8.6335e-13, 3.0757e-12,\n 2.8198e-11, 5.6186e-11, 4.7104e-12, 6.9116e-13, 2.9399e-10, 2.0983e-11,\n 1.5425e-11, 1.0756e-10, 9.1498e-11, 1.3463e-10, 5.5680e-11, 4.3203e-11,\n 1.4600e-11, 3.3073e-14, 1.1085e-10, 7.1879e-11, 3.2158e-11, 1.0272e-12,\n 6.6369e-13, 5.4913e-10, 2.5514e-11, 6.3311e-10, 2.0126e-10, 2.1969e-11,\n 3.0219e-10, 8.5407e-11, 1.7841e-10, 3.3910e-11], device='cuda:0')" - }, - "28": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.3408e-13, 4.1444e-14, 1.2169e-13, 2.8219e-13, 5.0337e-14, 3.1103e-14,\n 1.7957e-13, 1.1899e-14, 6.0874e-14, 7.4070e-15, 3.7226e-13, 1.8160e-12,\n 2.5528e-13, 3.4402e-13, 5.5181e-13, 2.4625e-12, 2.0085e-13, 1.6224e-13,\n 7.5099e-13, 5.1352e-13, 9.0821e-17, 7.3684e-14, 2.0887e-13, 1.4161e-13,\n 2.2979e-13, 2.8815e-13, 1.9770e-13, 5.8850e-14, 7.0039e-15, 5.4352e-14,\n 4.4663e-13, 2.5938e-13, 4.0229e-14, 2.1038e-12, 7.9440e-13, 5.6189e-13,\n 1.1862e-14, 4.4815e-17, 1.8943e-13, 1.5413e-11, 8.4395e-13, 1.6915e-13,\n 5.5353e-14, 3.0955e-14, 1.4720e-12, 3.3790e-12, 3.6826e-13, 1.2037e-12,\n 3.5751e-14, 4.3875e-13, 4.0648e-15, 1.1375e-12, 2.2040e-13, 2.2859e-13,\n 2.3267e-14, 5.4352e-15, 6.2847e-13, 6.1859e-15, 5.3248e-13, 3.9617e-14,\n 1.5515e-13, 9.2068e-13, 1.5413e-12, 6.0337e-13, 2.2734e-15, 1.3910e-14,\n 3.4995e-13, 7.4293e-15, 1.3419e-13, 5.0527e-14, 1.1142e-14, 2.3955e-13,\n 7.8056e-13, 5.8178e-14, 1.6294e-13, 4.7682e-15, 1.5863e-14, 7.8640e-13,\n 2.5094e-14, 4.4687e-16, 6.0311e-13, 4.2029e-13, 4.8817e-16, 8.0545e-14,\n 4.5855e-13, 1.4382e-12, 1.1505e-13, 7.2828e-13, 8.2736e-14, 6.4597e-13,\n 1.1313e-12, 3.0756e-13, 7.7113e-13, 8.3396e-14, 2.1753e-13, 1.5061e-14,\n 1.0584e-13, 6.3759e-13, 2.0498e-13, 2.0287e-12, 4.2045e-12, 7.1693e-14,\n 1.6037e-15, 1.9663e-13, 2.5691e-15, 1.2576e-12, 1.3898e-13, 1.1338e-13,\n 3.7113e-13, 3.2069e-18, 1.6161e-13, 8.1110e-15, 1.2260e-12, 7.7356e-13,\n 4.3217e-14, 9.0338e-13, 9.8021e-14, 1.0735e-13, 2.0797e-13, 2.2403e-13,\n 3.8934e-14, 2.7715e-14, 2.2541e-14, 9.5579e-17, 6.3015e-15, 1.4369e-13,\n 1.6495e-12, 3.3221e-13, 1.3765e-12, 2.7880e-13, 1.4655e-16, 2.0381e-12,\n 5.8396e-13, 2.2604e-13, 8.8545e-13, 3.0080e-13, 3.1608e-13, 4.2102e-13,\n 3.6277e-13, 3.5651e-17, 2.4492e-14, 5.5127e-14, 7.9280e-14, 2.4221e-12,\n 4.1603e-13, 2.9272e-14, 6.9317e-13, 4.8563e-14, 2.7596e-13, 7.8172e-16,\n 2.0579e-14, 6.8701e-13, 1.1894e-12, 9.7812e-13, 5.1621e-14, 7.4149e-14,\n 7.2795e-13, 1.2132e-12, 1.7549e-13, 2.1631e-13, 1.6922e-13, 2.3853e-13,\n 3.7987e-13, 1.9069e-13, 3.0124e-13, 2.1330e-13, 1.1851e-12, 1.6620e-12,\n 1.5134e-13, 6.5358e-13, 5.0936e-14, 2.9020e-14, 9.0363e-13, 7.7080e-14,\n 8.6002e-13, 1.3164e-13, 1.8246e-14, 6.1695e-14, 5.5384e-14, 1.1096e-13,\n 5.0140e-14, 6.9870e-14, 1.3156e-14, 1.5449e-13, 3.9335e-12, 1.3298e-15,\n 1.0072e-13, 9.6654e-14, 4.1813e-14, 8.4161e-13, 1.6662e-13, 2.2992e-14,\n 2.0043e-13, 3.6174e-13, 2.4998e-15, 1.5475e-12, 1.2398e-13, 8.5748e-15,\n 8.0836e-13, 1.9144e-12, 2.5554e-16, 1.7214e-13, 1.3679e-12, 2.7468e-13,\n 2.5776e-13, 1.0821e-12, 2.6195e-13, 7.5294e-14, 1.0565e-16, 1.3953e-12,\n 2.0894e-14, 1.7745e-12, 4.6291e-15, 3.8788e-13, 2.1781e-15, 3.9313e-13,\n 3.4559e-13, 1.8758e-15, 6.9499e-14, 5.0741e-14, 6.2051e-14, 3.6145e-12,\n 1.5300e-13, 4.3811e-13, 1.0469e-13, 1.6792e-14, 2.3859e-17, 1.7641e-14,\n 3.8828e-14, 1.2823e-13, 1.8140e-14, 1.3170e-17, 1.0955e-12, 7.0870e-14,\n 5.6050e-14, 3.8863e-13, 3.3252e-13, 2.7834e-13, 2.5094e-13, 9.6829e-14,\n 7.5358e-14, 6.7518e-16, 3.7207e-13, 1.2033e-13, 5.8652e-14, 1.0325e-15,\n 1.1959e-15, 1.8256e-12, 5.8426e-14, 2.0671e-12, 3.4261e-13, 3.7876e-14,\n 1.2298e-12, 1.5487e-13, 3.2184e-13, 2.1921e-13], device='cuda:0')" - }, - "29": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([7.4515e-13, 4.0946e-14, 2.2256e-13, 3.7904e-13, 6.0959e-14, 4.2341e-14,\n 2.6890e-13, 1.9264e-14, 1.1101e-13, 1.2135e-14, 6.5508e-13, 2.5070e-12,\n 3.9051e-13, 5.5971e-13, 6.5271e-13, 2.5512e-12, 3.4288e-13, 2.2269e-13,\n 7.3236e-13, 1.1174e-12, 1.2592e-17, 1.3125e-13, 2.7193e-13, 3.2154e-13,\n 3.1380e-13, 3.9661e-13, 4.2406e-13, 9.9927e-14, 1.7271e-14, 9.6848e-14,\n 7.4596e-13, 4.6889e-13, 9.0015e-14, 3.0434e-12, 1.3134e-12, 8.4235e-13,\n 9.5171e-15, 2.0591e-16, 3.4732e-13, 9.1739e-12, 1.3124e-12, 2.6664e-13,\n 8.9993e-14, 6.1101e-14, 1.5126e-12, 2.6854e-12, 5.4811e-13, 1.1946e-12,\n 8.9289e-14, 7.3819e-13, 7.0967e-15, 1.9146e-12, 3.7782e-13, 3.5494e-13,\n 4.2354e-14, 1.3981e-14, 8.5375e-13, 1.8940e-14, 6.1398e-13, 6.4310e-14,\n 1.9605e-13, 6.2948e-13, 1.4007e-12, 7.6698e-13, 8.0125e-15, 3.4535e-14,\n 5.8392e-13, 1.8229e-14, 3.6107e-13, 7.6895e-14, 7.1951e-15, 3.8051e-13,\n 1.1213e-12, 1.3361e-13, 2.0875e-13, 4.0063e-15, 2.5638e-14, 1.1967e-12,\n 3.7066e-14, 5.1992e-16, 7.1854e-13, 8.7418e-13, 1.8744e-15, 9.3821e-14,\n 5.5219e-13, 2.1390e-12, 1.8037e-13, 7.4383e-13, 1.1627e-13, 1.2431e-12,\n 9.2138e-13, 5.4740e-13, 1.0936e-12, 1.8454e-13, 4.4115e-13, 4.5084e-14,\n 1.7162e-13, 1.3230e-12, 4.1537e-13, 2.9207e-12, 2.7727e-12, 9.4322e-14,\n 2.7545e-15, 2.6565e-13, 3.8389e-15, 2.0777e-12, 1.1691e-13, 2.0045e-13,\n 3.9552e-13, 7.7702e-17, 2.1899e-13, 2.2084e-15, 1.6722e-12, 1.3999e-12,\n 9.8737e-14, 1.1971e-12, 1.4252e-13, 1.7703e-13, 3.7668e-13, 3.5955e-13,\n 9.4367e-14, 2.2418e-14, 2.5972e-14, 2.4924e-16, 1.4868e-14, 2.4094e-13,\n 2.7480e-12, 4.9421e-13, 1.1907e-12, 3.9855e-13, 8.9123e-18, 3.1422e-12,\n 5.9652e-13, 5.2346e-13, 9.6753e-13, 5.6828e-13, 8.0424e-13, 9.1268e-13,\n 7.3855e-13, 7.6700e-17, 3.6501e-14, 6.4461e-14, 2.2935e-13, 2.9702e-12,\n 6.8219e-13, 5.5359e-14, 6.9825e-13, 9.7263e-14, 5.2594e-13, 6.3757e-15,\n 4.5059e-14, 8.9336e-13, 8.7622e-13, 8.0127e-13, 1.5645e-13, 6.2816e-14,\n 4.7049e-13, 1.6617e-12, 2.4680e-13, 4.6396e-13, 4.4091e-13, 4.8403e-13,\n 2.0977e-13, 4.4240e-13, 4.3590e-13, 2.9715e-13, 2.4038e-12, 1.7446e-12,\n 3.5573e-13, 1.2393e-12, 6.8859e-14, 6.7902e-14, 1.4712e-12, 1.8124e-13,\n 9.8577e-13, 1.3463e-13, 2.4264e-14, 1.3381e-13, 7.2975e-14, 1.5936e-13,\n 1.1058e-13, 9.3289e-14, 3.1160e-14, 2.5853e-13, 5.0172e-12, 2.4928e-15,\n 2.1200e-13, 1.6682e-13, 1.0850e-13, 1.5834e-12, 4.1333e-13, 4.2860e-14,\n 2.6888e-13, 3.9525e-13, 9.1681e-15, 3.0172e-12, 9.9169e-14, 1.4697e-14,\n 8.1112e-13, 2.8883e-12, 2.3455e-15, 3.4625e-13, 1.2939e-12, 2.2959e-13,\n 3.4169e-13, 2.1218e-12, 4.5254e-13, 8.8510e-14, 9.0032e-16, 2.3774e-12,\n 2.8990e-14, 2.6328e-12, 4.3655e-15, 6.3768e-13, 1.7257e-15, 6.4548e-13,\n 8.4745e-13, 2.2047e-15, 1.1508e-13, 9.2039e-14, 5.1697e-14, 3.3115e-12,\n 2.0967e-13, 7.7472e-13, 1.9699e-13, 2.2076e-14, 3.7170e-17, 2.8657e-14,\n 7.4155e-14, 2.5012e-13, 3.2386e-14, 3.9212e-16, 1.2966e-12, 1.0454e-13,\n 7.5098e-14, 4.9799e-13, 2.6638e-13, 6.0969e-13, 2.7877e-13, 1.2718e-13,\n 5.9332e-14, 1.5833e-15, 5.0946e-13, 2.5005e-13, 1.0741e-13, 1.2561e-15,\n 5.2173e-16, 1.9025e-12, 6.0660e-14, 2.6142e-12, 6.7828e-13, 5.9627e-14,\n 9.9361e-13, 2.7487e-13, 6.7948e-13, 1.8834e-13], device='cuda:0')" - }, - "30": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[6.8613e-14, 2.3172e-13, 0.0000e+00, ..., 7.9712e-13, 4.5359e-13,\n 4.8491e-13],\n [1.0961e-13, 4.0940e-14, 0.0000e+00, ..., 3.4241e-14, 3.9661e-13,\n 1.7885e-14],\n [8.9273e-14, 1.5753e-13, 0.0000e+00, ..., 1.0958e-13, 7.4337e-13,\n 4.3951e-14],\n ...,\n [2.1583e-14, 5.8759e-14, 0.0000e+00, ..., 7.4735e-14, 1.1108e-13,\n 6.8573e-16],\n [4.9280e-13, 4.3808e-13, 0.0000e+00, ..., 6.3703e-13, 2.7173e-12,\n 5.5395e-13],\n [3.9783e-15, 5.3193e-15, 0.0000e+00, ..., 2.0518e-14, 4.7984e-14,\n 4.9931e-16]], device='cuda:0')" - }, - "31": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([4.2517e-10, 1.3217e-10, 1.6805e-10, 5.2299e-12, 4.7519e-11, 5.0264e-12,\n 5.1554e-11, 4.4089e-12, 3.0125e-11, 1.9308e-11, 3.9665e-10, 3.9097e-10,\n 5.3658e-11, 3.8737e-11, 7.5795e-11, 1.0254e-11, 1.9250e-10, 5.7262e-11,\n 1.9228e-10, 5.3535e-10, 8.8060e-13, 1.5346e-10, 3.3816e-11, 3.1416e-10,\n 2.3172e-11, 5.0886e-11, 3.3646e-10, 5.4393e-12, 1.9104e-11, 5.8331e-11,\n 1.0119e-10, 3.3309e-10, 4.4019e-13, 5.9948e-10, 3.4492e-10, 4.0946e-10,\n 1.5531e-11, 5.5356e-13, 2.2250e-10, 1.4692e-09, 6.1745e-10, 1.7388e-11,\n 1.5803e-11, 1.5929e-11, 1.1228e-10, 2.5630e-10, 1.5621e-10, 7.2195e-11,\n 1.5725e-11, 2.1718e-10, 5.8211e-12, 5.4387e-10, 2.7258e-10, 1.8161e-10,\n 8.0394e-12, 1.6697e-11, 1.2223e-10, 1.4300e-11, 9.0621e-11, 2.8652e-11,\n 3.5012e-11, 9.0616e-11, 1.4916e-10, 3.6689e-10, 3.8437e-12, 9.1449e-11,\n 8.8078e-11, 2.1294e-11, 1.7546e-10, 2.5473e-11, 1.7865e-13, 7.0985e-11,\n 2.9133e-10, 2.8392e-11, 1.7386e-11, 2.2505e-13, 4.5701e-11, 5.1131e-10,\n 2.0006e-11, 1.4357e-13, 2.0834e-10, 6.6534e-10, 2.9737e-13, 4.8937e-12,\n 8.2632e-11, 4.9255e-10, 2.2239e-11, 1.0102e-10, 7.3905e-11, 1.8918e-11,\n 1.0466e-10, 3.7736e-10, 1.1495e-10, 1.3337e-10, 4.2769e-10, 1.0660e-10,\n 1.8902e-10, 6.4132e-10, 9.9965e-11, 7.7576e-10, 7.2265e-11, 1.4800e-11,\n 2.8833e-14, 7.6520e-11, 8.3998e-13, 9.2472e-10, 2.6469e-11, 1.0461e-10,\n 7.9542e-11, 5.5292e-13, 7.0115e-10, 3.1015e-11, 2.1780e-10, 3.2800e-10,\n 5.7309e-11, 4.5667e-10, 9.8437e-12, 3.6262e-11, 5.6733e-11, 3.0448e-10,\n 3.1753e-11, 2.4313e-11, 1.3536e-11, 2.2731e-13, 2.2501e-12, 1.6360e-10,\n 2.7377e-10, 4.0333e-11, 1.3567e-10, 2.7803e-10, 3.0357e-12, 6.2340e-10,\n 1.5224e-10, 2.2791e-11, 2.6389e-10, 9.9838e-11, 4.9956e-10, 9.6804e-11,\n 7.6286e-11, 5.4884e-14, 1.9454e-13, 1.8804e-12, 3.4213e-10, 1.2684e-09,\n 2.2211e-10, 4.3728e-11, 5.3660e-11, 8.4893e-11, 7.4968e-11, 6.4062e-12,\n 1.9613e-11, 1.7375e-10, 1.2600e-10, 3.0838e-11, 3.7741e-11, 2.2324e-11,\n 6.0034e-11, 6.7569e-10, 2.9898e-11, 3.2389e-11, 1.0704e-10, 3.3508e-11,\n 3.1906e-11, 9.5591e-11, 3.0339e-11, 1.4237e-10, 6.5281e-10, 4.7229e-11,\n 2.2295e-10, 4.0425e-11, 2.0491e-12, 7.4035e-11, 1.3683e-10, 2.1404e-11,\n 2.7695e-10, 4.7081e-11, 3.2692e-11, 3.1225e-11, 3.4807e-11, 9.4108e-11,\n 1.1363e-10, 3.3904e-11, 5.0726e-11, 8.4242e-11, 7.4434e-10, 4.3499e-13,\n 5.8360e-11, 2.1938e-10, 3.8957e-11, 7.1486e-10, 1.2419e-10, 7.3123e-11,\n 4.2042e-11, 1.4660e-11, 6.4318e-14, 1.9824e-09, 2.7699e-11, 5.9130e-12,\n 5.8105e-11, 2.7184e-10, 3.1200e-13, 8.0644e-10, 5.5588e-11, 6.9369e-11,\n 6.8151e-11, 1.8727e-10, 3.3998e-10, 6.9233e-12, 1.5834e-13, 1.2978e-10,\n 3.7491e-11, 9.5308e-11, 3.5519e-12, 2.3719e-10, 4.6972e-12, 5.1651e-10,\n 5.9931e-10, 2.3590e-12, 5.9020e-12, 6.0041e-11, 2.4456e-11, 3.3463e-10,\n 3.1119e-11, 4.0516e-10, 5.4647e-11, 7.5015e-15, 6.9491e-14, 5.3577e-12,\n 8.3156e-12, 8.7567e-11, 2.3049e-12, 7.7751e-12, 9.0466e-11, 5.2482e-11,\n 3.5815e-10, 2.7566e-12, 2.5170e-11, 1.6768e-10, 4.6845e-11, 5.4875e-11,\n 9.9508e-11, 4.0579e-13, 3.9402e-11, 1.4058e-10, 5.7766e-11, 1.3937e-12,\n 3.2482e-12, 2.6849e-10, 1.8609e-11, 6.0088e-11, 4.8323e-10, 7.4022e-12,\n 3.0138e-10, 2.6648e-11, 7.1751e-10, 1.7162e-11], device='cuda:0')" - }, - "32": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([9.1643e-13, 4.2361e-13, 4.6941e-13, 2.7782e-14, 1.0526e-13, 1.4921e-14,\n 7.5194e-14, 3.5881e-14, 4.1104e-14, 4.6344e-14, 7.3023e-13, 6.4281e-13,\n 1.1106e-13, 9.8161e-14, 2.5027e-13, 7.4455e-14, 4.5937e-13, 9.9651e-14,\n 4.7048e-13, 1.2730e-12, 4.2369e-16, 5.3875e-13, 1.6117e-13, 6.6174e-13,\n 7.7780e-14, 5.8888e-14, 1.6067e-12, 1.6747e-14, 6.6441e-14, 1.1196e-13,\n 1.5310e-13, 2.4262e-12, 4.1395e-15, 1.0250e-12, 1.0258e-12, 1.5594e-12,\n 1.9449e-14, 6.2713e-16, 6.4913e-13, 3.7187e-12, 1.8261e-12, 2.8918e-14,\n 4.2106e-14, 7.2513e-14, 1.4556e-13, 4.5970e-13, 6.4438e-13, 1.7384e-13,\n 2.0070e-14, 5.0436e-13, 6.3575e-15, 1.6662e-12, 5.6651e-13, 3.9561e-13,\n 9.4072e-15, 1.1906e-13, 2.6334e-13, 1.5825e-14, 4.5878e-13, 9.8794e-14,\n 8.0519e-14, 3.7165e-13, 4.7356e-13, 1.6476e-12, 4.6340e-15, 2.6612e-13,\n 2.3999e-13, 3.4145e-14, 4.0753e-13, 5.5226e-14, 5.1746e-17, 2.5888e-13,\n 7.4470e-13, 9.3882e-14, 2.6284e-14, 8.5102e-16, 3.2487e-13, 1.4863e-12,\n 4.1658e-14, 7.6899e-18, 1.2162e-12, 3.6619e-12, 1.8058e-15, 2.7486e-14,\n 3.7182e-13, 1.4384e-12, 8.5156e-14, 3.9421e-13, 1.5742e-13, 9.9348e-14,\n 2.8481e-13, 9.0424e-13, 1.6645e-13, 3.5221e-13, 1.4620e-12, 2.1270e-13,\n 5.0556e-13, 1.4678e-12, 2.5481e-13, 2.2470e-12, 1.5978e-13, 8.9683e-14,\n 4.1140e-15, 1.4658e-13, 4.2637e-16, 2.8470e-12, 5.2591e-14, 5.3010e-13,\n 1.2784e-13, 2.0339e-19, 2.1474e-12, 9.1322e-14, 5.4884e-13, 5.7084e-13,\n 1.0204e-13, 1.4346e-12, 3.0807e-14, 5.9332e-14, 1.1302e-13, 1.3160e-12,\n 3.8694e-14, 3.4835e-14, 1.4362e-14, 1.0635e-14, 2.0885e-14, 5.4159e-13,\n 4.9020e-13, 1.0148e-13, 4.1753e-13, 1.0482e-12, 1.5820e-16, 1.1700e-12,\n 5.8744e-13, 6.9766e-14, 4.3740e-13, 1.4367e-13, 1.7049e-12, 2.1839e-13,\n 1.9322e-13, 8.4513e-18, 7.9936e-17, 9.2549e-15, 8.2217e-13, 5.2495e-12,\n 7.6978e-13, 1.6625e-13, 1.3169e-13, 5.0695e-13, 8.8731e-14, 6.3847e-15,\n 1.2859e-13, 3.5476e-13, 4.9860e-13, 5.0306e-14, 4.9765e-14, 1.7840e-13,\n 1.0624e-13, 1.7166e-12, 4.5170e-14, 1.4100e-13, 2.3720e-13, 4.9820e-14,\n 3.8613e-14, 3.2577e-13, 7.2764e-14, 3.0912e-13, 1.2234e-12, 9.0497e-14,\n 1.0735e-12, 1.3315e-13, 6.4559e-16, 1.3895e-13, 2.2231e-13, 6.5332e-14,\n 7.8609e-13, 3.2638e-13, 4.8157e-14, 1.0078e-13, 7.2713e-14, 2.0121e-13,\n 2.5741e-13, 5.2080e-14, 1.0821e-13, 1.3804e-13, 1.7281e-12, 2.8067e-17,\n 9.8368e-14, 8.0022e-13, 4.4564e-14, 1.5596e-12, 2.0903e-13, 1.8002e-13,\n 1.6267e-13, 4.6077e-14, 1.6593e-16, 8.1217e-12, 2.9756e-14, 4.3816e-15,\n 1.2125e-13, 5.2100e-13, 1.0751e-14, 3.5413e-12, 9.7532e-14, 1.9147e-13,\n 1.3591e-13, 4.1800e-13, 8.7702e-13, 1.2166e-14, 1.1424e-17, 2.2586e-13,\n 7.7325e-14, 2.1593e-13, 1.1210e-14, 8.3067e-13, 3.3918e-15, 1.4971e-12,\n 1.4122e-12, 2.8583e-16, 1.6428e-14, 1.2870e-13, 4.7653e-14, 8.1348e-13,\n 1.0519e-13, 8.6160e-13, 1.4974e-13, 6.8027e-16, 5.0443e-16, 1.9601e-14,\n 1.1814e-14, 2.6438e-13, 6.9014e-15, 4.4681e-15, 3.1308e-13, 1.8360e-13,\n 1.1289e-12, 1.8697e-14, 3.7191e-14, 3.5813e-13, 2.0016e-13, 1.0050e-13,\n 1.9547e-13, 2.2091e-16, 1.1187e-13, 4.0075e-13, 9.4461e-14, 1.1007e-16,\n 1.3960e-15, 4.3929e-13, 2.9405e-14, 2.0530e-13, 1.4211e-12, 6.9194e-15,\n 1.0806e-12, 5.0218e-14, 1.9134e-12, 5.4092e-14], device='cuda:0')" - }, - "33": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.7577e-12, 6.1971e-13, 4.9245e-13, 3.1996e-14, 1.0278e-13, 3.9959e-14,\n 1.4150e-13, 4.6338e-14, 6.9313e-14, 3.2239e-14, 1.2950e-12, 1.3913e-12,\n 1.6705e-13, 1.8515e-13, 3.9369e-13, 4.2617e-14, 8.6267e-13, 1.4677e-13,\n 6.1836e-13, 2.2321e-12, 3.4923e-16, 7.1170e-13, 1.9895e-13, 1.3201e-12,\n 1.2890e-13, 1.6086e-13, 1.5102e-12, 3.1891e-14, 1.1569e-13, 1.5294e-13,\n 4.8044e-13, 1.5595e-12, 4.6554e-15, 2.0530e-12, 1.4626e-12, 1.8106e-12,\n 3.2687e-14, 1.1257e-15, 9.9164e-13, 5.1449e-12, 2.0672e-12, 1.0240e-13,\n 9.3686e-14, 9.9329e-14, 3.7865e-13, 8.5550e-13, 7.5423e-13, 3.5234e-13,\n 4.1710e-14, 7.3527e-13, 5.4742e-15, 2.3126e-12, 9.2648e-13, 5.5095e-13,\n 1.5994e-14, 1.2764e-13, 5.4274e-13, 3.5143e-14, 4.6808e-13, 1.5701e-13,\n 7.7933e-14, 4.6286e-13, 7.1095e-13, 1.6331e-12, 6.3929e-15, 2.3926e-13,\n 4.3800e-13, 4.5636e-14, 7.5875e-13, 1.2977e-13, 1.2945e-18, 3.5568e-13,\n 9.3992e-13, 1.6129e-13, 4.8250e-14, 3.9114e-15, 2.6479e-13, 2.1133e-12,\n 3.0513e-14, 2.0957e-16, 9.8671e-13, 2.8911e-12, 2.4658e-15, 4.4106e-14,\n 4.1953e-13, 2.0584e-12, 1.2718e-13, 5.0021e-13, 2.4081e-13, 8.3387e-14,\n 4.8148e-13, 1.6673e-12, 3.7419e-13, 4.2523e-13, 1.8657e-12, 3.0693e-13,\n 5.7786e-13, 2.1910e-12, 4.9935e-13, 3.2592e-12, 3.4845e-13, 9.7009e-14,\n 8.7186e-15, 2.1386e-13, 2.5837e-16, 3.1203e-12, 5.4254e-14, 5.2080e-13,\n 2.3265e-13, 2.6942e-16, 2.3603e-12, 1.6862e-13, 9.8114e-13, 1.1277e-12,\n 1.4249e-13, 1.5155e-12, 5.2620e-14, 1.0247e-13, 2.5620e-13, 1.4000e-12,\n 9.6515e-14, 6.9548e-14, 2.5499e-14, 5.6286e-15, 2.5954e-14, 4.8096e-13,\n 1.1519e-12, 2.0324e-13, 6.2818e-13, 8.3791e-13, 8.5385e-16, 2.5542e-12,\n 7.1514e-13, 1.1948e-13, 8.6630e-13, 3.2268e-13, 2.1134e-12, 3.3396e-13,\n 3.5855e-13, 3.4416e-16, 7.4519e-16, 1.6344e-14, 1.0500e-12, 4.2750e-12,\n 9.7581e-13, 2.3124e-13, 2.5575e-13, 4.5446e-13, 2.4898e-13, 6.7929e-15,\n 1.3231e-13, 5.5246e-13, 6.1296e-13, 9.3232e-14, 1.1573e-13, 1.6148e-13,\n 1.6870e-13, 2.3078e-12, 8.5295e-14, 1.4532e-13, 3.5763e-13, 9.8289e-14,\n 7.8451e-14, 4.3930e-13, 1.5598e-13, 4.4599e-13, 2.2988e-12, 1.5618e-13,\n 1.0593e-12, 1.9522e-13, 3.5632e-15, 2.1112e-13, 4.8267e-13, 1.1688e-13,\n 8.9632e-13, 2.8772e-13, 8.2272e-14, 1.7797e-13, 7.3181e-14, 4.2758e-13,\n 3.1132e-13, 8.9254e-14, 1.2157e-13, 2.4205e-13, 3.1437e-12, 2.3225e-16,\n 1.8364e-13, 6.3231e-13, 1.0713e-13, 2.8990e-12, 4.1867e-13, 2.1226e-13,\n 2.1586e-13, 9.3842e-14, 1.5745e-16, 8.0082e-12, 6.5150e-14, 1.0871e-14,\n 2.8797e-13, 9.7498e-13, 1.8881e-14, 3.4718e-12, 1.9704e-13, 1.7500e-13,\n 3.2388e-13, 7.0633e-13, 1.0450e-12, 1.9788e-14, 5.4152e-16, 4.6527e-13,\n 8.1247e-14, 3.9923e-13, 2.6921e-14, 1.0443e-12, 3.8078e-15, 1.7044e-12,\n 2.0286e-12, 3.4973e-16, 3.3488e-14, 1.5006e-13, 3.7445e-14, 1.4594e-12,\n 1.7514e-13, 1.7354e-12, 2.6195e-13, 1.8028e-15, 4.1301e-16, 4.1383e-14,\n 1.9761e-14, 4.2373e-13, 1.5327e-14, 9.0956e-15, 4.1082e-13, 2.7108e-13,\n 1.5554e-12, 1.4129e-14, 6.0565e-14, 7.5242e-13, 2.5618e-13, 1.4528e-13,\n 4.4717e-13, 2.3067e-16, 2.1042e-13, 4.9628e-13, 1.7192e-13, 4.9985e-18,\n 1.9407e-15, 8.7192e-13, 4.1497e-14, 2.7630e-13, 1.6244e-12, 1.5057e-14,\n 9.0015e-13, 8.2719e-14, 2.4943e-12, 1.0565e-13], device='cuda:0')" - }, - "34": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[6.2649e-15, 2.2622e-14, 1.3801e-14, ..., 3.1235e-15, 1.7102e-14,\n 1.0277e-14],\n [4.4591e-16, 6.6709e-17, 7.2707e-16, ..., 1.9403e-15, 2.2556e-15,\n 8.0392e-17],\n [2.1735e-15, 9.5592e-15, 3.0141e-15, ..., 1.5424e-15, 9.2879e-15,\n 1.7772e-15],\n ...,\n [5.4388e-14, 1.2901e-13, 1.3370e-13, ..., 2.3757e-14, 2.0059e-13,\n 2.9031e-13],\n [3.6318e-13, 5.7443e-13, 8.8659e-13, ..., 1.9688e-13, 8.6748e-13,\n 1.5927e-12],\n [4.9828e-12, 1.1052e-11, 1.3207e-11, ..., 2.8116e-12, 1.6635e-11,\n 2.6099e-11]], device='cuda:0')" - }, - "35": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.2631e-13, 4.0594e-15, 3.6240e-14, 2.3848e-15, 1.5968e-14, 2.3607e-14,\n 3.4859e-15, 3.0155e-15, 1.4690e-16, 2.4612e-14, 1.9963e-14, 3.4526e-14,\n 8.5753e-16, 7.2596e-16, 8.8327e-15, 5.7931e-15, 1.9110e-15, 7.5636e-14,\n 2.5812e-14, 3.3060e-16, 1.3016e-15, 1.0562e-14, 2.4387e-14, 9.4070e-16,\n 4.5561e-14, 5.3433e-15, 7.5657e-15, 7.7143e-16, 1.7583e-15, 3.5967e-15,\n 3.5898e-15, 5.9707e-14, 8.6527e-15, 4.6317e-16, 1.7472e-17, 1.7851e-14,\n 6.3522e-15, 2.5591e-15, 7.6520e-17, 3.4120e-18, 1.0367e-14, 1.2973e-15,\n 6.3346e-15, 2.9910e-15, 5.1167e-16, 5.8249e-15, 7.3757e-15, 7.4833e-15,\n 3.9512e-15, 1.6790e-14, 6.5156e-15, 1.8532e-15, 2.4181e-15, 1.4240e-15,\n 5.3418e-17, 7.2629e-17, 1.6623e-15, 1.3343e-16, 5.3485e-15, 2.2747e-16,\n 1.8890e-14, 6.3000e-15, 1.4867e-17, 1.1461e-14, 2.2165e-14, 7.5494e-15,\n 1.1319e-14, 2.2413e-15, 2.8705e-16, 1.7694e-16, 1.1597e-14, 1.2251e-15,\n 9.7479e-17, 4.7893e-15, 2.7906e-15, 7.5221e-15, 5.5326e-15, 1.4328e-15,\n 2.3729e-20, 5.1959e-15, 3.9571e-15, 3.4829e-14, 5.3297e-16, 4.9241e-15,\n 6.6986e-16, 3.3798e-15, 1.9702e-15, 5.4517e-18, 3.3524e-15, 4.3510e-16,\n 1.5562e-17, 2.5249e-15, 2.2578e-15, 6.4345e-14, 2.1200e-14, 1.4743e-14,\n 1.6062e-15, 7.5791e-14, 5.2353e-15, 9.7090e-15, 9.0237e-16, 1.0380e-15,\n 5.1126e-16, 5.5527e-15, 6.7494e-17, 1.9364e-14, 3.1798e-14, 3.5345e-15,\n 4.0134e-15, 6.0336e-16, 6.1033e-15, 6.9308e-14, 5.2429e-16, 7.9402e-14,\n 1.1014e-14, 4.9065e-14, 4.8149e-21, 4.5008e-14, 3.2786e-14, 1.6226e-14,\n 6.2097e-14, 2.7781e-16, 3.2419e-14, 1.2224e-16, 1.7693e-14, 2.9470e-14,\n 2.6708e-14, 1.9345e-14, 3.5708e-14, 5.4036e-14, 6.2144e-15, 1.2120e-14,\n 3.6502e-16, 2.0828e-14, 5.9839e-15, 5.4732e-15, 1.2655e-14, 1.7135e-14,\n 1.4397e-17, 3.3431e-15, 4.2031e-14, 1.5468e-16, 8.8404e-15, 3.2617e-15,\n 2.0418e-14, 8.4936e-14, 1.6889e-14, 2.1690e-16, 5.3567e-14, 8.1234e-16,\n 1.7276e-15, 1.4035e-14, 5.8276e-14, 4.7899e-15, 2.0579e-14, 3.0874e-14,\n 2.2735e-16, 5.7333e-17, 2.1016e-15, 1.0332e-14, 9.8572e-15, 2.7338e-15,\n 4.1153e-14, 2.1287e-14, 2.3704e-15, 2.2872e-17, 4.5675e-16, 6.4709e-17,\n 1.2921e-15, 5.8186e-15, 1.1766e-15, 7.1376e-16, 4.9275e-15, 5.9691e-15,\n 5.4333e-16, 1.5244e-16, 5.7119e-15, 1.3188e-15, 8.1901e-16, 1.1673e-16,\n 9.5170e-15, 2.7682e-17, 2.0584e-15, 8.8859e-15, 2.5298e-15, 1.1796e-15,\n 8.8149e-15, 2.9593e-15, 2.9695e-14, 9.6950e-16, 8.2805e-15, 6.8252e-16,\n 7.5001e-15, 7.4476e-14, 1.0512e-14, 2.2400e-14, 1.1458e-14, 3.3491e-14,\n 3.8306e-18, 4.0671e-16, 1.9481e-15, 2.8925e-15, 4.9866e-15, 4.1079e-14,\n 1.0313e-15, 3.0878e-15, 2.3091e-16, 3.4778e-15, 4.0060e-16, 9.3026e-15,\n 1.5919e-18, 6.2555e-15, 4.2264e-16, 4.6075e-15, 4.2393e-14, 2.1491e-14,\n 3.1571e-14, 8.5381e-15, 1.0804e-14, 3.3612e-14, 8.5358e-15, 1.6300e-16,\n 4.3216e-15, 3.8441e-14, 2.4589e-16, 5.0750e-15, 3.0731e-14, 1.0926e-14,\n 9.6384e-15, 9.3250e-15, 8.0706e-15, 2.4030e-16, 3.5445e-15, 1.8724e-15,\n 8.6861e-15, 2.3840e-15, 6.0032e-17, 5.5072e-16, 2.2239e-15, 2.3420e-16,\n 2.1544e-14, 5.9421e-15, 9.0968e-16, 7.2012e-15, 2.2056e-16, 8.2357e-15,\n 2.2121e-14, 2.8383e-15, 9.4047e-15, 2.9317e-15, 2.2366e-15, 2.3001e-14,\n 8.9333e-15, 2.5452e-14, 2.3809e-14, 5.1609e-15, 8.1878e-31, 3.6195e-32,\n 1.4767e-29, 1.0382e-30, 1.6533e-29, 6.4977e-31, 4.2622e-31, 9.0290e-30,\n 5.9168e-30, 8.6122e-30, 2.5999e-31, 3.2203e-31, 9.0316e-30, 8.1169e-30,\n 4.4747e-31, 9.8602e-31, 3.6324e-30, 2.3142e-33, 2.2962e-29, 3.6346e-30,\n 1.1960e-30, 8.1696e-30, 3.6334e-30, 3.7299e-30, 2.7272e-30, 8.7108e-31,\n 2.1402e-30, 2.4493e-31, 1.8507e-29, 3.8037e-30, 3.3042e-30, 2.9429e-30,\n 1.5929e-29, 4.8252e-30, 5.2050e-31, 1.8444e-30, 2.9310e-30, 4.1822e-32,\n 4.0258e-31, 1.2628e-31, 8.6942e-33, 3.8857e-31, 1.9242e-30, 2.7623e-32,\n 3.1558e-30, 9.2778e-31, 4.9027e-30, 2.5332e-32, 4.0098e-30, 3.4543e-32,\n 1.0571e-30, 1.2074e-34, 1.4845e-30, 7.0667e-30, 2.0505e-30, 3.9355e-31,\n 1.5352e-29, 1.9751e-30, 2.8528e-29, 2.2955e-30, 6.5948e-32, 3.6683e-31,\n 3.3301e-30, 1.9744e-30, 5.0870e-31, 2.6512e-30, 9.7029e-30, 1.2714e-29,\n 6.7710e-30, 2.7824e-31, 2.3690e-29, 1.7268e-29, 1.8147e-29, 3.0289e-29,\n 6.2663e-29, 1.9704e-30, 3.9050e-29, 8.3047e-31, 2.8761e-30, 5.7355e-31,\n 8.4524e-30, 3.3481e-29, 2.0004e-29, 1.3034e-29, 3.4679e-30, 6.2723e-30,\n 3.0524e-29, 6.8649e-29, 3.8445e-30, 1.0530e-29, 6.0613e-30, 3.1740e-29,\n 3.3303e-30, 1.9460e-31, 2.2084e-30, 2.1084e-30, 1.9608e-29, 6.6662e-32,\n 3.9978e-30, 2.8909e-32, 2.2730e-30, 1.0733e-29, 1.5779e-29, 1.3523e-30,\n 3.5674e-31, 1.5583e-30, 2.6963e-30, 1.5463e-31, 9.4981e-32, 4.4399e-31,\n 5.1325e-31, 8.7764e-32, 2.0314e-30, 2.5390e-30, 3.5915e-30, 3.2574e-30,\n 2.3166e-30, 3.0591e-32, 1.0333e-33, 5.9528e-32, 4.2162e-32, 4.3798e-30,\n 6.0725e-30, 4.4447e-30, 2.0986e-30, 5.6940e-30, 2.8222e-32, 9.5421e-30,\n 1.0159e-31, 1.1669e-30, 4.7531e-31, 1.5146e-29, 6.0539e-30, 9.4656e-30,\n 7.8749e-30, 1.6680e-31, 1.4836e-31, 1.7513e-30, 3.5062e-30, 1.0852e-29,\n 1.1471e-30, 5.4421e-31, 3.1226e-31, 4.7297e-32, 1.7803e-29, 1.6745e-30,\n 5.0325e-30, 3.7918e-31, 5.9622e-31, 3.8564e-29, 6.5556e-30, 7.4539e-32,\n 5.3204e-30, 6.3155e-30, 1.8331e-29, 1.0897e-29, 1.3957e-29, 6.6769e-30,\n 1.0076e-31, 7.8574e-29, 1.7802e-29, 2.1833e-31, 2.1534e-29, 5.4704e-30,\n 4.7610e-30, 2.8037e-31, 3.2323e-30, 1.5408e-30, 1.6049e-30, 6.3081e-30,\n 5.0450e-30, 6.0036e-34, 4.6809e-30, 4.7313e-31, 7.2843e-32, 1.1133e-30,\n 1.1399e-29, 8.0669e-30, 4.1709e-32, 6.3537e-30, 1.8444e-30, 1.3247e-30,\n 2.2199e-29, 2.2235e-29, 2.3534e-30, 1.2094e-31, 2.2720e-29, 2.1051e-29,\n 9.8158e-30, 1.2485e-30, 1.2740e-30, 5.0596e-30, 5.9334e-30, 6.6448e-30,\n 3.2698e-29, 2.3016e-30, 7.2097e-31, 1.1383e-29, 2.1972e-30, 1.0981e-29,\n 1.7331e-30, 1.8786e-30, 1.7816e-30, 8.4864e-30, 1.6558e-29, 1.1941e-29,\n 7.9540e-32, 3.0160e-31, 3.8757e-30, 1.6013e-33, 1.1754e-30, 7.3748e-30,\n 7.8714e-31, 2.2840e-30, 3.3698e-30, 6.7259e-30, 2.1014e-30, 3.3435e-30,\n 6.9201e-30, 3.7871e-30, 3.2702e-31, 7.5822e-31, 2.4154e-31, 4.3897e-31,\n 2.9835e-30, 1.9785e-30, 5.5806e-30, 1.9166e-29, 1.2085e-29, 8.2931e-30,\n 5.9353e-30, 7.0886e-30, 2.5602e-31, 1.4684e-29, 4.4440e-31, 6.6968e-31,\n 3.1590e-30, 1.8318e-30, 3.3375e-30, 6.7422e-31, 5.1726e-31, 1.0383e-34,\n 3.1333e-31, 2.5928e-31, 4.8230e-30, 8.5740e-30, 6.6701e-31, 5.7627e-30,\n 2.5355e-30, 7.5779e-30, 6.2616e-32, 2.1279e-30, 6.0649e-31, 8.9777e-30,\n 1.3923e-30, 8.0282e-31, 1.6661e-11, 2.6081e-10, 4.5909e-11, 4.3781e-11,\n 3.4640e-10, 3.0779e-10, 4.1985e-12, 2.2378e-12, 5.6103e-12, 5.6152e-11,\n 7.4375e-14, 1.4970e-11, 2.7175e-10, 9.1408e-12, 1.5041e-11, 6.8869e-11,\n 1.0110e-10, 2.0768e-11, 1.8512e-10, 6.9845e-11, 7.7466e-14, 2.1755e-13,\n 3.7570e-11, 2.4751e-10, 2.0852e-10, 7.2644e-13, 9.3923e-11, 5.0516e-12,\n 2.6427e-11, 5.3615e-12, 1.1688e-10, 9.7595e-11, 3.7827e-10, 4.0604e-12,\n 7.7389e-11, 1.1109e-10, 6.5534e-11, 6.6328e-11, 5.8498e-11, 2.3121e-12,\n 3.5824e-10, 1.1732e-12, 5.7953e-12, 1.0204e-11, 1.7349e-10, 1.4836e-11,\n 3.2443e-11, 1.3538e-11, 2.6040e-12, 2.4796e-11, 4.9703e-11, 8.8783e-12,\n 1.4498e-10, 1.3683e-10, 1.4746e-11, 1.0334e-11, 1.5990e-11, 1.1589e-12,\n 9.3628e-11, 3.5517e-11, 7.5706e-11, 7.4925e-11, 7.8886e-12, 2.0428e-12,\n 4.2175e-15, 2.0561e-13, 6.3509e-13, 1.8994e-12, 2.7392e-11, 1.9521e-12,\n 2.4393e-11, 6.3612e-11, 9.1550e-11, 1.4593e-11, 3.3070e-11, 4.1755e-13,\n 1.8649e-11, 5.5225e-10, 2.8976e-12, 2.7840e-11, 3.7377e-11, 2.5562e-10,\n 9.3815e-14, 7.2501e-11, 2.0890e-11, 1.0090e-10, 6.4416e-11, 7.4802e-11,\n 6.1121e-15, 2.3859e-10, 1.6039e-10, 1.4601e-13, 7.1831e-12, 1.7278e-10,\n 8.0253e-12, 3.3519e-11, 4.7374e-13, 1.1905e-10, 2.2923e-11, 1.1141e-10,\n 5.8071e-11, 2.0137e-10, 2.5362e-12, 4.8692e-11, 9.9831e-11, 3.6379e-10,\n 1.6087e-10, 2.8393e-13, 6.0995e-12, 6.1080e-11, 3.8177e-11, 7.0502e-12,\n 1.0756e-11, 1.2716e-11, 3.7711e-11, 5.6087e-12, 4.7518e-11, 9.4305e-13,\n 1.4595e-10, 1.8447e-12, 1.4301e-10, 1.6383e-10, 5.0478e-11, 1.4876e-12,\n 1.7251e-11, 2.0196e-12, 4.7228e-11, 1.1193e-10, 1.1177e-11, 2.8994e-12,\n 1.1638e-10, 5.7746e-12, 6.7969e-11, 3.8298e-11, 1.9730e-11, 1.3438e-13,\n 2.7417e-12, 2.9039e-10, 3.7422e-12, 2.0147e-10, 3.4787e-14, 9.9721e-11,\n 3.7785e-11, 3.7744e-11, 1.1140e-11, 1.3573e-10, 5.1058e-11, 1.9830e-11,\n 8.2309e-11, 2.5689e-13, 1.8707e-10, 1.1783e-11, 1.4507e-12, 1.9452e-11,\n 3.8460e-11, 1.5085e-10, 2.9069e-11, 6.9095e-11, 1.0717e-12, 1.7379e-10,\n 5.0812e-11, 5.1928e-11, 4.0750e-11, 3.1870e-10, 2.9025e-13, 9.0167e-12,\n 2.7654e-10, 2.1584e-11, 2.5498e-11, 1.3265e-12, 4.4184e-11, 1.1431e-10,\n 9.9330e-11, 1.5106e-10, 3.6655e-12, 4.4959e-11, 2.6237e-11, 2.3478e-11,\n 2.3995e-11, 1.5115e-11, 3.8247e-11, 9.5953e-11, 7.3696e-12, 1.6362e-10,\n 1.8273e-10, 4.4401e-11, 4.3608e-11, 4.1505e-11, 3.0019e-13, 8.8815e-12,\n 9.0454e-12, 1.9508e-11, 8.8248e-11, 1.1178e-11, 4.8635e-12, 6.7274e-11,\n 1.8565e-14, 1.6355e-10, 1.3829e-12, 1.7920e-10, 7.6237e-11, 2.1430e-10,\n 5.5316e-12, 3.6655e-11, 1.4394e-10, 5.4162e-11, 5.2585e-12, 1.3893e-10,\n 1.0585e-11, 6.7341e-11, 3.4208e-13, 1.0375e-11, 3.0304e-10, 6.6113e-11,\n 3.0415e-10, 3.4222e-11, 4.8863e-11, 1.2103e-13, 1.6901e-11, 4.2620e-11,\n 3.9191e-13, 2.8689e-12, 3.6817e-11, 1.1788e-11, 1.6937e-11, 9.6476e-12,\n 1.0931e-10, 1.0949e-11, 2.7650e-12, 9.1272e-13, 3.0223e-13, 5.2636e-13,\n 6.5952e-12, 9.0397e-11, 6.3066e-12, 1.2895e-11, 1.6117e-12, 1.5390e-10,\n 7.5469e-11, 1.4634e-10, 3.3298e-10, 6.8964e-12, 1.8023e-10, 7.3553e-11,\n 1.6333e-11, 6.1176e-10, 1.3202e-11, 6.2954e-11, 2.1108e-11, 1.5147e-11,\n 7.8794e-12, 6.7685e-11, 3.6462e-11, 1.7611e-12, 9.1208e-12, 1.5673e-10],\n device='cuda:0')" - }, - "36": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[5.5807e-12, 3.1163e-15, 3.1163e-11, ..., 8.0704e-14, 8.1246e-13,\n 2.2458e-12],\n [7.5644e-12, 7.8836e-16, 4.4202e-11, ..., 1.2651e-13, 1.2005e-12,\n 3.1706e-12],\n [4.2715e-12, 5.4334e-16, 2.4982e-11, ..., 6.6008e-14, 7.1100e-13,\n 1.6154e-12],\n ...,\n [3.2322e-12, 4.6134e-16, 1.9602e-11, ..., 3.0318e-14, 4.8906e-13,\n 1.3002e-12],\n [4.2462e-13, 1.1357e-15, 2.3390e-12, ..., 8.1350e-15, 4.5976e-14,\n 1.7628e-13],\n [2.2467e-12, 2.7073e-15, 1.3184e-11, ..., 3.8369e-14, 3.0704e-13,\n 9.0220e-13]], device='cuda:0')" - }, - "37": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.0189e-10, 2.8087e-10, 1.5021e-10, 7.9748e-10, 2.4203e-12, 2.3130e-11,\n 4.2359e-10, 7.9538e-10, 6.4333e-10, 3.2036e-10, 1.3886e-11, 6.6892e-10,\n 3.3046e-11, 1.6480e-12, 8.6986e-11, 8.3111e-13, 3.6333e-10, 6.8804e-10,\n 1.9242e-10, 4.1108e-11, 7.4919e-11, 5.5141e-11, 2.6249e-10, 3.3649e-12,\n 8.7466e-12, 7.4219e-11, 7.6528e-12, 1.1189e-10, 5.3023e-11, 1.6647e-10,\n 3.6401e-13, 8.9329e-11, 9.0389e-11, 3.1481e-11, 4.0530e-12, 1.7722e-10,\n 6.0682e-12, 1.0190e-09, 1.7088e-10, 1.2161e-11, 3.8389e-10, 2.5007e-11,\n 7.3479e-11, 6.6444e-12, 1.3408e-10, 2.2995e-10, 2.8874e-12, 3.0039e-13,\n 1.8863e-10, 1.7139e-10, 2.4483e-10, 1.0204e-11, 1.0641e-10, 3.9882e-10,\n 5.0949e-12, 4.4564e-11, 4.0312e-10, 1.5993e-12, 1.8499e-11, 3.3212e-11,\n 1.3942e-10, 1.0912e-10, 2.2449e-11, 2.1330e-10, 1.3410e-12, 3.3622e-10,\n 4.5794e-11, 8.7550e-11, 9.8585e-11, 3.6099e-11, 1.6959e-11, 3.3225e-10,\n 4.0709e-10, 8.5289e-10, 5.5038e-12, 6.6589e-13, 3.3585e-11, 2.8675e-10,\n 4.3452e-12, 8.2467e-13, 6.7407e-11, 4.2742e-12, 1.7090e-10, 2.9357e-11,\n 3.2304e-11, 1.6698e-11, 3.2628e-11, 4.6910e-11, 4.8359e-11, 5.6418e-10,\n 4.9167e-13, 3.3542e-10, 1.3909e-10, 3.7373e-10, 9.1080e-10, 5.8052e-10,\n 5.8952e-11, 7.9478e-10, 2.8570e-11, 1.0045e-11, 3.5376e-10, 1.2161e-13,\n 1.5422e-10, 9.3169e-11, 8.7830e-12, 2.4650e-10, 1.4815e-13, 1.4517e-09,\n 3.0076e-10, 3.1201e-10, 1.1027e-12, 9.0251e-12, 2.7949e-11, 2.7881e-10,\n 8.7889e-11, 2.3429e-10, 4.9545e-13, 2.8959e-10, 3.6414e-10, 1.4598e-10,\n 4.8685e-10, 1.9233e-10, 1.5033e-10, 2.1544e-11, 1.0185e-09, 5.3410e-10,\n 1.3575e-10, 5.6102e-11, 1.1631e-10, 4.1397e-13, 1.2112e-09, 1.1771e-10,\n 1.5291e-12, 4.5155e-13, 7.8669e-10, 3.0346e-10, 4.5693e-11, 2.0079e-11,\n 1.8568e-12, 6.0796e-13, 5.0153e-10, 1.0432e-12, 7.5253e-10, 1.4643e-11,\n 1.6988e-11, 3.6013e-11, 2.9966e-10, 2.5135e-10, 7.2997e-11, 8.7904e-13,\n 1.4473e-11, 4.6182e-12, 5.4803e-13, 4.9550e-10, 4.3843e-11, 6.5004e-10,\n 1.5597e-11, 3.6440e-10, 4.9303e-11, 4.9382e-10, 1.2909e-10, 2.1012e-11,\n 2.2168e-10, 1.9286e-10, 2.6757e-10, 5.8970e-10, 1.2390e-09, 5.8538e-13,\n 2.3247e-12, 1.1274e-10, 6.5194e-11, 9.9256e-13, 3.9980e-10, 5.1461e-11,\n 5.7801e-10, 7.2081e-13, 2.7012e-10, 3.0142e-11, 1.3005e-10, 1.3051e-10,\n 1.3915e-09, 1.3854e-10, 8.9462e-11, 1.3300e-13, 2.6066e-11, 4.0403e-10,\n 1.6503e-12, 1.4761e-10, 3.7606e-10, 3.3507e-11, 3.8422e-10, 3.3410e-11,\n 9.6858e-11, 8.8657e-11, 5.8583e-10, 3.1349e-10, 3.6885e-10, 2.7077e-12,\n 1.1750e-10, 7.5254e-10, 8.0460e-13, 6.9801e-11, 3.5981e-11, 5.2763e-10,\n 5.1549e-10, 7.3731e-11, 2.4388e-10, 5.0310e-11, 3.6037e-12, 1.3746e-12,\n 1.6325e-12, 8.2096e-11, 4.2300e-10, 3.7650e-10, 1.3586e-10, 4.7262e-10,\n 5.7405e-11, 2.8396e-10, 2.0686e-12, 7.4512e-12, 1.4228e-13, 4.0105e-11,\n 4.4486e-10, 3.3152e-11, 1.1180e-11, 2.2896e-10, 6.4455e-11, 2.0126e-10,\n 1.4312e-11, 2.8390e-10, 3.9732e-12, 1.5535e-10, 4.8288e-10, 2.0368e-12,\n 4.0209e-10, 9.0223e-12, 9.5474e-11, 3.5837e-13, 7.5810e-10, 1.5280e-11,\n 6.9490e-11, 1.5687e-11, 1.8353e-11, 1.0750e-10, 1.5820e-10, 1.0963e-10,\n 3.9630e-10, 1.5070e-10, 6.4156e-10, 6.1645e-11, 3.3367e-10, 7.2335e-15,\n 2.6300e-11, 1.2391e-10, 1.6018e-11, 8.5260e-11], device='cuda:0')" - }, - "38": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.4126e-09, 3.7391e-10, 5.3522e-10, ..., 6.8171e-11, 4.1611e-10,\n 7.6838e-10],\n [1.4585e-10, 3.7911e-11, 5.6758e-11, ..., 7.5584e-12, 4.2012e-11,\n 7.6175e-11],\n [1.5889e-10, 4.0964e-11, 5.9138e-11, ..., 7.2778e-12, 4.7161e-11,\n 8.8100e-11],\n [1.6647e-10, 4.5960e-11, 6.2582e-11, ..., 7.8939e-12, 4.9697e-11,\n 9.2270e-11]], device='cuda:0')" - }, - "39": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.5422e-08, 3.6498e-09, 3.9492e-09, 4.2186e-09], device='cuda:0')" - }, - "40": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.4126e-09, 3.7391e-10, 5.3522e-10, ..., 6.8171e-11, 4.1611e-10,\n 7.6838e-10],\n [1.4585e-10, 3.7911e-11, 5.6758e-11, ..., 7.5584e-12, 4.2012e-11,\n 7.6175e-11],\n [1.5889e-10, 4.0964e-11, 5.9138e-11, ..., 7.2778e-12, 4.7161e-11,\n 8.8100e-11],\n [1.6647e-10, 4.5960e-11, 6.2582e-11, ..., 7.8939e-12, 4.9697e-11,\n 9.2270e-11]], device='cuda:0')" - }, - "41": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.5422e-08, 3.6498e-09, 3.9492e-09, 4.2186e-09], device='cuda:0')" - }, - "42": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.4126e-09, 3.7391e-10, 5.3522e-10, ..., 6.8171e-11, 4.1611e-10,\n 7.6838e-10],\n [1.4585e-10, 3.7911e-11, 5.6758e-11, ..., 7.5584e-12, 4.2012e-11,\n 7.6175e-11],\n [1.5889e-10, 4.0964e-11, 5.9138e-11, ..., 7.2778e-12, 4.7161e-11,\n 8.8100e-11],\n [1.6647e-10, 4.5960e-11, 6.2582e-11, ..., 7.8939e-12, 4.9697e-11,\n 9.2270e-11]], device='cuda:0')" - }, - "43": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.5422e-08, 3.6498e-09, 3.9492e-09, 4.2186e-09], device='cuda:0')" - }, - "8": { - "step": "tensor(7512.)", - "exp_avg": "tensor([[ 2.2368e-08, -3.3318e-07, 1.9725e-11, ..., 5.9028e-07,\n 3.6494e-07, 2.0068e-07],\n [-2.3866e-06, -8.7858e-07, -9.9789e-23, ..., 8.3304e-07,\n -6.9993e-07, 2.0753e-07],\n [-4.7618e-07, -2.8098e-07, -1.7814e-07, ..., 8.2335e-08,\n -4.4128e-07, -8.9045e-09],\n ...,\n [ 9.8957e-08, -7.1507e-07, -2.4960e-08, ..., -6.9307e-08,\n 2.5854e-06, 4.0851e-07],\n [ 5.6256e-07, 4.7147e-07, 1.7591e-23, ..., 7.4376e-07,\n -3.6477e-07, -1.7580e-09],\n [ 4.8474e-07, 2.4108e-07, 5.6052e-45, ..., 8.5858e-07,\n -3.4696e-08, 2.5221e-10]], device='cuda:0')", - "exp_avg_sq": "tensor([[5.1610e-11, 9.9389e-12, 1.1435e-16, ..., 3.6274e-11, 1.3860e-10,\n 6.5016e-11],\n [4.2252e-11, 1.9007e-11, 3.9843e-14, ..., 1.2941e-11, 4.6148e-10,\n 2.0045e-11],\n [8.1329e-11, 8.4552e-12, 2.8951e-14, ..., 1.0305e-11, 1.2370e-10,\n 4.5318e-11],\n ...,\n [3.7472e-11, 6.6699e-11, 1.2694e-13, ..., 1.5775e-11, 2.4866e-10,\n 7.3626e-13],\n [3.2204e-11, 5.2181e-11, 8.6586e-13, ..., 3.2685e-11, 8.1685e-11,\n 6.2884e-12],\n [1.2286e-10, 1.7833e-11, 1.1910e-19, ..., 5.0657e-11, 1.0711e-10,\n 7.4538e-13]], device='cuda:0')" - }, - "9": { - "step": "tensor(7512.)", - "exp_avg": "tensor([-2.3614e-05, -1.8106e-05, -8.6463e-06, ..., -1.4655e-06,\n -6.3257e-06, 3.6434e-06], device='cuda:0')", - "exp_avg_sq": "tensor([5.6055e-09, 6.6771e-09, 4.4235e-09, ..., 7.2016e-09, 4.3843e-09,\n 9.1469e-09], device='cuda:0')" - }, - "10": { - "step": "tensor(7512.)", - "exp_avg": "tensor([[-3.6469e-08, 9.9373e-08, 6.3475e-07, ..., 1.1707e-07,\n 4.4768e-07, 3.9645e-07],\n [ 3.0016e-08, -1.0191e-08, 3.6579e-07, ..., -3.9553e-07,\n 2.4955e-07, -3.4551e-07],\n [-9.5337e-08, -8.2314e-07, 2.3396e-07, ..., -3.8094e-07,\n -2.2527e-07, -3.3627e-07],\n ...,\n [-4.1957e-07, 1.9134e-07, 5.7351e-08, ..., -1.4164e-06,\n 3.7646e-07, -1.4726e-07],\n [ 2.2964e-07, 4.0105e-07, -5.2322e-07, ..., -4.8190e-07,\n 2.3941e-07, 5.4318e-07],\n [-2.1060e-07, 2.8306e-07, -2.1044e-07, ..., -2.9558e-08,\n -1.1427e-08, 4.0960e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.6737e-12, 2.2872e-12, 4.1657e-12, ..., 2.1093e-12, 2.2613e-12,\n 3.6693e-12],\n [2.8209e-12, 6.7091e-12, 3.9439e-12, ..., 5.2986e-12, 3.2199e-12,\n 3.8366e-12],\n [3.9917e-12, 8.9647e-12, 4.6885e-12, ..., 5.0230e-12, 4.0036e-12,\n 6.9466e-12],\n ...,\n [4.8271e-12, 8.2693e-12, 4.8958e-12, ..., 5.7128e-12, 3.9532e-12,\n 7.9045e-12],\n [3.9726e-12, 6.8854e-12, 3.8570e-12, ..., 6.5696e-12, 2.5307e-12,\n 4.4924e-12],\n [2.6932e-12, 8.5942e-12, 5.5215e-12, ..., 1.1185e-11, 4.3038e-12,\n 3.1649e-12]], device='cuda:0')" - }, - "11": { - "step": "tensor(6260.)", - "exp_avg": "tensor([[-1.5677e-07, 5.8510e-07, 9.9366e-09, ..., 8.6319e-07,\n -4.8264e-07, -2.2431e-09],\n [ 1.4435e-07, 4.2155e-08, -6.5393e-07, ..., 3.1514e-08,\n -5.5564e-07, 6.9825e-06],\n [ 8.4818e-08, 5.1287e-08, 1.8217e-44, ..., 3.8034e-08,\n -8.4645e-07, 8.1740e-10],\n ...,\n [ 1.7449e-06, 3.0198e-08, -4.9696e-10, ..., -1.0802e-06,\n -1.3551e-06, 4.0876e-07],\n [ 1.2716e-07, 6.4997e-07, -4.3724e-31, ..., 2.4805e-06,\n -4.7689e-06, 7.0099e-10],\n [ 1.7423e-07, -1.9496e-07, -2.6480e-08, ..., -2.8856e-07,\n 1.7486e-07, -4.7179e-08]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.3030e-11, 3.1395e-11, 1.8146e-14, ..., 1.8023e-10, 4.2274e-11,\n 9.7998e-12],\n [5.4443e-11, 1.8977e-12, 1.8210e-13, ..., 2.3087e-11, 8.7353e-11,\n 8.7627e-11],\n [2.6727e-11, 1.1677e-11, 2.6510e-17, ..., 3.0938e-12, 8.6429e-11,\n 5.1166e-12],\n ...,\n [1.6189e-10, 4.7486e-12, 2.0782e-17, ..., 4.1196e-12, 1.7263e-11,\n 9.6943e-12],\n [1.9639e-11, 2.0184e-11, 6.5467e-20, ..., 1.8952e-10, 8.3961e-10,\n 4.6515e-12],\n [1.1977e-11, 5.0523e-11, 7.7101e-13, ..., 3.3288e-10, 3.9035e-11,\n 2.9287e-12]], device='cuda:0')" - }, - "12": { - "step": "tensor(6260.)", - "exp_avg": "tensor([ 7.5028e-06, 8.8002e-06, -2.4455e-05, ..., 5.4578e-06,\n -2.2109e-05, 6.0244e-07], device='cuda:0')", - "exp_avg_sq": "tensor([3.5464e-09, 3.4345e-09, 5.5166e-09, ..., 4.8840e-09, 7.3741e-09,\n 4.7979e-09], device='cuda:0')" - }, - "13": { - "step": "tensor(6260.)", - "exp_avg": "tensor([[-1.1752e-07, 5.3074e-08, -7.5518e-07, ..., -3.1240e-07,\n -3.0108e-08, -1.1819e-07],\n [-1.1535e-06, 2.0434e-07, -6.9877e-07, ..., 3.0106e-07,\n 2.3966e-09, 4.1238e-08],\n [ 5.3194e-07, 1.9694e-07, 8.9798e-08, ..., 5.2648e-07,\n 5.4877e-08, -1.4747e-08],\n ...,\n [-5.1821e-07, -7.0825e-08, -2.9462e-08, ..., 5.3486e-08,\n 1.4980e-07, 2.9555e-07],\n [-4.2527e-07, 2.4454e-07, -1.1012e-07, ..., 4.3838e-07,\n -1.5313e-07, -2.1933e-07],\n [-3.9213e-07, 1.6864e-07, -1.0604e-07, ..., 1.0937e-06,\n 2.6105e-07, 2.1883e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.8964e-12, 1.1537e-12, 1.2121e-11, ..., 1.4576e-12, 1.4670e-12,\n 1.3625e-12],\n [2.4471e-12, 1.7454e-12, 1.8229e-11, ..., 3.1542e-12, 1.4255e-12,\n 1.8486e-12],\n [3.4317e-12, 1.5739e-12, 1.7840e-11, ..., 9.8822e-12, 2.4846e-12,\n 2.0118e-12],\n ...,\n [4.0600e-12, 2.5229e-12, 5.0981e-12, ..., 2.3195e-12, 2.5424e-12,\n 2.3444e-12],\n [3.6121e-12, 3.8243e-12, 4.4249e-12, ..., 7.6758e-12, 1.8224e-12,\n 2.3512e-12],\n [3.2576e-12, 2.5433e-12, 2.1328e-12, ..., 3.9090e-12, 1.7917e-12,\n 2.7222e-12]], device='cuda:0')" + "step": "tensor(1252.)", + "exp_avg": "tensor([[-5.8383e-05, -1.1615e-05, 4.0389e-05, ..., -1.3697e-14,\n 1.0380e-05, 4.2261e-05],\n [ 1.4788e-04, -5.2814e-05, -3.6503e-05, ..., -2.3298e-15,\n -2.8277e-07, 1.0042e-04],\n [-8.2803e-06, 1.6760e-05, -1.1653e-05, ..., -3.3608e-14,\n -1.1686e-05, -4.6015e-05],\n ...,\n [-5.1200e-05, -1.8818e-05, 6.5683e-05, ..., -4.6589e-14,\n -1.0937e-05, -2.0980e-05],\n [-7.7796e-05, 3.0357e-05, -1.1169e-04, ..., -1.1776e-14,\n -2.7905e-05, 2.9225e-05],\n [-4.8278e-05, 5.2970e-05, -3.7701e-05, ..., 4.5509e-15,\n -5.1824e-06, 2.2141e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.3002e-08, 1.6401e-08, 1.4088e-08, ..., 9.9086e-11, 3.0549e-10,\n 2.1370e-08],\n [4.2202e-08, 5.6608e-08, 5.1256e-08, ..., 1.4635e-11, 3.1188e-10,\n 5.3633e-08],\n [2.5887e-08, 3.9902e-08, 3.0881e-08, ..., 2.0723e-10, 8.7301e-10,\n 2.5643e-08],\n ...,\n [1.4298e-07, 3.6782e-08, 4.3443e-08, ..., 1.6855e-10, 2.2267e-09,\n 3.4967e-08],\n [4.6970e-08, 5.9847e-08, 6.4003e-08, ..., 9.9030e-11, 4.1500e-09,\n 4.1659e-08],\n [4.1386e-08, 5.1432e-08, 6.1879e-08, ..., 9.0746e-10, 1.9308e-10,\n 4.4062e-08]], device='cuda:0')" } }, "param_groups": [ { - "lr": 0.0009558195366224509, + "lr": 0.00975530705321762, "name": "shared", "betas": [ 0.9, @@ -242,8 +52,8 @@ ] }, { - "lr": 0.0009558195366224509, - "name": "scale_384", + "lr": 0.00975530705321762, + "name": "scale_256", "betas": [ 0.9, 0.999 @@ -265,8 +75,8 @@ ] }, { - "lr": 0.0009558195366224509, - "name": "scale_768", + "lr": 0.00975530705321762, + "name": "scale_512", "betas": [ 0.9, 0.999 @@ -288,8 +98,8 @@ ] }, { - "lr": 0.0009558195366224509, - "name": "scale_1024", + "lr": 0.00975530705321762, + "name": "scale_768", "betas": [ 0.9, 0.999 @@ -311,8 +121,8 @@ ] }, { - "lr": 0.0009558195366224509, - "name": "scale_1280", + "lr": 0.00975530705321762, + "name": "scale_1024", "betas": [ 0.9, 0.999 @@ -334,8 +144,8 @@ ] }, { - "lr": 0.00047836202255981916, - "name": "fusion", + "lr": 0.00975530705321762, + "name": "scale_1280", "betas": [ 0.9, 0.999 @@ -349,26 +159,146 @@ "differentiable": false, "fused": null, "decoupled_weight_decay": true, - "initial_lr": 0.005, + "initial_lr": 0.01, "params": [ 14, 15, - 16, + 16 + ] + }, + { + "lr": 0.00975530705321762, + "name": "scale_1536", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ 17, 18, - 19, + 19 + ] + }, + { + "lr": 0.00975530705321762, + "name": "scale_1792", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ 20, 21, - 22, + 22 + ] + }, + { + "lr": 0.00975530705321762, + "name": "scale_2048", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ 23, 24, - 25, + 25 + ] + }, + { + "lr": 0.00975530705321762, + "name": "scale_2304", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ 26, 27, - 28, + 28 + ] + }, + { + "lr": 0.00975530705321762, + "name": "scale_2560", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ 29, 30, - 31, + 31 + ] + }, + { + "lr": 0.004877665762479736, + "name": "fusion", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.005, + "params": [ 32, 33, 34, @@ -380,7 +310,25 @@ 40, 41, 42, - 43 + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61 ] } ] @@ -390,8 +338,14 @@ "T_i": 10, "T_mult": 2, "eta_min": 1e-06, - "T_cur": 8, + "T_cur": 1, "base_lrs": [ + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, 0.01, 0.01, 0.01, @@ -399,52 +353,61 @@ 0.01, 0.005 ], - "last_epoch": 8, + "last_epoch": 1, "_step_count": 0, "_is_initial": false, "_get_lr_called_within_step": false, "_last_lr": [ - 0.0009558195366224509, - 0.0009558195366224509, - 0.0009558195366224509, - 0.0009558195366224509, - 0.0009558195366224509, - 0.00047836202255981916 + 0.00975530705321762, + 0.00975530705321762, + 0.00975530705321762, + 0.00975530705321762, + 0.00975530705321762, + 0.00975530705321762, + 0.00975530705321762, + 0.00975530705321762, + 0.00975530705321762, + 0.00975530705321762, + 0.00975530705321762, + 0.004877665762479736 ] }, "metrics": { - "best_val_acc": 82.336, - "best_epoch": 7, + "best_val_acc": 79.48, + "best_epoch": 0, "scale_accuracies": { - "384": 82.336, - "768": 82.464, - "1024": 82.43, - "1280": 82.352 + "256": 79.48 } }, "train_config": { "name": "david_training", - "run_id": "20251012_041353", + "run_id": "20251012_050214", "dataset_name": "AbstractPhil/imagenet-clip-features-orderly", "model_variant": "clip_vit_l14", "num_classes": 1000, - "preset": "clip_vit_l14", + "preset": "clip_vit_l14_deep", "custom_config_path": null, "num_classes_override": null, "use_belly_override": null, "belly_expand_override": null, "progressive_training_override": true, "scale_warmup_epochs_override": { - "384": 0, - "768": 1, - "1024": 2, - "1280": 3 + "256": 0, + "512": 1, + "768": 2, + "1024": 3, + "1280": 4, + "1536": 5, + "1792": 6, + "2048": 7, + "2304": 8, + "2560": 9 }, - "num_epochs": 20, + "num_epochs": 10, "batch_size": 1024, "learning_rate": 0.01, "weight_decay": 1e-05, - "warmup_epochs": 3, + "warmup_epochs": 0, "use_rose_loss": true, "rose_initial_weight": 0.1, "rose_max_weight": 0.5,