diff --git "a/weights/best_model_metadata.json" "b/weights/best_model_metadata.json" --- "a/weights/best_model_metadata.json" +++ "b/weights/best_model_metadata.json" @@ -1,26 +1,221 @@ { - "epoch": 2, + "epoch": 8, "optimizer_state_dict": { "state": { "0": { - "step": "tensor(3756.)", - "exp_avg": "tensor([[ 1.7831e-05, -4.5677e-05, -1.0656e-05, ..., 2.5515e-05,\n 2.5247e-05, 2.0031e-05],\n [ 2.8097e-05, -8.2606e-05, 2.0928e-05, ..., 5.4769e-06,\n 6.5177e-06, 1.4157e-05],\n [-1.5467e-05, -4.2629e-05, -4.7339e-06, ..., 5.2465e-05,\n 1.1511e-05, 1.5347e-05],\n ...,\n [-2.3319e-04, 1.3111e-04, -6.7066e-05, ..., -8.4160e-05,\n -1.3645e-05, -5.2447e-05],\n [ 2.8317e-32, -3.0252e-32, 3.7288e-32, ..., 1.2018e-33,\n -2.5549e-32, -1.2403e-32],\n [ 2.8868e-04, -2.0421e-04, 5.1263e-05, ..., 1.0802e-04,\n 6.9886e-05, -1.3845e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[5.5924e-08, 7.8008e-08, 9.2438e-09, ..., 2.2459e-08, 2.2992e-08,\n 6.1665e-09],\n [1.5358e-07, 1.1598e-07, 3.8675e-08, ..., 7.3238e-08, 3.6567e-08,\n 5.1830e-08],\n [1.1599e-07, 1.4503e-07, 3.3933e-08, ..., 4.0283e-08, 2.8559e-08,\n 3.0028e-08],\n ...,\n [3.0393e-07, 1.8733e-07, 3.2085e-08, ..., 3.6319e-08, 2.9407e-08,\n 1.9908e-08],\n [1.7530e-11, 4.0619e-10, 6.3743e-11, ..., 2.6928e-11, 8.5347e-11,\n 5.0808e-11],\n [1.9579e-07, 1.2667e-07, 1.9311e-08, ..., 2.4558e-08, 4.5792e-08,\n 2.2544e-08]], device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([[ 2.3530e-05, 6.0072e-05, -8.8154e-06, ..., 3.5271e-05,\n 2.2801e-05, 2.1883e-05],\n [-1.1921e-04, 1.7628e-04, -1.1531e-04, ..., 5.9485e-05,\n -1.4297e-04, 6.2732e-05],\n [ 6.6196e-05, -5.6226e-05, -8.6743e-05, ..., 7.3746e-05,\n -5.1870e-05, 1.2947e-05],\n ...,\n [-2.5849e-04, 1.6659e-04, -9.3175e-05, ..., -3.6065e-05,\n -8.8496e-05, 5.2400e-05],\n [-6.9186e-16, -1.0957e-15, 3.9209e-15, ..., -1.8864e-15,\n 1.1515e-16, -2.1864e-15],\n [-3.9295e-05, 4.6565e-05, 6.9427e-07, ..., -2.5127e-05,\n -4.9041e-05, 4.7725e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.1560e-08, 6.0060e-08, 7.5593e-09, ..., 1.7688e-08, 1.7216e-08,\n 4.4965e-09],\n [1.2321e-07, 9.4368e-08, 3.0819e-08, ..., 5.7368e-08, 2.9058e-08,\n 4.0093e-08],\n [7.2192e-08, 9.1778e-08, 2.6598e-08, ..., 2.6849e-08, 1.7405e-08,\n 2.1316e-08],\n ...,\n [1.8360e-07, 1.0476e-07, 2.0546e-08, ..., 2.4855e-08, 1.9352e-08,\n 1.3445e-08],\n [2.6992e-11, 9.1711e-11, 1.2411e-11, ..., 2.8353e-11, 2.5807e-11,\n 2.0060e-11],\n [1.2515e-07, 7.9013e-08, 1.2013e-08, ..., 1.6320e-08, 3.0070e-08,\n 1.3699e-08]], device='cuda:0')" }, "1": { - "step": "tensor(3756.)", - "exp_avg": "tensor([ 5.1233e-04, 1.4962e-03, 8.6219e-04, 6.6010e-04, -2.8160e-12,\n 9.4107e-05, 1.1703e-04, 5.3116e-04, 1.1135e-03, 2.2131e-03,\n -7.7125e-04, 3.4627e-03, 5.6052e-45, -6.5682e-04, 1.2157e-03,\n -3.2750e-04, -2.4690e-03, 2.3180e-03, 5.7181e-04, 5.6052e-45,\n -2.4293e-03, -2.9330e-03, -1.1394e-03, -1.1585e-03, 5.6052e-45,\n 2.7927e-03, 1.9219e-04, -7.7537e-04, 1.0711e-03, -2.8580e-03,\n -8.0813e-04, -2.4576e-04, -2.3504e-08, 5.6052e-45, -1.2613e-03,\n 7.3082e-04, 2.2089e-04, -2.8135e-03, -1.7028e-04, -9.0479e-04,\n -3.4984e-03, -2.0797e-03, 3.1206e-04, -1.5827e-03, 1.4216e-03,\n 5.5516e-03, 3.3014e-03, -6.2577e-04, 1.7331e-03, 1.2679e-03,\n -4.2741e-05, 5.6052e-45, -5.6052e-45, 5.6052e-45, -1.2496e-03,\n -9.4733e-04, 1.6383e-03, -4.1002e-04, 3.2997e-09, 3.3793e-03,\n -3.7731e-04, 3.8605e-04, -2.8461e-03, -1.1535e-03, -1.6822e-03,\n -1.6403e-03, 7.6820e-05, 5.6052e-45, 1.1597e-04, 2.3749e-03,\n -2.1615e-04, 5.6052e-45, 3.5370e-03, 3.2385e-03, 2.5593e-04,\n 1.1675e-03, -1.5430e-03, -1.2461e-03, 3.1295e-03, -9.0900e-03,\n 2.0511e-03, -1.5631e-03, 1.7974e-03, -5.7405e-04, 1.3671e-03,\n -8.7397e-04, 2.9302e-03, 1.2321e-03, 1.2652e-03, 2.4025e-03,\n 5.6052e-45, -1.0113e-03, -8.0032e-04, -3.4159e-03, -9.0481e-04,\n -1.8566e-03, 7.8791e-04, 5.2959e-04, -3.0380e-03, -6.7021e-04,\n 7.4207e-04, 6.3742e-04, -1.4810e-03, -1.3313e-03, -4.9065e-04,\n -1.0980e-04, 4.6380e-05, 2.6412e-03, -1.7604e-04, -7.9487e-04,\n 4.2142e-04, -1.7397e-03, 5.6052e-45, 1.0810e-03, -1.6093e-05,\n 1.8620e-03, -1.1098e-03, 4.2065e-03, -5.6052e-45, 1.0830e-05,\n 5.6052e-45, -4.9362e-04, 5.6052e-45, 4.5340e-03, 2.7321e-04,\n 3.2198e-03, -6.2468e-03, 5.6052e-45, 4.0794e-04, -2.9376e-04,\n 2.0729e-03, 1.9686e-03, 2.8169e-03, 9.9771e-25, 1.8688e-03,\n -3.2034e-04, 2.9599e-04, 1.7660e-03, 5.4940e-04, -5.5974e-04,\n 1.7476e-03, 2.0100e-03, 5.2797e-03, 2.3488e-03, 5.5077e-03,\n 2.2498e-03, -8.5076e-13, 4.0588e-03, -4.4685e-04, 2.2580e-03,\n -2.3057e-17, 1.3667e-03, -2.0953e-03, 5.6052e-45, 3.2642e-03,\n 1.1543e-03, 6.5475e-20, 2.3212e-03, 3.4358e-03, -4.1328e-04,\n -1.2298e-03, 4.9247e-04, 6.0939e-04, 3.2708e-03, 2.0755e-04,\n 3.8615e-05, 9.2578e-04, 1.8436e-04, 2.7992e-03, 7.7843e-23,\n 5.6052e-45, -6.7832e-04, -2.2660e-03, 8.3994e-04, -2.6474e-03,\n 5.8393e-04, -7.7083e-04, -2.7944e-03, 5.6052e-45, 1.2081e-03,\n -1.4105e-03, -1.8192e-03, 1.4575e-04, 5.6052e-45, -5.0196e-04,\n 1.1285e-03, -8.7956e-04, -7.5027e-04, -1.9143e-05, 5.6052e-45,\n 5.6052e-45, 2.1472e-03, 2.0672e-03, -4.0834e-03, 1.1510e-04,\n 7.5289e-04, -1.6370e-03, 1.5778e-03, 5.6052e-45, 9.0714e-04,\n 1.8977e-03, -1.8082e-03, 9.2614e-04, -1.9589e-03, -1.1776e-03,\n 5.6052e-45, 3.3800e-04, 3.8359e-04, 1.0382e-03, 1.8186e-03,\n 7.9169e-04, 5.4020e-04, -4.6689e-04, 3.7264e-04, 5.1813e-04,\n -2.6652e-03, 1.9006e-03, -1.2771e-03, -2.5275e-03, -3.0154e-03,\n 5.3493e-04, 2.4243e-04, 1.5057e-03, -1.1234e-03, 2.1051e-03,\n 9.7383e-04, -3.3279e-03, -3.1169e-04, 2.4938e-04, -2.2146e-06,\n 7.9057e-04, -2.7659e-04, -1.0425e-03, 5.6052e-45, -1.8182e-03,\n 3.0576e-03, 2.8684e-03, 4.9024e-04, 1.5073e-03, 2.0503e-03,\n 7.2662e-04, 5.9012e-04, -1.2612e-03, 8.4114e-04, 1.4172e-04,\n 3.8061e-03, -1.0976e-03, -4.4220e-04, -1.6167e-03, 2.3890e-03,\n -3.2845e-03, 3.0843e-03, 1.2070e-04, 1.6564e-03, 5.6408e-04,\n -1.9539e-03, 5.6052e-45, -2.9932e-04, -2.6776e-04, 5.9844e-04,\n -1.4972e-04, 7.2556e-04, -1.3185e-03, 8.6790e-04, -1.6135e-03,\n -5.3571e-04, -2.9040e-04, 5.6052e-45, -2.0915e-03, -2.2987e-03,\n 5.6645e-04, -8.4885e-04, 9.0699e-04, 3.9753e-04, 1.3428e-04,\n 5.6052e-45, 1.8292e-03, 5.6052e-45, -4.4438e-03, 8.0182e-04,\n -1.1643e-03, 4.3945e-03, -2.2254e-03, -5.2929e-03, -3.7621e-04,\n 3.2349e-04, -4.3990e-19, 3.2879e-03, 5.5551e-04, -2.1160e-05,\n -1.6796e-04, 2.1284e-03, 6.7839e-04, 1.9942e-03, -1.7185e-04,\n 2.9146e-03, -5.7036e-03, 4.0484e-03, 3.0776e-03, -6.2567e-04,\n 1.2639e-03, 3.4377e-04, -4.9843e-06, 1.6383e-04, 4.6551e-04,\n 2.1297e-05, -2.2879e-04, 5.6052e-45, 5.6052e-45, 5.6641e-04,\n 1.3571e-03, -3.0112e-03, -5.2022e-04, 1.7431e-03, -8.2426e-04,\n -4.6999e-04, 1.4273e-03, 1.8569e-03, -3.6938e-04, 1.7993e-04,\n 7.7306e-04, -3.0861e-04, -2.5285e-03, -8.6260e-05, 2.4698e-04,\n -1.5429e-03, 1.3110e-03, -5.4266e-04, 1.9959e-05, 1.3702e-03,\n 2.2933e-04, 1.4004e-03, -1.4422e-03, 1.7862e-03, 5.6052e-45,\n -2.2920e-04, 1.7263e-04, -9.7651e-04, 1.5978e-03, -3.9989e-04,\n -6.9877e-04, 1.0665e-03, 3.1147e-03, 9.5281e-04, -1.3571e-03,\n -2.0422e-03, 5.6052e-45, -3.1476e-04, -1.0402e-03, 1.6093e-03,\n 5.0974e-04, -1.3595e-03, 1.8338e-03, 1.0209e-03, -1.0805e-03,\n -6.0864e-03, -4.5676e-04, 2.0731e-03, 2.0280e-03, -1.4483e-03,\n -1.5112e-03, -2.0867e-04, 3.4876e-03, 5.2873e-04, -4.3349e-03,\n 1.6431e-03, 3.3320e-04, -5.3557e-04, -1.1041e-04, 1.0357e-03,\n 3.0968e-03, 7.0172e-04, 2.3487e-04, -1.8611e-03, -1.2893e-03,\n 2.4788e-03, -3.5122e-04, 7.0537e-05, 5.6052e-45, -1.3271e-03,\n -2.2722e-03, 5.6052e-45, -7.8819e-04, -3.5749e-03, 1.7267e-03,\n -1.9830e-03, -1.6744e-03, 2.2860e-03, 1.9861e-04, 2.0112e-04,\n 3.5650e-04, -2.7359e-03, 1.4410e-03, -5.6052e-45, 5.6052e-45,\n -8.9577e-04, 3.5788e-03, 1.2752e-03, 3.6856e-03, -3.6852e-04,\n 2.5619e-04, 2.8084e-04, -1.5861e-03, -4.6875e-04, -5.3986e-04,\n 9.9002e-04, 2.3711e-04, 5.6052e-45, -3.6677e-03, -9.2103e-04,\n 5.6052e-45, -8.7159e-04, 5.6052e-45, -4.0697e-05, -8.5340e-04,\n 3.2206e-04, 7.8644e-04, 5.6052e-45, -1.5146e-03, 3.0435e-03,\n -6.3783e-04, -1.3443e-03, -1.6531e-03, -2.1750e-03, 1.1121e-03,\n 7.2222e-04, -2.5241e-03, 6.5804e-24, -2.7617e-03, 2.8942e-12,\n -6.0669e-04, 2.4579e-04, 6.0150e-04, 5.6052e-45, -6.9064e-05,\n 1.4827e-03, -4.3664e-04, 4.7341e-04, -1.2992e-03, -3.8477e-04,\n 3.5847e-03, -6.8151e-04, 1.3217e-03, -1.0765e-03, -4.6359e-04,\n 7.9395e-04, -2.2083e-03, 5.6052e-45, -2.3358e-03, 4.1295e-03,\n 5.6052e-45, 2.1284e-03, 2.2264e-04, 6.1589e-04, -6.9541e-04,\n 3.0862e-15, -6.7634e-04, 5.3916e-06, 5.7845e-04, 5.6052e-45,\n -8.4421e-04, -6.4071e-03, 5.6052e-45, -6.1570e-04, 1.1874e-10,\n 1.6721e-03, 1.0634e-03, 5.6052e-45, 5.2167e-04, -4.3601e-04,\n 5.2242e-03, 7.0871e-04, -5.1759e-03, -1.8040e-03, -2.1591e-03,\n -5.3001e-05, 5.6052e-45, 3.0465e-03, 5.6052e-45, -2.9522e-04,\n -1.0857e-04, -5.6573e-04, -1.4785e-03, 2.4057e-03, 5.6347e-04,\n -5.0183e-04, 2.1063e-03, -2.7572e-04, -3.3027e-03, 1.9016e-03,\n 5.6052e-45, -9.0800e-04, 7.3842e-04, 1.9143e-03, -5.9897e-04,\n 1.0573e-04, 3.2216e-03, -1.4969e-04, -9.8013e-05, -8.7615e-04,\n 2.4830e-03, 5.6052e-45, -3.5623e-04, 2.7601e-03, -1.5760e-04,\n -6.6513e-04, -4.0614e-06, 6.0204e-04, -2.1472e-03, 2.3123e-03,\n 2.0108e-03, -3.6236e-03, -1.8755e-03, 4.0671e-04, -3.7499e-13,\n 5.6052e-45, -2.5051e-04, -2.9165e-03, -1.5708e-03, 6.8843e-04,\n 1.6382e-03, 2.6966e-03, 2.3469e-03, 8.3923e-04, -6.5503e-04,\n -5.1303e-04, -1.9877e-03, -8.5651e-04, -9.4771e-04, -3.1107e-03,\n 2.7734e-22, 1.8081e-03, 1.3143e-03, 4.3692e-04, -5.6052e-45,\n 6.7914e-04, 1.3574e-05, 5.6052e-45, 5.6052e-45, 1.9418e-03,\n 4.6115e-04, 1.3270e-03, 1.6926e-03, -1.7398e-03, -5.8611e-05,\n -1.4337e-03, 1.4241e-03, 6.2230e-04, 2.5681e-03, 7.9564e-04,\n -7.1825e-04, 2.9431e-04, 8.9161e-04, -5.4874e-04, -1.8233e-03,\n -3.6833e-04, 8.9179e-04, -8.6929e-04, -9.3005e-04, 3.4687e-03,\n 5.7667e-04, 2.3803e-03, -4.0191e-04, 3.5921e-03, 6.4549e-04,\n -2.6844e-03, -3.5730e-03, -4.3390e-04, 3.9128e-04, -9.8408e-04,\n 4.1179e-03, 5.0758e-04, -1.0377e-03, 1.0575e-03, -2.8883e-04,\n -3.8610e-35, 5.6052e-45, -2.5155e-03, -1.5328e-03, 3.0458e-03,\n -8.3949e-04, 5.6052e-45, 1.3146e-03, -5.2305e-03, -1.0560e-03,\n -8.0811e-04, -3.0424e-03, 4.5300e-04, -2.0464e-03, -2.4431e-03,\n 4.9951e-04, 6.8201e-04, -3.1527e-04, -8.7993e-04, -2.9484e-03,\n 4.8691e-04, -1.7727e-03, -7.9813e-04, -2.3738e-03, -2.6145e-05,\n -1.8552e-03, -1.0963e-03, 1.2725e-04, -2.3126e-03, -6.4813e-04,\n 1.9604e-04, -2.7641e-03, 9.3641e-04, 9.5629e-04, 2.4258e-03,\n -1.0593e-03, 1.5176e-03, -2.2232e-04, 1.4156e-03, 3.2536e-04,\n 4.4908e-03, -1.0206e-03, 9.0033e-08, 1.5631e-03, 1.4958e-10,\n 2.7674e-14, -1.5015e-05, 2.3728e-03, 1.6508e-04, -3.3530e-03,\n 2.2802e-03, 7.1558e-04, -6.1278e-04, 1.3469e-03, -4.3877e-04,\n -6.6916e-03, -3.7049e-34, 8.0133e-06, 1.9363e-03, 2.4436e-04,\n -2.1892e-03, -1.7682e-03, -1.6809e-03, 4.8598e-31, 3.1390e-03],\n device='cuda:0')", - "exp_avg_sq": "tensor([1.8429e-05, 7.8424e-05, 4.1216e-05, 3.2207e-05, 1.5486e-07, 8.1643e-06,\n 5.6661e-05, 5.4997e-05, 3.6809e-05, 5.8048e-05, 5.5900e-05, 1.0907e-04,\n 5.2763e-07, 4.9280e-05, 6.0358e-05, 6.0430e-05, 9.3022e-05, 6.6292e-05,\n 2.2263e-05, 1.3737e-07, 4.6491e-05, 4.1435e-05, 3.8910e-05, 6.2517e-05,\n 5.0144e-07, 8.2325e-05, 5.6426e-05, 7.6278e-05, 5.5761e-05, 6.7651e-05,\n 6.4038e-05, 6.5705e-05, 1.0096e-06, 8.8563e-08, 4.7941e-05, 5.5138e-05,\n 6.3255e-05, 4.0000e-05, 3.1811e-05, 5.8204e-05, 6.2877e-05, 3.0693e-05,\n 5.5385e-05, 4.5257e-05, 4.4873e-05, 6.5875e-05, 5.8716e-05, 9.9101e-05,\n 9.7689e-05, 3.3143e-05, 7.1325e-05, 5.2607e-08, 8.5362e-08, 7.9888e-08,\n 7.1962e-05, 1.1149e-04, 4.4462e-05, 4.1195e-05, 2.5838e-07, 6.9597e-05,\n 5.9119e-05, 4.8329e-05, 9.5072e-05, 3.5547e-05, 6.7055e-05, 7.1558e-05,\n 5.2508e-05, 9.7777e-08, 7.3431e-05, 6.2996e-05, 7.9860e-06, 3.9941e-07,\n 7.2067e-05, 4.6330e-05, 1.3939e-05, 6.3709e-05, 7.0940e-05, 7.1251e-05,\n 3.4550e-05, 7.3103e-05, 9.4840e-05, 5.4518e-05, 7.9089e-05, 4.5982e-05,\n 6.0059e-05, 4.9573e-05, 5.8442e-05, 3.0440e-05, 6.0963e-05, 5.4131e-05,\n 6.2351e-07, 7.2436e-05, 5.1906e-05, 4.2414e-05, 9.1787e-05, 2.1514e-05,\n 5.2725e-05, 4.1873e-05, 4.9157e-05, 3.6346e-05, 2.9048e-05, 5.7960e-06,\n 7.6109e-05, 8.0546e-05, 1.2031e-04, 3.1543e-05, 5.6253e-05, 8.2481e-05,\n 1.2189e-05, 2.6002e-05, 8.6256e-05, 9.0572e-05, 4.2156e-08, 4.1130e-05,\n 5.3011e-07, 4.5521e-05, 6.3898e-05, 9.7266e-05, 1.8796e-08, 9.8546e-08,\n 6.8374e-08, 5.0582e-05, 5.4445e-08, 1.1226e-04, 5.4902e-05, 7.8698e-05,\n 7.3919e-05, 1.0242e-08, 2.3582e-05, 7.7462e-05, 4.4632e-05, 5.0593e-05,\n 7.1332e-05, 1.8271e-08, 6.7731e-05, 5.8945e-05, 3.1110e-05, 5.1993e-05,\n 4.2545e-05, 5.2321e-05, 1.0764e-04, 6.3356e-05, 7.3797e-05, 5.5977e-05,\n 6.8316e-05, 6.1020e-05, 8.0626e-07, 6.7718e-05, 5.7531e-05, 4.4023e-05,\n 6.9153e-07, 3.8952e-05, 4.7206e-05, 3.7102e-07, 4.9116e-05, 7.3234e-05,\n 7.3487e-07, 7.9238e-05, 4.3922e-05, 6.1611e-05, 2.1392e-05, 5.9273e-05,\n 1.4000e-04, 5.4958e-05, 5.5462e-05, 4.1322e-05, 1.0242e-04, 3.5023e-05,\n 5.3783e-05, 8.7312e-08, 1.3455e-07, 6.2080e-05, 8.8259e-05, 4.6051e-05,\n 5.0702e-05, 8.2736e-05, 2.5034e-05, 6.8924e-05, 2.4419e-08, 2.8527e-05,\n 5.2167e-05, 7.3348e-05, 9.0304e-06, 1.7967e-07, 6.7260e-05, 6.1738e-05,\n 8.1591e-05, 3.5377e-05, 4.5612e-05, 5.2171e-08, 5.9779e-07, 5.3762e-05,\n 2.8652e-05, 7.1083e-05, 6.1304e-05, 5.5979e-05, 3.9565e-05, 7.5861e-05,\n 9.4224e-08, 4.8972e-05, 4.0873e-05, 2.9134e-05, 8.5371e-05, 2.5459e-05,\n 1.1806e-04, 4.6474e-09, 5.6294e-05, 6.2230e-05, 4.8022e-05, 6.4653e-05,\n 3.1432e-05, 5.7633e-05, 6.0467e-05, 5.9572e-05, 3.4891e-05, 5.2127e-05,\n 3.9065e-05, 7.4697e-05, 7.3555e-05, 3.6516e-05, 4.0428e-05, 6.5586e-05,\n 4.5840e-05, 5.1340e-05, 5.2006e-05, 5.3998e-05, 8.8808e-05, 9.3350e-05,\n 3.8006e-05, 8.3215e-05, 6.1270e-05, 1.0445e-04, 7.0716e-05, 1.1552e-07,\n 6.6224e-05, 3.3235e-05, 5.2076e-05, 3.9190e-05, 1.3412e-04, 5.7846e-05,\n 3.7738e-05, 5.5868e-05, 5.8193e-05, 5.1935e-05, 9.0579e-05, 3.8098e-05,\n 3.0676e-05, 7.1407e-05, 4.0514e-05, 5.2177e-05, 5.7026e-05, 5.2525e-05,\n 4.3748e-05, 4.9606e-05, 6.8073e-05, 5.4826e-05, 4.9775e-07, 4.4180e-05,\n 3.3553e-05, 1.3345e-05, 5.5598e-05, 8.0616e-05, 6.8480e-05, 6.4118e-05,\n 5.9491e-05, 5.7352e-05, 4.2876e-05, 7.3783e-08, 6.4196e-05, 6.4906e-05,\n 4.3185e-05, 8.3245e-05, 4.9406e-05, 2.4759e-05, 7.4066e-05, 1.2876e-07,\n 5.5531e-05, 3.6385e-07, 4.0812e-05, 1.8832e-05, 5.4092e-05, 4.6169e-05,\n 8.6733e-05, 7.8781e-05, 6.8829e-05, 6.6240e-05, 1.4839e-08, 7.6035e-05,\n 7.8169e-05, 4.0586e-05, 6.9116e-06, 5.9455e-05, 8.6081e-05, 6.8788e-05,\n 4.7999e-05, 9.0710e-05, 9.8244e-05, 5.5921e-05, 5.4694e-05, 1.1449e-04,\n 4.9329e-05, 6.2151e-06, 1.1886e-06, 4.3558e-05, 5.7345e-05, 3.4039e-06,\n 6.2881e-05, 1.5315e-08, 3.0172e-07, 5.8508e-05, 8.4363e-05, 3.9821e-05,\n 6.2632e-05, 7.2649e-05, 5.8004e-05, 5.8830e-05, 6.8701e-05, 5.1764e-05,\n 4.0504e-05, 6.9099e-05, 7.2059e-05, 1.0150e-04, 8.1962e-05, 4.0281e-05,\n 1.9336e-05, 6.2384e-05, 4.9710e-05, 7.0670e-06, 4.2761e-05, 6.5936e-05,\n 1.1104e-05, 3.6789e-05, 7.6847e-05, 7.2629e-05, 3.5995e-11, 5.1730e-05,\n 3.6673e-05, 8.5187e-05, 3.6331e-05, 4.6995e-05, 9.1364e-06, 6.6941e-05,\n 4.9654e-05, 5.2543e-05, 5.2972e-05, 8.5916e-05, 5.2677e-07, 6.7806e-06,\n 5.9184e-05, 5.9994e-05, 4.2475e-05, 6.7451e-05, 4.9160e-05, 5.1374e-05,\n 9.5346e-05, 5.5690e-05, 5.1087e-05, 5.1013e-05, 7.7113e-05, 1.3310e-06,\n 2.9492e-05, 3.7743e-05, 6.4588e-05, 8.8312e-05, 6.5177e-05, 1.4898e-04,\n 1.7569e-05, 6.9014e-05, 7.9805e-05, 6.7745e-05, 6.6396e-05, 4.6284e-05,\n 4.9685e-05, 1.0765e-05, 5.2503e-05, 9.4992e-05, 1.2216e-04, 1.7051e-05,\n 2.2932e-07, 6.6446e-05, 1.9666e-05, 1.2512e-06, 5.0863e-05, 1.0809e-04,\n 4.8287e-05, 4.1113e-05, 5.7037e-05, 7.6571e-05, 4.0140e-05, 3.2797e-05,\n 7.5846e-05, 7.5232e-05, 4.8110e-05, 1.2436e-07, 4.1264e-07, 5.8930e-05,\n 6.4290e-05, 5.9733e-05, 8.5655e-05, 4.2477e-05, 5.1533e-05, 4.0953e-05,\n 1.9062e-05, 4.4839e-05, 4.8009e-05, 8.5967e-05, 4.2455e-05, 1.5196e-07,\n 8.6692e-06, 1.7067e-05, 4.3776e-08, 5.9799e-06, 7.6373e-08, 7.7939e-05,\n 6.6996e-05, 2.2114e-05, 4.4705e-05, 2.0620e-07, 2.7376e-05, 6.1274e-05,\n 3.9801e-05, 6.7514e-05, 7.7499e-05, 8.3942e-05, 5.4821e-05, 3.9531e-06,\n 3.7451e-05, 1.5607e-07, 3.2283e-05, 2.8839e-08, 3.2195e-05, 6.9658e-05,\n 5.9980e-05, 1.3245e-07, 3.5261e-05, 5.4220e-05, 7.9021e-05, 6.9342e-05,\n 4.8851e-05, 3.0698e-05, 7.3776e-05, 3.0239e-05, 6.1269e-05, 8.3531e-05,\n 4.3949e-05, 1.2398e-06, 5.4793e-05, 4.4704e-08, 5.6602e-05, 3.6121e-05,\n 1.4832e-06, 2.8724e-05, 4.3440e-05, 4.2151e-06, 5.2914e-05, 7.0113e-08,\n 3.5470e-05, 5.1513e-06, 4.5728e-05, 2.0023e-06, 5.5838e-05, 9.5254e-05,\n 1.3018e-08, 4.7710e-05, 1.6855e-06, 6.7336e-05, 4.4844e-05, 1.0632e-07,\n 6.9499e-05, 3.8512e-05, 1.1443e-04, 7.7913e-05, 4.8103e-05, 1.9658e-05,\n 3.8010e-05, 5.8789e-05, 7.3901e-07, 6.7558e-05, 2.3464e-07, 9.9241e-06,\n 6.0042e-06, 7.3457e-06, 4.9822e-05, 6.4646e-05, 1.1142e-04, 5.0077e-05,\n 5.9531e-05, 4.5883e-05, 6.5449e-05, 4.9554e-05, 9.8446e-08, 5.5766e-05,\n 5.6535e-05, 1.1022e-04, 7.2413e-06, 2.2127e-05, 4.5780e-05, 3.7642e-05,\n 5.6162e-05, 1.2533e-05, 4.8463e-05, 7.3051e-07, 7.0802e-06, 8.7394e-05,\n 5.1142e-05, 9.3222e-05, 5.9558e-05, 5.3520e-05, 6.2056e-05, 5.1860e-05,\n 6.2345e-05, 1.1813e-04, 4.4469e-05, 6.1378e-05, 5.4675e-07, 6.0514e-07,\n 6.8865e-05, 1.3582e-04, 8.0553e-05, 4.9708e-05, 6.0015e-05, 6.1626e-05,\n 5.5857e-05, 6.6562e-05, 5.0500e-05, 1.0480e-05, 5.3138e-05, 6.2440e-05,\n 4.3975e-05, 4.3015e-05, 1.4075e-07, 4.1689e-05, 6.3768e-05, 3.6194e-05,\n 5.5089e-07, 5.1195e-05, 5.9280e-05, 4.4625e-08, 1.3426e-07, 6.4783e-05,\n 6.1671e-05, 7.4173e-05, 3.5884e-05, 7.4318e-05, 3.4199e-05, 6.2636e-05,\n 4.9733e-05, 3.5539e-05, 3.9190e-05, 6.2689e-05, 6.2631e-05, 5.4694e-05,\n 8.1090e-05, 7.6418e-05, 6.4501e-05, 3.5079e-05, 5.3659e-05, 1.6279e-04,\n 4.1786e-05, 5.2087e-05, 2.7329e-05, 5.4073e-05, 6.3129e-05, 9.9276e-05,\n 1.4463e-06, 4.7581e-05, 7.8885e-05, 3.6910e-05, 7.5648e-05, 5.6108e-05,\n 6.1684e-05, 4.9811e-05, 5.5668e-05, 8.1306e-05, 7.6103e-07, 6.9993e-08,\n 8.0110e-07, 5.5193e-05, 4.9125e-05, 6.7111e-05, 6.0407e-05, 1.5247e-06,\n 5.0229e-05, 5.9279e-05, 2.5739e-05, 5.7219e-05, 4.1998e-05, 4.4227e-05,\n 8.3390e-05, 6.9618e-05, 4.7138e-05, 9.0475e-05, 6.8649e-05, 8.6704e-05,\n 5.7291e-05, 6.2965e-05, 6.1046e-05, 6.3377e-05, 8.6306e-05, 8.9585e-05,\n 8.1385e-05, 4.0861e-05, 5.0476e-05, 5.1084e-05, 5.8064e-05, 4.2190e-05,\n 4.8913e-05, 6.1226e-05, 1.2306e-04, 9.7292e-05, 7.2088e-05, 7.9435e-05,\n 6.0723e-05, 2.1911e-05, 2.4702e-05, 6.3349e-05, 5.6102e-05, 5.9785e-05,\n 3.3363e-05, 3.4988e-08, 1.0017e-06, 7.3015e-07, 6.3292e-05, 4.8849e-05,\n 5.9466e-05, 7.0894e-05, 5.3603e-05, 4.3099e-05, 4.0995e-05, 7.9944e-05,\n 4.0672e-05, 2.5679e-07, 4.2538e-08, 7.4015e-05, 5.2913e-05, 8.9377e-05,\n 6.6951e-05, 5.6542e-05, 9.5307e-08, 4.0459e-05], device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([-2.5383e-04, 3.3642e-04, 1.5263e-03, 2.0180e-04, -3.2217e-04,\n 9.9299e-04, -1.2958e-03, 1.7783e-03, -1.1433e-02, 1.3442e-03,\n -5.2038e-06, -1.2961e-03, 5.6052e-45, 4.6860e-04, 3.0243e-04,\n -1.5875e-03, -7.7546e-04, -7.4867e-04, 1.2261e-03, 5.6052e-45,\n -2.8617e-03, -1.2040e-03, 1.9934e-03, 4.4646e-04, 5.6052e-45,\n 1.4099e-03, -4.9186e-04, 2.8079e-03, 8.8708e-04, -1.7457e-03,\n 6.8585e-04, 7.7115e-04, -5.9056e-09, 5.6052e-45, -3.1902e-03,\n -2.6145e-04, 5.1512e-03, 3.0591e-03, 6.3891e-04, -3.4599e-03,\n -2.0975e-04, -9.9475e-04, 3.1483e-03, 6.3304e-03, -2.7553e-03,\n -1.6319e-04, 3.6616e-04, -8.7693e-04, -1.1744e-03, 1.3425e-03,\n -6.8330e-04, 5.6052e-45, -4.3270e-11, 5.6052e-45, -1.0810e-03,\n 7.9324e-04, 2.9458e-04, 1.1268e-03, -3.3478e-05, 1.2027e-03,\n -1.7015e-04, 2.4234e-03, 5.1939e-04, -2.7528e-04, 5.9288e-04,\n -2.0985e-03, -4.4793e-03, 5.6052e-45, -2.4158e-03, 1.2987e-03,\n 1.6490e-03, 5.6052e-45, 3.0649e-03, 1.9158e-03, -1.7971e-03,\n -3.5207e-03, 4.2541e-05, 6.6341e-04, 1.8764e-03, 7.9388e-04,\n 2.4894e-03, 2.6783e-03, -1.8094e-03, 2.0309e-04, 1.5313e-03,\n -2.3402e-03, -3.6859e-03, 1.0882e-03, 1.4556e-03, -2.3890e-03,\n 5.6052e-45, 1.7438e-03, 1.4836e-03, 1.1200e-03, 3.1667e-03,\n -3.7070e-03, -2.8607e-04, 6.8942e-04, -8.5061e-04, 3.8672e-04,\n -9.2338e-04, 1.1207e-03, 3.0808e-03, 2.3356e-03, 4.5637e-04,\n -1.5440e-03, 3.3261e-03, -3.2884e-03, 1.1839e-04, -4.6794e-05,\n -1.7103e-03, 3.3829e-03, 5.6052e-45, -2.5911e-03, -5.6360e-04,\n 8.8871e-04, -8.4471e-04, -2.2190e-03, -5.0496e-16, 1.7679e-07,\n 5.6052e-45, 8.2512e-04, 5.6052e-45, 6.8095e-04, 2.3092e-03,\n 1.2416e-03, -8.7029e-04, 5.6052e-45, 1.0723e-03, 3.2067e-03,\n -2.6833e-04, 1.1413e-03, 1.3956e-03, 2.5150e-20, -6.5868e-05,\n 1.1413e-03, 1.9837e-03, 2.2025e-03, 1.7465e-03, -4.4475e-04,\n -4.3164e-04, -3.9841e-03, 2.4628e-03, -7.6038e-04, -1.9682e-03,\n 1.0329e-04, -5.6052e-45, 1.1217e-03, -2.2650e-03, -5.6302e-04,\n -1.4785e-25, -4.4081e-04, 1.1927e-03, 5.6052e-45, 9.4334e-04,\n 1.8371e-04, -1.7026e-33, 5.0472e-04, 6.1390e-03, 2.0046e-04,\n -5.5753e-04, -2.2900e-03, 5.3853e-03, -2.5106e-03, -5.0420e-03,\n -2.5502e-03, -2.5170e-03, -1.1467e-03, -8.4865e-04, 5.6052e-45,\n 5.6052e-45, 3.2256e-04, 1.9479e-03, 5.1432e-04, -4.2804e-04,\n -8.0500e-05, 3.3792e-04, 8.9718e-04, 5.6052e-45, 8.0570e-05,\n -4.0074e-03, 1.0959e-03, -1.4137e-04, 5.6052e-45, -2.0903e-04,\n 9.3977e-04, -5.2416e-04, 1.8220e-04, -1.1301e-03, 5.6052e-45,\n 5.6052e-45, 1.0757e-04, 4.9479e-03, 3.5221e-03, -4.5186e-04,\n 5.0306e-04, 7.9772e-04, 1.1046e-03, 5.6052e-45, 1.8709e-03,\n 9.2141e-04, -3.1505e-04, 9.0721e-04, -4.0493e-04, 3.5744e-04,\n 5.6052e-45, 5.1685e-04, -1.5379e-03, -1.1911e-03, 6.4799e-04,\n 3.5037e-04, 2.0768e-03, -7.5834e-03, 6.5965e-04, -2.8627e-03,\n 1.7813e-03, 2.3761e-04, 3.7499e-04, 2.4069e-03, -4.7942e-05,\n 7.1553e-04, -3.7912e-03, 1.7914e-03, 3.6356e-03, 1.3194e-03,\n 1.4334e-03, 2.4092e-03, 3.0253e-03, 1.1649e-03, 1.1193e-04,\n -1.8358e-03, -1.8537e-03, 1.8078e-04, 5.6052e-45, 1.0969e-03,\n 3.5196e-04, 1.5360e-03, 2.0031e-03, -5.4725e-04, -5.0848e-04,\n 1.5853e-03, 1.6889e-03, 1.2233e-03, -3.2627e-03, 2.4035e-03,\n -2.7509e-03, -2.3404e-04, -4.1648e-04, 1.5942e-04, -1.3968e-03,\n 1.2093e-04, 1.9641e-03, 1.1517e-03, 1.1792e-03, -6.6849e-04,\n 2.5845e-03, 5.6052e-45, 1.7769e-03, -6.1465e-04, -2.4848e-04,\n 6.4684e-04, -2.0915e-04, -1.9209e-05, 2.1165e-03, -2.5082e-03,\n 2.8314e-03, 2.3913e-03, 5.6052e-45, -7.4337e-03, -1.7149e-03,\n 1.4748e-03, 3.2400e-04, -1.4173e-04, -6.7552e-04, -2.1635e-03,\n -7.3868e-14, -1.2644e-03, 5.6052e-45, 1.4047e-03, -8.3320e-04,\n 1.7829e-03, 1.1660e-03, 2.2050e-03, 1.1764e-03, 2.3199e-03,\n -4.0779e-03, 1.0116e-04, -4.2430e-03, 5.0496e-04, -2.0566e-04,\n -3.4104e-04, -2.9261e-04, -2.8332e-03, -1.8661e-03, 1.5172e-03,\n -8.6613e-04, -1.4609e-03, -3.4278e-04, 5.5266e-04, 8.5275e-04,\n -7.3712e-03, -8.1383e-04, -1.8512e-05, 2.9731e-04, -1.2587e-04,\n -1.7235e-03, 8.4856e-04, 5.6052e-45, 5.6052e-45, 1.0038e-03,\n 1.3149e-03, 1.1838e-03, -1.2740e-03, -1.4760e-03, -3.8575e-04,\n 1.2066e-04, 5.9129e-04, 2.3998e-03, -9.2707e-04, -2.2159e-03,\n -4.7793e-05, 1.9066e-03, -4.4703e-04, 1.6235e-03, 2.7080e-03,\n 2.2248e-03, -1.1790e-03, 5.8560e-04, -4.2412e-04, 1.9551e-03,\n 6.0337e-05, -1.9445e-03, 1.3213e-03, 2.9132e-04, 5.6052e-45,\n -7.4821e-04, 9.1699e-05, -1.6500e-03, -1.2714e-03, -3.9415e-04,\n 3.3128e-04, -1.3279e-03, -4.1022e-03, -7.1368e-04, 1.0617e-04,\n 4.2122e-04, 5.6052e-45, -7.3137e-04, 2.8261e-03, -3.8412e-03,\n -1.6172e-03, 9.4337e-04, -2.5811e-04, 1.7815e-03, 4.3357e-04,\n 2.1772e-03, 1.0317e-04, -7.5788e-04, -6.6014e-04, 1.8676e-03,\n 3.8791e-04, 1.2253e-03, -6.2575e-03, -1.4788e-03, 2.7072e-05,\n 1.7084e-03, -6.1967e-04, 1.3204e-03, -3.1039e-03, 3.0302e-04,\n -2.6108e-03, 1.0986e-03, -5.3722e-04, -1.7633e-03, 1.0695e-03,\n -3.9537e-03, 1.9419e-03, 4.6936e-04, 5.6052e-45, -1.9232e-03,\n 1.6557e-03, 5.6052e-45, -7.4832e-04, 3.0119e-03, 1.8134e-03,\n 2.8102e-03, -6.7810e-04, 1.2902e-03, 2.5185e-03, 2.7113e-03,\n -1.6999e-03, -1.7654e-03, -1.6577e-04, -9.4647e-21, 5.6052e-45,\n -6.3248e-03, 1.7675e-03, 2.5238e-03, -1.6636e-04, 5.1199e-04,\n -3.7028e-03, 3.2892e-03, -5.0988e-04, -1.0067e-03, -4.8039e-03,\n -2.1621e-03, -7.8040e-04, 5.6052e-45, 5.0562e-05, 9.8408e-04,\n 5.6052e-45, -1.0536e-03, 5.6052e-45, -1.3911e-03, 2.2791e-03,\n 1.2400e-03, 1.3171e-03, 5.6052e-45, -2.2337e-04, 5.3265e-04,\n -2.5126e-04, 1.7054e-03, 2.0940e-03, 2.6929e-03, -7.9512e-04,\n 7.4613e-04, -3.6072e-04, -2.4193e-04, -7.2536e-05, 5.6052e-45,\n 2.5617e-03, -4.0028e-03, 7.1800e-04, 5.6052e-45, -2.7940e-03,\n 1.3879e-03, -5.3645e-03, -1.9681e-03, 2.3033e-04, -2.1055e-04,\n -2.3001e-03, 1.5731e-04, -4.4659e-03, 3.5292e-03, -2.4825e-03,\n -5.9628e-04, 1.6698e-03, 5.6052e-45, -1.1510e-03, -1.0174e-04,\n 5.6052e-45, 9.2394e-05, -9.2492e-04, -1.2869e-04, 7.0905e-04,\n 4.5798e-08, 1.8736e-04, -7.6005e-05, 4.3942e-04, 5.6052e-45,\n 2.7952e-04, 3.7757e-04, 5.6052e-45, 3.8123e-04, 5.3931e-05,\n 5.0965e-04, 1.6549e-03, 5.6052e-45, 2.1060e-03, -4.1522e-05,\n 3.7485e-03, 1.2029e-04, -4.6988e-04, -3.2823e-05, -1.5356e-03,\n 3.4967e-04, 5.6052e-45, -5.2579e-04, 5.6052e-45, 1.6180e-03,\n 8.4434e-04, -4.9545e-04, 1.7854e-03, 2.0302e-03, 2.8658e-03,\n 4.2917e-03, -3.1647e-04, -1.3547e-05, -1.8413e-03, -3.0740e-04,\n 1.9323e-37, -8.3822e-04, -1.3524e-04, -1.4803e-03, 1.4194e-04,\n 2.0769e-04, -1.1327e-03, 4.8733e-04, 6.5384e-04, -1.3887e-03,\n 1.6279e-03, 5.6052e-45, 5.1626e-04, -1.5877e-03, -5.7097e-04,\n 2.4501e-03, -6.7106e-04, 7.8480e-04, -7.5487e-04, 5.3939e-04,\n -2.7195e-03, -1.4107e-03, -5.9300e-04, -4.2834e-03, 5.6052e-45,\n 5.6052e-45, -7.5068e-04, -6.3335e-03, -9.7074e-05, 1.1434e-03,\n 5.2953e-04, 2.5766e-05, -1.9734e-04, 2.5388e-03, -2.8762e-03,\n -1.9364e-04, -2.7211e-03, 1.4126e-03, 2.6122e-04, -1.1918e-03,\n 2.9070e-04, -2.0184e-03, -1.2838e-03, -2.9494e-05, 5.6052e-45,\n -1.4761e-03, -2.0301e-03, 5.6052e-45, 5.6052e-45, -7.1633e-04,\n -3.4456e-03, -1.5447e-03, 9.4841e-04, 3.8709e-04, 6.8828e-06,\n 2.0253e-03, 4.5288e-04, 2.9476e-03, -8.0906e-05, -9.8687e-05,\n 3.9675e-04, 2.5121e-03, 8.8842e-04, 5.3956e-05, -5.2579e-04,\n -2.4194e-03, -2.5995e-03, -2.7839e-04, 2.0765e-04, 2.2041e-03,\n -1.0488e-03, -8.5193e-04, 1.0936e-03, -2.3788e-04, 3.6597e-04,\n -7.3021e-04, 4.5324e-04, 5.6987e-04, 3.6895e-03, 1.2378e-03,\n 2.7245e-03, -7.9741e-04, 1.0225e-03, 1.9997e-04, 1.8216e-04,\n -5.6052e-45, 5.6052e-45, -1.7986e-03, 3.7832e-04, 9.0102e-04,\n 2.8945e-04, 5.6052e-45, -1.4635e-04, -1.5616e-03, -2.1742e-03,\n -1.7092e-04, 2.0810e-04, 6.1069e-04, 2.3253e-03, -3.7953e-03,\n -2.9309e-04, 7.9889e-04, 8.2072e-04, -3.5315e-03, 7.0650e-04,\n 2.3199e-04, -1.5255e-04, -3.0804e-03, 3.8992e-03, -3.2597e-03,\n 6.2782e-03, 4.7445e-04, -7.1991e-04, -2.8219e-03, -5.2184e-03,\n 5.4292e-04, -1.7206e-03, 1.5001e-03, -3.7600e-03, -3.9258e-03,\n 2.1162e-03, -1.5392e-03, -3.8298e-03, 5.8741e-04, 1.2812e-03,\n 1.4966e-03, -2.1805e-04, 4.4091e-04, -3.9576e-04, -8.4735e-10,\n 5.4539e-08, 2.2878e-03, 8.9630e-04, 1.0187e-03, 2.0335e-03,\n -2.6973e-03, -4.0402e-03, 6.2057e-04, 8.3841e-04, -5.2095e-04,\n -1.0639e-03, -3.3994e-39, -5.1481e-04, -2.1795e-03, -2.6319e-04,\n 6.3650e-04, 2.4263e-04, -2.0230e-03, -6.0368e-14, -8.9237e-04],\n device='cuda:0')", + "exp_avg_sq": "tensor([1.3311e-05, 6.0514e-05, 2.5175e-05, 1.9146e-05, 3.9522e-06, 5.0957e-06,\n 3.5278e-05, 3.3902e-05, 3.4233e-05, 3.4695e-05, 3.0516e-05, 6.2106e-05,\n 4.3085e-08, 2.9803e-05, 3.7183e-05, 3.6746e-05, 5.5893e-05, 3.8376e-05,\n 2.1617e-05, 1.1218e-08, 3.4465e-05, 2.2396e-05, 2.3250e-05, 3.4491e-05,\n 4.0947e-08, 4.7140e-05, 3.3724e-05, 4.7018e-05, 4.1878e-05, 4.0964e-05,\n 4.4174e-05, 3.7067e-05, 8.2439e-08, 7.2318e-09, 3.7203e-05, 3.0538e-05,\n 3.8977e-05, 2.1839e-05, 2.3140e-05, 3.7936e-05, 3.7581e-05, 2.1455e-05,\n 3.4589e-05, 3.2321e-05, 2.9276e-05, 4.0218e-05, 3.2253e-05, 5.6126e-05,\n 6.1230e-05, 2.7371e-05, 3.9125e-05, 4.2957e-09, 6.9704e-09, 6.5234e-09,\n 4.7881e-05, 7.3133e-05, 2.9743e-05, 2.7767e-05, 1.0309e-07, 4.5017e-05,\n 3.3996e-05, 3.2703e-05, 6.4470e-05, 1.8737e-05, 3.8428e-05, 3.9311e-05,\n 3.6721e-05, 7.9843e-09, 5.2418e-05, 3.5935e-05, 1.1966e-05, 3.2615e-08,\n 4.1343e-05, 2.9839e-05, 1.2738e-05, 4.1247e-05, 4.5111e-05, 4.7932e-05,\n 2.6883e-05, 4.5188e-05, 7.1613e-05, 3.1745e-05, 4.3055e-05, 2.8623e-05,\n 3.4108e-05, 3.4707e-05, 3.4663e-05, 2.0292e-05, 3.8552e-05, 3.8108e-05,\n 5.0915e-08, 4.5768e-05, 3.0824e-05, 2.5799e-05, 5.6976e-05, 1.4823e-05,\n 3.9359e-05, 3.3999e-05, 3.1020e-05, 2.4178e-05, 2.4476e-05, 1.0092e-05,\n 4.7268e-05, 5.1863e-05, 8.2968e-05, 2.5773e-05, 3.1980e-05, 5.1106e-05,\n 1.4126e-05, 2.1015e-05, 5.1987e-05, 5.2175e-05, 3.4424e-09, 2.5601e-05,\n 8.7473e-06, 2.5030e-05, 3.5607e-05, 6.0366e-05, 1.5349e-09, 2.9979e-08,\n 5.5832e-09, 3.3000e-05, 4.4458e-09, 6.6693e-05, 3.4208e-05, 4.5696e-05,\n 4.5871e-05, 8.3632e-10, 1.6641e-05, 4.3601e-05, 3.2062e-05, 3.6073e-05,\n 4.0439e-05, 1.4920e-09, 4.0968e-05, 3.5414e-05, 2.3472e-05, 2.9006e-05,\n 3.0424e-05, 2.8554e-05, 6.2339e-05, 3.9619e-05, 4.7221e-05, 3.4816e-05,\n 4.0517e-05, 4.1525e-05, 6.5837e-08, 4.0609e-05, 3.5331e-05, 3.2016e-05,\n 5.6504e-08, 2.1272e-05, 3.0968e-05, 3.0297e-08, 2.9838e-05, 4.3890e-05,\n 6.0013e-08, 5.1566e-05, 3.0873e-05, 3.7370e-05, 2.6625e-05, 3.4743e-05,\n 9.3730e-05, 3.8688e-05, 3.9094e-05, 2.8193e-05, 6.0957e-05, 2.1070e-05,\n 3.7245e-05, 7.1297e-09, 1.0987e-08, 3.4593e-05, 5.3563e-05, 2.8054e-05,\n 3.3369e-05, 4.8340e-05, 1.6512e-05, 4.0573e-05, 1.9940e-09, 1.9736e-05,\n 3.7234e-05, 4.8279e-05, 8.4764e-06, 1.4672e-08, 3.7023e-05, 3.7642e-05,\n 5.2603e-05, 2.3431e-05, 3.1872e-05, 4.2605e-09, 4.8814e-08, 3.3933e-05,\n 1.9507e-05, 4.3989e-05, 3.1003e-05, 3.6994e-05, 2.2514e-05, 4.2640e-05,\n 7.6941e-09, 3.2019e-05, 2.6379e-05, 1.6421e-05, 5.7783e-05, 1.9437e-05,\n 7.3734e-05, 3.7950e-10, 2.9028e-05, 3.5288e-05, 2.7534e-05, 3.5252e-05,\n 1.7869e-05, 3.9220e-05, 4.5721e-05, 3.7415e-05, 2.4657e-05, 2.9123e-05,\n 2.4406e-05, 4.4917e-05, 3.8253e-05, 2.3384e-05, 2.2344e-05, 4.0144e-05,\n 2.5456e-05, 3.1103e-05, 3.4081e-05, 3.6113e-05, 4.9859e-05, 5.3670e-05,\n 2.3129e-05, 4.6888e-05, 4.0657e-05, 7.0371e-05, 4.0257e-05, 9.4332e-09,\n 4.1213e-05, 1.8357e-05, 3.0556e-05, 2.2143e-05, 7.4990e-05, 3.8493e-05,\n 2.8854e-05, 3.9848e-05, 3.2491e-05, 3.2045e-05, 5.0824e-05, 2.2425e-05,\n 2.0981e-05, 4.5693e-05, 2.3878e-05, 2.8133e-05, 3.6585e-05, 3.1763e-05,\n 2.5954e-05, 3.0128e-05, 3.7588e-05, 2.9525e-05, 4.0645e-08, 2.7552e-05,\n 2.1957e-05, 1.2320e-05, 3.4997e-05, 5.9168e-05, 4.2474e-05, 3.6013e-05,\n 3.3367e-05, 3.2273e-05, 3.1601e-05, 6.0249e-09, 4.8408e-05, 3.7184e-05,\n 2.3909e-05, 5.3186e-05, 2.9116e-05, 2.2981e-05, 4.6294e-05, 1.0514e-08,\n 3.6242e-05, 2.9711e-08, 2.3919e-05, 1.4452e-05, 3.4708e-05, 2.7793e-05,\n 5.1990e-05, 4.0867e-05, 3.6544e-05, 4.3248e-05, 2.1820e-06, 5.2025e-05,\n 4.7962e-05, 2.4138e-05, 8.4048e-06, 3.5386e-05, 4.6387e-05, 3.7644e-05,\n 3.3176e-05, 5.5716e-05, 6.0354e-05, 3.7498e-05, 3.2842e-05, 7.3572e-05,\n 4.3849e-05, 8.3857e-06, 6.7912e-06, 2.9934e-05, 3.5644e-05, 1.0878e-05,\n 4.0054e-05, 1.2506e-09, 2.4638e-08, 3.6757e-05, 5.2168e-05, 2.2669e-05,\n 3.2675e-05, 4.3797e-05, 4.5221e-05, 3.2376e-05, 4.0366e-05, 2.6117e-05,\n 2.7149e-05, 4.7122e-05, 4.9004e-05, 5.7672e-05, 4.9505e-05, 2.2842e-05,\n 1.4050e-05, 3.4761e-05, 3.2627e-05, 1.0293e-05, 2.7988e-05, 4.0666e-05,\n 9.9559e-06, 2.3909e-05, 4.1833e-05, 4.9941e-05, 2.9393e-12, 3.5011e-05,\n 2.3016e-05, 5.2086e-05, 2.3590e-05, 3.3328e-05, 5.8258e-06, 3.7053e-05,\n 2.7632e-05, 3.0490e-05, 3.1777e-05, 4.7832e-05, 4.3014e-08, 9.9737e-06,\n 3.9429e-05, 3.9532e-05, 2.9914e-05, 3.9563e-05, 2.8559e-05, 3.0181e-05,\n 5.6046e-05, 3.5828e-05, 3.7762e-05, 3.1532e-05, 4.3956e-05, 8.2534e-06,\n 1.8275e-05, 2.1712e-05, 4.7149e-05, 5.6648e-05, 4.3619e-05, 8.8223e-05,\n 1.4317e-05, 4.4178e-05, 5.2889e-05, 3.8753e-05, 4.6259e-05, 2.4645e-05,\n 2.8898e-05, 9.9445e-06, 3.2438e-05, 5.5087e-05, 6.8958e-05, 1.1871e-05,\n 1.8726e-08, 3.7359e-05, 2.4413e-05, 1.0217e-07, 2.8345e-05, 6.0592e-05,\n 3.5349e-05, 2.8253e-05, 3.4053e-05, 4.2070e-05, 2.2226e-05, 2.1119e-05,\n 5.2166e-05, 4.3497e-05, 2.8234e-05, 1.0155e-08, 3.3695e-08, 3.9174e-05,\n 3.7581e-05, 3.4287e-05, 5.7549e-05, 2.9426e-05, 3.6184e-05, 2.3307e-05,\n 1.4121e-05, 2.6148e-05, 3.2520e-05, 5.3005e-05, 2.4563e-05, 1.2409e-08,\n 1.1708e-05, 9.9340e-06, 3.5747e-09, 1.1808e-05, 6.2365e-09, 4.6425e-05,\n 4.2321e-05, 1.5619e-05, 3.1568e-05, 1.6837e-08, 1.8509e-05, 4.2679e-05,\n 2.6337e-05, 3.9077e-05, 4.6447e-05, 4.8305e-05, 3.2076e-05, 3.9195e-06,\n 3.1133e-05, 6.6710e-06, 2.0082e-05, 8.4633e-09, 2.3589e-05, 4.4472e-05,\n 3.3544e-05, 1.0815e-08, 2.2287e-05, 3.0834e-05, 4.8556e-05, 3.9168e-05,\n 3.3910e-05, 2.5545e-05, 4.6118e-05, 1.7317e-05, 3.7420e-05, 5.2016e-05,\n 2.8716e-05, 8.2903e-06, 3.4994e-05, 3.6504e-09, 2.9451e-05, 2.3958e-05,\n 1.2111e-07, 2.2642e-05, 2.8133e-05, 3.7288e-06, 3.0414e-05, 7.8381e-09,\n 2.0593e-05, 3.9261e-06, 3.1326e-05, 1.6350e-07, 3.0301e-05, 5.4665e-05,\n 1.0630e-09, 2.7470e-05, 6.1610e-06, 3.7335e-05, 2.5991e-05, 8.6819e-09,\n 4.5253e-05, 2.2189e-05, 6.1497e-05, 4.5398e-05, 2.5775e-05, 1.7709e-05,\n 2.2717e-05, 3.0640e-05, 6.0346e-08, 4.4240e-05, 1.9160e-08, 9.2665e-06,\n 7.2145e-06, 8.2439e-06, 3.6468e-05, 3.7881e-05, 6.7687e-05, 2.4483e-05,\n 4.3888e-05, 2.8398e-05, 4.2333e-05, 3.1223e-05, 8.0390e-09, 3.6482e-05,\n 3.5413e-05, 6.6510e-05, 6.6403e-06, 2.0803e-05, 2.7950e-05, 2.1849e-05,\n 3.8547e-05, 1.7279e-05, 3.1985e-05, 5.9652e-08, 1.9927e-05, 4.6623e-05,\n 3.2985e-05, 5.9695e-05, 3.7871e-05, 3.1045e-05, 4.2322e-05, 3.6158e-05,\n 3.5571e-05, 7.4985e-05, 2.9321e-05, 3.8378e-05, 4.4773e-08, 4.9414e-08,\n 4.3266e-05, 8.0830e-05, 4.8934e-05, 2.9087e-05, 3.6011e-05, 3.4274e-05,\n 3.1817e-05, 4.7425e-05, 3.2986e-05, 6.2725e-06, 3.1714e-05, 3.8764e-05,\n 3.0417e-05, 2.6466e-05, 7.6673e-07, 2.8387e-05, 3.7635e-05, 2.5055e-05,\n 4.4984e-08, 2.7384e-05, 3.6660e-05, 3.6440e-09, 1.0964e-08, 3.4977e-05,\n 4.1029e-05, 4.4366e-05, 2.2350e-05, 4.1401e-05, 1.9588e-05, 3.9129e-05,\n 3.1102e-05, 1.9863e-05, 2.2185e-05, 3.7681e-05, 3.7233e-05, 2.8558e-05,\n 4.7910e-05, 4.8012e-05, 4.0460e-05, 2.4428e-05, 3.2610e-05, 1.0697e-04,\n 2.8344e-05, 3.2518e-05, 1.8103e-05, 3.1356e-05, 3.8430e-05, 6.8956e-05,\n 6.5067e-06, 2.7966e-05, 4.8813e-05, 2.2977e-05, 4.3701e-05, 2.9500e-05,\n 4.1277e-05, 2.8475e-05, 3.4002e-05, 4.7319e-05, 5.5738e-06, 5.7155e-09,\n 6.5416e-08, 2.9182e-05, 3.1456e-05, 3.8036e-05, 3.2897e-05, 1.2451e-07,\n 3.0031e-05, 3.4883e-05, 2.0542e-05, 3.6318e-05, 2.6387e-05, 2.4917e-05,\n 4.9457e-05, 4.2343e-05, 3.3598e-05, 5.8827e-05, 4.3868e-05, 5.4305e-05,\n 3.0545e-05, 4.1284e-05, 3.5085e-05, 4.0758e-05, 4.8390e-05, 6.1708e-05,\n 4.9915e-05, 2.1822e-05, 3.0854e-05, 3.5410e-05, 3.8892e-05, 2.5923e-05,\n 2.9530e-05, 3.5008e-05, 6.7068e-05, 5.9785e-05, 4.2225e-05, 4.4093e-05,\n 3.9467e-05, 1.6882e-05, 2.0927e-05, 3.6647e-05, 3.2884e-05, 3.6112e-05,\n 1.8672e-05, 3.0325e-09, 8.1847e-08, 6.6168e-06, 3.8777e-05, 3.4095e-05,\n 3.4811e-05, 3.6944e-05, 3.4062e-05, 2.6564e-05, 2.5683e-05, 5.3070e-05,\n 2.5196e-05, 2.0969e-08, 5.9801e-06, 4.5888e-05, 3.0643e-05, 5.4534e-05,\n 3.7676e-05, 3.2464e-05, 1.9150e-08, 2.4995e-05], device='cuda:0')" }, "2": { + "step": "tensor(6260.)", + "exp_avg": "tensor([[ 7.9436e-06, -7.0702e-05, -4.0870e-07, ..., -3.0017e-05,\n -3.6760e-16, -5.4073e-05],\n [ 2.8054e-06, 2.0132e-05, -5.1456e-05, ..., 4.6699e-06,\n -7.4626e-15, 1.6501e-05],\n [ 9.1971e-06, -1.0070e-04, -9.4052e-05, ..., 3.1487e-05,\n -1.8700e-15, 3.3967e-05],\n ...,\n [-4.3714e-06, -3.6980e-05, -4.2132e-05, ..., -1.0561e-04,\n -6.8541e-16, 4.5185e-05],\n [-1.2728e-05, -4.1654e-05, 2.3512e-05, ..., -9.5335e-05,\n 6.1973e-15, 9.6555e-05],\n [-3.3969e-06, -6.9386e-05, 9.9327e-05, ..., 4.8945e-06,\n 7.7783e-15, 6.3467e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3648e-09, 5.9209e-09, 6.1990e-09, ..., 6.3399e-09, 1.5939e-11,\n 5.4520e-09],\n [2.9963e-09, 1.6589e-08, 1.0601e-08, ..., 1.3126e-08, 1.0125e-11,\n 8.2170e-09],\n [1.7926e-09, 1.1513e-08, 1.2759e-08, ..., 8.8502e-09, 2.4057e-11,\n 1.4364e-08],\n ...,\n [3.0310e-09, 1.1686e-08, 1.3834e-08, ..., 1.2641e-08, 2.1579e-11,\n 1.6039e-08],\n [3.1418e-09, 1.1282e-08, 1.2126e-08, ..., 1.2456e-08, 1.8113e-11,\n 3.8286e-08],\n [3.1862e-09, 1.1905e-08, 1.4944e-08, ..., 1.7467e-08, 3.6336e-11,\n 8.1483e-09]], device='cuda:0')" + }, + "3": { + "step": "tensor(6260.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-3.5884e-06, -9.1305e-07, -3.8677e-06, ..., 5.6785e-08,\n -4.7949e-06, 3.2131e-06],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [-1.6671e-05, 2.1392e-05, 3.0510e-06, ..., -7.4079e-06,\n -1.3666e-05, -2.0398e-05],\n [-3.5488e-07, 6.6073e-06, 3.5814e-06, ..., 6.1645e-06,\n -1.3975e-06, 1.5459e-06],\n [-1.5385e-10, 6.2621e-11, 1.7855e-11, ..., -4.4535e-11,\n 3.1088e-11, 5.2681e-11]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.8987e-12, 4.9801e-12, 4.0329e-14, ..., 5.2389e-18, 1.8822e-13,\n 1.6074e-13],\n [8.2366e-10, 3.1522e-09, 2.5344e-10, ..., 2.2229e-10, 2.8496e-10,\n 1.9323e-10],\n [3.2335e-13, 4.8761e-13, 1.7999e-13, ..., 3.1182e-14, 4.4236e-14,\n 1.3357e-13],\n ...,\n [4.0422e-09, 5.9532e-09, 8.4945e-10, ..., 1.0579e-09, 1.9066e-09,\n 1.7891e-09],\n [4.5963e-09, 3.2513e-09, 9.1882e-10, ..., 1.3384e-09, 9.1669e-10,\n 8.4945e-10],\n [1.3125e-12, 1.1983e-12, 6.5030e-13, ..., 4.3585e-14, 1.5564e-13,\n 1.8954e-13]], device='cuda:0')" + }, + "4": { + "step": "tensor(6260.)", + "exp_avg": "tensor([ 5.6052e-45, 5.0984e-05, 5.6052e-45, ..., -3.8832e-04,\n 1.5855e-04, -6.5029e-10], device='cuda:0')", + "exp_avg_sq": "tensor([8.6871e-10, 4.7850e-07, 1.6634e-10, ..., 1.5138e-06, 1.2812e-06,\n 1.4478e-10], device='cuda:0')" + }, + "5": { + "step": "tensor(6260.)", + "exp_avg": "tensor([[ 5.6052e-45, -1.0455e-06, -5.6052e-45, ..., 4.6188e-07,\n 3.0311e-06, -1.9946e-11],\n [ 5.6052e-45, 1.4908e-06, -5.6052e-45, ..., -2.0474e-06,\n -1.1939e-06, 3.8515e-11],\n [ 5.6052e-45, 9.1025e-07, 5.6052e-45, ..., 1.0417e-06,\n 3.8167e-06, 1.4708e-10],\n ...,\n [-5.6052e-45, -2.0839e-06, 5.6052e-45, ..., -5.8479e-06,\n 2.3616e-06, -2.1579e-10],\n [ 5.6052e-45, 1.5614e-06, 5.6052e-45, ..., -3.0806e-06,\n -1.6603e-06, 6.2062e-11],\n [-5.6052e-45, -6.4165e-07, 5.6052e-45, ..., 6.4397e-06,\n -2.3729e-06, -1.1015e-10]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.6938e-14, 2.0620e-11, 2.8350e-14, ..., 1.4765e-10, 1.0219e-10,\n 1.0152e-12],\n [8.3953e-14, 7.7299e-11, 7.5424e-14, ..., 1.1041e-10, 8.8559e-11,\n 1.9811e-12],\n [5.6070e-14, 4.3398e-11, 4.6499e-13, ..., 2.5987e-10, 1.1993e-10,\n 1.2245e-12],\n ...,\n [1.3614e-14, 2.9772e-11, 1.1203e-12, ..., 3.1999e-10, 1.2742e-10,\n 1.8369e-12],\n [2.8116e-13, 2.3667e-10, 1.0623e-13, ..., 3.8768e-10, 2.0683e-10,\n 2.1229e-12],\n [8.6972e-15, 4.2253e-11, 1.0766e-13, ..., 4.0483e-10, 1.4063e-10,\n 1.0507e-12]], device='cuda:0')" + }, + "15": { + "step": "tensor(7512.)", + "exp_avg": "tensor([5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([8.7489e-08], device='cuda:0')" + }, + "16": { + "step": "tensor(7512.)", + "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([9.7380e-11, 5.5852e-10, 2.0511e-10], device='cuda:0')" + }, + "17": { + "step": "tensor(7512.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([6.9654e-07, 4.9185e-08, 2.8866e-08, 5.5230e-08, 4.3453e-08],\n device='cuda:0')" + }, + "19": { + "step": "tensor(7512.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.4350e-16, 2.6146e-15, 4.6595e-16, ..., 1.0540e-16, 1.2480e-15,\n 6.7286e-16],\n [2.9315e-13, 2.9452e-13, 2.2539e-16, ..., 5.4214e-14, 1.0842e-14,\n 2.3101e-14],\n [3.0571e-12, 3.5002e-12, 3.4789e-16, ..., 2.8224e-13, 3.8197e-13,\n 1.1594e-13],\n ...,\n [9.1844e-14, 4.1404e-14, 7.9308e-15, ..., 2.6403e-15, 2.4514e-14,\n 3.0158e-15],\n [1.1618e-14, 9.4831e-15, 2.4190e-16, ..., 2.1658e-16, 1.2028e-15,\n 1.3929e-16],\n [1.0500e-11, 1.2296e-11, 1.8746e-15, ..., 9.4818e-13, 1.3476e-12,\n 4.6023e-13]], device='cuda:0')" + }, + "20": { + "step": "tensor(7512.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([4.1971e-13, 2.7310e-10, 1.8999e-09, 4.7905e-11, 3.2654e-10, 1.2688e-09,\n 1.4354e-11, 1.9504e-09, 1.5238e-10, 3.5410e-12, 8.7977e-09, 2.8034e-11,\n 8.8773e-12, 2.0151e-10, 2.0828e-10, 1.6515e-09, 5.5735e-10, 5.5481e-11,\n 2.7622e-09, 8.2963e-10, 8.3076e-10, 7.9879e-11, 2.8460e-10, 5.7972e-10,\n 3.4574e-10, 5.6182e-11, 2.5658e-11, 3.8339e-09, 2.5764e-12, 8.1058e-11,\n 1.5620e-10, 7.6649e-10, 1.2699e-09, 4.8040e-11, 6.7370e-11, 1.8200e-10,\n 3.5697e-12, 2.3038e-10, 1.3874e-09, 7.3105e-12, 2.9495e-11, 9.3818e-11,\n 2.5160e-09, 4.3033e-10, 2.9746e-11, 2.3302e-12, 1.0327e-11, 8.2563e-11,\n 2.0275e-11, 4.5031e-13, 1.5709e-12, 1.8234e-11, 9.7473e-11, 5.2583e-10,\n 6.8880e-12, 7.8039e-10, 1.3737e-09, 6.1749e-09, 7.0297e-10, 1.3201e-12,\n 1.0073e-08, 3.4111e-13, 3.3767e-11, 5.2163e-10, 1.5291e-09, 2.2444e-09,\n 3.6545e-11, 3.8591e-11, 8.1084e-09, 7.5805e-11, 1.1711e-09, 3.2184e-10,\n 6.0146e-10, 1.0472e-10, 1.5824e-12, 1.5076e-10, 1.0911e-09, 1.7760e-11,\n 1.5280e-14, 6.1408e-13, 1.6099e-12, 2.3977e-11, 2.5306e-09, 1.0658e-09,\n 1.6490e-10, 3.9777e-11, 8.6233e-10, 1.4782e-10, 2.2845e-10, 5.7067e-09,\n 3.2778e-11, 2.1361e-10, 3.9370e-11, 1.6332e-09, 2.5639e-10, 4.6370e-11,\n 2.1562e-13, 6.4089e-11, 2.0704e-09, 4.1125e-10, 1.1204e-12, 1.5205e-09,\n 1.2253e-11, 7.6356e-13, 6.4716e-10, 1.1431e-10, 2.1001e-12, 2.7617e-11,\n 8.5884e-11, 1.1252e-10, 1.0764e-12, 2.3101e-11, 9.1109e-12, 2.5972e-12,\n 1.3145e-10, 5.5602e-11, 1.6260e-09, 4.4988e-11, 9.8331e-10, 4.3057e-10,\n 2.0718e-11, 3.6960e-10, 5.5879e-10, 4.3335e-11, 7.3401e-10, 8.3650e-11,\n 3.2527e-09, 5.5443e-11, 1.2837e-10, 8.8642e-12, 3.7885e-12, 5.9399e-11,\n 2.0888e-11, 2.7585e-10, 5.4615e-11, 9.0778e-10, 3.2846e-12, 2.1059e-11,\n 5.7360e-10, 8.1138e-11, 1.7221e-12, 4.7085e-12, 8.4758e-09, 2.1599e-08,\n 5.2557e-10, 2.3014e-12, 6.9109e-10, 5.8779e-10, 4.5614e-10, 1.2141e-11,\n 2.2854e-12, 4.1033e-10, 1.4975e-09, 4.3667e-10, 7.3830e-10, 2.2142e-10,\n 4.2501e-12, 2.1268e-11, 3.2980e-11, 2.2619e-09, 1.1565e-11, 3.3895e-10,\n 4.2506e-10, 1.0794e-11, 1.0951e-11, 1.5443e-08, 1.4200e-09, 3.4820e-12,\n 4.3896e-09, 3.1912e-13, 4.1145e-10, 2.2175e-11, 5.4550e-10, 1.1775e-10,\n 2.1881e-10, 3.0291e-09, 5.1125e-10, 2.3989e-10, 5.2214e-11, 1.3652e-09,\n 2.1015e-10, 5.0436e-10, 1.2755e-08, 1.6288e-10, 8.2063e-12, 1.2806e-13,\n 2.1225e-11, 1.9606e-09, 1.0138e-10, 4.2852e-10, 2.1502e-09, 8.1954e-10,\n 1.7175e-11, 9.7606e-12, 1.5773e-09, 2.7437e-10, 2.8342e-12, 6.1421e-10,\n 4.5293e-12, 5.2524e-10, 1.1608e-09, 6.5123e-11, 2.8684e-10, 8.1057e-12,\n 1.3483e-10, 4.1554e-09, 6.4442e-11, 4.7131e-10, 1.4561e-12, 3.5684e-09,\n 6.7637e-10, 4.1597e-12, 3.2563e-11, 2.6782e-12, 2.5085e-09, 2.5211e-11,\n 1.7876e-10, 1.0613e-09, 5.5078e-11, 4.6975e-10, 3.7786e-10, 2.9239e-10,\n 1.6947e-11, 9.5624e-10, 7.2313e-11, 3.3501e-10, 3.8996e-09, 3.5022e-11,\n 1.5093e-12, 1.2272e-11, 1.4807e-10, 2.1563e-10, 8.4423e-12, 2.8042e-10,\n 1.1712e-10, 1.3456e-09, 1.6241e-10, 9.3713e-10, 9.9242e-11, 1.6515e-11,\n 5.9538e-11, 1.0006e-11, 2.7094e-09, 1.7939e-10, 2.5036e-11, 5.3343e-10,\n 1.9418e-11, 5.6034e-11, 1.7624e-09, 4.8134e-11, 6.0896e-10, 3.4755e-12,\n 3.9028e-10, 2.6560e-11, 3.3603e-12, 6.4563e-09], device='cuda:0')" + }, + "21": { + "step": "tensor(7512.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.1558e-14, 3.1274e-14, 4.3496e-12, 2.7149e-14, 1.3358e-13, 1.5286e-13,\n 7.2455e-14, 5.0697e-13, 3.2609e-15, 1.7491e-14, 5.4311e-12, 1.0748e-14,\n 1.5837e-15, 7.4518e-14, 4.0808e-13, 1.3990e-12, 6.0184e-13, 4.3143e-15,\n 1.0256e-11, 5.1453e-12, 4.6364e-12, 6.0957e-15, 9.8176e-15, 3.5523e-13,\n 2.9331e-12, 4.1884e-14, 6.6849e-14, 3.6982e-12, 1.2144e-15, 9.2595e-14,\n 3.1455e-14, 6.0275e-13, 4.4148e-12, 1.1832e-14, 5.5675e-14, 1.3679e-12,\n 1.9604e-14, 1.1064e-13, 2.7151e-12, 1.6635e-16, 1.3251e-14, 5.7481e-15,\n 2.7958e-12, 1.5442e-13, 2.3827e-14, 1.9481e-15, 1.3955e-14, 1.1436e-14,\n 6.6712e-15, 2.0446e-14, 2.0678e-15, 2.2688e-14, 1.5452e-13, 1.1979e-13,\n 1.0280e-16, 1.3967e-13, 7.7839e-14, 4.7069e-12, 3.4552e-13, 3.8048e-15,\n 2.1970e-11, 1.4017e-16, 9.8106e-16, 6.4454e-14, 2.9878e-12, 1.8227e-12,\n 2.4449e-14, 3.9438e-14, 2.8616e-11, 1.0708e-14, 2.1328e-12, 6.7447e-15,\n 8.3213e-14, 5.9212e-13, 6.5047e-15, 4.9697e-14, 1.1113e-12, 8.7703e-14,\n 1.8759e-14, 1.6658e-16, 8.6707e-15, 5.6341e-14, 6.2831e-13, 3.1261e-12,\n 1.5491e-13, 2.1603e-13, 1.1649e-13, 9.0007e-15, 5.6271e-14, 1.5560e-11,\n 7.5902e-14, 6.7100e-14, 1.9318e-14, 1.9746e-12, 3.5784e-14, 1.2956e-13,\n 3.9358e-16, 1.7852e-13, 3.6001e-12, 7.2796e-13, 2.5904e-15, 4.0434e-12,\n 1.9024e-15, 5.5796e-15, 1.8258e-13, 1.9980e-13, 7.8813e-15, 2.7574e-14,\n 1.9162e-14, 1.6286e-14, 2.6743e-16, 2.9760e-14, 4.9711e-14, 7.0546e-15,\n 3.2163e-14, 3.6452e-15, 4.2986e-12, 1.0577e-13, 1.3534e-12, 6.0943e-14,\n 5.4387e-14, 3.6431e-13, 4.5991e-13, 3.0514e-14, 1.4986e-12, 6.7035e-16,\n 3.3736e-12, 8.2120e-16, 3.8715e-14, 3.1058e-15, 2.9077e-15, 3.2519e-15,\n 4.4441e-15, 1.0963e-13, 1.1414e-14, 2.3423e-12, 5.3512e-15, 2.7761e-15,\n 3.4826e-13, 1.2227e-14, 5.1739e-15, 2.1453e-14, 3.7460e-11, 4.2518e-11,\n 8.1495e-14, 9.7035e-17, 7.4645e-13, 6.4047e-13, 1.0765e-12, 5.5171e-16,\n 4.2478e-15, 8.2480e-13, 1.5806e-12, 4.4899e-14, 1.3879e-13, 4.0835e-13,\n 3.2103e-17, 6.9252e-14, 1.5387e-14, 1.5584e-13, 2.0822e-15, 5.5553e-13,\n 2.1130e-14, 6.3924e-14, 2.0279e-14, 3.6125e-11, 8.3257e-13, 2.1730e-15,\n 5.7537e-12, 4.1283e-17, 4.5262e-14, 3.4447e-14, 3.6783e-12, 1.5694e-13,\n 9.6530e-14, 1.7757e-12, 1.6429e-13, 1.1730e-14, 7.5480e-14, 1.5467e-12,\n 4.0317e-15, 1.4637e-13, 3.3396e-11, 5.5348e-14, 3.0009e-15, 2.5299e-15,\n 9.6215e-16, 1.5726e-12, 2.3017e-13, 4.2726e-13, 1.3414e-12, 1.0843e-13,\n 6.6830e-15, 8.2611e-16, 9.2508e-13, 5.4818e-15, 4.4958e-16, 3.6157e-14,\n 6.5723e-15, 1.7014e-13, 3.0886e-13, 5.6523e-15, 3.0002e-14, 7.0431e-15,\n 9.0625e-15, 9.8409e-12, 6.8008e-14, 4.1218e-14, 2.2905e-14, 5.7605e-12,\n 2.1376e-13, 2.9288e-15, 2.4530e-15, 3.8049e-14, 2.4529e-13, 7.2524e-16,\n 6.3084e-15, 1.7730e-12, 2.5777e-13, 7.5052e-14, 6.9399e-13, 1.3779e-12,\n 2.1868e-15, 7.7233e-13, 3.8963e-14, 1.1199e-13, 7.8624e-13, 1.3905e-14,\n 4.4667e-16, 1.2010e-15, 1.7709e-14, 3.8202e-13, 1.2531e-15, 5.5270e-13,\n 1.3445e-14, 1.5732e-12, 1.6411e-12, 6.2072e-13, 9.9345e-14, 3.5395e-16,\n 9.2919e-15, 1.8374e-15, 1.0020e-12, 4.0462e-14, 9.4525e-15, 2.5856e-13,\n 2.2572e-15, 1.8381e-15, 9.2985e-12, 4.3747e-14, 6.4568e-14, 5.6863e-15,\n 2.8085e-13, 4.5616e-14, 6.0414e-15, 1.8059e-11], device='cuda:0')" + }, + "22": { + "step": "tensor(7512.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.4275e-15, 3.8302e-13, 3.0354e-12, 4.2605e-14, 4.6756e-13, 1.7557e-12,\n 2.0806e-14, 2.6055e-12, 2.2515e-13, 3.0172e-14, 1.0639e-11, 1.4890e-13,\n 3.8263e-14, 3.5028e-13, 4.3050e-13, 1.7013e-12, 8.7670e-13, 9.9410e-14,\n 2.9299e-12, 1.5678e-12, 1.3801e-12, 1.3405e-13, 2.9497e-13, 1.1429e-12,\n 1.0998e-12, 7.9584e-14, 2.2776e-14, 4.3462e-12, 4.8518e-16, 1.5549e-13,\n 6.7401e-14, 1.0574e-12, 2.0319e-12, 1.4693e-13, 1.1840e-13, 7.9199e-13,\n 1.0291e-14, 1.5198e-13, 2.1686e-12, 1.3895e-15, 1.1642e-13, 1.4676e-13,\n 3.8194e-12, 6.4547e-13, 1.6480e-13, 1.5344e-14, 3.9886e-15, 1.6672e-14,\n 3.9290e-14, 1.6448e-15, 9.4834e-16, 3.1626e-14, 1.9735e-13, 4.7773e-13,\n 2.9619e-16, 8.4713e-13, 1.8629e-12, 8.3417e-12, 7.2274e-13, 1.7427e-14,\n 1.3506e-11, 4.7928e-15, 1.8593e-14, 6.6642e-13, 2.2351e-12, 3.1535e-12,\n 7.4189e-14, 1.1300e-13, 1.1045e-11, 1.3362e-13, 1.6778e-12, 3.3318e-13,\n 7.6552e-13, 5.4283e-13, 3.7862e-15, 1.0511e-13, 1.5985e-12, 6.3031e-14,\n 4.0911e-16, 4.0288e-15, 5.8390e-14, 1.9916e-14, 3.4421e-12, 1.9374e-12,\n 2.9321e-13, 2.8295e-13, 9.3755e-13, 2.2880e-13, 3.7169e-13, 6.7655e-12,\n 2.1964e-13, 3.4258e-13, 4.4240e-15, 2.3075e-12, 3.7569e-13, 4.8784e-14,\n 1.7700e-14, 6.4235e-14, 2.9979e-12, 7.9968e-13, 1.4828e-14, 2.2404e-12,\n 2.6107e-14, 9.4967e-15, 6.8653e-13, 4.6885e-13, 4.8614e-15, 1.8081e-13,\n 1.3510e-13, 1.8366e-13, 6.1967e-15, 8.1827e-15, 8.9683e-14, 2.5291e-15,\n 1.8179e-13, 9.6453e-14, 2.4750e-12, 3.1655e-14, 1.4244e-12, 6.2474e-13,\n 1.7316e-14, 4.7759e-13, 8.6313e-13, 9.8956e-14, 1.3558e-12, 5.8358e-15,\n 3.6726e-12, 9.8162e-15, 2.1218e-13, 1.4218e-15, 8.4654e-15, 8.0175e-15,\n 6.8889e-15, 3.4870e-13, 8.2530e-14, 1.5233e-12, 2.6407e-14, 1.2351e-14,\n 8.5325e-13, 1.2041e-13, 4.5608e-15, 5.4208e-16, 1.0550e-11, 2.7213e-11,\n 5.0196e-13, 3.6286e-15, 1.1356e-12, 9.0834e-13, 7.5082e-13, 2.4490e-14,\n 2.2628e-14, 9.6411e-13, 1.5662e-12, 6.3701e-13, 1.0486e-12, 3.4413e-13,\n 1.4188e-17, 1.6327e-14, 5.8041e-14, 2.7373e-12, 7.8943e-16, 5.4869e-13,\n 6.0815e-13, 1.8250e-14, 9.2780e-15, 2.0438e-11, 2.0320e-12, 8.6454e-15,\n 5.2369e-12, 1.2390e-15, 4.1989e-13, 1.2212e-14, 1.2253e-12, 2.3629e-13,\n 3.7086e-13, 3.5742e-12, 4.2506e-13, 2.4409e-13, 1.9718e-14, 1.9668e-12,\n 2.3738e-13, 5.3011e-13, 1.7035e-11, 1.2835e-13, 2.2508e-14, 7.5819e-16,\n 4.1439e-14, 2.1143e-12, 4.3598e-13, 6.8450e-13, 2.9710e-12, 9.2851e-13,\n 2.9952e-14, 1.9796e-14, 1.6624e-12, 3.8570e-13, 3.8663e-16, 8.6659e-13,\n 2.8981e-14, 4.5568e-13, 1.2110e-12, 1.1406e-13, 2.7845e-13, 7.4899e-14,\n 1.3205e-13, 5.7891e-12, 5.6020e-14, 6.8009e-13, 2.6489e-15, 4.8743e-12,\n 7.0593e-13, 1.4131e-17, 2.6667e-15, 6.9841e-15, 3.3817e-12, 1.0963e-14,\n 2.6892e-13, 1.5627e-12, 8.1740e-14, 6.9460e-13, 7.6256e-13, 6.3028e-13,\n 3.6591e-14, 1.1049e-12, 2.1450e-13, 3.8657e-13, 5.2098e-12, 1.1669e-13,\n 4.4831e-15, 5.3003e-15, 1.9276e-13, 3.8142e-13, 1.4108e-15, 6.6541e-13,\n 4.4952e-14, 2.0729e-12, 7.5231e-13, 1.3580e-12, 3.8863e-13, 1.1461e-16,\n 9.0231e-14, 2.2968e-14, 3.1502e-12, 3.3932e-13, 1.0119e-13, 8.0269e-13,\n 3.3347e-14, 9.4578e-14, 2.9699e-12, 9.9319e-14, 8.6888e-13, 2.0040e-15,\n 3.4816e-13, 3.6139e-14, 2.0236e-15, 9.1251e-12], device='cuda:0')" + }, + "23": { + "step": "tensor(7512.)", + "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.7213e-12, 7.1275e-12, 5.1291e-15, ..., 5.0248e-13, 9.0264e-13,\n 1.6721e-13],\n [5.2789e-12, 5.9750e-12, 3.7403e-17, ..., 3.3299e-13, 6.9710e-13,\n 1.4504e-13],\n [1.5760e-12, 1.8528e-12, 3.8973e-15, ..., 1.4190e-13, 1.7466e-13,\n 5.5577e-14],\n ...,\n [1.5530e-13, 9.7487e-14, 7.5794e-16, ..., 9.6251e-15, 2.6823e-14,\n 4.0610e-15],\n [5.3181e-13, 5.6572e-13, 7.3164e-16, ..., 5.4409e-14, 6.6230e-14,\n 2.6288e-14],\n [4.7838e-13, 6.9561e-13, 6.1249e-16, ..., 6.5719e-14, 6.2203e-14,\n 3.6640e-14]], device='cuda:0')" + }, + "24": { + "step": "tensor(7512.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.5798e-09, 2.8051e-09, 9.1996e-10, 5.8824e-10, 5.5940e-11, 3.7090e-09,\n 3.1326e-11, 3.3221e-09, 3.2550e-09, 2.9678e-12, 4.0447e-09, 2.7603e-10,\n 3.6815e-10, 2.5875e-11, 4.0172e-12, 2.3547e-11, 5.7035e-14, 4.1407e-11,\n 8.1205e-10, 9.6316e-10, 2.1796e-10, 4.0126e-10, 4.8206e-10, 5.0115e-12,\n 2.8879e-10, 3.0570e-10, 1.1705e-10, 7.5795e-10, 9.8299e-11, 4.1425e-12,\n 4.9445e-11, 2.7936e-10, 4.4833e-10, 5.7154e-10, 3.4587e-13, 2.2410e-13,\n 3.0066e-10, 4.1990e-12, 3.1319e-10, 2.4281e-10, 6.5004e-13, 1.5266e-09,\n 3.8865e-09, 1.0150e-12, 1.0393e-10, 3.5140e-12, 4.9918e-10, 2.9488e-12,\n 1.0932e-10, 1.0102e-09, 5.4420e-12, 8.0364e-11, 1.6479e-12, 4.7625e-10,\n 2.7028e-11, 1.1186e-11, 7.3268e-09, 6.3936e-09, 2.2177e-11, 8.0888e-13,\n 9.5575e-09, 6.2001e-11, 7.3994e-10, 2.5845e-09, 2.0560e-10, 4.4468e-09,\n 1.0409e-11, 3.9975e-11, 4.7063e-10, 7.7528e-11, 9.3610e-10, 1.7519e-10,\n 1.5711e-10, 6.0011e-11, 3.5415e-11, 1.0005e-10, 4.8805e-10, 2.2554e-13,\n 3.0899e-10, 9.2253e-11, 4.5913e-10, 1.0574e-09, 3.9641e-11, 1.7994e-12,\n 5.3416e-12, 2.7583e-10, 1.1106e-09, 6.3395e-12, 1.6949e-09, 1.0383e-10,\n 3.7915e-11, 2.7881e-12, 5.0393e-11, 9.5101e-10, 2.0975e-09, 1.1131e-09,\n 7.2289e-12, 5.4315e-12, 9.1475e-10, 7.1399e-11, 4.6919e-10, 5.3562e-12,\n 1.1904e-10, 1.1548e-12, 6.4553e-11, 2.2153e-12, 4.3323e-10, 1.3929e-13,\n 5.0945e-10, 1.3137e-10, 2.4359e-11, 2.3633e-11, 2.2639e-10, 2.4424e-12,\n 5.6288e-10, 5.2773e-10, 3.9822e-11, 5.2590e-09, 1.8029e-13, 1.9731e-09,\n 1.2034e-10, 1.7922e-11, 3.1929e-10, 2.8379e-12, 5.8114e-12, 5.4916e-12,\n 3.4029e-09, 2.0480e-10, 9.8983e-13, 1.5537e-11, 6.8496e-11, 1.0907e-10,\n 2.6015e-10, 4.0052e-10, 1.2949e-11, 3.2256e-10, 7.2004e-10, 6.4958e-11,\n 1.5619e-12, 3.4218e-11, 2.9962e-10, 7.1614e-10, 9.1154e-10, 1.7996e-08,\n 1.2637e-11, 3.3182e-13, 5.2319e-10, 6.8789e-11, 3.6444e-12, 4.7719e-11,\n 1.1701e-10, 3.1005e-11, 5.3230e-11, 3.3788e-10, 2.7302e-11, 2.6760e-11,\n 1.2809e-10, 2.9169e-09, 2.5048e-11, 9.5857e-09, 1.4836e-10, 1.5285e-11,\n 6.6391e-13, 2.1916e-09, 5.4949e-10, 1.0942e-09, 1.1012e-11, 2.0837e-10,\n 3.8242e-09, 8.6948e-12, 2.0050e-09, 5.0469e-10, 2.9686e-10, 5.2646e-11,\n 4.3166e-10, 3.0336e-09, 1.0884e-09, 4.9796e-10, 8.9592e-11, 1.3493e-09,\n 3.3625e-09, 3.2655e-10, 3.7967e-09, 3.7442e-13, 4.2886e-10, 2.3126e-13,\n 5.7709e-10, 2.8985e-11, 9.1725e-11, 1.0133e-11, 2.2908e-09, 1.4851e-12,\n 3.6582e-10, 8.2928e-12, 1.3729e-10, 1.7137e-10, 1.1094e-10, 3.3927e-09,\n 1.8535e-10, 8.3910e-12, 3.2079e-10, 1.4550e-11, 1.8489e-11, 6.9822e-11,\n 1.3106e-10, 6.2996e-10, 6.0921e-11, 5.7723e-11, 4.5230e-11, 8.4561e-12,\n 2.1166e-11, 6.5625e-10, 1.1716e-12, 2.3685e-09, 2.0415e-10, 1.0988e-10,\n 1.0327e-09, 2.2775e-10, 2.2200e-09, 6.3204e-10, 2.2623e-12, 8.8437e-10,\n 2.3149e-11, 1.1183e-12, 9.9132e-11, 2.8637e-11, 4.7154e-10, 1.8420e-09,\n 7.4331e-13, 1.6234e-11, 2.6181e-10, 1.7041e-10, 2.8488e-12, 2.2056e-11,\n 5.3296e-11, 1.0667e-09, 1.2256e-10, 7.9694e-11, 2.8771e-10, 7.4263e-11,\n 1.5735e-09, 1.4356e-10, 1.7109e-09, 5.9784e-10, 6.0741e-11, 2.7566e-11,\n 5.9970e-11, 6.0211e-10, 9.8243e-10, 1.2730e-11, 1.0361e-09, 1.0482e-11,\n 8.1712e-10, 6.2690e-11, 3.5038e-10, 4.3948e-10], device='cuda:0')" + }, + "25": { + "step": "tensor(7512.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.3630e-12, 2.8751e-12, 5.7237e-12, 9.8333e-15, 8.0253e-14, 9.2245e-12,\n 9.2657e-14, 3.5211e-12, 1.5217e-12, 5.1346e-15, 1.2037e-12, 3.9580e-12,\n 4.5177e-13, 1.7265e-15, 4.0378e-15, 3.7430e-14, 1.1459e-14, 2.9194e-15,\n 2.7909e-13, 3.0058e-12, 1.7822e-12, 4.7187e-14, 4.2550e-14, 5.8340e-14,\n 2.8347e-12, 1.0247e-13, 5.4987e-14, 1.0064e-13, 7.9408e-14, 6.4397e-15,\n 1.5866e-15, 3.0540e-13, 5.7851e-14, 8.3333e-13, 1.8191e-15, 2.7940e-15,\n 1.1792e-13, 2.6097e-17, 1.7993e-13, 1.5166e-13, 2.1397e-15, 7.2257e-12,\n 2.0516e-11, 8.0983e-15, 8.4493e-13, 5.6569e-16, 2.1478e-13, 1.0956e-16,\n 2.8793e-14, 2.4266e-13, 9.5017e-16, 7.5695e-15, 2.0860e-16, 5.0062e-13,\n 1.9909e-14, 6.7562e-15, 3.5291e-12, 7.9857e-12, 9.7480e-15, 1.9735e-15,\n 1.3204e-11, 1.4039e-14, 2.2347e-12, 1.2571e-12, 7.3921e-14, 6.6013e-12,\n 8.6597e-15, 5.2959e-14, 6.9204e-15, 2.2384e-14, 2.7411e-13, 1.8838e-14,\n 2.4797e-14, 5.7348e-14, 3.7779e-14, 1.2471e-14, 1.7622e-13, 5.5635e-15,\n 8.5386e-14, 1.5999e-14, 1.2546e-12, 1.6911e-13, 1.1086e-13, 2.9166e-14,\n 5.2830e-15, 3.5656e-12, 5.3158e-13, 3.3266e-15, 2.3188e-12, 1.2823e-14,\n 7.3419e-15, 2.9968e-15, 1.3834e-15, 4.7442e-13, 6.0642e-12, 2.8107e-13,\n 6.2623e-15, 4.5047e-15, 5.1770e-13, 4.5918e-14, 7.5878e-13, 6.6969e-15,\n 5.2539e-14, 5.6264e-15, 2.5824e-15, 5.2767e-16, 3.9477e-13, 1.4905e-15,\n 1.3846e-13, 1.4772e-14, 1.4706e-15, 3.0443e-13, 6.1048e-13, 2.2574e-15,\n 4.0082e-13, 1.5991e-12, 3.9467e-15, 1.2166e-11, 1.9646e-16, 3.0908e-12,\n 4.6256e-15, 1.9162e-15, 1.4113e-13, 4.6601e-17, 5.3926e-15, 2.5353e-16,\n 4.2871e-12, 5.9435e-14, 3.6524e-18, 1.9072e-15, 3.2151e-15, 9.0063e-15,\n 1.1959e-13, 6.0493e-13, 6.0868e-15, 7.4435e-14, 2.6552e-12, 1.2111e-14,\n 1.6868e-15, 1.0481e-13, 2.8165e-14, 1.5135e-13, 1.4735e-13, 4.3854e-11,\n 2.5013e-14, 4.5157e-17, 3.4261e-13, 9.0523e-15, 1.2592e-15, 3.3882e-14,\n 2.6042e-13, 3.9157e-15, 4.7588e-16, 1.3067e-13, 6.1247e-15, 8.0440e-14,\n 2.5779e-14, 2.6672e-12, 3.6306e-15, 9.1090e-12, 3.5375e-14, 5.6237e-16,\n 1.3368e-14, 6.5384e-12, 1.1291e-12, 1.5784e-14, 2.5233e-14, 2.9503e-14,\n 1.0907e-11, 1.9567e-16, 3.1268e-12, 5.1422e-14, 2.7632e-13, 1.6296e-14,\n 2.0094e-13, 3.0378e-12, 4.6842e-13, 1.1821e-13, 2.0187e-14, 9.8544e-13,\n 2.3567e-12, 1.3262e-14, 1.3047e-12, 7.1417e-16, 2.5972e-13, 4.2892e-15,\n 4.2907e-13, 3.7799e-14, 1.2116e-13, 1.7937e-14, 6.5852e-13, 1.2221e-14,\n 3.4522e-14, 3.5408e-15, 7.5111e-15, 1.7747e-14, 6.3205e-14, 9.7456e-12,\n 3.0261e-14, 8.1164e-15, 5.2624e-14, 8.2881e-16, 3.5791e-15, 1.7440e-13,\n 3.2147e-13, 1.3484e-13, 5.1405e-14, 2.5764e-15, 9.0560e-15, 1.9418e-14,\n 1.7092e-14, 7.9048e-13, 1.4401e-15, 7.9955e-12, 9.2553e-15, 1.0448e-14,\n 3.3737e-13, 3.0068e-14, 4.3812e-13, 5.6383e-14, 1.6353e-14, 1.8794e-12,\n 1.5933e-14, 2.9093e-15, 3.2777e-13, 1.2476e-15, 8.7651e-15, 3.3464e-12,\n 4.1838e-16, 1.2741e-15, 3.1658e-14, 1.3696e-13, 2.9347e-16, 6.1742e-15,\n 1.6117e-15, 3.5601e-12, 6.8693e-14, 2.4595e-15, 3.5859e-12, 5.9749e-15,\n 2.2965e-12, 4.2031e-14, 8.4813e-13, 8.4365e-13, 2.5657e-14, 1.0913e-15,\n 5.9678e-16, 9.2676e-13, 1.0946e-12, 1.0538e-13, 2.7783e-13, 5.1546e-15,\n 1.3988e-12, 1.7509e-13, 4.0473e-14, 1.8251e-13], device='cuda:0')" + }, + "26": { + "step": "tensor(7512.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([4.5911e-12, 3.5003e-12, 1.9586e-12, 6.4820e-13, 1.5756e-13, 4.7838e-12,\n 3.0453e-14, 4.5340e-12, 4.0807e-12, 1.4973e-15, 5.0628e-12, 1.1775e-12,\n 6.8096e-13, 3.6433e-14, 1.6232e-15, 1.3967e-14, 5.1691e-16, 5.6772e-14,\n 9.7879e-13, 1.9877e-12, 7.7929e-13, 5.1114e-13, 5.4347e-13, 2.0177e-14,\n 1.1287e-12, 3.7263e-13, 2.9461e-13, 8.3673e-13, 1.2337e-13, 2.3148e-15,\n 1.4603e-14, 5.9763e-13, 5.7161e-13, 1.3080e-12, 1.9669e-16, 1.8971e-15,\n 3.9800e-13, 1.0006e-15, 4.1393e-13, 2.8904e-13, 1.1286e-14, 2.1757e-12,\n 5.8958e-12, 2.9025e-15, 5.8520e-13, 5.6487e-15, 4.3167e-13, 4.3969e-15,\n 1.3550e-13, 1.2713e-12, 9.1029e-15, 1.0833e-13, 2.4881e-15, 3.7370e-13,\n 3.6824e-14, 2.6103e-15, 9.2125e-12, 8.0809e-12, 3.2466e-15, 1.4312e-14,\n 1.1897e-11, 9.1961e-14, 9.1822e-13, 3.3390e-12, 2.5646e-13, 5.6991e-12,\n 3.3825e-15, 6.7786e-14, 5.9756e-13, 1.0479e-13, 1.1729e-12, 1.7552e-13,\n 2.5360e-13, 2.2740e-13, 4.6784e-14, 3.1145e-14, 6.1827e-13, 2.6766e-14,\n 4.8991e-13, 1.2158e-13, 6.6654e-13, 1.3247e-12, 3.8088e-14, 1.0307e-14,\n 3.7099e-14, 8.1456e-13, 1.3956e-12, 7.7360e-15, 2.2897e-12, 1.5333e-13,\n 5.5132e-14, 2.4460e-14, 2.4482e-14, 1.1733e-12, 2.6027e-12, 1.5136e-12,\n 6.7471e-14, 5.8080e-16, 1.4002e-12, 2.4362e-13, 7.3304e-13, 1.2172e-15,\n 2.0143e-13, 7.0991e-15, 3.5267e-14, 3.2107e-14, 5.6537e-13, 6.8120e-17,\n 7.5689e-13, 1.6978e-13, 3.3735e-14, 2.7138e-13, 4.2632e-13, 1.9010e-15,\n 7.7675e-13, 7.6491e-13, 5.6885e-14, 7.0363e-12, 9.5000e-16, 2.3862e-12,\n 9.2014e-14, 2.0693e-14, 3.9641e-13, 2.0154e-15, 5.2263e-14, 4.4210e-17,\n 4.1100e-12, 1.7101e-13, 2.5934e-17, 7.0925e-15, 9.3226e-14, 5.6042e-14,\n 2.0589e-13, 7.0337e-13, 8.9701e-15, 4.1807e-13, 1.3402e-12, 7.6009e-14,\n 1.6474e-15, 4.0899e-14, 3.8762e-13, 7.4106e-13, 1.3954e-12, 2.3369e-11,\n 4.7603e-15, 4.3431e-16, 7.3622e-13, 9.1841e-14, 1.4952e-16, 4.0386e-14,\n 4.7357e-13, 4.8106e-14, 3.6361e-14, 4.2679e-13, 3.6209e-14, 1.5686e-13,\n 3.2200e-14, 3.6187e-12, 2.9007e-14, 1.2062e-11, 6.2773e-14, 2.2646e-14,\n 2.1478e-15, 3.1483e-12, 6.4297e-13, 1.3926e-12, 1.6320e-14, 2.6621e-13,\n 4.8264e-12, 1.2606e-14, 2.4248e-12, 5.6954e-13, 7.0528e-13, 6.7299e-14,\n 6.0057e-13, 3.8845e-12, 1.1977e-12, 6.6992e-13, 7.2370e-14, 1.6763e-12,\n 4.1793e-12, 3.5826e-13, 4.7814e-12, 2.1292e-14, 5.3544e-13, 1.6416e-15,\n 7.6114e-13, 1.3160e-14, 2.9538e-13, 7.2377e-15, 2.8766e-12, 1.2921e-15,\n 4.6475e-13, 1.0794e-14, 1.0306e-13, 2.2382e-13, 9.0049e-14, 4.2553e-12,\n 2.4211e-13, 2.2159e-15, 2.7000e-13, 2.1425e-14, 1.0629e-14, 2.9621e-13,\n 1.0811e-13, 7.9495e-13, 9.0760e-14, 7.6511e-14, 1.0563e-13, 4.1105e-14,\n 6.3064e-15, 7.5462e-13, 1.7272e-15, 3.4516e-12, 2.6374e-13, 1.0043e-13,\n 1.3160e-12, 2.8719e-13, 2.7879e-12, 8.1208e-13, 6.1172e-15, 1.5290e-12,\n 1.3649e-14, 1.1132e-15, 4.2899e-13, 2.7169e-14, 6.0668e-13, 2.6600e-12,\n 2.2396e-16, 3.5301e-15, 3.9364e-13, 2.1079e-13, 4.9414e-16, 1.0634e-13,\n 1.2191e-14, 1.9914e-12, 2.3274e-13, 1.0489e-13, 1.1237e-12, 1.9986e-14,\n 1.9709e-12, 1.9742e-13, 1.9989e-12, 1.0968e-12, 1.7897e-13, 3.7938e-14,\n 8.1326e-14, 7.6328e-13, 1.3294e-12, 1.7192e-13, 1.3281e-12, 1.6663e-15,\n 9.5558e-13, 5.6908e-14, 4.4256e-13, 9.8011e-13], device='cuda:0')" + }, + "27": { + "step": "tensor(7512.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.1748e-14, 1.9970e-16, 4.0347e-15, ..., 3.1160e-16, 1.1237e-15,\n 5.5130e-17],\n [6.9081e-12, 6.8360e-12, 4.7547e-15, ..., 4.8185e-13, 9.2593e-13,\n 1.9377e-13],\n [5.5360e-13, 6.4915e-13, 2.6399e-16, ..., 5.1206e-14, 7.5953e-14,\n 3.1889e-14],\n ...,\n [5.7259e-12, 6.1003e-12, 4.1825e-15, ..., 3.3311e-13, 7.8953e-13,\n 1.3672e-13],\n [7.9589e-14, 1.0968e-13, 1.5707e-15, ..., 8.9285e-15, 6.9882e-15,\n 2.2316e-15],\n [1.0076e-11, 1.0440e-11, 2.8657e-15, ..., 7.7617e-13, 1.2911e-12,\n 3.1250e-13]], device='cuda:0')" + }, + "28": { + "step": "tensor(7512.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.3394e-12, 3.7475e-09, 3.8672e-10, 7.9753e-10, 1.5254e-10, 1.2652e-11,\n 3.9018e-10, 1.9327e-09, 1.1296e-09, 1.4055e-11, 4.4631e-11, 2.6821e-11,\n 6.0442e-11, 5.8095e-11, 1.1502e-11, 5.6588e-11, 2.0070e-12, 2.1566e-10,\n 1.7075e-10, 1.3787e-09, 2.3815e-11, 6.0798e-10, 3.7690e-09, 7.0189e-10,\n 1.0211e-09, 7.2574e-12, 2.1706e-11, 4.0557e-09, 9.1724e-14, 6.6323e-12,\n 2.1305e-11, 7.8388e-10, 4.4926e-10, 3.2198e-10, 6.9760e-11, 1.8596e-10,\n 1.2710e-10, 4.5884e-11, 3.0834e-10, 6.2959e-11, 5.7798e-11, 2.9312e-10,\n 2.6916e-09, 2.5631e-10, 4.5737e-11, 2.3947e-10, 9.1179e-11, 2.0026e-10,\n 4.1850e-12, 4.5828e-10, 2.7700e-10, 5.9164e-09, 4.6318e-11, 1.0843e-10,\n 1.7744e-11, 1.5636e-09, 1.3830e-08, 3.0034e-09, 2.2713e-10, 3.2191e-10,\n 2.6142e-09, 4.5654e-11, 5.1771e-10, 5.5344e-09, 1.2598e-10, 2.0229e-09,\n 1.3711e-10, 1.3042e-10, 4.9333e-09, 1.0080e-09, 7.0946e-13, 6.3360e-11,\n 4.3124e-11, 6.1238e-11, 4.7854e-13, 1.9887e-11, 1.9254e-12, 7.5829e-12,\n 3.9974e-10, 3.3960e-11, 3.6151e-12, 1.0342e-09, 1.5586e-10, 7.6209e-10,\n 1.5905e-12, 1.2282e-10, 8.5554e-10, 1.4112e-11, 3.0616e-09, 1.8323e-11,\n 6.1342e-10, 2.5265e-10, 8.6425e-10, 1.2697e-10, 5.7656e-10, 6.3929e-09,\n 9.7526e-12, 4.4026e-12, 3.8184e-10, 1.5612e-10, 4.5510e-12, 6.6006e-11,\n 2.9655e-11, 3.4976e-10, 7.7233e-12, 1.0853e-10, 4.8593e-10, 1.9038e-13,\n 7.3962e-11, 3.6867e-12, 2.9147e-13, 5.2138e-11, 4.1665e-13, 2.9840e-11,\n 5.0323e-10, 8.6733e-10, 5.3637e-10, 4.0962e-09, 4.0674e-12, 2.5523e-11,\n 1.6932e-09, 3.3730e-12, 6.5337e-12, 5.3264e-13, 7.1999e-12, 6.1388e-12,\n 4.3676e-10, 6.7911e-11, 7.9810e-11, 3.3016e-10, 1.5957e-10, 2.5955e-11,\n 6.4970e-11, 2.0172e-11, 4.3415e-11, 1.2454e-13, 1.1076e-10, 9.2522e-11,\n 3.5255e-10, 6.4772e-09, 4.4982e-10, 2.5926e-12, 2.2469e-10, 1.1025e-08,\n 2.8435e-10, 4.8776e-11, 3.0123e-11, 3.9152e-11, 3.1214e-11, 7.8614e-11,\n 1.5315e-11, 1.0678e-11, 8.3915e-12, 8.1392e-10, 6.8872e-12, 3.3749e-12,\n 3.4599e-12, 1.5657e-09, 4.1266e-11, 2.6539e-10, 2.4090e-10, 2.1101e-13,\n 6.6376e-12, 2.5080e-09, 1.5530e-11, 1.6241e-09, 1.4930e-12, 3.1548e-10,\n 4.6821e-09, 7.4893e-11, 2.4047e-09, 5.2845e-11, 1.1303e-10, 4.1782e-13,\n 5.7491e-12, 6.1565e-11, 2.4397e-10, 3.2064e-13, 2.0107e-09, 4.2873e-11,\n 9.6817e-10, 6.8807e-10, 3.3835e-11, 2.3412e-11, 3.0319e-12, 1.2471e-10,\n 3.4759e-11, 1.4284e-11, 5.0898e-11, 2.2504e-10, 1.2011e-09, 7.1404e-14,\n 3.9736e-10, 6.5985e-10, 5.2002e-10, 1.6284e-08, 5.2145e-12, 2.8355e-10,\n 5.0634e-11, 4.1892e-10, 1.4399e-10, 4.6954e-10, 1.4798e-10, 7.8887e-11,\n 1.6101e-10, 1.4501e-10, 8.4066e-13, 3.0902e-10, 7.3509e-13, 3.5356e-09,\n 2.6895e-11, 6.6142e-12, 5.4451e-12, 3.5635e-09, 1.1082e-09, 2.6494e-10,\n 1.9600e-10, 3.0620e-09, 1.0073e-08, 2.7523e-12, 8.4598e-12, 9.0768e-10,\n 4.0472e-12, 5.9645e-10, 4.6225e-11, 1.2572e-11, 2.8350e-09, 2.2872e-09,\n 2.5327e-12, 9.5969e-12, 2.1193e-09, 1.1758e-10, 1.6624e-10, 8.8848e-11,\n 1.1015e-10, 1.9718e-12, 6.7144e-11, 2.2115e-12, 3.3722e-10, 5.5662e-11,\n 1.6331e-12, 2.8066e-10, 2.3400e-09, 1.1566e-10, 3.6034e-10, 9.2827e-11,\n 2.7000e-09, 2.5906e-11, 1.0857e-09, 1.8957e-10, 3.2765e-11, 1.4430e-13,\n 8.6619e-11, 2.5785e-09, 4.8740e-11, 5.3474e-09], device='cuda:0')" + }, + "29": { + "step": "tensor(7512.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.9810e-14, 6.2950e-12, 1.5224e-13, 1.6958e-14, 2.5544e-14, 6.5719e-14,\n 8.8930e-15, 7.1893e-13, 1.9786e-13, 4.0801e-14, 1.5527e-13, 7.7730e-15,\n 3.8286e-14, 3.2741e-13, 4.1610e-14, 9.8174e-14, 9.2382e-16, 3.6788e-13,\n 7.3051e-15, 2.2160e-12, 4.3999e-14, 1.5605e-13, 1.7306e-11, 2.8833e-13,\n 2.9463e-12, 1.2091e-15, 5.3125e-15, 2.6591e-12, 3.2480e-15, 1.7625e-16,\n 5.7611e-15, 6.1755e-13, 9.9652e-14, 5.4971e-13, 7.8561e-15, 1.7048e-13,\n 1.1505e-14, 7.9724e-15, 3.1349e-13, 5.4932e-15, 1.2131e-13, 6.5660e-14,\n 3.5644e-12, 3.0144e-13, 9.5123e-14, 7.2943e-13, 4.7648e-15, 8.1972e-14,\n 6.7895e-15, 5.5127e-13, 1.4599e-13, 1.7544e-11, 2.7012e-14, 7.9063e-15,\n 4.0934e-14, 1.2494e-12, 3.0477e-11, 1.3528e-12, 8.5082e-14, 1.8565e-13,\n 3.6073e-13, 8.6572e-14, 1.3983e-13, 2.7564e-12, 6.3552e-15, 9.7229e-13,\n 1.2437e-13, 7.6903e-13, 1.8472e-11, 1.6063e-12, 8.2832e-15, 1.2920e-13,\n 1.6620e-14, 1.5882e-13, 6.1608e-14, 2.7236e-15, 2.5508e-14, 8.9845e-16,\n 1.0603e-13, 2.8648e-14, 9.3448e-17, 6.5018e-13, 7.7016e-15, 6.7273e-12,\n 2.1771e-15, 1.4800e-12, 1.2001e-13, 1.5594e-14, 6.2858e-12, 5.4373e-14,\n 1.5985e-12, 1.9297e-14, 2.4136e-13, 8.2762e-15, 1.1910e-13, 6.7817e-12,\n 3.0823e-13, 1.2400e-15, 2.6456e-13, 1.0327e-13, 3.4733e-14, 4.6133e-15,\n 1.3252e-14, 5.0432e-14, 1.0341e-14, 3.7468e-13, 1.0329e-12, 2.6077e-16,\n 1.6285e-14, 1.5531e-14, 8.0483e-15, 1.3612e-13, 8.1722e-14, 1.8060e-15,\n 2.3289e-12, 1.0399e-12, 3.6763e-13, 9.1531e-12, 2.0260e-16, 7.4106e-15,\n 9.6534e-13, 8.5384e-15, 1.0501e-15, 2.1551e-15, 3.4771e-15, 7.7349e-16,\n 8.1733e-15, 3.6399e-15, 4.6080e-15, 2.6621e-14, 2.8832e-14, 5.3973e-16,\n 2.0992e-15, 1.1270e-14, 1.5815e-14, 3.7030e-15, 5.9055e-14, 9.4517e-14,\n 7.2515e-13, 2.0525e-11, 6.7056e-14, 1.6239e-14, 4.2884e-14, 4.8473e-12,\n 5.2484e-14, 5.8828e-14, 3.0994e-15, 8.9653e-16, 3.6822e-15, 2.6759e-14,\n 5.0375e-15, 1.5768e-15, 1.1799e-14, 2.7047e-12, 1.6208e-15, 7.7041e-15,\n 3.1626e-16, 1.0317e-12, 1.5386e-16, 1.8493e-14, 9.9260e-14, 2.1776e-15,\n 6.0474e-14, 8.9425e-12, 8.9778e-16, 1.4964e-13, 1.4766e-14, 1.6857e-13,\n 6.2742e-12, 1.1950e-13, 5.0918e-12, 1.0636e-15, 1.3741e-13, 2.4427e-15,\n 4.2606e-14, 1.0290e-13, 3.0762e-14, 9.1768e-15, 4.4346e-13, 1.3775e-15,\n 1.0165e-13, 7.1987e-13, 1.8738e-13, 1.4439e-13, 1.7150e-14, 7.6349e-14,\n 8.7951e-14, 7.0305e-15, 5.2806e-14, 1.2890e-13, 5.7855e-13, 1.8423e-14,\n 7.4568e-14, 5.2059e-13, 4.9066e-14, 4.7219e-11, 9.7450e-17, 2.0295e-14,\n 2.0932e-15, 6.1788e-14, 8.6929e-15, 6.2562e-13, 1.1607e-14, 1.9726e-13,\n 4.2930e-14, 2.5147e-15, 5.6509e-15, 5.6031e-14, 5.8396e-15, 7.2421e-12,\n 1.7874e-14, 1.9881e-15, 4.1001e-15, 1.2006e-11, 4.4857e-14, 4.2434e-14,\n 2.1392e-14, 7.0492e-12, 1.9252e-11, 2.7813e-14, 3.9174e-15, 2.6352e-12,\n 5.2517e-16, 2.3345e-13, 2.3349e-14, 8.5510e-16, 5.3550e-13, 1.6827e-12,\n 8.7714e-17, 8.1459e-15, 3.7859e-13, 9.0322e-14, 8.2281e-13, 2.3242e-12,\n 8.5227e-15, 1.1600e-14, 9.1785e-14, 7.4685e-15, 6.0949e-13, 4.9565e-16,\n 2.4832e-14, 3.5587e-13, 2.7134e-12, 8.3100e-14, 3.1761e-13, 1.4960e-14,\n 2.3747e-12, 8.5937e-16, 8.0636e-13, 4.5269e-13, 4.5722e-15, 9.0581e-17,\n 9.7276e-15, 1.7130e-13, 3.7452e-15, 1.0452e-11], device='cuda:0')" + }, + "30": { + "step": "tensor(7512.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([4.3795e-15, 5.1598e-12, 6.0311e-13, 9.1585e-13, 2.3912e-13, 3.0620e-14,\n 5.3396e-13, 2.6562e-12, 1.5624e-12, 6.4609e-14, 4.4067e-14, 7.1341e-14,\n 2.0931e-13, 3.9109e-13, 9.1424e-14, 3.3861e-14, 3.4492e-15, 3.9512e-13,\n 2.0061e-13, 2.0379e-12, 1.6076e-14, 8.8358e-13, 3.7524e-12, 1.2234e-12,\n 1.6960e-12, 1.4097e-14, 5.1522e-14, 4.5842e-12, 9.5053e-17, 1.1137e-14,\n 2.0161e-14, 1.1825e-12, 6.5722e-13, 1.0282e-12, 1.6384e-14, 4.6293e-13,\n 1.9916e-13, 4.4008e-14, 5.4233e-13, 3.5858e-14, 2.1059e-13, 4.4947e-13,\n 3.8523e-12, 4.7453e-13, 2.4663e-13, 4.5121e-13, 7.2367e-14, 4.9815e-14,\n 2.4187e-15, 5.5679e-13, 4.1583e-13, 8.5050e-12, 9.5815e-14, 7.5864e-14,\n 5.1793e-14, 1.6802e-12, 1.8537e-11, 4.0908e-12, 2.2923e-13, 4.7794e-13,\n 3.4511e-12, 1.4094e-13, 5.1148e-13, 6.7170e-12, 1.8727e-13, 2.8068e-12,\n 2.1568e-13, 4.1314e-13, 7.0077e-12, 1.5193e-12, 7.6510e-16, 4.7512e-14,\n 9.3704e-14, 3.4455e-13, 7.8304e-14, 7.1963e-15, 8.3876e-15, 3.4667e-16,\n 6.0299e-13, 8.3838e-14, 2.3690e-15, 1.3016e-12, 2.0856e-13, 1.7521e-12,\n 1.2915e-14, 4.7913e-13, 9.7517e-13, 7.9136e-15, 4.5372e-12, 1.4324e-14,\n 1.1160e-12, 3.6228e-13, 9.0231e-13, 1.7454e-13, 8.1717e-13, 7.7723e-12,\n 2.0588e-13, 3.6225e-16, 6.6421e-13, 3.3700e-13, 2.0032e-14, 9.9267e-14,\n 8.7605e-14, 4.3864e-13, 3.8164e-15, 5.3213e-13, 8.1795e-13, 4.6799e-15,\n 2.1009e-13, 3.3216e-15, 3.6258e-15, 1.5263e-13, 7.9243e-14, 4.2575e-14,\n 5.5457e-13, 1.3211e-12, 8.3939e-13, 4.9806e-12, 7.8859e-15, 3.2363e-14,\n 1.8092e-12, 1.4025e-15, 1.0678e-14, 1.1478e-15, 4.4793e-14, 1.2725e-14,\n 4.6607e-13, 3.6829e-14, 4.0370e-14, 2.8905e-13, 2.4104e-13, 6.1641e-15,\n 5.4175e-14, 3.0832e-15, 4.3229e-14, 2.1437e-15, 2.3946e-13, 1.4667e-13,\n 5.9630e-13, 9.0512e-12, 6.2940e-13, 1.5666e-15, 4.4168e-13, 1.3851e-11,\n 2.4963e-13, 2.4573e-14, 6.0829e-14, 6.0390e-14, 5.6021e-14, 1.3245e-13,\n 1.1647e-15, 2.6424e-14, 3.8679e-15, 1.3096e-12, 8.2239e-15, 1.1693e-15,\n 3.8215e-17, 2.1855e-12, 2.9234e-14, 3.1159e-13, 1.1226e-13, 1.7837e-16,\n 2.5013e-14, 3.0416e-12, 2.8211e-14, 2.1561e-12, 1.0629e-14, 5.0965e-13,\n 5.3850e-12, 8.5062e-14, 2.4760e-12, 3.3281e-14, 3.6335e-13, 7.3911e-16,\n 2.1553e-14, 3.0117e-14, 1.9945e-13, 2.6077e-15, 2.2391e-12, 6.2641e-14,\n 1.1438e-12, 6.1475e-13, 4.7384e-14, 6.9143e-14, 5.6908e-15, 2.2965e-13,\n 1.8531e-13, 4.2749e-15, 2.4183e-13, 3.7198e-13, 1.6730e-12, 2.7270e-17,\n 5.8195e-13, 9.5449e-13, 5.0489e-13, 2.1741e-11, 4.2930e-16, 4.0766e-13,\n 7.6857e-14, 3.6583e-13, 1.1473e-13, 7.7325e-13, 1.2057e-13, 2.4086e-13,\n 1.6522e-13, 2.0622e-13, 4.0978e-15, 4.4888e-13, 7.1973e-15, 4.5692e-12,\n 6.5339e-15, 1.1411e-15, 1.1948e-15, 4.5751e-12, 1.4780e-12, 2.4667e-13,\n 2.8242e-13, 4.3503e-12, 1.3534e-11, 1.9202e-14, 1.6031e-14, 1.2981e-12,\n 7.1670e-15, 6.9910e-13, 1.7562e-13, 5.6322e-15, 3.7553e-12, 2.9754e-12,\n 2.7294e-15, 2.3129e-14, 2.6552e-12, 1.9972e-13, 2.6442e-13, 5.3917e-13,\n 4.7994e-14, 1.6033e-14, 3.0250e-13, 8.2333e-16, 8.8442e-13, 2.5113e-14,\n 2.1980e-15, 5.4161e-13, 2.6140e-12, 2.5084e-13, 5.2559e-13, 1.5319e-13,\n 3.7415e-12, 4.4375e-14, 1.6072e-12, 2.9649e-13, 4.8632e-14, 1.3117e-16,\n 1.0277e-13, 3.4003e-12, 7.0794e-14, 7.4166e-12], device='cuda:0')" + }, + "31": { + "step": "tensor(7512.)", + "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.2583e-12, 2.3523e-12, 1.1891e-15, ..., 1.6486e-13, 2.6966e-13,\n 7.8366e-14],\n [3.0450e-14, 3.9339e-14, 7.0712e-17, ..., 2.8601e-15, 3.5652e-15,\n 7.7038e-16],\n [1.9963e-12, 2.1973e-12, 8.6840e-16, ..., 1.7566e-13, 2.1179e-13,\n 7.6419e-14],\n ...,\n [1.4342e-11, 1.6102e-11, 3.2111e-16, ..., 1.3020e-12, 1.6107e-12,\n 7.3146e-13],\n [1.0599e-12, 1.2136e-12, 2.5923e-15, ..., 8.5072e-14, 1.4615e-13,\n 4.3042e-14],\n [1.2214e-14, 1.2874e-14, 2.2276e-18, ..., 8.4823e-16, 1.0279e-15,\n 2.3405e-16]], device='cuda:0')" + }, + "32": { + "step": "tensor(7512.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.1795e-09, 1.4204e-11, 1.1339e-09, 4.5683e-11, 9.7164e-13, 1.5537e-11,\n 2.2828e-10, 2.2125e-09, 8.2425e-10, 4.6674e-11, 8.1849e-09, 6.3985e-11,\n 4.1793e-13, 3.9713e-13, 6.3316e-12, 1.5271e-10, 9.5175e-12, 7.4077e-10,\n 1.2291e-09, 4.6299e-10, 8.3294e-10, 2.2511e-11, 7.4530e-11, 3.2626e-09,\n 2.1606e-11, 2.8815e-10, 1.0247e-09, 1.8482e-11, 1.3948e-12, 3.9365e-10,\n 1.5831e-11, 1.3812e-09, 3.5779e-10, 2.9671e-10, 8.7733e-11, 9.2760e-10,\n 2.1998e-11, 1.1754e-10, 8.4345e-10, 6.2819e-11, 2.9256e-11, 2.4109e-10,\n 8.1101e-10, 2.0420e-11, 1.4762e-12, 7.6051e-12, 3.8252e-10, 6.1720e-12,\n 7.8600e-10, 7.1533e-10, 1.2107e-10, 7.2412e-10, 9.5701e-12, 3.5901e-10,\n 1.3474e-10, 1.0363e-09, 8.1748e-10, 3.0562e-09, 1.6295e-10, 5.7438e-10,\n 1.0935e-08, 7.8476e-12, 1.0896e-11, 1.0366e-09, 1.3032e-11, 2.1646e-09,\n 2.6032e-11, 2.1551e-11, 1.7776e-12, 4.1534e-10, 6.0181e-10, 8.7374e-10,\n 7.2152e-10, 1.9592e-10, 4.9158e-12, 5.0747e-12, 9.4024e-11, 6.7788e-11,\n 8.3063e-10, 2.7669e-11, 4.6691e-13, 3.5753e-09, 3.0213e-09, 8.8875e-13,\n 2.6703e-10, 7.8064e-11, 1.2427e-09, 3.7724e-10, 3.6122e-09, 1.5254e-11,\n 9.4744e-10, 4.3585e-10, 5.4647e-11, 1.7763e-11, 1.5513e-10, 2.6729e-09,\n 4.3481e-10, 5.5913e-12, 2.0087e-09, 1.5141e-10, 1.0637e-09, 2.8380e-12,\n 5.0277e-11, 2.4730e-10, 4.3008e-11, 2.1694e-10, 1.0930e-10, 1.3550e-12,\n 1.4672e-10, 9.0023e-12, 5.8150e-10, 5.6109e-15, 5.6361e-12, 2.6807e-12,\n 1.7748e-09, 2.8309e-10, 6.6869e-10, 1.5045e-09, 2.8298e-11, 3.5068e-09,\n 1.5654e-10, 1.4226e-10, 3.8707e-10, 1.7875e-12, 3.2767e-11, 1.3189e-11,\n 1.3226e-11, 2.0344e-10, 4.0017e-11, 1.5809e-11, 9.7183e-12, 2.7611e-10,\n 1.1063e-11, 1.8514e-13, 4.8700e-12, 2.8812e-11, 9.5903e-12, 3.9504e-11,\n 2.0483e-10, 3.4190e-09, 6.6139e-10, 3.9654e-11, 4.3248e-09, 2.3626e-08,\n 1.7640e-09, 6.0471e-11, 2.1604e-11, 8.6849e-10, 4.9262e-13, 8.2351e-11,\n 9.5318e-11, 3.8940e-12, 2.9856e-11, 8.1322e-10, 7.4821e-10, 2.8968e-10,\n 9.6172e-11, 1.9382e-09, 2.3567e-11, 1.3385e-10, 1.1940e-11, 1.9358e-12,\n 3.6701e-10, 8.2042e-10, 8.8165e-12, 5.4554e-10, 5.9956e-10, 1.9617e-11,\n 2.2240e-09, 6.8250e-12, 3.1665e-09, 8.3591e-10, 7.3201e-10, 1.6255e-12,\n 3.0117e-11, 8.3820e-11, 3.3281e-10, 1.3246e-09, 1.9990e-09, 4.0947e-12,\n 5.3474e-09, 4.2770e-12, 4.0218e-09, 4.2655e-11, 2.5811e-11, 2.6085e-10,\n 1.2715e-11, 2.3021e-11, 3.1355e-10, 6.9170e-11, 1.2074e-10, 3.9739e-11,\n 1.7639e-09, 8.3684e-10, 1.0921e-10, 1.5831e-08, 5.3783e-11, 4.6239e-12,\n 3.0432e-11, 4.0530e-11, 2.5147e-09, 1.2831e-11, 5.0305e-10, 6.3806e-12,\n 2.4718e-11, 4.8183e-10, 1.4816e-11, 1.7076e-09, 1.5018e-11, 6.1508e-09,\n 4.2164e-12, 5.8684e-10, 1.2864e-11, 1.5757e-09, 9.0756e-11, 2.7985e-11,\n 1.2190e-12, 6.5356e-11, 1.0517e-08, 8.6855e-10, 1.3960e-10, 9.2996e-12,\n 3.2566e-12, 1.6037e-10, 7.7547e-11, 2.1271e-10, 2.7385e-09, 1.1609e-10,\n 3.6310e-11, 2.9348e-11, 2.0419e-11, 9.4734e-11, 8.0223e-12, 4.7114e-10,\n 1.2675e-10, 6.4810e-10, 6.3218e-11, 3.5955e-12, 3.5658e-10, 9.8200e-10,\n 1.4659e-09, 1.0439e-11, 8.7229e-10, 6.1149e-10, 5.7166e-12, 8.7232e-12,\n 9.7648e-10, 2.7932e-10, 1.3700e-10, 1.4937e-11, 7.5514e-10, 2.7453e-12,\n 6.4831e-10, 9.2507e-09, 6.8274e-10, 6.1393e-12], device='cuda:0')" + }, + "33": { + "step": "tensor(7512.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([6.1117e-14, 5.0388e-14, 4.0487e-13, 2.9790e-14, 1.0641e-13, 6.8150e-14,\n 1.0827e-14, 2.0397e-12, 9.5126e-14, 6.4130e-14, 1.2321e-11, 2.7837e-14,\n 2.1315e-14, 1.2175e-15, 1.3091e-13, 4.3327e-15, 8.2115e-15, 2.0990e-12,\n 6.7589e-13, 2.8811e-13, 5.2897e-13, 8.1311e-14, 3.7966e-15, 6.2973e-12,\n 5.8821e-15, 8.1518e-14, 5.9284e-13, 1.0926e-14, 1.2317e-14, 5.4758e-13,\n 2.7373e-15, 2.6563e-12, 4.6104e-14, 3.4223e-13, 1.2700e-14, 5.2862e-12,\n 1.0928e-14, 2.0719e-14, 4.7841e-13, 1.8686e-15, 1.2824e-14, 4.3761e-14,\n 2.9937e-13, 3.9856e-13, 1.7158e-15, 5.9907e-16, 1.2967e-13, 1.0311e-16,\n 2.6215e-12, 2.2925e-13, 7.4309e-14, 3.3043e-14, 4.1313e-14, 9.2073e-14,\n 1.3781e-13, 2.1566e-13, 1.0124e-13, 1.3232e-12, 3.7509e-14, 2.8725e-13,\n 1.3737e-11, 2.3751e-15, 3.7454e-14, 7.9030e-14, 1.8949e-15, 1.2531e-12,\n 1.3717e-15, 2.2593e-15, 5.6459e-14, 1.0175e-13, 3.0436e-13, 3.9756e-14,\n 6.5406e-14, 5.2097e-13, 3.2716e-14, 1.3044e-15, 1.2698e-15, 1.0388e-14,\n 2.3130e-13, 5.1629e-13, 7.9395e-17, 1.0486e-11, 2.3817e-12, 1.9612e-14,\n 1.2438e-13, 8.2283e-14, 5.7609e-13, 1.4687e-13, 1.2592e-11, 1.0632e-14,\n 4.4552e-12, 2.9540e-12, 1.2071e-15, 9.6976e-14, 1.3353e-14, 6.2016e-13,\n 1.5374e-12, 1.1356e-14, 8.5126e-13, 6.9855e-14, 1.2023e-12, 8.5656e-15,\n 3.1847e-14, 3.1350e-14, 1.0264e-14, 1.9132e-12, 5.5775e-15, 1.2543e-14,\n 2.4920e-14, 3.7844e-15, 2.4745e-13, 1.1349e-15, 2.5435e-14, 1.4902e-14,\n 2.8383e-12, 1.4179e-12, 5.8656e-13, 5.2249e-13, 5.6256e-15, 4.7872e-12,\n 5.9263e-15, 4.8180e-15, 7.1568e-13, 4.6946e-16, 2.1771e-14, 2.7402e-15,\n 1.1835e-14, 4.0521e-14, 4.3271e-14, 2.4873e-14, 5.2819e-14, 1.4695e-13,\n 6.4863e-15, 1.6160e-16, 3.0206e-15, 5.5907e-15, 2.4395e-14, 1.3046e-14,\n 1.0380e-13, 2.8597e-12, 1.5062e-12, 1.3510e-14, 4.8473e-12, 9.4348e-11,\n 1.8048e-12, 1.8766e-14, 2.6790e-15, 5.6577e-13, 1.1149e-15, 1.3793e-13,\n 3.8766e-13, 5.3203e-15, 4.5005e-14, 6.3509e-13, 1.2190e-13, 1.8907e-12,\n 7.1430e-15, 8.0803e-13, 7.0213e-15, 3.7973e-13, 3.0434e-15, 3.8962e-15,\n 1.0055e-14, 8.3153e-13, 4.9997e-15, 5.0200e-15, 1.4725e-12, 1.6317e-15,\n 3.0265e-12, 5.3052e-15, 6.9844e-12, 2.5847e-13, 9.8323e-13, 8.8666e-16,\n 7.1262e-15, 3.4502e-14, 8.6663e-14, 4.7577e-13, 2.9317e-13, 1.5348e-14,\n 3.7359e-12, 8.8769e-15, 1.1453e-12, 1.0758e-13, 2.3914e-15, 8.3502e-14,\n 5.6281e-15, 3.5096e-14, 7.2348e-13, 3.0607e-15, 6.4823e-15, 9.7391e-15,\n 7.6067e-12, 1.1735e-12, 4.5913e-15, 3.9070e-11, 1.3882e-15, 1.5929e-14,\n 3.3038e-15, 5.0074e-15, 1.9811e-12, 6.0844e-14, 1.0110e-13, 2.4382e-14,\n 2.2615e-14, 1.0153e-13, 1.4357e-13, 8.8281e-13, 7.9391e-15, 1.9313e-11,\n 1.0573e-14, 1.6394e-13, 1.1820e-15, 2.2857e-12, 4.7638e-14, 5.0092e-14,\n 6.3179e-15, 4.1488e-15, 1.6250e-11, 2.3912e-12, 2.6032e-13, 2.3172e-15,\n 2.1476e-16, 2.8092e-13, 3.4871e-14, 4.2044e-14, 5.4960e-13, 1.3621e-14,\n 1.8414e-15, 6.9681e-15, 9.0497e-14, 2.8709e-15, 1.5553e-15, 7.6829e-13,\n 2.2103e-14, 1.9551e-13, 1.9101e-14, 7.0000e-16, 2.3379e-12, 1.0017e-12,\n 2.8397e-12, 1.8676e-15, 7.9951e-14, 2.9551e-13, 2.0084e-15, 3.4719e-16,\n 3.9033e-13, 1.3326e-13, 1.0285e-14, 1.9687e-14, 1.9243e-13, 8.8512e-16,\n 8.1579e-13, 7.0685e-12, 3.2653e-13, 6.4549e-14], device='cuda:0')" + }, + "34": { + "step": "tensor(7512.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.4380e-12, 1.8812e-14, 1.6198e-12, 3.9513e-14, 1.0772e-13, 2.4774e-14,\n 3.3087e-13, 3.1604e-12, 1.1432e-12, 1.0530e-13, 9.7815e-12, 2.6158e-13,\n 7.4001e-15, 1.3813e-15, 1.5083e-13, 1.4881e-13, 2.5064e-14, 1.2098e-12,\n 1.3684e-12, 9.2544e-13, 1.3068e-12, 3.3379e-14, 6.1665e-14, 4.5443e-12,\n 1.0616e-13, 4.2726e-13, 1.3651e-12, 5.6076e-15, 4.1502e-15, 6.0589e-13,\n 1.2916e-14, 1.8643e-12, 5.2782e-13, 8.5525e-13, 7.6527e-15, 1.7729e-12,\n 7.7040e-14, 5.1929e-14, 1.2210e-12, 8.0230e-15, 8.5002e-14, 3.7265e-13,\n 1.3531e-12, 1.9105e-13, 1.6563e-14, 1.8531e-14, 3.4190e-13, 1.1138e-15,\n 1.1693e-12, 9.2926e-13, 1.8581e-13, 9.9769e-13, 3.4971e-14, 2.7380e-13,\n 2.2775e-13, 1.1709e-12, 1.0981e-12, 4.1351e-12, 1.8341e-13, 7.4427e-13,\n 1.4408e-11, 1.3468e-14, 9.6046e-15, 1.2658e-12, 2.1918e-14, 2.9901e-12,\n 3.0843e-16, 3.9402e-14, 5.9725e-15, 6.0861e-13, 8.4748e-13, 9.3005e-13,\n 9.5409e-13, 5.8199e-13, 3.8056e-14, 7.2905e-16, 1.4043e-13, 4.7351e-14,\n 1.1270e-12, 2.6442e-13, 8.5054e-16, 4.1344e-12, 4.0863e-12, 7.7098e-15,\n 2.6626e-13, 1.8515e-13, 1.4492e-12, 5.5612e-13, 5.3346e-12, 1.0891e-14,\n 1.7251e-12, 1.0388e-12, 2.6947e-14, 2.8074e-14, 2.3635e-13, 3.3977e-12,\n 8.1873e-13, 2.8693e-14, 2.7853e-12, 3.8287e-13, 1.5976e-12, 3.2442e-15,\n 2.3530e-13, 3.0041e-13, 2.3343e-14, 9.6079e-13, 1.6263e-13, 3.7950e-15,\n 2.8727e-13, 1.9709e-15, 8.4854e-13, 1.9292e-16, 5.0010e-14, 6.7573e-15,\n 2.1495e-12, 6.1771e-13, 1.0244e-12, 1.9189e-12, 5.3063e-14, 4.7598e-12,\n 1.1997e-13, 1.5155e-13, 6.5243e-13, 8.8459e-15, 1.4843e-13, 9.7288e-16,\n 4.7204e-15, 1.0596e-13, 1.7468e-14, 8.3642e-15, 1.8787e-14, 1.3045e-13,\n 1.0980e-14, 9.5004e-15, 1.4184e-14, 4.9637e-14, 9.2664e-15, 7.1617e-14,\n 3.2890e-13, 4.6501e-12, 1.0621e-12, 2.8394e-14, 5.5600e-12, 2.9343e-11,\n 1.7536e-12, 7.6862e-14, 9.1856e-14, 1.2639e-12, 1.3919e-15, 1.1931e-13,\n 4.8070e-13, 2.2489e-14, 1.4975e-14, 1.1925e-12, 1.0553e-12, 4.6246e-13,\n 4.0921e-14, 2.6459e-12, 4.3903e-14, 1.4996e-13, 9.5367e-16, 2.2948e-15,\n 5.0600e-13, 1.1375e-12, 1.2922e-15, 7.4432e-13, 1.0126e-12, 4.0309e-14,\n 2.4191e-12, 1.6872e-15, 3.4403e-12, 9.3263e-13, 1.2526e-12, 2.1173e-15,\n 6.3262e-14, 1.1957e-13, 2.9016e-13, 1.6006e-12, 2.2456e-12, 5.7955e-15,\n 6.3469e-12, 6.9689e-16, 5.3795e-12, 1.2005e-13, 4.9202e-14, 4.1377e-13,\n 6.9209e-14, 1.4937e-14, 8.0079e-13, 1.1152e-13, 1.8026e-13, 5.7241e-14,\n 2.9349e-12, 1.1881e-12, 8.2950e-14, 2.0925e-11, 1.9314e-15, 8.7198e-15,\n 5.2129e-14, 2.5310e-14, 2.7077e-12, 9.6093e-14, 4.6761e-13, 8.4118e-14,\n 2.0428e-14, 7.0068e-13, 6.5455e-14, 2.3391e-12, 4.5218e-14, 7.7444e-12,\n 4.2496e-16, 6.2514e-13, 5.6466e-15, 2.1757e-12, 1.2246e-13, 1.6710e-14,\n 2.1122e-15, 1.0135e-13, 1.3970e-11, 1.4609e-12, 3.8235e-13, 5.6049e-14,\n 9.7366e-15, 2.4435e-13, 2.5502e-13, 1.7724e-13, 3.6799e-12, 2.3172e-13,\n 6.3784e-14, 6.4476e-15, 2.6088e-14, 1.4636e-13, 7.7350e-16, 8.9026e-13,\n 5.8751e-14, 9.6072e-13, 2.4288e-13, 8.6342e-15, 1.1919e-12, 9.1682e-13,\n 2.1448e-12, 2.1811e-14, 9.7842e-13, 9.4595e-13, 4.5416e-14, 1.8059e-14,\n 1.3748e-12, 4.3253e-13, 2.0084e-13, 9.1602e-14, 1.0798e-12, 2.5703e-16,\n 7.2975e-13, 1.2197e-11, 9.6503e-13, 1.9051e-14], device='cuda:0')" + }, + "35": { + "step": "tensor(7512.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.1878e-13, 1.4022e-13, 7.3147e-17, ..., 1.0619e-14, 1.2234e-14,\n 5.2347e-15],\n [6.0869e-15, 3.4699e-15, 3.5517e-16, ..., 1.4988e-16, 1.7489e-15,\n 8.0727e-16],\n [4.2143e-12, 4.8404e-12, 5.9695e-15, ..., 3.4195e-13, 5.8315e-13,\n 1.5160e-13],\n ...,\n [1.4106e-11, 1.4518e-11, 1.5889e-14, ..., 9.5572e-13, 1.7096e-12,\n 4.7013e-13],\n [3.9078e-16, 1.3485e-17, 1.4776e-16, ..., 7.4144e-17, 8.6105e-17,\n 2.5134e-17],\n [7.9041e-12, 9.0016e-12, 5.7472e-15, ..., 6.7359e-13, 1.0807e-12,\n 2.8127e-13]], device='cuda:0')" + }, + "36": { + "step": "tensor(7512.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([6.8696e-11, 2.2504e-13, 2.4733e-09, 1.1160e-08, 2.7264e-10, 5.8179e-12,\n 1.6315e-09, 9.5961e-10, 6.8295e-10, 1.5758e-12, 9.2631e-11, 5.0561e-13,\n 5.2524e-10, 3.1341e-11, 1.6278e-11, 2.0930e-10, 2.8104e-11, 2.6413e-10,\n 1.0041e-11, 2.2586e-10, 5.7439e-11, 1.2903e-12, 1.7100e-09, 3.4629e-09,\n 2.7450e-10, 4.1408e-11, 4.1831e-10, 3.7018e-10, 1.5482e-11, 8.8591e-11,\n 1.3412e-10, 2.0689e-10, 4.1529e-10, 3.3368e-10, 3.7043e-12, 2.8311e-10,\n 9.8238e-10, 1.3452e-10, 7.3937e-10, 5.6369e-12, 1.5065e-09, 8.7691e-12,\n 9.5378e-10, 1.1495e-09, 2.3493e-11, 2.9612e-12, 9.4640e-11, 3.7928e-11,\n 1.1729e-11, 1.4623e-09, 3.0032e-10, 1.4281e-09, 4.5271e-13, 1.8162e-10,\n 5.5856e-11, 1.6675e-11, 3.9700e-09, 5.9309e-11, 3.5824e-10, 6.1212e-13,\n 1.8733e-12, 4.2775e-12, 5.4453e-10, 2.7337e-10, 2.1253e-10, 1.2877e-09,\n 2.6064e-10, 9.4298e-12, 3.0766e-10, 1.2325e-09, 2.5053e-10, 1.5103e-10,\n 1.0098e-10, 1.5131e-12, 2.7139e-13, 1.3008e-10, 2.3066e-10, 2.6832e-11,\n 2.2668e-09, 2.2957e-13, 3.1640e-12, 2.4951e-10, 3.4063e-09, 7.2234e-13,\n 2.7600e-10, 8.2974e-12, 2.9383e-10, 3.4974e-11, 4.3998e-10, 3.0685e-09,\n 4.4777e-10, 1.0905e-10, 1.8414e-11, 3.0980e-10, 2.0485e-09, 1.0809e-08,\n 4.1901e-12, 5.6843e-12, 6.7792e-11, 1.9649e-10, 3.8187e-10, 7.3527e-11,\n 9.8853e-11, 3.4277e-10, 2.0979e-09, 5.0164e-12, 4.3609e-13, 1.2430e-10,\n 1.5224e-10, 1.1774e-09, 1.1751e-09, 1.5576e-12, 9.5877e-13, 1.2328e-11,\n 1.6756e-11, 3.8544e-10, 3.7181e-10, 1.0025e-10, 4.3826e-10, 1.9891e-11,\n 3.4135e-09, 1.2118e-09, 1.4813e-12, 1.1088e-10, 3.3412e-10, 9.6979e-12,\n 3.3870e-12, 8.3416e-11, 2.8611e-12, 1.9678e-10, 2.7621e-11, 1.1618e-10,\n 1.0020e-09, 1.5289e-10, 1.0565e-10, 9.4904e-11, 4.1672e-10, 4.4009e-11,\n 8.3202e-11, 1.0083e-12, 1.5471e-11, 2.2330e-09, 6.4364e-10, 6.9858e-11,\n 2.2638e-09, 1.2245e-12, 4.1036e-13, 5.0931e-10, 7.0543e-11, 2.2756e-11,\n 1.4096e-11, 1.5574e-10, 8.3825e-11, 3.4756e-12, 1.5063e-11, 2.2554e-11,\n 1.4164e-11, 3.8191e-09, 1.1343e-09, 3.8820e-09, 8.6664e-11, 1.1932e-10,\n 4.3628e-09, 1.2516e-13, 8.2871e-11, 1.2197e-08, 1.4005e-09, 2.6064e-10,\n 3.2883e-09, 4.5377e-12, 8.0670e-11, 7.1548e-10, 1.4520e-10, 3.1137e-14,\n 9.6334e-11, 3.7411e-09, 2.1560e-11, 4.6936e-12, 5.5669e-09, 1.6479e-09,\n 1.9650e-12, 1.7173e-11, 2.1146e-12, 1.0127e-10, 4.0340e-10, 2.2822e-13,\n 1.4268e-11, 3.2453e-09, 1.7151e-11, 4.6961e-13, 3.6132e-12, 6.5030e-10,\n 7.6202e-10, 6.3517e-10, 8.3206e-10, 9.3372e-11, 1.0441e-12, 2.6369e-09,\n 3.4737e-10, 1.4287e-10, 1.9717e-10, 9.0938e-11, 7.7995e-11, 1.5881e-11,\n 9.8985e-12, 1.4211e-11, 8.9057e-12, 5.3573e-11, 3.3916e-12, 8.0377e-10,\n 2.8475e-10, 2.2560e-11, 2.5860e-11, 1.1646e-10, 7.1476e-09, 9.4648e-12,\n 4.2940e-11, 4.3902e-12, 2.0596e-09, 3.0598e-09, 3.4529e-13, 1.5966e-10,\n 1.1831e-10, 4.1491e-10, 7.1181e-13, 9.9483e-12, 1.3705e-09, 1.5580e-09,\n 4.3527e-13, 9.5718e-13, 2.2504e-13, 2.3945e-11, 2.2543e-10, 6.7261e-12,\n 5.1901e-11, 6.0059e-10, 1.0205e-10, 1.4430e-13, 8.3160e-10, 1.0390e-11,\n 1.3224e-09, 1.0219e-09, 2.8057e-09, 2.0879e-09, 4.9789e-11, 4.3395e-12,\n 6.7360e-12, 7.3189e-10, 1.6671e-09, 2.1350e-11, 2.1185e-09, 5.3924e-12,\n 3.5206e-10, 7.2288e-09, 8.1058e-14, 4.6294e-09], device='cuda:0')" + }, + "37": { + "step": "tensor(7512.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.9408e-13, 3.1099e-14, 5.8247e-12, 2.0724e-11, 4.9152e-14, 6.1138e-14,\n 7.5623e-13, 3.1840e-13, 9.7795e-14, 3.3074e-16, 2.5167e-13, 1.1477e-14,\n 9.5931e-13, 5.5617e-14, 1.2957e-13, 4.5943e-15, 1.9055e-13, 1.0794e-13,\n 7.6157e-15, 1.7395e-13, 2.3680e-14, 9.3143e-15, 5.8461e-13, 1.7128e-11,\n 6.7386e-13, 1.1842e-14, 3.4543e-13, 1.3877e-14, 4.2465e-14, 1.0305e-14,\n 1.2247e-13, 4.1851e-13, 4.6259e-14, 4.4379e-13, 9.0521e-16, 1.9751e-12,\n 2.5036e-12, 5.8426e-14, 1.2502e-12, 2.1696e-14, 4.3938e-12, 3.3347e-15,\n 7.8343e-13, 3.9969e-12, 1.2608e-14, 5.1831e-14, 7.1478e-15, 1.1748e-15,\n 3.4943e-15, 1.1435e-12, 2.9268e-13, 3.9825e-13, 1.7203e-15, 1.3575e-14,\n 7.0340e-15, 2.5884e-15, 1.0335e-12, 2.5017e-13, 3.0280e-14, 9.0534e-15,\n 5.6138e-14, 4.9761e-15, 3.2148e-13, 8.0818e-15, 3.6453e-14, 3.4597e-13,\n 3.0858e-13, 1.7266e-15, 1.8247e-14, 2.7279e-12, 1.1017e-13, 7.2079e-15,\n 2.7592e-14, 8.0111e-15, 1.2610e-15, 8.3280e-15, 6.0875e-14, 7.4415e-15,\n 2.7707e-12, 2.2720e-16, 3.0381e-15, 2.1545e-14, 3.5736e-12, 2.0406e-14,\n 4.6472e-13, 9.8971e-16, 9.6483e-14, 6.5947e-15, 1.7158e-13, 1.8147e-12,\n 4.0750e-13, 1.9201e-14, 1.5616e-14, 1.3717e-14, 3.2557e-12, 4.5677e-11,\n 2.3522e-14, 1.3190e-14, 2.4369e-14, 5.1524e-13, 5.5381e-14, 4.9144e-15,\n 2.6998e-13, 1.1063e-13, 1.6103e-12, 5.6696e-16, 1.2583e-15, 4.3529e-13,\n 3.1071e-14, 1.5210e-12, 4.6095e-12, 1.4977e-15, 3.7675e-16, 1.2361e-15,\n 2.2610e-14, 1.4462e-13, 2.8712e-13, 1.3025e-14, 1.5600e-13, 9.4005e-14,\n 2.5338e-12, 2.6271e-12, 3.5822e-16, 1.2666e-13, 2.4687e-12, 9.7226e-16,\n 8.6800e-15, 1.0721e-14, 1.6114e-15, 4.2151e-14, 2.2967e-15, 5.3835e-14,\n 5.5781e-13, 3.0284e-14, 1.2396e-14, 4.7322e-15, 4.4492e-13, 2.3779e-15,\n 1.5792e-13, 3.4320e-14, 1.8343e-15, 1.4411e-12, 1.5448e-13, 4.1597e-13,\n 6.3629e-12, 1.6467e-15, 1.7435e-15, 4.8151e-13, 1.4378e-13, 4.1296e-15,\n 5.6160e-15, 1.5509e-13, 3.5418e-15, 1.6605e-15, 8.1834e-16, 7.7074e-13,\n 2.3514e-15, 1.4313e-11, 5.1103e-13, 1.1591e-12, 1.1129e-14, 1.1240e-13,\n 1.8398e-11, 7.3896e-15, 5.6991e-15, 2.2589e-11, 3.4475e-12, 5.9725e-13,\n 3.6408e-12, 2.6137e-14, 9.7915e-17, 3.3114e-13, 1.0927e-13, 2.4622e-14,\n 2.9125e-15, 3.9809e-12, 1.7875e-14, 1.1215e-14, 6.2742e-12, 3.2970e-12,\n 2.9937e-14, 3.7374e-14, 9.6698e-14, 7.9312e-14, 7.9477e-14, 2.0786e-15,\n 3.4166e-15, 4.9113e-12, 5.2757e-15, 2.6654e-15, 2.5603e-14, 9.7956e-14,\n 2.5374e-12, 1.0781e-12, 3.0920e-13, 4.0532e-13, 7.3667e-17, 1.4225e-11,\n 4.0000e-13, 4.7697e-15, 2.0309e-14, 2.9482e-15, 8.9712e-15, 1.8500e-13,\n 1.3466e-15, 2.5902e-15, 1.1461e-13, 3.2906e-16, 3.7246e-14, 2.9960e-13,\n 1.9660e-13, 1.9051e-15, 9.9881e-16, 3.3616e-14, 9.5918e-12, 3.0011e-16,\n 7.6396e-16, 3.2041e-15, 2.1736e-13, 6.9812e-12, 4.2940e-15, 4.7380e-14,\n 8.9863e-14, 7.8310e-13, 1.5511e-14, 1.6210e-14, 8.7527e-14, 5.9458e-13,\n 9.4121e-15, 5.4311e-15, 3.6161e-14, 5.5482e-15, 4.8328e-13, 4.4454e-15,\n 1.6863e-15, 9.6909e-13, 1.0856e-13, 6.5463e-16, 2.9217e-12, 2.5029e-15,\n 2.3242e-13, 1.3828e-12, 3.3656e-12, 6.3991e-12, 1.8552e-14, 2.1458e-14,\n 4.0257e-14, 4.9602e-13, 5.7055e-12, 1.3061e-14, 5.0539e-12, 3.8917e-16,\n 1.9116e-13, 5.2582e-12, 2.5515e-15, 3.0660e-11], device='cuda:0')" + }, + "38": { + "step": "tensor(7512.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([7.1119e-14, 9.5720e-17, 3.9143e-12, 1.3963e-11, 4.3008e-13, 1.4037e-14,\n 2.3505e-12, 1.5104e-12, 1.0222e-12, 1.1416e-15, 7.0011e-14, 4.2665e-15,\n 1.0790e-12, 2.0817e-13, 1.4415e-13, 2.0155e-13, 1.6556e-13, 4.3691e-13,\n 8.8519e-15, 5.5856e-13, 1.8962e-13, 5.3223e-15, 2.1308e-12, 5.2376e-12,\n 8.3958e-13, 5.0499e-14, 7.3884e-13, 3.7937e-13, 1.6119e-14, 1.5113e-13,\n 2.0758e-13, 5.3838e-13, 6.2699e-13, 1.0009e-12, 8.4232e-15, 9.4403e-13,\n 1.7291e-12, 5.4509e-14, 1.2345e-12, 3.2239e-14, 2.1152e-12, 1.8137e-14,\n 1.7741e-12, 1.6050e-12, 1.2061e-13, 8.6547e-14, 6.6724e-14, 4.0892e-15,\n 1.1679e-15, 1.9033e-12, 4.6818e-13, 2.0726e-12, 1.5686e-15, 1.3544e-13,\n 2.9081e-14, 1.1764e-14, 5.5331e-12, 9.7077e-14, 4.3919e-13, 8.0085e-15,\n 1.8110e-17, 3.9969e-14, 5.3432e-13, 3.6012e-13, 3.2734e-13, 1.8990e-12,\n 3.5281e-13, 1.9145e-14, 4.5673e-13, 1.9489e-12, 3.9306e-13, 1.2923e-13,\n 2.5404e-13, 2.6223e-15, 8.0292e-16, 1.0298e-13, 3.7598e-13, 6.6014e-15,\n 3.1175e-12, 9.5132e-16, 6.9049e-15, 3.1206e-13, 4.8839e-12, 4.9277e-15,\n 3.3760e-13, 2.2705e-14, 2.6590e-13, 6.4086e-14, 7.3309e-13, 3.9569e-12,\n 7.5890e-13, 1.9450e-13, 5.6625e-15, 4.6394e-13, 3.0262e-12, 1.4049e-11,\n 1.1282e-14, 3.5570e-14, 1.9860e-13, 5.6039e-13, 5.8221e-13, 1.2031e-13,\n 4.6390e-13, 3.8189e-13, 2.5186e-12, 4.0713e-14, 1.5249e-15, 4.8064e-13,\n 2.6581e-13, 1.7641e-12, 1.9629e-12, 2.9089e-14, 9.2086e-15, 1.9561e-14,\n 7.4302e-15, 6.1882e-13, 6.1394e-13, 1.7291e-13, 6.6729e-13, 3.3181e-14,\n 4.0175e-12, 1.4524e-12, 3.4209e-15, 2.3373e-13, 1.0884e-12, 2.1554e-14,\n 8.2975e-17, 6.8104e-15, 4.9313e-18, 1.2919e-13, 4.9584e-14, 1.8894e-13,\n 1.1006e-12, 2.5851e-13, 1.7065e-13, 1.4820e-13, 8.0973e-13, 1.8584e-14,\n 1.6355e-13, 1.9023e-15, 2.5995e-14, 2.5846e-12, 1.0567e-12, 9.8285e-14,\n 2.3010e-12, 2.9279e-14, 2.6621e-16, 8.1204e-13, 1.8240e-13, 1.3566e-14,\n 5.2617e-14, 4.8059e-13, 7.2926e-14, 5.7400e-15, 2.5526e-14, 2.3828e-13,\n 6.9498e-16, 5.5745e-12, 1.6666e-12, 4.9120e-12, 2.8680e-14, 2.1445e-13,\n 6.5448e-12, 8.4319e-17, 1.3774e-13, 1.7021e-11, 2.2309e-12, 5.3649e-13,\n 4.1616e-12, 3.8294e-14, 6.4170e-14, 7.2910e-13, 4.7117e-13, 5.4771e-14,\n 1.5646e-13, 4.7733e-12, 6.6413e-15, 2.3551e-15, 6.7320e-12, 2.4871e-12,\n 2.5717e-15, 1.2747e-14, 3.0869e-15, 1.4602e-13, 6.1334e-13, 1.0234e-15,\n 2.3512e-14, 3.7873e-12, 8.5858e-14, 1.0395e-15, 1.2694e-15, 7.3480e-13,\n 1.4439e-12, 9.6711e-13, 8.8312e-13, 1.4615e-13, 3.8285e-16, 4.1022e-12,\n 6.4761e-13, 1.2151e-13, 1.4732e-13, 1.4741e-13, 6.1246e-14, 2.0579e-13,\n 3.9004e-16, 2.4765e-14, 5.9090e-14, 9.0364e-14, 6.7257e-15, 1.3271e-12,\n 2.7693e-13, 3.0313e-14, 4.7709e-14, 2.7352e-13, 1.0017e-11, 2.4495e-15,\n 6.9794e-14, 7.8342e-15, 2.8956e-12, 4.6188e-12, 2.8020e-16, 2.8923e-13,\n 7.4070e-14, 6.5165e-13, 4.3470e-15, 5.8766e-15, 1.9103e-12, 2.1440e-12,\n 1.0892e-15, 2.2929e-15, 3.1919e-16, 4.1155e-14, 3.6194e-13, 7.0880e-14,\n 1.9976e-14, 1.1795e-12, 3.2444e-13, 2.5317e-17, 1.6038e-12, 9.6544e-16,\n 1.8965e-12, 1.6395e-12, 3.3901e-12, 2.9248e-12, 1.0249e-13, 1.2328e-14,\n 1.5923e-14, 1.1222e-12, 2.8095e-12, 5.5874e-14, 3.4477e-12, 4.9105e-15,\n 3.9991e-13, 1.0048e-11, 5.1282e-16, 7.4421e-12], device='cuda:0')" + }, + "39": { + "step": "tensor(7512.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.6496e-15, 9.9428e-14, 2.0152e-14, ..., 7.8672e-17, 9.6047e-15,\n 3.0987e-13],\n [3.9293e-14, 5.5984e-15, 1.5974e-15, ..., 3.8937e-14, 5.2951e-17,\n 4.4409e-13],\n [6.3605e-16, 1.6051e-13, 4.0506e-15, ..., 2.6930e-15, 1.0023e-14,\n 2.4946e-13],\n ...,\n [7.6623e-13, 8.2615e-14, 1.3352e-12, ..., 5.3835e-14, 1.6944e-13,\n 1.3612e-11],\n [2.3450e-13, 2.2544e-13, 2.5646e-12, ..., 3.7173e-14, 4.9920e-14,\n 4.1650e-12],\n [7.3331e-13, 1.7581e-11, 2.2190e-10, ..., 5.0974e-12, 1.5823e-13,\n 1.2327e-10]], device='cuda:0')" + }, + "40": { + "step": "tensor(7512.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.2299e-13, 3.0466e-13, 8.2042e-14, 3.0767e-14, 2.5309e-13, 7.0572e-14,\n 2.8249e-14, 9.2183e-14, 2.4403e-14, 6.6128e-14, 2.2379e-13, 5.5516e-14,\n 2.0555e-15, 2.6023e-14, 8.2910e-15, 4.2145e-15, 3.3980e-13, 1.1485e-13,\n 5.7419e-14, 3.0835e-13, 2.6898e-14, 5.1137e-13, 7.0426e-14, 3.8600e-14,\n 1.2246e-13, 2.1736e-13, 1.5214e-15, 5.8788e-15, 1.5566e-13, 2.6523e-13,\n 1.9159e-13, 2.1047e-13, 1.1568e-13, 2.6201e-13, 4.1386e-13, 1.2520e-12,\n 3.4718e-13, 5.2951e-14, 3.9717e-13, 2.6160e-13, 1.0619e-13, 1.6378e-13,\n 2.5095e-13, 1.3329e-13, 2.5928e-13, 1.0806e-13, 1.9774e-13, 8.4277e-14,\n 5.5786e-15, 7.2654e-13, 9.5145e-14, 1.4995e-13, 1.1440e-14, 6.9406e-13,\n 1.9471e-14, 1.2066e-13, 2.9529e-13, 6.1972e-15, 3.6327e-13, 7.4710e-14,\n 1.2131e-12, 7.3154e-13, 2.5305e-14, 8.4598e-14, 5.4937e-15, 1.9977e-14,\n 7.5971e-14, 3.9417e-14, 1.0632e-13, 4.5957e-13, 1.8699e-13, 9.5803e-14,\n 5.0191e-14, 1.3970e-13, 5.0084e-13, 7.4181e-13, 1.7030e-13, 4.1791e-14,\n 2.7825e-13, 2.8256e-14, 5.5424e-13, 5.9793e-14, 1.4764e-14, 1.0160e-14,\n 1.2130e-14, 4.5009e-14, 1.0718e-12, 1.3104e-13, 4.6792e-15, 2.6773e-13,\n 3.4056e-15, 1.5998e-13, 2.1372e-14, 1.4219e-12, 2.4520e-15, 1.0541e-13,\n 2.2954e-13, 1.6923e-15, 5.0321e-15, 1.3651e-13, 3.9908e-13, 1.1292e-15,\n 5.9667e-13, 3.6405e-14, 6.7685e-14, 6.4074e-14, 9.0075e-14, 5.5082e-13,\n 3.6817e-13, 2.0403e-13, 5.0518e-14, 1.4192e-13, 8.9369e-14, 1.0797e-13,\n 6.0086e-14, 1.3748e-13, 1.1512e-13, 2.0633e-13, 3.3422e-13, 1.3810e-12,\n 9.7293e-15, 1.2920e-13, 2.3552e-13, 6.8413e-15, 4.1070e-15, 5.1272e-15,\n 1.8410e-13, 7.9759e-13, 7.7831e-15, 1.3528e-14, 7.1696e-14, 6.3176e-13,\n 7.6520e-16, 4.0729e-13, 9.6571e-14, 1.6466e-13, 3.5248e-13, 1.1464e-14,\n 1.0450e-13, 2.0991e-15, 1.1724e-13, 1.8488e-14, 1.9981e-14, 2.9629e-14,\n 4.8081e-15, 2.3144e-16, 6.6683e-14, 4.1944e-14, 1.5091e-14, 2.1378e-14,\n 3.9675e-15, 1.7662e-14, 1.2805e-13, 2.3920e-13, 1.7316e-14, 1.6941e-14,\n 5.1469e-14, 1.8377e-14, 8.6994e-14, 1.3558e-14, 3.6886e-13, 2.6318e-15,\n 3.4888e-15, 5.1256e-13, 1.8131e-13, 1.4732e-12, 1.5263e-12, 2.2231e-13,\n 3.0405e-13, 1.7137e-12, 4.9128e-14, 6.0465e-14, 9.6900e-14, 5.9467e-13,\n 4.8130e-14, 4.6433e-13, 1.0899e-14, 2.2388e-13, 2.0406e-13, 2.5426e-12,\n 5.9030e-13, 9.7052e-13, 6.7945e-14, 1.4810e-13, 1.8392e-12, 3.6824e-14,\n 4.7598e-16, 1.7233e-15, 6.1986e-13, 1.5438e-13, 5.9206e-13, 2.7644e-13,\n 1.4848e-13, 2.4368e-14, 9.3559e-13, 9.1864e-13, 4.7960e-13, 1.4415e-13,\n 5.1770e-13, 3.4297e-15, 1.4357e-13, 3.4035e-13, 2.0964e-15, 4.5754e-15,\n 8.2529e-14, 6.8966e-13, 4.8147e-13, 7.4839e-14, 1.4019e-14, 6.6078e-16,\n 8.4667e-13, 4.2153e-13, 1.1393e-14, 7.3936e-15, 2.3619e-13, 7.5961e-14,\n 1.0269e-13, 4.3265e-13, 3.8683e-13, 9.4139e-13, 1.4099e-14, 1.6032e-13,\n 3.6790e-15, 1.0909e-12, 6.5765e-14, 3.7451e-14, 1.9072e-14, 1.3324e-14,\n 9.7184e-14, 1.5563e-14, 2.0294e-13, 2.2561e-14, 7.2174e-15, 1.1795e-13,\n 6.2326e-14, 1.0025e-14, 1.0309e-14, 5.7017e-14, 4.7360e-14, 2.0373e-14,\n 1.6896e-14, 2.2707e-14, 2.6805e-14, 3.6742e-14, 1.2044e-13, 2.7898e-14,\n 2.9465e-14, 1.7351e-14, 1.0387e-14, 4.8503e-14, 1.3152e-13, 1.7384e-13,\n 1.4535e-14, 1.1411e-13, 1.9745e-14, 1.5005e-13, 2.9538e-28, 2.4589e-29,\n 6.8878e-29, 1.5225e-29, 1.4870e-30, 4.1365e-29, 2.0820e-29, 6.3334e-29,\n 4.7897e-31, 4.5906e-29, 9.6123e-30, 1.9742e-29, 8.8484e-29, 3.4062e-30,\n 2.0066e-30, 5.2571e-30, 1.2861e-29, 4.3244e-30, 1.6030e-28, 3.1467e-29,\n 3.0191e-30, 2.7728e-31, 7.2014e-30, 4.8688e-29, 4.6628e-30, 1.0850e-30,\n 1.6894e-30, 1.1942e-29, 2.5558e-31, 8.6345e-30, 3.8552e-29, 3.6407e-29,\n 2.2403e-29, 1.2502e-30, 7.3314e-30, 8.5116e-30, 2.5991e-29, 5.7472e-30,\n 1.8290e-29, 4.0534e-29, 2.5721e-29, 1.5836e-29, 5.4878e-30, 4.5779e-29,\n 4.5260e-29, 7.5838e-30, 3.5268e-30, 7.1020e-30, 4.6294e-29, 1.3951e-29,\n 5.2664e-30, 1.2318e-30, 1.7159e-29, 1.9129e-29, 4.5104e-29, 4.7568e-29,\n 3.3924e-29, 1.9737e-28, 2.5747e-29, 1.9847e-29, 7.0430e-29, 3.6430e-29,\n 4.1436e-29, 4.0224e-29, 9.9912e-30, 1.2798e-29, 2.0701e-30, 6.8868e-30,\n 2.5174e-29, 6.8064e-30, 3.4471e-29, 2.2576e-30, 5.7904e-31, 2.1530e-29,\n 5.6184e-30, 1.8634e-29, 1.1906e-29, 1.0825e-29, 3.3128e-29, 1.3926e-29,\n 1.3286e-29, 1.0487e-28, 6.0519e-30, 8.6105e-30, 1.8102e-29, 1.1391e-30,\n 1.0634e-29, 3.0240e-31, 1.0245e-29, 8.5946e-31, 4.1985e-30, 2.5752e-29,\n 1.3826e-30, 1.1296e-29, 3.5861e-30, 9.5992e-30, 1.5165e-29, 7.3527e-30,\n 5.8760e-30, 1.2613e-29, 1.2123e-29, 2.6632e-30, 4.3109e-29, 4.7668e-29,\n 6.8209e-30, 1.7319e-29, 1.1852e-29, 5.1324e-29, 2.0451e-29, 6.8692e-30,\n 2.0080e-30, 1.0982e-29, 2.6755e-29, 2.6130e-30, 1.1641e-29, 8.0381e-31,\n 4.0464e-30, 1.1349e-29, 1.8690e-30, 5.9361e-30, 7.4071e-30, 9.2206e-30,\n 3.3505e-29, 1.3411e-29, 5.3648e-29, 7.5807e-29, 3.8918e-29, 1.0777e-28,\n 2.0966e-30, 2.0859e-30, 2.0931e-29, 7.7046e-30, 8.0936e-31, 2.0297e-31,\n 7.6059e-30, 1.9425e-30, 8.9738e-30, 5.6636e-30, 2.9530e-29, 3.8482e-30,\n 7.1689e-30, 7.8445e-29, 7.7024e-30, 1.3456e-30, 3.4261e-30, 1.0562e-30,\n 1.0404e-30, 1.2793e-29, 2.1924e-29, 2.4298e-29, 1.3636e-30, 3.4717e-29,\n 3.5921e-30, 3.3491e-30, 8.3792e-29, 1.6692e-29, 2.7034e-30, 1.1313e-30,\n 7.3903e-30, 3.2103e-30, 1.6577e-29, 5.2423e-31, 1.1492e-29, 2.6362e-30,\n 4.3308e-29, 5.3823e-30, 1.0056e-29, 3.7903e-29, 1.5228e-29, 7.4926e-30,\n 3.1532e-29, 1.2054e-29, 2.0368e-29, 1.9656e-30, 1.8983e-28, 2.6244e-30,\n 4.0187e-29, 6.3033e-30, 8.4003e-30, 2.1722e-29, 7.7826e-31, 4.9588e-30,\n 2.1012e-29, 1.4295e-29, 1.2809e-29, 1.1492e-29, 2.2121e-29, 4.1828e-30,\n 1.6035e-29, 1.7297e-29, 1.7568e-29, 1.6430e-29, 3.6054e-29, 6.1256e-29,\n 9.8073e-30, 2.7030e-30, 3.0714e-29, 8.5289e-30, 7.3671e-31, 1.9925e-30,\n 2.3311e-29, 5.3099e-29, 2.1415e-29, 9.3695e-30, 2.5829e-30, 4.5173e-31,\n 1.8002e-29, 3.0509e-29, 4.5661e-30, 1.7760e-30, 5.0438e-30, 6.9117e-30,\n 8.6900e-30, 5.0104e-30, 1.8032e-29, 2.5307e-29, 5.1843e-29, 5.1646e-30,\n 5.4248e-30, 1.4564e-29, 1.8420e-29, 1.1638e-29, 1.1295e-30, 3.4736e-29,\n 1.1120e-30, 5.0795e-30, 1.8502e-29, 1.7730e-30, 2.2358e-29, 3.4214e-29,\n 5.3327e-30, 3.4532e-29, 3.3992e-29, 1.9957e-29, 1.6392e-29, 2.1616e-29,\n 5.0947e-31, 1.4943e-29, 4.6221e-30, 1.2630e-29, 1.8897e-30, 6.3095e-30,\n 1.2096e-30, 7.7755e-29, 1.2485e-29, 2.0047e-29, 2.4071e-30, 7.1977e-30,\n 2.1007e-29, 8.0698e-29, 1.9248e-29, 3.4406e-29, 4.1473e-29, 3.2200e-29,\n 5.1678e-30, 6.8633e-30, 1.2174e-09, 4.5315e-11, 2.6686e-10, 1.7537e-11,\n 6.9008e-11, 2.2976e-11, 4.7021e-12, 9.6471e-11, 5.3267e-11, 2.1929e-11,\n 7.6145e-10, 1.4851e-11, 4.4356e-11, 5.9238e-10, 8.7145e-11, 1.1825e-10,\n 1.9664e-11, 7.2937e-13, 4.9936e-11, 7.0172e-11, 1.4771e-10, 1.0692e-10,\n 6.4426e-11, 1.5922e-11, 1.5656e-12, 1.2827e-11, 3.6732e-12, 9.2219e-11,\n 8.3461e-10, 2.3876e-11, 2.6679e-10, 6.2998e-11, 5.9814e-11, 1.5126e-10,\n 4.4359e-13, 3.6510e-10, 1.8694e-10, 5.8847e-11, 3.8223e-10, 9.1823e-12,\n 1.4998e-11, 2.8964e-11, 2.7202e-11, 7.3862e-11, 4.2041e-11, 1.2631e-12,\n 8.9850e-12, 9.6001e-11, 1.5865e-11, 1.8381e-10, 6.3021e-10, 1.3273e-10,\n 1.3951e-11, 3.0424e-10, 3.8939e-11, 2.1197e-10, 1.9992e-10, 3.2426e-11,\n 2.8753e-10, 2.4141e-11, 1.0393e-11, 1.8529e-13, 3.0061e-12, 4.9656e-12,\n 1.2357e-10, 7.4332e-11, 5.6734e-11, 2.3652e-11, 7.5414e-11, 1.1194e-11,\n 1.5476e-10, 3.3424e-10, 2.0389e-10, 5.1315e-12, 8.8452e-12, 2.6768e-10,\n 1.9269e-10, 5.1674e-10, 2.3228e-10, 1.7155e-11, 9.6337e-12, 1.9884e-11,\n 9.4947e-11, 3.6235e-11, 8.9845e-11, 1.6557e-11, 1.0912e-10, 1.2638e-11,\n 3.2190e-11, 3.4999e-10, 4.8663e-10, 3.9632e-11, 1.7207e-10, 1.1793e-10,\n 6.9675e-11, 1.7376e-10, 1.7087e-10, 1.1481e-11, 7.1904e-11, 5.8272e-11,\n 4.8460e-11, 4.6494e-10, 6.3525e-11, 3.8639e-10, 1.9995e-10, 3.7050e-11,\n 7.7093e-11, 9.0421e-12, 1.9105e-11, 1.8542e-12, 2.0714e-10, 2.1548e-11,\n 2.0228e-10, 2.8259e-11, 4.9043e-10, 1.2435e-10, 8.0141e-11, 4.2597e-10,\n 6.8499e-11, 4.2081e-11, 4.9820e-12, 6.6751e-11, 1.0015e-10, 2.4351e-11,\n 2.4616e-11, 1.0267e-10, 2.0411e-11, 1.0679e-11, 2.4740e-10, 1.0041e-11,\n 1.4002e-11, 1.5973e-11, 1.7380e-11, 6.9366e-11, 9.6525e-11, 4.3448e-11,\n 4.2926e-11, 2.8265e-12, 9.0634e-11, 6.3253e-11, 1.0961e-11, 5.5649e-11,\n 1.9506e-11, 1.2200e-11, 1.2614e-10, 2.9168e-11, 3.1096e-13, 8.4025e-11,\n 6.3133e-11, 1.9504e-11, 1.7423e-10, 9.8804e-11, 3.2705e-11, 1.3441e-11,\n 3.2973e-10, 1.1739e-11, 1.1956e-11, 1.3417e-10, 4.9981e-11, 1.2121e-10,\n 1.1628e-10, 4.1549e-11, 1.7891e-10, 1.0742e-10, 8.9323e-12, 1.7174e-11,\n 2.2855e-13, 1.9924e-11, 1.2630e-10, 1.8295e-13, 6.5116e-10, 3.3328e-13,\n 3.9339e-11, 1.5680e-11, 3.5650e-10, 1.3763e-10, 6.1212e-11, 1.2898e-10,\n 3.3099e-10, 5.0400e-11, 5.6973e-12, 2.3600e-11, 3.9225e-11, 2.1075e-11,\n 6.9174e-12, 1.5129e-11, 2.7077e-11, 6.1613e-12, 2.8799e-10, 1.7316e-12,\n 6.2703e-11, 1.2976e-11, 4.5913e-10, 2.2477e-10, 9.3967e-12, 3.1757e-11,\n 1.7633e-10, 8.7929e-12, 8.8227e-12, 2.0828e-10, 4.9392e-11, 1.0191e-10,\n 3.0092e-11, 1.3861e-10, 1.5472e-11, 3.5142e-11, 1.6538e-10, 7.7626e-12,\n 1.3519e-10, 6.3637e-11, 2.2436e-11, 1.0141e-10, 4.8867e-10, 9.9407e-12,\n 4.5426e-11, 8.4525e-11, 3.8686e-13, 7.0387e-11, 6.4462e-11, 1.5251e-10,\n 6.1326e-12, 1.0505e-10, 2.2203e-11, 3.4980e-11, 9.8972e-11, 3.9335e-13,\n 1.5786e-10, 2.6508e-11, 2.1969e-10, 7.4372e-11, 1.0245e-10, 1.8528e-11,\n 4.8287e-11, 1.7443e-11, 3.0243e-10, 6.0418e-11, 4.1138e-11, 2.8780e-10,\n 2.5962e-10, 2.4826e-10, 5.2581e-11, 2.5166e-10, 9.1403e-12, 1.0737e-12,\n 2.4049e-11, 1.1345e-11, 6.9836e-12, 1.5754e-11, 1.9789e-10, 6.5296e-12,\n 1.7140e-11, 8.9710e-11, 3.3125e-11, 1.2276e-11, 5.1348e-12, 2.3107e-10],\n device='cuda:0')" + }, + "41": { + "step": "tensor(7512.)", + "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.4541e-11, 1.7960e-11, 5.7893e-12, ..., 9.2608e-11, 3.1974e-12,\n 3.6084e-11],\n [1.1576e-11, 4.7379e-12, 2.7284e-12, ..., 2.1771e-11, 1.0342e-12,\n 1.8905e-11],\n [2.6876e-12, 1.5885e-13, 6.6560e-13, ..., 8.5406e-13, 2.1342e-13,\n 8.9420e-13],\n ...,\n [9.8217e-12, 7.0935e-13, 2.6469e-12, ..., 4.3666e-12, 6.1041e-13,\n 2.2263e-12],\n [5.0774e-12, 8.4374e-13, 1.2609e-12, ..., 4.2839e-12, 3.1925e-13,\n 1.0339e-12],\n [1.2323e-11, 3.6932e-12, 2.8540e-12, ..., 1.5974e-11, 1.0802e-12,\n 1.1493e-11]], device='cuda:0')" + }, + "42": { + "step": "tensor(7512.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.0906e-09, 2.7835e-10, 2.4608e-12, 1.0725e-09, 3.3123e-10, 2.8535e-11,\n 3.8839e-11, 5.9802e-11, 6.7714e-12, 1.4784e-10, 1.9007e-10, 2.1727e-10,\n 3.2363e-10, 6.4706e-11, 3.5727e-10, 1.6077e-10, 6.8285e-10, 1.0331e-10,\n 1.0509e-11, 8.3338e-10, 1.0587e-10, 7.2722e-11, 2.1835e-10, 8.6824e-10,\n 4.7449e-11, 1.1843e-09, 2.1198e-10, 2.2051e-10, 1.1531e-11, 6.8603e-11,\n 9.8727e-10, 2.4972e-11, 5.7859e-10, 1.8882e-11, 3.8237e-10, 7.8501e-12,\n 1.4738e-10, 3.1254e-10, 1.9210e-10, 1.8147e-10, 1.2102e-11, 2.5425e-10,\n 8.7486e-10, 2.3526e-11, 4.1128e-10, 3.0563e-11, 2.3465e-11, 8.7781e-12,\n 2.6632e-10, 8.3174e-11, 1.0143e-10, 5.5970e-11, 4.2959e-11, 1.1558e-10,\n 1.7702e-09, 1.8582e-11, 5.1506e-11, 2.5991e-11, 5.5939e-12, 4.4481e-10,\n 1.3840e-10, 5.0246e-12, 2.9045e-10, 1.1957e-09, 4.8980e-11, 7.4347e-11,\n 5.3540e-10, 8.3772e-12, 1.6728e-09, 1.7121e-11, 2.4015e-10, 1.6879e-10,\n 1.1424e-10, 5.9705e-12, 4.0260e-11, 5.2100e-11, 2.8239e-10, 9.6375e-11,\n 1.5406e-10, 2.9733e-10, 3.2245e-11, 3.0816e-11, 2.7681e-09, 9.2835e-11,\n 6.1706e-12, 1.1550e-11, 2.3631e-11, 1.2409e-11, 2.4209e-10, 1.9067e-11,\n 6.4395e-12, 4.8300e-10, 3.2678e-11, 9.2279e-10, 2.1900e-10, 2.9172e-11,\n 6.7664e-10, 2.4452e-10, 1.1179e-09, 8.3600e-12, 1.2714e-10, 1.1331e-09,\n 1.4879e-09, 2.4877e-09, 3.0896e-11, 1.0834e-10, 6.4848e-12, 3.9590e-10,\n 1.9363e-10, 9.7938e-12, 1.7368e-11, 1.8826e-10, 2.1240e-11, 3.2658e-11,\n 2.8702e-11, 1.5450e-10, 1.6095e-11, 1.2527e-10, 1.4108e-11, 1.1274e-10,\n 9.1149e-10, 5.3012e-12, 2.2448e-09, 8.4452e-11, 1.6317e-09, 1.4988e-11,\n 5.3438e-10, 6.5614e-10, 7.5891e-10, 6.3006e-11, 8.0556e-10, 6.1274e-10,\n 9.8154e-11, 7.3862e-12, 9.8161e-12, 8.6768e-10, 7.5810e-10, 5.2775e-10,\n 9.3504e-11, 1.4634e-11, 1.5898e-10, 1.0472e-09, 2.7181e-10, 4.0651e-10,\n 2.1257e-10, 4.7411e-12, 2.6691e-10, 1.9184e-10, 1.8091e-11, 7.7529e-12,\n 4.2090e-12, 7.0199e-10, 2.7055e-11, 8.4822e-11, 5.5210e-10, 1.1603e-09,\n 3.7141e-10, 8.4670e-10, 9.7186e-11, 7.1694e-10, 7.6364e-10, 6.5106e-10,\n 2.9796e-10, 1.0289e-09, 5.2241e-12, 1.2963e-09, 1.4233e-10, 1.6409e-10,\n 9.3730e-11, 1.3059e-11, 2.6661e-11, 6.1366e-12, 1.3683e-10, 3.9825e-11,\n 1.9306e-10, 6.0807e-10, 3.2605e-10, 1.0710e-11, 3.0840e-10, 6.8731e-12,\n 1.4240e-11, 4.1069e-10, 7.3056e-12, 1.6973e-10, 1.0542e-11, 2.6421e-10,\n 4.3811e-10, 1.2132e-10, 1.9670e-10, 4.2317e-10, 7.6641e-10, 4.7190e-10,\n 4.6315e-10, 8.4573e-12, 6.4459e-11, 9.6908e-12, 1.3372e-11, 1.0937e-10,\n 4.9482e-12, 1.4476e-10, 4.4216e-10, 5.0051e-10, 6.4817e-11, 1.9175e-10,\n 6.1082e-11, 1.0070e-09, 1.0577e-11, 7.3121e-10, 2.9705e-11, 6.7983e-11,\n 2.0866e-11, 1.7632e-10, 1.9150e-11, 1.0244e-10, 7.6645e-10, 1.2209e-10,\n 6.2616e-11, 8.7175e-12, 2.0451e-11, 5.9212e-11, 3.4737e-10, 1.5415e-11,\n 7.0762e-12, 1.5958e-10, 4.0455e-10, 2.1940e-09, 2.4223e-10, 5.1135e-11,\n 4.2515e-11, 1.2245e-09, 4.2804e-10, 8.1627e-11, 1.3597e-09, 4.0021e-11,\n 6.0772e-11, 1.4766e-10, 6.2852e-11, 3.6629e-10, 6.4017e-11, 1.1119e-12,\n 5.8464e-10, 3.7554e-11, 5.3238e-10, 6.7776e-11, 1.0462e-11, 1.9564e-11,\n 6.5444e-10, 2.4972e-11, 9.4709e-10, 1.1035e-09, 6.1720e-10, 1.1217e-10,\n 2.3339e-10, 1.9316e-11, 2.8504e-11, 1.9887e-10], device='cuda:0')" + }, + "43": { + "step": "tensor(7512.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.1651e-10, 1.3608e-09, 1.7004e-10, ..., 3.5222e-09, 3.3188e-09,\n 1.5346e-09],\n [1.4445e-11, 9.2942e-11, 1.0990e-11, ..., 2.4487e-10, 2.3865e-10,\n 1.0281e-10],\n [1.0081e-11, 5.8487e-11, 7.8979e-12, ..., 1.5233e-10, 1.3546e-10,\n 7.0949e-11],\n [1.5810e-11, 1.0592e-10, 1.2578e-11, ..., 2.7124e-10, 2.6299e-10,\n 1.1235e-10],\n [1.5596e-11, 8.9226e-11, 1.2394e-11, ..., 2.3152e-10, 2.0464e-10,\n 1.0971e-10]], device='cuda:0')" + }, + "44": { + "step": "tensor(7512.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([7.7392e-08, 5.4649e-09, 3.2072e-09, 6.1366e-09, 4.8278e-09],\n device='cuda:0')" + }, + "45": { + "step": "tensor(7512.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.1687e-10, 1.3614e-09, 1.7031e-10, ..., 3.5246e-09, 3.3188e-09,\n 1.5370e-09],\n [1.4452e-11, 9.2954e-11, 1.0995e-11, ..., 2.4491e-10, 2.3865e-10,\n 1.0285e-10],\n [1.0115e-11, 5.8546e-11, 7.9229e-12, ..., 1.5255e-10, 1.3546e-10,\n 7.1170e-11],\n [1.5818e-11, 1.0593e-10, 1.2584e-11, ..., 2.7130e-10, 2.6299e-10,\n 1.1240e-10],\n [1.5654e-11, 8.9327e-11, 1.2437e-11, ..., 2.3190e-10, 2.0464e-10,\n 1.1009e-10]], device='cuda:0')" + }, + "46": { + "step": "tensor(7512.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([7.7398e-08, 5.4650e-09, 3.2077e-09, 6.1368e-09, 4.8288e-09],\n device='cuda:0')" + }, + "47": { + "step": "tensor(7512.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.1651e-10, 1.3608e-09, 1.7004e-10, ..., 3.5222e-09, 3.3188e-09,\n 1.5346e-09],\n [1.4445e-11, 9.2942e-11, 1.0990e-11, ..., 2.4487e-10, 2.3865e-10,\n 1.0281e-10],\n [1.0081e-11, 5.8487e-11, 7.8979e-12, ..., 1.5233e-10, 1.3546e-10,\n 7.0949e-11],\n [1.5810e-11, 1.0592e-10, 1.2578e-11, ..., 2.7124e-10, 2.6299e-10,\n 1.1235e-10],\n [1.5596e-11, 8.9226e-11, 1.2394e-11, ..., 2.3152e-10, 2.0464e-10,\n 1.0971e-10]], device='cuda:0')" + }, + "48": { + "step": "tensor(7512.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([7.7392e-08, 5.4649e-09, 3.2072e-09, 6.1366e-09, 4.8278e-09],\n device='cuda:0')" + }, + "6": { + "step": "tensor(3756.)", + "exp_avg": "tensor([[ 1.9930e-05, -6.2995e-05, -4.2584e-05, ..., 1.6352e-05,\n -6.4978e-07, -5.9187e-06],\n [ 4.0638e-44, -9.5288e-44, -5.6052e-45, ..., -5.6052e-45,\n 1.8217e-44, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 4.9335e-07, 3.5067e-07, -6.5156e-07, ..., -8.5107e-07,\n -9.8314e-08, -8.2159e-07],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.0329e-08, 2.5043e-08, 4.6351e-09, ..., 2.6301e-09, 3.0717e-09,\n 3.8911e-09],\n [5.0665e-12, 1.9084e-12, 2.0117e-13, ..., 4.1590e-13, 1.3966e-12,\n 2.0877e-13],\n [3.8202e-11, 4.4309e-12, 2.4022e-12, ..., 7.3493e-13, 5.3473e-12,\n 4.1011e-13],\n ...,\n [1.4396e-11, 5.2315e-11, 6.7413e-12, ..., 7.9827e-12, 5.5351e-12,\n 5.2049e-12],\n [2.1101e-11, 4.0302e-12, 3.5389e-12, ..., 3.4827e-13, 1.1001e-11,\n 9.4412e-14],\n [2.9506e-11, 3.5337e-11, 1.2982e-12, ..., 1.5174e-12, 4.9907e-12,\n 3.7489e-13]], device='cuda:0')" + }, + "7": { + "step": "tensor(3756.)", + "exp_avg": "tensor([ 3.9364e-04, 2.0039e-42, 5.6052e-45, ..., -1.6331e-05,\n -5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([4.9845e-06, 8.0693e-10, 4.5190e-09, ..., 1.3378e-08, 1.0224e-09,\n 8.2227e-09], device='cuda:0')" + }, + "8": { "step": "tensor(3756.)", - "exp_avg": "tensor([[-7.9560e-06, 2.3993e-05, 3.2534e-06, ..., 5.1585e-06,\n -3.0564e-33, 5.7035e-06],\n [-2.3746e-05, -4.0098e-05, -1.1183e-05, ..., 1.7490e-05,\n -2.7885e-33, -3.1322e-05],\n [-2.1273e-05, -1.0974e-05, 1.0933e-06, ..., 1.0596e-05,\n -3.1965e-33, 4.0670e-05],\n ...,\n [-8.9289e-06, 5.3523e-05, -6.5075e-06, ..., 2.6585e-05,\n 4.1366e-34, 2.6323e-05],\n [-2.7970e-06, 3.2530e-05, 6.1558e-06, ..., 4.4867e-05,\n 7.4581e-33, 1.1290e-04],\n [ 8.1028e-06, 1.2931e-05, 5.9366e-08, ..., 1.4795e-05,\n 5.9547e-33, 5.4140e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.0505e-09, 1.4402e-08, 1.2323e-08, ..., 1.4270e-08, 5.3745e-14,\n 1.2518e-08],\n [8.1800e-09, 3.9253e-08, 2.2590e-08, ..., 2.9301e-08, 9.9092e-12,\n 2.1253e-08],\n [6.0028e-09, 2.6590e-08, 2.3240e-08, ..., 2.0730e-08, 6.6242e-11,\n 2.8640e-08],\n ...,\n [6.7921e-09, 2.7842e-08, 2.6969e-08, ..., 2.5708e-08, 4.8211e-11,\n 3.2653e-08],\n [9.4379e-09, 3.2343e-08, 2.5039e-08, ..., 3.0256e-08, 4.4243e-13,\n 7.4594e-08],\n [8.0911e-09, 3.1045e-08, 2.6019e-08, ..., 3.9526e-08, 6.0953e-11,\n 1.9206e-08]], device='cuda:0')" + "exp_avg": "tensor([[ 2.2463e-06, -7.0065e-45, 5.6052e-45, ..., -1.3449e-07,\n 5.6052e-45, -5.6052e-45],\n [ 1.2868e-05, -5.6052e-45, 5.6052e-45, ..., 1.9796e-08,\n -5.6052e-45, 5.6052e-45],\n [-8.4837e-07, -1.2612e-44, -5.6052e-45, ..., 1.1571e-07,\n 5.6052e-45, -5.6052e-45],\n ...,\n [ 3.5500e-06, 2.9427e-44, -5.6052e-45, ..., 7.2518e-09,\n -5.6052e-45, -5.6052e-45],\n [ 3.4483e-06, -5.0447e-44, 5.6052e-45, ..., -9.2344e-08,\n -5.6052e-45, -5.6052e-45],\n [ 3.0545e-06, 2.3822e-44, 5.6052e-45, ..., 7.4046e-09,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.3793e-10, 2.9861e-12, 5.9375e-12, ..., 8.6596e-12, 1.1250e-11,\n 8.8466e-12],\n [2.7425e-10, 3.5559e-12, 6.6492e-12, ..., 1.3280e-11, 1.4838e-11,\n 1.4987e-11],\n [2.2450e-10, 3.0325e-12, 6.1294e-12, ..., 1.8155e-11, 1.6550e-11,\n 1.9302e-11],\n ...,\n [5.5109e-10, 5.1650e-12, 1.2334e-11, ..., 2.5506e-11, 2.6034e-11,\n 2.8036e-11],\n [2.8697e-10, 7.2990e-12, 1.1818e-11, ..., 1.6121e-11, 2.6344e-11,\n 2.1009e-11],\n [3.8477e-10, 2.7493e-12, 6.1829e-12, ..., 1.3810e-11, 1.7964e-11,\n 3.2804e-11]], device='cuda:0')" } }, "param_groups": [ { - "lr": 0.00793913236883622, + "lr": 0.00024569294678237997, "name": "scale_256", "betas": [ 0.9, @@ -43,7 +238,7 @@ ] }, { - "lr": 0.00793913236883622, + "lr": 0.00024569294678237997, "name": "scale_512", "betas": [ 0.9, @@ -66,7 +261,7 @@ ] }, { - "lr": 0.00793913236883622, + "lr": 0.00024569294678237997, "name": "scale_768", "betas": [ 0.9, @@ -89,7 +284,7 @@ ] }, { - "lr": 0.00793913236883622, + "lr": 0.00024569294678237997, "name": "scale_1024", "betas": [ 0.9, @@ -112,7 +307,7 @@ ] }, { - "lr": 0.00793913236883622, + "lr": 0.00024569294678237997, "name": "scale_1280", "betas": [ 0.9, @@ -135,7 +330,7 @@ ] }, { - "lr": 0.003969669238105037, + "lr": 0.00012333423752026375, "name": "fusion", "betas": [ 0.9, @@ -195,7 +390,7 @@ "T_i": 10, "T_mult": 2, "eta_min": 1e-06, - "T_cur": 3, + "T_cur": 9, "base_lrs": [ 0.01, 0.01, @@ -204,24 +399,26 @@ 0.01, 0.005 ], - "last_epoch": 3, + "last_epoch": 9, "_step_count": 0, "_is_initial": false, "_get_lr_called_within_step": false, "_last_lr": [ - 0.00793913236883622, - 0.00793913236883622, - 0.00793913236883622, - 0.00793913236883622, - 0.00793913236883622, - 0.003969669238105037 + 0.00024569294678237997, + 0.00024569294678237997, + 0.00024569294678237997, + 0.00024569294678237997, + 0.00024569294678237997, + 0.00012333423752026375 ] }, "metrics": { - "best_val_acc": 74.29, - "best_epoch": 2, + "best_val_acc": 75.532, + "best_epoch": 8, "scale_accuracies": { - "256": 74.29 + "256": 75.532, + "512": 77.548, + "768": 77.802 } }, "train_config": {