diff --git "a/weights/best_model_metadata.json" "b/weights/best_model_metadata.json" --- "a/weights/best_model_metadata.json" +++ "b/weights/best_model_metadata.json" @@ -1,211 +1,226 @@ { - "epoch": 2, + "epoch": 3, "optimizer_state_dict": { "state": { "0": { - "step": "tensor(3756.)", - "exp_avg": "tensor([[-1.4696e-04, 9.1187e-05, 1.0800e-04, ..., -1.6044e-04,\n 7.1880e-06, 1.3602e-04],\n [-1.0927e-04, -7.9829e-05, 3.7893e-05, ..., 4.4160e-05,\n -3.0845e-05, -1.0366e-04],\n [-1.6137e-04, -1.2838e-04, 1.6428e-04, ..., -1.5805e-04,\n -1.2261e-04, 1.3438e-05],\n ...,\n [-2.5741e-04, -1.0685e-04, -8.1751e-05, ..., -1.7755e-05,\n -6.3467e-06, -3.8910e-05],\n [ 3.0890e-05, -2.9144e-05, -1.9468e-04, ..., 4.4760e-05,\n -5.2350e-05, 6.5412e-05],\n [-5.0468e-05, -9.1592e-05, -4.0162e-05, ..., -9.7185e-05,\n -3.0343e-04, -7.6591e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[5.4230e-07, 9.3495e-07, 5.1223e-07, ..., 4.2538e-07, 3.3366e-07,\n 2.7168e-07],\n [2.6763e-07, 2.7007e-07, 3.1478e-07, ..., 1.7557e-07, 1.9300e-07,\n 1.5755e-07],\n [4.5685e-07, 4.7150e-07, 3.1750e-07, ..., 2.7065e-07, 2.6642e-07,\n 1.9424e-07],\n ...,\n [5.2408e-07, 3.5291e-07, 3.0417e-07, ..., 3.0904e-07, 2.5971e-07,\n 2.5404e-07],\n [3.4291e-07, 3.1023e-07, 2.4811e-07, ..., 2.1291e-07, 1.8740e-07,\n 1.5737e-07],\n [5.0277e-07, 4.9959e-07, 2.3844e-07, ..., 2.8614e-07, 2.7950e-07,\n 2.2017e-07]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[-1.8385e-04, 1.7876e-04, 1.3645e-04, ..., -7.5934e-05,\n 2.2033e-04, -1.1906e-04],\n [ 6.0718e-05, -6.3270e-05, -4.4654e-05, ..., 1.9177e-04,\n 4.8559e-05, 1.2724e-04],\n [-1.1748e-04, 1.1439e-04, -2.7160e-05, ..., -3.5968e-05,\n -1.4399e-04, -6.4333e-06],\n ...,\n [-2.3493e-05, 1.2810e-04, -4.4965e-05, ..., 1.4815e-05,\n 8.9382e-05, 6.4703e-05],\n [-4.1782e-05, 5.1327e-05, -1.2943e-04, ..., -1.2500e-04,\n -2.6982e-05, -1.0682e-05],\n [ 4.9775e-05, -1.9352e-05, -3.2604e-05, ..., 4.5885e-05,\n -6.9116e-05, -6.4673e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.9853e-07, 9.2277e-07, 4.6346e-07, ..., 3.8416e-07, 3.1318e-07,\n 2.5321e-07],\n [2.6276e-07, 2.7426e-07, 3.3549e-07, ..., 1.7384e-07, 1.7698e-07,\n 1.6233e-07],\n [3.9234e-07, 3.7226e-07, 3.2127e-07, ..., 2.3992e-07, 2.5350e-07,\n 1.6868e-07],\n ...,\n [5.5000e-07, 3.4711e-07, 2.8068e-07, ..., 3.1202e-07, 2.2854e-07,\n 2.6131e-07],\n [2.8401e-07, 2.6151e-07, 1.9712e-07, ..., 2.0182e-07, 1.6983e-07,\n 1.3613e-07],\n [5.3582e-07, 5.6975e-07, 2.4217e-07, ..., 3.1615e-07, 2.9332e-07,\n 2.5110e-07]], device='cuda:0')" }, "1": { - "step": "tensor(3756.)", - "exp_avg": "tensor([ 0.0071, -0.0025, -0.0105, ..., -0.0042, 0.0052, 0.0037],\n device='cuda:0')", - "exp_avg_sq": "tensor([0.0008, 0.0004, 0.0006, ..., 0.0007, 0.0005, 0.0006], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-0.0026, -0.0021, 0.0002, ..., 0.0088, 0.0030, -0.0013],\n device='cuda:0')", + "exp_avg_sq": "tensor([0.0008, 0.0004, 0.0006, ..., 0.0006, 0.0004, 0.0006], device='cuda:0')" }, "2": { - "step": "tensor(3756.)", - "exp_avg": "tensor([[ 1.6469e-04, 3.2893e-05, -4.1503e-05, ..., 1.1671e-04,\n 8.3501e-05, 2.7150e-05],\n [-7.0790e-05, -4.7709e-05, -5.4345e-05, ..., -9.6343e-05,\n 5.4711e-05, -2.4721e-05],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n ...,\n [ 4.7751e-07, 6.1481e-06, -1.9595e-06, ..., -5.6778e-06,\n -8.1800e-07, 8.9861e-07],\n [ 9.3507e-06, -6.2523e-07, 3.8211e-06, ..., -1.2915e-04,\n -4.8805e-05, 1.0502e-05],\n [-2.3436e-05, -1.4268e-11, 3.6513e-06, ..., -1.7262e-04,\n -3.0035e-08, 2.7608e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[5.9569e-07, 5.8830e-07, 6.0002e-08, ..., 6.8561e-07, 1.9336e-07,\n 2.2442e-07],\n [8.3280e-07, 3.9777e-07, 2.0967e-07, ..., 8.9164e-07, 1.7551e-07,\n 2.9572e-07],\n [1.4661e-11, 5.7355e-11, 3.0492e-12, ..., 4.1948e-11, 1.3524e-11,\n 1.4087e-11],\n ...,\n [3.8655e-09, 1.7543e-08, 1.3705e-09, ..., 1.6059e-08, 4.2924e-09,\n 5.6006e-09],\n [1.0118e-08, 1.2867e-08, 2.9503e-08, ..., 2.1462e-08, 4.8553e-08,\n 1.1520e-08],\n [7.9902e-09, 6.9888e-09, 1.0463e-09, ..., 3.6875e-07, 1.2281e-09,\n 1.4287e-08]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[-1.4826e-04, -3.7766e-05, 1.3122e-05, ..., 6.7239e-04,\n -4.7286e-05, 8.0338e-06],\n [-2.3818e-05, 4.0960e-05, 2.6177e-05, ..., 3.7192e-04,\n 2.0681e-05, 4.5664e-06],\n [-3.2113e-09, 1.1314e-14, 1.0166e-15, ..., -6.2164e-08,\n -6.0928e-08, -1.4108e-07],\n ...,\n [ 3.2559e-07, 2.4579e-07, 8.5059e-07, ..., -2.0529e-05,\n -3.1550e-06, 9.6581e-07],\n [ 2.0538e-05, 1.9034e-05, -3.3606e-08, ..., 6.5104e-05,\n 5.8213e-05, 1.8850e-05],\n [-2.1527e-05, 8.1597e-08, -3.0233e-08, ..., 1.9321e-05,\n 1.2456e-07, -1.9977e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.8630e-07, 5.7644e-07, 3.5303e-08, ..., 4.8351e-07, 1.0573e-07,\n 1.7077e-07],\n [5.8228e-07, 3.1363e-07, 1.5318e-07, ..., 6.0549e-07, 8.9433e-08,\n 2.1251e-07],\n [1.7340e-10, 1.7096e-10, 3.1034e-11, ..., 2.1640e-10, 4.5407e-11,\n 2.4137e-10],\n ...,\n [2.2829e-09, 1.4872e-08, 1.2334e-09, ..., 4.1267e-08, 2.9066e-09,\n 4.3822e-09],\n [1.0260e-08, 1.5371e-08, 2.9725e-08, ..., 3.3514e-08, 5.1853e-08,\n 1.8715e-08],\n [6.8201e-09, 5.2145e-09, 6.0593e-10, ..., 2.4282e-07, 4.0987e-09,\n 1.0989e-08]], device='cuda:0')" }, "3": { - "step": "tensor(3756.)", - "exp_avg": "tensor([ 3.4385e-02, -3.4354e-03, 5.6052e-45, -1.1900e-02, 2.5258e-03,\n -2.2041e-02, 8.5872e-03, -1.7643e-03, -7.0052e-03, -1.6333e-02,\n -8.8700e-04, 4.5249e-02, 3.4440e-04, 7.1858e-03, 7.8294e-03,\n 6.0785e-03, -3.2115e-03, -1.2172e-02, 1.0266e-02, 1.8950e-03,\n -6.8670e-03, 1.1626e-02, 1.0156e-02, 3.3033e-02, -7.8505e-03,\n 1.5766e-03, -2.6365e-02, 1.8282e-04, 2.6729e-03, 1.1851e-03,\n -4.1102e-02, -1.5851e-02, -5.0955e-03, -1.1723e-02, 1.4081e-02,\n 1.6115e-03, -9.6441e-03, 7.6148e-03, 2.8092e-02, -2.3113e-02,\n -4.8746e-03, 5.8593e-03, 3.0541e-03, 9.7823e-32, 7.5626e-03,\n 1.3359e-02, -2.1657e-02, -3.4498e-04, -6.1955e-03, -8.8577e-03,\n 7.9662e-04, -3.7435e-02, 9.7587e-04, 2.8991e-02, 9.4901e-03,\n 2.6357e-02, -2.6293e-02, 3.6496e-02, 9.7497e-03, 5.3924e-03,\n -5.6016e-04, -1.0498e-02, 4.2402e-03, -2.9644e-02, 8.5141e-03,\n 2.3953e-02, 1.9489e-02, 3.5804e-02, -1.3843e-02, -8.3419e-04,\n 8.8677e-03, -1.2561e-03, 3.3183e-03, 2.3399e-02, -1.7113e-02,\n 5.6052e-45, -3.6541e-02, -2.6380e-02, 2.3451e-03, 4.3775e-02,\n 1.6554e-03, -2.8200e-03, -7.7190e-03, 7.0287e-03, -1.5903e-02,\n -2.4677e-03, 1.8479e-02, 1.5828e-02, 2.2581e-02, 6.5273e-05,\n 7.3067e-03, 1.4548e-02, 5.2871e-03, -4.5947e-03, 7.3921e-03,\n -1.8823e-04, -2.8342e-03, 7.2006e-03, -4.0031e-02, 1.8801e-03,\n 4.8731e-03, -4.7807e-03, -9.7334e-04, -1.6165e-08, -1.4527e-03,\n -1.0444e-02, -6.0792e-04, 2.2867e-03, 1.7134e-04, 2.4484e-03,\n 5.6052e-45, 8.0740e-03, 1.1355e-02, -2.9171e-03, -1.7921e-02,\n 2.6557e-02, -4.4509e-04, -1.9745e-02, -1.5912e-03, -1.0623e-03,\n 5.6052e-45, 4.6117e-02, -9.1408e-05, 1.9099e-02, -1.7711e-02,\n -1.5577e-02, 5.3021e-02, -1.3166e-02, -3.5531e-02, -1.4018e-03,\n 1.8851e-02, 1.0609e-02, -8.4691e-03, -8.5039e-03, -2.1829e-02,\n 2.2260e-02, 1.5379e-02, 1.0972e-02, -4.0397e-02, 1.8312e-02,\n 6.8667e-03, 2.1778e-09, -7.5284e-03, -2.0842e-02, 7.4211e-03,\n -4.9609e-04, 1.9609e-02, -4.2211e-02, -3.6348e-03, 2.9852e-02,\n -3.4850e-02, 5.9620e-03, -5.5469e-03, -1.4919e-02, -1.9676e-02,\n -1.5966e-02, 5.6052e-45, 1.9588e-03, 7.6927e-04, -1.4884e-02,\n 1.3100e-02, 1.9566e-03, -1.8573e-02, -1.7757e-02, 1.7167e-02,\n 2.6077e-02, 1.7453e-03, 1.1000e-02, 1.9339e-02, 3.0172e-02,\n 2.2516e-03, -5.0039e-03, -2.7566e-03, 1.6014e-02, 2.0872e-02,\n -4.4677e-03, 1.9302e-02, -1.2298e-02, -2.0414e-02, 2.0568e-02,\n -2.6836e-04, 1.5246e-02, -2.0090e-05, 1.2673e-02, -3.6611e-02,\n -9.2828e-03, -2.3231e-02, -2.5653e-05, 1.2505e-02, 3.0633e-03,\n 2.4187e-02, 5.7908e-03, -6.6423e-03, 2.4028e-02, -1.1412e-03,\n -5.5714e-03, -3.0733e-03, 2.6022e-03, -3.4920e-03, -7.1206e-05,\n 1.3749e-03, -1.6666e-02, 4.1827e-03, -7.2208e-03, -2.4958e-02,\n 6.3490e-03, -4.3013e-03, -1.4962e-04, 1.6028e-02, 1.1462e-02,\n 4.0215e-06, 1.8281e-02, 7.8031e-03, -4.2786e-02, -1.8772e-02,\n -4.7549e-02, -2.1343e-02, -1.0112e-02, -8.2545e-03, -2.3932e-02,\n 2.5577e-02, 5.6052e-45, -3.5692e-03, 7.5123e-03, -9.5326e-03,\n 7.6621e-03, -9.4447e-04, 1.1579e-02, -5.2485e-03, 3.9404e-02,\n 2.6206e-03, 1.9527e-02, 3.3471e-02, 1.6070e-02, 3.1636e-03,\n -1.2556e-02, 2.1602e-02, 7.0681e-03, -8.4302e-04, 1.1564e-02,\n 1.4286e-02, -1.3272e-02, 8.4437e-03, 2.5339e-03, -1.2827e-03,\n -2.0409e-03, 1.8860e-02, -3.1625e-03, -2.2927e-02, -1.4177e-02,\n 7.4846e-04, 2.1712e-02, 4.4026e-03, -8.1537e-03, -1.1850e-02,\n -2.6412e-02, -1.8456e-02, 1.2677e-03, -1.7429e-02, 1.3126e-02,\n 5.6052e-45, 3.5268e-03, 1.7266e-12, -1.4625e-03, 7.0512e-03,\n 1.6830e-02, 1.3768e-03, -9.5835e-03, -2.9882e-02, 1.1718e-02,\n 3.5438e-03, -6.8700e-03, -2.7125e-03, 3.6306e-03, -2.1009e-02,\n -3.6975e-03, 3.2628e-02, -1.5234e-02, 3.3574e-03, -5.0712e-03,\n 7.2853e-03, 2.7361e-02, -1.4910e-02, -1.0360e-02, -5.8130e-03,\n 7.0132e-03, -9.5131e-04, 1.9477e-02, -9.0310e-03, -4.3620e-03,\n 1.6100e-03, -2.7940e-03, -1.8503e-02, -8.5917e-03, -4.4746e-03,\n 6.9936e-03, -3.6012e-03, -9.0011e-03, 5.6052e-45, -1.0415e-02,\n 2.4286e-03, 7.1899e-03, -1.2722e-02, 9.5783e-03, 8.5215e-03,\n 6.7637e-03, -1.8500e-02, 9.0715e-03, -3.4790e-03, -1.1122e-02,\n 1.1882e-03, 3.4038e-02, 1.7116e-02, 1.2090e-03, -2.2405e-02,\n 1.2140e-03, 1.2981e-02, 3.2805e-02, 4.8984e-04, -1.2310e-02,\n -2.9504e-02, -1.5450e-02, 2.2141e-02, 1.4102e-02, 1.3321e-03,\n 5.6052e-45, 5.6052e-45, -2.0391e-02, -3.6033e-02, -2.8959e-03,\n -2.6439e-02, 3.5403e-03, 1.4500e-02, -1.3186e-02, -1.6918e-02,\n 5.1589e-02, -3.8516e-04, 5.6052e-45, -2.8537e-03, 1.8787e-02,\n -1.0228e-03, -4.2652e-03, 3.1662e-03, -3.7418e-02, 7.0096e-03,\n -9.2120e-03, 1.3849e-02, -4.9929e-03, 4.8561e-05, 5.6052e-45,\n 2.4112e-03, 1.0657e-02, -6.6845e-03, 4.2097e-03, 1.6548e-11,\n -1.5186e-03, -8.4826e-03, 8.7491e-03, 1.5617e-02, -8.1855e-03,\n -1.7909e-02, 2.6587e-02, 3.9749e-02, -1.2276e-02, -4.9077e-03,\n 1.6772e-02, -1.4828e-02, -1.3703e-02, -1.9517e-02, -5.2112e-03,\n -1.2494e-02, 1.6557e-02, -3.1608e-03, 5.6052e-45, -6.4571e-04,\n 7.3374e-03, 1.9096e-02, -2.4264e-03, -1.6941e-02, 2.9033e-03,\n 1.2402e-02, 4.0073e-03, -1.3054e-03, -3.6242e-02, 3.4978e-03,\n 1.1286e-03, -4.5170e-03, 5.1537e-02, 2.1713e-02, -2.0441e-03,\n 5.3294e-03, 3.2963e-02, -1.3902e-02, -6.4777e-03, 8.2918e-03,\n -1.8808e-02, 5.5549e-03, -3.9146e-20, -5.3425e-03, 5.9413e-04,\n -1.8026e-02, 2.1375e-02, 2.8979e-02, -1.2001e-02, 6.4989e-04,\n -1.5583e-02, 1.7469e-03, 3.9681e-03, -1.2142e-02, 5.6052e-45,\n 7.2875e-03, 7.2588e-03, -1.3845e-02, 1.7360e-03, 1.0922e-02,\n 1.1295e-03, -1.5724e-02, 2.5495e-03, -4.1073e-03, -1.5420e-03,\n 7.1973e-04, 7.0052e-04, -2.6559e-02, -1.4204e-02, -1.8514e-04,\n -1.7902e-04, 1.2159e-02, 7.0378e-03, -1.8452e-03, 2.5601e-03,\n 7.0964e-03, -2.9830e-03, 4.0828e-02, 5.6052e-45, -4.8078e-03,\n -9.4201e-03, 2.0601e-02, 9.8594e-03, 1.4592e-02, -4.3839e-02,\n 1.0086e-02, -1.3692e-02, 2.1874e-02, -1.1301e-02, 2.7514e-02,\n -1.4098e-02, 3.1673e-03, -2.0209e-02, 1.5612e-02, -9.3501e-03,\n -2.4976e-02, -3.7959e-03, -1.9365e-02, -8.9002e-03, 2.0572e-02,\n 3.6914e-02, -6.4782e-03, -1.4818e-03, -3.0124e-02, 1.2001e-02,\n 2.0817e-02, 1.7766e-02, 2.6337e-03, -3.4167e-03, 5.0660e-03,\n -5.3714e-03, -2.5933e-02, -3.4784e-02, 2.6722e-03, 3.7501e-02,\n 5.0615e-03, 5.6052e-45, 9.5609e-04, -2.7244e-04, 1.0414e-02,\n -1.2931e-02, 9.3761e-03, -1.8524e-04, -2.2073e-02, 1.2838e-02,\n -3.2212e-03, 5.6052e-45, 1.5483e-02, -1.3375e-02, -1.5283e-02,\n 1.6224e-03, -1.5075e-02, 2.0652e-12, 2.8104e-02, -6.9900e-03,\n 1.9916e-02, -1.9520e-02, -6.6399e-03, -8.0089e-03, -6.0308e-03,\n -1.4390e-03, -2.1796e-02, -1.0539e-03, -1.6818e-03, 5.6052e-45,\n 5.3486e-03, -1.9170e-02, 5.6052e-45, -9.7670e-03, -5.1018e-03,\n 8.3698e-29, -1.0562e-02, -5.2293e-03, 1.8478e-02, 3.9008e-03,\n 7.4572e-03, 1.8777e-03], device='cuda:0')", - "exp_avg_sq": "tensor([5.9887e-03, 5.2722e-03, 1.7493e-07, 4.4650e-03, 1.7880e-03, 3.4090e-03,\n 4.7808e-03, 4.7527e-03, 5.3835e-03, 3.8956e-03, 1.5232e-03, 5.9871e-03,\n 1.3831e-03, 3.1203e-03, 4.9953e-03, 8.2330e-04, 2.3458e-03, 5.8677e-03,\n 2.4155e-03, 1.7061e-03, 2.5118e-03, 5.1917e-03, 2.6106e-03, 5.3476e-03,\n 4.8678e-03, 5.7410e-04, 5.1720e-03, 4.5226e-03, 5.5224e-03, 5.1458e-03,\n 4.3276e-03, 4.7952e-03, 4.4468e-03, 2.9728e-03, 3.9547e-03, 4.4656e-03,\n 1.2991e-03, 5.1104e-03, 4.9820e-03, 4.8808e-03, 3.8555e-03, 4.2795e-03,\n 1.1052e-03, 8.1773e-10, 4.8092e-03, 1.3788e-03, 6.1062e-03, 4.8051e-03,\n 5.3780e-03, 5.3546e-03, 1.1253e-03, 5.5625e-03, 1.7995e-03, 4.5240e-03,\n 5.5145e-03, 3.7284e-03, 6.0132e-03, 6.6531e-03, 5.2382e-03, 3.1592e-03,\n 4.8768e-03, 4.3386e-03, 5.0355e-03, 5.2016e-03, 6.0760e-03, 4.9345e-03,\n 3.2820e-03, 4.8314e-03, 6.0828e-03, 1.2281e-03, 5.0486e-03, 4.0461e-04,\n 3.2151e-03, 4.9997e-03, 4.4207e-03, 7.5963e-09, 4.5572e-03, 5.6966e-03,\n 5.0904e-03, 4.8237e-03, 5.0574e-03, 4.7790e-03, 3.3964e-03, 4.6340e-03,\n 6.1395e-03, 3.3987e-03, 5.0259e-03, 1.4894e-03, 4.4966e-03, 1.6173e-05,\n 2.3872e-03, 2.9893e-03, 5.0763e-04, 4.3138e-05, 5.0013e-03, 5.5565e-03,\n 6.4057e-04, 5.3545e-03, 4.6709e-03, 2.9628e-03, 2.6988e-03, 1.6295e-03,\n 2.2265e-03, 4.5438e-07, 4.6367e-03, 4.0164e-03, 5.5400e-03, 3.4361e-03,\n 3.8535e-03, 1.6477e-03, 4.5660e-07, 4.5038e-03, 2.4037e-03, 1.9588e-03,\n 5.8820e-03, 5.8243e-03, 5.7288e-03, 5.2507e-03, 3.6816e-03, 2.6251e-04,\n 7.6602e-06, 5.1962e-03, 3.6438e-03, 5.5486e-03, 5.1495e-03, 2.6844e-03,\n 5.2168e-03, 5.0818e-03, 5.2454e-03, 4.7272e-03, 5.0131e-03, 6.0442e-03,\n 1.4627e-03, 5.6676e-03, 5.0793e-03, 4.5197e-03, 5.0911e-03, 3.2910e-03,\n 4.9865e-03, 5.6167e-03, 5.7405e-03, 3.5721e-06, 6.1082e-03, 3.5305e-03,\n 4.8929e-03, 5.4398e-03, 4.5595e-03, 4.8292e-03, 4.9683e-03, 5.2717e-03,\n 4.5061e-03, 5.6444e-04, 5.0089e-03, 3.7042e-03, 5.2461e-03, 5.2832e-03,\n 4.2408e-09, 4.7127e-03, 1.1403e-03, 8.8000e-04, 5.4930e-03, 5.7890e-03,\n 4.6545e-03, 4.1398e-03, 4.9975e-03, 4.8199e-03, 4.6658e-03, 1.3249e-03,\n 4.7806e-03, 4.2185e-03, 4.8866e-03, 3.9481e-03, 2.9295e-03, 5.8027e-03,\n 4.9627e-03, 7.5260e-04, 5.0515e-03, 4.6313e-03, 5.0109e-03, 3.3962e-03,\n 4.6776e-03, 4.8875e-03, 1.6381e-06, 1.4105e-03, 4.2358e-03, 5.4158e-03,\n 4.6759e-03, 4.8343e-03, 2.7400e-03, 2.1136e-03, 4.2139e-03, 1.7677e-03,\n 4.9119e-03, 5.3387e-03, 7.1655e-04, 6.4817e-04, 7.3310e-04, 2.4508e-03,\n 3.5062e-03, 5.3061e-03, 1.3330e-03, 4.6425e-03, 3.2772e-03, 2.7240e-03,\n 4.4504e-03, 4.8597e-03, 1.4880e-03, 2.4611e-03, 2.0921e-03, 5.8750e-03,\n 3.0428e-06, 5.0516e-03, 5.1089e-03, 5.4699e-03, 5.8542e-03, 6.2152e-03,\n 5.5481e-03, 5.2508e-03, 3.7090e-03, 5.3732e-03, 5.2957e-03, 1.1590e-09,\n 3.2583e-03, 5.1236e-03, 2.6994e-03, 4.8746e-03, 8.0444e-04, 3.2786e-03,\n 5.5779e-03, 4.7727e-03, 9.9885e-04, 5.0906e-03, 5.1406e-03, 5.2479e-03,\n 5.0542e-03, 1.6196e-03, 5.1452e-03, 2.6850e-03, 5.3559e-03, 3.6652e-03,\n 4.3711e-03, 4.3179e-03, 5.8011e-03, 1.7352e-03, 5.0370e-03, 1.8232e-03,\n 4.9442e-03, 5.0350e-03, 6.1055e-03, 4.8524e-03, 5.4817e-03, 2.2739e-03,\n 5.2863e-03, 1.5224e-03, 5.2408e-03, 3.8532e-03, 3.7768e-03, 2.8902e-04,\n 4.9886e-03, 3.1615e-03, 5.2055e-08, 1.2246e-03, 7.4590e-08, 1.0430e-03,\n 2.9189e-03, 4.0555e-03, 4.8819e-03, 5.9090e-03, 5.1231e-03, 2.7469e-03,\n 5.4785e-03, 1.5450e-03, 5.5428e-03, 5.2124e-03, 4.3684e-03, 4.3834e-03,\n 3.7605e-03, 2.6076e-03, 2.4390e-03, 1.6975e-04, 5.3874e-03, 6.0106e-03,\n 5.2486e-03, 2.0539e-03, 1.9447e-03, 5.7172e-03, 2.0903e-03, 5.3565e-03,\n 3.7829e-03, 2.1495e-03, 3.3393e-03, 5.4914e-03, 4.2499e-03, 5.1388e-03,\n 2.6391e-03, 4.6107e-03, 5.0836e-03, 1.2434e-03, 1.5806e-06, 2.6057e-03,\n 2.9921e-03, 5.0417e-03, 4.6289e-03, 4.9108e-03, 5.5184e-03, 3.8408e-03,\n 4.8457e-03, 4.6668e-03, 5.5907e-03, 1.6032e-03, 4.3989e-03, 4.9937e-03,\n 5.4731e-03, 4.8112e-03, 5.1898e-03, 4.5727e-03, 1.1531e-03, 5.2453e-03,\n 1.2121e-03, 5.5694e-03, 4.6013e-03, 5.6230e-03, 4.6856e-03, 5.7632e-03,\n 3.3795e-03, 2.4174e-07, 1.0646e-06, 4.2609e-03, 4.4365e-03, 5.3266e-03,\n 5.3651e-03, 3.0050e-04, 4.7710e-03, 6.2867e-03, 4.4176e-04, 4.9656e-03,\n 1.6347e-03, 7.5761e-09, 1.4130e-04, 5.8974e-03, 4.5079e-03, 5.6282e-03,\n 5.3255e-03, 4.6028e-03, 4.2260e-03, 5.5358e-03, 5.0926e-03, 5.1143e-03,\n 4.4211e-03, 2.3120e-07, 1.4165e-03, 4.1020e-03, 5.0158e-03, 6.1019e-03,\n 2.1643e-07, 1.4936e-03, 5.4041e-03, 5.1690e-03, 4.5085e-03, 5.7200e-03,\n 5.4235e-03, 5.0832e-03, 5.3066e-03, 5.5415e-03, 5.1345e-03, 1.4395e-03,\n 5.1485e-03, 4.7917e-03, 5.1202e-03, 4.0493e-03, 1.8120e-03, 4.4451e-03,\n 2.2914e-04, 3.0239e-10, 1.1358e-03, 8.6221e-04, 2.7632e-03, 1.0224e-04,\n 5.0056e-03, 1.1790e-03, 5.2158e-03, 5.0226e-03, 3.2694e-03, 4.0520e-03,\n 2.3491e-03, 5.1377e-03, 5.2879e-03, 3.6345e-03, 4.7577e-03, 2.1328e-03,\n 5.5369e-03, 5.5374e-03, 5.6940e-03, 5.2344e-03, 5.1991e-03, 4.1761e-03,\n 5.6161e-03, 1.0084e-05, 4.3437e-04, 5.3236e-03, 4.4506e-03, 6.4832e-03,\n 4.4065e-03, 3.0837e-03, 1.8801e-03, 5.2062e-03, 4.1772e-03, 2.0598e-03,\n 3.1853e-03, 2.8651e-06, 4.9216e-04, 5.0653e-03, 4.1842e-03, 4.1762e-03,\n 9.2228e-04, 5.5591e-03, 5.3643e-03, 4.9212e-03, 1.7804e-03, 1.5115e-03,\n 4.8589e-03, 4.3949e-03, 3.0205e-03, 4.1750e-03, 4.8885e-03, 1.3763e-03,\n 4.5622e-03, 5.0534e-03, 3.3506e-03, 5.7891e-03, 2.0273e-03, 1.7191e-03,\n 4.9539e-03, 4.3321e-07, 9.2051e-04, 4.6741e-03, 4.8967e-03, 4.2577e-03,\n 4.9426e-03, 6.3284e-03, 2.0580e-03, 5.2112e-03, 5.5553e-03, 1.8373e-03,\n 5.3013e-03, 6.7496e-03, 4.6189e-03, 1.8754e-03, 5.1306e-03, 3.8483e-03,\n 4.9959e-03, 1.2065e-03, 4.8498e-03, 3.9499e-03, 1.6330e-03, 4.8119e-03,\n 5.3567e-03, 3.8812e-03, 4.7606e-03, 4.5422e-03, 5.2526e-03, 4.3312e-03,\n 2.8671e-04, 2.0372e-03, 2.5051e-03, 5.3116e-03, 4.1620e-03, 6.7943e-03,\n 4.0847e-03, 5.6035e-03, 5.0009e-03, 6.6495e-07, 5.2329e-03, 4.8072e-03,\n 4.1131e-04, 4.8632e-03, 3.1229e-03, 2.0019e-03, 4.8642e-03, 2.3627e-03,\n 5.2356e-03, 2.4311e-10, 1.7563e-03, 4.3193e-03, 4.5424e-03, 1.5602e-03,\n 5.6331e-03, 1.7562e-06, 4.4516e-03, 5.2198e-03, 4.2919e-03, 4.7847e-03,\n 4.4411e-03, 3.7210e-03, 4.2995e-03, 2.1362e-03, 4.1117e-03, 5.0423e-03,\n 1.1584e-03, 1.6296e-07, 9.1039e-04, 5.2832e-03, 2.5991e-08, 4.6524e-03,\n 4.6450e-03, 1.7617e-07, 4.5059e-03, 3.9553e-03, 2.9651e-03, 4.2768e-04,\n 8.2834e-04, 1.0890e-03], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([ 1.4126e-02, 5.1097e-03, -7.9496e-04, -1.0821e-02, -3.9196e-03,\n 9.7481e-03, 2.8996e-02, -8.3758e-03, 2.6963e-02, 1.0866e-02,\n -9.9463e-03, 1.8908e-03, -2.3400e-02, -4.5097e-03, -2.6022e-04,\n -1.0945e-03, -1.5825e-03, -1.8060e-02, 1.8558e-02, 4.8557e-03,\n 8.4040e-04, -2.3065e-02, -1.1609e-02, 2.6445e-02, -2.7615e-02,\n -1.1002e-02, 1.5652e-02, 2.2094e-03, -3.6463e-03, -1.0016e-02,\n -6.4752e-03, 1.2186e-02, -3.8771e-03, -6.0847e-03, -1.3005e-02,\n -1.7814e-03, 8.7434e-03, -4.7052e-03, -1.8526e-03, -4.0972e-03,\n -4.9735e-03, 2.3356e-02, 6.2180e-04, 1.0972e-07, -1.1905e-02,\n -1.0580e-03, 2.9581e-03, -2.7340e-02, 1.9426e-03, -4.0088e-02,\n -1.6175e-04, -1.3983e-02, -4.2165e-03, 5.4822e-03, -7.6666e-03,\n 1.3896e-02, -3.0725e-02, -1.0907e-02, -2.3293e-02, -8.6505e-03,\n -2.0347e-02, 6.2076e-03, 5.6377e-03, -3.9024e-02, 1.4011e-02,\n 9.9071e-03, 1.1338e-02, 1.8101e-02, 2.4495e-03, 1.6637e-03,\n -5.5351e-03, -7.6718e-03, 1.0056e-03, -6.0980e-03, -2.2850e-02,\n 5.6052e-45, -1.3769e-02, 2.8396e-02, 1.3683e-02, -1.0294e-02,\n -3.0784e-03, 2.6520e-03, 1.1138e-02, -2.0509e-02, -4.1792e-03,\n 9.3004e-03, -3.0858e-03, 9.1495e-03, -1.9156e-02, -2.9184e-03,\n 2.7368e-04, -8.2334e-03, -2.6975e-03, -7.0788e-04, 4.3320e-03,\n -3.1705e-03, 3.2434e-03, 9.2458e-03, 2.2696e-03, 2.5398e-02,\n -2.2191e-02, 2.8266e-03, 5.2332e-03, -3.1063e-05, -8.1051e-04,\n -5.6563e-03, -2.4841e-02, 4.0808e-03, -1.0918e-02, 6.1963e-03,\n 5.6052e-45, -4.1354e-03, -3.2764e-04, 9.5932e-03, -8.7973e-03,\n -2.1817e-02, -4.7290e-02, 1.4857e-02, -7.4116e-03, 1.6306e-03,\n -3.2103e-04, 2.2532e-02, -3.0149e-02, 2.0232e-02, -3.7742e-02,\n 6.1250e-03, 2.7181e-02, -4.7548e-03, -1.7456e-02, 1.2499e-02,\n 8.6738e-03, -8.0440e-04, 1.3617e-04, 1.3468e-02, -1.0849e-02,\n 7.6905e-03, 1.3681e-02, -1.6021e-02, 2.1311e-02, 1.6096e-02,\n 1.9417e-02, 3.2313e-09, 2.1293e-02, 1.8147e-02, -2.6864e-02,\n 2.9685e-02, 5.2116e-03, -1.1284e-02, -1.9821e-02, 4.6708e-03,\n -3.5911e-03, -1.7155e-03, 4.5961e-02, 4.4525e-02, 1.5316e-02,\n -9.3124e-03, 5.6052e-45, 1.7929e-02, 4.2426e-04, -5.1501e-03,\n 3.8218e-03, 7.5045e-03, -7.2297e-03, 1.6126e-02, -2.4166e-04,\n -9.2504e-03, -2.4791e-02, 1.1752e-02, 1.0200e-03, 2.2198e-02,\n 4.1368e-03, -1.2097e-02, 6.2400e-03, -2.6837e-02, 2.0055e-02,\n -6.6926e-03, -9.4271e-03, 1.4073e-02, -3.6922e-03, -1.7924e-03,\n -3.1745e-03, -6.8568e-03, 2.6574e-03, -9.7224e-03, -1.1626e-02,\n -1.1487e-02, -1.7578e-02, 2.2066e-02, 4.9428e-03, 2.7734e-04,\n -1.5122e-03, 4.6194e-03, 2.6900e-02, -1.1146e-03, -1.3893e-03,\n -5.1913e-03, 5.7615e-05, 9.1089e-03, -8.2396e-03, -6.2549e-03,\n -5.7927e-02, 1.3553e-02, 1.8898e-02, 8.4385e-03, 1.1864e-02,\n -1.4659e-02, 6.8217e-03, 1.7848e-03, 3.5994e-03, 7.0828e-03,\n 1.6439e-14, 2.9533e-02, -2.5959e-02, 1.0810e-02, 6.7062e-03,\n 2.1347e-02, -2.7662e-02, 1.7307e-02, 5.2727e-03, -1.2526e-02,\n 1.2054e-02, 5.6052e-45, -1.6051e-02, -2.1680e-03, -3.3772e-02,\n -4.1733e-03, 3.6567e-03, -4.4006e-03, 2.1873e-02, 4.1847e-03,\n 4.1026e-03, 1.1686e-02, -2.2715e-02, 1.0566e-02, 7.2040e-03,\n -5.0501e-03, -6.2318e-03, 1.5517e-02, -7.3302e-04, -1.8367e-02,\n 8.9231e-03, 5.4178e-03, -8.3624e-03, -3.0468e-04, 5.7543e-04,\n 4.3196e-03, -1.8185e-02, -3.3677e-04, -1.0428e-02, 1.8199e-02,\n 2.6856e-03, 9.6491e-03, -2.4636e-02, -1.4652e-03, 9.9017e-03,\n -2.0520e-02, 1.6636e-03, 5.4025e-03, 3.8037e-02, 1.1356e-03,\n 5.6052e-45, 2.7663e-03, 8.5401e-04, 3.1312e-02, 4.2938e-03,\n -4.4984e-02, 1.5437e-02, 7.1961e-03, -1.9223e-02, 2.0231e-02,\n -6.2331e-03, -4.8741e-03, -3.1687e-03, -1.9983e-02, -1.0632e-02,\n -7.9567e-03, 9.5404e-03, 9.4787e-03, 3.6711e-03, -3.9520e-03,\n 2.8143e-03, -1.3014e-02, -6.3484e-03, -4.7869e-03, 2.1816e-03,\n 1.1211e-03, 2.6783e-02, -1.6852e-02, -1.7441e-02, 9.6654e-03,\n -1.1413e-02, 4.3951e-03, -2.8843e-02, 1.3558e-03, 6.0770e-03,\n -3.9430e-03, -3.0027e-02, 2.8810e-03, 5.6052e-45, -7.1474e-03,\n 2.6602e-03, 1.0157e-02, 1.8439e-02, -5.2718e-03, -2.1569e-03,\n 1.3810e-02, 3.8801e-02, -9.0744e-04, -3.1336e-03, -1.1210e-03,\n -1.2633e-02, 1.0190e-02, 1.6988e-02, -7.1919e-03, -6.4628e-03,\n -4.0605e-03, 1.6374e-03, 2.1224e-02, 6.5178e-03, -2.2417e-03,\n 1.0412e-02, 1.6728e-03, 3.2706e-02, 1.3444e-02, -2.9927e-03,\n 5.6052e-45, 5.6052e-45, -1.1627e-02, 2.1780e-02, -2.5630e-02,\n 1.9772e-02, -8.6046e-03, -1.9349e-02, -2.0929e-02, -7.3220e-04,\n 1.7790e-02, 4.0794e-03, -1.6303e-03, 3.6801e-03, 2.6842e-03,\n 6.7983e-03, 2.4575e-03, -5.7469e-03, 4.1209e-03, -3.6542e-02,\n -7.5764e-03, -1.0995e-02, 7.8305e-03, -5.6046e-03, 8.4083e-10,\n -6.7101e-03, -8.6087e-03, 2.2583e-02, -5.1795e-03, 5.6052e-45,\n 4.1433e-03, -6.5207e-04, 1.0495e-02, -3.8115e-03, -5.4206e-03,\n -2.7440e-02, -2.5837e-02, -3.0073e-02, 1.0225e-02, 6.6747e-03,\n 1.2307e-02, -2.6078e-02, -2.5466e-03, 2.0137e-02, 1.3106e-03,\n 8.8868e-03, 8.6641e-03, -1.2534e-03, -7.3521e-04, 2.1014e-02,\n 1.3129e-03, -2.1967e-04, -1.6432e-02, -3.3611e-02, 3.8524e-03,\n -3.0511e-02, -3.3889e-03, 1.4865e-02, -1.7318e-03, 1.9834e-02,\n -1.2463e-02, -8.2149e-03, 4.1179e-02, -2.3903e-03, -5.8150e-03,\n -9.7681e-03, 4.5003e-03, 3.0471e-03, 1.1223e-02, -9.5615e-03,\n 5.6407e-03, -5.5656e-03, -1.9322e-07, -2.7733e-02, -1.5459e-02,\n -8.2485e-03, -1.7746e-02, 5.8798e-04, 1.4315e-03, 7.5043e-04,\n 2.4938e-03, -5.1563e-03, -1.3973e-03, -6.9038e-03, 5.6052e-45,\n 3.1674e-03, 1.4597e-03, 2.0790e-02, 1.4572e-02, -1.3867e-03,\n 1.4563e-02, -7.2573e-03, -2.9555e-03, -5.8591e-03, -1.0083e-04,\n 4.2798e-02, -1.5628e-02, -1.6586e-02, 1.3316e-02, 1.9965e-03,\n 5.6790e-05, 7.0538e-03, 1.8303e-02, -9.1613e-03, -2.4298e-02,\n -2.6639e-02, -1.0305e-02, -1.4221e-02, 5.6052e-45, 7.7494e-03,\n 1.4284e-02, -1.1253e-02, -5.6627e-04, 1.1633e-02, -1.8695e-02,\n 7.5929e-03, -1.5957e-02, -1.0356e-02, -1.4654e-03, 2.2605e-02,\n -1.7703e-02, -3.6910e-02, 2.9079e-03, 2.8037e-02, 7.0646e-04,\n -3.2189e-03, 2.8289e-03, 2.2728e-02, -2.3164e-02, -1.2167e-03,\n -3.7707e-03, -4.1843e-02, -1.3808e-02, -1.5413e-02, 2.1055e-02,\n 3.4101e-02, -1.8718e-02, 3.8578e-03, -9.5683e-03, -6.8403e-03,\n 6.6832e-03, -3.9139e-02, 5.8818e-03, -2.8080e-02, -5.5688e-03,\n 4.9375e-03, -3.9503e-24, 1.2888e-02, 8.0715e-04, -4.8611e-03,\n 3.4440e-02, -6.0116e-03, 1.0638e-02, 1.2437e-02, -1.1479e-02,\n 1.8371e-02, 8.3561e-04, 1.2870e-02, 1.3342e-02, 2.5543e-03,\n -1.0643e-03, 2.9523e-02, 2.7264e-23, -3.2583e-03, -4.1601e-02,\n -3.3796e-03, 7.2853e-03, -1.7614e-02, -9.6136e-03, 1.0517e-02,\n 1.6596e-02, 2.3512e-03, -1.1211e-02, 9.5979e-03, 5.6052e-45,\n -3.9685e-03, -3.5961e-02, 5.6052e-45, 9.1520e-03, -1.2400e-02,\n 3.2587e-03, -5.1009e-03, -1.1741e-02, -1.1344e-02, -7.7764e-04,\n 1.7889e-02, 1.8438e-03], device='cuda:0')", + "exp_avg_sq": "tensor([3.5245e-03, 3.0989e-03, 5.8316e-06, 2.7782e-03, 1.2305e-03, 2.5088e-03,\n 2.9681e-03, 2.8440e-03, 3.2411e-03, 2.7461e-03, 1.1367e-03, 3.7409e-03,\n 1.1126e-03, 2.2902e-03, 3.0541e-03, 7.7844e-04, 1.7053e-03, 3.6030e-03,\n 1.6700e-03, 1.2427e-03, 1.9963e-03, 3.2655e-03, 1.8608e-03, 3.2262e-03,\n 2.9827e-03, 5.2594e-04, 3.3345e-03, 2.7471e-03, 3.3105e-03, 3.1088e-03,\n 2.6826e-03, 3.0603e-03, 2.7265e-03, 2.0890e-03, 2.7632e-03, 2.7218e-03,\n 1.1992e-03, 3.1737e-03, 2.9874e-03, 3.2021e-03, 2.2937e-03, 2.8871e-03,\n 9.8573e-04, 2.4173e-10, 3.0276e-03, 1.0109e-03, 3.6031e-03, 2.7669e-03,\n 3.4287e-03, 3.6319e-03, 9.3104e-04, 3.3241e-03, 1.3330e-03, 2.8987e-03,\n 3.3942e-03, 2.4242e-03, 3.4951e-03, 4.1314e-03, 3.2309e-03, 2.1436e-03,\n 3.1456e-03, 2.9493e-03, 3.0450e-03, 3.2197e-03, 3.6016e-03, 3.0326e-03,\n 2.3636e-03, 2.9171e-03, 3.5651e-03, 1.0107e-03, 3.1955e-03, 4.8811e-04,\n 2.1526e-03, 3.0692e-03, 2.8523e-03, 2.1707e-09, 2.9350e-03, 3.6001e-03,\n 3.0574e-03, 2.8947e-03, 2.9443e-03, 2.9042e-03, 2.3127e-03, 2.8866e-03,\n 3.7563e-03, 2.2563e-03, 3.0041e-03, 1.1368e-03, 3.0427e-03, 8.7803e-05,\n 1.7396e-03, 2.3220e-03, 5.5170e-04, 1.4170e-04, 3.1903e-03, 3.3795e-03,\n 6.8571e-04, 3.3800e-03, 2.9339e-03, 2.0842e-03, 2.0097e-03, 1.4033e-03,\n 1.7231e-03, 1.2994e-07, 2.8825e-03, 2.5919e-03, 3.4111e-03, 2.2335e-03,\n 2.6591e-03, 1.2840e-03, 1.3048e-07, 2.7770e-03, 1.6337e-03, 1.6395e-03,\n 3.6700e-03, 3.4465e-03, 3.4928e-03, 3.1441e-03, 2.4282e-03, 2.9633e-04,\n 2.7618e-06, 3.6469e-03, 2.5495e-03, 3.4521e-03, 3.2639e-03, 1.8594e-03,\n 3.2305e-03, 3.0190e-03, 3.4732e-03, 3.0105e-03, 3.2083e-03, 3.8227e-03,\n 1.0629e-03, 3.4641e-03, 3.3395e-03, 2.7458e-03, 3.1340e-03, 2.2571e-03,\n 2.8862e-03, 3.4735e-03, 3.2128e-03, 1.0209e-06, 3.8112e-03, 2.4857e-03,\n 2.9170e-03, 3.3203e-03, 2.8669e-03, 2.8642e-03, 3.1892e-03, 3.2006e-03,\n 2.7177e-03, 5.2134e-04, 3.4524e-03, 2.4638e-03, 3.2252e-03, 3.0644e-03,\n 1.2118e-09, 2.9078e-03, 1.1007e-03, 8.7336e-04, 3.2088e-03, 3.4442e-03,\n 2.9715e-03, 2.5343e-03, 3.0796e-03, 3.0252e-03, 2.9668e-03, 9.3902e-04,\n 3.0296e-03, 2.6524e-03, 3.2265e-03, 2.4156e-03, 2.0831e-03, 3.4327e-03,\n 3.0006e-03, 6.0793e-04, 3.0452e-03, 2.9479e-03, 3.0454e-03, 2.4560e-03,\n 2.7675e-03, 2.9988e-03, 8.1108e-05, 1.2105e-03, 2.7738e-03, 3.2372e-03,\n 2.8985e-03, 2.9229e-03, 1.9048e-03, 1.5983e-03, 2.9451e-03, 1.2891e-03,\n 3.0882e-03, 3.3258e-03, 5.6151e-04, 6.8484e-04, 5.4282e-04, 1.6011e-03,\n 2.1493e-03, 3.3810e-03, 1.4191e-03, 2.8497e-03, 2.2658e-03, 1.8723e-03,\n 2.9324e-03, 2.9257e-03, 1.1150e-03, 1.7068e-03, 1.5290e-03, 3.6414e-03,\n 8.6959e-07, 3.2632e-03, 3.1068e-03, 3.3475e-03, 3.3326e-03, 4.0044e-03,\n 3.4000e-03, 3.3288e-03, 2.3884e-03, 3.2901e-03, 3.2062e-03, 3.0442e-08,\n 2.0117e-03, 2.9938e-03, 2.1004e-03, 3.0491e-03, 6.9192e-04, 2.2366e-03,\n 3.5984e-03, 2.7813e-03, 8.4004e-04, 3.1827e-03, 3.2556e-03, 3.1886e-03,\n 3.1232e-03, 1.4098e-03, 3.4118e-03, 1.8634e-03, 3.3689e-03, 2.3985e-03,\n 2.6272e-03, 2.7072e-03, 3.6023e-03, 1.2666e-03, 3.1058e-03, 1.4297e-03,\n 3.1085e-03, 3.2609e-03, 3.7172e-03, 3.1241e-03, 3.3506e-03, 1.6199e-03,\n 3.1798e-03, 1.1382e-03, 3.1803e-03, 2.8215e-03, 2.6485e-03, 3.4405e-04,\n 3.2753e-03, 2.3239e-03, 1.4875e-08, 8.4955e-04, 4.2929e-07, 1.0277e-03,\n 2.1103e-03, 2.6360e-03, 3.0892e-03, 3.4876e-03, 3.1552e-03, 1.9621e-03,\n 3.4098e-03, 1.1956e-03, 3.2534e-03, 3.3033e-03, 2.8629e-03, 2.6129e-03,\n 2.7069e-03, 1.8476e-03, 1.6560e-03, 5.9022e-04, 3.2106e-03, 3.7321e-03,\n 3.0724e-03, 1.4267e-03, 1.4068e-03, 3.5085e-03, 1.5807e-03, 3.5264e-03,\n 2.4169e-03, 1.7570e-03, 2.0984e-03, 3.1953e-03, 3.0157e-03, 3.0794e-03,\n 1.7428e-03, 2.9307e-03, 3.2441e-03, 9.1340e-04, 4.5167e-07, 1.7210e-03,\n 2.1401e-03, 3.1930e-03, 2.9891e-03, 3.0363e-03, 3.5141e-03, 2.4301e-03,\n 3.0205e-03, 3.1117e-03, 3.4568e-03, 1.1679e-03, 2.8948e-03, 3.0076e-03,\n 3.2871e-03, 2.9746e-03, 3.3678e-03, 2.9983e-03, 9.1050e-04, 3.4039e-03,\n 8.4987e-04, 3.4848e-03, 2.8639e-03, 3.4694e-03, 3.1387e-03, 3.2850e-03,\n 2.2859e-03, 6.9078e-08, 3.0422e-07, 2.6707e-03, 2.9058e-03, 3.3984e-03,\n 3.1217e-03, 4.1820e-04, 2.8930e-03, 3.6061e-03, 3.8565e-04, 2.9558e-03,\n 1.0633e-03, 3.2754e-05, 4.2475e-04, 3.7753e-03, 3.0595e-03, 3.2076e-03,\n 3.2501e-03, 2.9508e-03, 2.6832e-03, 3.2857e-03, 3.0548e-03, 3.2382e-03,\n 2.7525e-03, 6.7612e-07, 1.1585e-03, 2.5886e-03, 3.0787e-03, 3.8604e-03,\n 6.1846e-08, 1.1775e-03, 3.4450e-03, 3.1999e-03, 2.8818e-03, 3.4784e-03,\n 3.3564e-03, 3.3370e-03, 3.3485e-03, 3.4814e-03, 3.2127e-03, 1.6400e-03,\n 3.0564e-03, 3.0543e-03, 3.1782e-03, 2.4688e-03, 1.3647e-03, 2.6223e-03,\n 2.9558e-04, 2.9925e-05, 7.5277e-04, 6.6727e-04, 2.0271e-03, 4.5108e-04,\n 3.1783e-03, 9.2151e-04, 3.2987e-03, 3.1006e-03, 2.1819e-03, 2.5893e-03,\n 1.6246e-03, 3.1001e-03, 3.5160e-03, 2.5541e-03, 3.1172e-03, 1.4197e-03,\n 3.4440e-03, 3.3488e-03, 3.4078e-03, 3.0565e-03, 3.3904e-03, 2.5311e-03,\n 3.4801e-03, 2.8824e-06, 7.0489e-04, 3.2944e-03, 2.8817e-03, 4.0015e-03,\n 3.0253e-03, 2.1322e-03, 1.3625e-03, 3.2911e-03, 2.6535e-03, 1.4167e-03,\n 2.1815e-03, 8.1873e-07, 6.2930e-04, 3.3428e-03, 2.8277e-03, 2.6530e-03,\n 9.8417e-04, 3.3517e-03, 3.2300e-03, 2.8839e-03, 1.5831e-03, 1.0962e-03,\n 3.2469e-03, 2.5689e-03, 2.1926e-03, 2.7866e-03, 3.2263e-03, 9.5498e-04,\n 2.9671e-03, 3.1669e-03, 2.2558e-03, 3.6475e-03, 1.7521e-03, 1.4799e-03,\n 3.0374e-03, 1.2379e-07, 9.1657e-04, 2.6936e-03, 3.0681e-03, 2.6039e-03,\n 3.2815e-03, 3.8153e-03, 1.4944e-03, 3.1235e-03, 3.2186e-03, 1.3103e-03,\n 3.0070e-03, 4.1272e-03, 3.1370e-03, 1.4355e-03, 3.0428e-03, 2.4455e-03,\n 3.1923e-03, 9.6530e-04, 2.8741e-03, 2.6727e-03, 1.2283e-03, 2.9912e-03,\n 3.5006e-03, 2.5610e-03, 2.7516e-03, 2.7789e-03, 3.3772e-03, 2.8176e-03,\n 3.2926e-04, 1.4924e-03, 1.6918e-03, 3.3008e-03, 2.5937e-03, 4.1381e-03,\n 2.6666e-03, 3.3360e-03, 3.3307e-03, 5.3948e-07, 3.2026e-03, 3.1882e-03,\n 5.4203e-04, 3.0714e-03, 2.3310e-03, 1.3735e-03, 3.0627e-03, 1.7767e-03,\n 3.1126e-03, 7.4040e-07, 1.3200e-03, 2.8227e-03, 2.8408e-03, 1.1421e-03,\n 3.3019e-03, 5.0184e-07, 3.0598e-03, 3.1509e-03, 2.5896e-03, 2.8107e-03,\n 2.9210e-03, 2.5604e-03, 2.6587e-03, 1.5415e-03, 2.6800e-03, 3.2319e-03,\n 8.7669e-04, 4.6567e-08, 9.1330e-04, 3.1646e-03, 7.4270e-09, 2.8080e-03,\n 3.0547e-03, 4.5176e-05, 2.7430e-03, 2.5698e-03, 2.0088e-03, 4.7702e-04,\n 9.7096e-04, 8.0002e-04], device='cuda:0')" }, "4": { - "step": "tensor(3756.)", - "exp_avg": "tensor([[ 4.5855e-05, -1.4842e-04, -5.6052e-45, ..., -7.6102e-05,\n -2.6761e-05, -1.5499e-05],\n [-4.2937e-04, 1.0597e-04, 5.6052e-45, ..., -6.7287e-06,\n 1.2485e-06, 5.6074e-05],\n [-1.0269e-04, -2.1287e-05, -5.6052e-45, ..., -4.8599e-05,\n -2.8047e-05, 5.4025e-05],\n ...,\n [-4.6379e-04, 1.6832e-04, -5.6052e-45, ..., 3.9565e-05,\n 1.0545e-07, -2.2321e-05],\n [ 3.9244e-04, -2.1128e-04, 5.6052e-45, ..., -6.8291e-05,\n -1.2705e-04, -3.7668e-05],\n [-2.1013e-04, -6.3101e-05, -5.6052e-45, ..., -9.9995e-07,\n 8.9981e-05, 2.6153e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[5.5315e-07, 6.1332e-07, 9.1007e-12, ..., 1.0500e-07, 5.0304e-08,\n 9.0964e-09],\n [1.0525e-06, 1.2724e-06, 1.3841e-11, ..., 1.2999e-08, 9.4319e-08,\n 4.3889e-08],\n [9.0287e-07, 1.1079e-06, 3.8171e-11, ..., 3.7623e-08, 4.6619e-08,\n 3.3683e-08],\n ...,\n [8.6719e-07, 1.3635e-06, 1.2737e-11, ..., 2.9412e-08, 6.4111e-08,\n 2.9133e-08],\n [8.4302e-07, 1.2488e-06, 8.8669e-12, ..., 5.1059e-08, 6.4354e-08,\n 2.6535e-08],\n [9.6562e-07, 1.4600e-06, 1.3618e-11, ..., 2.8980e-08, 8.4542e-08,\n 1.8166e-08]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[-7.5344e-05, -7.1724e-05, -6.2509e-06, ..., 4.6868e-05,\n -7.7025e-05, 2.5811e-05],\n [-6.7487e-05, 2.1505e-04, 7.8508e-06, ..., -1.1984e-05,\n 3.5171e-05, 2.9165e-05],\n [-2.5396e-04, 6.6164e-06, -5.9092e-06, ..., 5.3140e-05,\n -6.5008e-05, 9.3074e-06],\n ...,\n [-2.1450e-04, -4.6651e-04, 9.8743e-06, ..., -7.3970e-05,\n 2.3799e-05, -1.8468e-05],\n [-2.3246e-04, -3.1011e-04, 3.1966e-06, ..., 5.2319e-05,\n 4.6063e-05, 1.8782e-05],\n [-3.7946e-04, -3.3305e-04, -8.9615e-06, ..., 5.9148e-05,\n 8.9888e-05, 6.7588e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.8595e-07, 4.2781e-07, 8.6885e-10, ..., 1.1767e-07, 5.1917e-08,\n 7.2798e-09],\n [7.2047e-07, 9.4135e-07, 8.8510e-10, ..., 1.3500e-08, 8.9640e-08,\n 3.3696e-08],\n [5.8322e-07, 7.9306e-07, 1.3457e-09, ..., 4.0700e-08, 4.5807e-08,\n 2.4430e-08],\n ...,\n [6.1643e-07, 9.8033e-07, 1.3682e-09, ..., 2.7439e-08, 6.7610e-08,\n 2.2201e-08],\n [5.9508e-07, 9.3096e-07, 9.2737e-10, ..., 5.5658e-08, 7.1794e-08,\n 1.9539e-08],\n [6.7103e-07, 1.0500e-06, 1.4224e-09, ..., 2.9874e-08, 7.9149e-08,\n 1.3567e-08]], device='cuda:0')" }, "5": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-1.3261e-05, 6.3534e-06, -1.0474e-05, ..., 3.7078e-07,\n -3.9021e-06, -1.8423e-06],\n [ 1.0040e-06, -2.0151e-06, 7.3882e-07, ..., 4.1687e-05,\n -2.6064e-06, -5.9805e-07],\n [ 2.7497e-06, 1.3970e-06, -4.7957e-06, ..., 1.2630e-05,\n -2.6829e-06, 4.7263e-06],\n ...,\n [-9.5981e-06, -3.9470e-06, -6.2526e-06, ..., -9.5790e-06,\n -2.4820e-05, -1.7848e-05],\n [ 1.4061e-06, -3.1455e-05, -1.1004e-06, ..., 4.6208e-06,\n -5.8822e-06, 2.9834e-06],\n [ 8.8908e-06, 9.8183e-07, 6.9689e-06, ..., -5.4952e-06,\n -1.3601e-06, 2.3992e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.1183e-09, 2.1034e-09, 6.1219e-10, ..., 6.5076e-10, 9.4138e-10,\n 7.9195e-10],\n [4.7118e-10, 1.4755e-09, 3.6522e-10, ..., 5.3271e-09, 4.4318e-10,\n 7.4079e-10],\n [2.6475e-09, 5.7718e-10, 7.2296e-10, ..., 4.8315e-10, 6.5756e-10,\n 9.8884e-10],\n ...,\n [2.1074e-09, 1.0199e-09, 7.0296e-10, ..., 1.3752e-09, 1.0102e-09,\n 6.1798e-10],\n [8.2429e-10, 3.4237e-09, 8.2395e-10, ..., 2.2644e-09, 1.0470e-09,\n 1.3915e-09],\n [4.4429e-09, 2.8878e-10, 4.6834e-10, ..., 6.3340e-10, 1.6645e-09,\n 6.5735e-10]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[ 1.9344e-05, -4.3844e-06, -3.9807e-06, ..., 1.0638e-06,\n 3.2200e-06, 1.1846e-06],\n [-2.4820e-06, -7.4683e-07, 1.4782e-06, ..., 2.3849e-06,\n -2.9651e-07, -2.5063e-06],\n [-3.8461e-05, 4.6091e-06, -2.3092e-06, ..., 1.5943e-06,\n -6.6827e-06, 1.8238e-06],\n ...,\n [-4.8553e-06, -2.9455e-06, -1.1885e-06, ..., -2.7153e-06,\n -4.7364e-06, 1.2281e-06],\n [ 3.1686e-06, 1.8283e-05, -1.4925e-06, ..., -4.1849e-05,\n 5.7003e-07, 2.5618e-06],\n [-4.8176e-06, 3.9461e-06, 3.1710e-06, ..., -2.0360e-06,\n 5.5658e-06, -3.9552e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.9708e-09, 2.1698e-09, 4.3459e-10, ..., 4.7141e-10, 7.1263e-10,\n 5.8265e-10],\n [2.3898e-10, 1.2055e-09, 2.4603e-10, ..., 3.5086e-09, 2.8452e-10,\n 5.4737e-10],\n [2.0209e-09, 3.5109e-10, 3.7400e-10, ..., 4.0715e-10, 3.7138e-10,\n 6.9912e-10],\n ...,\n [1.1893e-09, 7.1078e-10, 5.8920e-10, ..., 8.7904e-10, 7.0051e-10,\n 4.9060e-10],\n [4.5274e-10, 3.1342e-09, 5.4684e-10, ..., 2.0259e-09, 6.5252e-10,\n 1.2674e-09],\n [3.4775e-09, 1.9601e-10, 2.4465e-10, ..., 3.7311e-10, 1.3005e-09,\n 3.9145e-10]], device='cuda:0')" }, "6": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-0.0009, 0.0025, -0.0001, ..., -0.0028, -0.0001, -0.0011],\n device='cuda:0')", - "exp_avg_sq": "tensor([1.6714e-05, 1.4319e-05, 1.8972e-05, ..., 1.5303e-05, 1.8057e-05,\n 1.7354e-05], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([ 3.6913e-04, -6.7691e-05, 6.9457e-04, ..., -9.3732e-04,\n -1.6668e-04, 4.4366e-04], device='cuda:0')", + "exp_avg_sq": "tensor([1.0980e-05, 8.5164e-06, 1.1740e-05, ..., 9.9763e-06, 1.0980e-05,\n 1.0590e-05], device='cuda:0')" }, "7": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-8.2791e-06, 1.7364e-06, -3.1005e-06, ..., -2.4857e-06,\n -6.4462e-06, -5.3115e-06],\n [-1.1708e-05, -1.2055e-05, -1.1279e-05, ..., -6.2256e-06,\n 4.0300e-06, -1.1097e-05],\n [ 2.3342e-07, 1.6688e-05, -3.3735e-06, ..., -1.3965e-05,\n 2.3437e-05, 8.0478e-06],\n ...,\n [ 1.2395e-05, -3.5972e-06, 3.0265e-06, ..., -6.6632e-06,\n -2.2389e-05, 1.4957e-06],\n [-1.8733e-06, 6.9948e-06, -1.2975e-05, ..., 1.7175e-06,\n -4.0093e-06, -4.8393e-06],\n [-5.4485e-06, -1.3062e-05, 1.3268e-05, ..., 5.1308e-06,\n 7.8862e-06, -1.3686e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[8.0351e-10, 4.7126e-10, 1.0270e-09, ..., 6.3777e-10, 1.4415e-09,\n 9.0971e-10],\n [1.4342e-09, 1.2641e-09, 1.5700e-09, ..., 1.6246e-09, 2.4461e-09,\n 1.6832e-09],\n [1.9768e-09, 1.1687e-09, 2.1909e-09, ..., 1.6694e-09, 2.4319e-09,\n 1.6430e-09],\n ...,\n [1.7806e-09, 1.5395e-09, 2.2977e-09, ..., 1.9315e-09, 2.7329e-09,\n 1.8206e-09],\n [1.5669e-09, 1.3032e-09, 1.6655e-09, ..., 1.6991e-09, 2.1206e-09,\n 1.3731e-09],\n [1.8564e-09, 1.1127e-09, 2.1880e-09, ..., 1.6197e-09, 2.1426e-09,\n 1.7312e-09]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[-2.5236e-06, -2.4313e-07, -9.9778e-07, ..., -6.0134e-06,\n -1.2107e-05, -4.5511e-06],\n [ 6.4662e-07, -7.6123e-06, -1.8934e-06, ..., -1.1629e-06,\n 8.9630e-06, 2.9747e-06],\n [ 2.1072e-06, 4.0318e-06, 9.3244e-07, ..., 4.9246e-06,\n -1.0804e-06, -4.0186e-06],\n ...,\n [-9.2428e-06, -7.1276e-06, -1.2110e-05, ..., -9.1235e-06,\n -1.2226e-05, -2.1270e-05],\n [-3.7411e-06, -5.1128e-06, 5.7573e-06, ..., -7.2627e-06,\n 7.0029e-06, 5.9699e-06],\n [ 6.0660e-06, 2.0607e-06, -7.4726e-06, ..., 6.5047e-06,\n 3.3419e-06, 3.6422e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.2213e-10, 3.0148e-10, 6.5367e-10, ..., 4.1233e-10, 9.4298e-10,\n 6.6421e-10],\n [9.3345e-10, 8.5156e-10, 9.6361e-10, ..., 1.0655e-09, 1.6165e-09,\n 1.0631e-09],\n [1.1104e-09, 7.6816e-10, 1.1835e-09, ..., 1.0129e-09, 1.4576e-09,\n 9.9300e-10],\n ...,\n [1.1547e-09, 1.0530e-09, 1.4530e-09, ..., 1.3083e-09, 1.6963e-09,\n 1.3076e-09],\n [9.9813e-10, 9.2181e-10, 1.0355e-09, ..., 1.1628e-09, 1.3689e-09,\n 8.8515e-10],\n [1.1674e-09, 6.8156e-10, 1.4471e-09, ..., 1.0959e-09, 1.3624e-09,\n 1.0884e-09]], device='cuda:0')" }, "32": { - "step": "tensor(2504.)", - "exp_avg": "tensor([3.9721e-14], device='cuda:0')", - "exp_avg_sq": "tensor([0.0006], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([3.2996e-14], device='cuda:0')", + "exp_avg_sq": "tensor([0.0002], device='cuda:0')" }, "33": { - "step": "tensor(2504.)", - "exp_avg": "tensor([ 2.6609e-16, -5.9071e-17, -2.0702e-16], device='cuda:0')", - "exp_avg_sq": "tensor([1.8850e-07, 3.1507e-07, 2.9428e-08], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([ 2.4515e-16, -6.8982e-17, -1.7617e-16], device='cuda:0')", + "exp_avg_sq": "tensor([5.3865e-08, 9.0033e-08, 8.4092e-09], device='cuda:0')" }, "34": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, -1.7438e-16, -5.0364e-17, 1.8669e-16, 2.0043e-16,\n 3.3032e-16, 3.9273e-16, 3.7967e-16, 1.5165e-15, 4.5319e-16],\n device='cuda:0')", - "exp_avg_sq": "tensor([2.4879e-04, 2.9890e-06, 2.8757e-06, 2.2628e-06, 2.7188e-06, 2.5230e-06,\n 4.2297e-06, 3.7827e-06, 4.0851e-06, 2.6603e-06], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([-5.6052e-45, -1.5779e-16, -4.5071e-17, -1.9336e-17, 1.7088e-16,\n 2.8636e-16, 3.4053e-16, 3.3500e-16, 1.3364e-15, 3.9669e-16],\n device='cuda:0')", + "exp_avg_sq": "tensor([7.1094e-05, 8.5414e-07, 8.2176e-07, 6.4662e-07, 7.7692e-07, 7.2097e-07,\n 1.2087e-06, 1.0809e-06, 1.1673e-06, 7.6020e-07], device='cuda:0')" }, "36": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[ 8.3168e-18, 2.1829e-19, 1.7099e-18, ..., 1.4257e-18,\n 6.9364e-19, 4.3117e-19],\n [-2.6836e-20, -1.7936e-20, 1.8920e-19, ..., -1.9992e-19,\n -3.5933e-19, 3.3921e-20],\n [ 1.0904e-19, 9.6095e-20, 9.8408e-19, ..., 2.6608e-19,\n 3.6386e-19, 3.5248e-19],\n ...,\n [ 7.7314e-19, 9.5425e-20, 1.1226e-18, ..., 1.3580e-19,\n 3.4710e-19, 1.7801e-19],\n [-9.8653e-18, -5.5100e-19, -2.8829e-18, ..., -5.6397e-19,\n -1.3692e-18, -4.5664e-19],\n [ 1.2407e-18, 2.3005e-19, 1.3670e-18, ..., 8.1638e-20,\n 7.5523e-20, 2.5153e-19]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.3963e-11, 7.4848e-12, 2.0227e-11, ..., 2.3241e-11, 2.9310e-11,\n 3.9550e-11],\n [9.3863e-14, 1.6839e-13, 2.3787e-13, ..., 1.3663e-13, 1.1640e-13,\n 2.2594e-13],\n [6.8608e-13, 3.6216e-13, 8.8751e-13, ..., 3.7980e-13, 1.3976e-12,\n 7.3632e-13],\n ...,\n [8.0393e-13, 5.7553e-13, 5.3470e-13, ..., 9.1074e-13, 6.1511e-13,\n 1.3178e-12],\n [1.7475e-11, 4.4697e-12, 1.3879e-11, ..., 9.2805e-12, 1.7463e-11,\n 1.9010e-11],\n [1.2039e-11, 6.4661e-12, 1.4621e-11, ..., 5.4949e-12, 1.5398e-11,\n 1.9356e-11]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[ 4.5294e-19, 3.4204e-19, 1.9497e-19, ..., 5.8787e-19,\n 3.4717e-19, 2.4887e-19],\n [ 2.4285e-19, 8.0816e-20, -3.2845e-20, ..., 2.8686e-18,\n 6.6140e-20, 1.8168e-20],\n [ 5.1541e-19, 3.3712e-19, 1.1623e-19, ..., 4.7309e-19,\n 2.7430e-20, 7.1804e-20],\n ...,\n [ 2.5658e-19, 1.5340e-20, 1.3976e-19, ..., -8.9581e-19,\n 2.9776e-19, 4.8070e-20],\n [-1.3401e-18, -9.5119e-19, -4.1277e-19, ..., 1.2395e-17,\n -9.8085e-19, -6.1156e-19],\n [ 5.0215e-19, 4.7688e-19, 3.4981e-19, ..., 1.0548e-18,\n 3.8249e-19, 1.9957e-19]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.7051e-12, 2.1388e-12, 5.7799e-12, ..., 6.6412e-12, 8.3756e-12,\n 1.1302e-11],\n [2.6822e-14, 4.8117e-14, 6.7975e-14, ..., 3.9042e-14, 3.3263e-14,\n 6.4564e-14],\n [1.9605e-13, 1.0349e-13, 2.5361e-13, ..., 1.0853e-13, 3.9937e-13,\n 2.1041e-13],\n ...,\n [2.2973e-13, 1.6446e-13, 1.5279e-13, ..., 2.6025e-13, 1.7577e-13,\n 3.7656e-13],\n [4.9936e-12, 1.2773e-12, 3.9659e-12, ..., 2.6520e-12, 4.9901e-12,\n 5.4322e-12],\n [3.4404e-12, 1.8478e-12, 4.1782e-12, ..., 1.5702e-12, 4.4002e-12,\n 5.5310e-12]], device='cuda:0')" }, "37": { - "step": "tensor(2504.)", - "exp_avg": "tensor([ 5.2595e-16, -2.7454e-17, 2.4006e-16, -2.7388e-16, -5.9997e-17,\n 6.8454e-16, 1.0007e-16, -7.2295e-18, -8.2284e-16, -1.4315e-15,\n -1.1876e-15, 6.7728e-16, -1.2127e-15, 3.6558e-16, 2.3116e-16,\n 4.9012e-16, -9.8529e-16, 6.3402e-16, 7.8463e-16, -3.0996e-17,\n -1.0433e-16, 1.6855e-16, 5.4041e-17, 8.3965e-16, -1.3227e-17,\n -9.1586e-16, 2.6664e-16, -1.4094e-15, -6.1298e-16, 4.5664e-17,\n -1.9517e-16, -1.7569e-15, 6.0885e-16, 1.6712e-15, -4.4677e-16,\n -1.4181e-16, 1.0021e-16, 3.5701e-16, 5.4799e-17, 6.4747e-16,\n -1.3637e-17, -5.4556e-16, 7.6361e-16, 6.8801e-16, -6.1754e-16,\n 5.4955e-17, 1.0468e-15, -8.2998e-16, 5.2741e-16, 3.5617e-16,\n 1.3388e-15, 3.1867e-17, -3.2558e-16, 3.3467e-16, -4.6678e-17,\n 2.1875e-16, 3.3682e-16, -2.8381e-16, -3.7516e-16, -2.4670e-16,\n 2.2738e-16, 1.9389e-16, -1.9144e-15, -1.6390e-15, -5.8529e-16,\n 2.3566e-16, 1.4191e-16, 3.5264e-16, -5.4274e-16, 8.9693e-17,\n -3.8636e-18, 1.0859e-15, 3.6125e-17, -1.3565e-16, -2.1814e-17,\n 7.6698e-17, -1.4915e-16, -5.0243e-16, 2.0653e-16, -7.5903e-17,\n -3.3677e-16, -7.6041e-16, -4.8185e-17, -8.9007e-18, -1.4361e-15,\n 1.0493e-16, -2.7248e-17, 1.2294e-15, -5.3925e-17, -3.7875e-16,\n 8.8367e-16, 4.6364e-16, -7.1015e-17, 1.1891e-16, -1.2228e-17,\n -2.2270e-16, 5.8874e-16, -1.6801e-15, -9.7659e-17, -4.3644e-17,\n 1.0008e-15, 9.6573e-16, 1.2241e-15, 1.5346e-16, 3.2266e-16,\n 1.6259e-15, -1.6321e-15, 1.0680e-18, 4.7013e-17, 9.0871e-16,\n 3.9740e-16, 3.1927e-16, -2.8306e-16, 1.3026e-15, 4.6996e-17,\n -2.8193e-16, -3.4921e-16, 5.9492e-16, 3.7998e-16, 1.6380e-16,\n -8.8281e-16, 1.6790e-16, -2.2447e-16, -9.3166e-16, 6.0176e-16,\n 2.5679e-16, -3.9590e-16, 7.2232e-16, -4.1458e-17, 4.5170e-16,\n 4.5806e-16, -2.9831e-16, -4.5326e-16, 2.4702e-16, 9.0884e-17,\n -1.8143e-15, 4.1482e-17, 9.9490e-18, -1.6096e-16, 7.2276e-16,\n 6.9062e-17, -8.4587e-16, -7.4171e-16, 2.4391e-17, -6.8616e-18,\n -9.0999e-17, -8.8294e-16, -9.9505e-17, 2.7512e-16, 6.6081e-16,\n 3.9880e-17, -1.7291e-15, -4.8208e-16, 1.8820e-15, -1.1945e-16,\n 2.9760e-17, -1.6630e-15, 2.9099e-16, -3.5119e-16, -9.9569e-16,\n -8.0788e-16, -3.0523e-17, -1.0784e-17, -6.3138e-16, 1.0241e-15,\n 4.2662e-16, -6.2524e-16, 6.0913e-16, 1.3804e-15, 1.3588e-16,\n -3.3477e-16, 1.9655e-16, 8.4196e-16, 7.1518e-16, -6.8713e-17,\n -2.9352e-16, 3.5202e-16, 6.7449e-16, 1.3832e-15, 1.3868e-15,\n 3.3476e-16, 1.2911e-16, 5.4110e-16, 1.2341e-16, -3.4894e-16,\n -6.8447e-16, -8.7175e-17, 6.5904e-17, 5.5738e-16, 2.8082e-17,\n -2.9641e-16, 7.6563e-17, -2.2441e-16, 4.4375e-16, 2.5643e-16,\n 1.7552e-16, -1.6708e-17, 1.5453e-16, -1.9017e-15, 7.9176e-16,\n 2.3111e-16, -1.8723e-16, 3.5097e-16, -2.2150e-15, -1.5893e-15,\n 6.0377e-16, -8.5571e-16, 2.3988e-16, 2.9069e-16, -7.8282e-16,\n 1.1568e-15, 2.7363e-16, -1.6523e-16, 1.3135e-16, 2.6921e-16,\n 2.0185e-15, 8.6086e-16, -6.0837e-16, 3.6176e-16, -6.8459e-17,\n 4.2374e-16, -4.0043e-16, -4.0974e-18, -1.6454e-15, -6.1177e-16,\n -9.4657e-16, -7.3329e-17, 2.1506e-16, -1.3470e-16, 2.9567e-16,\n 6.0816e-16, 5.9771e-16, -4.8754e-16, -1.2129e-15, -2.4020e-16,\n 4.6969e-16, 1.0009e-15, 5.3494e-16, -6.6514e-16, -6.8513e-17,\n 9.2599e-17, 3.3053e-16, -9.0346e-16, 6.9467e-16, 9.1681e-16,\n 5.8882e-16, -1.5894e-15, 3.9420e-16, -9.8796e-16, -7.6165e-18,\n -7.9888e-17, 5.4631e-17, 1.3186e-16, 2.1650e-16, -8.8646e-16,\n 2.6856e-16], device='cuda:0')", - "exp_avg_sq": "tensor([6.5048e-06, 6.2764e-08, 1.6305e-07, 1.2725e-06, 2.8950e-09, 2.4974e-06,\n 1.8146e-08, 6.8849e-07, 6.8484e-08, 2.4307e-06, 5.9131e-07, 1.0336e-05,\n 5.1389e-06, 2.7563e-07, 4.7098e-07, 1.4243e-06, 2.4955e-07, 3.0390e-07,\n 2.0621e-06, 4.2013e-08, 1.7255e-09, 9.6215e-10, 3.6474e-07, 1.0403e-06,\n 1.0091e-09, 2.6633e-05, 4.9782e-07, 1.6270e-05, 5.2580e-07, 1.2986e-06,\n 3.9089e-07, 4.2643e-06, 8.5766e-07, 2.7607e-06, 7.0966e-07, 1.4057e-06,\n 2.7755e-07, 1.5125e-05, 1.6808e-09, 1.6790e-07, 1.9049e-09, 4.1042e-08,\n 9.0300e-09, 1.4353e-09, 1.4499e-06, 1.1682e-05, 7.7394e-08, 1.1161e-05,\n 1.0468e-05, 1.3560e-07, 2.6224e-06, 1.2259e-06, 2.9201e-08, 2.6746e-07,\n 6.6076e-07, 1.4105e-06, 1.3037e-08, 6.3126e-08, 1.7915e-06, 1.1780e-06,\n 1.9444e-06, 1.1058e-08, 8.3545e-06, 9.1402e-06, 7.6040e-09, 1.5023e-08,\n 1.5992e-07, 6.2160e-06, 1.3008e-07, 2.8639e-06, 5.5775e-08, 1.6864e-07,\n 1.3049e-07, 6.7469e-07, 8.3235e-06, 2.9058e-10, 1.1129e-08, 2.6156e-07,\n 2.7757e-06, 2.2684e-07, 7.1371e-08, 4.0365e-08, 5.2587e-06, 4.8909e-09,\n 1.2317e-07, 3.4307e-07, 1.7024e-06, 8.4045e-06, 1.6383e-06, 7.6558e-08,\n 1.1864e-05, 2.5756e-06, 8.4499e-09, 4.9156e-07, 4.5609e-08, 6.5022e-08,\n 1.0683e-05, 1.1362e-06, 8.7625e-08, 2.1464e-08, 1.7855e-06, 6.9429e-07,\n 9.3524e-06, 1.9687e-07, 7.9800e-06, 8.5981e-06, 8.8425e-09, 1.0318e-06,\n 1.1231e-07, 1.8454e-07, 2.2012e-06, 1.0629e-09, 1.6098e-08, 2.2791e-06,\n 9.3989e-09, 2.0837e-06, 5.7026e-06, 8.2583e-09, 5.3486e-07, 2.7874e-07,\n 1.7784e-06, 2.8561e-07, 2.7183e-08, 6.2926e-08, 1.3088e-06, 5.2599e-06,\n 1.0485e-06, 6.9172e-06, 1.6696e-06, 1.9751e-05, 3.0525e-07, 3.3376e-08,\n 2.6659e-07, 4.3741e-06, 7.5665e-10, 3.8989e-06, 1.1101e-07, 1.1949e-09,\n 1.1962e-06, 1.6572e-06, 2.9113e-06, 9.6254e-06, 2.8975e-08, 1.7977e-08,\n 3.9865e-09, 9.0767e-08, 6.4220e-06, 4.5928e-07, 5.9048e-07, 8.1511e-07,\n 2.1914e-09, 2.9051e-05, 6.7274e-08, 2.1228e-07, 1.2004e-09, 4.9897e-06,\n 2.2539e-05, 1.1403e-07, 1.4745e-07, 1.8535e-06, 3.4192e-06, 9.3610e-09,\n 1.2721e-09, 1.0192e-05, 2.5482e-06, 2.9685e-07, 1.0121e-07, 4.6167e-06,\n 2.5725e-06, 2.3455e-06, 5.1003e-06, 1.7741e-06, 9.9692e-06, 8.3351e-08,\n 5.9797e-07, 2.7764e-07, 8.9662e-08, 2.6697e-06, 2.1265e-07, 2.7556e-06,\n 1.2579e-05, 3.4967e-09, 3.2827e-06, 9.3964e-07, 8.0839e-07, 5.9618e-06,\n 7.8728e-07, 4.2066e-07, 6.6542e-07, 6.1116e-06, 2.1124e-07, 4.1966e-07,\n 6.1007e-07, 6.6481e-06, 4.6556e-06, 4.9043e-08, 1.6329e-08, 1.1678e-06,\n 6.7474e-07, 1.5416e-06, 5.7174e-07, 3.4901e-05, 5.7243e-07, 4.9591e-06,\n 2.4395e-06, 7.7098e-06, 3.9178e-06, 2.7054e-09, 2.3628e-06, 4.5947e-06,\n 1.4751e-05, 4.0358e-09, 2.7526e-07, 7.3400e-09, 1.1191e-07, 2.3983e-06,\n 4.3945e-07, 2.1161e-08, 3.7848e-09, 9.8890e-09, 6.7392e-06, 3.6904e-06,\n 9.4878e-07, 1.0071e-05, 1.1451e-06, 4.1767e-06, 4.6102e-08, 1.1968e-06,\n 2.4285e-07, 9.7638e-07, 9.8238e-06, 5.1227e-06, 1.2042e-06, 1.5907e-06,\n 2.3299e-07, 5.0609e-07, 6.3688e-08, 2.2222e-05, 6.1204e-07, 9.0405e-07,\n 1.1631e-06, 1.4646e-05, 1.0925e-06, 6.2298e-08, 1.4114e-07, 3.2238e-08,\n 1.4624e-06, 6.4398e-08, 6.6658e-08, 4.9442e-07, 2.2357e-07, 5.1152e-06,\n 8.4085e-08, 2.6741e-07, 3.3490e-06, 3.7993e-06], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([ 3.4735e-16, -2.5859e-17, 1.2601e-16, 1.0585e-16, -3.5206e-17,\n 5.9722e-16, 1.8142e-16, -1.6605e-17, -9.7797e-16, -9.7171e-16,\n -1.2746e-15, 6.4251e-16, -8.5013e-16, 3.6271e-16, 2.0346e-16,\n 3.3970e-16, -7.5637e-16, 7.4393e-16, 6.8761e-16, -3.3277e-17,\n -1.0889e-16, 1.6614e-16, 7.3648e-17, 1.0529e-15, 3.5190e-17,\n -1.0526e-15, 6.9280e-17, -1.2865e-15, -7.5915e-16, 1.2386e-17,\n -1.9885e-16, -1.3212e-15, 5.2001e-16, 1.3064e-15, -3.7440e-16,\n -7.8038e-17, 1.1574e-16, 3.0402e-16, 4.6083e-17, 5.2604e-16,\n 1.1966e-16, -3.4510e-16, 5.9856e-16, 6.5749e-16, -8.8005e-16,\n 9.3166e-17, 7.9544e-16, -9.9323e-16, 5.0098e-16, 5.4516e-16,\n 9.2985e-16, 4.7202e-17, -1.4667e-16, 3.0452e-16, 2.8799e-16,\n 2.6944e-16, 3.4654e-16, -3.0260e-16, -1.0171e-17, -8.4553e-17,\n 1.7992e-16, 8.2747e-17, -1.6611e-15, -1.5959e-15, -7.2520e-16,\n 2.0618e-16, 3.3464e-17, 3.1917e-16, -2.0778e-16, 2.7676e-16,\n -9.8759e-17, 9.8969e-16, 2.0412e-17, -6.5600e-17, 1.5278e-17,\n 1.9070e-16, -1.3535e-16, -3.8596e-16, 1.8917e-16, -8.5366e-17,\n -2.7388e-16, -6.1232e-16, -8.9073e-17, -1.2106e-17, -9.5570e-16,\n 1.0408e-16, -1.1119e-16, 1.0434e-15, -5.6980e-17, -3.7134e-16,\n 7.9973e-16, 3.9838e-16, -6.1434e-17, 1.0975e-16, -5.6588e-17,\n -1.3159e-16, 7.0617e-16, -1.2759e-15, -5.4282e-17, 2.6406e-17,\n 6.7993e-16, 8.1273e-16, 9.5437e-16, 1.7651e-16, 3.3083e-16,\n 8.3058e-16, -1.1675e-15, -2.4181e-17, 8.5481e-17, 8.4382e-16,\n 3.3986e-16, 3.3851e-16, -1.6633e-16, 8.4256e-16, 2.4516e-17,\n -1.3641e-16, -8.4563e-16, 3.8385e-16, 2.7168e-16, 1.1079e-16,\n -9.6029e-16, 1.0933e-16, -1.8343e-16, -8.2675e-16, 6.1709e-16,\n 2.4719e-16, -3.3665e-16, 6.0756e-16, 5.2940e-18, 4.6090e-16,\n 3.5988e-16, -1.8439e-16, -3.4276e-16, 2.5628e-16, 7.3527e-17,\n -1.5409e-15, 7.7813e-17, 5.4999e-17, -2.3959e-17, 6.0771e-16,\n 3.6123e-17, -8.1663e-16, -5.8114e-16, 5.9235e-17, 2.5704e-17,\n -5.4332e-17, -7.2030e-16, -8.8894e-17, 2.3956e-16, 5.9759e-16,\n 4.3970e-17, -1.4351e-15, -3.5471e-16, 1.5354e-15, -1.0347e-16,\n 6.0742e-17, -1.5143e-15, 3.6416e-16, -3.2721e-16, -9.0099e-16,\n -6.3795e-16, -1.0826e-16, 2.3890e-17, -7.0842e-16, 9.8169e-16,\n 3.7367e-16, -4.0266e-16, 5.2823e-16, 1.0747e-15, 1.4045e-16,\n -7.1214e-16, 1.0911e-16, 6.6285e-16, 7.0205e-16, 7.5631e-17,\n -2.1780e-16, 2.7693e-16, 4.0579e-16, 8.6428e-16, 1.0211e-15,\n 3.2795e-16, 1.2642e-16, 5.4052e-16, 8.4167e-17, -2.7857e-16,\n -6.5072e-16, -4.7687e-17, -1.2458e-17, 4.9890e-16, 7.4180e-18,\n -2.8372e-16, 1.0705e-16, -1.5712e-16, 2.0327e-16, 6.7319e-17,\n 1.3072e-16, -9.6078e-18, 8.1719e-17, -1.6428e-15, 7.3647e-16,\n 1.6732e-16, -4.0822e-16, 4.7146e-16, -1.7310e-15, -1.1965e-15,\n 5.7607e-16, -1.0607e-15, 1.5674e-16, 3.9303e-17, -6.7948e-16,\n 1.0059e-15, 2.6879e-16, -1.3021e-16, 1.0750e-16, 3.1803e-16,\n 2.0104e-15, 7.7919e-16, -3.6475e-16, 2.9165e-16, -2.0337e-17,\n 3.6613e-16, -3.4857e-16, -4.7715e-17, -1.0025e-15, -5.4202e-16,\n -8.5028e-16, -7.3384e-17, 2.5049e-16, -9.2844e-17, 3.3457e-16,\n 4.8082e-16, 3.6655e-16, -4.1048e-16, -1.0616e-15, -2.0433e-16,\n 3.3430e-16, 7.7415e-16, 4.4150e-16, -5.7455e-16, -1.3759e-16,\n 7.8468e-17, 2.8579e-16, -7.1702e-16, 6.0135e-16, 8.0261e-16,\n 4.0171e-16, -1.5693e-15, 3.5079e-16, -7.0865e-16, -7.7718e-17,\n -7.6112e-17, 4.8426e-17, 4.9742e-17, 1.6227e-16, -5.0690e-16,\n 3.1273e-16], device='cuda:0')", + "exp_avg_sq": "tensor([1.8588e-06, 1.7935e-08, 4.6593e-08, 3.6362e-07, 8.2726e-10, 7.1365e-07,\n 5.1854e-09, 1.9674e-07, 1.9570e-08, 6.9459e-07, 1.6897e-07, 2.9536e-06,\n 1.4685e-06, 7.8762e-08, 1.3459e-07, 4.0700e-07, 7.1312e-08, 8.6842e-08,\n 5.8925e-07, 1.2006e-08, 4.9307e-10, 2.7494e-10, 1.0423e-07, 2.9727e-07,\n 2.8837e-10, 7.6105e-06, 1.4226e-07, 4.6494e-06, 1.5025e-07, 3.7109e-07,\n 1.1170e-07, 1.2186e-06, 2.4508e-07, 7.8888e-07, 2.0279e-07, 4.0170e-07,\n 7.9313e-08, 4.3220e-06, 4.8029e-10, 4.7979e-08, 5.4435e-10, 1.1728e-08,\n 2.5804e-09, 4.1014e-10, 4.1432e-07, 3.3382e-06, 2.2116e-08, 3.1892e-06,\n 2.9913e-06, 3.8748e-08, 7.4936e-07, 3.5031e-07, 8.3446e-09, 7.6430e-08,\n 1.8882e-07, 4.0308e-07, 3.7254e-09, 1.8039e-08, 5.1195e-07, 3.3662e-07,\n 5.5564e-07, 3.1600e-09, 2.3874e-06, 2.6119e-06, 2.1729e-09, 4.2930e-09,\n 4.5698e-08, 1.7763e-06, 3.7171e-08, 8.1837e-07, 1.5938e-08, 4.8190e-08,\n 3.7289e-08, 1.9280e-07, 2.3785e-06, 8.3036e-11, 3.1803e-09, 7.4744e-08,\n 7.9318e-07, 6.4820e-08, 2.0395e-08, 1.1534e-08, 1.5027e-06, 1.3976e-09,\n 3.5197e-08, 9.8034e-08, 4.8647e-07, 2.4017e-06, 4.6815e-07, 2.1877e-08,\n 3.3903e-06, 7.3599e-07, 2.4146e-09, 1.4047e-07, 1.3033e-08, 1.8581e-08,\n 3.0527e-06, 3.2467e-07, 2.5039e-08, 6.1336e-09, 5.1022e-07, 1.9840e-07,\n 2.6725e-06, 5.6256e-08, 2.2804e-06, 2.4570e-06, 2.5268e-09, 2.9484e-07,\n 3.2093e-08, 5.2732e-08, 6.2901e-07, 3.0373e-10, 4.6002e-09, 6.5127e-07,\n 2.6858e-09, 5.9543e-07, 1.6296e-06, 2.3599e-09, 1.5284e-07, 7.9653e-08,\n 5.0820e-07, 8.1615e-08, 7.7677e-09, 1.7982e-08, 3.7399e-07, 1.5031e-06,\n 2.9962e-07, 1.9766e-06, 4.7711e-07, 5.6440e-06, 8.7228e-08, 9.5375e-09,\n 7.6180e-08, 1.2499e-06, 2.1622e-10, 1.1141e-06, 3.1723e-08, 3.4147e-10,\n 3.4181e-07, 4.7357e-07, 8.3193e-07, 2.7505e-06, 8.2799e-09, 5.1372e-09,\n 1.1392e-09, 2.5937e-08, 1.8351e-06, 1.3124e-07, 1.6873e-07, 2.3292e-07,\n 6.2620e-10, 8.3015e-06, 1.9224e-08, 6.0661e-08, 3.4302e-10, 1.4259e-06,\n 6.4407e-06, 3.2585e-08, 4.2135e-08, 5.2967e-07, 9.7705e-07, 2.6750e-09,\n 3.6352e-10, 2.9124e-06, 7.2816e-07, 8.4827e-08, 2.8922e-08, 1.3193e-06,\n 7.3512e-07, 6.7024e-07, 1.4574e-06, 5.0697e-07, 2.8488e-06, 2.3818e-08,\n 1.7087e-07, 7.9339e-08, 2.5622e-08, 7.6290e-07, 6.0765e-08, 7.8745e-07,\n 3.5947e-06, 9.9921e-10, 9.3806e-07, 2.6851e-07, 2.3100e-07, 1.7036e-06,\n 2.2497e-07, 1.2021e-07, 1.9015e-07, 1.7464e-06, 6.0363e-08, 1.1992e-07,\n 1.7433e-07, 1.8997e-06, 1.3304e-06, 1.4014e-08, 4.6661e-09, 3.3370e-07,\n 1.9281e-07, 4.4051e-07, 1.6338e-07, 9.9731e-06, 1.6358e-07, 1.4171e-06,\n 6.9712e-07, 2.2031e-06, 1.1196e-06, 7.7308e-10, 6.7519e-07, 1.3130e-06,\n 4.2153e-06, 1.1533e-09, 7.8658e-08, 2.0975e-09, 3.1980e-08, 6.8533e-07,\n 1.2558e-07, 6.0468e-09, 1.0815e-09, 2.8259e-09, 1.9258e-06, 1.0546e-06,\n 2.7112e-07, 2.8778e-06, 3.2721e-07, 1.1935e-06, 1.3174e-08, 3.4198e-07,\n 6.9396e-08, 2.7901e-07, 2.8072e-06, 1.4638e-06, 3.4410e-07, 4.5455e-07,\n 6.6579e-08, 1.4462e-07, 1.8199e-08, 6.3501e-06, 1.7489e-07, 2.5834e-07,\n 3.3237e-07, 4.1852e-06, 3.1220e-07, 1.7802e-08, 4.0332e-08, 9.2124e-09,\n 4.1789e-07, 1.8402e-08, 1.9048e-08, 1.4128e-07, 6.3886e-08, 1.4617e-06,\n 2.4028e-08, 7.6415e-08, 9.5701e-07, 1.0857e-06], device='cuda:0')" }, "38": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-2.6566e-18, -5.0547e-18, -2.2466e-19, -1.4595e-16, 1.1182e-18,\n -5.1931e-20, -4.2983e-17, -6.4656e-19, -1.5448e-17, -1.8149e-16,\n -4.5486e-17, -2.3576e-19, -2.0221e-16, -2.2379e-17, 5.2068e-19,\n -8.5848e-17, -2.8797e-17, -1.1185e-18, -1.3595e-18, 2.3387e-19,\n -2.2594e-18, 7.6039e-19, 1.0976e-18, -5.0613e-17, -1.2036e-18,\n -2.3433e-16, 1.2591e-18, -1.8189e-16, -1.3120e-16, -7.1140e-19,\n -3.6270e-18, -1.4464e-16, -9.0587e-18, 2.9486e-18, 3.6994e-18,\n -8.9248e-19, -1.4340e-18, -4.2516e-19, 1.8748e-19, 1.5438e-20,\n -1.5242e-19, -3.1915e-17, -1.4893e-18, -1.5249e-18, -1.2192e-17,\n 3.7718e-18, -7.9411e-17, -1.9419e-16, -4.3054e-18, -5.1101e-17,\n -6.9883e-17, -2.7563e-19, -1.9244e-17, -2.1462e-18, -5.9352e-17,\n -1.3468e-17, 3.8008e-19, -2.9834e-18, -6.8933e-17, -1.8839e-16,\n 2.4223e-19, -4.7812e-19, -1.0161e-16, -1.9745e-16, -6.2950e-18,\n 4.0009e-19, -2.2950e-19, 1.3722e-20, -2.5291e-17, 1.1277e-18,\n -1.3800e-17, -8.8447e-19, -2.6588e-20, -3.2921e-18, -1.6178e-16,\n 2.0058e-18, -3.8171e-18, 4.8194e-18, 1.8735e-19, -4.1583e-18,\n -4.7891e-18, -1.2138e-17, -4.1569e-18, -1.7187e-18, -6.2959e-17,\n 6.0542e-19, -1.5505e-17, -3.5519e-19, 2.7992e-18, -4.4517e-17,\n -3.5960e-17, 8.6813e-20, -5.9064e-18, -3.8104e-19, -4.0148e-18,\n -5.2374e-19, 5.4142e-19, -1.2702e-16, -6.2590e-18, 6.2094e-19,\n -1.6888e-17, -6.2923e-18, -1.0593e-16, 4.5446e-19, -2.5720e-20,\n -8.8836e-17, -5.3660e-17, 2.2332e-19, -1.0391e-18, 2.0651e-19,\n -9.6792e-17, 1.0424e-19, -3.3265e-18, -3.2504e-17, 8.2079e-19,\n -8.4264e-18, -2.1189e-16, -1.9410e-18, -1.4059e-17, -1.1218e-16,\n -5.7319e-17, -2.6896e-19, -5.8944e-18, -1.7380e-17, -1.8436e-19,\n -1.1625e-16, 8.7861e-19, -8.9700e-19, -2.8370e-19, -1.2984e-19,\n 4.0412e-19, -3.0161e-19, -1.1878e-18, -1.8225e-20, 2.6375e-19,\n -1.9310e-16, 8.1585e-19, 1.5680e-18, 2.6011e-18, -2.9524e-19,\n -3.0669e-18, -1.3994e-16, 6.4005e-18, 4.0483e-19, -3.5147e-19,\n -4.6804e-19, -1.5736e-16, -5.0470e-18, 2.0799e-19, -2.1921e-19,\n 3.1107e-19, -2.6041e-16, 9.3369e-19, 1.9255e-18, -5.6294e-18,\n 3.4478e-19, -1.7948e-16, -6.0400e-19, 3.4252e-18, -1.4515e-16,\n -2.0858e-17, -1.3505e-17, -1.2809e-18, -2.0022e-16, -1.9310e-17,\n -4.7796e-19, -7.8468e-17, -9.6310e-18, -7.5724e-17, 2.1461e-19,\n -1.1882e-16, -9.5302e-17, -3.3990e-18, 2.1614e-20, -2.9226e-17,\n 2.4125e-18, 1.2708e-19, -5.6767e-17, 2.4483e-18, -9.1353e-18,\n -2.2124e-18, 5.5759e-19, -3.0422e-19, -1.3172e-17, 1.9288e-18,\n -5.7567e-18, -1.9434e-18, -5.9598e-18, -2.6174e-19, -1.2703e-18,\n 1.0858e-18, 2.9441e-19, 7.7891e-19, -8.6502e-17, 2.7211e-19,\n -1.1036e-19, -1.4751e-18, -1.1053e-16, -4.0741e-17, -6.8292e-17,\n 4.9682e-19, -1.6631e-16, -1.2790e-17, -1.3536e-16, -9.8230e-17,\n -1.2561e-19, -1.5627e-16, 5.0773e-19, -8.6747e-17, -6.1772e-17,\n -4.1791e-19, 1.0836e-18, -3.4294e-18, 7.9862e-20, -3.9178e-18,\n -5.2846e-18, -2.7308e-17, 6.8505e-18, -5.4431e-19, -1.1606e-18,\n -8.4643e-20, -8.0648e-19, -1.7404e-16, -1.4904e-16, -1.6176e-17,\n -2.5370e-17, -7.6688e-19, 1.1635e-19, 6.6873e-19, 1.0887e-18,\n -8.1692e-19, -8.9338e-17, 3.8514e-18, -5.2865e-17, -5.1359e-18,\n -1.7226e-17, -7.9306e-19, -1.1087e-17, -2.8723e-17, -4.1936e-19,\n -7.1618e-19, -1.3582e-21, 1.1146e-17, 1.1377e-19, 2.7137e-19,\n -2.2038e-19, -6.7899e-17, 5.7078e-19, -5.1056e-17, -2.5207e-18,\n -2.9013e-18, 7.5897e-20, 9.8081e-19, 4.3500e-19, -6.1355e-17,\n -3.0575e-20], device='cuda:0')", - "exp_avg_sq": "tensor([1.3918e-09, 1.7365e-10, 1.6396e-12, 5.6503e-09, 1.7191e-11, 1.4545e-11,\n 6.8650e-10, 3.5493e-12, 1.5234e-12, 9.8510e-09, 1.3715e-10, 7.4841e-10,\n 1.7982e-08, 6.1003e-10, 2.9852e-12, 2.4121e-09, 2.1220e-12, 3.3550e-12,\n 3.7225e-12, 1.5489e-11, 4.0558e-13, 1.3155e-13, 1.0510e-11, 1.2929e-09,\n 2.1417e-12, 4.9916e-08, 7.9850e-13, 1.7416e-08, 6.8230e-09, 1.2845e-11,\n 7.2288e-13, 4.8120e-09, 4.7590e-12, 1.4118e-10, 1.5915e-13, 3.8831e-12,\n 1.4602e-11, 1.8591e-09, 2.2881e-12, 2.1259e-11, 1.0823e-12, 3.2018e-12,\n 2.0332e-12, 4.9943e-13, 7.1707e-10, 7.1311e-11, 7.6353e-09, 1.9321e-08,\n 1.3955e-09, 6.0778e-10, 7.2080e-09, 5.3325e-12, 6.6249e-13, 4.3415e-11,\n 2.4583e-09, 5.1119e-10, 5.5754e-12, 1.9512e-13, 1.0122e-09, 1.1696e-08,\n 4.2307e-11, 4.2370e-13, 1.5476e-09, 1.1102e-08, 3.7571e-12, 5.1438e-12,\n 2.2582e-11, 1.6901e-10, 1.5417e-09, 4.9161e-11, 1.5954e-12, 8.3668e-12,\n 4.4558e-14, 2.4536e-12, 2.0366e-08, 6.8014e-13, 6.7087e-14, 1.9960e-13,\n 5.7946e-12, 1.9692e-10, 8.1964e-14, 5.2532e-13, 1.1422e-09, 8.8893e-14,\n 3.7558e-11, 3.7259e-12, 1.3989e-09, 2.8398e-10, 7.9920e-11, 2.7708e-09,\n 1.9654e-09, 2.0293e-10, 1.8855e-12, 5.2256e-13, 7.8694e-11, 5.7434e-11,\n 6.7583e-10, 1.2048e-09, 2.6820e-13, 4.7302e-13, 3.5769e-10, 2.2733e-12,\n 1.8863e-08, 3.9004e-12, 2.6364e-10, 1.9103e-08, 1.8111e-12, 2.4490e-13,\n 2.0014e-11, 1.2107e-11, 5.3612e-09, 3.8030e-12, 2.4166e-14, 1.7733e-09,\n 3.0766e-11, 8.1521e-10, 3.2321e-08, 1.9743e-11, 9.2253e-10, 3.3205e-09,\n 5.4209e-09, 3.2319e-12, 1.2812e-13, 9.7856e-13, 1.7693e-11, 1.0763e-08,\n 5.2104e-13, 6.7554e-10, 2.1164e-12, 2.0891e-09, 3.1601e-12, 1.5493e-11,\n 9.8562e-14, 2.8910e-10, 1.8891e-12, 8.9193e-09, 3.4031e-11, 1.0588e-12,\n 1.7581e-13, 1.5704e-12, 1.4703e-10, 7.6673e-09, 1.0772e-13, 5.2863e-12,\n 7.6568e-12, 2.4242e-11, 8.9621e-09, 9.6012e-11, 5.3793e-11, 2.2913e-11,\n 2.9851e-12, 5.3566e-08, 1.4344e-14, 6.6512e-11, 7.2035e-13, 5.9728e-11,\n 1.5802e-08, 6.2977e-13, 8.4790e-12, 3.3515e-09, 1.7893e-09, 2.6123e-12,\n 1.6830e-12, 2.6987e-08, 5.6612e-10, 6.3612e-12, 4.9432e-10, 1.9545e-09,\n 8.9688e-09, 8.3356e-11, 5.2685e-09, 3.2132e-09, 6.3315e-10, 1.6005e-11,\n 5.7419e-10, 1.5763e-14, 5.8589e-12, 1.7070e-09, 1.9674e-11, 3.7754e-10,\n 2.3504e-09, 2.0283e-12, 1.1362e-11, 1.2342e-09, 3.3889e-13, 1.2520e-09,\n 9.1536e-12, 2.3151e-10, 3.5239e-11, 8.3078e-11, 1.2608e-13, 3.0382e-14,\n 7.2111e-13, 4.1751e-09, 5.7882e-11, 8.9653e-12, 2.7828e-11, 4.4056e-09,\n 1.1389e-10, 5.6232e-09, 1.9661e-14, 4.4326e-08, 1.5924e-09, 1.8397e-09,\n 7.7280e-10, 5.2606e-10, 7.8499e-09, 1.5266e-13, 3.1535e-09, 5.1306e-10,\n 1.8875e-09, 2.5430e-12, 3.0589e-14, 2.4128e-13, 3.5869e-11, 4.5310e-10,\n 4.6486e-10, 4.9160e-13, 4.5780e-12, 5.9001e-14, 2.5224e-10, 3.7384e-10,\n 1.2894e-08, 7.5428e-09, 1.1463e-09, 2.0091e-09, 1.5854e-13, 2.6493e-11,\n 1.4572e-12, 7.9891e-11, 9.7793e-10, 4.5192e-09, 2.6947e-13, 4.4736e-11,\n 7.5383e-15, 1.6574e-09, 3.1482e-11, 9.7985e-09, 3.0423e-12, 2.9751e-11,\n 3.6650e-11, 7.2058e-10, 2.2150e-12, 1.9128e-11, 2.8245e-11, 6.9330e-12,\n 4.1733e-11, 3.7169e-12, 9.5226e-11, 6.5169e-11, 4.1978e-14, 6.2325e-11,\n 2.5800e-11, 5.4639e-12, 1.7056e-10, 5.9511e-11], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([-2.4159e-18, -4.5582e-18, 1.9728e-19, -1.1200e-16, -8.6550e-19,\n -1.9159e-19, -3.7369e-17, -6.4106e-19, -1.4458e-17, -1.4080e-16,\n -4.4118e-17, 7.6363e-20, -1.6643e-16, -2.1112e-17, 2.8057e-19,\n -7.0573e-17, -1.9029e-17, -8.6035e-20, -4.8383e-19, -3.5845e-19,\n -2.0038e-18, 3.8252e-19, -2.7737e-19, -3.9356e-17, -1.2695e-18,\n -2.0438e-16, 1.9449e-18, -1.5601e-16, -1.2764e-16, -8.3684e-19,\n -3.5344e-18, -1.0671e-16, -8.7643e-18, -5.5718e-19, 3.1193e-18,\n -3.9555e-19, -5.0696e-19, -8.7052e-20, -7.0800e-19, 1.1478e-19,\n 7.3225e-19, -2.3973e-17, -1.1031e-18, -1.1134e-18, -1.4308e-17,\n 2.3882e-18, -7.5883e-17, -1.6155e-16, -3.8915e-18, -3.9067e-17,\n -6.4825e-17, -1.0648e-20, -1.4496e-17, -2.2483e-19, -4.6179e-17,\n -1.1137e-17, 6.8694e-20, -2.1365e-18, -6.0110e-17, -1.5949e-16,\n -3.8641e-20, 7.2266e-20, -8.7370e-17, -1.6217e-16, -8.2252e-18,\n 1.3968e-19, -1.2347e-18, -1.0605e-19, -2.2459e-17, 2.1074e-19,\n -1.1295e-17, -8.9759e-19, -4.8381e-19, -2.3685e-18, -1.3699e-16,\n 6.4828e-19, -4.0168e-18, 3.5284e-18, 1.7395e-19, -4.2165e-18,\n -4.0950e-18, -1.0540e-17, -4.1091e-18, -1.9562e-18, -4.5862e-17,\n 3.7697e-19, -1.5923e-17, 2.2880e-19, 2.9372e-18, -3.8268e-17,\n -3.1942e-17, -5.1268e-20, -4.5107e-18, -3.9244e-19, -3.0391e-18,\n 8.3788e-20, -2.6643e-19, -9.8214e-17, -3.6621e-18, -1.1238e-18,\n -1.6843e-17, -4.6905e-18, -9.4145e-17, -5.1457e-20, -7.8994e-20,\n -1.0337e-16, -3.1249e-17, 2.2055e-19, -5.7722e-19, 4.2454e-19,\n -8.9640e-17, -7.1371e-20, -2.5695e-18, -3.5806e-17, -2.1421e-19,\n -6.5839e-18, -2.1122e-16, -3.4651e-19, -1.4273e-17, -9.9350e-17,\n -5.6400e-17, -2.5498e-19, -4.7126e-18, -1.6184e-17, 1.9922e-19,\n -1.0474e-16, 9.3424e-19, -6.1823e-19, -1.1193e-19, -1.6270e-19,\n 9.2581e-20, 1.7542e-19, -9.8854e-19, 1.3139e-19, 5.0400e-19,\n -1.6121e-16, 4.4000e-19, 1.2089e-18, 1.7364e-18, -1.2911e-19,\n -3.0360e-18, -1.1435e-16, 5.3175e-18, -5.0530e-19, -2.3407e-19,\n 1.3837e-21, -1.2952e-16, -4.4381e-18, -4.6703e-20, 8.2186e-21,\n 8.8253e-20, -2.1662e-16, 6.2503e-19, 7.8482e-19, -3.7514e-18,\n 2.2009e-19, -1.4825e-16, -4.6624e-19, 3.3155e-18, -1.2022e-16,\n -2.2032e-17, -1.0885e-17, -1.0569e-18, -1.7939e-16, -1.6790e-17,\n -4.4479e-19, -6.2355e-17, -8.5865e-18, -7.6541e-17, -8.6061e-20,\n -1.0679e-16, -8.6312e-17, -2.5875e-18, -7.1197e-20, -2.5360e-17,\n 1.9524e-18, -5.5978e-20, -5.1909e-17, -1.1805e-19, -1.0995e-17,\n -2.4153e-18, 6.5257e-19, -3.3603e-19, -1.1175e-17, 1.5131e-18,\n -8.5536e-18, -1.1919e-18, -5.4754e-18, 6.0949e-19, -1.7076e-18,\n 1.1555e-18, -6.6322e-20, 3.6369e-20, -7.2438e-17, 8.9940e-19,\n 4.2155e-19, -7.6376e-19, -9.9527e-17, -4.4435e-17, -6.2285e-17,\n 8.2943e-19, -1.5577e-16, -1.1071e-17, -1.0507e-16, -8.1703e-17,\n 2.9017e-20, -1.5425e-16, 9.2013e-20, -8.1331e-17, -5.7905e-17,\n -3.7686e-19, 5.4390e-19, -2.6368e-18, -1.5417e-19, -3.0067e-18,\n -3.5538e-18, -2.3505e-17, 4.3022e-18, -4.1120e-19, -7.1821e-19,\n -3.3786e-20, -7.2672e-19, -1.5328e-16, -1.2460e-16, -1.4473e-17,\n -1.9889e-17, -8.5632e-19, -1.5482e-19, -3.3238e-19, 2.7296e-20,\n -4.3738e-19, -7.1797e-17, 3.1785e-18, -4.6574e-17, -2.9271e-18,\n -1.6935e-17, -6.6600e-19, -1.2068e-17, -2.9008e-17, -6.8737e-19,\n 1.3034e-19, -2.2318e-20, 9.7130e-18, -7.9998e-20, 2.7970e-19,\n -1.7290e-19, -5.6835e-17, 7.5732e-20, -4.3603e-17, -2.4800e-18,\n -2.0597e-18, -4.2637e-19, -6.1979e-20, -3.2478e-19, -4.2814e-17,\n -1.6031e-20], device='cuda:0')", + "exp_avg_sq": "tensor([3.9772e-10, 4.9621e-11, 4.6852e-13, 1.6146e-09, 4.9125e-12, 4.1562e-12,\n 1.9617e-10, 1.0142e-12, 4.3533e-13, 2.8150e-09, 3.9191e-11, 2.1386e-10,\n 5.1385e-09, 1.7432e-10, 8.5305e-13, 6.8928e-10, 6.0639e-13, 9.5871e-13,\n 1.0637e-12, 4.4260e-12, 1.1590e-13, 3.7592e-14, 3.0034e-12, 3.6945e-10,\n 6.1201e-13, 1.4264e-08, 2.2818e-13, 4.9769e-09, 1.9497e-09, 3.6705e-12,\n 2.0657e-13, 1.3751e-09, 1.3599e-12, 4.0342e-11, 4.5477e-14, 1.1096e-12,\n 4.1726e-12, 5.3124e-10, 6.5383e-13, 6.0749e-12, 3.0928e-13, 9.1495e-13,\n 5.8102e-13, 1.4272e-13, 2.0491e-10, 2.0378e-11, 2.1818e-09, 5.5211e-09,\n 3.9877e-10, 1.7368e-10, 2.0598e-09, 1.5238e-12, 1.8931e-13, 1.2406e-11,\n 7.0248e-10, 1.4608e-10, 1.5932e-12, 5.5757e-14, 2.8926e-10, 3.3422e-09,\n 1.2089e-11, 1.2108e-13, 4.4223e-10, 3.1724e-09, 1.0736e-12, 1.4699e-12,\n 6.4529e-12, 4.8295e-11, 4.4054e-10, 1.4048e-11, 4.5590e-13, 2.3909e-12,\n 1.2733e-14, 7.0113e-13, 5.8197e-09, 1.9436e-13, 1.9171e-14, 5.7039e-14,\n 1.6559e-12, 5.6271e-11, 2.3422e-14, 1.5011e-13, 3.2640e-10, 2.5402e-14,\n 1.0732e-11, 1.0647e-12, 3.9975e-10, 8.1149e-11, 2.2838e-11, 7.9179e-10,\n 5.6163e-10, 5.7990e-11, 5.3880e-13, 1.4932e-13, 2.2487e-11, 1.6412e-11,\n 1.9312e-10, 3.4428e-10, 7.6639e-14, 1.3517e-13, 1.0221e-10, 6.4963e-13,\n 5.3902e-09, 1.1146e-12, 7.5336e-11, 5.4589e-09, 5.1752e-13, 6.9981e-14,\n 5.7192e-12, 3.4596e-12, 1.5320e-09, 1.0867e-12, 6.9055e-15, 5.0674e-10,\n 8.7916e-12, 2.3295e-10, 9.2359e-09, 5.6418e-12, 2.6362e-10, 9.4885e-10,\n 1.5491e-09, 9.2354e-13, 3.6611e-14, 2.7963e-13, 5.0560e-12, 3.0757e-09,\n 1.4889e-13, 1.9304e-10, 6.0477e-13, 5.9698e-10, 9.0301e-13, 4.4273e-12,\n 2.8165e-14, 8.2613e-11, 5.3984e-13, 2.5487e-09, 9.7247e-12, 3.0256e-13,\n 5.0239e-14, 4.4875e-13, 4.2016e-11, 2.1910e-09, 3.0780e-14, 1.5106e-12,\n 2.1880e-12, 6.9273e-12, 2.5610e-09, 2.7436e-11, 1.5372e-11, 6.5477e-12,\n 8.5301e-13, 1.5307e-08, 4.0988e-15, 1.9006e-11, 2.0585e-13, 1.7068e-11,\n 4.5154e-09, 1.7996e-13, 2.4230e-12, 9.5772e-10, 5.1132e-10, 7.4647e-13,\n 4.8093e-13, 7.7117e-09, 1.6177e-10, 1.8177e-12, 1.4126e-10, 5.5852e-10,\n 2.5629e-09, 2.3820e-11, 1.5055e-09, 9.1821e-10, 1.8093e-10, 4.5736e-12,\n 1.6408e-10, 4.5043e-15, 1.6742e-12, 4.8778e-10, 5.6220e-12, 1.0789e-10,\n 6.7165e-10, 5.7959e-13, 3.2467e-12, 3.5269e-10, 9.6840e-14, 3.5777e-10,\n 2.6157e-12, 6.6156e-11, 1.0070e-11, 2.3740e-11, 3.6029e-14, 8.6819e-15,\n 2.0606e-13, 1.1931e-09, 1.6540e-11, 2.5619e-12, 7.9520e-12, 1.2589e-09,\n 3.2545e-11, 1.6069e-09, 5.6183e-15, 1.2667e-08, 4.5505e-10, 5.2570e-10,\n 2.2083e-10, 1.5032e-10, 2.2432e-09, 4.3624e-14, 9.0112e-10, 1.4661e-10,\n 5.3938e-10, 7.2668e-13, 8.7411e-15, 6.8947e-14, 1.0250e-11, 1.2948e-10,\n 1.3284e-10, 1.4048e-13, 1.3082e-12, 1.6860e-14, 7.2081e-11, 1.0683e-10,\n 3.6846e-09, 2.1554e-09, 3.2756e-10, 5.7411e-10, 4.5305e-14, 7.5705e-12,\n 4.1642e-13, 2.2829e-11, 2.7945e-10, 1.2914e-09, 7.7003e-14, 1.2784e-11,\n 2.1541e-15, 4.7361e-10, 8.9962e-12, 2.8000e-09, 8.6937e-13, 8.5015e-12,\n 1.0473e-11, 2.0591e-10, 6.3294e-13, 5.4659e-12, 8.0713e-12, 1.9812e-12,\n 1.1925e-11, 1.0621e-12, 2.7212e-11, 1.8622e-11, 1.1996e-14, 1.7810e-11,\n 7.3726e-12, 1.5614e-12, 4.8740e-11, 1.7006e-11], device='cuda:0')" }, "39": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-1.3189e-17, 6.2987e-19, 4.5097e-19, -8.3119e-17, -8.1951e-18,\n -6.1265e-18, -4.8659e-17, -1.2916e-18, -4.9064e-17, -1.0875e-16,\n -6.9347e-17, 2.2195e-18, -1.0912e-16, -3.5839e-17, -4.4326e-18,\n -5.8677e-17, -5.8714e-17, -2.3961e-18, 7.5960e-18, -1.5526e-17,\n 6.9516e-19, -2.4821e-18, -9.7923e-18, -4.3113e-17, -7.2031e-19,\n -1.1065e-16, 2.0077e-18, -1.0753e-16, -8.6467e-17, -8.3791e-19,\n 2.7561e-18, -1.0512e-16, -2.1411e-17, 5.3158e-18, -2.7889e-18,\n 7.4079e-19, -1.2313e-17, -1.1487e-17, -2.8089e-18, -5.2208e-19,\n -1.2738e-17, -5.3829e-17, 6.6848e-18, 6.7008e-18, -4.1833e-17,\n 1.1125e-19, -5.0251e-17, -1.0226e-16, -1.5766e-17, -4.8027e-17,\n -4.3657e-17, 4.2888e-19, -4.2791e-17, -1.0649e-17, -5.7821e-17,\n -2.9474e-17, -1.4006e-18, 2.3105e-18, -6.6033e-17, -9.2096e-17,\n -3.9799e-18, 1.6390e-18, -9.8007e-17, -1.1297e-16, -3.5300e-17,\n -1.2656e-18, -6.8754e-18, 5.6319e-19, -4.9871e-17, 6.9791e-19,\n -3.4019e-17, 1.1149e-17, -2.8202e-18, 1.6196e-18, -8.3983e-17,\n -7.5009e-18, 2.3773e-18, -3.4101e-18, 4.9433e-19, 1.3270e-18,\n 3.5337e-18, -4.4797e-17, -2.3886e-17, 7.0788e-19, -7.9300e-17,\n -4.1507e-18, -3.4213e-17, 6.5101e-18, 1.2535e-18, -5.5144e-17,\n -3.4591e-17, -2.3431e-18, 3.4267e-18, -2.2417e-18, 2.4767e-18,\n -1.6030e-17, 3.1119e-18, -9.9578e-17, 2.9530e-18, -2.0835e-17,\n -2.3566e-17, -1.1207e-17, -5.6125e-17, -7.9286e-18, -3.1358e-19,\n -4.6876e-17, -7.8372e-17, 4.1127e-20, -1.2984e-17, 4.0576e-18,\n -6.2648e-17, 3.0693e-18, 2.5264e-18, -2.9401e-17, -1.2053e-17,\n -3.3345e-17, -9.6664e-17, 7.2330e-18, -2.8006e-17, -7.0250e-17,\n -6.7440e-17, -2.6100e-18, 4.3387e-18, -5.1635e-17, 3.5297e-18,\n -7.0946e-17, -6.4047e-19, -5.1170e-18, 2.5580e-19, -4.2319e-18,\n 2.9986e-18, -2.1812e-17, 8.5254e-19, -5.2947e-18, -2.7208e-18,\n -1.1573e-16, -9.3958e-18, 2.4764e-19, -1.3117e-18, 3.4908e-18,\n -1.8218e-17, -9.0102e-17, -4.7714e-18, -1.7469e-17, 1.1506e-19,\n -1.2039e-17, -9.3851e-17, 9.5158e-19, -3.9709e-18, -1.0712e-18,\n -2.7177e-18, -1.2503e-16, -6.8208e-19, 2.5833e-17, 3.9434e-18,\n -1.8217e-19, -1.1097e-16, -6.9954e-18, -2.5016e-18, -9.3300e-17,\n -5.0352e-17, 1.0372e-17, -7.0589e-19, -1.0000e-16, -2.5389e-17,\n -3.3641e-18, -7.1254e-17, -2.0625e-17, -4.5006e-17, -3.9354e-18,\n -7.6945e-17, -6.4931e-17, -8.9785e-18, -4.0624e-19, -4.4880e-17,\n -1.5559e-18, -3.3267e-18, -4.6998e-17, 1.6827e-17, -1.3866e-17,\n -1.8648e-17, -2.6587e-18, 4.4260e-18, -3.0914e-17, -1.7794e-18,\n -3.5723e-17, -2.1692e-19, -2.4630e-17, 5.2681e-18, -8.4842e-19,\n -6.9152e-19, -2.2278e-18, -5.7160e-19, -5.9452e-17, 1.8723e-18,\n -6.6908e-18, -4.8019e-18, -6.9599e-17, -8.0538e-17, -4.9220e-17,\n 1.3452e-18, -8.6737e-17, -2.4451e-17, -1.0998e-16, -9.1017e-17,\n 1.0645e-18, -9.1392e-17, -1.4960e-18, -6.1300e-17, -6.8233e-17,\n -1.7770e-18, -6.7351e-18, 2.2256e-18, -1.2441e-18, -1.6708e-17,\n -6.2477e-18, -3.1365e-17, -5.5082e-18, 3.0186e-18, 8.7954e-19,\n -1.3032e-19, -2.4533e-17, -8.5505e-17, -1.0471e-16, -4.3753e-17,\n -5.5746e-17, -1.8655e-18, -5.7252e-19, -8.5038e-18, -6.6529e-18,\n -2.2484e-18, -5.8275e-17, -2.6560e-18, -7.1772e-17, 3.7489e-18,\n -2.9405e-17, 9.3621e-18, -2.5177e-17, -5.3938e-17, -3.0752e-19,\n -3.4803e-18, -5.2997e-19, -8.4828e-18, 9.8178e-19, 5.5260e-18,\n -5.7921e-18, -8.2567e-17, -3.5122e-18, -6.9007e-17, -2.6122e-19,\n 1.5600e-18, -1.7796e-18, -3.7676e-18, -6.9372e-18, -6.8636e-17,\n 1.3983e-19], device='cuda:0')", - "exp_avg_sq": "tensor([1.4116e-09, 7.5573e-11, 8.5939e-11, 3.5195e-09, 5.8966e-12, 3.7759e-11,\n 1.0629e-09, 1.2786e-12, 1.1737e-10, 5.1018e-09, 1.1884e-09, 1.6496e-09,\n 7.6997e-09, 8.8354e-10, 4.6698e-11, 2.8537e-09, 2.9989e-10, 1.3321e-10,\n 8.6193e-11, 7.8054e-12, 2.8102e-12, 3.3714e-14, 4.3479e-12, 2.1660e-09,\n 5.4886e-13, 1.9492e-08, 3.8560e-11, 1.2431e-08, 3.1900e-09, 1.7294e-10,\n 1.0761e-10, 5.2132e-09, 6.3981e-10, 6.1678e-10, 7.5171e-11, 1.1149e-10,\n 6.3259e-12, 3.2933e-09, 1.3483e-12, 9.9640e-12, 2.8055e-13, 3.3718e-10,\n 4.2583e-13, 3.4559e-14, 1.1053e-09, 2.5212e-09, 2.6017e-09, 1.0635e-08,\n 9.9652e-10, 1.1362e-09, 3.2657e-09, 3.0404e-10, 1.5721e-10, 2.0666e-11,\n 1.4866e-09, 3.5141e-10, 1.8184e-12, 1.2145e-11, 2.3588e-09, 4.2313e-09,\n 1.7016e-10, 1.5768e-12, 5.8108e-09, 8.4704e-09, 1.5542e-11, 3.2789e-12,\n 1.0714e-11, 1.0887e-09, 1.3288e-09, 3.7860e-10, 1.6195e-10, 4.3738e-12,\n 4.6128e-11, 1.6726e-10, 8.3446e-09, 4.5389e-14, 8.6916e-12, 9.4192e-12,\n 1.7783e-10, 7.6615e-11, 1.3220e-11, 8.1687e-11, 1.2424e-09, 7.7116e-13,\n 5.7761e-10, 6.7640e-11, 9.5945e-10, 4.9618e-10, 2.7020e-11, 1.5702e-09,\n 6.2515e-09, 5.0748e-10, 6.8738e-12, 2.3108e-13, 4.0368e-11, 2.6366e-11,\n 1.9717e-09, 2.3354e-09, 6.6675e-11, 1.7280e-10, 1.1774e-09, 5.3943e-10,\n 9.1860e-09, 1.8817e-12, 1.1348e-09, 8.5175e-09, 2.3158e-10, 8.3103e-14,\n 1.0007e-11, 6.1790e-12, 3.4681e-09, 1.6573e-13, 3.9322e-13, 2.0813e-09,\n 1.6352e-11, 1.0549e-09, 9.2968e-09, 1.8982e-12, 7.2505e-10, 2.3917e-09,\n 2.4035e-09, 6.9813e-11, 8.7948e-12, 1.1797e-10, 5.0928e-11, 6.6474e-09,\n 1.0624e-10, 7.1422e-10, 1.2981e-10, 3.8800e-09, 1.5184e-12, 7.6799e-12,\n 3.6954e-11, 8.6337e-10, 5.1848e-13, 5.8060e-09, 3.2179e-10, 6.6526e-14,\n 6.6015e-11, 6.4902e-12, 1.5133e-10, 7.3602e-09, 5.6611e-13, 2.4317e-12,\n 6.6484e-13, 1.2420e-11, 6.6391e-09, 3.2885e-11, 2.1396e-11, 1.0218e-10,\n 4.1260e-13, 2.0373e-08, 5.8352e-13, 3.0584e-11, 1.8870e-12, 7.9854e-10,\n 1.4580e-08, 1.0939e-10, 3.1354e-11, 3.1842e-09, 1.0066e-09, 5.9480e-12,\n 9.4753e-13, 1.0776e-08, 1.8756e-09, 3.0752e-12, 1.0372e-09, 1.0345e-09,\n 3.6701e-09, 4.1624e-10, 5.3986e-09, 3.1070e-09, 5.3127e-10, 8.3943e-12,\n 7.9428e-10, 1.6403e-12, 2.9424e-12, 2.8218e-09, 9.6224e-12, 1.5164e-09,\n 2.1875e-09, 6.9351e-13, 2.0752e-10, 9.3458e-10, 6.3441e-11, 1.9868e-09,\n 3.1230e-12, 4.1752e-10, 1.5241e-11, 1.4291e-09, 8.9521e-12, 3.1487e-11,\n 3.7391e-11, 5.8088e-09, 6.9520e-10, 4.3597e-12, 1.1521e-11, 3.0147e-09,\n 1.1875e-09, 2.4531e-09, 5.1626e-12, 2.0797e-08, 8.8933e-10, 4.5720e-09,\n 2.6239e-09, 1.2517e-09, 5.5360e-09, 1.1312e-14, 3.5702e-09, 3.3627e-09,\n 1.4201e-09, 9.3792e-13, 2.9004e-12, 4.2973e-13, 3.2908e-10, 2.1122e-09,\n 1.0968e-09, 4.9029e-13, 8.9747e-13, 3.7193e-13, 1.0336e-09, 7.0664e-10,\n 4.4861e-09, 7.5323e-09, 1.1134e-09, 2.2158e-09, 2.3767e-11, 1.6563e-10,\n 1.8897e-10, 2.3868e-10, 1.7823e-09, 4.5767e-09, 2.5759e-10, 1.5490e-09,\n 5.0269e-12, 9.2920e-10, 1.3070e-11, 4.0916e-09, 6.4592e-10, 9.4388e-12,\n 1.8673e-10, 3.0836e-09, 1.8065e-10, 1.0049e-11, 1.3398e-11, 3.2031e-12,\n 1.4641e-09, 1.7256e-12, 7.0309e-10, 2.2322e-11, 4.6179e-12, 8.8384e-10,\n 2.7150e-10, 2.5663e-12, 2.4530e-09, 3.1073e-10], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([-1.3363e-17, 7.7465e-19, -2.6205e-19, -6.4591e-17, -5.0939e-18,\n -4.4247e-18, -4.1022e-17, -4.7286e-19, -4.7294e-17, -8.5933e-17,\n -6.5115e-17, 1.5106e-18, -8.9125e-17, -3.0766e-17, -3.2411e-18,\n -5.1123e-17, -4.6056e-17, 2.5297e-18, 7.0084e-18, -1.3420e-17,\n 7.6120e-19, -1.8739e-18, -7.8178e-18, -3.1563e-17, -1.1308e-19,\n -9.9355e-17, 1.1607e-18, -9.3207e-17, -8.0357e-17, -9.2268e-19,\n 2.5172e-18, -8.3877e-17, -1.9218e-17, -5.1185e-19, -2.1171e-18,\n 2.6135e-20, -7.8958e-18, -9.6095e-18, -1.1313e-18, -1.5195e-18,\n -5.3207e-18, -4.2514e-17, 4.8320e-18, 6.7584e-18, -4.4632e-17,\n 9.6737e-19, -4.7303e-17, -9.0670e-17, -1.3387e-17, -3.6902e-17,\n -4.2299e-17, 9.6800e-20, -3.2944e-17, -7.4101e-18, -4.3303e-17,\n -2.4580e-17, -4.4366e-19, 1.5354e-18, -5.1634e-17, -7.7924e-17,\n -2.0714e-18, 4.4679e-19, -8.4228e-17, -9.9233e-17, -3.5838e-17,\n -1.0586e-18, -8.1708e-18, 7.4764e-19, -3.9014e-17, 1.8076e-18,\n -3.1507e-17, 9.6696e-18, -1.9939e-18, 7.5809e-19, -7.1842e-17,\n -3.8159e-18, 2.5672e-18, -2.0708e-18, 7.3875e-19, 2.2580e-18,\n 3.0224e-18, -3.7587e-17, -2.3469e-17, 9.2459e-19, -6.0483e-17,\n -4.8024e-18, -3.2854e-17, 3.7709e-18, 8.5185e-19, -4.8873e-17,\n -3.1054e-17, -1.6399e-18, 2.1815e-18, -1.3570e-18, 1.0999e-18,\n -1.2171e-17, 4.6147e-18, -8.0628e-17, 6.8883e-19, -1.6341e-17,\n -2.4140e-17, -1.1730e-17, -5.0277e-17, -5.1343e-18, -4.9333e-19,\n -5.4331e-17, -5.7605e-17, -3.4399e-20, -1.1026e-17, 5.1849e-18,\n -5.6403e-17, 3.5788e-18, 1.9379e-18, -3.2186e-17, -9.2472e-18,\n -2.6148e-17, -9.7589e-17, 4.2607e-18, -2.6509e-17, -6.1824e-17,\n -6.4128e-17, -2.6120e-18, 3.1793e-18, -4.4638e-17, 4.0856e-18,\n -6.1108e-17, -6.0428e-19, -4.6202e-18, 1.3472e-19, -4.6186e-18,\n 2.2472e-18, -1.6479e-17, 5.9676e-19, -4.4875e-18, -3.9639e-18,\n -9.8369e-17, -7.4563e-18, 5.4554e-19, -1.6266e-19, 3.0340e-18,\n -1.7751e-17, -7.7866e-17, -3.9939e-18, -1.3788e-17, -1.9831e-19,\n -9.3826e-18, -7.9630e-17, 5.7353e-20, -2.8270e-18, -1.5952e-18,\n -2.0487e-18, -1.0659e-16, -3.8278e-19, 2.0837e-17, 2.2481e-18,\n 1.6153e-19, -9.5280e-17, -8.2340e-18, -2.3301e-18, -8.0247e-17,\n -4.3747e-17, 7.6203e-18, -6.7298e-19, -9.0185e-17, -2.0914e-17,\n -3.3231e-18, -5.8019e-17, -1.9322e-17, -4.4449e-17, -4.3036e-18,\n -7.4651e-17, -5.8161e-17, -1.0440e-17, 2.5448e-19, -3.6313e-17,\n -1.0714e-18, -2.2268e-18, -4.3625e-17, 8.4065e-18, -1.5873e-17,\n -1.4401e-17, -3.7034e-18, 4.4828e-18, -2.7269e-17, -8.9072e-19,\n -3.6275e-17, -1.0596e-18, -2.3727e-17, 4.7426e-18, -3.9164e-19,\n -7.7340e-19, -2.5498e-18, -7.4628e-20, -5.3085e-17, 1.3260e-19,\n -7.2322e-18, -7.3567e-18, -6.2240e-17, -7.0876e-17, -4.3626e-17,\n 1.5505e-18, -8.1493e-17, -2.1476e-17, -8.9340e-17, -7.5013e-17,\n 1.1809e-18, -8.9924e-17, -5.5138e-19, -5.7843e-17, -6.0599e-17,\n -2.0294e-18, -5.4681e-18, 1.6296e-18, -1.2114e-18, -1.6101e-17,\n -3.3142e-18, -2.6699e-17, -3.0608e-18, 2.2085e-18, 1.7653e-19,\n -6.3091e-19, -2.2101e-17, -7.6207e-17, -8.2847e-17, -3.8843e-17,\n -4.7961e-17, -6.6053e-19, -2.6847e-19, -7.4281e-18, -4.6162e-18,\n -4.5956e-18, -5.0971e-17, -2.0189e-18, -6.2415e-17, 2.0806e-18,\n -2.8176e-17, 7.1870e-18, -2.2650e-17, -4.8278e-17, 2.3624e-19,\n -4.5733e-18, -6.1282e-19, -6.9461e-18, 6.8193e-19, 5.0108e-18,\n -4.6071e-18, -7.4579e-17, -2.7169e-18, -5.5976e-17, -6.6015e-19,\n 9.3165e-19, -1.2375e-19, -5.8993e-18, -4.2975e-18, -5.2455e-17,\n 1.3236e-19], device='cuda:0')", + "exp_avg_sq": "tensor([4.0338e-10, 2.1595e-11, 2.4558e-11, 1.0057e-09, 1.6850e-12, 1.0790e-11,\n 3.0373e-10, 3.6536e-13, 3.3540e-11, 1.4579e-09, 3.3961e-10, 4.7138e-10,\n 2.2002e-09, 2.5248e-10, 1.3344e-11, 8.1546e-10, 8.5696e-11, 3.8066e-11,\n 2.4630e-11, 2.2305e-12, 8.0302e-13, 9.6340e-15, 1.2424e-12, 6.1894e-10,\n 1.5684e-13, 5.5700e-09, 1.1019e-11, 3.5524e-09, 9.1158e-10, 4.9420e-11,\n 3.0751e-11, 1.4897e-09, 1.8283e-10, 1.7625e-10, 2.1481e-11, 3.1861e-11,\n 1.8077e-12, 9.4110e-10, 3.8529e-13, 2.8473e-12, 8.0170e-14, 9.6353e-11,\n 1.2168e-13, 9.8755e-15, 3.1584e-10, 7.2045e-10, 7.4346e-10, 3.0391e-09,\n 2.8476e-10, 3.2468e-10, 9.3319e-10, 8.6880e-11, 4.4923e-11, 5.9054e-12,\n 4.2481e-10, 1.0042e-10, 5.1961e-13, 3.4705e-12, 6.7403e-10, 1.2091e-09,\n 4.8625e-11, 4.5058e-13, 1.6605e-09, 2.4205e-09, 4.4412e-12, 9.3697e-13,\n 3.0616e-12, 3.1110e-10, 3.7970e-10, 1.0819e-10, 4.6278e-11, 1.2499e-12,\n 1.3182e-11, 4.7795e-11, 2.3845e-09, 1.2970e-14, 2.4837e-12, 2.6916e-12,\n 5.0817e-11, 2.1893e-11, 3.7778e-12, 2.3343e-11, 3.5503e-10, 2.2036e-13,\n 1.6506e-10, 1.9329e-11, 2.7417e-10, 1.4179e-10, 7.7210e-12, 4.4870e-10,\n 1.7864e-09, 1.4502e-10, 1.9643e-12, 6.6033e-14, 1.1536e-11, 7.5343e-12,\n 5.6343e-10, 6.6735e-10, 1.9053e-11, 4.9379e-11, 3.3645e-10, 1.5415e-10,\n 2.6250e-09, 5.3773e-13, 3.2429e-10, 2.4339e-09, 6.6177e-11, 2.3747e-14,\n 2.8594e-12, 1.7657e-12, 9.9103e-10, 4.7358e-14, 1.1236e-13, 5.9475e-10,\n 4.6728e-12, 3.0143e-10, 2.6566e-09, 5.4241e-13, 2.0719e-10, 6.8345e-10,\n 6.8680e-10, 1.9950e-11, 2.5132e-12, 3.3711e-11, 1.4553e-11, 1.8995e-09,\n 3.0359e-11, 2.0409e-10, 3.7095e-11, 1.1087e-09, 4.3388e-13, 2.1946e-12,\n 1.0560e-11, 2.4671e-10, 1.4816e-13, 1.6591e-09, 9.1955e-11, 1.9010e-14,\n 1.8864e-11, 1.8546e-12, 4.3243e-11, 2.1032e-09, 1.6177e-13, 6.9488e-13,\n 1.8998e-13, 3.5491e-12, 1.8972e-09, 9.3971e-12, 6.1139e-12, 2.9200e-11,\n 1.1790e-13, 5.8218e-09, 1.6675e-13, 8.7396e-12, 5.3922e-13, 2.2819e-10,\n 4.1663e-09, 3.1260e-11, 8.9596e-12, 9.0992e-10, 2.8766e-10, 1.6997e-12,\n 2.7076e-13, 3.0793e-09, 5.3598e-10, 8.7875e-13, 2.9639e-10, 2.9563e-10,\n 1.0488e-09, 1.1894e-10, 1.5427e-09, 8.8784e-10, 1.5182e-10, 2.3987e-12,\n 2.2697e-10, 4.6872e-13, 8.4082e-13, 8.0634e-10, 2.7497e-12, 4.3331e-10,\n 6.2509e-10, 1.9818e-13, 5.9301e-11, 2.6706e-10, 1.8129e-11, 5.6774e-10,\n 8.9243e-13, 1.1931e-10, 4.3553e-12, 4.0838e-10, 2.5581e-12, 8.9977e-12,\n 1.0685e-11, 1.6599e-09, 1.9866e-10, 1.2458e-12, 3.2921e-12, 8.6148e-10,\n 3.3933e-10, 7.0099e-10, 1.4752e-12, 5.9428e-09, 2.5413e-10, 1.3065e-09,\n 7.4980e-10, 3.5770e-10, 1.5820e-09, 3.2326e-15, 1.0202e-09, 9.6091e-10,\n 4.0580e-10, 2.6802e-13, 8.2880e-13, 1.2280e-13, 9.4037e-11, 6.0357e-10,\n 3.1342e-10, 1.4010e-13, 2.5646e-13, 1.0628e-13, 2.9537e-10, 2.0193e-10,\n 1.2819e-09, 2.1524e-09, 3.1817e-10, 6.3319e-10, 6.7915e-12, 4.7329e-11,\n 5.3999e-11, 6.8205e-11, 5.0930e-10, 1.3078e-09, 7.3608e-11, 4.4263e-10,\n 1.4365e-12, 2.6553e-10, 3.7350e-12, 1.1692e-09, 1.8458e-10, 2.6972e-12,\n 5.3360e-11, 8.8115e-10, 5.1623e-11, 2.8717e-12, 3.8286e-12, 9.1530e-13,\n 4.1838e-10, 4.9311e-13, 2.0091e-10, 6.3787e-12, 1.3196e-12, 2.5256e-10,\n 7.7584e-11, 7.3333e-13, 7.0097e-10, 8.8792e-11], device='cuda:0')" }, "40": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-3.4433e-19, -5.0309e-21, -1.2386e-18, ..., 1.4215e-20,\n -1.3633e-19, 3.1612e-20],\n [-1.2378e-19, -8.9439e-21, 4.6083e-19, ..., 6.5519e-20,\n -3.6843e-21, -7.1758e-20],\n [ 9.7739e-19, 2.7977e-19, 6.7020e-19, ..., 1.3812e-18,\n 1.0631e-18, 7.0064e-19],\n ...,\n [-3.1036e-19, 1.1830e-19, 8.1323e-19, ..., 5.8955e-19,\n 2.8867e-19, 3.8287e-19],\n [ 3.2825e-18, -1.2996e-19, 1.7567e-18, ..., 1.3461e-18,\n -1.0839e-19, 2.7983e-19],\n [ 2.7983e-19, 2.8190e-20, -1.5539e-19, ..., 3.3339e-19,\n 2.5343e-19, 5.4018e-20]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.6324e-12, 7.3208e-13, 2.4516e-12, ..., 2.9491e-12, 3.0981e-12,\n 4.0743e-12],\n [9.5300e-14, 1.5121e-13, 7.8657e-14, ..., 8.8530e-14, 5.0477e-13,\n 2.1313e-13],\n [1.4399e-13, 1.9852e-14, 6.7080e-14, ..., 2.2159e-14, 1.3806e-13,\n 6.2202e-14],\n ...,\n [1.0330e-13, 1.1315e-14, 6.7229e-14, ..., 1.5070e-14, 8.0885e-14,\n 3.0744e-14],\n [8.1295e-12, 2.2108e-12, 5.8290e-12, ..., 6.5166e-12, 9.9966e-12,\n 1.1178e-11],\n [1.7471e-12, 5.1808e-13, 1.7289e-12, ..., 6.4581e-13, 2.3349e-12,\n 1.8234e-12]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[-2.9358e-20, 6.9080e-21, -4.4647e-20, ..., 2.0321e-18,\n 1.1465e-20, -1.0596e-20],\n [-1.5498e-19, -3.6837e-20, -5.6970e-20, ..., -5.6016e-19,\n 3.5868e-21, -3.5050e-21],\n [ 7.5419e-19, 5.1390e-19, 2.9588e-19, ..., 1.3840e-18,\n 4.0671e-19, 2.4597e-19],\n ...,\n [ 3.6508e-19, 2.1126e-19, 5.3966e-20, ..., 1.6728e-19,\n 1.3646e-19, 1.2325e-19],\n [ 1.1473e-18, -1.7726e-20, 1.8149e-19, ..., 3.1768e-18,\n 1.7180e-19, 3.2438e-19],\n [ 2.4178e-19, 7.6346e-20, 5.9272e-21, ..., -1.1332e-18,\n 2.4086e-19, 4.8745e-20]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.5223e-13, 2.0920e-13, 7.0057e-13, ..., 8.4272e-13, 8.8530e-13,\n 1.1643e-12],\n [2.7233e-14, 4.3210e-14, 2.2477e-14, ..., 2.5298e-14, 1.4424e-13,\n 6.0903e-14],\n [4.1146e-14, 5.6729e-15, 1.9169e-14, ..., 6.3322e-15, 3.9453e-14,\n 1.7775e-14],\n ...,\n [2.9518e-14, 3.2333e-15, 1.9211e-14, ..., 4.3064e-15, 2.3114e-14,\n 8.7853e-15],\n [2.3231e-12, 6.3176e-13, 1.6657e-12, ..., 1.8622e-12, 2.8566e-12,\n 3.1942e-12],\n [4.9924e-13, 1.4804e-13, 4.9406e-13, ..., 1.8455e-13, 6.6721e-13,\n 5.2106e-13]], device='cuda:0')" }, "41": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-1.3195e-16, 1.0992e-17, 3.9106e-16, 7.0008e-18, -2.3726e-16,\n 1.1663e-16, 1.0906e-16, 4.4584e-16, -9.4106e-16, -6.1777e-16,\n -6.8952e-17, -1.3533e-16, -9.3152e-16, 5.3517e-17, 9.4667e-16,\n -7.9834e-16, -1.5387e-15, 5.3303e-17, 6.4069e-16, 7.4571e-17,\n -1.5343e-16, -6.4293e-17, 1.3672e-16, -2.3691e-16, -6.5474e-17,\n -2.3257e-15, -9.7867e-17, -2.1059e-15, -1.8800e-15, -6.9622e-16,\n 4.4407e-16, -1.6710e-15, 1.0971e-15, 2.4713e-17, -6.6332e-17,\n 3.7870e-16, 1.9427e-17, 1.2221e-16, 7.0210e-16, -3.1599e-16,\n 4.3156e-17, -7.6312e-16, 8.6213e-16, 8.6890e-16, -4.5849e-17,\n 6.0551e-16, -9.7613e-18, -7.5893e-16, 5.0188e-16, -3.9525e-16,\n -5.7185e-18, -1.1461e-16, -3.8260e-16, 9.4392e-16, 2.5361e-18,\n 1.5343e-16, 1.2976e-15, -3.7110e-16, -2.2614e-16, 8.0699e-16,\n 3.2218e-16, 5.7730e-16, -8.0069e-16, -2.0288e-17, -1.5569e-17,\n 2.7887e-16, 3.9192e-17, -4.0147e-17, 5.4799e-17, -2.5833e-16,\n 3.4282e-17, -5.2611e-17, 7.2281e-16, -2.0519e-16, 1.2188e-15,\n -2.0242e-16, 1.8898e-17, 5.6120e-16, 1.3855e-15, -9.4073e-16,\n 3.8357e-16, 7.3716e-17, -8.9022e-18, -4.5093e-16, 1.3444e-16,\n -1.9207e-16, -1.9167e-17, -1.9961e-16, 1.4323e-15, 6.1827e-17,\n 1.0462e-15, -1.8491e-16, -1.1624e-15, 2.8956e-16, 6.2583e-16,\n 2.2476e-16, -1.8200e-16, 4.8500e-16, -9.5251e-16, 1.9451e-16,\n 8.9229e-17, -2.9673e-17, -1.0847e-16, 8.6079e-16, 3.6005e-17,\n -5.5068e-16, 1.6574e-16, -2.0244e-16, -1.1227e-15, -2.4063e-16,\n 1.3750e-16, -5.7429e-16, -7.0121e-20, 6.8619e-17, -2.0652e-17,\n -4.1401e-16, -4.4570e-16, 7.3356e-16, -2.1149e-17, -2.0007e-17,\n -5.7519e-17, 2.5785e-16, 8.1727e-16, -1.6938e-16, -1.4782e-17,\n -4.9923e-16, 1.2650e-15, -1.2454e-17, 2.0892e-16, 5.5830e-16,\n 3.1928e-16, -1.0595e-15, 3.4526e-16, -1.0600e-16, -3.2203e-17,\n -1.2462e-15, 9.6967e-16, 7.3735e-17, -1.7366e-16, -5.0025e-16,\n 5.3454e-16, 6.5032e-16, 8.3288e-16, -3.4456e-17, -1.7380e-16,\n -3.8322e-16, -7.1258e-16, -6.5501e-16, 7.2834e-17, 1.9240e-16,\n 1.0615e-16, -2.1355e-15, 5.3329e-16, 3.7548e-16, -1.7479e-16,\n 5.3073e-16, 5.7177e-18, 4.3954e-16, 4.5932e-16, -2.0599e-16,\n 1.9066e-16, -1.2807e-15, 2.3442e-17, -4.0519e-17, 2.4567e-16,\n 1.8652e-16, -4.9235e-17, 1.7157e-16, -1.0624e-16, 1.9428e-17,\n -1.2194e-16, -1.0382e-15, 3.3148e-16, 3.7815e-16, -1.2827e-16,\n -3.8536e-18, -1.0518e-16, -3.9785e-17, 2.7210e-16, 4.7958e-16,\n 7.1741e-17, 5.8119e-17, -3.3751e-16, -7.6594e-17, -1.3007e-16,\n 7.1331e-17, 9.7634e-17, 8.1538e-16, -2.0112e-16, 2.5941e-16,\n 7.3974e-16, 1.1477e-15, 5.9879e-16, -7.5187e-17, 1.7526e-16,\n -1.0763e-16, 2.8571e-16, -9.2213e-17, -1.0667e-15, 1.5502e-15,\n -2.8296e-16, -5.1307e-16, -1.5985e-16, -1.2425e-15, -4.9266e-16,\n -4.2355e-17, -1.3704e-15, 2.1632e-17, 1.3729e-16, 4.3584e-17,\n 1.8670e-16, 2.0691e-16, 7.1501e-16, -2.7331e-16, 3.9318e-17,\n 5.0342e-16, 2.6237e-16, 4.5787e-16, 7.6763e-16, 6.1267e-16,\n 6.8766e-17, 8.6832e-17, -4.4217e-17, -2.4765e-16, 6.0224e-17,\n 1.9119e-17, 6.8389e-18, 5.6683e-17, -1.4327e-15, -6.7733e-18,\n 2.7979e-16, -3.7315e-16, -4.0993e-16, 6.1563e-17, 2.0489e-16,\n 5.1626e-17, 4.1527e-16, -1.9230e-16, 5.4716e-16, 7.1246e-16,\n -3.4930e-16, 4.5646e-16, -5.3575e-16, 4.3364e-17, -3.2897e-16,\n 1.5068e-16, -6.0448e-17, 2.4064e-16, 9.1561e-17, -3.2784e-16,\n 2.2550e-16, -8.2072e-17, -1.3668e-16, 2.0008e-16, 2.2502e-16,\n 5.0156e-17], device='cuda:0')", - "exp_avg_sq": "tensor([6.1055e-07, 4.8697e-08, 2.3591e-08, 1.3647e-08, 1.6585e-06, 3.7979e-07,\n 3.8262e-08, 1.4474e-06, 4.3292e-06, 8.1857e-07, 5.6442e-10, 3.2007e-06,\n 4.4271e-06, 1.1697e-07, 1.4387e-07, 5.9490e-06, 1.3365e-05, 4.0866e-09,\n 4.4684e-07, 1.0826e-08, 5.2943e-06, 3.3418e-08, 5.4921e-07, 1.0976e-07,\n 1.5948e-07, 2.4045e-05, 7.2821e-07, 1.1407e-06, 1.1984e-06, 5.2391e-07,\n 2.6888e-05, 5.5432e-06, 3.9286e-06, 2.0647e-07, 1.2940e-06, 6.1706e-06,\n 9.1244e-08, 1.2919e-05, 2.1615e-06, 4.3460e-07, 3.3201e-07, 1.0800e-07,\n 5.5930e-07, 6.9191e-08, 3.1351e-06, 2.9704e-05, 2.6638e-09, 5.9547e-06,\n 2.7473e-06, 3.6162e-06, 1.8576e-07, 8.4708e-09, 6.9191e-06, 1.6800e-05,\n 8.3803e-09, 9.4556e-07, 4.6013e-07, 9.5536e-07, 6.5866e-08, 3.8099e-06,\n 1.1868e-07, 2.2819e-07, 9.0153e-06, 9.1942e-08, 3.9201e-08, 9.3314e-06,\n 3.8981e-07, 1.3816e-06, 3.7925e-08, 1.2811e-06, 9.1775e-08, 6.8340e-07,\n 2.9604e-06, 7.1036e-08, 1.1353e-05, 1.7970e-07, 1.8130e-08, 3.3694e-07,\n 1.0401e-05, 6.9363e-06, 2.7035e-08, 1.2029e-08, 6.5539e-07, 1.2520e-07,\n 1.0400e-06, 1.8545e-07, 4.9362e-08, 3.3908e-08, 2.0398e-07, 2.3030e-07,\n 1.6503e-05, 4.6494e-09, 1.7370e-06, 1.2330e-08, 1.1345e-05, 2.6972e-07,\n 9.2217e-07, 3.9712e-07, 2.0868e-06, 8.9464e-07, 1.3500e-07, 7.4810e-08,\n 7.8071e-09, 2.3830e-07, 1.7618e-06, 2.0794e-06, 1.0174e-06, 7.3395e-08,\n 3.0197e-06, 1.8030e-07, 5.6297e-07, 9.6164e-08, 4.6509e-07, 3.3989e-09,\n 9.8611e-08, 6.7695e-07, 1.1851e-06, 1.6024e-07, 4.0292e-07, 5.5765e-08,\n 7.8121e-08, 2.1388e-06, 5.0679e-07, 3.0268e-06, 2.0096e-07, 3.2266e-09,\n 8.1904e-06, 7.6907e-09, 3.9149e-06, 1.7462e-05, 3.1509e-07, 3.0059e-06,\n 1.5575e-07, 3.4174e-07, 2.9144e-06, 8.7198e-07, 1.1342e-07, 2.3053e-07,\n 9.8789e-08, 8.6847e-08, 1.5482e-06, 4.3889e-06, 5.7530e-08, 6.2244e-09,\n 5.5059e-08, 9.7269e-09, 3.2178e-06, 4.5551e-06, 4.0519e-08, 6.5126e-08,\n 1.9208e-06, 9.3137e-06, 5.7335e-07, 1.6517e-08, 4.0529e-06, 3.9037e-06,\n 1.9372e-05, 8.6793e-07, 1.6697e-05, 1.1669e-08, 1.6060e-06, 7.7319e-06,\n 7.4830e-08, 9.0190e-09, 2.5167e-06, 2.1336e-06, 1.2846e-07, 1.2963e-06,\n 1.6114e-07, 4.5637e-09, 1.0119e-07, 8.8747e-07, 3.3086e-06, 5.2300e-06,\n 5.8842e-07, 2.1809e-07, 4.2164e-09, 2.1178e-07, 9.6040e-09, 4.3610e-06,\n 5.6759e-08, 4.4496e-08, 6.4574e-07, 1.7979e-08, 6.9592e-09, 2.0849e-06,\n 6.5613e-08, 9.4344e-07, 7.9663e-07, 1.1593e-05, 6.6291e-06, 2.6889e-06,\n 1.7495e-07, 6.0376e-08, 2.5515e-09, 9.1475e-07, 2.9402e-06, 1.3759e-06,\n 3.1239e-06, 1.1906e-06, 1.4413e-07, 3.9818e-07, 4.7722e-09, 9.2101e-06,\n 3.4293e-07, 6.7347e-09, 3.5541e-06, 4.9544e-09, 9.3051e-08, 2.3475e-07,\n 4.8299e-09, 5.9874e-08, 1.8745e-06, 3.1286e-07, 3.8262e-09, 1.4658e-08,\n 3.9609e-07, 4.4192e-06, 1.3471e-07, 3.9061e-08, 3.9941e-06, 5.2537e-06,\n 3.1511e-09, 1.0330e-05, 1.0093e-08, 1.3333e-06, 1.8541e-07, 3.9210e-08,\n 2.7067e-06, 5.1162e-09, 5.0591e-06, 3.3469e-08, 1.9697e-07, 1.0047e-08,\n 2.7055e-07, 3.9775e-07, 1.0955e-05, 1.3960e-05, 2.7118e-06, 6.5847e-07,\n 1.3738e-07, 1.7471e-05, 2.3288e-07, 1.3379e-09, 4.8078e-07, 6.9721e-09,\n 9.2263e-08, 1.6346e-07, 1.3762e-06, 1.7019e-08, 6.8157e-08, 2.8031e-06,\n 1.7768e-07, 7.4604e-09, 1.7604e-06, 3.0005e-07], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([-1.6028e-17, -5.8330e-17, 3.9616e-16, -3.0733e-17, -8.9117e-17,\n 4.6785e-17, 1.2902e-16, 4.0556e-16, -5.6599e-16, -5.1712e-16,\n 1.4183e-17, -1.0364e-16, -7.6983e-16, 5.1262e-17, 7.7548e-16,\n -5.9984e-16, -1.2507e-15, 3.7049e-17, 3.5972e-16, 6.5277e-17,\n -2.8666e-16, -3.2487e-17, 1.5895e-16, -1.8238e-16, -3.4071e-17,\n -2.0378e-15, -1.3303e-16, -1.5817e-15, -1.2176e-15, -7.0818e-16,\n 4.0508e-16, -1.2899e-15, 1.0056e-15, 1.4678e-16, -4.1452e-17,\n 2.7438e-16, -1.8440e-16, 9.0941e-17, 4.5635e-16, -2.1847e-16,\n 4.3025e-17, -4.3393e-16, 4.6039e-16, 6.9000e-16, -1.1089e-16,\n 6.8133e-16, -2.1876e-17, -1.3857e-15, 4.4148e-16, -4.3241e-16,\n 8.9424e-18, -6.1794e-17, -3.7716e-16, 9.0284e-16, 1.1849e-17,\n 1.3095e-16, 1.2297e-15, -2.8789e-16, -1.5094e-16, 5.5260e-16,\n 3.6115e-16, 5.6512e-16, -8.9691e-16, -1.0834e-17, -6.7597e-18,\n 2.3452e-16, -1.0725e-16, -4.1173e-17, -9.6696e-19, -2.1018e-16,\n 2.0365e-17, -5.4760e-17, 7.4430e-16, -1.7298e-16, 7.5664e-16,\n -1.5561e-16, 2.7952e-18, 3.6112e-16, 1.1401e-15, -8.1504e-16,\n 2.4403e-16, 4.1920e-17, -5.7894e-18, -4.3717e-16, -1.9676e-16,\n -1.7141e-16, 1.2637e-16, -2.7585e-16, 9.6000e-16, -4.1181e-17,\n 1.2851e-15, -1.9468e-16, -9.3793e-16, 2.1622e-16, 1.9792e-16,\n 1.2217e-17, -1.4257e-16, 1.4681e-16, -7.7218e-16, -3.9344e-17,\n 7.9286e-17, -9.4656e-17, -1.0957e-16, 5.7935e-16, -2.7149e-17,\n -3.3906e-16, 5.6727e-17, -2.0975e-16, -9.3854e-16, -1.7865e-16,\n -1.1278e-17, -5.2619e-16, 5.4089e-17, 3.7105e-17, -4.6461e-17,\n -4.6722e-16, -3.9669e-16, 7.4553e-16, 1.2584e-17, -1.8916e-17,\n 1.8433e-17, 2.2775e-16, 6.2224e-16, -2.4385e-17, 1.9860e-17,\n -2.9012e-16, 1.0826e-15, -5.1302e-17, 2.7022e-16, 5.0967e-16,\n 1.2094e-16, -7.1565e-16, 2.2972e-16, -1.2130e-16, -5.9508e-17,\n -1.4092e-15, 7.2816e-16, 3.3821e-17, -1.5872e-16, -3.6564e-16,\n 4.6543e-16, 7.7261e-16, 6.4459e-16, -7.1227e-18, -1.6179e-16,\n -1.3346e-16, -6.2417e-16, -5.4724e-16, 1.4345e-16, 1.6225e-16,\n 1.2773e-16, -1.6449e-15, 3.5302e-16, 2.8606e-16, 7.8211e-17,\n 4.7238e-16, 4.8660e-17, 5.1474e-16, 3.7178e-16, -8.8413e-17,\n 1.7643e-16, -1.5248e-15, -1.0544e-18, 1.1522e-17, 2.6988e-17,\n 1.7165e-16, -2.3660e-17, 1.0650e-16, -5.2030e-17, 2.3834e-17,\n -7.1470e-17, -8.1682e-16, 3.1970e-16, 3.9879e-16, 2.1207e-17,\n -3.0466e-17, -6.1085e-17, -3.8214e-17, 2.4860e-16, 3.7088e-16,\n 7.5709e-17, 1.9662e-17, -2.9583e-16, -1.2867e-17, -1.0794e-16,\n 8.0342e-17, 3.1345e-17, 8.1312e-16, -1.6897e-16, 3.0712e-16,\n 7.1046e-16, 6.8664e-16, 5.5377e-16, -5.1250e-18, 1.4248e-16,\n -1.6810e-16, 3.5602e-16, 1.0890e-17, -1.0696e-15, 1.2923e-15,\n -2.9836e-16, -1.8255e-16, -1.1501e-16, -1.3936e-15, 7.6058e-17,\n -1.2614e-17, -7.1647e-16, 2.4016e-17, 1.6897e-16, -1.5844e-17,\n 1.1314e-16, 1.7213e-16, 5.8552e-16, -2.0641e-16, 2.0194e-18,\n 3.8349e-16, 2.5406e-16, 6.2538e-16, 6.2457e-16, 3.7166e-16,\n 5.5436e-17, 8.7046e-17, -6.6383e-17, -2.7570e-16, 4.3033e-17,\n -1.0073e-17, 3.1502e-18, 4.2289e-17, -1.2231e-15, 7.1979e-18,\n 2.2364e-16, -1.6639e-16, -2.9948e-16, 1.3373e-16, 1.4739e-16,\n 8.5727e-17, 3.4422e-16, -2.3302e-16, 3.9803e-16, 6.2038e-16,\n -2.4039e-16, 4.1722e-16, -4.6742e-16, 6.2072e-17, -1.9251e-16,\n 2.0189e-16, -2.3127e-17, 1.6904e-16, 1.6758e-16, -7.6754e-17,\n 1.3515e-16, 1.0632e-17, -6.1695e-17, 2.0780e-16, 1.5710e-16,\n 5.7399e-17], device='cuda:0')", + "exp_avg_sq": "tensor([1.7447e-07, 1.3916e-08, 6.7413e-09, 3.8996e-09, 4.7392e-07, 1.0853e-07,\n 1.0934e-08, 4.1359e-07, 1.2371e-06, 2.3391e-07, 1.6129e-10, 9.1461e-07,\n 1.2651e-06, 3.3425e-08, 4.1111e-08, 1.7000e-06, 3.8192e-06, 1.1678e-09,\n 1.2769e-07, 3.0935e-09, 1.5129e-06, 9.5494e-09, 1.5694e-07, 3.1365e-08,\n 4.5572e-08, 6.8710e-06, 2.0809e-07, 3.2597e-07, 3.4245e-07, 1.4971e-07,\n 7.6835e-06, 1.5840e-06, 1.1226e-06, 5.9001e-08, 3.6976e-07, 1.7633e-06,\n 2.6074e-08, 3.6918e-06, 6.1766e-07, 1.2419e-07, 9.4873e-08, 3.0863e-08,\n 1.5983e-07, 1.9772e-08, 8.9588e-07, 8.4882e-06, 7.6120e-10, 1.7016e-06,\n 7.8505e-07, 1.0334e-06, 5.3081e-08, 2.4206e-09, 1.9772e-06, 4.8008e-06,\n 2.3947e-09, 2.7020e-07, 1.3149e-07, 2.7300e-07, 1.8822e-08, 1.0887e-06,\n 3.3914e-08, 6.5207e-08, 2.5762e-06, 2.6273e-08, 1.1202e-08, 2.6665e-06,\n 1.1139e-07, 3.9481e-07, 1.0837e-08, 3.6608e-07, 2.6225e-08, 1.9529e-07,\n 8.4595e-07, 2.0299e-08, 3.2443e-06, 5.1350e-08, 5.1809e-09, 9.6282e-08,\n 2.9722e-06, 1.9821e-06, 7.7254e-09, 3.4373e-09, 1.8728e-07, 3.5776e-08,\n 2.9718e-07, 5.2994e-08, 1.4105e-08, 9.6896e-09, 5.8290e-08, 6.5810e-08,\n 4.7159e-06, 1.3286e-09, 4.9635e-07, 3.5233e-09, 3.2419e-06, 7.7074e-08,\n 2.6352e-07, 1.1348e-07, 5.9633e-07, 2.5565e-07, 3.8579e-08, 2.1378e-08,\n 2.2309e-09, 6.8097e-08, 5.0346e-07, 5.9421e-07, 2.9072e-07, 2.0973e-08,\n 8.6289e-07, 5.1523e-08, 1.6087e-07, 2.7480e-08, 1.3290e-07, 9.7128e-10,\n 2.8179e-08, 1.9344e-07, 3.3864e-07, 4.5788e-08, 1.1514e-07, 1.5935e-08,\n 2.2324e-08, 6.1118e-07, 1.4482e-07, 8.6493e-07, 5.7427e-08, 9.2204e-10,\n 2.3405e-06, 2.1977e-09, 1.1187e-06, 4.9899e-06, 9.0038e-08, 8.5897e-07,\n 4.4507e-08, 9.7655e-08, 8.3283e-07, 2.4917e-07, 3.2410e-08, 6.5874e-08,\n 2.8230e-08, 2.4817e-08, 4.4240e-07, 1.2542e-06, 1.6439e-08, 1.7787e-09,\n 1.5734e-08, 2.7795e-09, 9.1950e-07, 1.3017e-06, 1.1579e-08, 1.8610e-08,\n 5.4888e-07, 2.6615e-06, 1.6384e-07, 4.7200e-09, 1.1581e-06, 1.1155e-06,\n 5.5357e-06, 2.4802e-07, 4.7713e-06, 3.3344e-09, 4.5894e-07, 2.2094e-06,\n 2.1383e-08, 2.5773e-09, 7.1918e-07, 6.0968e-07, 3.6708e-08, 3.7043e-07,\n 4.6047e-08, 1.3041e-09, 2.8915e-08, 2.5360e-07, 9.4546e-07, 1.4945e-06,\n 1.6815e-07, 6.2321e-08, 1.2049e-09, 6.0518e-08, 2.7444e-09, 1.2462e-06,\n 1.6219e-08, 1.2715e-08, 1.8452e-07, 5.1375e-09, 1.9887e-09, 5.9577e-07,\n 1.8750e-08, 2.6959e-07, 2.2764e-07, 3.3128e-06, 1.8943e-06, 7.6838e-07,\n 4.9992e-08, 1.7253e-08, 7.2910e-10, 2.6140e-07, 8.4020e-07, 3.9318e-07,\n 8.9268e-07, 3.4023e-07, 4.1186e-08, 1.1378e-07, 1.3637e-09, 2.6318e-06,\n 9.7996e-08, 1.9245e-09, 1.0156e-06, 1.4158e-09, 2.6590e-08, 6.7081e-08,\n 1.3802e-09, 1.7109e-08, 5.3566e-07, 8.9402e-08, 1.0934e-09, 4.1885e-09,\n 1.1319e-07, 1.2628e-06, 3.8494e-08, 1.1162e-08, 1.1414e-06, 1.5013e-06,\n 9.0044e-10, 2.9518e-06, 2.8842e-09, 3.8101e-07, 5.2982e-08, 1.1205e-08,\n 7.7347e-07, 1.4620e-09, 1.4457e-06, 9.5640e-09, 5.6287e-08, 2.8710e-09,\n 7.7311e-08, 1.1366e-07, 3.1304e-06, 3.9892e-06, 7.7493e-07, 1.8816e-07,\n 3.9258e-08, 4.9924e-06, 6.6548e-08, 3.8231e-10, 1.3739e-07, 1.9923e-09,\n 2.6365e-08, 4.6709e-08, 3.9327e-07, 4.8633e-09, 1.9476e-08, 8.0102e-07,\n 5.0775e-08, 2.1319e-09, 5.0304e-07, 8.5743e-08], device='cuda:0')" }, "42": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.0327e-19, -2.6577e-18, 6.2409e-19, -1.2187e-18, -1.0248e-16,\n -2.3122e-19, 8.3310e-20, -4.0887e-18, -1.1687e-16, -1.1692e-16,\n -6.8909e-19, -1.3538e-18, -7.1427e-17, 2.0410e-19, -5.0551e-18,\n -1.5189e-16, -8.3856e-17, 3.3478e-19, -6.5655e-19, 6.5132e-19,\n -1.1643e-16, 1.9399e-19, 5.8351e-19, -2.4205e-17, -1.4794e-19,\n -1.7874e-16, 2.3441e-18, -5.3374e-17, -7.1810e-17, -5.2151e-17,\n -5.1919e-18, -1.0891e-16, -1.5225e-17, -5.4782e-19, 6.4670e-19,\n -1.9059e-19, -1.8030e-17, -2.1305e-18, -5.3087e-17, -1.3611e-18,\n 8.1425e-19, -4.4653e-17, -1.4265e-18, 4.3852e-19, -2.8106e-18,\n -7.0888e-19, -4.8509e-19, -1.5175e-16, -2.1615e-19, -1.0905e-16,\n -1.2082e-19, -2.0737e-18, -1.1500e-16, -2.8510e-18, 7.8674e-19,\n 1.7650e-19, -2.4549e-19, -5.7909e-17, -4.6142e-18, -9.4885e-17,\n 9.1733e-20, -4.3703e-19, -1.1171e-16, -3.9285e-18, -2.1058e-18,\n -2.5105e-18, -4.3679e-17, -5.7941e-19, -1.7443e-17, 8.7099e-19,\n -7.5236e-19, 1.1905e-18, -1.2622e-17, -3.4796e-18, -8.1015e-17,\n -1.6345e-18, -1.0264e-18, -2.3704e-19, 2.0765e-18, -7.6446e-17,\n 3.2949e-19, 4.0547e-19, 6.6662e-19, 2.5648e-18, -1.1528e-16,\n -2.1780e-18, -1.9681e-18, 1.9141e-18, -4.8004e-19, -5.8848e-17,\n -5.5411e-17, -3.8117e-19, -6.8058e-17, 4.0779e-19, -1.1108e-16,\n -1.4197e-17, -3.3694e-19, -1.0034e-16, -3.3643e-17, -8.4842e-17,\n -2.6860e-19, -1.6749e-18, -1.0965e-18, -1.9115e-17, -1.1067e-18,\n -4.9921e-17, -1.1998e-16, 5.1650e-19, -2.4442e-17, -2.3545e-18,\n -2.2854e-17, 7.4148e-18, -1.9543e-19, -4.2816e-19, -4.9001e-19,\n -2.3652e-17, -1.5144e-16, -6.5389e-19, -9.2264e-19, -1.2605e-21,\n 9.9448e-19, 3.1485e-19, -3.5007e-17, -5.0642e-17, -2.9026e-20,\n -3.9426e-18, 1.9194e-18, 1.7335e-19, -4.4530e-19, -1.4857e-19,\n 3.6939e-19, -1.4391e-16, -1.8521e-19, -2.2109e-18, -1.0239e-17,\n -7.5278e-17, -7.5420e-18, 5.2645e-19, 2.9836e-18, 3.2299e-18,\n -7.2952e-20, -8.1642e-17, 4.2856e-19, -2.6856e-18, -3.3384e-20,\n -1.2093e-17, -4.8300e-17, -1.3742e-16, 1.7783e-18, 6.8514e-19,\n 3.2098e-20, -9.4526e-17, 2.8740e-20, -9.5027e-19, -1.1001e-16,\n -4.5309e-19, -1.3082e-16, -9.7611e-18, -1.0607e-18, -2.3038e-18,\n 3.7435e-19, -1.4352e-16, -3.0412e-18, 8.3457e-19, -2.7605e-17,\n 2.5446e-19, -2.7559e-19, -1.3311e-18, -4.7468e-18, 1.7266e-19,\n -4.3061e-18, -5.9871e-17, 5.3061e-20, 5.3933e-20, -9.0508e-17,\n -1.3730e-20, 2.1471e-19, -6.3773e-19, -1.9906e-19, -3.0859e-17,\n 1.9157e-18, 1.3733e-18, 3.7073e-18, 1.5351e-18, 1.3640e-18,\n 1.1238e-19, 1.1754e-18, -1.1054e-17, -7.8292e-19, 4.4766e-19,\n -2.9770e-18, -1.6525e-17, 8.3861e-19, -3.5015e-18, -4.2999e-19,\n -3.3518e-17, -4.6783e-17, -6.4546e-17, -1.1879e-16, -5.3391e-17,\n 2.4288e-18, -1.1431e-17, -2.3966e-18, -1.9544e-16, -6.9793e-17,\n 1.0212e-18, -1.9212e-16, -1.9505e-19, 8.8598e-19, -1.3403e-17,\n 3.3047e-19, -9.3310e-18, -1.2419e-18, -2.3743e-18, -4.6772e-19,\n -3.0323e-19, 4.2080e-19, 2.3003e-19, -4.7771e-19, 5.9142e-20,\n -3.4182e-19, 7.9334e-19, -4.9113e-18, -8.4202e-17, 1.3299e-18,\n -1.2625e-18, -9.1729e-20, 3.8999e-19, -1.3917e-16, 1.6893e-18,\n 3.0949e-19, -4.0057e-18, 3.3154e-19, 5.7850e-19, -3.5605e-19,\n -1.2442e-18, -1.8193e-19, -1.0621e-17, -8.5039e-17, -3.8967e-19,\n -2.6085e-18, 1.3447e-19, 5.5198e-18, 6.8258e-21, 1.9036e-18,\n -1.2276e-19, -2.8176e-18, 1.3239e-19, -1.1611e-16, -1.5466e-17,\n -5.7767e-20, 2.3134e-19, -5.2077e-17, 5.2705e-19, -2.2644e-17,\n -5.5849e-20], device='cuda:0')", - "exp_avg_sq": "tensor([4.6220e-12, 1.5308e-10, 2.0565e-13, 7.2324e-11, 7.2263e-09, 2.3398e-12,\n 3.1713e-12, 3.4036e-10, 8.3133e-09, 6.9418e-09, 1.5325e-12, 5.2615e-12,\n 9.7838e-10, 4.2130e-11, 1.1752e-09, 1.3789e-08, 2.2594e-09, 1.8902e-13,\n 1.1901e-11, 1.6053e-11, 1.3179e-08, 3.0363e-12, 1.5426e-10, 6.3136e-12,\n 1.5643e-10, 1.6692e-08, 4.9244e-13, 5.3148e-12, 1.4538e-10, 2.5854e-09,\n 1.2034e-08, 3.8512e-09, 9.1761e-10, 1.3662e-11, 3.2043e-12, 4.8711e-10,\n 1.4994e-09, 3.0552e-09, 3.6976e-09, 7.9266e-13, 8.4870e-12, 4.1169e-11,\n 6.8312e-11, 2.3292e-12, 9.3320e-10, 2.1148e-09, 4.5615e-13, 2.1870e-08,\n 9.7071e-11, 7.4249e-09, 3.7645e-11, 1.5490e-13, 1.1697e-08, 5.8701e-09,\n 2.2638e-11, 3.2372e-13, 1.3573e-10, 4.3650e-10, 1.6376e-10, 1.8179e-08,\n 8.3348e-12, 1.9946e-11, 8.4795e-09, 1.7683e-10, 9.7272e-11, 2.2729e-09,\n 3.5220e-10, 9.4232e-12, 1.6257e-12, 1.1883e-12, 6.9904e-11, 1.3341e-13,\n 1.5592e-10, 2.5447e-13, 2.9863e-08, 2.8066e-14, 3.4127e-13, 8.1702e-12,\n 1.0606e-09, 2.3174e-09, 4.0473e-12, 3.9170e-11, 2.6908e-11, 1.1391e-12,\n 9.0506e-09, 1.1689e-13, 1.9787e-12, 6.1438e-14, 2.0531e-11, 1.1865e-09,\n 2.1029e-08, 2.4699e-13, 4.5965e-09, 2.0963e-13, 3.3557e-08, 5.4776e-10,\n 5.1034e-13, 6.7865e-09, 3.4842e-09, 4.0696e-09, 3.6657e-11, 8.8120e-11,\n 5.5775e-11, 5.7196e-10, 9.6842e-12, 2.4403e-10, 9.9548e-09, 3.1271e-15,\n 2.2211e-09, 8.0903e-14, 6.7645e-11, 3.6395e-13, 1.4843e-11, 1.0191e-12,\n 6.8304e-11, 1.0164e-09, 1.1369e-08, 5.2361e-11, 2.9039e-14, 2.6127e-11,\n 1.2436e-11, 9.3146e-11, 3.5804e-09, 9.1919e-10, 2.7392e-12, 2.7176e-12,\n 5.5075e-10, 1.0093e-12, 1.1285e-10, 3.6454e-09, 1.0160e-11, 1.3354e-08,\n 2.0686e-11, 1.1420e-13, 1.2532e-09, 8.1127e-10, 1.0133e-09, 3.5131e-11,\n 8.8374e-14, 3.2085e-14, 2.6053e-11, 1.3075e-08, 2.2493e-12, 6.7393e-13,\n 9.2406e-13, 6.1779e-13, 3.4646e-10, 1.2035e-08, 9.7467e-12, 1.2315e-13,\n 9.2262e-11, 1.7707e-09, 6.8156e-13, 8.6886e-12, 1.0336e-08, 2.4584e-10,\n 3.9803e-08, 1.0596e-10, 5.3809e-10, 3.5716e-13, 1.2941e-10, 1.4225e-08,\n 2.4250e-12, 7.0791e-11, 3.3999e-10, 1.4328e-11, 3.5793e-11, 2.9238e-10,\n 8.1915e-13, 6.7406e-13, 1.9671e-10, 1.7274e-10, 7.9235e-11, 3.7939e-10,\n 3.5424e-09, 2.8964e-14, 4.8586e-12, 8.3238e-11, 2.7999e-12, 1.3235e-09,\n 1.8849e-11, 3.8195e-12, 2.0302e-12, 3.1181e-12, 6.1624e-15, 5.5952e-11,\n 4.1416e-14, 1.2687e-09, 5.0267e-13, 1.4860e-09, 3.2602e-10, 2.0425e-09,\n 1.3305e-11, 2.6290e-10, 2.5389e-13, 9.2348e-10, 1.2681e-09, 1.7061e-09,\n 6.9620e-09, 8.9985e-09, 1.7897e-13, 5.8619e-12, 8.7944e-14, 2.8544e-08,\n 2.3649e-09, 1.0972e-12, 1.8240e-08, 5.9871e-14, 6.0767e-13, 1.0298e-12,\n 8.9177e-13, 1.2721e-09, 2.3167e-11, 1.8076e-13, 8.2853e-14, 3.1741e-13,\n 7.0867e-13, 1.0351e-10, 2.1732e-11, 7.7535e-12, 1.4436e-10, 2.5088e-10,\n 9.8901e-14, 6.0477e-09, 2.2978e-12, 9.1824e-12, 2.2496e-14, 4.6747e-12,\n 1.0978e-08, 4.7358e-13, 3.3626e-10, 1.0170e-12, 1.6436e-12, 5.7786e-13,\n 2.0520e-12, 9.6105e-14, 1.2924e-09, 5.9683e-09, 6.9144e-09, 1.5010e-11,\n 2.2801e-13, 2.7269e-09, 1.8813e-12, 5.9649e-14, 3.7628e-13, 7.3421e-13,\n 1.2340e-10, 2.7879e-12, 8.6516e-09, 1.8916e-12, 1.5570e-12, 1.7854e-11,\n 4.3645e-10, 8.1353e-13, 1.0852e-10, 2.8463e-11], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([-8.5171e-19, -2.2545e-18, -8.6593e-20, -1.3267e-18, -8.8495e-17,\n -4.7121e-19, 3.1247e-19, -3.2253e-18, -9.6702e-17, -1.0203e-16,\n -1.5924e-18, -3.3330e-19, -5.3567e-17, -2.0774e-19, -4.0251e-18,\n -1.1727e-16, -6.9292e-17, 3.3246e-19, 6.9034e-19, 8.3155e-19,\n -1.0588e-16, -8.7277e-19, 3.3391e-19, -2.0825e-17, -1.6357e-18,\n -1.2931e-16, 2.5419e-18, -3.7041e-17, -5.0133e-17, -4.2779e-17,\n -6.5668e-18, -8.0084e-17, -1.3756e-17, -1.4362e-20, 4.1108e-19,\n -7.0366e-20, -1.6567e-17, -1.2982e-18, -4.9462e-17, -6.2913e-19,\n -2.9886e-19, -3.0156e-17, -5.3824e-19, -2.7858e-19, -3.7438e-18,\n -2.9695e-19, -1.4799e-18, -1.5894e-16, -3.1224e-19, -9.9036e-17,\n -7.8032e-19, -5.9373e-19, -1.0357e-16, -1.9561e-18, 4.0180e-19,\n 1.5919e-19, 1.2554e-18, -4.5686e-17, -3.8552e-18, -9.3279e-17,\n -3.7720e-20, -1.2876e-21, -1.0089e-16, -3.2914e-18, -1.1771e-18,\n -2.6325e-18, -4.2135e-17, -6.3329e-21, -1.4060e-17, 1.1245e-18,\n -1.2628e-19, 1.2497e-18, -6.7008e-18, -3.1556e-18, -8.0511e-17,\n -9.0999e-19, -4.5764e-19, 9.5598e-20, 9.4157e-19, -6.8683e-17,\n 2.2222e-19, -6.2397e-19, 1.7133e-19, 2.5810e-18, -1.1124e-16,\n -1.7409e-18, 3.6608e-19, 1.9527e-18, -7.8409e-19, -5.7903e-17,\n -3.7312e-17, -6.5287e-19, -5.7300e-17, 4.1409e-19, -1.1595e-16,\n -1.5647e-17, -2.5808e-19, -9.4360e-17, -3.3897e-17, -7.4211e-17,\n -1.5328e-19, -1.4850e-18, -3.9792e-19, -2.0812e-17, -5.5671e-19,\n -3.6561e-17, -1.0967e-16, 6.4091e-19, -2.3974e-17, -1.6230e-18,\n -2.4102e-17, 6.4126e-18, -7.0663e-20, -5.8495e-20, -8.7547e-19,\n -2.1631e-17, -1.3450e-16, -3.9644e-19, -5.0478e-19, -6.4921e-20,\n -2.1190e-20, -2.4387e-20, -3.2549e-17, -3.8520e-17, -3.9875e-19,\n -3.5778e-18, 1.7444e-18, 6.5381e-20, 7.9430e-20, -7.8847e-19,\n 6.4260e-19, -1.2224e-16, -2.3406e-20, -2.2930e-18, -1.1301e-17,\n -6.8647e-17, -8.7332e-18, -3.7395e-21, 2.0190e-18, 2.9414e-18,\n 1.2777e-19, -6.5409e-17, -3.7778e-19, -5.3346e-19, 1.6544e-19,\n -4.8977e-18, -4.0813e-17, -1.1536e-16, 5.9458e-19, 3.4935e-19,\n 1.6430e-19, -7.0933e-17, -1.2700e-19, -6.0851e-19, -8.7762e-17,\n -8.2155e-20, -1.0876e-16, -6.4931e-18, 1.0896e-19, 2.4271e-19,\n 4.2736e-19, -1.4123e-16, -2.7892e-18, -4.0782e-19, -2.7244e-17,\n 1.7841e-19, -4.0232e-19, -6.5769e-19, -6.2555e-18, -2.1215e-19,\n -3.4916e-18, -4.2709e-17, 3.5725e-20, 1.1112e-19, -7.3084e-17,\n -6.4384e-20, 2.2315e-19, -1.2715e-18, -2.5230e-19, -2.9660e-17,\n 9.2231e-19, 5.0690e-19, 2.8991e-18, 3.0150e-19, 1.0020e-18,\n 4.0196e-19, 5.7823e-19, -7.7986e-18, 1.4154e-19, 8.5952e-20,\n -2.8868e-18, -2.3357e-17, 3.1469e-19, -5.2298e-18, 7.4645e-20,\n -3.0551e-17, -3.1043e-17, -5.0558e-17, -1.1013e-16, -4.4542e-17,\n 2.9682e-18, -9.1875e-18, -2.0673e-18, -1.7584e-16, -5.5986e-17,\n 6.9988e-19, -1.5347e-16, -2.5163e-19, 3.1306e-19, -1.1042e-17,\n -3.0526e-20, -9.3587e-18, -1.7667e-18, -1.0288e-18, -3.1556e-19,\n -1.8463e-19, 2.7401e-19, 2.8692e-20, -8.0841e-19, -3.9719e-20,\n 2.5558e-19, 8.9749e-20, -2.9935e-18, -6.7261e-17, 8.3663e-19,\n -8.2822e-19, -4.9038e-19, 1.6123e-19, -1.2198e-16, 9.8029e-19,\n 5.3155e-20, -2.4119e-18, 3.8577e-19, 3.9102e-20, 9.9263e-20,\n 1.8364e-19, -1.8191e-19, -1.3960e-17, -6.7768e-17, 6.6472e-20,\n -1.4009e-18, -2.6814e-19, 5.3534e-18, -1.6806e-19, 8.9490e-19,\n -2.5579e-19, -2.6465e-18, 5.2817e-20, -9.3986e-17, -7.1225e-18,\n -2.5390e-19, 7.1562e-20, -4.3653e-17, 4.1406e-19, -2.0768e-17,\n 3.3432e-19], device='cuda:0')", + "exp_avg_sq": "tensor([1.3208e-12, 4.3745e-11, 5.8766e-14, 2.0667e-11, 2.0650e-09, 6.6862e-13,\n 9.0621e-13, 9.7260e-11, 2.3756e-09, 1.9837e-09, 4.3793e-13, 1.5035e-12,\n 2.7958e-10, 1.2039e-11, 3.3581e-10, 3.9402e-09, 6.4564e-10, 5.4013e-14,\n 3.4007e-12, 4.5871e-12, 3.7660e-09, 8.6764e-13, 4.4082e-11, 1.8042e-12,\n 4.4702e-11, 4.7699e-09, 1.4072e-13, 1.5188e-12, 4.1545e-11, 7.3881e-10,\n 3.4388e-09, 1.1005e-09, 2.6221e-10, 3.9040e-12, 9.1566e-13, 1.3920e-10,\n 4.2845e-10, 8.7306e-10, 1.0566e-09, 2.2651e-13, 2.4252e-12, 1.1764e-11,\n 1.9521e-11, 6.6560e-13, 2.6667e-10, 6.0431e-10, 1.3035e-13, 6.2496e-09,\n 2.7739e-11, 2.1217e-09, 1.0757e-11, 4.4265e-14, 3.3425e-09, 1.6774e-09,\n 6.4689e-12, 9.2507e-14, 3.8787e-11, 1.2473e-10, 4.6796e-11, 5.1948e-09,\n 2.3817e-12, 5.6997e-12, 2.4231e-09, 5.0529e-11, 2.7796e-11, 6.4949e-10,\n 1.0064e-10, 2.6928e-12, 4.6457e-13, 3.3958e-13, 1.9976e-11, 3.8123e-14,\n 4.4556e-11, 7.2717e-14, 8.5335e-09, 8.0199e-15, 9.7520e-14, 2.3347e-12,\n 3.0308e-10, 6.6221e-10, 1.1566e-12, 1.1193e-11, 7.6891e-12, 3.2551e-13,\n 2.5863e-09, 3.3401e-14, 5.6543e-13, 1.7556e-14, 5.8669e-12, 3.3904e-10,\n 6.0092e-09, 7.0579e-14, 1.3135e-09, 5.9903e-14, 9.5892e-09, 1.5653e-10,\n 1.4583e-13, 1.9393e-09, 9.9564e-10, 1.1629e-09, 1.0475e-11, 2.5181e-11,\n 1.5938e-11, 1.6344e-10, 2.7673e-12, 6.9735e-11, 2.8447e-09, 8.9360e-16,\n 6.3470e-10, 2.3119e-14, 1.9330e-11, 1.0400e-13, 4.2415e-12, 2.9122e-13,\n 1.9518e-11, 2.9045e-10, 3.2489e-09, 1.4962e-11, 8.2983e-15, 7.4659e-12,\n 3.5536e-12, 2.6617e-11, 1.0231e-09, 2.6266e-10, 7.8276e-13, 7.7659e-13,\n 1.5738e-10, 2.8841e-13, 3.2248e-11, 1.0417e-09, 2.9033e-12, 3.8159e-09,\n 5.9113e-12, 3.2633e-14, 3.5811e-10, 2.3183e-10, 2.8957e-10, 1.0039e-11,\n 2.5254e-14, 9.1687e-15, 7.4448e-12, 3.7363e-09, 6.4275e-13, 1.9258e-13,\n 2.6406e-13, 1.7654e-13, 9.9004e-11, 3.4391e-09, 2.7852e-12, 3.5191e-14,\n 2.6365e-11, 5.0598e-10, 1.9476e-13, 2.4828e-12, 2.9536e-09, 7.0251e-11,\n 1.1374e-08, 3.0279e-11, 1.5376e-10, 1.0206e-13, 3.6980e-11, 4.0649e-09,\n 6.9296e-13, 2.0229e-11, 9.7155e-11, 4.0943e-12, 1.0228e-11, 8.3551e-11,\n 2.3408e-13, 1.9262e-13, 5.6212e-11, 4.9362e-11, 2.2642e-11, 1.0841e-10,\n 1.0123e-09, 8.2766e-15, 1.3884e-12, 2.3786e-11, 8.0010e-13, 3.7819e-10,\n 5.3862e-12, 1.0915e-12, 5.8015e-13, 8.9103e-13, 1.7610e-15, 1.5989e-11,\n 1.1835e-14, 3.6253e-10, 1.4364e-13, 4.2464e-10, 9.3161e-11, 5.8367e-10,\n 3.8020e-12, 7.5127e-11, 7.2550e-14, 2.6389e-10, 3.6236e-10, 4.8754e-10,\n 1.9894e-09, 2.5714e-09, 5.1142e-14, 1.6751e-12, 2.5131e-14, 8.1566e-09,\n 6.7578e-10, 3.1354e-13, 5.2124e-09, 1.7109e-14, 1.7365e-13, 2.9426e-13,\n 2.5483e-13, 3.6352e-10, 6.6203e-12, 5.1653e-14, 2.3676e-14, 9.0701e-14,\n 2.0251e-13, 2.9578e-11, 6.2102e-12, 2.2156e-12, 4.1252e-11, 7.1691e-11,\n 2.8262e-14, 1.7282e-09, 6.5663e-13, 2.6240e-12, 6.4284e-15, 1.3358e-12,\n 3.1371e-09, 1.3533e-13, 9.6088e-11, 2.9062e-13, 4.6967e-13, 1.6513e-13,\n 5.8637e-13, 2.7463e-14, 3.6930e-10, 1.7055e-09, 1.9758e-09, 4.2893e-12,\n 6.5157e-14, 7.7924e-10, 5.3760e-13, 1.7045e-14, 1.0752e-13, 2.0981e-13,\n 3.5263e-11, 7.9667e-13, 2.4723e-09, 5.4054e-13, 4.4491e-13, 5.1021e-12,\n 1.2472e-10, 2.3247e-13, 3.1011e-11, 8.1337e-12], device='cuda:0')" }, "43": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.7672e-18, -5.8285e-19, -2.8169e-18, -1.6982e-18, -6.3716e-17,\n -1.2756e-18, -8.2369e-18, -1.3153e-17, -7.8744e-17, -7.2466e-17,\n -1.8389e-17, 8.8957e-19, -6.6308e-17, -4.6556e-18, -6.3556e-18,\n -8.2177e-17, -7.9941e-17, -2.3069e-19, 8.3890e-18, -7.1519e-18,\n -6.5294e-17, -1.3669e-17, -8.0618e-18, -3.8296e-17, -1.6606e-17,\n -1.1130e-16, -7.9369e-19, -8.3169e-17, -8.3622e-17, -5.6613e-17,\n -1.4507e-17, -8.7366e-17, -1.6249e-17, -4.9245e-18, -2.3179e-19,\n -2.3056e-18, -3.0539e-17, -1.5431e-17, -3.6665e-17, 8.5718e-19,\n -3.6867e-18, -5.5546e-17, 1.1330e-17, 9.3308e-18, -1.9123e-17,\n 7.7123e-18, -1.6575e-18, -8.0402e-17, -3.1401e-18, -6.6846e-17,\n -2.7447e-18, 1.4691e-18, -6.8076e-17, -4.5311e-18, -7.5742e-18,\n -2.0982e-18, -7.4388e-20, -5.3029e-17, 3.3858e-18, -4.7355e-17,\n -2.2671e-18, 3.3723e-18, -7.3500e-17, 2.5310e-18, -2.8647e-19,\n -1.2197e-17, -4.2311e-17, 6.8082e-19, -3.0393e-17, -5.2804e-19,\n -1.8274e-18, -1.1392e-19, -1.7217e-17, 2.7682e-18, -3.9252e-17,\n 1.1354e-18, -5.3513e-20, 4.7133e-18, 3.9144e-18, -6.6725e-17,\n -2.6697e-18, -4.1358e-18, -6.2399e-18, -1.9622e-18, -6.0475e-17,\n 1.5917e-18, -1.4531e-17, -1.0136e-18, 2.2634e-17, -4.7163e-17,\n -3.3876e-17, 2.8622e-19, -6.9375e-17, -4.0740e-18, -5.2983e-17,\n -2.5306e-17, 2.0447e-19, -5.3155e-17, -5.5005e-17, -5.2651e-17,\n -4.1809e-19, 4.3139e-19, -9.0890e-18, -2.0442e-17, 2.7562e-19,\n -5.3692e-17, -6.1719e-17, -4.2496e-19, -5.3261e-17, 1.7975e-18,\n -3.1813e-17, -5.9585e-18, -2.3746e-18, -1.7383e-18, -4.8031e-18,\n -4.1071e-17, -7.7135e-17, 6.7579e-18, 8.2871e-20, -7.6275e-18,\n -1.3706e-17, -7.0292e-18, -2.8689e-17, -4.7840e-17, -6.6798e-19,\n -2.9453e-17, 8.7076e-18, -1.1842e-19, -1.6447e-19, -2.2130e-18,\n 3.8137e-18, -8.4337e-17, -3.7883e-18, 1.4560e-18, -2.6119e-17,\n -7.2463e-17, -1.0487e-17, -9.7475e-19, -2.1344e-18, -2.5231e-18,\n 2.0837e-18, -4.7126e-17, 7.4428e-18, 2.7002e-19, -9.1392e-20,\n -3.3046e-17, -5.6270e-17, -7.6754e-17, -1.0527e-17, -2.3404e-18,\n -6.6384e-18, -9.3488e-17, 2.4032e-18, 5.2540e-18, -6.4363e-17,\n 2.3393e-18, -6.6455e-17, -1.9522e-17, 4.4945e-18, -1.8848e-17,\n -4.0689e-18, -8.6655e-17, -1.9812e-17, -5.3259e-18, -3.2158e-17,\n -1.3516e-18, -5.5346e-18, -1.3276e-17, -2.4551e-17, -1.9012e-18,\n 2.6177e-18, -6.4820e-17, -2.6500e-18, -1.1566e-18, -5.9349e-17,\n -8.8126e-20, -1.5133e-17, -1.7677e-18, 9.9465e-19, -3.0408e-17,\n -7.9032e-18, -1.2398e-17, -2.8996e-18, -1.2674e-17, -1.0080e-18,\n -3.5432e-18, -6.0381e-18, -1.6643e-17, 6.8516e-19, -4.2589e-18,\n -7.9245e-18, -1.5958e-17, 4.6942e-18, -1.0550e-18, 2.0000e-18,\n -4.0471e-17, -3.9402e-17, -5.1124e-17, -7.9850e-17, -2.8304e-17,\n -1.4817e-18, -3.7334e-17, 1.9676e-18, -9.6538e-17, -6.0674e-17,\n -5.1052e-19, -9.8720e-17, -6.6282e-20, -1.0346e-17, -2.8486e-17,\n -9.9254e-19, -2.2061e-17, -5.3106e-18, 1.5290e-18, -1.1439e-18,\n -4.2581e-18, -3.4267e-18, 4.1168e-18, 5.4862e-18, 1.7364e-18,\n -3.4121e-18, -4.3935e-18, 3.9384e-18, -5.9012e-17, -4.6850e-18,\n -3.1914e-19, -1.1994e-18, -1.1864e-17, -9.0238e-17, -6.1153e-18,\n -1.2152e-18, -2.7782e-17, -3.1402e-19, -1.2211e-17, -1.9370e-18,\n 4.8179e-19, -2.5672e-18, -2.8907e-17, -4.8380e-17, 2.2492e-18,\n 1.7646e-18, -2.2652e-18, -4.1339e-18, -2.8282e-19, -1.2895e-18,\n 1.1763e-18, -2.0338e-19, -3.5906e-18, -6.1469e-17, -3.3912e-17,\n -5.8061e-18, -1.9259e-19, -4.8231e-17, -5.0755e-18, -3.0028e-17,\n -7.2831e-18], device='cuda:0')", - "exp_avg_sq": "tensor([2.3223e-10, 7.0042e-11, 1.2312e-13, 3.9841e-11, 2.9612e-09, 1.0365e-12,\n 1.8199e-12, 2.6036e-10, 6.0461e-09, 3.4571e-09, 6.9967e-11, 8.4581e-10,\n 4.0715e-09, 1.9727e-11, 8.7269e-10, 7.4083e-09, 8.7295e-09, 3.6861e-14,\n 5.6829e-12, 2.5322e-10, 7.1235e-09, 1.3738e-12, 3.8865e-10, 3.9392e-10,\n 5.0383e-10, 1.6519e-08, 7.4240e-11, 1.1855e-09, 1.6120e-09, 1.8083e-09,\n 6.8419e-09, 5.8013e-09, 3.1554e-09, 7.1950e-12, 1.6626e-10, 1.0919e-09,\n 1.1352e-09, 3.7385e-09, 2.8575e-09, 1.3345e-10, 8.0433e-11, 6.0031e-10,\n 3.1286e-11, 9.8640e-13, 1.3123e-09, 8.3000e-09, 1.5597e-11, 8.6408e-09,\n 1.1932e-10, 5.1581e-09, 1.7324e-11, 6.0636e-12, 7.4649e-09, 2.7098e-09,\n 1.3273e-11, 2.5554e-12, 5.8618e-10, 1.6878e-09, 8.4125e-11, 6.3692e-09,\n 4.4198e-12, 1.0514e-11, 8.2743e-09, 7.6714e-11, 4.9665e-11, 2.1648e-09,\n 1.2033e-09, 2.7442e-10, 2.9428e-10, 2.9866e-10, 3.4747e-11, 4.3013e-11,\n 8.0666e-11, 1.8350e-11, 1.0726e-08, 1.7602e-11, 1.3392e-11, 4.2514e-12,\n 6.7329e-10, 5.6705e-09, 2.1518e-12, 1.9394e-11, 2.5045e-10, 3.2588e-11,\n 3.7757e-09, 3.1578e-11, 9.9577e-13, 2.6017e-12, 1.1301e-11, 1.5044e-09,\n 1.0318e-08, 4.2182e-12, 2.8370e-09, 8.6995e-14, 1.1297e-08, 1.1224e-09,\n 2.6443e-10, 2.9203e-09, 1.8215e-09, 2.8202e-09, 1.8873e-11, 4.4239e-11,\n 2.7113e-11, 1.0220e-09, 2.9559e-10, 2.1081e-09, 3.9660e-09, 9.5544e-14,\n 1.9333e-09, 1.3805e-11, 8.9565e-10, 1.3682e-11, 1.3246e-10, 2.5437e-13,\n 3.1279e-11, 1.4533e-09, 4.3345e-09, 2.7001e-11, 4.3767e-12, 1.3687e-11,\n 6.8430e-12, 1.3396e-10, 1.5022e-09, 3.0395e-09, 7.7548e-11, 3.3518e-12,\n 6.8507e-10, 5.7189e-13, 4.6464e-10, 3.8060e-09, 4.5907e-12, 5.9498e-09,\n 1.0791e-11, 9.2558e-11, 7.3260e-10, 1.9695e-09, 1.1541e-09, 1.6671e-11,\n 4.3793e-12, 1.3855e-12, 5.4094e-11, 5.3522e-09, 1.0087e-12, 9.8068e-11,\n 2.8801e-11, 1.8396e-10, 2.8681e-09, 6.4091e-09, 5.2872e-12, 5.7781e-11,\n 4.3823e-10, 6.7702e-09, 6.4164e-11, 1.7395e-12, 5.7964e-09, 5.2048e-10,\n 1.5738e-08, 6.6370e-10, 4.2161e-09, 2.0061e-11, 4.3157e-10, 8.7533e-09,\n 1.0442e-10, 3.8017e-11, 2.1983e-09, 1.3096e-10, 1.6840e-11, 4.2099e-10,\n 2.2952e-10, 2.2106e-13, 8.7407e-11, 1.3842e-09, 2.0936e-10, 6.8310e-10,\n 2.1917e-09, 6.7279e-12, 1.9415e-12, 3.3809e-11, 6.2556e-13, 3.1560e-09,\n 9.7804e-12, 2.0992e-12, 8.2942e-11, 1.3039e-12, 3.1647e-15, 6.0960e-10,\n 7.1487e-11, 5.8808e-10, 1.1026e-10, 3.0168e-09, 2.5754e-10, 7.8315e-10,\n 6.8110e-12, 1.0815e-10, 1.5339e-12, 9.0416e-10, 3.1021e-09, 2.3786e-09,\n 5.1157e-09, 3.3955e-09, 6.6542e-12, 3.2584e-10, 1.2699e-12, 1.0685e-08,\n 1.6047e-09, 4.4376e-13, 6.7759e-09, 2.0892e-14, 9.0210e-11, 3.6061e-10,\n 1.2030e-13, 1.0138e-09, 2.5287e-11, 5.8708e-11, 7.3095e-13, 2.8223e-11,\n 8.0355e-11, 4.5489e-10, 1.1983e-11, 3.2937e-12, 8.8205e-10, 1.2794e-09,\n 1.2760e-12, 7.7463e-09, 7.7041e-13, 4.5504e-10, 1.1125e-11, 2.1203e-12,\n 5.3220e-09, 4.4869e-13, 1.1582e-09, 4.8511e-11, 6.0693e-11, 2.6345e-11,\n 8.9535e-13, 5.3895e-12, 2.5303e-09, 3.2155e-09, 4.0206e-09, 1.3591e-10,\n 2.7231e-11, 4.1561e-09, 5.5446e-11, 1.3029e-12, 7.0453e-11, 2.3744e-13,\n 5.6096e-11, 1.4963e-12, 4.0711e-09, 7.5565e-11, 8.1245e-13, 5.4985e-10,\n 1.1342e-09, 2.0277e-13, 1.5924e-09, 1.4472e-10], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([-2.0666e-18, -2.0801e-18, -8.1666e-19, -2.1208e-18, -5.2520e-17,\n -7.4694e-19, -6.2530e-18, -9.9171e-18, -6.1521e-17, -6.2079e-17,\n -1.4853e-17, 1.1740e-19, -5.3598e-17, -1.5417e-18, -8.1944e-18,\n -6.6672e-17, -6.5925e-17, 2.5174e-19, 4.6224e-18, -6.6465e-18,\n -5.9742e-17, -9.7847e-18, -7.1277e-18, -3.2322e-17, -1.4494e-17,\n -9.1568e-17, -8.8868e-19, -6.3479e-17, -6.0331e-17, -4.9318e-17,\n -1.3869e-17, -6.9528e-17, -1.3692e-17, -6.7946e-19, 2.3100e-19,\n -3.4472e-18, -2.9521e-17, -1.2920e-17, -3.4849e-17, 3.9646e-19,\n -1.4848e-18, -4.0527e-17, 4.9345e-18, 7.5144e-18, -1.9504e-17,\n 7.9288e-18, -6.8712e-19, -8.6349e-17, -3.7673e-18, -5.9777e-17,\n -9.8781e-19, 3.5409e-19, -6.0033e-17, -4.5869e-18, -4.8589e-18,\n -1.6137e-18, 1.7898e-18, -4.3968e-17, 2.7978e-18, -4.5454e-17,\n -8.8836e-19, 5.4659e-18, -6.7406e-17, 1.8356e-18, -8.6522e-19,\n -1.1244e-17, -3.9895e-17, 6.8577e-20, -2.5664e-17, -7.5545e-19,\n -2.2008e-18, 8.8536e-21, -1.0733e-17, 2.1986e-18, -4.0072e-17,\n 5.7983e-19, -1.0770e-18, 3.2788e-18, 2.4014e-18, -5.8753e-17,\n -3.7210e-18, -2.3844e-18, -5.8969e-18, -1.9477e-18, -5.9234e-17,\n 1.2196e-18, -7.5007e-18, -1.4844e-18, 1.4486e-17, -4.3876e-17,\n -2.1928e-17, 4.9369e-19, -5.7670e-17, -4.0445e-18, -5.5103e-17,\n -2.6692e-17, 1.8239e-19, -5.1180e-17, -4.7071e-17, -4.8615e-17,\n -7.4729e-19, 1.8973e-19, -8.7704e-18, -2.1881e-17, -1.0338e-19,\n -4.1548e-17, -5.5521e-17, -4.4450e-19, -4.6367e-17, 1.1213e-18,\n -3.1158e-17, -4.8491e-18, -2.1564e-18, -2.0315e-18, -4.3175e-18,\n -3.7480e-17, -6.6957e-17, 8.4893e-18, -1.7881e-19, -7.8207e-18,\n -9.4288e-18, -6.7712e-18, -2.6961e-17, -3.7236e-17, -2.7088e-19,\n -2.0140e-17, 7.6790e-18, -5.2554e-20, -7.2714e-19, -3.7688e-18,\n 1.2313e-18, -6.9286e-17, -3.1006e-18, 1.3901e-18, -2.4852e-17,\n -6.8475e-17, -1.2810e-17, -3.8575e-20, -1.1737e-18, -2.2007e-18,\n 1.7355e-18, -3.5925e-17, 5.8450e-18, -1.5629e-18, -1.3011e-19,\n -2.1075e-17, -4.7414e-17, -6.5204e-17, -6.1504e-18, -3.6257e-18,\n -3.0743e-18, -7.3336e-17, 6.3851e-19, 3.6435e-18, -5.0204e-17,\n 1.5549e-18, -5.5330e-17, -1.2936e-17, 3.2546e-18, -1.1605e-17,\n -4.0543e-18, -8.5130e-17, -1.6530e-17, -2.7740e-18, -3.2093e-17,\n -8.6513e-19, -4.3111e-18, -1.1024e-17, -2.1003e-17, -1.3394e-18,\n 2.2119e-18, -5.1211e-17, -2.4056e-18, -8.7279e-19, -4.7429e-17,\n -8.2546e-20, -1.0517e-17, -3.3232e-19, 9.4573e-19, -2.8569e-17,\n -6.3577e-18, -1.0003e-17, -2.0973e-18, -8.7444e-18, -6.2621e-19,\n -2.9077e-18, -5.3859e-18, -1.1632e-17, -1.0397e-19, -3.6040e-18,\n -6.3476e-18, -2.1949e-17, 4.7734e-18, 3.2782e-18, 1.4724e-18,\n -3.6444e-17, -2.9570e-17, -4.0982e-17, -7.1960e-17, -2.4568e-17,\n -1.9970e-18, -2.5499e-17, 1.6561e-18, -8.8957e-17, -4.1990e-17,\n -1.5262e-20, -7.4525e-17, -5.8240e-20, -7.4804e-18, -2.4375e-17,\n -1.2419e-19, -2.0157e-17, -5.7065e-18, 6.1771e-19, -1.1554e-18,\n -2.7072e-18, -3.9572e-18, 6.1385e-18, 5.4839e-18, 8.3773e-19,\n -3.2053e-18, -4.1447e-18, 2.3151e-18, -5.0152e-17, -4.1529e-18,\n -1.0935e-18, -4.5681e-19, -8.8183e-18, -7.7138e-17, -5.2387e-18,\n -1.0299e-18, -1.9272e-17, -3.0187e-19, -8.2749e-18, -2.4807e-18,\n -1.1196e-18, -2.0733e-18, -2.8508e-17, -4.1078e-17, 4.7839e-19,\n 1.0536e-18, -2.5652e-18, -3.9708e-18, -1.8412e-19, -5.9355e-19,\n 1.6160e-18, 1.2361e-18, -1.8580e-18, -5.0572e-17, -2.1479e-17,\n -6.2449e-18, -1.6621e-20, -3.9927e-17, -2.9820e-18, -2.6984e-17,\n -6.4927e-18], device='cuda:0')", + "exp_avg_sq": "tensor([6.6363e-11, 2.0015e-11, 3.5183e-14, 1.1385e-11, 8.4619e-10, 2.9619e-13,\n 5.2005e-13, 7.4399e-11, 1.7277e-09, 9.8789e-10, 1.9994e-11, 2.4170e-10,\n 1.1635e-09, 5.6372e-12, 2.4938e-10, 2.1170e-09, 2.4945e-09, 1.0533e-14,\n 1.6239e-12, 7.2361e-11, 2.0356e-09, 3.9258e-13, 1.1106e-10, 1.1257e-10,\n 1.4397e-10, 4.7205e-09, 2.1215e-11, 3.3876e-10, 4.6063e-10, 5.1672e-10,\n 1.9551e-09, 1.6578e-09, 9.0169e-10, 2.0560e-12, 4.7509e-11, 3.1202e-10,\n 3.2438e-10, 1.0683e-09, 8.1654e-10, 3.8133e-11, 2.2984e-11, 1.7154e-10,\n 8.9401e-12, 2.8187e-13, 3.7501e-10, 2.3718e-09, 4.4571e-12, 2.4692e-09,\n 3.4096e-11, 1.4740e-09, 4.9504e-12, 1.7327e-12, 2.1331e-09, 7.7434e-10,\n 3.7930e-12, 7.3022e-13, 1.6751e-10, 4.8230e-10, 2.4039e-11, 1.8201e-09,\n 1.2630e-12, 3.0044e-12, 2.3644e-09, 2.1922e-11, 1.4192e-11, 6.1861e-10,\n 3.4386e-10, 7.8416e-11, 8.4094e-11, 8.5343e-11, 9.9292e-12, 1.2291e-11,\n 2.3051e-11, 5.2437e-12, 3.0651e-09, 5.0299e-12, 3.8269e-12, 1.2149e-12,\n 1.9240e-10, 1.6204e-09, 6.1490e-13, 5.5421e-12, 7.1569e-11, 9.3122e-12,\n 1.0789e-09, 9.0238e-12, 2.8455e-13, 7.4345e-13, 3.2293e-12, 4.2990e-10,\n 2.9483e-09, 1.2054e-12, 8.1070e-10, 2.4859e-14, 3.2282e-09, 3.2073e-10,\n 7.5564e-11, 8.3450e-10, 5.2051e-10, 8.0590e-10, 5.3931e-12, 1.2642e-11,\n 7.7478e-12, 2.9204e-10, 8.4466e-11, 6.0240e-10, 1.1333e-09, 2.7303e-14,\n 5.5246e-10, 3.9448e-12, 2.5594e-10, 3.9098e-12, 3.7850e-11, 7.2688e-14,\n 8.9381e-12, 4.1528e-10, 1.2386e-09, 7.7157e-12, 1.2507e-12, 3.9111e-12,\n 1.9555e-12, 3.8281e-11, 4.2926e-10, 8.6857e-10, 2.2160e-11, 9.5782e-13,\n 1.9577e-10, 1.6342e-13, 1.3278e-10, 1.0876e-09, 1.3118e-12, 1.7002e-09,\n 3.0836e-12, 2.6449e-11, 2.0935e-10, 5.6279e-10, 3.2981e-10, 4.7640e-12,\n 1.2514e-12, 3.9593e-13, 1.5458e-11, 1.5294e-09, 2.8823e-13, 2.8024e-11,\n 8.2300e-12, 5.2568e-11, 8.1959e-10, 1.8315e-09, 1.5109e-12, 1.6511e-11,\n 1.2523e-10, 1.9346e-09, 1.8335e-11, 4.9707e-13, 1.6564e-09, 1.4873e-10,\n 4.4974e-09, 1.8966e-10, 1.2048e-09, 5.7325e-12, 1.2333e-10, 2.5013e-09,\n 2.9839e-11, 1.0864e-11, 6.2818e-10, 3.7422e-11, 4.8120e-12, 1.2030e-10,\n 6.5588e-11, 6.3169e-14, 2.4977e-11, 3.9556e-10, 5.9825e-11, 1.9520e-10,\n 6.2628e-10, 1.9226e-12, 5.5480e-13, 9.6613e-12, 1.7876e-13, 9.0184e-10,\n 2.7948e-12, 5.9986e-13, 2.3701e-11, 3.7260e-13, 9.0435e-16, 1.7420e-10,\n 2.0428e-11, 1.6805e-10, 3.1507e-11, 8.6208e-10, 7.3594e-11, 2.2379e-10,\n 1.9463e-12, 3.0906e-11, 4.3831e-13, 2.5837e-10, 8.8644e-10, 6.7972e-10,\n 1.4619e-09, 9.7030e-10, 1.9015e-12, 9.3112e-11, 3.6289e-13, 3.0533e-09,\n 4.5856e-10, 1.2681e-13, 1.9363e-09, 5.9700e-15, 2.5778e-11, 1.0305e-10,\n 3.4376e-14, 2.8969e-10, 7.2259e-12, 1.6776e-11, 2.0887e-13, 8.0650e-12,\n 2.2962e-11, 1.2999e-10, 3.4243e-12, 9.4121e-13, 2.5205e-10, 3.6560e-10,\n 3.6463e-13, 2.2136e-09, 2.2015e-13, 1.3003e-10, 3.1790e-12, 6.0589e-13,\n 1.5208e-09, 1.2822e-13, 3.3096e-10, 1.3862e-11, 1.7343e-11, 7.5283e-12,\n 2.5585e-13, 1.5401e-12, 7.2306e-10, 9.1885e-10, 1.1489e-09, 3.8836e-11,\n 7.7814e-12, 1.1876e-09, 1.5844e-11, 3.7233e-13, 2.0133e-11, 6.7850e-14,\n 1.6030e-11, 4.2757e-13, 1.1634e-09, 2.1593e-11, 2.3217e-13, 1.5713e-10,\n 3.2411e-10, 5.7944e-14, 4.5504e-10, 4.1355e-11], device='cuda:0')" }, "44": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-2.9268e-18, -4.6781e-19, -3.2663e-18, ..., -1.6735e-18,\n -2.0955e-18, -1.3180e-18],\n [-1.0474e-18, 2.2833e-19, -1.7349e-19, ..., -2.3491e-19,\n 1.3322e-19, 4.8686e-19],\n [ 5.7793e-21, 4.8371e-20, 2.5898e-19, ..., 8.5907e-19,\n 2.7856e-21, 1.8480e-19],\n ...,\n [ 9.2542e-20, -2.1618e-20, -1.0773e-19, ..., -1.6023e-19,\n -3.0530e-20, -6.2681e-20],\n [ 9.0682e-19, 1.0290e-19, -6.3651e-19, ..., 1.4597e-18,\n 3.1404e-20, 3.2373e-19],\n [ 4.0950e-18, 4.9997e-19, 4.2752e-18, ..., 2.5109e-18,\n 2.1750e-18, 1.6885e-18]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.0576e-11, 3.3579e-12, 6.9322e-12, ..., 1.2979e-11, 1.0104e-11,\n 1.4176e-11],\n [2.3215e-13, 1.2313e-13, 4.4507e-14, ..., 3.6857e-13, 1.3436e-13,\n 3.2798e-13],\n [1.5355e-14, 2.7742e-14, 2.9741e-14, ..., 5.0114e-14, 1.8058e-14,\n 9.1231e-15],\n ...,\n [1.3219e-12, 4.0016e-13, 8.8973e-13, ..., 7.6646e-13, 1.4295e-12,\n 1.8226e-12],\n [4.8577e-14, 6.5294e-14, 2.3323e-13, ..., 3.0428e-13, 5.0912e-13,\n 9.7191e-13],\n [3.1001e-12, 1.5819e-12, 2.2251e-12, ..., 2.6588e-12, 4.2147e-12,\n 5.9083e-12]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[-1.8680e-18, -1.0229e-18, -6.0919e-19, ..., 1.5212e-18,\n -1.1186e-18, -3.4532e-19],\n [-6.1113e-20, 8.0913e-20, -3.0100e-20, ..., -1.0081e-17,\n 1.7863e-19, 1.0567e-20],\n [-8.8787e-20, 1.0985e-19, -8.2696e-20, ..., -4.4979e-18,\n 2.6159e-20, -3.0218e-20],\n ...,\n [-2.0106e-19, -3.8720e-20, -4.5941e-20, ..., -1.4454e-19,\n -5.0065e-20, -4.1109e-20],\n [ 1.9236e-19, 1.3789e-19, 1.1166e-19, ..., 2.2489e-19,\n 1.9087e-19, 9.2049e-20],\n [ 1.9226e-18, 1.2366e-18, 6.0980e-19, ..., -3.7421e-18,\n 1.1619e-18, 5.0412e-19]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.0222e-12, 9.5955e-13, 1.9809e-12, ..., 3.7088e-12, 2.8872e-12,\n 4.0510e-12],\n [6.6339e-14, 3.5184e-14, 1.2718e-14, ..., 1.0532e-13, 3.8395e-14,\n 9.3722e-14],\n [4.3879e-15, 7.9276e-15, 8.4986e-15, ..., 1.4320e-14, 5.1601e-15,\n 2.6070e-15],\n ...,\n [3.7774e-13, 1.1435e-13, 2.5425e-13, ..., 2.1902e-13, 4.0848e-13,\n 5.2082e-13],\n [1.3881e-14, 1.8658e-14, 6.6647e-14, ..., 8.6950e-14, 1.4549e-13,\n 2.7773e-13],\n [8.8589e-13, 4.5204e-13, 6.3583e-13, ..., 7.5977e-13, 1.2044e-12,\n 1.6883e-12]], device='cuda:0')" }, "45": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-9.0333e-16, -7.4984e-18, 8.5932e-17, 4.7546e-16, 1.7387e-16,\n 1.6642e-16, -2.0129e-16, 1.4410e-16, -1.5314e-15, -8.2372e-17,\n 4.6547e-17, 7.3387e-17, -4.9757e-16, -8.3795e-16, -1.2764e-16,\n -4.9176e-16, -8.2539e-16, 1.4300e-16, -2.8081e-16, 1.9419e-16,\n 5.4545e-18, 7.5010e-16, 9.2605e-17, 5.8296e-16, -5.1919e-16,\n -1.8822e-17, 2.7475e-16, -9.4241e-16, -2.7313e-16, -1.7185e-16,\n 3.0205e-16, -7.9927e-17, 7.4240e-17, 5.2999e-17, -8.2316e-17,\n 3.6818e-16, -2.2381e-16, 1.1527e-16, 9.4224e-16, -7.4979e-17,\n 3.5324e-16, 4.6625e-17, 4.9357e-16, 2.6070e-16, 1.9890e-16,\n 3.1970e-16, -5.3556e-16, -4.3787e-16, -1.0230e-16, 3.2154e-17,\n -1.8906e-16, 6.4455e-16, -3.7321e-16, -2.6597e-16, -5.8173e-16,\n -5.0948e-18, 4.4512e-16, 2.5066e-16, 2.1753e-16, -1.8771e-16,\n 8.6109e-17, -6.2981e-17, -1.9761e-15, -4.2844e-16, -6.8494e-16,\n 2.3749e-16, 1.8305e-17, -3.1841e-18, -2.4665e-18, -1.2566e-17,\n -7.3046e-16, 4.7123e-16, 2.2130e-16, -6.5255e-17, -1.3408e-15,\n 2.0369e-16, -5.2623e-16, -2.7356e-16, 9.0889e-16, -9.0041e-16,\n -4.8059e-17, 4.3698e-17, 7.8117e-17, 5.3252e-17, 4.7259e-16,\n 3.1009e-16, 1.5258e-17, -9.1818e-17, 1.4248e-15, 2.8302e-16,\n -3.7443e-18, 7.5162e-19, -1.9982e-16, 7.5830e-16, -3.4462e-16,\n -1.2053e-16, -4.5632e-16, 1.4448e-18, 1.6560e-16, -6.1238e-16,\n 1.9519e-16, 8.7494e-16, -1.3860e-15, -5.7478e-17, 4.1140e-16,\n 3.7236e-16, -1.1445e-15, 3.8779e-16, -1.9624e-17, 7.0699e-16,\n -7.6532e-16, 9.0138e-16, -6.3420e-17, 4.9559e-17, -2.0395e-15,\n -1.5707e-16, 2.5318e-17, -3.3463e-16, 3.6303e-16, -9.9096e-16,\n 1.7017e-16, 1.8947e-16, 1.7238e-17, 1.3785e-15, -2.8682e-16,\n -7.9731e-16, -2.0742e-16, 1.6615e-16, 1.7322e-16, 4.1970e-16,\n 2.1453e-16, -1.3122e-16, -1.7856e-16, 1.7668e-16, -1.4664e-16,\n -3.4381e-16, -2.7926e-17, 4.9069e-16, 5.4620e-16, -1.9597e-16,\n 2.6162e-16, -3.4518e-16, 2.8996e-16, -7.2232e-16, -5.5467e-16,\n -5.0771e-17, -8.7226e-17, -7.8857e-16, 7.3162e-16, -1.7730e-16,\n -3.1809e-16, -3.4705e-16, -2.0747e-17, 8.2060e-16, 5.5354e-16,\n -2.3508e-16, -1.6483e-16, -1.5947e-16, 2.7188e-16, -4.9511e-17,\n 3.5030e-16, -8.8121e-16, 1.0269e-16, -5.8298e-16, -5.3277e-18,\n 4.3654e-16, -4.5863e-16, -2.9897e-16, -5.5864e-16, 1.9058e-16,\n 4.9576e-16, 1.2501e-17, -1.0797e-16, -1.4705e-16, -4.2194e-18,\n 2.6160e-17, -2.3824e-17, -8.6331e-17, -3.1103e-16, 2.3562e-16,\n -1.7713e-16, 2.7094e-16, 7.7520e-16, -1.5157e-16, -2.7964e-16,\n -1.4436e-17, 1.8433e-16, 3.1471e-16, 3.1620e-16, -5.2942e-18,\n 8.3536e-16, 1.5427e-17, -1.6641e-16, -1.3720e-15, 8.9641e-16,\n 3.8358e-16, -2.4031e-17, 2.1223e-16, -1.1799e-16, 7.6528e-16,\n 6.6708e-16, -7.5004e-16, -2.3244e-16, -6.0831e-17, 4.8354e-16,\n -1.8466e-16, -4.8317e-16, 8.6102e-16, 1.8837e-17, -1.8469e-16,\n 2.9630e-16, -6.7884e-17, 8.5230e-17, 2.0018e-16, 1.5452e-16,\n -4.7239e-17, -1.3505e-16, 6.5612e-16, 6.8420e-16, 2.5190e-16,\n 6.1801e-16, 5.4421e-16, -2.2365e-16, -6.2098e-16, -6.4529e-17,\n -3.8013e-17, -3.6249e-17, -5.3708e-16, -5.3854e-17, 2.8851e-16,\n -5.8873e-17, -4.8396e-17, 8.7529e-16, -2.9618e-17, 3.3759e-16,\n -3.0917e-17, -1.0885e-17, 5.9180e-16, -1.1227e-17, 2.8932e-17,\n -6.6100e-17, 4.0320e-16, 6.0002e-16, 1.9492e-16, 3.1871e-16,\n 4.5223e-16, -5.8766e-16, 4.5617e-16, -5.8184e-16, -7.2484e-17,\n 3.5714e-16, 6.8047e-16, -1.8403e-16, -8.0171e-17, 3.9316e-17,\n 1.0834e-15], device='cuda:0')", - "exp_avg_sq": "tensor([2.3534e-06, 6.0398e-08, 1.8171e-09, 6.6376e-06, 4.3670e-06, 9.3447e-08,\n 6.8034e-09, 5.8788e-07, 6.3071e-06, 9.5854e-07, 9.7191e-07, 7.1239e-06,\n 6.7453e-06, 8.4370e-08, 2.2513e-08, 2.7103e-08, 3.2070e-08, 3.9434e-08,\n 5.4250e-08, 8.6334e-07, 4.2725e-09, 6.1647e-07, 1.3724e-07, 2.0938e-07,\n 1.8265e-07, 5.8558e-08, 2.6929e-07, 7.6088e-06, 1.1141e-06, 1.5934e-08,\n 2.0088e-05, 6.9332e-09, 4.6653e-06, 1.6470e-07, 1.0284e-06, 1.3568e-05,\n 1.0394e-07, 1.4504e-07, 2.1247e-06, 4.0405e-09, 8.0203e-07, 1.6350e-06,\n 2.8373e-08, 2.2535e-07, 1.6381e-08, 1.0809e-08, 3.4304e-08, 4.5259e-06,\n 6.8873e-09, 1.3663e-06, 8.8282e-07, 7.3629e-07, 6.7019e-07, 1.2544e-07,\n 1.4809e-07, 5.9951e-07, 1.1524e-08, 1.0920e-07, 1.0866e-05, 1.6990e-07,\n 2.1880e-06, 1.2329e-06, 4.1632e-06, 1.4515e-05, 6.4393e-06, 6.3676e-06,\n 4.0901e-10, 4.5777e-09, 3.1116e-08, 1.7765e-06, 1.2532e-05, 1.2924e-06,\n 3.4228e-07, 1.6739e-07, 1.8290e-06, 6.6455e-08, 5.2622e-08, 1.6494e-07,\n 5.0834e-06, 7.5965e-06, 2.0671e-09, 1.4492e-08, 5.1650e-07, 1.8998e-06,\n 4.5965e-06, 2.0246e-07, 6.4570e-10, 2.0122e-09, 9.0800e-08, 1.7437e-06,\n 3.3474e-06, 4.3129e-07, 2.5168e-06, 4.8565e-06, 1.1126e-08, 6.5558e-09,\n 1.1116e-07, 2.4266e-06, 1.2510e-06, 5.4461e-07, 1.4362e-06, 8.0885e-06,\n 4.9808e-06, 1.9136e-07, 1.7857e-06, 1.8574e-06, 2.7299e-07, 2.5748e-07,\n 7.5830e-09, 6.6810e-06, 2.1105e-06, 1.2417e-07, 8.9061e-07, 1.2683e-07,\n 3.2260e-06, 1.0069e-07, 2.2023e-09, 5.9064e-08, 7.0876e-09, 4.8311e-08,\n 4.5182e-09, 1.5487e-07, 1.8856e-08, 7.2576e-06, 2.4742e-08, 1.8780e-06,\n 6.7254e-07, 4.3077e-06, 4.3765e-07, 2.1996e-05, 5.7787e-07, 5.3806e-09,\n 6.2138e-07, 3.5524e-08, 3.4824e-08, 4.7797e-06, 1.0386e-08, 1.2209e-07,\n 1.5871e-07, 5.7171e-09, 1.8158e-06, 1.2930e-07, 6.5482e-09, 3.0119e-07,\n 7.7997e-06, 2.6336e-09, 1.7125e-07, 2.9185e-07, 4.1567e-06, 1.2394e-07,\n 9.8084e-06, 8.1391e-08, 4.3099e-07, 4.6401e-08, 1.0482e-06, 7.7085e-07,\n 3.7838e-08, 2.0624e-06, 1.4079e-05, 1.2322e-07, 1.2213e-06, 1.7565e-06,\n 2.4967e-06, 1.0287e-05, 5.6939e-07, 3.4151e-06, 2.9120e-06, 3.8352e-08,\n 3.7122e-06, 3.4102e-06, 7.7685e-06, 1.5466e-08, 3.0221e-07, 2.3399e-07,\n 5.7911e-08, 4.4389e-07, 3.5141e-09, 8.4366e-08, 3.6265e-07, 2.4222e-07,\n 3.7883e-06, 5.2897e-06, 1.5720e-07, 2.2046e-08, 1.1796e-07, 1.4560e-05,\n 7.9270e-08, 4.8583e-08, 3.9231e-06, 2.7368e-06, 3.9421e-06, 2.7749e-08,\n 4.2324e-09, 6.6558e-06, 3.7073e-07, 1.3447e-06, 1.3164e-07, 1.5053e-06,\n 7.7846e-06, 1.0736e-06, 1.3169e-08, 4.1182e-05, 2.6826e-08, 5.1269e-06,\n 3.7014e-06, 1.5632e-06, 2.9290e-06, 1.7445e-06, 1.3605e-07, 6.2994e-08,\n 5.8031e-06, 2.3797e-09, 1.4173e-06, 2.6863e-09, 3.9150e-07, 3.6600e-08,\n 1.7915e-08, 2.6791e-07, 4.5667e-06, 2.4705e-08, 1.0011e-05, 2.0618e-07,\n 3.2791e-08, 1.1748e-05, 3.1068e-09, 1.3771e-07, 1.4062e-06, 6.3786e-07,\n 1.0532e-07, 9.9260e-08, 1.0050e-08, 8.3351e-07, 2.3104e-07, 5.0427e-08,\n 3.7087e-06, 3.6938e-07, 4.2435e-09, 2.3124e-05, 1.6744e-08, 1.8848e-08,\n 1.3347e-06, 2.3871e-05, 2.0867e-08, 1.6141e-06, 3.1245e-06, 2.0825e-08,\n 9.2202e-06, 1.5618e-06, 3.0075e-06, 1.1265e-09, 3.2090e-07, 1.1992e-05,\n 1.0208e-07, 3.7104e-07, 5.6256e-08, 1.1025e-06], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([-7.4525e-16, 4.4397e-17, 1.1884e-17, 1.6656e-16, 1.9573e-16,\n 2.1921e-16, -1.4185e-16, 8.7519e-17, -1.2036e-15, -1.9404e-16,\n -1.4009e-17, 9.3525e-17, -4.3279e-16, -5.7282e-16, -4.8530e-17,\n -4.3272e-16, -5.6255e-16, 1.4999e-16, -2.1644e-16, 1.4030e-16,\n 3.5170e-17, 7.0613e-16, 1.5169e-17, 4.8037e-16, -7.2434e-16,\n -3.7134e-17, 1.4658e-16, -6.0108e-16, -2.3122e-16, -2.5499e-16,\n 2.3613e-16, -1.1963e-16, -7.9445e-17, 2.1147e-17, -1.3956e-17,\n 2.4337e-16, -1.3673e-16, 1.4418e-16, 5.2317e-16, -5.7557e-17,\n 4.1151e-16, 5.3290e-17, 4.1657e-16, 1.8578e-16, 1.8122e-16,\n 3.8674e-16, -4.6482e-16, -3.7498e-16, -1.2684e-16, 1.3610e-16,\n -8.8741e-17, 5.6287e-16, -2.3224e-16, -1.8898e-16, -4.3596e-16,\n 1.2506e-16, 3.9728e-16, 3.7801e-16, -1.1713e-16, -1.7657e-16,\n 1.3679e-16, -6.8618e-17, -1.2462e-15, -4.4483e-16, -6.5863e-16,\n 2.5439e-16, 9.4252e-19, -5.1350e-17, -1.3656e-17, -5.5393e-18,\n -7.1467e-16, 4.3583e-16, 1.7717e-16, -4.7988e-17, -1.1578e-15,\n 8.1972e-17, -4.8052e-16, -2.0002e-16, 7.0346e-16, -4.8560e-16,\n -2.2915e-17, 8.7478e-18, 4.9984e-17, -1.2767e-17, 5.0286e-16,\n 2.1445e-16, -1.0637e-17, -3.9947e-17, 1.1834e-15, -1.7668e-17,\n 7.9586e-17, 4.7677e-17, 1.3377e-17, 7.0385e-16, -1.9841e-16,\n -1.1744e-16, -3.1546e-16, -6.3469e-17, 2.3725e-16, -5.4296e-16,\n 1.2553e-17, 7.6461e-16, -1.0156e-15, -6.8403e-18, 3.8774e-16,\n 3.0327e-16, -8.1287e-16, 3.6999e-16, -7.7349e-17, 7.5422e-16,\n -5.5516e-16, 7.9708e-16, -1.0216e-17, 3.5144e-17, -1.7616e-15,\n -1.0985e-16, 2.2811e-17, -3.0022e-16, 2.0844e-16, -6.9009e-16,\n 9.9196e-17, 7.5294e-17, 5.0937e-17, 1.1649e-15, -2.0505e-16,\n -7.2296e-16, -2.4096e-16, 1.5821e-16, 2.0834e-16, 3.0245e-16,\n 2.4845e-16, -1.1421e-16, -1.5621e-16, 7.9754e-17, -1.2171e-16,\n -5.5838e-17, -2.7620e-17, 4.8703e-16, 3.9396e-16, -2.8692e-16,\n 2.4002e-16, -2.7897e-16, 2.5377e-16, -7.0516e-16, -5.7932e-16,\n -8.0645e-17, -1.1231e-16, -6.9698e-16, 6.0671e-16, -9.9666e-17,\n -3.3000e-16, -3.3025e-16, -4.0259e-18, 6.6799e-16, 2.9937e-16,\n -1.6599e-16, -1.1781e-16, -1.3127e-16, 2.1379e-16, -9.0111e-17,\n 2.6136e-16, -1.0353e-15, -1.6536e-16, -7.0985e-16, -1.2829e-16,\n 3.6229e-16, -4.0902e-16, -2.3188e-16, -4.4775e-16, 1.8365e-16,\n 4.6000e-16, 1.5379e-17, -9.4872e-17, -1.5237e-16, -1.3080e-17,\n 3.1590e-17, -3.4047e-17, -6.8792e-17, -2.3905e-16, 1.5609e-16,\n -1.0576e-16, 2.7385e-16, 6.0907e-16, -5.3138e-18, -2.0906e-16,\n 1.1393e-16, 8.8860e-17, 3.0921e-16, 2.0750e-16, 1.2059e-18,\n 6.3072e-16, 6.4395e-17, -9.5685e-17, -1.1838e-15, 5.7071e-16,\n 5.0689e-16, -6.0613e-18, 1.3330e-16, -4.9862e-17, 6.0782e-16,\n 4.3888e-16, -6.9706e-16, -1.8760e-16, 7.9920e-17, 3.4895e-16,\n -2.7607e-17, -4.0140e-16, 5.9738e-16, -1.1818e-17, -1.0530e-16,\n 2.6349e-16, -4.9802e-17, 9.6071e-17, 1.7252e-16, 1.4077e-16,\n -4.7564e-17, -8.2592e-17, 4.9487e-16, 5.9260e-16, 1.9488e-16,\n 5.2377e-16, 3.4388e-16, -1.6431e-16, -4.7626e-16, -7.4734e-17,\n -4.4776e-17, -5.6050e-17, -4.6662e-16, -3.7145e-18, 1.5768e-16,\n -5.4790e-17, 1.2782e-16, 7.5541e-16, -1.1160e-17, 3.2977e-16,\n -1.2313e-17, -3.7107e-17, 4.3148e-16, -1.8262e-17, -2.6786e-17,\n -3.4473e-17, 3.1833e-16, 4.5247e-16, 1.7680e-16, 2.3674e-16,\n 2.7911e-16, -5.5896e-16, 4.2461e-16, -4.4691e-16, -7.5466e-17,\n 3.6706e-16, 6.0621e-16, 7.4456e-17, -6.0237e-17, 1.0013e-16,\n 9.1022e-16], device='cuda:0')", + "exp_avg_sq": "tensor([6.7251e-07, 1.7259e-08, 5.1924e-10, 1.8967e-06, 1.2479e-06, 2.6703e-08,\n 1.9441e-09, 1.6799e-07, 1.8023e-06, 2.7391e-07, 2.7773e-07, 2.0357e-06,\n 1.9275e-06, 2.4109e-08, 6.4334e-09, 7.7450e-09, 9.1642e-09, 1.1269e-08,\n 1.5502e-08, 2.4671e-07, 1.2209e-09, 1.7616e-07, 3.9218e-08, 5.9832e-08,\n 5.2195e-08, 1.6733e-08, 7.6950e-08, 2.1743e-06, 3.1835e-07, 4.5531e-09,\n 5.7403e-06, 1.9812e-09, 1.3331e-06, 4.7063e-08, 2.9387e-07, 3.8772e-06,\n 2.9702e-08, 4.1445e-08, 6.0714e-07, 1.1546e-09, 2.2919e-07, 4.6722e-07,\n 8.1079e-09, 6.4394e-08, 4.6811e-09, 3.0888e-09, 9.8026e-09, 1.2933e-06,\n 1.9681e-09, 3.9043e-07, 2.5227e-07, 2.1040e-07, 1.9151e-07, 3.5846e-08,\n 4.2319e-08, 1.7132e-07, 3.2932e-09, 3.1204e-08, 3.1049e-06, 4.8549e-08,\n 6.2523e-07, 3.5232e-07, 1.1897e-06, 4.1478e-06, 1.8401e-06, 1.8196e-06,\n 1.1688e-10, 1.3081e-09, 8.8916e-09, 5.0765e-07, 3.5812e-06, 3.6932e-07,\n 9.7809e-08, 4.7832e-08, 5.2265e-07, 1.8990e-08, 1.5037e-08, 4.7134e-08,\n 1.4526e-06, 2.1708e-06, 5.9070e-10, 4.1411e-09, 1.4760e-07, 5.4287e-07,\n 1.3135e-06, 5.7856e-08, 1.8452e-10, 5.7499e-10, 2.5947e-08, 4.9827e-07,\n 9.5656e-07, 1.2325e-07, 7.1921e-07, 1.3878e-06, 3.1794e-09, 1.8734e-09,\n 3.1766e-08, 6.9341e-07, 3.5748e-07, 1.5563e-07, 4.1041e-07, 2.3114e-06,\n 1.4233e-06, 5.4682e-08, 5.1028e-07, 5.3075e-07, 7.8008e-08, 7.3576e-08,\n 2.1669e-09, 1.9091e-06, 6.0308e-07, 3.5483e-08, 2.5450e-07, 3.6244e-08,\n 9.2186e-07, 2.8774e-08, 6.2933e-10, 1.6878e-08, 2.0253e-09, 1.3805e-08,\n 1.2911e-09, 4.4254e-08, 5.3881e-09, 2.0739e-06, 7.0701e-09, 5.3664e-07,\n 1.9218e-07, 1.2309e-06, 1.2506e-07, 6.2857e-06, 1.6513e-07, 1.5375e-09,\n 1.7756e-07, 1.0151e-08, 9.9514e-09, 1.3658e-06, 2.9679e-09, 3.4890e-08,\n 4.5351e-08, 1.6337e-09, 5.1888e-07, 3.6948e-08, 1.8712e-09, 8.6066e-08,\n 2.2288e-06, 7.5257e-10, 4.8935e-08, 8.3400e-08, 1.1878e-06, 3.5416e-08,\n 2.8028e-06, 2.3258e-08, 1.2316e-07, 1.3259e-08, 2.9954e-07, 2.2028e-07,\n 1.0813e-08, 5.8935e-07, 4.0232e-06, 3.5213e-08, 3.4900e-07, 5.0193e-07,\n 7.1345e-07, 2.9396e-06, 1.6271e-07, 9.7588e-07, 8.3213e-07, 1.0959e-08,\n 1.0608e-06, 9.7449e-07, 2.2199e-06, 4.4194e-09, 8.6359e-08, 6.6863e-08,\n 1.6549e-08, 1.2685e-07, 1.0042e-09, 2.4108e-08, 1.0363e-07, 6.9216e-08,\n 1.0825e-06, 1.5116e-06, 4.4921e-08, 6.2998e-09, 3.3709e-08, 4.1606e-06,\n 2.2652e-08, 1.3883e-08, 1.1211e-06, 7.8208e-07, 1.1265e-06, 7.9296e-09,\n 1.2094e-09, 1.9020e-06, 1.0594e-07, 3.8427e-07, 3.7617e-08, 4.3016e-07,\n 2.2245e-06, 3.0680e-07, 3.7632e-09, 1.1768e-05, 7.6658e-09, 1.4651e-06,\n 1.0577e-06, 4.4670e-07, 8.3699e-07, 4.9852e-07, 3.8877e-08, 1.8001e-08,\n 1.6583e-06, 6.8002e-10, 4.0501e-07, 7.6763e-10, 1.1188e-07, 1.0459e-08,\n 5.1192e-09, 7.6557e-08, 1.3050e-06, 7.0597e-09, 2.8608e-06, 5.8918e-08,\n 9.3702e-09, 3.3570e-06, 8.8778e-10, 3.9351e-08, 4.0182e-07, 1.8227e-07,\n 3.0095e-08, 2.8364e-08, 2.8717e-09, 2.3818e-07, 6.6022e-08, 1.4410e-08,\n 1.0598e-06, 1.0555e-07, 1.2126e-09, 6.6078e-06, 4.7847e-09, 5.3859e-09,\n 3.8140e-07, 6.8214e-06, 5.9628e-09, 4.6123e-07, 8.9285e-07, 5.9509e-09,\n 2.6347e-06, 4.4630e-07, 8.5940e-07, 3.2191e-10, 9.1700e-08, 3.4269e-06,\n 2.9171e-08, 1.0603e-07, 1.6076e-08, 3.1506e-07], device='cuda:0')" }, "46": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-1.3798e-17, -8.8736e-18, -1.0756e-19, -5.2209e-17, -5.7255e-17,\n -1.3484e-20, -1.8966e-18, -4.6982e-18, -1.0489e-16, -3.5271e-17,\n -4.4345e-17, 2.1541e-19, -1.4518e-16, -5.1228e-17, 1.7235e-18,\n -1.0894e-17, -7.5518e-18, 5.3262e-20, 2.9441e-18, 7.9293e-20,\n -8.6598e-18, -1.3559e-19, -4.3711e-18, -8.8368e-18, -4.5196e-17,\n -1.3936e-17, -4.0906e-20, -1.4907e-16, -1.0273e-16, -8.9041e-19,\n -2.8096e-18, -8.2897e-18, -3.4719e-17, 7.6193e-20, 8.0346e-19,\n -5.5037e-18, -1.4475e-18, 1.7299e-19, -5.0141e-17, -3.5171e-20,\n -1.0457e-18, -9.4886e-17, -1.3622e-19, -2.8691e-19, 8.8487e-19,\n 5.1050e-19, -2.1485e-17, -6.0631e-17, -1.8719e-19, -5.0708e-17,\n -5.2035e-17, 6.6452e-20, -1.7446e-17, -1.5271e-18, -3.5299e-17,\n -1.2474e-17, -4.3571e-19, -5.3247e-18, -6.8202e-17, -1.0046e-17,\n 5.3692e-19, 1.1999e-18, -8.4007e-17, -6.4713e-17, -9.4186e-17,\n 1.5650e-19, -1.0126e-18, 3.7017e-19, -1.8387e-18, 5.5965e-19,\n -1.2659e-16, -1.0155e-18, -1.3186e-19, -1.7613e-18, -6.0132e-17,\n -3.7718e-17, -1.2143e-17, 1.7078e-18, -5.9340e-19, -8.2763e-17,\n -4.3774e-18, -2.3945e-19, 9.9109e-19, -1.0365e-17, -9.1140e-17,\n -6.0345e-20, -1.7873e-19, 9.7491e-19, -2.3388e-18, -7.7447e-17,\n -1.9452e-17, -6.5500e-20, -5.0635e-17, -2.9073e-19, -4.5336e-18,\n 1.5744e-18, 2.9830e-18, -1.2151e-16, -1.2525e-17, -4.6059e-17,\n -1.6857e-17, -6.3195e-17, -1.3215e-16, -1.3633e-18, -1.4548e-19,\n -3.4249e-17, -4.1348e-17, -1.0371e-19, -4.1533e-18, 5.0983e-19,\n -4.9013e-17, -1.4536e-18, 1.7050e-18, -3.2874e-19, -1.3259e-16,\n -6.5826e-18, 2.0504e-19, 5.0626e-18, 1.4197e-19, -2.4242e-17,\n 3.1192e-19, 1.4034e-19, 2.4275e-20, -5.0752e-17, 3.7931e-19,\n -8.9000e-17, 1.2991e-18, 1.2443e-19, 4.6369e-21, -1.4288e-18,\n 3.6519e-21, -7.0049e-18, -1.7633e-18, 1.8707e-19, -4.9198e-18,\n -1.3545e-16, -7.4022e-19, -1.4488e-18, -1.5235e-18, 2.7633e-18,\n 3.7917e-19, -6.1450e-18, -2.2493e-19, -9.9729e-17, -1.2089e-17,\n -1.3707e-18, -1.0387e-17, -2.5922e-17, -1.4589e-19, -2.0008e-19,\n -1.2063e-17, -1.1241e-17, -6.3286e-19, -2.3883e-18, -9.8809e-17,\n 8.2386e-19, -1.6277e-17, -2.2958e-17, -3.6552e-19, -4.9656e-19,\n -5.9922e-18, -4.8761e-17, -4.0798e-17, -1.4355e-16, -1.2269e-17,\n -1.8242e-20, -1.3286e-16, -2.2048e-17, -5.9539e-17, 4.3229e-19,\n -5.7836e-17, 1.1593e-19, -1.4367e-18, -1.8018e-18, -1.1633e-18,\n -3.4589e-19, -3.0876e-18, -4.8317e-18, 3.7622e-18, -3.1063e-18,\n -2.1605e-17, -8.5072e-18, -1.3677e-18, -2.4794e-18, 1.0300e-18,\n -1.1065e-17, 1.1432e-19, -3.9334e-18, -7.4131e-20, -1.0024e-18,\n -8.4520e-18, -3.1811e-19, 7.2369e-19, -1.1139e-16, 1.0680e-18,\n -5.0806e-17, -4.4497e-18, -6.0676e-17, -1.0357e-16, -1.3822e-17,\n 2.4267e-19, -1.2594e-16, -4.0925e-18, -1.3573e-16, -6.1007e-17,\n 1.2273e-18, -5.7314e-17, -1.0525e-17, -6.8623e-19, -6.1345e-18,\n 8.6593e-20, -3.8402e-18, 3.3605e-19, 1.4080e-19, -1.9032e-19,\n -1.5651e-18, -1.6059e-18, -1.0214e-18, -1.7031e-18, 4.2374e-19,\n -1.4648e-20, -7.4323e-20, -9.2311e-19, -6.1608e-17, -1.4088e-18,\n -6.7649e-18, -5.1990e-17, -2.7505e-18, 1.2500e-18, -5.1762e-18,\n -1.5779e-18, -1.7177e-17, 5.6696e-19, -9.8958e-19, -7.0122e-18,\n -7.2001e-18, 6.3105e-19, -3.2919e-18, -1.3072e-18, -6.0866e-19,\n -2.4323e-18, -2.7213e-18, -1.6417e-18, -2.6716e-18, -3.7934e-19,\n 6.9167e-20, -1.0389e-16, -5.6034e-18, -1.3149e-16, 2.3302e-19,\n 5.0258e-19, -2.9311e-18, -4.3045e-17, -1.6955e-18, -2.9720e-18,\n -1.1235e-18], device='cuda:0')", - "exp_avg_sq": "tensor([1.3103e-09, 1.4907e-12, 1.2982e-12, 4.4721e-09, 3.9034e-09, 3.7011e-12,\n 1.1422e-13, 6.5586e-10, 5.4588e-09, 2.5781e-09, 3.8793e-09, 8.1280e-11,\n 2.2445e-08, 2.0477e-09, 3.1510e-13, 2.6719e-12, 1.6170e-12, 1.3307e-11,\n 3.2899e-14, 2.1682e-10, 6.3840e-11, 8.5004e-11, 6.6928e-10, 3.3100e-11,\n 7.6244e-10, 3.5530e-10, 3.0630e-11, 1.8100e-08, 4.0314e-09, 1.3164e-13,\n 6.3819e-09, 3.5103e-11, 8.9624e-10, 2.6190e-13, 1.1612e-12, 4.3711e-09,\n 6.6749e-14, 7.6420e-11, 5.9752e-09, 1.7534e-12, 4.0454e-10, 6.7691e-09,\n 1.8525e-11, 4.6961e-14, 1.1252e-11, 2.4286e-11, 2.8339e-13, 1.5149e-09,\n 2.0279e-13, 8.5359e-10, 6.3174e-10, 1.0667e-10, 2.5784e-12, 9.8185e-13,\n 3.2644e-11, 3.9509e-10, 7.0805e-13, 3.6060e-11, 1.0586e-08, 2.1080e-12,\n 1.2084e-10, 9.4470e-13, 5.0956e-10, 5.6762e-09, 4.9700e-09, 6.8288e-10,\n 8.3115e-13, 1.0434e-12, 1.7384e-13, 2.2640e-12, 1.5130e-08, 2.7622e-13,\n 1.7310e-11, 1.8065e-13, 2.2367e-10, 2.3039e-09, 5.3920e-13, 2.7320e-13,\n 2.2179e-10, 2.8135e-09, 4.0541e-13, 2.9832e-11, 2.0585e-11, 8.0989e-10,\n 1.5379e-08, 2.3186e-11, 9.4097e-13, 1.5793e-12, 2.4061e-11, 5.4639e-09,\n 2.3953e-11, 3.8949e-12, 1.3746e-09, 2.5582e-10, 3.8537e-11, 1.0393e-11,\n 7.7950e-13, 1.4588e-08, 2.0432e-09, 1.1659e-10, 5.9815e-11, 1.6341e-08,\n 7.2762e-09, 3.2098e-11, 7.0670e-11, 5.7407e-10, 2.2929e-11, 1.0304e-12,\n 2.1344e-13, 7.8602e-10, 1.8318e-10, 2.9944e-11, 1.5367e-11, 1.4841e-11,\n 2.7199e-09, 1.5246e-11, 2.2723e-11, 5.3926e-12, 3.8525e-13, 2.9454e-12,\n 5.2724e-12, 9.9697e-12, 4.4537e-13, 1.8651e-08, 3.3621e-14, 3.5290e-09,\n 2.1784e-12, 1.0527e-10, 3.0708e-11, 6.2036e-09, 1.3284e-14, 2.8331e-11,\n 2.9262e-10, 1.3926e-11, 4.8571e-13, 1.7524e-08, 8.7721e-13, 6.7579e-13,\n 1.0901e-11, 3.0541e-13, 9.0219e-11, 2.3071e-10, 1.4511e-12, 4.3826e-09,\n 4.0343e-09, 5.1902e-13, 2.7783e-10, 9.7859e-13, 7.2488e-10, 7.3799e-14,\n 4.2483e-09, 1.6862e-10, 1.3837e-13, 4.4664e-11, 1.0234e-08, 1.8028e-12,\n 1.3728e-10, 3.5589e-10, 2.5368e-10, 5.9842e-11, 1.6071e-09, 2.0950e-10,\n 8.5230e-10, 2.0382e-08, 6.2202e-12, 8.4881e-11, 1.0134e-08, 1.0434e-09,\n 1.4279e-09, 5.8405e-10, 7.5508e-09, 2.5683e-11, 1.1051e-13, 1.0253e-12,\n 4.0100e-11, 5.9769e-13, 1.9897e-11, 8.0226e-11, 2.8425e-13, 2.8359e-12,\n 3.2054e-09, 2.2981e-09, 1.1657e-11, 5.8789e-13, 1.2558e-13, 8.2728e-09,\n 6.4251e-12, 1.5671e-10, 2.6732e-10, 1.6528e-12, 2.4146e-10, 3.2103e-13,\n 1.1067e-12, 3.3063e-09, 1.2403e-10, 2.6765e-09, 6.0210e-11, 2.7626e-09,\n 1.9894e-08, 5.9230e-10, 1.0551e-12, 3.9312e-08, 7.4571e-14, 2.0599e-08,\n 5.4099e-09, 1.2931e-12, 9.4908e-10, 4.2159e-10, 3.6120e-11, 1.3271e-10,\n 3.5843e-10, 5.3839e-13, 1.4408e-11, 1.0814e-12, 1.1842e-11, 4.8496e-11,\n 7.7790e-14, 3.9005e-11, 1.8330e-10, 3.7843e-13, 1.5009e-09, 7.5055e-11,\n 5.8577e-14, 2.5090e-09, 8.1566e-13, 9.0907e-13, 7.5714e-10, 1.6540e-10,\n 1.0319e-12, 2.9233e-10, 5.1281e-13, 3.7119e-11, 8.3826e-11, 6.1058e-11,\n 8.4101e-10, 1.5313e-10, 1.4639e-12, 1.0456e-08, 1.9278e-11, 1.1600e-13,\n 4.4059e-10, 8.1053e-09, 1.7723e-11, 4.3751e-10, 7.7564e-11, 1.1468e-11,\n 9.3498e-09, 6.3513e-11, 1.0848e-08, 3.7796e-13, 5.7801e-11, 4.8433e-09,\n 2.2780e-09, 2.8374e-12, 2.0625e-12, 5.8104e-10], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([-1.3957e-17, -5.9059e-18, 1.6436e-19, -4.8358e-17, -4.7501e-17,\n 4.7221e-20, -1.6412e-18, -5.1603e-18, -8.6391e-17, -3.4692e-17,\n -3.7553e-17, 2.1297e-20, -1.2523e-16, -3.8356e-17, 9.9426e-19,\n -1.0057e-17, -7.0092e-18, 3.1624e-20, 2.2508e-18, -7.1314e-19,\n -8.7918e-18, -1.1726e-19, -5.6431e-18, -4.9517e-18, -4.5532e-17,\n -1.3555e-17, 7.5269e-19, -1.1042e-16, -7.9496e-17, -1.0041e-18,\n -3.4931e-18, -6.0849e-18, -3.2525e-17, 1.8341e-19, 7.3855e-19,\n -5.8827e-18, -1.0350e-18, -1.0617e-19, -4.9887e-17, 1.1954e-19,\n -1.1179e-18, -7.6151e-17, -3.1241e-19, -1.1953e-19, 5.4390e-19,\n -1.3554e-18, -1.6664e-17, -4.9756e-17, -1.4315e-19, -3.5961e-17,\n -4.0049e-17, -1.3789e-19, -1.5853e-17, -1.4858e-18, -2.6571e-17,\n -7.4903e-18, -3.8970e-19, -4.4966e-18, -6.6028e-17, -1.0907e-17,\n 5.8602e-19, 1.0019e-18, -5.9864e-17, -6.0147e-17, -7.5805e-17,\n -2.9037e-20, -7.5624e-19, 4.5714e-19, -6.5446e-19, 6.3437e-19,\n -1.0731e-16, -6.5494e-19, -6.5224e-20, -1.2983e-18, -5.4637e-17,\n -3.1487e-17, -1.2063e-17, 1.0997e-18, -1.1708e-18, -5.8729e-17,\n -2.2343e-18, -7.4093e-19, 3.4768e-19, -8.6220e-18, -7.3586e-17,\n -5.3491e-20, -7.1542e-19, 3.2813e-19, -1.1573e-18, -7.4021e-17,\n -1.6840e-17, 2.5176e-19, -3.3309e-17, 2.4084e-19, -4.3316e-18,\n 2.4619e-19, 2.5710e-18, -1.0393e-16, -1.0761e-17, -3.5535e-17,\n -1.9214e-17, -5.3846e-17, -1.0353e-16, -1.6963e-18, -1.2276e-19,\n -2.8135e-17, -3.5588e-17, 2.1403e-20, -2.5898e-18, 5.0803e-19,\n -3.5699e-17, -9.2096e-20, 1.9010e-19, -2.7414e-19, -1.0335e-16,\n -7.5559e-18, -2.8717e-19, 4.6533e-18, 1.2450e-19, -1.7678e-17,\n 3.7730e-19, -5.8174e-20, 4.7324e-21, -3.9666e-17, -1.0891e-19,\n -7.5377e-17, 1.7559e-18, 3.2593e-20, -1.4237e-20, -2.0078e-18,\n -2.3144e-19, -5.3346e-18, -2.4362e-18, 2.2047e-19, -3.8410e-18,\n -1.0238e-16, -7.2665e-19, -6.8814e-19, -1.2316e-19, 3.6930e-18,\n 1.3836e-19, -5.4681e-18, -2.0528e-19, -8.2599e-17, -1.7012e-17,\n -1.9811e-18, -8.3052e-18, -2.4358e-17, 2.4260e-20, -8.7009e-21,\n -1.3615e-17, -8.8083e-18, -7.8093e-19, -1.8434e-18, -8.4891e-17,\n 5.2466e-19, -1.2481e-17, -1.9969e-17, -8.3997e-20, -4.4442e-19,\n -7.7875e-18, -4.5550e-17, -4.0297e-17, -1.1824e-16, -1.1471e-17,\n 3.0052e-20, -1.0726e-16, -1.7234e-17, -5.2001e-17, -9.2836e-19,\n -4.5686e-17, -3.9499e-19, -7.6025e-19, -1.7073e-18, -1.3306e-18,\n -1.9413e-19, -3.0258e-18, -3.2281e-18, 3.2145e-18, -1.5350e-18,\n -1.4765e-17, -8.7202e-18, -3.2378e-19, -3.1860e-19, 6.9192e-19,\n -1.1041e-17, 2.5736e-20, -1.5723e-18, -1.3202e-19, -9.7573e-19,\n -9.9037e-18, -1.1525e-19, 3.3668e-19, -8.0217e-17, -2.7549e-19,\n -3.9677e-17, -2.7835e-18, -4.8490e-17, -8.7624e-17, -1.2774e-17,\n -3.3527e-19, -1.0941e-16, -2.3494e-18, -1.0393e-16, -5.3112e-17,\n 1.0967e-18, -4.6057e-17, -1.0828e-17, -6.8429e-19, -6.0262e-18,\n 9.0023e-20, -2.5247e-18, 1.8831e-19, 7.0244e-20, -5.3867e-19,\n -1.7696e-18, -9.8366e-19, -2.3591e-19, -7.4799e-19, 2.4696e-19,\n 2.2567e-19, -1.9094e-19, -1.1233e-18, -5.2289e-17, -1.8626e-18,\n -4.2530e-18, -4.0872e-17, -4.6357e-18, 4.2134e-19, -4.2778e-18,\n -1.5394e-18, -1.1293e-17, 6.0532e-19, -1.3176e-18, -7.4995e-18,\n -7.2566e-18, 6.4761e-19, -3.4525e-18, -1.2351e-18, -6.2744e-19,\n -1.5281e-18, -3.0035e-18, -1.1737e-18, -1.6675e-18, -9.5587e-20,\n -1.1247e-19, -8.9369e-17, -3.4771e-18, -1.0891e-16, -6.2968e-19,\n -3.1041e-20, -3.1284e-18, -3.6520e-17, -1.3669e-19, -3.2963e-18,\n -2.0902e-18], device='cuda:0')", + "exp_avg_sq": "tensor([3.7442e-10, 4.2599e-13, 3.7096e-13, 1.2779e-09, 1.1154e-09, 1.0576e-12,\n 3.2639e-14, 1.8742e-10, 1.5599e-09, 7.3672e-10, 1.1086e-09, 2.3226e-11,\n 6.4137e-09, 5.8514e-10, 9.0042e-14, 7.6350e-13, 4.6206e-13, 3.8026e-12,\n 9.4010e-15, 6.1958e-11, 1.8243e-11, 2.4291e-11, 1.9125e-10, 9.4585e-12,\n 2.1787e-10, 1.0153e-10, 8.7529e-12, 5.1723e-09, 1.1520e-09, 3.7616e-14,\n 1.8237e-09, 1.0031e-11, 2.5611e-10, 7.4840e-14, 3.3183e-13, 1.2491e-09,\n 1.9074e-14, 2.1838e-11, 1.7075e-09, 5.0106e-13, 1.1560e-10, 1.9343e-09,\n 5.2937e-12, 1.3420e-14, 3.2153e-12, 6.9398e-12, 8.0980e-14, 4.3289e-10,\n 5.7950e-14, 2.4392e-10, 1.8053e-10, 3.0481e-11, 7.3679e-13, 2.8057e-13,\n 9.3284e-12, 1.1290e-10, 2.0233e-13, 1.0304e-11, 3.0251e-09, 6.0238e-13,\n 3.4530e-11, 2.6995e-13, 1.4561e-10, 1.6220e-09, 1.4202e-09, 1.9514e-10,\n 2.3751e-13, 2.9816e-13, 4.9677e-14, 6.4695e-13, 4.3236e-09, 7.8932e-14,\n 4.9465e-12, 5.1621e-14, 6.3916e-11, 6.5835e-10, 1.5408e-13, 7.8071e-14,\n 6.3379e-11, 8.0399e-10, 1.1585e-13, 8.5246e-12, 5.8824e-12, 2.3143e-10,\n 4.3948e-09, 6.6255e-12, 2.6889e-13, 4.5130e-13, 6.8757e-12, 1.5613e-09,\n 6.8448e-12, 1.1130e-12, 3.9282e-10, 7.3101e-11, 1.1012e-11, 2.9698e-12,\n 2.2275e-13, 4.1687e-09, 5.8386e-10, 3.3316e-11, 1.7093e-11, 4.6697e-09,\n 2.0792e-09, 9.1721e-12, 2.0194e-11, 1.6404e-10, 6.5521e-12, 2.9444e-13,\n 6.0993e-14, 2.2461e-10, 5.2346e-11, 8.5566e-12, 4.3914e-12, 4.2409e-12,\n 7.7722e-10, 4.3567e-12, 6.4932e-12, 1.5410e-12, 1.1009e-13, 8.4166e-13,\n 1.5066e-12, 2.8489e-12, 1.2727e-13, 5.3297e-09, 9.6075e-15, 1.0085e-09,\n 6.2249e-13, 3.0083e-11, 8.7749e-12, 1.7727e-09, 3.7959e-15, 8.0959e-12,\n 8.3618e-11, 3.9795e-12, 1.3880e-13, 5.0076e-09, 2.5067e-13, 1.9311e-13,\n 3.1150e-12, 8.7273e-14, 2.5781e-11, 6.5928e-11, 4.1467e-13, 1.2524e-09,\n 1.1528e-09, 1.4831e-13, 7.9392e-11, 2.7964e-13, 2.0714e-10, 2.1089e-14,\n 1.2140e-09, 4.8183e-11, 3.9539e-14, 1.2763e-11, 2.9245e-09, 5.1516e-13,\n 3.9228e-11, 1.0170e-10, 7.2491e-11, 1.7100e-11, 4.5923e-10, 5.9866e-11,\n 2.4355e-10, 5.8242e-09, 1.7775e-12, 2.4255e-11, 2.8959e-09, 2.9815e-10,\n 4.0802e-10, 1.6690e-10, 2.1577e-09, 7.3392e-12, 3.1578e-14, 2.9300e-13,\n 1.1459e-11, 1.7079e-13, 5.6856e-12, 2.2925e-11, 8.1227e-14, 8.1039e-13,\n 9.1597e-10, 6.5671e-10, 3.3312e-12, 1.6800e-13, 3.5886e-14, 2.3640e-09,\n 1.8360e-12, 4.4781e-11, 7.6388e-11, 4.7229e-13, 6.8998e-11, 9.1737e-14,\n 3.1626e-13, 9.4479e-10, 3.5441e-11, 7.6483e-10, 1.7205e-11, 7.8942e-10,\n 5.6847e-09, 1.6925e-10, 3.0150e-13, 1.1234e-08, 2.1309e-14, 5.8864e-09,\n 1.5459e-09, 3.6951e-13, 2.7121e-10, 1.2047e-10, 1.0322e-11, 3.7924e-11,\n 1.0242e-10, 1.5385e-13, 4.1173e-12, 3.0901e-13, 3.3839e-12, 1.3858e-11,\n 2.2229e-14, 1.1146e-11, 5.2379e-11, 1.0814e-13, 4.2888e-10, 2.1448e-11,\n 1.6739e-14, 7.1698e-10, 2.3308e-13, 2.5977e-13, 2.1636e-10, 4.7266e-11,\n 2.9488e-13, 8.3535e-11, 1.4654e-13, 1.0607e-11, 2.3954e-11, 1.7448e-11,\n 2.4033e-10, 4.3759e-11, 4.1831e-13, 2.9880e-09, 5.5087e-12, 3.3149e-14,\n 1.2590e-10, 2.3162e-09, 5.0645e-12, 1.2502e-10, 2.2165e-11, 3.2769e-12,\n 2.6718e-09, 1.8149e-11, 3.1000e-09, 1.0800e-13, 1.6517e-11, 1.3840e-09,\n 6.5096e-10, 8.1082e-13, 5.8938e-13, 1.6604e-10], device='cuda:0')" }, "47": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-4.2537e-17, -2.2950e-17, 5.5445e-19, -3.5945e-17, -4.1572e-17,\n 2.1945e-20, 1.4566e-18, -1.7414e-17, -8.1574e-17, -3.7100e-17,\n -3.9559e-17, -7.2096e-20, -7.1407e-17, -5.5120e-17, -1.0650e-17,\n -3.3810e-17, -3.7863e-17, -4.2791e-19, -2.3168e-18, -8.4479e-18,\n 6.7760e-18, -1.4446e-19, -1.6541e-17, -1.4082e-17, -4.7390e-17,\n 9.5652e-18, 4.2497e-18, -7.9741e-17, -5.9571e-17, 5.7482e-19,\n -1.2673e-17, 6.3853e-18, -3.4980e-17, 1.9647e-19, -3.0664e-19,\n -1.3000e-17, 1.0409e-18, -5.7816e-18, -2.9604e-17, 1.0458e-19,\n -7.1504e-18, -5.3148e-17, 2.4180e-18, 1.1726e-18, -5.8420e-18,\n 5.5060e-18, -4.0083e-17, -5.1722e-17, 1.2036e-19, -4.1346e-17,\n -4.5077e-17, 4.8991e-18, -3.5463e-17, 1.1601e-18, -4.6173e-17,\n -2.5556e-17, 3.7420e-18, -1.6162e-17, -4.2837e-17, -2.7704e-17,\n -3.8666e-18, -4.8226e-19, -8.5270e-17, -5.2782e-17, -6.4087e-17,\n -3.8765e-18, -1.9523e-19, -7.3144e-21, 4.1022e-19, -7.2372e-20,\n -7.1477e-17, 5.7308e-18, -7.3711e-18, 1.4072e-18, -6.7765e-17,\n -3.4471e-17, -3.5839e-17, -1.3058e-18, -2.0627e-18, -6.5298e-17,\n 3.4102e-18, -9.1138e-19, -5.3058e-18, -2.2381e-17, -4.6982e-17,\n -2.2470e-18, -9.8511e-19, -7.2078e-19, 2.5475e-17, -4.5081e-17,\n -2.9685e-17, -2.0463e-18, -4.5124e-17, 4.2287e-19, 2.3457e-18,\n -1.3018e-17, -2.3193e-18, -5.9356e-17, -2.3693e-17, -5.0289e-17,\n -2.4509e-17, -3.3992e-17, -8.3733e-17, -1.6317e-18, 6.6180e-19,\n -3.0706e-17, -5.8331e-17, -2.2699e-19, 1.1894e-18, 2.6087e-18,\n -5.3692e-17, 1.3792e-17, -7.4945e-18, -4.4128e-19, -9.5005e-17,\n -2.5025e-17, -2.6121e-18, -4.1675e-18, 1.2623e-18, -5.1007e-17,\n -1.6339e-18, -1.3827e-18, -1.6543e-18, -2.5085e-17, -2.7170e-19,\n -6.4904e-17, -8.7640e-19, -7.0192e-19, 1.3059e-18, -7.8945e-18,\n 1.3139e-18, 5.3413e-18, -1.7943e-17, -5.6527e-18, 3.7365e-18,\n -6.7491e-17, -1.7693e-18, 6.7058e-18, 7.8371e-18, -1.1551e-18,\n -1.9222e-18, 4.2361e-18, 1.1839e-18, -6.6151e-17, -3.4750e-17,\n -3.5208e-19, 7.6511e-18, -4.6762e-17, -3.4780e-19, 8.1613e-20,\n -2.9878e-17, 7.1867e-18, -4.9779e-19, 1.3006e-17, -4.7167e-17,\n -6.8111e-19, 1.0625e-17, -3.3774e-17, 1.6606e-18, 2.3036e-19,\n -1.4678e-17, -5.5710e-17, -3.6502e-17, -7.2484e-17, -2.5246e-17,\n -2.2044e-18, -6.8325e-17, -3.6102e-17, -5.3029e-17, -8.4927e-18,\n -3.7414e-17, -2.4594e-18, 1.0804e-18, 1.0424e-18, -3.6345e-19,\n 3.5684e-19, 2.1867e-18, 3.5339e-18, -2.6849e-18, -1.3644e-17,\n -3.3038e-17, -1.7412e-17, 1.1481e-17, -1.9004e-17, -7.7885e-19,\n -2.3305e-17, -1.0565e-18, -1.2203e-17, -3.7409e-18, 3.1700e-19,\n -1.1465e-17, -6.1525e-19, -6.4019e-19, -7.9170e-17, 1.2506e-17,\n -3.7319e-17, 2.5221e-18, -4.1622e-17, -5.7565e-17, -1.5944e-17,\n 5.9875e-18, -7.1784e-17, 3.0538e-18, -6.2940e-17, -3.8352e-17,\n -4.6257e-19, -5.1259e-17, -1.2173e-17, -4.6908e-19, 4.6017e-18,\n -2.1744e-18, 3.0287e-18, -1.6057e-18, -2.0224e-18, -9.0187e-18,\n 3.7689e-19, 8.8511e-19, 9.8037e-18, 9.2650e-18, -2.7969e-18,\n 1.1361e-18, 1.7931e-18, 6.1074e-19, -5.5278e-17, 9.0194e-19,\n 5.3126e-18, -4.2769e-17, -2.9292e-17, -6.9982e-18, -1.4874e-17,\n 1.1780e-18, -2.9223e-17, 9.8014e-18, -1.7839e-18, -1.5899e-17,\n -2.2592e-17, -3.1644e-19, -8.2870e-18, 4.7895e-19, -1.4240e-18,\n -1.5479e-17, -8.5096e-18, 7.9461e-18, -1.3097e-17, 3.0616e-18,\n 1.9930e-19, -6.4861e-17, -1.2658e-17, -7.0350e-17, -7.7369e-18,\n -3.0458e-18, -6.8041e-18, -4.3176e-17, -3.4826e-18, -1.8138e-17,\n -3.2036e-18], device='cuda:0')", - "exp_avg_sq": "tensor([1.4464e-09, 1.3637e-10, 7.1158e-13, 5.6730e-09, 3.9182e-09, 2.1165e-12,\n 3.6777e-13, 5.3760e-10, 6.3827e-09, 2.1993e-09, 2.4787e-09, 1.9998e-09,\n 8.4505e-09, 1.2865e-09, 6.3726e-11, 9.4418e-11, 6.8872e-11, 6.8340e-12,\n 1.4041e-12, 3.7893e-10, 3.4473e-11, 1.6046e-10, 6.6087e-10, 4.0394e-10,\n 1.2193e-09, 1.2914e-10, 1.5565e-11, 8.6164e-09, 2.8718e-09, 2.8441e-12,\n 5.8463e-09, 2.0322e-11, 3.1090e-09, 5.6569e-12, 2.1606e-10, 2.7059e-09,\n 8.1710e-12, 3.7124e-11, 2.6985e-09, 5.0569e-13, 4.9532e-10, 3.7735e-09,\n 9.4804e-12, 1.1298e-11, 6.0338e-12, 6.4763e-12, 2.7562e-10, 4.2965e-09,\n 2.2749e-14, 2.1263e-09, 1.4363e-09, 4.8591e-10, 7.2805e-10, 3.3539e-11,\n 4.9273e-10, 3.3157e-10, 3.0577e-13, 3.2433e-10, 8.0495e-09, 2.7740e-10,\n 5.6292e-10, 2.3335e-10, 3.4538e-09, 9.8756e-09, 5.9803e-09, 1.7270e-09,\n 2.8860e-13, 1.3993e-12, 4.6078e-11, 4.1273e-10, 1.0059e-08, 1.1791e-10,\n 9.8178e-11, 2.7600e-11, 1.8748e-09, 1.2511e-09, 1.8962e-10, 9.9627e-12,\n 1.6976e-10, 5.8300e-09, 3.7089e-13, 1.6403e-11, 2.7523e-10, 4.7051e-10,\n 6.0408e-09, 1.0402e-11, 3.2433e-13, 2.1021e-13, 1.2683e-11, 3.3590e-09,\n 2.1324e-09, 2.4392e-10, 3.1852e-09, 2.4723e-10, 1.3140e-11, 4.9290e-12,\n 2.7026e-11, 4.8966e-09, 1.2071e-09, 1.0582e-09, 1.1717e-09, 5.9215e-09,\n 5.5715e-09, 1.2619e-11, 2.2416e-10, 1.8864e-09, 6.6226e-10, 8.3352e-11,\n 2.6162e-11, 8.1774e-10, 1.8384e-09, 1.5999e-11, 1.4124e-10, 7.4401e-12,\n 3.9902e-09, 4.5287e-10, 1.2186e-11, 1.3921e-11, 1.4131e-13, 1.8293e-10,\n 2.6958e-12, 4.3678e-12, 2.6180e-11, 6.7634e-09, 2.5270e-13, 3.4375e-09,\n 1.2617e-10, 9.3146e-10, 1.3129e-11, 5.6935e-09, 1.2511e-11, 1.5732e-11,\n 6.5276e-10, 6.9259e-12, 1.0315e-11, 6.7340e-09, 7.2594e-11, 3.3606e-11,\n 6.0969e-12, 5.0168e-14, 3.4362e-10, 1.1428e-10, 7.6844e-13, 2.2586e-09,\n 2.6106e-09, 3.5739e-12, 9.9813e-11, 4.6372e-10, 7.6335e-10, 2.9740e-12,\n 2.6498e-09, 9.5112e-11, 2.2893e-11, 1.9405e-11, 3.6733e-09, 1.5906e-10,\n 7.1217e-11, 7.2010e-10, 3.8137e-09, 2.6106e-11, 1.0836e-09, 2.0357e-09,\n 2.2629e-09, 9.8628e-09, 4.5141e-10, 2.1763e-10, 4.6140e-09, 9.6683e-10,\n 2.9757e-09, 8.7646e-10, 6.4474e-09, 1.3975e-11, 2.6625e-11, 4.4614e-11,\n 2.0906e-11, 4.5205e-11, 1.0179e-11, 3.8977e-11, 4.2350e-11, 1.7244e-10,\n 1.6433e-09, 1.0159e-09, 6.4166e-12, 2.0635e-10, 5.7294e-12, 4.0318e-09,\n 3.2782e-12, 4.0079e-10, 5.7766e-10, 8.0813e-10, 1.4010e-10, 1.4304e-11,\n 2.9260e-13, 5.8536e-09, 5.9036e-11, 2.1039e-09, 2.5198e-11, 1.9667e-09,\n 8.8871e-09, 9.3255e-10, 4.9260e-13, 2.3120e-08, 2.1340e-12, 6.8452e-09,\n 3.8846e-09, 2.9623e-10, 3.0422e-09, 2.0274e-10, 1.6287e-11, 6.4423e-11,\n 1.1139e-09, 1.7093e-13, 1.7923e-10, 5.6400e-13, 8.7147e-11, 2.4793e-11,\n 2.4364e-12, 1.9550e-11, 8.7121e-10, 2.2881e-13, 2.3246e-09, 3.5683e-11,\n 3.1485e-12, 7.4576e-09, 6.8498e-14, 4.7114e-11, 1.9337e-09, 7.5694e-10,\n 1.8394e-10, 6.7992e-10, 2.5295e-12, 6.5932e-10, 4.2995e-11, 2.7375e-11,\n 4.2211e-10, 2.7329e-10, 1.3082e-12, 6.3863e-09, 1.1153e-11, 8.2909e-12,\n 6.5374e-10, 6.2262e-09, 7.7816e-12, 6.9209e-10, 6.7401e-10, 5.7784e-12,\n 7.2789e-09, 1.0670e-10, 5.2382e-09, 3.1596e-11, 1.6391e-10, 2.2325e-09,\n 1.5407e-09, 4.9270e-11, 1.0647e-10, 3.0238e-10], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([-3.6476e-17, -1.6466e-17, 5.4139e-20, -3.4556e-17, -3.3740e-17,\n 3.3898e-19, 1.2261e-18, -1.5657e-17, -6.5451e-17, -3.5285e-17,\n -3.3930e-17, -2.1235e-19, -6.0429e-17, -4.2489e-17, -7.0960e-18,\n -2.8675e-17, -2.8208e-17, -4.9240e-19, -1.5608e-18, -8.4820e-18,\n 6.8008e-18, -7.1971e-19, -1.6723e-17, -1.0186e-17, -4.7717e-17,\n 9.3192e-18, 2.2418e-18, -6.0526e-17, -4.8138e-17, 7.4813e-19,\n -1.1171e-17, 4.3642e-18, -3.2569e-17, 4.0855e-20, -9.1668e-20,\n -1.3371e-17, 7.4867e-19, -4.1151e-18, -3.0216e-17, -3.8894e-20,\n -5.8286e-18, -4.3513e-17, 2.1198e-18, 6.4259e-19, -3.5191e-18,\n 6.1509e-18, -3.2618e-17, -4.2883e-17, 1.0440e-19, -3.0828e-17,\n -3.5334e-17, 4.0012e-18, -2.8345e-17, 1.1694e-18, -3.6422e-17,\n -1.7180e-17, 3.0911e-18, -1.0507e-17, -4.3350e-17, -2.4353e-17,\n -3.7229e-18, -5.6834e-19, -6.0467e-17, -4.6915e-17, -5.4040e-17,\n -4.0499e-18, -5.6997e-19, -1.6733e-19, -1.3129e-18, -8.9838e-20,\n -6.1280e-17, 5.3030e-18, -6.7322e-18, 5.4016e-19, -5.7644e-17,\n -3.0193e-17, -3.0901e-17, -7.6915e-19, -2.6261e-18, -4.7094e-17,\n 1.7166e-18, -6.9336e-19, -4.5761e-18, -1.9754e-17, -3.6849e-17,\n -1.0439e-18, -2.9956e-19, -4.6124e-20, 2.0648e-17, -4.4022e-17,\n -2.3538e-17, -2.7899e-18, -3.1608e-17, 9.8998e-20, 2.1857e-18,\n -1.0641e-17, -1.9839e-18, -5.1129e-17, -1.8093e-17, -4.1553e-17,\n -2.5296e-17, -2.8648e-17, -6.5668e-17, 4.6329e-19, 3.5252e-19,\n -2.5348e-17, -4.6374e-17, -9.2766e-19, -1.6352e-18, 2.5649e-18,\n -4.1667e-17, 1.1974e-17, -3.7626e-18, -6.6198e-19, -7.8609e-17,\n -2.1479e-17, -1.7008e-18, -3.7424e-18, -2.0489e-19, -3.8068e-17,\n -1.9121e-18, -1.3878e-18, -1.2870e-18, -2.0240e-17, 8.5807e-20,\n -5.4853e-17, -1.3252e-18, -7.3642e-19, 1.4444e-18, -8.4801e-18,\n 1.6656e-18, 4.0177e-18, -1.7751e-17, -5.6768e-18, 2.8901e-18,\n -5.0642e-17, -1.5775e-18, 6.5229e-18, 5.3709e-18, -2.7376e-18,\n -1.4225e-18, 3.8187e-18, 1.0928e-18, -5.6292e-17, -3.3909e-17,\n -2.7178e-19, 6.0084e-18, -4.0057e-17, -3.5852e-19, -1.0017e-20,\n -2.8685e-17, 5.5832e-18, 1.9348e-19, 1.0241e-17, -4.1920e-17,\n -4.9432e-19, 8.3713e-18, -2.8483e-17, 1.3039e-18, 1.6735e-19,\n -1.5300e-17, -5.3150e-17, -3.6395e-17, -6.3237e-17, -2.3862e-17,\n -1.9799e-18, -5.6852e-17, -2.8602e-17, -4.4574e-17, -8.5651e-18,\n -2.9770e-17, -1.5730e-18, 4.5086e-19, 1.1836e-18, 5.2929e-19,\n -2.8333e-19, 1.7810e-18, 2.0902e-18, -2.2650e-18, -1.0710e-17,\n -2.5389e-17, -1.5337e-17, 8.6835e-18, -1.1064e-17, -5.1083e-19,\n -1.9456e-17, -1.0087e-18, -7.7762e-18, -4.5614e-18, 9.2468e-20,\n -1.1846e-17, -8.7076e-19, -1.9171e-19, -6.4044e-17, 7.0822e-18,\n -2.7291e-17, 1.0859e-18, -3.4704e-17, -4.7498e-17, -1.4197e-17,\n 3.2111e-18, -6.1502e-17, 1.7748e-18, -4.8964e-17, -3.3320e-17,\n -8.3502e-20, -4.2200e-17, -1.3106e-17, -3.5626e-19, 4.5453e-18,\n -2.7722e-18, 1.5520e-18, -1.6453e-18, -5.0635e-19, -8.8606e-18,\n 8.7843e-19, 5.0292e-19, 6.5635e-18, 7.8836e-18, -2.5708e-18,\n 5.9814e-19, 7.5765e-19, 8.0655e-19, -4.5337e-17, 1.4644e-18,\n 3.3389e-18, -3.5283e-17, -2.5438e-17, -5.7982e-18, -1.4098e-17,\n 1.1586e-18, -1.9972e-17, 9.0756e-18, -6.3552e-19, -1.3741e-17,\n -1.8646e-17, -3.3972e-19, -9.1350e-18, 5.6479e-19, 1.7761e-19,\n -1.3920e-17, -9.3858e-18, 5.9527e-18, -9.5806e-18, 2.4971e-18,\n -1.1285e-18, -5.5620e-17, -9.0198e-18, -5.7664e-17, -7.7417e-18,\n -1.3395e-18, -6.2122e-18, -3.1869e-17, -4.3430e-18, -1.3002e-17,\n -3.4499e-18], device='cuda:0')", + "exp_avg_sq": "tensor([4.1331e-10, 3.8968e-11, 2.0334e-13, 1.6211e-09, 1.1197e-09, 6.0479e-13,\n 1.0509e-13, 1.5362e-10, 1.8239e-09, 6.2847e-10, 7.0831e-10, 5.7145e-10,\n 2.4148e-09, 3.6762e-10, 1.8210e-11, 2.6981e-11, 1.9681e-11, 1.9529e-12,\n 4.0124e-13, 1.0828e-10, 9.8509e-12, 4.5853e-11, 1.8885e-10, 1.1543e-10,\n 3.4842e-10, 3.6903e-11, 4.4478e-12, 2.4622e-09, 8.2064e-10, 8.1272e-13,\n 1.6706e-09, 5.8071e-12, 8.8842e-10, 1.6165e-12, 6.1740e-11, 7.7324e-10,\n 2.3349e-12, 1.0608e-11, 7.7112e-10, 1.4450e-13, 1.4154e-10, 1.0783e-09,\n 2.7091e-12, 3.2286e-12, 1.7242e-12, 1.8507e-12, 7.8760e-11, 1.2277e-09,\n 6.5008e-15, 6.0760e-10, 4.1044e-10, 1.3885e-10, 2.0804e-10, 9.5840e-12,\n 1.4080e-10, 9.4750e-11, 8.7376e-14, 9.2680e-11, 2.3002e-09, 7.9270e-11,\n 1.6086e-10, 6.6682e-11, 9.8695e-10, 2.8220e-09, 1.7089e-09, 4.9350e-10,\n 8.2470e-14, 3.9988e-13, 1.3167e-11, 1.1794e-10, 2.8744e-09, 3.3693e-11,\n 2.8055e-11, 7.8870e-12, 5.3573e-10, 3.5751e-10, 5.4185e-11, 2.8469e-12,\n 4.8510e-11, 1.6660e-09, 1.0598e-13, 4.6872e-12, 7.8648e-11, 1.3445e-10,\n 1.7262e-09, 2.9725e-12, 9.2681e-14, 6.0070e-14, 3.6244e-12, 9.5986e-10,\n 6.0934e-10, 6.9701e-11, 9.1020e-10, 7.0648e-11, 3.7550e-12, 1.4085e-12,\n 7.7228e-12, 1.3992e-09, 3.4494e-10, 3.0239e-10, 3.3483e-10, 1.6921e-09,\n 1.5921e-09, 3.6059e-12, 6.4055e-11, 5.3905e-10, 1.8925e-10, 2.3819e-11,\n 7.4761e-12, 2.3368e-10, 5.2532e-10, 4.5717e-12, 4.0362e-11, 2.1261e-12,\n 1.1402e-09, 1.2941e-10, 3.4823e-12, 3.9781e-12, 4.0379e-14, 5.2274e-11,\n 7.7034e-13, 1.2481e-12, 7.4810e-12, 1.9327e-09, 7.2210e-14, 9.8229e-10,\n 3.6053e-11, 2.6617e-10, 3.7518e-12, 1.6270e-09, 3.5752e-12, 4.4956e-12,\n 1.8653e-10, 1.9791e-12, 2.9477e-12, 1.9243e-09, 2.0744e-11, 9.6033e-12,\n 1.7422e-12, 1.4336e-14, 9.8193e-11, 3.2656e-11, 2.1959e-13, 6.4540e-10,\n 7.4600e-10, 1.0213e-12, 2.8522e-11, 1.3251e-10, 2.1813e-10, 8.4984e-13,\n 7.5720e-10, 2.7179e-11, 6.5418e-12, 5.5451e-12, 1.0497e-09, 4.5452e-11,\n 2.0351e-11, 2.0577e-10, 1.0898e-09, 7.4601e-12, 3.0965e-10, 5.8172e-10,\n 6.4664e-10, 2.8184e-09, 1.2900e-10, 6.2190e-11, 1.3185e-09, 2.7628e-10,\n 8.5033e-10, 2.5046e-10, 1.8424e-09, 3.9934e-12, 7.6083e-12, 1.2749e-11,\n 5.9740e-12, 1.2918e-11, 2.9087e-12, 1.1138e-11, 1.2102e-11, 4.9276e-11,\n 4.6958e-10, 2.9029e-10, 1.8336e-12, 5.8965e-11, 1.6372e-12, 1.1521e-09,\n 9.3676e-13, 1.1453e-10, 1.6507e-10, 2.3093e-10, 4.0035e-11, 4.0873e-12,\n 8.3613e-14, 1.6727e-09, 1.6870e-11, 6.0122e-10, 7.2005e-12, 5.6200e-10,\n 2.5396e-09, 2.6648e-10, 1.4076e-13, 6.6068e-09, 6.0982e-13, 1.9561e-09,\n 1.1101e-09, 8.4651e-11, 8.6933e-10, 5.7933e-11, 4.6541e-12, 1.8409e-11,\n 3.1831e-10, 4.8844e-14, 5.1215e-11, 1.6117e-13, 2.4903e-11, 7.0849e-12,\n 6.9622e-13, 5.5865e-12, 2.4896e-10, 6.5385e-14, 6.6428e-10, 1.0197e-11,\n 8.9972e-13, 2.1311e-09, 1.9574e-14, 1.3463e-11, 5.5257e-10, 2.1630e-10,\n 5.2562e-11, 1.9429e-10, 7.2283e-13, 1.8841e-10, 1.2286e-11, 7.8227e-12,\n 1.2062e-10, 7.8096e-11, 3.7383e-13, 1.8249e-09, 3.1871e-12, 2.3692e-12,\n 1.8681e-10, 1.7792e-09, 2.2237e-12, 1.9777e-10, 1.9260e-10, 1.6512e-12,\n 2.0800e-09, 3.0490e-11, 1.4969e-09, 9.0289e-12, 4.6839e-11, 6.3795e-10,\n 4.4027e-10, 1.4079e-11, 3.0424e-11, 8.6408e-11], device='cuda:0')" }, "48": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-1.4875e-18, -7.7048e-20, -5.4588e-19, ..., -2.9531e-19,\n -4.1316e-19, -3.1814e-19],\n [ 4.9093e-18, 1.6687e-19, -3.1324e-19, ..., 7.3290e-19,\n 3.8807e-20, 7.5669e-19],\n [ 1.2450e-18, 1.5267e-19, 6.6242e-19, ..., 3.9342e-19,\n 4.0915e-19, 4.0990e-19],\n ...,\n [-6.7951e-19, 6.3737e-20, 8.4680e-19, ..., 2.0156e-19,\n 2.5164e-19, 7.9122e-20],\n [ 1.8716e-18, -9.8992e-20, 9.7597e-19, ..., 1.1786e-18,\n 6.1989e-19, 3.7704e-19],\n [-1.1802e-19, -2.4524e-20, 1.5550e-19, ..., 1.0469e-19,\n 2.9304e-20, -1.1135e-19]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.2145e-12, 8.6704e-13, 2.0348e-12, ..., 2.0814e-12, 2.2793e-12,\n 3.2232e-12],\n [5.2351e-11, 1.5357e-11, 3.9287e-11, ..., 2.3646e-11, 5.5390e-11,\n 6.3175e-11],\n [2.7600e-12, 5.2002e-13, 1.2435e-12, ..., 1.7033e-12, 2.0736e-12,\n 3.6643e-12],\n ...,\n [1.3839e-12, 7.2114e-13, 1.0886e-12, ..., 5.3332e-13, 1.7742e-12,\n 9.1577e-13],\n [6.3333e-11, 1.8660e-11, 4.0802e-11, ..., 3.8981e-11, 6.1167e-11,\n 7.1342e-11],\n [9.8127e-13, 5.3611e-13, 9.1407e-13, ..., 5.4558e-13, 2.0597e-12,\n 1.9059e-12]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[-4.6436e-19, -4.9252e-19, -1.2743e-19, ..., -2.7026e-19,\n -2.7985e-19, -1.3071e-19],\n [ 1.1449e-19, 3.6455e-20, 1.9012e-20, ..., 8.2252e-18,\n -3.2478e-19, -9.1849e-21],\n [ 5.0781e-19, 1.8146e-19, 1.1055e-19, ..., -4.5274e-18,\n 2.6792e-19, 7.3024e-20],\n ...,\n [ 3.9972e-19, 2.1860e-20, 1.1402e-19, ..., 2.2850e-18,\n 1.3360e-19, 4.5697e-20],\n [ 7.3201e-19, 6.1168e-23, 7.3376e-20, ..., -3.3709e-20,\n 5.0184e-19, 5.5131e-19],\n [-1.7510e-19, 8.8043e-20, -4.6837e-20, ..., 1.0697e-18,\n 1.3193e-19, 5.0888e-21]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.1857e-13, 2.4776e-13, 5.8145e-13, ..., 5.9477e-13, 6.5132e-13,\n 9.2106e-13],\n [1.4960e-11, 4.3883e-12, 1.1226e-11, ..., 6.7570e-12, 1.5828e-11,\n 1.8053e-11],\n [7.8870e-13, 1.4860e-13, 3.5534e-13, ..., 4.8674e-13, 5.9254e-13,\n 1.0471e-12],\n ...,\n [3.9547e-13, 2.0607e-13, 3.1107e-13, ..., 1.5240e-13, 5.0700e-13,\n 2.6169e-13],\n [1.8098e-11, 5.3322e-12, 1.1659e-11, ..., 1.1139e-11, 1.7479e-11,\n 2.0387e-11],\n [2.8040e-13, 1.5320e-13, 2.6120e-13, ..., 1.5590e-13, 5.8856e-13,\n 5.4462e-13]], device='cuda:0')" }, "49": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-1.7233e-16, 1.0147e-16, 2.6700e-16, -7.5126e-16, -1.1890e-16,\n 2.9574e-16, 9.9222e-17, 1.4685e-16, -1.4351e-15, 4.0326e-17,\n 1.0759e-16, 4.4423e-16, -6.8875e-16, 5.1857e-18, 3.2602e-16,\n -1.4099e-15, -5.6083e-16, -3.4125e-18, -7.3138e-17, 5.9005e-17,\n -3.6649e-16, -1.7180e-16, 2.1607e-16, 2.2240e-16, -7.4634e-17,\n -1.6511e-15, 3.7681e-17, -1.3070e-15, -9.5475e-16, -1.2176e-16,\n 5.0942e-17, -4.1835e-16, -3.0881e-16, 2.3301e-16, 2.6925e-16,\n -4.5138e-17, 5.2250e-17, -6.0519e-16, 7.4904e-17, 3.3383e-16,\n 2.5312e-16, 6.2841e-18, 8.3914e-16, 1.7688e-16, -8.0410e-17,\n 8.5638e-16, -2.1805e-17, -1.0795e-15, 8.5647e-17, -2.4741e-16,\n 3.1002e-17, -3.2988e-16, 1.2371e-16, 5.0591e-16, 3.0515e-16,\n 1.3617e-15, 1.0141e-16, -4.3315e-16, -1.0813e-16, 4.9181e-18,\n 1.0468e-17, -1.7560e-16, -9.5132e-16, -7.0718e-16, 2.2744e-16,\n 2.7982e-16, -6.0115e-16, 5.2596e-16, -1.7570e-16, 1.4200e-16,\n -1.4192e-16, -2.5221e-17, 3.5076e-16, 3.8155e-16, -4.1825e-17,\n 1.0330e-15, 2.3522e-16, -2.4971e-16, 1.5393e-16, -8.1109e-16,\n 2.4993e-16, 1.7284e-15, 7.6761e-17, 3.7691e-16, -7.7828e-16,\n 5.6883e-16, 5.1759e-16, 2.5327e-16, 5.5786e-16, -3.3451e-16,\n -2.3236e-16, -1.9563e-16, -4.1487e-16, -2.0778e-16, -1.9663e-16,\n -4.8087e-16, 3.5984e-16, -2.2225e-16, 4.4900e-16, -9.9340e-17,\n 1.1598e-15, 1.9670e-16, -5.1344e-16, -2.5213e-16, 5.9270e-17,\n -2.5036e-16, -4.8162e-16, -2.2305e-16, -1.4272e-15, 1.9194e-16,\n -1.1055e-16, 1.0213e-15, 1.4959e-16, 4.1578e-16, -2.5064e-16,\n 1.0182e-16, -4.1376e-16, 1.3517e-16, 7.8744e-17, -6.5551e-16,\n 1.1879e-16, -1.6378e-16, 4.7905e-17, -2.2034e-17, -2.5876e-16,\n -3.3467e-16, 4.3631e-16, 4.4120e-16, 1.9942e-16, 5.0796e-16,\n 9.0177e-17, -1.1025e-16, 4.6951e-16, -2.3125e-16, -4.8696e-16,\n -9.1553e-17, 3.8173e-17, 2.2836e-16, 2.2865e-16, -1.1432e-16,\n 4.2838e-16, -3.3564e-16, 4.6874e-16, -1.1103e-16, 1.7033e-16,\n 2.1365e-16, 4.2999e-16, -1.2539e-15, 2.0981e-16, 3.7130e-16,\n -8.5703e-17, -8.1326e-16, 6.0464e-16, 4.2814e-16, -2.8557e-16,\n -2.0300e-16, -7.1361e-16, 5.9571e-17, 2.6148e-16, 2.5243e-16,\n -1.3303e-16, -5.0797e-17, 7.3173e-16, -5.8815e-17, 1.0444e-16,\n -8.3237e-17, -1.5333e-16, -9.8727e-18, 2.7149e-17, -1.3792e-16,\n -2.0948e-16, -1.5114e-16, 7.9397e-16, 4.9673e-16, 1.1191e-17,\n 5.7844e-16, 1.0676e-16, 1.3809e-17, 1.3423e-18, 7.1454e-16,\n -8.0167e-17, 8.4296e-17, -2.9490e-16, 9.5806e-17, -3.0936e-16,\n 1.5870e-18, 4.5813e-17, -8.2736e-17, 3.8015e-16, 8.6596e-16,\n 7.9220e-17, 5.7561e-16, 9.0901e-17, -1.3014e-15, -1.8077e-16,\n 5.3117e-17, -3.1861e-17, -6.0331e-17, -1.1904e-15, -3.9646e-17,\n 7.0150e-17, -3.9747e-16, -2.6234e-17, -1.3068e-15, -3.2215e-17,\n -2.9399e-16, -1.5152e-15, -1.3381e-16, 1.7300e-16, 2.5209e-16,\n 2.3834e-16, 6.1893e-17, 4.4872e-16, 4.5856e-16, 1.5615e-16,\n -3.9513e-16, 1.0130e-15, -9.9967e-17, 6.8122e-16, 7.3362e-16,\n 1.8093e-16, 3.3726e-16, -6.9692e-17, 1.4905e-16, 1.4781e-16,\n -2.7743e-18, -2.9104e-17, -9.4003e-16, -1.1702e-15, -2.0120e-17,\n -1.6533e-17, -7.2395e-17, 8.2910e-16, 1.8462e-16, 1.4720e-16,\n -5.9724e-17, -4.6160e-17, 6.8266e-17, 1.0486e-16, 1.3935e-15,\n 1.9568e-17, 2.1431e-16, -3.3586e-16, -1.7140e-16, -1.5050e-16,\n 1.3105e-16, -1.4654e-16, 4.4905e-16, 5.4675e-18, 5.5943e-16,\n 5.4385e-16, -5.0611e-17, 2.9660e-17, 1.2281e-16, 3.2259e-16,\n 9.6612e-18], device='cuda:0')", - "exp_avg_sq": "tensor([7.0800e-07, 1.1872e-05, 5.3861e-07, 7.9466e-06, 1.2233e-06, 4.2469e-08,\n 5.3801e-08, 7.9825e-07, 4.2139e-06, 1.8390e-06, 2.3037e-07, 2.9408e-08,\n 7.9554e-07, 1.0332e-07, 4.5137e-07, 5.9066e-06, 1.2469e-05, 8.4284e-08,\n 3.4436e-09, 1.2764e-07, 1.0689e-06, 1.3789e-07, 1.5140e-07, 2.7031e-07,\n 7.3348e-08, 4.9645e-07, 2.5941e-07, 1.8601e-05, 4.7472e-07, 2.5514e-07,\n 2.2318e-06, 4.7670e-06, 2.8015e-07, 1.1099e-07, 1.2850e-06, 7.5151e-07,\n 5.8190e-07, 7.6641e-06, 3.8682e-08, 2.9606e-06, 2.6484e-07, 5.3493e-08,\n 6.3896e-08, 5.0769e-07, 1.4909e-07, 5.6985e-08, 2.5190e-08, 3.2817e-06,\n 6.8038e-07, 6.8613e-07, 1.5307e-08, 1.9086e-08, 1.7600e-05, 1.6331e-05,\n 8.4675e-07, 1.9822e-06, 2.4085e-09, 6.5432e-07, 3.6338e-07, 1.2049e-07,\n 1.4521e-06, 2.3771e-07, 1.0113e-05, 6.8172e-06, 6.5272e-06, 9.9390e-06,\n 1.4719e-07, 4.2195e-08, 3.2641e-09, 2.5758e-06, 8.4583e-06, 2.7073e-09,\n 6.2505e-07, 1.6072e-06, 1.3361e-07, 3.1872e-07, 2.3264e-07, 3.1158e-08,\n 1.4616e-06, 2.2671e-06, 7.7230e-07, 7.0034e-06, 7.3350e-07, 2.6129e-06,\n 4.5184e-08, 1.6787e-06, 7.3259e-07, 5.2096e-07, 3.2246e-06, 1.1471e-08,\n 9.3991e-08, 4.9961e-09, 1.7382e-06, 2.1652e-08, 3.4933e-08, 7.4419e-07,\n 3.6130e-08, 1.2920e-08, 3.2673e-07, 9.3321e-09, 6.0317e-06, 2.4344e-08,\n 1.3351e-09, 2.8131e-07, 2.4686e-06, 3.9952e-09, 5.2092e-07, 6.6466e-09,\n 2.6978e-06, 5.7468e-08, 4.1509e-09, 3.2737e-08, 1.6459e-06, 2.6310e-07,\n 2.9512e-06, 3.2225e-07, 2.6781e-06, 6.5347e-06, 1.4349e-07, 2.9340e-07,\n 1.9321e-06, 1.0953e-08, 4.7882e-08, 2.1881e-06, 1.8893e-08, 4.8242e-06,\n 9.3488e-06, 1.2379e-06, 1.5513e-06, 2.2600e-05, 4.3492e-07, 3.6632e-09,\n 1.2206e-06, 2.8961e-07, 2.5019e-06, 2.7326e-08, 1.2803e-08, 4.7475e-08,\n 3.1165e-07, 3.7146e-09, 3.0062e-08, 1.6117e-07, 7.7099e-09, 1.4565e-09,\n 1.2321e-05, 1.0385e-06, 1.1277e-05, 3.1998e-06, 3.5642e-06, 1.0775e-07,\n 5.0362e-06, 2.7155e-05, 1.2819e-06, 2.7106e-08, 1.3697e-06, 2.7308e-08,\n 1.3276e-05, 7.0832e-08, 1.0916e-05, 5.0016e-08, 7.3991e-07, 3.4763e-08,\n 2.2246e-06, 1.9498e-08, 7.1864e-08, 1.6770e-07, 1.9231e-07, 7.8749e-08,\n 9.3896e-08, 5.1616e-08, 1.3241e-08, 6.3548e-07, 6.7602e-06, 7.1581e-06,\n 5.1707e-09, 7.6686e-08, 4.0352e-06, 7.5170e-06, 8.9536e-10, 2.0292e-06,\n 4.2925e-06, 2.7048e-06, 2.0323e-07, 4.6636e-10, 3.0152e-08, 1.3823e-05,\n 2.1690e-06, 1.1365e-07, 6.2318e-06, 2.8808e-05, 1.3411e-07, 2.0848e-06,\n 4.8871e-07, 5.2819e-06, 9.3081e-07, 4.9143e-08, 7.3479e-08, 1.8467e-07,\n 4.2185e-06, 1.2982e-07, 1.5277e-09, 1.2204e-06, 1.9976e-08, 6.4753e-06,\n 2.5208e-09, 3.0884e-07, 8.2726e-07, 7.4507e-08, 7.0500e-08, 5.8893e-07,\n 1.5665e-07, 2.0623e-08, 3.5366e-06, 1.0028e-06, 3.6083e-07, 2.9065e-08,\n 7.1394e-07, 1.2778e-08, 6.2348e-08, 4.4414e-06, 3.7324e-06, 6.3757e-06,\n 2.1673e-07, 6.3008e-07, 9.5700e-08, 3.2106e-06, 8.4697e-08, 6.7987e-07,\n 7.7440e-07, 9.5982e-08, 1.3350e-06, 2.7226e-08, 1.1932e-07, 1.7914e-06,\n 9.0463e-07, 3.2601e-07, 1.6602e-06, 1.7005e-07, 2.3054e-06, 2.9113e-06,\n 9.8329e-07, 2.1790e-07, 1.5736e-07, 1.2189e-08, 2.0856e-08, 1.3213e-09,\n 2.0469e-08, 1.4579e-06, 1.0544e-07, 1.4498e-06, 3.6153e-07, 5.4721e-09,\n 2.1267e-08, 4.1646e-07, 1.3087e-05, 3.2797e-07], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([-2.0657e-16, 8.6221e-17, 2.0293e-16, -5.6874e-16, -3.3520e-17,\n 2.1351e-16, 1.6916e-16, 1.0395e-16, -1.2365e-15, 1.9825e-17,\n 9.7793e-17, 3.6046e-16, -4.2751e-16, -4.5013e-17, 3.0009e-16,\n -9.8387e-16, -8.7637e-16, 5.7971e-18, -2.4428e-17, 5.8241e-17,\n -3.8245e-16, -2.3107e-16, 1.1494e-16, 2.2057e-16, 9.5457e-17,\n -1.3796e-15, 6.3064e-17, -1.1724e-15, -6.9384e-16, -1.0762e-16,\n -1.6200e-17, -1.7839e-16, -1.4537e-16, 2.0113e-16, 1.1564e-16,\n -4.7434e-17, 9.0248e-17, -4.6124e-16, 1.7223e-17, 2.8605e-16,\n 1.0913e-16, 4.4788e-17, 5.7256e-16, 9.4451e-17, -1.0936e-16,\n 5.7553e-16, 2.0743e-17, -9.5320e-16, 6.3945e-17, -5.1938e-16,\n 1.1211e-17, -2.3951e-16, 2.5939e-16, 5.1604e-16, 3.1500e-16,\n 1.0701e-15, 8.7479e-17, -3.4026e-16, -4.4742e-17, 2.2698e-17,\n 4.3165e-17, -1.8734e-16, -6.2672e-16, -6.6859e-16, 2.0785e-16,\n 3.8041e-16, -5.2367e-16, 4.3044e-16, -7.3637e-17, 1.6683e-16,\n -3.3762e-16, -5.9207e-17, 2.9954e-16, 3.7719e-16, -5.5508e-17,\n 8.1180e-16, 3.6357e-16, -2.0988e-16, 2.0379e-16, -6.4983e-16,\n 3.6407e-16, 1.3352e-15, 4.1200e-17, 4.5892e-16, -7.2582e-16,\n 5.2920e-16, 3.5136e-16, 1.6586e-16, 4.4144e-16, -1.9958e-16,\n -1.5818e-16, -1.2668e-16, -3.6369e-16, -1.6230e-16, -1.6489e-16,\n -4.3793e-16, 2.4039e-16, -1.6846e-16, 4.6738e-16, -2.8812e-17,\n 8.2297e-16, 1.6480e-16, -3.4568e-16, -7.0739e-17, 9.2619e-17,\n -1.7998e-16, -2.9591e-16, -1.8855e-16, -1.2649e-15, 1.9607e-16,\n -1.1121e-16, 7.8550e-16, 1.5565e-16, 3.5074e-16, -7.4657e-17,\n -6.7992e-17, -4.7720e-16, 1.4285e-16, 6.8961e-17, -7.8490e-16,\n 1.2014e-16, -1.4786e-16, 6.0289e-17, -9.9874e-17, -1.7383e-16,\n -3.1993e-16, 4.6908e-16, 2.6580e-16, 1.6326e-16, 4.2424e-16,\n 1.1764e-16, -7.8387e-17, 3.6105e-16, -3.7173e-16, -2.9964e-16,\n -6.2942e-17, 3.2822e-17, 1.0082e-16, 1.9325e-16, -3.6418e-17,\n 3.3933e-16, -2.6794e-16, 2.9607e-16, -8.9700e-17, 2.1777e-16,\n 2.1343e-16, 2.1539e-16, -9.0093e-16, 2.0392e-16, 3.2399e-16,\n -1.6610e-16, -6.3453e-16, 5.7036e-16, 3.6114e-16, -2.4777e-16,\n -1.5732e-16, -6.3571e-16, 3.0585e-17, 2.2663e-16, 1.9024e-16,\n -4.8577e-17, -6.0037e-17, 5.2811e-16, -4.2388e-17, 1.7289e-16,\n -4.7782e-17, -5.3598e-17, 2.1938e-17, 1.0321e-16, -1.0447e-16,\n -1.4375e-16, 2.8368e-18, 6.8438e-16, 3.2262e-16, -9.0033e-18,\n 4.3482e-16, -1.3784e-16, 3.0467e-17, -1.7734e-17, 7.1321e-16,\n -1.0526e-16, 1.0588e-16, -2.9706e-16, 2.4239e-17, -2.3971e-16,\n 9.1555e-17, -5.6677e-17, -5.2457e-17, 3.4966e-16, 6.9614e-16,\n 9.6684e-17, 5.3061e-16, 9.3740e-17, -1.0833e-15, -1.9020e-16,\n 1.9076e-17, -2.4548e-18, -4.7042e-17, -9.7971e-16, -3.4199e-17,\n 1.3664e-16, -2.8407e-16, -2.2862e-18, -1.0852e-15, -3.0177e-17,\n -2.2647e-16, -1.2552e-15, -1.1336e-16, 7.2294e-17, 2.1087e-16,\n 2.3408e-16, 5.2018e-17, 3.9068e-16, 2.7729e-16, 8.8530e-17,\n -2.8144e-16, 8.3723e-16, -1.1969e-16, 5.2617e-16, 6.1218e-16,\n 1.0575e-16, 2.9994e-16, -2.1685e-17, 1.0471e-16, 8.9558e-17,\n -4.7923e-17, -5.2309e-17, -8.3350e-16, -6.7541e-16, -1.3452e-18,\n -3.2111e-18, -7.4595e-17, 5.2658e-16, -1.0217e-16, 1.3512e-16,\n 5.8984e-18, -4.1204e-17, 9.9643e-17, 9.9880e-17, 1.0732e-15,\n 4.2581e-17, 2.1614e-16, -2.7272e-16, -1.3117e-16, -1.3666e-16,\n 7.9481e-17, -1.1755e-16, 3.3381e-16, -2.1945e-17, 5.9792e-16,\n 4.4386e-16, -4.3886e-17, 2.8325e-17, 1.0050e-16, 2.8993e-16,\n -4.1459e-17], device='cuda:0')", + "exp_avg_sq": "tensor([2.0232e-07, 3.3926e-06, 1.5391e-07, 2.2708e-06, 3.4958e-07, 1.2136e-08,\n 1.5374e-08, 2.2811e-07, 1.2042e-06, 5.2550e-07, 6.5830e-08, 8.4034e-09,\n 2.2733e-07, 2.9524e-08, 1.2898e-07, 1.6879e-06, 3.5631e-06, 2.4085e-08,\n 9.8404e-10, 3.6475e-08, 3.0545e-07, 3.9403e-08, 4.3264e-08, 7.7242e-08,\n 2.0960e-08, 1.4186e-07, 7.4129e-08, 5.3154e-06, 1.3566e-07, 7.2908e-08,\n 6.3776e-07, 1.3622e-06, 8.0055e-08, 3.1715e-08, 3.6721e-07, 2.1475e-07,\n 1.6628e-07, 2.1901e-06, 1.1054e-08, 8.4601e-07, 7.5681e-08, 1.5286e-08,\n 1.8259e-08, 1.4508e-07, 4.2603e-08, 1.6284e-08, 7.1981e-09, 9.3776e-07,\n 1.9442e-07, 1.9607e-07, 4.3741e-09, 5.4539e-09, 5.0293e-06, 4.6668e-06,\n 2.4196e-07, 5.6644e-07, 6.8824e-10, 1.8698e-07, 1.0384e-07, 3.4430e-08,\n 4.1494e-07, 6.7927e-08, 2.8898e-06, 1.9481e-06, 1.8652e-06, 2.8401e-06,\n 4.2060e-08, 1.2057e-08, 9.3275e-10, 7.3606e-07, 2.4170e-06, 7.7364e-10,\n 1.7861e-07, 4.5926e-07, 3.8181e-08, 9.1077e-08, 6.6479e-08, 8.9035e-09,\n 4.1765e-07, 6.4784e-07, 2.2069e-07, 2.0013e-06, 2.0961e-07, 7.4666e-07,\n 1.2912e-08, 4.7969e-07, 2.0934e-07, 1.4887e-07, 9.2146e-07, 3.2780e-09,\n 2.6859e-08, 1.4277e-09, 4.9670e-07, 6.1871e-09, 9.9824e-09, 2.1266e-07,\n 1.0324e-08, 3.6920e-09, 9.3366e-08, 2.6667e-09, 1.7236e-06, 6.9565e-09,\n 3.8151e-10, 8.0385e-08, 7.0541e-07, 1.1417e-09, 1.4886e-07, 1.8993e-09,\n 7.7093e-07, 1.6422e-08, 1.1861e-09, 9.3548e-09, 4.7032e-07, 7.5184e-08,\n 8.4332e-07, 9.2087e-08, 7.6529e-07, 1.8673e-06, 4.1004e-08, 8.3841e-08,\n 5.5211e-07, 3.1299e-09, 1.3683e-08, 6.2527e-07, 5.3989e-09, 1.3786e-06,\n 2.6715e-06, 3.5374e-07, 4.4329e-07, 6.4582e-06, 1.2428e-07, 1.0468e-09,\n 3.4879e-07, 8.2758e-08, 7.1493e-07, 7.8086e-09, 3.6587e-09, 1.3566e-08,\n 8.9058e-08, 1.0615e-09, 8.5903e-09, 4.6054e-08, 2.2032e-09, 4.1621e-10,\n 3.5207e-06, 2.9675e-07, 3.2226e-06, 9.1436e-07, 1.0185e-06, 3.0789e-08,\n 1.4391e-06, 7.7596e-06, 3.6632e-07, 7.7458e-09, 3.9141e-07, 7.8034e-09,\n 3.7938e-06, 2.0241e-08, 3.1195e-06, 1.4292e-08, 2.1143e-07, 9.9337e-09,\n 6.3571e-07, 5.5717e-09, 2.0536e-08, 4.7920e-08, 5.4953e-08, 2.2503e-08,\n 2.6832e-08, 1.4750e-08, 3.7838e-09, 1.8159e-07, 1.9318e-06, 2.0455e-06,\n 1.4776e-09, 2.1914e-08, 1.1531e-06, 2.1480e-06, 2.5586e-10, 5.7985e-07,\n 1.2266e-06, 7.7293e-07, 5.8076e-08, 1.3327e-10, 8.6162e-09, 3.9499e-06,\n 6.1981e-07, 3.2475e-08, 1.7808e-06, 8.2321e-06, 3.8322e-08, 5.9575e-07,\n 1.3965e-07, 1.5093e-06, 2.6599e-07, 1.4043e-08, 2.0997e-08, 5.2771e-08,\n 1.2055e-06, 3.7096e-08, 4.3655e-10, 3.4874e-07, 5.7084e-09, 1.8504e-06,\n 7.2033e-10, 8.8255e-08, 2.3640e-07, 2.1291e-08, 2.0146e-08, 1.6829e-07,\n 4.4764e-08, 5.8933e-09, 1.0106e-06, 2.8656e-07, 1.0311e-07, 8.3054e-09,\n 2.0401e-07, 3.6516e-09, 1.7817e-08, 1.2692e-06, 1.0666e-06, 1.8219e-06,\n 6.1931e-08, 1.8005e-07, 2.7347e-08, 9.1746e-07, 2.4203e-08, 1.9428e-07,\n 2.2129e-07, 2.7428e-08, 3.8149e-07, 7.7799e-09, 3.4097e-08, 5.1191e-07,\n 2.5851e-07, 9.3159e-08, 4.7442e-07, 4.8593e-08, 6.5880e-07, 8.3194e-07,\n 2.8098e-07, 6.2266e-08, 4.4967e-08, 3.4832e-09, 5.9598e-09, 3.7757e-10,\n 5.8493e-09, 4.1662e-07, 3.0130e-08, 4.1429e-07, 1.0331e-07, 1.5637e-09,\n 6.0772e-09, 1.1901e-07, 3.7397e-06, 9.3719e-08], device='cuda:0')" }, "50": { - "step": "tensor(2504.)", - "exp_avg": "tensor([ 4.4535e-19, -7.6782e-17, 1.6882e-20, -8.3583e-17, -3.3033e-17,\n -2.2061e-19, -2.4509e-17, -1.7780e-18, -1.1025e-16, -1.1675e-16,\n 7.6725e-19, -2.9196e-19, -2.6328e-17, -5.1345e-18, -1.3694e-17,\n -1.0931e-16, -5.9643e-17, 1.4642e-18, 4.7798e-19, 7.5170e-19,\n -5.3177e-17, -3.4919e-17, -2.8592e-18, -1.9231e-17, -7.3509e-17,\n -2.7956e-17, 8.1122e-19, -1.6778e-16, -7.8686e-17, 1.0486e-18,\n -8.1881e-19, -1.0687e-16, -1.3003e-17, -1.2580e-19, -3.6857e-19,\n -1.1557e-18, -3.2758e-18, -6.9902e-18, 1.5856e-19, -3.1414e-19,\n -2.0936e-18, -8.8728e-19, 1.9182e-18, -3.1127e-19, -1.0683e-18,\n -1.4116e-18, 6.2629e-19, -1.5341e-16, 9.9117e-19, -4.2272e-17,\n 7.2386e-19, 8.6089e-19, -9.8747e-17, 4.3627e-20, -2.3041e-17,\n 4.2595e-18, 1.7590e-19, -5.0686e-17, -1.9723e-17, -6.8908e-19,\n -7.3359e-20, 1.0618e-18, -1.0620e-16, -7.1112e-17, -5.9396e-17,\n -3.8376e-18, -4.3184e-17, -1.0094e-19, -4.8235e-18, 1.5204e-19,\n -5.4908e-17, 6.9084e-19, -1.2340e-18, -3.4225e-18, -4.0851e-18,\n -5.3578e-18, -5.1390e-17, 1.7505e-18, 4.8693e-19, -3.6169e-17,\n -1.1847e-17, -2.7841e-17, 1.7535e-19, -4.2447e-18, -1.9602e-17,\n -3.4770e-18, 1.5733e-22, 5.4505e-20, -7.9403e-19, -1.3874e-17,\n -3.4898e-18, -2.7077e-18, -9.3322e-17, -2.6118e-19, -5.6417e-18,\n -3.3471e-17, -1.7391e-19, -1.0919e-17, -1.3075e-17, 5.3281e-19,\n -2.3113e-17, -7.2095e-19, -4.8645e-18, -4.8174e-17, -2.0991e-21,\n -2.5323e-18, -3.6205e-17, 9.9686e-19, -7.7417e-17, 1.8968e-19,\n -5.4878e-18, -4.7578e-19, 2.9225e-19, 7.7852e-20, -9.8254e-17,\n -1.8475e-17, -6.0261e-17, 7.1595e-19, 3.5282e-19, -3.8472e-17,\n -8.4582e-19, -2.9256e-18, 6.9342e-19, -3.4338e-17, 1.1232e-19,\n -9.9419e-17, -3.1400e-19, -3.9861e-19, -1.0667e-19, -3.1524e-18,\n 4.3674e-19, -8.5509e-18, -3.8875e-18, -2.2219e-17, -2.2123e-17,\n -9.7068e-18, 8.6887e-19, -4.4373e-19, 3.8465e-19, 1.4475e-18,\n -2.3387e-19, -6.2051e-18, -3.1642e-19, -4.3039e-18, -6.7215e-18,\n -9.4425e-17, -7.2679e-17, -6.5367e-17, 1.8889e-19, 2.7661e-19,\n -6.6788e-18, -1.2696e-16, -1.2806e-20, -1.8067e-18, -6.8430e-17,\n 2.1379e-19, -9.9660e-17, -1.6537e-19, -2.0578e-19, 4.6622e-20,\n -8.6151e-19, -9.3192e-19, -5.7541e-17, -4.0643e-18, -2.3107e-18,\n -9.6436e-19, -3.0199e-17, -3.5414e-19, -9.3585e-18, -2.5598e-18,\n -7.7485e-18, -2.5715e-17, -3.7532e-18, -3.8644e-19, -2.1264e-18,\n -6.6477e-18, -1.0762e-16, -1.1324e-16, -5.2919e-20, -6.3659e-18,\n -4.9996e-19, -4.3600e-18, 4.5656e-18, -3.2159e-19, 1.6196e-18,\n -1.2311e-17, -6.2603e-18, -2.9777e-18, -5.4489e-19, 4.5458e-20,\n -4.4298e-20, -1.2720e-17, 8.3332e-20, -1.1052e-16, 2.4130e-18,\n 2.8270e-19, -2.4264e-18, -3.3239e-18, -1.2655e-16, -8.3788e-19,\n 1.1173e-19, -1.7621e-17, -1.4578e-18, -1.3468e-16, -3.9010e-18,\n 2.9447e-18, -5.7208e-17, -2.2209e-18, -2.3127e-20, -1.4096e-17,\n 1.3697e-19, 7.5333e-20, 9.6544e-20, -1.0013e-17, -2.0806e-17,\n -6.3993e-18, -5.4371e-18, 1.5926e-18, -6.4679e-19, -3.1789e-18,\n 4.9748e-19, -1.4560e-19, -2.1512e-17, -9.0290e-18, 8.3406e-20,\n -5.3435e-18, -9.9056e-19, -2.5261e-17, -4.1953e-17, -1.8239e-18,\n -1.0992e-18, -3.0120e-18, 1.2968e-18, -4.7873e-17, 1.9089e-19,\n -1.1806e-18, -2.3269e-20, -1.1499e-19, -3.9710e-17, 9.9931e-19,\n -1.1135e-17, -2.6070e-19, 3.7044e-18, -1.0080e-18, -1.5743e-20,\n -1.5234e-19, -4.4022e-18, -7.3776e-19, -2.0144e-18, -7.7347e-17,\n -3.7127e-20, -7.7369e-20, -4.8611e-19, -5.0301e-19, -7.3742e-17,\n -6.4030e-19], device='cuda:0')", - "exp_avg_sq": "tensor([7.4550e-11, 1.0989e-08, 5.4513e-11, 3.2030e-09, 1.2142e-10, 8.1253e-13,\n 3.6647e-10, 2.3200e-10, 4.4357e-09, 1.4081e-08, 5.2157e-11, 2.3209e-11,\n 2.1534e-12, 2.1966e-11, 1.0685e-09, 3.4291e-09, 2.2496e-09, 2.0831e-13,\n 2.0716e-13, 5.5807e-11, 1.4912e-09, 6.1269e-10, 5.0340e-10, 1.0367e-11,\n 2.4295e-09, 4.1245e-12, 2.6636e-11, 3.5760e-08, 3.5244e-09, 2.9345e-11,\n 2.9359e-12, 1.1912e-08, 1.4900e-12, 2.4194e-12, 1.2839e-11, 2.5529e-13,\n 1.2560e-10, 2.4956e-09, 3.9547e-13, 5.0870e-10, 3.0259e-10, 4.1554e-11,\n 1.9473e-11, 2.5326e-11, 6.0165e-13, 5.2714e-11, 2.2458e-11, 1.4688e-08,\n 6.6919e-12, 3.5316e-10, 5.2772e-12, 1.9312e-13, 3.2087e-08, 2.8245e-09,\n 4.0453e-10, 1.5301e-11, 5.8343e-13, 3.9564e-10, 1.2489e-12, 1.8621e-11,\n 9.0109e-12, 8.6215e-13, 6.6460e-09, 1.9134e-09, 5.7321e-09, 3.5348e-09,\n 8.0836e-11, 1.8329e-11, 1.5155e-12, 2.4817e-11, 2.0528e-09, 6.9090e-13,\n 8.7500e-12, 4.8882e-10, 1.2023e-10, 1.2599e-09, 2.7417e-09, 1.8447e-13,\n 1.9629e-12, 4.1807e-11, 1.2778e-09, 1.4784e-08, 5.9928e-12, 5.9865e-10,\n 2.0359e-12, 3.8069e-10, 9.8638e-11, 5.5984e-11, 1.7229e-11, 5.5250e-13,\n 8.4771e-11, 2.4897e-13, 8.5980e-09, 1.4737e-15, 8.6327e-11, 1.3659e-09,\n 2.9166e-11, 9.0547e-13, 1.3146e-09, 1.9487e-13, 4.8879e-09, 6.5757e-13,\n 1.2028e-12, 6.6319e-10, 2.1674e-11, 1.0185e-12, 4.1370e-11, 1.1200e-13,\n 5.5849e-09, 3.1775e-12, 9.8262e-12, 1.0788e-11, 6.0849e-11, 3.3209e-13,\n 6.4196e-09, 1.3866e-10, 1.4101e-09, 3.1638e-11, 6.6935e-12, 4.0741e-11,\n 4.8823e-10, 4.3793e-14, 2.4128e-12, 2.4379e-10, 1.9256e-14, 9.6718e-09,\n 4.6257e-10, 9.9556e-12, 6.3224e-12, 8.3542e-09, 2.1003e-14, 8.8428e-12,\n 8.2443e-10, 1.5854e-10, 1.9819e-09, 1.6068e-10, 1.3090e-11, 1.1084e-11,\n 1.0516e-11, 1.5177e-12, 5.7585e-13, 2.6909e-10, 8.8123e-13, 5.0522e-12,\n 5.9041e-09, 8.9825e-09, 1.6774e-08, 3.9845e-10, 2.7966e-10, 5.4941e-12,\n 1.5296e-09, 2.8717e-08, 3.4781e-12, 3.3006e-11, 1.6688e-09, 9.1548e-13,\n 1.0260e-08, 2.0611e-11, 1.6322e-10, 9.3266e-13, 4.5845e-13, 9.4101e-11,\n 6.1398e-09, 6.5869e-11, 1.4966e-12, 3.0000e-14, 1.3328e-11, 1.7183e-11,\n 9.9378e-13, 2.9872e-13, 3.1165e-11, 6.3857e-11, 1.3404e-09, 9.2616e-10,\n 1.0874e-11, 3.5202e-11, 1.4309e-08, 1.9622e-08, 9.7118e-13, 4.4968e-11,\n 4.4412e-10, 7.3138e-10, 3.2013e-13, 4.3804e-12, 3.8923e-14, 9.0623e-09,\n 2.0111e-10, 2.9069e-14, 8.8240e-10, 1.1271e-08, 7.7677e-12, 6.3868e-10,\n 1.6710e-12, 3.8140e-09, 1.7116e-12, 6.2754e-13, 3.4094e-11, 6.9316e-11,\n 7.1620e-09, 5.1785e-11, 8.1576e-13, 1.1133e-11, 3.7128e-13, 9.6387e-09,\n 1.3076e-12, 1.0796e-12, 2.5111e-11, 4.1451e-14, 5.1551e-13, 2.1558e-12,\n 4.0552e-11, 1.8578e-13, 2.5445e-10, 5.5276e-10, 2.2921e-10, 8.8061e-11,\n 5.6796e-10, 2.0376e-12, 2.4044e-11, 1.5001e-10, 3.0061e-10, 1.0132e-09,\n 5.6690e-12, 2.7193e-12, 2.0174e-11, 1.3814e-09, 2.9250e-14, 1.5475e-09,\n 3.8832e-10, 3.1045e-13, 3.5885e-12, 3.6675e-11, 5.4147e-11, 1.7465e-09,\n 7.4177e-12, 2.4134e-13, 1.6002e-12, 1.0015e-10, 9.0058e-10, 6.3763e-10,\n 1.4663e-09, 1.0718e-10, 3.3936e-13, 9.6208e-14, 4.0672e-13, 4.5647e-13,\n 5.4852e-11, 1.8796e-12, 6.5074e-11, 6.9592e-09, 3.5024e-11, 4.9278e-12,\n 4.1395e-13, 1.5517e-10, 1.4953e-08, 1.6703e-13], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([ 5.3796e-20, -6.0134e-17, 1.1468e-19, -5.9050e-17, -2.3068e-17,\n 7.2813e-21, -1.8755e-17, -2.2828e-18, -9.5476e-17, -9.6824e-17,\n 2.5340e-19, -2.2882e-19, -2.1396e-17, -3.6398e-18, -1.2572e-17,\n -7.9514e-17, -6.0796e-17, 5.5772e-19, 2.7535e-19, 1.0270e-19,\n -4.1643e-17, -2.6907e-17, -3.7831e-18, -1.0795e-17, -5.4963e-17,\n -2.6896e-17, 6.7771e-19, -1.3587e-16, -6.3826e-17, 3.6491e-19,\n -7.6803e-19, -8.2319e-17, -9.5551e-18, 1.0135e-19, 2.0145e-19,\n -6.6627e-19, -1.0650e-18, -7.0116e-18, -1.1349e-19, -7.8578e-19,\n -2.3450e-18, -1.5748e-19, -3.5962e-21, 2.3395e-19, -1.1813e-18,\n -1.1095e-18, -2.1140e-19, -1.2918e-16, 2.6033e-19, -4.3399e-17,\n 1.2672e-19, 7.0760e-19, -7.8269e-17, 3.7229e-19, -1.4800e-17,\n 3.6050e-18, 1.1401e-19, -4.2875e-17, -1.1233e-17, 7.2644e-21,\n -9.0220e-20, 1.1396e-18, -7.5907e-17, -5.7098e-17, -4.8696e-17,\n -1.9512e-18, -3.8696e-17, -1.9459e-19, -3.0306e-18, 3.0694e-19,\n -4.8404e-17, 1.0306e-18, -1.0979e-18, -2.3772e-18, -3.7469e-18,\n -4.0473e-18, -4.3270e-17, 1.5836e-18, 1.0768e-19, -2.7577e-17,\n -6.8941e-18, -2.2779e-17, -2.5940e-19, -2.4097e-18, -1.4937e-17,\n -3.8091e-18, -2.7108e-19, 1.1892e-19, -1.5050e-19, -9.8887e-18,\n -2.2897e-18, -2.7522e-18, -8.0672e-17, -2.5919e-20, -5.1022e-18,\n -2.4189e-17, -2.5270e-19, -1.0725e-17, -1.1340e-17, 5.0236e-20,\n -2.1333e-17, -9.3656e-19, -3.0868e-18, -3.8172e-17, -4.8049e-20,\n -3.1180e-18, -2.6310e-17, 1.0733e-18, -6.6697e-17, 9.4251e-20,\n -3.8003e-18, -7.6063e-19, 1.3168e-19, -1.1845e-20, -7.0857e-17,\n -1.5676e-17, -5.3738e-17, 2.5851e-19, -9.9410e-20, -3.4870e-17,\n -9.9756e-19, -2.1243e-18, 5.4841e-20, -2.9846e-17, 1.8792e-19,\n -8.0498e-17, -5.1678e-20, 1.4521e-21, -8.9840e-20, -1.9139e-18,\n 1.3052e-19, -6.1020e-18, -3.6212e-18, -1.8084e-17, -1.9535e-17,\n -7.3037e-18, 1.7477e-19, 1.6397e-20, 1.6502e-19, 9.5187e-19,\n -6.3609e-20, -4.9509e-18, -2.9499e-20, -3.5232e-18, -6.8501e-18,\n -8.4664e-17, -6.4327e-17, -5.1589e-17, 1.3565e-19, -6.1118e-20,\n -5.2284e-18, -1.0074e-16, 3.2951e-19, -9.1266e-19, -5.3967e-17,\n 2.0694e-19, -8.3182e-17, -1.9985e-19, 1.4352e-19, 4.1470e-19,\n -4.4388e-19, -1.1449e-18, -4.7522e-17, -2.4612e-18, -1.4019e-18,\n -7.6614e-19, -1.7967e-17, -4.0248e-19, -7.5763e-18, -1.9087e-18,\n -5.9098e-18, -1.9297e-17, -2.5902e-18, -6.3794e-19, -1.9989e-18,\n -5.3609e-18, -9.7924e-17, -9.3675e-17, -1.1201e-20, -4.4126e-18,\n -1.4857e-19, -3.8966e-18, 4.3438e-18, -5.6317e-19, 1.7702e-18,\n -1.0334e-17, -5.9084e-18, -1.1726e-18, -2.1862e-19, 1.3938e-20,\n 1.1773e-19, -1.0475e-17, 1.7710e-19, -8.0305e-17, 2.4325e-18,\n -8.0542e-20, -8.5654e-19, -1.8153e-18, -9.4995e-17, -6.5730e-19,\n -1.5564e-19, -1.4010e-17, -9.2407e-19, -1.0797e-16, -4.4114e-18,\n 2.5425e-18, -4.0207e-17, -1.7607e-18, -6.9178e-20, -9.3490e-18,\n 1.5085e-19, 5.0153e-19, -5.8690e-20, -1.0297e-17, -1.6901e-17,\n -4.7736e-18, -2.8908e-18, 1.2155e-18, 1.1917e-19, -3.0729e-18,\n 4.2777e-19, -3.7237e-19, -1.3299e-17, -9.1690e-18, 1.2838e-19,\n -6.2977e-18, -8.8448e-19, -2.5307e-17, -2.7953e-17, -1.0072e-18,\n -1.0595e-19, -2.0622e-18, 1.1307e-19, -4.7445e-17, 5.1248e-20,\n -6.8973e-19, 5.5962e-20, -1.1741e-19, -3.1158e-17, -3.1480e-19,\n -1.0509e-17, -4.8542e-20, 3.1713e-18, -1.0183e-18, -3.6078e-19,\n 1.8802e-19, -4.4931e-18, -7.2895e-19, -1.7313e-18, -5.1878e-17,\n 6.9097e-20, 9.6750e-21, -3.5966e-19, -1.0229e-18, -6.1107e-17,\n -6.7106e-19], device='cuda:0')", + "exp_avg_sq": "tensor([2.1303e-11, 3.1403e-09, 1.5578e-11, 9.1530e-10, 3.4696e-11, 2.3219e-13,\n 1.0472e-10, 6.6295e-11, 1.2675e-09, 4.0237e-09, 1.4904e-11, 6.6321e-12,\n 6.1535e-13, 6.2769e-12, 3.0534e-10, 9.7989e-10, 6.4284e-10, 5.9528e-14,\n 5.9197e-14, 1.5947e-11, 4.2613e-10, 1.7508e-10, 1.4385e-10, 2.9624e-12,\n 6.9425e-10, 1.1786e-12, 7.6114e-12, 1.0219e-08, 1.0071e-09, 8.3855e-12,\n 8.3894e-13, 3.4039e-09, 4.2579e-13, 6.9137e-13, 3.6689e-12, 7.2952e-14,\n 3.5892e-11, 7.1313e-10, 1.1301e-13, 1.4536e-10, 8.6469e-11, 1.1874e-11,\n 5.5645e-12, 7.2370e-12, 1.7193e-13, 1.5063e-11, 6.4176e-12, 4.1973e-09,\n 1.9123e-12, 1.0092e-10, 1.5080e-12, 5.5187e-14, 9.1690e-09, 8.0711e-10,\n 1.1560e-10, 4.3724e-12, 1.6672e-13, 1.1306e-10, 3.5688e-13, 5.3210e-12,\n 2.5749e-12, 2.4637e-13, 1.8992e-09, 5.4677e-10, 1.6380e-09, 1.0101e-09,\n 2.3100e-11, 5.2378e-12, 4.3307e-13, 7.0917e-12, 5.8661e-10, 1.9743e-13,\n 2.5004e-12, 1.3968e-10, 3.4356e-11, 3.6003e-10, 7.8347e-10, 5.2715e-14,\n 5.6093e-13, 1.1947e-11, 3.6514e-10, 4.2246e-09, 1.7125e-12, 1.7107e-10,\n 5.8177e-13, 1.0878e-10, 2.8187e-11, 1.5998e-11, 4.9232e-12, 1.5788e-13,\n 2.4224e-11, 7.1144e-14, 2.4569e-09, 4.2111e-16, 2.4669e-11, 3.9031e-10,\n 8.3345e-12, 2.5875e-13, 3.7565e-10, 5.5687e-14, 1.3968e-09, 1.8791e-13,\n 3.4370e-13, 1.8951e-10, 6.1935e-12, 2.9105e-13, 1.1822e-11, 3.2005e-14,\n 1.5959e-09, 9.0799e-13, 2.8079e-12, 3.0828e-12, 1.7388e-11, 9.4897e-14,\n 1.8344e-09, 3.9622e-11, 4.0295e-10, 9.0408e-12, 1.9127e-12, 1.1642e-11,\n 1.3952e-10, 1.2514e-14, 6.8949e-13, 6.9664e-11, 5.5026e-15, 2.7638e-09,\n 1.3218e-10, 2.8449e-12, 1.8067e-12, 2.3873e-09, 6.0018e-15, 2.5269e-12,\n 2.3559e-10, 4.5304e-11, 5.6633e-10, 4.5917e-11, 3.7405e-12, 3.1672e-12,\n 3.0050e-12, 4.3370e-13, 1.6455e-13, 7.6894e-11, 2.5182e-13, 1.4437e-12,\n 1.6871e-09, 2.5668e-09, 4.7933e-09, 1.1386e-10, 7.9915e-11, 1.5700e-12,\n 4.3711e-10, 8.2060e-09, 9.9389e-13, 9.4318e-12, 4.7688e-10, 2.6161e-13,\n 2.9320e-09, 5.8896e-12, 4.6642e-11, 2.6651e-13, 1.3100e-13, 2.6890e-11,\n 1.7545e-09, 1.8823e-11, 4.2766e-13, 8.5726e-15, 3.8086e-12, 4.9101e-12,\n 2.8398e-13, 8.5361e-14, 8.9057e-12, 1.8248e-11, 3.8303e-10, 2.6466e-10,\n 3.1074e-12, 1.0059e-11, 4.0890e-09, 5.6070e-09, 2.7752e-13, 1.2850e-11,\n 1.2691e-10, 2.0900e-10, 9.1479e-14, 1.2517e-12, 1.1122e-14, 2.5896e-09,\n 5.7469e-11, 8.3066e-15, 2.5215e-10, 3.2208e-09, 2.2197e-12, 1.8251e-10,\n 4.7750e-13, 1.0899e-09, 4.8911e-13, 1.7932e-13, 9.7428e-12, 1.9808e-11,\n 2.0466e-09, 1.4798e-11, 2.3311e-13, 3.1814e-12, 1.0610e-13, 2.7543e-09,\n 3.7367e-13, 3.0851e-13, 7.1757e-12, 1.1845e-14, 1.4731e-13, 6.1602e-13,\n 1.1588e-11, 5.3088e-14, 7.2710e-11, 1.5795e-10, 6.5497e-11, 2.5164e-11,\n 1.6230e-10, 5.8227e-13, 6.8709e-12, 4.2866e-11, 8.5902e-11, 2.8952e-10,\n 1.6200e-12, 7.7707e-13, 5.7648e-12, 3.9475e-10, 8.3584e-15, 4.4222e-10,\n 1.1096e-10, 8.8712e-14, 1.0254e-12, 1.0480e-11, 1.5473e-11, 4.9908e-10,\n 2.1197e-12, 6.8965e-14, 4.5726e-13, 2.8619e-11, 2.5735e-10, 1.8221e-10,\n 4.1900e-10, 3.0628e-11, 9.6974e-14, 2.7492e-14, 1.1622e-13, 1.3044e-13,\n 1.5674e-11, 5.3710e-13, 1.8595e-11, 1.9886e-09, 1.0008e-11, 1.4082e-12,\n 1.1829e-13, 4.4340e-11, 4.2729e-09, 4.7730e-14], device='cuda:0')" }, "51": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-1.5446e-17, -4.5224e-17, -1.5868e-18, -5.9573e-17, -3.5388e-17,\n 3.5031e-19, -2.8148e-17, -1.1236e-17, -7.6613e-17, -5.4815e-17,\n -7.5906e-18, 1.8698e-18, -4.2965e-17, -1.8153e-17, -2.0476e-17,\n -7.5708e-17, -5.1702e-17, -3.4471e-19, -3.0391e-19, -8.2979e-18,\n -4.6142e-17, -3.6893e-17, -1.1086e-17, -2.3885e-17, -4.7110e-17,\n -6.2613e-17, 4.5568e-19, -8.3928e-17, -6.1739e-17, -1.2734e-17,\n 6.7618e-20, -5.9394e-17, -3.0578e-17, -9.9299e-19, 1.7707e-18,\n 6.7666e-19, -1.5372e-17, -3.2153e-17, -2.9220e-18, -4.9690e-18,\n -1.0079e-17, -1.4270e-18, 8.6799e-18, 1.6182e-18, 5.3434e-19,\n 1.1598e-17, -5.8711e-18, -7.7692e-17, -5.7745e-18, -4.0388e-17,\n -3.9452e-18, -6.2080e-19, -5.0645e-17, 2.1962e-19, -2.5093e-17,\n 8.3959e-18, 4.2110e-19, -4.5921e-17, -2.9322e-17, -4.7218e-19,\n -3.0758e-19, -6.6103e-19, -6.7278e-17, -5.5557e-17, -3.8791e-17,\n -1.3583e-17, -4.6701e-17, 2.6787e-18, 3.1101e-18, 6.7751e-19,\n -4.2648e-17, -1.5649e-19, -8.1844e-18, -1.0093e-17, 2.9411e-18,\n -5.5796e-18, -3.7570e-17, -1.3625e-18, -1.1830e-18, -4.7571e-17,\n -1.9354e-17, -1.3738e-17, -2.0933e-18, -1.1436e-17, -4.1136e-17,\n -8.0493e-18, -2.6292e-18, 1.4996e-18, 7.6081e-18, -3.1222e-17,\n 2.4642e-18, 1.9350e-18, -5.7039e-17, 1.9232e-19, 3.9924e-18,\n -4.1261e-17, 9.3052e-19, -2.6685e-17, -1.8124e-17, -1.1167e-17,\n -1.5375e-17, -7.3966e-18, -2.5194e-17, -4.2907e-17, -7.4921e-19,\n -1.8493e-17, -4.2449e-17, -7.6188e-19, -6.9691e-17, -2.7039e-18,\n 3.9615e-18, 1.4888e-17, -2.1312e-18, -7.5939e-19, -5.5263e-17,\n -2.5984e-17, -4.7476e-17, 9.6348e-19, -2.5488e-18, -4.5872e-17,\n -1.0014e-17, 2.2864e-18, -5.6726e-18, -3.4659e-17, -1.2879e-19,\n -5.6281e-17, 1.9993e-18, 3.0310e-18, 1.6221e-19, -8.4389e-18,\n 1.8047e-19, 6.1709e-18, -9.1246e-18, -3.2848e-17, -3.6795e-17,\n 6.9316e-18, -5.1065e-18, 9.2092e-19, 2.5862e-18, -6.3033e-19,\n 1.0510e-18, 4.0766e-18, 3.8314e-18, 2.8800e-18, -1.7200e-17,\n -4.7392e-17, -3.9724e-17, -6.4076e-17, -2.9668e-18, 1.8207e-18,\n -2.0192e-17, -6.9487e-17, -1.3064e-18, 6.0499e-18, -4.8391e-17,\n -1.2027e-19, -6.2726e-17, -3.4910e-19, 3.1520e-18, -5.2690e-18,\n 5.7368e-19, -4.0553e-18, -3.2416e-17, 3.1402e-18, -1.4175e-17,\n 6.6578e-19, -3.4546e-17, -6.0254e-18, -2.1881e-17, 1.9159e-18,\n 5.1525e-18, -3.3705e-17, -6.9517e-18, -3.3751e-18, 6.1239e-19,\n -1.1582e-17, -5.1630e-17, -5.3995e-17, 8.8718e-20, -1.0909e-17,\n -1.0920e-17, -1.5870e-17, -3.2637e-18, -3.3878e-19, -1.2843e-18,\n -2.3739e-17, -1.7779e-17, 2.2601e-18, -3.6695e-18, -2.1914e-18,\n -7.5638e-19, -1.6550e-17, 4.3362e-19, -7.3858e-17, -1.6663e-18,\n -2.0206e-18, 1.0097e-18, 2.3887e-18, -7.5323e-17, 3.6520e-19,\n 4.4532e-19, -3.4117e-17, 7.1919e-19, -7.8310e-17, 1.3851e-18,\n -2.1309e-18, -6.7267e-17, 1.6952e-18, -9.6538e-19, -2.0595e-17,\n -3.2607e-18, -2.3927e-18, -4.8581e-19, -1.5129e-17, -2.5910e-17,\n 4.0811e-18, -6.8172e-18, -1.1492e-18, 6.8199e-18, -6.2662e-18,\n -6.1596e-18, -4.2756e-18, -2.9529e-17, -2.0459e-17, -4.2615e-18,\n -1.7717e-17, 2.8368e-19, -4.6017e-17, -5.6849e-17, 8.9077e-19,\n 7.9052e-19, 2.2829e-18, 7.0653e-18, -3.5807e-17, -1.2806e-18,\n 1.6072e-19, 3.1996e-20, -9.3934e-18, -3.4551e-17, 7.5529e-19,\n -2.1157e-17, -5.9571e-18, -2.8792e-18, 8.0177e-19, -6.4364e-20,\n 1.0829e-18, 3.3289e-18, -3.4475e-18, -6.3962e-19, -3.9452e-17,\n 1.0191e-18, 4.2981e-20, -5.3046e-19, -8.3447e-18, -4.1370e-17,\n 2.4684e-19], device='cuda:0')", - "exp_avg_sq": "tensor([5.0200e-10, 8.1913e-09, 1.8206e-10, 5.5379e-09, 1.1793e-09, 4.4736e-13,\n 6.0871e-10, 2.7781e-10, 4.5854e-09, 4.3991e-09, 2.9500e-10, 1.1503e-11,\n 8.0345e-10, 2.0973e-10, 5.2369e-10, 4.7753e-09, 7.2416e-09, 8.6269e-12,\n 3.2446e-14, 2.1432e-10, 2.0875e-09, 6.7746e-10, 5.5140e-10, 4.4131e-10,\n 1.4376e-09, 4.7820e-10, 1.2729e-11, 1.4031e-08, 2.0107e-09, 3.1203e-10,\n 6.2540e-10, 5.9493e-09, 2.8778e-10, 1.3277e-12, 2.2968e-10, 1.2528e-10,\n 1.8595e-10, 2.7212e-09, 2.6444e-11, 7.2520e-10, 3.8908e-10, 1.8545e-11,\n 9.5177e-12, 9.6551e-12, 4.0773e-11, 2.2161e-11, 1.1581e-11, 5.4503e-09,\n 2.9685e-11, 1.2612e-09, 2.8856e-12, 3.8443e-12, 1.2314e-08, 3.6325e-09,\n 6.4695e-10, 1.2349e-11, 1.2971e-13, 1.2185e-09, 4.0290e-10, 8.1507e-12,\n 3.4295e-10, 4.0910e-11, 7.3241e-09, 4.7842e-09, 5.3525e-09, 2.2079e-09,\n 5.9356e-10, 9.4933e-12, 4.4523e-13, 5.3595e-10, 5.5183e-09, 6.2974e-14,\n 1.4284e-10, 3.0098e-10, 4.8070e-11, 7.1902e-10, 1.4694e-09, 9.6920e-13,\n 8.0313e-11, 1.6926e-09, 6.8007e-10, 5.8234e-09, 1.9897e-10, 3.7551e-10,\n 1.3132e-10, 2.0368e-10, 1.3715e-10, 2.2971e-11, 3.7025e-10, 1.1697e-10,\n 4.6886e-11, 3.6906e-12, 3.7915e-09, 4.8296e-14, 4.3745e-11, 1.4548e-09,\n 1.5404e-11, 1.7767e-10, 1.2129e-09, 9.7483e-11, 3.9477e-09, 5.4616e-11,\n 1.4432e-11, 9.2444e-10, 4.0655e-10, 2.3541e-11, 7.5433e-10, 7.5196e-14,\n 3.4005e-09, 1.5192e-12, 2.6844e-12, 5.0156e-12, 2.2228e-10, 7.3676e-11,\n 4.1138e-09, 8.2195e-10, 2.9234e-09, 1.6396e-09, 3.0359e-12, 6.7274e-10,\n 7.4435e-10, 6.0833e-13, 1.2914e-12, 1.9313e-09, 2.7989e-12, 5.6722e-09,\n 1.6935e-09, 7.3456e-11, 1.6138e-10, 5.6793e-09, 1.0805e-11, 3.7083e-12,\n 5.6228e-10, 8.1228e-10, 8.2868e-10, 6.8937e-11, 7.2797e-12, 5.7516e-12,\n 4.6619e-12, 4.5499e-13, 2.9438e-13, 1.0975e-10, 3.4230e-13, 1.3447e-12,\n 3.4211e-09, 2.9635e-09, 8.4107e-09, 2.6039e-09, 8.6886e-10, 2.7337e-12,\n 1.3303e-09, 1.6356e-08, 1.0940e-11, 1.0947e-11, 2.0841e-09, 5.9597e-12,\n 8.8653e-09, 1.0902e-11, 2.5709e-09, 5.1364e-11, 1.4075e-10, 3.8905e-11,\n 2.4879e-09, 3.2508e-11, 9.5138e-11, 1.6228e-12, 4.3164e-10, 7.5060e-12,\n 1.9410e-10, 1.5984e-11, 1.3620e-11, 7.7787e-10, 6.0099e-10, 1.1110e-09,\n 4.7671e-12, 3.2565e-10, 4.7859e-09, 6.5399e-09, 7.6513e-14, 1.2303e-09,\n 1.1649e-09, 6.1333e-10, 1.8879e-11, 2.1629e-12, 1.8971e-12, 3.6668e-09,\n 1.4771e-10, 5.6561e-13, 1.1270e-09, 7.7577e-09, 3.9760e-12, 8.9320e-10,\n 4.2561e-11, 4.6766e-09, 1.8678e-10, 1.6932e-11, 1.6282e-11, 2.7121e-11,\n 5.0538e-09, 2.2165e-11, 1.4372e-13, 8.3723e-10, 1.8427e-11, 6.1802e-09,\n 1.5487e-12, 7.1297e-11, 9.7703e-10, 2.0349e-12, 2.0504e-11, 5.4800e-10,\n 1.9759e-11, 5.5252e-11, 3.9519e-10, 2.7284e-10, 5.7394e-10, 4.6908e-11,\n 4.8202e-10, 1.7857e-12, 1.2503e-11, 7.3996e-11, 8.0380e-10, 1.5613e-09,\n 3.9863e-10, 5.3925e-10, 8.6817e-12, 1.3254e-09, 2.2871e-12, 1.0642e-09,\n 1.3315e-09, 4.5753e-11, 2.9356e-10, 1.7365e-11, 2.8346e-11, 2.2063e-09,\n 5.9073e-11, 1.9848e-11, 3.5054e-10, 5.3090e-11, 2.0763e-09, 1.0136e-09,\n 1.0655e-09, 5.5190e-11, 1.5643e-11, 2.9484e-12, 6.1339e-12, 5.6145e-15,\n 2.9632e-11, 4.4065e-12, 2.4923e-11, 3.0665e-09, 1.0865e-10, 4.6788e-13,\n 9.0322e-11, 3.1030e-10, 8.4805e-09, 2.8588e-11], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([-1.4224e-17, -3.5725e-17, -1.6938e-18, -4.5252e-17, -2.6203e-17,\n -4.4684e-19, -2.1228e-17, -1.0069e-17, -6.3735e-17, -4.4988e-17,\n -6.1624e-18, 1.6528e-18, -3.1706e-17, -1.5143e-17, -1.6627e-17,\n -5.6492e-17, -5.0733e-17, 2.5501e-20, -1.2865e-19, -8.1163e-18,\n -3.7952e-17, -3.0371e-17, -1.2229e-17, -1.6545e-17, -3.4339e-17,\n -5.1447e-17, 5.9683e-19, -6.9355e-17, -4.8735e-17, -1.0669e-17,\n -7.8144e-20, -4.4690e-17, -2.1430e-17, -1.2964e-19, 7.0683e-19,\n 3.5412e-19, -9.5555e-18, -2.5288e-17, -3.6954e-18, -4.9640e-18,\n -1.0745e-17, -1.3283e-18, 5.7085e-18, 9.7266e-19, 8.9539e-19,\n 7.3050e-18, -2.3203e-18, -6.4842e-17, -4.8211e-18, -4.0278e-17,\n -3.1155e-18, -5.3818e-19, -3.7777e-17, 1.7701e-19, -1.7232e-17,\n 7.2301e-18, 5.1362e-19, -3.7466e-17, -2.0652e-17, -1.3995e-18,\n -3.3406e-19, -7.9428e-19, -5.0188e-17, -4.6389e-17, -3.1368e-17,\n -7.4357e-18, -3.9180e-17, 2.2183e-18, 1.8447e-18, 1.3264e-18,\n -3.8742e-17, -3.3180e-19, -6.2099e-18, -7.6480e-18, 2.8231e-18,\n -5.0795e-18, -2.7782e-17, -1.1714e-18, -7.7762e-19, -3.7526e-17,\n -1.0946e-17, -1.1625e-17, -9.5983e-19, -6.4033e-18, -3.4375e-17,\n -6.4628e-18, -3.0510e-18, 1.3555e-18, 5.8712e-18, -2.2584e-17,\n 1.7026e-18, 1.9548e-18, -4.7101e-17, 5.0437e-22, 3.6226e-18,\n -3.2695e-17, 1.9085e-19, -2.2303e-17, -1.3888e-17, -4.5920e-18,\n -1.4902e-17, -5.8227e-18, -1.6951e-17, -3.2087e-17, -7.3788e-19,\n -1.4854e-17, -3.1424e-17, -8.4151e-19, -5.8656e-17, -2.3990e-18,\n 2.3656e-18, 1.0564e-17, -1.9511e-18, -3.2053e-20, -4.0715e-17,\n -2.3419e-17, -4.2550e-17, 1.5887e-18, -4.1135e-19, -4.2313e-17,\n -9.2879e-18, 1.5625e-18, -3.9905e-18, -2.9365e-17, -1.3477e-19,\n -4.6495e-17, 2.4713e-18, 1.3632e-18, 3.1178e-19, -6.0332e-18,\n 5.7013e-19, 4.4715e-18, -8.3208e-18, -2.9581e-17, -2.8644e-17,\n 5.4492e-18, -3.4477e-18, -6.6915e-20, 2.3369e-18, -2.6640e-19,\n 1.2553e-18, 3.3000e-18, 1.9528e-18, 2.4029e-18, -1.3572e-17,\n -3.9450e-17, -3.5465e-17, -4.8820e-17, -2.6144e-18, 1.5927e-18,\n -1.7669e-17, -5.4834e-17, 7.3767e-19, 4.5495e-18, -3.9133e-17,\n -1.0770e-19, -5.1775e-17, -1.7064e-19, 2.0465e-18, -3.4320e-18,\n 2.5677e-19, -3.4176e-18, -2.6989e-17, 1.7768e-18, -8.2921e-18,\n 4.9325e-19, -2.4208e-17, -3.0298e-18, -1.5269e-17, 1.4297e-18,\n 3.9609e-18, -2.3705e-17, -3.9570e-18, -4.3713e-18, 5.5313e-19,\n -9.7722e-18, -4.6967e-17, -4.3942e-17, -1.3964e-20, -6.0667e-18,\n -1.0922e-17, -1.2245e-17, -2.9513e-18, 2.0377e-19, -1.3224e-18,\n -1.8116e-17, -1.7074e-17, 7.6103e-19, -3.3701e-18, -9.5456e-20,\n -2.1798e-19, -1.2617e-17, 2.9309e-19, -5.8058e-17, -1.7048e-18,\n -1.3933e-18, -4.8907e-19, 1.1156e-18, -5.9367e-17, 1.7118e-19,\n 8.7569e-19, -2.6231e-17, 1.6161e-19, -6.3503e-17, 2.3237e-18,\n -1.8212e-18, -5.2544e-17, 1.2487e-18, -1.4524e-18, -1.5490e-17,\n -1.7878e-18, -3.6425e-18, -6.8185e-19, -1.4949e-17, -2.1399e-17,\n 3.1951e-18, -4.0116e-18, -8.1396e-19, 4.2855e-18, -5.8919e-18,\n -4.0622e-18, -4.3224e-18, -2.1361e-17, -1.7079e-17, -2.4727e-18,\n -1.6430e-17, -1.8119e-20, -3.9463e-17, -3.8695e-17, -2.7316e-20,\n 6.0209e-20, 9.7079e-19, 3.2812e-18, -3.5121e-17, -9.6278e-19,\n -1.2086e-19, -6.8950e-20, -6.0167e-18, -2.7366e-17, -1.8555e-19,\n -1.8465e-17, -3.1810e-18, -2.3574e-18, 8.0040e-19, 2.9274e-19,\n 4.4916e-19, 3.3104e-18, -4.5697e-18, -1.4990e-18, -2.7508e-17,\n 8.1671e-20, -2.7412e-20, -6.9432e-19, -8.6957e-18, -3.3813e-17,\n 3.4587e-19], device='cuda:0')", + "exp_avg_sq": "tensor([1.4345e-10, 2.3407e-09, 5.2024e-11, 1.5825e-09, 3.3698e-10, 1.2784e-13,\n 1.7394e-10, 7.9386e-11, 1.3103e-09, 1.2571e-09, 8.4298e-11, 3.2870e-12,\n 2.2959e-10, 5.9932e-11, 1.4965e-10, 1.3646e-09, 2.0693e-09, 2.4652e-12,\n 9.2718e-15, 6.1243e-11, 5.9652e-10, 1.9359e-10, 1.5757e-10, 1.2611e-10,\n 4.1080e-10, 1.3665e-10, 3.6374e-12, 4.0095e-09, 5.7458e-10, 8.9166e-11,\n 1.7871e-10, 1.7001e-09, 8.2234e-11, 3.7941e-13, 6.5632e-11, 3.5799e-11,\n 5.3137e-11, 7.7760e-10, 7.5566e-12, 2.0723e-10, 1.1118e-10, 5.2993e-12,\n 2.7197e-12, 2.7590e-12, 1.1651e-11, 6.3328e-12, 3.3092e-12, 1.5575e-09,\n 8.4827e-12, 3.6041e-10, 8.2458e-13, 1.0986e-12, 3.5189e-09, 1.0380e-09,\n 1.8487e-10, 3.5289e-12, 3.7066e-14, 3.4821e-10, 1.1513e-10, 2.3291e-12,\n 9.8000e-11, 1.1690e-11, 2.0929e-09, 1.3671e-09, 1.5295e-09, 6.3092e-10,\n 1.6961e-10, 2.7128e-12, 1.2723e-13, 1.5315e-10, 1.5769e-09, 1.7995e-14,\n 4.0819e-11, 8.6009e-11, 1.3736e-11, 2.0546e-10, 4.1991e-10, 2.7696e-13,\n 2.2950e-11, 4.8366e-10, 1.9434e-10, 1.6641e-09, 5.6856e-11, 1.0731e-10,\n 3.7525e-11, 5.8204e-11, 3.9191e-11, 6.5641e-12, 1.0580e-10, 3.3424e-11,\n 1.3398e-11, 1.0546e-12, 1.0834e-09, 1.3801e-14, 1.2501e-11, 4.1571e-10,\n 4.4019e-12, 5.0771e-11, 3.4658e-10, 2.7856e-11, 1.1281e-09, 1.5607e-11,\n 4.1240e-12, 2.6417e-10, 1.1617e-10, 6.7269e-12, 2.1556e-10, 2.1488e-14,\n 9.7173e-10, 4.3412e-13, 7.6710e-13, 1.4332e-12, 6.3517e-11, 2.1054e-11,\n 1.1756e-09, 2.3488e-10, 8.3540e-10, 4.6852e-10, 8.6753e-13, 1.9224e-10,\n 2.1270e-10, 1.7384e-13, 3.6904e-13, 5.5188e-10, 7.9979e-13, 1.6209e-09,\n 4.8394e-10, 2.0991e-11, 4.6116e-11, 1.6229e-09, 3.0875e-12, 1.0597e-12,\n 1.6068e-10, 2.3212e-10, 2.3680e-10, 1.9699e-11, 2.0802e-12, 1.6436e-12,\n 1.3322e-12, 1.3002e-13, 8.4121e-14, 3.1361e-11, 9.7815e-14, 3.8425e-13,\n 9.7760e-10, 8.4685e-10, 2.4034e-09, 7.4409e-10, 2.4828e-10, 7.8118e-13,\n 3.8014e-10, 4.6738e-09, 3.1262e-12, 3.1282e-12, 5.9555e-10, 1.7030e-12,\n 2.5333e-09, 3.1154e-12, 7.3466e-10, 1.4678e-11, 4.0220e-11, 1.1117e-11,\n 7.1094e-10, 9.2894e-12, 2.7186e-11, 4.6372e-13, 1.2334e-10, 2.1449e-12,\n 5.5466e-11, 4.5676e-12, 3.8919e-12, 2.2228e-10, 1.7174e-10, 3.1746e-10,\n 1.3622e-12, 9.3057e-11, 1.3676e-09, 1.8688e-09, 2.1864e-14, 3.5156e-10,\n 3.3288e-10, 1.7526e-10, 5.3949e-12, 6.1807e-13, 5.4210e-13, 1.0478e-09,\n 4.2210e-11, 1.6163e-13, 3.2204e-10, 2.2168e-09, 1.1362e-12, 2.5524e-10,\n 1.2162e-11, 1.3364e-09, 5.3373e-11, 4.8383e-12, 4.6526e-12, 7.7500e-12,\n 1.4442e-09, 6.3337e-12, 4.1069e-14, 2.3925e-10, 5.2655e-12, 1.7660e-09,\n 4.4255e-13, 2.0374e-11, 2.7919e-10, 5.8149e-13, 5.8592e-12, 1.5660e-10,\n 5.6464e-12, 1.5789e-11, 1.1293e-10, 7.7967e-11, 1.6401e-10, 1.3404e-11,\n 1.3774e-10, 5.1027e-13, 3.5728e-12, 2.1145e-11, 2.2969e-10, 4.4616e-10,\n 1.1391e-10, 1.5409e-10, 2.4809e-12, 3.7873e-10, 6.5355e-13, 3.0411e-10,\n 3.8049e-10, 1.3074e-11, 8.3887e-11, 4.9622e-12, 8.1000e-12, 6.3047e-10,\n 1.6881e-11, 5.6716e-12, 1.0017e-10, 1.5171e-11, 5.9331e-10, 2.8965e-10,\n 3.0448e-10, 1.5771e-11, 4.4702e-12, 8.4253e-13, 1.7528e-12, 1.6044e-15,\n 8.4675e-12, 1.2592e-12, 7.1218e-12, 8.7627e-10, 3.1046e-11, 1.3370e-13,\n 2.5810e-11, 8.8672e-11, 2.4234e-09, 8.1692e-12], device='cuda:0')" }, "52": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-1.8009e-19, 1.7929e-18, -4.5470e-19, ..., -3.3360e-19,\n 4.9312e-18, -1.1080e-18],\n [-6.6791e-20, 1.1929e-18, -1.2399e-19, ..., -2.2743e-19,\n 1.0481e-18, 1.6102e-20],\n [-7.7447e-21, -2.2394e-18, 3.6307e-19, ..., 2.7166e-19,\n -3.9362e-18, 4.5168e-19],\n ...,\n [ 5.6506e-18, -8.4135e-17, 1.2972e-17, ..., 9.7910e-18,\n -1.2640e-16, 6.8253e-18],\n [ 1.0287e-18, -4.3049e-18, 2.0490e-18, ..., 1.7246e-18,\n -1.4098e-17, 1.4123e-18],\n [-3.4614e-18, 4.2548e-17, -8.3519e-18, ..., -6.7260e-18,\n 7.2257e-17, -3.9187e-18]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.5763e-10, 2.7211e-10, 8.9054e-11, ..., 3.5807e-11, 3.3328e-10,\n 1.0587e-09],\n [7.0876e-12, 5.3907e-12, 6.1010e-13, ..., 6.3111e-12, 7.3304e-12,\n 3.2534e-11],\n [2.9553e-11, 2.0104e-11, 1.5588e-13, ..., 1.5873e-11, 4.2059e-11,\n 8.9101e-11],\n ...,\n [3.8300e-10, 6.1634e-11, 5.3678e-12, ..., 1.0085e-11, 3.7799e-10,\n 4.3704e-10],\n [2.4646e-08, 3.0625e-09, 8.5780e-11, ..., 1.8019e-09, 1.0883e-08,\n 3.0740e-08],\n [5.4661e-09, 1.9868e-09, 1.6149e-11, ..., 4.8994e-10, 5.8234e-09,\n 9.4416e-09]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[-1.9976e-19, 1.4220e-18, 5.6148e-20, ..., -1.2490e-19,\n 2.0742e-18, -4.6218e-19],\n [-8.9554e-20, 5.0541e-19, -1.6300e-20, ..., -8.3208e-20,\n 1.2014e-18, -1.7354e-19],\n [ 2.0757e-19, -1.4764e-18, 3.9301e-20, ..., 1.0869e-19,\n -2.0133e-18, 1.5241e-19],\n ...,\n [ 3.5694e-18, -7.7274e-17, 1.1304e-17, ..., 5.6629e-18,\n -1.1416e-16, 4.5752e-18],\n [ 6.9929e-19, -1.2641e-18, 1.0754e-18, ..., 1.0632e-18,\n -7.2736e-18, 6.6842e-19],\n [-2.1179e-18, 3.1748e-17, -6.2563e-18, ..., -4.0619e-18,\n 5.5462e-17, -1.9740e-18]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.5043e-11, 7.7756e-11, 2.5448e-11, ..., 1.0232e-11, 9.5236e-11,\n 3.0254e-10],\n [2.0253e-12, 1.5404e-12, 1.7434e-13, ..., 1.8034e-12, 2.0947e-12,\n 9.2968e-12],\n [8.4450e-12, 5.7449e-12, 4.4544e-14, ..., 4.5359e-12, 1.2019e-11,\n 2.5461e-11],\n ...,\n [1.0945e-10, 1.7612e-11, 1.5339e-12, ..., 2.8819e-12, 1.0801e-10,\n 1.2489e-10],\n [7.0428e-09, 8.7513e-10, 2.4512e-11, ..., 5.1491e-10, 3.1098e-09,\n 8.7841e-09],\n [1.5620e-09, 5.6775e-10, 4.6147e-12, ..., 1.4000e-10, 1.6641e-09,\n 2.6980e-09]], device='cuda:0')" }, "53": { - "step": "tensor(2504.)", - "exp_avg": "tensor([ 1.8560e-18, 5.1245e-19, -1.2020e-18, 3.3408e-18, 5.1633e-18,\n -1.4593e-18, 4.7957e-18, -8.8097e-19, 1.3154e-18, 2.3549e-18,\n -2.0349e-18, 2.2592e-18, -6.1643e-18, 5.5025e-19, -2.6554e-18,\n -3.8776e-18, 1.1670e-18, -2.4631e-18, -3.1089e-18, 2.6496e-18,\n -4.0865e-18, 1.2607e-18, 1.7618e-18, 6.6088e-19, -1.8338e-18,\n -1.1772e-18, 3.5604e-18, -4.7940e-18, -2.7677e-18, 6.3545e-19,\n 4.1334e-18, 8.1702e-19, 2.0349e-18, 5.7671e-18, 4.0301e-18,\n 6.5231e-18, -7.9444e-18, -1.9709e-18, -4.6999e-18, 1.3737e-17,\n -7.4910e-18, 4.3928e-18, 4.5744e-19, 1.2836e-17, -1.1770e-18,\n 9.6101e-18, -1.0770e-18, 1.1302e-17, 1.0849e-18, -1.9741e-17,\n 1.2995e-17, 1.4931e-17, -3.4442e-18, 1.1018e-18, 1.0829e-17,\n -2.1648e-17, 1.3760e-17, 1.6686e-17, -2.4997e-18, -7.2344e-18,\n 8.1047e-18, -1.0541e-17, -9.0999e-18, -1.1265e-18, -1.9433e-18,\n -4.8664e-20, 3.0095e-18, -4.9253e-18, 1.9274e-18, -1.8213e-19,\n 5.7306e-18, -2.3075e-18, -1.0347e-17, 2.4647e-18, -2.3697e-18,\n 1.7773e-20, -3.1392e-18, 3.3136e-18, 2.7270e-18, 1.1374e-18,\n 1.4628e-18, 3.7568e-19, -3.2133e-18, 6.7078e-18, 5.2116e-18,\n -4.2353e-18, -2.9006e-18, 1.4621e-18, -1.8718e-18, 7.2912e-20,\n 5.5990e-18, -1.6097e-19, -1.0873e-18, -1.3406e-18, -5.8573e-18,\n -5.8605e-18, -7.8127e-18, 2.1880e-18, -2.8094e-18, -6.4718e-18,\n 4.7630e-18, 8.6847e-18, 2.8085e-18, -6.6894e-18, 9.5079e-18,\n -2.3925e-18, 5.4265e-19, 3.1311e-19, -8.7670e-18, 8.0583e-18,\n -1.3525e-18, -2.5250e-18, 2.8201e-18, 2.1538e-18, 1.1920e-17,\n 1.3337e-17, 3.1672e-18, 5.7164e-18, -2.5442e-18, -1.8652e-18,\n 8.0136e-18, -1.2757e-19, -3.8005e-18, -2.5858e-18, -6.6709e-18,\n 3.5953e-18, -4.1533e-18, 4.0186e-18, 2.7339e-18, 1.6511e-18,\n -2.1193e-18, 6.4478e-19, 4.0728e-18, -3.3687e-19, -9.4386e-19,\n -6.9036e-19, 2.1274e-18, -3.5073e-18, -2.2867e-19, -9.0726e-19,\n 4.2248e-19, 7.2872e-19, 1.9406e-18, 4.4797e-19, -1.2419e-18,\n 1.0961e-18, -1.5917e-18, 8.6592e-19, 2.5083e-18, -1.6091e-18,\n 5.3579e-18, -7.0708e-19, 1.3954e-18, -1.3223e-18, -4.3979e-21,\n 9.0947e-20, 4.7417e-18, -3.8644e-18, -1.6533e-18, 3.8491e-18,\n 2.9159e-18, -1.1319e-18, -4.8796e-18, 3.1352e-18, 2.6048e-18,\n 2.2147e-18, 1.5311e-18, 5.7978e-18, 6.7445e-19, -4.5571e-18,\n -2.2212e-18, -3.7723e-18, -1.4962e-18, -3.6320e-18, -3.1201e-18,\n -4.1735e-18, -2.3336e-18, -1.8250e-18, -4.4796e-18, 4.5795e-18,\n 3.9190e-18, -5.5772e-18, 1.9763e-18, 2.9128e-18, -1.4558e-18,\n -3.1589e-18, -1.2162e-18, 3.6925e-18, -2.0804e-18, 3.7616e-18,\n 1.3973e-18, -8.2699e-19, -5.7560e-19, -1.5521e-19, -1.5641e-19,\n 3.2347e-19, -6.7170e-19, -1.0483e-18, 1.3998e-18, 1.6465e-18,\n 3.9944e-19, 6.8342e-19, -9.8776e-19, 1.7970e-18, 5.5486e-19,\n -8.8188e-20, -2.6109e-18, 8.3274e-19, -1.5656e-18, -6.5842e-19,\n -3.4492e-19, 8.9827e-19, 8.3036e-19, -3.3179e-19, -1.6636e-19,\n -1.5025e-18, 6.8913e-19, -1.2078e-18, 1.3800e-18, -5.6296e-19,\n 4.9011e-19, -3.1053e-19, -1.6158e-18, 1.6413e-19, 8.3541e-18,\n -1.0182e-17, 3.1121e-18, 2.7080e-18, -7.6025e-18, -9.4829e-18,\n -9.7238e-21, 5.1038e-18, -1.0412e-17, 2.5345e-18, -4.1113e-18,\n 7.8271e-18, 2.1247e-18, -6.8940e-18, 8.7654e-18, -8.7508e-18,\n 5.0918e-18, -1.5998e-18, 4.1008e-19, -1.2258e-18, 5.1345e-19,\n -2.5483e-18, -1.0969e-17, -4.5925e-18, -1.7440e-18, 3.9832e-18,\n 7.9722e-18, -3.2475e-18, -1.4318e-19, 2.0190e-18, 1.0402e-17,\n 3.2853e-18, 2.0907e-25, -1.7584e-24, 1.5556e-25, -1.5243e-24,\n -3.6656e-24, -1.8985e-24, -4.9510e-24, 1.4060e-24, 2.3902e-25,\n -1.9225e-24, 2.0888e-24, -1.6550e-24, 7.7694e-24, 1.6736e-24,\n 2.4764e-24, 3.1379e-24, -2.0630e-24, 1.4962e-24, 2.8350e-24,\n 1.0626e-24, 9.1436e-25, 1.5354e-24, -6.1675e-25, 2.0868e-25,\n 2.5422e-24, 4.9363e-25, -2.9131e-24, 3.6211e-24, 3.8880e-25,\n -2.8014e-24, 2.2372e-25, -2.5956e-24, -1.3713e-24, 3.7058e-25,\n 4.2503e-25, 1.6189e-25, -5.6100e-25, -7.1831e-25, 6.6646e-26,\n 2.8735e-25, 1.3574e-24, 1.4371e-24, 2.5452e-25, 1.1818e-24,\n -3.8097e-25, -1.1538e-24, -9.9447e-25, -1.6775e-24, -7.5329e-25,\n 2.9744e-24, -3.1965e-24, -4.9731e-25, -9.8854e-25, 1.4316e-24,\n -1.5607e-24, 2.1896e-24, -1.7687e-24, -1.7287e-24, -6.5523e-25,\n 7.9628e-25, 2.1379e-25, 4.9753e-24, 4.1376e-25, 2.0325e-24,\n -6.6842e-24, -7.1301e-25, 7.0816e-25, -2.9769e-24, -3.9448e-24,\n 1.0966e-24, 3.6743e-24, -2.7581e-24, -3.5240e-24, 4.1991e-24,\n 8.4647e-25, -2.4407e-24, -2.1853e-24, 1.3189e-25, -3.1304e-25,\n 2.9171e-24, -1.5979e-24, 2.1636e-24, -8.9887e-25, 2.1901e-24,\n -1.2039e-24, -9.0652e-25, -1.1179e-24, 2.3877e-24, -2.1358e-24,\n -3.7083e-26, 1.2779e-24, 1.9447e-25, 1.7044e-24, 5.7722e-25,\n 2.1054e-25, 5.1652e-25, 2.0171e-24, -4.6128e-26, 7.6920e-26,\n -6.5579e-25, -3.3665e-24, 1.0812e-24, 1.1045e-24, -4.3374e-25,\n 5.0362e-25, 9.7056e-25, -1.7286e-24, 5.2631e-25, 1.5706e-24,\n 1.5135e-24, 6.7600e-26, 1.6376e-24, -4.5323e-24, -1.4904e-25,\n -1.3268e-24, 6.7996e-25, -1.6029e-24, 2.1407e-24, 1.2423e-24,\n 7.9817e-25, -3.0203e-25, -3.9681e-25, -1.9063e-24, -1.0913e-24,\n -1.4518e-24, 1.4430e-25, -1.1362e-24, -9.1162e-25, 6.3589e-26,\n 1.2091e-24, 8.2014e-25, -2.1062e-24, -1.7175e-24, -3.5669e-25,\n -1.3761e-24, -1.7239e-24, 3.9355e-25, 5.6916e-25, 1.4865e-24,\n 2.8102e-24, 4.0586e-24, -2.9160e-24, -2.7872e-24, -2.8579e-24,\n -6.9956e-26, -9.2037e-25, 6.6976e-25, 1.7946e-24, -5.3485e-25,\n 1.7861e-24, -2.2727e-24, -2.6610e-24, 2.1741e-25, 2.1644e-24,\n -4.6921e-25, 3.3404e-25, -1.1290e-24, 8.7659e-25, 1.3068e-24,\n -6.1687e-26, 2.8575e-25, 5.8666e-25, -5.3031e-26, -1.6931e-25,\n -2.7948e-25, 1.2123e-25, -3.0843e-25, -4.8882e-25, 3.7026e-25,\n 7.5978e-26, 2.0999e-25, 8.9417e-26, -6.4713e-26, 2.1000e-25,\n 7.2380e-25, 1.1210e-24, 4.1354e-25, 2.4522e-25, 2.1068e-25,\n 9.1756e-26, -4.4430e-25, 2.2795e-26, 9.3352e-26, -2.2174e-26,\n -7.5107e-26, 2.0656e-25, 2.9222e-25, -8.1975e-25, 1.2668e-25,\n 1.6060e-25, 4.5952e-25, -1.7393e-25, -2.9903e-25, -5.4000e-26,\n 1.5760e-25, 3.8832e-25, 6.1296e-25, -5.4205e-25, -6.5970e-26,\n 3.1955e-25, 3.0504e-25, 1.6619e-25, -4.2197e-26, 4.6139e-25,\n 2.1730e-25, -1.5781e-25, -2.2072e-25, 1.0040e-24, 9.0415e-25,\n -5.0114e-25, 2.0053e-26, -4.4707e-25, -1.0773e-24, 8.3376e-25,\n 9.8080e-26, 1.8748e-25, -1.0280e-25, -3.4542e-25, 2.8528e-26,\n 1.4723e-25, -3.4038e-25, -2.1863e-25, 9.4806e-26, 9.7674e-26,\n -1.4152e-24, 1.7341e-24, -2.9679e-25, -1.6836e-24, 3.6920e-24,\n 2.3720e-24, 1.1649e-26, -2.9938e-24, 2.5169e-24, 1.7654e-24,\n -1.5644e-24, -6.4506e-25, -1.4079e-24, 3.6681e-24, 4.8865e-25,\n 7.0196e-25, -7.9811e-25, 2.6067e-24, 1.7829e-24, -1.3589e-24,\n -8.4056e-25, 2.2316e-24, 1.3855e-24, -1.3557e-24, -6.3988e-25,\n -2.5138e-24, -4.0402e-24, -1.4336e-24, 1.7285e-24, 1.5217e-24,\n -1.5338e-24, -1.4197e-24, 1.1410e-17, 8.7492e-17, -8.9537e-17,\n -3.7300e-17, 1.4701e-16, -1.3360e-16, -1.4177e-16, 3.3554e-17,\n -2.9221e-17, -1.6154e-17, -8.6428e-17, 8.7596e-17, -5.2921e-17,\n 9.2995e-17, 1.0951e-16, 4.8503e-17, -1.3591e-16, 9.0454e-17,\n -7.4396e-17, 1.1347e-16, -1.2684e-16, 1.0860e-16, -1.2812e-16,\n -1.1699e-16, -7.5055e-17, -1.8947e-17, -1.8168e-16, -1.0316e-16,\n 7.8767e-17, -1.1600e-16, 1.1253e-16, 7.9369e-17, -9.2532e-17,\n 9.4992e-17, -7.7604e-17, -4.8310e-17, 1.5638e-16, 5.0366e-17,\n -1.2736e-17, -1.1502e-16, 1.5915e-17, 1.7871e-16, 1.4041e-16,\n 8.5908e-17, -2.8656e-17, -1.0671e-16, 1.4943e-16, -1.6954e-16,\n -1.2176e-16, -8.7533e-17, 6.7140e-17, -3.9368e-17, 3.0287e-17,\n -1.0784e-16, -4.2147e-17, 9.5387e-17, -8.9084e-17, 1.0138e-16,\n -7.5095e-17, -3.5887e-17, 1.6294e-16, -8.9004e-17, 1.4026e-16,\n 4.7046e-17, 1.6749e-16, 2.5284e-17, -9.6623e-17, 9.8858e-17,\n -1.3349e-16, 1.8978e-16, 4.0020e-18, -1.3611e-16, 1.0175e-16,\n -5.4845e-17, 9.8524e-17, -8.0728e-17, 5.2755e-17, -4.4090e-17,\n 1.7949e-17, -6.5969e-17, 3.5246e-17, -8.6650e-17, 7.2449e-17,\n 1.0395e-16, 9.1482e-17, 3.5349e-17, -3.2676e-17, -3.9484e-17,\n 9.2816e-17, -6.6738e-17, -1.1371e-16, -9.0397e-17, 7.2904e-17,\n 1.3677e-16, -4.6775e-17, 6.1216e-17, -6.4636e-17, 1.8243e-16,\n -3.6845e-18, 8.2308e-17, 3.0500e-17, 8.1197e-17, 5.0352e-17,\n -1.8117e-16, 1.0159e-16, -1.8778e-17, 9.0319e-17, -1.9096e-17,\n -9.4514e-17, -7.2850e-17, 1.6190e-16, 5.7154e-17, 7.3892e-17,\n 1.1165e-16, 9.2099e-17, 8.1935e-17, 1.4766e-16, -1.4862e-16,\n 1.3035e-16, -7.0911e-17, 1.7277e-16, 7.8425e-17, 1.7687e-16,\n 4.1279e-17, 1.5762e-16, -1.7071e-16, -8.2411e-17, 1.4517e-17,\n -1.7444e-16, 3.3312e-17, -1.1867e-16, 1.0846e-16, 3.6906e-17,\n 9.4567e-17, -1.5469e-18, -1.2585e-16, 4.6171e-17, -1.4002e-16,\n 1.4793e-16, -9.2078e-17, -1.9743e-16, 4.2164e-17, -3.9316e-17,\n -8.5009e-17, -6.9542e-17, -1.0080e-16, 1.1965e-16, 7.6903e-17,\n -3.0319e-17, 4.1918e-17, 4.1543e-17, 1.0244e-16, -1.3378e-16,\n 1.2579e-17, -1.1186e-16, -8.8307e-17, -1.3342e-16, -9.4341e-17,\n -8.8322e-17, -1.3346e-16, 1.4324e-16, 8.2679e-17, -4.1534e-17,\n 5.4530e-17, -1.1830e-16, 3.2589e-17, -3.9771e-17, -6.0804e-17,\n 8.2817e-17, 1.7504e-16, -1.1733e-16, -7.6065e-18, 6.2108e-17,\n -9.5227e-17, -6.0966e-17, 2.8421e-17, -2.0330e-16, -1.2169e-16,\n 6.8615e-17, -1.3330e-16, -4.6496e-17, -4.7947e-17, 2.8996e-17,\n -1.2251e-16, 2.8893e-17, -5.2596e-17, 1.0807e-16, 4.3351e-17,\n -7.8043e-17, -9.1495e-17, 5.2932e-18, -1.1030e-16, -2.1554e-17,\n -1.2096e-16, 6.9428e-17, -1.1943e-16, 3.9971e-17, -1.4494e-16,\n 1.7882e-17, 2.2771e-17, 3.3902e-17, 1.4617e-16, 1.6105e-16,\n 1.1400e-16, -3.8920e-17, -1.0374e-16, -4.6100e-17, 1.4908e-16,\n -7.3408e-17, -1.2912e-16, 8.0513e-17, 1.2604e-16, -1.5732e-16,\n 6.0351e-17, 5.8051e-17, -8.6629e-17, 6.2396e-18, -1.7861e-17,\n -4.2886e-17, 8.1952e-18, 1.9740e-17, -9.7276e-17, 6.0602e-17,\n 1.3777e-16, -1.7793e-16, -9.9676e-18, -1.2553e-16, 1.0778e-16,\n 1.2734e-16, 3.4335e-17, -6.0054e-17, -1.3570e-16, -1.4706e-16,\n -1.0899e-16, 6.5626e-17, 4.5298e-18, 9.3125e-17, -1.4193e-16,\n 8.8807e-17, -1.0227e-16, -5.9649e-17, -1.3356e-16, -6.2075e-17,\n -1.4201e-16, -1.4748e-16, -2.0227e-17, 9.2456e-17, -1.3516e-16,\n -9.4172e-17, 6.2986e-17, -6.7142e-17, -1.2160e-16, 9.5061e-17,\n -1.2147e-16, -1.7064e-17, 7.4595e-17], device='cuda:0')", - "exp_avg_sq": "tensor([3.8135e-10, 2.9346e-11, 2.2730e-10, 1.5712e-10, 1.9873e-10, 3.2471e-10,\n 3.6162e-11, 4.1909e-10, 5.4246e-10, 3.8287e-11, 2.1283e-10, 7.4842e-11,\n 5.3604e-10, 1.2105e-11, 6.1730e-10, 8.5915e-11, 1.2247e-10, 1.0795e-10,\n 1.2324e-10, 6.1760e-11, 7.1647e-11, 1.4549e-10, 1.3012e-11, 1.7211e-10,\n 2.3034e-10, 1.5455e-11, 2.3615e-10, 7.9060e-10, 4.6045e-11, 7.8791e-12,\n 2.2478e-10, 2.5859e-10, 9.8025e-10, 3.7730e-12, 8.3471e-11, 1.0965e-10,\n 7.0800e-10, 2.8297e-12, 1.4605e-10, 4.8172e-11, 6.9804e-11, 1.0535e-11,\n 9.8455e-10, 2.7691e-10, 6.1274e-10, 2.8104e-11, 1.5235e-09, 6.0075e-10,\n 1.7352e-10, 1.7228e-11, 4.7770e-11, 8.9354e-10, 2.4643e-11, 1.6793e-10,\n 6.8621e-12, 3.1104e-11, 5.8536e-10, 1.3700e-10, 7.6497e-11, 5.5164e-10,\n 4.3492e-10, 1.1135e-09, 9.4968e-10, 2.4318e-10, 5.6450e-10, 4.1130e-11,\n 1.0566e-10, 1.5502e-10, 1.2509e-11, 7.8150e-10, 8.3273e-10, 6.2970e-11,\n 6.8241e-10, 2.7025e-11, 6.4001e-11, 8.8364e-10, 1.0330e-10, 3.0655e-10,\n 1.3175e-10, 4.0885e-10, 1.8835e-10, 2.9988e-11, 1.7199e-10, 7.6655e-10,\n 6.9663e-10, 1.9584e-10, 9.7917e-11, 4.1977e-10, 1.8196e-10, 5.3620e-11,\n 3.2391e-10, 2.0302e-10, 2.1725e-10, 1.0102e-11, 5.9819e-10, 1.4079e-10,\n 6.8944e-11, 2.2007e-11, 2.1968e-10, 7.3735e-11, 2.4985e-11, 2.6717e-11,\n 8.5729e-11, 1.2380e-10, 5.4652e-11, 6.5608e-11, 2.8828e-11, 2.0776e-10,\n 4.9035e-11, 9.4131e-11, 5.9684e-12, 2.6977e-10, 3.8864e-10, 4.5436e-11,\n 7.1235e-11, 3.1512e-11, 1.3136e-10, 1.3698e-11, 1.4713e-11, 4.8748e-11,\n 1.5266e-11, 1.9812e-11, 6.1146e-11, 1.1151e-10, 8.0725e-11, 3.6823e-12,\n 1.3034e-10, 1.7523e-11, 6.2918e-11, 1.1688e-10, 7.0976e-10, 6.4365e-10,\n 4.5180e-10, 2.0354e-10, 2.3981e-10, 1.7354e-11, 1.0436e-09, 3.2853e-10,\n 1.8928e-10, 1.1784e-10, 2.6926e-10, 3.2247e-11, 4.0836e-11, 4.0697e-11,\n 2.7973e-11, 7.4817e-10, 5.9448e-11, 3.1724e-11, 2.0455e-09, 7.2724e-11,\n 1.9564e-10, 9.8338e-10, 4.8166e-10, 1.3472e-11, 6.2840e-11, 3.2825e-12,\n 1.0619e-09, 4.3001e-11, 3.6463e-10, 5.8596e-10, 1.7354e-12, 2.1320e-12,\n 2.0498e-11, 5.7114e-11, 5.5159e-12, 3.7978e-11, 6.8558e-12, 1.6082e-10,\n 2.7787e-11, 3.2272e-11, 1.9058e-11, 7.1991e-12, 4.0630e-11, 5.5789e-12,\n 4.2195e-12, 1.7466e-11, 1.7835e-12, 6.1563e-11, 1.4181e-11, 3.1882e-11,\n 7.1017e-11, 4.3913e-11, 1.2321e-11, 1.2379e-11, 2.4676e-12, 1.0146e-11,\n 7.0175e-11, 3.7525e-11, 2.6698e-11, 3.1824e-11, 5.0084e-11, 5.1929e-11,\n 7.4456e-11, 7.0523e-10, 1.6789e-09, 1.3634e-09, 6.7500e-10, 2.4403e-11,\n 2.0257e-10, 1.0966e-11, 3.4634e-10, 1.3885e-09, 1.9157e-10, 2.6122e-10,\n 2.6255e-10, 7.3091e-11, 2.5268e-10, 9.7216e-12, 1.0868e-09, 2.5670e-10,\n 8.1316e-10, 2.9308e-11, 4.3670e-10, 9.3356e-12, 1.9526e-12, 1.7987e-09,\n 1.4089e-10, 1.9349e-10, 3.1601e-11, 1.8640e-09, 4.2892e-11, 1.1734e-09,\n 5.6437e-10, 3.6432e-12, 1.3313e-10, 7.5775e-11, 4.1007e-11, 1.7729e-10,\n 4.3707e-11, 2.4061e-12, 2.8933e-12, 9.2580e-11, 6.4883e-11, 4.8623e-11,\n 2.8592e-10, 1.7842e-11, 1.4250e-10, 9.7011e-11, 3.6639e-12, 1.9586e-11,\n 1.4193e-10, 2.9132e-10, 9.1822e-11, 4.1416e-10, 2.1248e-11, 6.7739e-11,\n 6.5302e-10, 1.7979e-10, 1.4530e-11, 5.3616e-11, 1.1520e-10, 5.2804e-11,\n 5.9831e-11, 6.4475e-11, 4.6107e-12, 1.2675e-11, 2.6089e-27, 6.0549e-26,\n 3.7133e-26, 6.9135e-27, 1.0043e-26, 1.8580e-26, 1.1153e-26, 3.2257e-27,\n 6.5865e-27, 3.5948e-27, 2.3981e-26, 2.8462e-27, 2.2600e-26, 3.0182e-27,\n 4.4636e-27, 2.0338e-26, 8.4481e-27, 1.1004e-26, 1.0855e-26, 1.7737e-27,\n 5.2082e-26, 1.1206e-26, 4.9953e-26, 4.2792e-26, 3.9967e-26, 6.1088e-27,\n 3.1921e-26, 1.3395e-26, 2.9421e-26, 8.3175e-27, 4.0624e-27, 6.0548e-26,\n 2.3854e-26, 3.5473e-27, 1.8633e-26, 3.6362e-26, 6.0618e-26, 1.0932e-26,\n 2.6536e-26, 5.4978e-26, 8.4245e-27, 7.6517e-27, 4.0087e-26, 2.9131e-26,\n 2.0478e-26, 2.6621e-26, 5.0063e-26, 1.1366e-26, 2.0393e-26, 9.1542e-27,\n 5.8228e-27, 1.6785e-26, 4.7561e-27, 9.3843e-27, 2.3955e-26, 3.3091e-26,\n 6.8638e-26, 2.3547e-26, 3.2040e-26, 2.4844e-26, 3.9877e-26, 4.5248e-26,\n 1.2527e-26, 1.9455e-26, 1.0328e-25, 8.0204e-27, 6.0314e-26, 3.6928e-26,\n 2.7447e-26, 1.5412e-26, 1.0523e-26, 2.3572e-26, 3.4513e-26, 1.4910e-25,\n 5.4871e-27, 3.9173e-27, 4.0400e-27, 6.8795e-28, 1.0893e-26, 8.8012e-27,\n 4.6019e-27, 1.2150e-26, 4.8601e-26, 1.9661e-26, 8.0493e-27, 1.1314e-26,\n 1.7106e-26, 1.6655e-26, 7.6155e-27, 1.4241e-26, 1.9211e-27, 1.0346e-26,\n 2.6479e-27, 9.9168e-27, 1.3922e-26, 1.1952e-26, 1.5575e-26, 3.3818e-27,\n 8.6023e-27, 1.5299e-26, 8.1329e-27, 2.6639e-27, 3.8624e-27, 7.8304e-27,\n 1.6119e-26, 8.0836e-27, 2.3232e-27, 1.4182e-26, 3.5158e-27, 5.8722e-27,\n 7.9113e-27, 1.1204e-26, 1.1640e-26, 3.6011e-27, 3.0542e-26, 1.1216e-26,\n 5.3298e-27, 7.6223e-27, 5.3545e-27, 1.5508e-26, 6.8331e-27, 1.0519e-26,\n 6.3871e-27, 5.5459e-27, 5.8020e-27, 7.2084e-27, 4.2395e-27, 3.0147e-27,\n 6.3893e-26, 3.4566e-26, 1.5868e-26, 3.7710e-26, 9.0446e-26, 4.5217e-26,\n 9.3522e-27, 7.1501e-26, 1.0704e-27, 8.1631e-27, 2.1211e-26, 1.1902e-25,\n 1.1264e-26, 1.4650e-26, 5.1064e-26, 1.3279e-26, 5.3857e-26, 5.8304e-26,\n 1.1571e-26, 4.5517e-27, 3.9594e-26, 1.4235e-26, 2.9218e-26, 1.3561e-25,\n 9.3747e-27, 1.9308e-26, 1.5232e-26, 5.5073e-26, 9.7771e-26, 9.9599e-26,\n 3.8900e-26, 4.3497e-27, 7.8656e-27, 1.1698e-26, 3.9512e-27, 3.2551e-27,\n 4.3362e-27, 1.3439e-26, 9.0063e-28, 1.4595e-26, 5.2385e-27, 4.4974e-27,\n 8.9488e-28, 7.2738e-27, 2.3975e-27, 9.9803e-27, 5.6137e-27, 1.7287e-26,\n 4.0560e-27, 8.0345e-27, 1.9710e-27, 1.8358e-27, 6.5581e-28, 1.6038e-27,\n 4.8319e-27, 1.0845e-27, 9.3033e-27, 6.5450e-27, 1.9687e-26, 3.8576e-27,\n 2.1421e-26, 1.6757e-27, 1.9908e-27, 4.2608e-27, 3.0604e-27, 6.9574e-26,\n 4.3777e-27, 1.3349e-26, 1.0682e-26, 3.9629e-26, 8.9456e-27, 3.8679e-27,\n 1.2779e-26, 2.5133e-26, 4.7554e-27, 2.0809e-27, 4.7233e-26, 1.6686e-27,\n 5.1195e-27, 7.5870e-27, 5.3298e-27, 3.2130e-26, 2.4511e-26, 3.0802e-26,\n 2.5890e-27, 8.2217e-27, 1.6607e-27, 6.6026e-27, 5.7514e-27, 1.7953e-26,\n 9.7599e-27, 1.8558e-26, 8.2460e-27, 8.2229e-27, 8.5720e-27, 2.0872e-27,\n 3.5026e-26, 6.5809e-26, 3.6493e-26, 1.3731e-26, 7.8335e-26, 6.8962e-26,\n 2.6866e-26, 3.1107e-26, 1.0874e-26, 5.4707e-26, 4.2866e-26, 2.1640e-26,\n 1.5816e-26, 2.3886e-26, 3.0388e-26, 5.8547e-27, 3.4892e-26, 1.3769e-26,\n 2.1072e-26, 6.9270e-27, 8.5806e-27, 5.7059e-26, 1.9713e-26, 8.9811e-26,\n 1.3733e-26, 5.6092e-27, 4.0062e-26, 4.9470e-26, 4.4716e-26, 1.6000e-26,\n 4.7324e-27, 4.5191e-26, 7.3788e-08, 1.1530e-08, 3.1148e-08, 1.3366e-08,\n 1.8782e-07, 1.5522e-08, 1.2569e-07, 1.9259e-08, 3.3999e-08, 1.9151e-09,\n 2.3876e-09, 3.6916e-08, 3.5256e-08, 4.8529e-09, 1.3363e-08, 1.4652e-07,\n 4.4285e-08, 1.1456e-08, 8.2263e-08, 1.7871e-08, 8.1699e-08, 1.2407e-08,\n 2.0450e-08, 4.6640e-09, 1.0398e-08, 1.0700e-08, 1.1707e-07, 1.3100e-07,\n 8.6257e-08, 9.0513e-08, 3.1948e-08, 1.7235e-08, 2.7110e-08, 4.1254e-08,\n 6.4592e-08, 2.1775e-09, 2.7045e-07, 2.8420e-08, 4.8243e-09, 5.3326e-08,\n 1.0185e-08, 6.0046e-08, 1.3024e-07, 1.5308e-07, 6.9575e-08, 6.3695e-09,\n 1.8426e-07, 4.2712e-08, 5.8628e-08, 1.3386e-08, 1.3225e-08, 8.4022e-08,\n 1.6247e-07, 7.3965e-08, 2.9887e-08, 2.4129e-08, 4.2269e-08, 8.0683e-09,\n 1.5147e-08, 1.7416e-08, 2.6746e-07, 2.6604e-08, 1.9241e-08, 2.7863e-09,\n 3.0512e-08, 2.9887e-09, 5.4569e-08, 1.3010e-08, 6.6169e-08, 2.7476e-07,\n 1.2038e-08, 8.6181e-08, 1.1990e-07, 1.9595e-08, 1.2192e-07, 1.5002e-08,\n 1.6285e-09, 4.2862e-09, 1.2273e-09, 1.8819e-08, 1.7466e-08, 1.3986e-07,\n 7.0472e-09, 8.5366e-08, 2.9353e-08, 1.5050e-08, 2.1423e-08, 1.5654e-09,\n 3.5847e-08, 2.0333e-08, 1.8457e-08, 1.1178e-07, 1.6064e-08, 1.5178e-07,\n 1.3152e-07, 1.1670e-08, 4.7411e-08, 1.5019e-07, 1.1581e-07, 4.4603e-08,\n 6.5911e-10, 4.9457e-08, 1.3070e-08, 8.3108e-08, 2.1736e-07, 6.1706e-09,\n 3.5152e-09, 1.2377e-08, 7.0752e-09, 1.3229e-08, 2.3816e-07, 9.4730e-09,\n 4.5726e-09, 2.4506e-08, 4.7870e-08, 5.2631e-08, 1.3012e-07, 9.9652e-08,\n 1.4941e-08, 1.4226e-08, 1.3815e-07, 3.6787e-09, 1.5725e-07, 6.1195e-09,\n 1.2527e-08, 5.6705e-08, 1.1205e-08, 2.1650e-09, 1.1654e-07, 2.9153e-08,\n 7.2500e-08, 9.4385e-08, 3.7085e-09, 4.6941e-08, 4.2547e-09, 3.6009e-08,\n 2.5778e-08, 1.1930e-07, 9.1483e-08, 6.1280e-09, 1.0279e-07, 4.6137e-08,\n 4.8937e-09, 6.8331e-09, 1.1150e-07, 5.8508e-08, 1.0029e-08, 2.9449e-08,\n 4.7196e-08, 1.5016e-08, 3.1960e-09, 5.6667e-09, 1.5565e-07, 3.0389e-09,\n 3.4612e-08, 8.7103e-09, 1.3448e-07, 3.4065e-08, 1.2815e-08, 7.7717e-08,\n 4.9882e-08, 1.7521e-08, 9.1175e-08, 4.8238e-09, 7.6589e-08, 3.4475e-09,\n 1.4348e-08, 1.8998e-07, 1.0234e-08, 5.3418e-08, 1.0001e-08, 4.3785e-08,\n 2.4354e-08, 3.3635e-08, 5.5290e-08, 1.3877e-08, 7.5943e-08, 1.8475e-08,\n 1.1772e-08, 1.9333e-08, 8.7398e-09, 2.2046e-09, 2.4421e-10, 1.9867e-08,\n 4.7823e-08, 8.7319e-10, 3.8532e-08, 1.1040e-08, 7.5688e-08, 3.8658e-08,\n 6.5675e-08, 1.9084e-08, 1.6717e-08, 4.1098e-08, 9.1480e-09, 7.3181e-08,\n 7.5530e-09, 2.4002e-08, 1.3393e-09, 1.0686e-07, 8.3257e-09, 3.0076e-08,\n 6.0142e-08, 3.2818e-08, 4.4091e-08, 6.7038e-08, 8.9958e-08, 5.5500e-08,\n 3.2003e-08, 2.0532e-08, 2.9386e-08, 7.0126e-08, 2.5730e-07, 4.2949e-08,\n 2.6244e-08, 1.0592e-07, 5.1378e-08, 3.1348e-08, 4.7270e-08, 1.2097e-08,\n 1.9941e-07, 1.0407e-08, 4.9650e-08, 1.4662e-07, 4.3451e-08, 1.3309e-09,\n 2.6092e-08, 2.2817e-08, 2.3760e-07, 5.6057e-08, 1.4397e-09, 5.4559e-08,\n 1.3166e-07, 8.4711e-08, 4.8643e-08, 8.1874e-08, 1.1944e-08, 2.6081e-07,\n 7.3403e-09, 1.2544e-07, 2.9434e-08, 1.5457e-08, 3.3044e-09, 3.2217e-07,\n 2.7201e-08, 2.9580e-08, 9.3195e-08, 4.5897e-08, 1.2000e-07, 6.2808e-09,\n 1.0781e-07, 6.5941e-08, 9.7027e-08, 3.6276e-09, 1.1806e-07, 4.0518e-08],\n device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([ 1.6228e-18, 1.1027e-18, -1.7628e-18, 1.5560e-18, 5.4524e-18,\n -6.5763e-19, 3.6372e-18, -5.5378e-19, 4.1800e-18, 2.5260e-18,\n -7.2113e-19, 1.5606e-18, -5.6106e-18, 9.2975e-20, -1.8906e-18,\n -4.4434e-18, 5.8637e-19, -3.1718e-18, -3.2340e-18, 4.5951e-19,\n -3.2090e-18, -8.9381e-19, 3.4397e-18, 6.3357e-19, 8.6231e-20,\n -2.6168e-18, 4.3428e-18, -5.8214e-18, -2.8397e-18, 6.0705e-21,\n 3.2758e-18, 2.8056e-18, 1.7265e-18, 1.1762e-18, 2.6687e-19,\n 2.1844e-18, -2.3662e-18, -9.4431e-19, 2.7519e-19, 3.0639e-18,\n -2.2744e-18, -3.2726e-19, -1.4584e-19, 3.0547e-18, -1.2328e-18,\n 2.5078e-18, 1.2822e-18, 2.5487e-18, -9.4694e-19, -3.5907e-18,\n 2.0228e-18, 2.3192e-18, 8.2827e-19, 5.1564e-19, 3.5380e-18,\n -5.6505e-18, 1.6776e-18, 5.6117e-18, -6.2777e-19, -2.0594e-18,\n 2.9059e-18, -2.1363e-18, -1.2878e-18, 8.9277e-19, -6.8290e-18,\n -2.4474e-18, 5.6717e-18, -6.0317e-18, -3.5864e-19, 4.7464e-18,\n 6.9166e-18, -4.0961e-18, -9.6119e-18, 2.4448e-18, -2.4340e-18,\n -1.6431e-18, -2.4153e-18, 3.3638e-18, 5.0944e-18, 3.1008e-18,\n 7.3055e-19, 1.2699e-18, -5.9560e-18, 6.8257e-18, 4.2350e-18,\n -6.4348e-18, -2.2760e-18, 4.9257e-18, -5.5699e-18, -1.3355e-18,\n 4.9481e-18, -2.9286e-18, 5.5034e-19, -8.8936e-19, -8.6859e-18,\n -5.8524e-18, -7.1999e-18, -7.9603e-19, 9.6795e-19, -2.1104e-18,\n 2.5869e-18, 1.1254e-18, 2.5314e-18, -5.8372e-19, 4.5189e-18,\n -2.9871e-18, 1.8362e-18, 9.3956e-19, -6.7772e-18, 1.9138e-18,\n 1.4171e-18, -2.1496e-18, 3.9535e-18, -1.1676e-18, 5.0109e-18,\n 7.1902e-18, 3.9611e-18, 3.9584e-18, -2.9922e-18, -3.4297e-18,\n 2.0130e-18, -1.5497e-19, -5.4619e-19, -2.2102e-18, -1.8345e-18,\n 1.4512e-18, -1.4292e-18, 3.5642e-18, 6.8149e-18, 5.4023e-18,\n -4.3683e-18, 6.2474e-18, 8.8833e-18, -5.3829e-19, -1.4002e-18,\n 2.0401e-18, -4.7113e-19, -4.6999e-18, 4.9568e-18, -3.6731e-18,\n -3.4346e-18, 1.2913e-18, 1.5586e-18, 1.7163e-18, 4.5213e-18,\n 7.6789e-18, -1.7869e-18, -2.1130e-18, 6.7314e-18, -2.9673e-18,\n 7.6321e-18, 2.0194e-18, 1.7975e-18, 1.0499e-18, 4.9169e-19,\n 2.0472e-18, 9.2115e-18, -4.5741e-18, -4.0962e-18, 5.7795e-18,\n 1.1807e-18, -2.0662e-18, -3.4543e-18, 1.8128e-18, 2.0610e-18,\n -1.3172e-18, 8.6485e-19, 6.9436e-18, 9.2218e-19, -5.0305e-18,\n -3.0419e-18, -3.1923e-18, -6.6368e-19, -3.7548e-18, -4.9736e-18,\n -4.4310e-18, -3.7257e-18, 2.5944e-19, -5.3736e-18, 5.4468e-18,\n 4.4239e-18, -4.2657e-18, -2.7314e-19, 2.8079e-18, 5.2459e-19,\n -3.9412e-18, -1.0461e-18, 4.5337e-18, -2.2000e-18, 3.4524e-18,\n -1.6092e-19, 1.4093e-18, 1.3175e-18, -8.3699e-19, -6.5602e-19,\n -2.3614e-18, 1.3525e-19, 4.2075e-20, 4.2170e-18, -2.1182e-18,\n -6.7363e-19, 1.4565e-18, 1.7299e-18, 1.8428e-18, 1.1396e-18,\n -4.5145e-18, -2.5979e-18, 1.4962e-18, -2.6302e-18, 1.4782e-18,\n -1.3320e-18, -3.0974e-19, 4.2210e-20, -5.1647e-20, 1.4380e-18,\n -2.8925e-18, 2.8802e-18, -5.4888e-19, -1.1291e-18, -5.9116e-19,\n 1.1200e-18, 2.5303e-18, 1.1467e-18, 1.5763e-18, 5.5769e-18,\n -6.5714e-18, 5.2509e-18, 1.1020e-19, -6.7858e-18, -6.8379e-18,\n 2.2164e-18, 3.1130e-18, -4.7441e-18, 1.6856e-18, -1.6745e-18,\n 4.3096e-18, 1.8057e-18, -5.0344e-18, 6.6900e-18, -4.8773e-18,\n 5.5578e-18, -3.5315e-18, 8.3166e-19, -3.3870e-19, 2.0811e-18,\n -1.7798e-18, -9.8333e-18, -1.8342e-18, 9.1681e-20, 2.9052e-18,\n 3.2173e-18, -9.3142e-19, 5.0905e-19, 2.0861e-18, 6.8390e-18,\n 1.6021e-18, -7.4106e-26, 4.2064e-25, -5.0719e-25, -3.1023e-25,\n 3.0609e-25, -5.4811e-25, 4.2798e-26, 3.7342e-25, 2.6871e-25,\n 3.3039e-26, 2.0195e-25, 1.8337e-25, 1.2344e-24, -1.5581e-25,\n 3.4779e-25, 1.7923e-26, 3.5642e-26, 1.5300e-25, 2.2959e-25,\n -1.1059e-25, -1.6202e-25, -4.5284e-25, 9.8922e-26, -1.2918e-25,\n 2.6474e-25, -4.4014e-25, 9.0769e-26, 4.1666e-25, -1.6120e-26,\n -3.7657e-25, -1.0210e-25, 6.0705e-25, 1.5038e-25, 2.5950e-25,\n 6.5813e-26, 3.5522e-25, -4.9143e-25, 2.7862e-26, -6.4119e-25,\n 3.5096e-25, -3.1268e-25, 2.9337e-25, 1.9275e-25, 7.9101e-25,\n -2.5762e-25, 2.4525e-25, 6.3410e-25, -3.1648e-25, -1.5506e-25,\n 3.0400e-25, -5.4638e-25, -3.4837e-25, -3.6923e-25, 5.4847e-25,\n -5.5017e-25, -8.1764e-26, 4.3793e-26, -2.2913e-25, -4.6453e-25,\n -1.2932e-25, 4.6113e-25, 7.0804e-26, -3.5972e-25, -7.0603e-28,\n -1.0231e-24, 8.7809e-26, 7.7597e-26, -1.1960e-24, -4.7620e-25,\n 2.2553e-25, -2.2206e-26, -3.7559e-25, -1.0454e-24, 1.1440e-24,\n 6.3096e-26, -3.9462e-25, -3.4416e-25, -1.3187e-25, 5.9569e-26,\n 4.3377e-25, -4.4848e-25, 4.5780e-25, -7.5342e-26, 9.0115e-25,\n -1.4478e-26, -1.1733e-25, -3.0737e-25, 5.8454e-25, -4.3638e-25,\n -3.5583e-26, -2.9022e-27, -5.4868e-26, 6.9301e-25, 3.3828e-25,\n -5.7526e-25, -1.6923e-25, -2.5580e-25, 2.0499e-25, -2.0376e-25,\n -1.5147e-26, -2.6908e-25, 3.5445e-25, 2.5636e-25, -1.0408e-25,\n 2.4633e-25, 1.4812e-25, 2.0411e-25, -3.2468e-25, -4.5968e-25,\n 3.6088e-25, -1.7912e-25, -9.9040e-26, 5.1916e-25, 2.4402e-25,\n 2.4649e-25, 8.7565e-26, 3.7833e-25, 7.3327e-25, 1.3959e-26,\n 2.2885e-25, 1.9551e-25, 8.2543e-26, 1.0578e-26, 2.4047e-25,\n 7.9738e-26, -3.6071e-25, -4.0396e-25, 1.0184e-25, -6.1506e-25,\n -6.2521e-25, 8.2216e-25, -5.7568e-25, -1.0568e-24, -2.3277e-25,\n -9.9979e-25, -1.4382e-26, -1.6897e-25, 6.0529e-25, -3.0839e-25,\n 9.0538e-25, 6.9365e-25, -5.3636e-26, -3.2242e-26, -1.2505e-25,\n 7.9215e-26, -7.2148e-25, 7.3105e-25, -5.8364e-25, 7.5936e-27,\n 1.7222e-25, -7.6747e-25, -6.8784e-25, -3.3254e-25, 1.8043e-25,\n -8.3829e-25, 5.0318e-25, -1.1253e-24, 6.2887e-25, 5.4669e-25,\n 1.9489e-26, -2.5079e-25, 1.7471e-25, 7.0774e-26, -1.1712e-25,\n -1.5259e-25, -5.4730e-25, -2.1304e-25, -1.9768e-26, 1.8738e-25,\n -9.1150e-26, 9.2415e-27, 4.3030e-26, 1.1688e-25, 4.0833e-25,\n -4.9418e-26, -1.3176e-25, -3.0653e-25, -1.8524e-25, 4.4478e-26,\n 1.4733e-25, -1.3725e-27, 1.1055e-26, -9.9977e-27, -6.6197e-26,\n -2.9023e-26, 9.8210e-26, 5.9082e-26, 1.8989e-25, -1.4810e-25,\n -1.0745e-25, -2.1866e-25, 1.3329e-25, 3.0033e-25, 3.4863e-27,\n 3.8342e-26, 2.2759e-25, -3.0888e-25, -2.9245e-27, -2.3008e-25,\n 3.9541e-26, 3.3065e-26, -7.7674e-26, -5.2337e-26, -2.6307e-25,\n -2.3061e-25, 2.3005e-25, -1.9469e-25, -1.3119e-25, -2.6942e-26,\n 2.4274e-25, -2.8529e-25, 3.0154e-25, -1.6135e-25, -1.5702e-25,\n 1.1552e-25, 1.0994e-25, 1.7293e-26, 2.0317e-26, 8.2610e-26,\n 1.0321e-25, -2.8220e-25, 2.5038e-26, -1.7767e-25, 9.4827e-27,\n 1.0967e-25, -5.6868e-25, -4.1833e-25, -9.2970e-26, -5.0667e-25,\n -1.5998e-25, -1.4898e-25, 4.0143e-25, -2.4178e-25, -1.3286e-25,\n -5.3070e-26, 1.6103e-25, -1.2211e-25, -3.6940e-26, 1.9881e-26,\n -3.2107e-26, -1.7266e-25, -2.9206e-26, -1.3021e-25, 1.8360e-25,\n -5.1863e-26, -1.9256e-25, -7.1676e-26, 4.3660e-25, -1.8155e-25,\n 2.7767e-25, 6.7195e-25, 9.0407e-26, -4.3080e-25, -1.9174e-25,\n 4.0726e-25, 2.8464e-25, 1.3648e-17, 7.6574e-17, -7.6044e-17,\n -3.1564e-17, 1.1985e-16, -1.1257e-16, -1.1431e-16, 3.0898e-17,\n -2.4039e-17, -1.2089e-17, -7.6194e-17, 6.9901e-17, -4.2505e-17,\n 7.6698e-17, 8.9785e-17, 3.7298e-17, -1.1519e-16, 7.3426e-17,\n -6.9011e-17, 9.7507e-17, -1.0611e-16, 8.9438e-17, -1.0461e-16,\n -1.0200e-16, -6.3279e-17, -1.5936e-17, -1.5283e-16, -8.3189e-17,\n 6.3236e-17, -9.9092e-17, 8.7326e-17, 6.6642e-17, -7.8081e-17,\n 7.9084e-17, -6.4060e-17, -3.6846e-17, 1.2782e-16, 3.9687e-17,\n -1.0408e-17, -9.9968e-17, 8.1339e-18, 1.4884e-16, 1.1805e-16,\n 6.9162e-17, -2.2568e-17, -9.2040e-17, 1.2694e-16, -1.4339e-16,\n -1.0624e-16, -7.3911e-17, 6.1079e-17, -3.3014e-17, 2.4260e-17,\n -9.4557e-17, -3.7687e-17, 8.1554e-17, -7.5528e-17, 8.7650e-17,\n -6.4330e-17, -2.9470e-17, 1.3606e-16, -7.4086e-17, 1.1884e-16,\n 4.6439e-17, 1.4736e-16, 2.4757e-17, -8.2383e-17, 8.3965e-17,\n -1.1919e-16, 1.6736e-16, 1.6197e-18, -1.1927e-16, 8.6368e-17,\n -4.6620e-17, 8.2177e-17, -7.5055e-17, 4.8390e-17, -3.9269e-17,\n 1.3758e-17, -5.8585e-17, 2.9520e-17, -7.0179e-17, 5.6793e-17,\n 8.8128e-17, 7.9842e-17, 3.1107e-17, -3.3429e-17, -3.5902e-17,\n 7.9032e-17, -5.5699e-17, -9.2843e-17, -8.0065e-17, 6.0355e-17,\n 1.2251e-16, -3.6942e-17, 5.6793e-17, -5.0958e-17, 1.4923e-16,\n -2.9261e-18, 7.1037e-17, 2.3536e-17, 6.5654e-17, 4.2910e-17,\n -1.4535e-16, 8.6983e-17, -1.5691e-17, 7.0924e-17, -1.7288e-17,\n -7.6063e-17, -6.3481e-17, 1.3386e-16, 4.6435e-17, 6.1552e-17,\n 9.1621e-17, 7.1690e-17, 6.6754e-17, 1.2490e-16, -1.1591e-16,\n 1.0639e-16, -6.0323e-17, 1.3965e-16, 6.2862e-17, 1.4093e-16,\n 2.8384e-17, 1.2426e-16, -1.4082e-16, -6.8455e-17, 1.5408e-17,\n -1.4290e-16, 2.8423e-17, -9.4098e-17, 8.9580e-17, 2.8901e-17,\n 7.5192e-17, -1.7895e-18, -1.0005e-16, 3.7248e-17, -1.1450e-16,\n 1.1951e-16, -7.4772e-17, -1.6258e-16, 3.5416e-17, -3.1476e-17,\n -6.5141e-17, -5.6700e-17, -8.0858e-17, 9.8257e-17, 6.3824e-17,\n -2.2733e-17, 3.6865e-17, 3.3561e-17, 8.4190e-17, -1.1281e-16,\n 1.0433e-17, -9.3924e-17, -6.8356e-17, -1.1098e-16, -7.9414e-17,\n -7.3215e-17, -1.1075e-16, 1.1930e-16, 7.1449e-17, -2.9602e-17,\n 4.4483e-17, -9.4106e-17, 2.8295e-17, -3.1536e-17, -4.6745e-17,\n 6.7793e-17, 1.4606e-16, -9.5455e-17, -4.0385e-18, 4.7628e-17,\n -8.5551e-17, -5.4613e-17, 2.3427e-17, -1.7070e-16, -1.0500e-16,\n 5.8291e-17, -1.0865e-16, -3.9106e-17, -3.9097e-17, 2.5064e-17,\n -1.0405e-16, 2.1908e-17, -4.3701e-17, 9.2273e-17, 4.0240e-17,\n -6.1134e-17, -7.6245e-17, 8.7936e-18, -9.3585e-17, -1.5948e-17,\n -1.0526e-16, 5.7598e-17, -1.0109e-16, 3.5747e-17, -1.2154e-16,\n 1.1586e-17, 1.7713e-17, 2.7955e-17, 1.2663e-16, 1.3162e-16,\n 9.5451e-17, -2.9500e-17, -8.5763e-17, -3.3960e-17, 1.2871e-16,\n -5.8877e-17, -1.0965e-16, 6.7987e-17, 1.0588e-16, -1.2883e-16,\n 4.9566e-17, 4.8359e-17, -7.0671e-17, 3.0344e-18, -1.2480e-17,\n -3.7209e-17, 1.4813e-17, 2.0724e-17, -7.4374e-17, 5.0808e-17,\n 1.1384e-16, -1.5234e-16, -7.6468e-18, -1.0145e-16, 9.1756e-17,\n 1.0457e-16, 2.9265e-17, -4.8587e-17, -1.1240e-16, -1.2103e-16,\n -9.1772e-17, 4.8552e-17, 7.5933e-18, 7.8151e-17, -1.1987e-16,\n 7.7258e-17, -8.1563e-17, -4.8342e-17, -1.1057e-16, -4.9410e-17,\n -1.1691e-16, -1.2450e-16, -2.1549e-17, 7.9703e-17, -1.1368e-16,\n -7.7897e-17, 5.0940e-17, -5.8639e-17, -1.0001e-16, 7.9134e-17,\n -1.0590e-16, -1.0318e-17, 5.8261e-17], device='cuda:0')", + "exp_avg_sq": "tensor([1.0897e-10, 8.3857e-12, 6.4952e-11, 4.4897e-11, 5.6789e-11, 9.2790e-11,\n 1.0333e-11, 1.1976e-10, 1.5501e-10, 1.0941e-11, 6.0817e-11, 2.1387e-11,\n 1.5318e-10, 3.4592e-12, 1.7640e-10, 2.4551e-11, 3.4996e-11, 3.0848e-11,\n 3.5218e-11, 1.7648e-11, 2.0474e-11, 4.1576e-11, 3.7183e-12, 4.9182e-11,\n 6.5823e-11, 4.4165e-12, 6.7482e-11, 2.2592e-10, 1.3158e-11, 2.2515e-12,\n 6.4232e-11, 7.3893e-11, 2.8012e-10, 1.0782e-12, 2.3853e-11, 3.1333e-11,\n 2.0232e-10, 8.0861e-13, 4.1734e-11, 1.3766e-11, 1.9947e-11, 3.0103e-12,\n 2.8134e-10, 7.9129e-11, 1.7510e-10, 8.0310e-12, 4.3535e-10, 1.7167e-10,\n 4.9584e-11, 4.9229e-12, 1.3651e-11, 2.5534e-10, 7.0420e-12, 4.7988e-11,\n 1.9609e-12, 8.8882e-12, 1.6727e-10, 3.9148e-11, 2.1860e-11, 1.5764e-10,\n 1.2428e-10, 3.1819e-10, 2.7138e-10, 6.9491e-11, 1.6131e-10, 1.1753e-11,\n 3.0194e-11, 4.4299e-11, 3.5746e-12, 2.2332e-10, 2.3796e-10, 1.7994e-11,\n 1.9501e-10, 7.7228e-12, 1.8289e-11, 2.5251e-10, 2.9518e-11, 8.7599e-11,\n 3.7649e-11, 1.1683e-10, 5.3822e-11, 8.5692e-12, 4.9149e-11, 2.1905e-10,\n 1.9907e-10, 5.5963e-11, 2.7980e-11, 1.1995e-10, 5.1998e-11, 1.5322e-11,\n 9.2560e-11, 5.8014e-11, 6.2080e-11, 2.8867e-12, 1.7094e-10, 4.0231e-11,\n 1.9701e-11, 6.2886e-12, 6.2777e-11, 2.1070e-11, 7.1396e-12, 7.6347e-12,\n 2.4498e-11, 3.5376e-11, 1.5617e-11, 1.8748e-11, 8.2379e-12, 5.9368e-11,\n 1.4012e-11, 2.6899e-11, 1.7055e-12, 7.7089e-11, 1.1106e-10, 1.2984e-11,\n 2.0356e-11, 9.0047e-12, 3.7536e-11, 3.9144e-12, 4.2044e-12, 1.3930e-11,\n 4.3625e-12, 5.6615e-12, 1.7473e-11, 3.1864e-11, 2.3068e-11, 1.0523e-12,\n 3.7245e-11, 5.0074e-12, 1.7979e-11, 3.3400e-11, 2.0282e-10, 1.8393e-10,\n 1.2910e-10, 5.8164e-11, 6.8528e-11, 4.9589e-12, 2.9823e-10, 9.3881e-11,\n 5.4088e-11, 3.3674e-11, 7.6943e-11, 9.2148e-12, 1.1669e-11, 1.1630e-11,\n 7.9936e-12, 2.1379e-10, 1.6988e-11, 9.0655e-12, 5.8452e-10, 2.0781e-11,\n 5.5905e-11, 2.8101e-10, 1.3764e-10, 3.8498e-12, 1.7957e-11, 9.3799e-13,\n 3.0344e-10, 1.2288e-11, 1.0420e-10, 1.6744e-10, 4.9591e-13, 6.0924e-13,\n 5.8576e-12, 1.6321e-11, 1.5762e-12, 1.0852e-11, 1.9591e-12, 4.5955e-11,\n 7.9405e-12, 9.2220e-12, 5.4459e-12, 2.0572e-12, 1.1610e-11, 1.5942e-12,\n 1.2058e-12, 4.9909e-12, 5.0965e-13, 1.7592e-11, 4.0523e-12, 9.1106e-12,\n 2.0294e-11, 1.2548e-11, 3.5208e-12, 3.5374e-12, 7.0515e-13, 2.8994e-12,\n 2.0053e-11, 1.0723e-11, 7.6291e-12, 9.0941e-12, 1.4312e-11, 1.4839e-11,\n 2.1276e-11, 2.0152e-10, 4.7976e-10, 3.8960e-10, 1.9289e-10, 6.9733e-12,\n 5.7885e-11, 3.1336e-12, 9.8969e-11, 3.9678e-10, 5.4743e-11, 7.4646e-11,\n 7.5025e-11, 2.0886e-11, 7.2206e-11, 2.7780e-12, 3.1057e-10, 7.3353e-11,\n 2.3237e-10, 8.3750e-12, 1.2479e-10, 2.6677e-12, 5.5798e-13, 5.1400e-10,\n 4.0261e-11, 5.5292e-11, 9.0301e-12, 5.3267e-10, 1.2257e-11, 3.3530e-10,\n 1.6127e-10, 1.0411e-12, 3.8043e-11, 2.1653e-11, 1.1718e-11, 5.0662e-11,\n 1.2490e-11, 6.8757e-13, 8.2679e-13, 2.6456e-11, 1.8541e-11, 1.3894e-11,\n 8.1705e-11, 5.0986e-12, 4.0722e-11, 2.7722e-11, 1.0470e-12, 5.5970e-12,\n 4.0558e-11, 8.3247e-11, 2.6239e-11, 1.1835e-10, 6.0718e-12, 1.9357e-11,\n 1.8660e-10, 5.1377e-11, 4.1522e-12, 1.5321e-11, 3.2920e-11, 1.5089e-11,\n 1.7097e-11, 1.8424e-11, 1.3175e-12, 3.6220e-12, 7.4552e-28, 1.7302e-26,\n 1.0611e-26, 1.9756e-27, 2.8700e-27, 5.3093e-27, 3.1871e-27, 9.2177e-28,\n 1.8821e-27, 1.0272e-27, 6.8527e-27, 8.1331e-28, 6.4580e-27, 8.6249e-28,\n 1.2755e-27, 5.8118e-27, 2.4141e-27, 3.1443e-27, 3.1018e-27, 5.0685e-28,\n 1.4883e-26, 3.2021e-27, 1.4275e-26, 1.2228e-26, 1.1421e-26, 1.7456e-27,\n 9.1215e-27, 3.8277e-27, 8.4073e-27, 2.3768e-27, 1.1609e-27, 1.7302e-26,\n 6.8166e-27, 1.0137e-27, 5.3245e-27, 1.0391e-26, 1.7322e-26, 3.1238e-27,\n 7.5828e-27, 1.5711e-26, 2.4074e-27, 2.1865e-27, 1.1455e-26, 8.3244e-27,\n 5.8518e-27, 7.6072e-27, 1.4306e-26, 3.2479e-27, 5.8273e-27, 2.6159e-27,\n 1.6639e-27, 4.7965e-27, 1.3591e-27, 2.6816e-27, 6.8452e-27, 9.4560e-27,\n 1.9614e-26, 6.7287e-27, 9.1558e-27, 7.0995e-27, 1.1395e-26, 1.2930e-26,\n 3.5798e-27, 5.5595e-27, 2.9513e-26, 2.2919e-27, 1.7235e-26, 1.0552e-26,\n 7.8433e-27, 4.4040e-27, 3.0070e-27, 6.7358e-27, 9.8624e-27, 4.2607e-26,\n 1.5680e-27, 1.1194e-27, 1.1545e-27, 1.9659e-28, 3.1128e-27, 2.5150e-27,\n 1.3150e-27, 3.4719e-27, 1.3888e-26, 5.6182e-27, 2.3001e-27, 3.2330e-27,\n 4.8881e-27, 4.7592e-27, 2.1762e-27, 4.0694e-27, 5.4897e-28, 2.9566e-27,\n 7.5666e-28, 2.8338e-27, 3.9783e-27, 3.4153e-27, 4.4507e-27, 9.6637e-28,\n 2.4582e-27, 4.3717e-27, 2.3240e-27, 7.6123e-28, 1.1037e-27, 2.2376e-27,\n 4.6062e-27, 2.3099e-27, 6.6388e-28, 4.0526e-27, 1.0047e-27, 1.6780e-27,\n 2.2607e-27, 3.2016e-27, 3.3262e-27, 1.0290e-27, 8.7277e-27, 3.2050e-27,\n 1.5230e-27, 2.1781e-27, 1.5301e-27, 4.4316e-27, 1.9526e-27, 3.0060e-27,\n 1.8252e-27, 1.5848e-27, 1.6580e-27, 2.0599e-27, 1.2115e-27, 8.6147e-28,\n 1.8258e-26, 9.8774e-27, 4.5345e-27, 1.0776e-26, 2.5846e-26, 1.2921e-26,\n 2.6725e-27, 2.0432e-26, 3.0588e-28, 2.3327e-27, 6.0612e-27, 3.4010e-26,\n 3.2187e-27, 4.1865e-27, 1.4592e-26, 3.7946e-27, 1.5390e-26, 1.6661e-26,\n 3.3065e-27, 1.3007e-27, 1.1314e-26, 4.0676e-27, 8.3494e-27, 3.8752e-26,\n 2.6789e-27, 5.5174e-27, 4.3528e-27, 1.5737e-26, 2.7939e-26, 2.8461e-26,\n 1.1116e-26, 1.2430e-27, 2.2476e-27, 3.3429e-27, 1.1291e-27, 9.3018e-28,\n 1.2391e-27, 3.8403e-27, 2.5736e-28, 4.1707e-27, 1.4969e-27, 1.2852e-27,\n 2.5572e-28, 2.0785e-27, 6.8511e-28, 2.8519e-27, 1.6042e-27, 4.9400e-27,\n 1.1590e-27, 2.2959e-27, 5.6323e-28, 5.2458e-28, 1.8740e-28, 4.5829e-28,\n 1.3808e-27, 3.0990e-28, 2.6585e-27, 1.8703e-27, 5.6256e-27, 1.1023e-27,\n 6.1213e-27, 4.7886e-28, 5.6889e-28, 1.2175e-27, 8.7452e-28, 1.9881e-26,\n 1.2510e-27, 3.8145e-27, 3.0524e-27, 1.1324e-26, 2.5563e-27, 1.1053e-27,\n 3.6517e-27, 7.1819e-27, 1.3589e-27, 5.9463e-28, 1.3497e-26, 4.7680e-28,\n 1.4629e-27, 2.1680e-27, 1.5230e-27, 9.1813e-27, 7.0042e-27, 8.8019e-27,\n 7.3982e-28, 2.3494e-27, 4.7456e-28, 1.8867e-27, 1.6435e-27, 5.1303e-27,\n 2.7890e-27, 5.3030e-27, 2.3564e-27, 2.3497e-27, 2.4495e-27, 5.9642e-28,\n 1.0009e-26, 1.8805e-26, 1.0428e-26, 3.9237e-27, 2.2385e-26, 1.9706e-26,\n 7.6771e-27, 8.8891e-27, 3.1072e-27, 1.5633e-26, 1.2249e-26, 6.1837e-27,\n 4.5195e-27, 6.8257e-27, 8.6835e-27, 1.6730e-27, 9.9708e-27, 3.9345e-27,\n 6.0215e-27, 1.9794e-27, 2.4520e-27, 1.6305e-26, 5.6330e-27, 2.5664e-26,\n 3.9243e-27, 1.6029e-27, 1.1448e-26, 1.4136e-26, 1.2778e-26, 4.5720e-27,\n 1.3523e-27, 1.2914e-26, 2.1086e-08, 3.2948e-09, 8.9008e-09, 3.8193e-09,\n 5.3670e-08, 4.4354e-09, 3.5916e-08, 5.5035e-09, 9.7156e-09, 5.4726e-10,\n 6.8226e-10, 1.0549e-08, 1.0075e-08, 1.3868e-09, 3.8186e-09, 4.1870e-08,\n 1.2655e-08, 3.2735e-09, 2.3507e-08, 5.1066e-09, 2.3346e-08, 3.5453e-09,\n 5.8438e-09, 1.3328e-09, 2.9713e-09, 3.0575e-09, 3.3454e-08, 3.7434e-08,\n 2.4649e-08, 2.5865e-08, 9.1294e-09, 4.9252e-09, 7.7468e-09, 1.1789e-08,\n 1.8458e-08, 6.2222e-10, 7.7284e-08, 8.1212e-09, 1.3786e-09, 1.5238e-08,\n 2.9105e-09, 1.7159e-08, 3.7217e-08, 4.3744e-08, 1.9881e-08, 1.8201e-09,\n 5.2655e-08, 1.2205e-08, 1.6753e-08, 3.8253e-09, 3.7790e-09, 2.4010e-08,\n 4.6428e-08, 2.1136e-08, 8.5405e-09, 6.8951e-09, 1.2079e-08, 2.3056e-09,\n 4.3284e-09, 4.9768e-09, 7.6430e-08, 7.6024e-09, 5.4983e-09, 7.9619e-10,\n 8.7191e-09, 8.5404e-10, 1.5594e-08, 3.7176e-09, 1.8908e-08, 7.8514e-08,\n 3.4398e-09, 2.4627e-08, 3.4263e-08, 5.5995e-09, 3.4838e-08, 4.2870e-09,\n 4.6534e-10, 1.2248e-09, 3.5071e-10, 5.3778e-09, 4.9911e-09, 3.9965e-08,\n 2.0138e-09, 2.4394e-08, 8.3878e-09, 4.3008e-09, 6.1217e-09, 4.4731e-10,\n 1.0243e-08, 5.8103e-09, 5.2743e-09, 3.1941e-08, 4.5904e-09, 4.3372e-08,\n 3.7582e-08, 3.3347e-09, 1.3548e-08, 4.2919e-08, 3.3093e-08, 1.2746e-08,\n 1.8835e-10, 1.4133e-08, 3.7348e-09, 2.3749e-08, 6.2111e-08, 1.7633e-09,\n 1.0045e-09, 3.5369e-09, 2.0218e-09, 3.7802e-09, 6.8056e-08, 2.7070e-09,\n 1.3067e-09, 7.0027e-09, 1.3679e-08, 1.5040e-08, 3.7184e-08, 2.8476e-08,\n 4.2694e-09, 4.0651e-09, 3.9479e-08, 1.0512e-09, 4.4935e-08, 1.7487e-09,\n 3.5796e-09, 1.6204e-08, 3.2020e-09, 6.1867e-10, 3.3301e-08, 8.3306e-09,\n 2.0718e-08, 2.6971e-08, 1.0597e-09, 1.3414e-08, 1.2158e-09, 1.0290e-08,\n 7.3662e-09, 3.4091e-08, 2.6142e-08, 1.7511e-09, 2.9374e-08, 1.3184e-08,\n 1.3984e-09, 1.9526e-09, 3.1862e-08, 1.6719e-08, 2.8659e-09, 8.4153e-09,\n 1.3487e-08, 4.2909e-09, 9.1327e-10, 1.6193e-09, 4.4478e-08, 8.6839e-10,\n 9.8907e-09, 2.4890e-09, 3.8428e-08, 9.7344e-09, 3.6619e-09, 2.2208e-08,\n 1.4254e-08, 5.0068e-09, 2.6054e-08, 1.3784e-09, 2.1886e-08, 9.8516e-10,\n 4.1001e-09, 5.4288e-08, 2.9245e-09, 1.5264e-08, 2.8580e-09, 1.2512e-08,\n 6.9594e-09, 9.6114e-09, 1.5800e-08, 3.9654e-09, 2.1701e-08, 5.2794e-09,\n 3.3639e-09, 5.5245e-09, 2.4975e-09, 6.2999e-10, 6.9784e-11, 5.6773e-09,\n 1.3666e-08, 2.4952e-10, 1.1011e-08, 3.1549e-09, 2.1629e-08, 1.1047e-08,\n 1.8767e-08, 5.4535e-09, 4.7771e-09, 1.1744e-08, 2.6141e-09, 2.0912e-08,\n 2.1583e-09, 6.8589e-09, 3.8271e-10, 3.0537e-08, 2.3791e-09, 8.5943e-09,\n 1.7186e-08, 9.3781e-09, 1.2599e-08, 1.9157e-08, 2.5706e-08, 1.5859e-08,\n 9.1450e-09, 5.8672e-09, 8.3972e-09, 2.0039e-08, 7.3525e-08, 1.2273e-08,\n 7.4996e-09, 3.0266e-08, 1.4682e-08, 8.9579e-09, 1.3508e-08, 3.4567e-09,\n 5.6984e-08, 2.9740e-09, 1.4188e-08, 4.1898e-08, 1.2416e-08, 3.8032e-10,\n 7.4561e-09, 6.5200e-09, 6.7897e-08, 1.6019e-08, 4.1142e-10, 1.5591e-08,\n 3.7624e-08, 2.4207e-08, 1.3900e-08, 2.3396e-08, 3.4131e-09, 7.4530e-08,\n 2.0975e-09, 3.5846e-08, 8.4111e-09, 4.4171e-09, 9.4427e-10, 9.2062e-08,\n 7.7728e-09, 8.4528e-09, 2.6631e-08, 1.3115e-08, 3.4290e-08, 1.7948e-09,\n 3.0807e-08, 1.8843e-08, 2.7726e-08, 1.0366e-09, 3.3735e-08, 1.1578e-08],\n device='cuda:0')" }, "54": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-5.8983e-17, 4.8743e-17, -3.2547e-17, ..., -4.2741e-17,\n -8.0591e-17, 6.9218e-17],\n [-2.1787e-17, 1.6977e-17, -1.0918e-17, ..., -1.2500e-17,\n -2.1789e-17, 2.8286e-17],\n [ 5.1705e-17, -4.2039e-17, 2.4153e-17, ..., 3.2382e-17,\n 5.8156e-17, -5.7976e-17],\n ...,\n [ 6.2303e-17, -5.0982e-17, 2.8654e-17, ..., 4.1927e-17,\n 7.8247e-17, -6.5749e-17],\n [ 8.3957e-17, -7.0172e-17, 4.9092e-17, ..., 5.9623e-17,\n 1.0839e-16, -1.0573e-16],\n [ 1.8921e-17, -1.6766e-17, 1.2602e-17, ..., 1.3780e-17,\n 2.2947e-17, -2.7139e-17]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.1474e-09, 7.0297e-09, 1.8898e-08, ..., 9.5763e-11, 1.0948e-08,\n 1.2264e-08],\n [1.3385e-09, 1.0104e-08, 2.2731e-08, ..., 1.4591e-10, 7.0716e-09,\n 1.1806e-08],\n [9.4847e-10, 3.8639e-09, 5.5983e-09, ..., 9.1003e-11, 3.2437e-09,\n 4.7875e-09],\n ...,\n [1.5167e-09, 7.4519e-09, 2.6390e-08, ..., 1.7768e-10, 1.8841e-08,\n 1.4637e-08],\n [2.1950e-10, 1.4326e-09, 3.8283e-09, ..., 2.6185e-11, 3.0966e-09,\n 3.6599e-09],\n [1.0050e-09, 5.0803e-09, 1.4563e-08, ..., 2.5801e-11, 4.9760e-09,\n 6.7807e-09]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[-4.9650e-17, 4.2547e-17, -2.5764e-17, ..., -3.2979e-17,\n -6.7064e-17, 5.9262e-17],\n [-1.8908e-17, 1.4019e-17, -8.6043e-18, ..., -9.1731e-18,\n -1.5288e-17, 2.3093e-17],\n [ 4.7316e-17, -3.8915e-17, 2.0350e-17, ..., 2.7842e-17,\n 5.1188e-17, -5.4741e-17],\n ...,\n [ 5.1576e-17, -4.3968e-17, 2.1891e-17, ..., 3.2749e-17,\n 6.5619e-17, -5.6116e-17],\n [ 7.1102e-17, -6.1531e-17, 3.8425e-17, ..., 4.7443e-17,\n 8.8589e-17, -9.1175e-17],\n [ 1.8696e-17, -1.7137e-17, 1.0832e-17, ..., 1.3506e-17,\n 2.1092e-17, -2.6624e-17]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.2788e-10, 2.0088e-09, 5.4002e-09, ..., 2.7365e-11, 3.1285e-09,\n 3.5046e-09],\n [3.8248e-10, 2.8873e-09, 6.4956e-09, ..., 4.1695e-11, 2.0208e-09,\n 3.3736e-09],\n [2.7103e-10, 1.1041e-09, 1.5998e-09, ..., 2.6005e-11, 9.2691e-10,\n 1.3681e-09],\n ...,\n [4.3341e-10, 2.1294e-09, 7.5410e-09, ..., 5.0772e-11, 5.3840e-09,\n 4.1826e-09],\n [6.2723e-11, 4.0938e-10, 1.0940e-09, ..., 7.4826e-12, 8.8488e-10,\n 1.0458e-09],\n [2.8719e-10, 1.4517e-09, 4.1614e-09, ..., 7.3728e-12, 1.4219e-09,\n 1.9377e-09]], device='cuda:0')" }, "55": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-8.4878e-17, -2.9344e-17, 7.2117e-17, -3.4991e-18, 9.2097e-17,\n -5.7528e-17, 6.2481e-17, -6.1856e-17, 1.3595e-16, -1.3166e-17,\n 2.1454e-17, -5.5093e-17, -6.1466e-17, -2.4573e-17, -1.3427e-16,\n -9.7037e-17, 2.3835e-17, -8.3309e-17, -3.3337e-18, 5.1237e-17,\n -1.0155e-16, -4.2435e-17, -1.4074e-16, 3.4847e-17, -1.0288e-16,\n -5.9304e-17, 5.5278e-17, -3.0051e-17, 4.1929e-17, 8.8877e-17,\n 9.8383e-17, 5.9840e-17, 8.0196e-17, -5.1412e-17, 8.8437e-17,\n 8.0123e-17, 1.4219e-16, 8.7011e-17, 1.5479e-17, 9.3406e-17,\n 3.6083e-17, 8.0592e-17, 4.8386e-17, -7.5608e-17, 9.1085e-17,\n -6.0129e-17, 2.9111e-17, -9.5839e-17, 1.3499e-17, 3.6496e-18,\n -8.5423e-17, -3.8509e-18, 7.8855e-17, 3.5235e-17, 1.4088e-16,\n -1.1665e-16, -2.0344e-17, 9.2391e-17, -1.0082e-16, 9.3442e-17,\n 9.9651e-17, -8.9052e-17, 4.3807e-17, 7.0175e-17, -8.0043e-17,\n -6.7453e-17, -1.4284e-16, -8.4026e-17, -6.7373e-17, -3.0710e-17,\n -1.2260e-16, 1.3799e-16, 1.0586e-16, 5.8832e-17, -9.2347e-17,\n -8.0617e-17, -8.0423e-17, -6.3089e-17, -9.2202e-17, 2.7993e-17,\n -1.1471e-16, -1.2478e-17, -5.0918e-17, 4.0400e-17, 1.2364e-16,\n -3.9518e-17, 2.3519e-17, 6.5655e-17, -1.1966e-17, -6.0276e-17,\n 5.4315e-17, 8.9952e-17, 7.6358e-19, -1.1027e-16, 4.9962e-17,\n -5.3627e-17, 5.1046e-17, -4.0752e-17, 1.1604e-16, 4.4028e-17,\n 2.2188e-17, -5.9501e-17, -5.7113e-17, 9.3636e-17, 1.1612e-16,\n 6.4297e-17, 7.5661e-17, -2.5093e-17, -5.5192e-17, 1.2363e-16,\n -6.6257e-17, -1.2067e-16, 8.4836e-17, -5.1785e-17, -8.9284e-19,\n -6.5860e-17, 3.8835e-17, -6.2929e-17, 6.5851e-17, 7.4759e-17,\n -4.5485e-17, -1.3619e-17, -8.7187e-17, 4.4526e-17, -1.2196e-16,\n -3.2195e-17, 1.1879e-16, -2.2218e-17, -9.4273e-17, -3.6200e-17,\n 1.0985e-16, -6.8493e-17, -9.1609e-17, -7.8314e-17, -9.6448e-17,\n 1.0142e-16, 7.5775e-17, -3.8069e-17, 6.4638e-17, 6.3426e-17,\n -1.3040e-16, 9.2286e-17, -6.4254e-17, -1.2528e-16, -4.6209e-17,\n 3.4556e-17, -3.3704e-17, 4.8657e-17, 1.3269e-16, 1.0856e-16,\n 3.1960e-17, -1.4920e-17, -1.1330e-16, 5.8982e-17, 1.1846e-16,\n 9.1552e-19, 7.2672e-17, -1.0229e-16, -3.3188e-17, 5.5642e-17,\n 6.8979e-17, -6.6990e-17, -5.4470e-17, 1.1930e-16, -1.0640e-18,\n 4.3434e-17, -1.2947e-16, 8.9869e-17, 5.2408e-18, 1.2356e-16,\n -1.3180e-17, 4.2483e-19, -1.9325e-16, 9.1651e-17, -9.3597e-17,\n 1.8032e-16, 7.9748e-17, 8.0241e-17, 1.0850e-17, -1.3265e-16,\n -5.0622e-17, -7.1136e-17, 9.1582e-17, 1.4742e-17, -6.2526e-17,\n 4.0009e-17, -1.4731e-16, 6.0266e-17, 7.7725e-17, -1.1231e-16,\n -8.0952e-17, -1.4272e-16, 3.2665e-17, -9.9648e-17, -9.7459e-17,\n 4.3110e-17, -2.8523e-17, 4.8408e-17, 7.9614e-17, 4.9956e-17,\n 4.3080e-17, 9.6073e-17, 1.0955e-16, -1.5785e-16, 1.0439e-16,\n 3.7417e-17, 6.3247e-17, 1.3544e-16, 1.0309e-16, 2.5984e-17,\n -1.0134e-16, -5.5686e-17, 6.7364e-17, 1.0023e-16, 8.6198e-17,\n 3.2600e-17, -1.0816e-16, 1.1068e-16, -5.5144e-17, 9.4969e-17,\n -3.7491e-17, 1.5037e-16, 6.6857e-17, 5.3119e-17, -4.9723e-17,\n 4.4274e-17, -8.4535e-17, -1.3419e-16, 9.5415e-17, 1.1926e-16,\n 2.7270e-17, -8.8488e-17, -8.8699e-17, -1.0664e-16, 9.7854e-17,\n -1.0703e-16, 6.0839e-17, 9.2265e-18, -8.7594e-17, -7.5248e-17,\n -9.3920e-17, -4.1121e-17, -7.3401e-18, -6.4202e-17, -9.3775e-17,\n -3.1523e-18, -7.0306e-17, 1.9949e-17, 6.2073e-17, 8.0652e-17,\n 5.3789e-17, 1.4844e-16, -2.9727e-17, 8.7077e-17, 1.2673e-16,\n 3.4219e-17], device='cuda:0')", - "exp_avg_sq": "tensor([9.5178e-08, 1.2834e-07, 3.1292e-08, 5.3292e-07, 6.7344e-07, 2.3865e-08,\n 2.9591e-08, 7.2228e-09, 3.3348e-08, 3.8375e-09, 7.4147e-09, 2.9312e-08,\n 3.9984e-08, 1.4348e-07, 3.3498e-08, 7.6617e-09, 1.6192e-07, 1.6794e-07,\n 7.0041e-08, 1.0036e-07, 6.0378e-07, 4.0746e-07, 7.4178e-08, 3.1268e-07,\n 1.6030e-07, 3.9078e-07, 4.0207e-09, 2.2925e-07, 1.8334e-08, 1.9563e-08,\n 6.3110e-08, 4.2800e-08, 2.2030e-08, 2.2154e-08, 4.1162e-07, 2.3776e-08,\n 1.1241e-06, 1.0236e-07, 1.5531e-07, 6.1139e-08, 1.4899e-08, 3.0334e-08,\n 2.6470e-08, 1.1821e-07, 1.7404e-07, 2.7802e-07, 1.1633e-08, 7.1394e-08,\n 1.6165e-08, 3.3504e-08, 6.3494e-08, 1.7686e-08, 1.9494e-07, 2.6842e-08,\n 7.5393e-07, 6.8936e-08, 3.3463e-08, 4.9618e-07, 2.8029e-07, 5.8166e-09,\n 5.3388e-07, 3.1400e-07, 3.3751e-07, 1.0341e-07, 6.2793e-08, 4.1826e-07,\n 3.8525e-07, 5.8682e-09, 7.4319e-09, 3.3357e-07, 5.2462e-07, 7.6151e-08,\n 9.8264e-08, 1.7181e-08, 2.8492e-08, 2.7788e-07, 7.0001e-09, 9.2049e-08,\n 2.8361e-07, 1.7493e-08, 4.4224e-08, 3.1478e-08, 1.5793e-07, 6.7698e-08,\n 7.8084e-08, 1.7467e-08, 4.4596e-07, 5.6762e-09, 6.6407e-08, 3.3711e-08,\n 2.3293e-08, 1.3881e-07, 5.5323e-07, 7.7371e-08, 7.2340e-09, 6.3426e-08,\n 2.3439e-08, 5.6109e-08, 1.3783e-07, 4.4989e-09, 1.6254e-07, 7.3723e-09,\n 6.2395e-08, 2.6423e-07, 3.3646e-08, 6.9954e-08, 7.4529e-09, 6.8983e-08,\n 1.8421e-07, 5.0495e-08, 8.5320e-08, 5.5389e-08, 4.0805e-08, 5.0930e-09,\n 2.3894e-08, 3.7993e-08, 4.5217e-08, 1.5002e-07, 1.2803e-07, 8.1742e-08,\n 1.0865e-07, 3.1180e-08, 7.7220e-08, 1.8040e-08, 5.1611e-07, 3.5003e-08,\n 2.4924e-08, 2.9506e-08, 1.2441e-07, 1.3820e-07, 1.3376e-07, 4.0125e-07,\n 5.4757e-09, 2.9632e-07, 2.6618e-07, 2.3720e-08, 2.1756e-07, 3.0109e-08,\n 5.5363e-08, 9.8924e-08, 7.4126e-08, 1.0388e-07, 1.9719e-07, 2.6302e-07,\n 1.0877e-08, 5.1347e-08, 1.5959e-08, 1.9303e-09, 1.0065e-08, 3.5468e-08,\n 4.1082e-09, 3.9495e-07, 5.1019e-08, 1.1067e-07, 4.0296e-07, 7.4607e-08,\n 9.3618e-08, 5.0193e-07, 3.3082e-09, 2.6848e-08, 3.3229e-08, 1.1503e-08,\n 3.2994e-08, 3.3378e-07, 2.3675e-07, 1.1434e-07, 1.8223e-07, 2.8465e-07,\n 2.9361e-08, 7.8353e-08, 6.8930e-09, 9.0132e-08, 1.5427e-07, 7.2014e-08,\n 1.1128e-07, 9.5021e-08, 7.3925e-09, 1.4741e-07, 1.2453e-07, 7.0454e-09,\n 8.5824e-08, 5.2822e-08, 1.6440e-08, 2.9172e-07, 2.2185e-08, 5.2412e-07,\n 2.2036e-08, 6.6092e-09, 1.4314e-08, 1.1004e-08, 2.0478e-07, 1.4404e-07,\n 7.2996e-08, 6.0871e-08, 4.2465e-07, 5.7168e-08, 1.0781e-07, 3.7227e-09,\n 4.8732e-07, 2.9125e-08, 2.0122e-07, 7.4551e-08, 1.4407e-07, 8.7835e-08,\n 7.4960e-08, 5.5771e-09, 1.8610e-08, 2.3695e-07, 1.3382e-07, 3.4977e-07,\n 3.2009e-07, 3.4581e-07, 5.0133e-08, 1.1135e-07, 5.1109e-08, 3.7446e-07,\n 2.4297e-07, 2.3057e-07, 3.3545e-07, 6.1016e-09, 1.0207e-08, 1.7156e-08,\n 1.0093e-07, 3.5179e-07, 1.3005e-07, 2.7867e-08, 6.8092e-08, 5.0136e-07,\n 6.4484e-08, 2.0982e-07, 1.1010e-08, 1.6394e-07, 2.4582e-07, 1.8617e-08,\n 3.4775e-08, 7.2643e-08, 1.0367e-07, 3.3170e-09, 1.6366e-07, 5.4659e-09,\n 1.2116e-08, 3.6359e-08, 1.7109e-07, 6.8750e-09, 1.1901e-07, 3.6695e-08,\n 4.1849e-09, 5.1387e-09, 2.6948e-07, 6.0331e-09, 6.6485e-08, 2.4814e-07,\n 3.6984e-08, 1.3693e-07, 1.8208e-08, 7.6771e-08], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([-7.2041e-17, -2.0985e-17, 6.1219e-17, -6.3800e-18, 7.1046e-17,\n -3.8188e-17, 5.2277e-17, -4.3406e-17, 1.1265e-16, -4.1799e-18,\n 2.2257e-17, -4.1489e-17, -4.6978e-17, -2.4665e-17, -1.1242e-16,\n -8.0403e-17, 2.7151e-17, -7.2513e-17, -5.4611e-18, 3.9211e-17,\n -8.0539e-17, -4.5402e-17, -1.2137e-16, 2.2777e-17, -8.3656e-17,\n -4.8879e-17, 4.7865e-17, -2.5249e-17, 3.3360e-17, 7.5774e-17,\n 7.9040e-17, 5.2441e-17, 7.0113e-17, -3.9198e-17, 6.8257e-17,\n 6.1769e-17, 1.1501e-16, 7.3381e-17, 7.8825e-18, 7.6049e-17,\n 2.6098e-17, 6.5098e-17, 3.7255e-17, -5.9676e-17, 8.2327e-17,\n -4.9081e-17, 2.1339e-17, -7.4023e-17, 1.8196e-17, -4.7165e-18,\n -7.0649e-17, 3.0405e-18, 6.1731e-17, 2.0018e-17, 1.2197e-16,\n -9.6676e-17, -1.9761e-17, 7.0829e-17, -8.0859e-17, 7.8816e-17,\n 8.3378e-17, -7.6842e-17, 3.1570e-17, 5.4540e-17, -6.2287e-17,\n -5.2599e-17, -1.1858e-16, -6.6773e-17, -5.4091e-17, -2.1632e-17,\n -1.0522e-16, 1.1120e-16, 9.1730e-17, 4.8746e-17, -8.0950e-17,\n -6.2622e-17, -6.5264e-17, -4.6919e-17, -7.4227e-17, 2.3641e-17,\n -9.8854e-17, -7.0618e-18, -4.8549e-17, 3.4059e-17, 1.0272e-16,\n -3.7131e-17, 2.0709e-17, 5.7843e-17, -5.2295e-18, -4.3365e-17,\n 3.9477e-17, 6.9483e-17, 5.8773e-18, -9.1689e-17, 3.6792e-17,\n -4.4942e-17, 3.4618e-17, -3.1481e-17, 9.1223e-17, 4.1789e-17,\n 1.2385e-17, -4.2214e-17, -4.9480e-17, 7.1977e-17, 9.5031e-17,\n 4.9797e-17, 6.1251e-17, -1.5448e-17, -4.5310e-17, 1.0114e-16,\n -5.8864e-17, -9.7699e-17, 6.9752e-17, -3.3727e-17, -1.5573e-18,\n -5.9551e-17, 3.4281e-17, -5.3732e-17, 5.3347e-17, 5.9231e-17,\n -4.4242e-17, -3.1340e-18, -7.9299e-17, 3.1131e-17, -1.0978e-16,\n -1.9341e-17, 9.0902e-17, -1.8047e-17, -7.8429e-17, -3.2488e-17,\n 9.4477e-17, -5.4496e-17, -7.5881e-17, -6.8549e-17, -8.3586e-17,\n 8.7875e-17, 6.5514e-17, -3.4598e-17, 6.2686e-17, 5.7963e-17,\n -1.0929e-16, 7.4763e-17, -4.8610e-17, -1.0851e-16, -4.2971e-17,\n 2.6096e-17, -2.0703e-17, 4.2898e-17, 1.1417e-16, 9.1861e-17,\n 2.5790e-17, -1.1839e-17, -9.7153e-17, 4.6899e-17, 9.2944e-17,\n 5.8746e-18, 7.0746e-17, -7.8615e-17, -3.3455e-17, 4.8555e-17,\n 6.2420e-17, -5.2375e-17, -4.5254e-17, 9.5934e-17, 4.1189e-18,\n 4.3501e-17, -1.0982e-16, 7.4933e-17, -1.6652e-18, 1.0738e-16,\n -1.1574e-17, 7.9524e-18, -1.5974e-16, 6.8354e-17, -8.8769e-17,\n 1.5403e-16, 6.5677e-17, 5.7011e-17, 4.0717e-18, -1.1042e-16,\n -3.5377e-17, -4.9228e-17, 7.5267e-17, 8.8923e-18, -5.5754e-17,\n 2.9749e-17, -1.2390e-16, 4.1188e-17, 6.2240e-17, -8.5771e-17,\n -6.4473e-17, -1.1842e-16, 2.9114e-17, -8.7798e-17, -7.7670e-17,\n 3.3598e-17, -2.1786e-17, 4.5737e-17, 6.7833e-17, 3.9133e-17,\n 3.1462e-17, 7.6714e-17, 8.9101e-17, -1.3620e-16, 8.9987e-17,\n 2.8375e-17, 4.5988e-17, 1.1988e-16, 8.7927e-17, 2.5680e-17,\n -7.8729e-17, -4.2626e-17, 5.6410e-17, 7.7140e-17, 7.2962e-17,\n 2.8186e-17, -9.2253e-17, 8.8193e-17, -4.7683e-17, 7.8121e-17,\n -2.6039e-17, 1.2821e-16, 5.0464e-17, 4.6111e-17, -3.9514e-17,\n 3.4420e-17, -7.1968e-17, -1.1558e-16, 8.1470e-17, 9.6238e-17,\n 2.7886e-17, -7.3666e-17, -7.0858e-17, -8.9760e-17, 8.1065e-17,\n -8.7064e-17, 5.2294e-17, 9.1214e-18, -7.3811e-17, -5.3494e-17,\n -7.6805e-17, -3.9681e-17, -1.2048e-17, -5.7648e-17, -7.9222e-17,\n 9.9199e-19, -5.4788e-17, 1.3900e-17, 4.1815e-17, 5.8570e-17,\n 4.0578e-17, 1.2207e-16, -2.2381e-17, 7.1138e-17, 1.0281e-16,\n 2.8514e-17], device='cuda:0')", + "exp_avg_sq": "tensor([2.7198e-08, 3.6674e-08, 8.9418e-09, 1.5228e-07, 1.9244e-07, 6.8197e-09,\n 8.4560e-09, 2.0640e-09, 9.5295e-09, 1.0966e-09, 2.1188e-09, 8.3763e-09,\n 1.1426e-08, 4.1000e-08, 9.5722e-09, 2.1894e-09, 4.6271e-08, 4.7991e-08,\n 2.0015e-08, 2.8678e-08, 1.7253e-07, 1.1644e-07, 2.1197e-08, 8.9350e-08,\n 4.5807e-08, 1.1167e-07, 1.1489e-09, 6.5509e-08, 5.2390e-09, 5.5904e-09,\n 1.8034e-08, 1.2230e-08, 6.2952e-09, 6.3307e-09, 1.1762e-07, 6.7943e-09,\n 3.2123e-07, 2.9249e-08, 4.4382e-08, 1.7471e-08, 4.2576e-09, 8.6681e-09,\n 7.5641e-09, 3.3779e-08, 4.9733e-08, 7.9446e-08, 3.3244e-09, 2.0401e-08,\n 4.6192e-09, 9.5740e-09, 1.8144e-08, 5.0538e-09, 5.5705e-08, 7.6704e-09,\n 2.1544e-07, 1.9699e-08, 9.5624e-09, 1.4179e-07, 8.0094e-08, 1.6622e-09,\n 1.5256e-07, 8.9728e-08, 9.6447e-08, 2.9551e-08, 1.7944e-08, 1.1952e-07,\n 1.1009e-07, 1.6769e-09, 2.1237e-09, 9.5321e-08, 1.4992e-07, 2.1761e-08,\n 2.8080e-08, 4.9095e-09, 8.1417e-09, 7.9408e-08, 2.0003e-09, 2.6304e-08,\n 8.1044e-08, 4.9987e-09, 1.2637e-08, 8.9950e-09, 4.5129e-08, 1.9345e-08,\n 2.2313e-08, 4.9914e-09, 1.2744e-07, 1.6220e-09, 1.8976e-08, 9.6332e-09,\n 6.6562e-09, 3.9666e-08, 1.5809e-07, 2.2109e-08, 2.0672e-09, 1.8125e-08,\n 6.6979e-09, 1.6033e-08, 3.9386e-08, 1.2856e-09, 4.6448e-08, 2.1067e-09,\n 1.7830e-08, 7.5504e-08, 9.6146e-09, 1.9990e-08, 2.1297e-09, 1.9712e-08,\n 5.2639e-08, 1.4429e-08, 2.4381e-08, 1.5828e-08, 1.1660e-08, 1.4554e-09,\n 6.8278e-09, 1.0857e-08, 1.2921e-08, 4.2869e-08, 3.6585e-08, 2.3358e-08,\n 3.1046e-08, 8.9098e-09, 2.2066e-08, 5.1551e-09, 1.4748e-07, 1.0002e-08,\n 7.1222e-09, 8.4315e-09, 3.5552e-08, 3.9493e-08, 3.8223e-08, 1.1466e-07,\n 1.5647e-09, 8.4675e-08, 7.6062e-08, 6.7781e-09, 6.2170e-08, 8.6040e-09,\n 1.5820e-08, 2.8268e-08, 2.1182e-08, 2.9684e-08, 5.6350e-08, 7.5159e-08,\n 3.1081e-09, 1.4673e-08, 4.5603e-09, 5.5160e-10, 2.8761e-09, 1.0135e-08,\n 1.1739e-09, 1.1286e-07, 1.4579e-08, 3.1626e-08, 1.1515e-07, 2.1319e-08,\n 2.6752e-08, 1.4343e-07, 9.4533e-10, 7.6719e-09, 9.4955e-09, 3.2872e-09,\n 9.4284e-09, 9.5380e-08, 6.7654e-08, 3.2674e-08, 5.2073e-08, 8.1340e-08,\n 8.3901e-09, 2.2390e-08, 1.9697e-09, 2.5756e-08, 4.4083e-08, 2.0579e-08,\n 3.1799e-08, 2.7153e-08, 2.1125e-09, 4.2123e-08, 3.5587e-08, 2.0133e-09,\n 2.4525e-08, 1.5094e-08, 4.6980e-09, 8.3362e-08, 6.3395e-09, 1.4977e-07,\n 6.2969e-09, 1.8886e-09, 4.0902e-09, 3.1445e-09, 5.8518e-08, 4.1160e-08,\n 2.0859e-08, 1.7394e-08, 1.2135e-07, 1.6336e-08, 3.0808e-08, 1.0638e-09,\n 1.3926e-07, 8.3226e-09, 5.7500e-08, 2.1304e-08, 4.1170e-08, 2.5100e-08,\n 2.1421e-08, 1.5937e-09, 5.3179e-09, 6.7712e-08, 3.8240e-08, 9.9951e-08,\n 9.1468e-08, 9.8819e-08, 1.4326e-08, 3.1818e-08, 1.4605e-08, 1.0701e-07,\n 6.9429e-08, 6.5889e-08, 9.5857e-08, 1.7436e-09, 2.9169e-09, 4.9026e-09,\n 2.8843e-08, 1.0053e-07, 3.7162e-08, 7.9633e-09, 1.9458e-08, 1.4327e-07,\n 1.8427e-08, 5.9957e-08, 3.1462e-09, 4.6846e-08, 7.0245e-08, 5.3199e-09,\n 9.9373e-09, 2.0758e-08, 2.9624e-08, 9.4786e-10, 4.6767e-08, 1.5619e-09,\n 3.4622e-09, 1.0390e-08, 4.8889e-08, 1.9646e-09, 3.4008e-08, 1.0486e-08,\n 1.1959e-09, 1.4684e-09, 7.7006e-08, 1.7240e-09, 1.8999e-08, 7.0909e-08,\n 1.0568e-08, 3.9129e-08, 5.2031e-09, 2.1938e-08], device='cuda:0')" }, "56": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-4.8873e-17, -7.7930e-17, 6.5289e-17, ..., 1.2857e-16,\n 8.1527e-17, -4.8632e-17],\n [-1.4067e-17, -2.2315e-17, 1.8752e-17, ..., 3.7237e-17,\n 2.3708e-17, -1.3836e-17],\n ...,\n [ 1.0783e-16, 1.7422e-16, -1.4148e-16, ..., -2.7605e-16,\n -1.7200e-16, 1.0645e-16],\n [ 4.3039e-16, 6.9788e-16, -5.6435e-16, ..., -1.1077e-15,\n -6.9303e-16, 4.2962e-16],\n [ 1.2819e-16, 2.0672e-16, -1.6896e-16, ..., -3.3010e-16,\n -2.0693e-16, 1.2771e-16]], device='cuda:0')", - "exp_avg_sq": "tensor([[9.3774e-08, 3.5270e-07, 8.8265e-07, ..., 3.1072e-06, 1.3988e-06,\n 2.1122e-07],\n [1.1774e-09, 5.0600e-09, 1.0905e-08, ..., 3.8300e-08, 1.7166e-08,\n 2.8747e-09],\n [1.0806e-09, 3.9424e-09, 1.0142e-08, ..., 3.5946e-08, 1.6151e-08,\n 2.3641e-09],\n ...,\n [1.3347e-09, 5.1016e-09, 1.3155e-08, ..., 4.5195e-08, 2.0699e-08,\n 3.2783e-09],\n [2.0899e-09, 7.1960e-09, 1.5531e-08, ..., 6.5190e-08, 2.6336e-08,\n 2.9089e-09],\n [1.1010e-09, 3.8410e-09, 9.5786e-09, ..., 3.5098e-08, 1.5443e-08,\n 2.1246e-09]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-4.8515e-17, -7.6114e-17, 5.8360e-17, ..., 1.1684e-16,\n 7.1090e-17, -4.4626e-17],\n [-1.3608e-17, -2.1864e-17, 1.7054e-17, ..., 3.3081e-17,\n 2.0604e-17, -1.2967e-17],\n ...,\n [ 1.0285e-16, 1.6419e-16, -1.2721e-16, ..., -2.4293e-16,\n -1.4863e-16, 9.2089e-17],\n [ 4.0456e-16, 6.4733e-16, -5.0491e-16, ..., -9.7651e-16,\n -6.0398e-16, 3.7301e-16],\n [ 1.2114e-16, 1.9219e-16, -1.5055e-16, ..., -2.8861e-16,\n -1.7824e-16, 1.1015e-16]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.6797e-08, 1.0079e-07, 2.5222e-07, ..., 8.8790e-07, 3.9970e-07,\n 6.0358e-08],\n [3.3646e-10, 1.4459e-09, 3.1162e-09, ..., 1.0944e-08, 4.9054e-09,\n 8.2148e-10],\n [3.0878e-10, 1.1266e-09, 2.8981e-09, ..., 1.0272e-08, 4.6152e-09,\n 6.7556e-10],\n ...,\n [3.8139e-10, 1.4578e-09, 3.7590e-09, ..., 1.2915e-08, 5.9150e-09,\n 9.3679e-10],\n [5.9720e-10, 2.0563e-09, 4.4381e-09, ..., 1.8629e-08, 7.5257e-09,\n 8.3123e-10],\n [3.1463e-10, 1.0976e-09, 2.7372e-09, ..., 1.0029e-08, 4.4131e-09,\n 6.0712e-10]], device='cuda:0')" }, "57": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, -5.8315e-17, -1.6842e-17, 6.2430e-17, 6.7025e-17,\n 1.1046e-16, 1.3133e-16, 1.2696e-16, 5.0712e-16, 1.5155e-16],\n device='cuda:0')", - "exp_avg_sq": "tensor([2.7666e-05, 3.3238e-07, 3.1979e-07, 2.5161e-07, 3.0231e-07, 2.8056e-07,\n 4.7033e-07, 4.2063e-07, 4.5437e-07, 2.9584e-07], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([-5.6052e-45, -5.2766e-17, -1.5072e-17, -6.4661e-18, 5.7143e-17,\n 9.5762e-17, 1.1388e-16, 1.1203e-16, 4.4691e-16, 1.3266e-16],\n device='cuda:0')", + "exp_avg_sq": "tensor([7.9058e-06, 9.4979e-08, 9.1382e-08, 7.1900e-08, 8.6386e-08, 8.0173e-08,\n 1.3440e-07, 1.2020e-07, 1.2984e-07, 8.4540e-08], device='cuda:0')" }, "58": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-4.8332e-17, -7.7068e-17, 6.4567e-17, ..., 1.2714e-16,\n 8.0625e-17, -4.8094e-17],\n [-1.3911e-17, -2.2068e-17, 1.8544e-17, ..., 3.6825e-17,\n 2.3446e-17, -1.3683e-17],\n ...,\n [ 1.0663e-16, 1.7229e-16, -1.3992e-16, ..., -2.7300e-16,\n -1.7009e-16, 1.0527e-16],\n [ 4.2563e-16, 6.9016e-16, -5.5811e-16, ..., -1.0954e-15,\n -6.8536e-16, 4.2487e-16],\n [ 1.2677e-16, 2.0443e-16, -1.6709e-16, ..., -3.2644e-16,\n -2.0464e-16, 1.2630e-16]], device='cuda:0')", - "exp_avg_sq": "tensor([[9.3293e-08, 3.5127e-07, 8.8003e-07, ..., 3.0933e-06, 1.3939e-06,\n 2.1098e-07],\n [1.1709e-09, 5.0395e-09, 1.0873e-08, ..., 3.8117e-08, 1.7104e-08,\n 2.8717e-09],\n [1.0751e-09, 3.9262e-09, 1.0112e-08, ..., 3.5787e-08, 1.6094e-08,\n 2.3613e-09],\n ...,\n [1.3280e-09, 5.0817e-09, 1.3117e-08, ..., 4.4999e-08, 2.0630e-08,\n 3.2749e-09],\n [2.0784e-09, 7.1589e-09, 1.5473e-08, ..., 6.4864e-08, 2.6225e-08,\n 2.9032e-09],\n [1.0954e-09, 3.8242e-09, 9.5485e-09, ..., 3.4937e-08, 1.5387e-08,\n 2.1219e-09]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-4.7978e-17, -7.5272e-17, 5.7714e-17, ..., 1.1555e-16,\n 7.0304e-17, -4.4133e-17],\n [-1.3458e-17, -2.1622e-17, 1.6865e-17, ..., 3.2715e-17,\n 2.0376e-17, -1.2824e-17],\n ...,\n [ 1.0171e-16, 1.6237e-16, -1.2580e-16, ..., -2.4024e-16,\n -1.4698e-16, 9.1070e-17],\n [ 4.0008e-16, 6.4017e-16, -4.9932e-16, ..., -9.6570e-16,\n -5.9729e-16, 3.6888e-16],\n [ 1.1980e-16, 1.9007e-16, -1.4888e-16, ..., -2.8541e-16,\n -1.7627e-16, 1.0894e-16]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.6659e-08, 1.0038e-07, 2.5147e-07, ..., 8.8394e-07, 3.9831e-07,\n 6.0290e-08],\n [3.3461e-10, 1.4401e-09, 3.1070e-09, ..., 1.0892e-08, 4.8877e-09,\n 8.2061e-10],\n [3.0721e-10, 1.1220e-09, 2.8895e-09, ..., 1.0226e-08, 4.5991e-09,\n 6.7477e-10],\n ...,\n [3.7948e-10, 1.4521e-09, 3.7483e-09, ..., 1.2859e-08, 5.8951e-09,\n 9.3582e-10],\n [5.9392e-10, 2.0457e-09, 4.4215e-09, ..., 1.8536e-08, 7.4940e-09,\n 8.2960e-10],\n [3.1301e-10, 1.0928e-09, 2.7286e-09, ..., 9.9837e-09, 4.3970e-09,\n 6.0634e-10]], device='cuda:0')" }, "59": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, -5.7670e-17, -1.6656e-17, 6.1739e-17, 6.6284e-17,\n 1.0924e-16, 1.2988e-16, 1.2556e-16, 5.0151e-16, 1.4987e-16],\n device='cuda:0')", - "exp_avg_sq": "tensor([2.7598e-05, 3.3159e-07, 3.1899e-07, 2.5106e-07, 3.0165e-07, 2.7988e-07,\n 4.6926e-07, 4.1963e-07, 4.5295e-07, 2.9507e-07], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([-5.6052e-45, -5.2182e-17, -1.4905e-17, -6.3946e-18, 5.6511e-17,\n 9.4702e-17, 1.1262e-16, 1.1079e-16, 4.4196e-16, 1.3119e-16],\n device='cuda:0')", + "exp_avg_sq": "tensor([7.8864e-06, 9.4753e-08, 9.1155e-08, 7.1741e-08, 8.6200e-08, 7.9977e-08,\n 1.3409e-07, 1.1991e-07, 1.2943e-07, 8.4318e-08], device='cuda:0')" }, "60": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-4.8943e-17, -7.8043e-17, 6.5384e-17, ..., 1.2875e-16,\n 8.1645e-17, -4.8702e-17],\n [-1.4087e-17, -2.2347e-17, 1.8779e-17, ..., 3.7291e-17,\n 2.3742e-17, -1.3856e-17],\n ...,\n [ 1.0798e-16, 1.7447e-16, -1.4169e-16, ..., -2.7645e-16,\n -1.7224e-16, 1.0660e-16],\n [ 4.3101e-16, 6.9889e-16, -5.6516e-16, ..., -1.1093e-15,\n -6.9403e-16, 4.3024e-16],\n [ 1.2838e-16, 2.0702e-16, -1.6921e-16, ..., -3.3057e-16,\n -2.0722e-16, 1.2790e-16]], device='cuda:0')", - "exp_avg_sq": "tensor([[9.3778e-08, 3.5272e-07, 8.8266e-07, ..., 3.1073e-06, 1.3988e-06,\n 2.1122e-07],\n [1.1775e-09, 5.0603e-09, 1.0905e-08, ..., 3.8301e-08, 1.7167e-08,\n 2.8748e-09],\n [1.0806e-09, 3.9426e-09, 1.0142e-08, ..., 3.5947e-08, 1.6151e-08,\n 2.3641e-09],\n ...,\n [1.3347e-09, 5.1019e-09, 1.3155e-08, ..., 4.5197e-08, 2.0700e-08,\n 3.2783e-09],\n [2.0900e-09, 7.1966e-09, 1.5531e-08, ..., 6.5194e-08, 2.6337e-08,\n 2.9090e-09],\n [1.1011e-09, 3.8412e-09, 9.5788e-09, ..., 3.5099e-08, 1.5444e-08,\n 2.1247e-09]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-4.8585e-17, -7.6224e-17, 5.8444e-17, ..., 1.1701e-16,\n 7.1193e-17, -4.4691e-17],\n [-1.3628e-17, -2.1896e-17, 1.7078e-17, ..., 3.3129e-17,\n 2.0634e-17, -1.2986e-17],\n ...,\n [ 1.0300e-16, 1.6443e-16, -1.2739e-16, ..., -2.4328e-16,\n -1.4884e-16, 9.2222e-17],\n [ 4.0514e-16, 6.4826e-16, -5.0564e-16, ..., -9.7792e-16,\n -6.0485e-16, 3.7355e-16],\n [ 1.2131e-16, 1.9247e-16, -1.5077e-16, ..., -2.8902e-16,\n -1.7850e-16, 1.1031e-16]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.6798e-08, 1.0079e-07, 2.5223e-07, ..., 8.8794e-07, 3.9971e-07,\n 6.0359e-08],\n [3.3648e-10, 1.4460e-09, 3.1163e-09, ..., 1.0945e-08, 4.9056e-09,\n 8.2149e-10],\n [3.0879e-10, 1.1266e-09, 2.8982e-09, ..., 1.0272e-08, 4.6153e-09,\n 6.7557e-10],\n ...,\n [3.8141e-10, 1.4579e-09, 3.7591e-09, ..., 1.2915e-08, 5.9151e-09,\n 9.3681e-10],\n [5.9724e-10, 2.0565e-09, 4.4382e-09, ..., 1.8630e-08, 7.5260e-09,\n 8.3126e-10],\n [3.1465e-10, 1.0977e-09, 2.7372e-09, ..., 1.0030e-08, 4.4132e-09,\n 6.0714e-10]], device='cuda:0')" }, "61": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, -5.8399e-17, -1.6866e-17, 6.2520e-17, 6.7122e-17,\n 1.1062e-16, 1.3152e-16, 1.2715e-16, 5.0785e-16, 1.5177e-16],\n device='cuda:0')", - "exp_avg_sq": "tensor([2.7666e-05, 3.3238e-07, 3.1979e-07, 2.5161e-07, 3.0231e-07, 2.8057e-07,\n 4.7033e-07, 4.2063e-07, 4.5438e-07, 2.9585e-07], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([-5.6052e-45, -5.2842e-17, -1.5094e-17, -6.4755e-18, 5.7226e-17,\n 9.5900e-17, 1.1404e-16, 1.1219e-16, 4.4755e-16, 1.3285e-16],\n device='cuda:0')", + "exp_avg_sq": "tensor([7.9059e-06, 9.4981e-08, 9.1382e-08, 7.1900e-08, 8.6386e-08, 8.0174e-08,\n 1.3440e-07, 1.2020e-07, 1.2984e-07, 8.4541e-08], device='cuda:0')" }, "8": { - "step": "tensor(1252.)", - "exp_avg": "tensor([[ 4.2400e-05, 3.4110e-06, 1.9102e-06, ..., 7.4579e-06,\n -1.4278e-05, -5.0292e-06],\n [ 1.0430e-05, -1.0075e-05, 1.1921e-05, ..., 1.0706e-06,\n 1.3771e-06, 3.8012e-06],\n [-3.2101e-05, 9.4789e-08, -6.4011e-06, ..., -6.8380e-06,\n 9.7407e-07, -4.8447e-06],\n ...,\n [-9.8196e-06, -5.0250e-07, 4.8766e-06, ..., 1.1707e-05,\n 4.5277e-06, 9.3883e-07],\n [-7.8619e-06, -4.4446e-05, 5.0702e-06, ..., 1.2779e-05,\n -8.1896e-06, -2.8105e-06],\n [ 2.5025e-06, 5.8407e-06, -2.0434e-06, ..., 3.7213e-06,\n 6.1410e-06, 3.8240e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.4308e-09, 7.1797e-10, 5.2256e-10, ..., 7.0347e-10, 1.2880e-09,\n 6.4716e-10],\n [1.9560e-09, 1.2002e-09, 1.8966e-10, ..., 1.4158e-09, 4.4397e-10,\n 4.7350e-10],\n [2.6449e-09, 5.4565e-10, 1.9815e-10, ..., 2.9532e-10, 3.0997e-10,\n 6.4282e-10],\n ...,\n [1.4150e-09, 3.7737e-10, 2.2751e-10, ..., 4.9653e-10, 3.8778e-10,\n 3.4507e-10],\n [1.7239e-09, 6.3335e-09, 7.5279e-10, ..., 2.5844e-09, 9.5264e-10,\n 5.9761e-10],\n [1.5207e-09, 3.0333e-10, 7.2221e-10, ..., 1.2095e-09, 6.5814e-10,\n 2.8987e-10]], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([[-3.0737e-06, 8.7520e-06, 1.4637e-06, ..., -4.0705e-06,\n -7.4631e-06, 2.9114e-06],\n [-1.2266e-05, 4.5630e-06, 6.3181e-07, ..., 4.3212e-07,\n -6.4283e-07, 1.2137e-06],\n [-4.4418e-05, 2.1131e-06, -1.8541e-07, ..., -4.5932e-07,\n -2.3614e-06, -1.0473e-06],\n ...,\n [-4.5549e-06, -1.0688e-06, 6.8834e-07, ..., 6.0025e-08,\n 1.2434e-06, 9.7536e-07],\n [-9.4261e-06, 1.5845e-05, -4.1200e-06, ..., -1.0249e-04,\n -1.6453e-07, 1.3206e-06],\n [-1.1960e-05, 7.6448e-07, 2.6573e-06, ..., 3.1933e-06,\n 3.4532e-06, -8.9025e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.9218e-10, 5.3232e-10, 3.4147e-10, ..., 4.5641e-10, 8.0567e-10,\n 4.2613e-10],\n [1.8813e-09, 1.0928e-09, 1.1016e-10, ..., 8.3258e-10, 2.9235e-10,\n 3.4493e-10],\n [2.5027e-09, 3.8070e-10, 1.2418e-10, ..., 1.7405e-10, 2.3603e-10,\n 6.0563e-10],\n ...,\n [1.0668e-09, 3.4138e-10, 1.5454e-10, ..., 3.3948e-10, 2.4352e-10,\n 1.8536e-10],\n [1.1052e-09, 6.7410e-09, 6.6126e-10, ..., 3.3903e-09, 6.8576e-10,\n 3.8274e-10],\n [7.6874e-10, 1.6566e-10, 4.9535e-10, ..., 7.7665e-10, 4.1493e-10,\n 2.1824e-10]], device='cuda:0')" }, "9": { - "step": "tensor(1252.)", - "exp_avg": "tensor([ 3.0944e-05, -9.5820e-05, -1.7820e-03, ..., -5.6558e-05,\n -9.8920e-04, 1.2117e-04], device='cuda:0')", - "exp_avg_sq": "tensor([8.6634e-06, 1.0399e-05, 1.2091e-05, ..., 1.1943e-05, 1.4601e-05,\n 1.2225e-05], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([ 0.0010, -0.0006, -0.0004, ..., 0.0006, -0.0019, 0.0002],\n device='cuda:0')", + "exp_avg_sq": "tensor([6.1443e-06, 6.9887e-06, 8.5625e-06, ..., 7.2287e-06, 1.0823e-05,\n 7.4309e-06], device='cuda:0')" }, "10": { + "step": "tensor(2504.)", + "exp_avg": "tensor([[-2.6008e-06, -5.7844e-07, 5.4990e-06, ..., -2.8300e-06,\n 2.0379e-06, -7.4494e-07],\n [-1.2116e-05, 3.6621e-06, -9.3010e-07, ..., 3.4764e-06,\n 9.0775e-07, 6.1111e-06],\n [-1.6457e-05, 9.5690e-07, 7.5930e-06, ..., -4.3626e-06,\n -1.0885e-05, 3.2208e-06],\n ...,\n [ 9.7670e-06, 4.9667e-06, -1.5245e-06, ..., -2.3862e-06,\n 2.6722e-06, 3.9367e-06],\n [ 3.9343e-06, -8.0080e-06, -2.4377e-05, ..., 1.7220e-06,\n -1.5819e-05, -2.9216e-06],\n [ 9.2034e-06, -2.2727e-06, -7.4299e-06, ..., 2.4760e-06,\n -7.1631e-06, -2.0116e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.2536e-10, 3.3050e-10, 2.9918e-10, ..., 2.9444e-10, 2.4096e-10,\n 3.1632e-10],\n [3.9903e-10, 4.0058e-10, 5.1248e-10, ..., 3.8727e-10, 4.2262e-10,\n 3.4943e-10],\n [4.2687e-10, 4.6896e-10, 7.2703e-10, ..., 6.3673e-10, 5.2520e-10,\n 2.8824e-10],\n ...,\n [4.0164e-10, 4.2705e-10, 5.0906e-10, ..., 4.1937e-10, 4.2654e-10,\n 3.5511e-10],\n [4.4536e-10, 5.3699e-10, 7.3895e-10, ..., 5.0692e-10, 6.3258e-10,\n 3.8585e-10],\n [3.7556e-10, 5.5035e-10, 6.3684e-10, ..., 4.0670e-10, 3.6536e-10,\n 5.0182e-10]], device='cuda:0')" + }, + "11": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[ 2.2400e-06, -1.7493e-06, -3.3295e-06, ..., -8.3568e-06,\n -4.0441e-06, 4.2276e-06],\n [-7.1236e-06, -1.8737e-08, 4.0134e-07, ..., 9.6989e-07,\n -1.0233e-06, -5.1237e-08],\n [ 5.3244e-06, -1.0476e-05, 1.7954e-06, ..., -8.1612e-07,\n 5.8334e-07, 1.6297e-06],\n ...,\n [ 1.5747e-06, 2.8610e-06, 3.8701e-08, ..., 1.1658e-06,\n 4.2142e-06, -4.0844e-07],\n [ 1.4537e-05, 5.1508e-06, 5.2313e-06, ..., 2.7217e-06,\n -3.3382e-07, -9.8494e-07],\n [-2.5154e-05, 1.2677e-06, 5.2210e-06, ..., 3.0719e-07,\n 3.3700e-07, -9.8608e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.5008e-10, 2.7738e-10, 1.8933e-10, ..., 4.5408e-10, 2.3169e-10,\n 3.5987e-10],\n [1.1718e-09, 4.0080e-10, 1.7888e-10, ..., 7.0110e-10, 3.3457e-10,\n 1.9048e-10],\n [8.0587e-10, 7.8096e-10, 2.6877e-10, ..., 4.9713e-10, 4.2692e-10,\n 1.1956e-10],\n ...,\n [5.9169e-10, 2.8799e-10, 9.3536e-10, ..., 2.4158e-10, 3.9146e-10,\n 1.2101e-09],\n [1.1091e-09, 2.2158e-10, 3.3988e-10, ..., 2.6035e-10, 2.4770e-10,\n 5.5635e-10],\n [1.9346e-09, 1.4396e-09, 2.6213e-10, ..., 6.2057e-10, 4.5475e-10,\n 4.3175e-10]], device='cuda:0')" + }, + "12": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.5173e-04, -1.4769e-03, -1.5285e-05, ..., 8.7728e-04,\n 1.2964e-03, 4.4651e-04], device='cuda:0')", + "exp_avg_sq": "tensor([7.4165e-06, 7.0857e-06, 5.5132e-06, ..., 9.0214e-06, 6.7439e-06,\n 8.6653e-06], device='cuda:0')" + }, + "13": { "step": "tensor(1252.)", - "exp_avg": "tensor([[ 4.1450e-07, -8.9020e-06, 4.4521e-06, ..., -4.1807e-06,\n -4.7499e-06, 7.5627e-07],\n [-1.0614e-05, -5.9561e-06, -1.6540e-05, ..., -1.2164e-06,\n 1.3629e-05, -5.2408e-06],\n [ 2.3761e-06, 4.6688e-06, -9.1250e-07, ..., 1.6132e-05,\n 1.1219e-05, -3.6616e-06],\n ...,\n [-1.1223e-07, -5.9672e-06, -2.3899e-06, ..., -1.1755e-05,\n -2.0804e-06, -3.9345e-06],\n [ 7.4505e-06, 1.6177e-05, 9.4020e-06, ..., 1.2093e-07,\n -4.0877e-06, 1.1859e-06],\n [ 3.4426e-06, 5.9773e-07, 1.0036e-05, ..., 4.7861e-06,\n -6.4719e-06, 1.0038e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.1936e-10, 7.6796e-10, 5.1056e-10, ..., 6.7066e-10, 4.3758e-10,\n 6.8136e-10],\n [6.7942e-10, 7.5176e-10, 8.5009e-10, ..., 7.2837e-10, 7.1216e-10,\n 6.3688e-10],\n [6.9030e-10, 8.2528e-10, 1.0449e-09, ..., 1.0442e-09, 8.4600e-10,\n 5.8115e-10],\n ...,\n [6.8449e-10, 7.7146e-10, 8.2522e-10, ..., 7.7658e-10, 7.1544e-10,\n 6.5999e-10],\n [6.2648e-10, 8.1297e-10, 1.1065e-09, ..., 7.9849e-10, 9.4683e-10,\n 6.2198e-10],\n [5.5705e-10, 8.9349e-10, 9.5561e-10, ..., 7.0026e-10, 6.1759e-10,\n 8.6271e-10]], device='cuda:0')" + "exp_avg": "tensor([[ 2.1609e-06, -2.0701e-06, 4.9883e-07, ..., -1.4916e-06,\n 1.0655e-06, -4.4693e-06],\n [ 7.4102e-06, -6.7491e-06, -3.3030e-06, ..., 6.7660e-06,\n 1.1759e-06, 1.1307e-06],\n [ 1.7231e-06, -5.2475e-06, 4.2851e-07, ..., -7.4490e-06,\n -5.0060e-06, 3.5242e-06],\n ...,\n [ 8.0831e-06, 6.6383e-06, 1.9367e-06, ..., -1.1632e-06,\n 9.7002e-06, -5.6480e-07],\n [-8.6882e-06, 1.1630e-06, -1.1768e-06, ..., 9.4874e-07,\n -2.3350e-06, -1.1377e-06],\n [-1.1154e-05, -4.8152e-06, -3.9367e-06, ..., -4.2132e-06,\n -2.7027e-07, -2.8822e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.7024e-10, 1.7080e-10, 1.7244e-10, ..., 1.8738e-10, 2.4030e-10,\n 2.3993e-10],\n [2.7314e-10, 3.2404e-10, 2.8058e-10, ..., 2.6721e-10, 3.2521e-10,\n 3.3571e-10],\n [2.8413e-10, 2.5982e-10, 3.2786e-10, ..., 2.9619e-10, 3.7572e-10,\n 3.9370e-10],\n ...,\n [3.4880e-10, 3.0956e-10, 3.2308e-10, ..., 3.1320e-10, 4.4961e-10,\n 4.9448e-10],\n [3.0727e-10, 2.7409e-10, 2.7580e-10, ..., 3.1379e-10, 3.4206e-10,\n 3.1780e-10],\n [4.3672e-10, 3.8126e-10, 3.3052e-10, ..., 3.6795e-10, 4.3287e-10,\n 4.1707e-10]], device='cuda:0')" } }, "param_groups": [ { - "lr": 0.0007940987335200904, + "lr": 0.0006548539886902864, "name": "shared", "betas": [ 0.9, @@ -227,7 +242,7 @@ ] }, { - "lr": 0.0007940987335200904, + "lr": 0.0006548539886902864, "name": "scale_256", "betas": [ 0.9, @@ -250,7 +265,7 @@ ] }, { - "lr": 0.0007940987335200904, + "lr": 0.0006548539886902864, "name": "scale_512", "betas": [ 0.9, @@ -273,7 +288,7 @@ ] }, { - "lr": 0.0007940987335200904, + "lr": 0.0006548539886902864, "name": "scale_768", "betas": [ 0.9, @@ -296,7 +311,7 @@ ] }, { - "lr": 0.0007940987335200904, + "lr": 0.0006548539886902864, "name": "scale_1024", "betas": [ 0.9, @@ -319,7 +334,7 @@ ] }, { - "lr": 0.0007940987335200904, + "lr": 0.0006548539886902864, "name": "scale_1280", "betas": [ 0.9, @@ -342,7 +357,7 @@ ] }, { - "lr": 0.0007940987335200904, + "lr": 0.0006548539886902864, "name": "scale_1536", "betas": [ 0.9, @@ -365,7 +380,7 @@ ] }, { - "lr": 0.0007940987335200904, + "lr": 0.0006548539886902864, "name": "scale_1792", "betas": [ 0.9, @@ -388,7 +403,7 @@ ] }, { - "lr": 0.0007940987335200904, + "lr": 0.0006548539886902864, "name": "scale_2048", "betas": [ 0.9, @@ -411,7 +426,7 @@ ] }, { - "lr": 0.0007940987335200904, + "lr": 0.0006548539886902864, "name": "scale_2304", "betas": [ 0.9, @@ -434,7 +449,7 @@ ] }, { - "lr": 0.0007940987335200904, + "lr": 0.0006548539886902864, "name": "scale_2560", "betas": [ 0.9, @@ -457,7 +472,7 @@ ] }, { - "lr": 0.00039715242044697206, + "lr": 0.0003275997400965494, "name": "fusion", "betas": [ 0.9, @@ -513,7 +528,7 @@ "T_i": 10, "T_mult": 2, "eta_min": 1e-06, - "T_cur": 3, + "T_cur": 4, "base_lrs": [ 0.001, 0.001, @@ -528,32 +543,33 @@ 0.001, 0.0005 ], - "last_epoch": 3, + "last_epoch": 4, "_step_count": 0, "_is_initial": false, "_get_lr_called_within_step": false, "_last_lr": [ - 0.0007940987335200904, - 0.0007940987335200904, - 0.0007940987335200904, - 0.0007940987335200904, - 0.0007940987335200904, - 0.0007940987335200904, - 0.0007940987335200904, - 0.0007940987335200904, - 0.0007940987335200904, - 0.0007940987335200904, - 0.0007940987335200904, - 0.00039715242044697206 + 0.0006548539886902864, + 0.0006548539886902864, + 0.0006548539886902864, + 0.0006548539886902864, + 0.0006548539886902864, + 0.0006548539886902864, + 0.0006548539886902864, + 0.0006548539886902864, + 0.0006548539886902864, + 0.0006548539886902864, + 0.0006548539886902864, + 0.0003275997400965494 ] }, "metrics": { - "best_val_acc": 81.948, - "best_epoch": 2, + "best_val_acc": 82.292, + "best_epoch": 3, "scale_accuracies": { - "256": 81.948, - "512": 81.854, - "768": 81.538 + "256": 82.292, + "512": 82.358, + "768": 82.276, + "1024": 81.94 } }, "train_config": {