diff --git "a/weights/David-decoupled-deep_efficiency/20251013_004438/best_model_acc62.77_metadata.json" "b/weights/David-decoupled-deep_efficiency/20251013_004438/best_model_acc62.77_metadata.json" new file mode 100644--- /dev/null +++ "b/weights/David-decoupled-deep_efficiency/20251013_004438/best_model_acc62.77_metadata.json" @@ -0,0 +1,900 @@ +{ + "epoch": 7, + "optimizer_state_dict": { + "state": { + "0": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-4.5438e-04, 2.1092e-04, -2.8826e-05, ..., -1.6596e-05,\n -6.7419e-05, -1.6061e-04],\n [ 1.0807e-04, -9.0481e-05, -2.0868e-06, ..., -5.9896e-05,\n -4.5320e-05, 1.6931e-05],\n [-2.6241e-05, -1.5015e-05, 3.9307e-05, ..., -3.5217e-05,\n 4.7605e-06, -7.6686e-06],\n ...,\n [-1.3041e-06, 7.7665e-05, 3.3696e-05, ..., -2.0024e-05,\n 3.5963e-06, -8.1119e-06],\n [ 1.3531e-05, 3.7214e-05, -2.9589e-05, ..., 2.8715e-05,\n 3.4663e-05, 6.9457e-06],\n [-1.8103e-05, 1.3068e-04, 9.5395e-05, ..., -9.1106e-06,\n 5.0699e-05, 1.5254e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.4493e-07, 7.2677e-08, 4.6996e-08, ..., 5.0421e-08, 2.1863e-08,\n 2.7050e-08],\n [4.8334e-08, 8.9985e-08, 4.8526e-08, ..., 2.6106e-08, 1.1936e-08,\n 1.5146e-08],\n [3.2913e-08, 5.2975e-08, 2.2465e-08, ..., 8.9308e-09, 1.2076e-08,\n 8.8997e-09],\n ...,\n [3.4275e-08, 6.5217e-08, 2.1002e-08, ..., 1.1038e-08, 1.0860e-08,\n 1.2283e-08],\n [7.7770e-09, 4.9030e-08, 1.2315e-08, ..., 1.9993e-08, 5.4661e-09,\n 5.7799e-09],\n [1.5970e-08, 6.7440e-08, 3.2920e-08, ..., 3.1768e-08, 8.2209e-09,\n 1.1772e-08]], device='cuda:0')" + }, + "1": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-4.5238e-03, -3.1926e-03, -1.1362e-04, -9.9817e-04, 2.1180e-04,\n -3.5401e-05, 1.5328e-03, 8.9718e-04, 7.9492e-04, 1.7072e-04,\n -2.3547e-03, 5.8568e-04, -1.8504e-03, 8.2032e-05, 2.6948e-04,\n 2.3326e-04, 3.5728e-04, -9.5869e-04, 4.2193e-04, 1.6927e-04,\n -3.6506e-04, -1.0994e-03, 1.1270e-03, 1.5250e-03, -5.3897e-04,\n 2.1763e-05, -1.8335e-04, 7.0387e-04, -1.4965e-03, -7.5655e-04,\n -2.1013e-04, 1.8075e-03, 1.1130e-03, 1.1284e-04, 4.0045e-03,\n 4.7534e-04, 2.5281e-04, -1.6200e-03, -3.4792e-04, -5.7544e-05,\n -1.5937e-03, 5.9370e-04, 6.4973e-04, -1.6356e-03, 1.4337e-03,\n -1.0903e-03, 3.9380e-04, -2.7870e-04, 1.0407e-03, -1.2961e-04,\n 1.7633e-03, -3.9052e-04, 1.2032e-03, -4.9862e-05, -7.4006e-04,\n 6.1954e-04, -1.0476e-03, 2.0202e-03, 1.7284e-04, 3.2316e-03,\n 4.0579e-05, -2.0614e-03, 3.7633e-04, -3.1493e-04, 2.4122e-04,\n -1.1400e-03, 1.6109e-03, 9.5061e-04, 9.9361e-05, -2.9572e-04,\n -8.6395e-04, 9.3146e-04, 5.1589e-04, -2.5824e-05, 3.4782e-04,\n -2.0717e-03, -1.5159e-03, -1.4355e-03, 5.9828e-04, 1.5839e-03,\n -1.1480e-03, -2.1895e-04, 2.4044e-03, 2.8582e-03, 3.3003e-04,\n 1.0443e-03, 9.5584e-05, 3.5377e-04, -1.6290e-04, 4.2056e-04,\n 5.8976e-04, -1.1787e-03, -3.9909e-05, 1.2259e-04, 5.0831e-04,\n -1.4358e-04, 1.8623e-04, 9.4081e-04, -1.3251e-04, -2.0602e-04,\n -1.5968e-03, 6.6765e-04, 9.6588e-04, 7.6351e-04, -9.4695e-04,\n -3.8082e-03, -1.4942e-03, -1.3362e-03, -4.3855e-04, 5.8333e-06,\n -1.3689e-04, 6.1009e-04, 3.8591e-04, 3.1869e-04, -1.2460e-04,\n 9.6857e-04, -1.3482e-03, 1.1388e-03, -1.0521e-03, 1.9998e-04,\n 3.1647e-05, -6.6989e-04, 2.5818e-04, -1.7867e-03, -6.9506e-04,\n 1.0619e-04, 8.0005e-04, -2.5240e-03, -1.0070e-03, 3.5906e-04,\n -9.1353e-05, 5.0192e-04, 1.1173e-03, 1.0024e-03, 1.7744e-04,\n -3.5249e-03, 1.6051e-04, -1.1450e-03, 5.8830e-04, 4.4362e-05,\n 3.0258e-04, -1.4068e-03, -1.9485e-04, -1.2085e-04, -1.8894e-03,\n 1.2280e-04, -7.4176e-05, 6.3621e-04, 1.0926e-03, 3.3497e-04,\n 7.5204e-04, -1.7680e-03, -1.8498e-03, 2.1341e-04, 9.3513e-04,\n 1.8053e-04, 5.1842e-05, 2.4135e-04, 6.5967e-04, -1.2519e-04,\n -2.2730e-04, 1.9117e-04, 1.6154e-03, -6.1012e-04, 3.9121e-04,\n 5.0061e-04, -1.7108e-03, 1.7678e-03, 1.1191e-04, 1.3655e-04,\n -9.1648e-04, 4.7937e-04, 5.5786e-04, 3.5134e-04, -5.2485e-04,\n 1.0944e-03, -6.3406e-04, 4.6767e-04, -1.5273e-03, -1.8909e-04,\n 7.0360e-04, -1.8061e-04, 1.3085e-03, -9.4021e-04, 8.2647e-04,\n 5.2872e-04, -5.5491e-04, -2.2439e-04, -8.4021e-04, 2.0071e-03,\n -9.3343e-04, -6.1495e-04, -5.2533e-04, 3.5487e-04, 2.0492e-03,\n 1.0334e-03, -2.7773e-04, -1.3115e-03, -1.1885e-03, -1.4118e-03,\n 8.8712e-04, 3.0865e-04, -3.8202e-04, -5.0710e-05, 1.3665e-03,\n -4.6996e-04, 8.1802e-04, 8.5191e-04, -4.5768e-04, -1.0061e-03,\n 3.5317e-04, 1.0993e-03, 5.3245e-04, 3.8484e-04, -7.8242e-04,\n 7.4506e-04, 2.2716e-03, -2.7411e-03, 2.0446e-04, -4.9247e-04,\n -1.3409e-03, -9.1510e-04, -4.7540e-05, -1.1866e-03, -1.0618e-04,\n -2.1348e-03, -5.6721e-04, 1.1569e-03, -1.2162e-04, -3.8410e-04,\n 5.4537e-04, 8.0822e-04, 1.0379e-03, 1.6829e-03, -1.1933e-03,\n -8.7809e-06, -1.2002e-03, 1.6115e-03, -4.2084e-04, -9.7183e-04,\n 1.2505e-04, 1.9685e-06, -1.3145e-03, 9.5047e-04, 7.4584e-06,\n 6.5339e-04, 1.6825e-03, -3.8549e-04, 1.3002e-03, -1.8281e-04,\n 6.9357e-04, -4.8224e-04, -6.8956e-04, -1.1568e-03, 1.1490e-03,\n 2.5011e-04, -3.1382e-04, 7.2658e-05, -8.0146e-05, 7.1756e-04,\n 1.1830e-03, 6.1797e-04, -2.4086e-03, -1.4421e-03, 3.0114e-04,\n 4.9374e-04, -1.8338e-03, 9.8083e-04, 1.3044e-05, 4.0267e-04,\n 1.5365e-03, 8.4462e-04, 2.1998e-04, 2.5360e-04, 8.6698e-04,\n 5.4915e-04, 3.2483e-04, -8.2621e-04, 7.9883e-04, -1.6548e-03,\n 1.9207e-04, 1.2999e-03, -9.3214e-04, 9.6246e-04, -1.9085e-04,\n 1.3670e-04, -1.4586e-03, 7.2442e-04, 7.7941e-05, -3.2862e-03,\n 9.0880e-04, -7.0240e-04, -6.6406e-04, 1.1268e-03, -1.5741e-03,\n 7.0559e-04, -1.1714e-03, -1.1576e-04, 2.9486e-04, 5.0992e-04,\n -4.3858e-05, 1.2916e-03, 2.5138e-03, 1.0627e-03, 2.1645e-03,\n -4.9736e-04, -1.3168e-03, -4.7393e-04, -1.4513e-04, -6.1131e-04,\n 4.4207e-04, 7.3147e-04, -3.7805e-04, 1.5218e-03, -1.0173e-03,\n -2.5783e-04, 1.2538e-03, -1.0987e-03, -1.6478e-03, -6.8100e-05,\n 8.2780e-04, 1.3403e-03, 5.8463e-04, 1.1460e-03, -9.1540e-04,\n -1.5686e-04, -7.5243e-04, -2.6669e-04, 6.4009e-05, 1.1485e-03,\n -3.0696e-04, 7.1409e-04, 3.1326e-04, -1.3959e-03, -1.4030e-03,\n -2.0969e-04, -4.7721e-04, 4.9036e-04, -9.7950e-04, 6.4107e-04,\n -1.4702e-03, -9.3840e-04, -3.1136e-04, -4.5977e-04, 6.9260e-04,\n 8.3715e-04, -4.7440e-04, 4.3786e-04, 5.5782e-04, -2.7371e-04,\n 2.8680e-03, 3.7701e-04, -1.4057e-03, -1.7219e-03, -8.3062e-04,\n -1.8508e-03, 1.7512e-04, 5.6025e-04, -8.3102e-04, 2.9273e-03,\n -2.0982e-04, -1.6289e-03, 5.4201e-04, 2.5308e-04, -2.1188e-03,\n 4.5889e-04, -7.4994e-04, 1.2173e-03, 1.7102e-04, 5.5322e-04,\n 6.4347e-04, -1.0050e-03, -4.1625e-04, -2.2543e-03, -1.4897e-03,\n -1.1156e-03, -8.8717e-04, -1.0975e-03, 1.2776e-04, 2.7628e-04,\n 2.0473e-04, -2.2653e-04, -2.5214e-03, -1.6335e-03, -4.9916e-04,\n 6.6349e-04, 8.3445e-05, 5.7284e-04, -1.8799e-04, -1.8056e-03,\n -6.8713e-04, 1.6985e-03, 1.4946e-04, 1.0753e-04, 1.7026e-03,\n -9.2364e-05, -8.5269e-04, 5.1338e-05, 6.7277e-04, -1.7996e-03,\n 1.2301e-04, -3.6932e-04, 1.2833e-03, 1.8922e-03, -8.1411e-04,\n 7.4684e-05, -3.6894e-04, 1.3441e-03, 2.9232e-05, 4.4075e-04,\n 1.2959e-05, -8.2232e-06, -1.0557e-03, -7.8130e-05, 2.1926e-04,\n 1.8165e-04, 4.0890e-04, -1.4163e-03, -1.5335e-03, -2.4707e-04,\n -5.5323e-04, -6.4818e-04, -1.0394e-03, -1.6868e-03, 7.4359e-04,\n -2.7679e-04, -1.3780e-03, 6.4952e-06, -1.9697e-04, 5.6810e-04,\n 1.2921e-03, 5.5347e-04, 6.3843e-04, -2.6431e-04, 1.7463e-04,\n 3.8353e-04, -1.6985e-04, -4.5172e-04, -7.5443e-04, 1.3149e-04,\n -2.0011e-03, 2.1476e-03, -6.8950e-05, -1.5591e-03, 5.8480e-04,\n 7.6556e-04, -1.0113e-03, -8.9070e-04, -1.6085e-04, -1.3762e-03,\n 2.9344e-04, 3.0153e-04, -1.4222e-03, 2.4324e-03, 4.7471e-04,\n 1.3987e-03, 4.3823e-04, 4.6728e-04, 6.0509e-04, 1.2426e-03,\n -1.4214e-04, -1.5273e-03, 1.9292e-03, 4.9332e-04, 7.6693e-05,\n 6.2002e-05, 7.8005e-04, 3.2667e-04, 8.2382e-04, 4.5461e-04,\n 3.1357e-04, -1.6530e-04, -1.0001e-03, 8.3129e-04, -7.4242e-05,\n 5.7789e-04, -4.9113e-04, 7.2293e-04, 1.3440e-03, 2.2949e-03,\n -9.7826e-04, 8.6878e-04, 2.5068e-04, 4.2280e-04, -1.7790e-03,\n 1.2242e-03, -4.8854e-04, 1.3435e-04, -2.9686e-04, -7.7987e-04,\n 3.5071e-04, 8.9410e-04, 2.4378e-04, 1.0982e-05, 1.7449e-04,\n 7.1960e-04, 1.5904e-03, -8.1613e-04, 9.5295e-04, -4.0829e-04,\n 1.3821e-03, -1.9611e-04, -1.9896e-03, -3.8762e-04, -2.1146e-04,\n 3.3408e-04, -1.6556e-03, 8.0895e-04, 5.0252e-04, -8.2207e-04,\n 5.1598e-04, -1.0491e-03], device='cuda:0')", + "exp_avg_sq": "tensor([3.3955e-05, 2.0150e-05, 1.1040e-05, 1.0746e-05, 1.5310e-05, 1.8396e-05,\n 1.2439e-05, 1.8233e-05, 1.1165e-05, 1.0928e-05, 1.7253e-05, 1.7078e-05,\n 1.0242e-05, 1.4374e-05, 1.0741e-05, 1.4675e-05, 9.8046e-06, 1.7057e-05,\n 6.8404e-06, 6.0411e-06, 8.1245e-06, 1.4599e-05, 9.5804e-06, 1.6279e-05,\n 1.4292e-05, 8.0506e-06, 2.5428e-05, 2.6640e-05, 1.0946e-05, 6.7354e-06,\n 1.3770e-05, 1.7032e-05, 1.4766e-05, 1.5731e-05, 2.5769e-05, 1.3059e-05,\n 1.7043e-05, 1.1494e-05, 1.6675e-05, 9.5539e-06, 2.3140e-05, 1.2669e-05,\n 1.1479e-05, 2.2023e-05, 1.5117e-05, 1.8994e-05, 1.1890e-05, 1.4843e-05,\n 2.0426e-05, 9.2470e-06, 9.4788e-06, 2.0495e-05, 1.5812e-05, 1.4952e-05,\n 7.1871e-06, 1.1898e-05, 1.9196e-05, 1.4160e-05, 4.0872e-05, 1.7579e-05,\n 1.8558e-05, 1.8736e-05, 2.6020e-05, 1.5186e-05, 9.2082e-06, 2.5587e-05,\n 1.4897e-05, 9.8092e-06, 1.4021e-05, 1.5278e-05, 1.4898e-05, 1.6002e-05,\n 1.0466e-05, 1.6101e-05, 9.0373e-06, 4.8885e-05, 1.6597e-05, 1.3995e-05,\n 1.5302e-05, 1.4219e-05, 1.2154e-05, 1.8621e-05, 2.0226e-05, 2.5219e-05,\n 1.4059e-05, 1.4974e-05, 1.7741e-05, 1.3071e-05, 1.0118e-05, 1.5383e-05,\n 1.2162e-05, 1.5939e-05, 1.4687e-05, 1.5476e-05, 1.4424e-05, 1.4762e-05,\n 8.8210e-06, 1.5265e-05, 7.3823e-06, 1.2931e-05, 2.4229e-05, 1.1082e-05,\n 3.6101e-06, 2.2275e-05, 2.0012e-05, 1.1673e-05, 2.5634e-05, 2.6423e-05,\n 1.3133e-05, 1.9332e-05, 1.0497e-05, 2.9382e-05, 1.9169e-05, 1.4322e-05,\n 1.2958e-05, 2.6185e-05, 1.5374e-05, 1.1257e-05, 2.2665e-05, 1.7573e-05,\n 8.1334e-06, 1.1379e-05, 3.2955e-06, 1.0536e-05, 1.8278e-05, 1.7783e-05,\n 1.0616e-05, 1.7761e-05, 1.7317e-05, 1.4131e-05, 1.3039e-05, 1.0471e-05,\n 1.0298e-05, 1.5977e-05, 1.3308e-05, 1.5250e-05, 1.3002e-05, 1.7523e-05,\n 1.2340e-05, 1.3711e-05, 1.1755e-05, 1.2961e-05, 1.2767e-05, 1.2957e-05,\n 1.8925e-05, 1.2824e-05, 1.8324e-05, 1.1190e-05, 2.5651e-05, 2.5691e-05,\n 1.3238e-05, 2.1355e-05, 3.0772e-05, 1.9378e-05, 1.6711e-05, 1.4210e-05,\n 2.8739e-05, 1.2657e-05, 2.2288e-05, 1.2945e-05, 1.2745e-05, 1.1592e-05,\n 2.8351e-05, 1.0039e-05, 1.2143e-05, 9.1950e-06, 2.0761e-05, 1.3413e-05,\n 1.5324e-05, 1.5867e-05, 1.8275e-05, 1.4469e-05, 1.2390e-05, 2.7911e-05,\n 1.8474e-05, 1.3730e-05, 1.1763e-05, 6.0916e-06, 1.0915e-05, 7.7601e-06,\n 1.8575e-05, 2.1234e-05, 1.8422e-05, 1.9388e-05, 2.1898e-05, 1.5207e-05,\n 2.3281e-05, 2.3698e-05, 7.6959e-06, 1.5438e-05, 1.0263e-05, 1.2573e-05,\n 1.2336e-05, 1.9388e-05, 1.7321e-05, 8.9250e-06, 9.4714e-06, 1.1204e-05,\n 1.8041e-05, 1.1044e-05, 1.6401e-05, 1.1732e-05, 1.1757e-05, 1.4335e-05,\n 1.3110e-05, 1.0099e-05, 9.6097e-06, 1.3271e-05, 2.8537e-05, 2.3756e-05,\n 1.9880e-05, 1.7465e-05, 2.4471e-05, 1.0820e-05, 2.2206e-05, 1.2796e-05,\n 1.1313e-05, 1.3469e-05, 1.2838e-05, 1.6383e-05, 2.2145e-05, 1.3983e-05,\n 1.7822e-05, 6.9144e-06, 2.2820e-05, 1.1279e-05, 1.4446e-05, 1.1988e-05,\n 1.5215e-05, 1.5327e-05, 1.9547e-05, 8.5657e-06, 1.5217e-05, 1.1825e-05,\n 1.4759e-05, 1.3779e-05, 8.6109e-06, 2.9832e-05, 1.3678e-05, 1.5832e-05,\n 1.2302e-05, 9.1174e-06, 1.1888e-05, 1.8514e-05, 5.6095e-06, 1.2182e-05,\n 1.1718e-05, 1.3385e-05, 2.3201e-05, 3.2973e-05, 2.0120e-05, 2.9089e-05,\n 8.5150e-06, 2.6526e-05, 1.6603e-05, 2.0059e-05, 1.5787e-05, 1.8143e-05,\n 8.3478e-06, 1.9002e-05, 1.2922e-05, 7.1946e-06, 1.4266e-05, 1.5024e-05,\n 3.2251e-05, 8.8062e-06, 1.7060e-05, 1.7207e-05, 1.0901e-05, 3.7883e-05,\n 8.5558e-06, 1.1528e-05, 1.3290e-05, 9.8486e-06, 8.5352e-06, 1.5978e-05,\n 7.9015e-06, 8.2602e-06, 1.6691e-05, 1.2193e-05, 6.7892e-06, 1.2801e-05,\n 1.3707e-05, 8.8840e-06, 1.0671e-05, 2.3212e-05, 6.9699e-06, 9.8238e-06,\n 1.1866e-05, 1.5958e-05, 1.1169e-05, 3.9708e-06, 1.0550e-05, 6.9569e-06,\n 1.2619e-05, 1.6862e-05, 1.6663e-05, 1.2377e-05, 2.1153e-05, 7.4485e-06,\n 1.7854e-05, 1.6328e-05, 2.9748e-05, 1.8892e-05, 2.1160e-05, 7.5823e-06,\n 3.1675e-05, 1.3978e-05, 1.2075e-05, 1.0290e-05, 9.6713e-06, 7.1051e-06,\n 1.9052e-05, 1.2020e-05, 2.0870e-05, 6.6542e-06, 1.0999e-05, 1.1073e-05,\n 2.0724e-05, 1.2462e-05, 1.5728e-05, 1.7574e-05, 1.4885e-05, 5.9699e-06,\n 1.0824e-05, 1.7335e-05, 1.0525e-05, 2.3217e-05, 7.9929e-06, 7.5927e-06,\n 1.1925e-05, 1.2163e-05, 1.3401e-05, 1.5227e-05, 1.3896e-05, 1.1744e-05,\n 1.8021e-05, 1.5668e-05, 3.1017e-05, 1.7027e-05, 1.4700e-05, 1.0796e-05,\n 2.1371e-05, 1.8069e-05, 1.0221e-05, 2.0209e-05, 1.0579e-05, 9.9234e-06,\n 1.7876e-05, 1.1790e-05, 1.4137e-05, 1.0648e-05, 1.5891e-05, 1.2204e-05,\n 1.0089e-05, 1.3548e-05, 4.1877e-05, 3.1442e-05, 8.6029e-06, 1.8699e-05,\n 2.3712e-05, 3.0324e-05, 1.7684e-05, 1.2386e-05, 2.5330e-05, 1.6899e-05,\n 1.5739e-05, 1.2169e-05, 6.7774e-06, 1.5576e-05, 8.1656e-06, 2.2428e-05,\n 9.1412e-06, 1.6685e-05, 1.7913e-05, 2.3956e-05, 5.6943e-06, 1.8118e-05,\n 1.3673e-05, 2.8027e-05, 1.5044e-05, 3.9204e-06, 9.4291e-06, 1.0170e-05,\n 4.4122e-05, 1.4725e-05, 1.0530e-05, 1.6048e-05, 1.3236e-05, 2.1710e-05,\n 2.0111e-05, 1.7352e-05, 1.7340e-05, 4.0034e-06, 1.3703e-05, 1.8973e-05,\n 1.8904e-05, 9.4258e-06, 2.3766e-05, 1.4560e-05, 1.7384e-05, 1.3880e-05,\n 2.8870e-05, 5.7637e-05, 2.0318e-05, 1.1070e-05, 6.3593e-06, 1.1081e-05,\n 4.8632e-06, 1.6692e-05, 1.9923e-05, 7.4244e-06, 1.9645e-05, 1.0406e-05,\n 1.3819e-05, 9.3944e-06, 7.3770e-06, 2.5988e-05, 3.6730e-05, 1.4023e-05,\n 1.0196e-05, 1.6176e-05, 1.5113e-05, 1.1481e-05, 1.4158e-05, 1.2432e-05,\n 8.4210e-06, 8.7469e-06, 9.1489e-06, 2.8084e-05, 1.5825e-05, 6.4226e-06,\n 2.0589e-05, 1.6043e-05, 1.0834e-05, 5.6307e-06, 1.5638e-05, 1.6285e-05,\n 1.9312e-05, 1.9922e-05, 1.3077e-05, 1.5724e-05, 9.1935e-06, 2.2890e-05,\n 7.4303e-06, 1.2169e-05, 1.5600e-05, 1.4899e-05, 1.5494e-05, 1.3445e-05,\n 1.2955e-05, 9.9517e-06, 1.0856e-05, 1.7665e-05, 7.7037e-06, 1.2871e-05,\n 9.7339e-06, 1.1538e-05, 1.6294e-05, 2.2612e-05, 2.4181e-05, 2.1100e-05,\n 1.8779e-05, 2.3077e-05, 6.5000e-06, 1.6153e-05, 1.2028e-05, 2.7636e-05,\n 1.9513e-05, 1.5344e-05, 1.4412e-05, 1.3026e-05, 2.5734e-05, 1.5372e-05,\n 1.0631e-05, 1.5751e-05, 2.0736e-05, 2.5439e-05, 1.3701e-05, 2.4339e-05,\n 1.6386e-05, 1.0800e-05, 1.3114e-05, 1.3657e-05, 1.6407e-05, 5.7781e-06,\n 1.0706e-05, 1.6161e-05, 1.5085e-05, 1.0446e-05, 3.3256e-05, 1.1968e-05,\n 7.6204e-06, 3.9497e-05, 1.4360e-05, 2.0012e-05, 1.5338e-05, 2.5506e-05,\n 1.2236e-05, 1.6900e-05, 1.1247e-05, 1.9148e-05, 1.5347e-05, 5.6577e-06,\n 1.2709e-05, 1.9322e-05, 3.2195e-05, 1.4080e-05, 9.7830e-06, 1.3297e-05,\n 8.0364e-06, 1.3826e-05], device='cuda:0')" + }, + "2": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 1.1990e-05, 4.7580e-06, -2.5683e-05, ..., -2.8723e-05,\n -3.3648e-05, -1.1585e-05],\n [-1.9327e-05, -1.0673e-05, 1.7265e-05, ..., -1.5051e-08,\n 1.0906e-05, -4.3969e-05],\n [-3.8601e-05, 6.1295e-06, -1.3676e-05, ..., -5.4327e-05,\n 1.0393e-05, -1.8566e-05],\n ...,\n [ 4.1090e-05, 1.8298e-05, -1.1668e-07, ..., -7.5594e-06,\n -1.8772e-06, -1.3181e-05],\n [-1.5379e-05, -1.3977e-05, -2.3438e-05, ..., -7.9755e-06,\n 2.2769e-05, -3.8612e-05],\n [-2.1131e-05, -2.7726e-05, -2.9135e-06, ..., 1.9035e-05,\n 6.5989e-06, -9.5817e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.3232e-09, 2.7312e-09, 1.8877e-08, ..., 5.5306e-09, 6.0061e-09,\n 1.9921e-09],\n [1.5149e-08, 8.2447e-09, 7.3848e-09, ..., 9.9966e-09, 2.5594e-09,\n 7.3214e-09],\n [1.0317e-08, 2.0752e-08, 8.0211e-09, ..., 1.2433e-08, 3.1056e-09,\n 4.5005e-09],\n ...,\n [1.4535e-08, 8.3301e-09, 1.6923e-08, ..., 1.1762e-08, 5.2031e-09,\n 6.4227e-09],\n [1.5765e-08, 6.6108e-09, 6.5941e-09, ..., 2.2503e-08, 6.0096e-09,\n 4.5733e-09],\n [1.0023e-08, 5.9754e-09, 7.8686e-09, ..., 1.4249e-08, 4.8582e-09,\n 4.6293e-09]], device='cuda:0')" + }, + "3": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 2.6490e-06, 5.5461e-07, 6.8104e-06, ..., -1.1044e-06,\n 1.7739e-06, 9.3053e-07],\n [-2.9308e-07, -5.6575e-07, 1.3762e-06, ..., -3.0080e-06,\n -4.4668e-07, 1.3238e-06],\n [ 6.4353e-07, 7.9796e-07, -2.5395e-06, ..., 2.9973e-07,\n 9.9336e-07, 1.4250e-06],\n ...,\n [ 1.2327e-06, 3.7331e-06, -3.4192e-06, ..., 1.9055e-06,\n -1.0752e-07, 3.4384e-06],\n [-1.3779e-06, -1.4936e-06, -2.7406e-06, ..., -1.8193e-07,\n 6.1052e-07, 1.0283e-06],\n [ 2.1419e-06, 3.0347e-06, -1.8405e-06, ..., 7.7244e-07,\n 2.4213e-07, -1.3157e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.7991e-10, 3.1396e-10, 1.2383e-10, ..., 5.0467e-11, 3.2293e-11,\n 5.2198e-11],\n [6.2037e-11, 3.0332e-10, 1.1110e-10, ..., 1.6280e-10, 2.9389e-11,\n 3.3281e-11],\n [3.7942e-11, 3.3718e-10, 8.7619e-11, ..., 2.1174e-11, 2.1288e-11,\n 2.0732e-11],\n ...,\n [6.6711e-11, 2.0188e-10, 1.9389e-10, ..., 1.2114e-10, 2.8049e-11,\n 4.2882e-11],\n [1.0287e-10, 2.4364e-10, 2.0055e-10, ..., 2.8277e-10, 5.7343e-11,\n 5.0189e-11],\n [7.2934e-11, 3.8698e-10, 9.4756e-11, ..., 4.2061e-11, 3.8738e-11,\n 3.8686e-11]], device='cuda:0')" + }, + "4": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 3.6405e-06, -5.2676e-05, -3.4314e-06, ..., 7.4942e-05,\n 4.2250e-06, 1.0188e-05], device='cuda:0')", + "exp_avg_sq": "tensor([6.3849e-08, 6.9165e-08, 2.7085e-08, ..., 5.2972e-08, 9.9588e-08,\n 5.6588e-08], device='cuda:0')" + }, + "5": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 3.1968e-07, -9.2250e-07, -4.3482e-07, ..., 4.2580e-07,\n 1.0485e-06, 1.0616e-07],\n [-9.8568e-08, 8.9370e-07, -7.0449e-07, ..., -5.2129e-07,\n 1.4874e-07, 3.1622e-07],\n [ 2.5989e-07, 5.4057e-07, -6.0651e-08, ..., -1.0084e-06,\n -1.0690e-06, 1.3946e-07],\n ...,\n [-1.8848e-07, 6.9418e-08, 1.9184e-07, ..., -2.1291e-08,\n 1.0093e-06, -1.7544e-07],\n [-6.2520e-08, 1.2557e-06, -3.3431e-08, ..., 2.7193e-08,\n 9.9192e-07, 6.0311e-08],\n [ 4.0480e-08, 6.1049e-08, -1.5418e-07, ..., 1.8356e-07,\n -1.2984e-06, 4.3504e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.4501e-12, 6.2103e-12, 8.7026e-12, ..., 3.2342e-12, 1.2410e-11,\n 1.5996e-12],\n [4.8766e-12, 4.3688e-12, 1.6537e-11, ..., 7.6303e-12, 3.1354e-11,\n 1.0226e-11],\n [2.2539e-12, 2.9787e-12, 1.5767e-11, ..., 7.0007e-12, 1.8726e-11,\n 4.5703e-12],\n ...,\n [6.8041e-12, 5.6639e-12, 2.3173e-11, ..., 1.3697e-11, 2.0973e-11,\n 3.5972e-12],\n [7.4388e-12, 1.1040e-11, 2.0020e-11, ..., 6.8175e-12, 2.7436e-11,\n 2.6229e-12],\n [5.4500e-12, 2.6842e-12, 2.2924e-11, ..., 7.0100e-12, 2.8349e-11,\n 8.8305e-12]], device='cuda:0')" + }, + "6": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 2.5670e-07, 2.9033e-07, -2.6376e-06, ..., -5.0535e-06,\n 9.8419e-07, 1.3646e-06],\n [ 1.6384e-06, 1.9178e-06, 3.1212e-06, ..., 1.1886e-06,\n 1.1859e-06, -3.2366e-08],\n [-1.1981e-06, 2.1022e-06, 1.2943e-06, ..., -1.7979e-06,\n -1.6299e-06, -1.4683e-06],\n ...,\n [ 6.3451e-06, 4.2388e-06, 1.4100e-06, ..., 2.0357e-07,\n -1.2159e-06, 3.4292e-06],\n [ 1.8567e-06, 8.6604e-06, 1.4884e-06, ..., 1.5318e-06,\n -1.4269e-07, 5.2208e-07],\n [ 2.8811e-07, -6.6141e-06, 4.1930e-07, ..., 1.2933e-06,\n -3.8676e-07, 2.7519e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.6528e-11, 2.7970e-10, 7.3424e-11, ..., 1.8077e-10, 2.3141e-11,\n 3.6769e-11],\n [5.2477e-11, 1.8373e-10, 6.9159e-11, ..., 8.0093e-11, 2.5709e-11,\n 4.6147e-11],\n [1.2449e-10, 2.7412e-10, 1.1102e-10, ..., 6.6767e-11, 1.0916e-10,\n 6.3364e-11],\n ...,\n [4.9053e-11, 1.1493e-10, 5.0423e-11, ..., 1.4311e-11, 1.0807e-11,\n 1.3371e-11],\n [3.6234e-11, 2.2040e-10, 4.8286e-11, ..., 2.0733e-11, 1.2070e-11,\n 1.4405e-11],\n [4.0249e-11, 1.8399e-10, 5.5475e-11, ..., 2.4115e-11, 2.3552e-11,\n 2.0530e-11]], device='cuda:0')" + }, + "7": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-6.6164e-05, 3.1040e-05, -3.4955e-05, ..., 8.6721e-05,\n 6.8816e-05, 2.8504e-06], device='cuda:0')", + "exp_avg_sq": "tensor([5.3288e-08, 4.4934e-08, 7.2810e-08, ..., 1.7069e-08, 1.7427e-08,\n 3.7313e-08], device='cuda:0')" + }, + "8": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 5.3185e-08, -3.7648e-07, -5.3978e-07, ..., -4.6948e-07,\n -4.7070e-07, -8.1368e-08],\n [ 8.2556e-08, 1.7740e-07, 4.9694e-07, ..., -1.0779e-08,\n 4.9118e-07, 1.7161e-07],\n [ 2.0429e-07, -6.4035e-07, -3.1528e-08, ..., -4.2941e-07,\n 3.3402e-07, 3.6910e-07],\n ...,\n [-4.5608e-07, 1.2670e-07, -1.7465e-07, ..., 3.5470e-07,\n -3.6792e-07, -4.9007e-07],\n [-8.2802e-07, 1.1741e-07, 4.7895e-07, ..., 2.1434e-08,\n -3.2137e-07, -6.0387e-08],\n [-3.0791e-07, 1.7133e-07, -5.6838e-07, ..., 4.8632e-07,\n 4.0102e-07, -3.6527e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.1930e-12, 7.0811e-12, 1.3497e-12, ..., 8.4050e-13, 2.6972e-12,\n 8.6085e-13],\n [2.3209e-12, 2.0125e-11, 3.9583e-12, ..., 1.0028e-12, 2.8653e-12,\n 1.9348e-12],\n [2.6220e-12, 6.9260e-12, 6.4892e-12, ..., 1.2396e-12, 4.0150e-12,\n 1.2064e-12],\n ...,\n [2.8519e-12, 7.9632e-12, 3.1248e-12, ..., 3.0420e-12, 4.2670e-12,\n 2.6940e-12],\n [3.5597e-12, 7.3903e-12, 4.4049e-12, ..., 4.4526e-13, 4.1708e-12,\n 2.6109e-12],\n [2.4323e-12, 7.3965e-12, 4.1471e-12, ..., 1.3361e-12, 2.5877e-12,\n 3.1801e-12]], device='cuda:0')" + }, + "9": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-2.2049e-07, -2.7975e-07, -2.8883e-07, ..., 1.1716e-06,\n -2.2739e-07, -3.6128e-07],\n [-1.6846e-06, 8.4505e-07, 4.5291e-07, ..., 9.1668e-07,\n -7.5276e-07, -4.9883e-07],\n [-1.4191e-06, 1.2859e-06, 1.6470e-06, ..., -3.1608e-06,\n -3.5031e-07, 1.3010e-06],\n ...,\n [-3.0432e-06, 2.6895e-06, 3.7251e-06, ..., 7.2451e-07,\n -2.7483e-07, -9.6475e-07],\n [ 1.4015e-06, -8.5900e-07, 1.1363e-07, ..., 8.8821e-07,\n 2.8552e-07, -1.4502e-07],\n [ 8.7826e-07, -2.3005e-06, 2.1540e-07, ..., -3.6655e-06,\n -8.1735e-07, 2.4642e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.4364e-11, 2.7772e-11, 3.1309e-11, ..., 1.3011e-10, 1.2131e-11,\n 1.6168e-11],\n [4.6164e-11, 1.6127e-10, 3.2913e-11, ..., 9.7653e-11, 2.6455e-11,\n 2.8911e-11],\n [2.9922e-11, 1.7548e-10, 2.2243e-11, ..., 6.8009e-11, 1.4299e-11,\n 2.5931e-11],\n ...,\n [5.0226e-11, 1.0434e-10, 7.2605e-11, ..., 7.2292e-11, 1.5922e-11,\n 1.6108e-11],\n [7.1697e-11, 1.6525e-10, 5.4006e-11, ..., 1.4796e-10, 2.8433e-11,\n 3.1824e-11],\n [5.8315e-11, 5.8727e-11, 2.0638e-11, ..., 1.0664e-10, 2.7417e-11,\n 2.5577e-11]], device='cuda:0')" + }, + "10": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 1.5506e-05, 1.9861e-05, -1.0072e-05, ..., -3.6183e-05,\n 3.8615e-05, -3.2343e-05], device='cuda:0')", + "exp_avg_sq": "tensor([3.8325e-08, 4.5279e-08, 2.9553e-08, ..., 3.0801e-08, 6.2150e-08,\n 4.2368e-08], device='cuda:0')" + }, + "11": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-2.7554e-07, -1.5880e-07, 5.6332e-07, ..., -1.7005e-07,\n 5.0743e-07, -4.8004e-07],\n [ 3.7842e-07, 2.3208e-07, 8.6221e-07, ..., 9.5063e-08,\n -1.6734e-07, -7.2024e-08],\n [-3.9527e-07, -2.7238e-07, 5.9171e-07, ..., -4.1488e-07,\n -6.4342e-07, 2.3867e-08],\n ...,\n [-8.9559e-08, -7.6353e-08, 9.1880e-07, ..., 3.7837e-08,\n -3.9768e-08, 1.7829e-07],\n [ 7.2416e-07, -1.6638e-07, -3.8498e-07, ..., 8.4271e-07,\n -2.5518e-07, 2.7836e-07],\n [-1.6575e-07, -1.8484e-07, -7.4742e-07, ..., -6.8850e-07,\n -1.1178e-06, 9.0043e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.1509e-12, 7.0483e-13, 2.8062e-12, ..., 4.0901e-13, 3.1648e-12,\n 3.4488e-12],\n [1.7096e-12, 1.0097e-12, 3.2196e-12, ..., 8.0814e-13, 1.0159e-11,\n 7.1036e-12],\n [1.9070e-12, 1.8579e-12, 3.7102e-12, ..., 3.9826e-12, 6.7013e-12,\n 5.9076e-12],\n ...,\n [1.7370e-12, 1.9601e-12, 5.6545e-12, ..., 6.6763e-13, 7.3537e-12,\n 8.7797e-12],\n [2.8383e-12, 1.2953e-12, 2.7421e-12, ..., 2.7812e-12, 6.1261e-12,\n 8.0521e-12],\n [1.6753e-12, 4.3819e-12, 8.8039e-12, ..., 2.1396e-12, 1.0624e-11,\n 6.1720e-12]], device='cuda:0')" + }, + "12": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 7.6790e-07, -5.0617e-07, 1.1306e-07, ..., -2.8547e-08,\n -1.3239e-07, -3.3638e-07],\n [-6.3731e-07, -1.2421e-06, -1.8959e-06, ..., -3.1695e-06,\n -1.4730e-07, 1.1221e-06],\n [-6.6663e-07, 1.3212e-06, 2.4300e-07, ..., -2.6487e-07,\n -6.2269e-07, 6.3625e-08],\n ...,\n [-2.0128e-07, -2.0260e-07, -1.1749e-06, ..., 6.9827e-07,\n 1.2592e-07, 6.0325e-07],\n [-5.9812e-07, -1.7453e-06, 4.7768e-07, ..., -1.8868e-06,\n 5.7808e-07, -5.3225e-07],\n [ 5.2540e-07, 3.4412e-06, 2.0359e-06, ..., 8.8442e-07,\n -3.5143e-07, 3.1825e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.7483e-11, 3.3938e-11, 5.4325e-12, ..., 1.6146e-11, 6.8378e-12,\n 8.4066e-12],\n [2.8755e-11, 8.4795e-11, 2.8879e-11, ..., 5.6573e-11, 1.4234e-11,\n 1.3481e-11],\n [4.1663e-11, 5.0569e-10, 6.9871e-11, ..., 1.0931e-10, 1.5015e-11,\n 3.4711e-11],\n ...,\n [1.0843e-11, 1.3837e-10, 3.6692e-11, ..., 7.5408e-12, 5.5251e-12,\n 7.6522e-12],\n [6.0153e-11, 1.0210e-10, 1.1619e-10, ..., 4.9373e-11, 1.4636e-11,\n 3.1822e-11],\n [9.3997e-11, 1.3649e-10, 4.1455e-11, ..., 5.0523e-11, 2.9159e-11,\n 3.1589e-11]], device='cuda:0')" + }, + "13": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 3.9398e-06, -8.0800e-05, -3.5145e-05, ..., 1.4827e-05,\n -3.0400e-05, 1.4220e-06], device='cuda:0')", + "exp_avg_sq": "tensor([1.2094e-08, 2.2621e-08, 4.4520e-08, ..., 1.1010e-08, 3.5897e-08,\n 3.9266e-08], device='cuda:0')" + }, + "14": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-1.7270e-07, -1.5561e-08, 5.6265e-08, ..., -1.8301e-07,\n 2.8410e-07, 2.0239e-07],\n [-2.1796e-08, 5.1446e-07, 2.8002e-07, ..., -7.0432e-08,\n -4.4585e-07, 2.2388e-07],\n [ 2.5909e-07, 1.0258e-07, -3.9069e-07, ..., -1.3531e-07,\n 4.6990e-07, -2.8873e-07],\n ...,\n [-8.3746e-08, -4.1261e-07, -1.1765e-07, ..., -7.2776e-08,\n 3.8032e-07, -1.9368e-07],\n [-3.3743e-07, 9.3770e-08, -3.8971e-08, ..., -2.8522e-07,\n 3.0309e-08, 3.4089e-07],\n [-6.7386e-08, -3.6357e-07, -7.3352e-07, ..., -3.3487e-07,\n 4.1184e-07, 2.0586e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.4930e-13, 2.5234e-13, 3.1269e-13, ..., 5.6974e-13, 1.2135e-12,\n 6.5393e-13],\n [6.2308e-13, 5.6718e-13, 4.0542e-13, ..., 1.1261e-12, 2.3579e-12,\n 1.5953e-12],\n [8.1216e-13, 5.4087e-13, 6.7747e-13, ..., 1.2068e-12, 2.6472e-12,\n 1.0667e-12],\n ...,\n [6.8898e-13, 5.4028e-13, 5.7443e-13, ..., 7.7752e-13, 5.5786e-12,\n 1.0713e-12],\n [8.6525e-13, 4.1823e-13, 5.5503e-13, ..., 7.4185e-13, 2.1809e-12,\n 1.1677e-12],\n [8.8385e-13, 8.7796e-13, 1.6400e-12, ..., 1.0114e-12, 1.8169e-12,\n 1.8435e-12]], device='cuda:0')" + }, + "15": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-9.5755e-08, -7.3837e-07, -3.9371e-07, ..., 2.2782e-07,\n 1.1438e-07, -1.1929e-07],\n [-4.8708e-07, -1.8258e-06, 7.9891e-08, ..., 3.2836e-08,\n -2.2864e-07, -9.1596e-08],\n [ 1.0859e-06, -6.9347e-07, 6.3702e-07, ..., -2.7740e-06,\n 8.5536e-07, 7.6948e-07],\n ...,\n [-1.2196e-06, 4.1175e-07, 2.7089e-07, ..., 4.5666e-07,\n 8.5601e-07, 7.1432e-07],\n [ 1.5921e-07, -1.5456e-06, -1.0338e-08, ..., 5.6264e-07,\n -4.5209e-07, -2.2013e-07],\n [ 8.5794e-07, -1.5947e-06, -4.1096e-07, ..., 1.3120e-06,\n 6.4989e-08, -2.5178e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.2730e-11, 5.1445e-11, 3.3483e-11, ..., 1.3033e-11, 1.0331e-11,\n 9.3185e-12],\n [6.1958e-12, 8.1491e-11, 1.0226e-11, ..., 5.5464e-12, 2.1690e-12,\n 5.9395e-12],\n [3.5529e-11, 8.0316e-11, 3.9962e-11, ..., 9.2478e-11, 1.5526e-11,\n 3.4697e-11],\n ...,\n [5.5385e-11, 4.8712e-11, 1.1628e-11, ..., 1.2376e-11, 1.3077e-11,\n 9.9870e-12],\n [1.7454e-11, 2.2682e-11, 1.6499e-11, ..., 6.1711e-11, 1.1878e-11,\n 1.6424e-11],\n [2.6227e-11, 6.1161e-11, 1.6096e-11, ..., 5.6190e-11, 1.4826e-11,\n 2.0356e-11]], device='cuda:0')" + }, + "16": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 1.0501e-05, 5.3926e-06, -2.7590e-06, ..., -8.8165e-07,\n 2.3332e-05, 4.3302e-05], device='cuda:0')", + "exp_avg_sq": "tensor([1.2585e-08, 4.9182e-09, 3.3158e-08, ..., 2.2331e-08, 2.5014e-08,\n 2.3361e-08], device='cuda:0')" + }, + "17": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-5.7098e-08, 4.5505e-08, -5.7311e-07, ..., 4.1823e-08,\n -1.9255e-07, -4.4167e-08],\n [-9.1932e-09, 2.4639e-08, 1.6704e-07, ..., 2.7333e-07,\n 8.2548e-08, 6.6427e-08],\n [ 1.1281e-07, 6.9555e-08, 9.8875e-10, ..., -1.9477e-07,\n -1.3191e-07, 1.8045e-07],\n ...,\n [-6.8072e-08, 3.1144e-07, 2.1738e-07, ..., 3.6211e-08,\n 3.8401e-08, -8.0782e-08],\n [-1.7077e-08, 1.3673e-07, 9.8708e-07, ..., 5.4495e-07,\n 2.0689e-07, 1.1628e-07],\n [ 1.2619e-09, 1.6163e-07, 5.1317e-07, ..., -1.8720e-07,\n 2.4568e-07, 1.1081e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.1927e-13, 7.2047e-14, 3.3514e-12, ..., 3.6308e-13, 6.7017e-13,\n 7.5042e-13],\n [4.5452e-13, 9.2500e-14, 5.1874e-12, ..., 6.5107e-13, 5.5799e-13,\n 1.0200e-12],\n [3.5739e-13, 8.6370e-14, 4.7888e-12, ..., 7.7075e-13, 4.3211e-13,\n 5.8741e-13],\n ...,\n [3.0714e-13, 5.3964e-13, 1.5652e-12, ..., 2.1724e-12, 6.7390e-13,\n 9.4225e-13],\n [2.6528e-13, 1.1609e-13, 1.5365e-11, ..., 3.1583e-12, 5.3618e-13,\n 1.0474e-12],\n [3.6188e-13, 2.1327e-13, 3.5254e-12, ..., 1.9243e-12, 6.5423e-13,\n 7.4669e-13]], device='cuda:0')" + }, + "18": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 5.0064e-07, 1.6093e-06, -1.7206e-06, ..., -7.2410e-07,\n -8.4424e-08, 4.5196e-07],\n [ 9.7230e-08, 2.2219e-07, -1.7763e-09, ..., -4.2572e-07,\n 4.3428e-08, 8.7359e-08],\n [ 9.4961e-08, 3.3866e-06, -1.2528e-07, ..., -4.1746e-07,\n -6.5925e-07, 2.6044e-07],\n ...,\n [ 1.6064e-06, 4.4494e-08, 7.5617e-07, ..., 1.1114e-07,\n 1.0986e-06, -8.2267e-07],\n [-1.2123e-06, 5.5734e-06, 8.5335e-07, ..., 6.4433e-07,\n -4.6682e-07, -1.0813e-06],\n [-2.8003e-07, -2.8065e-06, -5.0842e-07, ..., -9.7622e-07,\n -1.4040e-06, 4.4895e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.4781e-11, 5.9480e-11, 2.9713e-11, ..., 1.5318e-11, 5.8279e-12,\n 1.7759e-11],\n [2.8353e-12, 3.9859e-12, 1.4131e-12, ..., 1.4628e-11, 1.6218e-12,\n 1.5250e-12],\n [4.2041e-11, 9.7246e-11, 2.2850e-11, ..., 9.0085e-11, 1.4751e-11,\n 2.9004e-11],\n ...,\n [7.7818e-11, 1.5524e-10, 7.3156e-11, ..., 2.6444e-11, 3.6521e-11,\n 2.1904e-11],\n [3.2666e-11, 1.2965e-10, 3.8474e-11, ..., 1.7634e-11, 1.3744e-11,\n 1.1464e-11],\n [7.8870e-11, 7.5544e-11, 3.7541e-11, ..., 4.5284e-11, 2.0979e-11,\n 1.7749e-11]], device='cuda:0')" + }, + "19": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-4.2849e-06, -2.5117e-07, 1.9276e-05, ..., 2.3766e-05,\n -1.9594e-05, 4.1119e-06], device='cuda:0')", + "exp_avg_sq": "tensor([1.5245e-08, 3.7873e-09, 3.6870e-08, ..., 3.3507e-08, 2.3667e-08,\n 2.9228e-08], device='cuda:0')" + }, + "20": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-1.4642e-07, 4.3724e-08, -1.5410e-07, ..., -5.8224e-07,\n -1.3080e-07, -4.3208e-08],\n [-2.4994e-08, 1.8258e-07, 3.8244e-09, ..., 1.8056e-08,\n 2.6616e-07, -1.7045e-08],\n [ 3.0195e-08, 5.4458e-08, -2.6717e-07, ..., -6.9361e-07,\n 4.6901e-07, 1.5106e-07],\n ...,\n [-6.6393e-08, 5.2380e-07, 8.6883e-08, ..., 4.9353e-07,\n -3.9082e-09, -2.5480e-08],\n [-2.2284e-07, 2.8922e-08, -2.1180e-08, ..., 4.5109e-07,\n 1.6604e-07, -9.7914e-08],\n [-1.3904e-07, 9.2332e-08, -2.6331e-07, ..., -2.5100e-07,\n 1.1235e-07, -4.3173e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.6521e-13, 2.9916e-14, 1.5129e-12, ..., 1.7894e-12, 9.3118e-13,\n 6.4059e-13],\n [2.8565e-13, 6.3768e-14, 1.3042e-12, ..., 7.1364e-13, 1.4330e-12,\n 8.4103e-13],\n [4.3544e-13, 3.7084e-14, 8.2672e-13, ..., 1.7505e-12, 2.2185e-12,\n 1.0572e-12],\n ...,\n [4.1118e-13, 6.9112e-14, 1.5761e-12, ..., 1.3427e-12, 2.2140e-12,\n 1.3126e-12],\n [3.6952e-13, 3.8899e-14, 1.4065e-12, ..., 1.6706e-12, 1.7164e-12,\n 8.6243e-13],\n [3.5337e-13, 5.7159e-14, 1.1381e-12, ..., 8.4393e-13, 2.4612e-12,\n 1.9295e-12]], device='cuda:0')" + }, + "21": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 7.9927e-07, -3.1271e-07, 6.6902e-08, ..., 4.2622e-07,\n 1.5058e-07, -1.4401e-07],\n [ 3.1766e-06, 2.9521e-07, -1.4921e-06, ..., 2.0341e-06,\n 7.9944e-07, 1.5961e-06],\n [ 1.4089e-06, -1.9647e-06, 2.8902e-07, ..., -6.2738e-07,\n 3.3717e-06, 4.6923e-07],\n ...,\n [-1.3177e-07, 1.6644e-07, -4.6603e-07, ..., 3.5365e-07,\n -8.5843e-07, -5.0986e-08],\n [-9.5869e-07, -2.4651e-06, -1.3824e-06, ..., -1.3974e-06,\n 9.0065e-08, -1.3863e-06],\n [-5.3077e-06, 5.8930e-06, 2.7554e-06, ..., -2.2841e-06,\n 7.4017e-09, -1.5049e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.9317e-11, 3.1694e-11, 5.1782e-12, ..., 4.0290e-12, 6.7619e-12,\n 5.6289e-12],\n [1.0438e-10, 9.0662e-11, 1.4971e-11, ..., 6.2055e-11, 1.6560e-11,\n 2.2861e-11],\n [5.2553e-11, 7.6646e-11, 1.1768e-11, ..., 1.1557e-11, 5.2870e-11,\n 1.7898e-11],\n ...,\n [6.0097e-12, 8.3066e-12, 1.0196e-11, ..., 4.6650e-11, 1.8009e-11,\n 6.2494e-12],\n [1.1417e-10, 2.2012e-10, 8.6473e-11, ..., 1.1092e-10, 4.9132e-11,\n 6.9395e-11],\n [2.7411e-11, 4.0032e-11, 1.2618e-11, ..., 3.2784e-11, 1.3546e-11,\n 1.3862e-11]], device='cuda:0')" + }, + "22": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 1.0493e-06, 6.3731e-05, 5.2313e-05, ..., 8.8996e-06,\n -3.1595e-05, -7.2035e-05], device='cuda:0')", + "exp_avg_sq": "tensor([7.8132e-09, 3.5387e-08, 2.4785e-08, ..., 1.5803e-08, 6.4886e-08,\n 1.8555e-08], device='cuda:0')" + }, + "23": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-5.0049e-09, -1.0278e-08, 1.1379e-07, ..., -1.4796e-09,\n -1.1904e-07, -1.0014e-07],\n [-3.4476e-08, 1.1204e-08, 2.0296e-07, ..., 5.3648e-08,\n -4.5852e-07, 7.8854e-08],\n [-1.7917e-08, 2.4038e-07, 1.6473e-07, ..., 8.5640e-08,\n 2.8058e-07, -9.4299e-08],\n ...,\n [-2.2346e-08, 4.4137e-08, -2.5131e-07, ..., 1.3667e-07,\n 2.2614e-07, 1.0055e-07],\n [-1.1376e-08, 1.9600e-08, -2.1563e-07, ..., -3.3568e-09,\n 9.7481e-08, -3.4236e-08],\n [-1.1712e-08, 1.1509e-08, 5.0106e-08, ..., 1.2665e-08,\n 3.4757e-07, 5.7376e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.5336e-14, 8.3878e-14, 1.9274e-13, ..., 2.1016e-13, 1.9758e-13,\n 9.1746e-14],\n [1.8286e-13, 1.7863e-13, 7.0636e-13, ..., 4.8010e-13, 4.8052e-13,\n 1.5918e-13],\n [1.0207e-13, 2.0504e-13, 7.1212e-13, ..., 3.6530e-13, 7.3564e-13,\n 1.9218e-13],\n ...,\n [1.0802e-13, 3.0890e-13, 6.5461e-13, ..., 4.0089e-13, 5.1061e-13,\n 2.2619e-13],\n [1.3861e-13, 4.0219e-13, 8.0543e-13, ..., 3.7478e-13, 5.7123e-13,\n 1.0225e-13],\n [1.8078e-13, 2.4768e-13, 4.6570e-13, ..., 8.4315e-13, 9.8324e-13,\n 1.0680e-13]], device='cuda:0')" + }, + "24": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-4.0942e-07, 5.4089e-07, -3.3553e-07, ..., -6.2269e-08,\n -2.0688e-07, 7.5729e-07],\n [-2.7199e-07, -2.6543e-07, -2.7639e-07, ..., 1.0583e-07,\n -8.4993e-08, 4.3500e-07],\n [ 4.4631e-07, -1.4815e-06, -7.4557e-07, ..., -9.8311e-07,\n -2.6243e-07, -3.2021e-07],\n ...,\n [ 2.5987e-07, 6.4424e-07, -1.4358e-07, ..., 3.3764e-07,\n 4.9710e-07, -1.6990e-07],\n [ 5.9077e-07, 3.4228e-06, -1.0141e-06, ..., -3.8670e-07,\n 6.6641e-08, 2.0436e-07],\n [-6.4318e-07, -6.0448e-07, 6.0120e-07, ..., 4.6550e-07,\n 4.3785e-07, 5.5036e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.9232e-11, 3.2446e-11, 7.1241e-12, ..., 1.7216e-11, 1.0341e-11,\n 5.0206e-12],\n [8.3343e-12, 1.1608e-11, 6.3833e-12, ..., 4.6880e-11, 7.9017e-12,\n 1.2217e-11],\n [1.0339e-11, 3.2003e-11, 1.2892e-11, ..., 3.2361e-11, 6.9589e-12,\n 7.7384e-12],\n ...,\n [7.1433e-11, 3.9378e-11, 6.3875e-12, ..., 1.7218e-11, 1.4001e-11,\n 7.7377e-12],\n [8.1940e-12, 2.5584e-11, 2.9985e-11, ..., 1.4065e-11, 4.5470e-12,\n 6.5105e-12],\n [1.0903e-11, 2.4256e-11, 2.8303e-11, ..., 2.1945e-11, 3.3237e-12,\n 5.8349e-12]], device='cuda:0')" + }, + "25": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 2.8432e-06, 4.0824e-06, -1.2051e-05, ..., 3.0090e-06,\n 4.1230e-06, -4.4278e-06], device='cuda:0')", + "exp_avg_sq": "tensor([1.0943e-08, 1.5794e-08, 1.0522e-08, ..., 1.4796e-08, 9.1233e-09,\n 7.9258e-09], device='cuda:0')" + }, + "26": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 3.8110e-08, -2.7387e-09, -3.2622e-08, ..., 2.5165e-10,\n -8.5326e-08, 2.4248e-07],\n [-3.0149e-08, -9.3179e-08, 2.2204e-08, ..., -8.9025e-08,\n 7.3475e-09, -1.8457e-07],\n [-1.9623e-08, -1.0703e-07, 1.5977e-08, ..., 5.7561e-08,\n -1.5707e-07, 6.3199e-07],\n ...,\n [-5.2186e-08, 4.5815e-08, -2.2803e-08, ..., -1.5981e-09,\n 2.1373e-07, -3.6760e-07],\n [ 1.9395e-08, -3.1603e-08, -1.0378e-07, ..., 1.0586e-07,\n -8.0169e-09, 1.2529e-07],\n [-1.0775e-08, 8.4521e-08, 3.3475e-08, ..., -1.0481e-07,\n 9.6896e-08, -1.1017e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.0426e-13, 1.1629e-13, 8.5969e-14, ..., 1.1483e-13, 1.6204e-13,\n 1.1503e-13],\n [1.3511e-13, 1.6002e-13, 1.4216e-13, ..., 1.7825e-13, 2.8486e-13,\n 9.8909e-14],\n [2.3196e-13, 1.5269e-13, 1.8542e-13, ..., 3.3629e-13, 3.5857e-13,\n 2.4370e-13],\n ...,\n [1.2407e-13, 1.2665e-13, 1.2749e-13, ..., 1.9128e-13, 2.3206e-13,\n 1.8332e-13],\n [1.2539e-13, 2.0713e-13, 1.2405e-13, ..., 5.1324e-13, 1.5339e-13,\n 1.5217e-13],\n [1.4349e-13, 1.3651e-13, 1.5384e-13, ..., 3.0079e-13, 3.3102e-13,\n 1.2927e-13]], device='cuda:0')" + }, + "27": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([6.9540e-19], device='cuda:0')" + }, + "28": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([9.3510e-21, 6.0582e-21, 2.1670e-21], device='cuda:0')" + }, + "29": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([7.0092e-20, 9.2756e-21, 1.9298e-20, 8.1023e-21, 3.6805e-21, 9.1337e-21,\n 5.6282e-20, 8.3744e-20, 3.8781e-21], device='cuda:0')" + }, + "31": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.3550e-25, 2.7123e-26, 4.9772e-26, ..., 3.2959e-26, 7.8219e-27,\n 1.0187e-25],\n [6.5425e-27, 8.1995e-26, 3.0305e-27, ..., 6.8347e-28, 9.3081e-28,\n 4.3395e-27],\n [4.7264e-25, 6.1954e-26, 7.1053e-25, ..., 4.8560e-26, 6.6648e-26,\n 6.0558e-26],\n ...,\n [8.5270e-27, 6.4462e-27, 2.3466e-27, ..., 4.3727e-27, 7.8661e-28,\n 2.9837e-27],\n [4.6166e-27, 7.1099e-27, 1.4194e-26, ..., 5.4040e-27, 3.7517e-27,\n 5.0485e-27],\n [3.3710e-25, 5.7745e-26, 1.4038e-25, ..., 1.0651e-25, 2.9722e-26,\n 1.0676e-25]], device='cuda:0')" + }, + "32": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.1321e-22, 7.0717e-24, 6.0733e-22, 8.1120e-23, 1.1161e-23, 2.9474e-22,\n 3.6667e-24, 1.3982e-21, 5.7392e-24, 5.8271e-22, 5.7800e-22, 6.2529e-23,\n 3.7141e-22, 8.5081e-24, 3.8795e-23, 7.1340e-22, 2.1560e-23, 1.1653e-22,\n 7.9592e-22, 2.6448e-22, 7.7865e-22, 1.2131e-23, 1.0028e-22, 2.1958e-22,\n 7.8173e-24, 1.3321e-21, 5.1250e-24, 2.3640e-22, 6.3940e-22, 1.5467e-23,\n 9.2121e-23, 2.6277e-24, 4.2201e-22, 1.2866e-22, 9.8124e-23, 2.8694e-22,\n 9.5312e-24, 5.1519e-23, 3.0128e-23, 2.0466e-22, 1.5048e-23, 1.0918e-22,\n 1.4010e-22, 4.3257e-22, 1.5950e-22, 3.5045e-23, 2.1671e-22, 3.6422e-24,\n 1.2192e-22, 1.1320e-21, 2.3842e-22, 1.5527e-22, 1.2318e-22, 3.8099e-24,\n 3.6177e-22, 5.3972e-24, 5.4654e-24, 9.8762e-22, 1.9675e-21, 8.1626e-24,\n 3.9236e-23, 9.3920e-23, 6.3304e-22, 2.0370e-23, 9.7299e-24, 7.6918e-24,\n 6.8896e-24, 2.1432e-22, 6.3750e-23, 1.1923e-22, 1.2927e-23, 7.6039e-24,\n 2.3986e-21, 2.2303e-22, 7.1803e-24, 1.9982e-22, 9.4727e-23, 2.8485e-23,\n 9.7213e-24, 2.2851e-23, 1.5653e-22, 2.6221e-23, 1.4439e-22, 3.0706e-23,\n 1.8868e-23, 1.5694e-23, 4.0604e-23, 6.7148e-23, 2.3264e-22, 5.4127e-22,\n 1.6047e-22, 6.4352e-22, 1.2885e-23, 9.7704e-22, 1.4437e-24, 1.2251e-22,\n 9.2356e-22, 9.7770e-24, 1.4561e-22, 2.1444e-22, 5.7615e-23, 2.5248e-22,\n 1.0262e-22, 1.9455e-22, 4.7335e-23, 3.8305e-22, 1.8903e-23, 6.7814e-23,\n 2.0573e-22, 2.0311e-23, 2.6828e-22, 3.2617e-22, 1.1610e-22, 3.5262e-22,\n 1.6512e-21, 9.1947e-24, 1.5532e-23, 7.8795e-23, 4.9460e-23, 1.4267e-23,\n 3.0782e-22, 2.3572e-22, 1.0603e-22, 1.7975e-22, 3.8431e-22, 2.5384e-23,\n 1.4937e-22, 1.0012e-22, 4.7679e-22, 7.8778e-23, 1.8597e-23, 4.1214e-22,\n 3.9339e-24, 1.8360e-23, 6.7456e-23, 5.4983e-22, 7.5496e-23, 1.4229e-21,\n 5.6182e-23, 3.0024e-23, 3.2847e-23, 1.5590e-22, 5.7752e-23, 7.4890e-24,\n 2.4997e-23, 1.1411e-21, 2.6184e-23, 5.4874e-22, 3.4536e-22, 5.5419e-23,\n 1.8624e-21, 9.7132e-24, 2.6412e-22, 3.1127e-22, 6.6255e-23, 6.6427e-24,\n 1.1239e-24, 6.6440e-22, 2.6025e-21, 2.0261e-23, 5.5347e-22, 1.7491e-22,\n 6.4921e-23, 1.1238e-22, 3.2497e-22, 7.3487e-22, 2.2913e-21, 5.4206e-22,\n 1.6439e-21, 5.9502e-22, 8.0200e-22, 1.7648e-22, 3.3776e-22, 1.8482e-22,\n 8.9631e-23, 3.6223e-22, 1.0153e-22, 1.3216e-21, 2.0586e-22, 3.0137e-22,\n 3.0202e-22, 4.4504e-23, 4.6488e-23, 7.6819e-22, 3.6348e-22, 3.1518e-22,\n 5.0854e-22, 6.6537e-24, 1.6924e-23, 4.1229e-22, 1.5464e-22, 3.2215e-22,\n 1.2848e-21, 6.6101e-23, 2.8459e-23, 2.1849e-24, 1.9183e-23, 1.2825e-23,\n 7.1130e-22, 1.6981e-21, 2.7352e-22, 2.9511e-24, 5.9361e-24, 3.4130e-24,\n 8.1625e-24, 1.1809e-23, 2.5536e-22, 6.2064e-24, 3.7242e-22, 6.8236e-22,\n 1.4957e-22, 2.2323e-21, 3.2949e-22, 9.1434e-23, 1.7370e-22, 2.8482e-22,\n 2.7945e-23, 2.6269e-23, 4.6217e-23, 5.2289e-22, 8.9305e-22, 1.1102e-23,\n 2.1063e-23, 9.2784e-25, 2.7302e-21, 1.3860e-21, 1.7475e-22, 3.8174e-23,\n 4.3499e-22, 5.5476e-23, 9.7600e-23, 7.2339e-24, 4.8759e-22, 1.4498e-23,\n 4.1561e-23, 1.9073e-23, 2.3680e-22, 1.9463e-22, 1.5375e-22, 1.5460e-23,\n 2.4703e-22, 5.3867e-22, 1.0540e-21, 1.5258e-22, 2.5478e-23, 3.9338e-22,\n 4.1356e-22, 2.7246e-22, 1.3143e-22, 2.3546e-23, 9.4264e-23, 8.4241e-23,\n 8.0338e-22, 1.2742e-23, 6.3705e-24, 6.3559e-22], device='cuda:0')" + }, + "33": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([6.4962e-25, 1.1639e-26, 7.8033e-25, 5.6533e-26, 1.5662e-26, 2.7491e-25,\n 8.8048e-27, 3.8850e-24, 1.9407e-27, 7.0328e-25, 1.9474e-24, 2.7961e-26,\n 8.9008e-25, 7.7900e-27, 4.5528e-27, 5.3935e-24, 5.1518e-26, 5.8844e-25,\n 1.2656e-23, 7.2778e-25, 8.5370e-25, 2.5003e-26, 2.4321e-26, 5.9881e-25,\n 3.6552e-27, 6.6530e-24, 1.2258e-26, 2.7640e-25, 6.5004e-25, 1.9502e-25,\n 6.0219e-26, 7.0694e-27, 4.6816e-25, 4.0195e-26, 1.8146e-25, 2.1376e-25,\n 1.3179e-26, 2.4692e-26, 7.3757e-27, 5.7143e-25, 4.1605e-26, 8.4248e-26,\n 4.4603e-25, 1.1075e-24, 3.7150e-25, 2.0900e-25, 1.6841e-25, 8.8596e-27,\n 2.8370e-25, 8.5529e-24, 1.0536e-25, 1.9181e-26, 1.3017e-25, 1.6108e-26,\n 2.6821e-25, 5.6108e-27, 1.1587e-26, 2.2952e-24, 1.1123e-24, 3.0004e-27,\n 4.0186e-25, 4.6822e-26, 7.2035e-25, 1.8408e-26, 8.7430e-27, 2.1106e-27,\n 1.9355e-26, 1.1205e-25, 9.2123e-27, 1.8253e-25, 1.4904e-26, 1.1521e-26,\n 3.4857e-24, 6.3531e-25, 1.1325e-26, 2.3699e-25, 1.3181e-25, 2.0270e-26,\n 1.9417e-26, 1.7723e-25, 2.3906e-24, 4.3709e-27, 3.5630e-25, 1.2607e-26,\n 1.1871e-26, 1.3263e-26, 4.2460e-26, 1.1670e-25, 1.0489e-24, 8.2845e-25,\n 1.8197e-25, 4.2560e-24, 2.0643e-26, 1.5853e-24, 8.5459e-27, 5.0555e-25,\n 2.7089e-24, 1.4433e-26, 1.4816e-25, 1.1533e-24, 6.7130e-26, 2.9813e-25,\n 3.0510e-25, 7.9716e-25, 5.7764e-27, 1.8374e-25, 6.8177e-26, 7.0032e-26,\n 4.0416e-26, 9.6456e-26, 1.0487e-24, 1.5659e-24, 2.6424e-25, 5.4100e-24,\n 5.2326e-24, 3.0983e-27, 3.8172e-26, 4.7573e-26, 2.6042e-25, 4.1484e-27,\n 4.1905e-25, 1.0522e-25, 2.2758e-25, 5.9941e-26, 8.5741e-25, 3.2577e-25,\n 2.2195e-25, 7.0024e-26, 1.4509e-24, 7.3882e-27, 6.8391e-27, 5.5528e-25,\n 3.4497e-26, 5.4821e-26, 1.4220e-26, 7.4410e-25, 1.6789e-25, 4.6920e-24,\n 5.7379e-26, 9.6750e-27, 2.3233e-26, 1.0988e-25, 2.3292e-26, 5.5032e-27,\n 6.6666e-27, 1.0168e-24, 1.1395e-26, 1.2199e-24, 1.5109e-25, 2.8658e-26,\n 2.0907e-24, 1.6442e-26, 1.5610e-25, 2.2503e-25, 2.7040e-25, 5.5483e-27,\n 2.3266e-27, 1.8852e-24, 3.5209e-23, 2.4992e-26, 6.5861e-24, 3.0718e-25,\n 5.1853e-26, 1.1536e-25, 7.8504e-25, 8.9160e-25, 2.7281e-24, 2.6944e-25,\n 1.7351e-24, 1.3778e-24, 2.7789e-24, 3.8805e-25, 5.9831e-25, 1.5087e-24,\n 6.0358e-26, 1.2729e-25, 1.4011e-25, 1.1777e-24, 1.3313e-25, 3.0484e-24,\n 2.3349e-25, 6.8329e-26, 1.0038e-25, 7.2505e-25, 2.5732e-24, 1.5425e-25,\n 2.7464e-24, 6.1814e-27, 5.3440e-26, 3.9120e-25, 8.3849e-26, 1.8521e-24,\n 2.3084e-24, 2.0331e-25, 1.5118e-26, 6.1017e-27, 1.5539e-26, 2.1101e-26,\n 1.7315e-24, 2.0998e-23, 1.6069e-25, 8.5774e-27, 1.9461e-26, 4.0384e-28,\n 2.4434e-26, 1.2855e-26, 1.2293e-25, 1.0049e-26, 3.1758e-25, 1.7059e-24,\n 1.1032e-24, 3.4185e-24, 9.9990e-25, 3.0988e-26, 3.4398e-25, 2.2625e-25,\n 1.5486e-26, 2.7044e-26, 1.6483e-26, 1.8671e-24, 1.7850e-24, 2.4689e-26,\n 8.0894e-26, 1.7401e-26, 4.8234e-24, 1.8370e-24, 1.0965e-25, 1.3958e-26,\n 8.8828e-25, 6.0182e-26, 2.2868e-25, 3.2659e-27, 1.8061e-24, 3.8890e-26,\n 1.1135e-26, 7.7008e-27, 7.4949e-25, 8.8186e-26, 9.6975e-26, 2.3933e-26,\n 2.1403e-25, 7.6481e-25, 1.4905e-24, 1.1799e-25, 3.1734e-26, 8.7637e-25,\n 2.6116e-24, 5.2189e-25, 2.7946e-25, 1.5884e-26, 2.0279e-25, 1.1713e-26,\n 4.6743e-24, 2.2926e-26, 9.3198e-27, 3.7047e-24], device='cuda:0')" + }, + "34": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([5.1957e-25, 4.4097e-26, 9.1831e-25, 1.0090e-25, 5.0507e-26, 8.0131e-25,\n 9.9653e-26, 4.3128e-24, 7.2844e-26, 1.1270e-24, 1.1451e-24, 1.0020e-25,\n 1.6980e-24, 1.4373e-26, 5.1979e-26, 2.8391e-24, 1.7623e-25, 7.2083e-25,\n 3.0036e-24, 1.0038e-24, 2.5685e-24, 1.7734e-25, 1.2110e-25, 4.1350e-25,\n 2.2868e-27, 4.5299e-24, 5.7212e-27, 1.7765e-25, 6.8671e-25, 1.1215e-25,\n 4.3058e-25, 2.4949e-27, 6.1248e-25, 1.2895e-25, 4.9027e-25, 7.2575e-26,\n 4.7536e-26, 1.9297e-25, 1.4930e-25, 7.6308e-25, 1.0018e-25, 1.2713e-25,\n 4.0292e-25, 1.9175e-24, 5.8245e-25, 2.9749e-25, 6.4827e-25, 2.9655e-26,\n 3.3202e-25, 3.2634e-24, 2.2827e-25, 6.9524e-26, 3.0989e-25, 5.1551e-26,\n 4.2060e-25, 4.5026e-27, 1.8041e-26, 3.4224e-24, 2.5725e-24, 1.2745e-26,\n 1.2759e-25, 2.0894e-25, 2.1723e-24, 9.0726e-26, 4.3075e-26, 7.3598e-26,\n 4.1586e-27, 2.8834e-25, 7.9947e-26, 4.8809e-25, 7.6261e-26, 1.6375e-26,\n 2.8071e-24, 4.5421e-25, 1.4472e-26, 6.7759e-25, 2.9468e-25, 6.9096e-26,\n 1.4399e-25, 2.1406e-25, 1.2297e-24, 2.2946e-25, 3.3391e-25, 3.7709e-26,\n 3.7491e-27, 1.0660e-25, 3.1487e-26, 3.9325e-25, 1.2094e-24, 1.4546e-24,\n 1.9882e-25, 1.4525e-24, 6.3442e-26, 1.2073e-24, 1.2809e-27, 7.4510e-25,\n 1.0653e-24, 4.2490e-26, 1.6966e-25, 5.2091e-25, 2.0686e-25, 3.6307e-25,\n 2.8187e-25, 7.1648e-25, 4.3924e-26, 2.6665e-25, 1.4527e-25, 2.4423e-25,\n 1.2527e-25, 4.3326e-26, 8.8226e-25, 1.3877e-24, 5.2912e-25, 1.9104e-24,\n 1.8197e-24, 1.6194e-26, 6.3962e-26, 1.8494e-25, 3.6371e-25, 3.2397e-26,\n 5.9754e-25, 4.7598e-25, 3.7676e-25, 5.2746e-25, 1.3313e-24, 1.7990e-25,\n 3.8605e-25, 2.0308e-25, 6.9959e-25, 1.8079e-25, 4.1875e-26, 6.1652e-25,\n 6.3238e-26, 7.2907e-26, 2.2525e-25, 9.1030e-25, 2.3374e-25, 1.6262e-24,\n 2.6222e-25, 3.8556e-26, 8.1620e-26, 2.3220e-25, 5.2742e-26, 5.2099e-26,\n 4.7498e-26, 1.2840e-24, 8.1123e-26, 1.1170e-24, 4.3486e-25, 8.5342e-26,\n 1.9397e-24, 1.6494e-26, 1.5604e-25, 1.1877e-24, 5.6160e-25, 2.1398e-26,\n 1.0707e-27, 2.6078e-24, 8.7399e-24, 2.2840e-25, 2.7797e-24, 3.1314e-25,\n 2.7972e-25, 1.3377e-25, 6.0614e-25, 2.0958e-24, 6.2451e-24, 1.7918e-24,\n 1.7472e-24, 9.1111e-25, 2.4905e-24, 7.3750e-25, 1.4946e-24, 1.1237e-24,\n 1.6933e-25, 3.4327e-25, 1.3891e-25, 1.1230e-24, 7.9331e-25, 1.8511e-24,\n 1.9548e-25, 2.1805e-25, 2.4792e-25, 7.9133e-25, 1.5556e-24, 2.5354e-25,\n 2.5203e-24, 5.7888e-26, 2.2408e-26, 1.3643e-25, 3.0658e-25, 1.5497e-24,\n 1.4437e-24, 2.1493e-25, 1.2388e-25, 1.6283e-26, 2.8368e-26, 5.0820e-26,\n 2.1307e-24, 6.2314e-24, 6.5389e-26, 4.0350e-27, 8.1032e-26, 7.3345e-27,\n 8.5986e-26, 3.1290e-26, 2.8443e-25, 5.8112e-26, 4.2116e-25, 1.8561e-24,\n 2.8558e-25, 5.6341e-24, 1.1224e-24, 1.2484e-25, 9.2175e-25, 5.4499e-25,\n 3.1201e-25, 2.3049e-25, 2.4684e-25, 2.0987e-24, 2.2852e-24, 5.1917e-26,\n 7.1361e-26, 3.9540e-27, 7.2514e-24, 4.1375e-24, 1.8474e-25, 1.3945e-25,\n 1.0565e-24, 1.2518e-25, 3.8064e-25, 1.4783e-26, 1.9884e-24, 3.0097e-26,\n 3.5706e-26, 8.1018e-26, 1.0838e-24, 5.5225e-25, 4.6791e-25, 1.0842e-25,\n 1.0949e-24, 6.5306e-25, 9.4623e-25, 3.4723e-25, 3.6206e-26, 1.4062e-24,\n 6.6043e-25, 1.1636e-24, 6.3128e-25, 4.3212e-26, 3.5215e-25, 1.2464e-25,\n 3.3960e-24, 6.3725e-27, 3.7308e-26, 2.9100e-24], device='cuda:0')" + }, + "35": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.5866e-27, 4.1014e-26, 3.0747e-27, ..., 2.8218e-26, 5.6486e-27,\n 4.7272e-27],\n [1.3843e-26, 5.7585e-26, 1.0086e-26, ..., 1.5908e-26, 1.0468e-26,\n 4.1813e-26],\n [1.9745e-25, 4.7709e-26, 3.8948e-25, ..., 4.3908e-26, 2.8855e-26,\n 3.8431e-26],\n ...,\n [5.9742e-26, 3.7578e-27, 1.0562e-26, ..., 1.6900e-25, 6.5555e-27,\n 2.1482e-26],\n [3.7130e-25, 4.1212e-26, 1.8274e-25, ..., 1.1920e-25, 1.8867e-26,\n 7.1381e-26],\n [1.6197e-26, 2.1475e-26, 2.5187e-26, ..., 6.8412e-27, 2.3473e-27,\n 7.1193e-27]], device='cuda:0')" + }, + "36": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.8907e-23, 1.8211e-23, 1.9209e-22, 3.2826e-22, 3.7329e-22, 3.1194e-22,\n 9.5793e-24, 7.0651e-22, 3.4643e-24, 2.4790e-23, 2.1454e-23, 7.0399e-23,\n 1.4809e-22, 2.7652e-23, 3.9094e-22, 2.4790e-22, 1.6781e-24, 3.5823e-23,\n 6.7776e-22, 5.1967e-22, 1.5959e-22, 4.6138e-24, 1.6663e-22, 1.3331e-22,\n 4.3401e-24, 8.6571e-22, 1.4951e-22, 4.5963e-22, 6.4898e-23, 1.8959e-23,\n 4.7508e-23, 1.1980e-23, 1.1387e-21, 5.3391e-23, 1.6859e-23, 1.1653e-22,\n 8.0654e-23, 3.6003e-23, 9.4294e-22, 3.6674e-23, 7.5667e-23, 3.3889e-22,\n 7.4602e-24, 9.4927e-22, 3.6500e-23, 4.5272e-23, 4.0138e-22, 2.3964e-24,\n 6.5521e-23, 6.9887e-22, 2.1684e-22, 1.2780e-22, 5.7817e-23, 1.7073e-22,\n 8.3273e-22, 1.8829e-23, 4.5938e-24, 3.1393e-22, 3.7522e-21, 1.3048e-22,\n 1.8833e-22, 1.4956e-22, 3.2011e-22, 4.7634e-23, 1.5927e-22, 5.0696e-24,\n 4.6497e-22, 4.6557e-22, 3.2086e-22, 1.0907e-22, 6.2333e-23, 1.9784e-22,\n 6.8998e-22, 1.2597e-22, 2.3159e-23, 2.4440e-24, 5.9716e-23, 5.2271e-23,\n 3.1306e-24, 5.4472e-23, 1.2170e-22, 8.5116e-23, 2.9017e-23, 1.2892e-23,\n 2.5523e-23, 3.6719e-22, 3.5320e-23, 1.0937e-23, 8.0580e-24, 5.2727e-22,\n 7.4082e-23, 5.6324e-22, 2.8876e-23, 2.6596e-22, 9.8149e-22, 8.1523e-23,\n 1.8091e-22, 5.8597e-24, 3.9937e-23, 2.1889e-22, 1.6220e-23, 3.3293e-23,\n 2.1329e-23, 1.2411e-23, 1.1380e-22, 5.6396e-22, 6.3112e-23, 1.1002e-23,\n 2.1523e-23, 1.3698e-22, 2.5991e-22, 8.9872e-23, 3.4972e-24, 1.8988e-24,\n 1.2197e-22, 5.7609e-23, 4.4307e-23, 1.0820e-22, 9.1365e-23, 1.2998e-22,\n 8.2422e-22, 6.6039e-22, 3.8379e-23, 1.7152e-23, 1.2836e-23, 6.8121e-22,\n 1.1173e-22, 6.2229e-24, 2.5603e-22, 9.3070e-22, 1.7971e-22, 3.6631e-22,\n 1.6808e-23, 7.0486e-23, 2.6838e-22, 3.2005e-23, 2.5254e-22, 6.4381e-22,\n 6.3152e-23, 2.3010e-24, 4.0986e-22, 1.8919e-24, 8.1982e-23, 1.2850e-22,\n 1.0325e-23, 4.6817e-22, 4.7969e-22, 1.9861e-23, 2.2773e-22, 3.5662e-22,\n 1.3347e-21, 1.8856e-23, 3.1220e-22, 2.9042e-22, 6.3878e-22, 1.0727e-22,\n 5.0147e-23, 8.0509e-23, 1.3624e-22, 1.9455e-24, 6.3075e-22, 3.9159e-23,\n 5.6290e-24, 8.1032e-23, 4.5947e-24, 3.3908e-22, 1.8817e-22, 2.4224e-22,\n 3.4615e-22, 8.4269e-25, 8.9344e-22, 7.8069e-24, 1.6587e-22, 2.3368e-22,\n 3.0871e-22, 1.6239e-22, 6.9056e-23, 3.0595e-24, 1.1776e-21, 1.2087e-24,\n 5.5670e-23, 8.3218e-23, 1.4088e-22, 1.8120e-22, 6.0363e-23, 2.1273e-23,\n 1.9688e-22, 7.9993e-23, 9.1793e-23, 5.2000e-22, 7.9333e-23, 2.7412e-22,\n 6.8353e-22, 3.2053e-22, 2.0900e-22, 2.7922e-22, 1.2210e-24, 4.1447e-22,\n 1.6131e-22, 4.7156e-22, 3.6509e-23, 2.9114e-23, 2.3465e-23, 2.2569e-23,\n 2.0424e-22, 2.9581e-22, 4.0124e-23, 4.3681e-23, 6.9779e-24, 2.2723e-23,\n 5.2265e-24, 3.2070e-23, 3.3975e-22, 1.4357e-22, 1.7331e-22, 1.3611e-22,\n 2.3393e-22, 1.2588e-22, 4.3059e-23, 2.9532e-24, 3.9039e-22, 1.7827e-23,\n 1.9008e-23, 1.6292e-23, 1.1023e-24, 3.4774e-22, 6.8937e-24, 5.7429e-22,\n 1.8275e-22, 1.0338e-22, 1.3417e-22, 3.8525e-23, 3.6243e-22, 2.2836e-23,\n 6.5811e-24, 1.1535e-23, 2.9760e-22, 9.0103e-23, 1.1778e-22, 9.1409e-23,\n 2.0541e-22, 2.1388e-22, 4.3713e-22, 1.8815e-22, 1.3118e-22, 8.2654e-22,\n 5.5810e-23, 1.1108e-22, 4.8508e-24, 8.6141e-25, 7.8429e-23, 1.6623e-22,\n 1.9422e-22, 1.1494e-22, 4.3335e-22, 2.4611e-23], device='cuda:0')" + }, + "37": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([9.2420e-27, 3.1317e-26, 2.3868e-25, 5.7973e-25, 1.3988e-24, 6.5719e-25,\n 2.4922e-26, 6.1167e-25, 3.1726e-26, 1.2443e-26, 1.7697e-26, 2.1206e-26,\n 5.4259e-25, 3.0346e-26, 5.5062e-25, 7.1560e-25, 1.0719e-26, 2.4861e-25,\n 4.6019e-24, 3.0704e-24, 1.5994e-25, 1.6623e-26, 4.2488e-25, 7.9165e-25,\n 2.0866e-27, 2.5932e-24, 1.5701e-25, 1.4520e-24, 2.6720e-26, 1.1142e-26,\n 9.2319e-26, 4.7881e-27, 2.7485e-24, 2.8099e-25, 1.0423e-25, 3.1648e-26,\n 3.8353e-26, 8.5054e-27, 1.1498e-23, 1.3846e-25, 6.9820e-26, 2.1656e-24,\n 9.4126e-27, 2.8936e-24, 1.7497e-26, 5.8858e-26, 5.2005e-25, 6.2857e-27,\n 6.3401e-26, 3.8442e-24, 1.2313e-25, 2.5842e-27, 1.6888e-25, 3.9419e-25,\n 3.0268e-24, 3.9580e-26, 3.6708e-26, 1.8474e-25, 1.1002e-23, 2.0034e-25,\n 7.6239e-25, 9.1203e-26, 6.1989e-25, 1.0638e-25, 1.2844e-25, 6.2283e-27,\n 3.5819e-24, 7.3287e-24, 5.9363e-25, 7.5069e-25, 3.0185e-26, 1.7392e-25,\n 2.3724e-24, 1.4445e-25, 1.9292e-26, 9.7937e-27, 5.2902e-26, 4.8145e-26,\n 9.3318e-27, 8.1865e-25, 3.9947e-25, 1.5694e-25, 1.0943e-26, 4.9436e-27,\n 9.5753e-27, 1.9133e-24, 1.6971e-26, 9.1311e-27, 3.2597e-26, 2.0550e-24,\n 5.5240e-25, 4.3252e-24, 1.1101e-26, 3.4354e-24, 1.6962e-24, 3.4984e-26,\n 1.0420e-25, 9.9645e-27, 1.3924e-25, 7.8171e-25, 1.1530e-25, 2.0988e-26,\n 2.0825e-25, 2.9871e-27, 1.2536e-25, 7.6555e-25, 2.0764e-25, 3.5064e-26,\n 2.9348e-26, 7.2682e-26, 8.6199e-25, 5.0500e-25, 2.5523e-27, 9.1026e-27,\n 1.5359e-25, 1.3926e-25, 5.8603e-26, 1.2402e-25, 3.3751e-26, 1.5921e-25,\n 1.5260e-24, 1.9262e-24, 3.4427e-26, 8.8415e-26, 1.3033e-26, 3.0112e-24,\n 1.5484e-25, 6.2825e-26, 1.4799e-24, 3.6129e-24, 3.8766e-25, 8.8268e-25,\n 1.6250e-26, 1.4266e-24, 8.5146e-25, 1.4187e-26, 1.2848e-24, 1.8123e-24,\n 2.2420e-25, 1.3678e-26, 6.5161e-25, 8.8623e-27, 4.4044e-26, 1.9101e-25,\n 1.5849e-26, 6.4060e-25, 1.7945e-24, 8.9927e-26, 2.6320e-24, 1.2298e-24,\n 1.7305e-24, 1.8887e-26, 7.6712e-25, 3.9571e-25, 5.9160e-24, 3.0701e-26,\n 9.4719e-26, 6.7866e-26, 1.2847e-25, 7.6060e-27, 1.7611e-24, 6.7303e-27,\n 1.4687e-26, 2.8185e-24, 1.3610e-26, 4.7035e-25, 3.7931e-26, 2.0077e-25,\n 4.9041e-25, 7.3000e-27, 2.2727e-24, 7.4733e-26, 3.7389e-25, 1.1609e-24,\n 4.0685e-25, 5.8652e-26, 7.9031e-26, 5.0247e-27, 6.0067e-24, 1.6068e-26,\n 1.4421e-25, 9.6779e-25, 2.5907e-25, 3.8064e-25, 3.2042e-25, 2.5995e-27,\n 1.5727e-24, 2.3756e-25, 2.2916e-25, 5.6131e-25, 3.9788e-26, 1.1434e-24,\n 1.0456e-24, 6.4503e-25, 2.9255e-25, 2.1470e-24, 2.6992e-26, 5.6510e-25,\n 1.4341e-25, 9.5902e-25, 3.0751e-26, 1.3486e-25, 5.8580e-27, 2.5108e-25,\n 1.7397e-25, 3.5099e-25, 9.9455e-27, 1.1178e-25, 4.1305e-27, 5.6094e-27,\n 1.0372e-27, 1.1717e-25, 3.3951e-25, 7.1834e-26, 4.6872e-25, 2.8511e-25,\n 2.7874e-25, 5.8315e-25, 1.9847e-26, 1.0946e-26, 1.7523e-25, 2.7881e-26,\n 1.8308e-26, 1.0475e-26, 8.3909e-26, 2.8526e-25, 3.5234e-27, 1.7222e-24,\n 3.6635e-25, 1.4395e-25, 4.6803e-25, 3.2447e-26, 2.1823e-24, 1.0036e-25,\n 1.6861e-26, 8.9201e-27, 2.3105e-24, 3.5359e-26, 2.6178e-25, 1.4098e-24,\n 5.4021e-25, 6.5281e-25, 1.2905e-24, 5.7223e-25, 1.4344e-25, 3.6694e-24,\n 1.1252e-25, 8.6329e-26, 2.2013e-27, 3.1471e-27, 5.1789e-26, 2.0745e-25,\n 4.9315e-25, 7.7730e-26, 1.0396e-24, 6.6406e-26], device='cuda:0')" + }, + "38": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.1775e-26, 5.2279e-26, 5.0633e-25, 4.1720e-25, 1.4097e-24, 1.0295e-24,\n 5.8377e-26, 1.9858e-24, 1.0238e-26, 9.0556e-26, 4.1409e-26, 1.5235e-25,\n 7.2184e-25, 2.1471e-25, 6.8199e-25, 1.0579e-24, 1.7701e-27, 2.4433e-25,\n 2.6507e-24, 1.6610e-24, 5.8238e-25, 7.2787e-26, 3.9426e-25, 6.2727e-25,\n 7.4329e-27, 3.0428e-24, 7.1566e-25, 1.2572e-24, 7.9569e-26, 3.2537e-26,\n 1.7815e-25, 9.6068e-28, 1.5300e-24, 4.2204e-25, 3.8791e-26, 6.6487e-26,\n 1.5061e-25, 1.4378e-25, 3.9690e-24, 1.0426e-25, 2.2740e-25, 8.8996e-25,\n 1.5438e-26, 3.2845e-24, 1.5532e-25, 2.3902e-25, 1.0843e-24, 3.3670e-27,\n 1.3836e-25, 2.2309e-24, 3.1599e-25, 1.4931e-25, 2.3035e-25, 1.3716e-24,\n 1.6540e-24, 1.9013e-25, 1.2851e-26, 1.0997e-24, 5.5730e-24, 5.2164e-25,\n 2.8375e-25, 2.9268e-25, 1.2276e-24, 4.0179e-25, 1.8680e-25, 5.7957e-27,\n 1.5415e-24, 3.3193e-24, 6.1775e-25, 5.1830e-25, 1.7315e-25, 3.7552e-25,\n 1.1744e-24, 2.4902e-25, 1.1556e-25, 3.2117e-26, 1.7969e-25, 8.9296e-26,\n 9.8358e-27, 4.3367e-25, 2.7607e-25, 2.8123e-25, 4.3192e-26, 2.3519e-26,\n 2.1099e-26, 1.5420e-24, 6.3987e-26, 4.5154e-26, 1.4471e-25, 1.4944e-24,\n 5.0439e-25, 1.7431e-24, 7.1714e-26, 1.6894e-24, 1.3020e-24, 3.4076e-25,\n 3.2903e-25, 5.3305e-26, 1.0505e-25, 7.7411e-25, 1.2941e-25, 5.0069e-26,\n 3.1762e-25, 5.8076e-26, 3.2779e-25, 6.4534e-25, 4.1294e-25, 2.1314e-25,\n 6.3190e-26, 1.3712e-25, 8.8158e-25, 3.9775e-25, 1.5845e-26, 3.0946e-26,\n 4.5718e-25, 3.2642e-25, 1.6828e-25, 4.1218e-25, 4.2990e-25, 3.8035e-25,\n 1.0409e-24, 1.7520e-24, 1.3080e-25, 4.0941e-26, 2.1859e-26, 2.1988e-24,\n 2.8730e-25, 2.9185e-26, 1.3873e-24, 7.9118e-25, 1.2249e-24, 7.6273e-25,\n 2.4188e-25, 6.1368e-25, 1.0972e-24, 9.7647e-26, 1.3251e-24, 1.1845e-24,\n 4.0507e-25, 3.0323e-27, 1.4182e-24, 3.6605e-27, 1.4274e-25, 4.2399e-25,\n 1.6863e-26, 7.1386e-25, 8.7385e-25, 2.7487e-25, 8.4315e-25, 1.1304e-24,\n 1.6165e-24, 2.2648e-26, 3.2380e-25, 9.8141e-25, 2.6982e-24, 1.0476e-25,\n 5.3511e-26, 3.5558e-25, 4.6841e-25, 3.0071e-27, 1.9199e-24, 6.3114e-26,\n 2.0053e-26, 1.1250e-24, 1.2562e-26, 1.1973e-24, 5.1592e-25, 7.9273e-25,\n 5.0756e-25, 1.0362e-27, 2.6191e-24, 1.2239e-25, 6.3672e-25, 1.1759e-24,\n 7.0397e-25, 1.7384e-25, 2.0605e-25, 6.8433e-27, 3.8613e-24, 4.9689e-27,\n 9.3322e-26, 6.2407e-25, 5.8076e-25, 3.5105e-25, 4.2023e-25, 1.7285e-26,\n 1.0154e-24, 5.1507e-25, 1.2924e-25, 3.7547e-25, 1.3536e-25, 7.2084e-25,\n 6.6541e-25, 6.2586e-25, 8.0977e-25, 8.0392e-25, 7.8352e-27, 9.6997e-25,\n 5.4393e-25, 1.5898e-24, 3.9739e-26, 9.6463e-26, 1.2066e-25, 1.4441e-25,\n 4.9618e-25, 8.5262e-25, 6.1774e-26, 3.3530e-25, 1.5597e-26, 1.6149e-25,\n 8.3441e-27, 4.2249e-25, 1.1595e-24, 2.4345e-25, 8.9698e-25, 3.4536e-25,\n 7.5715e-25, 1.2337e-24, 1.8050e-25, 1.8067e-26, 9.0238e-25, 1.7282e-25,\n 3.7838e-25, 8.2874e-26, 8.7614e-27, 1.1417e-24, 5.7362e-26, 1.2038e-24,\n 5.4533e-25, 4.6648e-25, 4.0969e-25, 9.2313e-26, 1.8750e-24, 1.0507e-25,\n 1.0194e-25, 3.0507e-27, 1.3420e-24, 2.4079e-25, 5.2541e-25, 4.8630e-25,\n 9.2110e-25, 4.7921e-25, 8.1342e-25, 6.6748e-25, 3.1417e-25, 2.8672e-24,\n 3.0350e-25, 4.1038e-25, 6.9602e-28, 1.0682e-26, 1.9257e-25, 2.7672e-25,\n 8.4742e-25, 1.6217e-25, 1.1709e-24, 1.4395e-25], device='cuda:0')" + }, + "39": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.0635e-26, 3.0107e-26, 5.5242e-26, ..., 1.8777e-26, 4.4273e-27,\n 2.5737e-26],\n [1.2779e-25, 1.6687e-25, 3.2335e-26, ..., 7.5369e-26, 1.7649e-26,\n 1.0473e-25],\n [4.0099e-25, 1.3609e-25, 2.8334e-25, ..., 6.0827e-26, 3.8370e-26,\n 6.9588e-26],\n ...,\n [9.7832e-25, 7.7691e-26, 1.4017e-25, ..., 1.4036e-25, 5.1311e-26,\n 1.5242e-25],\n [3.5991e-26, 7.6640e-26, 1.4404e-25, ..., 1.8466e-26, 5.5961e-27,\n 1.2818e-26],\n [1.7685e-26, 8.3649e-27, 5.5171e-26, ..., 9.0767e-27, 1.1167e-26,\n 4.9507e-27]], device='cuda:0')" + }, + "40": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([8.2360e-23, 1.1639e-22, 2.1029e-22, 2.4908e-22, 1.4418e-22, 2.2317e-22,\n 2.9135e-23, 3.4101e-23, 9.6848e-22, 5.4938e-23, 1.4930e-22, 2.7768e-22,\n 1.1128e-24, 6.3295e-22, 1.3065e-23, 1.2272e-22, 1.9044e-22, 3.9018e-23,\n 1.8729e-21, 4.6865e-22, 1.6680e-21, 4.1180e-23, 4.1614e-23, 2.6964e-22,\n 5.3396e-23, 1.7707e-22, 7.4298e-22, 6.3693e-22, 3.6102e-23, 2.6935e-22,\n 1.6625e-22, 2.6403e-22, 3.2987e-21, 2.5855e-23, 7.7513e-22, 2.0229e-22,\n 2.1785e-22, 6.5484e-23, 2.8815e-21, 1.7974e-22, 7.1830e-24, 4.0100e-22,\n 3.5884e-24, 8.6423e-23, 3.6250e-22, 1.1824e-23, 4.0107e-22, 2.1537e-22,\n 2.3667e-23, 6.5965e-22, 1.9338e-23, 8.7952e-22, 3.1372e-22, 2.4925e-24,\n 1.3599e-22, 2.9931e-22, 2.8584e-22, 5.6163e-22, 3.5465e-21, 2.4879e-22,\n 6.6386e-22, 2.1940e-22, 9.2369e-22, 9.6240e-23, 5.5540e-23, 2.0632e-23,\n 5.5413e-22, 2.6103e-23, 4.3202e-24, 7.3658e-22, 9.8165e-24, 3.1561e-23,\n 2.5945e-21, 3.3753e-22, 8.7175e-23, 7.3927e-23, 2.6275e-22, 2.9561e-22,\n 4.0340e-22, 2.1112e-23, 2.4510e-22, 3.4494e-23, 2.1173e-22, 4.2234e-22,\n 1.0889e-22, 1.0588e-22, 1.9011e-22, 1.5300e-22, 1.2098e-22, 3.0596e-22,\n 8.0192e-23, 5.0197e-23, 1.2147e-23, 7.8910e-22, 2.5657e-22, 2.2293e-22,\n 1.8132e-23, 6.1328e-23, 5.2844e-24, 9.1639e-24, 2.1977e-23, 6.1720e-24,\n 1.4121e-23, 1.7689e-22, 2.9456e-22, 3.6693e-22, 4.1076e-22, 1.7620e-23,\n 1.3663e-22, 1.3187e-22, 5.6067e-22, 2.6869e-22, 2.1570e-22, 3.9195e-22,\n 9.8444e-22, 1.3933e-22, 3.7103e-22, 2.9350e-22, 2.1440e-22, 1.0971e-23,\n 6.9079e-22, 1.9005e-22, 2.7210e-22, 1.0062e-23, 2.9776e-23, 3.2213e-22,\n 1.6193e-22, 8.7499e-23, 6.9389e-22, 1.2976e-21, 1.3299e-22, 3.6702e-22,\n 1.7693e-22, 7.5647e-22, 5.5211e-24, 5.2795e-23, 6.6930e-23, 9.6563e-22,\n 5.9466e-23, 8.2721e-24, 2.0590e-21, 1.5202e-22, 3.1461e-23, 5.4355e-23,\n 2.2866e-23, 9.2521e-22, 5.8915e-22, 5.6954e-22, 4.6554e-22, 1.4945e-22,\n 2.4006e-22, 2.1038e-22, 1.4530e-23, 6.9683e-22, 3.8402e-22, 2.0118e-22,\n 1.5217e-24, 6.2210e-24, 2.3725e-21, 1.1911e-23, 7.3077e-22, 1.1813e-22,\n 2.0174e-22, 3.0251e-23, 2.9754e-22, 1.3910e-21, 2.4869e-21, 5.8150e-22,\n 1.5244e-22, 2.4719e-23, 1.2405e-21, 3.2436e-22, 1.7126e-23, 6.7281e-23,\n 8.2740e-24, 1.9942e-23, 1.1104e-22, 4.9790e-22, 1.7568e-21, 7.1943e-23,\n 2.2247e-22, 1.4692e-22, 1.3759e-23, 5.1995e-23, 3.4101e-22, 4.3462e-22,\n 3.9281e-23, 5.5666e-23, 3.0504e-22, 3.7438e-22, 2.5892e-23, 5.9071e-23,\n 1.1762e-21, 1.9879e-22, 2.4961e-23, 2.7616e-23, 1.5419e-22, 5.7959e-22,\n 6.4876e-22, 1.2365e-21, 4.8469e-22, 1.7932e-23, 1.6357e-23, 5.2237e-22,\n 4.5151e-22, 1.8851e-22, 8.0586e-25, 9.5076e-24, 4.4144e-23, 1.2365e-22,\n 1.9656e-23, 6.7951e-24, 3.9240e-22, 9.7592e-22, 8.6345e-24, 3.9500e-22,\n 1.9790e-23, 1.5055e-23, 4.5208e-22, 3.2245e-22, 1.3175e-23, 3.2127e-22,\n 2.6144e-24, 5.6388e-22, 1.8881e-21, 6.7253e-23, 6.3702e-23, 3.0873e-22,\n 4.8263e-22, 5.5253e-22, 4.2181e-22, 4.0516e-23, 1.2535e-22, 1.4612e-22,\n 7.7473e-23, 3.4729e-23, 9.8329e-22, 5.5514e-22, 3.5238e-22, 2.7276e-23,\n 1.9848e-23, 2.6445e-22, 2.7510e-22, 7.3813e-24, 6.8540e-23, 1.4205e-22,\n 8.5749e-22, 1.7821e-23, 2.6468e-22, 2.3252e-22, 9.5017e-23, 2.9404e-23,\n 5.1225e-24, 9.3774e-22, 8.6608e-23, 2.8610e-23], device='cuda:0')" + }, + "41": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([6.4033e-26, 7.6918e-26, 2.1394e-25, 2.4823e-25, 2.0577e-25, 1.0459e-24,\n 1.7062e-25, 3.6821e-26, 3.0511e-24, 3.3030e-26, 3.0189e-26, 3.4838e-25,\n 4.7240e-27, 2.9358e-25, 2.9674e-26, 2.8926e-25, 1.3711e-24, 3.1457e-26,\n 1.8767e-23, 4.4231e-24, 2.0523e-23, 1.8223e-25, 6.3684e-26, 7.5744e-24,\n 1.0087e-25, 1.0348e-25, 4.5008e-24, 1.6074e-25, 1.0357e-26, 1.5704e-24,\n 8.0563e-26, 1.0952e-25, 6.1987e-24, 1.6222e-25, 3.5333e-24, 5.7321e-25,\n 6.3688e-26, 1.3367e-26, 1.3833e-23, 2.2892e-25, 2.3388e-26, 3.7288e-24,\n 4.1015e-27, 5.3199e-26, 1.6350e-24, 4.1881e-27, 7.8939e-25, 1.9098e-25,\n 3.0543e-26, 9.3174e-25, 1.1556e-26, 1.5064e-24, 4.3422e-25, 6.4892e-26,\n 4.6358e-26, 8.4799e-26, 1.3188e-24, 4.8716e-25, 3.9772e-24, 4.8901e-25,\n 3.1457e-25, 3.6517e-26, 3.0302e-24, 2.2436e-25, 2.1581e-26, 4.4945e-26,\n 1.1057e-23, 2.2305e-25, 1.5441e-26, 4.2237e-24, 2.5397e-26, 1.4181e-26,\n 3.4081e-24, 1.5695e-25, 4.4094e-26, 1.2659e-26, 3.4977e-25, 3.3449e-25,\n 2.7113e-25, 1.3220e-26, 7.4314e-26, 3.3507e-26, 2.4074e-24, 3.8560e-25,\n 4.1652e-26, 2.7172e-26, 5.0753e-26, 1.7051e-25, 1.7115e-25, 6.4702e-25,\n 1.1962e-25, 3.8556e-25, 7.2692e-27, 7.5190e-25, 3.6749e-26, 7.1155e-25,\n 3.9080e-26, 6.1608e-25, 3.8823e-27, 1.3188e-26, 7.3304e-27, 3.6928e-26,\n 3.9721e-27, 1.4311e-25, 5.0403e-25, 8.2658e-26, 1.9619e-24, 3.3212e-26,\n 1.7034e-25, 1.6189e-25, 5.5766e-24, 2.3991e-24, 9.3955e-25, 6.5435e-25,\n 2.3299e-24, 1.3902e-24, 6.3778e-24, 1.8001e-25, 1.0268e-25, 2.1902e-26,\n 4.8736e-25, 4.3204e-26, 4.9873e-25, 7.1395e-26, 1.7669e-26, 4.0946e-25,\n 8.9782e-26, 3.8043e-26, 1.4247e-24, 2.4622e-24, 1.6597e-25, 8.4434e-27,\n 9.9333e-26, 7.9600e-24, 2.6483e-26, 1.9928e-26, 4.0786e-26, 7.2255e-24,\n 2.4077e-26, 3.3504e-27, 1.0778e-23, 1.4006e-25, 1.6198e-26, 3.4436e-26,\n 1.1359e-25, 1.2335e-24, 4.1878e-25, 2.0037e-24, 2.9980e-26, 2.6979e-27,\n 2.1296e-25, 6.5400e-25, 1.2101e-26, 4.5832e-24, 3.3446e-24, 2.5909e-25,\n 1.0046e-26, 1.1706e-26, 5.6636e-24, 6.9848e-27, 6.6604e-24, 8.3386e-27,\n 1.1246e-25, 1.7133e-26, 1.5046e-25, 2.4257e-24, 2.1653e-24, 1.7586e-25,\n 9.1484e-26, 1.0714e-26, 1.5026e-24, 1.7698e-24, 1.9757e-26, 9.7664e-25,\n 1.6331e-26, 2.3881e-26, 5.8236e-26, 5.0476e-26, 2.7583e-23, 3.9581e-25,\n 3.8783e-25, 5.0169e-26, 5.3784e-27, 1.1244e-27, 2.8414e-25, 8.5697e-25,\n 8.9454e-26, 3.5713e-26, 2.4998e-25, 1.0784e-25, 3.3209e-26, 1.1085e-25,\n 2.1730e-24, 1.1097e-25, 3.0962e-26, 8.7830e-27, 3.4052e-25, 7.3717e-25,\n 1.4686e-24, 1.2920e-23, 2.3610e-25, 7.2633e-27, 1.8607e-26, 5.2535e-26,\n 2.1903e-25, 2.5333e-25, 4.4674e-27, 1.5516e-26, 8.0277e-26, 3.9701e-26,\n 4.6689e-27, 4.2130e-26, 9.4704e-25, 9.1309e-25, 1.0206e-26, 3.0785e-25,\n 2.5236e-26, 6.1437e-26, 5.6888e-25, 3.5820e-25, 2.9029e-26, 5.4010e-25,\n 8.1613e-27, 5.2056e-25, 6.5047e-24, 5.8476e-27, 1.4148e-25, 1.1755e-25,\n 3.0236e-25, 5.4634e-24, 3.1877e-24, 3.8575e-26, 1.2952e-24, 5.6938e-26,\n 2.2938e-24, 5.5391e-27, 6.8742e-24, 1.8343e-25, 4.1855e-25, 9.0514e-25,\n 1.0911e-25, 8.2593e-25, 2.4401e-25, 1.0387e-26, 1.4927e-25, 5.7610e-26,\n 7.7172e-25, 2.8361e-26, 4.6405e-25, 7.3051e-25, 1.8371e-26, 4.2625e-26,\n 1.8857e-26, 6.5755e-25, 7.8096e-26, 2.7622e-26], device='cuda:0')" + }, + "42": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([4.2255e-25, 3.7433e-25, 4.8676e-25, 6.6117e-25, 6.3956e-25, 7.0292e-25,\n 1.6293e-25, 5.3032e-26, 3.3287e-24, 7.2509e-26, 1.2774e-25, 1.2170e-24,\n 7.7048e-27, 3.3845e-25, 3.1011e-26, 6.6607e-25, 1.0595e-24, 7.5047e-26,\n 7.3805e-24, 3.0265e-24, 7.0469e-24, 3.6872e-25, 1.4997e-25, 2.2314e-24,\n 2.0545e-25, 6.7753e-25, 2.6968e-24, 3.1423e-25, 3.8766e-26, 1.5431e-24,\n 5.6996e-25, 2.5487e-25, 2.2177e-24, 2.4522e-25, 2.1671e-24, 3.5819e-25,\n 3.6828e-25, 1.6377e-25, 8.7626e-24, 6.3000e-25, 1.0398e-26, 2.4017e-24,\n 4.4600e-26, 3.9381e-25, 1.2076e-24, 2.7038e-26, 5.6069e-25, 3.8138e-25,\n 9.4215e-26, 1.9373e-24, 1.3249e-26, 1.0235e-24, 1.2227e-24, 8.1440e-26,\n 9.2717e-26, 1.4806e-25, 1.6751e-24, 1.9067e-24, 4.0927e-24, 8.2897e-25,\n 3.4923e-25, 6.6003e-26, 3.8440e-24, 5.0830e-25, 3.5671e-26, 1.4302e-25,\n 3.8660e-24, 2.3921e-25, 4.0623e-28, 3.2820e-24, 6.7634e-26, 1.5191e-26,\n 1.9036e-24, 3.4430e-25, 1.2179e-25, 3.3836e-25, 1.1502e-24, 4.1479e-25,\n 1.0807e-24, 1.3515e-25, 5.1623e-26, 5.7854e-26, 1.5634e-24, 4.7396e-25,\n 4.6668e-26, 3.0457e-25, 2.6822e-26, 4.5050e-25, 4.7347e-25, 1.0009e-24,\n 5.3711e-25, 3.1246e-25, 9.9564e-27, 1.2860e-24, 1.7208e-25, 1.2529e-24,\n 1.8875e-27, 3.0155e-25, 1.6820e-26, 4.6788e-27, 6.2897e-26, 4.8564e-26,\n 7.5174e-27, 1.4692e-25, 3.8511e-25, 1.3488e-25, 2.5449e-24, 2.8238e-26,\n 7.9494e-26, 9.3660e-26, 2.1814e-24, 1.8172e-24, 1.5012e-24, 1.8404e-24,\n 1.0131e-24, 1.1654e-24, 3.0318e-24, 1.1563e-24, 8.4044e-25, 9.3724e-27,\n 6.9019e-25, 6.4373e-25, 6.9854e-25, 1.5257e-26, 1.7017e-25, 8.4201e-25,\n 2.4579e-25, 2.5235e-25, 1.5449e-24, 1.0656e-24, 4.6316e-25, 4.9663e-25,\n 4.8939e-25, 3.0695e-24, 4.4287e-27, 1.4702e-25, 5.7566e-25, 2.7157e-24,\n 3.4417e-25, 1.2986e-25, 7.4996e-24, 3.5547e-25, 6.4696e-27, 1.6530e-25,\n 1.6181e-25, 1.2513e-24, 9.0193e-25, 8.5418e-25, 3.8866e-25, 1.1422e-25,\n 2.7938e-25, 6.3831e-25, 2.9501e-26, 3.4454e-24, 2.6823e-24, 3.8637e-25,\n 2.0263e-26, 1.0960e-26, 6.5554e-24, 4.3221e-26, 3.3831e-24, 6.0260e-26,\n 3.1742e-25, 1.7880e-26, 3.2537e-25, 4.4304e-24, 6.5162e-24, 1.4619e-24,\n 1.4242e-25, 4.0774e-26, 3.6538e-24, 2.0582e-24, 9.8648e-26, 1.0047e-24,\n 4.8054e-26, 1.3261e-26, 1.4751e-25, 2.6519e-25, 7.2640e-24, 4.0271e-25,\n 4.4403e-25, 9.8829e-26, 7.7142e-26, 2.3827e-26, 1.6161e-24, 1.0346e-24,\n 3.4146e-25, 2.0278e-25, 3.4009e-25, 3.1422e-25, 1.4228e-25, 2.0111e-25,\n 1.0480e-24, 2.6844e-25, 4.4938e-26, 1.0014e-25, 8.3942e-25, 1.2379e-24,\n 1.6705e-24, 5.1042e-24, 3.1754e-25, 1.4376e-26, 5.1094e-26, 2.1632e-26,\n 7.4319e-25, 4.2766e-25, 2.1697e-27, 1.4936e-25, 1.5522e-25, 2.1278e-25,\n 2.1260e-26, 5.7838e-26, 1.2405e-24, 1.2188e-24, 9.3651e-26, 4.7053e-25,\n 2.6139e-26, 2.2929e-25, 1.5145e-24, 1.2514e-24, 2.2314e-26, 1.4419e-24,\n 9.7205e-27, 2.0532e-24, 5.0925e-24, 1.9791e-25, 2.1336e-25, 1.7799e-25,\n 7.0981e-25, 3.2163e-24, 1.4863e-24, 9.3728e-26, 1.3043e-24, 4.7494e-25,\n 9.7474e-25, 4.0452e-26, 5.0279e-24, 1.5426e-24, 1.1657e-24, 4.3959e-25,\n 6.3578e-26, 1.1729e-24, 4.5102e-25, 2.3666e-26, 1.3574e-25, 3.3580e-25,\n 4.4191e-25, 2.7989e-26, 1.0035e-24, 6.7480e-25, 1.4646e-25, 4.1156e-26,\n 1.3378e-26, 1.0651e-24, 2.0089e-25, 2.1084e-25], device='cuda:0')" + }, + "43": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n ...,\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.6180e-26, 1.2988e-26, 4.2362e-27, ..., 9.6394e-26, 6.8916e-27,\n 4.0201e-26],\n [3.7144e-26, 1.2811e-25, 2.6720e-26, ..., 5.7466e-27, 1.5463e-26,\n 1.2731e-25],\n [5.1969e-25, 1.0736e-25, 4.6577e-25, ..., 5.4717e-26, 4.5038e-26,\n 3.4762e-27],\n ...,\n [2.6682e-25, 2.8902e-26, 1.2015e-25, ..., 2.1009e-25, 1.5842e-26,\n 3.9195e-26],\n [2.3738e-25, 3.3464e-25, 5.7611e-25, ..., 6.4051e-26, 6.9899e-27,\n 5.7395e-26],\n [4.8173e-26, 1.7060e-26, 5.8015e-26, ..., 1.3827e-26, 5.8235e-27,\n 1.8646e-26]], device='cuda:0')" + }, + "44": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.0713e-22, 6.3694e-23, 3.1350e-22, 3.1036e-22, 2.4277e-23, 1.8110e-22,\n 3.4645e-23, 5.7674e-23, 1.2313e-22, 4.6047e-23, 4.4163e-22, 1.0018e-21,\n 5.9239e-22, 2.0385e-24, 1.4209e-23, 1.4123e-22, 1.2261e-22, 7.6104e-23,\n 6.5769e-22, 1.1536e-23, 1.1539e-21, 6.1903e-23, 3.1897e-22, 2.1812e-22,\n 5.9124e-23, 3.9425e-22, 2.7890e-23, 1.9593e-21, 3.7876e-22, 4.8465e-23,\n 7.9059e-22, 2.7899e-22, 1.7537e-22, 1.5203e-23, 3.4246e-22, 2.2124e-22,\n 1.5477e-23, 1.3023e-22, 5.9218e-22, 1.9252e-22, 9.8609e-23, 6.9321e-23,\n 8.0459e-23, 3.2293e-22, 3.9891e-23, 2.0267e-23, 9.8385e-23, 1.5404e-23,\n 5.9052e-22, 9.4029e-22, 6.9375e-22, 5.2482e-24, 1.9540e-22, 3.2020e-22,\n 4.8770e-22, 2.3489e-22, 5.8005e-22, 7.9230e-24, 9.1669e-23, 2.7062e-23,\n 2.2782e-22, 5.0084e-22, 6.2910e-24, 2.6040e-22, 7.7228e-24, 4.8421e-23,\n 3.6581e-22, 4.2403e-22, 1.7456e-22, 2.5744e-23, 9.0559e-23, 3.2041e-24,\n 1.5946e-21, 8.2291e-22, 4.2490e-24, 4.7187e-23, 4.5702e-23, 1.8825e-22,\n 1.8479e-23, 1.6107e-22, 2.1529e-24, 1.0086e-22, 3.4398e-23, 2.3183e-22,\n 1.2883e-22, 2.5664e-22, 1.7840e-22, 3.2343e-22, 4.7420e-22, 4.6213e-22,\n 1.7483e-23, 6.4302e-23, 2.4962e-22, 2.7774e-22, 3.9858e-22, 1.0003e-22,\n 1.6027e-22, 6.2964e-23, 1.1963e-22, 4.0125e-23, 7.4220e-23, 3.1009e-23,\n 1.9351e-23, 1.5417e-22, 7.1837e-23, 1.4143e-22, 9.1697e-23, 1.4151e-22,\n 1.6694e-22, 6.6198e-23, 2.4881e-22, 4.9165e-23, 2.6207e-22, 1.1449e-22,\n 2.2478e-23, 9.6656e-23, 1.0701e-22, 6.4351e-22, 2.0751e-22, 1.7714e-23,\n 2.3914e-22, 1.1247e-21, 2.3542e-22, 2.1121e-21, 2.3551e-22, 8.3491e-22,\n 1.8445e-23, 6.9416e-22, 5.9439e-23, 1.0430e-22, 3.6932e-23, 6.9245e-22,\n 1.0995e-22, 2.4599e-22, 3.5367e-24, 1.9213e-23, 3.7501e-22, 6.2855e-23,\n 2.8960e-22, 1.7943e-22, 1.6549e-22, 4.5594e-22, 2.3089e-23, 5.0205e-22,\n 6.9845e-23, 5.1637e-22, 3.0741e-22, 5.0895e-22, 1.1407e-22, 2.3206e-22,\n 1.0034e-21, 1.6024e-22, 1.8342e-22, 8.4923e-22, 3.1423e-24, 2.9381e-22,\n 4.6483e-23, 1.1176e-22, 7.0902e-23, 2.1141e-22, 7.5449e-22, 2.6609e-22,\n 4.0320e-22, 1.5528e-23, 1.4031e-22, 5.3590e-23, 9.1845e-22, 1.6153e-23,\n 7.9831e-22, 8.0476e-23, 3.8724e-22, 2.1075e-22, 1.4193e-22, 1.4014e-23,\n 2.3753e-22, 2.0572e-21, 1.8530e-23, 2.3891e-22, 6.7086e-22, 1.8651e-24,\n 8.1956e-24, 3.2257e-22, 1.0250e-23, 4.9513e-22, 6.0848e-22, 6.3190e-24,\n 3.1743e-23, 8.8125e-23, 6.8083e-22, 4.1803e-22, 3.2376e-23, 1.4221e-22,\n 1.2388e-23, 3.5740e-23, 1.7888e-22, 5.3560e-23, 4.3890e-24, 3.3126e-22,\n 1.1303e-22, 9.6299e-22, 3.7258e-23, 4.4237e-22, 1.4418e-23, 2.4232e-22,\n 3.4324e-22, 7.1349e-22, 7.6015e-24, 9.1350e-23, 5.0474e-23, 7.8721e-23,\n 5.9648e-24, 1.7755e-22, 1.5392e-22, 6.8606e-22, 5.3719e-22, 1.2524e-22,\n 4.9329e-23, 2.7775e-23, 7.7872e-23, 1.6182e-22, 1.5462e-23, 6.1593e-22,\n 2.0479e-23, 1.8894e-22, 3.8590e-22, 1.2931e-22, 2.0228e-23, 5.7679e-22,\n 1.9723e-23, 3.1047e-22, 5.6044e-24, 2.6734e-23, 5.9451e-24, 5.3644e-23,\n 8.9973e-23, 1.4763e-23, 4.2685e-22, 4.3711e-23, 1.8592e-23, 2.7804e-22,\n 1.2389e-22, 9.7669e-23, 5.1467e-23, 4.9780e-23, 6.1468e-23, 9.0104e-22,\n 4.8792e-22, 4.7688e-24, 1.6131e-22, 2.1828e-24, 1.1335e-22, 9.6612e-23,\n 6.3153e-22, 5.3560e-22, 3.0549e-22, 3.1443e-23], device='cuda:0')" + }, + "45": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.9584e-25, 1.1164e-25, 4.7557e-25, 8.2932e-25, 3.4734e-26, 1.1652e-25,\n 1.4326e-25, 3.5322e-26, 1.9645e-26, 1.6274e-26, 2.1559e-25, 2.7091e-24,\n 2.5031e-24, 1.8287e-27, 1.1966e-28, 4.4994e-25, 4.7743e-25, 3.3679e-25,\n 2.5681e-24, 2.8516e-26, 3.8655e-24, 5.3377e-26, 3.2994e-25, 1.6290e-24,\n 2.4984e-25, 7.6482e-25, 2.5681e-26, 3.5957e-24, 4.4797e-25, 4.4464e-26,\n 1.9441e-24, 1.2037e-25, 3.8657e-26, 1.1225e-26, 7.6172e-25, 1.2576e-25,\n 1.8730e-26, 1.1485e-25, 2.6451e-24, 6.6770e-25, 1.6472e-26, 3.0517e-25,\n 6.9677e-26, 4.7334e-25, 1.7499e-26, 6.1621e-26, 4.1254e-26, 2.2391e-26,\n 1.7032e-24, 3.3555e-24, 1.2284e-24, 6.3659e-27, 6.2635e-25, 4.3525e-25,\n 4.4535e-25, 4.3495e-25, 8.2049e-24, 1.6015e-26, 2.2237e-25, 9.7214e-27,\n 2.5898e-25, 1.2714e-24, 4.9320e-26, 9.5616e-25, 6.9859e-28, 4.0970e-27,\n 8.2243e-25, 3.9596e-24, 3.2534e-26, 6.3146e-26, 8.2614e-26, 1.2742e-26,\n 2.4053e-24, 2.0463e-24, 3.5378e-27, 4.1392e-27, 2.5892e-26, 1.3522e-25,\n 3.9494e-26, 1.0153e-24, 1.1484e-26, 1.2953e-25, 1.0741e-25, 6.5573e-26,\n 6.5945e-26, 1.9531e-25, 1.8426e-25, 9.2104e-25, 1.1239e-24, 5.3464e-25,\n 7.9872e-26, 6.2888e-26, 2.2654e-24, 3.9617e-25, 5.9406e-25, 1.9712e-25,\n 1.0812e-25, 9.3280e-26, 2.0265e-25, 5.1800e-27, 3.4791e-25, 3.2220e-26,\n 7.2021e-27, 1.8578e-25, 1.9107e-26, 1.1271e-25, 1.7087e-25, 5.5738e-26,\n 1.5249e-25, 7.0984e-26, 1.4085e-24, 5.6781e-26, 9.5201e-25, 6.7072e-25,\n 3.2161e-27, 8.3091e-25, 6.5559e-25, 6.1184e-25, 1.2810e-25, 1.0037e-26,\n 2.2461e-25, 2.7342e-24, 3.3157e-25, 8.3307e-24, 2.8003e-25, 3.7748e-24,\n 1.1476e-25, 1.2458e-24, 2.2195e-25, 6.1113e-26, 5.4861e-27, 1.8415e-24,\n 3.9521e-26, 1.1954e-24, 2.0078e-26, 6.5996e-27, 1.6792e-24, 1.9540e-25,\n 4.4195e-25, 1.0191e-25, 4.6986e-25, 1.2502e-24, 2.9474e-26, 1.0494e-24,\n 8.7419e-26, 2.9146e-25, 2.1991e-25, 3.4325e-24, 4.2566e-26, 5.9483e-25,\n 1.5054e-24, 1.8407e-25, 2.0008e-25, 2.7821e-24, 4.1072e-26, 2.2522e-25,\n 1.5529e-26, 4.6937e-25, 6.2717e-26, 4.4824e-26, 3.5225e-24, 3.2141e-25,\n 9.0118e-25, 5.3289e-27, 8.4706e-25, 6.2792e-26, 8.6368e-25, 7.2116e-26,\n 1.2349e-24, 1.2887e-26, 2.1729e-25, 6.2265e-26, 1.0489e-25, 9.1871e-27,\n 1.9522e-25, 2.5746e-24, 1.8553e-26, 1.9237e-25, 5.8284e-24, 5.1308e-27,\n 3.3399e-27, 2.3146e-24, 3.4258e-26, 5.3676e-25, 9.8399e-24, 6.5854e-27,\n 5.3178e-26, 2.4863e-25, 6.4513e-25, 3.3054e-25, 2.1039e-26, 8.5949e-25,\n 1.4042e-26, 4.9092e-27, 7.3138e-26, 6.8420e-26, 2.6791e-26, 2.4204e-25,\n 3.4106e-25, 7.4638e-24, 5.8593e-26, 5.0185e-25, 1.6542e-26, 2.4520e-25,\n 2.9171e-25, 1.6103e-24, 9.4245e-27, 2.4671e-25, 5.5718e-26, 1.3118e-26,\n 2.0948e-26, 1.3073e-25, 2.7826e-25, 8.6565e-25, 1.0704e-24, 1.9906e-25,\n 4.3520e-26, 2.8583e-26, 6.0493e-26, 1.4741e-25, 1.2959e-26, 1.5444e-24,\n 1.6692e-25, 2.1087e-25, 3.2235e-25, 3.6160e-26, 1.8079e-26, 4.0229e-25,\n 7.5289e-26, 6.3855e-25, 2.4167e-26, 5.8090e-27, 1.1027e-26, 2.0927e-26,\n 1.4825e-25, 2.9817e-27, 2.5741e-24, 7.1480e-27, 4.6616e-27, 3.7658e-25,\n 8.8759e-26, 3.2785e-26, 4.6955e-26, 3.7020e-25, 4.2209e-27, 2.8095e-24,\n 7.7562e-25, 1.4583e-26, 3.0777e-25, 1.7538e-26, 4.3413e-26, 9.4574e-26,\n 7.0918e-25, 4.3382e-25, 4.9034e-25, 1.1519e-26], device='cuda:0')" + }, + "46": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.1735e-25, 1.9402e-25, 4.1434e-25, 5.9760e-25, 9.8728e-26, 4.5859e-25,\n 1.0518e-25, 9.1138e-26, 4.3848e-25, 9.6650e-26, 6.2775e-25, 2.3571e-24,\n 2.4905e-24, 4.4407e-27, 2.2403e-26, 6.5023e-25, 4.3493e-25, 4.4229e-25,\n 1.9906e-24, 1.0150e-25, 3.0788e-24, 2.0165e-25, 5.2436e-25, 5.5355e-25,\n 4.7262e-25, 1.3785e-24, 1.0328e-25, 2.6642e-24, 6.6467e-25, 7.3079e-26,\n 2.7511e-24, 1.1834e-25, 1.1917e-25, 2.4557e-26, 1.0627e-24, 1.3163e-25,\n 2.9398e-26, 5.1222e-25, 1.5700e-24, 7.8573e-25, 2.1763e-25, 2.4633e-25,\n 1.3336e-25, 1.2754e-24, 1.4802e-25, 1.8493e-25, 2.0167e-25, 5.9615e-26,\n 6.7627e-25, 2.4556e-24, 8.3230e-25, 1.9661e-26, 4.1846e-25, 4.7699e-25,\n 7.7977e-25, 3.0079e-25, 2.8743e-24, 6.0820e-26, 1.5176e-25, 9.3534e-26,\n 3.4794e-25, 5.5041e-25, 8.8427e-27, 7.2221e-25, 2.4789e-27, 2.2695e-25,\n 1.4253e-24, 1.2972e-24, 2.9139e-25, 8.7719e-26, 2.3423e-25, 4.3855e-27,\n 2.5193e-24, 1.4639e-24, 1.3755e-26, 1.1355e-25, 2.1179e-25, 3.4078e-25,\n 2.9943e-26, 4.2245e-25, 1.1394e-27, 2.2348e-25, 1.7432e-25, 3.4244e-25,\n 1.5287e-25, 6.5357e-25, 1.7899e-25, 1.1735e-24, 1.3794e-24, 1.0417e-24,\n 4.3110e-26, 8.7197e-26, 6.3642e-25, 5.0637e-25, 5.6774e-25, 4.3813e-25,\n 1.6564e-25, 1.7196e-25, 1.1966e-25, 5.1452e-26, 4.5558e-25, 1.3497e-26,\n 3.2970e-26, 4.3992e-25, 5.5534e-26, 8.0359e-26, 5.2765e-25, 2.8922e-25,\n 1.3642e-25, 9.0323e-26, 1.1326e-24, 2.1285e-25, 1.0972e-24, 6.2042e-25,\n 4.5412e-26, 6.3641e-25, 6.8240e-25, 1.0459e-24, 6.5061e-25, 3.0533e-26,\n 2.9191e-25, 2.7826e-24, 6.0293e-25, 5.3544e-24, 5.5761e-25, 2.2034e-24,\n 2.7431e-25, 1.4471e-24, 2.1402e-25, 2.7948e-25, 9.6167e-26, 1.0279e-24,\n 1.8684e-25, 6.1677e-25, 3.4521e-27, 1.0130e-25, 7.5199e-25, 2.1442e-25,\n 1.1144e-24, 2.2507e-25, 4.8784e-25, 6.4824e-25, 2.4319e-26, 1.1001e-24,\n 2.4221e-25, 6.1534e-25, 5.6308e-25, 1.4139e-24, 1.3313e-25, 6.6324e-25,\n 1.2121e-24, 3.0261e-25, 1.9316e-25, 2.8318e-24, 1.5693e-26, 2.9182e-25,\n 5.3506e-26, 6.0127e-25, 2.3898e-25, 4.4518e-25, 1.7403e-24, 3.3925e-25,\n 7.9431e-25, 2.2192e-26, 7.3164e-25, 1.4003e-25, 2.3608e-24, 5.4104e-26,\n 4.7116e-25, 1.0673e-25, 9.6180e-25, 6.2121e-25, 4.3564e-25, 8.1452e-26,\n 3.9756e-25, 1.8181e-24, 9.5095e-26, 2.4159e-25, 2.0210e-24, 2.7287e-26,\n 2.0419e-26, 5.5023e-25, 8.4280e-26, 5.1577e-25, 2.1322e-24, 1.1200e-26,\n 1.8005e-25, 4.7813e-25, 1.0677e-24, 2.8794e-25, 4.4399e-26, 7.7826e-25,\n 5.3617e-27, 5.1831e-26, 4.7753e-25, 1.0075e-25, 1.5645e-26, 7.4627e-25,\n 2.9804e-25, 2.6800e-24, 4.7637e-26, 3.5659e-25, 1.1703e-25, 2.6789e-25,\n 7.1135e-25, 2.1454e-24, 6.2778e-27, 3.4909e-25, 7.9549e-26, 1.8967e-25,\n 1.1443e-26, 3.9548e-25, 4.9455e-25, 1.0372e-24, 1.9333e-24, 2.0463e-25,\n 2.9423e-25, 4.4389e-26, 2.2710e-25, 5.5415e-25, 9.1056e-26, 1.8369e-24,\n 1.9017e-25, 4.0192e-25, 9.4520e-25, 3.5744e-25, 4.6319e-26, 8.9019e-25,\n 8.0276e-26, 1.1186e-24, 3.3466e-26, 6.1688e-26, 2.8765e-26, 1.2473e-25,\n 2.0301e-25, 3.8698e-26, 1.7106e-24, 1.3006e-25, 9.9083e-26, 5.1066e-25,\n 5.3906e-25, 1.0429e-25, 4.6666e-26, 2.2725e-25, 8.9433e-26, 2.8038e-24,\n 7.2331e-25, 1.2832e-26, 5.7742e-25, 5.4847e-27, 1.9941e-25, 2.0842e-25,\n 2.1538e-24, 6.6187e-25, 2.9629e-25, 1.6484e-25], device='cuda:0')" + }, + "47": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.8428e-26, 6.9167e-26, 4.2835e-26, ..., 4.5712e-26, 1.7445e-27,\n 8.9998e-27],\n [7.2699e-27, 2.2980e-26, 1.1416e-26, ..., 8.2526e-27, 9.7530e-28,\n 4.7387e-27],\n [2.3449e-25, 5.6914e-26, 6.2808e-25, ..., 1.2593e-25, 2.2337e-26,\n 1.1991e-25],\n ...,\n [3.9081e-25, 1.7677e-25, 1.5428e-25, ..., 6.8150e-26, 2.9997e-27,\n 6.7970e-26],\n [1.3858e-24, 2.2422e-25, 7.2987e-25, ..., 1.9910e-25, 6.0881e-26,\n 1.8739e-25],\n [3.5910e-26, 4.5479e-26, 1.2763e-26, ..., 6.0950e-26, 8.8444e-27,\n 4.1985e-27]], device='cuda:0')" + }, + "48": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.1983e-22, 3.5586e-24, 5.5406e-22, 5.5221e-22, 1.0442e-23, 2.9469e-22,\n 1.8698e-23, 8.4218e-22, 1.7722e-22, 4.1802e-22, 8.5299e-22, 3.6589e-22,\n 1.0341e-22, 2.8092e-23, 4.9872e-23, 1.8001e-22, 8.2482e-23, 5.4119e-22,\n 6.8009e-24, 2.0346e-21, 2.2606e-21, 8.7915e-24, 2.6863e-22, 8.1063e-22,\n 1.7420e-22, 4.2676e-23, 2.4732e-22, 1.0126e-21, 8.8913e-22, 8.8516e-23,\n 6.1463e-22, 3.4184e-23, 8.3974e-23, 8.6374e-24, 2.0260e-21, 3.3536e-22,\n 1.9361e-22, 5.7079e-24, 1.2892e-21, 5.9649e-23, 2.6829e-22, 4.1151e-23,\n 6.1248e-24, 3.8508e-23, 6.2335e-22, 4.7443e-23, 9.1417e-23, 4.0520e-23,\n 2.0633e-22, 6.1255e-23, 1.6354e-21, 8.9492e-22, 2.8787e-22, 1.3518e-23,\n 7.6498e-22, 6.5091e-22, 7.0426e-23, 1.1359e-23, 7.7854e-22, 5.1576e-23,\n 2.2186e-22, 1.9756e-22, 1.4832e-21, 1.0670e-22, 7.5607e-22, 5.9354e-23,\n 6.3582e-24, 5.0886e-22, 5.9970e-22, 1.3765e-23, 1.4583e-23, 1.3395e-22,\n 1.1073e-21, 1.2347e-21, 2.7848e-24, 7.8643e-24, 9.7649e-24, 2.4331e-23,\n 2.1536e-23, 3.2008e-22, 2.8791e-22, 2.9287e-22, 4.8510e-22, 9.0910e-22,\n 7.0615e-24, 9.2588e-23, 5.8981e-23, 2.1764e-23, 7.5867e-22, 2.3359e-22,\n 5.3199e-23, 2.4258e-22, 3.7173e-22, 2.0660e-21, 2.9324e-23, 3.3752e-23,\n 1.0718e-22, 6.7666e-24, 2.6252e-22, 2.5493e-22, 4.8199e-24, 7.3970e-24,\n 1.5125e-21, 2.5197e-22, 2.0541e-22, 8.1645e-23, 9.1484e-22, 3.9637e-24,\n 5.5553e-22, 2.6933e-22, 7.0287e-23, 5.2367e-22, 5.9184e-23, 1.9599e-24,\n 1.7712e-21, 2.3173e-22, 1.1454e-23, 6.7285e-23, 6.1329e-22, 1.2968e-22,\n 1.3750e-21, 1.7707e-21, 1.8405e-22, 2.0216e-21, 2.0904e-23, 1.0088e-23,\n 1.1490e-23, 9.2596e-23, 9.7930e-24, 4.2109e-21, 8.9752e-23, 3.2558e-22,\n 1.7001e-23, 1.1765e-21, 2.2762e-21, 2.9778e-24, 2.3230e-22, 1.4406e-21,\n 1.2875e-21, 1.3272e-22, 1.4059e-23, 2.6291e-22, 2.4695e-23, 4.7135e-23,\n 5.0746e-24, 2.4573e-22, 2.7275e-22, 2.6419e-24, 1.1603e-23, 2.5485e-24,\n 9.6959e-22, 2.1974e-21, 3.3772e-23, 2.4423e-22, 2.3561e-22, 4.6815e-22,\n 2.2713e-22, 6.9945e-22, 2.0850e-21, 8.4240e-24, 5.7613e-22, 7.9918e-22,\n 2.4576e-23, 1.2852e-22, 4.3218e-22, 5.4649e-21, 9.5735e-23, 1.9627e-23,\n 1.4600e-22, 3.3374e-22, 4.9083e-23, 7.9642e-22, 2.2650e-24, 1.6409e-22,\n 1.9884e-24, 2.2260e-21, 6.3690e-23, 4.8649e-22, 1.0576e-21, 1.2160e-22,\n 4.9672e-23, 3.2328e-22, 1.1138e-23, 4.5627e-23, 6.5297e-22, 5.4543e-24,\n 1.4234e-21, 3.7146e-23, 5.0011e-22, 6.4256e-23, 7.6465e-22, 4.6034e-24,\n 6.4708e-23, 3.5127e-22, 7.2737e-23, 1.7837e-22, 1.4324e-23, 1.0518e-21,\n 1.6155e-21, 1.6058e-21, 5.1956e-23, 1.2605e-21, 1.1391e-23, 1.0600e-22,\n 4.6762e-23, 1.9016e-23, 8.0620e-23, 2.4613e-22, 2.7007e-22, 6.7113e-24,\n 2.6094e-23, 2.2101e-23, 1.1526e-21, 7.7520e-24, 3.6970e-22, 4.3769e-22,\n 3.0000e-22, 3.5097e-22, 7.9633e-22, 8.1354e-23, 8.7884e-24, 3.3984e-22,\n 1.3159e-22, 9.1021e-22, 3.5141e-22, 1.0454e-22, 1.9837e-22, 7.7381e-22,\n 9.2223e-22, 6.3559e-22, 4.9896e-22, 1.5915e-23, 7.3109e-23, 6.9222e-22,\n 1.0424e-22, 5.1858e-22, 2.3341e-22, 2.6183e-22, 4.1272e-22, 3.8918e-22,\n 1.4936e-22, 1.7926e-22, 1.1563e-21, 1.1281e-22, 1.8193e-23, 1.6293e-23,\n 8.0109e-22, 1.1232e-23, 7.0483e-22, 1.0718e-24, 6.2347e-23, 7.4263e-22,\n 1.7568e-21, 6.6695e-22, 1.4657e-21, 4.1647e-23], device='cuda:0')" + }, + "49": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.8540e-26, 1.0601e-26, 3.7048e-25, 1.0480e-24, 1.2372e-26, 6.4533e-25,\n 4.7324e-26, 4.2189e-25, 2.0500e-25, 2.6281e-25, 1.1554e-24, 1.4629e-25,\n 2.4245e-24, 8.7881e-26, 4.0644e-26, 3.1056e-25, 3.9559e-26, 2.9266e-24,\n 5.4476e-26, 3.5449e-24, 1.2214e-23, 1.6633e-26, 2.2100e-25, 1.1121e-23,\n 1.6910e-25, 4.5008e-27, 7.2897e-26, 4.0901e-24, 6.5430e-25, 4.1915e-26,\n 2.0729e-24, 5.5206e-26, 3.4905e-25, 1.2216e-25, 1.0957e-23, 1.8219e-24,\n 1.2517e-25, 2.5492e-27, 7.1417e-24, 1.3702e-25, 3.2983e-25, 1.3167e-25,\n 4.3474e-27, 9.3835e-27, 1.2072e-24, 6.0974e-26, 2.7796e-26, 1.0516e-26,\n 2.1042e-24, 3.6898e-26, 3.9939e-24, 1.2449e-24, 2.0086e-25, 3.7985e-27,\n 4.9418e-25, 9.5829e-25, 7.0401e-26, 4.5086e-26, 1.2101e-25, 2.6493e-26,\n 1.9695e-25, 1.2913e-25, 1.6633e-23, 1.8027e-25, 2.1068e-24, 1.7373e-25,\n 4.9761e-26, 2.1344e-25, 2.6885e-25, 1.5087e-26, 5.1150e-26, 4.0298e-26,\n 3.7272e-25, 8.7528e-25, 2.4394e-26, 6.5284e-27, 1.1429e-26, 1.1482e-26,\n 1.5552e-26, 3.9045e-25, 4.4098e-25, 1.5654e-25, 5.6266e-25, 6.0465e-25,\n 3.0530e-26, 5.5875e-26, 2.7870e-26, 4.2030e-26, 2.2957e-24, 6.6519e-25,\n 1.5382e-25, 2.4409e-25, 6.6270e-25, 4.0995e-24, 9.7019e-28, 4.9356e-26,\n 3.3184e-26, 4.7684e-27, 1.7561e-24, 1.5560e-24, 1.5729e-26, 1.1764e-26,\n 3.3294e-24, 2.6720e-25, 3.9536e-25, 4.9890e-26, 2.6215e-24, 2.8490e-26,\n 4.1054e-25, 7.9880e-26, 1.2745e-26, 4.8484e-24, 1.7848e-25, 6.4915e-27,\n 8.0702e-24, 1.4701e-24, 2.3383e-26, 1.9785e-26, 1.4690e-24, 5.4026e-26,\n 1.6556e-24, 8.1643e-24, 1.3315e-25, 2.9502e-24, 1.0395e-25, 3.4820e-26,\n 1.3373e-26, 1.2859e-26, 2.1891e-25, 1.9963e-23, 9.4387e-26, 2.0038e-25,\n 1.7030e-26, 5.0139e-24, 2.0874e-24, 1.0438e-26, 3.6011e-24, 9.6740e-24,\n 2.1138e-24, 3.6156e-26, 1.4234e-25, 8.7772e-26, 2.3005e-26, 5.9617e-26,\n 1.6226e-26, 6.7484e-26, 1.5139e-25, 1.0323e-26, 1.7190e-26, 2.2437e-26,\n 5.5314e-25, 2.0516e-23, 1.4669e-26, 4.7625e-25, 1.6433e-25, 3.2616e-25,\n 5.6985e-25, 2.2185e-24, 5.4140e-24, 1.5356e-26, 2.1314e-25, 1.0402e-24,\n 5.0329e-26, 5.6368e-26, 3.5462e-24, 1.8025e-23, 7.0578e-26, 4.2199e-26,\n 9.6882e-26, 4.2194e-25, 1.2039e-26, 7.0939e-25, 8.9329e-27, 4.5048e-25,\n 1.3162e-26, 4.1522e-24, 4.4580e-26, 9.7900e-26, 3.3468e-24, 2.3512e-25,\n 2.4433e-26, 1.3468e-24, 7.2024e-27, 1.6112e-26, 7.3025e-24, 5.0691e-27,\n 1.6314e-23, 2.1130e-26, 1.0202e-24, 3.4169e-26, 3.6517e-25, 7.2921e-27,\n 5.4360e-26, 1.4313e-25, 2.4767e-26, 1.0109e-25, 7.1385e-27, 1.7474e-24,\n 7.0590e-24, 3.7362e-24, 1.2342e-26, 1.1109e-24, 1.1367e-26, 1.2848e-26,\n 1.1623e-27, 1.1494e-26, 8.4947e-26, 8.5496e-25, 3.2934e-25, 2.8750e-26,\n 3.6531e-26, 8.2518e-26, 3.2334e-24, 1.7759e-26, 9.9903e-25, 5.2537e-25,\n 3.7479e-25, 2.0095e-24, 5.5441e-24, 7.6352e-26, 2.5636e-26, 9.5995e-25,\n 1.6814e-25, 1.8836e-24, 1.6835e-25, 3.9064e-27, 1.0480e-25, 7.4601e-25,\n 1.6454e-24, 3.3002e-24, 1.7860e-24, 4.1890e-27, 4.5720e-26, 7.9604e-25,\n 4.0708e-26, 4.2624e-25, 7.8398e-25, 1.0834e-25, 2.0091e-24, 1.6780e-24,\n 1.6338e-25, 1.9289e-25, 1.3866e-24, 3.4944e-26, 9.8002e-27, 2.5854e-26,\n 9.8438e-25, 1.6502e-26, 2.2225e-24, 3.1193e-26, 9.0970e-26, 2.0386e-24,\n 1.5319e-24, 1.1556e-24, 1.3532e-23, 6.0637e-26], device='cuda:0')" + }, + "50": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.5721e-25, 5.6660e-27, 1.8696e-24, 8.7779e-25, 3.8052e-26, 4.4719e-25,\n 1.6953e-25, 2.0332e-24, 7.9915e-25, 9.9018e-25, 1.5925e-24, 1.0339e-24,\n 7.9492e-25, 3.2639e-26, 8.2114e-26, 5.1466e-25, 2.5380e-25, 2.1773e-24,\n 1.7842e-26, 6.9762e-24, 7.3136e-24, 1.5784e-26, 4.2980e-25, 2.4513e-24,\n 3.0852e-25, 1.7479e-25, 1.8639e-25, 2.1296e-24, 1.2582e-24, 4.9028e-25,\n 2.6652e-24, 2.1678e-26, 2.6012e-25, 1.9453e-25, 7.5987e-24, 1.1272e-24,\n 5.1790e-25, 3.7903e-26, 3.8517e-24, 2.1597e-25, 8.8072e-25, 2.9494e-25,\n 2.7524e-26, 1.9592e-25, 2.4824e-24, 2.0888e-25, 2.2540e-25, 1.0715e-25,\n 1.1404e-24, 2.4756e-25, 2.3996e-24, 7.3310e-25, 1.2334e-24, 1.0694e-25,\n 7.9776e-25, 1.0970e-24, 2.3675e-25, 2.6273e-26, 9.7413e-25, 1.4563e-25,\n 8.2629e-25, 4.9596e-25, 6.4446e-24, 3.3871e-25, 5.6263e-25, 1.6135e-25,\n 1.2459e-26, 9.7457e-25, 9.3086e-25, 1.5483e-26, 1.4863e-25, 1.6979e-25,\n 1.8482e-24, 2.5749e-24, 1.4796e-26, 4.3966e-26, 5.3943e-26, 3.2079e-26,\n 5.5414e-26, 1.3232e-24, 3.1594e-25, 3.4459e-25, 6.0505e-25, 1.1069e-24,\n 7.5303e-27, 2.9062e-25, 4.6449e-26, 9.0347e-26, 2.8826e-24, 9.7646e-25,\n 3.8496e-25, 4.5364e-25, 1.0467e-24, 2.1674e-24, 6.4910e-26, 1.2641e-25,\n 3.3659e-25, 3.9637e-27, 9.7321e-25, 1.3121e-24, 1.6281e-26, 1.8578e-26,\n 1.8644e-24, 5.9897e-25, 4.4163e-25, 1.9478e-25, 3.8361e-24, 1.0854e-26,\n 3.3337e-25, 4.6561e-25, 1.3837e-25, 2.7564e-24, 3.6713e-25, 5.2650e-27,\n 3.1883e-24, 1.5308e-24, 1.3468e-26, 1.7887e-25, 2.0740e-24, 1.9101e-25,\n 2.9030e-24, 4.1216e-24, 3.5224e-25, 5.6238e-24, 4.9226e-26, 4.2422e-26,\n 8.1052e-26, 3.1120e-25, 2.0422e-26, 8.3309e-24, 3.3307e-25, 3.7951e-25,\n 8.3455e-26, 2.2855e-24, 5.2990e-24, 9.7846e-27, 1.4277e-24, 4.6374e-24,\n 4.5330e-24, 2.3486e-25, 9.4310e-26, 4.6306e-25, 4.9631e-26, 7.3138e-26,\n 1.4666e-26, 2.3448e-25, 3.9763e-25, 7.8411e-27, 1.5070e-26, 5.9180e-27,\n 1.3636e-24, 7.7942e-24, 3.3941e-26, 1.0616e-24, 5.2889e-25, 2.7282e-25,\n 8.0876e-25, 3.1128e-24, 5.3362e-24, 2.6858e-26, 2.1922e-24, 7.0668e-25,\n 7.2639e-26, 1.7152e-25, 1.7584e-24, 1.6025e-23, 1.9973e-25, 6.5288e-26,\n 1.6941e-25, 6.1729e-25, 1.9594e-25, 2.3488e-24, 4.8196e-27, 8.3434e-25,\n 6.1733e-27, 2.1768e-24, 1.4290e-25, 4.8362e-25, 3.8806e-24, 4.4578e-25,\n 5.6004e-26, 8.0703e-25, 1.6309e-26, 7.4454e-26, 2.9158e-24, 8.4637e-27,\n 6.1001e-24, 1.7136e-25, 8.3826e-25, 1.5857e-25, 2.3794e-24, 4.9815e-26,\n 1.8117e-25, 4.1766e-25, 1.1019e-25, 5.3643e-25, 4.4391e-26, 2.2214e-24,\n 4.2384e-24, 3.9841e-24, 1.8882e-26, 1.8360e-24, 9.5009e-26, 1.8510e-25,\n 6.5799e-26, 1.0119e-25, 3.1864e-25, 1.1914e-24, 5.7315e-25, 1.5329e-26,\n 1.8447e-25, 7.3443e-26, 3.2268e-24, 1.5774e-26, 1.4111e-24, 8.3553e-25,\n 6.0687e-25, 1.5193e-24, 2.5122e-24, 2.6971e-25, 2.7490e-26, 1.2309e-24,\n 2.4436e-25, 3.4226e-24, 8.6951e-25, 3.3243e-25, 2.3629e-25, 8.0787e-25,\n 2.1090e-24, 2.4129e-24, 1.6969e-24, 2.3647e-26, 1.8330e-25, 2.1789e-24,\n 2.6313e-26, 8.4940e-25, 1.0362e-24, 6.3264e-25, 1.7720e-24, 5.5468e-25,\n 6.5430e-25, 6.9067e-25, 1.4658e-24, 4.1911e-25, 9.3618e-26, 6.9854e-26,\n 1.4744e-24, 1.7458e-26, 2.1232e-24, 1.3743e-26, 1.4563e-25, 1.8109e-24,\n 5.5447e-24, 8.7852e-25, 3.5084e-24, 1.4844e-25], device='cuda:0')" + }, + "51": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.6672e-25, 8.1372e-26, 3.1030e-25, ..., 3.3037e-25, 2.2035e-26,\n 2.5252e-25],\n [1.1446e-25, 2.4846e-26, 5.2112e-27, ..., 1.4011e-25, 4.4751e-26,\n 6.1361e-26],\n [1.0344e-24, 3.2081e-25, 1.9777e-24, ..., 2.9604e-25, 3.1851e-26,\n 2.1053e-25],\n ...,\n [1.7372e-26, 5.0235e-26, 7.4577e-27, ..., 1.8557e-26, 1.0165e-26,\n 6.3283e-27],\n [1.0678e-24, 7.7831e-25, 1.1325e-24, ..., 1.5787e-24, 6.0490e-26,\n 9.6392e-25],\n [2.1814e-25, 3.8225e-25, 3.4682e-25, ..., 7.1373e-26, 1.9730e-26,\n 1.5278e-25]], device='cuda:0')" + }, + "52": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([9.0429e-22, 1.9450e-22, 1.7685e-21, 1.2430e-23, 5.4692e-22, 2.3132e-21,\n 3.5217e-23, 1.2464e-22, 6.1014e-23, 1.8110e-24, 3.1472e-22, 1.8597e-22,\n 1.4198e-21, 1.1838e-22, 2.1042e-22, 6.1481e-22, 9.5973e-24, 4.0904e-22,\n 1.8735e-21, 2.6380e-21, 7.8125e-24, 4.3571e-23, 7.0330e-23, 1.4108e-21,\n 1.4013e-21, 1.5155e-21, 3.9599e-22, 2.1223e-21, 3.1232e-22, 1.9223e-21,\n 4.2003e-21, 6.4805e-22, 3.8749e-21, 9.1248e-24, 5.0190e-22, 5.7800e-23,\n 8.3738e-23, 3.3113e-22, 1.4188e-21, 2.7111e-22, 1.3593e-22, 6.5467e-22,\n 1.1391e-22, 7.6911e-22, 1.2898e-21, 1.1715e-22, 4.5583e-22, 2.8876e-22,\n 7.4438e-22, 2.1109e-21, 2.8787e-22, 3.2925e-21, 4.8448e-24, 2.3418e-23,\n 2.3145e-21, 9.0392e-24, 3.6793e-23, 4.4139e-22, 8.6068e-22, 5.6221e-23,\n 7.7096e-24, 4.8085e-23, 1.6058e-21, 1.3125e-23, 1.1193e-21, 6.4476e-23,\n 1.5364e-22, 1.0751e-21, 1.6181e-21, 6.2703e-24, 1.9577e-23, 4.9749e-23,\n 3.3307e-21, 2.1579e-22, 8.8347e-23, 6.4065e-23, 1.1502e-22, 3.2695e-23,\n 6.1834e-22, 2.2314e-22, 1.4332e-23, 2.5848e-23, 2.8734e-22, 1.0847e-23,\n 5.4010e-23, 6.5041e-22, 4.4603e-22, 5.7606e-23, 2.8582e-22, 9.6444e-23,\n 1.2669e-21, 7.3296e-24, 3.9769e-23, 2.2819e-24, 1.4255e-21, 2.5462e-23,\n 1.9361e-22, 1.2213e-21, 6.2906e-22, 1.4369e-22, 1.2499e-22, 5.6061e-23,\n 3.5827e-22, 7.7031e-22, 2.2856e-22, 3.0896e-23, 2.7476e-22, 4.4720e-22,\n 1.6480e-21, 7.7550e-23, 1.9472e-22, 6.4487e-24, 6.0160e-23, 3.8082e-21,\n 4.6125e-21, 1.7296e-22, 3.2284e-23, 7.3675e-23, 1.2797e-23, 7.4465e-23,\n 2.7477e-22, 8.3785e-23, 2.9185e-22, 2.0322e-22, 3.9325e-22, 1.5141e-21,\n 3.5558e-23, 3.3229e-22, 1.3480e-21, 2.4277e-21, 1.3335e-21, 3.9766e-22,\n 2.0975e-22, 3.3541e-22, 2.5387e-23, 6.5091e-22, 6.6376e-23, 5.2619e-22,\n 5.7482e-24, 7.3608e-24, 2.1965e-21, 4.7871e-22, 2.7703e-22, 4.9919e-22,\n 1.8858e-22, 3.3032e-23, 6.2600e-23, 6.1603e-23, 6.5570e-23, 7.8621e-23,\n 2.1175e-22, 1.2190e-23, 2.7531e-22, 3.4082e-24, 1.0146e-21, 7.8880e-23,\n 1.0589e-23, 1.3667e-22, 4.2051e-21, 7.7695e-22, 1.8193e-21, 1.5927e-21,\n 7.9852e-22, 6.7090e-24, 3.7220e-23, 1.4785e-21, 4.6449e-22, 1.1647e-22,\n 5.0702e-21, 9.0423e-23, 1.5321e-21, 3.9345e-21, 2.6952e-21, 4.4356e-22,\n 2.7152e-21, 2.3113e-21, 1.2464e-21, 4.5901e-22, 5.1625e-22, 2.1387e-23,\n 3.1208e-22, 1.4472e-22, 1.0791e-22, 6.8466e-23, 3.2302e-22, 6.6179e-23,\n 4.2255e-23, 9.0164e-24, 1.4949e-21, 4.5864e-23, 5.7785e-22, 7.5356e-24,\n 2.6059e-21, 3.3136e-22, 4.5567e-23, 2.5867e-21, 2.7555e-22, 7.7575e-23,\n 9.7957e-22, 7.9160e-22, 9.7170e-22, 9.4248e-22, 7.7787e-23, 1.8026e-23,\n 2.9149e-23, 5.2853e-22, 5.0433e-22, 5.1812e-24, 1.9994e-22, 1.7138e-22,\n 7.3447e-21, 2.2009e-21, 2.6695e-22, 8.6469e-23, 3.7019e-22, 4.9319e-22,\n 3.3782e-22, 1.3460e-22, 1.8517e-21, 1.3007e-21, 4.5101e-22, 2.4653e-23,\n 5.3833e-23, 5.8755e-22, 2.8200e-21, 3.8784e-23, 8.7621e-23, 1.0631e-21,\n 1.1861e-21, 1.1719e-21, 8.9979e-22, 2.8750e-22, 1.2124e-22, 7.8104e-22,\n 7.2719e-22, 1.0299e-23, 2.3229e-21, 5.1733e-23, 5.3341e-22, 1.7324e-21,\n 6.1853e-22, 4.1860e-22, 1.6947e-21, 1.5473e-22, 2.2081e-22, 5.6181e-23,\n 1.6178e-21, 1.7371e-21, 4.4814e-22, 1.7211e-22, 2.1783e-22, 1.9735e-21,\n 6.8143e-22, 2.2147e-23, 4.6277e-21, 4.2727e-22], device='cuda:0')" + }, + "53": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.3815e-24, 2.3090e-25, 6.3545e-24, 3.8989e-26, 4.1382e-25, 3.4368e-24,\n 4.7764e-26, 7.4938e-26, 4.2713e-26, 1.7305e-26, 1.6985e-25, 6.7840e-26,\n 3.4674e-24, 9.1052e-27, 1.4838e-25, 8.7019e-25, 3.8789e-26, 2.9502e-24,\n 8.3081e-24, 1.5536e-23, 6.3040e-26, 1.1401e-25, 4.4320e-26, 4.3279e-24,\n 2.4928e-24, 2.7673e-24, 1.0274e-24, 9.1425e-24, 8.7348e-26, 1.4635e-24,\n 5.2961e-23, 1.9738e-24, 1.6451e-23, 4.7434e-26, 1.9823e-24, 1.2478e-26,\n 1.1178e-25, 7.3847e-26, 2.7210e-24, 8.6672e-26, 1.4269e-25, 1.7275e-24,\n 1.6365e-25, 2.7136e-25, 5.5042e-25, 3.1909e-26, 2.5413e-24, 1.7703e-25,\n 2.5868e-24, 3.7569e-24, 9.1578e-26, 8.1838e-24, 3.4567e-26, 1.7877e-26,\n 4.6293e-24, 2.8538e-26, 2.1042e-26, 1.1333e-25, 5.2989e-25, 3.4033e-26,\n 1.8873e-27, 9.0098e-26, 6.7715e-24, 4.3252e-27, 1.1520e-24, 6.0569e-26,\n 5.7952e-25, 9.9534e-24, 1.9144e-24, 4.5329e-26, 1.8499e-26, 1.2269e-26,\n 1.4238e-23, 1.0950e-24, 6.4041e-25, 1.8845e-24, 7.7197e-26, 2.1650e-25,\n 7.8175e-25, 3.8244e-24, 1.3825e-25, 1.1074e-25, 1.4698e-25, 5.6913e-26,\n 3.2723e-26, 3.0859e-24, 1.1892e-24, 5.3219e-26, 1.3798e-25, 2.2665e-24,\n 2.7663e-24, 3.0745e-26, 4.8832e-26, 1.8320e-26, 1.9605e-24, 5.3508e-26,\n 4.8210e-25, 3.2123e-24, 5.6607e-25, 9.2370e-27, 1.5018e-25, 6.9417e-25,\n 1.8850e-24, 2.7434e-25, 3.0495e-25, 4.9201e-26, 1.0794e-24, 1.1785e-24,\n 2.5587e-24, 4.2203e-26, 2.0598e-25, 1.1995e-26, 7.1156e-26, 1.3036e-23,\n 7.1006e-23, 1.5264e-25, 5.9558e-26, 3.0795e-26, 4.0541e-26, 7.4468e-26,\n 7.9727e-25, 1.4687e-26, 2.2385e-25, 1.4931e-25, 1.1787e-25, 4.8361e-24,\n 4.8122e-26, 2.0165e-25, 2.0404e-24, 3.7052e-24, 5.0225e-25, 5.5472e-25,\n 4.8448e-25, 2.3592e-24, 1.2319e-26, 1.7140e-24, 3.7097e-25, 3.0657e-24,\n 5.5477e-26, 5.2229e-26, 5.1303e-24, 1.5224e-24, 1.5638e-25, 9.1037e-24,\n 6.6950e-26, 1.0951e-25, 8.1429e-26, 2.9696e-26, 1.2261e-25, 2.6705e-26,\n 2.3785e-24, 2.4101e-26, 1.5278e-25, 6.2893e-26, 2.8725e-24, 4.4097e-26,\n 4.2080e-26, 3.5074e-26, 2.7474e-23, 2.7079e-25, 6.9482e-24, 6.0683e-24,\n 5.7074e-25, 9.5686e-27, 5.1562e-26, 1.7467e-24, 1.5892e-25, 1.6351e-25,\n 7.2944e-24, 7.4621e-26, 4.1751e-24, 1.4237e-23, 7.2946e-24, 3.1040e-24,\n 4.5186e-24, 4.1525e-24, 1.9537e-24, 2.4726e-25, 1.0586e-24, 2.7559e-26,\n 1.2842e-24, 5.5560e-26, 2.4051e-25, 2.0843e-25, 2.6675e-24, 3.0205e-25,\n 4.6530e-26, 4.8975e-27, 4.7606e-24, 2.0375e-25, 7.7063e-25, 4.9647e-26,\n 7.5562e-24, 2.8480e-25, 6.1967e-26, 1.5802e-23, 9.4405e-26, 5.8454e-26,\n 3.0727e-24, 1.3349e-24, 3.4363e-24, 6.2712e-25, 9.5371e-26, 1.8992e-26,\n 2.3011e-26, 1.6266e-24, 2.6784e-24, 2.0648e-26, 3.8262e-26, 6.0265e-26,\n 2.3861e-23, 3.8905e-24, 7.0368e-25, 6.9127e-26, 7.2392e-25, 4.3757e-25,\n 8.9436e-25, 5.7924e-25, 1.4910e-24, 2.8403e-24, 3.5801e-26, 1.7310e-26,\n 3.2614e-26, 3.3865e-25, 9.0157e-24, 5.6744e-26, 1.4715e-24, 7.4119e-25,\n 7.7258e-24, 1.8195e-24, 2.6362e-24, 7.2020e-25, 7.4585e-26, 1.5406e-24,\n 2.1343e-24, 2.2178e-26, 7.9488e-24, 2.8171e-26, 1.7009e-24, 2.5751e-24,\n 4.8643e-25, 1.9779e-24, 3.6901e-24, 2.1708e-25, 5.3373e-26, 7.2429e-26,\n 1.0927e-23, 3.3900e-24, 5.0649e-25, 1.8807e-25, 4.7605e-26, 7.3061e-24,\n 6.3138e-24, 1.6447e-26, 4.4153e-23, 1.8744e-25], device='cuda:0')" + }, + "54": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.1798e-24, 7.7292e-25, 5.4859e-24, 1.2362e-25, 1.5335e-24, 4.7237e-24,\n 1.4854e-25, 7.3443e-25, 4.1185e-25, 7.9156e-26, 4.1167e-25, 2.9439e-25,\n 5.7370e-24, 1.1238e-25, 5.2351e-25, 1.9301e-24, 1.1188e-25, 2.3138e-24,\n 6.2630e-24, 9.9814e-24, 7.9612e-26, 7.9736e-25, 2.6276e-25, 5.5896e-24,\n 1.6130e-24, 5.1438e-24, 6.2269e-25, 3.7267e-24, 8.1711e-25, 3.9806e-24,\n 1.6546e-23, 7.8984e-25, 6.4554e-24, 1.8050e-26, 2.4810e-24, 1.8558e-25,\n 1.6993e-24, 1.5277e-24, 4.3481e-24, 2.4628e-25, 6.1267e-25, 6.8390e-25,\n 3.1283e-25, 3.1680e-24, 5.0192e-24, 2.5924e-25, 2.3184e-24, 8.0901e-25,\n 1.3536e-24, 4.3488e-24, 1.5938e-25, 5.9562e-24, 3.3537e-26, 1.7131e-26,\n 5.2419e-24, 1.6898e-25, 2.4321e-25, 1.6753e-24, 1.5072e-24, 2.0828e-25,\n 7.6296e-26, 1.5845e-25, 5.7008e-24, 8.8292e-26, 4.4068e-24, 5.7430e-25,\n 8.1402e-25, 4.8674e-24, 3.1379e-24, 1.4827e-26, 1.3715e-25, 8.9867e-26,\n 4.3265e-24, 4.7235e-25, 1.3143e-24, 1.5492e-24, 2.6036e-25, 8.3046e-25,\n 1.9980e-24, 1.9070e-24, 7.1491e-26, 1.9859e-25, 1.4290e-24, 2.1815e-25,\n 5.6633e-26, 2.7392e-24, 9.0252e-25, 1.8710e-25, 1.3413e-24, 1.3873e-24,\n 2.5443e-24, 2.1928e-26, 1.9445e-25, 6.8110e-27, 2.3583e-24, 3.5419e-25,\n 9.4324e-25, 5.0373e-24, 6.3148e-25, 1.6670e-25, 7.5522e-25, 1.1977e-24,\n 1.1681e-24, 2.4962e-24, 5.2617e-25, 1.6179e-26, 1.4600e-24, 1.4756e-24,\n 2.5455e-24, 1.1587e-25, 5.9309e-25, 1.7558e-27, 4.2119e-25, 1.4074e-23,\n 1.5761e-23, 4.6108e-25, 2.9331e-25, 4.2469e-25, 1.5490e-25, 4.3450e-25,\n 1.1244e-24, 1.5446e-25, 8.2323e-25, 5.7213e-25, 1.3144e-24, 3.5704e-24,\n 1.7244e-25, 8.3332e-25, 2.1814e-24, 3.8429e-24, 2.4337e-24, 6.8502e-25,\n 6.9386e-25, 1.9017e-24, 7.3883e-26, 1.4485e-24, 1.0940e-24, 1.9368e-24,\n 7.1317e-26, 8.1644e-26, 7.2240e-24, 1.7277e-24, 1.1208e-24, 8.4236e-24,\n 5.8002e-25, 3.8868e-26, 2.1511e-25, 2.3018e-25, 1.1217e-25, 2.6660e-25,\n 1.2475e-24, 1.7684e-25, 1.2190e-24, 1.7505e-26, 3.6680e-24, 1.4505e-25,\n 1.9926e-25, 6.0826e-25, 1.3589e-23, 1.7185e-24, 5.9373e-24, 4.7147e-24,\n 2.4998e-24, 2.1124e-25, 1.2600e-25, 5.0862e-24, 1.3975e-24, 1.8962e-24,\n 1.0179e-23, 1.7702e-25, 4.6607e-24, 8.8515e-24, 9.3901e-24, 3.2814e-24,\n 8.7474e-24, 2.7476e-24, 1.5082e-24, 6.2925e-25, 2.1497e-24, 6.4200e-26,\n 6.7709e-25, 5.5076e-25, 5.1322e-25, 2.4919e-25, 1.9342e-24, 1.9742e-25,\n 4.3821e-25, 1.5953e-25, 1.6363e-24, 4.5109e-25, 1.3113e-24, 7.9725e-26,\n 2.9258e-24, 3.7767e-25, 7.2075e-25, 1.0012e-23, 3.6930e-25, 1.8279e-25,\n 2.7492e-24, 2.8114e-24, 3.3804e-24, 1.8918e-24, 4.1881e-25, 2.5390e-26,\n 2.9895e-25, 2.2012e-24, 3.1046e-24, 1.2591e-25, 2.6589e-25, 5.9650e-25,\n 1.9370e-23, 6.0224e-24, 9.9163e-25, 1.9591e-25, 1.4177e-24, 5.7980e-25,\n 1.6492e-24, 6.3384e-25, 6.4095e-24, 4.3137e-24, 8.5256e-25, 1.5509e-25,\n 1.0863e-24, 2.4280e-24, 7.6540e-24, 2.3064e-25, 1.5393e-24, 3.8195e-24,\n 3.5075e-24, 4.1040e-24, 1.1757e-24, 6.8358e-25, 6.6386e-25, 3.0019e-24,\n 1.2028e-24, 5.8084e-27, 9.0927e-24, 9.2662e-26, 2.0440e-24, 6.5606e-24,\n 2.4255e-24, 1.8236e-24, 1.9951e-24, 6.0678e-25, 3.7813e-25, 1.8001e-25,\n 3.5069e-24, 5.7284e-24, 1.2473e-24, 6.3253e-25, 5.3120e-25, 2.8511e-24,\n 3.8442e-24, 1.1988e-26, 1.7273e-23, 2.0378e-24], device='cuda:0')" + }, + "55": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.0150e-25, 1.8198e-25, 3.1691e-25, ..., 1.4011e-25, 1.1784e-26,\n 9.6271e-26],\n [4.2247e-26, 1.3450e-25, 1.4758e-26, ..., 3.7029e-26, 1.4112e-26,\n 9.5782e-27],\n [4.4431e-27, 8.8368e-27, 3.5184e-26, ..., 8.0293e-27, 3.7368e-27,\n 5.8323e-27],\n ...,\n [2.4767e-25, 1.6130e-25, 2.1594e-25, ..., 3.8348e-27, 1.7332e-26,\n 2.1766e-26],\n [5.6161e-25, 9.9938e-26, 3.8680e-25, ..., 1.7848e-25, 3.4185e-26,\n 2.1585e-25],\n [6.9930e-25, 4.5637e-25, 4.1983e-25, ..., 9.3001e-25, 1.3953e-26,\n 2.8727e-25]], device='cuda:0')" + }, + "56": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([5.5345e-22, 6.5790e-23, 8.0806e-25, 9.2647e-23, 7.0828e-22, 7.8015e-22,\n 2.3234e-23, 9.9380e-23, 6.5275e-22, 3.6469e-22, 2.8962e-23, 7.7692e-22,\n 2.1088e-22, 3.4757e-22, 3.1253e-22, 1.3513e-22, 1.0582e-23, 1.5705e-22,\n 3.7931e-22, 9.8458e-22, 1.1922e-23, 1.0293e-22, 8.8058e-23, 9.3559e-22,\n 1.1414e-23, 9.5469e-22, 5.9202e-22, 3.1758e-22, 6.0598e-22, 4.0689e-23,\n 4.8512e-22, 5.5050e-22, 6.0097e-22, 4.0551e-23, 8.3211e-22, 1.1521e-22,\n 1.0029e-23, 2.7994e-23, 6.5862e-23, 5.5689e-22, 3.3476e-22, 6.9147e-23,\n 4.2149e-24, 4.4552e-22, 2.8996e-24, 4.5049e-24, 5.1845e-23, 8.5254e-22,\n 3.5391e-22, 4.9181e-22, 5.0218e-22, 5.4885e-22, 3.4883e-23, 6.0916e-22,\n 2.8048e-22, 1.9705e-22, 5.4040e-23, 9.0917e-24, 3.2709e-21, 2.5475e-22,\n 1.0074e-22, 4.5646e-22, 5.9041e-22, 7.7669e-23, 2.3983e-23, 8.1676e-23,\n 4.2612e-22, 5.1330e-22, 7.7908e-22, 3.6174e-23, 1.2104e-23, 5.1279e-23,\n 3.2050e-21, 3.1152e-22, 9.6217e-23, 2.0223e-22, 4.4185e-24, 4.7205e-22,\n 1.6016e-22, 2.0166e-22, 1.6868e-22, 2.4040e-22, 1.0843e-23, 1.2338e-22,\n 1.9553e-22, 2.9801e-22, 1.9732e-22, 3.3625e-23, 2.4827e-22, 3.1220e-22,\n 1.6408e-23, 1.7831e-23, 2.2019e-22, 1.1008e-21, 4.1244e-23, 8.5821e-23,\n 9.0134e-22, 8.4407e-24, 2.5885e-23, 3.8467e-22, 7.0304e-23, 5.4763e-23,\n 2.1430e-22, 1.4962e-22, 3.8841e-23, 1.0078e-21, 1.9434e-24, 1.1278e-21,\n 3.2385e-24, 2.9573e-22, 3.8829e-22, 1.4420e-22, 7.2308e-24, 3.8801e-22,\n 2.4645e-21, 7.6245e-24, 1.7997e-22, 6.1810e-22, 7.0395e-25, 2.9961e-23,\n 1.5159e-23, 5.5405e-23, 1.4903e-23, 1.4371e-21, 1.0419e-21, 1.8973e-23,\n 1.5254e-22, 4.6489e-23, 5.9942e-22, 1.6564e-22, 5.7148e-22, 7.5851e-23,\n 1.4078e-22, 1.2458e-21, 2.3063e-21, 5.6226e-24, 1.0653e-22, 4.4065e-22,\n 5.4585e-22, 1.4133e-24, 1.3931e-21, 8.2062e-23, 1.1274e-21, 3.2537e-22,\n 1.0396e-23, 7.3478e-24, 2.4434e-22, 3.6638e-22, 7.3199e-23, 5.3013e-23,\n 4.5442e-23, 3.4125e-22, 1.3301e-22, 1.0688e-21, 1.7486e-22, 5.1517e-22,\n 6.8417e-23, 2.5730e-22, 2.0147e-21, 1.4856e-23, 1.7858e-21, 3.2846e-23,\n 1.7949e-22, 6.3836e-23, 1.4072e-22, 3.0391e-21, 1.7724e-22, 6.0160e-22,\n 4.0535e-22, 6.8438e-24, 8.4201e-22, 4.4273e-22, 2.4568e-22, 3.6618e-23,\n 1.7767e-22, 5.1796e-22, 3.5496e-23, 5.9843e-22, 1.0085e-21, 2.0875e-22,\n 7.4941e-22, 1.7432e-23, 8.2476e-23, 9.0618e-24, 9.1614e-22, 2.3293e-23,\n 5.2358e-23, 1.3475e-22, 4.6286e-23, 6.6122e-23, 2.3451e-23, 3.8867e-23,\n 7.6708e-22, 2.6412e-23, 8.2281e-24, 2.3005e-23, 3.4673e-24, 1.9724e-22,\n 6.2605e-23, 4.2811e-24, 4.1300e-22, 4.6577e-23, 1.0737e-22, 2.0602e-22,\n 4.0372e-22, 1.0636e-22, 3.6803e-22, 1.7379e-23, 1.5954e-23, 1.6867e-23,\n 4.0771e-22, 3.4700e-21, 2.8992e-23, 2.0319e-22, 1.8053e-24, 7.6224e-22,\n 1.8989e-22, 8.1084e-24, 7.0429e-22, 6.9090e-22, 9.8533e-24, 1.9284e-22,\n 1.1113e-22, 8.8059e-22, 1.0490e-21, 9.6404e-22, 1.8775e-22, 1.2728e-23,\n 9.0376e-22, 5.3962e-22, 9.9537e-24, 1.7102e-22, 6.9645e-22, 1.8761e-22,\n 8.9628e-23, 1.9307e-22, 6.4797e-23, 2.9667e-23, 8.2896e-24, 4.6440e-24,\n 1.5321e-24, 2.7595e-22, 9.9503e-23, 5.0743e-22, 3.3939e-22, 6.7970e-24,\n 3.1497e-22, 3.6522e-23, 1.6561e-23, 1.9152e-24, 1.5545e-22, 2.6654e-22,\n 1.5972e-22, 4.6494e-22, 5.6805e-22, 2.2071e-21], device='cuda:0')" + }, + "57": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.9556e-24, 4.1489e-26, 3.1810e-26, 6.1218e-26, 8.4971e-25, 8.0633e-25,\n 3.7158e-26, 1.3832e-25, 8.3597e-25, 2.6577e-25, 1.2677e-26, 6.5541e-25,\n 1.7078e-24, 2.8241e-25, 2.3834e-25, 2.6875e-25, 1.2798e-26, 3.4765e-25,\n 2.7990e-25, 2.6330e-24, 4.2764e-26, 2.1475e-25, 1.5597e-26, 8.0476e-24,\n 1.5836e-26, 2.3300e-24, 1.8139e-25, 1.2434e-25, 2.1753e-25, 1.2339e-26,\n 5.3436e-24, 2.8341e-25, 4.4826e-25, 8.9388e-27, 5.2945e-24, 4.5860e-25,\n 1.8346e-26, 6.5395e-27, 2.5013e-26, 6.5856e-25, 6.1861e-25, 2.5689e-25,\n 9.2577e-26, 1.2431e-24, 2.7187e-26, 6.4190e-26, 7.8862e-26, 2.5475e-24,\n 4.0613e-25, 3.1383e-24, 2.7535e-25, 4.6347e-25, 2.2907e-26, 1.1262e-24,\n 3.1367e-25, 1.6112e-25, 1.3876e-26, 4.4122e-26, 2.6713e-24, 4.6812e-25,\n 3.4718e-26, 5.0755e-25, 2.4454e-24, 2.0034e-26, 6.3194e-26, 2.1662e-24,\n 2.7472e-24, 5.3630e-25, 2.0328e-25, 1.3584e-25, 2.0547e-26, 5.8844e-27,\n 1.1080e-23, 8.7467e-26, 7.1031e-26, 3.2629e-25, 1.7004e-26, 9.4585e-25,\n 1.9241e-25, 1.1819e-24, 9.5862e-25, 6.4024e-25, 4.4056e-27, 1.0885e-26,\n 3.6142e-26, 3.9417e-25, 7.1786e-26, 2.8646e-27, 3.6082e-25, 1.2204e-24,\n 3.6610e-26, 2.8480e-26, 5.4706e-25, 1.4530e-24, 5.6935e-26, 1.8474e-25,\n 4.9819e-24, 7.1916e-28, 6.4264e-27, 1.2030e-25, 3.3751e-26, 1.2489e-26,\n 8.6404e-26, 2.9254e-25, 1.8756e-26, 1.0285e-24, 2.7792e-27, 3.7746e-24,\n 4.7536e-27, 5.1388e-25, 8.3479e-25, 2.4853e-25, 1.0482e-26, 4.8350e-24,\n 8.3531e-24, 1.5372e-26, 1.1491e-24, 5.4803e-25, 1.4010e-26, 1.4767e-26,\n 1.9392e-26, 1.9161e-26, 3.5069e-26, 1.0910e-24, 2.0421e-24, 2.8665e-26,\n 8.9141e-26, 5.6498e-26, 1.9453e-24, 4.4378e-25, 4.4219e-25, 1.7432e-26,\n 2.1979e-25, 6.7774e-24, 4.0554e-24, 1.5538e-26, 3.1535e-26, 1.4826e-24,\n 1.6376e-24, 1.2080e-26, 7.7468e-24, 1.8262e-25, 1.7322e-24, 3.7348e-25,\n 7.1195e-27, 1.3823e-26, 8.6005e-26, 3.5581e-25, 5.4782e-26, 2.4248e-26,\n 1.0131e-26, 6.3096e-25, 1.0920e-24, 1.4209e-23, 2.8132e-25, 1.7711e-24,\n 1.4949e-26, 6.7269e-25, 2.7638e-24, 5.8122e-26, 5.8231e-24, 2.6790e-26,\n 1.3805e-25, 3.8062e-25, 6.8255e-25, 9.3616e-24, 4.6789e-26, 4.5825e-25,\n 8.3187e-26, 1.7846e-26, 1.2725e-24, 1.3251e-24, 3.5258e-24, 2.8399e-26,\n 1.8570e-25, 2.7450e-25, 9.8975e-26, 3.0719e-25, 6.6223e-24, 1.0331e-25,\n 3.7400e-25, 5.0120e-26, 1.8157e-25, 7.5084e-27, 7.3301e-24, 1.7256e-26,\n 8.4891e-26, 2.0899e-25, 1.2968e-26, 1.4527e-25, 3.0518e-26, 3.6154e-26,\n 6.6625e-25, 2.2720e-26, 1.3172e-26, 2.2349e-26, 4.6824e-27, 8.7124e-26,\n 4.0879e-26, 2.0096e-26, 8.0602e-25, 9.5586e-27, 8.4128e-26, 5.1480e-26,\n 4.5871e-25, 4.0639e-25, 5.5112e-25, 1.4273e-26, 6.6784e-26, 2.7077e-26,\n 2.9151e-25, 1.1421e-23, 2.3819e-26, 1.0315e-25, 1.6030e-26, 7.7643e-25,\n 1.8772e-25, 6.3348e-27, 5.8987e-24, 1.9756e-24, 3.3355e-26, 3.8509e-25,\n 2.1129e-25, 6.9921e-24, 1.4675e-24, 9.1550e-25, 5.4435e-25, 1.8551e-26,\n 3.6938e-24, 2.2960e-24, 8.2145e-26, 3.2828e-25, 4.4423e-24, 4.6266e-25,\n 1.2591e-25, 6.3246e-26, 1.1186e-25, 5.3787e-26, 5.6273e-27, 1.0642e-26,\n 5.9060e-26, 1.0431e-24, 1.0044e-26, 8.4153e-25, 6.3945e-25, 1.9570e-26,\n 1.4858e-25, 1.9288e-26, 1.5079e-26, 1.7701e-26, 2.6060e-25, 2.3876e-25,\n 3.0389e-25, 1.0782e-25, 3.6214e-24, 9.7819e-24], device='cuda:0')" + }, + "58": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.7892e-24, 2.0562e-25, 6.3181e-27, 3.2299e-25, 1.8315e-24, 1.3053e-24,\n 1.8299e-25, 1.7282e-25, 1.8808e-24, 1.0411e-24, 3.9458e-26, 1.0960e-24,\n 8.5704e-25, 3.8451e-25, 5.1602e-25, 6.0945e-25, 5.7404e-26, 4.7225e-25,\n 1.3319e-24, 3.9931e-24, 1.4847e-26, 5.6182e-25, 1.0667e-25, 2.6019e-24,\n 1.9564e-26, 3.4867e-24, 6.4463e-25, 3.2200e-25, 7.6676e-25, 1.5134e-25,\n 2.2550e-24, 9.6160e-26, 8.7603e-25, 6.3866e-26, 3.4016e-24, 3.4899e-25,\n 6.6760e-26, 1.3938e-25, 2.5846e-25, 4.0234e-25, 1.0024e-24, 5.0327e-25,\n 8.9547e-26, 2.0941e-24, 1.0404e-26, 1.8416e-25, 3.5163e-25, 2.3008e-24,\n 5.9853e-25, 1.6619e-24, 5.9250e-25, 3.9738e-25, 1.8664e-25, 1.8098e-24,\n 2.3782e-25, 3.0997e-25, 2.3496e-25, 1.5705e-26, 4.1992e-24, 5.2905e-25,\n 1.0354e-25, 5.7621e-25, 2.6043e-24, 1.2692e-25, 1.4082e-25, 7.0689e-25,\n 1.5222e-24, 7.4201e-25, 1.2139e-24, 2.6868e-25, 2.8700e-26, 6.5659e-26,\n 2.5593e-24, 4.2322e-25, 1.4039e-25, 6.1555e-25, 8.7366e-26, 1.0812e-24,\n 2.2227e-25, 1.4399e-24, 1.1071e-24, 8.2912e-25, 2.0099e-26, 9.2455e-26,\n 1.7866e-25, 7.4737e-25, 1.5643e-25, 9.8293e-26, 1.3942e-24, 1.2213e-24,\n 1.5192e-25, 7.2536e-26, 6.7680e-25, 1.0904e-24, 8.0921e-26, 5.6572e-25,\n 3.4183e-24, 2.5570e-26, 2.9620e-27, 2.0452e-25, 1.2159e-25, 8.8914e-26,\n 1.2360e-25, 5.1369e-25, 1.1759e-25, 7.2456e-25, 2.1066e-26, 3.1407e-24,\n 6.1765e-27, 3.6865e-25, 9.6008e-25, 8.1108e-25, 6.5784e-26, 1.8086e-24,\n 3.1690e-24, 7.9020e-26, 1.2920e-24, 1.3386e-24, 1.0171e-26, 9.2117e-26,\n 9.2033e-27, 1.1940e-25, 4.9929e-26, 3.8469e-24, 2.7493e-24, 2.6753e-26,\n 3.3729e-25, 1.4712e-25, 8.9395e-25, 3.9407e-25, 4.4537e-25, 5.6285e-27,\n 5.4790e-25, 4.1053e-24, 4.7400e-24, 3.9989e-27, 4.3403e-25, 1.4903e-24,\n 2.6723e-24, 9.6133e-27, 5.5957e-24, 3.9816e-25, 1.5187e-24, 7.7049e-25,\n 1.4430e-26, 3.0505e-26, 3.4387e-25, 8.8076e-25, 1.1385e-25, 1.1347e-25,\n 7.9561e-26, 1.3851e-24, 7.2635e-25, 4.6270e-24, 5.7181e-25, 1.1646e-24,\n 7.5877e-26, 1.3039e-24, 5.5975e-24, 5.2589e-26, 6.0661e-24, 1.3879e-26,\n 6.6239e-25, 3.0577e-25, 3.3942e-25, 9.3828e-24, 5.2101e-25, 1.7848e-24,\n 4.3353e-25, 1.7351e-26, 2.2907e-24, 1.8198e-24, 1.5486e-24, 2.4406e-25,\n 8.2004e-25, 7.3013e-25, 1.9588e-25, 5.6893e-25, 3.7264e-24, 6.5989e-25,\n 7.3491e-25, 1.1612e-25, 4.7191e-25, 2.2085e-27, 4.5163e-24, 3.4241e-26,\n 3.9372e-25, 5.2049e-25, 4.4484e-26, 2.2076e-25, 4.0922e-26, 1.2611e-25,\n 7.4289e-25, 1.2661e-25, 2.7811e-26, 9.5765e-26, 1.4741e-26, 3.1886e-25,\n 1.0290e-26, 9.3616e-27, 4.2701e-25, 6.2490e-26, 2.2861e-25, 1.1771e-25,\n 8.1623e-25, 6.7973e-25, 6.4346e-25, 4.7584e-27, 9.8643e-26, 6.3406e-26,\n 3.4410e-25, 9.4063e-24, 1.0605e-25, 3.0507e-25, 2.8836e-26, 8.2593e-25,\n 4.8274e-25, 7.1745e-26, 3.3187e-24, 2.5886e-24, 1.8905e-26, 9.2220e-25,\n 4.2063e-25, 4.6591e-24, 2.3259e-24, 2.7862e-24, 2.9908e-25, 5.7379e-26,\n 1.9438e-24, 2.5376e-24, 1.1580e-25, 5.7241e-25, 2.6331e-24, 9.7804e-25,\n 8.9988e-26, 2.4718e-25, 5.1728e-25, 5.9322e-26, 2.5472e-26, 2.5097e-27,\n 6.1794e-27, 6.2706e-25, 1.1424e-25, 1.5335e-24, 5.0817e-25, 1.8863e-26,\n 4.2708e-25, 1.6556e-25, 7.7873e-26, 1.7577e-27, 4.4117e-25, 3.8136e-25,\n 6.4105e-25, 6.9918e-25, 2.1586e-24, 8.3596e-24], device='cuda:0')" + }, + "59": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.6694e-25, 2.7996e-25, 2.2453e-25, ..., 3.1537e-25, 2.8060e-26,\n 2.5908e-25],\n [1.0422e-26, 1.5312e-26, 3.8863e-26, ..., 1.4375e-26, 2.4024e-27,\n 1.2412e-26],\n [2.7237e-26, 7.4640e-27, 1.8816e-26, ..., 1.7271e-26, 7.0963e-27,\n 1.2342e-26],\n ...,\n [1.1904e-24, 2.5998e-25, 6.6275e-25, ..., 4.7778e-25, 7.0540e-26,\n 2.4290e-25],\n [5.6826e-26, 9.4864e-27, 1.3759e-25, ..., 3.9298e-26, 4.0741e-26,\n 4.7122e-26],\n [3.8438e-26, 8.6614e-26, 4.4914e-27, ..., 2.9854e-26, 2.5280e-26,\n 1.6618e-26]], device='cuda:0')" + }, + "60": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.1006e-21, 1.4101e-23, 2.0944e-23, 1.2176e-23, 7.4738e-23, 7.8569e-21,\n 8.5275e-23, 1.7779e-22, 5.2651e-22, 1.7692e-22, 3.6455e-22, 1.0965e-21,\n 1.6741e-22, 1.2976e-22, 6.8134e-22, 9.7937e-22, 1.5708e-22, 1.1461e-21,\n 7.9301e-21, 1.0236e-22, 2.1867e-21, 1.4113e-23, 1.4316e-22, 2.8869e-21,\n 2.0713e-21, 2.5951e-22, 2.7331e-21, 2.9401e-21, 4.3739e-22, 3.3150e-22,\n 3.7562e-22, 2.6955e-22, 4.9398e-22, 1.7621e-22, 4.1083e-23, 3.7666e-22,\n 4.3015e-22, 4.3997e-23, 1.6628e-21, 6.7843e-22, 2.6831e-23, 1.6114e-21,\n 2.4912e-22, 8.0566e-23, 3.8260e-23, 5.0153e-22, 9.8869e-22, 2.4240e-21,\n 1.1981e-21, 2.8341e-21, 6.6331e-22, 5.2661e-22, 1.0606e-20, 8.9947e-23,\n 2.7715e-21, 4.0244e-22, 1.5706e-21, 1.7868e-21, 5.2410e-21, 3.0104e-22,\n 1.7452e-20, 2.6310e-22, 6.8160e-22, 7.7801e-22, 9.8440e-24, 1.1220e-22,\n 6.5944e-22, 1.3214e-21, 1.1366e-20, 5.6584e-23, 3.2923e-22, 1.5082e-23,\n 1.0859e-20, 5.6278e-21, 1.7445e-22, 1.3904e-22, 1.3961e-21, 3.1796e-22,\n 8.5368e-22, 1.1470e-22, 6.9168e-22, 1.8519e-23, 7.5300e-23, 2.4148e-22,\n 6.3383e-22, 2.5031e-23, 9.6538e-23, 1.0009e-21, 1.2387e-23, 2.2201e-23,\n 2.0965e-22, 8.2495e-22, 2.4076e-22, 4.2532e-22, 7.1381e-23, 1.8562e-23,\n 1.5562e-21, 1.2685e-21, 7.6873e-22, 1.1400e-21, 1.5167e-23, 3.3122e-22,\n 4.0801e-22, 7.1011e-22, 7.1124e-23, 5.3695e-22, 8.6746e-23, 7.2519e-23,\n 6.7566e-23, 3.8853e-23, 2.5486e-21, 5.8258e-23, 9.2482e-23, 4.3840e-22,\n 1.6122e-21, 7.1913e-22, 8.0134e-22, 1.0130e-21, 1.0509e-21, 6.1584e-22,\n 6.6750e-23, 3.9055e-22, 5.5371e-23, 6.0793e-22, 1.6674e-22, 3.5655e-22,\n 2.6714e-23, 7.9071e-23, 1.9410e-21, 8.7212e-22, 3.9250e-22, 9.7378e-22,\n 3.0922e-22, 2.8280e-21, 5.0605e-22, 3.3463e-24, 4.6304e-23, 4.0179e-21,\n 2.4754e-21, 9.8802e-23, 1.0952e-21, 6.5656e-22, 2.3730e-23, 1.4291e-22,\n 4.3379e-23, 6.5768e-22, 1.3683e-22, 6.9925e-22, 1.4812e-21, 1.1317e-22,\n 3.6290e-22, 8.6933e-23, 2.9162e-22, 2.2027e-23, 1.0767e-22, 1.2186e-21,\n 4.9604e-21, 1.0645e-22, 6.6570e-21, 2.0301e-22, 4.9135e-22, 2.4613e-23,\n 4.6259e-22, 1.0885e-23, 2.7277e-22, 4.1565e-22, 2.0880e-21, 4.3558e-22,\n 6.6746e-23, 4.2225e-22, 3.7079e-22, 9.6074e-22, 9.6064e-24, 2.2229e-23,\n 3.6451e-21, 4.0010e-22, 3.3749e-23, 8.4810e-24, 8.3794e-22, 1.3162e-23,\n 1.7941e-22, 2.1765e-23, 9.9508e-22, 2.7458e-22, 2.2790e-21, 1.9751e-21,\n 1.1761e-22, 9.7566e-23, 1.4653e-23, 2.5921e-21, 5.4226e-23, 9.7885e-22,\n 1.4117e-21, 1.2398e-23, 3.4604e-22, 1.2222e-22, 4.6590e-23, 1.2678e-21,\n 2.0028e-21, 1.4580e-21, 1.1611e-21, 7.2259e-21, 6.9951e-23, 2.3834e-21,\n 1.1592e-22, 9.4889e-22, 1.7305e-21, 9.6059e-23, 1.2215e-22, 4.0047e-22,\n 3.5154e-21, 7.1615e-23, 5.9370e-23, 8.7962e-22, 4.5405e-22, 4.4349e-22,\n 3.9244e-23, 4.0739e-23, 2.2248e-21, 1.7152e-22, 9.1885e-22, 2.5386e-22,\n 2.8030e-23, 3.5608e-21, 7.3612e-23, 7.6292e-22, 3.8646e-22, 1.3498e-22,\n 1.2345e-22, 1.5021e-22, 8.8585e-22, 4.7395e-22, 2.3043e-22, 1.2229e-22,\n 2.7690e-22, 4.7924e-22, 8.9394e-22, 3.3754e-23, 4.2477e-23, 2.2884e-23,\n 6.0370e-22, 4.4758e-22, 2.0889e-23, 3.5288e-22, 3.5053e-22, 3.5694e-22,\n 5.7331e-22, 1.4090e-22, 9.4092e-22, 1.0569e-21, 1.3334e-23, 2.1745e-22,\n 1.9663e-23, 2.1255e-21, 1.9541e-22, 3.0628e-23], device='cuda:0')" + }, + "61": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([8.6806e-24, 3.7598e-26, 1.4433e-25, 2.7281e-26, 3.6665e-26, 3.0839e-23,\n 1.9181e-25, 1.6429e-25, 2.7668e-25, 1.1260e-25, 4.2815e-25, 7.2701e-24,\n 4.9650e-26, 8.1190e-26, 6.4305e-25, 2.7678e-24, 1.4204e-25, 3.0856e-24,\n 7.7462e-23, 1.5371e-25, 1.6212e-23, 1.8672e-26, 3.1924e-25, 4.5654e-23,\n 2.2055e-24, 1.6670e-25, 3.9894e-23, 3.9468e-24, 2.9744e-25, 9.4441e-25,\n 4.8408e-25, 8.1880e-24, 8.4914e-26, 5.2999e-25, 2.5608e-26, 4.6844e-24,\n 5.9215e-25, 6.8234e-26, 2.3263e-23, 1.4998e-23, 4.5432e-26, 8.7427e-24,\n 2.8544e-25, 1.1327e-25, 6.3028e-26, 1.4468e-24, 4.4881e-25, 1.9709e-24,\n 1.7152e-24, 5.7862e-24, 6.6013e-27, 6.1497e-24, 6.0880e-23, 2.4742e-26,\n 1.3646e-23, 2.8846e-24, 4.0196e-24, 1.6121e-24, 1.3846e-23, 1.0756e-25,\n 8.2667e-23, 3.8779e-25, 3.8956e-25, 7.6912e-24, 7.1318e-26, 1.1110e-25,\n 9.9638e-25, 2.0417e-23, 2.7171e-23, 4.3952e-26, 2.7157e-25, 1.7937e-25,\n 3.4399e-23, 2.3186e-23, 2.2994e-25, 1.0485e-25, 1.7215e-24, 2.1929e-24,\n 7.7356e-25, 2.6009e-26, 2.2752e-24, 5.3806e-26, 1.1316e-25, 2.5264e-25,\n 7.6742e-24, 2.1642e-25, 4.9150e-25, 2.3957e-24, 6.0174e-26, 1.4732e-26,\n 3.4432e-24, 1.4731e-23, 1.1080e-24, 9.1189e-25, 5.0572e-26, 4.1062e-26,\n 3.7545e-24, 3.2019e-24, 1.3789e-24, 1.7296e-25, 1.1240e-26, 1.9161e-25,\n 3.6585e-24, 1.3899e-23, 2.8900e-25, 2.3104e-24, 1.9005e-24, 1.6456e-25,\n 2.1732e-26, 1.3413e-26, 2.3915e-24, 6.9728e-26, 7.2482e-25, 1.3386e-23,\n 1.0014e-23, 1.1265e-23, 4.4071e-24, 2.2517e-24, 1.0459e-24, 1.5513e-25,\n 1.7436e-26, 1.1356e-25, 5.5000e-26, 1.0957e-25, 1.7745e-24, 2.9957e-25,\n 1.6970e-26, 7.3243e-26, 6.9982e-24, 3.6705e-25, 1.5472e-24, 5.4016e-25,\n 1.7532e-25, 7.0244e-23, 3.9769e-24, 4.2141e-26, 1.1614e-25, 6.3501e-23,\n 7.7030e-24, 7.9993e-26, 7.4918e-25, 3.0058e-25, 3.2740e-26, 1.0708e-25,\n 2.3124e-26, 7.0317e-26, 1.2852e-25, 2.9906e-24, 2.5533e-23, 8.7236e-26,\n 3.4629e-25, 2.3726e-25, 2.8731e-24, 4.9442e-26, 4.0637e-25, 1.2822e-23,\n 1.1159e-23, 1.7404e-25, 1.0918e-23, 6.7302e-26, 1.0671e-25, 5.3460e-26,\n 3.0864e-25, 6.3191e-27, 2.1808e-26, 3.3272e-25, 2.5868e-24, 1.7677e-25,\n 2.4751e-26, 4.2023e-25, 3.1731e-25, 2.7733e-24, 2.2515e-26, 4.6817e-26,\n 5.1043e-24, 1.6574e-25, 3.1978e-26, 4.4229e-26, 3.0148e-24, 1.8318e-26,\n 5.5168e-25, 3.5146e-26, 3.5139e-25, 1.2885e-24, 2.3826e-24, 5.0532e-24,\n 2.8898e-24, 5.5134e-26, 7.4620e-27, 3.7660e-24, 1.1752e-24, 3.4560e-25,\n 1.3138e-23, 1.3423e-25, 1.1243e-24, 1.8784e-24, 1.1976e-25, 2.3803e-24,\n 8.9919e-24, 1.6243e-24, 1.9728e-23, 2.3951e-23, 8.1490e-26, 2.4896e-24,\n 2.7980e-26, 6.5977e-24, 9.2477e-25, 1.0001e-25, 7.8073e-27, 1.3822e-24,\n 1.5924e-23, 8.9572e-26, 4.7321e-26, 5.5242e-25, 4.9995e-25, 9.3922e-25,\n 5.9839e-26, 1.4903e-25, 3.2276e-23, 5.2583e-26, 5.3725e-25, 3.5618e-25,\n 3.3519e-26, 2.1859e-23, 3.8946e-25, 4.4357e-25, 5.3356e-25, 1.0589e-24,\n 1.6913e-26, 7.8881e-25, 4.2867e-24, 1.4256e-23, 1.1174e-23, 1.1346e-25,\n 2.2775e-24, 3.0572e-25, 5.9994e-24, 2.7551e-25, 4.9721e-26, 3.4685e-25,\n 6.1564e-25, 1.8097e-24, 5.2347e-26, 7.6632e-25, 4.2378e-27, 5.1312e-25,\n 2.8533e-25, 2.6466e-25, 7.1769e-24, 1.0317e-24, 7.9821e-26, 1.1372e-25,\n 4.2054e-26, 1.0239e-23, 8.8323e-26, 1.2062e-25], device='cuda:0')" + }, + "62": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([5.9764e-24, 2.5076e-25, 1.5312e-26, 8.0667e-27, 5.1158e-25, 1.2600e-23,\n 9.4248e-25, 1.1389e-24, 3.3354e-24, 1.2636e-24, 2.5896e-24, 4.3861e-24,\n 7.0085e-25, 3.0659e-25, 1.3645e-24, 3.8938e-24, 9.8046e-25, 2.0865e-24,\n 3.6512e-23, 8.0052e-25, 7.5156e-24, 6.0553e-26, 7.7811e-25, 1.7381e-23,\n 3.4552e-24, 1.9573e-24, 1.1403e-23, 3.0470e-24, 1.8381e-24, 2.0879e-24,\n 5.3415e-25, 2.5930e-24, 1.8297e-25, 2.8553e-24, 1.4730e-25, 1.6776e-24,\n 1.7987e-24, 7.5509e-25, 8.9464e-24, 5.5748e-24, 2.6285e-25, 9.9559e-24,\n 1.6349e-24, 7.3540e-25, 1.3496e-25, 3.3330e-24, 4.7562e-24, 3.2850e-24,\n 7.0823e-25, 5.8341e-24, 7.0443e-25, 3.5492e-24, 4.5589e-23, 5.4474e-25,\n 1.3294e-23, 3.4703e-24, 8.8782e-24, 8.0099e-24, 1.3926e-23, 1.1633e-24,\n 3.2920e-23, 7.0797e-25, 2.9915e-24, 5.5494e-24, 2.8915e-26, 1.1236e-24,\n 4.5118e-25, 7.7071e-24, 2.2612e-23, 7.6633e-25, 1.1059e-24, 5.3966e-26,\n 1.4479e-23, 1.1715e-23, 7.7447e-25, 1.3011e-24, 6.0844e-24, 3.8744e-24,\n 1.4591e-24, 1.1929e-24, 4.9777e-24, 3.6970e-25, 1.1726e-24, 2.1170e-25,\n 2.2637e-24, 3.4909e-25, 1.0626e-24, 5.2034e-24, 3.6051e-25, 7.5409e-26,\n 3.1530e-24, 3.8051e-24, 1.5384e-24, 1.3739e-24, 4.6380e-25, 1.9320e-25,\n 7.8639e-24, 7.3441e-24, 1.1481e-24, 1.4876e-24, 6.9023e-26, 5.6649e-25,\n 2.4250e-24, 7.4163e-24, 6.7156e-25, 1.3644e-24, 2.3118e-24, 5.8877e-25,\n 2.4814e-25, 7.1120e-25, 3.1079e-24, 5.7648e-25, 1.0913e-24, 4.8355e-24,\n 2.2911e-24, 7.1108e-24, 5.7144e-24, 3.4763e-24, 4.3548e-24, 3.3615e-24,\n 6.0287e-25, 2.4987e-24, 3.1486e-25, 1.9800e-24, 2.5842e-24, 8.6179e-25,\n 1.2039e-25, 1.1535e-24, 1.7189e-24, 8.8767e-25, 1.4838e-24, 2.2374e-24,\n 1.6468e-24, 1.7975e-23, 2.3061e-24, 1.8517e-26, 7.3069e-25, 1.9490e-23,\n 1.2746e-23, 8.4112e-25, 5.0410e-24, 7.7092e-25, 4.9593e-27, 4.8306e-25,\n 2.4279e-25, 7.3784e-25, 4.0096e-25, 2.6466e-24, 6.6441e-24, 4.3788e-25,\n 1.6341e-24, 1.3268e-24, 1.2334e-24, 4.2948e-25, 1.0867e-24, 7.1928e-24,\n 9.3188e-24, 5.1231e-25, 2.4524e-23, 5.3686e-25, 2.6680e-24, 5.8870e-27,\n 1.2249e-24, 9.9777e-27, 6.5500e-26, 1.5024e-24, 6.5229e-24, 1.4430e-24,\n 8.4675e-27, 1.0392e-24, 1.3445e-24, 5.2107e-24, 2.2519e-25, 2.2237e-25,\n 1.5686e-23, 1.9055e-24, 5.8730e-26, 2.9989e-26, 3.7140e-24, 7.5460e-26,\n 5.7086e-25, 1.7411e-25, 5.3330e-24, 3.8065e-24, 1.0707e-23, 9.5772e-24,\n 1.9110e-24, 1.2227e-24, 4.9495e-27, 1.5909e-24, 4.4857e-25, 2.0394e-24,\n 5.1859e-24, 2.6382e-25, 2.2567e-24, 8.5726e-25, 3.4960e-25, 3.5070e-24,\n 5.2510e-24, 5.6470e-24, 7.5727e-24, 9.3815e-24, 3.3750e-25, 2.1339e-24,\n 7.4542e-25, 6.7600e-24, 1.6846e-24, 2.5545e-25, 2.0751e-24, 3.1319e-24,\n 1.1085e-23, 8.0077e-25, 3.8577e-25, 3.4709e-24, 3.0033e-24, 7.4059e-25,\n 4.7657e-26, 5.8503e-25, 1.3948e-23, 7.0719e-25, 1.9223e-24, 1.5416e-24,\n 5.9685e-25, 1.8182e-23, 3.3684e-25, 2.1962e-24, 3.0293e-24, 9.2315e-25,\n 1.3511e-25, 2.4675e-24, 1.8539e-24, 6.0820e-24, 4.7948e-24, 2.8882e-25,\n 3.0367e-24, 4.3668e-24, 6.0987e-24, 3.5842e-25, 8.0852e-25, 8.6059e-25,\n 3.0301e-24, 3.6791e-24, 1.4188e-26, 1.0963e-24, 2.0929e-25, 1.0115e-24,\n 4.2150e-25, 5.6321e-25, 5.0801e-24, 4.4613e-24, 1.1798e-25, 1.3339e-24,\n 2.4076e-25, 8.6094e-24, 1.4846e-24, 2.0831e-25], device='cuda:0')" + }, + "63": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.7465e-25, 6.3393e-27, 4.0228e-26, ..., 6.0761e-26, 5.6099e-25,\n 1.0651e-26],\n [5.4226e-26, 1.0244e-26, 1.2092e-25, ..., 2.2132e-25, 2.6910e-25,\n 3.6606e-26],\n [1.9500e-26, 9.4565e-28, 5.5530e-26, ..., 9.0323e-26, 1.1785e-25,\n 5.2913e-26],\n ...,\n [5.3721e-23, 1.3060e-24, 4.0676e-23, ..., 2.8340e-23, 1.4730e-22,\n 3.2075e-23],\n [2.7773e-23, 1.9502e-24, 3.3158e-23, ..., 2.2139e-23, 9.3995e-23,\n 5.0875e-24],\n [1.8743e-23, 1.1167e-24, 1.1312e-23, ..., 1.5195e-23, 5.3353e-23,\n 1.9971e-24]], device='cuda:0')" + }, + "64": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.1969e-25, 4.8321e-25, 2.2850e-25, 7.2527e-25, 2.6557e-25, 1.5716e-25,\n 6.9958e-25, 3.6579e-25, 2.7105e-25, 6.4751e-25, 3.1182e-25, 9.2826e-26,\n 9.6607e-25, 6.5563e-25, 2.5743e-25, 1.2964e-24, 1.2742e-24, 1.5610e-24,\n 2.3300e-25, 3.5568e-25, 3.1849e-25, 7.4353e-25, 5.5148e-25, 2.3461e-25,\n 2.4823e-25, 5.0848e-25, 1.3383e-25, 5.7621e-25, 5.8607e-25, 1.4369e-24,\n 5.1155e-25, 2.3654e-25, 1.0665e-24, 2.1366e-25, 1.5309e-25, 1.7179e-25,\n 4.8893e-25, 9.6804e-25, 2.6192e-24, 1.4339e-24, 1.2208e-25, 2.5164e-25,\n 5.2111e-25, 1.3595e-24, 2.1720e-25, 2.7584e-25, 4.4623e-25, 7.4763e-26,\n 2.9887e-25, 1.6815e-25, 2.7592e-25, 1.0854e-24, 2.7819e-26, 1.5405e-24,\n 3.1150e-25, 6.1875e-26, 3.7232e-25, 4.0492e-25, 7.7791e-25, 3.8635e-25,\n 4.0534e-25, 4.3694e-25, 3.1322e-26, 5.3346e-25, 3.0194e-25, 1.9256e-25,\n 5.3020e-25, 3.5625e-25, 2.7110e-25, 2.8051e-25, 1.6164e-25, 3.7965e-25,\n 2.6294e-26, 9.4267e-25, 4.8263e-25, 1.3394e-24, 2.0193e-25, 2.9409e-25,\n 5.6622e-25, 3.2933e-25, 7.5683e-26, 5.6962e-26, 3.1102e-25, 2.5571e-25,\n 4.2416e-25, 3.3974e-25, 2.0176e-25, 1.6888e-25, 1.2857e-24, 9.4903e-25,\n 1.9206e-25, 6.9061e-26, 8.3236e-25, 1.4387e-24, 2.3193e-25, 2.0309e-25,\n 3.2253e-25, 2.4565e-25, 4.5978e-25, 9.4693e-25, 1.5902e-25, 4.0011e-25,\n 1.9526e-26, 4.8073e-25, 2.1127e-25, 3.2713e-25, 2.5837e-25, 1.5721e-25,\n 2.1155e-25, 1.0491e-25, 4.9100e-25, 1.7005e-25, 1.2871e-25, 2.9345e-25,\n 1.2608e-25, 1.4843e-25, 2.9579e-25, 2.5244e-26, 1.3032e-25, 7.5879e-25,\n 5.4247e-25, 4.8682e-26, 5.0659e-25, 1.9037e-25, 2.8918e-25, 3.8294e-25,\n 2.8169e-25, 9.4015e-25, 4.9128e-25, 1.0558e-25, 3.4772e-25, 1.1946e-25,\n 7.4196e-25, 4.6371e-25, 1.3819e-25, 5.4449e-25, 4.3337e-25, 4.7781e-25,\n 3.4816e-25, 1.6629e-25, 7.7023e-26, 3.5200e-25, 1.7799e-25, 3.4882e-25,\n 3.7926e-26, 3.3024e-25, 1.0514e-25, 2.0218e-25, 1.1748e-25, 2.0111e-25,\n 2.2654e-25, 3.2481e-25, 1.6192e-25, 1.5729e-25, 1.8028e-26, 3.1809e-25,\n 2.8712e-25, 1.3399e-25, 5.1227e-25, 2.7582e-25, 3.5703e-25, 2.4513e-25,\n 5.4552e-25, 1.7100e-25, 6.5201e-26, 1.0631e-25, 4.0638e-25, 4.8648e-25,\n 2.4105e-25, 1.1440e-25, 2.4366e-25, 5.4463e-25, 4.0459e-25, 3.4169e-25,\n 1.8100e-25, 8.4165e-26, 1.9243e-25, 5.2536e-25, 2.6697e-25, 1.3153e-25,\n 2.4962e-25, 3.0430e-25, 2.9803e-25, 6.1081e-26, 1.6048e-25, 1.0873e-25,\n 1.2679e-25, 5.6654e-25, 1.0924e-25, 1.6058e-25, 2.4875e-25, 1.7961e-25,\n 4.2282e-25, 6.9454e-27, 7.2383e-26, 1.5161e-25, 1.1408e-24, 1.6843e-25,\n 1.0892e-25, 6.4284e-26, 2.2181e-24, 1.6825e-25, 6.0741e-26, 2.4564e-25,\n 9.0339e-25, 7.9355e-25, 1.0923e-24, 1.9373e-25, 2.9498e-25, 5.0463e-25,\n 1.4004e-25, 2.3935e-25, 1.0725e-24, 1.4787e-25, 3.3413e-25, 1.0032e-24,\n 1.9108e-25, 1.4742e-26, 2.9098e-25, 2.2452e-26, 1.5423e-25, 7.2366e-25,\n 5.2899e-25, 1.5998e-24, 1.5676e-25, 4.0076e-26, 2.5188e-25, 3.2598e-25,\n 1.3090e-25, 1.8564e-25, 1.7144e-26, 3.7819e-25, 1.6098e-25, 2.8701e-25,\n 1.4259e-25, 2.3424e-25, 1.4731e-25, 2.7123e-25, 7.5950e-25, 4.6423e-25,\n 2.5725e-25, 6.4261e-25, 7.8634e-26, 2.1118e-25, 2.9578e-25, 9.0409e-25,\n 5.8549e-25, 2.6308e-26, 2.7812e-25, 8.8301e-26, 4.2781e-25, 1.8287e-25,\n 2.0691e-25, 2.1169e-25, 3.6649e-25, 1.9330e-25, 4.9930e-33, 2.0792e-33,\n 4.9164e-33, 2.2505e-33, 5.4737e-33, 4.9798e-34, 9.4351e-34, 4.0360e-33,\n 2.4483e-33, 6.4063e-34, 3.7108e-33, 2.5318e-33, 4.5494e-33, 4.3659e-33,\n 4.7773e-33, 2.1009e-33, 5.1477e-33, 4.9259e-33, 3.8558e-33, 3.4111e-33,\n 2.7037e-33, 1.0177e-33, 9.4650e-34, 5.6840e-34, 8.8150e-34, 2.2629e-33,\n 1.2105e-33, 4.6829e-33, 3.3056e-33, 4.3176e-34, 2.1525e-33, 2.8068e-34,\n 1.4482e-33, 1.1637e-33, 1.5442e-33, 6.9949e-34, 1.8434e-33, 2.8143e-33,\n 7.4122e-33, 2.1678e-33, 5.5912e-34, 9.7609e-34, 2.1934e-33, 6.3048e-34,\n 6.5506e-34, 1.4759e-33, 1.4182e-33, 1.8902e-34, 1.2642e-33, 9.9537e-34,\n 1.4221e-33, 1.2873e-33, 2.2383e-33, 2.9617e-34, 1.6501e-34, 8.7564e-34,\n 1.1621e-34, 3.2759e-34, 1.7264e-33, 1.5079e-33, 1.0363e-33, 1.3892e-34,\n 6.9654e-34, 1.6501e-33, 1.4610e-33, 1.3461e-33, 5.2632e-34, 2.0584e-33,\n 2.5239e-33, 1.5442e-33, 4.9605e-33, 5.6735e-33, 2.4043e-33, 1.9617e-34,\n 5.1424e-33, 1.8062e-33, 1.1967e-32, 2.5027e-33, 5.5373e-33, 1.4134e-33,\n 2.7228e-33, 3.7520e-33, 1.4544e-33, 3.0264e-33, 3.0648e-33, 1.3976e-33,\n 2.9500e-33, 9.9748e-34, 2.5787e-33, 1.2621e-33, 3.5082e-33, 5.9944e-34,\n 6.6136e-34, 4.6017e-33, 3.9514e-33, 2.2139e-33, 6.3238e-34, 2.5380e-33,\n 7.5749e-34, 2.9985e-33, 1.4666e-33, 1.5289e-33, 1.7234e-33, 3.1128e-33,\n 2.7674e-33, 1.0982e-33, 2.8026e-33, 1.6363e-33, 1.0330e-33, 4.0898e-33,\n 1.7916e-33, 3.2466e-33, 3.1705e-33, 1.1196e-33, 3.5208e-34, 2.3489e-33,\n 3.1859e-33, 1.0783e-33, 8.1157e-34, 1.1122e-33, 2.3411e-34, 2.1030e-33,\n 3.1792e-33, 3.3107e-33, 8.8713e-34, 1.4493e-33, 2.3326e-33, 4.6290e-34,\n 7.3752e-33, 3.5413e-33, 3.2593e-33, 6.0908e-34, 3.1585e-33, 2.3678e-33,\n 4.4747e-33, 4.6907e-34, 3.3688e-33, 1.6621e-33, 1.1906e-34, 1.8304e-33,\n 1.7368e-33, 4.8809e-34, 1.3433e-33, 1.5211e-33, 1.6987e-33, 2.6940e-33,\n 5.0779e-33, 4.2564e-34, 9.4903e-34, 3.1945e-34, 1.5968e-33, 8.7084e-34,\n 2.9365e-33, 9.6415e-34, 3.9111e-33, 2.4100e-33, 1.6390e-33, 2.0417e-33,\n 5.5741e-34, 2.4140e-33, 2.7059e-34, 1.4059e-33, 2.1471e-33, 2.6499e-33,\n 1.1277e-33, 1.9762e-33, 1.7400e-33, 2.2760e-33, 4.4332e-33, 4.2474e-33,\n 2.2082e-33, 5.1870e-33, 1.9487e-33, 8.4197e-34, 2.0276e-33, 8.1066e-34,\n 3.6027e-34, 2.2616e-33, 1.6238e-33, 1.8965e-33, 1.9939e-33, 1.1063e-33,\n 3.7408e-33, 5.2204e-34, 3.0955e-33, 2.3918e-33, 1.4580e-33, 1.1674e-33,\n 1.3075e-33, 2.2954e-33, 1.4935e-33, 3.1465e-33, 1.1016e-33, 1.6699e-34,\n 1.7322e-33, 3.2772e-33, 6.0070e-33, 5.2505e-34, 3.0699e-33, 1.9636e-33,\n 6.2513e-34, 2.3072e-33, 4.5864e-33, 4.6950e-33, 1.5606e-33, 1.0813e-33,\n 1.1850e-33, 7.2136e-34, 3.4835e-33, 8.1238e-34, 5.8369e-34, 5.4285e-33,\n 2.8035e-33, 7.6146e-34, 7.9965e-34, 1.2680e-33, 1.8907e-34, 9.6087e-34,\n 1.0852e-33, 6.5809e-33, 2.5048e-33, 4.3380e-34, 9.4410e-34, 1.2586e-33,\n 2.6980e-33, 3.4121e-33, 5.1860e-33, 7.0258e-34, 4.7149e-33, 1.4769e-33,\n 1.0406e-33, 1.2575e-32, 6.3276e-33, 2.3484e-33, 6.1830e-33, 5.2350e-33,\n 1.6811e-33, 7.1095e-33, 2.9015e-33, 2.0163e-33, 1.1197e-32, 2.0009e-33,\n 1.5997e-33, 4.4945e-33, 4.6969e-33, 4.3670e-33, 1.4474e-32, 2.9257e-33,\n 4.5493e-33, 6.6510e-34, 1.4258e-32, 2.4275e-33, 3.1168e-33, 6.0949e-33,\n 1.1766e-33, 3.6909e-33, 7.1842e-22, 1.5876e-22, 1.2301e-22, 3.7208e-22,\n 4.1684e-22, 4.0302e-23, 2.5401e-22, 4.2055e-23, 4.5979e-22, 9.4756e-23,\n 2.5814e-22, 1.3163e-22, 2.8867e-22, 1.0263e-22, 2.0739e-22, 1.0299e-22,\n 2.8223e-23, 4.9897e-23, 3.3554e-23, 5.9089e-23, 7.1007e-22, 1.1755e-22,\n 2.9147e-22, 2.2677e-22, 2.8493e-23, 2.8671e-23, 1.6604e-22, 1.6430e-22,\n 3.7494e-23, 4.9873e-22, 2.5459e-22, 1.2255e-22, 2.5919e-23, 1.0676e-21,\n 1.8278e-22, 5.4488e-22, 3.9816e-23, 6.7452e-23, 2.5825e-22, 1.7515e-22,\n 8.4829e-23, 3.9268e-22, 1.2071e-22, 3.6715e-22, 7.5684e-23, 2.5613e-23,\n 1.1277e-22, 2.3897e-23, 2.1905e-22, 4.8281e-22, 9.3406e-23, 3.1350e-22,\n 1.8171e-22, 1.4955e-22, 2.1701e-22, 1.8927e-22, 3.4341e-23, 1.0014e-22,\n 3.6874e-23, 3.0505e-22, 5.0456e-22, 9.2433e-23, 1.6742e-22, 7.5556e-22,\n 1.0459e-22, 2.0931e-23, 1.6308e-22, 1.4285e-22, 1.0320e-22, 1.9794e-22,\n 2.3625e-22, 1.1258e-22, 4.5957e-22, 2.3790e-22, 1.1945e-22, 4.7871e-22,\n 4.5605e-22, 2.3360e-22, 1.3603e-22, 2.8376e-22, 1.0078e-22, 1.3119e-22,\n 1.2913e-22, 4.6339e-22, 2.3141e-22, 4.4532e-22, 2.8271e-23, 7.4656e-23,\n 9.8310e-23, 1.0910e-22, 7.5077e-23, 1.6910e-22, 2.5977e-22, 7.6605e-23,\n 5.5173e-23, 6.9476e-22, 4.9625e-23, 3.8907e-23, 4.0299e-22, 2.5511e-22,\n 1.8499e-22, 2.0382e-23, 3.1596e-22, 3.8355e-22, 1.5658e-22, 1.6436e-22,\n 1.0912e-22, 1.2878e-22, 9.9328e-23, 1.7185e-22, 9.0469e-23, 3.1169e-22,\n 1.1984e-22, 1.5657e-22, 1.5005e-22, 4.1171e-22, 3.3488e-23, 5.5837e-22,\n 1.8829e-22, 1.5855e-22, 5.0072e-22, 1.0261e-22, 3.5474e-22, 1.7604e-22,\n 1.5030e-22, 4.8889e-23, 4.0649e-22, 8.4500e-23, 6.1259e-23, 6.7645e-23,\n 2.9993e-22, 3.9637e-22, 1.3186e-22, 1.0109e-22, 3.0791e-22, 3.0595e-23,\n 3.2623e-23, 9.1605e-23, 3.5456e-22, 1.5533e-22, 1.0091e-22, 9.1938e-23,\n 2.5297e-22, 6.9652e-23, 2.3206e-22, 1.4896e-23, 1.5594e-22, 4.5321e-22,\n 9.5981e-23, 1.3117e-22, 3.5106e-22, 2.2149e-22, 1.4430e-22, 4.1012e-22,\n 3.8185e-22, 4.1383e-22, 3.7667e-23, 1.5560e-22, 7.0955e-23, 1.1089e-22,\n 1.2012e-22, 1.6496e-22, 2.6086e-22, 4.5702e-22, 7.9208e-23, 6.5988e-23,\n 3.2549e-23, 5.6117e-23, 2.3903e-22, 5.6986e-23, 5.0205e-22, 2.9878e-22,\n 9.0888e-23, 1.7458e-22, 8.1234e-23, 1.4766e-22, 1.2385e-22, 4.6960e-23,\n 3.3407e-22, 1.4276e-22, 2.3244e-22, 1.7022e-22, 9.7844e-23, 1.6075e-22,\n 2.0427e-22, 1.0736e-22, 8.7115e-23, 2.8615e-22, 5.5930e-24, 1.4337e-22,\n 2.6439e-22, 4.8076e-22, 8.6564e-23, 1.2399e-22, 4.1712e-22, 2.9968e-22,\n 2.4576e-23, 5.4487e-23, 2.8595e-23, 3.1391e-23, 1.8046e-22, 3.6467e-22,\n 1.5069e-23, 3.6775e-22, 1.6336e-22, 5.6723e-22, 5.1039e-22, 2.5366e-22,\n 3.6033e-22, 8.3599e-23, 1.2188e-22, 2.4098e-23, 2.2225e-22, 6.2143e-23,\n 5.5508e-23, 6.3791e-22, 2.4125e-22, 1.6688e-22, 5.5837e-22, 1.1163e-22,\n 1.1612e-22, 9.6915e-23, 2.1313e-22, 3.5545e-22, 2.9161e-22, 3.7271e-22,\n 2.4352e-22, 2.8296e-22, 1.1910e-23, 1.5235e-22, 1.2288e-22, 9.3207e-23,\n 2.8224e-23, 1.9288e-22, 2.3821e-22, 2.1673e-22, 7.9992e-23, 2.2245e-22,\n 2.3597e-22, 1.7286e-22, 1.4332e-22, 1.2537e-23, 1.8850e-22, 6.2456e-23,\n 1.8730e-22, 2.6560e-22, 2.8472e-22, 1.4902e-22, 4.1597e-23, 1.2098e-22,\n 2.0089e-22, 3.5331e-22, 1.9459e-22, 4.2255e-22, 1.7253e-22, 8.2564e-23],\n device='cuda:0')" + }, + "65": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.0902e-24, 6.0584e-23, 6.9032e-23, ..., 6.8623e-23, 2.6795e-23,\n 5.4783e-23],\n [9.8042e-24, 1.8497e-22, 2.3158e-22, ..., 3.1748e-22, 6.5229e-23,\n 1.5299e-22],\n [2.3040e-24, 6.2107e-23, 6.3664e-23, ..., 4.0811e-23, 1.2171e-23,\n 6.0149e-23],\n ...,\n [1.7240e-23, 2.9526e-22, 2.7259e-22, ..., 1.4436e-22, 6.5674e-23,\n 3.2622e-22],\n [5.1399e-23, 6.4168e-22, 6.2291e-22, ..., 4.3773e-22, 1.6919e-22,\n 6.6212e-22],\n [1.3174e-24, 4.2198e-23, 5.2359e-23, ..., 6.6327e-23, 1.3302e-23,\n 3.9855e-23]], device='cuda:0')" + }, + "66": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([8.0077e-23, 2.4615e-22, 5.5717e-23, 3.2436e-22, 5.9475e-22, 1.1822e-22,\n 3.1092e-23, 1.3662e-22, 2.7148e-23, 1.3044e-22, 3.3758e-22, 2.5775e-22,\n 4.4160e-23, 2.7883e-22, 1.4926e-22, 9.4633e-23, 4.3516e-22, 8.6982e-23,\n 2.7702e-22, 4.0499e-22, 1.7513e-22, 1.5128e-22, 2.2174e-22, 5.7337e-22,\n 1.2898e-23, 1.0577e-22, 2.8586e-22, 5.7866e-22, 3.9191e-22, 2.6748e-22,\n 3.2201e-22, 1.6680e-22, 2.9354e-22, 1.1131e-22, 3.5095e-22, 4.3582e-22,\n 3.2348e-22, 4.4698e-22, 2.4511e-22, 3.0343e-22, 1.3407e-21, 2.4330e-22,\n 2.7680e-22, 1.0478e-22, 3.2038e-22, 6.2855e-22, 1.2673e-22, 2.8597e-22,\n 1.2364e-22, 5.2865e-22, 1.0331e-21, 2.9358e-22, 3.1303e-22, 1.2545e-22,\n 3.1789e-22, 7.1548e-22, 3.9890e-22, 1.6422e-22, 2.3368e-22, 2.5383e-22,\n 3.1621e-22, 1.0656e-22, 4.2613e-22, 2.5363e-22, 9.3871e-23, 3.6945e-22,\n 4.9529e-23, 2.6886e-22, 2.1162e-22, 7.0215e-23, 6.2549e-23, 1.7060e-22,\n 4.1817e-22, 6.7766e-23, 1.1309e-22, 1.3899e-22, 2.7680e-22, 1.1017e-22,\n 4.6066e-22, 1.4655e-22, 2.9362e-22, 8.0653e-23, 3.0562e-22, 2.3586e-22,\n 9.0575e-23, 8.5660e-23, 4.6695e-22, 2.0215e-22, 2.4999e-22, 1.0441e-21,\n 3.6176e-22, 2.1171e-22, 1.4693e-22, 3.3127e-22, 7.1128e-22, 8.4560e-22,\n 4.1251e-23, 1.0987e-22, 4.8696e-22, 2.0480e-22, 1.4759e-22, 3.5415e-22,\n 2.6345e-22, 6.1074e-22, 1.7526e-22, 2.9018e-22, 1.6894e-22, 1.7198e-22,\n 1.4562e-22, 6.4807e-23, 1.9158e-22, 2.1226e-22, 2.2524e-22, 1.0225e-22,\n 1.2360e-22, 6.2392e-22, 4.0353e-23, 5.6886e-23, 2.0987e-22, 6.5220e-23,\n 5.3168e-22, 8.3198e-23, 5.1545e-22, 2.3436e-22, 2.6998e-22, 2.2683e-22,\n 1.8792e-22, 1.8477e-22, 5.0429e-23, 1.2526e-22, 1.1564e-22, 2.0791e-22,\n 2.0094e-22, 2.7481e-22, 9.9709e-23, 1.8644e-22, 9.1217e-24, 3.5174e-22,\n 2.3177e-22, 2.9271e-22, 9.1057e-23, 1.6172e-22, 3.3147e-22, 6.8169e-22,\n 2.1443e-22, 2.9223e-23, 3.9313e-23, 2.5226e-22, 2.2145e-22, 2.7465e-22,\n 3.6565e-22, 4.0324e-22, 5.6953e-22, 4.1459e-22, 1.7884e-22, 4.1577e-22,\n 2.9292e-22, 7.7746e-23, 2.1723e-22, 2.7684e-22, 7.0529e-22, 4.1838e-23,\n 4.1675e-22, 1.4282e-22, 2.6658e-22, 4.1474e-22, 1.0706e-22, 5.7896e-22,\n 5.3043e-22, 3.3964e-22, 2.7009e-22, 1.0698e-22, 1.3833e-22, 1.4516e-21,\n 3.0895e-22, 1.7672e-21, 1.1536e-22, 3.4334e-22, 1.5717e-22, 1.5253e-22,\n 1.2474e-22, 1.8760e-22, 2.1518e-22, 2.7825e-22, 2.2522e-22, 2.1046e-22,\n 1.5518e-22, 4.4849e-22, 1.2892e-22, 1.1548e-21, 2.8922e-22, 2.8260e-22,\n 4.4730e-22, 2.2102e-22, 9.9148e-22, 1.4458e-22, 1.1577e-22, 2.9313e-22,\n 8.8843e-23, 2.8191e-22, 2.5882e-23, 2.1640e-22, 2.5162e-22, 1.5069e-22,\n 2.3540e-22, 2.2307e-22, 3.1783e-22, 3.2265e-22, 2.4356e-22, 5.8545e-22,\n 3.6896e-22, 3.7764e-22, 7.4732e-23, 5.8526e-22, 1.0262e-21, 6.8856e-23,\n 1.5926e-22, 3.6715e-22, 8.2033e-22, 8.5837e-22, 2.0877e-22, 6.4674e-23,\n 2.1194e-22, 1.7127e-22, 4.1987e-22, 2.3171e-22, 1.0871e-22, 2.0407e-22,\n 2.1263e-22, 6.2841e-22, 2.6761e-22, 3.6253e-22, 4.9906e-22, 1.2295e-21,\n 4.5788e-22, 1.5824e-22, 1.5921e-22, 2.3060e-22, 3.8419e-22, 1.6477e-22,\n 4.0374e-22, 1.9898e-22, 7.8994e-22, 2.1965e-22, 9.6617e-22, 1.8510e-22,\n 2.9610e-23, 4.0071e-22, 3.5459e-22, 2.8224e-22, 6.8880e-23, 2.7611e-22,\n 5.3213e-22, 2.7667e-22, 5.2237e-22, 1.0061e-22], device='cuda:0')" + }, + "67": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.9887e-23, 1.4214e-20, 1.5032e-21, ..., 1.9084e-20, 6.8989e-23,\n 1.9401e-21],\n [9.0617e-22, 5.5892e-22, 4.3549e-22, ..., 3.7806e-21, 9.0614e-22,\n 6.5906e-22],\n [2.9858e-23, 1.4898e-20, 1.4276e-21, ..., 1.9995e-20, 4.2255e-23,\n 2.0449e-21],\n ...,\n [5.8077e-22, 1.7040e-22, 1.8587e-22, ..., 5.0490e-21, 1.2337e-22,\n 2.3525e-21],\n [4.5065e-21, 2.5164e-21, 2.1907e-21, ..., 2.0679e-20, 4.4203e-21,\n 5.0024e-21],\n [5.6680e-24, 6.5682e-24, 1.5905e-23, ..., 5.6087e-23, 4.9467e-24,\n 1.6337e-23]], device='cuda:0')" + }, + "68": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([7.8458e-21, 1.0534e-21, 2.2049e-21, 9.2018e-22, 4.1172e-22, 1.0268e-21,\n 6.3282e-21, 9.4710e-21, 4.3136e-22], device='cuda:0')" + }, + "69": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.5968e-23, 1.3417e-20, 1.4252e-21, ..., 1.8011e-20, 6.6842e-23,\n 1.8309e-21],\n [8.7334e-22, 5.3856e-22, 4.1986e-22, ..., 3.6441e-21, 8.7340e-22,\n 6.3536e-22],\n [2.8311e-23, 1.4057e-20, 1.3477e-21, ..., 1.8867e-20, 3.9894e-23,\n 1.9296e-21],\n ...,\n [5.6947e-22, 1.6715e-22, 1.8251e-22, ..., 4.9491e-21, 1.2094e-22,\n 2.3059e-21],\n [4.3515e-21, 2.4286e-21, 2.1133e-21, ..., 1.9998e-20, 4.2630e-21,\n 4.8556e-21],\n [5.5698e-24, 6.5476e-24, 1.5889e-23, ..., 5.5131e-23, 4.8616e-24,\n 1.6021e-23]], device='cuda:0')" + }, + "70": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([7.7368e-21, 1.0200e-21, 2.0906e-21, 8.9097e-22, 4.0760e-22, 1.0089e-21,\n 6.2165e-21, 9.2258e-21, 4.3068e-22], device='cuda:0')" + }, + "71": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.7872e-23, 1.3746e-20, 1.4576e-21, ..., 1.8454e-20, 6.7746e-23,\n 1.8760e-21],\n [8.7175e-22, 5.3795e-22, 4.1912e-22, ..., 3.6380e-21, 8.7182e-22,\n 6.3429e-22],\n [2.8952e-23, 1.4408e-20, 1.3810e-21, ..., 1.9337e-20, 4.0869e-23,\n 1.9776e-21],\n ...,\n [5.6947e-22, 1.6715e-22, 1.8251e-22, ..., 4.9491e-21, 1.2094e-22,\n 2.3059e-21],\n [4.3450e-21, 2.4249e-21, 2.1100e-21, ..., 1.9972e-20, 4.2562e-21,\n 4.8510e-21],\n [5.5695e-24, 6.5475e-24, 1.5889e-23, ..., 5.5130e-23, 4.8613e-24,\n 1.6021e-23]], device='cuda:0')" + }, + "72": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([7.7821e-21, 1.0186e-21, 2.1381e-21, 8.8978e-22, 4.0754e-22, 1.0089e-21,\n 6.2165e-21, 9.2194e-21, 4.3068e-22], device='cuda:0')" + } + }, + "param_groups": [ + { + "lr": 0.0009558195366224509, + "name": "scale_128", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 0, + 1, + 2 + ] + }, + { + "lr": 0.0009558195366224509, + "name": "scale_256", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 3, + 4, + 5 + ] + }, + { + "lr": 0.0009558195366224509, + "name": "scale_384", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 6, + 7, + 8 + ] + }, + { + "lr": 0.0009558195366224509, + "name": "scale_448", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 9, + 10, + 11 + ] + }, + { + "lr": 0.0009558195366224509, + "name": "scale_512", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 12, + 13, + 14 + ] + }, + { + "lr": 0.0009558195366224509, + "name": "scale_576", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 15, + 16, + 17 + ] + }, + { + "lr": 0.0009558195366224509, + "name": "scale_640", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 18, + 19, + 20 + ] + }, + { + "lr": 0.0009558195366224509, + "name": "scale_768", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 21, + 22, + 23 + ] + }, + { + "lr": 0.0009558195366224509, + "name": "scale_896", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 24, + 25, + 26 + ] + }, + { + "lr": 0.00047836202255981916, + "name": "fusion", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.005, + "params": [ + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72 + ] + } + ] + }, + "scheduler_state_dict": { + "T_0": 10, + "T_i": 10, + "T_mult": 2, + "eta_min": 1e-06, + "T_cur": 8, + "base_lrs": [ + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.005 + ], + "last_epoch": 8, + "_step_count": 0, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 0.0009558195366224509, + 0.0009558195366224509, + 0.0009558195366224509, + 0.0009558195366224509, + 0.0009558195366224509, + 0.0009558195366224509, + 0.0009558195366224509, + 0.0009558195366224509, + 0.0009558195366224509, + 0.00047836202255981916 + ] + }, + "metrics": { + "best_val_acc": 62.774, + "best_epoch": 7, + "scale_accuracies": { + "128": 62.774, + "256": 70.86466666666666, + "384": 73.318, + "448": 74.12333333333333, + "512": 74.48333333333333, + "576": 74.91666666666667, + "640": 75.04866666666666, + "768": 75.43866666666666, + "896": 75.78266666666667 + }, + "training_history": { + "epochs": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8 + ], + "train_loss": [ + 4.900288952114611, + 4.0311296205286915, + 3.872760212415066, + 3.772298225203155, + 3.694623793321019, + 3.6294693627380346, + 3.5741883491542534, + 3.5266703713365737 + ], + "train_acc": [ + 51.657460216609806, + 56.824520144524485, + 57.987626385423084, + 58.73678711154231, + 59.30749074866899, + 59.79954213619302, + 60.232454733327764, + 60.59530100291375 + ], + "val_acc": [ + 58.398666666666664, + 60.18533333333333, + 60.89533333333333, + 61.468666666666664, + 61.802, + 62.246, + 62.55133333333333, + 62.774 + ], + "scale_accs": { + "128": [ + 58.398666666666664, + 60.18533333333333, + 60.89533333333333, + 61.468666666666664, + 61.802, + 62.246, + 62.55133333333333, + 62.774 + ], + "256": [ + 67.03133333333334, + 68.56666666666666, + 69.374, + 69.826, + 70.146, + 70.442, + 70.74533333333333, + 70.86466666666666 + ], + "384": [ + 69.55466666666666, + 71.02133333333333, + 71.764, + 72.25133333333333, + 72.66, + 72.89, + 73.12933333333334, + 73.318 + ], + "448": [ + 70.34333333333333, + 71.97066666666667, + 72.66733333333333, + 73.1, + 73.44666666666667, + 73.616, + 73.936, + 74.12333333333333 + ], + "512": [ + 70.83533333333334, + 72.43066666666667, + 73.07733333333333, + 73.59866666666667, + 73.876, + 74.13, + 74.29866666666666, + 74.48333333333333 + ], + "576": [ + 71.29266666666666, + 72.69866666666667, + 73.488, + 73.876, + 74.25133333333333, + 74.53733333333334, + 74.74466666666666, + 74.91666666666667 + ], + "640": [ + 71.60266666666666, + 73.09866666666667, + 73.69266666666667, + 74.18333333333334, + 74.45733333333334, + 74.72066666666667, + 74.906, + 75.04866666666666 + ], + "768": [ + 72.03333333333333, + 73.37733333333334, + 74.138, + 74.52333333333333, + 74.82, + 75.086, + 75.31, + 75.43866666666666 + ], + "896": [ + 72.252, + 73.866, + 74.37, + 75.06266666666667, + 75.262, + 75.43733333333333, + 75.59266666666667, + 75.78266666666667 + ] + }, + "lr": [ + 0.00975530705321762, + 0.00904518046337755, + 0.00793913236883622, + 0.00654543046337755, + 0.005000500000000001, + 0.0034555695366224513, + 0.0020618676311637816, + 0.0009558195366224509 + ] + } + }, + "train_config": { + "name": "david_training", + "run_id": "20251013_004438", + "dataset_name": "AbstractPhil/imagenet-clip-features-orderly", + "model_variant": [ + "clip_vit_b16", + "clip_vit_laion_b32", + "clip_vit_b32" + ], + "num_classes": 1000, + "preset": "gated_expert_team", + "custom_config_path": null, + "num_classes_override": null, + "use_belly_override": null, + "belly_expand_override": null, + "progressive_training_override": true, + "scale_warmup_epochs_override": { + "128": 0, + "256": 0, + "384": 0, + "448": 0, + "512": 0, + "576": 0, + "640": 0, + "768": 0, + "896": 0 + }, + "num_epochs": 10, + "batch_size": 1024, + "learning_rate": 0.01, + "weight_decay": 1e-05, + "warmup_epochs": 3, + "use_rose_loss": true, + "rose_initial_weight": 0.1, + "rose_max_weight": 0.8, + "rose_weight_schedule": "adaptive", + "use_cayley_loss": false, + "cayley_weight": 0.01, + "scale_loss_balance": null, + "use_mixed_precision": false, + "gradient_clip": 10.0, + "scheduler_type": "cosine_restarts", + "min_lr": 1e-06, + "freeze_strategy": "never", + "freeze_threshold": 90.0, + "unfreeze_on_plateau": true, + "patience": 10, + "track_gradients": true, + "gradient_scale_threshold": 1e-05, + "gradient_scale_multiplier": 10.0, + "log_interval": 50, + "val_interval": 1, + "save_interval": 5, + "log_fusion_weights": true, + "log_loss_components": true, + "save_format": "safetensors", + "hf_repo": "AbstractPhil/david-shared-space", + "upload_to_hub": true, + "base_dir": "./david_training", + "num_workers": 10, + "pin_memory": true, + "prefetch_factor": 4, + "persistent_workers": true + } +} \ No newline at end of file