diff --git "a/weights/final_model_metadata.json" "b/weights/final_model_metadata.json" new file mode 100644--- /dev/null +++ "b/weights/final_model_metadata.json" @@ -0,0 +1,497 @@ +{ + "epoch": 19, + "optimizer_state_dict": { + "state": { + "0": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[ 2.0182e-05, -4.0774e-05, -1.8981e-06, ..., 2.2015e-05,\n 1.9074e-05, 1.4373e-05],\n [-7.9625e-06, -3.4462e-05, 2.4047e-05, ..., 1.1974e-05,\n 5.1719e-05, 6.0935e-06],\n [-1.0442e-04, -2.1260e-04, -5.8897e-05, ..., 3.3500e-05,\n -1.6321e-05, 4.1818e-05],\n ...,\n [ 1.4544e-04, -1.4001e-04, -6.1162e-06, ..., 1.9627e-05,\n 1.2254e-05, -1.5506e-05],\n [-2.5096e-06, -7.7651e-06, -2.3486e-06, ..., -8.7364e-07,\n -2.1224e-06, 2.1338e-06],\n [-1.1176e-04, 1.1404e-04, 2.5317e-06, ..., -3.0614e-05,\n -4.9245e-05, 3.8300e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.3660e-08, 4.8834e-08, 6.4916e-09, ..., 1.5737e-08, 1.4136e-08,\n 3.8758e-09],\n [1.2246e-07, 7.6356e-08, 3.1421e-08, ..., 4.7450e-08, 2.6371e-08,\n 4.2753e-08],\n [5.4321e-08, 7.3815e-08, 1.8063e-08, ..., 2.0440e-08, 1.2274e-08,\n 1.6688e-08],\n ...,\n [1.4520e-07, 8.5617e-08, 1.7459e-08, ..., 2.0662e-08, 1.5681e-08,\n 1.0392e-08],\n [7.5145e-11, 6.6761e-11, 1.3205e-11, ..., 1.8387e-11, 1.7622e-11,\n 2.4248e-11],\n [9.9692e-08, 6.6532e-08, 9.6312e-09, ..., 1.4973e-08, 2.2145e-08,\n 1.0498e-08]], device='cuda:0')" + }, + "1": { + "step": "tensor(8764.)", + "exp_avg": "tensor([ 8.2773e-04, -6.4425e-04, 1.7857e-03, 4.9067e-04, 6.1624e-04,\n 8.7618e-05, 4.9512e-04, -1.6068e-03, 1.5713e-03, -2.4647e-05,\n 9.6222e-04, 5.7114e-04, 5.6052e-45, -2.0018e-03, -1.0832e-04,\n 4.1191e-04, -1.6788e-03, -1.2133e-03, -3.9914e-04, 5.6052e-45,\n 1.1034e-03, -1.3365e-03, -1.4586e-03, -4.6580e-04, 5.6052e-45,\n 2.1290e-03, 4.8886e-04, -4.8179e-03, -2.7341e-03, 2.3249e-03,\n 3.2989e-04, -8.8534e-04, -3.4335e-19, 5.6052e-45, 2.0127e-03,\n 9.6239e-04, -2.5377e-03, 1.0100e-03, 8.5631e-04, -1.5793e-03,\n 1.2097e-03, -2.7516e-04, 2.5896e-03, 5.9248e-04, -8.9859e-04,\n 2.9885e-04, -1.0231e-04, 2.4279e-03, -2.9709e-03, -1.1404e-03,\n -7.4071e-04, 5.6052e-45, 1.4141e-17, 5.6052e-45, -1.8489e-03,\n 1.2444e-03, 3.0435e-03, -1.6353e-03, -1.4592e-05, -1.0750e-03,\n -5.5469e-04, 8.2039e-04, 1.6815e-03, -7.3230e-05, -8.3017e-04,\n -8.9741e-05, -3.7664e-04, 5.6052e-45, -2.7964e-03, 1.6682e-03,\n 6.2612e-04, 5.6052e-45, 1.9980e-03, 1.0338e-04, 1.7844e-03,\n 3.5141e-03, 1.9415e-03, 1.9057e-03, -2.0060e-03, -2.1909e-03,\n -1.3067e-03, 9.9421e-04, 1.9729e-04, 3.6982e-04, -1.3752e-04,\n 1.2777e-03, -9.4964e-04, -8.6883e-04, 9.8645e-04, -8.9502e-04,\n 5.6052e-45, 2.0321e-03, 3.6609e-04, -1.8435e-04, -2.0463e-03,\n -5.6449e-04, -8.4193e-04, -8.1765e-06, 8.0381e-05, 3.6658e-03,\n -1.2114e-03, 2.0239e-03, -7.1849e-04, 2.1659e-04, -8.7701e-03,\n 8.4257e-04, 5.4097e-04, -1.1808e-03, 4.3671e-03, -3.8656e-04,\n -6.4111e-04, 1.7248e-04, 5.6052e-45, 1.1811e-03, -5.5449e-04,\n -4.1107e-04, 1.1029e-04, 2.6941e-04, 5.6052e-45, -8.7269e-05,\n 5.6052e-45, -4.0957e-04, 5.6052e-45, -1.5132e-03, -8.9312e-04,\n 1.8585e-03, -7.7790e-04, 5.6052e-45, 2.3466e-04, 9.1805e-04,\n 2.1684e-04, -5.6645e-04, -1.2484e-03, 5.6052e-45, 1.6146e-03,\n 5.1343e-03, -3.0490e-03, -7.0139e-04, -1.7743e-03, 2.3718e-04,\n -2.9958e-03, -4.7922e-04, -8.0391e-04, 3.3454e-04, -5.8078e-04,\n 1.1812e-03, -7.1774e-17, -8.4783e-05, 9.4378e-04, -1.2443e-03,\n 5.6052e-45, -9.2922e-04, -9.2161e-04, 5.6052e-45, -1.1006e-03,\n 1.3668e-03, 5.6052e-45, -2.5135e-03, 1.4426e-04, -1.3429e-05,\n 3.8922e-03, 1.7803e-03, -8.6296e-04, 4.4729e-04, 1.7739e-03,\n 1.1162e-03, -3.7004e-03, -1.9289e-04, 3.5782e-03, 5.6052e-45,\n 5.6052e-45, 1.9803e-03, 2.3266e-04, 4.9124e-04, -5.1448e-04,\n -3.1422e-03, 9.1015e-04, -7.3558e-04, 5.6052e-45, 3.3636e-04,\n 5.3761e-04, -4.1913e-04, 6.1199e-04, 5.6052e-45, -2.2210e-03,\n -2.7276e-03, -3.1889e-03, 2.8493e-04, -2.0855e-04, 5.6052e-45,\n 5.6052e-45, 9.5652e-04, -1.2630e-03, 1.6360e-03, 5.5614e-04,\n 3.4465e-04, -3.5330e-04, 5.2866e-04, 5.6052e-45, -1.0250e-03,\n 1.5011e-04, -6.9857e-04, -2.7772e-05, 1.0855e-03, 2.5180e-03,\n 5.6052e-45, 2.4834e-04, -7.4751e-04, 1.5066e-03, 3.8407e-04,\n 4.4421e-04, 6.1384e-04, -2.5369e-03, -2.2903e-05, -5.2461e-04,\n -7.2133e-04, 1.0224e-03, -5.3143e-04, 1.5448e-03, -3.0465e-03,\n 5.8001e-04, 1.8682e-03, -3.8420e-04, 1.6195e-03, 6.9232e-04,\n -1.9160e-03, -4.5354e-05, 1.8747e-03, -1.4597e-04, -6.8441e-04,\n -3.8738e-04, 6.0303e-03, -2.7899e-05, 5.6052e-45, 5.2764e-03,\n -7.4054e-04, -2.0248e-03, 3.5693e-03, 3.1313e-03, -7.8526e-04,\n -1.5008e-03, 3.2735e-04, -1.8656e-03, 9.5574e-04, 2.3722e-03,\n -3.9908e-04, -1.5825e-03, 4.1585e-03, -8.0897e-04, 2.0960e-03,\n -2.3298e-03, -1.4655e-03, -6.2848e-03, 6.5966e-04, -6.1024e-04,\n 8.7266e-04, 5.6052e-45, -3.4838e-04, 7.1056e-04, -2.7258e-04,\n -1.4859e-03, 2.6273e-03, 3.0573e-03, 3.0832e-04, -6.4224e-03,\n -1.5697e-03, 1.3539e-04, 5.6052e-45, 2.7669e-03, -3.4179e-03,\n 6.6493e-04, -2.7624e-03, -4.4543e-04, -5.2986e-04, -1.0633e-03,\n 1.9116e-08, 2.8892e-04, 5.6052e-45, 4.2836e-04, -1.3736e-05,\n 4.0174e-03, 2.5140e-04, 1.4610e-04, 1.1332e-03, -1.1089e-03,\n 1.8500e-03, -1.7069e-06, 1.6849e-03, 1.8568e-03, -1.4822e-03,\n -2.7685e-04, 1.0891e-04, 1.2516e-03, 1.0307e-03, -4.8170e-05,\n -1.9083e-03, -1.5284e-03, 4.2632e-04, 8.9915e-04, 2.1877e-04,\n -3.4887e-04, 1.0388e-03, -3.4039e-04, -1.1169e-04, 1.5134e-03,\n 5.9226e-04, -2.5771e-04, 5.6052e-45, 5.6052e-45, -2.9321e-03,\n 2.5862e-03, -2.0084e-03, 1.3321e-05, -2.5144e-03, 1.3686e-03,\n 4.1642e-03, -8.5512e-04, 7.2002e-04, 1.4807e-03, -2.0000e-03,\n 7.3819e-04, -3.1331e-03, 2.9554e-03, 2.8573e-04, 2.9124e-05,\n 1.1793e-03, -6.0067e-04, -4.9977e-04, -7.4153e-05, 1.0614e-03,\n 1.5445e-04, 1.2792e-03, 4.8051e-04, 1.9615e-04, 5.6052e-45,\n -4.8989e-04, 3.6085e-03, -1.4915e-04, 9.9903e-04, 1.5752e-03,\n -1.4566e-03, -1.4576e-03, -8.7608e-04, -5.3462e-04, -3.4671e-04,\n 1.1216e-05, 5.6052e-45, -1.2838e-04, -3.7601e-03, 7.8891e-04,\n -1.8381e-04, 1.7889e-05, -2.3339e-04, -1.0192e-03, -3.1364e-03,\n -4.2878e-04, 1.4757e-03, 3.7549e-05, 1.0976e-03, -1.2838e-03,\n 3.9614e-04, 9.8725e-04, 1.0959e-03, 1.6716e-03, -1.6409e-04,\n 1.9207e-05, 1.7592e-03, -2.2722e-03, -1.0977e-03, 5.2207e-05,\n -1.4760e-03, 1.5352e-04, -5.7351e-04, 1.1521e-03, -4.9552e-05,\n 1.5729e-03, -7.5888e-04, 8.1303e-04, 5.6052e-45, 9.0218e-04,\n -1.7156e-03, 5.6052e-45, 1.6691e-03, 1.3558e-03, 2.4062e-03,\n -2.5692e-04, -1.1860e-03, -2.4563e-03, -1.2424e-03, 1.2869e-03,\n 2.1723e-03, -1.6746e-03, 4.9060e-04, -5.6052e-45, 5.6052e-45,\n 2.0775e-03, -1.8036e-03, -1.3034e-03, -2.1443e-03, 4.2014e-04,\n -6.9643e-04, -1.1622e-03, -1.0378e-04, 8.2443e-04, 7.4210e-04,\n 1.3883e-03, -1.1441e-03, 5.6052e-45, 4.8680e-04, 4.2605e-04,\n 5.6052e-45, -1.4322e-03, 5.6052e-45, -2.5965e-03, -7.5789e-04,\n 1.0123e-03, -8.5900e-04, 5.6052e-45, -1.9630e-03, 1.2519e-03,\n -1.3044e-04, -1.1691e-03, -5.2099e-04, 1.0979e-03, -2.3067e-03,\n 4.6410e-04, -1.4142e-03, -4.5494e-04, 2.1748e-03, 5.6052e-45,\n -1.1997e-03, 1.5133e-03, -3.0294e-04, 5.6052e-45, -8.1899e-04,\n -1.0962e-03, -2.4025e-03, -1.5583e-03, 1.5534e-03, 1.5569e-03,\n -1.4238e-04, -1.7844e-03, 2.1826e-03, -1.2957e-03, -3.8465e-04,\n 1.1225e-03, -1.2652e-03, 5.6052e-45, -2.0127e-04, -8.5212e-04,\n 5.6052e-45, -1.9310e-03, -2.0809e-03, 7.3990e-04, -2.5793e-04,\n -1.4591e-06, 1.3225e-03, -6.5221e-04, 7.3424e-04, 5.6052e-45,\n 5.5580e-04, 1.9511e-04, 5.6052e-45, 1.0027e-04, -4.1564e-04,\n -1.6945e-03, 9.8967e-04, 5.6052e-45, -3.2336e-03, 4.6944e-04,\n 2.9962e-03, -2.5836e-04, -1.4509e-05, -1.3102e-03, 2.8218e-04,\n 6.0319e-04, 5.6052e-45, -1.2067e-03, 5.6052e-45, -6.4428e-04,\n 3.9529e-05, 1.3031e-03, -2.5292e-03, -6.4129e-06, -1.7618e-04,\n 6.7476e-04, -1.1076e-04, -5.8217e-05, 6.1865e-04, 3.1321e-04,\n 5.6052e-45, -8.4158e-04, -7.0308e-04, 7.9133e-03, 3.1855e-04,\n 1.5999e-03, 7.1572e-04, 1.5979e-03, 3.3382e-03, 4.2100e-04,\n 1.3982e-03, 5.6052e-45, -1.0131e-03, 1.4660e-03, 1.3862e-03,\n 1.9460e-03, -1.5197e-03, -1.0353e-03, -1.1254e-06, -4.0425e-04,\n 4.0114e-04, 4.9384e-04, -1.6642e-03, -2.5886e-03, 5.6052e-45,\n 5.6052e-45, 1.2701e-03, -6.7478e-04, 1.3946e-03, 2.7300e-05,\n -1.7730e-03, -1.9198e-03, 6.0817e-04, 1.3600e-03, 9.2329e-04,\n 1.2708e-04, -1.0352e-03, -2.6717e-04, -1.0693e-03, 5.0040e-04,\n 2.5503e-04, 2.3313e-04, -1.5503e-03, -1.1250e-04, 1.1253e-13,\n 3.6064e-04, -1.4253e-03, 5.6052e-45, 5.6052e-45, -3.8738e-03,\n -1.1088e-03, 2.7068e-04, -1.3287e-03, -4.9581e-03, 1.3900e-04,\n -1.9448e-03, -2.2386e-03, 1.5938e-05, -3.1080e-04, -1.0636e-03,\n -3.6972e-04, 1.5126e-03, 5.0351e-04, 2.2495e-04, 3.5768e-04,\n 1.7465e-03, -7.7100e-05, 3.7583e-03, 1.4316e-03, 6.8150e-04,\n -5.8458e-04, -9.2893e-04, -4.9790e-04, 6.9771e-04, 2.4666e-04,\n -6.7207e-04, 4.7628e-04, 4.9579e-04, -1.3267e-03, 2.5109e-04,\n 3.8340e-03, 1.4504e-04, 1.5863e-03, -2.2466e-03, 9.5885e-04,\n 5.6052e-45, 5.6052e-45, 8.5443e-04, -1.4534e-03, 1.3633e-04,\n -9.7699e-04, 5.6052e-45, -2.0840e-03, 3.8862e-04, -4.0500e-04,\n 4.1766e-03, -1.6476e-04, 7.7546e-04, -1.5928e-04, 1.0407e-03,\n -4.1593e-03, -7.5137e-05, 2.3522e-03, 1.8350e-03, -5.5561e-04,\n -6.0348e-04, -9.6258e-04, -8.1638e-04, -3.2454e-03, -1.2969e-03,\n 7.4895e-04, 1.1299e-03, 1.5143e-03, 1.0670e-03, -1.6771e-03,\n -2.6141e-03, 2.0824e-04, 1.9134e-04, 4.3226e-04, -1.6366e-03,\n -2.3573e-04, -1.0413e-03, -3.3944e-04, -1.9002e-03, 4.5692e-04,\n 1.1255e-03, 3.1364e-04, 3.4930e-03, 3.0225e-04, 1.1595e-06,\n 5.6052e-45, -2.7086e-05, 1.5793e-03, 8.4390e-04, -6.3016e-04,\n -4.9585e-03, 1.9688e-04, 5.4001e-05, 1.6109e-03, -2.3454e-03,\n -1.4736e-03, 2.4908e-09, 1.5868e-03, 1.8527e-03, -4.3517e-04,\n -7.9860e-04, -2.0151e-03, 1.8394e-03, 2.5244e-05, -1.4951e-03],\n device='cuda:0')", + "exp_avg_sq": "tensor([1.1273e-05, 5.1797e-05, 1.9440e-05, 1.7217e-05, 1.9449e-05, 4.4492e-06,\n 2.9770e-05, 2.8510e-05, 2.0807e-05, 2.5146e-05, 2.5313e-05, 5.3386e-05,\n 3.5182e-09, 2.4866e-05, 3.0669e-05, 3.3956e-05, 4.3389e-05, 2.8850e-05,\n 2.2814e-05, 9.1601e-10, 2.4757e-05, 1.8483e-05, 1.9234e-05, 3.0473e-05,\n 3.3436e-09, 3.8172e-05, 2.5830e-05, 4.0012e-05, 3.2905e-05, 3.2854e-05,\n 3.7264e-05, 2.9129e-05, 1.1742e-08, 5.9053e-10, 3.0114e-05, 2.2832e-05,\n 3.3176e-05, 1.8133e-05, 2.1886e-05, 2.8002e-05, 3.0998e-05, 1.7940e-05,\n 2.7404e-05, 2.4656e-05, 2.6293e-05, 3.0807e-05, 2.7662e-05, 4.4733e-05,\n 5.2698e-05, 2.4782e-05, 3.0028e-05, 3.5078e-10, 5.6919e-10, 5.3269e-10,\n 4.2056e-05, 5.6650e-05, 2.2027e-05, 2.2191e-05, 7.7930e-06, 3.4976e-05,\n 3.0242e-05, 2.6698e-05, 5.3060e-05, 1.5441e-05, 3.1232e-05, 3.2492e-05,\n 2.7819e-05, 6.5197e-10, 4.7832e-05, 2.9770e-05, 1.1872e-05, 2.6632e-09,\n 3.5276e-05, 2.5343e-05, 1.2339e-05, 3.7042e-05, 3.8841e-05, 3.8538e-05,\n 2.1571e-05, 3.7756e-05, 4.9254e-05, 2.4929e-05, 3.5599e-05, 2.4263e-05,\n 2.8473e-05, 3.0795e-05, 2.6701e-05, 1.9337e-05, 2.7760e-05, 2.7242e-05,\n 4.1576e-09, 3.8606e-05, 2.6639e-05, 2.1874e-05, 4.3723e-05, 1.2501e-05,\n 3.8363e-05, 3.1196e-05, 2.6740e-05, 2.3173e-05, 2.2998e-05, 1.2007e-05,\n 3.9679e-05, 4.0403e-05, 7.9596e-05, 2.3888e-05, 2.6269e-05, 4.7644e-05,\n 1.6745e-05, 1.7231e-05, 4.1824e-05, 4.0237e-05, 2.8110e-10, 1.9598e-05,\n 1.0910e-05, 2.3334e-05, 3.1267e-05, 4.7966e-05, 2.2597e-09, 1.0256e-07,\n 4.5591e-10, 2.6255e-05, 3.6304e-10, 5.2553e-05, 2.7509e-05, 3.5435e-05,\n 3.8970e-05, 6.8291e-11, 1.2914e-05, 3.0851e-05, 2.6186e-05, 3.1513e-05,\n 3.2568e-05, 1.2183e-10, 3.2759e-05, 3.1417e-05, 2.0791e-05, 2.2869e-05,\n 2.7484e-05, 2.2866e-05, 5.5160e-05, 2.8264e-05, 4.1226e-05, 2.9687e-05,\n 3.2551e-05, 3.6347e-05, 5.3766e-09, 3.0543e-05, 2.9276e-05, 2.4711e-05,\n 4.6160e-09, 1.7543e-05, 2.5490e-05, 2.4740e-09, 2.4890e-05, 3.8092e-05,\n 4.9008e-09, 4.4546e-05, 2.4380e-05, 3.1733e-05, 2.6021e-05, 2.8000e-05,\n 7.2468e-05, 3.1023e-05, 3.0183e-05, 2.3696e-05, 4.6504e-05, 1.8179e-05,\n 3.3737e-05, 5.8220e-10, 8.9719e-10, 2.9084e-05, 4.6298e-05, 2.1423e-05,\n 2.6156e-05, 4.2278e-05, 1.3106e-05, 3.5007e-05, 1.6283e-10, 1.8783e-05,\n 3.1891e-05, 3.1791e-05, 9.3305e-06, 1.1981e-09, 3.0791e-05, 3.2247e-05,\n 4.4904e-05, 2.0346e-05, 2.7734e-05, 3.4790e-10, 3.9861e-09, 2.8048e-05,\n 1.3671e-05, 3.4568e-05, 2.4713e-05, 3.2567e-05, 1.8238e-05, 3.3351e-05,\n 6.2828e-10, 2.4609e-05, 2.3380e-05, 1.3767e-05, 4.1968e-05, 1.4906e-05,\n 6.0364e-05, 3.0989e-11, 2.3023e-05, 2.8761e-05, 2.1595e-05, 2.4372e-05,\n 1.3465e-05, 3.0027e-05, 3.2928e-05, 2.9476e-05, 2.0268e-05, 2.2353e-05,\n 2.1550e-05, 3.8122e-05, 3.0614e-05, 2.0302e-05, 1.6543e-05, 2.9863e-05,\n 2.3059e-05, 2.4036e-05, 2.8494e-05, 2.8400e-05, 3.9922e-05, 4.1975e-05,\n 1.8396e-05, 3.9321e-05, 3.3105e-05, 6.0085e-05, 3.0780e-05, 7.7029e-10,\n 3.4863e-05, 1.4814e-05, 2.4222e-05, 1.7474e-05, 7.0323e-05, 3.0378e-05,\n 2.5957e-05, 3.4401e-05, 2.5851e-05, 3.1982e-05, 4.1810e-05, 1.7741e-05,\n 1.7698e-05, 4.2603e-05, 2.0581e-05, 1.8894e-05, 2.8759e-05, 2.4901e-05,\n 2.6695e-05, 2.5855e-05, 2.9800e-05, 2.2438e-05, 3.3190e-09, 2.3369e-05,\n 1.8397e-05, 1.1830e-05, 2.9601e-05, 4.0504e-05, 3.6252e-05, 2.7336e-05,\n 3.1382e-05, 2.4348e-05, 2.4407e-05, 4.9198e-10, 3.7481e-05, 3.0397e-05,\n 2.2471e-05, 4.0952e-05, 2.2840e-05, 1.8426e-05, 3.4652e-05, 8.6132e-10,\n 2.8678e-05, 2.4261e-09, 1.9666e-05, 1.7287e-05, 2.5201e-05, 2.5379e-05,\n 4.0488e-05, 2.8273e-05, 2.8031e-05, 3.3987e-05, 7.6536e-06, 4.5791e-05,\n 3.9669e-05, 1.8340e-05, 9.2352e-06, 2.9238e-05, 3.8767e-05, 3.1807e-05,\n 2.1820e-05, 5.4615e-05, 5.3549e-05, 2.9483e-05, 2.7077e-05, 6.9100e-05,\n 2.7941e-05, 9.1014e-06, 9.2246e-06, 2.4754e-05, 2.7202e-05, 1.3513e-05,\n 3.2050e-05, 1.0212e-10, 2.0119e-09, 3.1486e-05, 4.2039e-05, 1.7291e-05,\n 2.5225e-05, 3.8511e-05, 4.2790e-05, 2.5054e-05, 3.6087e-05, 1.9927e-05,\n 2.1730e-05, 3.4800e-05, 4.0779e-05, 4.6161e-05, 3.6755e-05, 1.7430e-05,\n 1.2628e-05, 2.6797e-05, 2.5732e-05, 1.0281e-05, 2.2095e-05, 2.9692e-05,\n 9.0620e-06, 1.9599e-05, 3.2504e-05, 4.8332e-05, 2.4002e-13, 2.5450e-05,\n 2.0987e-05, 4.0258e-05, 1.6686e-05, 2.8730e-05, 6.0505e-06, 3.0033e-05,\n 2.0023e-05, 2.4150e-05, 2.5331e-05, 3.6898e-05, 3.5125e-09, 1.0199e-05,\n 2.9095e-05, 2.9642e-05, 2.3918e-05, 3.0196e-05, 2.3201e-05, 2.3486e-05,\n 4.5781e-05, 2.7998e-05, 2.7640e-05, 2.6392e-05, 3.6736e-05, 1.0141e-05,\n 1.5378e-05, 1.7884e-05, 3.7931e-05, 4.9622e-05, 3.3265e-05, 7.5043e-05,\n 1.3823e-05, 3.6306e-05, 3.7831e-05, 3.0736e-05, 3.6577e-05, 2.0074e-05,\n 2.5152e-05, 9.4860e-06, 2.3829e-05, 4.6180e-05, 5.7710e-05, 9.7927e-06,\n 1.5291e-09, 3.0029e-05, 2.3761e-05, 8.3429e-09, 2.4965e-05, 5.2599e-05,\n 3.2481e-05, 2.4719e-05, 2.7854e-05, 3.0429e-05, 1.8981e-05, 1.6101e-05,\n 4.4055e-05, 3.8381e-05, 2.6587e-05, 8.3153e-10, 2.7515e-09, 2.6601e-05,\n 3.1561e-05, 2.7640e-05, 4.8663e-05, 2.8106e-05, 3.0985e-05, 1.7375e-05,\n 1.2416e-05, 2.2789e-05, 2.6778e-05, 4.1648e-05, 2.1579e-05, 1.0133e-09,\n 1.3722e-05, 9.3331e-06, 2.9190e-10, 1.1182e-05, 5.0925e-10, 3.5357e-05,\n 3.7015e-05, 1.4003e-05, 2.6715e-05, 1.3749e-09, 1.6317e-05, 3.5460e-05,\n 2.2291e-05, 3.1739e-05, 3.5743e-05, 3.6435e-05, 2.6887e-05, 4.8309e-06,\n 2.6141e-05, 9.2170e-06, 1.7095e-05, 6.9109e-10, 2.0324e-05, 3.4510e-05,\n 2.9027e-05, 8.8316e-10, 1.6884e-05, 2.4937e-05, 4.1931e-05, 3.0977e-05,\n 2.8933e-05, 2.1816e-05, 4.0242e-05, 1.4872e-05, 2.7699e-05, 4.1151e-05,\n 2.5864e-05, 1.2116e-05, 2.8399e-05, 2.9808e-10, 2.4562e-05, 2.0554e-05,\n 9.8897e-09, 2.3354e-05, 2.7550e-05, 5.8115e-06, 2.3847e-05, 7.3918e-09,\n 1.7782e-05, 3.7284e-06, 2.3836e-05, 1.3351e-08, 2.6618e-05, 4.7488e-05,\n 8.6804e-11, 2.3244e-05, 9.6228e-06, 2.8771e-05, 2.5232e-05, 7.0894e-10,\n 4.1529e-05, 1.8182e-05, 4.6358e-05, 3.9575e-05, 2.1524e-05, 1.5674e-05,\n 1.9491e-05, 2.5433e-05, 4.9277e-09, 3.4802e-05, 1.5646e-09, 9.3115e-06,\n 8.3634e-06, 8.9900e-06, 3.1058e-05, 3.0719e-05, 5.5941e-05, 1.8189e-05,\n 3.8369e-05, 2.0126e-05, 3.4253e-05, 2.4113e-05, 6.5646e-10, 2.6814e-05,\n 3.1083e-05, 6.0704e-05, 7.6522e-06, 1.9478e-05, 2.1187e-05, 1.9135e-05,\n 3.2790e-05, 1.6771e-05, 2.5007e-05, 4.8710e-09, 1.9982e-05, 3.6210e-05,\n 2.5072e-05, 5.1209e-05, 3.4966e-05, 2.4837e-05, 3.1782e-05, 3.2060e-05,\n 2.9836e-05, 6.3070e-05, 2.0268e-05, 3.3328e-05, 3.6561e-09, 4.0351e-09,\n 3.5810e-05, 6.6472e-05, 3.6711e-05, 2.4908e-05, 3.2167e-05, 2.9984e-05,\n 2.7308e-05, 3.8995e-05, 2.4970e-05, 6.5235e-06, 2.2411e-05, 3.0601e-05,\n 2.6079e-05, 2.5492e-05, 1.6292e-06, 2.4090e-05, 3.3386e-05, 2.0111e-05,\n 3.6733e-09, 2.4086e-05, 2.9803e-05, 2.9756e-10, 8.9527e-10, 3.0056e-05,\n 3.2283e-05, 3.5436e-05, 1.8326e-05, 3.2708e-05, 1.6139e-05, 2.7822e-05,\n 2.6405e-05, 1.6421e-05, 1.7274e-05, 3.2764e-05, 2.8293e-05, 2.3556e-05,\n 3.7383e-05, 4.1208e-05, 3.0816e-05, 2.1770e-05, 2.5777e-05, 8.4081e-05,\n 2.3228e-05, 2.6591e-05, 1.6371e-05, 2.6842e-05, 3.0430e-05, 5.2466e-05,\n 8.8297e-06, 2.0897e-05, 4.0834e-05, 2.0501e-05, 3.3538e-05, 2.2415e-05,\n 3.4438e-05, 2.1073e-05, 2.8814e-05, 3.8930e-05, 6.7094e-06, 1.0098e-08,\n 5.3417e-09, 2.3250e-05, 2.3633e-05, 3.0488e-05, 2.7050e-05, 1.0167e-08,\n 2.5310e-05, 3.1970e-05, 1.8291e-05, 3.1846e-05, 2.1763e-05, 2.0995e-05,\n 4.0601e-05, 3.3635e-05, 3.4720e-05, 5.6454e-05, 3.3595e-05, 4.1093e-05,\n 2.0942e-05, 3.3692e-05, 3.1295e-05, 2.8817e-05, 4.1319e-05, 4.8132e-05,\n 3.8749e-05, 1.9050e-05, 2.4253e-05, 2.6643e-05, 3.2381e-05, 2.0979e-05,\n 2.5738e-05, 2.9189e-05, 5.9937e-05, 4.3228e-05, 3.2698e-05, 3.2954e-05,\n 3.1785e-05, 1.3805e-05, 1.7517e-05, 2.8479e-05, 2.8112e-05, 2.8400e-05,\n 1.7905e-05, 7.2059e-09, 9.2455e-09, 7.3346e-06, 3.1698e-05, 2.8865e-05,\n 2.8884e-05, 2.9498e-05, 2.7321e-05, 2.0765e-05, 2.3601e-05, 4.0243e-05,\n 2.2241e-05, 1.7422e-09, 8.7847e-06, 3.5144e-05, 2.7513e-05, 4.7851e-05,\n 3.0824e-05, 2.6605e-05, 2.5510e-08, 2.0918e-05], device='cuda:0')" + }, + "2": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[-1.7785e-06, 4.6818e-06, -2.3424e-05, ..., -5.2214e-06,\n 4.3520e-06, 1.1826e-05],\n [-1.9751e-06, 1.8068e-05, -3.8495e-05, ..., -4.5789e-07,\n -5.3884e-07, 1.3391e-05],\n [ 4.2994e-06, -1.6573e-05, 8.2906e-06, ..., 1.1568e-06,\n 1.5130e-07, -8.5059e-06],\n ...,\n [-1.4670e-06, 3.3114e-05, 6.5894e-06, ..., -9.3644e-06,\n 1.5096e-06, 6.1332e-05],\n [-7.2405e-06, -1.2774e-05, 8.3824e-06, ..., 5.0912e-06,\n 3.4127e-06, -1.7737e-05],\n [ 1.0344e-05, -1.0473e-05, 4.4951e-05, ..., 2.1670e-05,\n -2.0671e-06, -1.4626e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.0217e-10, 3.0466e-09, 4.6082e-09, ..., 4.0297e-09, 1.3367e-11,\n 3.6041e-09],\n [1.9454e-09, 1.0702e-08, 7.7039e-09, ..., 8.7261e-09, 1.8002e-11,\n 4.8208e-09],\n [9.8640e-10, 6.4236e-09, 8.3304e-09, ..., 6.3375e-09, 2.5834e-11,\n 9.2396e-09],\n ...,\n [1.8448e-09, 6.6157e-09, 9.9721e-09, ..., 7.5061e-09, 1.8015e-11,\n 1.1495e-08],\n [1.6794e-09, 6.7727e-09, 8.4228e-09, ..., 7.6173e-09, 1.8305e-11,\n 2.5474e-08],\n [1.7717e-09, 7.4916e-09, 1.0791e-08, ..., 1.1585e-08, 1.4210e-11,\n 5.2197e-09]], device='cuda:0')" + }, + "3": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 8.9260e-06, 1.3182e-05, 3.3777e-06, ..., -1.4793e-06,\n 4.1738e-06, -4.4678e-06],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 7.7012e-06, 3.3585e-07, 3.7704e-06, ..., -7.8435e-08,\n 3.3536e-06, 4.9736e-06],\n [-8.6478e-06, 1.3429e-05, 1.4103e-05, ..., -3.7324e-06,\n -2.7325e-06, -3.9780e-06],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.1836e-13, 4.0667e-13, 3.2931e-15, ..., 4.2779e-19, 1.5370e-14,\n 1.3125e-14],\n [5.7954e-10, 2.3036e-09, 1.5975e-10, ..., 1.7105e-10, 2.1756e-10,\n 1.6122e-10],\n [2.6404e-14, 3.9817e-14, 1.4698e-14, ..., 2.5463e-15, 3.6122e-15,\n 1.0907e-14],\n ...,\n [3.0208e-09, 4.2271e-09, 6.1356e-10, ..., 8.0961e-10, 1.4419e-09,\n 1.3131e-09],\n [3.8999e-09, 2.7353e-09, 7.2976e-10, ..., 1.1388e-09, 6.9701e-10,\n 7.0186e-10],\n [2.0522e-12, 1.7285e-12, 1.5607e-13, ..., 1.9302e-13, 3.2515e-13,\n 1.0847e-13]], device='cuda:0')" + }, + "4": { + "step": "tensor(8764.)", + "exp_avg": "tensor([ 5.6052e-45, 1.4233e-05, 5.6052e-45, ..., -4.4756e-05,\n -5.2153e-05, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([7.0937e-11, 3.3981e-07, 1.3583e-11, ..., 1.1560e-06, 1.0502e-06,\n 3.8164e-10], device='cuda:0')" + }, + "5": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[ 5.6052e-45, -8.7324e-08, -5.6052e-45, ..., 2.6997e-06,\n -2.6697e-06, 5.6052e-45],\n [ 5.6052e-45, -1.0024e-06, -5.6052e-45, ..., -7.3641e-07,\n -1.2129e-06, 5.6052e-45],\n [ 5.6052e-45, -3.0734e-06, 5.6052e-45, ..., -2.6699e-05,\n 1.1811e-06, -5.6052e-45],\n ...,\n [-5.6052e-45, 1.0647e-06, 5.6052e-45, ..., 1.5419e-05,\n -1.0154e-06, 5.6052e-45],\n [ 5.6052e-45, -6.7561e-07, 5.6052e-45, ..., 6.2919e-07,\n 1.7473e-06, 5.6052e-45],\n [-5.6052e-45, -5.8238e-07, 5.6052e-45, ..., -1.2896e-05,\n 3.5084e-07, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3831e-15, 1.2386e-11, 2.3150e-15, ..., 9.5324e-11, 6.7848e-11,\n 3.1917e-13],\n [6.8554e-15, 3.7551e-11, 6.1589e-15, ..., 6.5938e-11, 5.9125e-11,\n 4.7147e-13],\n [4.5785e-15, 2.5382e-11, 3.7970e-14, ..., 2.4886e-10, 7.5237e-11,\n 8.5626e-13],\n ...,\n [1.1117e-15, 1.6705e-11, 9.1478e-14, ..., 2.0298e-10, 7.8572e-11,\n 1.4390e-12],\n [2.2958e-14, 1.2923e-10, 8.6749e-15, ..., 2.2712e-10, 1.3933e-10,\n 5.5331e-13],\n [7.1020e-16, 2.4000e-11, 8.7909e-15, ..., 2.3453e-10, 9.5708e-11,\n 7.2644e-13]], device='cuda:0')" + }, + "15": { + "step": "tensor(21284.)", + "exp_avg": "tensor([5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([9.0768e-14], device='cuda:0')" + }, + "16": { + "step": "tensor(21284.)", + "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.0103e-16, 5.7945e-16, 2.1280e-16], device='cuda:0')" + }, + "17": { + "step": "tensor(21284.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([7.2265e-13, 5.1028e-14, 2.9948e-14, 5.7300e-14, 4.5082e-14],\n device='cuda:0')" + }, + "19": { + "step": "tensor(21284.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.6387e-22, 2.7126e-21, 4.8342e-22, ..., 1.0935e-22, 1.2948e-21,\n 6.9808e-22],\n [3.0414e-19, 3.0555e-19, 2.3384e-22, ..., 5.6246e-20, 1.1248e-20,\n 2.3967e-20],\n [3.1717e-18, 3.6314e-18, 3.6093e-22, ..., 2.9281e-19, 3.9628e-19,\n 1.2028e-19],\n ...,\n [9.5287e-20, 4.2956e-20, 8.2281e-21, ..., 2.7393e-21, 2.5433e-20,\n 3.1289e-21],\n [1.2054e-20, 9.8385e-21, 2.5097e-22, ..., 2.2470e-22, 1.2479e-21,\n 1.4451e-22],\n [1.0893e-17, 1.2757e-17, 1.9448e-21, ..., 9.8372e-19, 1.3981e-18,\n 4.7748e-19]], device='cuda:0')" + }, + "20": { + "step": "tensor(21284.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([4.3544e-19, 2.8334e-16, 1.9711e-15, 4.9700e-17, 3.3878e-16, 1.3163e-15,\n 1.4892e-17, 2.0235e-15, 1.5809e-16, 3.6737e-18, 9.1274e-15, 2.9085e-17,\n 9.2101e-18, 2.0906e-16, 2.1609e-16, 1.7134e-15, 5.7824e-16, 5.7561e-17,\n 2.8658e-15, 8.6073e-16, 8.6190e-16, 8.2873e-17, 2.9527e-16, 6.0145e-16,\n 3.5870e-16, 5.8288e-17, 2.6620e-17, 3.9776e-15, 2.6729e-18, 8.4097e-17,\n 1.6205e-16, 7.9522e-16, 1.3175e-15, 4.9840e-17, 6.9895e-17, 1.8882e-16,\n 3.7035e-18, 2.3901e-16, 1.4395e-15, 7.5845e-18, 3.0601e-17, 9.7335e-17,\n 2.6104e-15, 4.4645e-16, 3.0861e-17, 2.4176e-18, 1.0714e-17, 8.5658e-17,\n 2.1035e-17, 4.6719e-19, 1.6298e-18, 1.8917e-17, 1.0113e-16, 5.4554e-16,\n 7.1462e-18, 8.0963e-16, 1.4252e-15, 6.4063e-15, 7.2931e-16, 1.3696e-18,\n 1.0451e-14, 3.5389e-19, 3.5033e-17, 5.4118e-16, 1.5864e-15, 2.3285e-15,\n 3.7915e-17, 4.0037e-17, 8.4123e-15, 7.8646e-17, 1.2150e-15, 3.3390e-16,\n 6.2401e-16, 1.0864e-16, 1.6417e-18, 1.5641e-16, 1.1320e-15, 1.8426e-17,\n 1.5853e-20, 6.3710e-19, 1.6702e-18, 2.4876e-17, 2.6254e-15, 1.1057e-15,\n 1.7108e-16, 4.1268e-17, 8.9465e-16, 1.5336e-16, 2.3702e-16, 5.9206e-15,\n 3.4007e-17, 2.2161e-16, 4.0846e-17, 1.6944e-15, 2.6600e-16, 4.8108e-17,\n 2.2370e-19, 6.6491e-17, 2.1480e-15, 4.2667e-16, 1.1624e-18, 1.5774e-15,\n 1.2712e-17, 7.9218e-19, 6.7141e-16, 1.1860e-16, 2.1788e-18, 2.8652e-17,\n 8.9103e-17, 1.1674e-16, 1.1167e-18, 2.3967e-17, 9.4524e-18, 2.6946e-18,\n 1.3637e-16, 5.7686e-17, 1.6869e-15, 4.6674e-17, 1.0202e-15, 4.4671e-16,\n 2.1495e-17, 3.8345e-16, 5.7973e-16, 4.4960e-17, 7.6153e-16, 8.6785e-17,\n 3.3746e-15, 5.7521e-17, 1.3318e-16, 9.1965e-18, 3.9304e-18, 6.1626e-17,\n 2.1671e-17, 2.8619e-16, 5.6662e-17, 9.4181e-16, 3.4077e-18, 2.1848e-17,\n 5.9510e-16, 8.4180e-17, 1.7866e-18, 4.8850e-18, 8.7935e-15, 2.2408e-14,\n 5.4527e-16, 2.3876e-18, 7.1699e-16, 6.0982e-16, 4.7324e-16, 1.2596e-17,\n 2.3711e-18, 4.2571e-16, 1.5536e-15, 4.5304e-16, 7.6598e-16, 2.2972e-16,\n 4.4094e-18, 2.2066e-17, 3.4216e-17, 2.3467e-15, 1.1998e-17, 3.5165e-16,\n 4.4100e-16, 1.1199e-17, 1.1361e-17, 1.6022e-14, 1.4732e-15, 3.6125e-18,\n 4.5542e-15, 3.3109e-19, 4.2687e-16, 2.3006e-17, 5.6595e-16, 1.2216e-16,\n 2.2701e-16, 3.1426e-15, 5.3041e-16, 2.4888e-16, 5.4172e-17, 1.4164e-15,\n 2.1802e-16, 5.2326e-16, 1.3233e-14, 1.6898e-16, 8.5138e-18, 1.3286e-19,\n 2.2020e-17, 2.0341e-15, 1.0518e-16, 4.4458e-16, 2.2308e-15, 8.5026e-16,\n 1.7818e-17, 1.0126e-17, 1.6365e-15, 2.8465e-16, 2.9404e-18, 6.3723e-16,\n 4.6990e-18, 5.4493e-16, 1.2043e-15, 6.7564e-17, 2.9759e-16, 8.4095e-18,\n 1.3989e-16, 4.3111e-15, 6.6857e-17, 4.8898e-16, 1.5107e-18, 3.7022e-15,\n 7.0173e-16, 4.3157e-18, 3.3783e-17, 2.7786e-18, 2.6025e-15, 2.6156e-17,\n 1.8546e-16, 1.1010e-15, 5.7142e-17, 4.8736e-16, 3.9202e-16, 3.0335e-16,\n 1.7582e-17, 9.9209e-16, 7.5023e-17, 3.4757e-16, 4.0458e-15, 3.6335e-17,\n 1.5659e-18, 1.2732e-17, 1.5362e-16, 2.2371e-16, 8.7588e-18, 2.9093e-16,\n 1.2151e-16, 1.3960e-15, 1.6850e-16, 9.7226e-16, 1.0296e-16, 1.7134e-17,\n 6.1770e-17, 1.0381e-17, 2.8109e-15, 1.8611e-16, 2.5974e-17, 5.5342e-16,\n 2.0146e-17, 5.8134e-17, 1.8285e-15, 4.9938e-17, 6.3179e-16, 3.6058e-18,\n 4.0491e-16, 2.7556e-17, 3.4862e-18, 6.6983e-15], device='cuda:0')" + }, + "21": { + "step": "tensor(21284.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.2741e-20, 3.2447e-20, 4.5126e-18, 2.8166e-20, 1.3859e-19, 1.5858e-19,\n 7.5171e-20, 5.2597e-19, 3.3831e-21, 1.8147e-20, 5.6347e-18, 1.1151e-20,\n 1.6431e-21, 7.7311e-20, 4.2337e-19, 1.4515e-18, 6.2440e-19, 4.4760e-21,\n 1.0641e-17, 5.3381e-18, 4.8102e-18, 6.3241e-21, 1.0186e-20, 3.6855e-19,\n 3.0430e-18, 4.3454e-20, 6.9355e-20, 3.8368e-18, 1.2599e-21, 9.6066e-20,\n 3.2634e-20, 6.2534e-19, 4.5803e-18, 1.2276e-20, 5.7762e-20, 1.4191e-18,\n 2.0339e-20, 1.1479e-19, 2.8169e-18, 1.7258e-22, 1.3748e-20, 5.9635e-21,\n 2.9006e-18, 1.6021e-19, 2.4720e-20, 2.0211e-21, 1.4478e-20, 1.1865e-20,\n 6.9213e-21, 2.1212e-20, 2.1453e-21, 2.3539e-20, 1.6031e-19, 1.2428e-19,\n 1.0665e-22, 1.4491e-19, 8.0756e-20, 4.8833e-18, 3.5847e-19, 3.9474e-21,\n 2.2794e-17, 1.4543e-22, 1.0178e-21, 6.6870e-20, 3.0998e-18, 1.8910e-18,\n 2.5365e-20, 4.0916e-20, 2.9689e-17, 1.1109e-20, 2.2127e-18, 6.9975e-21,\n 8.6332e-20, 6.1432e-19, 6.7485e-21, 5.1560e-20, 1.1530e-18, 9.0991e-20,\n 1.9462e-20, 1.7282e-22, 8.9958e-21, 5.8452e-20, 6.5186e-19, 3.2433e-18,\n 1.6071e-19, 2.2413e-19, 1.2086e-19, 9.3381e-21, 5.8380e-20, 1.6143e-17,\n 7.8747e-20, 6.9615e-20, 2.0042e-20, 2.0486e-18, 3.7126e-20, 1.3441e-19,\n 4.0833e-22, 1.8521e-19, 3.7351e-18, 7.5525e-19, 2.6875e-21, 4.1949e-18,\n 1.9737e-21, 5.7887e-21, 1.8943e-19, 2.0729e-19, 8.1767e-21, 2.8608e-20,\n 1.9880e-20, 1.6896e-20, 2.7746e-22, 3.0876e-20, 5.1574e-20, 7.3191e-21,\n 3.3369e-20, 3.7819e-21, 4.4597e-18, 1.0973e-19, 1.4041e-18, 6.3228e-20,\n 5.6426e-20, 3.7796e-19, 4.7715e-19, 3.1658e-20, 1.5548e-18, 6.9548e-22,\n 3.5001e-18, 8.5198e-22, 4.0166e-20, 3.2222e-21, 3.0166e-21, 3.3737e-21,\n 4.6107e-21, 1.1374e-19, 1.1842e-20, 2.4301e-18, 5.5518e-21, 2.8802e-21,\n 3.6132e-19, 1.2685e-20, 5.3678e-21, 2.2257e-20, 3.8863e-17, 4.4112e-17,\n 8.4549e-20, 1.0067e-22, 7.7443e-19, 6.6447e-19, 1.1169e-18, 5.7239e-22,\n 4.4071e-21, 8.5572e-19, 1.6398e-18, 4.6582e-20, 1.4399e-19, 4.2366e-19,\n 3.3307e-23, 7.1848e-20, 1.5964e-20, 1.6168e-19, 2.1603e-21, 5.7635e-19,\n 2.1922e-20, 6.6320e-20, 2.1039e-20, 3.7479e-17, 8.6378e-19, 2.2545e-21,\n 5.9693e-18, 4.2831e-23, 4.6958e-20, 3.5738e-20, 3.8162e-18, 1.6283e-19,\n 1.0015e-19, 1.8423e-18, 1.7045e-19, 1.2170e-20, 7.8309e-20, 1.6047e-18,\n 4.1828e-21, 1.5186e-19, 3.4648e-17, 5.7423e-20, 3.1134e-21, 2.6248e-21,\n 9.9821e-22, 1.6316e-18, 2.3880e-19, 4.4327e-19, 1.3917e-18, 1.1250e-19,\n 6.9335e-21, 8.5708e-22, 9.5975e-19, 5.6873e-21, 4.6643e-22, 3.7512e-20,\n 6.8186e-21, 1.7652e-19, 3.2044e-19, 5.8641e-21, 3.1126e-20, 7.3070e-21,\n 9.4022e-21, 1.0210e-17, 7.0557e-20, 4.2763e-20, 2.3763e-20, 5.9764e-18,\n 2.2177e-19, 3.0386e-21, 2.5450e-21, 3.9475e-20, 2.5449e-19, 7.5243e-22,\n 6.5449e-21, 1.8395e-18, 2.6743e-19, 7.7865e-20, 7.2000e-19, 1.4296e-18,\n 2.2688e-21, 8.0127e-19, 4.0424e-20, 1.1619e-19, 8.1571e-19, 1.4426e-20,\n 4.6342e-22, 1.2460e-21, 1.8373e-20, 3.9634e-19, 1.3001e-21, 5.7342e-19,\n 1.3949e-20, 1.6322e-18, 1.7026e-18, 6.4399e-19, 1.0307e-19, 3.6722e-22,\n 9.6402e-21, 1.9063e-21, 1.0396e-18, 4.1979e-20, 9.8069e-21, 2.6825e-19,\n 2.3418e-21, 1.9070e-21, 9.6470e-18, 4.5387e-20, 6.6989e-20, 5.8994e-21,\n 2.9137e-19, 4.7326e-20, 6.2679e-21, 1.8736e-17], device='cuda:0')" + }, + "22": { + "step": "tensor(21284.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.4811e-21, 3.9738e-19, 3.1492e-18, 4.4202e-20, 4.8508e-19, 1.8215e-18,\n 2.1586e-20, 2.7031e-18, 2.3359e-19, 3.1303e-20, 1.1038e-17, 1.5448e-19,\n 3.9698e-20, 3.6341e-19, 4.4664e-19, 1.7651e-18, 9.0956e-19, 1.0314e-19,\n 3.0398e-18, 1.6266e-18, 1.4318e-18, 1.3908e-19, 3.0602e-19, 1.1857e-18,\n 1.1410e-18, 8.2567e-20, 2.3630e-20, 4.5091e-18, 5.0336e-22, 1.6132e-19,\n 6.9927e-20, 1.0971e-18, 2.1080e-18, 1.5244e-19, 1.2284e-19, 8.2168e-19,\n 1.0677e-20, 1.5767e-19, 2.2498e-18, 1.4416e-21, 1.2078e-19, 1.5227e-19,\n 3.9626e-18, 6.6966e-19, 1.7098e-19, 1.5919e-20, 4.1381e-21, 1.7297e-20,\n 4.0763e-20, 1.7065e-21, 9.8388e-22, 3.2811e-20, 2.0475e-19, 4.9563e-19,\n 3.0729e-22, 8.7888e-19, 1.9327e-18, 8.6544e-18, 7.4983e-19, 1.8080e-20,\n 1.4012e-17, 4.9724e-21, 1.9290e-20, 6.9140e-19, 2.3189e-18, 3.2717e-18,\n 7.6970e-20, 1.1724e-19, 1.1459e-17, 1.3863e-19, 1.7407e-18, 3.4567e-19,\n 7.9422e-19, 5.6317e-19, 3.9282e-21, 1.0905e-19, 1.6584e-18, 6.5394e-20,\n 4.2444e-22, 4.1799e-21, 6.0578e-20, 2.0662e-20, 3.5711e-18, 2.0100e-18,\n 3.0420e-19, 2.9356e-19, 9.7270e-19, 2.3737e-19, 3.8562e-19, 7.0191e-18,\n 2.2787e-19, 3.5542e-19, 4.5898e-21, 2.3939e-18, 3.8977e-19, 5.0612e-20,\n 1.8363e-20, 6.6643e-20, 3.1102e-18, 8.2966e-19, 1.5383e-20, 2.3243e-18,\n 2.7086e-20, 9.8527e-21, 7.1226e-19, 4.8643e-19, 5.0437e-21, 1.8759e-19,\n 1.4017e-19, 1.9055e-19, 6.4290e-21, 8.4895e-21, 9.3045e-20, 2.6239e-21,\n 1.8860e-19, 1.0007e-19, 2.5678e-18, 3.2841e-20, 1.4778e-18, 6.4816e-19,\n 1.7965e-20, 4.9549e-19, 8.9549e-19, 1.0267e-19, 1.4066e-18, 6.0546e-21,\n 3.8103e-18, 1.0184e-20, 2.2013e-19, 1.4751e-21, 8.7827e-21, 8.3180e-21,\n 7.1471e-21, 3.6177e-19, 8.5624e-20, 1.5804e-18, 2.7396e-20, 1.2814e-20,\n 8.8523e-19, 1.2492e-19, 4.7318e-21, 5.6240e-22, 1.0946e-17, 2.8233e-17,\n 5.2078e-19, 3.7646e-21, 1.1781e-18, 9.4239e-19, 7.7896e-19, 2.5407e-20,\n 2.3476e-20, 1.0002e-18, 1.6249e-18, 6.6089e-19, 1.0879e-18, 3.5703e-19,\n 1.4719e-23, 1.6939e-20, 6.0217e-20, 2.8399e-18, 8.1902e-22, 5.6925e-19,\n 6.3095e-19, 1.8934e-20, 9.6258e-21, 2.1204e-17, 2.1081e-18, 8.9694e-21,\n 5.4332e-18, 1.2855e-21, 4.3563e-19, 1.2669e-20, 1.2713e-18, 2.4515e-19,\n 3.8476e-19, 3.7081e-18, 4.4099e-19, 2.5324e-19, 2.0457e-20, 2.0405e-18,\n 2.4628e-19, 5.4998e-19, 1.7673e-17, 1.3316e-19, 2.3352e-20, 7.8661e-22,\n 4.2993e-20, 2.1935e-18, 4.5232e-19, 7.1016e-19, 3.0823e-18, 9.6331e-19,\n 3.1074e-20, 2.0538e-20, 1.7248e-18, 4.0015e-19, 4.0112e-22, 8.9908e-19,\n 3.0068e-20, 4.7276e-19, 1.2564e-18, 1.1833e-19, 2.8888e-19, 7.7706e-20,\n 1.3700e-19, 6.0061e-18, 5.8120e-20, 7.0558e-19, 2.7482e-21, 5.0570e-18,\n 7.3239e-19, 1.4661e-23, 2.7666e-21, 7.2459e-21, 3.5084e-18, 1.1374e-20,\n 2.7900e-19, 1.6213e-18, 8.4803e-20, 7.2064e-19, 7.9114e-19, 6.5390e-19,\n 3.7963e-20, 1.1464e-18, 2.2254e-19, 4.0106e-19, 5.4050e-18, 1.2107e-19,\n 4.6512e-21, 5.4989e-21, 1.9998e-19, 3.9571e-19, 1.4637e-21, 6.9035e-19,\n 4.6637e-20, 2.1506e-18, 7.8051e-19, 1.4089e-18, 4.0320e-19, 1.1890e-22,\n 9.3613e-20, 2.3829e-20, 3.2683e-18, 3.5204e-19, 1.0498e-19, 8.3277e-19,\n 3.4597e-20, 9.8123e-20, 3.0812e-18, 1.0304e-19, 9.0145e-19, 2.0791e-21,\n 3.6121e-19, 3.7494e-20, 2.0994e-21, 9.4672e-18], device='cuda:0')" + }, + "23": { + "step": "tensor(21284.)", + "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.9732e-18, 7.3947e-18, 5.3214e-21, ..., 5.2132e-19, 9.3647e-19,\n 1.7348e-19],\n [5.4768e-18, 6.1990e-18, 3.8805e-23, ..., 3.4548e-19, 7.2323e-19,\n 1.5048e-19],\n [1.6351e-18, 1.9222e-18, 4.0434e-21, ..., 1.4721e-19, 1.8121e-19,\n 5.7660e-20],\n ...,\n [1.6112e-19, 1.0114e-19, 7.8634e-22, ..., 9.9859e-21, 2.7828e-20,\n 4.2132e-21],\n [5.5175e-19, 5.8692e-19, 7.5906e-22, ..., 5.6449e-20, 6.8712e-20,\n 2.7273e-20],\n [4.9631e-19, 7.2169e-19, 6.3544e-22, ..., 6.8182e-20, 6.4534e-20,\n 3.8013e-20]], device='cuda:0')" + }, + "24": { + "step": "tensor(21284.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.7140e-15, 2.9102e-15, 9.5445e-16, 6.1029e-16, 5.8037e-17, 3.8480e-15,\n 3.2501e-17, 3.4466e-15, 3.3770e-15, 3.0791e-18, 4.1963e-15, 2.8637e-16,\n 3.8195e-16, 2.6844e-17, 4.1677e-18, 2.4430e-17, 5.9172e-20, 4.2960e-17,\n 8.4249e-16, 9.9926e-16, 2.2613e-16, 4.1630e-16, 5.0013e-16, 5.1993e-18,\n 2.9961e-16, 3.1716e-16, 1.2143e-16, 7.8636e-16, 1.0198e-16, 4.2978e-18,\n 5.1298e-17, 2.8983e-16, 4.6513e-16, 5.9297e-16, 3.5883e-19, 2.3250e-19,\n 3.1193e-16, 4.3564e-18, 3.2493e-16, 2.5192e-16, 6.7441e-19, 1.5838e-15,\n 4.0322e-15, 1.0530e-18, 1.0782e-16, 3.6457e-18, 5.1789e-16, 3.0593e-18,\n 1.1341e-16, 1.0480e-15, 5.6460e-18, 8.3376e-17, 1.7097e-18, 4.9410e-16,\n 2.8041e-17, 1.1606e-17, 7.6014e-15, 6.6333e-15, 2.3008e-17, 8.3920e-19,\n 9.9158e-15, 6.4325e-17, 7.6767e-16, 2.6814e-15, 2.1331e-16, 4.6135e-15,\n 1.0800e-17, 4.1473e-17, 4.8827e-16, 8.0435e-17, 9.7119e-16, 1.8175e-16,\n 1.6300e-16, 6.2261e-17, 3.6742e-17, 1.0380e-16, 5.0634e-16, 2.3399e-19,\n 3.2058e-16, 9.5712e-17, 4.7634e-16, 1.0971e-15, 4.1126e-17, 1.8668e-18,\n 5.5418e-18, 2.8617e-16, 1.1522e-15, 6.5771e-18, 1.7585e-15, 1.0772e-16,\n 3.9336e-17, 2.8926e-18, 5.2282e-17, 9.8665e-16, 2.1761e-15, 1.1548e-15,\n 7.4999e-18, 5.6351e-18, 9.4904e-16, 7.4075e-17, 4.8677e-16, 5.5569e-18,\n 1.2351e-16, 1.1981e-18, 6.6973e-17, 2.2983e-18, 4.4947e-16, 1.4451e-19,\n 5.2855e-16, 1.3629e-16, 2.5272e-17, 2.4519e-17, 2.3487e-16, 2.5340e-18,\n 5.8398e-16, 5.4751e-16, 4.1315e-17, 5.4562e-15, 1.8705e-19, 2.0470e-15,\n 1.2485e-16, 1.8594e-17, 3.3125e-16, 2.9443e-18, 6.0292e-18, 5.6974e-18,\n 3.5305e-15, 2.1248e-16, 1.0269e-18, 1.6119e-17, 7.1064e-17, 1.1316e-16,\n 2.6990e-16, 4.1553e-16, 1.3435e-17, 3.3465e-16, 7.4703e-16, 6.7393e-17,\n 1.6205e-18, 3.5501e-17, 3.1085e-16, 7.4299e-16, 9.4571e-16, 1.8671e-14,\n 1.3111e-17, 3.4425e-19, 5.4280e-16, 7.1368e-17, 3.7810e-18, 4.9507e-17,\n 1.2140e-16, 3.2167e-17, 5.5225e-17, 3.5054e-16, 2.8325e-17, 2.7763e-17,\n 1.3289e-16, 3.0262e-15, 2.5987e-17, 9.9450e-15, 1.5392e-16, 1.5857e-17,\n 6.8880e-19, 2.2738e-15, 5.7009e-16, 1.1352e-15, 1.1425e-17, 2.1618e-16,\n 3.9675e-15, 9.0207e-18, 2.0801e-15, 5.2361e-16, 3.0799e-16, 5.4619e-17,\n 4.4784e-16, 3.1473e-15, 1.1292e-15, 5.1662e-16, 9.2950e-17, 1.3998e-15,\n 3.4885e-15, 3.3879e-16, 3.9390e-15, 3.8845e-19, 4.4494e-16, 2.3993e-19,\n 5.9872e-16, 3.0072e-17, 9.5162e-17, 1.0513e-17, 2.3766e-15, 1.5407e-18,\n 3.7953e-16, 8.6036e-18, 1.4243e-16, 1.7780e-16, 1.1510e-16, 3.5199e-15,\n 1.9229e-16, 8.7055e-18, 3.3281e-16, 1.5095e-17, 1.9182e-17, 7.2440e-17,\n 1.3597e-16, 6.5357e-16, 6.3205e-17, 5.9887e-17, 4.6926e-17, 8.7731e-18,\n 2.1959e-17, 6.8085e-16, 1.2156e-18, 2.4573e-15, 2.1180e-16, 1.1400e-16,\n 1.0714e-15, 2.3628e-16, 2.3032e-15, 6.5573e-16, 2.3471e-18, 9.1752e-16,\n 2.4017e-17, 1.1602e-18, 1.0285e-16, 2.9710e-17, 4.8922e-16, 1.9110e-15,\n 7.7117e-19, 1.6843e-17, 2.7162e-16, 1.7680e-16, 2.9555e-18, 2.2882e-17,\n 5.5293e-17, 1.1066e-15, 1.2716e-16, 8.2681e-17, 2.9849e-16, 7.7047e-17,\n 1.6324e-15, 1.4894e-16, 1.7750e-15, 6.2024e-16, 6.3018e-17, 2.8599e-17,\n 6.2218e-17, 6.2468e-16, 1.0193e-15, 1.3208e-17, 1.0749e-15, 1.0875e-17,\n 8.4775e-16, 6.5040e-17, 3.6352e-16, 4.5596e-16], device='cuda:0')" + }, + "25": { + "step": "tensor(21284.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.4891e-18, 2.9829e-18, 5.9382e-18, 1.0202e-20, 8.3261e-20, 9.5702e-18,\n 9.6129e-20, 3.6531e-18, 1.5787e-18, 5.3270e-21, 1.2488e-18, 4.1064e-18,\n 4.6870e-19, 1.7912e-21, 4.1891e-21, 3.8833e-20, 1.1888e-20, 3.0288e-21,\n 2.8956e-19, 3.1185e-18, 1.8490e-18, 4.8956e-20, 4.4145e-20, 6.0527e-20,\n 2.9410e-18, 1.0631e-19, 5.7048e-20, 1.0441e-19, 8.2384e-20, 6.6811e-21,\n 1.6461e-21, 3.1685e-19, 6.0019e-20, 8.6457e-19, 1.8873e-21, 2.8987e-21,\n 1.2234e-19, 2.7075e-23, 1.8667e-19, 1.5735e-19, 2.2199e-21, 7.4965e-18,\n 2.1285e-17, 8.4019e-21, 8.7660e-19, 5.8689e-22, 2.2283e-19, 1.1366e-22,\n 2.9872e-20, 2.5175e-19, 9.8579e-22, 7.8533e-21, 2.1642e-22, 5.1939e-19,\n 2.0656e-20, 7.0094e-21, 3.6614e-18, 8.2850e-18, 1.0113e-20, 2.0475e-21,\n 1.3698e-17, 1.4565e-20, 2.3185e-18, 1.3042e-18, 7.6691e-20, 6.8487e-18,\n 8.9842e-21, 5.4944e-20, 7.1798e-21, 2.3223e-20, 2.8439e-19, 1.9544e-20,\n 2.5726e-20, 5.9497e-20, 3.9195e-20, 1.2938e-20, 1.8282e-19, 5.7721e-21,\n 8.8586e-20, 1.6598e-20, 1.3016e-18, 1.7545e-19, 1.1502e-19, 3.0260e-20,\n 5.4810e-21, 3.6992e-18, 5.5151e-19, 3.4513e-21, 2.4057e-18, 1.3304e-20,\n 7.6171e-21, 3.1091e-21, 1.4353e-21, 4.9221e-19, 6.2915e-18, 2.9161e-19,\n 6.4970e-21, 4.6736e-21, 5.3711e-19, 4.7639e-20, 7.8722e-19, 6.9479e-21,\n 5.4509e-20, 5.8373e-21, 2.6792e-21, 5.4745e-22, 4.0957e-19, 1.5463e-21,\n 1.4365e-19, 1.5325e-20, 1.5257e-21, 3.1584e-19, 6.3336e-19, 2.3421e-21,\n 4.1584e-19, 1.6590e-18, 4.0946e-21, 1.2622e-17, 2.0382e-22, 3.2066e-18,\n 4.7989e-21, 1.9880e-21, 1.4642e-19, 4.8348e-23, 5.5947e-21, 2.6303e-22,\n 4.4478e-18, 6.1662e-20, 3.7893e-24, 1.9786e-21, 3.3356e-21, 9.3439e-21,\n 1.2407e-19, 6.2760e-19, 6.3149e-21, 7.7225e-20, 2.7547e-18, 1.2565e-20,\n 1.7501e-21, 1.0874e-19, 2.9221e-20, 1.5702e-19, 1.5287e-19, 4.5498e-17,\n 2.5951e-20, 4.6850e-23, 3.5545e-19, 9.3916e-21, 1.3064e-21, 3.5152e-20,\n 2.7018e-19, 4.0625e-21, 4.9371e-22, 1.3557e-19, 6.3543e-21, 8.3455e-20,\n 2.6745e-20, 2.7672e-18, 3.7667e-21, 9.4504e-18, 3.6701e-20, 5.8345e-22,\n 1.3869e-20, 6.7835e-18, 1.1714e-18, 1.6376e-20, 2.6178e-20, 3.0609e-20,\n 1.1316e-17, 2.0301e-22, 3.2441e-18, 5.3349e-20, 2.8668e-19, 1.6907e-20,\n 2.0847e-19, 3.1516e-18, 4.8597e-19, 1.2264e-19, 2.0944e-20, 1.0224e-18,\n 2.4451e-18, 1.3759e-20, 1.3536e-18, 7.4094e-22, 2.6945e-19, 4.4500e-21,\n 4.4515e-19, 3.9215e-20, 1.2570e-19, 1.8609e-20, 6.8320e-19, 1.2679e-20,\n 3.5816e-20, 3.6735e-21, 7.7926e-21, 1.8412e-20, 6.5573e-20, 1.0111e-17,\n 3.1395e-20, 8.4207e-21, 5.4597e-20, 8.5987e-22, 3.7132e-21, 1.8093e-19,\n 3.3352e-19, 1.3990e-19, 5.3332e-20, 2.6730e-21, 9.3954e-21, 2.0145e-20,\n 1.7733e-20, 8.2011e-19, 1.4941e-21, 8.2952e-18, 9.6022e-21, 1.0839e-20,\n 3.5002e-19, 3.1195e-20, 4.5454e-19, 5.8497e-20, 1.6966e-20, 1.9498e-18,\n 1.6530e-20, 3.0183e-21, 3.4006e-19, 1.2944e-21, 9.0936e-21, 3.4718e-18,\n 4.3406e-22, 1.3218e-21, 3.2845e-20, 1.4210e-19, 3.0447e-22, 6.4057e-21,\n 1.6721e-21, 3.6936e-18, 7.1268e-20, 2.5517e-21, 3.7203e-18, 6.1988e-21,\n 2.3826e-18, 4.3606e-20, 8.7992e-19, 8.7527e-19, 2.6619e-20, 1.1322e-21,\n 6.1915e-22, 9.6149e-19, 1.1356e-18, 1.0933e-19, 2.8824e-19, 5.3478e-21,\n 1.4513e-18, 1.8165e-19, 4.1990e-20, 1.8936e-19], device='cuda:0')" + }, + "26": { + "step": "tensor(21284.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([4.7632e-18, 3.6315e-18, 2.0320e-18, 6.7250e-19, 1.6347e-19, 4.9632e-18,\n 3.1594e-20, 4.7039e-18, 4.2337e-18, 1.5534e-21, 5.2526e-18, 1.2216e-18,\n 7.0648e-19, 3.7798e-20, 1.6840e-21, 1.4490e-20, 5.3629e-22, 5.8899e-20,\n 1.0155e-18, 2.0622e-18, 8.0851e-19, 5.3030e-19, 5.6384e-19, 2.0933e-20,\n 1.1710e-18, 3.8659e-19, 3.0565e-19, 8.6810e-19, 1.2800e-19, 2.4015e-21,\n 1.5150e-20, 6.2003e-19, 5.9303e-19, 1.3571e-18, 2.0407e-22, 1.9682e-21,\n 4.1292e-19, 1.0381e-21, 4.2944e-19, 2.9987e-19, 1.1709e-20, 2.2572e-18,\n 6.1168e-18, 3.0113e-21, 6.0713e-19, 5.8605e-21, 4.4785e-19, 4.5617e-21,\n 1.4058e-19, 1.3189e-18, 9.4441e-21, 1.1239e-19, 2.5814e-21, 3.8771e-19,\n 3.8204e-20, 2.7081e-21, 9.5579e-18, 8.3838e-18, 3.3682e-21, 1.4848e-20,\n 1.2343e-17, 9.5408e-20, 9.5264e-19, 3.4641e-18, 2.6607e-19, 5.9127e-18,\n 3.5093e-21, 7.0327e-20, 6.1995e-19, 1.0872e-19, 1.2168e-18, 1.8210e-19,\n 2.6310e-19, 2.3593e-19, 4.8537e-20, 3.2312e-20, 6.4144e-19, 2.7770e-20,\n 5.0828e-19, 1.2614e-19, 6.9152e-19, 1.3743e-18, 3.9515e-20, 1.0694e-20,\n 3.8489e-20, 8.4509e-19, 1.4480e-18, 8.0260e-21, 2.3755e-18, 1.5908e-19,\n 5.7199e-20, 2.5377e-20, 2.5400e-20, 1.2173e-18, 2.7003e-18, 1.5703e-18,\n 7.0000e-20, 6.0257e-22, 1.4527e-18, 2.5275e-19, 7.6052e-19, 1.2628e-21,\n 2.0898e-19, 7.3652e-21, 3.6589e-20, 3.3311e-20, 5.8656e-19, 7.0673e-23,\n 7.8526e-19, 1.7615e-19, 3.5000e-20, 2.8156e-19, 4.4230e-19, 1.9723e-21,\n 8.0586e-19, 7.9358e-19, 5.9017e-20, 7.3001e-18, 9.8561e-22, 2.4756e-18,\n 9.5463e-20, 2.1468e-20, 4.1127e-19, 2.0910e-21, 5.4222e-20, 4.5867e-23,\n 4.2641e-18, 1.7742e-19, 2.6906e-23, 7.3583e-21, 9.6721e-20, 5.8142e-20,\n 2.1361e-19, 7.2973e-19, 9.3063e-21, 4.3374e-19, 1.3904e-18, 7.8858e-20,\n 1.7092e-21, 4.2432e-20, 4.0214e-19, 7.6884e-19, 1.4478e-18, 2.4245e-17,\n 4.9387e-21, 4.5059e-22, 7.6381e-19, 9.5284e-20, 1.5512e-22, 4.1899e-20,\n 4.9132e-19, 4.9909e-20, 3.7724e-20, 4.4278e-19, 3.7566e-20, 1.6274e-19,\n 3.3407e-20, 3.7543e-18, 3.0094e-20, 1.2514e-17, 6.5126e-20, 2.3495e-20,\n 2.2283e-21, 3.2663e-18, 6.6707e-19, 1.4448e-18, 1.6931e-20, 2.7619e-19,\n 5.0073e-18, 1.3078e-20, 2.5157e-18, 5.9089e-19, 7.3171e-19, 6.9822e-20,\n 6.2308e-19, 4.0301e-18, 1.2426e-18, 6.9503e-19, 7.5083e-20, 1.7392e-18,\n 4.3359e-18, 3.7169e-19, 4.9606e-18, 2.2090e-20, 5.5552e-19, 1.7032e-21,\n 7.8967e-19, 1.3654e-20, 3.0645e-19, 7.5090e-21, 2.9844e-18, 1.3406e-21,\n 4.8216e-19, 1.1199e-20, 1.0692e-19, 2.3221e-19, 9.3424e-20, 4.4148e-18,\n 2.5119e-19, 2.2990e-21, 2.8013e-19, 2.2228e-20, 1.1027e-20, 3.0731e-19,\n 1.1216e-19, 8.2475e-19, 9.4162e-20, 7.9379e-20, 1.0959e-19, 4.2645e-20,\n 6.5428e-21, 7.8291e-19, 1.7919e-21, 3.5810e-18, 2.7362e-19, 1.0420e-19,\n 1.3653e-18, 2.9795e-19, 2.8923e-18, 8.4252e-19, 6.3465e-21, 1.5863e-18,\n 1.4160e-20, 1.1549e-21, 4.4507e-19, 2.8187e-20, 6.2942e-19, 2.7597e-18,\n 2.3235e-22, 3.6624e-21, 4.0839e-19, 2.1869e-19, 5.1266e-22, 1.1033e-19,\n 1.2647e-20, 2.0661e-18, 2.4146e-19, 1.0882e-19, 1.1658e-18, 2.0735e-20,\n 2.0448e-18, 2.0482e-19, 2.0738e-18, 1.1379e-18, 1.8568e-19, 3.9360e-20,\n 8.4374e-20, 7.9189e-19, 1.3793e-18, 1.7836e-19, 1.3779e-18, 1.7287e-21,\n 9.9139e-19, 5.9042e-20, 4.5915e-19, 1.0168e-18], device='cuda:0')" + }, + "27": { + "step": "tensor(21284.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.2939e-20, 2.0719e-22, 4.1859e-21, ..., 3.2328e-22, 1.1658e-21,\n 5.7196e-23],\n [7.1671e-18, 7.0922e-18, 4.9330e-21, ..., 4.9991e-19, 9.6064e-19,\n 2.0103e-19],\n [5.7435e-19, 6.7349e-19, 2.7388e-22, ..., 5.3126e-20, 7.8801e-20,\n 3.3085e-20],\n ...,\n [5.9405e-18, 6.3290e-18, 4.3392e-21, ..., 3.4559e-19, 8.1912e-19,\n 1.4184e-19],\n [8.2572e-20, 1.1379e-19, 1.6296e-21, ..., 9.2632e-21, 7.2502e-21,\n 2.3152e-21],\n [1.0454e-17, 1.0831e-17, 2.9731e-21, ..., 8.0526e-19, 1.3395e-18,\n 3.2421e-19]], device='cuda:0')" + }, + "28": { + "step": "tensor(21284.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.3896e-18, 3.8880e-15, 4.0122e-16, 8.2742e-16, 1.5825e-16, 1.3126e-17,\n 4.0481e-16, 2.0052e-15, 1.1719e-15, 1.4582e-17, 4.6304e-17, 2.7826e-17,\n 6.2708e-17, 6.0273e-17, 1.1933e-17, 5.8709e-17, 2.0823e-18, 2.2375e-16,\n 1.7715e-16, 1.4304e-15, 2.4708e-17, 6.3077e-16, 3.9103e-15, 7.2820e-16,\n 1.0594e-15, 7.5294e-18, 2.2520e-17, 4.2077e-15, 9.5162e-20, 6.8809e-18,\n 2.2104e-17, 8.1326e-16, 4.6610e-16, 3.3405e-16, 7.2375e-17, 1.9293e-16,\n 1.3186e-16, 4.7604e-17, 3.1990e-16, 6.5319e-17, 5.9965e-17, 3.0411e-16,\n 2.7925e-15, 2.6591e-16, 4.7451e-17, 2.4845e-16, 9.4597e-17, 2.0776e-16,\n 4.3419e-18, 4.7546e-16, 2.8738e-16, 6.1382e-15, 4.8054e-17, 1.1249e-16,\n 1.8409e-17, 1.6222e-15, 1.4348e-14, 3.1160e-15, 2.3565e-16, 3.3397e-16,\n 2.7122e-15, 4.7365e-17, 5.3711e-16, 5.7418e-15, 1.3071e-16, 2.0987e-15,\n 1.4225e-16, 1.3530e-16, 5.1182e-15, 1.0458e-15, 7.3605e-19, 6.5735e-17,\n 4.4741e-17, 6.3534e-17, 4.9648e-19, 2.0632e-17, 1.9976e-18, 7.8672e-18,\n 4.1473e-16, 3.5233e-17, 3.7506e-18, 1.0730e-15, 1.6170e-16, 7.9065e-16,\n 1.6501e-18, 1.2742e-16, 8.8761e-16, 1.4641e-17, 3.1763e-15, 1.9010e-17,\n 6.3641e-16, 2.6213e-16, 8.9665e-16, 1.3173e-16, 5.9817e-16, 6.6325e-15,\n 1.0118e-17, 4.5677e-18, 3.9615e-16, 1.6197e-16, 4.7216e-18, 6.8480e-17,\n 3.0767e-17, 3.6286e-16, 8.0128e-18, 1.1260e-16, 5.0415e-16, 1.9751e-19,\n 7.6735e-17, 3.8249e-18, 3.0240e-19, 5.4093e-17, 4.3227e-19, 3.0958e-17,\n 5.2209e-16, 8.9984e-16, 5.5647e-16, 4.2498e-15, 4.2199e-18, 2.6480e-17,\n 1.7567e-15, 3.4994e-18, 6.7786e-18, 5.5260e-19, 7.4697e-18, 6.3689e-18,\n 4.5314e-16, 7.0456e-17, 8.2802e-17, 3.4254e-16, 1.6555e-16, 2.6928e-17,\n 6.7405e-17, 2.0928e-17, 4.5042e-17, 1.2921e-19, 1.1491e-16, 9.5990e-17,\n 3.6577e-16, 6.7200e-15, 4.6668e-16, 2.6898e-18, 2.3312e-16, 1.1438e-14,\n 2.9501e-16, 5.0604e-17, 3.1252e-17, 4.0620e-17, 3.2384e-17, 8.1561e-17,\n 1.5889e-17, 1.1078e-17, 8.7061e-18, 8.4443e-16, 7.1453e-18, 3.5013e-18,\n 3.5896e-18, 1.6244e-15, 4.2812e-17, 2.7533e-16, 2.4993e-16, 2.1892e-19,\n 6.8864e-18, 2.6020e-15, 1.6112e-17, 1.6850e-15, 1.5490e-18, 3.2731e-16,\n 4.8576e-15, 7.7700e-17, 2.4948e-15, 5.4826e-17, 1.1726e-16, 4.3348e-19,\n 5.9646e-18, 6.3872e-17, 2.5311e-16, 3.3266e-19, 2.0860e-15, 4.4479e-17,\n 1.0045e-15, 7.1386e-16, 3.5104e-17, 2.4289e-17, 3.1456e-18, 1.2938e-16,\n 3.6062e-17, 1.4820e-17, 5.2806e-17, 2.3347e-16, 1.2461e-15, 7.4080e-20,\n 4.1225e-16, 6.8458e-16, 5.3951e-16, 1.6895e-14, 5.4099e-18, 2.9417e-16,\n 5.2532e-17, 4.3463e-16, 1.4939e-16, 4.8714e-16, 1.5353e-16, 8.1844e-17,\n 1.6704e-16, 1.5044e-16, 8.7217e-19, 3.2060e-16, 7.6265e-19, 3.6681e-15,\n 2.7903e-17, 6.8621e-18, 5.6492e-18, 3.6971e-15, 1.1497e-15, 2.7487e-16,\n 2.0335e-16, 3.1767e-15, 1.0451e-14, 2.8555e-18, 8.7769e-18, 9.4170e-16,\n 4.1989e-18, 6.1880e-16, 4.7957e-17, 1.3043e-17, 2.9413e-15, 2.3729e-15,\n 2.6277e-18, 9.9566e-18, 2.1987e-15, 1.2199e-16, 1.7247e-16, 9.2177e-17,\n 1.1428e-16, 2.0457e-18, 6.9661e-17, 2.2944e-18, 3.4986e-16, 5.7748e-17,\n 1.6943e-18, 2.9118e-16, 2.4277e-15, 1.1999e-16, 3.7385e-16, 9.6307e-17,\n 2.8012e-15, 2.6877e-17, 1.1264e-15, 1.9668e-16, 3.3994e-17, 1.4971e-19,\n 8.9866e-17, 2.6752e-15, 5.0567e-17, 5.5479e-15], device='cuda:0')" + }, + "29": { + "step": "tensor(21284.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.0553e-20, 6.5309e-18, 1.5795e-19, 1.7594e-20, 2.6501e-20, 6.8182e-20,\n 9.2264e-21, 7.4587e-19, 2.0527e-19, 4.2330e-20, 1.6109e-19, 8.0644e-21,\n 3.9721e-20, 3.3968e-19, 4.3170e-20, 1.0185e-19, 9.5845e-22, 3.8167e-19,\n 7.5789e-21, 2.2991e-18, 4.5648e-20, 1.6190e-19, 1.7955e-17, 2.9914e-19,\n 3.0567e-18, 1.2545e-21, 5.5117e-21, 2.7588e-18, 3.3697e-21, 1.8285e-22,\n 5.9770e-21, 6.4070e-19, 1.0339e-19, 5.7032e-19, 8.1506e-21, 1.7687e-19,\n 1.1937e-20, 8.2712e-21, 3.2524e-19, 5.6991e-21, 1.2586e-19, 6.8121e-20,\n 3.6980e-18, 3.1274e-19, 9.8688e-20, 7.5678e-19, 4.9434e-21, 8.5045e-20,\n 7.0440e-21, 5.7193e-19, 1.5147e-19, 1.8201e-17, 2.8025e-20, 8.2026e-21,\n 4.2468e-20, 1.2962e-18, 3.1620e-17, 1.4035e-18, 8.8271e-20, 1.9261e-19,\n 3.7425e-19, 8.9817e-20, 1.4508e-19, 2.8597e-18, 6.5933e-21, 1.0087e-18,\n 1.2903e-19, 7.9786e-19, 1.9165e-17, 1.6665e-18, 8.5937e-21, 1.3405e-19,\n 1.7243e-20, 1.6477e-19, 6.3917e-20, 2.8257e-21, 2.6464e-20, 9.3213e-22,\n 1.1001e-19, 2.9722e-20, 9.6950e-23, 6.7455e-19, 7.9903e-21, 6.9794e-18,\n 2.2587e-21, 1.5355e-18, 1.2451e-19, 1.6178e-20, 6.5214e-18, 5.6411e-20,\n 1.6584e-18, 2.0021e-20, 2.5041e-19, 8.5864e-21, 1.2357e-19, 7.0359e-18,\n 3.1978e-19, 1.2865e-21, 2.7448e-19, 1.0714e-19, 3.6035e-20, 4.7863e-21,\n 1.3748e-20, 5.2322e-20, 1.0729e-20, 3.8872e-19, 1.0716e-18, 2.7055e-22,\n 1.6895e-20, 1.6113e-20, 8.3499e-21, 1.4122e-19, 8.4785e-20, 1.8737e-21,\n 2.4162e-18, 1.0789e-18, 3.8141e-19, 9.4962e-18, 2.1019e-22, 7.6883e-21,\n 1.0015e-18, 8.8584e-21, 1.0895e-21, 2.2358e-21, 3.6074e-21, 8.0249e-22,\n 8.4797e-21, 3.7764e-21, 4.7807e-21, 2.7619e-20, 2.9912e-20, 5.5996e-22,\n 2.1779e-21, 1.1692e-20, 1.6408e-20, 3.8418e-21, 6.1268e-20, 9.8060e-20,\n 7.5233e-19, 2.1295e-17, 6.9570e-20, 1.6848e-20, 4.4492e-20, 5.0290e-18,\n 5.4452e-20, 6.1033e-20, 3.2156e-21, 9.3013e-22, 3.8202e-21, 2.7762e-20,\n 5.2264e-21, 1.6359e-21, 1.2241e-20, 2.8061e-18, 1.6816e-21, 7.9929e-21,\n 3.2812e-22, 1.0703e-18, 1.5963e-22, 1.9187e-20, 1.0298e-19, 2.2592e-21,\n 6.2740e-20, 9.2777e-18, 9.3143e-22, 1.5525e-19, 1.5319e-20, 1.7489e-19,\n 6.5094e-18, 1.2398e-19, 5.2826e-18, 1.1035e-21, 1.4256e-19, 2.5343e-21,\n 4.4203e-20, 1.0675e-19, 3.1915e-20, 9.5208e-21, 4.6008e-19, 1.4291e-21,\n 1.0546e-19, 7.4686e-19, 1.9440e-19, 1.4980e-19, 1.7793e-20, 7.9211e-20,\n 9.1247e-20, 7.2940e-21, 5.4785e-20, 1.3373e-19, 6.0024e-19, 1.9114e-20,\n 7.7363e-20, 5.4010e-19, 5.0905e-20, 4.8989e-17, 1.0110e-22, 2.1055e-20,\n 2.1716e-21, 6.4103e-20, 9.0188e-21, 6.4907e-19, 1.2042e-20, 2.0466e-19,\n 4.4539e-20, 2.6089e-21, 5.8627e-21, 5.8131e-20, 6.0585e-21, 7.5135e-18,\n 1.8544e-20, 2.0626e-21, 4.2538e-21, 1.2456e-17, 4.6539e-20, 4.4024e-20,\n 2.2194e-20, 7.3134e-18, 1.9973e-17, 2.8855e-20, 4.0643e-21, 2.7340e-18,\n 5.4486e-22, 2.4220e-19, 2.4224e-20, 8.8715e-22, 5.5557e-19, 1.7457e-18,\n 9.1002e-23, 8.4513e-21, 3.9278e-19, 9.3707e-20, 8.5365e-19, 2.4114e-18,\n 8.8422e-21, 1.2035e-20, 9.5225e-20, 7.7485e-21, 6.3233e-19, 5.1422e-22,\n 2.5762e-20, 3.6920e-19, 2.8151e-18, 8.6214e-20, 3.2951e-19, 1.5521e-20,\n 2.4637e-18, 8.9158e-22, 8.3658e-19, 4.6966e-19, 4.7436e-21, 9.3977e-23,\n 1.0092e-20, 1.7772e-19, 3.8856e-21, 1.0844e-17], device='cuda:0')" + }, + "30": { + "step": "tensor(21284.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([4.5436e-21, 5.3532e-18, 6.2571e-19, 9.5018e-19, 2.4809e-19, 3.1768e-20,\n 5.5398e-19, 2.7558e-18, 1.6210e-18, 6.7031e-20, 4.5718e-20, 7.4016e-20,\n 2.1715e-19, 4.0575e-19, 9.4851e-20, 3.5130e-20, 3.5785e-21, 4.0992e-19,\n 2.0813e-19, 2.1143e-18, 1.6678e-20, 9.1669e-19, 3.8931e-18, 1.2693e-18,\n 1.7595e-18, 1.4626e-20, 5.3453e-20, 4.7560e-18, 9.8616e-23, 1.1555e-20,\n 2.0916e-20, 1.2268e-18, 6.8186e-19, 1.0667e-18, 1.6998e-20, 4.8028e-19,\n 2.0662e-19, 4.5658e-20, 5.6266e-19, 3.7202e-20, 2.1848e-19, 4.6632e-19,\n 3.9967e-18, 4.9231e-19, 2.5587e-19, 4.6812e-19, 7.5079e-20, 5.1682e-20,\n 2.5094e-21, 5.7766e-19, 4.3142e-19, 8.8239e-18, 9.9406e-20, 7.8708e-20,\n 5.3734e-20, 1.7432e-18, 1.9232e-17, 4.2441e-18, 2.3783e-19, 4.9586e-19,\n 3.5805e-18, 1.4623e-19, 5.3066e-19, 6.9687e-18, 1.9429e-19, 2.9120e-18,\n 2.2376e-19, 4.2863e-19, 7.2703e-18, 1.5763e-18, 7.9378e-22, 4.9293e-20,\n 9.7216e-20, 3.5747e-19, 8.1239e-20, 7.4660e-21, 8.7020e-21, 3.5966e-22,\n 6.2559e-19, 8.6980e-20, 2.4578e-21, 1.3504e-18, 2.1638e-19, 1.8178e-18,\n 1.3399e-20, 4.9709e-19, 1.0117e-18, 8.2102e-21, 4.7072e-18, 1.4860e-20,\n 1.1578e-18, 3.7586e-19, 9.3613e-19, 1.8108e-19, 8.4780e-19, 8.0637e-18,\n 2.1360e-19, 3.7583e-22, 6.8910e-19, 3.4963e-19, 2.0783e-20, 1.0299e-19,\n 9.0889e-20, 4.5508e-19, 3.9594e-21, 5.5207e-19, 8.4860e-19, 4.8553e-21,\n 2.1797e-19, 3.4462e-21, 3.7617e-21, 1.5835e-19, 8.2214e-20, 4.4171e-20,\n 5.7536e-19, 1.3706e-18, 8.7085e-19, 5.1673e-18, 8.1815e-21, 3.3576e-20,\n 1.8770e-18, 1.4550e-21, 1.1078e-20, 1.1908e-21, 4.6472e-20, 1.3201e-20,\n 4.8354e-19, 3.8210e-20, 4.1883e-20, 2.9989e-19, 2.5008e-19, 6.3951e-21,\n 5.6205e-20, 3.1988e-21, 4.4850e-20, 2.2240e-21, 2.4844e-19, 1.5217e-19,\n 6.1865e-19, 9.3904e-18, 6.5299e-19, 1.6253e-21, 4.5823e-19, 1.4370e-17,\n 2.5899e-19, 2.5494e-20, 6.3109e-20, 6.2654e-20, 5.8121e-20, 1.3742e-19,\n 1.2084e-21, 2.7415e-20, 4.0129e-21, 1.3587e-18, 8.5321e-21, 1.2131e-21,\n 3.9648e-23, 2.2674e-18, 3.0330e-20, 3.2327e-19, 1.1646e-19, 1.8506e-22,\n 2.5951e-20, 3.1556e-18, 2.9268e-20, 2.2369e-18, 1.1027e-20, 5.2875e-19,\n 5.5869e-18, 8.8250e-20, 2.5688e-18, 3.4528e-20, 3.7697e-19, 7.6681e-22,\n 2.2361e-20, 3.1245e-20, 2.0693e-19, 2.7055e-21, 2.3230e-18, 6.4989e-20,\n 1.1867e-18, 6.3779e-19, 4.9160e-20, 7.1735e-20, 5.9041e-21, 2.3826e-19,\n 1.9226e-19, 4.4351e-21, 2.5089e-19, 3.8592e-19, 1.7357e-18, 2.8292e-23,\n 6.0376e-19, 9.9026e-19, 5.2382e-19, 2.2556e-17, 4.4540e-22, 4.2294e-19,\n 7.9738e-20, 3.7955e-19, 1.1903e-19, 8.0223e-19, 1.2509e-19, 2.4989e-19,\n 1.7141e-19, 2.1395e-19, 4.2514e-21, 4.6570e-19, 7.4671e-21, 4.7405e-18,\n 6.7788e-21, 1.1838e-21, 1.2395e-21, 4.7466e-18, 1.5334e-18, 2.5592e-19,\n 2.9301e-19, 4.5133e-18, 1.4041e-17, 1.9921e-20, 1.6632e-20, 1.3467e-18,\n 7.4356e-21, 7.2531e-19, 1.8221e-19, 5.8433e-21, 3.8961e-18, 3.0870e-18,\n 2.8317e-21, 2.3996e-20, 2.7547e-18, 2.0721e-19, 2.7433e-19, 5.5938e-19,\n 4.9793e-20, 1.6634e-20, 3.1384e-19, 8.5419e-22, 9.1757e-19, 2.6055e-20,\n 2.2804e-21, 5.6191e-19, 2.7119e-18, 2.6024e-19, 5.4530e-19, 1.5893e-19,\n 3.8818e-18, 4.6038e-20, 1.6675e-18, 3.0760e-19, 5.0455e-20, 1.3609e-22,\n 1.0662e-19, 3.5277e-18, 7.3447e-20, 7.6946e-18], device='cuda:0')" + }, + "31": { + "step": "tensor(21284.)", + "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.3430e-18, 2.4404e-18, 1.2337e-21, ..., 1.7104e-19, 2.7977e-19,\n 8.1303e-20],\n [3.1591e-20, 4.0813e-20, 7.3362e-23, ..., 2.9673e-21, 3.6988e-21,\n 7.9926e-22],\n [2.0711e-18, 2.2796e-18, 9.0095e-22, ..., 1.8224e-19, 2.1973e-19,\n 7.9283e-20],\n ...,\n [1.4879e-17, 1.6706e-17, 3.3315e-22, ..., 1.3508e-18, 1.6711e-18,\n 7.5887e-19],\n [1.0996e-18, 1.2591e-18, 2.6895e-21, ..., 8.8261e-20, 1.5163e-19,\n 4.4655e-20],\n [1.2672e-20, 1.3357e-20, 2.3111e-24, ..., 8.8003e-22, 1.0664e-21,\n 2.4282e-22]], device='cuda:0')" + }, + "32": { + "step": "tensor(21284.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.2237e-15, 1.4736e-17, 1.1764e-15, 4.7395e-17, 1.0081e-18, 1.6119e-17,\n 2.3683e-16, 2.2955e-15, 8.5515e-16, 4.8423e-17, 8.4917e-15, 6.6384e-17,\n 4.3360e-19, 4.1201e-19, 6.5689e-18, 1.5843e-16, 9.8743e-18, 7.6854e-16,\n 1.2751e-15, 4.8034e-16, 8.6416e-16, 2.3355e-17, 7.7324e-17, 3.3849e-15,\n 2.2416e-17, 2.9895e-16, 1.0631e-15, 1.9175e-17, 1.4471e-18, 4.0841e-16,\n 1.6425e-17, 1.4330e-15, 3.7120e-16, 3.0783e-16, 9.1021e-17, 9.6237e-16,\n 2.2823e-17, 1.2195e-16, 8.7507e-16, 6.5174e-17, 3.0353e-17, 2.5013e-16,\n 8.4141e-16, 2.1186e-17, 1.5315e-18, 7.8902e-18, 3.9686e-16, 6.4033e-18,\n 8.1546e-16, 7.4214e-16, 1.2561e-16, 7.5126e-16, 9.9288e-18, 3.7246e-16,\n 1.3979e-16, 1.0751e-15, 8.4812e-16, 3.1707e-15, 1.6906e-16, 5.9591e-16,\n 1.1345e-14, 8.1417e-18, 1.1304e-17, 1.0754e-15, 1.3521e-17, 2.2457e-15,\n 2.7008e-17, 2.2359e-17, 1.8442e-18, 4.3091e-16, 6.2437e-16, 9.0649e-16,\n 7.4856e-16, 2.0326e-16, 5.1000e-18, 5.2650e-18, 9.7549e-17, 7.0329e-17,\n 8.6177e-16, 2.8706e-17, 4.8441e-19, 3.7093e-15, 3.1346e-15, 9.2207e-19,\n 2.7704e-16, 8.0990e-17, 1.2893e-15, 3.9138e-16, 3.7476e-15, 1.5826e-17,\n 9.8295e-16, 4.5219e-16, 5.6696e-17, 1.8429e-17, 1.6095e-16, 2.7731e-15,\n 4.5110e-16, 5.8008e-18, 2.0840e-15, 1.5708e-16, 1.1036e-15, 2.9444e-18,\n 5.2161e-17, 2.5657e-16, 4.4620e-17, 2.2507e-16, 1.1339e-16, 1.4058e-18,\n 1.5222e-16, 9.3398e-18, 6.0329e-16, 5.8212e-21, 5.8473e-18, 2.7812e-18,\n 1.8413e-15, 2.9370e-16, 6.9376e-16, 1.5608e-15, 2.9358e-17, 3.6383e-15,\n 1.6241e-16, 1.4760e-16, 4.0158e-16, 1.8545e-18, 3.3995e-17, 1.3683e-17,\n 1.3722e-17, 2.1107e-16, 4.1517e-17, 1.6402e-17, 1.0083e-17, 2.8645e-16,\n 1.1477e-17, 1.9208e-19, 5.0525e-18, 2.9892e-17, 9.9497e-18, 4.0985e-17,\n 2.1250e-16, 3.5471e-15, 6.8619e-16, 4.1141e-17, 4.4869e-15, 2.4511e-14,\n 1.8301e-15, 6.2738e-17, 2.2414e-17, 9.0104e-16, 5.1109e-19, 8.5438e-17,\n 9.8891e-17, 4.0399e-18, 3.0975e-17, 8.4370e-16, 7.7626e-16, 3.0054e-16,\n 9.9777e-17, 2.0109e-15, 2.4451e-17, 1.3886e-16, 1.2388e-17, 2.0083e-18,\n 3.8076e-16, 8.5118e-16, 9.1470e-18, 5.6599e-16, 6.2204e-16, 2.0353e-17,\n 2.3073e-15, 7.0808e-18, 3.2851e-15, 8.6725e-16, 7.5944e-16, 1.6864e-18,\n 3.1245e-17, 8.6962e-17, 3.4528e-16, 1.3742e-15, 2.0739e-15, 4.2482e-18,\n 5.5478e-15, 4.4374e-18, 4.1725e-15, 4.4254e-17, 2.6778e-17, 2.7063e-16,\n 1.3191e-17, 2.3884e-17, 3.2530e-16, 7.1763e-17, 1.2527e-16, 4.1228e-17,\n 1.8300e-15, 8.6821e-16, 1.1330e-16, 1.6424e-14, 5.5799e-17, 4.7972e-18,\n 3.1572e-17, 4.2049e-17, 2.6090e-15, 1.3312e-17, 5.2191e-16, 6.6197e-18,\n 2.5644e-17, 4.9989e-16, 1.5371e-17, 1.7716e-15, 1.5581e-17, 6.3814e-15,\n 4.3745e-18, 6.0883e-16, 1.3346e-17, 1.6347e-15, 9.4158e-17, 2.9034e-17,\n 1.2647e-18, 6.7806e-17, 1.0911e-14, 9.0110e-16, 1.4484e-16, 9.6482e-18,\n 3.3786e-18, 1.6638e-16, 8.0454e-17, 2.2068e-16, 2.8411e-15, 1.2044e-16,\n 3.7671e-17, 3.0448e-17, 2.1184e-17, 9.8285e-17, 8.3229e-18, 4.8880e-16,\n 1.3150e-16, 6.7239e-16, 6.5588e-17, 3.7303e-18, 3.6995e-16, 1.0188e-15,\n 1.5208e-15, 1.0830e-17, 9.0499e-16, 6.3441e-16, 5.9309e-18, 9.0502e-18,\n 1.0131e-15, 2.8979e-16, 1.4213e-16, 1.5497e-17, 7.8345e-16, 2.8482e-18,\n 6.7261e-16, 9.5974e-15, 7.0833e-16, 6.3694e-18], device='cuda:0')" + }, + "33": { + "step": "tensor(21284.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([6.3408e-20, 5.2277e-20, 4.2004e-19, 3.0906e-20, 1.1040e-19, 7.0705e-20,\n 1.1233e-20, 2.1161e-18, 9.8692e-20, 6.6533e-20, 1.2783e-17, 2.8880e-20,\n 2.2114e-20, 1.2632e-21, 1.3582e-19, 4.4951e-21, 8.5193e-21, 2.1777e-18,\n 7.0123e-19, 2.9891e-19, 5.4880e-19, 8.4358e-20, 3.9389e-21, 6.5333e-18,\n 6.1026e-21, 8.4574e-20, 6.1506e-19, 1.1335e-20, 1.2779e-20, 5.6810e-19,\n 2.8399e-21, 2.7559e-18, 4.7833e-20, 3.5506e-19, 1.3176e-20, 5.4844e-18,\n 1.1338e-20, 2.1495e-20, 4.9634e-19, 1.9386e-21, 1.3305e-20, 4.5402e-20,\n 3.1059e-19, 4.1350e-19, 1.7801e-21, 6.2153e-22, 1.3453e-19, 1.0697e-22,\n 2.7198e-18, 2.3785e-19, 7.7094e-20, 3.4282e-20, 4.2862e-20, 9.5524e-20,\n 1.4298e-19, 2.2375e-19, 1.0504e-19, 1.3728e-18, 3.8914e-20, 2.9802e-19,\n 1.4252e-17, 2.4642e-21, 3.8858e-20, 8.1992e-20, 1.9659e-21, 1.3001e-18,\n 1.4231e-21, 2.3440e-21, 5.8575e-20, 1.0556e-19, 3.1577e-19, 4.1246e-20,\n 6.7857e-20, 5.4050e-19, 3.3942e-20, 1.3533e-21, 1.3173e-21, 1.0778e-20,\n 2.3997e-19, 5.3565e-19, 8.2371e-23, 1.0879e-17, 2.4710e-18, 2.0347e-20,\n 1.2904e-19, 8.5367e-20, 5.9768e-19, 1.5238e-19, 1.3064e-17, 1.1031e-20,\n 4.6222e-18, 3.0647e-18, 1.2523e-21, 1.0061e-19, 1.3854e-20, 6.4341e-19,\n 1.5950e-18, 1.1782e-20, 8.8317e-19, 7.2473e-20, 1.2473e-18, 8.8867e-21,\n 3.3041e-20, 3.2525e-20, 1.0649e-20, 1.9849e-18, 5.7866e-21, 1.3013e-20,\n 2.5854e-20, 3.9263e-21, 2.5672e-19, 1.1774e-21, 2.6388e-20, 1.5461e-20,\n 2.9447e-18, 1.4711e-18, 6.0854e-19, 5.4208e-19, 5.8365e-21, 4.9666e-18,\n 6.1484e-21, 4.9986e-21, 7.4251e-19, 4.8706e-22, 2.2587e-20, 2.8429e-21,\n 1.2279e-20, 4.2040e-20, 4.4892e-20, 2.5805e-20, 5.4799e-20, 1.5246e-19,\n 6.7294e-21, 1.6765e-22, 3.1338e-21, 5.8002e-21, 2.5309e-20, 1.3535e-20,\n 1.0769e-19, 2.9669e-18, 1.5626e-18, 1.4016e-20, 5.0290e-18, 9.7884e-17,\n 1.8725e-18, 1.9469e-20, 2.7794e-21, 5.8697e-19, 1.1566e-21, 1.4310e-19,\n 4.0219e-19, 5.5198e-21, 4.6692e-20, 6.5890e-19, 1.2647e-19, 1.9616e-18,\n 7.4107e-21, 8.3832e-19, 7.2845e-21, 3.9396e-19, 3.1575e-21, 4.0423e-21,\n 1.0432e-20, 8.6270e-19, 5.1871e-21, 5.2082e-21, 1.5277e-18, 1.6929e-21,\n 3.1399e-18, 5.5041e-21, 7.2461e-18, 2.6815e-19, 1.0201e-18, 9.1990e-22,\n 7.3933e-21, 3.5795e-20, 8.9911e-20, 4.9360e-19, 3.0416e-19, 1.5923e-20,\n 3.8759e-18, 9.2096e-21, 1.1883e-18, 1.1161e-19, 2.4811e-21, 8.6632e-20,\n 5.8390e-21, 3.6411e-20, 7.5060e-19, 3.1754e-21, 6.7253e-21, 1.0104e-20,\n 7.8918e-18, 1.2175e-18, 4.7634e-21, 4.0535e-17, 1.4402e-21, 1.6527e-20,\n 3.4277e-21, 5.1951e-21, 2.0554e-18, 6.3124e-20, 1.0489e-19, 2.5296e-20,\n 2.3463e-20, 1.0534e-19, 1.4895e-19, 9.1590e-19, 8.2367e-21, 2.0037e-17,\n 1.0969e-20, 1.7008e-19, 1.2263e-21, 2.3713e-18, 4.9424e-20, 5.1969e-20,\n 6.5547e-21, 4.3043e-21, 1.6859e-17, 2.4808e-18, 2.7008e-19, 2.4041e-21,\n 2.2281e-22, 2.9145e-19, 3.6178e-20, 4.3620e-20, 5.7020e-19, 1.4132e-20,\n 1.9105e-21, 7.2293e-21, 9.3889e-20, 2.9785e-21, 1.6136e-21, 7.9709e-19,\n 2.2932e-20, 2.0283e-19, 1.9817e-20, 7.2624e-22, 2.4255e-18, 1.0392e-18,\n 2.9461e-18, 1.9376e-21, 8.2948e-20, 3.0658e-19, 2.0837e-21, 3.6021e-22,\n 4.0496e-19, 1.3825e-19, 1.0670e-20, 2.0425e-20, 1.9965e-19, 9.1829e-22,\n 8.4637e-19, 7.3335e-18, 3.3877e-19, 6.6968e-20], device='cuda:0')" + }, + "34": { + "step": "tensor(21284.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.4919e-18, 1.9517e-20, 1.6806e-18, 4.0994e-20, 1.1176e-19, 2.5702e-20,\n 3.4327e-19, 3.2789e-18, 1.1861e-18, 1.0924e-19, 1.0148e-17, 2.7138e-19,\n 7.6775e-21, 1.4331e-21, 1.5649e-19, 1.5438e-19, 2.6004e-20, 1.2552e-18,\n 1.4197e-18, 9.6013e-19, 1.3557e-18, 3.4630e-20, 6.3976e-20, 4.7146e-18,\n 1.1014e-19, 4.4327e-19, 1.4162e-18, 5.8178e-21, 4.3058e-21, 6.2860e-19,\n 1.3401e-20, 1.9342e-18, 5.4760e-19, 8.8731e-19, 7.9395e-21, 1.8393e-18,\n 7.9928e-20, 5.3875e-20, 1.2667e-18, 8.3237e-21, 8.8188e-20, 3.8662e-19,\n 1.4038e-18, 1.9821e-19, 1.7183e-20, 1.9225e-20, 3.5472e-19, 1.1555e-21,\n 1.2131e-18, 9.6410e-19, 1.9277e-19, 1.0351e-18, 3.6282e-20, 2.8406e-19,\n 2.3629e-19, 1.2148e-18, 1.1392e-18, 4.2901e-18, 1.9029e-19, 7.7217e-19,\n 1.4948e-17, 1.3973e-20, 9.9647e-21, 1.3133e-18, 2.2740e-20, 3.1022e-18,\n 3.1999e-22, 4.0879e-20, 6.1964e-21, 6.3142e-19, 8.7924e-19, 9.6491e-19,\n 9.8986e-19, 6.0380e-19, 3.9483e-20, 7.5638e-22, 1.4569e-19, 4.9126e-20,\n 1.1692e-18, 2.7433e-19, 8.8243e-22, 4.2894e-18, 4.2395e-18, 7.9988e-21,\n 2.7624e-19, 1.9209e-19, 1.5035e-18, 5.7696e-19, 5.5345e-18, 1.1299e-20,\n 1.7898e-18, 1.0778e-18, 2.7957e-20, 2.9126e-20, 2.4521e-19, 3.5250e-18,\n 8.4942e-19, 2.9768e-20, 2.8897e-18, 3.9722e-19, 1.6574e-18, 3.3658e-21,\n 2.4412e-19, 3.1167e-19, 2.4218e-20, 9.9680e-19, 1.6873e-19, 3.9373e-21,\n 2.9804e-19, 2.0448e-21, 8.8035e-19, 2.0015e-22, 5.1885e-20, 7.0105e-21,\n 2.2301e-18, 6.4087e-19, 1.0628e-18, 1.9908e-18, 5.5052e-20, 4.9382e-18,\n 1.2447e-19, 1.5723e-19, 6.7689e-19, 9.1774e-21, 1.5399e-19, 1.0093e-21,\n 4.8974e-21, 1.0993e-19, 1.8123e-20, 8.6777e-21, 1.9492e-20, 1.3534e-19,\n 1.1392e-20, 9.8566e-21, 1.4715e-20, 5.1498e-20, 9.6137e-21, 7.4301e-20,\n 3.4123e-19, 4.8244e-18, 1.1019e-18, 2.9458e-20, 5.7685e-18, 3.0443e-17,\n 1.8193e-18, 7.9743e-20, 9.5299e-20, 1.3113e-18, 1.4440e-21, 1.2378e-19,\n 4.9872e-19, 2.3332e-20, 1.5536e-20, 1.2372e-18, 1.0949e-18, 4.7980e-19,\n 4.2455e-20, 2.7451e-18, 4.5549e-20, 1.5558e-19, 9.8941e-22, 2.3808e-21,\n 5.2497e-19, 1.1801e-18, 1.3406e-21, 7.7222e-19, 1.0505e-18, 4.1820e-20,\n 2.5098e-18, 1.7504e-21, 3.5692e-18, 9.6759e-19, 1.2995e-18, 2.1966e-21,\n 6.5633e-20, 1.2405e-19, 3.0104e-19, 1.6606e-18, 2.3298e-18, 6.0128e-21,\n 6.5848e-18, 7.2301e-22, 5.5811e-18, 1.2455e-19, 5.1046e-20, 4.2928e-19,\n 7.1803e-20, 1.5497e-20, 8.3080e-19, 1.1570e-19, 1.8702e-19, 5.9386e-20,\n 3.0449e-18, 1.2327e-18, 8.6059e-20, 2.1709e-17, 2.0038e-21, 9.0466e-21,\n 5.4083e-20, 2.6259e-20, 2.8092e-18, 9.9695e-20, 4.8514e-19, 8.7271e-20,\n 2.1194e-20, 7.2695e-19, 6.7909e-20, 2.4268e-18, 4.6913e-20, 8.0346e-18,\n 4.4089e-22, 6.4857e-19, 5.8582e-21, 2.2572e-18, 1.2705e-19, 1.7336e-20,\n 2.1914e-21, 1.0515e-19, 1.4493e-17, 1.5157e-18, 3.9668e-19, 5.8150e-20,\n 1.0102e-20, 2.5351e-19, 2.6457e-19, 1.8388e-19, 3.8178e-18, 2.4041e-19,\n 6.6175e-20, 6.6893e-21, 2.7066e-20, 1.5185e-19, 8.0249e-22, 9.2363e-19,\n 6.0953e-20, 9.9674e-19, 2.5199e-19, 8.9579e-21, 1.2366e-18, 9.5119e-19,\n 2.2252e-18, 2.2628e-20, 1.0151e-18, 9.8140e-19, 4.7118e-20, 1.8735e-20,\n 1.4263e-18, 4.4874e-19, 2.0837e-19, 9.5035e-20, 1.1203e-18, 2.6667e-22,\n 7.5710e-19, 1.2654e-17, 1.0012e-18, 1.9765e-20], device='cuda:0')" + }, + "35": { + "step": "tensor(21284.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.2324e-19, 1.4548e-19, 7.5888e-23, ..., 1.1017e-20, 1.2693e-20,\n 5.4309e-21],\n [6.3151e-21, 3.6000e-21, 3.6848e-22, ..., 1.5549e-22, 1.8144e-21,\n 8.3753e-22],\n [4.3723e-18, 5.0218e-18, 6.1933e-21, ..., 3.5477e-19, 6.0500e-19,\n 1.5729e-19],\n ...,\n [1.4634e-17, 1.5062e-17, 1.6484e-20, ..., 9.9154e-19, 1.7737e-18,\n 4.8775e-19],\n [4.0543e-22, 1.3990e-23, 1.5329e-22, ..., 7.6923e-23, 8.9332e-23,\n 2.6076e-23],\n [8.2004e-18, 9.3390e-18, 5.9626e-21, ..., 6.9884e-19, 1.1212e-18,\n 2.9181e-19]], device='cuda:0')" + }, + "36": { + "step": "tensor(21284.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([7.1271e-17, 2.3348e-19, 2.5660e-15, 1.1579e-14, 2.8285e-16, 6.0360e-18,\n 1.6927e-15, 9.9558e-16, 7.0855e-16, 1.6348e-18, 9.6103e-17, 5.2456e-19,\n 5.4493e-16, 3.2516e-17, 1.6889e-17, 2.1714e-16, 2.9157e-17, 2.7403e-16,\n 1.0417e-17, 2.3433e-16, 5.9592e-17, 1.3386e-18, 1.7741e-15, 3.5927e-15,\n 2.8479e-16, 4.2960e-17, 4.3399e-16, 3.8405e-16, 1.6063e-17, 9.1912e-17,\n 1.3915e-16, 2.1464e-16, 4.3086e-16, 3.4618e-16, 3.8432e-18, 2.9372e-16,\n 1.0192e-15, 1.3956e-16, 7.6708e-16, 5.8481e-18, 1.5630e-15, 9.0978e-18,\n 9.8953e-16, 1.1926e-15, 2.4374e-17, 3.0722e-18, 9.8187e-17, 3.9350e-17,\n 1.2169e-17, 1.5171e-15, 3.1158e-16, 1.4816e-15, 4.6968e-19, 1.8843e-16,\n 5.7950e-17, 1.7300e-17, 4.1188e-15, 6.1532e-17, 3.7166e-16, 6.3506e-19,\n 1.9435e-18, 4.4379e-18, 5.6494e-16, 2.8362e-16, 2.2050e-16, 1.3360e-15,\n 2.7041e-16, 9.7833e-18, 3.1920e-16, 1.2787e-15, 2.5993e-16, 1.5669e-16,\n 1.0476e-16, 1.5698e-18, 2.8156e-19, 1.3496e-16, 2.3930e-16, 2.7838e-17,\n 2.3518e-15, 2.3817e-19, 3.2826e-18, 2.5886e-16, 3.5340e-15, 7.4942e-19,\n 2.8634e-16, 8.6084e-18, 3.0485e-16, 3.6285e-17, 4.5647e-16, 3.1835e-15,\n 4.6456e-16, 1.1314e-16, 1.9104e-17, 3.2142e-16, 2.1253e-15, 1.1214e-14,\n 4.3472e-18, 5.8974e-18, 7.0333e-17, 2.0385e-16, 3.9618e-16, 7.6284e-17,\n 1.0256e-16, 3.5562e-16, 2.1765e-15, 5.2045e-18, 4.5244e-19, 1.2896e-16,\n 1.5794e-16, 1.2216e-15, 1.2192e-15, 1.6160e-18, 9.9470e-19, 1.2790e-17,\n 1.7384e-17, 3.9989e-16, 3.8575e-16, 1.0400e-16, 4.5469e-16, 2.0637e-17,\n 3.5414e-15, 1.2572e-15, 1.5368e-18, 1.1503e-16, 3.4664e-16, 1.0061e-17,\n 3.5139e-18, 8.6543e-17, 2.9683e-18, 2.0416e-16, 2.8656e-17, 1.2054e-16,\n 1.0396e-15, 1.5862e-16, 1.0961e-16, 9.8462e-17, 4.3234e-16, 4.5659e-17,\n 8.6320e-17, 1.0461e-18, 1.6051e-17, 2.3167e-15, 6.6777e-16, 7.2476e-17,\n 2.3486e-15, 1.2704e-18, 4.2574e-19, 5.2840e-16, 7.3187e-17, 2.3609e-17,\n 1.4624e-17, 1.6158e-16, 8.6968e-17, 3.6059e-18, 1.5628e-17, 2.3399e-17,\n 1.4695e-17, 3.9623e-15, 1.1768e-15, 4.0275e-15, 8.9912e-17, 1.2379e-16,\n 4.5264e-15, 1.2985e-19, 8.5978e-17, 1.2655e-14, 1.4530e-15, 2.7041e-16,\n 3.4115e-15, 4.7078e-18, 8.3694e-17, 7.4230e-16, 1.5065e-16, 3.2304e-20,\n 9.9945e-17, 3.8813e-15, 2.2368e-17, 4.8696e-18, 5.7755e-15, 1.7097e-15,\n 2.0386e-18, 1.7817e-17, 2.1938e-18, 1.0507e-16, 4.1852e-16, 2.3677e-19,\n 1.4803e-17, 3.3670e-15, 1.7794e-17, 4.8721e-19, 3.7486e-18, 6.7467e-16,\n 7.9058e-16, 6.5898e-16, 8.6325e-16, 9.6872e-17, 1.0832e-18, 2.7358e-15,\n 3.6040e-16, 1.4823e-16, 2.0456e-16, 9.4346e-17, 8.0919e-17, 1.6477e-17,\n 1.0270e-17, 1.4744e-17, 9.2395e-18, 5.5581e-17, 3.5187e-18, 8.3390e-16,\n 2.9543e-16, 2.3405e-17, 2.6829e-17, 1.2083e-16, 7.4155e-15, 9.8195e-18,\n 4.4550e-17, 4.5548e-18, 2.1369e-15, 3.1745e-15, 3.5823e-19, 1.6564e-16,\n 1.2275e-16, 4.3047e-16, 7.3850e-19, 1.0321e-17, 1.4218e-15, 1.6164e-15,\n 4.5159e-19, 9.9306e-19, 2.3347e-19, 2.4842e-17, 2.3388e-16, 6.9782e-18,\n 5.3847e-17, 6.2310e-16, 1.0587e-16, 1.4971e-19, 8.6277e-16, 1.0779e-17,\n 1.3720e-15, 1.0602e-15, 2.9109e-15, 2.1662e-15, 5.1656e-17, 4.5021e-18,\n 6.9885e-18, 7.5932e-16, 1.7296e-15, 2.2150e-17, 2.1980e-15, 5.5945e-18,\n 3.6526e-16, 7.4997e-15, 8.4096e-20, 4.8030e-15], device='cuda:0')" + }, + "37": { + "step": "tensor(21284.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.0135e-19, 3.2265e-20, 6.0430e-18, 2.1501e-17, 5.0994e-20, 6.3429e-20,\n 7.8458e-19, 3.3034e-19, 1.0146e-19, 3.4314e-22, 2.6111e-19, 1.1907e-20,\n 9.9527e-19, 5.7702e-20, 1.3442e-19, 4.7665e-21, 1.9770e-19, 1.1199e-19,\n 7.9011e-21, 1.8047e-19, 2.4568e-20, 9.6635e-21, 6.0652e-19, 1.7770e-17,\n 6.9912e-19, 1.2286e-20, 3.5838e-19, 1.4397e-20, 4.4057e-20, 1.0691e-20,\n 1.2706e-19, 4.3420e-19, 4.7993e-20, 4.6042e-19, 9.3914e-22, 2.0492e-18,\n 2.5975e-18, 6.0616e-20, 1.2971e-18, 2.2509e-20, 4.5585e-18, 3.4597e-21,\n 8.1280e-19, 4.1467e-18, 1.3081e-20, 5.3774e-20, 7.4157e-21, 1.2189e-21,\n 3.6253e-21, 1.1864e-18, 3.0365e-19, 4.1317e-19, 1.7848e-21, 1.4083e-20,\n 7.2976e-21, 2.6855e-21, 1.0722e-18, 2.5955e-19, 3.1415e-20, 9.3927e-21,\n 5.8243e-20, 5.1626e-21, 3.3353e-19, 8.3847e-21, 3.7820e-20, 3.5894e-19,\n 3.2015e-19, 1.7913e-21, 1.8931e-20, 2.8302e-18, 1.1430e-19, 7.4781e-21,\n 2.8627e-20, 8.3114e-21, 1.3083e-21, 8.6402e-21, 6.3157e-20, 7.7205e-21,\n 2.8746e-18, 2.3572e-22, 3.1520e-21, 2.2352e-20, 3.7075e-18, 2.1171e-20,\n 4.8214e-19, 1.0268e-21, 1.0010e-19, 6.8419e-21, 1.7801e-19, 1.8827e-18,\n 4.2277e-19, 1.9920e-20, 1.6201e-20, 1.4232e-20, 3.3777e-18, 4.7390e-17,\n 2.4404e-20, 1.3684e-20, 2.5282e-20, 5.3455e-19, 5.7457e-20, 5.0986e-21,\n 2.8010e-19, 1.1478e-19, 1.6707e-18, 5.8821e-22, 1.3055e-21, 4.5161e-19,\n 3.2235e-20, 1.5781e-18, 4.7823e-18, 1.5538e-21, 3.9087e-22, 1.2824e-21,\n 2.3457e-20, 1.5004e-19, 2.9789e-19, 1.3513e-20, 1.6184e-19, 9.7529e-20,\n 2.6287e-18, 2.7256e-18, 3.7164e-22, 1.3141e-19, 2.5613e-18, 1.0087e-21,\n 9.0053e-21, 1.1123e-20, 1.6718e-21, 4.3731e-20, 2.3828e-21, 5.5853e-20,\n 5.7871e-19, 3.1420e-20, 1.2861e-20, 4.9095e-21, 4.6160e-19, 2.4670e-21,\n 1.6384e-19, 3.5606e-20, 1.9030e-21, 1.4951e-18, 1.6027e-19, 4.3157e-19,\n 6.6014e-18, 1.7084e-21, 1.8088e-21, 4.9956e-19, 1.4917e-19, 4.2844e-21,\n 5.8265e-21, 1.6090e-19, 3.6745e-21, 1.7227e-21, 8.4902e-22, 7.9963e-19,\n 2.4395e-21, 1.4849e-17, 5.3018e-19, 1.2026e-18, 1.1547e-20, 1.1661e-19,\n 1.9088e-17, 7.6666e-21, 5.9127e-21, 2.3435e-17, 3.5768e-18, 6.1964e-19,\n 3.7772e-18, 2.7116e-20, 1.0159e-22, 3.4355e-19, 1.1336e-19, 2.5545e-20,\n 3.0217e-21, 4.1301e-18, 1.8545e-20, 1.1635e-20, 6.5094e-18, 3.4206e-18,\n 3.1059e-20, 3.8775e-20, 1.0032e-19, 8.2285e-20, 8.2456e-20, 2.1565e-21,\n 3.5447e-21, 5.0954e-18, 5.4735e-21, 2.7653e-21, 2.6562e-20, 1.0163e-19,\n 2.6325e-18, 1.1185e-18, 3.2079e-19, 4.2051e-19, 7.6428e-23, 1.4759e-17,\n 4.1499e-19, 4.9485e-21, 2.1070e-20, 3.0587e-21, 9.3075e-21, 1.9193e-19,\n 1.3971e-21, 2.6873e-21, 1.1891e-19, 3.4139e-22, 3.8642e-20, 3.1083e-19,\n 2.0396e-19, 1.9765e-21, 1.0363e-21, 3.4876e-20, 9.9514e-18, 3.1136e-22,\n 7.9260e-22, 3.3242e-21, 2.2551e-19, 7.2428e-18, 4.4550e-21, 4.9156e-20,\n 9.3232e-20, 8.1245e-19, 1.6092e-20, 1.6817e-20, 9.0808e-20, 6.1686e-19,\n 9.7649e-21, 5.6347e-21, 3.7516e-20, 5.7562e-21, 5.0139e-19, 4.6120e-21,\n 1.7495e-21, 1.0054e-18, 1.1263e-19, 6.7916e-22, 3.0313e-18, 2.5967e-21,\n 2.4113e-19, 1.4347e-18, 3.4918e-18, 6.6389e-18, 1.9247e-20, 2.2263e-20,\n 4.1766e-20, 5.1461e-19, 5.9194e-18, 1.3551e-20, 5.2433e-18, 4.0376e-22,\n 1.9832e-19, 5.4553e-18, 2.6471e-21, 3.1809e-17], device='cuda:0')" + }, + "38": { + "step": "tensor(21284.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([7.3785e-20, 9.9308e-23, 4.0610e-18, 1.4486e-17, 4.4620e-19, 1.4563e-20,\n 2.4386e-18, 1.5670e-18, 1.0605e-18, 1.1844e-21, 7.2635e-20, 4.4264e-21,\n 1.1194e-18, 2.1597e-19, 1.4956e-19, 2.0910e-19, 1.7177e-19, 4.5329e-19,\n 9.1838e-21, 5.7950e-19, 1.9673e-19, 5.5218e-21, 2.2107e-18, 5.4339e-18,\n 8.7105e-19, 5.2392e-20, 7.6653e-19, 3.9359e-19, 1.6723e-20, 1.5680e-19,\n 2.1536e-19, 5.5856e-19, 6.5050e-19, 1.0384e-18, 8.7390e-21, 9.7941e-19,\n 1.7939e-18, 5.6552e-20, 1.2808e-18, 3.3447e-20, 2.1945e-18, 1.8817e-20,\n 1.8407e-18, 1.6651e-18, 1.2513e-19, 8.9791e-20, 6.9225e-20, 4.2425e-21,\n 1.2117e-21, 1.9747e-18, 4.8573e-19, 2.1503e-18, 1.6274e-21, 1.4052e-19,\n 3.0171e-20, 1.2205e-20, 5.7405e-18, 1.0072e-19, 4.5565e-19, 8.3087e-21,\n 1.8789e-23, 4.1467e-20, 5.5434e-19, 3.7362e-19, 3.3961e-19, 1.9701e-18,\n 3.6603e-19, 1.9862e-20, 4.7385e-19, 2.0220e-18, 4.0779e-19, 1.3408e-19,\n 2.6356e-19, 2.7206e-21, 8.3302e-22, 1.0684e-19, 3.9008e-19, 6.8488e-21,\n 3.2343e-18, 9.8698e-22, 7.1637e-21, 3.2376e-19, 5.0670e-18, 5.1124e-21,\n 3.5025e-19, 2.3556e-20, 2.7586e-19, 6.6489e-20, 7.6057e-19, 4.1052e-18,\n 7.8734e-19, 2.0179e-19, 5.8748e-21, 4.8133e-19, 3.1397e-18, 1.4575e-17,\n 1.1705e-20, 3.6903e-20, 2.0604e-19, 5.8139e-19, 6.0404e-19, 1.2482e-19,\n 4.8129e-19, 3.9620e-19, 2.6130e-18, 4.2239e-20, 1.5821e-21, 4.9865e-19,\n 2.7577e-19, 1.8303e-18, 2.0365e-18, 3.0179e-20, 9.5537e-21, 2.0294e-20,\n 7.7088e-21, 6.4202e-19, 6.3695e-19, 1.7940e-19, 6.9230e-19, 3.4425e-20,\n 4.1680e-18, 1.5068e-18, 3.5491e-21, 2.4249e-19, 1.1292e-18, 2.2362e-20,\n 8.6085e-23, 7.0657e-21, 5.1161e-24, 1.3403e-19, 5.1442e-20, 1.9602e-19,\n 1.1418e-18, 2.6820e-19, 1.7705e-19, 1.5375e-19, 8.4008e-19, 1.9280e-20,\n 1.6968e-19, 1.9736e-21, 2.6969e-20, 2.6815e-18, 1.0963e-18, 1.0197e-19,\n 2.3873e-18, 3.0376e-20, 2.7618e-22, 8.4248e-19, 1.8924e-19, 1.4074e-20,\n 5.4590e-20, 4.9861e-19, 7.5659e-20, 5.9552e-21, 2.6482e-20, 2.4721e-19,\n 7.2103e-22, 5.7834e-18, 1.7291e-18, 5.0962e-18, 2.9755e-20, 2.2249e-19,\n 6.7902e-18, 8.7479e-23, 1.4290e-19, 1.7659e-17, 2.3146e-18, 5.5660e-19,\n 4.3176e-18, 3.9730e-20, 6.6576e-20, 7.5643e-19, 4.8883e-19, 5.6824e-20,\n 1.6233e-19, 4.9522e-18, 6.8902e-21, 2.4433e-21, 6.9844e-18, 2.5803e-18,\n 2.6681e-21, 1.3224e-20, 3.2026e-21, 1.5149e-19, 6.3632e-19, 1.0618e-21,\n 2.4393e-20, 3.9293e-18, 8.9076e-20, 1.0785e-21, 1.3170e-21, 7.6234e-19,\n 1.4980e-18, 1.0034e-18, 9.1623e-19, 1.5163e-19, 3.9720e-22, 4.2560e-18,\n 6.7189e-19, 1.2607e-19, 1.5284e-19, 1.5293e-19, 6.3542e-20, 2.1351e-19,\n 4.0466e-22, 2.5693e-20, 6.1304e-20, 9.3752e-20, 6.9778e-21, 1.3769e-18,\n 2.8731e-19, 3.1450e-20, 4.9498e-20, 2.8377e-19, 1.0392e-17, 2.5413e-21,\n 7.2410e-20, 8.1278e-21, 3.0042e-18, 4.7919e-18, 2.9071e-22, 3.0007e-19,\n 7.6846e-20, 6.7608e-19, 4.5099e-21, 6.0969e-21, 1.9818e-18, 2.2244e-18,\n 1.1300e-21, 2.3788e-21, 3.3115e-22, 4.2698e-20, 3.7551e-19, 7.3537e-20,\n 2.0725e-20, 1.2237e-18, 3.3660e-19, 2.6266e-23, 1.6640e-18, 1.0016e-21,\n 1.9676e-18, 1.7009e-18, 3.5172e-18, 3.0344e-18, 1.0633e-19, 1.2790e-20,\n 1.6520e-20, 1.1643e-18, 2.9148e-18, 5.7968e-20, 3.5769e-18, 5.0946e-21,\n 4.1490e-19, 1.0424e-17, 5.3204e-22, 7.7211e-18], device='cuda:0')" + }, + "39": { + "step": "tensor(21284.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.8239e-21, 1.0315e-19, 2.0907e-20, ..., 8.1621e-23, 9.9647e-21,\n 3.2148e-19],\n [4.0766e-20, 5.8082e-21, 1.6573e-21, ..., 4.0396e-20, 5.4935e-23,\n 4.6073e-19],\n [6.5989e-22, 1.6652e-19, 4.2024e-21, ..., 2.7940e-21, 1.0398e-20,\n 2.5881e-19],\n ...,\n [7.9495e-19, 8.5711e-20, 1.3853e-18, ..., 5.5852e-20, 1.7579e-19,\n 1.4122e-17],\n [2.4329e-19, 2.3389e-19, 2.6607e-18, ..., 3.8566e-20, 5.1791e-20,\n 4.3212e-18],\n [7.6080e-19, 1.8240e-17, 2.3021e-16, ..., 5.2884e-18, 1.6416e-19,\n 1.2789e-16]], device='cuda:0')" + }, + "40": { + "step": "tensor(21284.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.2760e-19, 3.1608e-19, 8.5117e-20, 3.1920e-20, 2.6258e-19, 7.3217e-20,\n 2.9308e-20, 9.5638e-20, 2.5318e-20, 6.8606e-20, 2.3218e-19, 5.7597e-20,\n 2.1326e-21, 2.6998e-20, 8.6018e-21, 4.3724e-21, 3.5254e-19, 1.1915e-19,\n 5.9571e-20, 3.1990e-19, 2.7906e-20, 5.3054e-19, 7.3066e-20, 4.0047e-20,\n 1.2705e-19, 2.2551e-19, 1.5784e-21, 6.0992e-21, 1.6149e-19, 2.7518e-19,\n 1.9877e-19, 2.1836e-19, 1.2002e-19, 2.7184e-19, 4.2937e-19, 1.2989e-18,\n 3.6020e-19, 5.4936e-20, 4.1206e-19, 2.7141e-19, 1.1017e-19, 1.6991e-19,\n 2.6035e-19, 1.3829e-19, 2.6900e-19, 1.1211e-19, 2.0515e-19, 8.7436e-20,\n 5.7877e-21, 7.5377e-19, 9.8712e-20, 1.5557e-19, 1.1869e-20, 7.2008e-19,\n 2.0201e-20, 1.2519e-19, 3.0636e-19, 6.4295e-21, 3.7689e-19, 7.7510e-20,\n 1.2585e-18, 7.5896e-19, 2.6254e-20, 8.7769e-20, 5.6996e-21, 2.0725e-20,\n 7.8819e-20, 4.0894e-20, 1.1031e-19, 4.7680e-19, 1.9400e-19, 9.9393e-20,\n 5.2072e-20, 1.4494e-19, 5.1961e-19, 7.6962e-19, 1.7668e-19, 4.3357e-20,\n 2.8868e-19, 2.9315e-20, 5.7501e-19, 6.2035e-20, 1.5317e-20, 1.0541e-20,\n 1.2585e-20, 4.6696e-20, 1.1120e-18, 1.3595e-19, 4.8546e-21, 2.7777e-19,\n 3.5333e-21, 1.6597e-19, 2.2173e-20, 1.4752e-18, 2.5439e-21, 1.0936e-19,\n 2.3814e-19, 1.7557e-21, 5.2207e-21, 1.4163e-19, 4.1404e-19, 1.1716e-21,\n 6.1904e-19, 3.7770e-20, 7.0222e-20, 6.6476e-20, 9.3451e-20, 5.7146e-19,\n 3.8197e-19, 2.1168e-19, 5.2412e-20, 1.4724e-19, 9.2718e-20, 1.1202e-19,\n 6.2338e-20, 1.4264e-19, 1.1943e-19, 2.1406e-19, 3.4675e-19, 1.4328e-18,\n 1.0094e-20, 1.3404e-19, 2.4435e-19, 7.0978e-21, 4.2609e-21, 5.3194e-21,\n 1.9100e-19, 8.2748e-19, 8.0748e-21, 1.4035e-20, 7.4383e-20, 6.5544e-19,\n 7.9388e-22, 4.2255e-19, 1.0019e-19, 1.7083e-19, 3.6569e-19, 1.1894e-20,\n 1.0842e-19, 2.1778e-21, 1.2164e-19, 1.9181e-20, 2.0730e-20, 3.0739e-20,\n 4.9883e-21, 2.4012e-22, 6.9183e-20, 4.3517e-20, 1.5656e-20, 2.2179e-20,\n 4.1162e-21, 1.8324e-20, 1.3285e-19, 2.4817e-19, 1.7965e-20, 1.7576e-20,\n 5.3398e-20, 1.9066e-20, 9.0255e-20, 1.4066e-20, 3.8269e-19, 2.7304e-21,\n 3.6196e-21, 5.3177e-19, 1.8811e-19, 1.5284e-18, 1.5835e-18, 2.3064e-19,\n 3.1545e-19, 1.7780e-18, 5.0970e-20, 6.2732e-20, 1.0053e-19, 6.1696e-19,\n 4.9934e-20, 4.8173e-19, 1.1308e-20, 2.3228e-19, 2.1171e-19, 2.6379e-18,\n 6.1243e-19, 1.0069e-18, 7.0491e-20, 1.5365e-19, 1.9081e-18, 3.8205e-20,\n 4.9382e-22, 1.7879e-21, 6.4309e-19, 1.6017e-19, 6.1425e-19, 2.8680e-19,\n 1.5405e-19, 2.5282e-20, 9.7066e-19, 9.5307e-19, 4.9758e-19, 1.4956e-19,\n 5.3711e-19, 3.5583e-21, 1.4896e-19, 3.5311e-19, 2.1750e-21, 4.7469e-21,\n 8.5623e-20, 7.1551e-19, 4.9952e-19, 7.7644e-20, 1.4544e-20, 6.8555e-22,\n 8.7840e-19, 4.3733e-19, 1.1820e-20, 7.6707e-21, 2.4504e-19, 7.8809e-20,\n 1.0654e-19, 4.4887e-19, 4.0133e-19, 9.7667e-19, 1.4627e-20, 1.6633e-19,\n 3.8169e-21, 1.1318e-18, 6.8230e-20, 3.8855e-20, 1.9787e-20, 1.3824e-20,\n 1.0083e-19, 1.6146e-20, 2.1055e-19, 2.3407e-20, 7.4879e-21, 1.2237e-19,\n 6.4663e-20, 1.0401e-20, 1.0695e-20, 5.9154e-20, 4.9135e-20, 2.1136e-20,\n 1.7530e-20, 2.3558e-20, 2.7809e-20, 3.8119e-20, 1.2496e-19, 2.8944e-20,\n 3.0570e-20, 1.8001e-20, 1.0776e-20, 5.0321e-20, 1.3645e-19, 1.8035e-19,\n 1.5080e-20, 1.1838e-19, 2.0485e-20, 1.5568e-19, 3.0645e-34, 2.5511e-35,\n 7.1459e-35, 1.5796e-35, 1.5428e-36, 4.2915e-35, 2.1601e-35, 6.5708e-35,\n 4.9692e-37, 4.7626e-35, 9.9727e-36, 2.0482e-35, 9.1800e-35, 3.5339e-36,\n 2.0818e-36, 5.4542e-36, 1.3343e-35, 4.4864e-36, 1.6631e-34, 3.2647e-35,\n 3.1323e-36, 2.8768e-37, 7.4713e-36, 5.0512e-35, 4.8375e-36, 1.1257e-36,\n 1.7527e-36, 1.2389e-35, 2.6516e-37, 8.9582e-36, 3.9997e-35, 3.7772e-35,\n 2.3242e-35, 1.2970e-36, 7.6062e-36, 8.8306e-36, 2.6965e-35, 5.9626e-36,\n 1.8975e-35, 4.2054e-35, 2.6685e-35, 1.6429e-35, 5.6935e-36, 4.7495e-35,\n 4.6957e-35, 7.8680e-36, 3.6590e-36, 7.3682e-36, 4.8029e-35, 1.4474e-35,\n 5.4638e-36, 1.2779e-36, 1.7802e-35, 1.9846e-35, 4.6794e-35, 4.9351e-35,\n 3.5195e-35, 2.0477e-34, 2.6712e-35, 2.0591e-35, 7.3069e-35, 3.7796e-35,\n 4.2989e-35, 4.1732e-35, 1.0366e-35, 1.3278e-35, 2.1477e-36, 7.1449e-36,\n 2.6118e-35, 7.0615e-36, 3.5764e-35, 2.3422e-36, 6.0074e-37, 2.2337e-35,\n 5.8290e-36, 1.9332e-35, 1.2353e-35, 1.1230e-35, 3.4369e-35, 1.4448e-35,\n 1.3784e-35, 1.0880e-34, 6.2787e-36, 8.9332e-36, 1.8780e-35, 1.1818e-36,\n 1.1033e-35, 3.1374e-37, 1.0629e-35, 8.9168e-37, 4.3559e-36, 2.6717e-35,\n 1.4344e-36, 1.1720e-35, 3.7205e-36, 9.9590e-36, 1.5733e-35, 7.6283e-36,\n 6.0963e-36, 1.3086e-35, 1.2578e-35, 2.7630e-36, 4.4725e-35, 4.9455e-35,\n 7.0765e-36, 1.7968e-35, 1.2296e-35, 5.3248e-35, 2.1217e-35, 7.1266e-36,\n 2.0833e-36, 1.1394e-35, 2.7758e-35, 2.7110e-36, 1.2078e-35, 8.3394e-37,\n 4.1981e-36, 1.1774e-35, 1.9391e-36, 6.1586e-36, 7.6848e-36, 9.5662e-36,\n 3.4760e-35, 1.3913e-35, 5.5659e-35, 7.8649e-35, 4.0377e-35, 1.1181e-34,\n 2.1752e-36, 2.1641e-36, 2.1716e-35, 7.9934e-36, 8.3970e-37, 2.1057e-37,\n 7.8910e-36, 2.0153e-36, 9.3102e-36, 5.8758e-36, 3.0637e-35, 3.9924e-36,\n 7.4375e-36, 8.1385e-35, 7.9911e-36, 1.3960e-36, 3.5545e-36, 1.0958e-36,\n 1.0794e-36, 1.3273e-35, 2.2746e-35, 2.5208e-35, 1.4147e-36, 3.6018e-35,\n 3.7267e-36, 3.4747e-36, 8.6933e-35, 1.7317e-35, 2.8047e-36, 1.1737e-36,\n 7.6673e-36, 3.3306e-36, 1.7199e-35, 5.4388e-37, 1.1923e-35, 2.7350e-36,\n 4.4931e-35, 5.5840e-36, 1.0433e-35, 3.9324e-35, 1.5799e-35, 7.7734e-36,\n 3.2714e-35, 1.2506e-35, 2.1131e-35, 2.0392e-36, 1.9695e-34, 2.7228e-36,\n 4.1693e-35, 6.5395e-36, 8.7152e-36, 2.2536e-35, 8.0744e-37, 5.1446e-36,\n 2.1800e-35, 1.4831e-35, 1.3289e-35, 1.1923e-35, 2.2950e-35, 4.3396e-36,\n 1.6636e-35, 1.7946e-35, 1.8227e-35, 1.7046e-35, 3.7406e-35, 6.3552e-35,\n 1.0175e-35, 2.8044e-36, 3.1866e-35, 8.8485e-36, 7.6432e-37, 2.0672e-36,\n 2.4184e-35, 5.5090e-35, 2.2218e-35, 9.7207e-36, 2.6797e-36, 4.6866e-37,\n 1.8677e-35, 3.1652e-35, 4.7373e-36, 1.8425e-36, 5.2329e-36, 7.1708e-36,\n 9.0157e-36, 5.1982e-36, 1.8707e-35, 2.6255e-35, 5.3787e-35, 5.3582e-36,\n 5.6281e-36, 1.5110e-35, 1.9110e-35, 1.2074e-35, 1.1718e-36, 3.6038e-35,\n 1.1537e-36, 5.2699e-36, 1.9196e-35, 1.8394e-36, 2.3196e-35, 3.5497e-35,\n 5.5326e-36, 3.5826e-35, 3.5267e-35, 2.0705e-35, 1.7007e-35, 2.2427e-35,\n 5.2856e-37, 1.5503e-35, 4.7953e-36, 1.3104e-35, 1.9605e-36, 6.5460e-36,\n 1.2550e-36, 8.0670e-35, 1.2953e-35, 2.0799e-35, 2.4974e-36, 7.4675e-36,\n 2.1795e-35, 8.3723e-35, 1.9970e-35, 3.5695e-35, 4.3028e-35, 3.3407e-35,\n 5.3616e-36, 7.1206e-36, 1.2630e-15, 4.7014e-17, 2.7686e-16, 1.8194e-17,\n 7.1594e-17, 2.3837e-17, 4.8783e-18, 1.0009e-16, 5.5263e-17, 2.2751e-17,\n 7.8999e-16, 1.5407e-17, 4.6019e-17, 6.1459e-16, 9.0411e-17, 1.2268e-16,\n 2.0401e-17, 7.5671e-19, 5.1808e-17, 7.2803e-17, 1.5325e-16, 1.1093e-16,\n 6.6841e-17, 1.6519e-17, 1.6243e-18, 1.3308e-17, 3.8109e-18, 9.5675e-17,\n 8.6590e-16, 2.4771e-17, 2.7679e-16, 6.5360e-17, 6.2056e-17, 1.5693e-16,\n 4.6021e-19, 3.7879e-16, 1.9394e-16, 6.1053e-17, 3.9656e-16, 9.5265e-18,\n 1.5560e-17, 3.0049e-17, 2.8222e-17, 7.6631e-17, 4.3617e-17, 1.3105e-18,\n 9.3218e-18, 9.9599e-17, 1.6460e-17, 1.9070e-16, 6.5383e-16, 1.3771e-16,\n 1.4474e-17, 3.1564e-16, 4.0398e-17, 2.1991e-16, 2.0742e-16, 3.3642e-17,\n 2.9831e-16, 2.5046e-17, 1.0782e-17, 1.9223e-19, 3.1188e-18, 5.1517e-18,\n 1.2820e-16, 7.7118e-17, 5.8860e-17, 2.4539e-17, 7.8241e-17, 1.1614e-17,\n 1.6057e-16, 3.4677e-16, 2.1153e-16, 5.3238e-18, 9.1767e-18, 2.7772e-16,\n 1.9991e-16, 5.3611e-16, 2.4098e-16, 1.7798e-17, 9.9948e-18, 2.0629e-17,\n 9.8505e-17, 3.7593e-17, 9.3213e-17, 1.7177e-17, 1.1321e-16, 1.3112e-17,\n 3.3397e-17, 3.6311e-16, 5.0487e-16, 4.1118e-17, 1.7852e-16, 1.2235e-16,\n 7.2286e-17, 1.8027e-16, 1.7728e-16, 1.1911e-17, 7.4599e-17, 6.0456e-17,\n 5.0276e-17, 4.8237e-16, 6.5906e-17, 4.0087e-16, 2.0744e-16, 3.8439e-17,\n 7.9983e-17, 9.3810e-18, 1.9821e-17, 1.9237e-18, 2.1490e-16, 2.2356e-17,\n 2.0986e-16, 2.9318e-17, 5.0881e-16, 1.2901e-16, 8.3145e-17, 4.4193e-16,\n 7.1067e-17, 4.3658e-17, 5.1688e-18, 6.9253e-17, 1.0391e-16, 2.5264e-17,\n 2.5539e-17, 1.0652e-16, 2.1176e-17, 1.1080e-17, 2.5667e-16, 1.0417e-17,\n 1.4527e-17, 1.6572e-17, 1.8031e-17, 7.1966e-17, 1.0014e-16, 4.5077e-17,\n 4.4535e-17, 2.9324e-18, 9.4031e-17, 6.5624e-17, 1.1372e-17, 5.7735e-17,\n 2.0237e-17, 1.2658e-17, 1.3086e-16, 3.0261e-17, 3.2262e-19, 8.7175e-17,\n 6.5499e-17, 2.0235e-17, 1.8076e-16, 1.0251e-16, 3.3931e-17, 1.3944e-17,\n 3.4209e-16, 1.2179e-17, 1.2404e-17, 1.3920e-16, 5.1855e-17, 1.2575e-16,\n 1.2064e-16, 4.3106e-17, 1.8561e-16, 1.1145e-16, 9.2672e-18, 1.7818e-17,\n 2.3712e-19, 2.0671e-17, 1.3103e-16, 1.8981e-19, 6.7557e-16, 3.4577e-19,\n 4.0814e-17, 1.6268e-17, 3.6986e-16, 1.4279e-16, 6.3507e-17, 1.3382e-16,\n 3.4339e-16, 5.2290e-17, 5.9108e-18, 2.4484e-17, 4.0695e-17, 2.1865e-17,\n 7.1767e-18, 1.5696e-17, 2.8092e-17, 6.3922e-18, 2.9879e-16, 1.7965e-18,\n 6.5053e-17, 1.3462e-17, 4.7634e-16, 2.3320e-16, 9.7489e-18, 3.2948e-17,\n 1.8293e-16, 9.1225e-18, 9.1534e-18, 2.1609e-16, 5.1243e-17, 1.0573e-16,\n 3.1219e-17, 1.4381e-16, 1.6052e-17, 3.6459e-17, 1.7158e-16, 8.0536e-18,\n 1.4026e-16, 6.6022e-17, 2.3277e-17, 1.0521e-16, 5.0698e-16, 1.0313e-17,\n 4.7129e-17, 8.7693e-17, 4.0136e-19, 7.3025e-17, 6.6878e-17, 1.5823e-16,\n 6.3625e-18, 1.0899e-16, 2.3036e-17, 3.6291e-17, 1.0268e-16, 4.0809e-19,\n 1.6378e-16, 2.7501e-17, 2.2793e-16, 7.7160e-17, 1.0629e-16, 1.9222e-17,\n 5.0097e-17, 1.8096e-17, 3.1377e-16, 6.2683e-17, 4.2680e-17, 2.9859e-16,\n 2.6935e-16, 2.5757e-16, 5.4552e-17, 2.6109e-16, 9.4829e-18, 1.1140e-18,\n 2.4951e-17, 1.1771e-17, 7.2454e-18, 1.6345e-17, 2.0531e-16, 6.7744e-18,\n 1.7782e-17, 9.3072e-17, 3.4366e-17, 1.2737e-17, 5.3273e-18, 2.3973e-16],\n device='cuda:0')" + }, + "41": { + "step": "tensor(21284.)", + "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.5461e-17, 1.8633e-17, 6.0063e-18, ..., 9.6079e-17, 3.3172e-18,\n 3.7436e-17],\n [1.2010e-17, 4.9155e-18, 2.8307e-18, ..., 2.2587e-17, 1.0730e-18,\n 1.9614e-17],\n [2.7883e-18, 1.6481e-19, 6.9055e-19, ..., 8.8607e-19, 2.2142e-19,\n 9.2772e-19],\n ...,\n [1.0190e-17, 7.3594e-19, 2.7462e-18, ..., 4.5303e-18, 6.3329e-19,\n 2.3098e-18],\n [5.2677e-18, 8.7536e-19, 1.3082e-18, ..., 4.4444e-18, 3.3122e-19,\n 1.0727e-18],\n [1.2785e-17, 3.8317e-18, 2.9610e-18, ..., 1.6572e-17, 1.1207e-18,\n 1.1924e-17]], device='cuda:0')" + }, + "42": { + "step": "tensor(21284.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.1315e-15, 2.8879e-16, 2.5530e-18, 1.1127e-15, 3.4365e-16, 2.9604e-17,\n 4.0294e-17, 6.2043e-17, 7.0252e-18, 1.5338e-16, 1.9719e-16, 2.2541e-16,\n 3.3576e-16, 6.7131e-17, 3.7066e-16, 1.6679e-16, 7.0844e-16, 1.0718e-16,\n 1.0903e-17, 8.6462e-16, 1.0984e-16, 7.5448e-17, 2.2653e-16, 9.0078e-16,\n 4.9228e-17, 1.2286e-15, 2.1992e-16, 2.2877e-16, 1.1964e-17, 7.1174e-17,\n 1.0243e-15, 2.5908e-17, 6.0027e-16, 1.9590e-17, 3.9670e-16, 8.1443e-18,\n 1.5290e-16, 3.2425e-16, 1.9930e-16, 1.8827e-16, 1.2555e-17, 2.6378e-16,\n 9.0766e-16, 2.4408e-17, 4.2669e-16, 3.1709e-17, 2.4344e-17, 9.1071e-18,\n 2.7631e-16, 8.6291e-17, 1.0523e-16, 5.8068e-17, 4.4569e-17, 1.1991e-16,\n 1.8366e-15, 1.9279e-17, 5.3436e-17, 2.6965e-17, 5.8036e-18, 4.6148e-16,\n 1.4358e-16, 5.2129e-18, 3.0133e-16, 1.2405e-15, 5.0816e-17, 7.7133e-17,\n 5.5547e-16, 8.6912e-18, 1.7355e-15, 1.7763e-17, 2.4916e-16, 1.7511e-16,\n 1.1853e-16, 6.1943e-18, 4.1769e-17, 5.4053e-17, 2.9298e-16, 9.9987e-17,\n 1.5983e-16, 3.0847e-16, 3.3453e-17, 3.1971e-17, 2.8719e-15, 9.6315e-17,\n 6.4019e-18, 1.1983e-17, 2.4516e-17, 1.2874e-17, 2.5116e-16, 1.9782e-17,\n 6.6809e-18, 5.0111e-16, 3.3903e-17, 9.5738e-16, 2.2721e-16, 3.0266e-17,\n 7.0200e-16, 2.5369e-16, 1.1598e-15, 8.6733e-18, 1.3190e-16, 1.1755e-15,\n 1.5437e-15, 2.5810e-15, 3.2054e-17, 1.1241e-16, 6.7279e-18, 4.1074e-16,\n 2.0089e-16, 1.0161e-17, 1.8019e-17, 1.9532e-16, 2.2036e-17, 3.3882e-17,\n 2.9778e-17, 1.6029e-16, 1.6698e-17, 1.2997e-16, 1.4637e-17, 1.1697e-16,\n 9.4565e-16, 5.4999e-18, 2.3289e-15, 8.7617e-17, 1.6929e-15, 1.5549e-17,\n 5.5442e-16, 6.8073e-16, 7.8736e-16, 6.5367e-17, 8.3576e-16, 6.3571e-16,\n 1.0183e-16, 7.6630e-18, 1.0184e-17, 9.0021e-16, 7.8651e-16, 5.4753e-16,\n 9.7009e-17, 1.5182e-17, 1.6493e-16, 1.0864e-15, 2.8200e-16, 4.2175e-16,\n 2.2054e-16, 4.9188e-18, 2.7691e-16, 1.9903e-16, 1.8769e-17, 8.0435e-18,\n 4.3667e-18, 7.2830e-16, 2.8069e-17, 8.8001e-17, 5.7279e-16, 1.2038e-15,\n 3.8534e-16, 8.7843e-16, 1.0083e-16, 7.4381e-16, 7.9227e-16, 6.7547e-16,\n 3.0913e-16, 1.0674e-15, 5.4199e-18, 1.3449e-15, 1.4766e-16, 1.7024e-16,\n 9.7243e-17, 1.3549e-17, 2.7661e-17, 6.3666e-18, 1.4196e-16, 4.1318e-17,\n 2.0030e-16, 6.3086e-16, 3.3827e-16, 1.1112e-17, 3.1996e-16, 7.1307e-18,\n 1.4774e-17, 4.2609e-16, 7.5795e-18, 1.7610e-16, 1.0938e-17, 2.7412e-16,\n 4.5454e-16, 1.2587e-16, 2.0407e-16, 4.3904e-16, 7.9514e-16, 4.8959e-16,\n 4.8051e-16, 8.7743e-18, 6.6875e-17, 1.0054e-17, 1.3873e-17, 1.1347e-16,\n 5.1337e-18, 1.5019e-16, 4.5874e-16, 5.1927e-16, 6.7246e-17, 1.9894e-16,\n 6.3372e-17, 1.0447e-15, 1.0974e-17, 7.5862e-16, 3.0818e-17, 7.0531e-17,\n 2.1648e-17, 1.8293e-16, 1.9868e-17, 1.0628e-16, 7.9518e-16, 1.2667e-16,\n 6.4963e-17, 9.0443e-18, 2.1217e-17, 6.1432e-17, 3.6039e-16, 1.5992e-17,\n 7.3415e-18, 1.6557e-16, 4.1972e-16, 2.2762e-15, 2.5131e-16, 5.3052e-17,\n 4.4109e-17, 1.2704e-15, 4.4409e-16, 8.4686e-17, 1.4107e-15, 4.1522e-17,\n 6.3050e-17, 1.5319e-16, 6.5208e-17, 3.8002e-16, 6.6417e-17, 1.1535e-18,\n 6.0656e-16, 3.8962e-17, 5.5234e-16, 7.0316e-17, 1.0854e-17, 2.0298e-17,\n 6.7897e-16, 2.5908e-17, 9.8259e-16, 1.1449e-15, 6.4033e-16, 1.1637e-16,\n 2.4214e-16, 2.0040e-17, 2.9572e-17, 2.0632e-16], device='cuda:0')" + }, + "43": { + "step": "tensor(21284.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.2463e-16, 1.4118e-15, 1.7642e-16, ..., 3.6542e-15, 3.4432e-15,\n 1.5922e-15],\n [1.4987e-17, 9.6426e-17, 1.1402e-17, ..., 2.5405e-16, 2.4760e-16,\n 1.0666e-16],\n [1.0459e-17, 6.0679e-17, 8.1939e-18, ..., 1.5804e-16, 1.4054e-16,\n 7.3609e-17],\n [1.6403e-17, 1.0989e-16, 1.3049e-17, ..., 2.8141e-16, 2.7285e-16,\n 1.1656e-16],\n [1.6180e-17, 9.2571e-17, 1.2858e-17, ..., 2.4019e-16, 2.1231e-16,\n 1.1382e-16]], device='cuda:0')" + }, + "44": { + "step": "tensor(21284.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([8.0293e-14, 5.6698e-15, 3.3274e-15, 6.3666e-15, 5.0087e-15],\n device='cuda:0')" + }, + "45": { + "step": "tensor(21284.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.2500e-16, 1.4125e-15, 1.7669e-16, ..., 3.6567e-15, 3.4432e-15,\n 1.5946e-15],\n [1.4994e-17, 9.6439e-17, 1.1407e-17, ..., 2.5409e-16, 2.4760e-16,\n 1.0671e-16],\n [1.0495e-17, 6.0740e-17, 8.2199e-18, ..., 1.5827e-16, 1.4054e-16,\n 7.3838e-17],\n [1.6411e-17, 1.0990e-16, 1.3055e-17, ..., 2.8147e-16, 2.7285e-16,\n 1.1661e-16],\n [1.6241e-17, 9.2675e-17, 1.2903e-17, ..., 2.4059e-16, 2.1231e-16,\n 1.1422e-16]], device='cuda:0')" + }, + "46": { + "step": "tensor(21284.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([8.0299e-14, 5.6699e-15, 3.3280e-15, 6.3668e-15, 5.0098e-15],\n device='cuda:0')" + }, + "47": { + "step": "tensor(21284.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.2463e-16, 1.4118e-15, 1.7642e-16, ..., 3.6542e-15, 3.4432e-15,\n 1.5922e-15],\n [1.4987e-17, 9.6426e-17, 1.1402e-17, ..., 2.5405e-16, 2.4760e-16,\n 1.0666e-16],\n [1.0459e-17, 6.0679e-17, 8.1939e-18, ..., 1.5804e-16, 1.4054e-16,\n 7.3609e-17],\n [1.6403e-17, 1.0989e-16, 1.3049e-17, ..., 2.8141e-16, 2.7285e-16,\n 1.1656e-16],\n [1.6180e-17, 9.2571e-17, 1.2858e-17, ..., 2.4019e-16, 2.1231e-16,\n 1.1382e-16]], device='cuda:0')" + }, + "48": { + "step": "tensor(21284.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([8.0293e-14, 5.6698e-15, 3.3274e-15, 6.3666e-15, 5.0087e-15],\n device='cuda:0')" + }, + "6": { + "step": "tensor(7512.)", + "exp_avg": "tensor([[ 4.9854e-07, 2.1596e-06, -9.6562e-06, ..., 5.9631e-09,\n 6.4181e-06, 2.4993e-06],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-6.8664e-44, -1.5554e-43, 2.3262e-43, ..., 2.2701e-43,\n 3.3351e-43, 8.5479e-44],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.4335e-08, 8.3232e-09, 1.6180e-09, ..., 9.1739e-10, 1.0913e-09,\n 1.2542e-09],\n [1.1822e-13, 4.4540e-14, 4.6942e-15, ..., 9.7048e-15, 3.2589e-14,\n 4.8714e-15],\n [8.9143e-13, 1.0339e-13, 5.6054e-14, ..., 1.7149e-14, 1.2477e-13,\n 9.5696e-15],\n ...,\n [8.1242e-12, 3.4987e-11, 6.1224e-12, ..., 4.3114e-12, 4.5763e-12,\n 3.5401e-12],\n [2.0966e-12, 6.7308e-13, 1.1468e-13, ..., 8.5360e-14, 6.7355e-13,\n 1.8351e-14],\n [6.8850e-13, 8.2457e-13, 3.0293e-14, ..., 3.5408e-14, 1.1645e-13,\n 8.7478e-15]], device='cuda:0')" + }, + "7": { + "step": "tensor(7512.)", + "exp_avg": "tensor([1.9908e-05, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45, 8.9949e-42,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.7078e-06, 1.8833e-11, 1.0545e-10, ..., 7.7286e-09, 3.1394e-10,\n 1.9187e-10], device='cuda:0')" + }, + "8": { + "step": "tensor(7512.)", + "exp_avg": "tensor([[-2.8770e-07, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.3249e-44, -5.6052e-45],\n [ 9.8273e-07, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 4.2039e-44, 5.6052e-45],\n [ 1.0263e-07, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 1.8217e-43, -5.6052e-45],\n ...,\n [-1.3544e-06, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -1.6816e-44, -5.6052e-45],\n [-3.0603e-08, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -9.3887e-44, -5.6052e-45],\n [-2.6994e-07, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 1.6816e-44, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.3412e-11, 6.9678e-14, 1.3855e-13, ..., 1.8502e-12, 4.0079e-13,\n 2.0643e-13],\n [6.7597e-11, 8.2975e-14, 1.5516e-13, ..., 2.2718e-12, 4.0440e-13,\n 3.4972e-13],\n [6.4600e-11, 7.0760e-14, 1.4303e-13, ..., 4.1788e-12, 4.2178e-13,\n 4.5041e-13],\n ...,\n [1.2245e-10, 1.2052e-13, 2.8781e-13, ..., 3.0939e-12, 7.2467e-13,\n 6.5419e-13],\n [7.3906e-11, 1.7032e-13, 2.7577e-13, ..., 2.0130e-12, 9.9125e-13,\n 4.9023e-13],\n [1.1118e-10, 6.4154e-14, 1.4427e-13, ..., 2.6487e-12, 4.5594e-13,\n 7.6545e-13]], device='cuda:0')" + }, + "9": { + "step": "tensor(7512.)", + "exp_avg": "tensor([[ 2.6317e-05, -1.6313e-05, 4.6890e-07, ..., 1.7440e-06,\n 9.6505e-06, 1.5082e-06],\n [ 2.6435e-08, -3.9305e-08, -8.2584e-09, ..., 3.7843e-08,\n -1.9831e-08, -1.1462e-08],\n [-6.2581e-06, 1.5904e-05, 1.7100e-05, ..., -5.3792e-06,\n 6.4331e-06, 7.0246e-06],\n ...,\n [-2.4415e-07, 6.5924e-07, -4.2370e-07, ..., -3.1356e-06,\n -1.1131e-06, -3.1441e-06],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 3.2730e-06, 5.0244e-06, 3.6937e-06, ..., 1.3829e-06,\n 1.4456e-06, -1.2209e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.1181e-09, 2.5779e-09, 4.6190e-10, ..., 4.6640e-10, 7.2866e-10,\n 2.8604e-10],\n [3.6632e-10, 2.5726e-10, 3.3101e-11, ..., 4.6296e-11, 4.2138e-11,\n 3.9822e-11],\n [2.6983e-09, 2.6513e-09, 6.1308e-10, ..., 2.9139e-10, 9.0128e-10,\n 2.5469e-10],\n ...,\n [7.8327e-10, 2.7699e-09, 2.4455e-10, ..., 3.1425e-10, 3.3213e-10,\n 2.6594e-10],\n [6.2069e-10, 4.1678e-10, 4.7345e-11, ..., 5.5707e-11, 6.7778e-11,\n 3.7032e-11],\n [4.5264e-09, 2.6453e-09, 5.9442e-10, ..., 1.2929e-09, 6.6125e-10,\n 5.6445e-10]], device='cuda:0')" + }, + "10": { + "step": "tensor(7512.)", + "exp_avg": "tensor([ 2.3891e-04, -3.8899e-07, 1.0684e-04, ..., -5.5825e-05,\n 5.6052e-45, 4.8544e-05], device='cuda:0')", + "exp_avg_sq": "tensor([8.3291e-07, 8.4591e-08, 5.8645e-07, ..., 4.8595e-07, 8.3363e-08,\n 1.5053e-06], device='cuda:0')" + }, + "11": { + "step": "tensor(7512.)", + "exp_avg": "tensor([[-6.0484e-08, -3.5157e-09, -1.6698e-06, ..., -5.4709e-07,\n -5.6052e-45, 3.0677e-08],\n [ 2.6674e-07, 1.3797e-08, -1.5694e-06, ..., -4.6201e-07,\n -5.6052e-45, -2.5403e-08],\n [-9.0853e-07, 6.9575e-09, -2.0038e-06, ..., -3.9492e-07,\n -5.6052e-45, 4.4455e-07],\n ...,\n [-7.3567e-07, 2.1666e-08, -2.9420e-06, ..., 2.0468e-07,\n 5.6052e-45, 9.2520e-09],\n [-3.5218e-07, 4.0375e-08, 6.2799e-07, ..., 3.1335e-07,\n -5.6052e-45, -4.2172e-07],\n [ 1.9479e-06, 2.3553e-09, 3.3146e-06, ..., -5.4778e-07,\n -5.6052e-45, -8.3423e-09]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.0021e-11, 4.4199e-12, 1.8480e-11, ..., 8.7259e-12, 4.3408e-12,\n 3.1252e-11],\n [5.0513e-11, 7.9713e-12, 2.6129e-11, ..., 2.4337e-11, 7.8321e-12,\n 2.7669e-11],\n [9.1091e-11, 8.9976e-12, 2.9579e-11, ..., 2.1574e-11, 8.1540e-12,\n 2.7565e-11],\n ...,\n [1.4373e-10, 8.1480e-12, 2.9111e-11, ..., 2.5018e-11, 8.6969e-12,\n 2.4706e-11],\n [7.9418e-11, 1.0341e-11, 2.7521e-11, ..., 1.6803e-11, 9.8605e-12,\n 3.5564e-11],\n [6.4596e-11, 9.6854e-12, 2.9664e-11, ..., 3.0424e-11, 8.5014e-12,\n 3.3017e-11]], device='cuda:0')" + }, + "12": { + "step": "tensor(10016.)", + "exp_avg": "tensor([[ 1.0733e-06, 5.9301e-07, 1.1084e-06, ..., -1.1260e-06,\n -7.9756e-07, 4.1220e-07],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 3.9096e-06, 1.3317e-06, 6.0891e-06, ..., 1.4191e-06,\n 4.4139e-06, 9.3660e-07],\n ...,\n [-3.6163e-06, -5.3090e-07, 1.8161e-06, ..., -2.2841e-06,\n 2.0056e-07, 2.6083e-06],\n [-4.1390e-06, 6.1189e-06, -8.3921e-07, ..., -6.2971e-07,\n -1.9904e-06, 1.1207e-06],\n [ 1.5186e-06, 4.9709e-06, 1.9487e-06, ..., -3.3802e-06,\n -1.6063e-06, -5.4591e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.2621e-10, 6.0628e-10, 8.1398e-11, ..., 1.7367e-10, 9.0598e-11,\n 7.0216e-11],\n [4.9119e-14, 1.3428e-13, 1.4050e-18, ..., 7.4577e-15, 1.4780e-14,\n 8.0864e-15],\n [1.0850e-09, 3.9958e-10, 7.3343e-11, ..., 1.1709e-10, 1.5914e-10,\n 4.7252e-11],\n ...,\n [1.3515e-09, 5.0402e-10, 6.0527e-11, ..., 9.8959e-11, 2.8179e-10,\n 4.7911e-11],\n [7.5568e-10, 7.8101e-10, 1.3478e-10, ..., 9.2007e-11, 2.8693e-10,\n 5.4235e-11],\n [1.2033e-09, 1.0247e-09, 1.1591e-10, ..., 1.0771e-10, 3.0513e-10,\n 1.4318e-10]], device='cuda:0')" + }, + "13": { + "step": "tensor(10016.)", + "exp_avg": "tensor([-2.7623e-05, 5.6052e-45, 5.5472e-05, ..., -3.8822e-05,\n -7.6574e-05, 5.0669e-05], device='cuda:0')", + "exp_avg_sq": "tensor([1.7265e-07, 6.6402e-11, 1.3076e-07, ..., 1.4914e-07, 1.5993e-07,\n 3.9732e-07], device='cuda:0')" + }, + "14": { + "step": "tensor(10016.)", + "exp_avg": "tensor([[ 1.4559e-07, -5.6052e-45, 5.2317e-07, ..., -1.1216e-07,\n -1.7687e-08, 6.0074e-08],\n [ 1.6496e-07, -5.6052e-45, -7.7360e-07, ..., -1.5523e-08,\n -4.3459e-08, 4.4930e-08],\n [ 1.7335e-07, -5.6052e-45, 5.2840e-08, ..., 5.0578e-08,\n 2.5160e-08, 1.0040e-07],\n ...,\n [ 3.5437e-08, -5.6052e-45, 4.6285e-08, ..., 7.7685e-09,\n 8.6288e-08, -5.1922e-08],\n [-1.2871e-07, 5.6052e-45, -1.1390e-06, ..., 1.6663e-07,\n -6.7402e-08, -2.6010e-07],\n [ 4.1622e-07, -5.6052e-45, -6.5756e-07, ..., -1.7113e-07,\n 2.5219e-07, -1.0156e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.5707e-13, 2.2841e-14, 8.6361e-13, ..., 4.9136e-13, 5.5343e-13,\n 6.1365e-13],\n [1.3276e-12, 2.3052e-15, 2.2673e-12, ..., 1.4636e-12, 8.4700e-13,\n 2.5842e-12],\n [2.2500e-12, 3.5452e-14, 3.3538e-12, ..., 9.8571e-13, 1.3532e-12,\n 8.2118e-12],\n ...,\n [1.9485e-12, 3.9681e-14, 3.7247e-12, ..., 2.8779e-13, 1.1903e-12,\n 8.5940e-13],\n [1.7618e-12, 3.9193e-14, 3.8227e-12, ..., 1.3400e-12, 9.9202e-13,\n 2.8894e-12],\n [2.6756e-12, 3.7781e-16, 3.3051e-12, ..., 1.3900e-12, 7.3951e-13,\n 3.1421e-12]], device='cuda:0')" + } + }, + "param_groups": [ + { + "lr": 0.005000500000000001, + "name": "scale_256", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 0, + 1, + 2 + ] + }, + { + "lr": 0.005000500000000001, + "name": "scale_512", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 3, + 4, + 5 + ] + }, + { + "lr": 0.005000500000000001, + "name": "scale_768", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 6, + 7, + 8 + ] + }, + { + "lr": 0.005000500000000001, + "name": "scale_1024", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 9, + 10, + 11 + ] + }, + { + "lr": 0.005000500000000001, + "name": "scale_1280", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 12, + 13, + 14 + ] + }, + { + "lr": 0.0025005, + "name": "fusion", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.005, + "params": [ + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48 + ] + } + ] + }, + "scheduler_state_dict": { + "T_0": 10, + "T_i": 20, + "T_mult": 2, + "eta_min": 1e-06, + "T_cur": 10, + "base_lrs": [ + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.005 + ], + "last_epoch": 20, + "_step_count": 0, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.0025005 + ] + }, + "metrics": { + "final_val_acc": 75.094 + }, + "train_config": { + "name": "david_training", + "run_id": "20251012_032356", + "dataset_name": "AbstractPhil/imagenet-clip-features-orderly", + "model_variant": "clip_vit_b16", + "num_classes": 1000, + "preset": "high_accuracy", + "custom_config_path": null, + "num_classes_override": null, + "use_belly_override": null, + "belly_expand_override": null, + "progressive_training_override": true, + "num_epochs": 20, + "batch_size": 1024, + "learning_rate": 0.01, + "weight_decay": 1e-05, + "warmup_epochs": 3, + "use_rose_loss": true, + "rose_initial_weight": 0.1, + "rose_max_weight": 0.5, + "rose_weight_schedule": "adaptive", + "use_cayley_loss": false, + "cayley_weight": 0.001, + "scale_loss_balance": null, + "use_mixed_precision": false, + "gradient_clip": 5.0, + "scheduler_type": "cosine_restarts", + "min_lr": 1e-06, + "freeze_strategy": "performance", + "freeze_threshold": 70.0, + "unfreeze_on_plateau": true, + "patience": 10, + "track_gradients": true, + "gradient_scale_threshold": 1e-07, + "gradient_scale_multiplier": 5.0, + "log_interval": 50, + "val_interval": 1, + "save_interval": 5, + "log_fusion_weights": true, + "log_loss_components": true, + "save_format": "safetensors", + "hf_repo": "AbstractPhil/gated-david", + "upload_to_hub": true, + "base_dir": "./david_training", + "num_workers": 10, + "pin_memory": true, + "prefetch_factor": 4, + "persistent_workers": true + } +} \ No newline at end of file