Update best_model_acc76.45_metadata.json - Run 20251012_145649
Browse files
weights/David-hierarchical-progressive/20251012_145649/best_model_acc76.45_metadata.json
ADDED
|
@@ -0,0 +1,365 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 6,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(8764.)",
|
| 7 |
+
"exp_avg": "tensor([[-2.5930e-04, 7.4049e-04, 4.0309e-04, ..., -2.0171e-04,\n -1.9386e-04, -2.6270e-04],\n [ 1.4533e-04, -7.0071e-04, 4.6066e-04, ..., 8.6023e-05,\n 1.1630e-04, -6.6133e-05],\n [ 1.8618e-04, 6.1642e-04, -3.4604e-04, ..., -1.7930e-04,\n 3.6930e-05, 4.8601e-05],\n ...,\n [-4.2094e-05, 3.1226e-04, 3.5174e-04, ..., -2.4118e-04,\n 7.1443e-05, -9.1564e-05],\n [-2.3728e-04, -1.3047e-03, -6.1838e-04, ..., 8.1878e-04,\n 1.8464e-04, 6.7112e-05],\n [-1.6674e-06, 6.2940e-04, -1.1027e-04, ..., 2.1463e-04,\n -1.8162e-04, -5.7131e-06]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[1.0434e-06, 4.9703e-06, 2.2109e-06, ..., 1.3003e-06, 4.3543e-07,\n 1.6217e-06],\n [6.4484e-07, 6.5618e-06, 2.1528e-06, ..., 7.7323e-07, 3.1984e-07,\n 5.0449e-07],\n [8.7762e-07, 4.9084e-06, 3.5103e-06, ..., 6.7855e-07, 3.9311e-07,\n 7.7989e-07],\n ...,\n [8.8989e-07, 4.7306e-06, 3.1401e-06, ..., 7.2407e-07, 4.5088e-07,\n 7.3793e-07],\n [1.2095e-06, 1.0114e-05, 3.5884e-06, ..., 1.1245e-06, 6.5766e-07,\n 6.5272e-07],\n [9.9015e-07, 8.3827e-06, 2.9915e-06, ..., 6.2935e-07, 4.6426e-07,\n 5.4936e-07]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(8764.)",
|
| 12 |
+
"exp_avg": "tensor([-1.5193e-03, -4.4591e-03, 7.1053e-03, -6.5828e-03, -3.1795e-03,\n 4.6355e-03, 5.8450e-04, 7.0924e-05, -7.7313e-03, -4.4748e-03,\n -9.4507e-03, 1.1286e-02, -6.3030e-03, -5.7630e-03, 9.0949e-03,\n 1.2107e-03, -7.4865e-03, -1.4047e-04, 3.3507e-03, 2.2486e-03,\n 1.1722e-02, -4.2688e-03, -1.3190e-02, 5.4630e-03, 3.6816e-04,\n -5.4873e-03, 9.0681e-04, 5.2477e-03, 1.1481e-03, -4.2701e-04,\n -1.2856e-02, 3.7272e-03, 1.1509e-02, 6.5716e-03, 1.1090e-02,\n 4.7191e-03, -3.9578e-03, -1.3452e-03, -6.7048e-03, 7.2267e-03,\n 1.9737e-02, 3.7527e-03, -1.0963e-03, -2.9175e-03, 6.5644e-03,\n 1.3623e-02, 7.6836e-03, -8.2199e-03, 3.4627e-04, -1.6537e-03,\n 6.8059e-03, 1.0392e-03, -2.1649e-03, 3.4304e-03, -1.7083e-03,\n 2.1792e-03, 8.3987e-03, -6.1753e-03, -1.5668e-03, -1.4155e-02,\n -4.1352e-03, -1.0537e-02, 5.5218e-03, 1.2700e-04, 3.6228e-04,\n 3.2518e-03, 1.0522e-03, -1.3251e-02, 1.9515e-03, 9.2547e-03,\n -4.6613e-03, 1.3113e-02, 1.1692e-02, -1.1399e-02, 7.7365e-04,\n -1.0673e-02, 2.0035e-02, 2.4566e-02, 2.9099e-04, -1.2592e-02,\n 2.0582e-02, -6.1618e-03, -1.0858e-02, 1.3229e-03, -7.9781e-04,\n 1.5037e-02, -2.0953e-03, -1.9642e-03, 3.3614e-03, -6.0351e-03,\n -1.0299e-02, 1.3608e-02, 1.5063e-02, -1.2260e-03, 5.0113e-03,\n -7.5556e-03, -8.0941e-03, 8.1610e-03, -1.2587e-02, -1.4015e-02,\n 2.5340e-02, 6.8679e-03, -3.8330e-03, -8.0946e-03, -4.8008e-03,\n 3.1473e-03, -1.2550e-02, 1.9463e-02, 2.2923e-03, 3.8075e-04,\n 1.0306e-02, 1.1304e-03, -2.3728e-03, -3.0506e-03, 5.1638e-04,\n -5.7666e-03, 2.0158e-04, 8.7081e-03, -3.6905e-03, -6.2057e-03,\n -1.4517e-03, 5.2392e-03, -2.3146e-02, 2.6284e-03, 6.9156e-03,\n -2.9771e-03, 1.7933e-02, 2.2547e-03, -8.2733e-03, 5.2844e-03,\n -7.7778e-03, -1.4290e-02, 7.0793e-03, 2.4083e-03, 1.6808e-02,\n 7.6884e-03, 1.0729e-02, -1.2158e-02, -2.6400e-03, -3.9794e-03,\n -3.2571e-03, -1.2840e-02, -2.2967e-02, -9.6626e-03, -1.2349e-02,\n -2.5929e-03, -4.4662e-03, 3.5320e-03, 1.5417e-02, 8.8127e-03,\n 7.6788e-04, -8.5919e-03, 2.1329e-03, 1.4201e-03, -5.8470e-04,\n 1.2027e-02, -1.7659e-02, 1.0692e-02, -5.7707e-03, 1.2914e-03,\n -4.5082e-04, 2.4606e-02, -6.3079e-03, -2.2301e-02, 4.5191e-03,\n 1.4735e-03, 7.5789e-03, 1.1614e-03, 1.3107e-02, -7.9116e-03,\n -1.4107e-04, 3.3528e-03, 5.6117e-03, 5.5719e-04, -1.7506e-03,\n 3.8071e-03, -3.3199e-03, 1.2007e-02, -3.3376e-03, 6.7455e-03,\n -5.7910e-03, -6.0498e-03, -7.1684e-03, -5.0866e-03, 4.8411e-03,\n -6.4850e-03, 1.0685e-02, -1.1505e-02, 1.3439e-03, 1.0298e-02,\n 5.0233e-03, -7.5074e-03, -1.8875e-03, -4.0996e-03, -5.0834e-03,\n 6.9429e-03, 5.6452e-03, 3.3496e-03, 2.8135e-02, -3.3520e-03,\n 3.6825e-04, 7.3070e-03, 8.3576e-03, -4.1319e-03, 1.8519e-03,\n 5.8204e-03, 3.9812e-03, 4.4824e-03, 2.6306e-03, 1.0259e-02,\n -4.0132e-03, 2.3676e-03, -2.3741e-03, -4.1143e-06, -1.1473e-02,\n -8.6575e-03, -1.3567e-03, -1.0646e-03, 4.0878e-03, 1.1614e-02,\n 3.4938e-03, 1.1175e-02, 4.2708e-03, 5.5917e-03, -6.7394e-03,\n 1.3507e-02, 2.5423e-02, 2.9743e-02, -8.9392e-03, 8.2192e-03,\n 8.3246e-03, 2.6304e-03, -2.9286e-03, -8.7903e-03, -6.9766e-03,\n 2.6887e-03, -8.3364e-03, 1.8711e-02, -9.8856e-03, 6.4722e-05,\n -1.1160e-02, -3.2238e-03, -2.0984e-02, 1.0948e-02, -1.1262e-02,\n 1.9323e-03, 1.5227e-03, 4.7681e-03, 1.2862e-02, -4.4097e-03,\n 5.9266e-03, -2.1287e-02, 5.8614e-03, -8.7561e-03, -1.2339e-02,\n -1.1507e-02, 1.1912e-03, -6.0299e-03, -3.2144e-02, 1.9664e-02,\n -1.7309e-04, 2.4762e-03, -4.1717e-03, -8.1374e-03, 1.7508e-02,\n -2.1520e-03, -3.8674e-03, -3.5054e-02, -9.3763e-03, 2.3947e-02,\n 1.2928e-03, -5.9221e-03, -3.0207e-03, -4.2103e-03, 3.5822e-03,\n 2.6462e-04, -2.3931e-03, -3.6430e-04, -4.9404e-03, 7.7238e-03,\n 3.7100e-03, 7.7939e-03, 1.4082e-04, 7.0826e-03, 3.6410e-02,\n -1.2234e-02, -9.3075e-03, 8.0630e-03, -1.2915e-04, 6.3341e-03,\n -1.6490e-02, -1.4655e-02, -4.1107e-04, 2.9939e-03, -2.6546e-03,\n 3.7630e-03, -1.2850e-02, -1.6641e-03, 4.4756e-03, -4.5296e-04,\n -5.0033e-03, -5.9977e-03, 7.0746e-03, -1.5542e-02, 8.7259e-03,\n 1.2717e-02, -3.1350e-02, -3.4018e-04, 1.0977e-02, 1.2337e-02,\n 2.4757e-02, -4.1437e-03, 5.9285e-03, -8.9077e-03, -7.5642e-03,\n 1.1634e-02, -3.4839e-03, 1.7525e-03, 1.2623e-02, -1.2888e-02,\n 5.9608e-03, -1.8689e-03, 2.4732e-02, -1.1188e-02, 1.6276e-02,\n 8.2121e-03, -5.5520e-03, 1.2886e-02, 2.1881e-02, 7.8955e-03,\n -1.6707e-02, 2.1577e-03, -6.1353e-03, 1.5193e-02, 7.7684e-03,\n 4.3973e-03, -1.8253e-03, -1.3366e-02, -6.4051e-03, -3.1358e-02,\n -8.7865e-03, 1.9130e-02, 3.0335e-03, -1.1211e-02, 1.0383e-02,\n -3.6268e-03, -7.0729e-03, 2.1776e-03, 3.8148e-03, -3.6153e-03,\n -1.6197e-02, -1.2438e-03, -3.2768e-03, -6.2864e-03, 7.1431e-03,\n 7.6637e-03, -1.1132e-03, -2.7226e-03, 1.2004e-02, -6.1475e-03,\n -1.3678e-02, 1.5430e-02, 1.8887e-03, 5.2666e-03, -1.2596e-02,\n 1.3986e-02, 5.1649e-03, 9.9844e-03, -1.1590e-02, -7.0522e-03,\n -3.0687e-02, 1.4892e-02, 6.9179e-03, 1.6579e-03, -4.0342e-03,\n -9.5131e-03, 2.2730e-02, 8.2733e-03, -1.5140e-02, 2.3753e-04,\n 8.0571e-04, 6.3958e-04, -6.3597e-03, 3.1334e-03, 3.2248e-03,\n -4.9136e-03, -8.2982e-04, 1.9776e-02, -2.7588e-03, 4.4332e-03,\n 6.6687e-03, 1.5351e-02, 3.0260e-02, -8.2091e-04, -1.5270e-02,\n -1.7301e-02, 4.5926e-03, 2.3197e-03, 1.5636e-02, 7.4228e-03,\n -1.1380e-02, -7.6872e-03, -2.3008e-04, -1.2367e-04, 1.0724e-03,\n -4.6085e-03, 4.0928e-04, -1.1420e-02, 1.7877e-04, 7.4910e-03,\n 1.6031e-03, 5.3519e-03, -5.1482e-03, 1.1239e-02, -1.1771e-02,\n -6.8431e-03, -4.3363e-05, -1.0336e-02, 7.4061e-03, -5.4389e-03,\n 2.2793e-03, -3.5463e-03, -3.6992e-03, 1.3517e-02, -1.3512e-02,\n -2.5810e-03, 4.4267e-03, -3.9901e-02, -2.4318e-03, -1.7773e-02,\n 4.5388e-03, 5.2524e-03, -1.4869e-02, 1.2160e-02, -5.8866e-03,\n 9.0534e-03, -4.6406e-03, -1.9224e-03, 1.8167e-02, 6.9995e-03,\n 3.1519e-03, -1.2705e-02, 7.1267e-04, -9.9666e-03, 4.0190e-03,\n -4.2715e-03, 1.1146e-02, 7.0163e-04, 1.0579e-02, 8.2561e-04,\n -7.2275e-03, 7.0864e-05, 5.9294e-03, -1.0689e-03, 1.0834e-02,\n 2.8868e-03, 5.4802e-04, -2.7470e-03, -2.6632e-03, -5.0826e-03,\n -2.0014e-02, 3.4024e-04, 6.4873e-03, -1.3074e-05, -3.0416e-02,\n -4.4887e-03, -6.9707e-03, 3.1749e-03, -3.4078e-03, -1.2904e-02,\n 7.8316e-03, 1.5418e-03, 4.8794e-03, 8.8956e-03, 1.6863e-02,\n 3.2222e-04, 7.7671e-03, 1.3156e-02, 4.0770e-03, -2.3279e-03,\n -6.4535e-03, 2.7175e-03, 8.3535e-04, 4.0872e-04, 4.2897e-03,\n 6.9215e-03, -1.0622e-02, -3.4937e-03, -5.3507e-03, -1.7231e-03,\n -5.1878e-03, -1.3843e-02, -5.4689e-03, 3.1539e-03, 3.4423e-03,\n -1.0206e-02, -5.5291e-03, 1.4681e-03, -1.3991e-02, -4.4572e-03,\n -5.2325e-03, 2.4938e-03, 4.4260e-03, 1.6987e-03, -1.4847e-02,\n -6.5368e-03, 3.3508e-03, -6.3646e-03, -4.7714e-03, -1.0965e-03,\n -1.1597e-02, -1.4543e-03], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([0.0009, 0.0007, 0.0007, 0.0010, 0.0007, 0.0014, 0.0007, 0.0008, 0.0009,\n 0.0012, 0.0009, 0.0009, 0.0009, 0.0008, 0.0007, 0.0008, 0.0008, 0.0011,\n 0.0009, 0.0009, 0.0007, 0.0006, 0.0009, 0.0008, 0.0006, 0.0006, 0.0011,\n 0.0008, 0.0008, 0.0008, 0.0008, 0.0009, 0.0006, 0.0007, 0.0009, 0.0009,\n 0.0006, 0.0007, 0.0011, 0.0009, 0.0007, 0.0008, 0.0007, 0.0009, 0.0007,\n 0.0008, 0.0009, 0.0010, 0.0008, 0.0007, 0.0008, 0.0007, 0.0007, 0.0007,\n 0.0010, 0.0011, 0.0009, 0.0008, 0.0006, 0.0008, 0.0007, 0.0008, 0.0008,\n 0.0008, 0.0008, 0.0008, 0.0008, 0.0007, 0.0010, 0.0009, 0.0007, 0.0007,\n 0.0006, 0.0006, 0.0008, 0.0008, 0.0009, 0.0009, 0.0007, 0.0007, 0.0009,\n 0.0008, 0.0007, 0.0007, 0.0009, 0.0011, 0.0006, 0.0007, 0.0010, 0.0007,\n 0.0008, 0.0007, 0.0008, 0.0008, 0.0009, 0.0007, 0.0006, 0.0009, 0.0009,\n 0.0008, 0.0011, 0.0008, 0.0012, 0.0011, 0.0005, 0.0007, 0.0010, 0.0009,\n 0.0013, 0.0008, 0.0007, 0.0009, 0.0011, 0.0006, 0.0011, 0.0009, 0.0009,\n 0.0004, 0.0008, 0.0009, 0.0009, 0.0010, 0.0011, 0.0007, 0.0009, 0.0007,\n 0.0010, 0.0006, 0.0007, 0.0007, 0.0008, 0.0008, 0.0009, 0.0007, 0.0009,\n 0.0011, 0.0007, 0.0009, 0.0009, 0.0008, 0.0007, 0.0008, 0.0007, 0.0010,\n 0.0007, 0.0005, 0.0011, 0.0007, 0.0010, 0.0008, 0.0007, 0.0007, 0.0010,\n 0.0008, 0.0006, 0.0008, 0.0007, 0.0010, 0.0009, 0.0009, 0.0008, 0.0009,\n 0.0008, 0.0011, 0.0008, 0.0007, 0.0007, 0.0009, 0.0010, 0.0009, 0.0009,\n 0.0009, 0.0010, 0.0007, 0.0011, 0.0008, 0.0009, 0.0008, 0.0007, 0.0007,\n 0.0006, 0.0007, 0.0008, 0.0006, 0.0008, 0.0011, 0.0007, 0.0009, 0.0008,\n 0.0006, 0.0007, 0.0010, 0.0010, 0.0008, 0.0009, 0.0008, 0.0009, 0.0007,\n 0.0008, 0.0006, 0.0010, 0.0010, 0.0007, 0.0006, 0.0007, 0.0008, 0.0006,\n 0.0006, 0.0009, 0.0008, 0.0007, 0.0008, 0.0005, 0.0006, 0.0007, 0.0007,\n 0.0006, 0.0007, 0.0007, 0.0009, 0.0009, 0.0009, 0.0011, 0.0007, 0.0009,\n 0.0007, 0.0008, 0.0010, 0.0011, 0.0008, 0.0006, 0.0007, 0.0009, 0.0006,\n 0.0010, 0.0009, 0.0014, 0.0009, 0.0008, 0.0009, 0.0009, 0.0008, 0.0008,\n 0.0006, 0.0009, 0.0008, 0.0009, 0.0009, 0.0008, 0.0008, 0.0007, 0.0007,\n 0.0010, 0.0007, 0.0010, 0.0009, 0.0006, 0.0005, 0.0010, 0.0010, 0.0006,\n 0.0008, 0.0008, 0.0006, 0.0006, 0.0007, 0.0006, 0.0012, 0.0010, 0.0011,\n 0.0007, 0.0006, 0.0009, 0.0007, 0.0009, 0.0007, 0.0012, 0.0005, 0.0006,\n 0.0005, 0.0008, 0.0008, 0.0010, 0.0007, 0.0009, 0.0009, 0.0009, 0.0008,\n 0.0010, 0.0010, 0.0008, 0.0008, 0.0008, 0.0007, 0.0007, 0.0009, 0.0005,\n 0.0009, 0.0007, 0.0008, 0.0012, 0.0007, 0.0009, 0.0006, 0.0010, 0.0008,\n 0.0010, 0.0008, 0.0003, 0.0009, 0.0009, 0.0008, 0.0007, 0.0009, 0.0007,\n 0.0007, 0.0009, 0.0006, 0.0007, 0.0010, 0.0008, 0.0007, 0.0010, 0.0005,\n 0.0010, 0.0008, 0.0008, 0.0007, 0.0008, 0.0007, 0.0008, 0.0009, 0.0010,\n 0.0009, 0.0009, 0.0010, 0.0012, 0.0010, 0.0009, 0.0011, 0.0009, 0.0008,\n 0.0010, 0.0006, 0.0008, 0.0008, 0.0009, 0.0008, 0.0007, 0.0013, 0.0010,\n 0.0007, 0.0009, 0.0007, 0.0006, 0.0011, 0.0006, 0.0007, 0.0008, 0.0006,\n 0.0008, 0.0009, 0.0007, 0.0008, 0.0008, 0.0011, 0.0012, 0.0006, 0.0006,\n 0.0007, 0.0011, 0.0008, 0.0011, 0.0007, 0.0007, 0.0006, 0.0010, 0.0007,\n 0.0010, 0.0010, 0.0009, 0.0006, 0.0008, 0.0009, 0.0009, 0.0008, 0.0006,\n 0.0011, 0.0007, 0.0007, 0.0009, 0.0008, 0.0007, 0.0010, 0.0009, 0.0010,\n 0.0008, 0.0006, 0.0011, 0.0010, 0.0010, 0.0008, 0.0011, 0.0006, 0.0009,\n 0.0006, 0.0009, 0.0008, 0.0007, 0.0006, 0.0006, 0.0007, 0.0007, 0.0012,\n 0.0008, 0.0009, 0.0010, 0.0008, 0.0006, 0.0010, 0.0006, 0.0008, 0.0009,\n 0.0009, 0.0006, 0.0007, 0.0008, 0.0010, 0.0008, 0.0009, 0.0008, 0.0006,\n 0.0010, 0.0005, 0.0005, 0.0009, 0.0008, 0.0008, 0.0008, 0.0006, 0.0012,\n 0.0012, 0.0012, 0.0007, 0.0007, 0.0008, 0.0008, 0.0006, 0.0011, 0.0005,\n 0.0008, 0.0009, 0.0006, 0.0007, 0.0009, 0.0007, 0.0007, 0.0008, 0.0007,\n 0.0007, 0.0006, 0.0006, 0.0009, 0.0007, 0.0011, 0.0007, 0.0006, 0.0009,\n 0.0008, 0.0010, 0.0009, 0.0008, 0.0010, 0.0009, 0.0007, 0.0005, 0.0006,\n 0.0010, 0.0004, 0.0007, 0.0006, 0.0008, 0.0009, 0.0009, 0.0010, 0.0010,\n 0.0007, 0.0007, 0.0007, 0.0008, 0.0008, 0.0009, 0.0008, 0.0006, 0.0009,\n 0.0009, 0.0009, 0.0006, 0.0007, 0.0010, 0.0010, 0.0009, 0.0008, 0.0008,\n 0.0009, 0.0007, 0.0010, 0.0009, 0.0007, 0.0007, 0.0010, 0.0007],\n device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(8764.)",
|
| 17 |
+
"exp_avg": "tensor([[ 1.1613e-04, -2.4537e-04, 6.5847e-05, ..., -5.1372e-05,\n 8.5061e-04, 1.5832e-04],\n [ 2.8584e-04, 2.2454e-06, -6.1328e-05, ..., -1.8359e-04,\n 1.7699e-04, -2.6203e-05],\n [-2.3968e-04, 3.0246e-04, 1.9443e-05, ..., -2.8390e-05,\n -4.6356e-04, -2.4774e-04],\n ...,\n [ 1.8845e-04, 3.1283e-04, -1.3213e-04, ..., -2.1226e-04,\n -7.1429e-04, 2.4199e-04],\n [-3.2353e-04, -4.0743e-05, 6.4612e-05, ..., -4.5458e-05,\n -5.2141e-04, 4.7491e-05],\n [-2.4117e-04, -2.0880e-04, -9.6746e-05, ..., -1.0472e-04,\n -1.1187e-04, -2.1694e-05]], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([[3.8919e-07, 5.2336e-07, 2.3781e-07, ..., 5.8101e-07, 1.3414e-06,\n 7.5221e-07],\n [3.2471e-07, 5.0528e-07, 3.7763e-07, ..., 6.0895e-07, 1.2327e-06,\n 5.4268e-07],\n [3.9090e-07, 4.5282e-07, 3.5805e-07, ..., 5.3190e-07, 1.2924e-06,\n 5.9403e-07],\n ...,\n [3.6554e-07, 4.6305e-07, 2.7245e-07, ..., 5.6196e-07, 1.3753e-06,\n 5.1980e-07],\n [5.6290e-07, 5.2327e-07, 3.1121e-07, ..., 7.2062e-07, 1.3256e-06,\n 9.3174e-07],\n [4.5768e-07, 5.8335e-07, 3.1705e-07, ..., 5.9369e-07, 1.4820e-06,\n 5.3072e-07]], device='cuda:0')"
|
| 19 |
+
},
|
| 20 |
+
"3": {
|
| 21 |
+
"step": "tensor(8764.)",
|
| 22 |
+
"exp_avg": "tensor([[ 2.1243e-04, -1.9024e-04, 6.7456e-04, ..., 3.7971e-04,\n -8.4958e-06, 4.3485e-04],\n [-1.8222e-05, -3.0634e-04, -2.0430e-04, ..., 9.7293e-05,\n -1.5021e-04, 2.2676e-04],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [-1.5431e-05, 1.6406e-04, 6.4511e-05, ..., -2.4012e-05,\n 7.7112e-06, 1.9414e-04],\n [-6.9010e-05, -1.3142e-04, 1.4816e-04, ..., -8.6680e-05,\n -1.1996e-04, -5.7838e-05],\n [-2.8656e-05, 2.7663e-05, 6.0927e-05, ..., -1.4824e-05,\n -8.2945e-05, -3.6555e-05]], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([[6.7763e-07, 1.9219e-07, 6.4740e-07, ..., 2.7960e-06, 2.8169e-07,\n 1.5628e-06],\n [4.0925e-07, 2.8420e-07, 5.4086e-07, ..., 2.4034e-07, 5.9353e-07,\n 1.5713e-07],\n [1.3379e-13, 3.1572e-13, 5.9184e-14, ..., 3.6936e-14, 3.8954e-13,\n 5.4175e-15],\n ...,\n [4.2371e-07, 1.8482e-07, 9.9044e-08, ..., 1.8461e-07, 5.2166e-08,\n 4.3371e-07],\n [3.7823e-07, 2.2407e-07, 2.1899e-07, ..., 2.6054e-07, 1.5732e-06,\n 1.2749e-07],\n [4.9982e-07, 1.1280e-07, 1.2024e-06, ..., 1.4525e-07, 2.2616e-07,\n 3.9323e-07]], device='cuda:0')"
|
| 24 |
+
},
|
| 25 |
+
"4": {
|
| 26 |
+
"step": "tensor(8764.)",
|
| 27 |
+
"exp_avg": "tensor([ 6.4035e-03, -3.5768e-04, 5.6052e-45, ..., 5.5611e-03,\n -2.4265e-03, 2.4726e-03], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([3.2276e-04, 2.2413e-04, 8.7553e-10, ..., 2.1658e-04, 1.8092e-04,\n 2.3484e-04], device='cuda:0')"
|
| 29 |
+
},
|
| 30 |
+
"5": {
|
| 31 |
+
"step": "tensor(8764.)",
|
| 32 |
+
"exp_avg": "tensor([[-4.0797e-05, -1.3492e-04, 5.6052e-45, ..., -1.2031e-04,\n 9.9185e-06, 6.8955e-05],\n [ 2.0200e-05, -1.1098e-04, 5.6052e-45, ..., 1.6978e-05,\n 2.4169e-05, 7.4597e-05],\n [ 7.9566e-05, -2.9168e-04, -5.6052e-45, ..., 7.9041e-05,\n 4.4458e-05, 2.6446e-05],\n ...,\n [-6.8248e-05, -3.9297e-04, -5.6052e-45, ..., 1.2512e-04,\n 6.7625e-05, -3.2781e-05],\n [-6.9210e-07, -1.8447e-04, 5.6052e-45, ..., -5.6553e-05,\n 2.7026e-05, -8.2407e-05],\n [ 3.6743e-05, 8.6505e-05, -5.6052e-45, ..., -1.2276e-04,\n 4.3893e-05, -1.9657e-05]], device='cuda:0')",
|
| 33 |
+
"exp_avg_sq": "tensor([[7.4205e-08, 6.8368e-08, 7.6466e-14, ..., 5.8813e-08, 6.9377e-08,\n 9.9229e-08],\n [1.0207e-07, 9.0560e-08, 1.4183e-13, ..., 5.7872e-08, 8.2100e-08,\n 1.0272e-07],\n [8.7513e-08, 9.3190e-08, 2.4697e-13, ..., 6.9361e-08, 7.8890e-08,\n 1.1725e-07],\n ...,\n [1.1799e-07, 9.9062e-08, 1.0931e-13, ..., 6.5212e-08, 7.3212e-08,\n 1.0301e-07],\n [1.0439e-07, 1.0298e-07, 1.4754e-13, ..., 8.0210e-08, 5.1861e-08,\n 1.3201e-07],\n [8.7664e-08, 8.1901e-08, 3.1117e-13, ..., 7.7709e-08, 6.0490e-08,\n 1.4304e-07]], device='cuda:0')"
|
| 34 |
+
},
|
| 35 |
+
"6": {
|
| 36 |
+
"step": "tensor(8764.)",
|
| 37 |
+
"exp_avg": "tensor([[-4.9396e-06, 1.2893e-04, 5.6158e-04, ..., 9.2359e-05,\n 2.5381e-04, 7.7004e-05],\n [-7.2475e-05, -2.0379e-05, 4.2341e-04, ..., -1.6983e-04,\n -3.6217e-04, -1.3232e-04],\n [ 6.9453e-04, 4.5013e-04, 8.7206e-06, ..., 5.0492e-04,\n 2.7372e-04, 2.0829e-04],\n ...,\n [ 4.1332e-05, 6.9902e-04, 1.3068e-04, ..., 1.0929e-04,\n 8.8836e-05, -1.7106e-04],\n [ 3.4280e-04, 5.0931e-04, 7.9912e-04, ..., 2.3957e-04,\n 1.1239e-05, 5.7839e-04],\n [-3.7124e-04, 3.9552e-04, -2.9053e-04, ..., 5.4298e-04,\n 5.4961e-04, -8.9139e-05]], device='cuda:0')",
|
| 38 |
+
"exp_avg_sq": "tensor([[5.0080e-07, 1.0236e-06, 1.0071e-06, ..., 2.7401e-07, 1.9198e-07,\n 3.1141e-07],\n [4.1562e-07, 6.9354e-07, 1.0579e-06, ..., 2.0561e-07, 1.3698e-07,\n 2.7794e-07],\n [6.2417e-07, 9.2182e-07, 1.0921e-06, ..., 2.7651e-07, 2.0695e-07,\n 2.6418e-07],\n ...,\n [6.5080e-07, 1.1636e-06, 1.3363e-06, ..., 3.5346e-07, 2.4087e-07,\n 3.2455e-07],\n [5.6985e-07, 9.5288e-07, 8.9030e-07, ..., 2.4000e-07, 1.6020e-07,\n 2.2636e-07],\n [5.6878e-07, 1.2874e-06, 1.3161e-06, ..., 3.3131e-07, 2.5914e-07,\n 2.8825e-07]], device='cuda:0')"
|
| 39 |
+
},
|
| 40 |
+
"7": {
|
| 41 |
+
"step": "tensor(8764.)",
|
| 42 |
+
"exp_avg": "tensor([-3.6169e-03, -4.8657e-03, -6.8010e-03, 8.9287e-03, -2.9836e-04,\n -7.7100e-03, 3.2346e-03, 4.8105e-03, 3.1107e-03, 4.7610e-03,\n 2.8612e-03, -1.9238e-03, -4.3002e-03, -3.4577e-03, -1.2674e-03,\n -2.3410e-04, -1.0363e-03, -7.8978e-03, -5.5637e-03, 2.8738e-03,\n 1.4627e-05, 2.0726e-03, 6.2676e-03, -8.9720e-03, -6.7240e-03,\n -4.5903e-03, -1.6831e-02, 3.2987e-03, -6.0711e-03, -6.3619e-04,\n 2.8244e-03, 9.3018e-03, -4.0292e-03, 1.2995e-03, 1.3610e-02,\n -6.8335e-03, 1.0007e-02, 4.8697e-03, -5.2443e-03, 4.8201e-03,\n -1.1612e-03, 4.8178e-03, 5.1909e-04, -5.1202e-04, -1.0861e-02,\n -2.3574e-03, 4.2757e-03, 6.9999e-04, 7.7903e-03, 7.9367e-03,\n -6.8035e-03, 1.2079e-03, 1.3934e-02, -2.0513e-03, -1.3848e-02,\n -1.4266e-03, -4.0156e-03, 1.6037e-04, 4.3782e-03, -1.4474e-02,\n 3.2594e-03, 4.5863e-03, -6.0424e-03, -9.2385e-03, -1.3074e-03,\n -3.0470e-03, 3.3141e-03, -4.2466e-03, -5.6255e-03, 2.3786e-03,\n -2.6322e-03, -3.1634e-03, -1.9006e-03, -5.3196e-04, 1.3974e-03,\n -2.7020e-03, 1.8637e-03, 1.4088e-02, 7.1868e-03, -3.0983e-03,\n 2.1949e-03, -2.3239e-03, -9.8445e-03, 4.3183e-03, -7.0603e-04,\n -1.0057e-02, -7.7664e-03, -4.2562e-03, -6.4112e-03, -3.5292e-03,\n 7.9422e-03, -6.6620e-04, 1.8875e-03, -7.5260e-04, 3.3116e-03,\n 6.6485e-03, -4.4150e-03, -1.5126e-03, 1.1456e-03, 2.9424e-03,\n -2.5046e-04, -1.3273e-03, -3.5620e-03, -5.4947e-03, 4.9064e-04,\n -2.3563e-04, 4.3141e-03, 6.2634e-04, -4.4112e-03, 1.4625e-03,\n -3.1309e-03, -5.1615e-04, 6.4858e-04, -2.9651e-03, 1.5983e-02,\n -7.8143e-03, -2.0761e-04, -1.4799e-03, 8.3736e-03, -3.7615e-03,\n 4.9296e-03, -7.4464e-04, 7.3958e-04, -3.3522e-03, 4.8029e-04,\n 2.9141e-02, 1.8755e-03, -1.6147e-02, -2.6691e-03, -3.0548e-03,\n -2.6197e-03, -4.4040e-03, 1.3227e-03, -2.0173e-03, 9.6158e-03,\n 8.8358e-03, 5.4502e-03, -3.4575e-04, 1.0392e-02, -2.1125e-03,\n 4.3944e-04, 1.2271e-03, 5.4829e-04, -1.3773e-03, -2.1104e-03,\n -1.4124e-03, 2.2020e-03, -1.5087e-02, 1.7091e-03, 1.4836e-03,\n -3.3952e-03, -7.2040e-04, 5.3200e-03, 4.4588e-03, 7.5495e-03,\n -2.3965e-03, -6.4682e-03, 3.9093e-03, -2.8337e-03, -2.9891e-03,\n 8.0557e-03, -3.8628e-03, -6.9979e-03, -7.1474e-03, 5.3005e-03,\n -4.0964e-03, -4.2510e-03, -2.2188e-03, 1.6647e-03, -1.3966e-03,\n -5.4685e-03, 7.6545e-04, -6.2205e-03, -4.3114e-03, 1.2977e-03,\n 6.2161e-04, -7.7067e-03, -7.1533e-03, 4.1679e-03, 3.6805e-03,\n 1.6836e-03, -6.7063e-03, -7.2884e-03, -1.2155e-03, 3.7579e-03,\n 8.9800e-04, -6.3052e-03, 3.3678e-03, 9.9090e-03, -1.2498e-03,\n -6.7303e-03, 1.7907e-03, 2.2952e-04, -1.9774e-03, -3.5696e-03,\n 1.0398e-03, 9.5392e-04, -3.6085e-03, -1.9518e-03, -3.4406e-03,\n -9.5471e-03, 3.4710e-04, 7.3925e-03, -2.9166e-03, 3.2982e-03,\n -1.0443e-02, -4.2566e-03, -8.0340e-03, 5.7330e-03, -2.8856e-03,\n -4.0690e-04, 7.0474e-03, 2.0080e-03, -3.9298e-04, 8.2539e-03,\n 1.0926e-03, -7.6345e-03, -2.7583e-03, -8.8053e-04, -2.4564e-03,\n 9.3885e-04, 1.7426e-03, 6.6971e-03, 8.0757e-03, -7.8295e-03,\n -9.3001e-03, -7.0172e-04, 5.4364e-03, -4.3131e-03, -1.3769e-04,\n 3.0989e-03, -9.4253e-04, 3.3122e-03, 1.3599e-03, 6.0608e-03,\n 1.1484e-03, 1.3839e-03, -5.0162e-03, -5.8814e-03, -6.4699e-03,\n 8.8205e-03, -2.6768e-03, 2.9391e-03, -5.9224e-03, 7.8916e-04,\n 4.0796e-03, -5.5564e-03, -1.2838e-02, 9.7147e-03, -6.2789e-03,\n -5.2357e-03, 7.1774e-03, -3.2168e-03, -3.3477e-04, -5.5242e-05,\n 1.3688e-02, 1.8059e-03, -8.8574e-03, 1.0890e-03, -1.7337e-04,\n -9.0451e-04, 1.0206e-03, -5.1610e-03, -2.7072e-03, 3.1546e-03,\n 4.8214e-04, -1.9352e-03, -2.1704e-03, -5.4355e-03, -1.2858e-02,\n 1.3667e-02, 7.7451e-03, 4.4789e-03, -1.2310e-04, -9.3319e-03,\n 8.3192e-03, 7.2942e-04, -8.6750e-04, -2.9670e-03, 5.7800e-03,\n -1.3905e-03, 1.1472e-02, -9.4803e-03, 7.6153e-03, -5.1308e-04,\n -3.3322e-03, -1.1812e-03, 2.6847e-03, -1.8388e-03, -2.6632e-03,\n -9.4981e-04, -7.5891e-03, 1.2502e-03, -1.1403e-02, -4.5009e-03,\n -8.3840e-03, 1.3528e-03, -1.6472e-03, 1.2848e-03, 1.5910e-03,\n -3.2532e-03, 3.2937e-03, 2.3802e-03, -3.6302e-03, -2.8316e-04,\n -3.3149e-03, -1.6660e-03, 7.2093e-03, 3.7370e-03, -1.3929e-03,\n 7.6871e-03, -2.0655e-03, 7.3211e-04, -1.2141e-03, -6.1319e-03,\n -7.3432e-03, -1.2227e-03, 8.7875e-03, -3.5843e-03, -7.6901e-03,\n 4.1222e-03, 9.9243e-03, -4.2520e-04, -8.4195e-03, 7.9408e-04,\n -7.0948e-03, 1.4602e-03, 1.9703e-03, -4.9346e-03, -2.4549e-03,\n -4.4740e-03, -3.6902e-03, 8.0236e-03, -2.6790e-03, -9.2212e-03,\n 2.2875e-03, -1.9877e-03, -8.5885e-04, -1.9482e-03, -6.0354e-03,\n 7.2300e-03, 5.6336e-04, -1.1082e-02, 6.8084e-03, -2.4010e-03,\n -8.4700e-04, -4.8792e-03, -1.0681e-02, 1.4240e-03, 5.5511e-03,\n -3.7188e-04, -1.9121e-02, 4.6628e-03, -1.0541e-03, 1.4397e-02,\n 8.1240e-03, -2.9288e-03, 6.5062e-05, -2.2849e-03, 9.1071e-03,\n 3.3704e-03, 6.2551e-03, -1.6555e-03, -8.0321e-03, 1.5342e-03,\n -3.3447e-04, -9.6187e-04, -3.3411e-04, -9.4144e-03, 8.5934e-03,\n 4.6768e-03, 5.8247e-03, 9.9069e-04, -3.1541e-03, 3.3872e-03,\n 6.1268e-03, -4.6873e-04, 1.0193e-02, 6.2689e-03, -2.1511e-03,\n -2.4363e-03, 2.8463e-03, -4.9339e-03, 3.0618e-03, -4.0503e-03,\n -9.5927e-04, -4.9835e-03, 1.0441e-03, -9.2915e-03, -8.5660e-03,\n -3.4690e-03, -3.0111e-03, -4.2556e-03, -1.2516e-03, -9.2222e-04,\n 1.7134e-02, -1.4924e-03, -6.5020e-03, -1.5797e-03, -4.0968e-03,\n -1.0066e-02, 3.8404e-03, -6.7702e-03, 1.5265e-03, 3.1642e-03,\n -3.2324e-03, -1.0213e-02, -1.4466e-04, 2.5972e-04, -1.4072e-03,\n -6.1759e-03, -2.0851e-03, -1.1461e-02, -2.7618e-03, 3.2063e-03,\n -5.4406e-03, 4.3896e-03, -1.3960e-02, -3.7649e-03, 2.0915e-03,\n -6.6898e-03, -6.1371e-03, -2.3217e-03, -3.8191e-03, 6.3428e-03,\n -2.8140e-03, -5.1035e-03, 1.3257e-03, -5.0419e-04, -6.9766e-04,\n -4.2144e-03, -6.1276e-03, 2.8205e-03, 8.6099e-03, -1.3538e-03,\n 1.0372e-02, 5.1606e-03, 2.1373e-03, 4.6463e-03, -2.7763e-03,\n 3.8651e-03, 8.5966e-03, 5.1834e-03, 7.8660e-04, -8.1417e-04,\n -1.6991e-03, 4.1859e-03, 2.7748e-03, -1.5962e-03, -3.1145e-03,\n -4.7534e-04, -5.1666e-03, -5.7462e-03, -4.1331e-03, 2.0315e-03,\n 3.2323e-03, -6.4772e-03, -7.7644e-03, -3.0035e-03, 2.5840e-03,\n 1.6970e-03, 2.5019e-05, 5.3808e-03, -8.3671e-03, 1.2406e-02,\n -6.4069e-04, 9.3505e-04, 8.3545e-04, 5.8971e-03, 6.8266e-03,\n -4.1802e-03, -6.4611e-03, 1.5790e-02, 3.6278e-03, 4.6195e-03,\n -8.3487e-03, 8.8064e-04, 5.3509e-03, 1.0559e-03, 6.7750e-03,\n -8.0695e-03, 4.1291e-03, 1.1197e-03, -6.3619e-03, -3.4696e-03,\n 9.0914e-03, -1.5177e-02, -1.1240e-02, -1.7655e-02, 3.7865e-03,\n -5.2211e-03, 2.5515e-03, -1.4598e-03, -7.2285e-03, -2.5499e-03,\n -8.9046e-03, 3.0017e-03, 2.4146e-03, 2.7308e-03, 3.7130e-03,\n -4.8965e-03, 2.2864e-03, 1.4274e-02, -7.8558e-03, 1.1431e-03,\n -7.2555e-03, 2.4877e-03, 1.4774e-04, -3.7766e-03, 6.2107e-04,\n 1.5609e-02, -9.1930e-03], device='cuda:0')",
|
| 43 |
+
"exp_avg_sq": "tensor([0.0003, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002,\n 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0004, 0.0003, 0.0003, 0.0003,\n 0.0003, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0004, 0.0002, 0.0003,\n 0.0003, 0.0002, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002,\n 0.0004, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0004,\n 0.0003, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0003,\n 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002,\n 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002,\n 0.0002, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0004, 0.0002, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0004,\n 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003,\n 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0002, 0.0002, 0.0003, 0.0004,\n 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0002, 0.0004, 0.0002, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002,\n 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0002, 0.0003, 0.0001, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0004,\n 0.0002, 0.0003, 0.0003, 0.0002, 0.0002, 0.0004, 0.0003, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0004,\n 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0001, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0003, 0.0003, 0.0004, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002,\n 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0002, 0.0003, 0.0003, 0.0004, 0.0003, 0.0003, 0.0002, 0.0003,\n 0.0003, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0002, 0.0003, 0.0004, 0.0004, 0.0003, 0.0003, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0002, 0.0002, 0.0003,\n 0.0002, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002, 0.0003, 0.0002,\n 0.0003, 0.0003, 0.0002, 0.0002, 0.0003, 0.0002, 0.0002, 0.0003, 0.0002,\n 0.0003, 0.0003, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0002, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0003, 0.0004, 0.0003, 0.0002, 0.0003, 0.0002,\n 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002,\n 0.0003, 0.0003, 0.0004, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003,\n 0.0003, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003,\n 0.0002, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002,\n 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002,\n 0.0003, 0.0003, 0.0003, 0.0002, 0.0004, 0.0003, 0.0002, 0.0002, 0.0002,\n 0.0002, 0.0002, 0.0002, 0.0004, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002,\n 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0002, 0.0002, 0.0001, 0.0003, 0.0003, 0.0002, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002,\n 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003,\n 0.0003, 0.0004, 0.0004, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002,\n 0.0003, 0.0002, 0.0001, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003],\n device='cuda:0')"
|
| 44 |
+
},
|
| 45 |
+
"8": {
|
| 46 |
+
"step": "tensor(8764.)",
|
| 47 |
+
"exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-4.4351e-05, -1.7115e-05, -9.9487e-06, ..., 6.9125e-06,\n -1.3103e-05, -8.8260e-05],\n [ 1.2963e-04, 7.1316e-05, 2.0631e-05, ..., -2.2838e-05,\n 4.2538e-04, 9.3337e-05],\n ...,\n [-4.4010e-04, 4.1626e-06, -7.4140e-05, ..., -2.7364e-05,\n 1.3134e-05, -8.5909e-05],\n [ 7.1439e-05, 1.0383e-04, 3.0997e-06, ..., 5.1062e-05,\n 1.1519e-04, 4.3364e-05],\n [ 2.2915e-04, -2.4079e-06, 3.0039e-05, ..., 7.3116e-06,\n -9.2827e-05, 2.4608e-04]], device='cuda:0')",
|
| 48 |
+
"exp_avg_sq": "tensor([[1.6744e-15, 5.2364e-14, 3.0044e-15, ..., 2.2372e-13, 2.2416e-15,\n 7.9169e-16],\n [1.4807e-07, 2.3019e-07, 1.8880e-07, ..., 5.6286e-08, 4.8355e-08,\n 1.1723e-07],\n [6.2630e-08, 1.8307e-07, 2.1456e-07, ..., 7.5975e-08, 1.2153e-06,\n 2.4317e-07],\n ...,\n [5.6802e-07, 3.5121e-08, 1.6296e-07, ..., 2.2925e-07, 5.4979e-08,\n 5.1232e-07],\n [7.6711e-08, 1.0464e-07, 1.6586e-08, ..., 2.2711e-07, 1.2974e-06,\n 3.5608e-08],\n [1.0194e-07, 2.6567e-08, 2.7063e-08, ..., 8.0632e-08, 2.7206e-07,\n 3.5089e-07]], device='cuda:0')"
|
| 49 |
+
},
|
| 50 |
+
"9": {
|
| 51 |
+
"step": "tensor(8764.)",
|
| 52 |
+
"exp_avg": "tensor([ 5.6052e-45, -5.1914e-03, 8.2541e-03, ..., -3.8596e-03,\n 5.0723e-03, -6.1015e-04], device='cuda:0')",
|
| 53 |
+
"exp_avg_sq": "tensor([2.7560e-10, 1.7893e-04, 1.4841e-04, ..., 1.1237e-04, 1.0291e-04,\n 8.6540e-05], device='cuda:0')"
|
| 54 |
+
},
|
| 55 |
+
"10": {
|
| 56 |
+
"step": "tensor(8764.)",
|
| 57 |
+
"exp_avg": "tensor([[-5.6052e-45, -4.7411e-06, 4.8629e-05, ..., 3.3045e-05,\n 9.9746e-06, -4.3416e-06],\n [ 5.6052e-45, -4.1643e-05, -2.2159e-05, ..., 1.8889e-05,\n -2.4303e-07, -2.4821e-07],\n [ 5.6052e-45, 1.8212e-05, -4.3453e-05, ..., 4.5973e-05,\n -1.9274e-04, -5.0667e-05],\n ...,\n [-5.6052e-45, 5.4834e-05, -6.9441e-06, ..., 3.7907e-05,\n -9.4904e-06, -1.8234e-05],\n [-5.6052e-45, -5.6269e-05, -2.6123e-05, ..., -1.6216e-04,\n 5.7277e-06, -1.5374e-04],\n [ 5.6052e-45, 7.2040e-06, 1.9939e-05, ..., -3.5679e-05,\n 2.9311e-05, 2.7959e-05]], device='cuda:0')",
|
| 58 |
+
"exp_avg_sq": "tensor([[1.1469e-14, 2.2924e-08, 3.7819e-08, ..., 2.9580e-08, 1.9497e-08,\n 1.7909e-08],\n [1.4589e-14, 2.3200e-08, 4.7620e-08, ..., 4.7509e-08, 2.3287e-08,\n 2.2218e-08],\n [1.6323e-15, 2.6929e-08, 3.4149e-08, ..., 4.7079e-08, 3.1163e-08,\n 2.7663e-08],\n ...,\n [1.4800e-14, 3.0782e-08, 8.9392e-08, ..., 3.8591e-08, 3.4006e-08,\n 2.8188e-08],\n [2.0748e-15, 2.7472e-08, 3.6783e-08, ..., 3.9787e-08, 2.6683e-08,\n 2.8045e-08],\n [5.2248e-16, 2.7328e-08, 6.3238e-08, ..., 3.4147e-08, 3.0573e-08,\n 2.8328e-08]], device='cuda:0')"
|
| 59 |
+
},
|
| 60 |
+
"11": {
|
| 61 |
+
"step": "tensor(8764.)",
|
| 62 |
+
"exp_avg": "tensor([[ 1.1977e-04, -1.4029e-04, -1.0840e-04, ..., 1.2688e-05,\n -1.7587e-04, -2.2856e-04],\n [ 2.0058e-05, 3.5881e-05, 1.0928e-04, ..., -9.5214e-05,\n -1.1534e-05, -7.4375e-05],\n [-3.8379e-05, 1.5285e-04, 1.0244e-04, ..., -1.4372e-04,\n 6.5528e-06, -3.9647e-05],\n ...,\n [ 1.0850e-04, -1.8057e-04, -5.5349e-05, ..., 3.3719e-06,\n -4.7828e-05, 1.1048e-04],\n [-2.3443e-05, 1.4972e-05, -1.1462e-04, ..., -1.5264e-04,\n 8.7994e-05, -1.8067e-05],\n [ 2.6400e-05, 6.0178e-05, 8.8580e-05, ..., -3.7769e-05,\n -8.1129e-06, -1.4177e-04]], device='cuda:0')",
|
| 63 |
+
"exp_avg_sq": "tensor([[1.3515e-07, 2.2281e-07, 3.7045e-07, ..., 1.1423e-07, 7.8287e-08,\n 1.4752e-07],\n [1.2960e-07, 1.9038e-07, 2.6403e-07, ..., 1.2394e-07, 8.2538e-08,\n 9.8904e-08],\n [1.0475e-07, 1.9435e-07, 1.6298e-07, ..., 9.1636e-08, 6.0610e-08,\n 7.9742e-08],\n ...,\n [1.3045e-07, 2.8006e-07, 2.9848e-07, ..., 1.3195e-07, 9.7053e-08,\n 1.3617e-07],\n [1.2017e-07, 2.1507e-07, 2.2830e-07, ..., 1.0840e-07, 5.8904e-08,\n 1.1674e-07],\n [1.1801e-07, 2.3646e-07, 2.4830e-07, ..., 1.1732e-07, 7.6662e-08,\n 1.1327e-07]], device='cuda:0')"
|
| 64 |
+
},
|
| 65 |
+
"12": {
|
| 66 |
+
"step": "tensor(8764.)",
|
| 67 |
+
"exp_avg": "tensor([ 8.2227e-04, 7.1150e-04, -4.7610e-03, -1.8129e-03, -9.8523e-03,\n 1.1648e-03, -9.7054e-04, 4.0909e-03, 2.0361e-03, 4.8996e-04,\n 1.0089e-04, -3.4519e-03, 7.3385e-04, -3.3425e-03, -7.5177e-03,\n 1.1081e-03, 2.3242e-03, -2.4379e-03, -1.6493e-03, 7.9284e-04,\n 5.1791e-03, -5.1043e-03, -4.5346e-03, 9.4504e-03, -2.7882e-03,\n -1.8575e-03, 2.2211e-03, -1.6808e-03, -3.3777e-03, -4.7886e-03,\n -2.5461e-03, -3.1509e-03, 1.4097e-02, 1.2019e-03, -2.1550e-04,\n -3.4787e-04, 4.2229e-03, 2.1768e-03, -1.4282e-03, 1.0336e-02,\n -6.4811e-03, -3.7732e-03, -4.0382e-03, -2.1986e-03, 4.0349e-03,\n -1.7757e-03, 3.4123e-03, 2.7830e-03, -2.8050e-04, 1.4418e-03,\n -2.2879e-03, 1.9043e-03, -7.1144e-03, 3.9189e-03, -1.2104e-03,\n 1.6845e-03, -3.1562e-03, 2.7882e-03, -3.9974e-03, -1.1770e-03,\n 5.7403e-03, -4.3802e-03, -2.5561e-03, -2.0237e-03, 2.8005e-03,\n 3.7214e-03, -6.3485e-03, -9.1885e-04, -4.9854e-03, -2.8240e-03,\n -2.2754e-03, 2.7590e-03, -5.1207e-04, -6.4476e-04, -1.9057e-03,\n 1.3683e-03, -2.7659e-03, -7.5411e-04, -6.7554e-06, -3.5820e-03,\n 2.1838e-04, 1.2058e-03, 6.7217e-03, -3.6463e-03, 5.6952e-03,\n 1.0565e-03, -2.7148e-03, -6.4399e-03, -1.8879e-03, 4.8237e-03,\n -8.8697e-05, 3.8451e-04, 1.5028e-03, 3.4978e-03, -1.9258e-03,\n 3.4140e-03, -4.5175e-03, -3.7942e-03, 1.0169e-03, -3.6328e-03,\n 7.6490e-04, 6.5365e-03, 8.1564e-04, 3.2533e-03, -8.5590e-04,\n 1.2315e-03, 4.5627e-03, 1.4746e-03, -1.0217e-03, 5.8885e-03,\n 4.1750e-15, -7.1578e-03, 3.1725e-03, 3.6224e-03, -7.6722e-04,\n 1.0229e-03, 1.7997e-03, -2.5907e-03, -2.3403e-03, -3.5660e-03,\n -3.6601e-03, -7.4054e-03, 6.3243e-04, 8.9190e-04, -8.9005e-04,\n -4.7618e-05, 1.1662e-03, 6.3297e-04, 6.2437e-04, -4.1242e-03,\n -3.5108e-03, -1.8221e-03, -1.0862e-03, 3.5918e-03, 4.6726e-03,\n 4.7728e-03, -3.0469e-03, 8.3089e-05, 5.0162e-04, 1.5011e-03,\n -3.6005e-03, -1.7326e-03, -6.5845e-04, -3.7213e-03, 7.4544e-03,\n -1.0315e-03, 3.3927e-03, -1.5421e-03, 6.0741e-03, -6.7331e-03,\n -4.7322e-04, -5.2083e-04, -1.3750e-03, -2.7572e-03, -3.0472e-03,\n 6.1528e-03, -1.2490e-03, 2.7140e-03, 2.8391e-03, -6.6269e-03,\n -1.0841e-03, 5.9904e-04, -7.6318e-03, 3.5112e-03, 3.4773e-03,\n -6.3151e-03, -1.0369e-03, -5.8321e-03, 1.8062e-03, -2.4871e-04,\n -2.9360e-03, 1.2828e-03, -4.9499e-03, 1.1708e-02, -2.6199e-03,\n 1.6153e-03, -9.0061e-03, 1.2836e-03, 7.9063e-03, -5.2300e-05,\n -2.7108e-07, 5.6932e-03, 3.6593e-03, -1.9095e-03, -1.1843e-03,\n 3.5035e-03, -1.0904e-03, 5.3573e-03, 4.7097e-03, -2.2025e-03,\n 6.4771e-03, 3.6642e-03, -7.3183e-03, 4.0172e-03, 9.8865e-03,\n 3.2260e-03, 1.0791e-03, -7.3076e-03, -1.1042e-03, 3.7907e-03,\n 3.1248e-03, 3.3731e-03, -6.2239e-04, 4.1507e-03, 4.6285e-03,\n 4.4689e-03, -5.0562e-03, -2.9029e-03, 2.3992e-03, 2.1358e-03,\n -4.3428e-03, 2.2752e-03, -1.4807e-03, -6.9458e-04, -3.8399e-03,\n 3.6555e-03, -9.5301e-04, 5.4316e-03, 6.2101e-03, 1.5027e-03,\n -1.2897e-04, -3.7052e-03, 3.1776e-03, 4.4423e-03, -6.1048e-03,\n -3.8652e-04, -5.6091e-04, -2.1909e-03, -7.7120e-04, 2.5490e-04,\n 7.2666e-03, 3.6705e-03, 5.5902e-03, 1.0322e-03, 2.8392e-03,\n 1.6622e-03, 4.6280e-03, 9.0514e-04, -2.3420e-03, -6.2422e-03,\n -9.8124e-03, -7.8278e-03, -6.7467e-04, -7.4316e-03, 4.5913e-03,\n 4.3466e-03, 9.9258e-03, 1.1168e-02, -1.0509e-03, -3.1190e-03,\n 2.5035e-03, 1.8386e-03, 7.7560e-04, -6.2200e-03, 1.0426e-03,\n -5.7563e-03, 3.9682e-03, 4.4225e-03, -3.0779e-03, 2.3950e-03,\n 3.2001e-03, 3.8801e-04, -2.6674e-03, -5.8778e-03, -1.1220e-03,\n 1.1840e-03, -9.5390e-04, 2.2365e-03, 7.5381e-03, 5.9087e-04,\n -5.3534e-03, -6.1401e-04, 1.2236e-04, -3.3635e-03, 1.0444e-04,\n 5.2201e-04, -4.1700e-04, -7.4656e-04, 5.2245e-04, 1.0076e-03,\n -4.4592e-03, 5.5818e-03, -5.0465e-03, -3.5470e-03, 1.2664e-03,\n -7.1584e-05, -2.8344e-03, 1.3341e-03, -5.4661e-03, -3.0390e-03,\n -1.3022e-03, 3.2732e-03, 3.0543e-03, 6.4927e-03, -1.8662e-03,\n 7.2318e-03, -5.6673e-03, 1.8693e-03, -4.0331e-03, 1.6555e-03,\n -4.7490e-03, -9.5830e-03, 4.4232e-03, -5.5649e-03, 2.0106e-03,\n 1.8973e-03, -4.2449e-03, -5.2809e-03, 6.4058e-05, -2.2761e-04,\n 1.7034e-03, -7.1469e-03, -6.7336e-03, 1.1287e-03, -1.2825e-05,\n -3.1751e-03, -1.0135e-03, 4.6115e-03, -1.7894e-03, 1.7089e-03,\n -5.4292e-03, -2.7827e-03, 2.4007e-03, -7.3972e-04, 2.7723e-03,\n 5.2758e-03, 5.7923e-03, 1.2078e-03, 4.4261e-03, 6.7477e-03,\n 2.0252e-03, -1.6490e-03, -3.6416e-03, -1.8390e-03, -6.3663e-04,\n -2.8092e-03, -7.0202e-03, -4.4914e-03, 1.3186e-03, -3.5986e-04,\n 2.3640e-03, -1.6347e-03, 1.7378e-03, -7.7956e-03, 4.4799e-03,\n -7.0954e-03, -5.3673e-05, 1.8887e-03, 9.4718e-04, 6.5917e-04,\n 1.0100e-03, -5.6229e-03, -2.3987e-03, -1.5081e-03, 1.2020e-02,\n 6.9140e-03, 5.1316e-03, -4.6343e-03, -6.3086e-04, 2.4386e-04,\n 9.9708e-04, 4.2097e-05, -7.1322e-03, -7.4363e-04, 1.0145e-03,\n -1.1809e-02, 1.6610e-03, -4.2366e-03, -4.5313e-03, -2.1166e-03,\n 4.8737e-03, 1.0141e-03, -4.6684e-03, -3.0860e-03, 6.3760e-03,\n 9.9884e-05, 1.6888e-03, 3.9419e-03, -8.2221e-03, -4.7366e-03,\n -3.0287e-03, -1.4267e-03, 4.1982e-03, 4.1030e-03, 1.4856e-03,\n -9.4087e-04, -2.0105e-03, -4.8920e-03, -3.7439e-03, -7.0285e-03,\n 8.5160e-03, -3.2099e-03, 6.3170e-04, 1.0008e-03, -6.7672e-03,\n 1.7251e-03, -2.9381e-03, -2.1127e-03, 8.0303e-04, 2.4744e-03,\n 3.7647e-03, -2.8418e-03, -2.8399e-03, -5.0716e-03, -7.6721e-03,\n -4.7135e-03, 3.1244e-03, 1.0617e-03, 3.7841e-04, 1.7939e-03,\n 1.6040e-03, 2.9393e-03, -1.0172e-02, 3.0593e-03, 7.1268e-03,\n 7.3031e-03, 2.8952e-04, 5.6902e-03, 6.8799e-03, 1.9996e-03,\n -8.4771e-04, -7.1776e-04, -9.0661e-04, 2.9901e-03, -4.1720e-04,\n -3.3377e-03, -1.3722e-03, 3.4810e-03, 8.5968e-03, 2.0075e-03,\n -2.9685e-03, -2.3886e-03, 3.7729e-03, 4.9881e-03, -3.7622e-04,\n 3.7709e-04, 8.7912e-04, -2.6207e-03, -2.2193e-03, 6.2818e-04,\n 2.2680e-03, 1.1039e-03, -1.5797e-03, -3.4445e-03, -1.6189e-03,\n 7.7797e-03, -4.9808e-03, 2.0728e-03, -5.6591e-03, -4.8890e-03,\n -1.9629e-04, -3.4646e-03, 2.6944e-03, 4.4847e-03, -4.4433e-04,\n 2.3863e-04, -4.0961e-04, 4.8969e-03, 1.7186e-03, -6.0495e-04,\n -3.2688e-03, 3.1972e-03, 9.3647e-03, 2.9532e-03, -1.4663e-03,\n -2.4483e-04, -3.9077e-03, 4.5833e-03, 4.1251e-03, 1.0566e-04,\n -1.4313e-03, 2.5822e-03, 3.9499e-03, 2.4968e-03, -4.1222e-03,\n -5.2425e-04, 5.7743e-03, 2.1791e-03, 4.7248e-05, 4.4913e-04,\n -6.2756e-03, 6.4741e-04, 9.2292e-04, 5.6223e-03, -4.8822e-03,\n 1.3235e-03, 1.5016e-04, 1.9809e-04, -4.3433e-03, 4.2574e-03,\n 1.9699e-03, -4.7494e-04, 2.8569e-03, 1.9349e-04, 6.6990e-03,\n 7.0214e-04, 7.6230e-03, 3.6830e-04, 3.4915e-03, -3.8690e-03,\n 1.6321e-04, -5.1364e-03, 1.5083e-03, 2.9003e-03, 1.4902e-03,\n 2.7676e-04, 8.8620e-04, -9.0237e-04, -4.3355e-03, -8.6469e-03,\n -2.5159e-03, -7.6872e-04, -1.0677e-03, -2.8648e-03, -6.7435e-03,\n 6.8805e-03, -1.4899e-03, -3.7745e-04, -1.4838e-03, 5.0981e-03,\n 6.4029e-04, 8.3523e-04, -7.1179e-03, -8.2014e-04, 2.3696e-03,\n -5.5733e-05, -6.4388e-03, 2.8372e-03, -9.0016e-03, 2.5446e-03,\n 3.2651e-03, -6.0604e-03, -3.2254e-03, -6.2279e-03, 3.9524e-03,\n 1.0124e-04, -8.5508e-03, 5.0399e-04, 3.6303e-03, 3.6301e-03,\n 9.8370e-04, -1.7245e-03, 1.0706e-03, 3.5007e-03, 5.6909e-04,\n 1.9425e-03, -2.6066e-03, -9.2354e-04, 3.4781e-03, 2.8775e-03,\n 2.0162e-03, -7.0621e-04, 7.2823e-04, 6.8577e-04, -1.7422e-03,\n -7.3583e-03, 6.6964e-03, -1.4307e-03, 1.5168e-03, 4.8261e-03,\n -4.3139e-03, -3.1496e-03, 2.6006e-03, 9.1407e-04, -8.1402e-03,\n -3.1955e-03, -2.5157e-05, -9.1851e-04, 1.4693e-03, 4.1090e-03,\n 7.8701e-04, 3.5122e-04, 2.6592e-03, 4.1920e-03, 6.4912e-03,\n 7.1441e-04, -8.8114e-04, -4.3431e-03, 5.6052e-45, -2.2785e-03,\n -8.5469e-04, 5.6052e-45, 2.9449e-03, 2.1215e-03, 4.8486e-04,\n 2.9540e-03, 1.8462e-03, -3.6414e-03, -2.7219e-03, 1.8835e-03,\n -2.7991e-03, 2.9228e-03, 4.1746e-03, 1.0611e-03, -3.2645e-03,\n -1.3997e-03, 6.4017e-03, 1.9298e-03, -5.9569e-03, 7.1052e-03,\n -1.5770e-04, -1.2375e-03, 3.8907e-03, 5.9098e-03, 2.2058e-03,\n 3.2399e-03, -2.0834e-03, -2.8268e-04, -3.5476e-04, 6.7939e-03,\n 3.5735e-03, -4.9762e-03, -3.6094e-04, -6.7236e-04, -2.0965e-03,\n 2.2386e-03, 6.1248e-03, -6.0373e-03, 4.9238e-03, 5.0398e-05,\n -1.5135e-03, 2.1267e-03, 1.4644e-03, -4.7580e-03, -2.5212e-05,\n 2.2044e-04, 6.1580e-03, -1.0579e-02, 2.5325e-03, -4.4449e-03,\n -5.2604e-03, 8.0053e-04, -5.4627e-03, 3.6171e-03, -1.2594e-03,\n 3.0725e-03, 1.5275e-03, -5.1532e-03, 7.0164e-03, 2.5049e-03,\n -4.3887e-04, 2.6673e-04, 1.6571e-03, 4.0933e-03, 3.1286e-03,\n 4.3535e-03, -1.3547e-03, 4.5653e-04, 4.5154e-04, 4.3659e-03,\n -1.1664e-03, -3.2114e-04, -4.3343e-03, 1.4662e-03, -2.5472e-03,\n 5.2354e-03, 2.3472e-03, -3.1321e-03, 1.4748e-04, -5.2659e-03,\n -2.1912e-03, 2.7786e-03, -1.1405e-03, -2.5531e-03, 3.5565e-03,\n -2.3944e-03, -3.8786e-04, 4.1179e-04, -6.0217e-04, -1.6224e-03,\n 1.2436e-03, -2.1792e-03, -4.6178e-03, 1.8901e-03, -6.2499e-03,\n 5.3976e-03, 2.9755e-03, -4.5007e-03, 2.3910e-03, 4.2012e-03,\n -4.3189e-03, -6.7473e-03, -1.6881e-05, -1.3494e-03, 2.0298e-03,\n 3.0130e-03, 8.0997e-04, -7.5240e-04, 4.6066e-03, -3.9199e-03,\n 6.6974e-03, -7.1142e-03, -2.8090e-03, 2.3816e-03, -9.8144e-03,\n -1.4477e-03, -5.0994e-03, -3.0753e-03, -1.9084e-03, -4.4470e-03,\n -2.5510e-04, 2.1812e-03, 1.2904e-03, -1.4172e-03, 1.1360e-02,\n -2.9011e-03, -1.2451e-03, -3.5976e-03, 4.3413e-03, 3.3658e-03,\n 1.4267e-03, -2.3081e-03, 1.0964e-02, -2.3811e-03, -2.7141e-03,\n -6.4857e-03, -8.3053e-03, -6.9326e-04, 1.2092e-03, -2.6537e-03,\n 4.0782e-03, 3.4976e-04, 3.7157e-03, 1.6361e-03, -1.0230e-04,\n 3.4340e-03, 1.0367e-03, 6.9532e-03, 1.7878e-03, -1.3773e-03,\n -9.7728e-04, -4.2922e-03, -8.7681e-04, -4.3792e-03, 5.4465e-04,\n 3.4734e-03, -3.7972e-03, -1.4718e-03, -7.4409e-03, 3.5306e-03,\n -3.9814e-03, -5.0024e-03, -2.4084e-03, -9.4345e-03, -1.7066e-03,\n 4.7344e-03, 7.7331e-03, -8.2704e-03, 4.2908e-03, 2.4975e-03,\n -2.8137e-03, 1.2598e-02, -2.9923e-03, -3.0796e-03, -1.8999e-03,\n -4.9831e-03, 1.1579e-03, -5.9650e-03, -3.4929e-03, 1.3252e-03,\n -4.3930e-03, 4.6991e-03, -1.7773e-03, -1.3930e-03, 3.0620e-04,\n 2.3246e-03, -9.4538e-04, -2.4310e-03], device='cuda:0')",
|
| 68 |
+
"exp_avg_sq": "tensor([1.2109e-04, 1.1188e-04, 9.6168e-05, 1.2200e-04, 1.7215e-04, 1.5105e-04,\n 1.1981e-04, 1.2690e-04, 1.5392e-04, 1.3407e-04, 1.2912e-04, 1.8825e-04,\n 1.2570e-04, 1.4951e-04, 1.0851e-04, 7.4731e-05, 1.1616e-04, 1.2427e-04,\n 1.6963e-04, 8.8874e-05, 1.2325e-04, 1.5594e-04, 1.2893e-04, 1.3281e-04,\n 1.3677e-04, 1.1506e-04, 9.1947e-05, 1.3459e-04, 9.5622e-05, 1.2852e-04,\n 1.3921e-04, 1.2195e-04, 1.4261e-04, 9.6471e-05, 1.4790e-04, 1.4974e-04,\n 1.4139e-04, 1.1551e-04, 1.1335e-04, 1.1109e-04, 1.5119e-04, 1.4016e-04,\n 1.3630e-04, 9.4506e-05, 9.6929e-05, 1.0905e-04, 4.4470e-05, 1.1242e-04,\n 1.5603e-04, 1.1351e-04, 7.8997e-05, 1.1882e-04, 1.2184e-04, 9.1562e-05,\n 1.6333e-04, 1.2248e-04, 7.0451e-05, 9.0949e-05, 1.5003e-04, 1.2900e-04,\n 1.7716e-04, 1.5511e-04, 1.3403e-04, 1.2174e-04, 1.2111e-04, 1.5358e-04,\n 1.3263e-04, 1.1785e-04, 1.5567e-04, 1.1718e-04, 1.0906e-04, 1.2089e-04,\n 1.2926e-04, 1.3402e-04, 1.3106e-04, 1.2539e-04, 1.3312e-04, 1.4087e-04,\n 1.3975e-04, 1.6999e-04, 1.3364e-04, 1.0482e-04, 1.5653e-04, 1.5618e-04,\n 1.3358e-04, 1.5968e-04, 1.1316e-04, 1.4019e-04, 1.1199e-04, 1.4326e-04,\n 1.1716e-04, 1.0174e-04, 1.4637e-04, 1.2162e-04, 9.3747e-05, 1.6294e-04,\n 1.0432e-04, 1.3523e-04, 1.3919e-04, 1.4484e-04, 1.3448e-04, 1.7337e-04,\n 1.3128e-04, 9.2801e-05, 1.2667e-04, 1.2071e-04, 1.3058e-04, 1.2081e-04,\n 1.2793e-04, 1.0250e-04, 2.0947e-10, 8.1096e-05, 1.3737e-04, 1.3038e-04,\n 1.1762e-04, 1.5002e-04, 1.4590e-04, 1.2892e-04, 1.4507e-04, 1.3341e-04,\n 1.1400e-04, 1.7807e-04, 1.5789e-04, 1.4380e-04, 1.6951e-04, 1.0289e-04,\n 1.0507e-04, 1.1757e-04, 1.3217e-04, 1.3170e-04, 1.2028e-04, 1.5479e-04,\n 1.3320e-04, 1.4182e-04, 1.4013e-04, 1.1069e-04, 1.4884e-04, 1.4205e-04,\n 1.5626e-04, 1.0813e-04, 9.5471e-05, 9.2904e-05, 1.2403e-04, 1.1736e-04,\n 1.1958e-04, 1.2962e-04, 9.5513e-05, 1.8915e-04, 1.1189e-04, 1.2129e-04,\n 1.2876e-04, 1.0023e-04, 1.1943e-04, 1.4978e-04, 1.5494e-04, 1.3802e-04,\n 1.7064e-04, 1.6869e-04, 1.2881e-04, 1.5548e-04, 1.4429e-04, 1.2633e-04,\n 1.2669e-04, 1.4026e-04, 1.1971e-04, 1.2207e-04, 1.3656e-04, 1.4304e-04,\n 1.3999e-04, 9.6417e-05, 1.3132e-04, 1.2881e-04, 1.3738e-04, 1.1756e-04,\n 1.0028e-04, 1.2694e-04, 1.0738e-04, 1.0444e-04, 1.3002e-04, 1.4626e-04,\n 5.9527e-09, 1.3986e-04, 1.2098e-04, 1.4847e-04, 1.7482e-04, 1.0612e-04,\n 1.6731e-04, 1.2526e-04, 1.3431e-04, 1.0097e-04, 1.1006e-04, 1.3181e-04,\n 1.5138e-04, 1.1532e-04, 1.1899e-04, 1.2435e-04, 1.6030e-04, 1.1868e-04,\n 9.4591e-05, 1.5037e-04, 1.2213e-04, 1.1847e-04, 1.3127e-04, 6.1508e-05,\n 1.1907e-04, 8.0218e-05, 9.9563e-05, 1.1270e-04, 1.1668e-04, 1.2196e-04,\n 1.5097e-04, 9.7425e-05, 1.5301e-04, 1.1358e-04, 6.2956e-05, 1.4809e-04,\n 1.1920e-04, 1.2568e-04, 1.3920e-04, 1.6369e-04, 1.7385e-04, 8.5664e-05,\n 1.1261e-04, 1.2882e-04, 1.0505e-04, 9.9191e-05, 1.0123e-04, 1.0832e-04,\n 1.2386e-04, 1.1788e-04, 1.0582e-04, 1.2667e-04, 1.4620e-04, 1.0365e-04,\n 1.5298e-04, 1.3099e-04, 1.2197e-04, 9.7127e-05, 1.6646e-04, 1.6005e-04,\n 1.0825e-04, 1.4528e-04, 1.3145e-04, 1.1031e-04, 1.1978e-04, 1.0748e-04,\n 1.7581e-04, 1.0490e-04, 1.6058e-04, 1.3388e-04, 1.5425e-04, 1.6120e-04,\n 1.2025e-04, 1.2815e-04, 1.0980e-04, 1.0616e-04, 1.4830e-04, 1.7550e-04,\n 1.1025e-04, 1.2294e-04, 1.4651e-04, 1.8984e-04, 8.4928e-05, 1.6497e-04,\n 1.1298e-04, 4.0654e-05, 1.1665e-04, 1.3564e-04, 1.4737e-04, 8.3741e-05,\n 1.0560e-04, 1.2237e-04, 1.2533e-04, 1.1549e-04, 1.1977e-04, 1.2322e-04,\n 1.1881e-04, 1.0676e-04, 1.2680e-04, 6.8138e-05, 1.6950e-04, 1.5598e-04,\n 1.0690e-04, 1.3929e-04, 1.2820e-04, 1.3112e-04, 8.3132e-05, 1.1890e-04,\n 1.3288e-04, 1.5507e-04, 1.0256e-04, 1.0642e-04, 1.3160e-04, 1.5936e-04,\n 1.4057e-04, 9.6629e-05, 1.3948e-04, 9.7821e-05, 9.9361e-05, 1.3614e-04,\n 1.1519e-04, 1.5114e-04, 1.4937e-04, 1.2457e-04, 1.0638e-04, 1.4402e-04,\n 1.6746e-04, 1.0791e-04, 1.4074e-04, 1.0122e-04, 1.6262e-04, 1.3973e-04,\n 1.2584e-04, 1.0126e-04, 1.2269e-04, 1.0668e-04, 1.5712e-04, 1.1720e-04,\n 1.3029e-04, 9.3230e-05, 1.2402e-04, 1.1466e-04, 1.1938e-04, 1.0757e-04,\n 1.4107e-04, 1.2970e-04, 1.2722e-04, 1.4371e-04, 9.6695e-05, 8.6775e-05,\n 1.3060e-04, 9.1389e-05, 1.2573e-04, 9.9564e-05, 1.2702e-04, 1.0566e-04,\n 1.1971e-04, 1.2028e-04, 1.3877e-04, 1.3514e-04, 1.3815e-04, 9.9549e-05,\n 1.3385e-04, 1.7375e-04, 1.3116e-04, 1.7283e-04, 1.3449e-04, 1.5356e-04,\n 1.0635e-04, 1.1534e-04, 1.3473e-04, 1.7661e-04, 9.4720e-05, 7.5310e-05,\n 1.5047e-04, 1.5768e-04, 1.1847e-04, 1.1945e-04, 1.0472e-04, 1.2910e-04,\n 8.7627e-05, 9.6630e-05, 1.5483e-04, 1.5704e-04, 8.2807e-05, 1.4783e-04,\n 9.1818e-05, 1.0801e-04, 1.2005e-04, 1.0926e-04, 1.1534e-04, 1.1525e-04,\n 1.2257e-04, 1.0671e-04, 1.6267e-04, 9.2808e-05, 8.3113e-05, 1.3833e-04,\n 1.5222e-04, 1.3240e-04, 7.8521e-05, 1.7302e-04, 1.2285e-04, 1.0562e-04,\n 1.3959e-04, 1.6235e-04, 1.2970e-04, 1.0230e-04, 9.9688e-05, 1.3985e-04,\n 1.7423e-04, 1.1207e-04, 1.1687e-04, 1.1102e-04, 1.0635e-04, 1.2838e-04,\n 9.9465e-05, 1.4165e-04, 1.2475e-04, 1.5321e-04, 1.1507e-04, 1.2092e-04,\n 1.3377e-04, 1.0612e-04, 1.4978e-04, 9.4700e-05, 1.2312e-04, 1.3609e-04,\n 1.4025e-04, 1.2953e-04, 1.6900e-04, 1.2247e-04, 1.2560e-04, 1.2939e-04,\n 1.5360e-04, 1.4863e-04, 8.4648e-05, 1.4514e-04, 1.2064e-04, 1.1203e-04,\n 1.3175e-04, 1.1515e-04, 1.3995e-04, 1.0183e-04, 9.8871e-05, 1.0426e-04,\n 8.1078e-05, 1.5300e-04, 1.4191e-04, 1.2500e-04, 1.4500e-04, 1.3728e-04,\n 9.7585e-05, 1.2093e-04, 9.2498e-05, 1.1035e-04, 1.2356e-04, 1.1614e-04,\n 1.2774e-04, 1.1272e-04, 1.5696e-04, 9.5597e-05, 1.1537e-04, 1.4033e-04,\n 1.4579e-04, 1.4065e-04, 1.0517e-04, 9.6076e-05, 1.5707e-04, 1.4089e-04,\n 1.4326e-04, 1.2455e-04, 9.9097e-05, 1.1418e-04, 1.5706e-04, 1.2687e-04,\n 1.0506e-04, 8.8742e-05, 1.4058e-04, 1.0933e-04, 9.5061e-05, 1.4141e-04,\n 1.2578e-04, 1.7810e-04, 9.6881e-05, 1.1581e-04, 1.3409e-04, 1.2742e-04,\n 1.1285e-04, 9.9349e-05, 8.2364e-05, 1.1655e-04, 1.3221e-04, 1.3120e-04,\n 1.0760e-04, 1.5415e-04, 1.2058e-04, 9.5647e-05, 1.2488e-04, 8.8012e-05,\n 1.1410e-04, 1.3576e-04, 9.0943e-05, 1.3907e-04, 1.5720e-04, 1.2562e-04,\n 1.1284e-04, 1.5102e-04, 8.4001e-05, 1.3630e-04, 1.7111e-04, 1.7230e-04,\n 1.4275e-04, 1.3942e-04, 1.4599e-04, 1.2043e-04, 1.4750e-04, 1.2660e-04,\n 1.7153e-04, 1.6447e-04, 1.1590e-04, 1.3777e-04, 9.9079e-05, 1.3873e-04,\n 1.5267e-04, 1.3800e-04, 9.7292e-05, 1.0720e-04, 1.6208e-04, 1.1575e-04,\n 1.4427e-04, 1.5241e-04, 1.5262e-04, 1.0479e-04, 1.2787e-04, 1.3777e-04,\n 1.2543e-04, 1.3637e-04, 1.1669e-04, 1.0460e-04, 1.0615e-04, 1.2340e-04,\n 1.4708e-04, 1.7102e-04, 1.6256e-04, 1.2824e-04, 1.4526e-04, 1.1807e-04,\n 1.4535e-04, 6.9857e-05, 1.2684e-04, 1.2063e-04, 1.6190e-04, 1.2894e-04,\n 1.5401e-04, 1.0665e-04, 1.4557e-04, 1.2028e-04, 1.3035e-04, 1.4974e-04,\n 1.1721e-04, 1.3421e-04, 1.0101e-04, 1.1075e-04, 1.5134e-04, 1.2025e-04,\n 1.3069e-04, 1.3978e-04, 1.3838e-04, 1.1192e-04, 1.4444e-04, 1.2665e-04,\n 1.1908e-04, 1.3356e-04, 1.0588e-04, 1.5734e-04, 1.5365e-04, 1.5925e-04,\n 1.4301e-04, 1.4909e-04, 1.5991e-04, 1.2922e-04, 1.3822e-04, 9.4701e-05,\n 7.7269e-05, 1.0760e-04, 1.1710e-04, 1.4991e-04, 1.2039e-04, 1.1770e-04,\n 1.1724e-04, 8.8914e-05, 1.3993e-04, 1.1597e-04, 1.2995e-04, 7.0512e-05,\n 1.3511e-04, 1.1108e-04, 4.6036e-11, 1.1423e-04, 8.0156e-05, 5.7060e-12,\n 9.5971e-05, 1.3566e-04, 9.4227e-05, 1.4606e-04, 1.0839e-04, 1.1662e-04,\n 1.0176e-04, 1.3165e-04, 9.2454e-05, 8.4134e-05, 9.9940e-05, 1.0785e-04,\n 1.3257e-04, 1.0493e-04, 1.3085e-04, 1.7131e-04, 1.6088e-04, 1.6148e-04,\n 1.0800e-04, 1.0679e-04, 1.1492e-04, 1.4201e-04, 1.3286e-04, 9.5180e-05,\n 1.1902e-04, 1.3269e-04, 1.1940e-04, 1.0629e-04, 1.2310e-04, 9.5282e-05,\n 7.9631e-05, 1.5645e-04, 1.0142e-04, 9.1439e-05, 1.4055e-04, 1.4260e-04,\n 1.0850e-04, 1.0600e-04, 1.3417e-04, 1.2732e-04, 1.1686e-04, 1.0231e-04,\n 1.0648e-04, 1.3680e-04, 1.0906e-04, 1.6204e-04, 1.1589e-04, 1.0810e-04,\n 1.6073e-04, 1.0045e-04, 1.2386e-04, 1.2441e-04, 1.4403e-04, 1.3414e-04,\n 1.6152e-04, 1.3857e-04, 1.3398e-04, 1.1221e-04, 9.6490e-05, 1.2125e-04,\n 8.7114e-05, 8.6675e-05, 1.3824e-04, 1.7725e-04, 1.4677e-04, 1.1567e-04,\n 1.1129e-04, 9.2560e-05, 7.7187e-05, 1.7604e-04, 1.4415e-04, 1.3302e-04,\n 1.2851e-04, 1.2116e-04, 1.1501e-04, 1.0509e-04, 1.4893e-04, 1.2593e-04,\n 1.1886e-04, 7.8195e-05, 1.1610e-04, 1.1404e-04, 1.1276e-04, 1.2336e-04,\n 8.2239e-05, 1.3757e-04, 8.8136e-05, 1.1349e-04, 1.6654e-04, 1.3072e-04,\n 1.1895e-04, 1.0053e-04, 1.6279e-04, 1.3412e-04, 1.3541e-04, 1.2559e-04,\n 1.0437e-04, 1.1264e-04, 1.2774e-04, 1.4927e-04, 1.5050e-04, 1.2266e-04,\n 1.0541e-04, 1.3614e-04, 1.2585e-04, 1.4740e-04, 1.1254e-04, 1.1505e-04,\n 1.3220e-04, 1.6036e-04, 1.3459e-04, 1.4511e-04, 1.1134e-04, 1.2302e-04,\n 1.5730e-04, 8.3423e-05, 1.5457e-04, 1.1770e-04, 1.1018e-04, 9.3749e-05,\n 1.3336e-04, 1.5243e-04, 1.2975e-04, 1.1462e-04, 7.5082e-05, 1.1642e-04,\n 1.4440e-04, 1.3582e-04, 1.2424e-04, 1.6351e-04, 1.0670e-04, 1.7298e-04,\n 1.0754e-04, 1.1927e-04, 1.5633e-04, 1.0782e-04, 1.3474e-04, 1.4631e-04,\n 1.2380e-04, 6.0506e-05, 1.4744e-04, 1.2378e-04, 9.6236e-05, 1.1639e-04,\n 1.0185e-04, 1.2697e-04, 1.5713e-04, 1.8423e-04, 1.1340e-04, 1.4970e-04,\n 1.0105e-04, 1.2420e-04, 1.4488e-04, 1.6215e-04, 1.1666e-04, 1.9611e-04,\n 1.3365e-04, 9.7068e-05, 1.6645e-04, 1.3865e-04, 1.1716e-04, 1.3054e-04,\n 1.0165e-04, 1.3456e-04, 1.3223e-04, 1.2497e-04, 1.1331e-04, 1.1577e-04,\n 1.4120e-04, 1.5491e-04, 9.8405e-05, 1.4038e-04, 1.0058e-04, 9.6922e-05,\n 1.5085e-04, 1.2277e-04, 1.3434e-04, 8.8172e-05, 1.1629e-04, 1.4832e-04,\n 1.8490e-04, 1.3832e-04, 1.6014e-04, 1.4179e-04, 1.0515e-04, 1.1699e-04],\n device='cuda:0')"
|
| 69 |
+
},
|
| 70 |
+
"13": {
|
| 71 |
+
"step": "tensor(8764.)",
|
| 72 |
+
"exp_avg": "tensor([[ 1.7658e-17, -5.7177e-08, 4.2810e-22, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 8.9692e-06, -2.3937e-06, -1.7397e-06, ..., -4.1297e-06,\n 1.5350e-08, 4.7284e-09],\n [-2.1026e-05, 3.2858e-07, 6.3431e-11, ..., 3.7893e-06,\n 1.0297e-06, -2.1334e-05],\n ...,\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 2.3501e-04, 5.0076e-05, 9.6753e-06, ..., 2.1181e-06,\n -5.2313e-06, 7.6000e-06],\n [ 1.3663e-07, -1.9808e-06, 7.0118e-07, ..., -5.2288e-06,\n 5.7519e-08, 2.2147e-06]], device='cuda:0')",
|
| 73 |
+
"exp_avg_sq": "tensor([[2.2867e-10, 1.2776e-09, 2.4988e-13, ..., 1.4814e-14, 5.9152e-14,\n 3.3748e-14],\n [2.2431e-09, 2.0555e-09, 1.3423e-09, ..., 9.1724e-09, 4.0217e-11,\n 5.7296e-11],\n [3.2363e-08, 2.1695e-09, 8.3174e-12, ..., 1.2375e-08, 3.1596e-09,\n 7.3780e-09],\n ...,\n [1.3410e-14, 1.4468e-13, 2.6354e-16, ..., 8.0643e-14, 1.8263e-13,\n 1.0391e-14],\n [7.5709e-08, 1.2096e-07, 4.1608e-09, ..., 9.7547e-08, 3.4646e-08,\n 1.9070e-08],\n [7.7886e-10, 6.8587e-10, 2.6697e-09, ..., 2.2071e-09, 8.8472e-11,\n 3.4570e-09]], device='cuda:0')"
|
| 74 |
+
},
|
| 75 |
+
"14": {
|
| 76 |
+
"step": "tensor(8764.)",
|
| 77 |
+
"exp_avg": "tensor([-9.0057e-07, -4.0984e-04, 4.4506e-04, ..., 5.6052e-45,\n 2.3942e-03, -1.6860e-03], device='cuda:0')",
|
| 78 |
+
"exp_avg_sq": "tensor([1.1032e-07, 9.5006e-06, 1.6044e-05, ..., 1.8014e-10, 2.8523e-05,\n 2.1409e-06], device='cuda:0')"
|
| 79 |
+
},
|
| 80 |
+
"15": {
|
| 81 |
+
"step": "tensor(8764.)",
|
| 82 |
+
"exp_avg": "tensor([[ 5.5597e-09, 9.0054e-08, -5.4465e-06, ..., 5.6052e-45,\n -2.2692e-05, -3.5267e-07],\n [ 4.4042e-08, 4.6321e-08, -9.3300e-06, ..., -5.6052e-45,\n 2.5955e-05, 2.9675e-06],\n [-2.6697e-08, -1.1793e-07, 2.0645e-06, ..., -5.6052e-45,\n 3.6675e-06, 6.6893e-06],\n ...,\n [ 8.0643e-08, -9.7611e-08, -5.2545e-06, ..., -5.6052e-45,\n 1.8322e-06, 7.2717e-06],\n [ 6.3013e-08, 3.9321e-07, 1.8895e-05, ..., 5.6052e-45,\n 6.0859e-06, -5.6368e-07],\n [-1.2160e-07, 2.4126e-07, -8.0317e-06, ..., 5.6052e-45,\n 2.4034e-05, 2.1622e-06]], device='cuda:0')",
|
| 83 |
+
"exp_avg_sq": "tensor([[1.8523e-12, 1.1662e-11, 1.6764e-10, ..., 2.0123e-13, 2.5986e-09,\n 4.4361e-11],\n [1.6459e-11, 3.4870e-11, 3.7272e-10, ..., 8.5914e-14, 3.0620e-09,\n 1.0175e-10],\n [1.9137e-11, 6.2402e-11, 2.1495e-10, ..., 2.5491e-14, 3.2919e-09,\n 1.7090e-10],\n ...,\n [2.8066e-11, 4.9948e-11, 1.8677e-10, ..., 1.2129e-14, 3.1439e-09,\n 2.4728e-10],\n [1.6345e-11, 4.1101e-11, 4.2436e-10, ..., 4.0592e-13, 2.7439e-09,\n 8.2189e-11],\n [4.6317e-11, 5.6754e-11, 7.1410e-10, ..., 1.3395e-13, 3.2050e-09,\n 1.1219e-10]], device='cuda:0')"
|
| 84 |
+
},
|
| 85 |
+
"16": {
|
| 86 |
+
"step": "tensor(8764.)",
|
| 87 |
+
"exp_avg": "tensor([[-8.7917e-05, -6.5515e-05, 1.7643e-05, ..., -6.6320e-06,\n 2.2158e-05, -8.7488e-05],\n [ 1.5174e-04, -1.1783e-04, 2.0303e-04, ..., 5.6440e-05,\n -2.8064e-05, 1.6012e-04],\n [-3.7796e-05, -2.8505e-05, -3.3415e-05, ..., -7.5413e-05,\n -1.5508e-05, 7.1069e-05],\n ...,\n [ 6.3818e-05, 7.2831e-05, -1.7221e-04, ..., 6.4677e-05,\n 3.9564e-05, 1.9249e-05],\n [ 1.9063e-05, 7.7363e-06, -3.2469e-05, ..., -6.4381e-05,\n 7.4295e-06, 5.0385e-05],\n [ 1.9760e-05, 5.4987e-05, -2.5244e-05, ..., 1.3180e-04,\n 5.3002e-05, -1.6975e-04]], device='cuda:0')",
|
| 88 |
+
"exp_avg_sq": "tensor([[2.9965e-08, 6.9594e-08, 6.8928e-08, ..., 4.4583e-08, 3.5096e-08,\n 5.0826e-08],\n [3.0102e-08, 6.1403e-08, 4.7999e-08, ..., 5.2113e-08, 2.8767e-08,\n 5.3447e-08],\n [2.1736e-08, 4.5513e-08, 3.9594e-08, ..., 3.4201e-08, 2.5626e-08,\n 2.8598e-08],\n ...,\n [2.4970e-08, 6.2478e-08, 4.2846e-08, ..., 4.9157e-08, 2.9134e-08,\n 5.4892e-08],\n [2.9934e-08, 5.8372e-08, 6.2799e-08, ..., 5.1244e-08, 2.5609e-08,\n 4.4148e-08],\n [2.8148e-08, 5.4728e-08, 7.1486e-08, ..., 4.0391e-08, 2.7776e-08,\n 5.0550e-08]], device='cuda:0')"
|
| 89 |
+
},
|
| 90 |
+
"17": {
|
| 91 |
+
"step": "tensor(8764.)",
|
| 92 |
+
"exp_avg": "tensor([-7.6745e-04, 5.8742e-03, -1.6784e-03, ..., 5.3343e-05,\n -1.9964e-04, -1.8310e-04], device='cuda:0')",
|
| 93 |
+
"exp_avg_sq": "tensor([5.1435e-05, 5.1830e-05, 3.6607e-05, ..., 5.0520e-05, 4.4610e-05,\n 4.2968e-05], device='cuda:0')"
|
| 94 |
+
}
|
| 95 |
+
},
|
| 96 |
+
"param_groups": [
|
| 97 |
+
{
|
| 98 |
+
"lr": 0.00020690126647990973,
|
| 99 |
+
"name": "scale_256",
|
| 100 |
+
"betas": [
|
| 101 |
+
0.9,
|
| 102 |
+
0.999
|
| 103 |
+
],
|
| 104 |
+
"eps": 1e-08,
|
| 105 |
+
"weight_decay": 1e-05,
|
| 106 |
+
"amsgrad": false,
|
| 107 |
+
"maximize": false,
|
| 108 |
+
"foreach": null,
|
| 109 |
+
"capturable": false,
|
| 110 |
+
"differentiable": false,
|
| 111 |
+
"fused": null,
|
| 112 |
+
"decoupled_weight_decay": true,
|
| 113 |
+
"initial_lr": 0.001,
|
| 114 |
+
"params": [
|
| 115 |
+
0,
|
| 116 |
+
1,
|
| 117 |
+
2
|
| 118 |
+
]
|
| 119 |
+
},
|
| 120 |
+
{
|
| 121 |
+
"lr": 0.00020690126647990973,
|
| 122 |
+
"name": "scale_512",
|
| 123 |
+
"betas": [
|
| 124 |
+
0.9,
|
| 125 |
+
0.999
|
| 126 |
+
],
|
| 127 |
+
"eps": 1e-08,
|
| 128 |
+
"weight_decay": 1e-05,
|
| 129 |
+
"amsgrad": false,
|
| 130 |
+
"maximize": false,
|
| 131 |
+
"foreach": null,
|
| 132 |
+
"capturable": false,
|
| 133 |
+
"differentiable": false,
|
| 134 |
+
"fused": null,
|
| 135 |
+
"decoupled_weight_decay": true,
|
| 136 |
+
"initial_lr": 0.001,
|
| 137 |
+
"params": [
|
| 138 |
+
3,
|
| 139 |
+
4,
|
| 140 |
+
5,
|
| 141 |
+
6,
|
| 142 |
+
7
|
| 143 |
+
]
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"lr": 0.00020690126647990973,
|
| 147 |
+
"name": "scale_768",
|
| 148 |
+
"betas": [
|
| 149 |
+
0.9,
|
| 150 |
+
0.999
|
| 151 |
+
],
|
| 152 |
+
"eps": 1e-08,
|
| 153 |
+
"weight_decay": 1e-05,
|
| 154 |
+
"amsgrad": false,
|
| 155 |
+
"maximize": false,
|
| 156 |
+
"foreach": null,
|
| 157 |
+
"capturable": false,
|
| 158 |
+
"differentiable": false,
|
| 159 |
+
"fused": null,
|
| 160 |
+
"decoupled_weight_decay": true,
|
| 161 |
+
"initial_lr": 0.001,
|
| 162 |
+
"params": [
|
| 163 |
+
8,
|
| 164 |
+
9,
|
| 165 |
+
10,
|
| 166 |
+
11,
|
| 167 |
+
12
|
| 168 |
+
]
|
| 169 |
+
},
|
| 170 |
+
{
|
| 171 |
+
"lr": 0.00020690126647990973,
|
| 172 |
+
"name": "scale_1024",
|
| 173 |
+
"betas": [
|
| 174 |
+
0.9,
|
| 175 |
+
0.999
|
| 176 |
+
],
|
| 177 |
+
"eps": 1e-08,
|
| 178 |
+
"weight_decay": 1e-05,
|
| 179 |
+
"amsgrad": false,
|
| 180 |
+
"maximize": false,
|
| 181 |
+
"foreach": null,
|
| 182 |
+
"capturable": false,
|
| 183 |
+
"differentiable": false,
|
| 184 |
+
"fused": null,
|
| 185 |
+
"decoupled_weight_decay": true,
|
| 186 |
+
"initial_lr": 0.001,
|
| 187 |
+
"params": [
|
| 188 |
+
13,
|
| 189 |
+
14,
|
| 190 |
+
15,
|
| 191 |
+
16,
|
| 192 |
+
17
|
| 193 |
+
]
|
| 194 |
+
}
|
| 195 |
+
]
|
| 196 |
+
},
|
| 197 |
+
"scheduler_state_dict": {
|
| 198 |
+
"T_0": 10,
|
| 199 |
+
"T_i": 10,
|
| 200 |
+
"T_mult": 2,
|
| 201 |
+
"eta_min": 1e-06,
|
| 202 |
+
"T_cur": 7,
|
| 203 |
+
"base_lrs": [
|
| 204 |
+
0.001,
|
| 205 |
+
0.001,
|
| 206 |
+
0.001,
|
| 207 |
+
0.001
|
| 208 |
+
],
|
| 209 |
+
"last_epoch": 7,
|
| 210 |
+
"_step_count": 0,
|
| 211 |
+
"_is_initial": false,
|
| 212 |
+
"_get_lr_called_within_step": false,
|
| 213 |
+
"_last_lr": [
|
| 214 |
+
0.00020690126647990973,
|
| 215 |
+
0.00020690126647990973,
|
| 216 |
+
0.00020690126647990973,
|
| 217 |
+
0.00020690126647990973
|
| 218 |
+
]
|
| 219 |
+
},
|
| 220 |
+
"metrics": {
|
| 221 |
+
"best_val_acc": 76.452,
|
| 222 |
+
"best_epoch": 6,
|
| 223 |
+
"scale_accuracies": {
|
| 224 |
+
"256": 70.05,
|
| 225 |
+
"512": 73.994,
|
| 226 |
+
"768": 75.51,
|
| 227 |
+
"1024": 75.462
|
| 228 |
+
},
|
| 229 |
+
"training_history": {
|
| 230 |
+
"epochs": [
|
| 231 |
+
1,
|
| 232 |
+
2,
|
| 233 |
+
3,
|
| 234 |
+
4,
|
| 235 |
+
5,
|
| 236 |
+
6,
|
| 237 |
+
7
|
| 238 |
+
],
|
| 239 |
+
"train_loss": [
|
| 240 |
+
3.9118613697850284,
|
| 241 |
+
2.66607952194092,
|
| 242 |
+
2.3952484759278954,
|
| 243 |
+
2.201966982775222,
|
| 244 |
+
2.026744091663117,
|
| 245 |
+
1.8584000322575005,
|
| 246 |
+
1.6992347222357131
|
| 247 |
+
],
|
| 248 |
+
"train_acc": [
|
| 249 |
+
68.33870994179526,
|
| 250 |
+
76.85976925724749,
|
| 251 |
+
79.21098498478341,
|
| 252 |
+
81.16849715923061,
|
| 253 |
+
83.13014618703104,
|
| 254 |
+
85.12582668769957,
|
| 255 |
+
87.11042354353492
|
| 256 |
+
],
|
| 257 |
+
"val_acc": [
|
| 258 |
+
72.328,
|
| 259 |
+
74.248,
|
| 260 |
+
74.928,
|
| 261 |
+
75.464,
|
| 262 |
+
75.994,
|
| 263 |
+
76.29,
|
| 264 |
+
76.452
|
| 265 |
+
],
|
| 266 |
+
"scale_accs": {
|
| 267 |
+
"256": [
|
| 268 |
+
65.922,
|
| 269 |
+
67.866,
|
| 270 |
+
68.668,
|
| 271 |
+
69.028,
|
| 272 |
+
69.476,
|
| 273 |
+
69.894,
|
| 274 |
+
70.05
|
| 275 |
+
],
|
| 276 |
+
"512": [
|
| 277 |
+
70.014,
|
| 278 |
+
71.776,
|
| 279 |
+
72.65,
|
| 280 |
+
72.974,
|
| 281 |
+
73.372,
|
| 282 |
+
73.71,
|
| 283 |
+
73.994
|
| 284 |
+
],
|
| 285 |
+
"768": [
|
| 286 |
+
71.312,
|
| 287 |
+
73.326,
|
| 288 |
+
74.046,
|
| 289 |
+
74.52,
|
| 290 |
+
74.848,
|
| 291 |
+
75.304,
|
| 292 |
+
75.51
|
| 293 |
+
],
|
| 294 |
+
"1024": [
|
| 295 |
+
71.288,
|
| 296 |
+
73.572,
|
| 297 |
+
74.36,
|
| 298 |
+
74.86,
|
| 299 |
+
75.24,
|
| 300 |
+
75.4,
|
| 301 |
+
75.462
|
| 302 |
+
]
|
| 303 |
+
},
|
| 304 |
+
"lr": [
|
| 305 |
+
0.0009755527298894294,
|
| 306 |
+
0.0009046039886902864,
|
| 307 |
+
0.0007940987335200904,
|
| 308 |
+
0.0006548539886902864,
|
| 309 |
+
0.0005005000000000001,
|
| 310 |
+
0.0003461460113097139,
|
| 311 |
+
0.00020690126647990973
|
| 312 |
+
]
|
| 313 |
+
}
|
| 314 |
+
},
|
| 315 |
+
"train_config": {
|
| 316 |
+
"name": "david_training",
|
| 317 |
+
"run_id": "20251012_145649",
|
| 318 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 319 |
+
"model_variant": "clip_vit_laion_b32",
|
| 320 |
+
"num_classes": 1000,
|
| 321 |
+
"preset": "hierarchical_refinement",
|
| 322 |
+
"custom_config_path": null,
|
| 323 |
+
"num_classes_override": null,
|
| 324 |
+
"use_belly_override": null,
|
| 325 |
+
"belly_expand_override": null,
|
| 326 |
+
"progressive_training_override": false,
|
| 327 |
+
"scale_warmup_epochs_override": null,
|
| 328 |
+
"num_epochs": 10,
|
| 329 |
+
"batch_size": 1024,
|
| 330 |
+
"learning_rate": 0.001,
|
| 331 |
+
"weight_decay": 1e-05,
|
| 332 |
+
"warmup_epochs": 3,
|
| 333 |
+
"use_rose_loss": true,
|
| 334 |
+
"rose_initial_weight": 0.1,
|
| 335 |
+
"rose_max_weight": 0.5,
|
| 336 |
+
"rose_weight_schedule": "adaptive",
|
| 337 |
+
"use_cayley_loss": false,
|
| 338 |
+
"cayley_weight": 0.001,
|
| 339 |
+
"scale_loss_balance": null,
|
| 340 |
+
"use_mixed_precision": true,
|
| 341 |
+
"gradient_clip": 10.0,
|
| 342 |
+
"scheduler_type": "cosine_restarts",
|
| 343 |
+
"min_lr": 1e-06,
|
| 344 |
+
"freeze_strategy": "never",
|
| 345 |
+
"freeze_threshold": 90.0,
|
| 346 |
+
"unfreeze_on_plateau": true,
|
| 347 |
+
"patience": 10,
|
| 348 |
+
"track_gradients": true,
|
| 349 |
+
"gradient_scale_threshold": 1e-05,
|
| 350 |
+
"gradient_scale_multiplier": 10.0,
|
| 351 |
+
"log_interval": 50,
|
| 352 |
+
"val_interval": 1,
|
| 353 |
+
"save_interval": 5,
|
| 354 |
+
"log_fusion_weights": true,
|
| 355 |
+
"log_loss_components": true,
|
| 356 |
+
"save_format": "safetensors",
|
| 357 |
+
"hf_repo": "AbstractPhil/gated-david",
|
| 358 |
+
"upload_to_hub": true,
|
| 359 |
+
"base_dir": "./david_training",
|
| 360 |
+
"num_workers": 10,
|
| 361 |
+
"pin_memory": true,
|
| 362 |
+
"prefetch_factor": 4,
|
| 363 |
+
"persistent_workers": true
|
| 364 |
+
}
|
| 365 |
+
}
|