AbstractPhil commited on
Commit
f50e4dc
·
verified ·
1 Parent(s): 9f55bb4

Update best_model_acc76.45_metadata.json - Run 20251012_145649

Browse files
weights/David-hierarchical-progressive/20251012_145649/best_model_acc76.45_metadata.json ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 6,
3
+ "optimizer_state_dict": {
4
+ "state": {
5
+ "0": {
6
+ "step": "tensor(8764.)",
7
+ "exp_avg": "tensor([[-2.5930e-04, 7.4049e-04, 4.0309e-04, ..., -2.0171e-04,\n -1.9386e-04, -2.6270e-04],\n [ 1.4533e-04, -7.0071e-04, 4.6066e-04, ..., 8.6023e-05,\n 1.1630e-04, -6.6133e-05],\n [ 1.8618e-04, 6.1642e-04, -3.4604e-04, ..., -1.7930e-04,\n 3.6930e-05, 4.8601e-05],\n ...,\n [-4.2094e-05, 3.1226e-04, 3.5174e-04, ..., -2.4118e-04,\n 7.1443e-05, -9.1564e-05],\n [-2.3728e-04, -1.3047e-03, -6.1838e-04, ..., 8.1878e-04,\n 1.8464e-04, 6.7112e-05],\n [-1.6674e-06, 6.2940e-04, -1.1027e-04, ..., 2.1463e-04,\n -1.8162e-04, -5.7131e-06]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[1.0434e-06, 4.9703e-06, 2.2109e-06, ..., 1.3003e-06, 4.3543e-07,\n 1.6217e-06],\n [6.4484e-07, 6.5618e-06, 2.1528e-06, ..., 7.7323e-07, 3.1984e-07,\n 5.0449e-07],\n [8.7762e-07, 4.9084e-06, 3.5103e-06, ..., 6.7855e-07, 3.9311e-07,\n 7.7989e-07],\n ...,\n [8.8989e-07, 4.7306e-06, 3.1401e-06, ..., 7.2407e-07, 4.5088e-07,\n 7.3793e-07],\n [1.2095e-06, 1.0114e-05, 3.5884e-06, ..., 1.1245e-06, 6.5766e-07,\n 6.5272e-07],\n [9.9015e-07, 8.3827e-06, 2.9915e-06, ..., 6.2935e-07, 4.6426e-07,\n 5.4936e-07]], device='cuda:0')"
9
+ },
10
+ "1": {
11
+ "step": "tensor(8764.)",
12
+ "exp_avg": "tensor([-1.5193e-03, -4.4591e-03, 7.1053e-03, -6.5828e-03, -3.1795e-03,\n 4.6355e-03, 5.8450e-04, 7.0924e-05, -7.7313e-03, -4.4748e-03,\n -9.4507e-03, 1.1286e-02, -6.3030e-03, -5.7630e-03, 9.0949e-03,\n 1.2107e-03, -7.4865e-03, -1.4047e-04, 3.3507e-03, 2.2486e-03,\n 1.1722e-02, -4.2688e-03, -1.3190e-02, 5.4630e-03, 3.6816e-04,\n -5.4873e-03, 9.0681e-04, 5.2477e-03, 1.1481e-03, -4.2701e-04,\n -1.2856e-02, 3.7272e-03, 1.1509e-02, 6.5716e-03, 1.1090e-02,\n 4.7191e-03, -3.9578e-03, -1.3452e-03, -6.7048e-03, 7.2267e-03,\n 1.9737e-02, 3.7527e-03, -1.0963e-03, -2.9175e-03, 6.5644e-03,\n 1.3623e-02, 7.6836e-03, -8.2199e-03, 3.4627e-04, -1.6537e-03,\n 6.8059e-03, 1.0392e-03, -2.1649e-03, 3.4304e-03, -1.7083e-03,\n 2.1792e-03, 8.3987e-03, -6.1753e-03, -1.5668e-03, -1.4155e-02,\n -4.1352e-03, -1.0537e-02, 5.5218e-03, 1.2700e-04, 3.6228e-04,\n 3.2518e-03, 1.0522e-03, -1.3251e-02, 1.9515e-03, 9.2547e-03,\n -4.6613e-03, 1.3113e-02, 1.1692e-02, -1.1399e-02, 7.7365e-04,\n -1.0673e-02, 2.0035e-02, 2.4566e-02, 2.9099e-04, -1.2592e-02,\n 2.0582e-02, -6.1618e-03, -1.0858e-02, 1.3229e-03, -7.9781e-04,\n 1.5037e-02, -2.0953e-03, -1.9642e-03, 3.3614e-03, -6.0351e-03,\n -1.0299e-02, 1.3608e-02, 1.5063e-02, -1.2260e-03, 5.0113e-03,\n -7.5556e-03, -8.0941e-03, 8.1610e-03, -1.2587e-02, -1.4015e-02,\n 2.5340e-02, 6.8679e-03, -3.8330e-03, -8.0946e-03, -4.8008e-03,\n 3.1473e-03, -1.2550e-02, 1.9463e-02, 2.2923e-03, 3.8075e-04,\n 1.0306e-02, 1.1304e-03, -2.3728e-03, -3.0506e-03, 5.1638e-04,\n -5.7666e-03, 2.0158e-04, 8.7081e-03, -3.6905e-03, -6.2057e-03,\n -1.4517e-03, 5.2392e-03, -2.3146e-02, 2.6284e-03, 6.9156e-03,\n -2.9771e-03, 1.7933e-02, 2.2547e-03, -8.2733e-03, 5.2844e-03,\n -7.7778e-03, -1.4290e-02, 7.0793e-03, 2.4083e-03, 1.6808e-02,\n 7.6884e-03, 1.0729e-02, -1.2158e-02, -2.6400e-03, -3.9794e-03,\n -3.2571e-03, -1.2840e-02, -2.2967e-02, -9.6626e-03, -1.2349e-02,\n -2.5929e-03, -4.4662e-03, 3.5320e-03, 1.5417e-02, 8.8127e-03,\n 7.6788e-04, -8.5919e-03, 2.1329e-03, 1.4201e-03, -5.8470e-04,\n 1.2027e-02, -1.7659e-02, 1.0692e-02, -5.7707e-03, 1.2914e-03,\n -4.5082e-04, 2.4606e-02, -6.3079e-03, -2.2301e-02, 4.5191e-03,\n 1.4735e-03, 7.5789e-03, 1.1614e-03, 1.3107e-02, -7.9116e-03,\n -1.4107e-04, 3.3528e-03, 5.6117e-03, 5.5719e-04, -1.7506e-03,\n 3.8071e-03, -3.3199e-03, 1.2007e-02, -3.3376e-03, 6.7455e-03,\n -5.7910e-03, -6.0498e-03, -7.1684e-03, -5.0866e-03, 4.8411e-03,\n -6.4850e-03, 1.0685e-02, -1.1505e-02, 1.3439e-03, 1.0298e-02,\n 5.0233e-03, -7.5074e-03, -1.8875e-03, -4.0996e-03, -5.0834e-03,\n 6.9429e-03, 5.6452e-03, 3.3496e-03, 2.8135e-02, -3.3520e-03,\n 3.6825e-04, 7.3070e-03, 8.3576e-03, -4.1319e-03, 1.8519e-03,\n 5.8204e-03, 3.9812e-03, 4.4824e-03, 2.6306e-03, 1.0259e-02,\n -4.0132e-03, 2.3676e-03, -2.3741e-03, -4.1143e-06, -1.1473e-02,\n -8.6575e-03, -1.3567e-03, -1.0646e-03, 4.0878e-03, 1.1614e-02,\n 3.4938e-03, 1.1175e-02, 4.2708e-03, 5.5917e-03, -6.7394e-03,\n 1.3507e-02, 2.5423e-02, 2.9743e-02, -8.9392e-03, 8.2192e-03,\n 8.3246e-03, 2.6304e-03, -2.9286e-03, -8.7903e-03, -6.9766e-03,\n 2.6887e-03, -8.3364e-03, 1.8711e-02, -9.8856e-03, 6.4722e-05,\n -1.1160e-02, -3.2238e-03, -2.0984e-02, 1.0948e-02, -1.1262e-02,\n 1.9323e-03, 1.5227e-03, 4.7681e-03, 1.2862e-02, -4.4097e-03,\n 5.9266e-03, -2.1287e-02, 5.8614e-03, -8.7561e-03, -1.2339e-02,\n -1.1507e-02, 1.1912e-03, -6.0299e-03, -3.2144e-02, 1.9664e-02,\n -1.7309e-04, 2.4762e-03, -4.1717e-03, -8.1374e-03, 1.7508e-02,\n -2.1520e-03, -3.8674e-03, -3.5054e-02, -9.3763e-03, 2.3947e-02,\n 1.2928e-03, -5.9221e-03, -3.0207e-03, -4.2103e-03, 3.5822e-03,\n 2.6462e-04, -2.3931e-03, -3.6430e-04, -4.9404e-03, 7.7238e-03,\n 3.7100e-03, 7.7939e-03, 1.4082e-04, 7.0826e-03, 3.6410e-02,\n -1.2234e-02, -9.3075e-03, 8.0630e-03, -1.2915e-04, 6.3341e-03,\n -1.6490e-02, -1.4655e-02, -4.1107e-04, 2.9939e-03, -2.6546e-03,\n 3.7630e-03, -1.2850e-02, -1.6641e-03, 4.4756e-03, -4.5296e-04,\n -5.0033e-03, -5.9977e-03, 7.0746e-03, -1.5542e-02, 8.7259e-03,\n 1.2717e-02, -3.1350e-02, -3.4018e-04, 1.0977e-02, 1.2337e-02,\n 2.4757e-02, -4.1437e-03, 5.9285e-03, -8.9077e-03, -7.5642e-03,\n 1.1634e-02, -3.4839e-03, 1.7525e-03, 1.2623e-02, -1.2888e-02,\n 5.9608e-03, -1.8689e-03, 2.4732e-02, -1.1188e-02, 1.6276e-02,\n 8.2121e-03, -5.5520e-03, 1.2886e-02, 2.1881e-02, 7.8955e-03,\n -1.6707e-02, 2.1577e-03, -6.1353e-03, 1.5193e-02, 7.7684e-03,\n 4.3973e-03, -1.8253e-03, -1.3366e-02, -6.4051e-03, -3.1358e-02,\n -8.7865e-03, 1.9130e-02, 3.0335e-03, -1.1211e-02, 1.0383e-02,\n -3.6268e-03, -7.0729e-03, 2.1776e-03, 3.8148e-03, -3.6153e-03,\n -1.6197e-02, -1.2438e-03, -3.2768e-03, -6.2864e-03, 7.1431e-03,\n 7.6637e-03, -1.1132e-03, -2.7226e-03, 1.2004e-02, -6.1475e-03,\n -1.3678e-02, 1.5430e-02, 1.8887e-03, 5.2666e-03, -1.2596e-02,\n 1.3986e-02, 5.1649e-03, 9.9844e-03, -1.1590e-02, -7.0522e-03,\n -3.0687e-02, 1.4892e-02, 6.9179e-03, 1.6579e-03, -4.0342e-03,\n -9.5131e-03, 2.2730e-02, 8.2733e-03, -1.5140e-02, 2.3753e-04,\n 8.0571e-04, 6.3958e-04, -6.3597e-03, 3.1334e-03, 3.2248e-03,\n -4.9136e-03, -8.2982e-04, 1.9776e-02, -2.7588e-03, 4.4332e-03,\n 6.6687e-03, 1.5351e-02, 3.0260e-02, -8.2091e-04, -1.5270e-02,\n -1.7301e-02, 4.5926e-03, 2.3197e-03, 1.5636e-02, 7.4228e-03,\n -1.1380e-02, -7.6872e-03, -2.3008e-04, -1.2367e-04, 1.0724e-03,\n -4.6085e-03, 4.0928e-04, -1.1420e-02, 1.7877e-04, 7.4910e-03,\n 1.6031e-03, 5.3519e-03, -5.1482e-03, 1.1239e-02, -1.1771e-02,\n -6.8431e-03, -4.3363e-05, -1.0336e-02, 7.4061e-03, -5.4389e-03,\n 2.2793e-03, -3.5463e-03, -3.6992e-03, 1.3517e-02, -1.3512e-02,\n -2.5810e-03, 4.4267e-03, -3.9901e-02, -2.4318e-03, -1.7773e-02,\n 4.5388e-03, 5.2524e-03, -1.4869e-02, 1.2160e-02, -5.8866e-03,\n 9.0534e-03, -4.6406e-03, -1.9224e-03, 1.8167e-02, 6.9995e-03,\n 3.1519e-03, -1.2705e-02, 7.1267e-04, -9.9666e-03, 4.0190e-03,\n -4.2715e-03, 1.1146e-02, 7.0163e-04, 1.0579e-02, 8.2561e-04,\n -7.2275e-03, 7.0864e-05, 5.9294e-03, -1.0689e-03, 1.0834e-02,\n 2.8868e-03, 5.4802e-04, -2.7470e-03, -2.6632e-03, -5.0826e-03,\n -2.0014e-02, 3.4024e-04, 6.4873e-03, -1.3074e-05, -3.0416e-02,\n -4.4887e-03, -6.9707e-03, 3.1749e-03, -3.4078e-03, -1.2904e-02,\n 7.8316e-03, 1.5418e-03, 4.8794e-03, 8.8956e-03, 1.6863e-02,\n 3.2222e-04, 7.7671e-03, 1.3156e-02, 4.0770e-03, -2.3279e-03,\n -6.4535e-03, 2.7175e-03, 8.3535e-04, 4.0872e-04, 4.2897e-03,\n 6.9215e-03, -1.0622e-02, -3.4937e-03, -5.3507e-03, -1.7231e-03,\n -5.1878e-03, -1.3843e-02, -5.4689e-03, 3.1539e-03, 3.4423e-03,\n -1.0206e-02, -5.5291e-03, 1.4681e-03, -1.3991e-02, -4.4572e-03,\n -5.2325e-03, 2.4938e-03, 4.4260e-03, 1.6987e-03, -1.4847e-02,\n -6.5368e-03, 3.3508e-03, -6.3646e-03, -4.7714e-03, -1.0965e-03,\n -1.1597e-02, -1.4543e-03], device='cuda:0')",
13
+ "exp_avg_sq": "tensor([0.0009, 0.0007, 0.0007, 0.0010, 0.0007, 0.0014, 0.0007, 0.0008, 0.0009,\n 0.0012, 0.0009, 0.0009, 0.0009, 0.0008, 0.0007, 0.0008, 0.0008, 0.0011,\n 0.0009, 0.0009, 0.0007, 0.0006, 0.0009, 0.0008, 0.0006, 0.0006, 0.0011,\n 0.0008, 0.0008, 0.0008, 0.0008, 0.0009, 0.0006, 0.0007, 0.0009, 0.0009,\n 0.0006, 0.0007, 0.0011, 0.0009, 0.0007, 0.0008, 0.0007, 0.0009, 0.0007,\n 0.0008, 0.0009, 0.0010, 0.0008, 0.0007, 0.0008, 0.0007, 0.0007, 0.0007,\n 0.0010, 0.0011, 0.0009, 0.0008, 0.0006, 0.0008, 0.0007, 0.0008, 0.0008,\n 0.0008, 0.0008, 0.0008, 0.0008, 0.0007, 0.0010, 0.0009, 0.0007, 0.0007,\n 0.0006, 0.0006, 0.0008, 0.0008, 0.0009, 0.0009, 0.0007, 0.0007, 0.0009,\n 0.0008, 0.0007, 0.0007, 0.0009, 0.0011, 0.0006, 0.0007, 0.0010, 0.0007,\n 0.0008, 0.0007, 0.0008, 0.0008, 0.0009, 0.0007, 0.0006, 0.0009, 0.0009,\n 0.0008, 0.0011, 0.0008, 0.0012, 0.0011, 0.0005, 0.0007, 0.0010, 0.0009,\n 0.0013, 0.0008, 0.0007, 0.0009, 0.0011, 0.0006, 0.0011, 0.0009, 0.0009,\n 0.0004, 0.0008, 0.0009, 0.0009, 0.0010, 0.0011, 0.0007, 0.0009, 0.0007,\n 0.0010, 0.0006, 0.0007, 0.0007, 0.0008, 0.0008, 0.0009, 0.0007, 0.0009,\n 0.0011, 0.0007, 0.0009, 0.0009, 0.0008, 0.0007, 0.0008, 0.0007, 0.0010,\n 0.0007, 0.0005, 0.0011, 0.0007, 0.0010, 0.0008, 0.0007, 0.0007, 0.0010,\n 0.0008, 0.0006, 0.0008, 0.0007, 0.0010, 0.0009, 0.0009, 0.0008, 0.0009,\n 0.0008, 0.0011, 0.0008, 0.0007, 0.0007, 0.0009, 0.0010, 0.0009, 0.0009,\n 0.0009, 0.0010, 0.0007, 0.0011, 0.0008, 0.0009, 0.0008, 0.0007, 0.0007,\n 0.0006, 0.0007, 0.0008, 0.0006, 0.0008, 0.0011, 0.0007, 0.0009, 0.0008,\n 0.0006, 0.0007, 0.0010, 0.0010, 0.0008, 0.0009, 0.0008, 0.0009, 0.0007,\n 0.0008, 0.0006, 0.0010, 0.0010, 0.0007, 0.0006, 0.0007, 0.0008, 0.0006,\n 0.0006, 0.0009, 0.0008, 0.0007, 0.0008, 0.0005, 0.0006, 0.0007, 0.0007,\n 0.0006, 0.0007, 0.0007, 0.0009, 0.0009, 0.0009, 0.0011, 0.0007, 0.0009,\n 0.0007, 0.0008, 0.0010, 0.0011, 0.0008, 0.0006, 0.0007, 0.0009, 0.0006,\n 0.0010, 0.0009, 0.0014, 0.0009, 0.0008, 0.0009, 0.0009, 0.0008, 0.0008,\n 0.0006, 0.0009, 0.0008, 0.0009, 0.0009, 0.0008, 0.0008, 0.0007, 0.0007,\n 0.0010, 0.0007, 0.0010, 0.0009, 0.0006, 0.0005, 0.0010, 0.0010, 0.0006,\n 0.0008, 0.0008, 0.0006, 0.0006, 0.0007, 0.0006, 0.0012, 0.0010, 0.0011,\n 0.0007, 0.0006, 0.0009, 0.0007, 0.0009, 0.0007, 0.0012, 0.0005, 0.0006,\n 0.0005, 0.0008, 0.0008, 0.0010, 0.0007, 0.0009, 0.0009, 0.0009, 0.0008,\n 0.0010, 0.0010, 0.0008, 0.0008, 0.0008, 0.0007, 0.0007, 0.0009, 0.0005,\n 0.0009, 0.0007, 0.0008, 0.0012, 0.0007, 0.0009, 0.0006, 0.0010, 0.0008,\n 0.0010, 0.0008, 0.0003, 0.0009, 0.0009, 0.0008, 0.0007, 0.0009, 0.0007,\n 0.0007, 0.0009, 0.0006, 0.0007, 0.0010, 0.0008, 0.0007, 0.0010, 0.0005,\n 0.0010, 0.0008, 0.0008, 0.0007, 0.0008, 0.0007, 0.0008, 0.0009, 0.0010,\n 0.0009, 0.0009, 0.0010, 0.0012, 0.0010, 0.0009, 0.0011, 0.0009, 0.0008,\n 0.0010, 0.0006, 0.0008, 0.0008, 0.0009, 0.0008, 0.0007, 0.0013, 0.0010,\n 0.0007, 0.0009, 0.0007, 0.0006, 0.0011, 0.0006, 0.0007, 0.0008, 0.0006,\n 0.0008, 0.0009, 0.0007, 0.0008, 0.0008, 0.0011, 0.0012, 0.0006, 0.0006,\n 0.0007, 0.0011, 0.0008, 0.0011, 0.0007, 0.0007, 0.0006, 0.0010, 0.0007,\n 0.0010, 0.0010, 0.0009, 0.0006, 0.0008, 0.0009, 0.0009, 0.0008, 0.0006,\n 0.0011, 0.0007, 0.0007, 0.0009, 0.0008, 0.0007, 0.0010, 0.0009, 0.0010,\n 0.0008, 0.0006, 0.0011, 0.0010, 0.0010, 0.0008, 0.0011, 0.0006, 0.0009,\n 0.0006, 0.0009, 0.0008, 0.0007, 0.0006, 0.0006, 0.0007, 0.0007, 0.0012,\n 0.0008, 0.0009, 0.0010, 0.0008, 0.0006, 0.0010, 0.0006, 0.0008, 0.0009,\n 0.0009, 0.0006, 0.0007, 0.0008, 0.0010, 0.0008, 0.0009, 0.0008, 0.0006,\n 0.0010, 0.0005, 0.0005, 0.0009, 0.0008, 0.0008, 0.0008, 0.0006, 0.0012,\n 0.0012, 0.0012, 0.0007, 0.0007, 0.0008, 0.0008, 0.0006, 0.0011, 0.0005,\n 0.0008, 0.0009, 0.0006, 0.0007, 0.0009, 0.0007, 0.0007, 0.0008, 0.0007,\n 0.0007, 0.0006, 0.0006, 0.0009, 0.0007, 0.0011, 0.0007, 0.0006, 0.0009,\n 0.0008, 0.0010, 0.0009, 0.0008, 0.0010, 0.0009, 0.0007, 0.0005, 0.0006,\n 0.0010, 0.0004, 0.0007, 0.0006, 0.0008, 0.0009, 0.0009, 0.0010, 0.0010,\n 0.0007, 0.0007, 0.0007, 0.0008, 0.0008, 0.0009, 0.0008, 0.0006, 0.0009,\n 0.0009, 0.0009, 0.0006, 0.0007, 0.0010, 0.0010, 0.0009, 0.0008, 0.0008,\n 0.0009, 0.0007, 0.0010, 0.0009, 0.0007, 0.0007, 0.0010, 0.0007],\n device='cuda:0')"
14
+ },
15
+ "2": {
16
+ "step": "tensor(8764.)",
17
+ "exp_avg": "tensor([[ 1.1613e-04, -2.4537e-04, 6.5847e-05, ..., -5.1372e-05,\n 8.5061e-04, 1.5832e-04],\n [ 2.8584e-04, 2.2454e-06, -6.1328e-05, ..., -1.8359e-04,\n 1.7699e-04, -2.6203e-05],\n [-2.3968e-04, 3.0246e-04, 1.9443e-05, ..., -2.8390e-05,\n -4.6356e-04, -2.4774e-04],\n ...,\n [ 1.8845e-04, 3.1283e-04, -1.3213e-04, ..., -2.1226e-04,\n -7.1429e-04, 2.4199e-04],\n [-3.2353e-04, -4.0743e-05, 6.4612e-05, ..., -4.5458e-05,\n -5.2141e-04, 4.7491e-05],\n [-2.4117e-04, -2.0880e-04, -9.6746e-05, ..., -1.0472e-04,\n -1.1187e-04, -2.1694e-05]], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([[3.8919e-07, 5.2336e-07, 2.3781e-07, ..., 5.8101e-07, 1.3414e-06,\n 7.5221e-07],\n [3.2471e-07, 5.0528e-07, 3.7763e-07, ..., 6.0895e-07, 1.2327e-06,\n 5.4268e-07],\n [3.9090e-07, 4.5282e-07, 3.5805e-07, ..., 5.3190e-07, 1.2924e-06,\n 5.9403e-07],\n ...,\n [3.6554e-07, 4.6305e-07, 2.7245e-07, ..., 5.6196e-07, 1.3753e-06,\n 5.1980e-07],\n [5.6290e-07, 5.2327e-07, 3.1121e-07, ..., 7.2062e-07, 1.3256e-06,\n 9.3174e-07],\n [4.5768e-07, 5.8335e-07, 3.1705e-07, ..., 5.9369e-07, 1.4820e-06,\n 5.3072e-07]], device='cuda:0')"
19
+ },
20
+ "3": {
21
+ "step": "tensor(8764.)",
22
+ "exp_avg": "tensor([[ 2.1243e-04, -1.9024e-04, 6.7456e-04, ..., 3.7971e-04,\n -8.4958e-06, 4.3485e-04],\n [-1.8222e-05, -3.0634e-04, -2.0430e-04, ..., 9.7293e-05,\n -1.5021e-04, 2.2676e-04],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [-1.5431e-05, 1.6406e-04, 6.4511e-05, ..., -2.4012e-05,\n 7.7112e-06, 1.9414e-04],\n [-6.9010e-05, -1.3142e-04, 1.4816e-04, ..., -8.6680e-05,\n -1.1996e-04, -5.7838e-05],\n [-2.8656e-05, 2.7663e-05, 6.0927e-05, ..., -1.4824e-05,\n -8.2945e-05, -3.6555e-05]], device='cuda:0')",
23
+ "exp_avg_sq": "tensor([[6.7763e-07, 1.9219e-07, 6.4740e-07, ..., 2.7960e-06, 2.8169e-07,\n 1.5628e-06],\n [4.0925e-07, 2.8420e-07, 5.4086e-07, ..., 2.4034e-07, 5.9353e-07,\n 1.5713e-07],\n [1.3379e-13, 3.1572e-13, 5.9184e-14, ..., 3.6936e-14, 3.8954e-13,\n 5.4175e-15],\n ...,\n [4.2371e-07, 1.8482e-07, 9.9044e-08, ..., 1.8461e-07, 5.2166e-08,\n 4.3371e-07],\n [3.7823e-07, 2.2407e-07, 2.1899e-07, ..., 2.6054e-07, 1.5732e-06,\n 1.2749e-07],\n [4.9982e-07, 1.1280e-07, 1.2024e-06, ..., 1.4525e-07, 2.2616e-07,\n 3.9323e-07]], device='cuda:0')"
24
+ },
25
+ "4": {
26
+ "step": "tensor(8764.)",
27
+ "exp_avg": "tensor([ 6.4035e-03, -3.5768e-04, 5.6052e-45, ..., 5.5611e-03,\n -2.4265e-03, 2.4726e-03], device='cuda:0')",
28
+ "exp_avg_sq": "tensor([3.2276e-04, 2.2413e-04, 8.7553e-10, ..., 2.1658e-04, 1.8092e-04,\n 2.3484e-04], device='cuda:0')"
29
+ },
30
+ "5": {
31
+ "step": "tensor(8764.)",
32
+ "exp_avg": "tensor([[-4.0797e-05, -1.3492e-04, 5.6052e-45, ..., -1.2031e-04,\n 9.9185e-06, 6.8955e-05],\n [ 2.0200e-05, -1.1098e-04, 5.6052e-45, ..., 1.6978e-05,\n 2.4169e-05, 7.4597e-05],\n [ 7.9566e-05, -2.9168e-04, -5.6052e-45, ..., 7.9041e-05,\n 4.4458e-05, 2.6446e-05],\n ...,\n [-6.8248e-05, -3.9297e-04, -5.6052e-45, ..., 1.2512e-04,\n 6.7625e-05, -3.2781e-05],\n [-6.9210e-07, -1.8447e-04, 5.6052e-45, ..., -5.6553e-05,\n 2.7026e-05, -8.2407e-05],\n [ 3.6743e-05, 8.6505e-05, -5.6052e-45, ..., -1.2276e-04,\n 4.3893e-05, -1.9657e-05]], device='cuda:0')",
33
+ "exp_avg_sq": "tensor([[7.4205e-08, 6.8368e-08, 7.6466e-14, ..., 5.8813e-08, 6.9377e-08,\n 9.9229e-08],\n [1.0207e-07, 9.0560e-08, 1.4183e-13, ..., 5.7872e-08, 8.2100e-08,\n 1.0272e-07],\n [8.7513e-08, 9.3190e-08, 2.4697e-13, ..., 6.9361e-08, 7.8890e-08,\n 1.1725e-07],\n ...,\n [1.1799e-07, 9.9062e-08, 1.0931e-13, ..., 6.5212e-08, 7.3212e-08,\n 1.0301e-07],\n [1.0439e-07, 1.0298e-07, 1.4754e-13, ..., 8.0210e-08, 5.1861e-08,\n 1.3201e-07],\n [8.7664e-08, 8.1901e-08, 3.1117e-13, ..., 7.7709e-08, 6.0490e-08,\n 1.4304e-07]], device='cuda:0')"
34
+ },
35
+ "6": {
36
+ "step": "tensor(8764.)",
37
+ "exp_avg": "tensor([[-4.9396e-06, 1.2893e-04, 5.6158e-04, ..., 9.2359e-05,\n 2.5381e-04, 7.7004e-05],\n [-7.2475e-05, -2.0379e-05, 4.2341e-04, ..., -1.6983e-04,\n -3.6217e-04, -1.3232e-04],\n [ 6.9453e-04, 4.5013e-04, 8.7206e-06, ..., 5.0492e-04,\n 2.7372e-04, 2.0829e-04],\n ...,\n [ 4.1332e-05, 6.9902e-04, 1.3068e-04, ..., 1.0929e-04,\n 8.8836e-05, -1.7106e-04],\n [ 3.4280e-04, 5.0931e-04, 7.9912e-04, ..., 2.3957e-04,\n 1.1239e-05, 5.7839e-04],\n [-3.7124e-04, 3.9552e-04, -2.9053e-04, ..., 5.4298e-04,\n 5.4961e-04, -8.9139e-05]], device='cuda:0')",
38
+ "exp_avg_sq": "tensor([[5.0080e-07, 1.0236e-06, 1.0071e-06, ..., 2.7401e-07, 1.9198e-07,\n 3.1141e-07],\n [4.1562e-07, 6.9354e-07, 1.0579e-06, ..., 2.0561e-07, 1.3698e-07,\n 2.7794e-07],\n [6.2417e-07, 9.2182e-07, 1.0921e-06, ..., 2.7651e-07, 2.0695e-07,\n 2.6418e-07],\n ...,\n [6.5080e-07, 1.1636e-06, 1.3363e-06, ..., 3.5346e-07, 2.4087e-07,\n 3.2455e-07],\n [5.6985e-07, 9.5288e-07, 8.9030e-07, ..., 2.4000e-07, 1.6020e-07,\n 2.2636e-07],\n [5.6878e-07, 1.2874e-06, 1.3161e-06, ..., 3.3131e-07, 2.5914e-07,\n 2.8825e-07]], device='cuda:0')"
39
+ },
40
+ "7": {
41
+ "step": "tensor(8764.)",
42
+ "exp_avg": "tensor([-3.6169e-03, -4.8657e-03, -6.8010e-03, 8.9287e-03, -2.9836e-04,\n -7.7100e-03, 3.2346e-03, 4.8105e-03, 3.1107e-03, 4.7610e-03,\n 2.8612e-03, -1.9238e-03, -4.3002e-03, -3.4577e-03, -1.2674e-03,\n -2.3410e-04, -1.0363e-03, -7.8978e-03, -5.5637e-03, 2.8738e-03,\n 1.4627e-05, 2.0726e-03, 6.2676e-03, -8.9720e-03, -6.7240e-03,\n -4.5903e-03, -1.6831e-02, 3.2987e-03, -6.0711e-03, -6.3619e-04,\n 2.8244e-03, 9.3018e-03, -4.0292e-03, 1.2995e-03, 1.3610e-02,\n -6.8335e-03, 1.0007e-02, 4.8697e-03, -5.2443e-03, 4.8201e-03,\n -1.1612e-03, 4.8178e-03, 5.1909e-04, -5.1202e-04, -1.0861e-02,\n -2.3574e-03, 4.2757e-03, 6.9999e-04, 7.7903e-03, 7.9367e-03,\n -6.8035e-03, 1.2079e-03, 1.3934e-02, -2.0513e-03, -1.3848e-02,\n -1.4266e-03, -4.0156e-03, 1.6037e-04, 4.3782e-03, -1.4474e-02,\n 3.2594e-03, 4.5863e-03, -6.0424e-03, -9.2385e-03, -1.3074e-03,\n -3.0470e-03, 3.3141e-03, -4.2466e-03, -5.6255e-03, 2.3786e-03,\n -2.6322e-03, -3.1634e-03, -1.9006e-03, -5.3196e-04, 1.3974e-03,\n -2.7020e-03, 1.8637e-03, 1.4088e-02, 7.1868e-03, -3.0983e-03,\n 2.1949e-03, -2.3239e-03, -9.8445e-03, 4.3183e-03, -7.0603e-04,\n -1.0057e-02, -7.7664e-03, -4.2562e-03, -6.4112e-03, -3.5292e-03,\n 7.9422e-03, -6.6620e-04, 1.8875e-03, -7.5260e-04, 3.3116e-03,\n 6.6485e-03, -4.4150e-03, -1.5126e-03, 1.1456e-03, 2.9424e-03,\n -2.5046e-04, -1.3273e-03, -3.5620e-03, -5.4947e-03, 4.9064e-04,\n -2.3563e-04, 4.3141e-03, 6.2634e-04, -4.4112e-03, 1.4625e-03,\n -3.1309e-03, -5.1615e-04, 6.4858e-04, -2.9651e-03, 1.5983e-02,\n -7.8143e-03, -2.0761e-04, -1.4799e-03, 8.3736e-03, -3.7615e-03,\n 4.9296e-03, -7.4464e-04, 7.3958e-04, -3.3522e-03, 4.8029e-04,\n 2.9141e-02, 1.8755e-03, -1.6147e-02, -2.6691e-03, -3.0548e-03,\n -2.6197e-03, -4.4040e-03, 1.3227e-03, -2.0173e-03, 9.6158e-03,\n 8.8358e-03, 5.4502e-03, -3.4575e-04, 1.0392e-02, -2.1125e-03,\n 4.3944e-04, 1.2271e-03, 5.4829e-04, -1.3773e-03, -2.1104e-03,\n -1.4124e-03, 2.2020e-03, -1.5087e-02, 1.7091e-03, 1.4836e-03,\n -3.3952e-03, -7.2040e-04, 5.3200e-03, 4.4588e-03, 7.5495e-03,\n -2.3965e-03, -6.4682e-03, 3.9093e-03, -2.8337e-03, -2.9891e-03,\n 8.0557e-03, -3.8628e-03, -6.9979e-03, -7.1474e-03, 5.3005e-03,\n -4.0964e-03, -4.2510e-03, -2.2188e-03, 1.6647e-03, -1.3966e-03,\n -5.4685e-03, 7.6545e-04, -6.2205e-03, -4.3114e-03, 1.2977e-03,\n 6.2161e-04, -7.7067e-03, -7.1533e-03, 4.1679e-03, 3.6805e-03,\n 1.6836e-03, -6.7063e-03, -7.2884e-03, -1.2155e-03, 3.7579e-03,\n 8.9800e-04, -6.3052e-03, 3.3678e-03, 9.9090e-03, -1.2498e-03,\n -6.7303e-03, 1.7907e-03, 2.2952e-04, -1.9774e-03, -3.5696e-03,\n 1.0398e-03, 9.5392e-04, -3.6085e-03, -1.9518e-03, -3.4406e-03,\n -9.5471e-03, 3.4710e-04, 7.3925e-03, -2.9166e-03, 3.2982e-03,\n -1.0443e-02, -4.2566e-03, -8.0340e-03, 5.7330e-03, -2.8856e-03,\n -4.0690e-04, 7.0474e-03, 2.0080e-03, -3.9298e-04, 8.2539e-03,\n 1.0926e-03, -7.6345e-03, -2.7583e-03, -8.8053e-04, -2.4564e-03,\n 9.3885e-04, 1.7426e-03, 6.6971e-03, 8.0757e-03, -7.8295e-03,\n -9.3001e-03, -7.0172e-04, 5.4364e-03, -4.3131e-03, -1.3769e-04,\n 3.0989e-03, -9.4253e-04, 3.3122e-03, 1.3599e-03, 6.0608e-03,\n 1.1484e-03, 1.3839e-03, -5.0162e-03, -5.8814e-03, -6.4699e-03,\n 8.8205e-03, -2.6768e-03, 2.9391e-03, -5.9224e-03, 7.8916e-04,\n 4.0796e-03, -5.5564e-03, -1.2838e-02, 9.7147e-03, -6.2789e-03,\n -5.2357e-03, 7.1774e-03, -3.2168e-03, -3.3477e-04, -5.5242e-05,\n 1.3688e-02, 1.8059e-03, -8.8574e-03, 1.0890e-03, -1.7337e-04,\n -9.0451e-04, 1.0206e-03, -5.1610e-03, -2.7072e-03, 3.1546e-03,\n 4.8214e-04, -1.9352e-03, -2.1704e-03, -5.4355e-03, -1.2858e-02,\n 1.3667e-02, 7.7451e-03, 4.4789e-03, -1.2310e-04, -9.3319e-03,\n 8.3192e-03, 7.2942e-04, -8.6750e-04, -2.9670e-03, 5.7800e-03,\n -1.3905e-03, 1.1472e-02, -9.4803e-03, 7.6153e-03, -5.1308e-04,\n -3.3322e-03, -1.1812e-03, 2.6847e-03, -1.8388e-03, -2.6632e-03,\n -9.4981e-04, -7.5891e-03, 1.2502e-03, -1.1403e-02, -4.5009e-03,\n -8.3840e-03, 1.3528e-03, -1.6472e-03, 1.2848e-03, 1.5910e-03,\n -3.2532e-03, 3.2937e-03, 2.3802e-03, -3.6302e-03, -2.8316e-04,\n -3.3149e-03, -1.6660e-03, 7.2093e-03, 3.7370e-03, -1.3929e-03,\n 7.6871e-03, -2.0655e-03, 7.3211e-04, -1.2141e-03, -6.1319e-03,\n -7.3432e-03, -1.2227e-03, 8.7875e-03, -3.5843e-03, -7.6901e-03,\n 4.1222e-03, 9.9243e-03, -4.2520e-04, -8.4195e-03, 7.9408e-04,\n -7.0948e-03, 1.4602e-03, 1.9703e-03, -4.9346e-03, -2.4549e-03,\n -4.4740e-03, -3.6902e-03, 8.0236e-03, -2.6790e-03, -9.2212e-03,\n 2.2875e-03, -1.9877e-03, -8.5885e-04, -1.9482e-03, -6.0354e-03,\n 7.2300e-03, 5.6336e-04, -1.1082e-02, 6.8084e-03, -2.4010e-03,\n -8.4700e-04, -4.8792e-03, -1.0681e-02, 1.4240e-03, 5.5511e-03,\n -3.7188e-04, -1.9121e-02, 4.6628e-03, -1.0541e-03, 1.4397e-02,\n 8.1240e-03, -2.9288e-03, 6.5062e-05, -2.2849e-03, 9.1071e-03,\n 3.3704e-03, 6.2551e-03, -1.6555e-03, -8.0321e-03, 1.5342e-03,\n -3.3447e-04, -9.6187e-04, -3.3411e-04, -9.4144e-03, 8.5934e-03,\n 4.6768e-03, 5.8247e-03, 9.9069e-04, -3.1541e-03, 3.3872e-03,\n 6.1268e-03, -4.6873e-04, 1.0193e-02, 6.2689e-03, -2.1511e-03,\n -2.4363e-03, 2.8463e-03, -4.9339e-03, 3.0618e-03, -4.0503e-03,\n -9.5927e-04, -4.9835e-03, 1.0441e-03, -9.2915e-03, -8.5660e-03,\n -3.4690e-03, -3.0111e-03, -4.2556e-03, -1.2516e-03, -9.2222e-04,\n 1.7134e-02, -1.4924e-03, -6.5020e-03, -1.5797e-03, -4.0968e-03,\n -1.0066e-02, 3.8404e-03, -6.7702e-03, 1.5265e-03, 3.1642e-03,\n -3.2324e-03, -1.0213e-02, -1.4466e-04, 2.5972e-04, -1.4072e-03,\n -6.1759e-03, -2.0851e-03, -1.1461e-02, -2.7618e-03, 3.2063e-03,\n -5.4406e-03, 4.3896e-03, -1.3960e-02, -3.7649e-03, 2.0915e-03,\n -6.6898e-03, -6.1371e-03, -2.3217e-03, -3.8191e-03, 6.3428e-03,\n -2.8140e-03, -5.1035e-03, 1.3257e-03, -5.0419e-04, -6.9766e-04,\n -4.2144e-03, -6.1276e-03, 2.8205e-03, 8.6099e-03, -1.3538e-03,\n 1.0372e-02, 5.1606e-03, 2.1373e-03, 4.6463e-03, -2.7763e-03,\n 3.8651e-03, 8.5966e-03, 5.1834e-03, 7.8660e-04, -8.1417e-04,\n -1.6991e-03, 4.1859e-03, 2.7748e-03, -1.5962e-03, -3.1145e-03,\n -4.7534e-04, -5.1666e-03, -5.7462e-03, -4.1331e-03, 2.0315e-03,\n 3.2323e-03, -6.4772e-03, -7.7644e-03, -3.0035e-03, 2.5840e-03,\n 1.6970e-03, 2.5019e-05, 5.3808e-03, -8.3671e-03, 1.2406e-02,\n -6.4069e-04, 9.3505e-04, 8.3545e-04, 5.8971e-03, 6.8266e-03,\n -4.1802e-03, -6.4611e-03, 1.5790e-02, 3.6278e-03, 4.6195e-03,\n -8.3487e-03, 8.8064e-04, 5.3509e-03, 1.0559e-03, 6.7750e-03,\n -8.0695e-03, 4.1291e-03, 1.1197e-03, -6.3619e-03, -3.4696e-03,\n 9.0914e-03, -1.5177e-02, -1.1240e-02, -1.7655e-02, 3.7865e-03,\n -5.2211e-03, 2.5515e-03, -1.4598e-03, -7.2285e-03, -2.5499e-03,\n -8.9046e-03, 3.0017e-03, 2.4146e-03, 2.7308e-03, 3.7130e-03,\n -4.8965e-03, 2.2864e-03, 1.4274e-02, -7.8558e-03, 1.1431e-03,\n -7.2555e-03, 2.4877e-03, 1.4774e-04, -3.7766e-03, 6.2107e-04,\n 1.5609e-02, -9.1930e-03], device='cuda:0')",
43
+ "exp_avg_sq": "tensor([0.0003, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002,\n 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0004, 0.0003, 0.0003, 0.0003,\n 0.0003, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0004, 0.0002, 0.0003,\n 0.0003, 0.0002, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002,\n 0.0004, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0004,\n 0.0003, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0003,\n 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002,\n 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002,\n 0.0002, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0004, 0.0002, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0004,\n 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003,\n 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0002, 0.0002, 0.0003, 0.0004,\n 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0002, 0.0004, 0.0002, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002,\n 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0002, 0.0003, 0.0001, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0004,\n 0.0002, 0.0003, 0.0003, 0.0002, 0.0002, 0.0004, 0.0003, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0004,\n 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0001, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0003, 0.0003, 0.0004, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002,\n 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0002, 0.0003, 0.0003, 0.0004, 0.0003, 0.0003, 0.0002, 0.0003,\n 0.0003, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0002, 0.0003, 0.0004, 0.0004, 0.0003, 0.0003, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0002, 0.0002, 0.0003,\n 0.0002, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002, 0.0003, 0.0002,\n 0.0003, 0.0003, 0.0002, 0.0002, 0.0003, 0.0002, 0.0002, 0.0003, 0.0002,\n 0.0003, 0.0003, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0002, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0003, 0.0004, 0.0003, 0.0002, 0.0003, 0.0002,\n 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002,\n 0.0003, 0.0003, 0.0004, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003,\n 0.0003, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003,\n 0.0002, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002,\n 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002,\n 0.0003, 0.0003, 0.0003, 0.0002, 0.0004, 0.0003, 0.0002, 0.0002, 0.0002,\n 0.0002, 0.0002, 0.0002, 0.0004, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002,\n 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0002, 0.0002, 0.0001, 0.0003, 0.0003, 0.0002, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002,\n 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003,\n 0.0003, 0.0004, 0.0004, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002,\n 0.0003, 0.0002, 0.0001, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003],\n device='cuda:0')"
44
+ },
45
+ "8": {
46
+ "step": "tensor(8764.)",
47
+ "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-4.4351e-05, -1.7115e-05, -9.9487e-06, ..., 6.9125e-06,\n -1.3103e-05, -8.8260e-05],\n [ 1.2963e-04, 7.1316e-05, 2.0631e-05, ..., -2.2838e-05,\n 4.2538e-04, 9.3337e-05],\n ...,\n [-4.4010e-04, 4.1626e-06, -7.4140e-05, ..., -2.7364e-05,\n 1.3134e-05, -8.5909e-05],\n [ 7.1439e-05, 1.0383e-04, 3.0997e-06, ..., 5.1062e-05,\n 1.1519e-04, 4.3364e-05],\n [ 2.2915e-04, -2.4079e-06, 3.0039e-05, ..., 7.3116e-06,\n -9.2827e-05, 2.4608e-04]], device='cuda:0')",
48
+ "exp_avg_sq": "tensor([[1.6744e-15, 5.2364e-14, 3.0044e-15, ..., 2.2372e-13, 2.2416e-15,\n 7.9169e-16],\n [1.4807e-07, 2.3019e-07, 1.8880e-07, ..., 5.6286e-08, 4.8355e-08,\n 1.1723e-07],\n [6.2630e-08, 1.8307e-07, 2.1456e-07, ..., 7.5975e-08, 1.2153e-06,\n 2.4317e-07],\n ...,\n [5.6802e-07, 3.5121e-08, 1.6296e-07, ..., 2.2925e-07, 5.4979e-08,\n 5.1232e-07],\n [7.6711e-08, 1.0464e-07, 1.6586e-08, ..., 2.2711e-07, 1.2974e-06,\n 3.5608e-08],\n [1.0194e-07, 2.6567e-08, 2.7063e-08, ..., 8.0632e-08, 2.7206e-07,\n 3.5089e-07]], device='cuda:0')"
49
+ },
50
+ "9": {
51
+ "step": "tensor(8764.)",
52
+ "exp_avg": "tensor([ 5.6052e-45, -5.1914e-03, 8.2541e-03, ..., -3.8596e-03,\n 5.0723e-03, -6.1015e-04], device='cuda:0')",
53
+ "exp_avg_sq": "tensor([2.7560e-10, 1.7893e-04, 1.4841e-04, ..., 1.1237e-04, 1.0291e-04,\n 8.6540e-05], device='cuda:0')"
54
+ },
55
+ "10": {
56
+ "step": "tensor(8764.)",
57
+ "exp_avg": "tensor([[-5.6052e-45, -4.7411e-06, 4.8629e-05, ..., 3.3045e-05,\n 9.9746e-06, -4.3416e-06],\n [ 5.6052e-45, -4.1643e-05, -2.2159e-05, ..., 1.8889e-05,\n -2.4303e-07, -2.4821e-07],\n [ 5.6052e-45, 1.8212e-05, -4.3453e-05, ..., 4.5973e-05,\n -1.9274e-04, -5.0667e-05],\n ...,\n [-5.6052e-45, 5.4834e-05, -6.9441e-06, ..., 3.7907e-05,\n -9.4904e-06, -1.8234e-05],\n [-5.6052e-45, -5.6269e-05, -2.6123e-05, ..., -1.6216e-04,\n 5.7277e-06, -1.5374e-04],\n [ 5.6052e-45, 7.2040e-06, 1.9939e-05, ..., -3.5679e-05,\n 2.9311e-05, 2.7959e-05]], device='cuda:0')",
58
+ "exp_avg_sq": "tensor([[1.1469e-14, 2.2924e-08, 3.7819e-08, ..., 2.9580e-08, 1.9497e-08,\n 1.7909e-08],\n [1.4589e-14, 2.3200e-08, 4.7620e-08, ..., 4.7509e-08, 2.3287e-08,\n 2.2218e-08],\n [1.6323e-15, 2.6929e-08, 3.4149e-08, ..., 4.7079e-08, 3.1163e-08,\n 2.7663e-08],\n ...,\n [1.4800e-14, 3.0782e-08, 8.9392e-08, ..., 3.8591e-08, 3.4006e-08,\n 2.8188e-08],\n [2.0748e-15, 2.7472e-08, 3.6783e-08, ..., 3.9787e-08, 2.6683e-08,\n 2.8045e-08],\n [5.2248e-16, 2.7328e-08, 6.3238e-08, ..., 3.4147e-08, 3.0573e-08,\n 2.8328e-08]], device='cuda:0')"
59
+ },
60
+ "11": {
61
+ "step": "tensor(8764.)",
62
+ "exp_avg": "tensor([[ 1.1977e-04, -1.4029e-04, -1.0840e-04, ..., 1.2688e-05,\n -1.7587e-04, -2.2856e-04],\n [ 2.0058e-05, 3.5881e-05, 1.0928e-04, ..., -9.5214e-05,\n -1.1534e-05, -7.4375e-05],\n [-3.8379e-05, 1.5285e-04, 1.0244e-04, ..., -1.4372e-04,\n 6.5528e-06, -3.9647e-05],\n ...,\n [ 1.0850e-04, -1.8057e-04, -5.5349e-05, ..., 3.3719e-06,\n -4.7828e-05, 1.1048e-04],\n [-2.3443e-05, 1.4972e-05, -1.1462e-04, ..., -1.5264e-04,\n 8.7994e-05, -1.8067e-05],\n [ 2.6400e-05, 6.0178e-05, 8.8580e-05, ..., -3.7769e-05,\n -8.1129e-06, -1.4177e-04]], device='cuda:0')",
63
+ "exp_avg_sq": "tensor([[1.3515e-07, 2.2281e-07, 3.7045e-07, ..., 1.1423e-07, 7.8287e-08,\n 1.4752e-07],\n [1.2960e-07, 1.9038e-07, 2.6403e-07, ..., 1.2394e-07, 8.2538e-08,\n 9.8904e-08],\n [1.0475e-07, 1.9435e-07, 1.6298e-07, ..., 9.1636e-08, 6.0610e-08,\n 7.9742e-08],\n ...,\n [1.3045e-07, 2.8006e-07, 2.9848e-07, ..., 1.3195e-07, 9.7053e-08,\n 1.3617e-07],\n [1.2017e-07, 2.1507e-07, 2.2830e-07, ..., 1.0840e-07, 5.8904e-08,\n 1.1674e-07],\n [1.1801e-07, 2.3646e-07, 2.4830e-07, ..., 1.1732e-07, 7.6662e-08,\n 1.1327e-07]], device='cuda:0')"
64
+ },
65
+ "12": {
66
+ "step": "tensor(8764.)",
67
+ "exp_avg": "tensor([ 8.2227e-04, 7.1150e-04, -4.7610e-03, -1.8129e-03, -9.8523e-03,\n 1.1648e-03, -9.7054e-04, 4.0909e-03, 2.0361e-03, 4.8996e-04,\n 1.0089e-04, -3.4519e-03, 7.3385e-04, -3.3425e-03, -7.5177e-03,\n 1.1081e-03, 2.3242e-03, -2.4379e-03, -1.6493e-03, 7.9284e-04,\n 5.1791e-03, -5.1043e-03, -4.5346e-03, 9.4504e-03, -2.7882e-03,\n -1.8575e-03, 2.2211e-03, -1.6808e-03, -3.3777e-03, -4.7886e-03,\n -2.5461e-03, -3.1509e-03, 1.4097e-02, 1.2019e-03, -2.1550e-04,\n -3.4787e-04, 4.2229e-03, 2.1768e-03, -1.4282e-03, 1.0336e-02,\n -6.4811e-03, -3.7732e-03, -4.0382e-03, -2.1986e-03, 4.0349e-03,\n -1.7757e-03, 3.4123e-03, 2.7830e-03, -2.8050e-04, 1.4418e-03,\n -2.2879e-03, 1.9043e-03, -7.1144e-03, 3.9189e-03, -1.2104e-03,\n 1.6845e-03, -3.1562e-03, 2.7882e-03, -3.9974e-03, -1.1770e-03,\n 5.7403e-03, -4.3802e-03, -2.5561e-03, -2.0237e-03, 2.8005e-03,\n 3.7214e-03, -6.3485e-03, -9.1885e-04, -4.9854e-03, -2.8240e-03,\n -2.2754e-03, 2.7590e-03, -5.1207e-04, -6.4476e-04, -1.9057e-03,\n 1.3683e-03, -2.7659e-03, -7.5411e-04, -6.7554e-06, -3.5820e-03,\n 2.1838e-04, 1.2058e-03, 6.7217e-03, -3.6463e-03, 5.6952e-03,\n 1.0565e-03, -2.7148e-03, -6.4399e-03, -1.8879e-03, 4.8237e-03,\n -8.8697e-05, 3.8451e-04, 1.5028e-03, 3.4978e-03, -1.9258e-03,\n 3.4140e-03, -4.5175e-03, -3.7942e-03, 1.0169e-03, -3.6328e-03,\n 7.6490e-04, 6.5365e-03, 8.1564e-04, 3.2533e-03, -8.5590e-04,\n 1.2315e-03, 4.5627e-03, 1.4746e-03, -1.0217e-03, 5.8885e-03,\n 4.1750e-15, -7.1578e-03, 3.1725e-03, 3.6224e-03, -7.6722e-04,\n 1.0229e-03, 1.7997e-03, -2.5907e-03, -2.3403e-03, -3.5660e-03,\n -3.6601e-03, -7.4054e-03, 6.3243e-04, 8.9190e-04, -8.9005e-04,\n -4.7618e-05, 1.1662e-03, 6.3297e-04, 6.2437e-04, -4.1242e-03,\n -3.5108e-03, -1.8221e-03, -1.0862e-03, 3.5918e-03, 4.6726e-03,\n 4.7728e-03, -3.0469e-03, 8.3089e-05, 5.0162e-04, 1.5011e-03,\n -3.6005e-03, -1.7326e-03, -6.5845e-04, -3.7213e-03, 7.4544e-03,\n -1.0315e-03, 3.3927e-03, -1.5421e-03, 6.0741e-03, -6.7331e-03,\n -4.7322e-04, -5.2083e-04, -1.3750e-03, -2.7572e-03, -3.0472e-03,\n 6.1528e-03, -1.2490e-03, 2.7140e-03, 2.8391e-03, -6.6269e-03,\n -1.0841e-03, 5.9904e-04, -7.6318e-03, 3.5112e-03, 3.4773e-03,\n -6.3151e-03, -1.0369e-03, -5.8321e-03, 1.8062e-03, -2.4871e-04,\n -2.9360e-03, 1.2828e-03, -4.9499e-03, 1.1708e-02, -2.6199e-03,\n 1.6153e-03, -9.0061e-03, 1.2836e-03, 7.9063e-03, -5.2300e-05,\n -2.7108e-07, 5.6932e-03, 3.6593e-03, -1.9095e-03, -1.1843e-03,\n 3.5035e-03, -1.0904e-03, 5.3573e-03, 4.7097e-03, -2.2025e-03,\n 6.4771e-03, 3.6642e-03, -7.3183e-03, 4.0172e-03, 9.8865e-03,\n 3.2260e-03, 1.0791e-03, -7.3076e-03, -1.1042e-03, 3.7907e-03,\n 3.1248e-03, 3.3731e-03, -6.2239e-04, 4.1507e-03, 4.6285e-03,\n 4.4689e-03, -5.0562e-03, -2.9029e-03, 2.3992e-03, 2.1358e-03,\n -4.3428e-03, 2.2752e-03, -1.4807e-03, -6.9458e-04, -3.8399e-03,\n 3.6555e-03, -9.5301e-04, 5.4316e-03, 6.2101e-03, 1.5027e-03,\n -1.2897e-04, -3.7052e-03, 3.1776e-03, 4.4423e-03, -6.1048e-03,\n -3.8652e-04, -5.6091e-04, -2.1909e-03, -7.7120e-04, 2.5490e-04,\n 7.2666e-03, 3.6705e-03, 5.5902e-03, 1.0322e-03, 2.8392e-03,\n 1.6622e-03, 4.6280e-03, 9.0514e-04, -2.3420e-03, -6.2422e-03,\n -9.8124e-03, -7.8278e-03, -6.7467e-04, -7.4316e-03, 4.5913e-03,\n 4.3466e-03, 9.9258e-03, 1.1168e-02, -1.0509e-03, -3.1190e-03,\n 2.5035e-03, 1.8386e-03, 7.7560e-04, -6.2200e-03, 1.0426e-03,\n -5.7563e-03, 3.9682e-03, 4.4225e-03, -3.0779e-03, 2.3950e-03,\n 3.2001e-03, 3.8801e-04, -2.6674e-03, -5.8778e-03, -1.1220e-03,\n 1.1840e-03, -9.5390e-04, 2.2365e-03, 7.5381e-03, 5.9087e-04,\n -5.3534e-03, -6.1401e-04, 1.2236e-04, -3.3635e-03, 1.0444e-04,\n 5.2201e-04, -4.1700e-04, -7.4656e-04, 5.2245e-04, 1.0076e-03,\n -4.4592e-03, 5.5818e-03, -5.0465e-03, -3.5470e-03, 1.2664e-03,\n -7.1584e-05, -2.8344e-03, 1.3341e-03, -5.4661e-03, -3.0390e-03,\n -1.3022e-03, 3.2732e-03, 3.0543e-03, 6.4927e-03, -1.8662e-03,\n 7.2318e-03, -5.6673e-03, 1.8693e-03, -4.0331e-03, 1.6555e-03,\n -4.7490e-03, -9.5830e-03, 4.4232e-03, -5.5649e-03, 2.0106e-03,\n 1.8973e-03, -4.2449e-03, -5.2809e-03, 6.4058e-05, -2.2761e-04,\n 1.7034e-03, -7.1469e-03, -6.7336e-03, 1.1287e-03, -1.2825e-05,\n -3.1751e-03, -1.0135e-03, 4.6115e-03, -1.7894e-03, 1.7089e-03,\n -5.4292e-03, -2.7827e-03, 2.4007e-03, -7.3972e-04, 2.7723e-03,\n 5.2758e-03, 5.7923e-03, 1.2078e-03, 4.4261e-03, 6.7477e-03,\n 2.0252e-03, -1.6490e-03, -3.6416e-03, -1.8390e-03, -6.3663e-04,\n -2.8092e-03, -7.0202e-03, -4.4914e-03, 1.3186e-03, -3.5986e-04,\n 2.3640e-03, -1.6347e-03, 1.7378e-03, -7.7956e-03, 4.4799e-03,\n -7.0954e-03, -5.3673e-05, 1.8887e-03, 9.4718e-04, 6.5917e-04,\n 1.0100e-03, -5.6229e-03, -2.3987e-03, -1.5081e-03, 1.2020e-02,\n 6.9140e-03, 5.1316e-03, -4.6343e-03, -6.3086e-04, 2.4386e-04,\n 9.9708e-04, 4.2097e-05, -7.1322e-03, -7.4363e-04, 1.0145e-03,\n -1.1809e-02, 1.6610e-03, -4.2366e-03, -4.5313e-03, -2.1166e-03,\n 4.8737e-03, 1.0141e-03, -4.6684e-03, -3.0860e-03, 6.3760e-03,\n 9.9884e-05, 1.6888e-03, 3.9419e-03, -8.2221e-03, -4.7366e-03,\n -3.0287e-03, -1.4267e-03, 4.1982e-03, 4.1030e-03, 1.4856e-03,\n -9.4087e-04, -2.0105e-03, -4.8920e-03, -3.7439e-03, -7.0285e-03,\n 8.5160e-03, -3.2099e-03, 6.3170e-04, 1.0008e-03, -6.7672e-03,\n 1.7251e-03, -2.9381e-03, -2.1127e-03, 8.0303e-04, 2.4744e-03,\n 3.7647e-03, -2.8418e-03, -2.8399e-03, -5.0716e-03, -7.6721e-03,\n -4.7135e-03, 3.1244e-03, 1.0617e-03, 3.7841e-04, 1.7939e-03,\n 1.6040e-03, 2.9393e-03, -1.0172e-02, 3.0593e-03, 7.1268e-03,\n 7.3031e-03, 2.8952e-04, 5.6902e-03, 6.8799e-03, 1.9996e-03,\n -8.4771e-04, -7.1776e-04, -9.0661e-04, 2.9901e-03, -4.1720e-04,\n -3.3377e-03, -1.3722e-03, 3.4810e-03, 8.5968e-03, 2.0075e-03,\n -2.9685e-03, -2.3886e-03, 3.7729e-03, 4.9881e-03, -3.7622e-04,\n 3.7709e-04, 8.7912e-04, -2.6207e-03, -2.2193e-03, 6.2818e-04,\n 2.2680e-03, 1.1039e-03, -1.5797e-03, -3.4445e-03, -1.6189e-03,\n 7.7797e-03, -4.9808e-03, 2.0728e-03, -5.6591e-03, -4.8890e-03,\n -1.9629e-04, -3.4646e-03, 2.6944e-03, 4.4847e-03, -4.4433e-04,\n 2.3863e-04, -4.0961e-04, 4.8969e-03, 1.7186e-03, -6.0495e-04,\n -3.2688e-03, 3.1972e-03, 9.3647e-03, 2.9532e-03, -1.4663e-03,\n -2.4483e-04, -3.9077e-03, 4.5833e-03, 4.1251e-03, 1.0566e-04,\n -1.4313e-03, 2.5822e-03, 3.9499e-03, 2.4968e-03, -4.1222e-03,\n -5.2425e-04, 5.7743e-03, 2.1791e-03, 4.7248e-05, 4.4913e-04,\n -6.2756e-03, 6.4741e-04, 9.2292e-04, 5.6223e-03, -4.8822e-03,\n 1.3235e-03, 1.5016e-04, 1.9809e-04, -4.3433e-03, 4.2574e-03,\n 1.9699e-03, -4.7494e-04, 2.8569e-03, 1.9349e-04, 6.6990e-03,\n 7.0214e-04, 7.6230e-03, 3.6830e-04, 3.4915e-03, -3.8690e-03,\n 1.6321e-04, -5.1364e-03, 1.5083e-03, 2.9003e-03, 1.4902e-03,\n 2.7676e-04, 8.8620e-04, -9.0237e-04, -4.3355e-03, -8.6469e-03,\n -2.5159e-03, -7.6872e-04, -1.0677e-03, -2.8648e-03, -6.7435e-03,\n 6.8805e-03, -1.4899e-03, -3.7745e-04, -1.4838e-03, 5.0981e-03,\n 6.4029e-04, 8.3523e-04, -7.1179e-03, -8.2014e-04, 2.3696e-03,\n -5.5733e-05, -6.4388e-03, 2.8372e-03, -9.0016e-03, 2.5446e-03,\n 3.2651e-03, -6.0604e-03, -3.2254e-03, -6.2279e-03, 3.9524e-03,\n 1.0124e-04, -8.5508e-03, 5.0399e-04, 3.6303e-03, 3.6301e-03,\n 9.8370e-04, -1.7245e-03, 1.0706e-03, 3.5007e-03, 5.6909e-04,\n 1.9425e-03, -2.6066e-03, -9.2354e-04, 3.4781e-03, 2.8775e-03,\n 2.0162e-03, -7.0621e-04, 7.2823e-04, 6.8577e-04, -1.7422e-03,\n -7.3583e-03, 6.6964e-03, -1.4307e-03, 1.5168e-03, 4.8261e-03,\n -4.3139e-03, -3.1496e-03, 2.6006e-03, 9.1407e-04, -8.1402e-03,\n -3.1955e-03, -2.5157e-05, -9.1851e-04, 1.4693e-03, 4.1090e-03,\n 7.8701e-04, 3.5122e-04, 2.6592e-03, 4.1920e-03, 6.4912e-03,\n 7.1441e-04, -8.8114e-04, -4.3431e-03, 5.6052e-45, -2.2785e-03,\n -8.5469e-04, 5.6052e-45, 2.9449e-03, 2.1215e-03, 4.8486e-04,\n 2.9540e-03, 1.8462e-03, -3.6414e-03, -2.7219e-03, 1.8835e-03,\n -2.7991e-03, 2.9228e-03, 4.1746e-03, 1.0611e-03, -3.2645e-03,\n -1.3997e-03, 6.4017e-03, 1.9298e-03, -5.9569e-03, 7.1052e-03,\n -1.5770e-04, -1.2375e-03, 3.8907e-03, 5.9098e-03, 2.2058e-03,\n 3.2399e-03, -2.0834e-03, -2.8268e-04, -3.5476e-04, 6.7939e-03,\n 3.5735e-03, -4.9762e-03, -3.6094e-04, -6.7236e-04, -2.0965e-03,\n 2.2386e-03, 6.1248e-03, -6.0373e-03, 4.9238e-03, 5.0398e-05,\n -1.5135e-03, 2.1267e-03, 1.4644e-03, -4.7580e-03, -2.5212e-05,\n 2.2044e-04, 6.1580e-03, -1.0579e-02, 2.5325e-03, -4.4449e-03,\n -5.2604e-03, 8.0053e-04, -5.4627e-03, 3.6171e-03, -1.2594e-03,\n 3.0725e-03, 1.5275e-03, -5.1532e-03, 7.0164e-03, 2.5049e-03,\n -4.3887e-04, 2.6673e-04, 1.6571e-03, 4.0933e-03, 3.1286e-03,\n 4.3535e-03, -1.3547e-03, 4.5653e-04, 4.5154e-04, 4.3659e-03,\n -1.1664e-03, -3.2114e-04, -4.3343e-03, 1.4662e-03, -2.5472e-03,\n 5.2354e-03, 2.3472e-03, -3.1321e-03, 1.4748e-04, -5.2659e-03,\n -2.1912e-03, 2.7786e-03, -1.1405e-03, -2.5531e-03, 3.5565e-03,\n -2.3944e-03, -3.8786e-04, 4.1179e-04, -6.0217e-04, -1.6224e-03,\n 1.2436e-03, -2.1792e-03, -4.6178e-03, 1.8901e-03, -6.2499e-03,\n 5.3976e-03, 2.9755e-03, -4.5007e-03, 2.3910e-03, 4.2012e-03,\n -4.3189e-03, -6.7473e-03, -1.6881e-05, -1.3494e-03, 2.0298e-03,\n 3.0130e-03, 8.0997e-04, -7.5240e-04, 4.6066e-03, -3.9199e-03,\n 6.6974e-03, -7.1142e-03, -2.8090e-03, 2.3816e-03, -9.8144e-03,\n -1.4477e-03, -5.0994e-03, -3.0753e-03, -1.9084e-03, -4.4470e-03,\n -2.5510e-04, 2.1812e-03, 1.2904e-03, -1.4172e-03, 1.1360e-02,\n -2.9011e-03, -1.2451e-03, -3.5976e-03, 4.3413e-03, 3.3658e-03,\n 1.4267e-03, -2.3081e-03, 1.0964e-02, -2.3811e-03, -2.7141e-03,\n -6.4857e-03, -8.3053e-03, -6.9326e-04, 1.2092e-03, -2.6537e-03,\n 4.0782e-03, 3.4976e-04, 3.7157e-03, 1.6361e-03, -1.0230e-04,\n 3.4340e-03, 1.0367e-03, 6.9532e-03, 1.7878e-03, -1.3773e-03,\n -9.7728e-04, -4.2922e-03, -8.7681e-04, -4.3792e-03, 5.4465e-04,\n 3.4734e-03, -3.7972e-03, -1.4718e-03, -7.4409e-03, 3.5306e-03,\n -3.9814e-03, -5.0024e-03, -2.4084e-03, -9.4345e-03, -1.7066e-03,\n 4.7344e-03, 7.7331e-03, -8.2704e-03, 4.2908e-03, 2.4975e-03,\n -2.8137e-03, 1.2598e-02, -2.9923e-03, -3.0796e-03, -1.8999e-03,\n -4.9831e-03, 1.1579e-03, -5.9650e-03, -3.4929e-03, 1.3252e-03,\n -4.3930e-03, 4.6991e-03, -1.7773e-03, -1.3930e-03, 3.0620e-04,\n 2.3246e-03, -9.4538e-04, -2.4310e-03], device='cuda:0')",
68
+ "exp_avg_sq": "tensor([1.2109e-04, 1.1188e-04, 9.6168e-05, 1.2200e-04, 1.7215e-04, 1.5105e-04,\n 1.1981e-04, 1.2690e-04, 1.5392e-04, 1.3407e-04, 1.2912e-04, 1.8825e-04,\n 1.2570e-04, 1.4951e-04, 1.0851e-04, 7.4731e-05, 1.1616e-04, 1.2427e-04,\n 1.6963e-04, 8.8874e-05, 1.2325e-04, 1.5594e-04, 1.2893e-04, 1.3281e-04,\n 1.3677e-04, 1.1506e-04, 9.1947e-05, 1.3459e-04, 9.5622e-05, 1.2852e-04,\n 1.3921e-04, 1.2195e-04, 1.4261e-04, 9.6471e-05, 1.4790e-04, 1.4974e-04,\n 1.4139e-04, 1.1551e-04, 1.1335e-04, 1.1109e-04, 1.5119e-04, 1.4016e-04,\n 1.3630e-04, 9.4506e-05, 9.6929e-05, 1.0905e-04, 4.4470e-05, 1.1242e-04,\n 1.5603e-04, 1.1351e-04, 7.8997e-05, 1.1882e-04, 1.2184e-04, 9.1562e-05,\n 1.6333e-04, 1.2248e-04, 7.0451e-05, 9.0949e-05, 1.5003e-04, 1.2900e-04,\n 1.7716e-04, 1.5511e-04, 1.3403e-04, 1.2174e-04, 1.2111e-04, 1.5358e-04,\n 1.3263e-04, 1.1785e-04, 1.5567e-04, 1.1718e-04, 1.0906e-04, 1.2089e-04,\n 1.2926e-04, 1.3402e-04, 1.3106e-04, 1.2539e-04, 1.3312e-04, 1.4087e-04,\n 1.3975e-04, 1.6999e-04, 1.3364e-04, 1.0482e-04, 1.5653e-04, 1.5618e-04,\n 1.3358e-04, 1.5968e-04, 1.1316e-04, 1.4019e-04, 1.1199e-04, 1.4326e-04,\n 1.1716e-04, 1.0174e-04, 1.4637e-04, 1.2162e-04, 9.3747e-05, 1.6294e-04,\n 1.0432e-04, 1.3523e-04, 1.3919e-04, 1.4484e-04, 1.3448e-04, 1.7337e-04,\n 1.3128e-04, 9.2801e-05, 1.2667e-04, 1.2071e-04, 1.3058e-04, 1.2081e-04,\n 1.2793e-04, 1.0250e-04, 2.0947e-10, 8.1096e-05, 1.3737e-04, 1.3038e-04,\n 1.1762e-04, 1.5002e-04, 1.4590e-04, 1.2892e-04, 1.4507e-04, 1.3341e-04,\n 1.1400e-04, 1.7807e-04, 1.5789e-04, 1.4380e-04, 1.6951e-04, 1.0289e-04,\n 1.0507e-04, 1.1757e-04, 1.3217e-04, 1.3170e-04, 1.2028e-04, 1.5479e-04,\n 1.3320e-04, 1.4182e-04, 1.4013e-04, 1.1069e-04, 1.4884e-04, 1.4205e-04,\n 1.5626e-04, 1.0813e-04, 9.5471e-05, 9.2904e-05, 1.2403e-04, 1.1736e-04,\n 1.1958e-04, 1.2962e-04, 9.5513e-05, 1.8915e-04, 1.1189e-04, 1.2129e-04,\n 1.2876e-04, 1.0023e-04, 1.1943e-04, 1.4978e-04, 1.5494e-04, 1.3802e-04,\n 1.7064e-04, 1.6869e-04, 1.2881e-04, 1.5548e-04, 1.4429e-04, 1.2633e-04,\n 1.2669e-04, 1.4026e-04, 1.1971e-04, 1.2207e-04, 1.3656e-04, 1.4304e-04,\n 1.3999e-04, 9.6417e-05, 1.3132e-04, 1.2881e-04, 1.3738e-04, 1.1756e-04,\n 1.0028e-04, 1.2694e-04, 1.0738e-04, 1.0444e-04, 1.3002e-04, 1.4626e-04,\n 5.9527e-09, 1.3986e-04, 1.2098e-04, 1.4847e-04, 1.7482e-04, 1.0612e-04,\n 1.6731e-04, 1.2526e-04, 1.3431e-04, 1.0097e-04, 1.1006e-04, 1.3181e-04,\n 1.5138e-04, 1.1532e-04, 1.1899e-04, 1.2435e-04, 1.6030e-04, 1.1868e-04,\n 9.4591e-05, 1.5037e-04, 1.2213e-04, 1.1847e-04, 1.3127e-04, 6.1508e-05,\n 1.1907e-04, 8.0218e-05, 9.9563e-05, 1.1270e-04, 1.1668e-04, 1.2196e-04,\n 1.5097e-04, 9.7425e-05, 1.5301e-04, 1.1358e-04, 6.2956e-05, 1.4809e-04,\n 1.1920e-04, 1.2568e-04, 1.3920e-04, 1.6369e-04, 1.7385e-04, 8.5664e-05,\n 1.1261e-04, 1.2882e-04, 1.0505e-04, 9.9191e-05, 1.0123e-04, 1.0832e-04,\n 1.2386e-04, 1.1788e-04, 1.0582e-04, 1.2667e-04, 1.4620e-04, 1.0365e-04,\n 1.5298e-04, 1.3099e-04, 1.2197e-04, 9.7127e-05, 1.6646e-04, 1.6005e-04,\n 1.0825e-04, 1.4528e-04, 1.3145e-04, 1.1031e-04, 1.1978e-04, 1.0748e-04,\n 1.7581e-04, 1.0490e-04, 1.6058e-04, 1.3388e-04, 1.5425e-04, 1.6120e-04,\n 1.2025e-04, 1.2815e-04, 1.0980e-04, 1.0616e-04, 1.4830e-04, 1.7550e-04,\n 1.1025e-04, 1.2294e-04, 1.4651e-04, 1.8984e-04, 8.4928e-05, 1.6497e-04,\n 1.1298e-04, 4.0654e-05, 1.1665e-04, 1.3564e-04, 1.4737e-04, 8.3741e-05,\n 1.0560e-04, 1.2237e-04, 1.2533e-04, 1.1549e-04, 1.1977e-04, 1.2322e-04,\n 1.1881e-04, 1.0676e-04, 1.2680e-04, 6.8138e-05, 1.6950e-04, 1.5598e-04,\n 1.0690e-04, 1.3929e-04, 1.2820e-04, 1.3112e-04, 8.3132e-05, 1.1890e-04,\n 1.3288e-04, 1.5507e-04, 1.0256e-04, 1.0642e-04, 1.3160e-04, 1.5936e-04,\n 1.4057e-04, 9.6629e-05, 1.3948e-04, 9.7821e-05, 9.9361e-05, 1.3614e-04,\n 1.1519e-04, 1.5114e-04, 1.4937e-04, 1.2457e-04, 1.0638e-04, 1.4402e-04,\n 1.6746e-04, 1.0791e-04, 1.4074e-04, 1.0122e-04, 1.6262e-04, 1.3973e-04,\n 1.2584e-04, 1.0126e-04, 1.2269e-04, 1.0668e-04, 1.5712e-04, 1.1720e-04,\n 1.3029e-04, 9.3230e-05, 1.2402e-04, 1.1466e-04, 1.1938e-04, 1.0757e-04,\n 1.4107e-04, 1.2970e-04, 1.2722e-04, 1.4371e-04, 9.6695e-05, 8.6775e-05,\n 1.3060e-04, 9.1389e-05, 1.2573e-04, 9.9564e-05, 1.2702e-04, 1.0566e-04,\n 1.1971e-04, 1.2028e-04, 1.3877e-04, 1.3514e-04, 1.3815e-04, 9.9549e-05,\n 1.3385e-04, 1.7375e-04, 1.3116e-04, 1.7283e-04, 1.3449e-04, 1.5356e-04,\n 1.0635e-04, 1.1534e-04, 1.3473e-04, 1.7661e-04, 9.4720e-05, 7.5310e-05,\n 1.5047e-04, 1.5768e-04, 1.1847e-04, 1.1945e-04, 1.0472e-04, 1.2910e-04,\n 8.7627e-05, 9.6630e-05, 1.5483e-04, 1.5704e-04, 8.2807e-05, 1.4783e-04,\n 9.1818e-05, 1.0801e-04, 1.2005e-04, 1.0926e-04, 1.1534e-04, 1.1525e-04,\n 1.2257e-04, 1.0671e-04, 1.6267e-04, 9.2808e-05, 8.3113e-05, 1.3833e-04,\n 1.5222e-04, 1.3240e-04, 7.8521e-05, 1.7302e-04, 1.2285e-04, 1.0562e-04,\n 1.3959e-04, 1.6235e-04, 1.2970e-04, 1.0230e-04, 9.9688e-05, 1.3985e-04,\n 1.7423e-04, 1.1207e-04, 1.1687e-04, 1.1102e-04, 1.0635e-04, 1.2838e-04,\n 9.9465e-05, 1.4165e-04, 1.2475e-04, 1.5321e-04, 1.1507e-04, 1.2092e-04,\n 1.3377e-04, 1.0612e-04, 1.4978e-04, 9.4700e-05, 1.2312e-04, 1.3609e-04,\n 1.4025e-04, 1.2953e-04, 1.6900e-04, 1.2247e-04, 1.2560e-04, 1.2939e-04,\n 1.5360e-04, 1.4863e-04, 8.4648e-05, 1.4514e-04, 1.2064e-04, 1.1203e-04,\n 1.3175e-04, 1.1515e-04, 1.3995e-04, 1.0183e-04, 9.8871e-05, 1.0426e-04,\n 8.1078e-05, 1.5300e-04, 1.4191e-04, 1.2500e-04, 1.4500e-04, 1.3728e-04,\n 9.7585e-05, 1.2093e-04, 9.2498e-05, 1.1035e-04, 1.2356e-04, 1.1614e-04,\n 1.2774e-04, 1.1272e-04, 1.5696e-04, 9.5597e-05, 1.1537e-04, 1.4033e-04,\n 1.4579e-04, 1.4065e-04, 1.0517e-04, 9.6076e-05, 1.5707e-04, 1.4089e-04,\n 1.4326e-04, 1.2455e-04, 9.9097e-05, 1.1418e-04, 1.5706e-04, 1.2687e-04,\n 1.0506e-04, 8.8742e-05, 1.4058e-04, 1.0933e-04, 9.5061e-05, 1.4141e-04,\n 1.2578e-04, 1.7810e-04, 9.6881e-05, 1.1581e-04, 1.3409e-04, 1.2742e-04,\n 1.1285e-04, 9.9349e-05, 8.2364e-05, 1.1655e-04, 1.3221e-04, 1.3120e-04,\n 1.0760e-04, 1.5415e-04, 1.2058e-04, 9.5647e-05, 1.2488e-04, 8.8012e-05,\n 1.1410e-04, 1.3576e-04, 9.0943e-05, 1.3907e-04, 1.5720e-04, 1.2562e-04,\n 1.1284e-04, 1.5102e-04, 8.4001e-05, 1.3630e-04, 1.7111e-04, 1.7230e-04,\n 1.4275e-04, 1.3942e-04, 1.4599e-04, 1.2043e-04, 1.4750e-04, 1.2660e-04,\n 1.7153e-04, 1.6447e-04, 1.1590e-04, 1.3777e-04, 9.9079e-05, 1.3873e-04,\n 1.5267e-04, 1.3800e-04, 9.7292e-05, 1.0720e-04, 1.6208e-04, 1.1575e-04,\n 1.4427e-04, 1.5241e-04, 1.5262e-04, 1.0479e-04, 1.2787e-04, 1.3777e-04,\n 1.2543e-04, 1.3637e-04, 1.1669e-04, 1.0460e-04, 1.0615e-04, 1.2340e-04,\n 1.4708e-04, 1.7102e-04, 1.6256e-04, 1.2824e-04, 1.4526e-04, 1.1807e-04,\n 1.4535e-04, 6.9857e-05, 1.2684e-04, 1.2063e-04, 1.6190e-04, 1.2894e-04,\n 1.5401e-04, 1.0665e-04, 1.4557e-04, 1.2028e-04, 1.3035e-04, 1.4974e-04,\n 1.1721e-04, 1.3421e-04, 1.0101e-04, 1.1075e-04, 1.5134e-04, 1.2025e-04,\n 1.3069e-04, 1.3978e-04, 1.3838e-04, 1.1192e-04, 1.4444e-04, 1.2665e-04,\n 1.1908e-04, 1.3356e-04, 1.0588e-04, 1.5734e-04, 1.5365e-04, 1.5925e-04,\n 1.4301e-04, 1.4909e-04, 1.5991e-04, 1.2922e-04, 1.3822e-04, 9.4701e-05,\n 7.7269e-05, 1.0760e-04, 1.1710e-04, 1.4991e-04, 1.2039e-04, 1.1770e-04,\n 1.1724e-04, 8.8914e-05, 1.3993e-04, 1.1597e-04, 1.2995e-04, 7.0512e-05,\n 1.3511e-04, 1.1108e-04, 4.6036e-11, 1.1423e-04, 8.0156e-05, 5.7060e-12,\n 9.5971e-05, 1.3566e-04, 9.4227e-05, 1.4606e-04, 1.0839e-04, 1.1662e-04,\n 1.0176e-04, 1.3165e-04, 9.2454e-05, 8.4134e-05, 9.9940e-05, 1.0785e-04,\n 1.3257e-04, 1.0493e-04, 1.3085e-04, 1.7131e-04, 1.6088e-04, 1.6148e-04,\n 1.0800e-04, 1.0679e-04, 1.1492e-04, 1.4201e-04, 1.3286e-04, 9.5180e-05,\n 1.1902e-04, 1.3269e-04, 1.1940e-04, 1.0629e-04, 1.2310e-04, 9.5282e-05,\n 7.9631e-05, 1.5645e-04, 1.0142e-04, 9.1439e-05, 1.4055e-04, 1.4260e-04,\n 1.0850e-04, 1.0600e-04, 1.3417e-04, 1.2732e-04, 1.1686e-04, 1.0231e-04,\n 1.0648e-04, 1.3680e-04, 1.0906e-04, 1.6204e-04, 1.1589e-04, 1.0810e-04,\n 1.6073e-04, 1.0045e-04, 1.2386e-04, 1.2441e-04, 1.4403e-04, 1.3414e-04,\n 1.6152e-04, 1.3857e-04, 1.3398e-04, 1.1221e-04, 9.6490e-05, 1.2125e-04,\n 8.7114e-05, 8.6675e-05, 1.3824e-04, 1.7725e-04, 1.4677e-04, 1.1567e-04,\n 1.1129e-04, 9.2560e-05, 7.7187e-05, 1.7604e-04, 1.4415e-04, 1.3302e-04,\n 1.2851e-04, 1.2116e-04, 1.1501e-04, 1.0509e-04, 1.4893e-04, 1.2593e-04,\n 1.1886e-04, 7.8195e-05, 1.1610e-04, 1.1404e-04, 1.1276e-04, 1.2336e-04,\n 8.2239e-05, 1.3757e-04, 8.8136e-05, 1.1349e-04, 1.6654e-04, 1.3072e-04,\n 1.1895e-04, 1.0053e-04, 1.6279e-04, 1.3412e-04, 1.3541e-04, 1.2559e-04,\n 1.0437e-04, 1.1264e-04, 1.2774e-04, 1.4927e-04, 1.5050e-04, 1.2266e-04,\n 1.0541e-04, 1.3614e-04, 1.2585e-04, 1.4740e-04, 1.1254e-04, 1.1505e-04,\n 1.3220e-04, 1.6036e-04, 1.3459e-04, 1.4511e-04, 1.1134e-04, 1.2302e-04,\n 1.5730e-04, 8.3423e-05, 1.5457e-04, 1.1770e-04, 1.1018e-04, 9.3749e-05,\n 1.3336e-04, 1.5243e-04, 1.2975e-04, 1.1462e-04, 7.5082e-05, 1.1642e-04,\n 1.4440e-04, 1.3582e-04, 1.2424e-04, 1.6351e-04, 1.0670e-04, 1.7298e-04,\n 1.0754e-04, 1.1927e-04, 1.5633e-04, 1.0782e-04, 1.3474e-04, 1.4631e-04,\n 1.2380e-04, 6.0506e-05, 1.4744e-04, 1.2378e-04, 9.6236e-05, 1.1639e-04,\n 1.0185e-04, 1.2697e-04, 1.5713e-04, 1.8423e-04, 1.1340e-04, 1.4970e-04,\n 1.0105e-04, 1.2420e-04, 1.4488e-04, 1.6215e-04, 1.1666e-04, 1.9611e-04,\n 1.3365e-04, 9.7068e-05, 1.6645e-04, 1.3865e-04, 1.1716e-04, 1.3054e-04,\n 1.0165e-04, 1.3456e-04, 1.3223e-04, 1.2497e-04, 1.1331e-04, 1.1577e-04,\n 1.4120e-04, 1.5491e-04, 9.8405e-05, 1.4038e-04, 1.0058e-04, 9.6922e-05,\n 1.5085e-04, 1.2277e-04, 1.3434e-04, 8.8172e-05, 1.1629e-04, 1.4832e-04,\n 1.8490e-04, 1.3832e-04, 1.6014e-04, 1.4179e-04, 1.0515e-04, 1.1699e-04],\n device='cuda:0')"
69
+ },
70
+ "13": {
71
+ "step": "tensor(8764.)",
72
+ "exp_avg": "tensor([[ 1.7658e-17, -5.7177e-08, 4.2810e-22, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 8.9692e-06, -2.3937e-06, -1.7397e-06, ..., -4.1297e-06,\n 1.5350e-08, 4.7284e-09],\n [-2.1026e-05, 3.2858e-07, 6.3431e-11, ..., 3.7893e-06,\n 1.0297e-06, -2.1334e-05],\n ...,\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 2.3501e-04, 5.0076e-05, 9.6753e-06, ..., 2.1181e-06,\n -5.2313e-06, 7.6000e-06],\n [ 1.3663e-07, -1.9808e-06, 7.0118e-07, ..., -5.2288e-06,\n 5.7519e-08, 2.2147e-06]], device='cuda:0')",
73
+ "exp_avg_sq": "tensor([[2.2867e-10, 1.2776e-09, 2.4988e-13, ..., 1.4814e-14, 5.9152e-14,\n 3.3748e-14],\n [2.2431e-09, 2.0555e-09, 1.3423e-09, ..., 9.1724e-09, 4.0217e-11,\n 5.7296e-11],\n [3.2363e-08, 2.1695e-09, 8.3174e-12, ..., 1.2375e-08, 3.1596e-09,\n 7.3780e-09],\n ...,\n [1.3410e-14, 1.4468e-13, 2.6354e-16, ..., 8.0643e-14, 1.8263e-13,\n 1.0391e-14],\n [7.5709e-08, 1.2096e-07, 4.1608e-09, ..., 9.7547e-08, 3.4646e-08,\n 1.9070e-08],\n [7.7886e-10, 6.8587e-10, 2.6697e-09, ..., 2.2071e-09, 8.8472e-11,\n 3.4570e-09]], device='cuda:0')"
74
+ },
75
+ "14": {
76
+ "step": "tensor(8764.)",
77
+ "exp_avg": "tensor([-9.0057e-07, -4.0984e-04, 4.4506e-04, ..., 5.6052e-45,\n 2.3942e-03, -1.6860e-03], device='cuda:0')",
78
+ "exp_avg_sq": "tensor([1.1032e-07, 9.5006e-06, 1.6044e-05, ..., 1.8014e-10, 2.8523e-05,\n 2.1409e-06], device='cuda:0')"
79
+ },
80
+ "15": {
81
+ "step": "tensor(8764.)",
82
+ "exp_avg": "tensor([[ 5.5597e-09, 9.0054e-08, -5.4465e-06, ..., 5.6052e-45,\n -2.2692e-05, -3.5267e-07],\n [ 4.4042e-08, 4.6321e-08, -9.3300e-06, ..., -5.6052e-45,\n 2.5955e-05, 2.9675e-06],\n [-2.6697e-08, -1.1793e-07, 2.0645e-06, ..., -5.6052e-45,\n 3.6675e-06, 6.6893e-06],\n ...,\n [ 8.0643e-08, -9.7611e-08, -5.2545e-06, ..., -5.6052e-45,\n 1.8322e-06, 7.2717e-06],\n [ 6.3013e-08, 3.9321e-07, 1.8895e-05, ..., 5.6052e-45,\n 6.0859e-06, -5.6368e-07],\n [-1.2160e-07, 2.4126e-07, -8.0317e-06, ..., 5.6052e-45,\n 2.4034e-05, 2.1622e-06]], device='cuda:0')",
83
+ "exp_avg_sq": "tensor([[1.8523e-12, 1.1662e-11, 1.6764e-10, ..., 2.0123e-13, 2.5986e-09,\n 4.4361e-11],\n [1.6459e-11, 3.4870e-11, 3.7272e-10, ..., 8.5914e-14, 3.0620e-09,\n 1.0175e-10],\n [1.9137e-11, 6.2402e-11, 2.1495e-10, ..., 2.5491e-14, 3.2919e-09,\n 1.7090e-10],\n ...,\n [2.8066e-11, 4.9948e-11, 1.8677e-10, ..., 1.2129e-14, 3.1439e-09,\n 2.4728e-10],\n [1.6345e-11, 4.1101e-11, 4.2436e-10, ..., 4.0592e-13, 2.7439e-09,\n 8.2189e-11],\n [4.6317e-11, 5.6754e-11, 7.1410e-10, ..., 1.3395e-13, 3.2050e-09,\n 1.1219e-10]], device='cuda:0')"
84
+ },
85
+ "16": {
86
+ "step": "tensor(8764.)",
87
+ "exp_avg": "tensor([[-8.7917e-05, -6.5515e-05, 1.7643e-05, ..., -6.6320e-06,\n 2.2158e-05, -8.7488e-05],\n [ 1.5174e-04, -1.1783e-04, 2.0303e-04, ..., 5.6440e-05,\n -2.8064e-05, 1.6012e-04],\n [-3.7796e-05, -2.8505e-05, -3.3415e-05, ..., -7.5413e-05,\n -1.5508e-05, 7.1069e-05],\n ...,\n [ 6.3818e-05, 7.2831e-05, -1.7221e-04, ..., 6.4677e-05,\n 3.9564e-05, 1.9249e-05],\n [ 1.9063e-05, 7.7363e-06, -3.2469e-05, ..., -6.4381e-05,\n 7.4295e-06, 5.0385e-05],\n [ 1.9760e-05, 5.4987e-05, -2.5244e-05, ..., 1.3180e-04,\n 5.3002e-05, -1.6975e-04]], device='cuda:0')",
88
+ "exp_avg_sq": "tensor([[2.9965e-08, 6.9594e-08, 6.8928e-08, ..., 4.4583e-08, 3.5096e-08,\n 5.0826e-08],\n [3.0102e-08, 6.1403e-08, 4.7999e-08, ..., 5.2113e-08, 2.8767e-08,\n 5.3447e-08],\n [2.1736e-08, 4.5513e-08, 3.9594e-08, ..., 3.4201e-08, 2.5626e-08,\n 2.8598e-08],\n ...,\n [2.4970e-08, 6.2478e-08, 4.2846e-08, ..., 4.9157e-08, 2.9134e-08,\n 5.4892e-08],\n [2.9934e-08, 5.8372e-08, 6.2799e-08, ..., 5.1244e-08, 2.5609e-08,\n 4.4148e-08],\n [2.8148e-08, 5.4728e-08, 7.1486e-08, ..., 4.0391e-08, 2.7776e-08,\n 5.0550e-08]], device='cuda:0')"
89
+ },
90
+ "17": {
91
+ "step": "tensor(8764.)",
92
+ "exp_avg": "tensor([-7.6745e-04, 5.8742e-03, -1.6784e-03, ..., 5.3343e-05,\n -1.9964e-04, -1.8310e-04], device='cuda:0')",
93
+ "exp_avg_sq": "tensor([5.1435e-05, 5.1830e-05, 3.6607e-05, ..., 5.0520e-05, 4.4610e-05,\n 4.2968e-05], device='cuda:0')"
94
+ }
95
+ },
96
+ "param_groups": [
97
+ {
98
+ "lr": 0.00020690126647990973,
99
+ "name": "scale_256",
100
+ "betas": [
101
+ 0.9,
102
+ 0.999
103
+ ],
104
+ "eps": 1e-08,
105
+ "weight_decay": 1e-05,
106
+ "amsgrad": false,
107
+ "maximize": false,
108
+ "foreach": null,
109
+ "capturable": false,
110
+ "differentiable": false,
111
+ "fused": null,
112
+ "decoupled_weight_decay": true,
113
+ "initial_lr": 0.001,
114
+ "params": [
115
+ 0,
116
+ 1,
117
+ 2
118
+ ]
119
+ },
120
+ {
121
+ "lr": 0.00020690126647990973,
122
+ "name": "scale_512",
123
+ "betas": [
124
+ 0.9,
125
+ 0.999
126
+ ],
127
+ "eps": 1e-08,
128
+ "weight_decay": 1e-05,
129
+ "amsgrad": false,
130
+ "maximize": false,
131
+ "foreach": null,
132
+ "capturable": false,
133
+ "differentiable": false,
134
+ "fused": null,
135
+ "decoupled_weight_decay": true,
136
+ "initial_lr": 0.001,
137
+ "params": [
138
+ 3,
139
+ 4,
140
+ 5,
141
+ 6,
142
+ 7
143
+ ]
144
+ },
145
+ {
146
+ "lr": 0.00020690126647990973,
147
+ "name": "scale_768",
148
+ "betas": [
149
+ 0.9,
150
+ 0.999
151
+ ],
152
+ "eps": 1e-08,
153
+ "weight_decay": 1e-05,
154
+ "amsgrad": false,
155
+ "maximize": false,
156
+ "foreach": null,
157
+ "capturable": false,
158
+ "differentiable": false,
159
+ "fused": null,
160
+ "decoupled_weight_decay": true,
161
+ "initial_lr": 0.001,
162
+ "params": [
163
+ 8,
164
+ 9,
165
+ 10,
166
+ 11,
167
+ 12
168
+ ]
169
+ },
170
+ {
171
+ "lr": 0.00020690126647990973,
172
+ "name": "scale_1024",
173
+ "betas": [
174
+ 0.9,
175
+ 0.999
176
+ ],
177
+ "eps": 1e-08,
178
+ "weight_decay": 1e-05,
179
+ "amsgrad": false,
180
+ "maximize": false,
181
+ "foreach": null,
182
+ "capturable": false,
183
+ "differentiable": false,
184
+ "fused": null,
185
+ "decoupled_weight_decay": true,
186
+ "initial_lr": 0.001,
187
+ "params": [
188
+ 13,
189
+ 14,
190
+ 15,
191
+ 16,
192
+ 17
193
+ ]
194
+ }
195
+ ]
196
+ },
197
+ "scheduler_state_dict": {
198
+ "T_0": 10,
199
+ "T_i": 10,
200
+ "T_mult": 2,
201
+ "eta_min": 1e-06,
202
+ "T_cur": 7,
203
+ "base_lrs": [
204
+ 0.001,
205
+ 0.001,
206
+ 0.001,
207
+ 0.001
208
+ ],
209
+ "last_epoch": 7,
210
+ "_step_count": 0,
211
+ "_is_initial": false,
212
+ "_get_lr_called_within_step": false,
213
+ "_last_lr": [
214
+ 0.00020690126647990973,
215
+ 0.00020690126647990973,
216
+ 0.00020690126647990973,
217
+ 0.00020690126647990973
218
+ ]
219
+ },
220
+ "metrics": {
221
+ "best_val_acc": 76.452,
222
+ "best_epoch": 6,
223
+ "scale_accuracies": {
224
+ "256": 70.05,
225
+ "512": 73.994,
226
+ "768": 75.51,
227
+ "1024": 75.462
228
+ },
229
+ "training_history": {
230
+ "epochs": [
231
+ 1,
232
+ 2,
233
+ 3,
234
+ 4,
235
+ 5,
236
+ 6,
237
+ 7
238
+ ],
239
+ "train_loss": [
240
+ 3.9118613697850284,
241
+ 2.66607952194092,
242
+ 2.3952484759278954,
243
+ 2.201966982775222,
244
+ 2.026744091663117,
245
+ 1.8584000322575005,
246
+ 1.6992347222357131
247
+ ],
248
+ "train_acc": [
249
+ 68.33870994179526,
250
+ 76.85976925724749,
251
+ 79.21098498478341,
252
+ 81.16849715923061,
253
+ 83.13014618703104,
254
+ 85.12582668769957,
255
+ 87.11042354353492
256
+ ],
257
+ "val_acc": [
258
+ 72.328,
259
+ 74.248,
260
+ 74.928,
261
+ 75.464,
262
+ 75.994,
263
+ 76.29,
264
+ 76.452
265
+ ],
266
+ "scale_accs": {
267
+ "256": [
268
+ 65.922,
269
+ 67.866,
270
+ 68.668,
271
+ 69.028,
272
+ 69.476,
273
+ 69.894,
274
+ 70.05
275
+ ],
276
+ "512": [
277
+ 70.014,
278
+ 71.776,
279
+ 72.65,
280
+ 72.974,
281
+ 73.372,
282
+ 73.71,
283
+ 73.994
284
+ ],
285
+ "768": [
286
+ 71.312,
287
+ 73.326,
288
+ 74.046,
289
+ 74.52,
290
+ 74.848,
291
+ 75.304,
292
+ 75.51
293
+ ],
294
+ "1024": [
295
+ 71.288,
296
+ 73.572,
297
+ 74.36,
298
+ 74.86,
299
+ 75.24,
300
+ 75.4,
301
+ 75.462
302
+ ]
303
+ },
304
+ "lr": [
305
+ 0.0009755527298894294,
306
+ 0.0009046039886902864,
307
+ 0.0007940987335200904,
308
+ 0.0006548539886902864,
309
+ 0.0005005000000000001,
310
+ 0.0003461460113097139,
311
+ 0.00020690126647990973
312
+ ]
313
+ }
314
+ },
315
+ "train_config": {
316
+ "name": "david_training",
317
+ "run_id": "20251012_145649",
318
+ "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
319
+ "model_variant": "clip_vit_laion_b32",
320
+ "num_classes": 1000,
321
+ "preset": "hierarchical_refinement",
322
+ "custom_config_path": null,
323
+ "num_classes_override": null,
324
+ "use_belly_override": null,
325
+ "belly_expand_override": null,
326
+ "progressive_training_override": false,
327
+ "scale_warmup_epochs_override": null,
328
+ "num_epochs": 10,
329
+ "batch_size": 1024,
330
+ "learning_rate": 0.001,
331
+ "weight_decay": 1e-05,
332
+ "warmup_epochs": 3,
333
+ "use_rose_loss": true,
334
+ "rose_initial_weight": 0.1,
335
+ "rose_max_weight": 0.5,
336
+ "rose_weight_schedule": "adaptive",
337
+ "use_cayley_loss": false,
338
+ "cayley_weight": 0.001,
339
+ "scale_loss_balance": null,
340
+ "use_mixed_precision": true,
341
+ "gradient_clip": 10.0,
342
+ "scheduler_type": "cosine_restarts",
343
+ "min_lr": 1e-06,
344
+ "freeze_strategy": "never",
345
+ "freeze_threshold": 90.0,
346
+ "unfreeze_on_plateau": true,
347
+ "patience": 10,
348
+ "track_gradients": true,
349
+ "gradient_scale_threshold": 1e-05,
350
+ "gradient_scale_multiplier": 10.0,
351
+ "log_interval": 50,
352
+ "val_interval": 1,
353
+ "save_interval": 5,
354
+ "log_fusion_weights": true,
355
+ "log_loss_components": true,
356
+ "save_format": "safetensors",
357
+ "hf_repo": "AbstractPhil/gated-david",
358
+ "upload_to_hub": true,
359
+ "base_dir": "./david_training",
360
+ "num_workers": 10,
361
+ "pin_memory": true,
362
+ "prefetch_factor": 4,
363
+ "persistent_workers": true
364
+ }
365
+ }