Update best_model_acc65.66_metadata.json - Run 20251012_235237
Browse files
weights/David-fully_shared-weighted_sum/20251012_235237/best_model_acc65.66_metadata.json
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 5,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(22524.)",
|
| 7 |
+
"exp_avg": "tensor([[ 4.1502e-05, -1.0199e-04, 4.1579e-05, ..., 2.2934e-05,\n 4.3949e-06, -1.7620e-05],\n [-3.6801e-05, -3.8278e-04, 1.8524e-04, ..., -1.0277e-05,\n 5.3981e-05, -4.9701e-05],\n [ 3.9609e-05, -2.4522e-05, -1.5769e-06, ..., 3.0416e-05,\n 3.3777e-05, 9.7399e-06],\n ...,\n [ 4.5955e-05, 1.7653e-04, -1.1404e-05, ..., -8.1255e-05,\n -3.6247e-05, 2.7496e-06],\n [ 1.5161e-05, -7.4698e-05, -4.1333e-05, ..., -4.6826e-06,\n 2.3446e-05, 2.6877e-05],\n [-1.1690e-05, -3.3630e-05, -6.1750e-05, ..., 1.9867e-05,\n -1.2669e-06, 1.2757e-05]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[1.3187e-07, 1.3137e-07, 5.0881e-08, ..., 5.6999e-08, 3.0541e-08,\n 2.6351e-08],\n [4.4737e-08, 1.6516e-07, 7.7279e-08, ..., 5.8257e-08, 2.3221e-08,\n 2.5557e-08],\n [2.0487e-08, 3.9296e-08, 2.6240e-08, ..., 6.0610e-08, 1.4086e-08,\n 1.7251e-08],\n ...,\n [3.7394e-08, 3.2113e-07, 4.7289e-08, ..., 7.5418e-08, 2.2102e-08,\n 3.4706e-08],\n [8.7985e-08, 1.4008e-07, 7.2938e-08, ..., 5.5660e-08, 2.9543e-08,\n 3.2182e-08],\n [5.1875e-09, 2.0096e-08, 1.0308e-08, ..., 4.6342e-09, 2.1548e-09,\n 3.7924e-09]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(22524.)",
|
| 12 |
+
"exp_avg": "tensor([ 5.1402e-04, -2.7189e-03, 1.2605e-03, 3.3690e-04, -1.1569e-03,\n 5.8594e-04, 1.7903e-03, 1.3979e-03, -2.0561e-04, 1.3530e-03,\n 1.0701e-03, -1.1938e-04, -9.3372e-04, 5.8662e-04, 2.5773e-04,\n -2.0966e-04, 2.0787e-03, 2.5688e-03, 1.9664e-03, -1.2256e-03,\n -5.5697e-06, 1.3885e-03, 1.4156e-03, 1.8598e-03, -4.1185e-05,\n 1.7616e-03, 4.0217e-04, 9.9919e-04, -2.8930e-04, 4.8934e-04,\n 1.1381e-03, -2.0902e-03, -2.6396e-03, -1.9068e-03, 8.6114e-04,\n 6.6223e-04, -1.5507e-03, 1.6222e-04, -1.1205e-05, -7.2279e-04,\n -7.0826e-04, 1.4554e-03, -1.7498e-04, 7.0313e-04, -4.4441e-04,\n 6.7149e-04, -5.7770e-04, 6.4124e-04, -1.2285e-03, 5.5832e-04,\n 6.7087e-04, 9.5578e-04, -1.9093e-04, -2.4800e-03, 8.8482e-05,\n 1.6981e-03, -1.9448e-03, 1.2010e-03, 2.2257e-03, 7.4027e-04,\n -2.2290e-03, -2.7532e-04, 2.4922e-03, 4.1140e-04, -8.1509e-04,\n -4.3300e-04, 4.2274e-04, 1.1167e-03, -7.8651e-04, -3.2351e-05,\n -3.5622e-03, -2.7900e-03, 2.2023e-03, 1.3900e-03, -1.0340e-03,\n -1.6842e-03, -1.4829e-03, -6.9117e-04, 1.0110e-03, 1.5396e-03,\n -5.9182e-04, 3.6909e-03, -8.7109e-04, -3.9076e-05, 2.0461e-03,\n 1.1025e-03, -3.3501e-04, -5.6383e-04, 1.2972e-03, -1.0467e-03,\n -1.4505e-03, -3.6357e-03, -6.7970e-04, -1.7478e-03, -9.5377e-04,\n 6.0719e-04, 2.3134e-03, -2.0119e-04, 6.4741e-04, 5.6596e-04,\n 1.7536e-03, 7.2590e-04, -1.4515e-03, 1.4972e-03, 6.5045e-05,\n -7.9280e-04, 2.9965e-04, 4.6449e-03, 2.8613e-03, -7.3930e-04,\n 1.7777e-03, -1.1075e-03, 4.3264e-03, 2.6153e-03, 6.3430e-04,\n 1.7876e-03, -2.2646e-03, 1.0662e-04, 1.6779e-03, -6.0217e-04,\n -2.2407e-03, -1.1742e-03, 1.0726e-03, 4.0268e-05, -3.8815e-04,\n 8.5590e-04, -7.6096e-04, -1.2769e-03, -1.9827e-03, -1.0798e-03,\n 1.6971e-03, -1.3070e-03, 1.7522e-03, -2.0527e-03, -5.7369e-03,\n 3.8600e-04, -9.2849e-05, -1.3376e-05, 4.4086e-04, 1.9748e-03,\n -3.3356e-04, 1.9267e-03, -1.2494e-03, 5.4917e-04, 1.6519e-04,\n -8.4486e-04, -1.0119e-03, -1.5367e-03, 5.4145e-04, -2.4393e-03,\n -6.8385e-04, 1.2733e-03, 1.4255e-03, 2.3521e-03, -2.3481e-03,\n -2.9086e-04, -5.2869e-04, 1.6018e-03, 1.0802e-03, 1.8176e-03,\n -1.8691e-03, 3.4137e-04, 1.3320e-03, 2.7829e-03, -8.3513e-04,\n 1.1276e-03, -1.0240e-03, -7.7442e-04, 3.8218e-04, 1.0742e-03,\n -6.3160e-04, 7.3851e-04, -3.4579e-03, -3.3817e-04, -8.2469e-04,\n 6.1891e-04, -3.2384e-03, -5.1715e-04, 5.7545e-04, -7.3826e-04,\n -7.8310e-04, 3.4284e-04, -5.7922e-03, 9.3131e-04, -1.4692e-03,\n -2.7357e-04, 9.6692e-04, 1.8296e-04, 1.1854e-03, -4.4859e-05,\n -5.0967e-04, 1.0919e-03, 1.1694e-03, -4.0182e-05, -3.1023e-04,\n -5.3742e-04, 2.2501e-04, -1.8992e-03, 1.2177e-04, 8.3354e-05,\n -3.1336e-04, 1.8076e-03, 1.6801e-04, -9.0654e-04, 6.3466e-04,\n 4.1534e-06, 3.0049e-04, 7.7939e-04, 2.0866e-03, -1.4446e-03,\n -3.0019e-04, -1.8425e-03, -4.1433e-04, -1.0645e-04, 2.7422e-04,\n -4.1692e-04, -5.6828e-05, 5.9548e-04, -2.0084e-03, 1.5397e-04,\n -2.7091e-04, 7.3346e-04, 1.0100e-03, -8.7198e-04, -3.0134e-03,\n 3.1143e-03, -6.9378e-04, 1.9654e-03, -1.5173e-04, 1.1511e-03,\n 3.5623e-03, -2.7305e-04, 6.6441e-04, 9.5432e-04, -1.2196e-04,\n 5.8859e-06, -1.1420e-04, -1.8906e-04, -2.3525e-03, 4.1734e-03,\n -4.6205e-04, 1.7964e-03, 2.8168e-03, 6.7417e-04, 1.7920e-03,\n -2.4930e-04, 2.2977e-03, 1.7980e-03, -1.9008e-03, 2.3103e-03,\n 2.9650e-03, -1.9424e-03, -9.2190e-04, 4.2877e-04, 1.6020e-03,\n 1.6712e-03, 1.5538e-03, -8.9202e-04, -1.5070e-03, -3.2735e-04,\n 2.3652e-03, -9.4540e-04, 4.5620e-04, 1.7488e-04, -1.6705e-03,\n -1.2680e-04, 2.0588e-03, -2.4788e-04, 1.4215e-04, -1.3646e-03,\n -3.7105e-03, -2.0848e-04, 1.7450e-03, 8.5070e-05, 7.4261e-04,\n 1.7090e-03, 2.1448e-03, 4.8378e-04, -5.2268e-05, -1.1484e-04,\n 5.0160e-04, -5.8156e-04, 1.4729e-03, -1.8662e-03, 1.4850e-03,\n -3.5936e-04, 1.0903e-03, -2.0908e-03, -1.8093e-03, 1.4866e-03,\n -3.3089e-04, 9.4930e-04, -2.6257e-03, -1.3082e-03, 2.9897e-03,\n -6.9375e-04, 9.5165e-04, -5.9633e-04, -6.5559e-04, 3.8313e-04,\n -1.6559e-03, 3.0251e-04, 7.5622e-05, 3.2718e-03, 1.3238e-03,\n 5.0325e-04, 9.2899e-04, -2.5705e-03, -1.4385e-06, 2.3423e-03,\n -2.8422e-04, 1.8540e-03, 1.3392e-03, 1.0732e-03, 3.4379e-03,\n 3.3692e-04, -8.3634e-04, 7.5330e-05, -3.2221e-03, -2.6883e-04,\n -1.1239e-03, -2.1150e-03, -7.6158e-04, -3.3800e-04, -5.1590e-04,\n 3.4531e-03, -1.0224e-04, -1.1233e-03, 1.7122e-03, 8.8979e-04,\n 2.4381e-03, -1.1209e-03, -9.1113e-04, 8.4504e-04, -1.0948e-03,\n -7.1946e-04, -2.9795e-03, -6.0897e-04, 1.0110e-03, 1.0812e-03,\n -1.6578e-03, 1.2332e-03, -7.9297e-04, 4.5335e-04, -2.0359e-03,\n -2.5269e-03, 2.6049e-03, -2.5479e-04, -1.5052e-03, 3.1194e-03,\n -1.2913e-03, -1.2773e-03, 1.7054e-04, -3.9720e-04, 1.8350e-04,\n -9.5284e-04, -2.2012e-03, -1.6767e-04, 8.5451e-04, -1.5972e-03,\n 3.3455e-04, -2.1719e-03, 8.4647e-04, 5.1839e-04, 1.2380e-03,\n -1.4123e-03, 9.6407e-04, -8.2138e-04, -3.6454e-03, 5.7235e-04,\n 2.3563e-04, 1.8221e-03, 3.3414e-04, -2.7066e-04, -4.3683e-03,\n 9.7018e-04, -2.7576e-04, 2.1787e-04, -2.2286e-03, -2.8905e-03,\n -6.4449e-05, 1.4082e-03, 5.2254e-04, 6.8362e-04, 4.5914e-04,\n -2.4606e-03, 1.2687e-03, 2.1452e-03, -1.4195e-03, -1.8633e-03,\n -5.4388e-04, -2.7647e-04, -4.7759e-04, -2.6597e-03, -4.0393e-05,\n 1.5906e-04, 1.0735e-03, -1.3445e-03, 4.5473e-04, 2.7066e-03,\n -1.6609e-03, 5.9215e-04, 1.9029e-03, 4.6424e-04, 2.6815e-05,\n -1.6087e-03, 3.6074e-04, -1.1364e-03, 7.8350e-04, -3.3260e-03,\n 8.2214e-04, 1.2927e-03, -1.8304e-03, -3.4275e-03, -2.4817e-03,\n -5.5342e-04, 8.1622e-04, 1.1976e-03, 9.7664e-06, -2.1628e-03,\n 1.9234e-03, -1.8732e-04, -1.3652e-05, -1.5294e-04, 1.2746e-03,\n -3.1676e-03, 1.6368e-03, -3.7985e-04, 7.2970e-05, -3.1059e-04,\n -9.1850e-04, -1.1728e-03, 7.6831e-04, 3.0085e-03, 4.0350e-04,\n 9.4730e-05, -2.0740e-03, 2.6138e-03, 6.6729e-04, -1.2289e-03,\n 2.3801e-03, -1.1380e-03, 9.0999e-05, -3.4657e-04, -1.3205e-04,\n -1.5199e-03, -7.9316e-04, 2.0415e-03, -2.1194e-03, -1.8568e-04,\n 1.9617e-03, -5.9229e-04, 1.3289e-03, 2.0806e-03, 1.3557e-03,\n 2.6862e-04, -1.8312e-03, -2.7907e-03, -1.7954e-04, 5.5290e-05,\n -2.0067e-03, 3.8675e-04, 8.2497e-04, -1.1107e-03, 6.7721e-04,\n -9.4580e-04, -7.1383e-04, -1.6586e-03, 2.0724e-03, -7.1272e-04,\n 1.3600e-04, -1.8981e-03, -1.0530e-03, -3.6873e-03, 4.8561e-04,\n 1.0792e-04, -1.5083e-04, 1.8125e-03, -1.8823e-03, -1.3637e-03,\n -2.4650e-04, -1.2468e-03, -1.8351e-03, -1.2681e-03, 9.8941e-04,\n 9.7699e-04, 8.8175e-04, -6.1232e-04, -2.9390e-03, 2.4432e-03,\n 7.6789e-04, 1.2437e-03, 1.3226e-03, -9.2796e-04, 5.5941e-04,\n 5.6284e-04, -7.8425e-04, -2.3240e-04, -1.9564e-03, 5.2066e-04,\n 5.0718e-04, -4.3105e-04, -1.0631e-03, 4.4951e-04, 1.1619e-04,\n -2.1605e-04, -3.0171e-04, 7.8153e-04, 3.4122e-04, -1.4882e-03,\n 5.0176e-04, 3.2372e-04], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([4.2348e-05, 3.8665e-05, 2.6507e-05, 4.3562e-05, 3.3946e-05, 5.9657e-05,\n 3.0859e-05, 5.6347e-05, 3.9074e-05, 4.3735e-05, 5.6142e-05, 6.3024e-05,\n 8.6259e-05, 2.0416e-05, 6.3101e-05, 4.6777e-05, 4.0568e-05, 3.5391e-05,\n 4.4744e-05, 5.4078e-05, 3.6544e-05, 3.0264e-05, 2.8970e-05, 5.7101e-05,\n 7.4724e-05, 3.4066e-05, 2.9428e-05, 6.2846e-05, 1.1251e-04, 3.9589e-05,\n 2.0469e-05, 2.9764e-05, 4.9481e-05, 3.7239e-05, 2.3742e-05, 4.5893e-05,\n 3.2666e-05, 2.6817e-05, 2.6224e-05, 2.6239e-05, 3.3187e-05, 2.5645e-05,\n 5.9708e-06, 6.8365e-05, 1.0546e-04, 3.2462e-05, 2.3903e-05, 2.4662e-05,\n 4.4691e-05, 7.0454e-05, 7.7314e-05, 2.2505e-05, 2.9526e-05, 4.4189e-05,\n 5.1120e-05, 3.6031e-05, 3.4320e-05, 9.9834e-05, 3.2414e-05, 3.3559e-05,\n 9.7544e-05, 2.5420e-05, 2.9939e-05, 3.0675e-05, 3.5755e-05, 2.8259e-05,\n 3.1667e-05, 2.9470e-05, 5.2811e-05, 3.4833e-05, 3.7723e-05, 3.1256e-05,\n 3.1447e-05, 2.8979e-05, 3.0804e-05, 3.5887e-05, 4.6284e-05, 3.8755e-05,\n 4.9626e-05, 3.3189e-05, 5.5674e-05, 3.7464e-05, 2.9887e-05, 1.7791e-05,\n 3.7296e-05, 3.9806e-05, 4.7889e-05, 2.8889e-05, 2.4879e-05, 4.2703e-05,\n 2.7984e-05, 5.9427e-05, 3.2632e-05, 3.5856e-05, 4.0226e-05, 4.7199e-05,\n 5.7213e-05, 3.7544e-05, 3.4798e-05, 1.9637e-05, 4.4725e-05, 6.6189e-05,\n 5.8829e-05, 3.1652e-05, 6.4242e-05, 5.5251e-05, 3.8698e-05, 8.9079e-05,\n 3.2862e-05, 4.4024e-05, 5.8176e-05, 5.1866e-05, 3.8407e-05, 5.0327e-05,\n 3.8235e-05, 4.4897e-05, 5.4544e-05, 5.0328e-05, 3.2513e-05, 3.8446e-05,\n 1.5204e-05, 5.5653e-05, 4.3088e-05, 5.3234e-05, 3.1136e-05, 4.5941e-05,\n 3.1405e-05, 4.7469e-05, 4.1022e-05, 7.3388e-05, 4.3334e-05, 3.4710e-05,\n 4.9166e-05, 3.7540e-05, 8.8256e-05, 3.3998e-05, 2.5327e-05, 4.2630e-05,\n 3.1737e-05, 4.2684e-05, 1.6581e-05, 2.8928e-05, 4.6319e-05, 9.0722e-05,\n 4.0309e-05, 3.9656e-05, 2.5222e-05, 4.9866e-05, 4.3361e-05, 8.5040e-05,\n 2.0731e-05, 5.0382e-05, 2.4363e-05, 6.4316e-05, 2.6994e-05, 3.1681e-05,\n 4.8493e-05, 3.5931e-05, 3.0680e-05, 2.7884e-05, 5.3932e-05, 1.3125e-05,\n 3.9202e-05, 7.0705e-05, 1.6330e-05, 6.0329e-05, 3.6424e-05, 1.7655e-05,\n 1.7493e-05, 4.4501e-05, 3.1793e-05, 4.4706e-05, 4.0118e-05, 3.2970e-05,\n 3.4174e-05, 2.4113e-05, 7.6111e-05, 2.2736e-05, 1.8059e-05, 4.0016e-05,\n 2.9928e-05, 2.8335e-05, 7.3487e-05, 3.1645e-05, 3.1057e-05, 4.1598e-05,\n 1.0137e-04, 2.1151e-05, 2.0226e-05, 1.4861e-05, 3.6408e-05, 4.9923e-05,\n 5.6419e-05, 5.9977e-05, 2.4501e-05, 1.8109e-05, 4.4444e-05, 2.5219e-05,\n 1.8560e-05, 1.7933e-05, 2.4801e-05, 4.1884e-05, 2.6571e-05, 2.0028e-05,\n 3.6421e-05, 3.7505e-05, 3.4626e-05, 2.5478e-05, 5.2484e-05, 2.0702e-05,\n 5.2554e-05, 5.3890e-05, 3.6025e-05, 2.5074e-05, 5.4300e-05, 1.6043e-05,\n 2.6003e-05, 2.5690e-05, 5.4736e-05, 5.0371e-05, 2.3544e-05, 1.8798e-05,\n 3.2045e-05, 1.6035e-05, 2.7129e-05, 5.1590e-05, 2.7280e-05, 1.9834e-05,\n 2.2772e-05, 6.0086e-05, 4.1550e-05, 1.8802e-05, 4.6962e-05, 3.5299e-05,\n 3.0546e-05, 3.3297e-05, 3.4867e-05, 6.2468e-06, 3.9716e-05, 1.1537e-04,\n 2.9706e-05, 6.9642e-05, 2.9091e-05, 5.2484e-05, 7.0567e-05, 5.2000e-05,\n 4.5179e-05, 2.7704e-05, 4.8482e-05, 3.4082e-05, 5.3081e-05, 2.6013e-05,\n 4.1515e-05, 4.9108e-05, 4.2497e-05, 3.7008e-05, 2.7298e-05, 4.7246e-05,\n 5.7758e-05, 2.9035e-05, 2.9079e-05, 6.2902e-05, 2.7038e-05, 5.3118e-05,\n 4.1472e-05, 2.5472e-05, 2.5729e-05, 3.4691e-05, 2.4148e-05, 4.1124e-05,\n 3.5298e-05, 4.0923e-05, 3.4205e-05, 2.9569e-05, 2.1637e-05, 5.1014e-05,\n 2.0117e-05, 6.5316e-05, 3.6082e-05, 2.1491e-05, 3.1119e-05, 2.4620e-05,\n 3.5296e-05, 5.3926e-05, 2.1022e-05, 5.7788e-05, 3.5847e-05, 3.5826e-05,\n 6.5263e-05, 4.6989e-05, 2.5950e-05, 3.4510e-05, 5.6462e-05, 2.7964e-05,\n 4.8908e-05, 2.3342e-05, 2.5931e-05, 5.7908e-05, 9.3825e-05, 3.5317e-05,\n 2.5819e-05, 3.1147e-05, 3.5441e-05, 3.2159e-05, 6.0836e-05, 2.3497e-05,\n 4.0063e-05, 3.7018e-05, 7.7706e-05, 4.0356e-05, 4.6559e-05, 5.5035e-05,\n 2.4111e-05, 2.4455e-05, 4.9244e-05, 2.9401e-05, 3.7440e-05, 1.5650e-05,\n 3.7388e-05, 4.7852e-05, 8.6661e-05, 5.6695e-05, 4.2078e-05, 4.4467e-05,\n 5.7186e-05, 9.6109e-05, 2.6834e-05, 5.2431e-05, 3.1757e-05, 3.3529e-05,\n 3.4463e-05, 1.0382e-04, 1.6046e-05, 2.3598e-05, 2.7633e-05, 3.6033e-05,\n 5.7480e-05, 2.5571e-05, 6.1596e-05, 4.7142e-05, 3.3389e-05, 3.0550e-05,\n 4.4516e-05, 2.5301e-05, 5.5637e-05, 5.1594e-05, 4.0409e-05, 2.4890e-05,\n 4.2089e-05, 4.4493e-05, 2.7658e-05, 3.9019e-05, 3.3040e-05, 3.1393e-05,\n 3.6271e-05, 2.5561e-05, 2.7919e-05, 3.3346e-05, 2.6148e-05, 2.4748e-05,\n 2.1951e-05, 7.8866e-05, 1.2159e-04, 3.1352e-05, 3.7828e-05, 2.8637e-05,\n 3.3762e-05, 5.4845e-05, 4.3631e-05, 3.0083e-05, 2.0802e-05, 3.3590e-05,\n 2.0132e-05, 3.8220e-05, 6.8322e-05, 3.5987e-05, 2.1184e-05, 1.4781e-05,\n 2.8493e-05, 3.2318e-05, 3.6863e-05, 5.1328e-05, 4.6893e-05, 3.0738e-05,\n 3.3003e-05, 1.0566e-04, 3.0827e-05, 5.2198e-05, 3.6553e-05, 3.8406e-05,\n 4.5925e-05, 5.3702e-05, 2.8825e-05, 3.5993e-05, 3.9746e-05, 2.6683e-05,\n 2.0558e-05, 6.4887e-05, 4.2083e-05, 7.1877e-05, 4.0841e-05, 3.0400e-05,\n 4.4821e-05, 3.2543e-05, 4.3486e-05, 3.5557e-05, 7.9201e-05, 2.1900e-05,\n 2.9593e-05, 4.7194e-05, 4.6889e-05, 5.3230e-05, 4.4690e-05, 4.6897e-05,\n 2.9784e-05, 2.2963e-05, 6.1125e-05, 2.1351e-05, 5.2924e-05, 7.0757e-05,\n 3.9613e-05, 3.2868e-05, 4.7187e-05, 4.7751e-05, 3.7142e-05, 4.0010e-05,\n 7.3370e-05, 3.5523e-05, 2.7336e-05, 3.6180e-05, 2.9456e-05, 1.7807e-05,\n 5.8571e-05, 9.7380e-05, 3.2299e-05, 2.7477e-05, 5.9155e-05, 4.1694e-05,\n 3.5770e-05, 4.0392e-05, 4.3701e-05, 6.9314e-05, 3.2952e-05, 1.1533e-04,\n 4.3303e-05, 3.8146e-05, 2.5031e-05, 3.7214e-05, 3.4235e-05, 3.8088e-05,\n 3.9036e-05, 3.6265e-05, 8.4022e-05, 6.8873e-05, 3.1037e-05, 1.9217e-05,\n 8.0470e-05, 3.7330e-05, 1.8534e-05, 4.6925e-05, 3.9359e-05, 2.9294e-05,\n 3.1565e-05, 3.0260e-05, 7.2171e-05, 3.0381e-05, 3.0972e-05, 3.5560e-05,\n 5.7223e-05, 4.6150e-05, 4.3423e-05, 6.5770e-05, 4.9687e-05, 4.0081e-05,\n 4.1093e-05, 4.2531e-05, 3.8954e-05, 3.5366e-05, 3.7863e-05, 2.5649e-05,\n 2.1148e-05, 3.1878e-05, 5.4117e-05, 3.7497e-05, 3.0055e-05, 3.3848e-05,\n 4.9877e-05, 3.4101e-05, 7.1244e-05, 3.7499e-05, 3.4671e-05, 3.8284e-05,\n 5.4379e-05, 2.6262e-05, 2.2638e-05, 3.4706e-05, 4.4466e-05, 2.3810e-05,\n 2.3744e-05, 2.8718e-05, 4.4207e-05, 2.7768e-05, 4.1987e-05, 3.1123e-05,\n 2.9330e-05, 1.7081e-05, 3.3748e-05, 4.3495e-05, 3.5961e-05, 3.8815e-05,\n 4.3631e-05, 3.4723e-06], device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(22524.)",
|
| 17 |
+
"exp_avg": "tensor([ 5.7795e-04, -7.4331e-03, 2.1155e-03, 1.0250e-03, -2.9490e-03,\n 1.4496e-03, 4.3261e-03, 4.5253e-03, 1.3704e-03, 5.4829e-03,\n 2.1542e-03, 1.9000e-04, -1.9215e-03, 1.5906e-03, 7.0116e-04,\n 4.4173e-04, 5.0563e-03, 6.0426e-03, 3.7999e-03, -4.0834e-03,\n -1.2531e-03, 2.3619e-03, 2.6947e-03, 5.0038e-03, -1.3676e-03,\n 2.5125e-03, 1.0100e-03, 2.1984e-03, -8.6677e-04, 6.2137e-04,\n 4.0074e-03, -6.2530e-03, -4.5567e-03, -3.6236e-03, 1.9405e-03,\n 1.0348e-03, -3.5991e-03, 1.5794e-03, 3.7226e-04, -1.4277e-03,\n -1.4315e-03, 3.0591e-03, -5.6052e-45, 2.7366e-03, -2.4976e-04,\n 1.5976e-03, -1.5800e-03, 8.0348e-04, -5.6232e-03, -7.0909e-04,\n 2.0546e-03, 1.7124e-03, -9.6579e-04, -5.1107e-03, -1.3941e-04,\n 4.6562e-03, -6.1325e-03, 1.7444e-03, 5.5359e-03, 7.4894e-04,\n -2.7944e-03, 1.5300e-05, 8.3891e-03, 1.2853e-03, -1.1432e-03,\n -2.3602e-03, -3.7836e-04, 1.7748e-03, -2.0694e-03, 1.0597e-03,\n -9.9795e-03, -3.8560e-03, 4.7634e-03, 3.6813e-03, -2.6485e-03,\n -3.8625e-03, -1.4949e-03, -1.6477e-03, 8.2309e-04, 4.0232e-03,\n -1.6630e-03, 6.9889e-03, -3.4815e-03, -4.0122e-04, 2.6331e-03,\n 3.1857e-03, 3.7113e-04, -1.2820e-03, 1.9091e-03, -3.4965e-03,\n -2.4410e-03, -4.8235e-03, -2.3043e-03, -4.3670e-03, -2.2190e-03,\n 1.3678e-03, 5.8296e-03, 3.5865e-04, 2.1506e-03, 3.2211e-03,\n 3.3786e-03, 1.8820e-03, -3.2473e-03, 3.1595e-03, -3.8020e-04,\n -2.1503e-03, -1.3056e-03, 1.1878e-02, 7.4938e-03, 6.1818e-04,\n 2.9839e-03, -3.6331e-03, 1.0343e-02, 4.7254e-03, 1.9334e-03,\n 2.9372e-03, -4.7791e-03, -2.9221e-04, 4.0169e-03, -3.1038e-03,\n -7.7723e-03, -1.5457e-03, 5.0096e-03, 8.2069e-04, -1.1926e-03,\n 4.7807e-03, -8.2988e-04, -2.7047e-03, -7.5759e-03, -3.4387e-03,\n 3.0274e-03, -3.4046e-03, 6.5419e-03, -5.0816e-03, -1.2092e-02,\n 2.1719e-03, -3.7441e-04, -1.3459e-04, 1.5229e-03, 3.6467e-03,\n -1.6514e-03, 5.8636e-03, -3.6016e-03, 8.5401e-04, 7.0664e-04,\n -2.4378e-03, -2.3959e-03, -4.1165e-03, 7.4398e-04, -5.8709e-03,\n -1.8729e-03, 2.1058e-03, 3.5748e-03, 5.4311e-03, -4.7139e-03,\n -1.0292e-03, -7.5126e-04, 4.3922e-03, 2.9987e-03, 5.2787e-03,\n -5.3789e-03, 8.4874e-04, 5.0517e-03, 4.6135e-03, -2.2435e-03,\n 4.9736e-04, -2.8274e-03, -1.8673e-03, 8.0382e-04, 2.8765e-03,\n -1.9853e-03, 1.7671e-03, -7.1297e-03, -1.6881e-03, -1.4489e-03,\n 1.7001e-03, -5.9258e-03, -7.9291e-04, 4.6999e-04, -3.0206e-04,\n -2.0203e-03, 2.0340e-03, -1.5231e-02, 2.9996e-03, -6.6251e-03,\n -1.1620e-03, 4.0884e-03, -1.8846e-04, 3.0585e-03, -5.1213e-04,\n -1.0286e-03, 1.4105e-03, 1.3727e-03, -2.2722e-04, -7.3580e-04,\n -1.4404e-03, -3.2245e-04, -5.3229e-03, 1.7623e-04, 2.4548e-04,\n -1.0532e-03, 4.9187e-03, -4.9651e-04, -2.4666e-03, 4.8233e-04,\n -2.3633e-03, 1.3029e-03, 3.2303e-03, 4.8868e-03, -3.1939e-03,\n -4.1937e-04, -4.3601e-03, -6.1948e-04, -1.5259e-04, 1.0477e-03,\n -1.1030e-03, -1.8190e-04, 3.2364e-03, -3.4530e-03, 8.9323e-04,\n -6.4778e-04, 2.2816e-03, 3.1215e-03, -3.2287e-03, -9.7464e-03,\n 7.5402e-03, -2.0084e-03, 4.6495e-03, -6.2869e-04, 1.7095e-03,\n 8.2370e-03, -3.3222e-04, 3.4952e-03, 3.3273e-03, -1.2968e-05,\n -1.2718e-03, -3.5464e-04, 5.6052e-45, -2.7256e-03, 8.0337e-03,\n -9.9721e-04, 4.1456e-03, 7.9167e-03, 6.1412e-04, 2.3846e-03,\n -2.0745e-03, 4.6699e-03, 3.2014e-03, -5.0803e-03, 6.6285e-03,\n 6.2535e-03, -4.7215e-03, -3.5247e-03, 8.3340e-04, 2.6911e-03,\n 4.1682e-03, 2.4815e-03, -2.0172e-03, -2.6035e-03, -5.1921e-04,\n 5.1283e-03, -2.8588e-03, 1.7868e-03, 3.0932e-04, -3.2653e-03,\n 4.5733e-04, 6.7290e-03, 1.7249e-04, 1.7517e-04, -6.5536e-03,\n -1.2311e-02, -7.4369e-04, 2.5094e-03, 9.6581e-04, 1.0819e-03,\n 3.5601e-03, 6.3552e-03, 2.4708e-04, -8.2344e-06, -4.2497e-04,\n 1.5104e-03, -1.4679e-03, 1.4358e-03, -3.3475e-03, 4.5517e-03,\n -2.1484e-03, 2.2163e-03, -3.4089e-03, -4.4074e-03, 2.5672e-03,\n -1.0036e-03, 1.8349e-03, -5.2012e-03, -2.1923e-03, 5.7912e-03,\n -2.4914e-03, 1.8811e-03, -6.2251e-04, -1.2786e-03, 2.2028e-04,\n -3.4293e-03, 5.3058e-04, 1.4192e-03, 8.6245e-03, 2.1897e-03,\n 1.4490e-03, 1.3696e-03, -6.6445e-03, -2.3739e-04, 9.3231e-03,\n -8.9646e-05, 4.2451e-03, 4.1370e-03, 2.9431e-03, 6.6334e-03,\n 1.6481e-03, -1.2278e-03, 1.0017e-04, -8.6128e-03, -4.3176e-04,\n -4.5058e-03, -6.7511e-03, -2.5980e-03, -8.4462e-04, -1.4375e-03,\n 9.3637e-03, -9.6864e-04, -5.1220e-03, 3.9526e-03, 4.7042e-03,\n 7.0952e-03, -1.9509e-03, -1.9708e-03, 1.6870e-03, -1.8937e-03,\n -1.8212e-03, -3.4887e-03, -1.5164e-03, 2.2979e-03, 1.2930e-03,\n -3.8256e-03, 3.1670e-03, -1.4302e-03, 2.2007e-03, -2.8828e-03,\n -6.3020e-03, 6.8801e-03, -8.6000e-04, -3.2391e-03, 8.3393e-03,\n -5.1798e-03, -1.6322e-03, 3.3686e-04, -1.9153e-03, 1.2523e-03,\n -3.2508e-03, -5.7653e-03, -1.6294e-03, 1.0594e-03, -4.0285e-03,\n 3.4884e-04, -3.4653e-03, 1.6991e-03, 1.7263e-03, 2.1687e-03,\n -3.8093e-03, 1.1232e-03, -2.0200e-03, -5.3627e-03, 1.0615e-03,\n 8.5656e-04, 3.7565e-03, 9.6002e-04, 6.4591e-04, -6.8325e-03,\n 1.9212e-03, -1.5134e-03, 1.7262e-05, -7.3841e-03, -2.5342e-03,\n -8.1218e-04, 3.6266e-03, 2.2960e-03, 2.3519e-03, 7.0666e-04,\n -3.1776e-03, 3.1817e-03, 3.0483e-03, -2.6494e-03, -2.6755e-03,\n -1.5572e-03, 6.3391e-04, -2.1166e-03, -1.0870e-02, -4.1747e-04,\n -2.3425e-04, 3.2763e-03, -7.0253e-04, 6.4167e-04, 5.5758e-03,\n -3.3865e-03, 1.1315e-03, 4.5354e-03, 7.7970e-04, 3.2635e-04,\n -2.3752e-03, -5.7689e-05, -2.2472e-03, 1.5109e-03, -9.7424e-03,\n 2.5588e-03, 1.1881e-03, -5.2880e-03, -6.1447e-03, -6.3885e-03,\n -3.0713e-03, 3.0080e-03, 4.2449e-03, -1.1033e-03, -3.7591e-03,\n 5.1208e-03, -7.9791e-04, -4.6007e-04, -2.4011e-03, 5.9433e-03,\n -9.3130e-03, 2.6088e-03, -4.9470e-04, 8.5226e-05, -1.9944e-03,\n -1.5618e-03, -3.2966e-03, 1.6554e-03, 7.2352e-03, 1.6296e-03,\n 7.2155e-04, -3.8172e-03, 9.0511e-03, 1.4197e-03, -2.0205e-03,\n 6.5986e-03, -2.0206e-03, -1.1415e-04, -1.8784e-03, -2.1497e-04,\n -4.8272e-03, -2.1051e-03, 3.7303e-03, -4.6524e-03, 1.9555e-03,\n 4.8200e-03, -1.4964e-03, 1.8582e-03, 4.8742e-03, 4.7120e-03,\n 1.4217e-03, -3.2095e-03, -3.1997e-03, -7.0544e-04, 5.2158e-04,\n -5.9686e-03, 7.2832e-04, 2.8847e-03, -3.1569e-03, 9.1630e-04,\n -1.4250e-03, -1.3467e-03, -4.0018e-03, 3.9399e-03, 7.5347e-04,\n -7.6027e-04, -4.6508e-03, -3.2066e-03, -7.2941e-03, 1.4557e-03,\n -1.0035e-03, -5.5335e-04, 4.8298e-03, -6.4966e-03, -2.9747e-03,\n -2.2735e-04, -5.4246e-03, -4.2226e-03, -2.0965e-03, 1.7785e-03,\n 1.6643e-03, 2.5073e-03, -4.0917e-04, -5.7858e-03, 4.9138e-03,\n 2.0771e-03, 2.2730e-03, 1.1808e-03, -1.8441e-03, 9.1980e-04,\n 1.8577e-03, -4.6008e-04, -2.2420e-03, -4.6398e-03, 1.6479e-03,\n 1.0231e-03, -2.0258e-03, -1.8307e-03, 5.2699e-04, 6.9287e-04,\n -2.1432e-04, -5.2498e-04, 1.5796e-03, 2.6384e-04, -2.2827e-03,\n 1.2453e-03, -1.0271e-29], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([3.6649e-04, 2.1893e-04, 1.0513e-04, 2.3162e-04, 1.2537e-04, 2.8837e-04,\n 1.9137e-04, 2.7278e-04, 2.4014e-04, 3.3033e-04, 3.4814e-04, 3.6648e-04,\n 3.1035e-04, 1.7190e-04, 1.9625e-04, 2.0000e-04, 1.9767e-04, 1.9181e-04,\n 2.0822e-04, 2.8174e-04, 2.6315e-04, 1.0522e-04, 1.2477e-04, 2.9704e-04,\n 4.4233e-04, 2.1824e-04, 1.8522e-04, 3.5072e-04, 3.8313e-04, 2.4685e-04,\n 1.9033e-04, 2.3752e-04, 2.0931e-04, 1.6556e-04, 1.4993e-04, 1.2947e-04,\n 1.3008e-04, 1.6749e-04, 1.3853e-04, 1.6773e-04, 1.1966e-04, 9.7447e-05,\n 1.2351e-13, 2.1672e-04, 4.4314e-04, 1.5077e-04, 1.6245e-04, 6.5159e-05,\n 4.6366e-04, 2.5817e-04, 3.2820e-04, 1.1843e-04, 1.3755e-04, 3.0980e-04,\n 1.9253e-04, 1.8311e-04, 2.6452e-04, 1.3227e-04, 1.6213e-04, 1.0475e-04,\n 1.8835e-04, 1.9554e-04, 2.2131e-04, 1.3935e-04, 1.2190e-04, 1.4537e-04,\n 2.3073e-04, 1.3328e-04, 2.3138e-04, 1.2772e-04, 2.4456e-04, 1.0505e-04,\n 2.5187e-04, 2.4715e-04, 1.5337e-04, 2.1820e-04, 2.2252e-04, 1.7602e-04,\n 2.3094e-04, 2.4639e-04, 2.4295e-04, 1.5912e-04, 1.4242e-04, 8.8735e-05,\n 1.4830e-04, 3.2866e-04, 4.1902e-04, 1.8189e-04, 9.3718e-05, 5.6387e-04,\n 2.2537e-04, 1.6259e-04, 2.2316e-04, 2.5838e-04, 3.1550e-04, 2.5739e-04,\n 2.4322e-04, 2.0516e-04, 2.6151e-04, 3.7585e-04, 2.5753e-04, 2.6619e-04,\n 2.2584e-04, 1.9716e-04, 2.7056e-04, 4.0387e-04, 1.5722e-04, 4.5274e-04,\n 2.1015e-04, 2.0203e-04, 2.1001e-04, 3.4070e-04, 2.5227e-04, 1.6478e-04,\n 1.9162e-04, 1.2407e-04, 2.0088e-04, 2.8054e-04, 1.5575e-04, 2.9901e-04,\n 1.6013e-04, 3.0534e-04, 4.2485e-04, 3.9365e-04, 3.4079e-04, 4.5286e-04,\n 2.0902e-04, 3.0934e-04, 3.9044e-04, 4.5707e-04, 2.4338e-04, 2.2631e-04,\n 3.5812e-04, 2.1806e-04, 3.7513e-04, 1.3366e-04, 1.9382e-04, 3.2959e-04,\n 1.8200e-04, 1.0710e-04, 1.6901e-04, 1.9425e-04, 2.7761e-04, 2.3200e-04,\n 1.8387e-04, 1.7564e-04, 1.0331e-04, 3.4725e-04, 2.2276e-04, 3.3227e-04,\n 1.0502e-04, 2.8775e-04, 1.4506e-04, 2.6287e-04, 9.7978e-05, 9.0939e-05,\n 4.0235e-04, 2.8154e-04, 2.4362e-04, 1.5756e-04, 3.5076e-04, 1.3450e-04,\n 4.1471e-04, 1.8585e-04, 1.0506e-04, 4.1723e-04, 2.4633e-04, 9.6612e-05,\n 9.2208e-05, 2.0582e-04, 9.4599e-05, 2.2818e-04, 2.1737e-04, 4.1506e-04,\n 2.4304e-04, 1.3776e-04, 2.3648e-04, 1.0982e-04, 1.3174e-04, 2.4309e-04,\n 1.8920e-04, 2.0633e-04, 5.4133e-04, 2.0192e-04, 6.2390e-04, 1.5625e-04,\n 6.0663e-04, 1.3960e-04, 1.7807e-04, 9.0757e-05, 1.6675e-04, 2.7359e-04,\n 1.4850e-04, 1.1851e-04, 1.4274e-04, 9.7688e-05, 2.2913e-04, 1.3250e-04,\n 8.7335e-05, 9.5393e-05, 1.3251e-04, 1.9464e-04, 1.3415e-04, 1.0298e-04,\n 5.9853e-05, 1.9074e-04, 1.6353e-04, 1.3376e-04, 3.2069e-04, 1.0199e-04,\n 3.3461e-04, 2.3683e-04, 3.2589e-05, 1.1630e-04, 2.5913e-04, 2.1679e-04,\n 9.8432e-05, 5.4560e-04, 2.5226e-04, 3.9846e-04, 1.6205e-04, 1.5966e-04,\n 2.0089e-04, 1.6187e-04, 2.1860e-04, 2.8924e-04, 1.0554e-04, 1.1796e-04,\n 7.8440e-05, 1.9082e-04, 1.8842e-04, 1.2419e-04, 5.9792e-04, 2.8998e-04,\n 9.8733e-05, 1.5700e-04, 1.9523e-04, 1.7520e-14, 1.3945e-04, 4.0538e-04,\n 1.2523e-04, 3.0973e-04, 4.1567e-04, 2.8481e-04, 1.7778e-04, 3.5611e-04,\n 1.3982e-04, 1.1428e-04, 2.7475e-04, 1.6992e-04, 3.0029e-04, 1.0160e-04,\n 4.2316e-04, 2.7489e-04, 1.4527e-04, 2.4056e-04, 1.9220e-04, 1.6662e-04,\n 1.1935e-04, 1.9167e-04, 2.3444e-04, 2.5270e-04, 1.6743e-04, 1.8552e-04,\n 1.9968e-04, 2.2793e-04, 2.2900e-04, 2.2070e-04, 1.9483e-04, 4.0614e-04,\n 3.3361e-04, 2.9832e-04, 7.4210e-05, 2.2229e-04, 1.1243e-04, 1.4723e-04,\n 1.7563e-04, 2.5781e-04, 2.3978e-04, 2.0068e-04, 2.2717e-04, 8.6211e-05,\n 1.4988e-04, 1.5988e-04, 1.4922e-04, 2.5056e-04, 2.7789e-04, 1.2099e-04,\n 5.2360e-04, 2.0258e-04, 1.2221e-04, 8.5053e-05, 1.7148e-04, 9.8040e-05,\n 2.2302e-04, 1.4095e-04, 1.7476e-04, 1.6424e-04, 4.1250e-04, 2.0554e-04,\n 1.1983e-04, 1.2832e-04, 3.3854e-04, 1.7806e-04, 2.0229e-04, 2.0336e-04,\n 1.7291e-04, 1.7924e-04, 6.9395e-04, 4.4699e-04, 4.1850e-04, 2.2691e-04,\n 1.7305e-04, 1.9237e-04, 1.6309e-04, 2.9745e-04, 1.6124e-04, 1.0223e-04,\n 2.0987e-04, 2.6346e-04, 5.5804e-04, 5.2724e-04, 2.0482e-04, 2.9109e-04,\n 3.5545e-04, 3.9620e-04, 2.1423e-04, 3.6219e-04, 1.4404e-04, 2.7910e-04,\n 3.7522e-04, 4.4106e-04, 7.7716e-05, 1.4388e-04, 1.6791e-04, 1.8590e-04,\n 9.0644e-05, 1.5014e-04, 2.6375e-04, 2.3748e-04, 1.9162e-04, 1.2944e-04,\n 2.0940e-04, 3.3768e-04, 1.8174e-04, 2.4834e-04, 2.1058e-04, 1.1035e-04,\n 1.6819e-04, 2.5313e-04, 2.2181e-04, 2.0322e-04, 2.1972e-04, 2.2954e-04,\n 2.7206e-04, 1.5622e-04, 2.2841e-04, 1.4241e-04, 1.1910e-04, 1.3540e-04,\n 1.1657e-04, 4.5378e-04, 4.0043e-04, 3.6664e-04, 2.0181e-04, 2.0198e-04,\n 6.8813e-05, 3.7032e-04, 1.5222e-04, 1.6868e-04, 1.2635e-04, 2.0234e-04,\n 2.9976e-04, 2.0998e-04, 1.6694e-04, 1.9090e-04, 1.2796e-04, 1.2703e-04,\n 2.6642e-04, 1.0475e-04, 2.3948e-04, 2.0743e-04, 2.6995e-04, 2.3198e-04,\n 2.0008e-04, 3.7724e-04, 1.5392e-04, 1.6521e-04, 2.2231e-04, 1.4051e-04,\n 1.3905e-04, 1.3766e-04, 1.7041e-04, 5.2573e-04, 1.5615e-04, 9.8564e-05,\n 1.5874e-04, 4.9095e-04, 2.0194e-04, 3.1563e-04, 1.8294e-04, 1.2091e-04,\n 3.9700e-04, 1.7081e-04, 2.0468e-04, 1.3807e-04, 5.5350e-04, 5.3367e-05,\n 3.0260e-04, 2.7933e-04, 2.7578e-04, 6.1370e-05, 3.1716e-04, 1.8698e-04,\n 2.0853e-04, 5.4878e-04, 4.6418e-04, 2.8608e-04, 2.5528e-04, 1.5787e-04,\n 2.1181e-04, 1.8700e-04, 3.6789e-04, 2.8795e-04, 5.8815e-04, 2.4361e-04,\n 2.1383e-04, 1.7992e-04, 2.7259e-04, 1.6263e-04, 1.9320e-04, 1.1053e-04,\n 1.7135e-04, 4.3208e-04, 3.5613e-04, 2.1162e-04, 2.6926e-04, 4.2867e-04,\n 2.2162e-04, 1.1545e-04, 2.9425e-04, 2.2325e-04, 1.8076e-04, 6.2540e-04,\n 1.7476e-04, 2.8389e-04, 2.9964e-04, 1.3513e-04, 2.3612e-04, 2.7510e-04,\n 2.9338e-04, 1.3595e-04, 3.0082e-04, 4.9965e-04, 2.5287e-04, 1.1725e-04,\n 2.9234e-04, 1.5854e-04, 1.0583e-04, 1.6823e-04, 2.1262e-04, 1.3808e-04,\n 2.2037e-04, 1.5888e-04, 1.7716e-04, 1.3806e-04, 1.2620e-04, 2.2603e-04,\n 3.6175e-04, 2.8318e-04, 2.6655e-04, 3.9645e-04, 2.2310e-04, 1.5194e-04,\n 2.3079e-04, 2.2671e-04, 2.1563e-04, 2.2329e-04, 3.9198e-04, 1.3695e-04,\n 8.6023e-05, 5.1742e-04, 2.3656e-04, 1.4651e-04, 1.8036e-04, 1.1354e-04,\n 1.1495e-04, 1.9700e-04, 3.8388e-04, 1.1240e-04, 1.6113e-04, 1.1832e-04,\n 2.9301e-04, 1.5538e-04, 1.8422e-04, 2.7454e-04, 2.1692e-04, 1.9761e-04,\n 1.2225e-04, 1.8404e-04, 1.8924e-04, 1.4802e-04, 1.7018e-04, 1.9731e-04,\n 1.0490e-04, 2.1583e-04, 1.1739e-04, 1.7986e-04, 8.6270e-05, 1.0261e-04,\n 1.1063e-04, 1.3505e-10], device='cuda:0')"
|
| 19 |
+
},
|
| 20 |
+
"3": {
|
| 21 |
+
"step": "tensor(22524.)",
|
| 22 |
+
"exp_avg": "tensor([ 4.3288e-04, -3.1008e-03, 1.3452e-03, 3.0642e-04, -1.2838e-03,\n 4.1036e-04, 2.5464e-03, 1.8019e-03, 1.3671e-04, 2.3784e-03,\n 1.2635e-03, -1.8929e-04, -9.4491e-04, 7.4359e-04, 2.5306e-04,\n -4.0006e-04, 2.2055e-03, 3.1206e-03, 2.0087e-03, -1.1237e-03,\n -4.0961e-04, 1.2634e-03, 1.3659e-03, 2.1852e-03, 4.2533e-05,\n 1.8625e-03, 4.1294e-04, 1.3432e-03, -2.4805e-04, 7.7218e-04,\n 1.6335e-03, -2.2308e-03, -2.9646e-03, -1.7891e-03, 1.0517e-03,\n 5.4362e-04, -1.5554e-03, 4.1913e-04, 7.1329e-05, -6.3697e-04,\n -4.8218e-04, 1.6162e-03, 5.6052e-45, 8.3093e-04, -2.4381e-04,\n 5.9041e-04, -7.1836e-04, 5.5007e-04, -1.6399e-03, 5.3995e-04,\n 9.4425e-04, 7.6504e-04, -7.3017e-04, -3.0366e-03, -1.4591e-04,\n 1.8858e-03, -2.6788e-03, 1.0189e-03, 2.2067e-03, 4.2837e-04,\n -1.5632e-03, -3.3807e-04, 3.8412e-03, 6.5103e-04, -9.6307e-04,\n -1.0448e-03, 4.5285e-04, 1.0674e-03, -5.3084e-04, 5.8905e-04,\n -4.0556e-03, -2.8224e-03, 2.3011e-03, 1.4788e-03, -1.5356e-03,\n -1.6370e-03, -8.9200e-04, -8.8138e-04, 7.9235e-04, 1.9745e-03,\n -1.0260e-03, 3.3608e-03, -1.1231e-03, -8.9904e-05, 1.7380e-03,\n 1.5193e-03, -1.5798e-04, -6.3953e-04, 1.0241e-03, -1.4273e-03,\n -1.5600e-03, -2.9293e-03, -9.9856e-04, -2.1029e-03, -8.6747e-04,\n 8.0231e-04, 2.4891e-03, 1.1832e-04, 8.7952e-04, 1.0325e-03,\n 1.9100e-03, 1.2096e-03, -1.5354e-03, 1.8418e-03, 1.0381e-04,\n -9.2896e-04, 6.7639e-05, 6.2609e-03, 3.6207e-03, -2.9970e-04,\n 1.5997e-03, -1.8221e-03, 4.7342e-03, 2.4503e-03, 7.7171e-04,\n 1.4933e-03, -2.3797e-03, -2.2867e-05, 2.0192e-03, -5.2211e-04,\n -2.5994e-03, -6.3571e-04, 1.7957e-03, 1.0549e-04, -4.8880e-04,\n 1.6643e-03, -9.1598e-04, -9.0853e-04, -3.0004e-03, -1.7959e-03,\n 1.7351e-03, -1.3151e-03, 2.9110e-03, -2.4693e-03, -7.3426e-03,\n 7.9036e-04, -3.2507e-04, -3.2351e-05, 7.5935e-04, 1.8042e-03,\n -6.1211e-04, 2.0593e-03, -1.2300e-03, 6.3648e-04, 1.9456e-04,\n -1.1968e-03, -9.4144e-04, -1.6063e-03, 3.9614e-04, -3.0595e-03,\n -8.1809e-04, 1.4291e-03, 1.6486e-03, 2.4082e-03, -2.2372e-03,\n -8.1461e-04, -1.9923e-04, 2.0551e-03, 1.1437e-03, 2.1659e-03,\n -2.3940e-03, 2.4414e-04, 1.8623e-03, 2.2729e-03, -1.0373e-03,\n 2.3175e-04, -1.3145e-03, -1.0246e-03, 5.7864e-04, 1.2350e-03,\n -8.7250e-04, 1.1243e-03, -3.2433e-03, -4.9543e-04, -9.9719e-04,\n 6.4996e-04, -3.9166e-03, -5.0507e-04, 4.8743e-04, -8.3069e-04,\n -1.0407e-03, 3.5722e-04, -7.0869e-03, 1.3158e-03, -2.2269e-03,\n -3.2035e-04, 1.8230e-03, 7.0668e-05, 1.4582e-03, -1.7547e-04,\n -4.0485e-04, 1.0359e-03, 9.5552e-04, -1.0365e-04, -4.1135e-04,\n -5.4838e-04, -2.2051e-04, -2.3133e-03, 6.3705e-05, 1.0244e-04,\n -1.8153e-05, 2.2965e-03, -1.2119e-05, -1.0370e-03, 2.1052e-04,\n -1.6126e-04, 3.3046e-04, 1.1358e-03, 2.7418e-03, -1.5646e-03,\n -4.2258e-04, -1.7899e-03, -3.4889e-04, 2.1486e-05, 1.0673e-03,\n -7.7597e-04, -2.0005e-04, 1.1054e-03, -1.5871e-03, 3.5733e-04,\n -1.9962e-04, 1.0601e-03, 1.2205e-03, -1.0779e-03, -3.8654e-03,\n 3.8465e-03, -8.6696e-04, 2.1939e-03, -4.2885e-04, 8.1938e-04,\n 3.3859e-03, -3.5037e-04, 1.0569e-03, 9.6216e-04, 1.6673e-04,\n -2.9004e-05, -1.8986e-04, 5.6052e-45, -1.4267e-03, 4.1378e-03,\n -4.6189e-04, 1.9976e-03, 3.5461e-03, 3.8278e-04, 1.6762e-03,\n -1.8591e-04, 2.3601e-03, 2.0110e-03, -2.1100e-03, 2.9582e-03,\n 3.5461e-03, -2.1485e-03, -1.3786e-03, 7.2027e-04, 1.3267e-03,\n 1.8462e-03, 2.1988e-03, -1.0974e-03, -1.3229e-03, -1.1572e-04,\n 2.6026e-03, -1.3455e-03, 7.4635e-04, 4.9213e-04, -1.5075e-03,\n 1.2247e-04, 2.3815e-03, 3.8753e-04, -1.2963e-04, -2.1727e-03,\n -5.0545e-03, -1.8640e-04, 1.3199e-03, 4.3353e-04, 7.9039e-04,\n 1.5657e-03, 3.4319e-03, 5.1298e-04, 1.8386e-05, -3.1771e-04,\n 7.8274e-04, -6.4189e-04, 1.0086e-03, -2.0662e-03, 1.9001e-03,\n -5.8155e-04, 1.1030e-03, -1.5982e-03, -2.1326e-03, 1.2168e-03,\n -4.2690e-04, 1.0057e-03, -2.4656e-03, -1.0872e-03, 2.5231e-03,\n -1.0576e-03, 1.2169e-03, -4.6536e-04, -4.5240e-04, 3.1419e-04,\n -1.7547e-03, 3.6234e-04, 9.1485e-05, 4.0260e-03, 1.2127e-03,\n 5.2949e-04, 6.8478e-04, -3.0815e-03, 1.1687e-04, 3.1890e-03,\n -5.1331e-04, 1.6748e-03, 1.6942e-03, 1.3852e-03, 3.0942e-03,\n 8.8600e-04, -6.1810e-04, 1.3015e-04, -3.2837e-03, -5.2032e-04,\n -1.5956e-03, -3.3604e-03, -9.8356e-04, -5.4411e-04, -3.9819e-04,\n 3.9686e-03, -4.7774e-04, -2.1661e-03, 2.0171e-03, 1.5036e-03,\n 3.6513e-03, -9.2901e-04, -9.1716e-04, 8.9005e-04, -9.5622e-04,\n -7.6822e-04, -2.3581e-03, -6.1230e-04, 1.1031e-03, 8.1613e-04,\n -1.6796e-03, 1.2518e-03, -5.5040e-04, 9.2606e-04, -2.1764e-03,\n -3.2163e-03, 3.6456e-03, -5.8368e-04, -1.7077e-03, 4.0818e-03,\n -2.0360e-03, -1.1033e-03, 1.9445e-04, -6.2015e-04, 5.0648e-04,\n -1.2217e-03, -3.1078e-03, -3.3299e-04, 4.6968e-04, -1.9638e-03,\n 4.5045e-04, -2.6435e-03, 3.1428e-04, 5.1459e-04, 1.1882e-03,\n -1.7650e-03, 7.1022e-04, -6.9016e-04, -3.3294e-03, 2.4095e-04,\n 2.3117e-04, 2.1692e-03, 6.5860e-05, 2.2037e-04, -3.4393e-03,\n 1.0560e-03, -6.2612e-04, 4.2027e-04, -2.8744e-03, -1.3402e-03,\n -8.5709e-05, 1.6350e-03, 1.2901e-03, 6.7987e-04, 3.2817e-04,\n -2.5098e-03, 1.4932e-03, 2.0948e-03, -1.4060e-03, -1.4966e-03,\n -6.2453e-04, 8.0368e-05, -9.1536e-04, -4.7435e-03, 2.4132e-04,\n 8.4299e-05, 1.1305e-03, -3.9357e-04, 6.0072e-04, 2.8050e-03,\n -1.7308e-03, 3.6525e-04, 2.0405e-03, 6.6089e-04, 3.7658e-04,\n -1.6085e-03, 4.8207e-04, -1.0673e-03, 9.7206e-04, -4.5304e-03,\n 7.5708e-04, 1.2114e-03, -2.6773e-03, -3.4819e-03, -2.9311e-03,\n -6.7686e-04, 9.8001e-04, 2.2306e-03, -3.6066e-04, -1.9680e-03,\n 2.1626e-03, 5.2337e-07, -1.6247e-04, -1.0230e-03, 2.4997e-03,\n -4.0477e-03, 1.4413e-03, -1.2262e-04, -3.6817e-05, -5.0346e-04,\n -1.0361e-03, -1.5670e-03, 1.2016e-03, 3.5017e-03, 4.3389e-04,\n 2.1036e-04, -1.3572e-03, 3.4875e-03, 8.2139e-04, -9.7842e-04,\n 3.0798e-03, -1.0934e-03, -2.3462e-05, -1.0906e-03, -5.9501e-05,\n -1.7758e-03, -1.2255e-03, 1.8321e-03, -2.5134e-03, 4.1103e-04,\n 1.9974e-03, -4.6433e-04, 1.1335e-03, 2.6835e-03, 2.0142e-03,\n 4.9434e-04, -1.9277e-03, -2.0172e-03, -3.3813e-05, 1.4998e-04,\n -2.9654e-03, 5.7108e-04, 9.7377e-04, -1.4330e-03, 4.3379e-04,\n -6.4229e-04, -6.3708e-04, -2.2634e-03, 1.8138e-03, -2.4442e-04,\n -2.0718e-04, -2.4646e-03, -1.3214e-03, -4.1223e-03, 6.5347e-04,\n -3.0092e-04, -3.0392e-04, 2.2683e-03, -2.5038e-03, -1.3946e-03,\n 7.0186e-05, -1.7636e-03, -1.6631e-03, -9.9882e-04, 1.0648e-03,\n 1.1282e-03, 1.0544e-03, -2.6630e-04, -2.5274e-03, 2.5127e-03,\n 1.1157e-03, 1.1614e-03, 8.4336e-04, -8.6638e-04, 4.4861e-04,\n 8.3486e-04, -6.0445e-04, -9.2626e-04, -2.4644e-03, 4.4285e-04,\n 4.2068e-04, -6.4621e-04, -7.4500e-04, 6.0261e-04, 1.8504e-04,\n -1.0772e-04, -4.2753e-04, 7.6399e-04, 1.6676e-05, -1.2871e-03,\n 6.1738e-04, 1.3120e-30], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([6.4360e-05, 4.0839e-05, 3.2481e-05, 4.9559e-05, 2.7651e-05, 6.4115e-05,\n 4.0549e-05, 7.9034e-05, 5.1180e-05, 6.5963e-05, 7.5201e-05, 7.5023e-05,\n 9.3426e-05, 3.4819e-05, 6.2713e-05, 5.4801e-05, 4.2795e-05, 4.8236e-05,\n 4.4302e-05, 6.2589e-05, 5.5401e-05, 2.7023e-05, 2.8743e-05, 6.4113e-05,\n 1.1706e-04, 6.1688e-05, 3.8897e-05, 7.4150e-05, 1.1247e-04, 5.1336e-05,\n 3.4890e-05, 3.6936e-05, 6.4753e-05, 4.4665e-05, 3.1953e-05, 4.4079e-05,\n 3.3203e-05, 3.0363e-05, 3.3585e-05, 3.1358e-05, 2.9213e-05, 2.9589e-05,\n 2.2092e-15, 7.7602e-05, 1.2035e-04, 3.1442e-05, 4.3826e-05, 1.5302e-05,\n 7.0604e-05, 9.8822e-05, 1.0949e-04, 2.3927e-05, 3.2093e-05, 5.4882e-05,\n 6.9869e-05, 3.6284e-05, 4.7693e-05, 5.5315e-05, 4.0126e-05, 2.7829e-05,\n 7.3585e-05, 3.5389e-05, 4.9773e-05, 3.4750e-05, 3.0919e-05, 3.3627e-05,\n 4.4171e-05, 3.0147e-05, 4.9161e-05, 3.4747e-05, 5.0996e-05, 4.2781e-05,\n 3.7738e-05, 3.6949e-05, 4.0680e-05, 4.3119e-05, 3.7877e-05, 3.9283e-05,\n 5.5592e-05, 5.9579e-05, 5.3975e-05, 4.2555e-05, 3.3806e-05, 2.0277e-05,\n 3.6519e-05, 6.2570e-05, 6.8954e-05, 3.6641e-05, 2.2630e-05, 6.8304e-05,\n 3.8401e-05, 4.5209e-05, 6.2756e-05, 4.9854e-05, 5.8341e-05, 4.7741e-05,\n 7.1373e-05, 4.3691e-05, 5.6300e-05, 4.1014e-05, 4.0523e-05, 5.7194e-05,\n 6.5343e-05, 4.4104e-05, 6.6590e-05, 9.7574e-05, 4.1279e-05, 1.3698e-04,\n 4.2193e-05, 4.0698e-05, 6.6924e-05, 6.5042e-05, 5.3339e-05, 4.7183e-05,\n 5.2154e-05, 3.8186e-05, 5.6372e-05, 7.8027e-05, 3.0692e-05, 5.0024e-05,\n 2.3821e-05, 6.6124e-05, 6.0155e-05, 1.0196e-04, 5.3585e-05, 7.5483e-05,\n 4.1368e-05, 6.6264e-05, 7.5160e-05, 1.2761e-04, 5.7397e-05, 3.5312e-05,\n 7.0277e-05, 4.4081e-05, 1.3556e-04, 3.4825e-05, 5.2152e-05, 5.8936e-05,\n 3.8002e-05, 2.7441e-05, 3.0285e-05, 4.1726e-05, 4.9049e-05, 8.2244e-05,\n 3.9971e-05, 4.6260e-05, 2.5599e-05, 7.5275e-05, 4.7095e-05, 1.0531e-04,\n 2.4368e-05, 7.0980e-05, 3.1209e-05, 7.1592e-05, 2.7254e-05, 3.3966e-05,\n 7.3803e-05, 5.0743e-05, 3.8757e-05, 3.7569e-05, 7.2235e-05, 1.9753e-05,\n 7.5332e-05, 4.6376e-05, 1.8716e-05, 9.2416e-05, 4.4243e-05, 2.1571e-05,\n 1.9124e-05, 5.4156e-05, 3.1425e-05, 5.9526e-05, 4.3824e-05, 9.5568e-05,\n 4.8191e-05, 2.3077e-05, 9.0981e-05, 2.6031e-05, 2.3838e-05, 6.9064e-05,\n 4.1699e-05, 3.8634e-05, 1.1518e-04, 4.0903e-05, 8.7394e-05, 3.8815e-05,\n 1.5201e-04, 3.7393e-05, 4.3020e-05, 1.8385e-05, 4.0407e-05, 5.4180e-05,\n 4.5991e-05, 3.8030e-05, 2.9264e-05, 2.0659e-05, 7.1818e-05, 3.0011e-05,\n 1.7472e-05, 2.1403e-05, 3.4997e-05, 4.7775e-05, 3.5861e-05, 2.3632e-05,\n 2.3546e-05, 4.1250e-05, 3.2116e-05, 2.7950e-05, 6.6070e-05, 2.6359e-05,\n 7.3071e-05, 6.5922e-05, 2.2129e-05, 2.9718e-05, 5.2796e-05, 2.8971e-05,\n 2.7001e-05, 5.6727e-05, 4.7298e-05, 7.3319e-05, 3.0561e-05, 3.1453e-05,\n 3.5143e-05, 2.6703e-05, 4.1540e-05, 7.2223e-05, 2.7267e-05, 2.3890e-05,\n 2.1635e-05, 4.5140e-05, 3.6153e-05, 2.8504e-05, 9.4849e-05, 5.4124e-05,\n 2.8663e-05, 3.8274e-05, 4.2153e-05, 6.1345e-16, 3.7237e-05, 1.1101e-04,\n 3.3261e-05, 8.5804e-05, 5.6737e-05, 7.0331e-05, 8.0171e-05, 6.5419e-05,\n 4.7698e-05, 3.0439e-05, 6.0286e-05, 3.7937e-05, 8.4698e-05, 2.4447e-05,\n 7.2456e-05, 7.1782e-05, 3.4660e-05, 5.1317e-05, 5.8346e-05, 5.1772e-05,\n 4.2336e-05, 4.7734e-05, 4.1927e-05, 6.1242e-05, 3.9822e-05, 6.4377e-05,\n 4.5817e-05, 4.8161e-05, 3.2498e-05, 5.0516e-05, 3.4770e-05, 7.3729e-05,\n 6.4300e-05, 5.7336e-05, 2.6136e-05, 4.9294e-05, 2.6141e-05, 3.7904e-05,\n 4.8564e-05, 7.7996e-05, 4.3313e-05, 3.6838e-05, 5.6668e-05, 2.6485e-05,\n 3.1840e-05, 5.3778e-05, 3.2011e-05, 5.6442e-05, 5.0807e-05, 2.8995e-05,\n 9.5812e-05, 4.6022e-05, 2.7193e-05, 2.9347e-05, 4.8205e-05, 2.6347e-05,\n 4.1911e-05, 2.8252e-05, 4.0004e-05, 5.2347e-05, 1.4363e-04, 4.7637e-05,\n 2.9556e-05, 2.8644e-05, 5.1852e-05, 3.9715e-05, 6.1190e-05, 3.3225e-05,\n 3.7191e-05, 4.7575e-05, 1.2711e-04, 6.2370e-05, 6.9941e-05, 4.8597e-05,\n 3.0065e-05, 3.0702e-05, 4.5695e-05, 4.5241e-05, 4.1932e-05, 2.0578e-05,\n 3.8300e-05, 6.9896e-05, 1.3700e-04, 1.3849e-04, 5.2846e-05, 6.2495e-05,\n 7.6972e-05, 1.0403e-04, 5.4288e-05, 8.7186e-05, 3.9549e-05, 4.9790e-05,\n 9.2406e-05, 9.1164e-05, 1.5972e-05, 3.4680e-05, 3.4340e-05, 4.4793e-05,\n 3.2751e-05, 2.7060e-05, 7.1370e-05, 4.6064e-05, 4.6329e-05, 3.0640e-05,\n 4.4364e-05, 7.6558e-05, 6.2905e-05, 8.1748e-05, 6.4756e-05, 2.9979e-05,\n 4.9760e-05, 7.1188e-05, 4.4403e-05, 4.3244e-05, 5.2950e-05, 3.3860e-05,\n 5.1951e-05, 3.5670e-05, 4.6787e-05, 2.8156e-05, 3.2179e-05, 2.5519e-05,\n 2.2065e-05, 1.1374e-04, 1.0851e-04, 5.8814e-05, 4.7623e-05, 3.7731e-05,\n 1.9483e-05, 7.3298e-05, 4.3780e-05, 2.8660e-05, 2.8299e-05, 4.3119e-05,\n 5.0635e-05, 5.2114e-05, 5.3618e-05, 3.7996e-05, 2.8760e-05, 2.3831e-05,\n 4.3808e-05, 3.0291e-05, 4.7920e-05, 5.1244e-05, 6.5991e-05, 3.4280e-05,\n 4.2578e-05, 1.2979e-04, 3.4405e-05, 4.6864e-05, 4.2836e-05, 4.0570e-05,\n 4.0639e-05, 3.8359e-05, 4.6903e-05, 8.0298e-05, 4.0480e-05, 2.1239e-05,\n 2.6372e-05, 8.2849e-05, 5.4781e-05, 8.1313e-05, 3.8369e-05, 2.8105e-05,\n 7.2658e-05, 3.1334e-05, 4.5852e-05, 3.7953e-05, 1.0703e-04, 1.5358e-05,\n 6.6157e-05, 7.4937e-05, 5.3521e-05, 3.3300e-05, 8.9055e-05, 6.1587e-05,\n 3.8747e-05, 4.8419e-05, 9.7167e-05, 7.1249e-05, 5.8195e-05, 4.8496e-05,\n 5.2367e-05, 3.9438e-05, 6.1747e-05, 5.6858e-05, 7.6853e-05, 4.8183e-05,\n 6.9619e-05, 3.8660e-05, 4.7806e-05, 3.6730e-05, 4.0159e-05, 2.7180e-05,\n 5.0448e-05, 1.1850e-04, 5.3208e-05, 4.3099e-05, 5.6490e-05, 7.0495e-05,\n 4.5168e-05, 3.0080e-05, 6.1466e-05, 5.8563e-05, 4.1987e-05, 1.8896e-04,\n 5.7810e-05, 4.7252e-05, 6.0655e-05, 4.3262e-05, 6.3200e-05, 5.0052e-05,\n 5.0205e-05, 3.2925e-05, 9.2397e-05, 1.2701e-04, 5.6048e-05, 2.3291e-05,\n 9.8663e-05, 3.7060e-05, 2.3152e-05, 4.3600e-05, 4.8739e-05, 3.8631e-05,\n 3.6456e-05, 4.2944e-05, 4.7692e-05, 3.1492e-05, 3.1200e-05, 5.4201e-05,\n 6.0360e-05, 5.9278e-05, 4.8385e-05, 1.4041e-04, 5.7837e-05, 5.4946e-05,\n 6.6771e-05, 4.7018e-05, 5.4286e-05, 4.8759e-05, 6.2880e-05, 2.8074e-05,\n 2.5935e-05, 7.2523e-05, 5.4902e-05, 3.0035e-05, 3.8010e-05, 4.1417e-05,\n 3.8202e-05, 4.0460e-05, 8.4219e-05, 3.8974e-05, 3.8105e-05, 3.4575e-05,\n 5.4037e-05, 4.2064e-05, 3.3835e-05, 4.2914e-05, 4.9689e-05, 4.3322e-05,\n 3.2041e-05, 4.0196e-05, 4.3710e-05, 3.1109e-05, 3.8917e-05, 4.0844e-05,\n 2.6143e-05, 4.0897e-05, 3.1540e-05, 4.7631e-05, 3.7272e-05, 4.7143e-05,\n 3.7238e-05, 2.1523e-12], device='cuda:0')"
|
| 24 |
+
},
|
| 25 |
+
"4": {
|
| 26 |
+
"step": "tensor(22524.)",
|
| 27 |
+
"exp_avg": "tensor([[-1.7762e-06, 9.6380e-06, -3.1472e-06, ..., -8.7844e-06,\n -4.4107e-06, 3.3455e-33],\n [-2.0609e-06, 2.8813e-05, 6.4532e-06, ..., -3.1242e-06,\n 1.3630e-05, 1.0215e-32],\n [ 1.0821e-05, 2.1950e-05, -1.3335e-05, ..., 1.1606e-05,\n 6.3576e-06, -5.1802e-33],\n ...,\n [ 8.9378e-06, 5.1585e-06, 2.7941e-06, ..., -1.2623e-05,\n 5.2511e-06, 1.4019e-32],\n [ 7.3211e-06, -7.5804e-06, -4.2395e-06, ..., 5.8698e-07,\n -1.3788e-05, 9.7074e-33],\n [-1.6573e-08, 6.3162e-06, 2.3305e-06, ..., 1.2092e-07,\n 5.0534e-07, 1.2583e-32]], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([[7.5074e-10, 1.3257e-09, 7.6822e-10, ..., 1.4315e-09, 5.4239e-10,\n 5.7611e-17],\n [1.2302e-09, 2.6200e-09, 1.7295e-09, ..., 4.1038e-09, 2.3918e-09,\n 3.2397e-16],\n [9.8428e-10, 1.4507e-09, 1.9081e-09, ..., 3.7738e-09, 8.5311e-10,\n 2.9493e-16],\n ...,\n [1.7331e-09, 3.3129e-09, 1.4365e-09, ..., 5.3766e-09, 1.1566e-09,\n 3.3065e-16],\n [1.9381e-09, 4.5251e-09, 1.5141e-09, ..., 3.2379e-09, 2.0201e-09,\n 5.8849e-16],\n [2.1769e-09, 2.1233e-09, 1.2102e-09, ..., 3.5445e-09, 1.8718e-09,\n 3.3976e-16]], device='cuda:0')"
|
| 29 |
+
},
|
| 30 |
+
"5": {
|
| 31 |
+
"step": "tensor(15016.)",
|
| 32 |
+
"exp_avg": "tensor([[-9.1431e-06, 9.5956e-06, 1.7234e-08, ..., -4.7996e-06,\n -1.1460e-05, 1.1094e-32],\n [-1.2825e-05, 3.5898e-05, 1.9040e-06, ..., -1.5266e-05,\n 1.4578e-05, 1.0198e-32],\n [ 1.0388e-05, 2.8110e-05, -7.0152e-06, ..., 1.1447e-05,\n -3.9429e-06, -6.5453e-33],\n ...,\n [-2.3132e-06, -9.5046e-06, -6.1596e-06, ..., -5.1061e-06,\n 9.8603e-06, 8.8063e-33],\n [-4.6983e-06, -2.0970e-05, -2.3346e-07, ..., 1.1820e-05,\n 6.3998e-06, 3.1642e-32],\n [ 5.5994e-06, -9.6279e-07, -5.2967e-06, ..., -3.4098e-06,\n -4.0018e-06, 3.9310e-33]], device='cuda:0')",
|
| 33 |
+
"exp_avg_sq": "tensor([[9.0155e-10, 1.1760e-09, 5.6167e-10, ..., 1.0820e-09, 8.0744e-10,\n 2.5195e-16],\n [8.5320e-10, 2.5372e-09, 1.6723e-09, ..., 4.9628e-09, 2.1574e-09,\n 1.6478e-15],\n [9.6054e-10, 1.8991e-09, 1.4854e-09, ..., 4.3783e-09, 1.1883e-09,\n 4.8185e-16],\n ...,\n [2.1050e-09, 1.5705e-09, 2.0377e-09, ..., 7.4155e-09, 1.4303e-09,\n 1.0012e-16],\n [1.1402e-09, 2.4358e-09, 1.2135e-09, ..., 2.5514e-09, 1.1861e-09,\n 3.6325e-15],\n [2.1114e-09, 1.5555e-09, 1.1188e-09, ..., 2.6415e-09, 2.0759e-09,\n 3.4752e-17]], device='cuda:0')"
|
| 34 |
+
},
|
| 35 |
+
"6": {
|
| 36 |
+
"step": "tensor(15016.)",
|
| 37 |
+
"exp_avg": "tensor([-0.0007, 0.0007], device='cuda:0')",
|
| 38 |
+
"exp_avg_sq": "tensor([7.4453e-06, 7.4453e-06], device='cuda:0')"
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"param_groups": [
|
| 42 |
+
{
|
| 43 |
+
"lr": 0.0034555695366224513,
|
| 44 |
+
"name": "shared",
|
| 45 |
+
"betas": [
|
| 46 |
+
0.9,
|
| 47 |
+
0.999
|
| 48 |
+
],
|
| 49 |
+
"eps": 1e-08,
|
| 50 |
+
"weight_decay": 1e-05,
|
| 51 |
+
"amsgrad": false,
|
| 52 |
+
"maximize": false,
|
| 53 |
+
"foreach": null,
|
| 54 |
+
"capturable": false,
|
| 55 |
+
"differentiable": false,
|
| 56 |
+
"fused": null,
|
| 57 |
+
"decoupled_weight_decay": true,
|
| 58 |
+
"initial_lr": 0.01,
|
| 59 |
+
"params": [
|
| 60 |
+
0,
|
| 61 |
+
1,
|
| 62 |
+
2,
|
| 63 |
+
3
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"lr": 0.0034555695366224513,
|
| 68 |
+
"name": "scale_256",
|
| 69 |
+
"betas": [
|
| 70 |
+
0.9,
|
| 71 |
+
0.999
|
| 72 |
+
],
|
| 73 |
+
"eps": 1e-08,
|
| 74 |
+
"weight_decay": 1e-05,
|
| 75 |
+
"amsgrad": false,
|
| 76 |
+
"maximize": false,
|
| 77 |
+
"foreach": null,
|
| 78 |
+
"capturable": false,
|
| 79 |
+
"differentiable": false,
|
| 80 |
+
"fused": null,
|
| 81 |
+
"decoupled_weight_decay": true,
|
| 82 |
+
"initial_lr": 0.01,
|
| 83 |
+
"params": [
|
| 84 |
+
4
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"lr": 0.0034555695366224513,
|
| 89 |
+
"name": "scale_512",
|
| 90 |
+
"betas": [
|
| 91 |
+
0.9,
|
| 92 |
+
0.999
|
| 93 |
+
],
|
| 94 |
+
"eps": 1e-08,
|
| 95 |
+
"weight_decay": 1e-05,
|
| 96 |
+
"amsgrad": false,
|
| 97 |
+
"maximize": false,
|
| 98 |
+
"foreach": null,
|
| 99 |
+
"capturable": false,
|
| 100 |
+
"differentiable": false,
|
| 101 |
+
"fused": null,
|
| 102 |
+
"decoupled_weight_decay": true,
|
| 103 |
+
"initial_lr": 0.01,
|
| 104 |
+
"params": [
|
| 105 |
+
5
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"lr": 0.001728112022559819,
|
| 110 |
+
"name": "fusion",
|
| 111 |
+
"betas": [
|
| 112 |
+
0.9,
|
| 113 |
+
0.999
|
| 114 |
+
],
|
| 115 |
+
"eps": 1e-08,
|
| 116 |
+
"weight_decay": 1e-05,
|
| 117 |
+
"amsgrad": false,
|
| 118 |
+
"maximize": false,
|
| 119 |
+
"foreach": null,
|
| 120 |
+
"capturable": false,
|
| 121 |
+
"differentiable": false,
|
| 122 |
+
"fused": null,
|
| 123 |
+
"decoupled_weight_decay": true,
|
| 124 |
+
"initial_lr": 0.005,
|
| 125 |
+
"params": [
|
| 126 |
+
6
|
| 127 |
+
]
|
| 128 |
+
}
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
"scheduler_state_dict": {
|
| 132 |
+
"T_0": 10,
|
| 133 |
+
"T_i": 10,
|
| 134 |
+
"T_mult": 2,
|
| 135 |
+
"eta_min": 1e-06,
|
| 136 |
+
"T_cur": 6,
|
| 137 |
+
"base_lrs": [
|
| 138 |
+
0.01,
|
| 139 |
+
0.01,
|
| 140 |
+
0.01,
|
| 141 |
+
0.005
|
| 142 |
+
],
|
| 143 |
+
"last_epoch": 6,
|
| 144 |
+
"_step_count": 0,
|
| 145 |
+
"_is_initial": false,
|
| 146 |
+
"_get_lr_called_within_step": false,
|
| 147 |
+
"_last_lr": [
|
| 148 |
+
0.0034555695366224513,
|
| 149 |
+
0.0034555695366224513,
|
| 150 |
+
0.0034555695366224513,
|
| 151 |
+
0.001728112022559819
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
"metrics": {
|
| 155 |
+
"best_val_acc": 65.66133333333333,
|
| 156 |
+
"best_epoch": 5,
|
| 157 |
+
"scale_accuracies": {
|
| 158 |
+
"256": 65.17733333333334,
|
| 159 |
+
"512": 65.52333333333333
|
| 160 |
+
},
|
| 161 |
+
"training_history": {
|
| 162 |
+
"epochs": [
|
| 163 |
+
1,
|
| 164 |
+
2,
|
| 165 |
+
3,
|
| 166 |
+
4,
|
| 167 |
+
5,
|
| 168 |
+
6
|
| 169 |
+
],
|
| 170 |
+
"train_loss": [
|
| 171 |
+
3.9435249049420933,
|
| 172 |
+
3.3040703793567867,
|
| 173 |
+
4.3101251841734625,
|
| 174 |
+
4.185147669827233,
|
| 175 |
+
4.123004540650211,
|
| 176 |
+
4.076372152195373
|
| 177 |
+
],
|
| 178 |
+
"train_acc": [
|
| 179 |
+
54.38726307083047,
|
| 180 |
+
59.31631083223343,
|
| 181 |
+
60.291879721118846,
|
| 182 |
+
61.30111583163371,
|
| 183 |
+
61.94625681117294,
|
| 184 |
+
62.46739626189768
|
| 185 |
+
],
|
| 186 |
+
"val_acc": [
|
| 187 |
+
61.635333333333335,
|
| 188 |
+
62.978,
|
| 189 |
+
64.12,
|
| 190 |
+
64.73133333333334,
|
| 191 |
+
65.312,
|
| 192 |
+
65.66133333333333
|
| 193 |
+
],
|
| 194 |
+
"scale_accs": {
|
| 195 |
+
"256": [
|
| 196 |
+
61.635333333333335,
|
| 197 |
+
62.978,
|
| 198 |
+
63.782,
|
| 199 |
+
64.34866666666667,
|
| 200 |
+
64.754,
|
| 201 |
+
65.17733333333334
|
| 202 |
+
],
|
| 203 |
+
"512": [
|
| 204 |
+
63.839333333333336,
|
| 205 |
+
64.522,
|
| 206 |
+
65.18466666666667,
|
| 207 |
+
65.52333333333333
|
| 208 |
+
]
|
| 209 |
+
},
|
| 210 |
+
"lr": [
|
| 211 |
+
0.00975530705321762,
|
| 212 |
+
0.00904518046337755,
|
| 213 |
+
0.00793913236883622,
|
| 214 |
+
0.00654543046337755,
|
| 215 |
+
0.005000500000000001,
|
| 216 |
+
0.0034555695366224513
|
| 217 |
+
]
|
| 218 |
+
}
|
| 219 |
+
},
|
| 220 |
+
"train_config": {
|
| 221 |
+
"name": "david_training",
|
| 222 |
+
"run_id": "20251012_235237",
|
| 223 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 224 |
+
"model_variant": [
|
| 225 |
+
"clip_vit_b16",
|
| 226 |
+
"clip_vit_laion_b32",
|
| 227 |
+
"clip_vit_b32"
|
| 228 |
+
],
|
| 229 |
+
"num_classes": 1000,
|
| 230 |
+
"preset": "small_fast",
|
| 231 |
+
"custom_config_path": null,
|
| 232 |
+
"num_classes_override": null,
|
| 233 |
+
"use_belly_override": null,
|
| 234 |
+
"belly_expand_override": null,
|
| 235 |
+
"progressive_training_override": true,
|
| 236 |
+
"scale_warmup_epochs_override": {
|
| 237 |
+
"256": 0,
|
| 238 |
+
"512": 2
|
| 239 |
+
},
|
| 240 |
+
"num_epochs": 10,
|
| 241 |
+
"batch_size": 1024,
|
| 242 |
+
"learning_rate": 0.01,
|
| 243 |
+
"weight_decay": 1e-05,
|
| 244 |
+
"warmup_epochs": 3,
|
| 245 |
+
"use_rose_loss": true,
|
| 246 |
+
"rose_initial_weight": 0.1,
|
| 247 |
+
"rose_max_weight": 0.8,
|
| 248 |
+
"rose_weight_schedule": "adaptive",
|
| 249 |
+
"use_cayley_loss": false,
|
| 250 |
+
"cayley_weight": 0.01,
|
| 251 |
+
"scale_loss_balance": null,
|
| 252 |
+
"use_mixed_precision": false,
|
| 253 |
+
"gradient_clip": 15.0,
|
| 254 |
+
"scheduler_type": "cosine_restarts",
|
| 255 |
+
"min_lr": 1e-06,
|
| 256 |
+
"freeze_strategy": "never",
|
| 257 |
+
"freeze_threshold": 90.0,
|
| 258 |
+
"unfreeze_on_plateau": true,
|
| 259 |
+
"patience": 10,
|
| 260 |
+
"track_gradients": true,
|
| 261 |
+
"gradient_scale_threshold": 1e-05,
|
| 262 |
+
"gradient_scale_multiplier": 10.0,
|
| 263 |
+
"log_interval": 50,
|
| 264 |
+
"val_interval": 1,
|
| 265 |
+
"save_interval": 5,
|
| 266 |
+
"log_fusion_weights": true,
|
| 267 |
+
"log_loss_components": true,
|
| 268 |
+
"save_format": "safetensors",
|
| 269 |
+
"hf_repo": "AbstractPhil/david-shared-space",
|
| 270 |
+
"upload_to_hub": true,
|
| 271 |
+
"base_dir": "./david_training",
|
| 272 |
+
"num_workers": 10,
|
| 273 |
+
"pin_memory": true,
|
| 274 |
+
"prefetch_factor": 4,
|
| 275 |
+
"persistent_workers": true
|
| 276 |
+
}
|
| 277 |
+
}
|