AbstractPhil commited on
Commit
d99a22c
·
verified ·
1 Parent(s): 71f284d

Update best_model_acc76.81_metadata.json - Run 20251012_145649

Browse files
weights/David-hierarchical-progressive/20251012_145649/best_model_acc76.81_metadata.json ADDED
@@ -0,0 +1,374 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 7,
3
+ "optimizer_state_dict": {
4
+ "state": {
5
+ "0": {
6
+ "step": "tensor(10016.)",
7
+ "exp_avg": "tensor([[ 5.8434e-05, -2.3254e-04, -4.8691e-04, ..., 1.7913e-05,\n 1.0259e-04, 1.1514e-04],\n [ 6.7311e-04, -1.0589e-03, 1.3973e-03, ..., 3.5484e-04,\n 1.2034e-03, -4.0851e-04],\n [-5.5116e-08, -7.3639e-04, -9.5960e-04, ..., 1.5283e-04,\n 9.2675e-05, -4.9489e-05],\n ...,\n [ 4.3931e-04, 4.5877e-04, -2.5926e-04, ..., -4.5055e-04,\n -3.2899e-04, 3.9771e-04],\n [ 1.8669e-04, -1.0431e-03, -8.8796e-04, ..., -4.0601e-05,\n -2.9471e-04, -1.0335e-04],\n [-7.5169e-05, -1.8214e-04, -4.2290e-04, ..., 1.1281e-04,\n 5.5526e-06, 3.9965e-05]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[9.4537e-07, 5.2095e-06, 2.1279e-06, ..., 1.0916e-06, 3.8069e-07,\n 1.4271e-06],\n [5.6545e-07, 5.2957e-06, 1.9481e-06, ..., 6.1315e-07, 4.0020e-07,\n 4.1815e-07],\n [9.1250e-07, 4.8694e-06, 3.1284e-06, ..., 6.2329e-07, 3.2619e-07,\n 7.8155e-07],\n ...,\n [9.0785e-07, 4.1631e-06, 2.7811e-06, ..., 7.7582e-07, 4.0421e-07,\n 7.2599e-07],\n [1.1276e-06, 9.1891e-06, 4.3158e-06, ..., 9.4987e-07, 6.0740e-07,\n 6.1771e-07],\n [7.5804e-07, 6.6055e-06, 2.4865e-06, ..., 5.3252e-07, 3.7951e-07,\n 4.8443e-07]], device='cuda:0')"
9
+ },
10
+ "1": {
11
+ "step": "tensor(10016.)",
12
+ "exp_avg": "tensor([-3.4371e-04, -1.9290e-03, 9.4140e-03, 2.1923e-03, 4.0506e-03,\n -1.5899e-02, -1.0300e-02, -6.1114e-03, 7.2429e-03, -7.5826e-04,\n 1.2153e-03, -2.8278e-03, 8.7820e-03, 1.4959e-02, -7.0854e-03,\n 1.2097e-02, 6.1848e-03, 7.0732e-04, 1.3527e-02, 8.0930e-03,\n 4.3148e-03, 7.9388e-03, 3.5340e-03, 3.5847e-03, -3.3715e-03,\n -8.2504e-03, -7.6338e-04, 1.2929e-02, -4.0668e-03, 5.6283e-03,\n 4.7921e-03, -2.7595e-04, 1.0056e-04, -3.9252e-03, -6.9349e-03,\n 2.8919e-04, 8.4473e-05, -9.5499e-03, 3.6342e-03, 1.0643e-02,\n -3.7963e-03, -2.6891e-03, -5.7273e-04, 1.0817e-03, -2.4818e-03,\n -8.3196e-04, -1.1444e-02, -8.4468e-03, -1.9807e-04, -1.2795e-02,\n 3.8837e-03, 1.2661e-02, 5.1315e-03, 9.6999e-03, 4.8280e-03,\n -8.5932e-03, 6.7626e-03, -1.3122e-03, -2.9687e-03, -1.2821e-03,\n -7.5305e-04, 1.3123e-03, -8.8482e-03, -1.0324e-02, -9.0839e-03,\n -1.0091e-02, 7.3120e-03, 1.1236e-02, 9.1988e-03, -2.3934e-03,\n 9.9029e-03, 1.0634e-02, -2.8992e-03, -8.5087e-03, 1.3945e-03,\n -3.8066e-03, -6.2492e-03, 5.1846e-03, -5.1795e-04, 4.2227e-03,\n 1.5521e-02, 9.6473e-04, 9.8328e-03, -7.8347e-03, 1.2915e-02,\n -2.5985e-03, 9.0081e-04, 4.9802e-03, -3.0881e-02, 9.4125e-03,\n -6.9726e-03, 1.4950e-04, -6.9870e-03, -4.7840e-03, -5.8473e-04,\n 1.3106e-02, -1.5864e-02, 3.2203e-03, 2.0433e-02, -2.5912e-03,\n 1.1835e-02, -1.3589e-03, -9.0947e-03, 2.1042e-03, 2.7426e-03,\n 1.7739e-03, -1.6534e-03, 2.6633e-03, 1.9650e-03, 1.7148e-02,\n 1.2622e-03, 1.6579e-03, -9.5205e-03, -1.4140e-04, -3.6837e-03,\n 3.5798e-03, 2.8765e-03, 3.1618e-03, -8.1427e-03, -3.7583e-03,\n -7.4689e-03, -6.4621e-03, -2.4037e-02, 2.6475e-03, -4.3331e-03,\n 8.3732e-03, 1.2845e-02, 5.2966e-03, -2.2008e-03, -2.3924e-03,\n -7.7121e-03, 2.6867e-03, -1.0804e-02, 3.8722e-03, -4.9697e-03,\n -2.8476e-02, -7.1654e-03, -1.1080e-02, -2.3589e-02, -2.5554e-04,\n 2.7039e-03, 1.3869e-03, -2.3744e-03, -3.0416e-03, -8.5799e-04,\n -1.6618e-02, 5.1532e-03, -2.8230e-03, -8.9291e-03, 3.0571e-03,\n 3.7333e-03, -6.9299e-04, -5.9181e-03, -1.3184e-02, 1.0527e-02,\n 3.6333e-02, 5.0646e-03, -5.2585e-03, 8.4123e-03, 7.1925e-03,\n -3.9445e-03, -1.6330e-02, -6.8641e-03, 1.0538e-02, -9.8258e-03,\n -9.1911e-03, 3.6861e-04, -1.4191e-02, 8.1221e-03, -3.6259e-03,\n 1.7142e-02, 1.5961e-02, 7.9181e-03, -9.0489e-03, -4.1081e-03,\n -3.7205e-03, 1.3779e-03, -4.5944e-03, -9.2597e-03, -1.7484e-03,\n 7.2192e-04, 9.8700e-05, -1.9522e-03, -2.9726e-03, -3.6982e-04,\n 1.4875e-02, 1.9171e-02, -4.3099e-03, -9.2910e-03, 1.4918e-02,\n -1.0511e-02, -1.7392e-03, 1.3553e-02, 1.0029e-02, -3.7518e-04,\n -2.1495e-02, -6.8941e-03, -4.3983e-03, -1.0311e-03, 1.7147e-03,\n 3.7789e-03, 1.2282e-02, -2.9072e-03, -5.3362e-03, 1.1657e-02,\n 4.7979e-03, -2.6918e-03, -2.7207e-03, 8.5516e-03, -9.9391e-03,\n 8.9750e-03, 4.5543e-04, -3.1983e-03, -1.2280e-03, 3.3087e-04,\n -1.3404e-02, -4.1775e-03, 9.4365e-03, 7.2097e-03, -3.9043e-03,\n -3.5278e-03, 4.2011e-04, 4.3003e-04, -3.3061e-03, 2.8092e-03,\n -3.7726e-03, 6.3369e-04, -1.2497e-02, 1.1335e-03, -6.3770e-03,\n 6.5183e-04, 3.3980e-03, 1.3288e-02, 7.7358e-03, 2.0302e-03,\n -1.1094e-02, -3.8328e-03, -5.8324e-03, 5.7827e-03, 1.2424e-02,\n -1.0537e-02, -5.6294e-03, 6.4171e-03, 7.9137e-03, 9.9767e-04,\n 1.3842e-03, -1.9984e-03, 1.3817e-02, -6.0428e-03, -1.1570e-02,\n 1.5517e-02, 6.4805e-04, 3.9296e-03, 3.7642e-03, -1.8638e-04,\n -7.3791e-03, -6.4930e-03, 1.3053e-02, -4.3694e-05, -2.4063e-03,\n 1.4475e-02, -5.5941e-03, 1.0827e-02, -8.9902e-04, -3.4976e-03,\n -8.8094e-03, -6.4564e-04, -1.2012e-02, -3.0685e-03, 8.9421e-04,\n -3.8612e-03, 2.6351e-03, -1.6071e-02, -3.7719e-03, -6.7651e-03,\n 5.0262e-03, 1.2154e-02, 2.1767e-04, 7.8446e-04, 5.6082e-03,\n -2.9973e-03, 8.2102e-03, 2.2578e-03, -3.1197e-03, 1.3115e-02,\n -2.6183e-03, 1.7291e-03, 9.1240e-03, 4.0831e-03, 8.5218e-03,\n -7.8041e-03, -1.2330e-02, 3.5756e-03, -3.6917e-04, -4.2632e-03,\n -3.1188e-04, -1.9168e-03, -1.2324e-02, 8.3092e-03, -8.4434e-03,\n -6.0503e-03, -4.0181e-03, 9.1862e-04, -2.5210e-03, -1.5842e-02,\n -8.2697e-03, -6.4203e-03, 8.3634e-04, 6.3157e-04, -1.7854e-03,\n 2.2412e-02, 9.9686e-03, 5.7897e-03, 7.1803e-03, -1.7880e-02,\n 5.2098e-05, 1.2101e-02, -3.8819e-03, -5.3500e-03, 5.4161e-03,\n 5.0308e-03, 1.0366e-02, -2.7226e-03, 8.9693e-03, -9.9044e-03,\n 5.7811e-03, -1.4037e-02, 8.8976e-03, 2.6517e-03, -1.2311e-02,\n -2.4240e-03, 2.5636e-03, -1.8631e-02, -1.2665e-02, 3.6459e-03,\n -9.9654e-03, 1.6509e-03, 1.2653e-02, 1.1161e-03, 8.6532e-03,\n 3.3819e-03, 1.4925e-02, -3.3565e-03, -2.3417e-03, 3.4274e-03,\n 4.7876e-03, 3.3751e-03, 2.8368e-03, 2.5400e-03, -8.5593e-03,\n 1.4780e-02, -1.8203e-03, 2.6637e-03, 4.5508e-04, -2.8519e-02,\n -2.3097e-02, -2.9705e-04, 3.4198e-03, -9.1631e-03, 1.2165e-02,\n 5.6735e-05, -8.6309e-03, -1.3994e-02, -2.2539e-03, 6.2533e-03,\n 3.4795e-03, 5.0719e-03, 4.7713e-03, -7.4864e-03, 1.3079e-03,\n -1.4538e-02, 7.5645e-03, 5.8860e-03, 1.5730e-02, 4.8705e-03,\n 1.5353e-03, -2.1678e-02, -1.0310e-02, 6.1004e-03, 5.1575e-03,\n -7.4900e-03, 2.6401e-03, -7.7517e-03, -6.3621e-03, -1.1400e-03,\n -7.2952e-03, 3.3541e-04, 8.8894e-03, -2.4422e-03, -5.1184e-03,\n -5.0865e-03, -5.6854e-03, -1.1763e-02, -2.3169e-02, 4.4708e-03,\n -6.8514e-03, 8.5964e-03, 2.6079e-03, 4.4970e-03, 2.4140e-03,\n 1.1252e-03, -2.4444e-03, 1.9985e-02, 8.0328e-04, 8.2190e-03,\n 3.4273e-03, 1.6571e-02, -2.9898e-02, 8.7783e-03, -7.9232e-03,\n 7.6543e-04, -8.2508e-05, -2.5699e-03, 1.5321e-02, 5.0148e-03,\n 1.0669e-02, -6.5577e-03, -2.6287e-03, -2.6971e-03, 1.3160e-03,\n -1.3916e-02, -4.3128e-03, -1.0311e-02, 3.4029e-04, 4.2125e-03,\n -6.5241e-03, 8.2922e-04, 2.7567e-03, -8.4276e-03, 7.6819e-04,\n 2.0148e-02, 1.0713e-02, -1.8896e-03, -2.1177e-03, 3.1806e-03,\n -4.6418e-03, -9.7884e-03, 2.7837e-02, 6.7409e-04, -2.9944e-03,\n -1.4737e-03, -7.2015e-03, -1.1577e-02, 4.1659e-03, -8.1868e-04,\n 8.6056e-03, -1.4709e-02, 5.5211e-03, 1.8390e-03, -2.2767e-03,\n -7.2628e-04, -4.3639e-03, 1.1705e-02, -1.5516e-03, 2.1628e-03,\n 6.6423e-03, -1.4505e-03, 3.6067e-03, 7.0541e-04, 8.4488e-03,\n -2.9437e-03, 3.0495e-03, -1.3179e-03, -3.8313e-03, 1.2460e-02,\n 3.8670e-03, 6.0224e-03, -2.2369e-03, 1.0610e-02, 1.2025e-02,\n 3.2769e-02, -4.3321e-03, 3.2535e-03, -6.7325e-03, 9.3032e-03,\n -1.7271e-03, -5.4432e-03, 1.1992e-02, 3.8595e-03, -2.9871e-03,\n -4.5665e-03, 1.2341e-02, -8.2521e-03, 2.4882e-03, 9.4213e-03,\n -1.5896e-03, -5.5994e-03, -6.5958e-03, -1.3890e-03, 1.8331e-02,\n 2.8531e-02, 4.7330e-03, 1.5404e-02, 2.2826e-03, 9.4016e-03,\n 6.0602e-04, 5.8020e-03, 8.1731e-04, 1.8515e-02, -1.4579e-02,\n 5.4787e-03, 2.1025e-03, 3.0013e-03, -1.3917e-02, 1.1158e-02,\n -7.1939e-03, 2.7697e-03, -8.7865e-04, 7.5504e-03, 4.0084e-03,\n -8.1972e-03, 1.1769e-04], device='cuda:0')",
13
+ "exp_avg_sq": "tensor([0.0008, 0.0006, 0.0007, 0.0009, 0.0007, 0.0010, 0.0008, 0.0006, 0.0008,\n 0.0009, 0.0009, 0.0007, 0.0008, 0.0007, 0.0006, 0.0006, 0.0008, 0.0011,\n 0.0009, 0.0008, 0.0008, 0.0005, 0.0012, 0.0007, 0.0006, 0.0005, 0.0010,\n 0.0012, 0.0010, 0.0011, 0.0007, 0.0008, 0.0005, 0.0006, 0.0010, 0.0009,\n 0.0006, 0.0006, 0.0009, 0.0008, 0.0006, 0.0008, 0.0007, 0.0008, 0.0007,\n 0.0015, 0.0008, 0.0009, 0.0007, 0.0007, 0.0007, 0.0006, 0.0007, 0.0006,\n 0.0009, 0.0008, 0.0008, 0.0008, 0.0005, 0.0008, 0.0005, 0.0007, 0.0006,\n 0.0007, 0.0009, 0.0009, 0.0006, 0.0006, 0.0010, 0.0007, 0.0007, 0.0008,\n 0.0006, 0.0006, 0.0008, 0.0008, 0.0008, 0.0007, 0.0007, 0.0007, 0.0008,\n 0.0007, 0.0007, 0.0006, 0.0009, 0.0011, 0.0005, 0.0006, 0.0009, 0.0007,\n 0.0007, 0.0006, 0.0012, 0.0007, 0.0007, 0.0006, 0.0006, 0.0008, 0.0009,\n 0.0007, 0.0039, 0.0007, 0.0010, 0.0011, 0.0005, 0.0006, 0.0008, 0.0008,\n 0.0011, 0.0007, 0.0006, 0.0008, 0.0010, 0.0005, 0.0010, 0.0016, 0.0008,\n 0.0003, 0.0008, 0.0008, 0.0007, 0.0009, 0.0010, 0.0006, 0.0009, 0.0007,\n 0.0010, 0.0005, 0.0007, 0.0006, 0.0008, 0.0007, 0.0009, 0.0007, 0.0009,\n 0.0011, 0.0007, 0.0008, 0.0009, 0.0007, 0.0006, 0.0007, 0.0007, 0.0008,\n 0.0006, 0.0005, 0.0009, 0.0007, 0.0008, 0.0007, 0.0007, 0.0005, 0.0008,\n 0.0008, 0.0005, 0.0014, 0.0007, 0.0010, 0.0008, 0.0008, 0.0007, 0.0008,\n 0.0006, 0.0010, 0.0007, 0.0007, 0.0007, 0.0009, 0.0010, 0.0009, 0.0008,\n 0.0007, 0.0009, 0.0006, 0.0010, 0.0016, 0.0008, 0.0008, 0.0006, 0.0006,\n 0.0005, 0.0006, 0.0007, 0.0006, 0.0008, 0.0011, 0.0007, 0.0008, 0.0008,\n 0.0006, 0.0006, 0.0008, 0.0007, 0.0008, 0.0008, 0.0007, 0.0007, 0.0006,\n 0.0007, 0.0005, 0.0008, 0.0010, 0.0006, 0.0005, 0.0006, 0.0008, 0.0006,\n 0.0006, 0.0008, 0.0006, 0.0007, 0.0008, 0.0005, 0.0005, 0.0005, 0.0007,\n 0.0005, 0.0007, 0.0006, 0.0009, 0.0007, 0.0009, 0.0011, 0.0006, 0.0009,\n 0.0006, 0.0007, 0.0008, 0.0010, 0.0007, 0.0005, 0.0006, 0.0008, 0.0006,\n 0.0007, 0.0007, 0.0013, 0.0008, 0.0007, 0.0008, 0.0008, 0.0008, 0.0007,\n 0.0007, 0.0008, 0.0007, 0.0008, 0.0008, 0.0007, 0.0008, 0.0007, 0.0006,\n 0.0009, 0.0006, 0.0009, 0.0008, 0.0005, 0.0006, 0.0010, 0.0017, 0.0006,\n 0.0007, 0.0008, 0.0005, 0.0005, 0.0005, 0.0005, 0.0010, 0.0010, 0.0009,\n 0.0007, 0.0006, 0.0008, 0.0007, 0.0008, 0.0007, 0.0010, 0.0005, 0.0006,\n 0.0005, 0.0020, 0.0008, 0.0008, 0.0006, 0.0007, 0.0008, 0.0008, 0.0007,\n 0.0009, 0.0008, 0.0007, 0.0008, 0.0008, 0.0007, 0.0007, 0.0009, 0.0005,\n 0.0009, 0.0007, 0.0007, 0.0010, 0.0006, 0.0009, 0.0005, 0.0011, 0.0006,\n 0.0011, 0.0007, 0.0003, 0.0009, 0.0008, 0.0009, 0.0007, 0.0008, 0.0006,\n 0.0006, 0.0009, 0.0005, 0.0006, 0.0008, 0.0008, 0.0006, 0.0009, 0.0004,\n 0.0008, 0.0008, 0.0007, 0.0008, 0.0008, 0.0006, 0.0007, 0.0008, 0.0009,\n 0.0008, 0.0008, 0.0009, 0.0011, 0.0010, 0.0008, 0.0009, 0.0009, 0.0007,\n 0.0009, 0.0006, 0.0006, 0.0007, 0.0008, 0.0007, 0.0006, 0.0009, 0.0009,\n 0.0010, 0.0008, 0.0006, 0.0007, 0.0009, 0.0005, 0.0007, 0.0015, 0.0006,\n 0.0016, 0.0009, 0.0006, 0.0008, 0.0008, 0.0009, 0.0010, 0.0005, 0.0005,\n 0.0006, 0.0010, 0.0018, 0.0011, 0.0007, 0.0007, 0.0005, 0.0009, 0.0006,\n 0.0008, 0.0009, 0.0008, 0.0007, 0.0008, 0.0008, 0.0008, 0.0008, 0.0005,\n 0.0010, 0.0006, 0.0008, 0.0010, 0.0007, 0.0007, 0.0009, 0.0008, 0.0009,\n 0.0007, 0.0005, 0.0009, 0.0009, 0.0009, 0.0007, 0.0009, 0.0006, 0.0008,\n 0.0006, 0.0008, 0.0016, 0.0007, 0.0005, 0.0005, 0.0006, 0.0007, 0.0010,\n 0.0007, 0.0008, 0.0009, 0.0006, 0.0007, 0.0009, 0.0006, 0.0008, 0.0008,\n 0.0009, 0.0005, 0.0006, 0.0007, 0.0008, 0.0007, 0.0016, 0.0009, 0.0005,\n 0.0009, 0.0005, 0.0004, 0.0008, 0.0008, 0.0011, 0.0007, 0.0005, 0.0011,\n 0.0011, 0.0010, 0.0006, 0.0008, 0.0007, 0.0008, 0.0006, 0.0009, 0.0004,\n 0.0007, 0.0008, 0.0005, 0.0007, 0.0008, 0.0007, 0.0006, 0.0007, 0.0023,\n 0.0005, 0.0005, 0.0005, 0.0008, 0.0007, 0.0013, 0.0006, 0.0005, 0.0008,\n 0.0007, 0.0009, 0.0009, 0.0007, 0.0021, 0.0007, 0.0007, 0.0005, 0.0007,\n 0.0010, 0.0004, 0.0009, 0.0006, 0.0007, 0.0008, 0.0008, 0.0008, 0.0009,\n 0.0007, 0.0006, 0.0007, 0.0007, 0.0008, 0.0009, 0.0006, 0.0005, 0.0007,\n 0.0008, 0.0006, 0.0005, 0.0006, 0.0008, 0.0009, 0.0008, 0.0007, 0.0008,\n 0.0007, 0.0006, 0.0009, 0.0008, 0.0007, 0.0007, 0.0010, 0.0006],\n device='cuda:0')"
14
+ },
15
+ "2": {
16
+ "step": "tensor(10016.)",
17
+ "exp_avg": "tensor([[-9.8179e-06, -3.9193e-04, -1.0578e-05, ..., -2.1139e-04,\n -3.2366e-04, 1.6616e-03],\n [-5.0350e-05, -7.4427e-05, -1.1566e-04, ..., -3.8804e-04,\n -2.9391e-07, -3.6509e-04],\n [-7.2161e-05, 1.6187e-04, 3.8308e-05, ..., 1.8103e-04,\n -2.4759e-04, -7.4580e-04],\n ...,\n [-8.1134e-05, -2.1613e-05, -1.0743e-04, ..., 1.1268e-05,\n -4.2747e-04, 8.2383e-04],\n [-1.2400e-05, -9.7584e-05, -1.2765e-04, ..., -4.1980e-04,\n 7.7251e-04, 3.6294e-06],\n [ 3.8643e-05, -6.4742e-04, -1.6820e-05, ..., 5.2389e-05,\n -7.7878e-04, -5.9653e-04]], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([[3.2166e-07, 4.3945e-07, 2.0910e-07, ..., 4.8037e-07, 3.0179e-06,\n 8.8235e-07],\n [3.0756e-07, 4.4031e-07, 3.1181e-07, ..., 5.3277e-07, 8.0102e-06,\n 5.0448e-07],\n [3.3394e-07, 4.4960e-07, 3.5716e-07, ..., 5.2348e-07, 1.9419e-06,\n 5.6065e-07],\n ...,\n [3.4269e-07, 4.2970e-07, 2.3079e-07, ..., 4.9293e-07, 1.6045e-06,\n 5.6070e-07],\n [5.0686e-07, 4.3616e-07, 2.8003e-07, ..., 5.5336e-07, 3.8378e-06,\n 6.8144e-07],\n [4.0607e-07, 5.4086e-07, 2.8032e-07, ..., 5.0043e-07, 1.4115e-06,\n 5.7669e-07]], device='cuda:0')"
19
+ },
20
+ "3": {
21
+ "step": "tensor(10016.)",
22
+ "exp_avg": "tensor([[-6.3814e-04, 7.6909e-05, -5.5737e-06, ..., -9.4342e-04,\n 5.2747e-05, -8.3993e-04],\n [ 2.2399e-04, 2.0352e-04, 1.8692e-04, ..., 3.8986e-05,\n -6.9355e-05, -7.7768e-05],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6247e-05, 3.3235e-05, 3.9450e-05, ..., -1.9844e-05,\n 1.9714e-06, 3.0520e-06],\n [-5.6778e-05, 2.3375e-05, 3.6694e-06, ..., 6.0181e-05,\n 5.4625e-05, -7.5317e-06],\n [-1.2534e-04, 7.6436e-06, 3.3863e-04, ..., -5.5008e-05,\n -8.2829e-05, -2.2312e-05]], device='cuda:0')",
23
+ "exp_avg_sq": "tensor([[6.2519e-07, 2.3939e-07, 6.4600e-07, ..., 2.6150e-06, 2.0219e-07,\n 1.6974e-06],\n [5.6853e-07, 3.4531e-07, 5.5654e-07, ..., 2.8345e-07, 5.5817e-07,\n 2.1500e-07],\n [3.8232e-14, 9.0220e-14, 1.6912e-14, ..., 1.0555e-14, 1.1131e-13,\n 1.5481e-15],\n ...,\n [4.5183e-07, 1.2684e-07, 1.2056e-07, ..., 1.6490e-07, 3.9520e-07,\n 3.7259e-07],\n [3.7035e-07, 2.1795e-07, 1.8784e-07, ..., 2.3851e-07, 1.3810e-06,\n 9.7009e-08],\n [4.5702e-07, 1.4390e-07, 1.2334e-06, ..., 1.7660e-07, 1.6074e-07,\n 4.1726e-07]], device='cuda:0')"
24
+ },
25
+ "4": {
26
+ "step": "tensor(10016.)",
27
+ "exp_avg": "tensor([-2.8674e-03, 6.8824e-03, 5.6052e-45, ..., 4.8983e-03,\n -2.9249e-05, 1.9671e-03], device='cuda:0')",
28
+ "exp_avg_sq": "tensor([2.4248e-04, 2.4851e-04, 2.5019e-10, ..., 1.9875e-04, 1.6044e-04,\n 2.7299e-04], device='cuda:0')"
29
+ },
30
+ "5": {
31
+ "step": "tensor(10016.)",
32
+ "exp_avg": "tensor([[ 1.4814e-05, -2.0463e-05, 5.6052e-45, ..., -5.6146e-05,\n 5.3578e-06, -4.4523e-05],\n [-5.2935e-05, 1.4398e-04, 5.6052e-45, ..., 3.2780e-05,\n 3.5009e-06, -4.7773e-05],\n [-3.0843e-05, 4.3461e-05, -5.6052e-45, ..., 7.4325e-05,\n 2.8524e-06, 6.5204e-06],\n ...,\n [ 1.9745e-05, -3.5036e-05, -5.6052e-45, ..., 4.2465e-05,\n -2.2283e-05, 5.5465e-05],\n [ 5.5021e-05, -1.4208e-05, 5.6052e-45, ..., -4.7988e-05,\n -2.2606e-05, -2.2480e-05],\n [ 2.3099e-05, 1.1158e-05, -5.6052e-45, ..., 1.9876e-06,\n -2.9665e-05, -5.5437e-05]], device='cuda:0')",
33
+ "exp_avg_sq": "tensor([[6.6543e-08, 7.3715e-08, 2.1851e-14, ..., 6.0568e-08, 6.8394e-08,\n 1.7841e-07],\n [7.9908e-08, 9.8651e-08, 4.0528e-14, ..., 6.4769e-08, 8.1875e-08,\n 1.6282e-07],\n [7.3521e-08, 9.9453e-08, 7.0574e-14, ..., 6.6188e-08, 7.1478e-08,\n 1.4577e-07],\n ...,\n [1.0618e-07, 1.0576e-07, 3.1236e-14, ..., 7.8248e-08, 7.0178e-08,\n 1.5782e-07],\n [8.3416e-08, 1.2309e-07, 4.2162e-14, ..., 9.1995e-08, 5.2918e-08,\n 1.0290e-07],\n [7.8920e-08, 7.7235e-08, 8.8920e-14, ..., 7.8652e-08, 6.1793e-08,\n 1.4354e-07]], device='cuda:0')"
34
+ },
35
+ "6": {
36
+ "step": "tensor(10016.)",
37
+ "exp_avg": "tensor([[-2.0375e-04, -1.1472e-04, 4.6290e-05, ..., -7.2616e-08,\n -1.4589e-06, -5.2584e-05],\n [-6.5752e-04, -6.3275e-04, 5.7298e-04, ..., 8.6312e-05,\n -1.3223e-04, 3.4049e-04],\n [-4.2218e-05, -3.4476e-04, 1.1774e-04, ..., -5.0655e-05,\n 5.6003e-05, -2.0091e-05],\n ...,\n [ 4.4240e-04, -1.0664e-04, 3.1808e-05, ..., -8.9072e-05,\n -4.0371e-05, -4.7473e-05],\n [ 1.8582e-04, 2.5634e-04, -1.3594e-04, ..., -3.5038e-05,\n 2.0596e-04, -9.3114e-06],\n [-3.7536e-04, -2.6246e-05, 4.9860e-04, ..., 4.8895e-05,\n 3.8706e-05, -8.3456e-05]], device='cuda:0')",
38
+ "exp_avg_sq": "tensor([[5.4252e-07, 9.9487e-07, 1.0503e-06, ..., 2.5638e-07, 1.9178e-07,\n 3.1939e-07],\n [4.2945e-07, 6.5671e-07, 1.1153e-06, ..., 1.9373e-07, 1.5734e-07,\n 2.9352e-07],\n [5.6634e-07, 1.0079e-06, 1.2031e-06, ..., 2.7488e-07, 2.0735e-07,\n 2.6156e-07],\n ...,\n [6.1294e-07, 1.0450e-06, 1.1330e-06, ..., 3.2043e-07, 2.0246e-07,\n 3.2213e-07],\n [5.9897e-07, 8.7112e-07, 9.0165e-07, ..., 2.3568e-07, 1.7915e-07,\n 2.1945e-07],\n [6.0014e-07, 1.3592e-06, 1.2822e-06, ..., 3.2163e-07, 2.4147e-07,\n 3.1532e-07]], device='cuda:0')"
39
+ },
40
+ "7": {
41
+ "step": "tensor(10016.)",
42
+ "exp_avg": "tensor([-3.9521e-04, 1.0025e-02, -3.4425e-03, -2.9042e-03, -6.7222e-03,\n 1.0203e-03, 4.8489e-03, -7.6007e-03, 5.7024e-03, 1.1802e-02,\n -9.0425e-03, -3.7384e-03, -7.4702e-03, -2.5631e-03, -7.0003e-03,\n 1.1659e-02, -5.6465e-03, 2.8962e-03, -6.9680e-04, 1.2211e-03,\n 9.2135e-03, -2.2254e-03, 4.8373e-03, -1.2298e-02, -1.2401e-03,\n -2.3686e-03, -1.1104e-02, -7.4321e-03, -1.5058e-03, -4.7160e-04,\n -4.9015e-03, -2.2634e-03, 2.1154e-03, -5.2001e-03, 3.6309e-03,\n -1.0871e-04, -2.9993e-03, 4.3212e-03, 2.3041e-03, 9.1185e-04,\n -5.7235e-03, 7.3680e-03, -5.3314e-03, -2.8665e-04, -6.0414e-03,\n -6.1348e-04, -5.3371e-03, 4.9001e-03, 2.0444e-04, -2.1179e-03,\n -2.4674e-03, -2.8249e-03, -1.4244e-03, -7.7803e-03, -3.8502e-03,\n 4.8267e-03, -3.0442e-03, -5.2260e-03, 5.2635e-03, -8.0501e-03,\n 1.3684e-03, -1.1726e-02, -8.5025e-04, -1.6670e-03, -1.1754e-03,\n -7.6438e-03, -1.2396e-03, 4.3827e-03, 7.7939e-03, 3.6075e-05,\n 4.2692e-03, -9.5785e-03, 4.3890e-03, -3.0940e-03, -3.3552e-03,\n 5.6820e-03, -2.3257e-03, -3.3224e-03, -3.4770e-03, -2.1778e-04,\n -4.4759e-03, -6.7084e-03, 4.8150e-03, 5.9377e-03, -1.1838e-03,\n -2.2041e-03, 1.2224e-02, 1.2478e-03, -1.1409e-02, 7.2782e-04,\n -1.9295e-03, -1.3966e-04, -8.3682e-03, -3.7070e-03, 7.4027e-03,\n -1.5470e-03, 5.7257e-03, -1.0227e-03, 3.1343e-03, 4.1820e-03,\n 5.1908e-03, -6.1196e-03, -1.9125e-03, -4.7909e-03, 2.1087e-04,\n -1.2080e-03, -5.9246e-03, -4.4767e-03, -2.4906e-04, 2.1554e-03,\n -1.7107e-02, 3.1666e-03, -5.0006e-03, -7.4028e-03, 3.8437e-03,\n -2.3674e-03, 1.1301e-02, -1.0264e-03, 6.5232e-03, -2.9649e-03,\n -1.6587e-02, 1.0873e-02, -1.0257e-02, -4.1017e-03, -1.5764e-03,\n 9.1392e-03, 8.1071e-04, 4.3726e-03, -2.6642e-04, 3.3062e-04,\n -4.6299e-03, 5.8692e-03, 8.7721e-03, 7.9242e-03, -1.0703e-02,\n 1.3893e-02, -9.0666e-04, -4.0267e-03, 1.2698e-03, -8.4542e-03,\n 5.1651e-03, -3.9460e-03, 9.4298e-03, -1.0449e-03, -2.3215e-03,\n 7.5169e-03, -1.0512e-03, 1.3178e-02, 6.2400e-03, -2.9740e-03,\n 7.1779e-04, -5.8196e-03, 2.4668e-03, 5.3077e-03, 4.4163e-03,\n 5.8331e-03, -1.5610e-02, -2.1604e-03, 7.0498e-03, -5.2894e-03,\n -3.8458e-03, -6.9328e-03, 1.2215e-02, -2.3548e-03, -8.0606e-04,\n -9.1788e-03, 6.2454e-05, -5.1439e-03, -6.1222e-03, 3.5556e-03,\n 1.1890e-03, 3.8583e-03, -4.5485e-03, 1.3420e-03, -1.7920e-03,\n 6.0425e-03, 4.9181e-03, -2.2668e-03, 1.7077e-04, -5.6849e-03,\n 4.8488e-03, 4.3522e-03, 1.7871e-03, -1.4684e-02, 2.3140e-03,\n 8.6033e-05, -3.9198e-03, -4.0802e-04, 5.6357e-03, 3.9160e-03,\n -9.0433e-03, 6.8174e-03, -7.4115e-03, -3.8170e-04, 2.5698e-03,\n 3.4807e-05, -2.3930e-03, -3.6728e-03, 3.2852e-03, -5.3406e-03,\n -3.2338e-03, 8.3457e-03, 3.0176e-03, -5.5960e-04, -7.0133e-03,\n 2.3665e-03, 1.3641e-02, -1.8825e-03, -3.0672e-03, -1.0232e-04,\n -2.4603e-03, 2.9875e-03, -4.0616e-03, -7.1023e-03, -1.9323e-03,\n 4.3275e-03, -1.1012e-03, 1.0114e-02, -2.2518e-03, -7.7681e-03,\n -1.7961e-03, -1.9481e-02, -7.5818e-03, -2.7408e-04, 2.7084e-03,\n -3.5748e-03, -5.1957e-03, 9.3878e-03, 3.1779e-03, 1.0938e-03,\n 6.1945e-04, 1.7251e-03, -3.7319e-03, -5.3101e-03, -4.4769e-03,\n -4.7314e-04, -5.6810e-03, 3.2375e-03, 1.4762e-03, 4.8414e-03,\n -1.7911e-03, 2.1479e-03, 2.5961e-03, 5.4203e-04, 5.9252e-03,\n -8.8867e-04, 2.8670e-03, 4.8645e-03, 5.3620e-03, -2.7102e-03,\n -5.3472e-03, -5.6664e-03, 4.5370e-03, -3.7915e-03, 1.3887e-03,\n 3.8007e-03, 1.2682e-03, -1.2434e-02, -5.0079e-03, -7.3970e-04,\n -4.2770e-03, -8.5579e-03, -2.2283e-03, -6.9061e-04, -1.2984e-03,\n 1.0016e-03, -3.2562e-04, -2.7769e-04, 4.4940e-03, 1.2528e-03,\n 2.3750e-03, 5.0851e-04, -1.9663e-03, 1.7216e-03, -9.6886e-03,\n 5.1206e-03, -2.8122e-03, -2.7588e-03, 2.5488e-05, -5.6652e-03,\n -9.4268e-03, 1.0801e-02, 1.5877e-03, 9.6101e-04, -6.6337e-03,\n 6.3818e-03, 4.8607e-03, -3.5454e-03, -6.3665e-03, 1.7998e-03,\n -3.9888e-03, -1.3914e-03, -7.5663e-03, -6.3880e-03, 6.8363e-04,\n 4.3407e-03, -9.3103e-03, 1.7285e-04, -1.3273e-03, -4.3378e-03,\n 7.0337e-03, 2.0537e-03, -9.5827e-03, 1.5962e-03, 2.3515e-03,\n 1.3660e-03, -6.1290e-03, 6.6379e-03, -1.8438e-03, -7.5895e-05,\n 1.6560e-03, -5.6014e-03, -6.1315e-04, -1.4239e-02, 5.5588e-03,\n 8.8219e-03, -4.7887e-03, 1.5715e-03, -7.1064e-03, 1.1085e-03,\n 6.0732e-03, 6.0689e-03, 3.5259e-03, 1.7815e-03, -1.2584e-02,\n 5.9342e-03, 6.0829e-03, 3.5645e-03, 5.2666e-03, 2.4634e-04,\n 2.1046e-03, -2.5782e-03, 4.6474e-03, -1.0212e-02, 7.7583e-04,\n 2.6275e-04, 1.5249e-02, -4.6752e-03, 6.0174e-03, -5.5512e-03,\n 2.7517e-03, 2.6377e-03, 1.0021e-02, -2.0088e-03, -3.2339e-03,\n 1.0279e-02, -4.6556e-04, 5.8517e-03, -7.3744e-03, 2.5898e-03,\n 7.9514e-03, 4.8085e-03, 2.2945e-03, 9.4433e-04, 3.8969e-03,\n 1.0340e-02, 3.0956e-03, 3.9128e-03, 4.3123e-03, -1.4898e-03,\n -1.1888e-02, 5.1116e-03, -4.9139e-03, -2.8362e-02, -9.8843e-03,\n 1.6758e-03, -2.3603e-03, 1.7608e-03, -7.8440e-03, 7.0892e-04,\n 3.2205e-03, -4.6825e-03, 8.3627e-04, -1.3537e-02, 5.0557e-03,\n -3.3185e-03, -3.6996e-03, 9.0317e-03, 8.2876e-03, 8.5831e-04,\n -3.8710e-03, 9.1183e-03, -4.8693e-03, -9.6358e-03, -4.1750e-04,\n -1.6613e-03, -5.0153e-03, 6.5941e-03, -9.2195e-03, 4.0140e-03,\n -9.7223e-03, -4.5391e-03, 2.4463e-03, 3.6960e-04, 2.4746e-03,\n 1.1040e-03, -2.6493e-03, -8.6802e-03, 1.0557e-02, -9.8260e-04,\n -1.3089e-03, -6.4534e-03, 4.9732e-03, -1.2373e-03, 7.6062e-03,\n -1.1041e-03, 2.3304e-03, 4.4068e-03, -4.1253e-03, -2.2678e-03,\n 1.5003e-03, 8.3664e-03, -2.9959e-03, -7.0351e-03, 4.1937e-03,\n 1.6262e-03, 3.1483e-03, -2.0198e-03, -2.4643e-03, -6.6412e-03,\n -2.0961e-03, -6.7762e-03, -1.3035e-03, -4.3704e-03, 3.2740e-03,\n -1.8165e-03, 2.9152e-03, 8.1934e-03, -9.9574e-04, -2.6204e-03,\n -6.7812e-03, 7.5646e-03, -7.1002e-04, -6.6834e-04, 5.9905e-03,\n -3.7822e-03, 1.2120e-02, 1.4795e-03, 7.2880e-03, 7.2132e-03,\n 5.4268e-03, -7.9386e-03, -5.0589e-03, 3.2681e-03, -5.5829e-03,\n 5.6897e-03, 7.9523e-03, -5.0982e-03, 4.0790e-03, 2.7666e-03,\n -3.2733e-03, 6.6724e-03, -1.2583e-02, -7.1632e-03, 6.8125e-03,\n 4.3877e-03, -1.0835e-02, -1.8892e-03, 1.0706e-03, 2.7667e-03,\n 5.8198e-03, 3.1441e-03, -8.0715e-03, 3.9204e-03, -5.1461e-03,\n 1.0621e-03, 5.9918e-03, -5.7822e-03, 2.3137e-03, -9.4364e-04,\n -4.4228e-03, -1.1528e-02, 9.2474e-04, -8.5033e-03, -7.1016e-03,\n 5.5167e-03, 2.7969e-03, -1.2835e-02, -5.5546e-03, 2.2180e-04,\n -4.2571e-03, 2.4392e-03, 2.8141e-03, -1.2486e-02, 8.3619e-03,\n 2.8235e-03, -4.3806e-03, 4.3757e-05, 2.5895e-03, -6.8112e-03,\n 9.0937e-03, 2.2535e-03, -1.9697e-04, 1.1993e-02, -4.3605e-03,\n -6.8050e-03, -6.2675e-03, -4.9430e-03, 7.3747e-04, 1.6215e-03,\n 3.5734e-05, -6.0029e-03, 3.9269e-04, 5.0830e-03, -5.7076e-03,\n -4.0865e-03, -2.8646e-03, -3.2247e-03, 1.0603e-02, -6.6219e-03,\n -3.6004e-03, -1.5037e-03], device='cuda:0')",
43
+ "exp_avg_sq": "tensor([0.0003, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002,\n 0.0003, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0004,\n 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0004, 0.0003, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0004, 0.0003, 0.0002, 0.0003, 0.0003, 0.0002,\n 0.0004, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003,\n 0.0002, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0002,\n 0.0004, 0.0002, 0.0002, 0.0003, 0.0003, 0.0004, 0.0002, 0.0002, 0.0004,\n 0.0006, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002, 0.0002,\n 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002,\n 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002,\n 0.0003, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002,\n 0.0002, 0.0002, 0.0004, 0.0003, 0.0003, 0.0003, 0.0004, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003,\n 0.0002, 0.0004, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0002,\n 0.0003, 0.0002, 0.0003, 0.0001, 0.0004, 0.0002, 0.0003, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0003, 0.0004,\n 0.0003, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002, 0.0001, 0.0002, 0.0003,\n 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003,\n 0.0002, 0.0004, 0.0003, 0.0003, 0.0002, 0.0005, 0.0002, 0.0003, 0.0003,\n 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0004,\n 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003,\n 0.0004, 0.0004, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002,\n 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002,\n 0.0003, 0.0003, 0.0003, 0.0004, 0.0003, 0.0003, 0.0002, 0.0004, 0.0003,\n 0.0003, 0.0002, 0.0002, 0.0002, 0.0003, 0.0001, 0.0003, 0.0003, 0.0004,\n 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0001, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0004,\n 0.0003, 0.0003, 0.0003, 0.0003, 0.0004, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0003, 0.0004, 0.0002, 0.0002, 0.0003, 0.0002, 0.0002,\n 0.0003, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0005, 0.0003,\n 0.0002, 0.0002, 0.0003, 0.0003, 0.0004, 0.0003, 0.0003, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002, 0.0005, 0.0003,\n 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002, 0.0004, 0.0002,\n 0.0003, 0.0003, 0.0004, 0.0003, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002,\n 0.0003, 0.0003, 0.0002, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0004, 0.0005, 0.0003, 0.0001, 0.0002, 0.0002,\n 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002,\n 0.0003, 0.0003, 0.0004, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002,\n 0.0002, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003,\n 0.0003, 0.0003, 0.0004, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003,\n 0.0002, 0.0002, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0004,\n 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002,\n 0.0003, 0.0002, 0.0003, 0.0002, 0.0005, 0.0002, 0.0002, 0.0002, 0.0002,\n 0.0002, 0.0002, 0.0002, 0.0004, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002,\n 0.0002, 0.0002, 0.0004, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0004, 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002, 0.0003, 0.0004, 0.0002,\n 0.0003, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002,\n 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002,\n 0.0003, 0.0003, 0.0004, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002,\n 0.0003, 0.0002, 0.0001, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003,\n 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003],\n device='cuda:0')"
44
+ },
45
+ "8": {
46
+ "step": "tensor(10016.)",
47
+ "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-6.9138e-05, -1.2312e-05, -5.8816e-05, ..., 5.0560e-06,\n 1.9417e-05, 6.2098e-06],\n [-1.6012e-04, -1.1943e-04, 4.7393e-06, ..., 5.9892e-06,\n -7.5625e-05, -7.2494e-05],\n ...,\n [-1.3601e-04, -3.0510e-05, 2.5649e-05, ..., -2.3644e-05,\n -2.6034e-07, -8.3294e-06],\n [ 2.2074e-05, -6.0803e-05, -1.8976e-05, ..., 3.0242e-04,\n 1.8729e-04, -1.4066e-07],\n [-6.8249e-06, 6.4685e-05, 1.7750e-06, ..., 9.0929e-05,\n 9.5740e-05, -2.0322e-04]], device='cuda:0')",
48
+ "exp_avg_sq": "tensor([[4.7847e-16, 1.4963e-14, 8.5853e-16, ..., 6.3930e-14, 6.4055e-16,\n 2.2623e-16],\n [9.1135e-08, 1.9131e-07, 1.2570e-07, ..., 5.0857e-08, 2.3963e-08,\n 1.4810e-07],\n [5.5389e-08, 2.3195e-07, 1.2225e-07, ..., 6.7383e-08, 1.2240e-06,\n 2.2594e-07],\n ...,\n [6.3117e-07, 4.2607e-08, 2.7051e-07, ..., 3.5239e-07, 7.9591e-08,\n 4.3143e-07],\n [8.2115e-08, 1.3001e-07, 1.5956e-08, ..., 2.0530e-07, 1.3245e-06,\n 4.0999e-08],\n [9.8069e-08, 2.6608e-08, 1.6103e-08, ..., 9.7957e-08, 2.8779e-07,\n 4.0330e-07]], device='cuda:0')"
49
+ },
50
+ "9": {
51
+ "step": "tensor(10016.)",
52
+ "exp_avg": "tensor([ 5.6052e-45, -2.7223e-03, -3.3773e-03, ..., -2.7787e-03,\n 3.7337e-03, -3.8267e-04], device='cuda:0')",
53
+ "exp_avg_sq": "tensor([7.8755e-11, 1.5572e-04, 1.1583e-04, ..., 1.0950e-04, 1.1547e-04,\n 1.0450e-04], device='cuda:0')"
54
+ },
55
+ "10": {
56
+ "step": "tensor(10016.)",
57
+ "exp_avg": "tensor([[-5.6052e-45, -5.1279e-05, 3.8522e-05, ..., 4.1050e-05,\n 6.4086e-05, -4.6342e-05],\n [ 5.6052e-45, 7.9444e-06, 9.3124e-05, ..., -2.3203e-05,\n 1.1999e-04, 9.7304e-06],\n [ 5.6052e-45, 3.0812e-05, 1.6797e-04, ..., 3.5138e-05,\n 8.1928e-06, -6.5194e-07],\n ...,\n [-5.6052e-45, -7.9221e-06, 2.5721e-06, ..., -2.4073e-05,\n -6.7549e-05, 2.5774e-05],\n [-5.6052e-45, 1.8916e-05, -4.7156e-05, ..., 4.7352e-06,\n 1.0818e-04, -5.4907e-06],\n [ 5.6052e-45, -1.9450e-05, 4.3095e-05, ..., 3.2493e-05,\n 1.1070e-04, 3.6408e-05]], device='cuda:0')",
58
+ "exp_avg_sq": "tensor([[3.2775e-15, 1.6084e-08, 2.8766e-08, ..., 2.9162e-08, 2.3776e-08,\n 1.8736e-08],\n [4.1690e-15, 1.7418e-08, 3.7121e-08, ..., 4.3834e-08, 2.6442e-08,\n 2.4465e-08],\n [4.6645e-16, 1.7603e-08, 3.3363e-08, ..., 3.9736e-08, 2.8770e-08,\n 2.9205e-08],\n ...,\n [4.2293e-15, 2.4469e-08, 4.8487e-08, ..., 3.8257e-08, 3.1702e-08,\n 2.9256e-08],\n [5.9290e-16, 1.9406e-08, 3.4870e-08, ..., 3.5013e-08, 2.9518e-08,\n 2.9686e-08],\n [1.4930e-16, 1.8927e-08, 3.9169e-08, ..., 3.2886e-08, 3.1370e-08,\n 3.0219e-08]], device='cuda:0')"
59
+ },
60
+ "11": {
61
+ "step": "tensor(10016.)",
62
+ "exp_avg": "tensor([[ 2.7918e-04, -1.5356e-04, -7.4032e-05, ..., 9.4630e-05,\n 3.0045e-04, -3.0393e-04],\n [-2.7881e-04, 6.1667e-04, -1.8527e-04, ..., 2.9498e-04,\n -2.6132e-04, 4.6222e-04],\n [ 3.6948e-05, 9.8126e-05, -4.2253e-05, ..., -2.5411e-05,\n 2.6627e-07, -3.0085e-05],\n ...,\n [-1.5912e-04, 1.2181e-04, -3.0326e-04, ..., 4.9550e-05,\n -7.2909e-06, 2.1938e-04],\n [ 2.2404e-05, -8.3184e-05, 9.7745e-05, ..., 6.7612e-05,\n -9.8166e-05, -4.9762e-05],\n [ 1.2146e-04, 2.3275e-04, -4.3099e-04, ..., 2.9296e-05,\n 4.5706e-05, 4.6985e-06]], device='cuda:0')",
63
+ "exp_avg_sq": "tensor([[1.5153e-07, 2.8350e-07, 4.4203e-07, ..., 1.4283e-07, 9.3884e-08,\n 1.5133e-07],\n [1.4795e-07, 2.3895e-07, 2.9021e-07, ..., 1.3087e-07, 8.6928e-08,\n 1.1189e-07],\n [1.0092e-07, 1.9491e-07, 1.6870e-07, ..., 9.5465e-08, 5.6607e-08,\n 8.5845e-08],\n ...,\n [1.3937e-07, 2.7584e-07, 3.2595e-07, ..., 1.3323e-07, 9.5508e-08,\n 1.3785e-07],\n [1.2475e-07, 2.2165e-07, 2.6099e-07, ..., 1.2188e-07, 6.5325e-08,\n 1.1142e-07],\n [1.2817e-07, 2.5210e-07, 2.6429e-07, ..., 1.2372e-07, 8.3215e-08,\n 1.3020e-07]], device='cuda:0')"
64
+ },
65
+ "12": {
66
+ "step": "tensor(10016.)",
67
+ "exp_avg": "tensor([-1.2362e-02, 4.8426e-03, 4.2435e-05, 4.7581e-03, -4.4610e-03,\n -4.1041e-03, 8.6052e-04, 4.1919e-03, -2.7306e-04, 7.3426e-03,\n 4.2142e-03, 1.3940e-03, -4.8281e-03, -3.7743e-03, 1.1693e-02,\n 7.8936e-04, 3.3608e-03, -6.8911e-03, -6.7572e-03, 5.7389e-04,\n 5.2455e-03, 1.8914e-03, 4.5871e-04, 6.6114e-03, -3.2408e-03,\n 1.2811e-03, -2.7133e-03, 6.4130e-06, 1.7689e-03, -6.3978e-03,\n -1.7877e-03, -8.5626e-04, 3.4507e-03, 1.7616e-02, 1.4528e-03,\n -2.0426e-03, -1.5736e-03, 3.1344e-03, -7.5360e-04, -1.3056e-03,\n -2.3573e-03, 3.4339e-03, -4.7169e-03, -3.7668e-03, -1.5302e-03,\n 5.6249e-03, 1.2891e-03, 2.3795e-03, -2.3714e-03, 1.3578e-03,\n 3.0613e-03, -7.2436e-03, -1.4404e-03, 1.0855e-03, -5.7028e-03,\n -9.5953e-03, -2.0521e-04, 1.4532e-03, -2.7054e-03, -1.2896e-04,\n 3.8033e-03, -4.5963e-04, -2.5754e-03, 1.3256e-03, -2.7579e-03,\n -8.1060e-03, -5.6894e-03, 1.5754e-03, 2.9705e-03, 4.1224e-04,\n 1.2149e-02, 2.5033e-03, -8.1352e-03, 7.7594e-03, 1.2377e-03,\n 3.2524e-04, 2.5302e-03, -8.5485e-03, -4.1523e-03, 1.7023e-04,\n -3.2737e-03, 3.2371e-03, -2.8390e-03, -2.2746e-04, -1.3227e-02,\n 4.3165e-03, 1.0302e-03, -2.8495e-03, -1.6891e-03, -7.7948e-03,\n 1.0579e-03, 5.6093e-03, -2.2958e-03, -3.9503e-03, -4.7718e-03,\n -3.2123e-03, -2.2116e-03, 1.3662e-03, -9.9732e-03, 8.0507e-04,\n -3.3424e-04, 7.4161e-05, -1.1116e-04, -1.4425e-03, 1.6729e-03,\n -2.6887e-04, 3.6994e-03, -3.3811e-03, 2.0443e-03, 2.5806e-03,\n 5.6052e-45, -3.2966e-03, 3.5423e-03, 5.6301e-03, 1.7831e-03,\n -4.1052e-03, 7.0937e-03, 1.0424e-03, -2.4113e-03, 6.5810e-04,\n -7.2726e-04, -7.2963e-04, -3.1367e-03, 1.5913e-03, 7.7091e-04,\n -2.3521e-03, 6.5002e-03, 4.7918e-03, -2.7830e-03, -5.2083e-03,\n -3.3431e-03, 6.0453e-03, -4.7910e-03, -2.6396e-03, 3.5029e-03,\n 8.7180e-03, -3.3310e-03, 2.6029e-03, 1.9717e-03, -8.8752e-03,\n 2.4481e-04, -2.4809e-03, -4.6229e-03, -3.2714e-03, 1.1832e-03,\n 1.3331e-03, -2.7661e-03, -2.5374e-03, 3.3626e-03, -6.7685e-03,\n 7.8843e-04, -7.0228e-03, 5.0203e-04, 3.0622e-03, 6.8837e-04,\n 9.2512e-03, -4.9876e-03, 4.6615e-03, 4.0325e-03, 2.3639e-03,\n 3.8030e-03, 5.8370e-03, 9.1732e-04, -1.8064e-02, -3.4921e-03,\n -6.5712e-03, -1.0312e-03, 6.1403e-03, 1.7892e-03, 1.1406e-02,\n -6.9362e-03, -2.3388e-04, 7.0567e-04, -1.1597e-03, -4.5423e-04,\n -2.4626e-03, 4.2784e-04, 1.7602e-04, -3.9556e-03, -5.4373e-03,\n -2.0213e-10, 3.7716e-03, 1.6375e-02, -2.4144e-03, 5.5370e-03,\n -4.2917e-03, -5.0335e-04, -4.4871e-03, 3.7728e-03, 2.1546e-03,\n -6.6244e-03, -5.0628e-04, -1.4231e-02, 1.7738e-03, -5.3237e-04,\n -6.4909e-03, 2.3597e-03, -4.8596e-03, -9.4450e-03, 9.2398e-04,\n 3.6984e-03, 2.5975e-03, 3.5956e-04, 9.5600e-04, 3.9694e-03,\n 2.4087e-03, -2.6511e-03, 5.4193e-05, 7.9662e-05, 1.8027e-03,\n 1.5548e-03, 1.9803e-03, -9.6274e-04, -2.9733e-03, -2.8247e-03,\n 2.4685e-03, -3.0381e-03, -5.3877e-04, -2.9163e-03, 1.6749e-04,\n -7.5939e-03, 2.6583e-03, 6.1998e-03, 2.4250e-03, 1.3574e-03,\n 4.3586e-03, 2.3468e-03, -2.9166e-03, -4.3847e-04, -2.3787e-04,\n 1.4941e-03, -1.0155e-02, -5.2721e-03, -1.0565e-03, 6.3465e-04,\n -3.4529e-04, -7.5671e-05, -9.1225e-04, -4.3602e-04, 2.6909e-03,\n 6.6984e-03, -3.6481e-03, -5.7938e-04, 7.5350e-04, 4.0014e-03,\n -8.7070e-04, 3.3990e-03, -3.3599e-03, -3.2076e-03, 2.4642e-03,\n -1.4620e-03, -2.6199e-03, -9.3949e-03, -1.4974e-03, -2.5438e-03,\n -6.8798e-04, -9.9637e-04, 1.7896e-03, 4.9238e-04, -2.5884e-03,\n 6.3870e-03, -1.0647e-03, -5.4066e-03, 2.9073e-03, -4.8231e-03,\n -6.0630e-03, -1.2472e-03, -8.5727e-04, 3.5300e-03, -2.8676e-04,\n -4.1597e-04, -5.2479e-03, -2.7137e-03, -3.2160e-04, 4.4025e-03,\n 3.0784e-03, 1.1431e-02, 2.0285e-03, 1.7031e-03, 1.0293e-02,\n 2.4199e-04, -4.5749e-03, -4.0242e-03, 1.4463e-03, 2.4092e-03,\n -7.1802e-03, 4.9746e-04, -4.1313e-03, 1.0551e-03, 5.3761e-03,\n -4.9199e-03, 5.3326e-03, 1.8980e-03, -7.2225e-04, 1.7943e-03,\n -6.1770e-04, 8.0325e-04, -1.3220e-03, -5.9735e-04, 4.8617e-03,\n -4.5893e-04, 1.5850e-03, 1.6150e-03, -3.0792e-03, -4.0128e-03,\n -2.5635e-03, -5.0721e-03, 3.5915e-04, -3.5777e-03, -1.3501e-03,\n -2.6228e-03, 9.5358e-04, 2.1438e-03, 3.2275e-03, -3.2084e-04,\n 3.9421e-03, -3.8991e-03, 3.3232e-03, 1.5553e-03, 9.2384e-04,\n -3.1384e-04, -2.9385e-03, 5.6863e-03, 6.9047e-04, -4.6521e-03,\n 1.9361e-03, 9.0672e-04, 4.3968e-03, -5.6467e-03, -7.9863e-03,\n -3.5170e-03, 6.9565e-03, -1.4608e-02, 1.1179e-03, 2.5743e-03,\n 1.5549e-03, -8.6890e-03, 2.4666e-03, -4.0714e-03, 6.9457e-03,\n 7.8534e-03, -4.5806e-04, -1.1397e-02, -1.9002e-02, 8.4757e-04,\n -9.6951e-03, -5.2217e-03, 2.9691e-03, 8.1126e-04, 2.6873e-03,\n -4.9991e-03, -5.3155e-03, 9.7303e-04, 1.1095e-03, 6.1797e-03,\n 3.4390e-03, 1.5365e-03, -2.4632e-03, -4.1641e-03, -4.4364e-04,\n -4.0237e-04, 5.9726e-03, -7.7426e-03, 2.7730e-04, 1.8413e-03,\n -9.7074e-03, 4.6846e-04, -6.8032e-03, -1.1643e-02, -2.8529e-04,\n -3.6272e-03, -2.0720e-03, -3.4704e-03, 1.8255e-04, 2.8669e-03,\n -7.6563e-04, 3.8490e-03, 1.9436e-03, 4.5483e-03, -5.1952e-03,\n -8.3922e-05, 8.8942e-03, -5.8488e-04, -2.9095e-03, -1.5095e-03,\n -3.9257e-03, -1.2697e-03, -5.1601e-03, -5.5318e-03, -5.0270e-03,\n 9.5257e-03, 6.3037e-03, -7.7977e-04, 6.3136e-03, 1.8394e-03,\n 4.0686e-03, 5.8780e-04, 4.5720e-04, -1.8813e-03, -1.8643e-03,\n -3.3006e-03, 2.9823e-03, 3.6702e-03, 8.6518e-04, -7.7259e-03,\n -1.0507e-02, -8.7940e-04, 5.4435e-03, -6.3860e-04, -1.8165e-03,\n -4.5528e-03, -2.6657e-03, 5.9157e-03, 2.6464e-04, -2.1975e-03,\n 2.0525e-03, 2.0984e-03, 1.1639e-03, -7.2515e-04, -3.8395e-03,\n 2.2152e-03, -2.1731e-04, 5.2664e-04, -1.4326e-03, -1.0979e-04,\n -3.7866e-04, -7.0764e-04, 1.8092e-03, -1.8494e-04, -1.4408e-03,\n 4.4175e-03, 2.5716e-03, 2.0648e-03, -3.6445e-03, 4.4568e-03,\n -1.0592e-03, -1.9801e-04, 1.0865e-03, 1.5521e-03, 2.0048e-03,\n -5.2336e-03, -2.7242e-04, -3.6302e-03, -4.8336e-04, -4.5825e-03,\n -5.4696e-03, -1.3041e-03, -8.1387e-04, 7.2242e-03, -2.0298e-03,\n 2.0887e-03, -1.1619e-03, 1.7778e-03, 2.1975e-03, -2.3864e-03,\n -4.8164e-03, -3.6731e-03, 1.4712e-04, 5.4075e-03, -2.9435e-03,\n -3.2101e-03, 8.2311e-03, 4.1681e-03, -1.0463e-03, -6.2323e-03,\n -2.4433e-03, -4.3647e-03, 3.7075e-03, -8.7442e-04, -2.5293e-04,\n 3.4069e-03, 1.8322e-03, -3.1866e-03, -5.1810e-03, 1.5410e-03,\n 3.0975e-03, -3.6225e-03, -2.7655e-05, 1.9386e-03, 4.7627e-03,\n -3.3704e-03, -2.5566e-03, -1.0715e-02, 3.0529e-05, 3.9055e-03,\n 2.7830e-05, 1.9624e-04, -4.6432e-03, 4.0302e-03, -1.0576e-02,\n -4.4137e-04, -1.4729e-03, -6.3787e-03, 5.3202e-04, -4.0078e-03,\n 2.5271e-03, 8.2336e-05, -4.0183e-03, -1.3064e-02, -4.0682e-03,\n -3.5950e-04, -3.1154e-03, 7.1672e-03, 2.8421e-03, 5.6836e-03,\n 1.5544e-03, -3.5602e-03, -1.3510e-03, 6.6105e-04, -3.0740e-03,\n -1.9409e-03, -8.0939e-05, 4.6949e-03, 4.0852e-03, 1.6674e-03,\n -6.1048e-03, 5.5242e-03, -2.8969e-03, -3.7392e-03, -1.9174e-03,\n -6.7005e-03, 3.8178e-03, 1.6693e-03, -7.8061e-03, -4.2694e-03,\n -2.8236e-03, -2.2543e-03, 5.5524e-03, -2.1002e-03, 3.0940e-03,\n 2.8652e-03, -5.1914e-03, 8.9555e-04, 2.0336e-03, -5.7068e-03,\n 9.1791e-03, 2.9323e-03, 3.6921e-03, 2.7419e-04, 2.2312e-03,\n -6.3356e-04, 3.4513e-03, -8.4965e-03, -9.5236e-04, -1.3246e-03,\n -3.2195e-04, 4.5748e-03, -8.1771e-03, 2.3835e-03, 3.1181e-04,\n 3.9811e-03, 7.0609e-04, -9.1902e-04, 1.9923e-03, 1.8854e-03,\n -7.5955e-03, 7.6143e-03, 7.6940e-03, 5.1461e-03, 3.2520e-03,\n 4.1258e-03, -1.0355e-02, 8.1252e-03, -3.8718e-04, -3.7488e-03,\n -4.9064e-03, -6.4643e-03, -2.7740e-03, -1.1903e-03, -3.9756e-03,\n 4.5865e-03, -6.0601e-04, 6.8082e-03, -6.1926e-03, 4.8684e-03,\n 4.8652e-03, -1.3216e-03, -7.7797e-03, 5.6052e-45, 3.5830e-03,\n -1.9046e-03, 5.6052e-45, 2.1079e-03, -2.6540e-04, 7.8702e-03,\n 7.4814e-04, 1.1340e-03, -1.7724e-03, 5.8423e-03, 2.7732e-03,\n -7.3776e-04, -1.4863e-02, 3.7635e-04, 1.4853e-03, -1.0527e-03,\n -3.4491e-04, 3.6610e-03, 5.7801e-03, -2.2318e-03, 1.1456e-02,\n 1.3849e-03, -1.7808e-03, 5.0586e-03, 5.4970e-03, 3.3165e-03,\n 3.9431e-03, 3.1437e-03, 1.4971e-03, 7.8246e-03, -4.5162e-03,\n 4.0038e-03, 2.0705e-03, -5.1072e-03, 1.0977e-03, -3.7285e-03,\n -2.8981e-03, -1.8340e-03, 2.2486e-04, 4.3198e-03, -2.5817e-03,\n -1.0133e-03, -3.7289e-03, -1.6974e-03, 4.8136e-03, -1.0117e-03,\n -7.1306e-03, -2.6321e-03, -5.2273e-03, 4.1488e-03, 6.1613e-05,\n -8.9231e-03, 1.3439e-03, 2.8611e-03, 8.9477e-03, 1.3137e-03,\n -4.1734e-03, 1.9019e-04, -6.7901e-04, 5.5178e-03, -2.3774e-03,\n -1.7077e-03, 2.3566e-03, 9.3695e-04, 5.7926e-05, 1.2075e-03,\n 5.7336e-03, 3.8003e-04, -2.7299e-03, -3.4732e-03, -8.7883e-03,\n -3.3848e-03, 6.6345e-03, 7.3999e-04, 8.7103e-04, 1.0714e-02,\n -4.1309e-03, -3.1731e-04, -1.8351e-03, -2.8740e-03, -5.9273e-03,\n 4.5184e-03, -2.0540e-03, 7.3621e-03, -1.5298e-03, 5.4302e-03,\n -6.7819e-03, 4.8677e-03, -2.2747e-03, -3.6403e-05, -1.5250e-04,\n -2.7276e-03, -1.4928e-03, -1.9693e-03, 1.2470e-03, -1.8988e-03,\n -2.0833e-03, 6.0956e-03, 8.0044e-03, -6.3674e-03, 2.6906e-05,\n -1.0136e-03, -4.6617e-03, -8.0675e-06, 3.7371e-03, 5.6648e-03,\n -7.0229e-04, 1.2003e-03, -5.3236e-03, -3.5319e-03, -3.8248e-04,\n 3.9754e-04, -2.8520e-04, -2.0551e-03, 1.5806e-03, 1.3067e-03,\n 1.3150e-02, -4.6744e-03, -5.4716e-05, 1.7303e-03, 9.3419e-04,\n -3.3515e-03, 4.7675e-03, -1.5112e-02, -1.0312e-02, -4.6687e-03,\n 4.8101e-03, 2.8019e-03, -3.1211e-03, -1.0701e-03, -1.0987e-02,\n 2.0711e-03, 3.7137e-03, -7.0050e-03, -6.8570e-03, 3.1680e-03,\n -1.1161e-03, 7.5987e-03, -2.0404e-03, 8.0063e-03, -3.3115e-03,\n 5.5871e-03, 1.4998e-03, -1.0306e-03, 1.0134e-03, -1.0226e-03,\n 2.8237e-05, 3.2373e-04, 4.4945e-03, -5.7604e-03, -4.6422e-03,\n -1.4502e-02, -2.3260e-03, 1.7398e-03, 4.0087e-03, -1.3774e-03,\n 6.4903e-04, -2.5706e-03, 9.8487e-03, 3.8708e-04, 3.3323e-04,\n -5.0237e-04, -1.0505e-02, -1.2583e-03, 8.6705e-04, -1.8099e-03,\n 2.4474e-03, -1.0750e-02, 8.7612e-03, 6.9397e-04, -3.9490e-04,\n -3.7896e-04, 5.4317e-03, -1.7160e-04, -4.9943e-03, -1.1382e-04,\n -3.9502e-03, -3.5174e-03, 2.5938e-03, -6.6687e-03, -4.0353e-03,\n 2.2087e-03, -3.3559e-05, 2.1603e-03, -3.7889e-03, -1.9286e-02,\n 4.9877e-03, -1.6521e-03, 2.1798e-03], device='cuda:0')",
68
+ "exp_avg_sq": "tensor([1.4397e-04, 1.2377e-04, 1.0217e-04, 1.2850e-04, 1.7877e-04, 1.6144e-04,\n 1.2759e-04, 1.3688e-04, 1.4887e-04, 1.3709e-04, 1.2926e-04, 2.1042e-04,\n 1.2935e-04, 1.5021e-04, 1.1814e-04, 7.6249e-05, 1.1091e-04, 1.2694e-04,\n 1.9537e-04, 8.1168e-05, 1.4714e-04, 1.6151e-04, 1.1379e-04, 1.4061e-04,\n 1.3234e-04, 1.2285e-04, 1.2775e-04, 1.3611e-04, 1.0802e-04, 1.4405e-04,\n 1.4892e-04, 1.2546e-04, 2.1764e-04, 1.4200e-04, 1.4535e-04, 1.3832e-04,\n 1.5023e-04, 1.3274e-04, 1.1313e-04, 1.1328e-04, 1.5848e-04, 1.4208e-04,\n 1.4210e-04, 9.8630e-05, 8.7156e-05, 1.1380e-04, 4.1578e-05, 1.0446e-04,\n 1.4213e-04, 1.0854e-04, 8.6322e-05, 1.2047e-04, 1.2172e-04, 9.0330e-05,\n 1.5370e-04, 1.4109e-04, 7.6719e-05, 8.8374e-05, 1.5869e-04, 1.3220e-04,\n 1.6190e-04, 1.6019e-04, 1.3973e-04, 1.2134e-04, 1.1859e-04, 1.7934e-04,\n 1.2138e-04, 1.6787e-04, 1.7218e-04, 1.1193e-04, 1.1164e-04, 1.2149e-04,\n 1.2997e-04, 1.3040e-04, 1.2211e-04, 1.3881e-04, 1.3397e-04, 1.2893e-04,\n 1.4446e-04, 1.7562e-04, 1.2517e-04, 1.1036e-04, 1.5457e-04, 1.9541e-04,\n 1.4162e-04, 1.6362e-04, 1.1891e-04, 1.4277e-04, 1.2668e-04, 1.4117e-04,\n 1.1429e-04, 9.7965e-05, 1.4390e-04, 1.1857e-04, 1.0930e-04, 1.5705e-04,\n 1.1330e-04, 1.2500e-04, 1.4740e-04, 1.5785e-04, 1.3312e-04, 1.5995e-04,\n 1.3154e-04, 8.9002e-05, 1.4171e-04, 1.2488e-04, 1.4173e-04, 1.2792e-04,\n 1.2866e-04, 1.0945e-04, 5.9857e-11, 8.7575e-05, 1.5495e-04, 1.2923e-04,\n 1.1470e-04, 1.6260e-04, 1.5343e-04, 1.3454e-04, 1.6494e-04, 1.4352e-04,\n 1.3789e-04, 1.6900e-04, 1.5602e-04, 1.4478e-04, 1.7977e-04, 1.0305e-04,\n 1.0954e-04, 1.3746e-04, 1.3297e-04, 2.1306e-04, 1.2108e-04, 1.5505e-04,\n 1.3145e-04, 1.3901e-04, 1.5418e-04, 1.1100e-04, 1.4421e-04, 1.4288e-04,\n 1.2768e-04, 1.3272e-04, 9.9406e-05, 8.7871e-05, 1.2756e-04, 1.5315e-04,\n 1.7099e-04, 1.2956e-04, 1.0451e-04, 1.7660e-04, 1.0600e-04, 1.2123e-04,\n 1.3305e-04, 9.9305e-05, 1.1711e-04, 1.5488e-04, 1.5703e-04, 1.3440e-04,\n 1.6520e-04, 1.6046e-04, 1.2115e-04, 1.5396e-04, 1.4002e-04, 1.4549e-04,\n 1.4484e-04, 1.7440e-04, 1.1275e-04, 1.2220e-04, 1.3361e-04, 1.4311e-04,\n 1.3648e-04, 1.1799e-04, 2.0053e-04, 1.2639e-04, 1.4426e-04, 1.1042e-04,\n 9.9491e-05, 1.2251e-04, 1.2454e-04, 1.0201e-04, 1.6357e-04, 1.5322e-04,\n 5.5248e-09, 1.3472e-04, 1.6568e-04, 1.5801e-04, 1.8133e-04, 1.1260e-04,\n 1.8476e-04, 1.3170e-04, 1.2275e-04, 1.0276e-04, 1.2267e-04, 1.2434e-04,\n 1.6251e-04, 1.1622e-04, 1.0219e-04, 1.1276e-04, 1.6145e-04, 1.2917e-04,\n 9.8928e-05, 1.6227e-04, 1.1919e-04, 1.1997e-04, 1.2616e-04, 8.5314e-05,\n 1.1534e-04, 8.1613e-05, 8.8450e-05, 1.2042e-04, 1.0307e-04, 1.0998e-04,\n 1.5215e-04, 9.4531e-05, 1.3922e-04, 1.1999e-04, 7.8594e-05, 1.4750e-04,\n 1.2271e-04, 1.2798e-04, 1.5229e-04, 1.7660e-04, 1.7315e-04, 9.3903e-05,\n 1.0527e-04, 1.3002e-04, 1.0877e-04, 9.7539e-05, 9.5444e-05, 1.5439e-04,\n 1.2352e-04, 1.2333e-04, 1.1649e-04, 1.5186e-04, 1.5051e-04, 9.3963e-05,\n 1.4102e-04, 1.4187e-04, 1.3940e-04, 9.9730e-05, 1.8314e-04, 1.6464e-04,\n 1.1041e-04, 1.3912e-04, 1.9426e-04, 1.2470e-04, 1.1298e-04, 9.5718e-05,\n 1.7164e-04, 1.0604e-04, 2.1267e-04, 1.4577e-04, 1.5827e-04, 1.5512e-04,\n 1.3803e-04, 1.3528e-04, 1.0764e-04, 1.1155e-04, 1.4394e-04, 1.6768e-04,\n 1.0940e-04, 1.2639e-04, 1.7650e-04, 1.6539e-04, 8.7503e-05, 1.6367e-04,\n 1.2469e-04, 4.2498e-05, 1.2412e-04, 1.3267e-04, 1.4817e-04, 8.9616e-05,\n 1.2347e-04, 1.4531e-04, 1.3222e-04, 1.2516e-04, 1.1776e-04, 1.2399e-04,\n 1.4433e-04, 1.0422e-04, 1.3432e-04, 6.9418e-05, 1.6351e-04, 1.5535e-04,\n 1.0522e-04, 1.6589e-04, 1.3356e-04, 1.2859e-04, 8.0739e-05, 1.2168e-04,\n 1.4612e-04, 1.5132e-04, 9.8490e-05, 1.1809e-04, 1.3354e-04, 1.5873e-04,\n 1.3571e-04, 8.9218e-05, 1.8044e-04, 9.1607e-05, 1.0827e-04, 1.4262e-04,\n 1.2826e-04, 1.4677e-04, 1.4268e-04, 1.2899e-04, 1.1146e-04, 1.4885e-04,\n 1.3999e-04, 1.1821e-04, 1.6666e-04, 1.0279e-04, 1.9660e-04, 1.3645e-04,\n 1.3630e-04, 9.3474e-05, 1.1934e-04, 1.1070e-04, 1.6800e-04, 1.2491e-04,\n 1.3869e-04, 1.4588e-04, 1.5361e-04, 1.3721e-04, 1.3496e-04, 1.0867e-04,\n 1.3738e-04, 1.2532e-04, 1.2219e-04, 1.4192e-04, 1.4837e-04, 8.2829e-05,\n 1.4887e-04, 8.9025e-05, 1.5266e-04, 1.0473e-04, 1.2212e-04, 9.9648e-05,\n 1.3039e-04, 1.2935e-04, 1.4807e-04, 1.3298e-04, 1.3734e-04, 1.0144e-04,\n 1.3963e-04, 1.7521e-04, 1.5031e-04, 1.8303e-04, 1.3145e-04, 1.5347e-04,\n 1.2074e-04, 1.2464e-04, 1.4833e-04, 1.5618e-04, 8.7985e-05, 1.0980e-04,\n 1.4764e-04, 1.4852e-04, 1.1440e-04, 1.0592e-04, 1.1693e-04, 1.3024e-04,\n 8.9252e-05, 1.0267e-04, 1.6947e-04, 1.6524e-04, 9.6238e-05, 1.4641e-04,\n 9.1975e-05, 1.0253e-04, 1.5971e-04, 1.3633e-04, 1.3212e-04, 1.1775e-04,\n 1.3177e-04, 1.0693e-04, 1.6692e-04, 9.7020e-05, 8.1375e-05, 1.4515e-04,\n 1.3687e-04, 1.2320e-04, 7.1143e-05, 1.7585e-04, 1.2897e-04, 1.1003e-04,\n 1.3032e-04, 1.6322e-04, 1.5538e-04, 1.0633e-04, 1.0261e-04, 1.3227e-04,\n 2.0543e-04, 1.3641e-04, 1.0836e-04, 1.2079e-04, 1.1135e-04, 1.2448e-04,\n 1.1059e-04, 1.5372e-04, 1.2465e-04, 1.7118e-04, 1.1418e-04, 1.1986e-04,\n 1.2203e-04, 1.1075e-04, 1.7888e-04, 9.6197e-05, 1.0966e-04, 1.4500e-04,\n 1.5050e-04, 1.7626e-04, 1.8353e-04, 1.3376e-04, 1.2816e-04, 1.1304e-04,\n 1.5357e-04, 1.3896e-04, 7.4242e-05, 1.5330e-04, 1.3700e-04, 1.1613e-04,\n 1.3571e-04, 1.0825e-04, 1.4356e-04, 1.1213e-04, 1.1244e-04, 9.7952e-05,\n 1.0738e-04, 1.5700e-04, 1.3195e-04, 1.4169e-04, 1.4724e-04, 1.4128e-04,\n 9.5404e-05, 1.2025e-04, 8.2415e-05, 1.1254e-04, 1.2740e-04, 1.0838e-04,\n 1.3855e-04, 1.1065e-04, 1.6353e-04, 9.3105e-05, 1.0561e-04, 1.6762e-04,\n 1.5442e-04, 1.4706e-04, 1.2046e-04, 1.0306e-04, 1.7153e-04, 1.2674e-04,\n 1.5259e-04, 1.1602e-04, 1.1173e-04, 1.2734e-04, 1.5921e-04, 1.2237e-04,\n 9.0331e-05, 8.0974e-05, 1.5653e-04, 1.1476e-04, 9.9305e-05, 1.4693e-04,\n 1.2349e-04, 1.8428e-04, 1.0509e-04, 1.1553e-04, 1.3850e-04, 1.2322e-04,\n 1.2249e-04, 9.8280e-05, 8.1191e-05, 1.2940e-04, 1.4233e-04, 1.3254e-04,\n 1.1581e-04, 1.5633e-04, 1.2930e-04, 1.0156e-04, 1.5866e-04, 1.0059e-04,\n 1.1303e-04, 1.5857e-04, 1.0051e-04, 1.2645e-04, 1.7231e-04, 1.2677e-04,\n 1.1506e-04, 1.4674e-04, 9.2782e-05, 1.6464e-04, 2.2722e-04, 1.8032e-04,\n 1.2945e-04, 1.4301e-04, 1.1891e-04, 1.2504e-04, 1.4891e-04, 1.3226e-04,\n 1.7480e-04, 1.6026e-04, 1.0607e-04, 1.5283e-04, 1.2498e-04, 1.3170e-04,\n 1.5455e-04, 1.5796e-04, 1.1196e-04, 9.9589e-05, 1.7299e-04, 1.0742e-04,\n 1.5451e-04, 1.5797e-04, 2.0861e-04, 1.1231e-04, 1.3532e-04, 1.4196e-04,\n 1.4649e-04, 1.2627e-04, 1.8106e-04, 1.2614e-04, 1.4002e-04, 1.3265e-04,\n 1.5130e-04, 1.7927e-04, 1.6413e-04, 1.1504e-04, 1.3100e-04, 1.3806e-04,\n 1.3435e-04, 6.8334e-05, 1.3032e-04, 1.3058e-04, 1.4928e-04, 1.3047e-04,\n 1.4413e-04, 1.1063e-04, 2.1002e-04, 1.4356e-04, 1.2790e-04, 1.3415e-04,\n 1.5990e-04, 1.5012e-04, 1.0487e-04, 1.3253e-04, 1.6729e-04, 1.2034e-04,\n 1.2220e-04, 1.4586e-04, 1.6145e-04, 1.0426e-04, 1.4367e-04, 1.5817e-04,\n 1.2183e-04, 1.4874e-04, 1.0434e-04, 1.7888e-04, 1.7226e-04, 1.3433e-04,\n 1.5682e-04, 1.4969e-04, 1.5082e-04, 1.5190e-04, 1.4344e-04, 1.0546e-04,\n 8.3370e-05, 1.2035e-04, 1.2069e-04, 1.5340e-04, 1.0962e-04, 1.3961e-04,\n 1.0516e-04, 9.0014e-05, 1.3545e-04, 1.1135e-04, 1.3544e-04, 7.7433e-05,\n 1.6131e-04, 1.1612e-04, 1.3155e-11, 1.0779e-04, 7.9550e-05, 1.6305e-12,\n 1.0241e-04, 1.2634e-04, 1.3064e-04, 1.6470e-04, 1.1179e-04, 1.1722e-04,\n 1.0335e-04, 1.3703e-04, 9.9412e-05, 1.0514e-04, 9.4243e-05, 1.2143e-04,\n 1.3556e-04, 1.0469e-04, 1.2941e-04, 1.5977e-04, 1.6014e-04, 1.7198e-04,\n 1.3958e-04, 1.4022e-04, 1.0847e-04, 1.5220e-04, 1.4102e-04, 1.1174e-04,\n 1.2346e-04, 1.4510e-04, 1.3447e-04, 1.1973e-04, 1.3007e-04, 8.3223e-05,\n 7.4501e-05, 1.4623e-04, 9.4624e-05, 9.7463e-05, 1.5613e-04, 1.5734e-04,\n 1.2404e-04, 1.2258e-04, 1.4026e-04, 1.2699e-04, 1.2241e-04, 1.0102e-04,\n 1.0406e-04, 1.8867e-04, 1.0018e-04, 1.6032e-04, 1.1271e-04, 1.1318e-04,\n 1.4615e-04, 1.1228e-04, 1.2054e-04, 1.4284e-04, 1.4025e-04, 1.3528e-04,\n 1.6239e-04, 1.2984e-04, 1.3916e-04, 1.1019e-04, 1.0091e-04, 1.2812e-04,\n 1.0081e-04, 9.3921e-05, 1.2433e-04, 1.7805e-04, 1.5837e-04, 1.2741e-04,\n 1.1040e-04, 1.1018e-04, 8.2960e-05, 1.8514e-04, 1.3843e-04, 1.3651e-04,\n 1.4743e-04, 1.2152e-04, 1.0826e-04, 1.1324e-04, 1.6604e-04, 1.6579e-04,\n 1.4663e-04, 7.8856e-05, 1.1579e-04, 1.1423e-04, 1.3799e-04, 1.2075e-04,\n 7.9649e-05, 1.3988e-04, 9.3607e-05, 1.3033e-04, 1.5211e-04, 1.2213e-04,\n 1.2953e-04, 9.4128e-05, 1.6500e-04, 1.3326e-04, 1.3066e-04, 1.5590e-04,\n 1.0805e-04, 1.1064e-04, 1.2168e-04, 1.5114e-04, 1.3567e-04, 1.1552e-04,\n 1.2766e-04, 1.3503e-04, 1.1382e-04, 1.3121e-04, 2.3324e-04, 1.0930e-04,\n 1.7915e-04, 1.7870e-04, 1.5135e-04, 1.7638e-04, 1.2001e-04, 1.5721e-04,\n 1.5670e-04, 8.7976e-05, 1.5670e-04, 1.3649e-04, 1.0738e-04, 1.0172e-04,\n 1.7143e-04, 1.6905e-04, 1.3027e-04, 1.1191e-04, 8.8815e-05, 1.2190e-04,\n 1.2992e-04, 1.7495e-04, 1.2172e-04, 1.6993e-04, 1.0162e-04, 1.7343e-04,\n 1.0805e-04, 1.3068e-04, 1.4633e-04, 1.5192e-04, 1.3156e-04, 1.4548e-04,\n 1.2512e-04, 6.9926e-05, 1.4525e-04, 1.3193e-04, 1.0549e-04, 1.1084e-04,\n 1.0095e-04, 1.4056e-04, 1.5886e-04, 1.7566e-04, 2.7370e-04, 1.5281e-04,\n 1.1088e-04, 1.2249e-04, 1.5003e-04, 1.5537e-04, 1.2041e-04, 2.1373e-04,\n 1.4307e-04, 1.2595e-04, 1.6096e-04, 1.4984e-04, 1.1680e-04, 1.4086e-04,\n 1.0885e-04, 1.4280e-04, 1.3810e-04, 1.3811e-04, 1.6897e-04, 1.1781e-04,\n 1.5041e-04, 1.5325e-04, 1.0765e-04, 1.4480e-04, 1.4178e-04, 8.9309e-05,\n 1.5058e-04, 1.4266e-04, 1.4943e-04, 9.3698e-05, 1.1444e-04, 1.3229e-04,\n 1.7948e-04, 1.4482e-04, 1.8772e-04, 1.4660e-04, 1.0734e-04, 1.3124e-04],\n device='cuda:0')"
69
+ },
70
+ "13": {
71
+ "step": "tensor(10016.)",
72
+ "exp_avg": "tensor([[-6.1363e-09, 7.2334e-08, 5.0091e-28, ..., 5.6052e-45,\n 5.6052e-45, -3.9710e-10],\n [ 1.4285e-08, -2.5341e-08, 1.9502e-06, ..., -2.3595e-05,\n 5.8889e-10, 2.2324e-08],\n [ 3.3169e-05, 9.8152e-07, 2.0405e-12, ..., 6.6912e-07,\n 3.2649e-06, 5.9885e-06],\n ...,\n [-2.2920e-08, -2.1741e-06, 2.4555e-08, ..., 1.6459e-10,\n 4.4120e-06, 4.7652e-10],\n [-3.5793e-05, 6.6166e-05, 2.2962e-05, ..., 8.3499e-05,\n 1.1753e-05, 3.4325e-06],\n [-5.6426e-06, -9.2314e-07, -2.7210e-06, ..., 4.2888e-06,\n 1.4630e-07, 1.6101e-06]], device='cuda:0')",
73
+ "exp_avg_sq": "tensor([[9.0861e-11, 7.7071e-10, 7.1414e-14, ..., 4.2333e-15, 1.6903e-14,\n 1.1651e-13],\n [2.7192e-09, 2.7834e-09, 1.7541e-09, ..., 7.6215e-09, 1.3847e-11,\n 5.2911e-11],\n [2.5365e-08, 1.3067e-09, 7.6523e-11, ..., 5.6182e-09, 2.8468e-09,\n 1.0987e-08],\n ...,\n [8.8755e-12, 2.3799e-10, 7.3178e-11, ..., 3.5475e-12, 7.6091e-10,\n 3.7085e-12],\n [7.3470e-08, 1.1025e-07, 3.5199e-09, ..., 9.4816e-08, 2.9809e-08,\n 5.5446e-08],\n [8.3996e-10, 1.3226e-09, 4.3223e-09, ..., 1.6295e-09, 5.8887e-11,\n 3.7161e-09]], device='cuda:0')"
74
+ },
75
+ "14": {
76
+ "step": "tensor(10016.)",
77
+ "exp_avg": "tensor([ 8.4053e-07, -2.8375e-04, 1.1672e-03, ..., 2.6282e-05,\n -9.0570e-04, 2.2487e-05], device='cuda:0')",
78
+ "exp_avg_sq": "tensor([4.4870e-08, 6.9875e-06, 1.7259e-05, ..., 8.4712e-08, 3.3131e-05,\n 4.4143e-06], device='cuda:0')"
79
+ },
80
+ "15": {
81
+ "step": "tensor(10016.)",
82
+ "exp_avg": "tensor([[-1.6305e-08, -8.7997e-08, -4.7132e-07, ..., 1.6174e-08,\n 3.4971e-05, -1.8760e-07],\n [ 4.6036e-08, -1.7214e-06, -5.5308e-07, ..., 1.4994e-07,\n -2.6305e-05, 1.9461e-07],\n [-9.9347e-08, -1.5296e-06, -4.0179e-07, ..., -2.7504e-07,\n -1.3182e-05, 1.3336e-07],\n ...,\n [-6.9925e-08, -2.8702e-06, 2.4486e-06, ..., -5.0443e-07,\n -2.3990e-05, 9.4248e-07],\n [-8.0443e-08, -1.1736e-06, 7.3177e-07, ..., -3.0056e-07,\n -2.9521e-05, 7.3204e-07],\n [ 2.9789e-08, 2.1474e-08, 4.9540e-06, ..., 9.6830e-07,\n -1.0228e-05, 2.6195e-07]], device='cuda:0')",
83
+ "exp_avg_sq": "tensor([[3.5256e-12, 9.4131e-12, 1.1960e-10, ..., 5.2038e-12, 2.2162e-09,\n 9.9672e-11],\n [2.6280e-11, 3.3654e-11, 3.1086e-10, ..., 3.8603e-12, 2.6851e-09,\n 1.5992e-10],\n [1.0710e-10, 6.5061e-11, 1.8882e-10, ..., 5.2287e-12, 3.0363e-09,\n 2.9037e-10],\n ...,\n [6.1061e-11, 5.4176e-11, 1.3091e-10, ..., 6.9695e-12, 2.5777e-09,\n 3.5822e-10],\n [7.2047e-11, 3.6911e-11, 3.1608e-10, ..., 5.8995e-12, 2.1753e-09,\n 1.9655e-10],\n [2.6783e-11, 4.2062e-11, 4.4769e-10, ..., 6.6988e-12, 2.6769e-09,\n 1.9122e-10]], device='cuda:0')"
84
+ },
85
+ "16": {
86
+ "step": "tensor(10016.)",
87
+ "exp_avg": "tensor([[ 8.2380e-05, 6.5084e-05, 1.0311e-04, ..., 1.2849e-04,\n 1.3057e-04, -9.2503e-05],\n [ 3.5889e-05, 8.4844e-05, 4.7675e-06, ..., 3.5988e-05,\n 4.6114e-05, -7.4570e-05],\n [ 5.1255e-05, -2.4299e-05, -1.4370e-04, ..., -7.8470e-05,\n -5.1998e-07, 4.9369e-05],\n ...,\n [-8.2436e-06, -4.2404e-05, -1.6660e-05, ..., -5.1326e-05,\n -9.2931e-05, -7.9902e-05],\n [-2.8541e-05, -8.2306e-06, 5.8641e-05, ..., 4.1260e-05,\n -3.3258e-05, 5.0079e-05],\n [-9.0685e-05, 9.7723e-06, 6.3406e-05, ..., 9.6014e-05,\n 3.4638e-05, -1.0201e-04]], device='cuda:0')",
88
+ "exp_avg_sq": "tensor([[3.1140e-08, 7.2478e-08, 7.2476e-08, ..., 4.7893e-08, 3.3987e-08,\n 5.4271e-08],\n [3.0832e-08, 6.2692e-08, 4.2618e-08, ..., 5.7801e-08, 2.8759e-08,\n 5.0313e-08],\n [2.1503e-08, 4.2472e-08, 4.0952e-08, ..., 3.3765e-08, 2.5986e-08,\n 2.8429e-08],\n ...,\n [2.6376e-08, 6.3698e-08, 4.8551e-08, ..., 5.5968e-08, 3.1420e-08,\n 5.8933e-08],\n [3.0958e-08, 6.3025e-08, 6.8669e-08, ..., 5.3008e-08, 2.9016e-08,\n 5.0839e-08],\n [3.1416e-08, 5.6747e-08, 7.8726e-08, ..., 4.8764e-08, 2.9079e-08,\n 5.5736e-08]], device='cuda:0')"
89
+ },
90
+ "17": {
91
+ "step": "tensor(10016.)",
92
+ "exp_avg": "tensor([-0.0011, 0.0009, -0.0004, ..., 0.0021, 0.0003, -0.0049],\n device='cuda:0')",
93
+ "exp_avg_sq": "tensor([5.2263e-05, 4.8068e-05, 3.2749e-05, ..., 5.2545e-05, 4.7678e-05,\n 4.7230e-05], device='cuda:0')"
94
+ }
95
+ },
96
+ "param_groups": [
97
+ {
98
+ "lr": 9.639601130971382e-05,
99
+ "name": "scale_256",
100
+ "betas": [
101
+ 0.9,
102
+ 0.999
103
+ ],
104
+ "eps": 1e-08,
105
+ "weight_decay": 1e-05,
106
+ "amsgrad": false,
107
+ "maximize": false,
108
+ "foreach": null,
109
+ "capturable": false,
110
+ "differentiable": false,
111
+ "fused": null,
112
+ "decoupled_weight_decay": true,
113
+ "initial_lr": 0.001,
114
+ "params": [
115
+ 0,
116
+ 1,
117
+ 2
118
+ ]
119
+ },
120
+ {
121
+ "lr": 9.639601130971382e-05,
122
+ "name": "scale_512",
123
+ "betas": [
124
+ 0.9,
125
+ 0.999
126
+ ],
127
+ "eps": 1e-08,
128
+ "weight_decay": 1e-05,
129
+ "amsgrad": false,
130
+ "maximize": false,
131
+ "foreach": null,
132
+ "capturable": false,
133
+ "differentiable": false,
134
+ "fused": null,
135
+ "decoupled_weight_decay": true,
136
+ "initial_lr": 0.001,
137
+ "params": [
138
+ 3,
139
+ 4,
140
+ 5,
141
+ 6,
142
+ 7
143
+ ]
144
+ },
145
+ {
146
+ "lr": 9.639601130971382e-05,
147
+ "name": "scale_768",
148
+ "betas": [
149
+ 0.9,
150
+ 0.999
151
+ ],
152
+ "eps": 1e-08,
153
+ "weight_decay": 1e-05,
154
+ "amsgrad": false,
155
+ "maximize": false,
156
+ "foreach": null,
157
+ "capturable": false,
158
+ "differentiable": false,
159
+ "fused": null,
160
+ "decoupled_weight_decay": true,
161
+ "initial_lr": 0.001,
162
+ "params": [
163
+ 8,
164
+ 9,
165
+ 10,
166
+ 11,
167
+ 12
168
+ ]
169
+ },
170
+ {
171
+ "lr": 9.639601130971382e-05,
172
+ "name": "scale_1024",
173
+ "betas": [
174
+ 0.9,
175
+ 0.999
176
+ ],
177
+ "eps": 1e-08,
178
+ "weight_decay": 1e-05,
179
+ "amsgrad": false,
180
+ "maximize": false,
181
+ "foreach": null,
182
+ "capturable": false,
183
+ "differentiable": false,
184
+ "fused": null,
185
+ "decoupled_weight_decay": true,
186
+ "initial_lr": 0.001,
187
+ "params": [
188
+ 13,
189
+ 14,
190
+ 15,
191
+ 16,
192
+ 17
193
+ ]
194
+ }
195
+ ]
196
+ },
197
+ "scheduler_state_dict": {
198
+ "T_0": 10,
199
+ "T_i": 10,
200
+ "T_mult": 2,
201
+ "eta_min": 1e-06,
202
+ "T_cur": 8,
203
+ "base_lrs": [
204
+ 0.001,
205
+ 0.001,
206
+ 0.001,
207
+ 0.001
208
+ ],
209
+ "last_epoch": 8,
210
+ "_step_count": 0,
211
+ "_is_initial": false,
212
+ "_get_lr_called_within_step": false,
213
+ "_last_lr": [
214
+ 9.639601130971382e-05,
215
+ 9.639601130971382e-05,
216
+ 9.639601130971382e-05,
217
+ 9.639601130971382e-05
218
+ ]
219
+ },
220
+ "metrics": {
221
+ "best_val_acc": 76.812,
222
+ "best_epoch": 7,
223
+ "scale_accuracies": {
224
+ "256": 70.048,
225
+ "512": 74.262,
226
+ "768": 75.674,
227
+ "1024": 75.564
228
+ },
229
+ "training_history": {
230
+ "epochs": [
231
+ 1,
232
+ 2,
233
+ 3,
234
+ 4,
235
+ 5,
236
+ 6,
237
+ 7,
238
+ 8
239
+ ],
240
+ "train_loss": [
241
+ 3.9118613697850284,
242
+ 2.66607952194092,
243
+ 2.3952484759278954,
244
+ 2.201966982775222,
245
+ 2.026744091663117,
246
+ 1.8584000322575005,
247
+ 1.6992347222357131,
248
+ 1.5605441822221104
249
+ ],
250
+ "train_acc": [
251
+ 68.33870994179526,
252
+ 76.85976925724749,
253
+ 79.21098498478341,
254
+ 81.16849715923061,
255
+ 83.13014618703104,
256
+ 85.12582668769957,
257
+ 87.11042354353492,
258
+ 88.98340341266986
259
+ ],
260
+ "val_acc": [
261
+ 72.328,
262
+ 74.248,
263
+ 74.928,
264
+ 75.464,
265
+ 75.994,
266
+ 76.29,
267
+ 76.452,
268
+ 76.812
269
+ ],
270
+ "scale_accs": {
271
+ "256": [
272
+ 65.922,
273
+ 67.866,
274
+ 68.668,
275
+ 69.028,
276
+ 69.476,
277
+ 69.894,
278
+ 70.05,
279
+ 70.048
280
+ ],
281
+ "512": [
282
+ 70.014,
283
+ 71.776,
284
+ 72.65,
285
+ 72.974,
286
+ 73.372,
287
+ 73.71,
288
+ 73.994,
289
+ 74.262
290
+ ],
291
+ "768": [
292
+ 71.312,
293
+ 73.326,
294
+ 74.046,
295
+ 74.52,
296
+ 74.848,
297
+ 75.304,
298
+ 75.51,
299
+ 75.674
300
+ ],
301
+ "1024": [
302
+ 71.288,
303
+ 73.572,
304
+ 74.36,
305
+ 74.86,
306
+ 75.24,
307
+ 75.4,
308
+ 75.462,
309
+ 75.564
310
+ ]
311
+ },
312
+ "lr": [
313
+ 0.0009755527298894294,
314
+ 0.0009046039886902864,
315
+ 0.0007940987335200904,
316
+ 0.0006548539886902864,
317
+ 0.0005005000000000001,
318
+ 0.0003461460113097139,
319
+ 0.00020690126647990973,
320
+ 9.639601130971382e-05
321
+ ]
322
+ }
323
+ },
324
+ "train_config": {
325
+ "name": "david_training",
326
+ "run_id": "20251012_145649",
327
+ "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
328
+ "model_variant": "clip_vit_laion_b32",
329
+ "num_classes": 1000,
330
+ "preset": "hierarchical_refinement",
331
+ "custom_config_path": null,
332
+ "num_classes_override": null,
333
+ "use_belly_override": null,
334
+ "belly_expand_override": null,
335
+ "progressive_training_override": false,
336
+ "scale_warmup_epochs_override": null,
337
+ "num_epochs": 10,
338
+ "batch_size": 1024,
339
+ "learning_rate": 0.001,
340
+ "weight_decay": 1e-05,
341
+ "warmup_epochs": 3,
342
+ "use_rose_loss": true,
343
+ "rose_initial_weight": 0.1,
344
+ "rose_max_weight": 0.5,
345
+ "rose_weight_schedule": "adaptive",
346
+ "use_cayley_loss": false,
347
+ "cayley_weight": 0.001,
348
+ "scale_loss_balance": null,
349
+ "use_mixed_precision": true,
350
+ "gradient_clip": 10.0,
351
+ "scheduler_type": "cosine_restarts",
352
+ "min_lr": 1e-06,
353
+ "freeze_strategy": "never",
354
+ "freeze_threshold": 90.0,
355
+ "unfreeze_on_plateau": true,
356
+ "patience": 10,
357
+ "track_gradients": true,
358
+ "gradient_scale_threshold": 1e-05,
359
+ "gradient_scale_multiplier": 10.0,
360
+ "log_interval": 50,
361
+ "val_interval": 1,
362
+ "save_interval": 5,
363
+ "log_fusion_weights": true,
364
+ "log_loss_components": true,
365
+ "save_format": "safetensors",
366
+ "hf_repo": "AbstractPhil/gated-david",
367
+ "upload_to_hub": true,
368
+ "base_dir": "./david_training",
369
+ "num_workers": 10,
370
+ "pin_memory": true,
371
+ "prefetch_factor": 4,
372
+ "persistent_workers": true
373
+ }
374
+ }