AbstractPhil commited on
Commit
ddcd241
·
verified ·
1 Parent(s): 6f0d25f

Update best_model_acc66.28_metadata.json - Run 20251012_231445

Browse files
weights/David-fully_shared-weighted_sum/20251012_231445/best_model_acc66.28_metadata.json ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 6,
3
+ "optimizer_state_dict": {
4
+ "state": {
5
+ "0": {
6
+ "step": "tensor(26278.)",
7
+ "exp_avg": "tensor([[-4.2216e-05, 3.4922e-05, 8.3355e-05, ..., 7.5792e-05,\n 6.2477e-05, -5.3563e-05],\n [ 3.5335e-05, -3.4181e-06, -5.3904e-06, ..., -4.2810e-05,\n -4.7551e-05, 4.4824e-05],\n [-7.9505e-05, -4.7338e-05, 9.4380e-05, ..., -2.9315e-05,\n -5.1078e-05, -5.6049e-05],\n ...,\n [-5.5358e-05, 8.0772e-05, -6.9350e-07, ..., 8.4547e-05,\n -1.7186e-05, -1.2147e-05],\n [ 3.3603e-05, 2.9259e-05, 1.8986e-05, ..., -4.3070e-05,\n -1.3605e-05, 2.6267e-05],\n [ 1.7970e-06, -3.4303e-05, -4.7899e-06, ..., -3.1704e-05,\n 3.8658e-06, 6.8777e-06]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[1.8038e-08, 4.3914e-08, 2.6747e-08, ..., 3.1234e-08, 2.3964e-08,\n 1.3040e-08],\n [6.3412e-08, 1.0811e-07, 4.5658e-08, ..., 4.4146e-08, 5.6617e-08,\n 2.2169e-08],\n [1.1763e-07, 1.4681e-07, 8.2479e-08, ..., 6.4800e-08, 3.2432e-08,\n 5.2573e-08],\n ...,\n [2.8928e-08, 1.1915e-07, 2.8192e-08, ..., 2.1913e-08, 1.7083e-08,\n 1.3223e-08],\n [3.2816e-08, 4.2625e-07, 3.9864e-08, ..., 8.2254e-08, 2.0110e-08,\n 2.6708e-08],\n [4.9195e-08, 7.1388e-08, 2.3162e-08, ..., 7.9428e-08, 2.3354e-08,\n 2.5236e-08]], device='cuda:0')"
9
+ },
10
+ "1": {
11
+ "step": "tensor(26278.)",
12
+ "exp_avg": "tensor([-1.3888e-03, -5.6880e-04, -1.2415e-03, -1.3169e-03, -1.8888e-03,\n -2.5116e-03, 2.2600e-03, 1.1729e-04, 3.0192e-04, -1.0268e-03,\n -2.5420e-03, -7.5817e-04, 1.6319e-03, -1.2207e-04, 8.8981e-04,\n 1.2477e-03, -1.4871e-03, -1.9442e-03, -6.5036e-04, -9.6239e-04,\n -1.1192e-04, -5.3665e-03, -1.2331e-03, -7.8050e-04, 8.4861e-03,\n -1.1841e-03, -2.3269e-04, 5.1067e-04, -6.7550e-04, -3.9477e-04,\n 5.9995e-04, 2.1062e-03, -3.2214e-04, -1.7706e-03, -1.9941e-03,\n -9.7955e-04, 1.1581e-03, -8.9375e-04, 2.9212e-04, -1.6081e-02,\n 9.2150e-04, 2.5287e-04, 4.5036e-04, -3.6185e-04, -7.8464e-04,\n -1.8190e-03, 8.1539e-04, 2.2005e-04, -4.0962e-04, -3.1752e-04,\n -7.3477e-04, 8.6829e-05, 2.4202e-03, 1.3109e-03, -7.4705e-05,\n -5.4430e-04, 5.7968e-04, 2.5899e-04, 2.3642e-03, -1.0505e-04,\n -1.5449e-03, 2.0755e-03, -2.4338e-03, -1.9195e-03, 5.4167e-03,\n -5.4906e-04, 9.7928e-04, 2.6662e-03, 1.5839e-03, 2.2064e-03,\n 1.4575e-03, -2.9873e-03, 9.6783e-04, -8.0904e-04, 1.3644e-03,\n -5.0700e-04, 7.9870e-04, -2.6040e-04, -1.0394e-03, -1.5546e-03,\n -2.3580e-04, -1.7364e-03, -1.8364e-04, -9.3512e-05, 2.1481e-03,\n -2.0137e-03, 4.0252e-03, -1.4313e-03, 2.3957e-03, -2.7125e-03,\n -4.8477e-04, 4.6032e-04, 1.7446e-03, 1.1739e-04, -3.4518e-05,\n 2.8543e-03, -1.0236e-03, -1.7234e-03, -1.3706e-03, -7.5572e-04,\n -4.2323e-03, 2.0494e-03, -1.3405e-03, -1.5640e-03, -3.0688e-04,\n 6.1738e-04, 2.7807e-03, -3.8826e-04, -1.2628e-03, 5.8456e-04,\n -3.5127e-03, 4.6013e-04, 5.3699e-05, -1.0542e-03, -4.2372e-04,\n -4.9782e-05, -6.5152e-04, 3.1293e-04, 9.6928e-04, 8.5472e-04,\n 1.8682e-03, 1.2648e-03, -1.5712e-03, 3.2093e-05, 5.9614e-03,\n -2.3575e-03, -4.1555e-03, -3.0617e-03, -1.1577e-03, -3.4662e-03,\n 2.6891e-03, 2.2359e-03, -2.1832e-04, -2.4807e-03, -1.7449e-03,\n 4.5304e-04, -1.3870e-03, 2.4175e-03, 8.2632e-04, 2.0893e-03,\n 9.4513e-04, 8.9054e-04, -8.3137e-04, -3.0582e-03, 9.5492e-04,\n -2.3813e-03, 1.1287e-03, 1.0036e-03, -2.7870e-03, -1.9251e-04,\n 1.4138e-03, 9.6310e-04, -2.2753e-03, 2.7306e-03, -1.3102e-03,\n 1.4901e-03, 1.4248e-03, 1.4913e-03, -1.9330e-03, 1.4421e-04,\n -1.2771e-03, -9.2513e-04, -1.7196e-04, 2.6978e-03, 2.3177e-03,\n 1.1998e-03, -1.5075e-03, -2.9733e-03, 2.2228e-03, 2.5477e-04,\n 4.0001e-04, 3.4008e-04, 7.6096e-04, 1.5574e-03, -1.2435e-03,\n 1.5572e-04, -9.7058e-04, 1.0084e-03, -7.8106e-04, -1.7489e-03,\n -1.2789e-03, -2.8883e-03, 3.1670e-04, -1.0174e-04, 1.3763e-04,\n 8.0176e-04, -2.0556e-03, 4.0086e-04, -9.9699e-04, -1.9344e-04,\n 7.5280e-04, 2.1200e-04, -4.6447e-05, 7.9122e-04, -6.8457e-05,\n 1.3430e-03, 1.1493e-03, 6.6260e-04, -6.7038e-04, -1.6914e-03,\n 2.0324e-03, -8.8149e-04, 2.5735e-03, 9.0090e-04, -1.4163e-03,\n 3.7056e-03, -8.0786e-04, -2.7635e-03, 7.5176e-04, 1.5344e-04,\n 3.3121e-03, -2.4642e-03, 1.3641e-03, 8.1173e-05, -2.7079e-03,\n 4.8002e-05, 1.2557e-03, -2.2061e-04, 5.5944e-04, 1.0636e-03,\n 2.2235e-03, -9.7783e-04, 1.6721e-03, -1.3247e-03, -9.8325e-04,\n 1.9227e-03, -2.7985e-04, 9.7375e-04, -2.8574e-03, -1.5581e-04,\n -4.4213e-04, -1.2472e-03, 4.7881e-04, 2.1159e-03, 1.2646e-03,\n 1.4305e-03, 1.6034e-03, -2.1044e-04, 1.6532e-03, -8.3210e-05,\n -2.5337e-03, 5.8369e-04, 4.5289e-04, 2.1324e-04, 1.3495e-04,\n -2.6571e-04, 1.8268e-03, -2.3780e-03, -8.9822e-04, -1.2034e-04,\n 1.2219e-03, -3.5128e-04, 4.1022e-04, 3.0148e-04, 6.5538e-04,\n -2.9249e-04, 6.1527e-04, 9.5238e-04, 1.7887e-03, 1.5062e-03,\n 5.1468e-04, -1.0205e-03, -1.0966e-03, -1.0599e-03, 6.1341e-04,\n 7.1796e-04, 6.7780e-04, -5.0463e-04, -1.5578e-03, 2.6250e-04,\n -1.7104e-03, 1.4355e-03, -1.1479e-03, 2.8673e-03, -1.0132e-03,\n 1.7573e-03, -2.0134e-03, 1.5299e-04, 3.2605e-04, 2.0744e-04,\n -2.6707e-06, 1.6347e-03, -2.7504e-03, -5.6443e-04, 5.6493e-04,\n 2.5473e-03, 1.6122e-03, -5.6844e-04, 3.3141e-04, 1.2209e-03,\n 2.0318e-03, 1.0292e-03, -1.0524e-03, -7.7816e-04, 8.1033e-04,\n 8.4288e-04, -9.3228e-04, -4.8853e-03, -1.0282e-03, 2.4941e-03,\n -3.5814e-03, -7.0330e-04, 1.5867e-04, 4.0904e-04, -3.2311e-03,\n 1.5977e-05, 1.7731e-03, -2.6617e-03, 2.2569e-03, 1.6957e-03,\n -7.2659e-04, -2.3706e-03, 1.5407e-03, 1.6370e-03, 2.6946e-03,\n 2.7495e-03, 5.6023e-05, 5.7331e-04, 1.6799e-03, -1.8988e-03,\n 6.1233e-04, -1.1218e-03, 9.5203e-04, -3.1018e-03, -6.6408e-04,\n -1.5771e-03, -1.1622e-03, 1.4674e-03, -2.2233e-03, -8.3787e-04,\n -8.5131e-04, -1.8343e-03, 3.0249e-04, 6.0798e-04, -4.9788e-03,\n -1.3963e-03, 8.8780e-04, 1.5585e-03, 3.4339e-04, -2.0102e-03,\n 1.2955e-03, -3.4752e-04, -8.4576e-04, -6.5764e-04, -3.7048e-04,\n -5.4150e-03, 1.2272e-03, 1.7658e-04, -1.0582e-03, -4.4864e-04,\n -1.5124e-03, -2.5756e-03, -2.4471e-05, 3.0724e-03, -4.7003e-03,\n -3.5901e-04, 4.5548e-04, 1.0946e-03, 9.0026e-04, -5.2630e-04,\n 8.5836e-04, 2.2674e-03, -5.3041e-04, 2.2073e-04, 1.1415e-03,\n 8.0641e-04, 5.7124e-05, 2.1074e-03, 2.2170e-03, 4.0805e-04,\n 2.0511e-04, 5.6080e-04, 4.9394e-04, 1.9112e-04, 1.0646e-03,\n 1.3190e-03, -1.0766e-03, 1.2439e-03, 4.4439e-04, -6.6607e-04,\n 1.2720e-03, 1.8234e-04, 1.7253e-03, -9.1552e-04, 1.9892e-03,\n -2.2406e-04, 1.6749e-05, -9.1716e-04, 1.9955e-03, 4.1655e-04,\n 2.2405e-04, -3.2064e-04, 7.6185e-05, 1.4220e-04, 2.4358e-03,\n -2.1883e-03, 5.5109e-05, -1.0337e-03, 7.1341e-04, -1.3761e-03,\n -4.9977e-04, 3.7004e-04, 2.3965e-05, -2.4580e-03, -2.3494e-03,\n -3.1744e-04, 6.9431e-04, -1.1316e-04, 9.9550e-04, -5.4386e-04,\n 1.0792e-03, 6.6248e-04, 1.4994e-04, 1.2498e-03, 4.5451e-04,\n 8.1382e-04, -6.5691e-04, -1.9972e-03, 1.6865e-03, 2.8759e-03,\n 1.5926e-03, -6.4539e-05, 3.6857e-03, -2.3658e-03, 3.8696e-04,\n 1.3038e-03, 3.2328e-04, 1.5885e-03, 1.5001e-03, -4.9950e-04,\n -2.2573e-03, 4.1324e-03, 3.1424e-04, 1.2741e-03, 4.0108e-04,\n -7.4690e-04, 1.6611e-03, 5.9360e-04, -7.0802e-04, 6.1574e-04,\n 1.0686e-03, -7.3347e-04, -8.4337e-04, -1.4751e-03, -8.2179e-04,\n 1.5002e-03, 7.0973e-04, 1.4834e-03, 3.9059e-05, -1.2406e-03,\n 1.0505e-03, 8.3193e-04, 1.2764e-03, 5.7652e-04, 8.8227e-03,\n -8.4467e-04, 2.1228e-03, 2.7120e-05, -5.5144e-03, 2.3764e-03,\n -2.0778e-03, -2.4859e-03, 3.9159e-04, -3.1530e-04, 5.9321e-04,\n 1.2835e-03, -1.5721e-04, -2.7044e-03, 9.7505e-04, 1.4327e-03,\n -5.5554e-03, 3.6689e-05, 1.1791e-03, -8.2363e-04, -1.3081e-03,\n 3.2948e-03, 5.5926e-04, -1.1492e-03, 1.0517e-03, 1.8967e-03,\n 1.7021e-03, 1.1304e-03, -3.5478e-04, -1.6599e-03, 5.6433e-03,\n -5.6306e-04, -2.1854e-04, 1.0051e-04, 9.0705e-04, 2.3200e-03,\n -2.6069e-03, 4.4063e-04, 1.5239e-03, -1.1990e-03, -1.0552e-03,\n -3.3748e-03, -3.2732e-03, -2.7105e-03, -6.2244e-05, 5.8313e-04,\n 6.4297e-04, 8.8729e-04, 1.4150e-03, -3.4115e-04, -5.3441e-04,\n 1.3850e-03, -1.8424e-03, -3.8112e-04, -6.0457e-04, 1.1113e-03,\n -2.2024e-03, 2.8502e-04], device='cuda:0')",
13
+ "exp_avg_sq": "tensor([2.0021e-05, 4.0456e-05, 4.9054e-05, 4.4441e-05, 2.8911e-05, 6.9555e-05,\n 6.9597e-05, 2.0401e-05, 3.1187e-05, 3.9978e-05, 5.2585e-05, 3.4785e-05,\n 2.6509e-05, 3.4519e-05, 8.6205e-05, 2.5840e-05, 4.5182e-05, 9.4388e-05,\n 1.3528e-05, 3.4398e-05, 2.8037e-05, 4.3349e-05, 6.0013e-05, 3.3931e-05,\n 3.8352e-05, 2.2344e-05, 7.1408e-05, 4.6913e-05, 4.3351e-05, 3.0622e-05,\n 3.5561e-05, 3.8248e-05, 1.8590e-05, 9.6596e-05, 4.9006e-05, 3.4512e-05,\n 2.2623e-05, 5.0992e-05, 5.7674e-05, 1.1363e-04, 3.0205e-05, 5.4852e-05,\n 4.1958e-05, 2.7401e-05, 3.6720e-05, 4.4431e-05, 2.7740e-05, 2.2552e-05,\n 2.2706e-05, 2.4416e-05, 3.2024e-05, 3.1644e-05, 8.4836e-05, 3.7772e-05,\n 5.4110e-05, 3.5240e-05, 3.6476e-05, 2.5812e-05, 3.4274e-05, 3.7629e-05,\n 2.1280e-05, 2.8685e-05, 1.7675e-05, 7.6101e-05, 7.7589e-05, 6.0544e-05,\n 3.8862e-05, 6.2444e-05, 2.9220e-05, 4.6166e-05, 3.1135e-05, 5.5362e-05,\n 3.8884e-05, 8.4980e-05, 3.2056e-05, 4.9096e-05, 5.9514e-05, 2.5235e-05,\n 3.6809e-05, 5.1773e-05, 3.4865e-05, 3.2127e-05, 2.9692e-05, 5.3088e-05,\n 4.3378e-05, 3.8404e-05, 1.1348e-04, 2.2307e-05, 3.5029e-05, 5.1282e-05,\n 3.5254e-05, 2.5300e-05, 7.3183e-05, 1.3467e-04, 4.0245e-05, 5.1703e-05,\n 4.6546e-05, 5.8422e-05, 4.2330e-05, 3.6809e-05, 4.3886e-05, 3.7807e-05,\n 2.2442e-05, 3.3525e-05, 2.6614e-05, 2.7099e-05, 3.4237e-05, 2.1781e-05,\n 4.1803e-05, 1.7146e-05, 8.3580e-05, 1.6735e-05, 5.9605e-05, 3.4145e-05,\n 3.2756e-05, 1.8751e-05, 3.5664e-05, 1.0176e-04, 3.1692e-05, 4.4226e-05,\n 4.1090e-05, 4.8438e-05, 5.9293e-05, 5.2771e-05, 3.6887e-05, 4.9544e-05,\n 4.0322e-05, 6.7635e-05, 5.8588e-05, 4.0787e-05, 4.0028e-05, 9.5082e-05,\n 5.2755e-05, 4.1513e-05, 7.1411e-05, 2.7090e-05, 2.8735e-05, 2.1674e-05,\n 5.7981e-05, 4.1225e-05, 2.8897e-05, 5.8506e-05, 4.3194e-05, 1.0579e-04,\n 3.4002e-05, 5.0653e-05, 5.1071e-05, 3.4605e-05, 8.8173e-05, 4.2502e-05,\n 3.5201e-05, 4.9649e-05, 4.3282e-05, 4.6756e-05, 3.7300e-05, 5.4878e-05,\n 1.9695e-05, 1.8960e-05, 4.9504e-05, 2.3234e-05, 4.8559e-05, 4.9870e-05,\n 5.4048e-05, 3.5054e-05, 4.7828e-05, 2.8294e-05, 4.7475e-05, 3.1507e-05,\n 6.3964e-05, 7.8620e-05, 2.8474e-05, 4.7983e-05, 2.6762e-05, 4.2517e-05,\n 2.9270e-05, 1.6756e-05, 3.4105e-05, 4.5836e-05, 3.3602e-05, 3.6748e-05,\n 4.7357e-05, 5.3043e-05, 1.1759e-04, 3.5481e-05, 1.5690e-05, 5.1413e-05,\n 5.1453e-05, 5.6521e-05, 5.9689e-05, 4.8524e-05, 3.5698e-06, 3.5554e-05,\n 1.4383e-05, 2.0360e-05, 2.6480e-05, 3.7067e-05, 2.8450e-05, 3.3346e-05,\n 3.6697e-05, 2.6943e-05, 4.3645e-05, 3.3527e-05, 3.3526e-05, 2.1222e-05,\n 2.5587e-05, 8.8939e-05, 3.3939e-05, 2.7330e-05, 3.0866e-05, 2.8826e-05,\n 4.0884e-05, 3.8640e-05, 3.3779e-05, 3.8645e-05, 3.5068e-05, 4.4020e-05,\n 4.2869e-05, 3.2843e-05, 3.4720e-05, 3.1002e-05, 4.9970e-05, 7.2151e-05,\n 2.6603e-05, 4.0864e-05, 3.1194e-05, 2.9913e-05, 3.8983e-05, 3.5381e-05,\n 5.6413e-05, 7.0464e-05, 4.6481e-05, 2.8823e-05, 3.2184e-05, 2.8451e-05,\n 2.4385e-05, 3.9025e-05, 1.9798e-05, 3.4981e-05, 4.4592e-05, 4.0688e-05,\n 2.6938e-05, 3.1853e-05, 2.2775e-05, 3.8232e-05, 1.3066e-05, 3.0940e-05,\n 7.0798e-05, 3.5062e-05, 3.1628e-05, 3.9077e-05, 5.6780e-05, 5.3821e-05,\n 2.4373e-05, 4.0142e-05, 3.2865e-05, 2.8904e-05, 3.3242e-05, 3.4105e-05,\n 3.9969e-05, 7.2664e-05, 3.4438e-05, 2.7404e-05, 3.3288e-05, 1.8536e-05,\n 4.4730e-05, 4.5408e-05, 5.7854e-05, 2.6138e-05, 1.8744e-05, 1.7772e-05,\n 2.9795e-05, 3.5460e-05, 2.8467e-05, 5.4621e-05, 2.3229e-05, 4.7187e-05,\n 8.3392e-05, 2.8243e-05, 4.3819e-05, 2.5008e-05, 3.4994e-05, 4.4723e-05,\n 3.3389e-05, 3.4237e-05, 3.6821e-05, 4.3083e-05, 5.8459e-05, 3.9795e-05,\n 3.1741e-06, 6.9701e-05, 4.8783e-05, 2.1746e-05, 4.8196e-05, 5.4628e-05,\n 4.7060e-05, 5.7660e-06, 3.1573e-05, 6.5914e-05, 4.0616e-05, 9.0284e-05,\n 5.3200e-05, 5.2895e-05, 4.1340e-05, 2.8825e-05, 5.7188e-05, 4.3916e-05,\n 2.6481e-05, 2.9233e-05, 4.5844e-05, 2.8956e-05, 4.8159e-05, 5.3382e-05,\n 7.8904e-05, 2.5035e-05, 3.2024e-05, 4.5121e-05, 3.3940e-05, 3.9976e-05,\n 1.5048e-05, 3.4506e-05, 4.7918e-05, 4.8405e-05, 3.1233e-05, 5.1845e-05,\n 5.7056e-05, 7.9709e-05, 3.1251e-05, 4.2516e-05, 1.6919e-05, 3.2851e-05,\n 3.5166e-05, 6.2828e-05, 3.6202e-05, 2.1521e-05, 7.5309e-05, 5.0372e-05,\n 1.0160e-05, 2.8826e-05, 2.9254e-05, 7.1871e-05, 2.3331e-05, 4.6149e-05,\n 4.5109e-05, 2.9583e-05, 4.3595e-05, 4.2562e-05, 3.0316e-05, 3.7056e-05,\n 4.5051e-05, 6.8743e-05, 2.2919e-05, 7.5689e-05, 2.3555e-05, 6.7579e-05,\n 8.2056e-05, 4.1801e-05, 3.6687e-05, 4.5270e-05, 2.6223e-05, 3.2285e-05,\n 7.0790e-05, 3.4470e-05, 2.7596e-05, 1.8045e-05, 2.9720e-05, 3.6744e-05,\n 1.8781e-05, 2.9251e-05, 6.8458e-05, 2.2267e-05, 3.5095e-05, 4.4803e-06,\n 2.5956e-05, 3.6557e-05, 3.9871e-05, 3.7636e-05, 7.5368e-05, 4.7522e-05,\n 4.4333e-05, 3.8990e-05, 5.0140e-05, 5.6964e-05, 3.2908e-05, 2.6745e-05,\n 3.4010e-05, 5.1404e-05, 2.9379e-05, 2.5099e-05, 3.5106e-05, 2.3446e-05,\n 1.9446e-05, 3.1208e-05, 2.9371e-05, 3.2598e-05, 3.4621e-05, 4.9166e-05,\n 3.4530e-05, 4.4773e-05, 2.6328e-05, 6.3873e-05, 3.4004e-05, 3.6393e-05,\n 3.2055e-05, 2.9105e-05, 3.4458e-05, 1.6827e-05, 1.8651e-05, 4.8042e-05,\n 5.2483e-05, 5.9089e-05, 3.0505e-05, 5.7117e-05, 4.6488e-05, 2.7005e-05,\n 3.8502e-05, 4.0873e-05, 5.0613e-05, 4.5154e-05, 4.0699e-05, 3.4097e-05,\n 3.1866e-05, 3.4287e-05, 3.4528e-05, 9.3914e-05, 2.7562e-05, 5.4898e-05,\n 2.6576e-05, 3.3357e-05, 3.7836e-05, 2.3280e-05, 4.3777e-05, 6.8110e-05,\n 4.3585e-05, 4.6163e-05, 3.9144e-05, 4.6842e-05, 4.4859e-05, 5.2112e-05,\n 4.5305e-05, 4.9414e-05, 1.8356e-05, 8.1245e-05, 4.0536e-05, 4.7962e-05,\n 2.4400e-05, 4.0512e-05, 3.7681e-05, 3.0213e-05, 5.1946e-05, 4.3545e-05,\n 8.6178e-05, 5.4133e-05, 2.7898e-05, 4.9340e-05, 1.3658e-04, 3.1842e-05,\n 4.9037e-05, 3.4537e-05, 4.2678e-05, 4.6076e-05, 5.2217e-05, 3.9211e-05,\n 4.0889e-05, 4.0256e-05, 2.0203e-05, 2.4330e-05, 3.5770e-05, 6.4804e-05,\n 2.7031e-05, 6.0207e-05, 5.7677e-05, 1.9589e-05, 2.7768e-05, 3.6496e-05,\n 4.9110e-05, 2.5229e-05, 3.6176e-05, 5.5223e-05, 2.9698e-05, 5.8378e-05,\n 6.0031e-05, 2.6949e-05, 2.1507e-05, 3.4744e-05, 6.3986e-05, 2.8520e-05,\n 2.0359e-05, 3.0388e-05, 4.3216e-05, 3.3462e-05, 3.4439e-05, 4.3800e-05,\n 1.1375e-04, 3.6736e-05, 3.8425e-05, 2.2306e-05, 7.2298e-05, 3.4783e-05,\n 3.9583e-05, 4.5980e-05, 3.3319e-05, 5.8626e-05, 4.2566e-05, 1.7134e-05,\n 5.1180e-05, 4.1547e-05, 4.8038e-05, 2.8517e-05, 1.2042e-04, 2.0190e-05,\n 3.6570e-05, 4.0572e-05], device='cuda:0')"
14
+ },
15
+ "2": {
16
+ "step": "tensor(26278.)",
17
+ "exp_avg": "tensor([-7.6897e-03, 1.9002e-04, -3.7919e-03, -1.1010e-03, -4.4799e-03,\n -4.0783e-03, 4.9042e-03, -6.8787e-05, 9.0141e-04, -2.3462e-03,\n -6.4506e-03, -9.2155e-04, 3.8227e-03, -3.1806e-04, 3.9543e-03,\n 4.2391e-03, -3.0693e-03, -3.3535e-03, -2.5829e-03, -2.8740e-03,\n -4.0338e-04, -1.0561e-02, -2.1028e-03, -1.9356e-03, 2.8275e-02,\n -2.5178e-03, -1.9809e-03, 3.1075e-04, -1.2820e-03, -2.0885e-03,\n 2.0361e-03, 5.2958e-03, -9.7456e-04, -5.0198e-03, -2.0802e-03,\n -3.5772e-03, 3.2736e-03, 9.0159e-04, -5.5991e-04, -3.3889e-02,\n 1.5967e-03, 1.1464e-03, -1.0919e-04, -5.5948e-04, -2.2748e-03,\n -4.9087e-03, 2.1780e-03, 9.0738e-04, -7.9725e-04, -9.7702e-04,\n -2.8848e-03, -1.8467e-04, 4.7576e-03, 3.1647e-03, -9.0313e-04,\n -3.4795e-04, 2.2179e-03, 1.3697e-04, 5.8712e-03, -5.9429e-04,\n -3.8632e-03, 3.6814e-03, -8.0052e-03, -3.4080e-03, 1.0508e-02,\n -1.8191e-03, 2.5392e-03, 7.3738e-03, 2.4579e-03, 4.9707e-03,\n 2.4608e-03, -4.7238e-03, 1.1113e-03, -8.5502e-04, 2.8026e-03,\n -1.8994e-03, 3.3191e-03, -1.7317e-03, -2.8038e-03, -4.5204e-03,\n -1.2450e-04, -5.1235e-03, -6.8910e-04, -1.3689e-03, 3.6294e-03,\n -7.0895e-03, 5.8045e-03, -2.9737e-03, 6.4892e-03, -8.2072e-03,\n -8.0150e-04, 3.1849e-04, 5.5366e-03, -9.2619e-05, -4.2826e-04,\n 6.3916e-03, -1.6302e-03, -2.8686e-03, -3.0843e-03, -1.7819e-03,\n -9.8842e-03, 3.2889e-03, -2.8518e-03, -4.2795e-03, -4.4306e-04,\n 6.0393e-04, 4.9586e-03, -1.9567e-03, -9.9892e-04, -5.3295e-04,\n -1.0190e-02, 9.0825e-05, -1.6650e-03, -9.5086e-04, -1.2821e-03,\n -5.2874e-05, -1.0800e-03, -5.6735e-04, 6.7850e-04, 2.2350e-03,\n 3.3230e-03, 2.6119e-03, -3.5875e-03, 1.3837e-04, 1.4850e-02,\n -3.9731e-03, -4.8869e-03, -7.4785e-03, -8.6738e-04, -8.0380e-03,\n 6.2141e-03, 5.2514e-03, -2.0435e-03, -4.5211e-03, -4.5456e-03,\n -1.8783e-05, -2.3254e-03, 5.6319e-03, 1.9955e-03, 5.3915e-03,\n 1.0600e-03, 9.7256e-04, -1.9966e-03, -6.6816e-03, 2.4112e-03,\n -5.6374e-03, 1.7696e-03, 1.1231e-03, -5.9526e-03, -2.8059e-03,\n 3.3600e-03, 7.1248e-04, -3.9101e-03, 3.9061e-03, -4.7933e-03,\n 2.3497e-03, 3.3142e-03, 3.7553e-03, -6.6453e-03, 7.2848e-04,\n -3.0844e-03, -2.4988e-03, -1.6643e-03, 5.0696e-03, 5.4606e-03,\n 3.3199e-03, -4.5242e-03, -5.1049e-03, 4.8543e-03, 1.3846e-03,\n 1.4397e-03, -3.3747e-04, 1.6214e-03, 2.9242e-03, -5.4663e-03,\n -4.2669e-04, -1.1326e-03, 2.4023e-03, -4.3499e-04, -5.0291e-03,\n -3.6815e-03, -6.4841e-03, 2.7877e-04, 2.5906e-04, -4.4925e-04,\n 1.3389e-03, -5.2328e-03, 1.0234e-03, -2.9638e-03, -1.5748e-03,\n -5.6052e-45, 1.4183e-04, -4.2790e-05, 1.6383e-03, 3.0280e-04,\n 1.9135e-03, 2.1457e-03, 3.9090e-04, -6.4591e-04, -3.0680e-03,\n 4.1851e-03, -2.4699e-03, 6.2495e-03, 2.6893e-03, -3.8839e-03,\n 7.0966e-03, -2.4329e-03, -4.4962e-03, 1.2913e-03, 5.4681e-04,\n 8.9995e-03, -8.0950e-03, 3.2605e-03, -1.2821e-03, -1.8154e-03,\n -1.0212e-03, 2.1151e-03, -1.1338e-03, 6.1974e-04, 1.8112e-03,\n 6.9953e-03, -2.8178e-03, 3.2669e-03, -1.9816e-03, -2.1640e-03,\n 3.5516e-03, -6.4110e-04, 1.1903e-03, -8.1805e-03, 1.0460e-03,\n -5.4634e-04, -1.3723e-03, 1.4433e-04, 4.7723e-03, 2.6811e-03,\n 2.8239e-03, 4.1431e-03, -7.1149e-04, 2.3871e-03, -3.4866e-04,\n -5.7471e-03, 1.1618e-03, 1.9419e-04, 5.2411e-06, -3.7259e-04,\n -4.0847e-04, 2.1822e-03, -2.8951e-03, -2.4294e-03, -4.7489e-04,\n 1.2178e-03, 3.9843e-04, 9.7964e-04, 3.4450e-04, 8.1012e-04,\n -3.9275e-04, 1.5628e-03, 3.2135e-03, 3.4853e-03, 3.3704e-03,\n 8.7933e-04, -2.4996e-03, -2.5893e-03, -1.2779e-03, 1.2808e-03,\n 1.6790e-03, 1.9290e-03, -1.1793e-03, -4.2645e-03, 6.3778e-04,\n -2.9346e-03, 3.6725e-03, -3.1880e-03, 6.3794e-03, -3.4341e-03,\n 4.6392e-03, -4.7530e-03, 2.0856e-04, 6.5580e-04, 8.1732e-04,\n -3.9661e-04, 2.9778e-03, -5.4019e-03, 5.4643e-04, 9.5333e-04,\n 7.2239e-03, 3.0801e-03, 1.7399e-04, 5.6052e-45, 2.5370e-03,\n 4.2282e-03, 1.7329e-03, -3.3248e-03, -2.0439e-03, 5.6834e-04,\n 5.6052e-45, -2.1950e-03, -8.2915e-03, -1.2414e-03, 5.5571e-03,\n -1.0467e-02, -1.3700e-03, 3.1299e-04, 6.6520e-04, -5.4837e-03,\n -4.7761e-04, 4.4246e-03, -9.6390e-03, 7.4848e-03, 4.1400e-03,\n -8.8003e-04, -7.6673e-03, 3.5470e-03, 3.2961e-03, 4.1316e-03,\n 6.0465e-03, -5.9705e-04, 5.8381e-04, 2.5858e-03, -5.3097e-03,\n 8.2673e-04, -4.0445e-03, 9.7407e-04, -6.5773e-03, -8.6435e-04,\n -5.6138e-03, -2.3923e-03, 2.2408e-03, -7.6978e-03, -2.1559e-03,\n -1.9360e-03, -2.2504e-03, 1.5798e-03, 1.7191e-03, -1.1967e-02,\n -1.1843e-03, 1.1768e-03, 3.0776e-03, 3.2253e-04, -3.4615e-03,\n 3.1286e-03, -6.6938e-04, -2.1903e-04, -3.4288e-03, 6.2140e-04,\n -1.2623e-02, 2.1201e-03, 6.5232e-04, -2.8161e-03, -1.3075e-03,\n -5.2972e-03, -8.8835e-03, 5.7160e-04, 6.7383e-03, -1.5180e-02,\n -2.2609e-03, 7.0903e-04, 2.8464e-03, 2.4034e-03, -3.2253e-04,\n 9.2077e-04, 2.6394e-03, -7.3484e-04, -4.5022e-04, 2.5028e-03,\n 2.1101e-03, 1.0345e-04, 5.1472e-03, 5.3825e-03, 9.8330e-04,\n -4.4323e-04, 5.6052e-45, 3.8240e-04, -9.5812e-04, 9.4104e-04,\n 2.6047e-03, -1.9735e-03, 1.3085e-03, 1.5765e-03, -1.2980e-03,\n 3.5775e-03, 1.3482e-04, 3.7052e-03, -1.4145e-03, 4.4826e-03,\n -7.7275e-04, 6.1067e-04, -2.5364e-03, 4.5707e-03, 5.6880e-04,\n 7.4673e-04, -7.1530e-04, 6.8858e-04, 1.3302e-03, 7.2082e-03,\n -6.6398e-03, -5.4456e-04, -3.2882e-03, 1.9302e-03, -3.2490e-03,\n -2.4236e-04, 1.0021e-03, 1.1533e-03, -4.9945e-03, -7.6344e-03,\n -6.0463e-04, 1.4308e-03, -7.4228e-04, 1.4440e-03, -2.6348e-03,\n 2.3838e-03, 1.2605e-03, 3.2341e-04, 5.8042e-03, 9.2218e-04,\n 7.0968e-04, -1.1575e-03, -6.3958e-03, 4.9233e-03, 6.3607e-03,\n 4.0359e-03, 2.7628e-04, 1.1330e-02, -4.2468e-03, 1.7179e-03,\n 1.2058e-03, 6.6121e-04, 3.5221e-03, 3.8607e-03, -1.0031e-03,\n -4.7593e-03, 7.6702e-03, -3.3132e-04, 2.2605e-03, 2.0164e-03,\n -3.3043e-03, 4.8487e-03, 6.2175e-05, -1.4954e-03, -3.3648e-05,\n 4.0112e-03, -2.7233e-03, -2.0292e-03, -2.8153e-03, -1.9378e-03,\n 3.1097e-03, 2.7655e-03, 5.0108e-03, 1.0286e-04, -1.5381e-03,\n 2.1585e-03, 6.1324e-04, 2.2891e-03, 1.7539e-03, 1.7378e-02,\n -1.9632e-03, 6.7004e-03, -3.3221e-04, -1.1214e-02, 3.2649e-03,\n -2.1497e-03, -5.4334e-03, 3.4029e-04, -7.2943e-04, 1.6074e-03,\n 2.6689e-03, -5.7172e-04, -5.2785e-03, 2.1119e-03, 3.8333e-03,\n -1.1772e-02, -6.5394e-04, 3.6144e-03, -2.1780e-03, -2.8296e-03,\n 9.0653e-03, 1.5606e-03, -3.5062e-03, 2.0327e-03, 2.7970e-03,\n 3.5893e-03, 2.0356e-03, -5.5813e-04, -2.6629e-03, 1.2248e-02,\n -1.3759e-03, -7.3129e-04, -3.7122e-04, 3.6146e-03, 6.9491e-03,\n -7.6096e-03, 2.5058e-04, 3.1264e-03, -2.4796e-03, -3.3832e-03,\n -8.1455e-03, -8.0127e-03, -8.4875e-03, 2.6307e-04, 1.6291e-03,\n 1.2058e-03, 2.3722e-03, 2.7769e-03, -1.1976e-03, -1.3049e-03,\n 2.2457e-03, -4.6329e-03, -1.3077e-03, -1.2352e-03, 6.1930e-03,\n -5.8356e-03, 2.5159e-04], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([4.2681e-04, 2.2578e-04, 1.9629e-04, 2.4193e-04, 1.5378e-04, 2.8761e-04,\n 2.4739e-04, 1.8907e-04, 1.3020e-04, 1.4957e-04, 2.7328e-04, 3.3837e-04,\n 1.6066e-04, 2.2915e-04, 3.1122e-04, 3.3439e-04, 1.0592e-04, 4.7406e-04,\n 5.0899e-05, 1.7811e-04, 1.9958e-04, 2.0498e-04, 2.2116e-04, 1.6253e-04,\n 4.6580e-04, 1.4532e-04, 5.2571e-04, 1.4093e-04, 2.1847e-04, 1.1299e-04,\n 2.3727e-04, 2.6315e-04, 1.0182e-04, 4.7228e-04, 1.5138e-04, 1.7388e-04,\n 1.3180e-04, 1.6685e-04, 2.4985e-04, 5.3250e-04, 2.8497e-04, 1.5379e-03,\n 1.1235e-04, 1.6239e-04, 2.2643e-04, 4.1994e-04, 2.8934e-04, 1.3355e-04,\n 1.8449e-04, 1.7838e-04, 1.7715e-04, 4.5472e-04, 2.9494e-04, 2.5578e-04,\n 4.2108e-04, 1.7345e-04, 2.0950e-04, 1.1110e-04, 1.6347e-04, 1.7076e-04,\n 1.6419e-04, 9.8652e-05, 2.2481e-04, 2.1422e-04, 2.5609e-04, 2.5276e-04,\n 2.8337e-04, 3.2865e-04, 1.7320e-04, 2.4113e-04, 1.2773e-04, 1.8778e-04,\n 1.4958e-04, 4.5049e-04, 1.6273e-04, 1.3688e-04, 4.8157e-04, 2.6546e-04,\n 2.0039e-04, 2.2547e-04, 1.4572e-04, 1.4239e-04, 1.4480e-04, 3.7350e-04,\n 1.5987e-04, 3.2794e-04, 2.8592e-04, 8.5171e-05, 2.3328e-04, 3.9827e-04,\n 1.3741e-04, 2.7742e-04, 6.4189e-04, 4.4165e-04, 1.6828e-04, 2.6445e-04,\n 1.3244e-04, 2.1367e-04, 2.8844e-04, 2.7343e-04, 2.8711e-04, 1.2901e-04,\n 1.4510e-04, 2.8332e-04, 7.7633e-05, 1.8815e-04, 1.6696e-04, 1.6709e-04,\n 9.9884e-05, 1.1579e-04, 4.5122e-04, 1.1941e-04, 7.7555e-04, 2.0201e-04,\n 9.9338e-05, 6.0130e-05, 3.1315e-04, 4.0602e-04, 1.2880e-04, 2.1691e-04,\n 1.4463e-04, 1.1253e-04, 3.1768e-04, 4.2787e-04, 2.2307e-04, 1.9671e-04,\n 1.0466e-04, 2.7889e-04, 4.2272e-04, 2.0985e-04, 1.5503e-04, 3.1476e-04,\n 3.0043e-04, 1.4211e-04, 3.5665e-04, 2.7847e-04, 1.1780e-04, 9.0188e-05,\n 4.5188e-04, 2.2500e-04, 1.1338e-04, 8.1267e-05, 1.8394e-04, 7.4761e-04,\n 2.6025e-04, 2.1443e-04, 2.6538e-04, 1.2105e-04, 3.3005e-04, 6.1965e-04,\n 1.0642e-04, 1.5200e-04, 5.6451e-05, 1.2552e-04, 2.5047e-04, 2.6330e-04,\n 1.4829e-04, 1.7347e-04, 3.4550e-04, 1.8680e-04, 1.5094e-04, 2.8021e-04,\n 5.0730e-04, 1.6295e-04, 2.4163e-04, 2.2093e-04, 3.3109e-04, 1.0108e-04,\n 2.0430e-04, 4.6426e-04, 2.3049e-04, 2.1252e-04, 1.3304e-04, 1.9834e-04,\n 2.8892e-04, 1.5515e-04, 1.3170e-04, 9.7939e-05, 1.0034e-04, 3.2881e-04,\n 1.8068e-04, 2.6555e-04, 3.5464e-04, 2.2047e-04, 1.0749e-04, 2.2586e-04,\n 3.5699e-04, 2.2423e-04, 2.7836e-04, 1.4032e-04, 1.7503e-14, 1.9070e-04,\n 1.1483e-04, 9.6582e-05, 1.5457e-04, 1.5911e-04, 1.3233e-04, 9.9510e-05,\n 1.3691e-04, 1.6473e-04, 1.3959e-04, 1.8435e-04, 2.4616e-04, 2.0826e-04,\n 1.0465e-04, 3.2950e-04, 1.9618e-04, 9.9652e-05, 1.6039e-04, 1.1795e-04,\n 2.6894e-04, 3.0463e-04, 2.2974e-04, 1.7620e-04, 2.4943e-05, 3.0500e-04,\n 1.2577e-04, 1.3788e-04, 2.2438e-04, 2.5576e-04, 3.3979e-04, 3.3312e-04,\n 1.5559e-04, 9.4790e-05, 1.0185e-04, 1.1717e-04, 1.6008e-04, 9.4443e-05,\n 4.5649e-04, 1.7070e-04, 2.5923e-04, 1.8651e-04, 1.8853e-04, 2.7983e-04,\n 1.9440e-04, 1.8747e-04, 1.9253e-04, 2.4139e-04, 1.6224e-04, 2.2206e-04,\n 1.2038e-04, 1.3033e-04, 2.1117e-04, 3.3103e-04, 1.3320e-04, 2.2230e-04,\n 1.7666e-04, 8.7891e-05, 1.4278e-04, 2.2525e-04, 1.9912e-04, 2.0534e-04,\n 1.3823e-04, 9.4360e-05, 8.1853e-05, 1.7088e-04, 1.4660e-04, 2.4591e-04,\n 1.9310e-04, 3.1438e-04, 3.9485e-04, 1.1953e-04, 1.9814e-04, 1.1085e-04,\n 2.6704e-04, 4.1126e-04, 8.4002e-04, 1.4920e-04, 1.5973e-04, 1.3482e-04,\n 1.5565e-04, 1.7492e-04, 1.1163e-04, 2.7721e-04, 1.5859e-04, 2.5587e-04,\n 3.8644e-04, 8.0136e-05, 1.5941e-04, 1.8157e-04, 1.2345e-04, 1.5864e-04,\n 1.8997e-04, 1.5967e-04, 1.9589e-04, 2.3682e-04, 1.8533e-04, 1.7430e-04,\n 1.1060e-17, 2.8843e-04, 1.4768e-04, 1.1879e-04, 2.7759e-04, 2.3830e-04,\n 2.8636e-04, 1.0843e-16, 2.4773e-04, 2.1556e-04, 2.3702e-04, 4.5350e-04,\n 3.9888e-04, 3.6151e-04, 1.5884e-04, 1.0717e-04, 2.4757e-04, 2.1819e-04,\n 2.0930e-04, 2.8320e-04, 3.7168e-04, 1.7157e-04, 1.7599e-04, 3.3577e-04,\n 4.1276e-04, 5.8047e-05, 7.2290e-05, 2.6615e-04, 1.2553e-04, 2.0207e-04,\n 4.9973e-05, 1.8085e-04, 1.5154e-04, 4.4495e-04, 9.8180e-05, 2.2419e-04,\n 1.9579e-04, 4.5955e-04, 1.4870e-04, 2.2423e-04, 1.4069e-04, 2.3918e-04,\n 2.8784e-04, 1.3122e-04, 1.4255e-04, 1.2487e-04, 3.3740e-04, 2.4316e-04,\n 1.0971e-04, 9.7759e-05, 2.1478e-04, 4.3745e-04, 1.5749e-04, 1.3345e-04,\n 1.9208e-04, 3.0580e-04, 2.0346e-04, 3.1254e-04, 8.7116e-05, 1.0463e-04,\n 1.8508e-04, 2.1409e-04, 1.2899e-04, 6.0488e-04, 1.0235e-04, 2.4997e-04,\n 8.6239e-04, 2.2963e-04, 2.0067e-04, 2.3611e-04, 1.5285e-04, 9.2017e-05,\n 4.4447e-04, 7.9059e-05, 1.6130e-04, 1.9010e-04, 1.1004e-04, 1.2205e-04,\n 1.7148e-04, 1.7277e-04, 2.7268e-04, 1.5651e-04, 1.5951e-04, 2.2799e-17,\n 2.6184e-04, 1.2847e-04, 1.4841e-04, 1.4065e-04, 2.9401e-04, 1.6339e-04,\n 2.3885e-04, 1.6361e-04, 1.9683e-04, 2.7104e-04, 2.3383e-04, 2.2503e-04,\n 1.4170e-04, 3.8583e-04, 2.2714e-04, 1.4499e-04, 2.1863e-04, 1.1920e-04,\n 1.7365e-04, 9.5289e-05, 1.3578e-04, 3.0863e-04, 2.1161e-04, 3.0579e-04,\n 1.2197e-04, 3.6598e-04, 1.5113e-04, 2.8258e-04, 1.3341e-04, 1.8922e-04,\n 1.2431e-04, 1.5563e-04, 3.3495e-04, 6.3560e-05, 7.3914e-05, 2.7101e-04,\n 1.8853e-04, 2.6135e-04, 1.3708e-04, 1.7634e-04, 2.7629e-04, 5.2283e-04,\n 1.3039e-04, 1.9229e-04, 2.8416e-04, 3.7065e-04, 3.9019e-04, 1.3894e-04,\n 1.9105e-04, 1.0062e-04, 4.0335e-04, 2.1459e-04, 1.6950e-04, 1.3253e-04,\n 1.3471e-04, 1.7166e-04, 1.7907e-04, 1.0929e-04, 1.7090e-04, 3.4173e-04,\n 2.8644e-04, 2.3352e-04, 2.1857e-04, 3.6249e-04, 4.3151e-04, 3.3512e-04,\n 2.8108e-04, 2.2818e-04, 1.9997e-04, 3.8592e-04, 1.4978e-04, 2.1576e-04,\n 1.7393e-04, 1.8889e-04, 2.5701e-04, 2.0226e-04, 2.7115e-04, 1.5584e-04,\n 1.6721e-04, 2.8978e-04, 2.2696e-04, 4.1823e-04, 5.6481e-04, 1.3674e-04,\n 4.9898e-04, 1.3867e-04, 1.4342e-04, 1.7388e-04, 1.6991e-04, 1.6706e-04,\n 1.5983e-04, 3.1463e-04, 1.4296e-04, 1.1337e-04, 3.2491e-04, 2.7933e-04,\n 1.2723e-04, 2.5434e-04, 2.8859e-04, 1.4578e-04, 1.4225e-04, 2.0077e-04,\n 1.7069e-04, 1.7173e-04, 2.0366e-04, 4.0027e-04, 1.0713e-04, 1.9271e-04,\n 3.8340e-04, 2.1592e-04, 4.8178e-04, 1.7532e-04, 3.5491e-04, 8.8265e-05,\n 8.9885e-05, 1.7631e-04, 2.2052e-04, 2.4549e-04, 3.4577e-04, 9.7917e-05,\n 4.6462e-04, 1.2589e-04, 1.5658e-04, 1.4000e-04, 4.4057e-04, 2.4177e-04,\n 1.9747e-04, 1.5840e-04, 3.6055e-04, 2.2856e-04, 3.0921e-04, 1.6108e-04,\n 4.2949e-04, 1.9884e-04, 2.2121e-04, 2.2280e-04, 2.4335e-04, 6.6484e-04,\n 1.7923e-04, 1.8742e-04], device='cuda:0')"
19
+ },
20
+ "3": {
21
+ "step": "tensor(26278.)",
22
+ "exp_avg": "tensor([-2.7523e-03, -5.6686e-04, -1.8479e-03, -7.3129e-04, -2.3998e-03,\n -2.5981e-03, 2.5661e-03, 1.9926e-04, 2.1660e-04, -1.0168e-03,\n -3.0800e-03, -6.2824e-04, 2.1399e-03, 2.8172e-06, 1.6066e-03,\n 1.5021e-03, -1.6586e-03, -1.8795e-03, -8.3590e-04, -1.5290e-03,\n -1.5748e-04, -5.7921e-03, -1.2298e-03, -8.4254e-04, 1.3726e-02,\n -1.6001e-03, -3.9197e-04, 3.2737e-04, -4.4128e-04, -6.1220e-04,\n 8.0427e-04, 2.7927e-03, -3.0573e-04, -2.1661e-03, -1.6694e-03,\n -1.6818e-03, 1.1731e-03, 4.6243e-04, 1.4071e-04, -2.0425e-02,\n 1.0922e-03, 4.1351e-04, 1.5816e-04, -3.9963e-04, -1.2891e-03,\n -2.3477e-03, 7.9115e-04, 5.3878e-04, -4.5728e-04, -5.4648e-04,\n -1.1911e-03, 5.1055e-05, 2.9284e-03, 1.5540e-03, -2.0510e-04,\n -4.9532e-04, 7.6833e-05, 2.5310e-05, 2.8944e-03, 5.0334e-06,\n -1.6253e-03, 1.8330e-03, -3.7805e-03, -2.0965e-03, 6.2533e-03,\n -9.5983e-04, 9.1422e-04, 3.3742e-03, 1.8510e-03, 1.9261e-03,\n 1.0938e-03, -2.3539e-03, 8.2707e-04, -1.0955e-03, 1.5104e-03,\n -8.9593e-04, 1.0908e-03, -7.2251e-04, -1.1838e-03, -1.5272e-03,\n -1.7230e-04, -1.8311e-03, -3.2864e-04, 1.6384e-04, 2.2392e-03,\n -2.5939e-03, 3.6079e-03, -1.6705e-03, 3.5953e-03, -3.6039e-03,\n -6.8278e-04, 1.0046e-04, 2.3628e-03, -4.2687e-07, -1.1669e-04,\n 2.6787e-03, -1.1254e-03, -1.2813e-03, -2.5755e-03, -7.4021e-04,\n -4.7306e-03, 2.0329e-03, -1.4299e-03, -2.2254e-03, -4.1772e-04,\n 2.5468e-04, 2.4748e-03, -7.1898e-04, -1.0429e-03, 1.5595e-04,\n -4.8726e-03, 3.9163e-04, -2.3739e-04, -5.1717e-04, -2.0208e-04,\n 3.2033e-05, -5.5387e-04, 1.0024e-05, 7.6103e-04, 1.0883e-03,\n 1.8177e-03, 1.2682e-03, -1.5711e-03, 1.5861e-04, 6.8607e-03,\n -2.3318e-03, -3.6350e-03, -3.6288e-03, -8.1009e-04, -4.4136e-03,\n 3.7844e-03, 2.5563e-03, -5.2595e-04, -2.1015e-03, -2.4633e-03,\n 2.7108e-04, -1.2210e-03, 2.7529e-03, 1.2377e-03, 2.7220e-03,\n 5.9671e-04, 7.4098e-04, -8.2175e-04, -3.3271e-03, 9.7650e-04,\n -3.0417e-03, 9.9043e-04, 7.3697e-04, -3.4268e-03, -1.1790e-03,\n 1.7785e-03, 7.9697e-04, -1.9767e-03, 3.1858e-03, -1.5034e-03,\n 1.5312e-03, 1.4169e-03, 1.8280e-03, -2.8073e-03, -6.1525e-05,\n -1.6766e-03, -9.4016e-04, -2.1040e-05, 2.8914e-03, 2.4666e-03,\n 1.5937e-03, -2.3174e-03, -3.0758e-03, 2.0950e-03, 7.0087e-04,\n 5.9192e-04, -1.4411e-05, 6.5735e-04, 1.5459e-03, -2.1787e-03,\n 4.3711e-05, -4.8086e-04, 1.0212e-03, -4.9783e-04, -2.4318e-03,\n -1.6627e-03, -3.5138e-03, 8.5567e-05, -1.1318e-04, 8.0459e-06,\n 1.0635e-03, -2.9356e-03, 1.1940e-04, -1.3326e-03, -4.2219e-04,\n 5.6052e-45, 4.5053e-04, -3.5355e-05, 7.1903e-04, 1.3053e-04,\n 1.7362e-03, 1.3109e-03, 4.2150e-04, -9.3864e-04, -1.8106e-03,\n 2.3089e-03, -1.0122e-03, 2.6046e-03, 1.1175e-03, -1.8127e-03,\n 4.1851e-03, -8.7380e-04, -2.6769e-03, 5.8594e-04, -5.7939e-05,\n 4.4239e-03, -3.6697e-03, 1.5902e-03, 1.7409e-05, -2.0469e-03,\n -3.1707e-04, 1.2592e-03, -4.8896e-04, 7.5827e-04, 1.1714e-03,\n 2.1460e-03, -1.6574e-03, 1.7887e-03, -1.3338e-03, -1.2000e-03,\n 1.8193e-03, -3.0428e-04, 8.4578e-04, -3.8938e-03, 1.5714e-04,\n -4.2177e-04, -1.1963e-03, 5.6752e-05, 2.2374e-03, 1.6448e-03,\n 1.5634e-03, 1.6374e-03, -6.2069e-05, 1.6340e-03, -1.9358e-04,\n -2.4360e-03, 8.0558e-04, 2.9462e-04, -1.7760e-04, -1.0695e-04,\n -8.6461e-05, 1.4791e-03, -2.6524e-03, -1.0196e-03, -1.0064e-04,\n 8.9449e-04, -3.7074e-04, 2.6226e-04, 3.3626e-04, 7.9795e-04,\n -7.3025e-04, 7.4664e-04, 9.6212e-04, 1.5963e-03, 1.7879e-03,\n 7.5527e-04, -1.4285e-03, -1.4605e-03, -9.6398e-04, 7.8532e-04,\n 5.5088e-04, 9.3599e-04, -3.6506e-04, -2.3156e-03, 6.9919e-05,\n -1.7902e-03, 1.5214e-03, -1.3189e-03, 3.3129e-03, -1.2029e-03,\n 2.2682e-03, -2.5502e-03, 8.9352e-05, 3.4000e-04, 4.3785e-04,\n -3.6993e-05, 1.8065e-03, -3.3978e-03, -5.4643e-04, 4.4621e-04,\n 3.4835e-03, 1.5748e-03, -5.9549e-04, 5.6052e-45, 1.5784e-03,\n 2.0907e-03, 8.6417e-04, -1.5001e-03, -1.0112e-03, 4.0270e-04,\n 5.6052e-45, -1.2189e-03, -5.2044e-03, -1.2069e-03, 3.4057e-03,\n -3.9190e-03, -8.3766e-04, 3.1026e-04, 1.6195e-04, -3.4522e-03,\n -6.4380e-05, 2.1752e-03, -3.9278e-03, 3.0290e-03, 2.0074e-03,\n -5.8819e-04, -3.5147e-03, 2.3860e-03, 1.8090e-03, 2.7708e-03,\n 3.1711e-03, -1.6702e-04, 6.7523e-04, 1.5746e-03, -2.4567e-03,\n 2.3327e-04, -1.7288e-03, 9.3761e-04, -4.2186e-03, -5.1842e-04,\n -2.3097e-03, -1.1797e-03, 1.4718e-03, -3.0539e-03, -6.7396e-04,\n -1.0709e-03, -1.2192e-03, 6.6513e-04, 6.9962e-04, -6.0786e-03,\n -1.1431e-03, 5.5976e-04, 1.8505e-03, 2.0855e-04, -1.8167e-03,\n 1.4430e-03, -4.2039e-04, -5.7711e-04, -1.4889e-03, 1.0683e-04,\n -7.1356e-03, 1.3134e-03, 4.5287e-04, -1.1742e-03, -4.4635e-04,\n -2.2288e-03, -3.7693e-03, 1.6139e-04, 3.3834e-03, -5.4377e-03,\n -7.9113e-04, 2.4787e-04, 1.4616e-03, 8.6773e-04, -6.8837e-04,\n 7.3964e-04, 1.8639e-03, -6.0477e-04, 1.4092e-05, 1.4639e-03,\n 1.2689e-03, 9.9172e-05, 2.5685e-03, 2.4634e-03, 6.2526e-04,\n -1.7120e-04, 5.6052e-45, 2.8921e-04, -1.2498e-04, 9.7671e-04,\n 1.1409e-03, -1.0090e-03, 1.1785e-03, 6.3661e-04, -4.5994e-04,\n 1.3792e-03, -1.4268e-04, 2.1473e-03, -1.2512e-03, 2.4819e-03,\n -2.8690e-04, 3.6257e-04, -1.0053e-03, 2.4870e-03, 2.6576e-04,\n 2.7436e-04, -2.8099e-04, 1.9309e-04, 4.8955e-04, 3.0773e-03,\n -3.0657e-03, -1.6492e-04, -1.5344e-03, 1.0951e-03, -1.6482e-03,\n -2.0500e-04, 4.1826e-04, 2.9458e-04, -2.2273e-03, -3.2608e-03,\n -2.9904e-04, 7.4668e-04, -1.7866e-04, 8.5960e-04, -1.0096e-03,\n 1.1024e-03, 9.3280e-04, 1.5257e-04, 1.8402e-03, 3.9945e-04,\n 6.9190e-04, -6.1235e-04, -3.4713e-03, 2.4138e-03, 2.9601e-03,\n 1.8016e-03, 1.8960e-05, 5.3047e-03, -2.5749e-03, 6.1589e-04,\n 8.4060e-04, 6.1022e-04, 1.9123e-03, 1.8071e-03, -4.5020e-04,\n -2.5466e-03, 4.7497e-03, 3.8290e-04, 1.3281e-03, 2.5406e-04,\n -1.4030e-03, 2.4454e-03, 1.7857e-04, -7.9592e-04, 1.0865e-04,\n 1.6048e-03, -1.2302e-03, -9.7997e-04, -1.5330e-03, -8.8618e-04,\n 1.5335e-03, 1.0565e-03, 1.3820e-03, 1.6931e-04, -1.2377e-03,\n 9.7161e-04, 1.1624e-03, 1.3607e-03, 1.0472e-03, 9.6302e-03,\n -1.1267e-03, 2.0304e-03, -8.9693e-05, -6.3565e-03, 2.1371e-03,\n -1.6645e-03, -2.4930e-03, 5.7473e-04, -1.3533e-04, 4.6203e-04,\n 1.3018e-03, 4.5324e-05, -2.6194e-03, 1.0359e-03, 1.7983e-03,\n -5.9117e-03, -3.5619e-04, 1.3643e-03, -1.0064e-03, -1.6907e-03,\n 4.0769e-03, 7.4109e-04, -1.6602e-03, 1.1856e-03, 1.3896e-03,\n 1.6593e-03, 1.2644e-03, -5.3332e-04, -2.0642e-03, 6.8914e-03,\n -5.2905e-04, -1.4706e-04, 1.4646e-04, 1.1994e-03, 2.4947e-03,\n -3.1938e-03, 3.8931e-04, 1.6247e-03, -1.1923e-03, -1.7192e-03,\n -4.1645e-03, -4.4725e-03, -3.4466e-03, -2.6115e-04, 8.3099e-04,\n -6.7644e-05, 1.1688e-03, 1.1394e-03, -3.0232e-04, -5.9506e-04,\n 1.4902e-03, -2.4045e-03, -4.1974e-04, -5.8986e-04, 1.7601e-03,\n -2.9725e-03, 8.5357e-05], device='cuda:0')",
23
+ "exp_avg_sq": "tensor([5.3631e-05, 7.4193e-05, 6.6082e-05, 5.6424e-05, 4.0612e-05, 8.3921e-05,\n 7.6514e-05, 3.3950e-05, 3.6209e-05, 3.9927e-05, 7.2293e-05, 5.7532e-05,\n 3.7590e-05, 6.4462e-05, 9.6340e-05, 5.1532e-05, 4.3005e-05, 1.4162e-04,\n 1.4324e-05, 5.3232e-05, 3.9057e-05, 5.3416e-05, 6.1497e-05, 4.6389e-05,\n 8.5082e-05, 3.5152e-05, 1.0742e-04, 5.0598e-05, 5.5495e-05, 3.3078e-05,\n 4.7452e-05, 6.2035e-05, 2.7387e-05, 1.1006e-04, 5.1626e-05, 4.1627e-05,\n 2.9299e-05, 5.7629e-05, 8.9925e-05, 1.5679e-04, 5.9807e-05, 2.0952e-04,\n 3.8214e-05, 3.5408e-05, 6.5346e-05, 8.1028e-05, 6.3462e-05, 2.8758e-05,\n 3.5037e-05, 3.5460e-05, 3.9887e-05, 5.6214e-05, 1.1023e-04, 5.2491e-05,\n 7.0475e-05, 3.8724e-05, 5.4379e-05, 3.1664e-05, 4.7236e-05, 4.5456e-05,\n 3.0273e-05, 2.6521e-05, 3.9440e-05, 6.6922e-05, 8.5279e-05, 8.0939e-05,\n 6.1279e-05, 7.1758e-05, 4.8443e-05, 5.0353e-05, 3.4683e-05, 5.1947e-05,\n 4.1701e-05, 1.1713e-04, 4.4264e-05, 4.0233e-05, 1.4506e-04, 3.8153e-05,\n 4.7547e-05, 5.5238e-05, 4.6600e-05, 3.3990e-05, 3.7191e-05, 6.8274e-05,\n 5.1353e-05, 5.8688e-05, 1.0375e-04, 2.7339e-05, 6.6670e-05, 7.1676e-05,\n 4.6705e-05, 3.9744e-05, 1.3803e-04, 1.4676e-04, 4.0791e-05, 5.1446e-05,\n 3.4658e-05, 6.0097e-05, 7.8223e-05, 5.2379e-05, 6.3002e-05, 4.3685e-05,\n 3.2955e-05, 6.5059e-05, 3.3658e-05, 4.4915e-05, 3.7309e-05, 3.1613e-05,\n 4.2595e-05, 2.9703e-05, 1.2177e-04, 3.0742e-05, 1.3735e-04, 3.9809e-05,\n 3.3129e-05, 2.0372e-05, 8.1382e-05, 1.5594e-04, 4.1259e-05, 6.3846e-05,\n 4.6117e-05, 3.8457e-05, 7.5936e-05, 8.4330e-05, 4.9874e-05, 5.7112e-05,\n 3.3779e-05, 7.5584e-05, 7.5625e-05, 6.6402e-05, 5.0374e-05, 1.0742e-04,\n 6.2114e-05, 3.4808e-05, 1.0048e-04, 4.6571e-05, 3.3256e-05, 2.9239e-05,\n 1.2838e-04, 5.9950e-05, 3.3376e-05, 4.0854e-05, 4.8195e-05, 1.8028e-04,\n 4.5330e-05, 6.7425e-05, 5.5439e-05, 3.1052e-05, 1.0349e-04, 1.1369e-04,\n 3.9002e-05, 4.3923e-05, 3.3734e-05, 5.3191e-05, 4.3669e-05, 6.2106e-05,\n 3.3346e-05, 3.2480e-05, 7.4688e-05, 3.7567e-05, 5.2354e-05, 5.0663e-05,\n 8.8038e-05, 4.2181e-05, 6.1845e-05, 5.6792e-05, 9.7393e-05, 3.2436e-05,\n 6.0705e-05, 1.4304e-04, 5.6190e-05, 7.2192e-05, 2.9258e-05, 5.2055e-05,\n 4.3483e-05, 3.0471e-05, 3.3199e-05, 4.3579e-05, 3.5968e-05, 7.6652e-05,\n 4.5536e-05, 6.4170e-05, 1.3062e-04, 3.9969e-05, 1.7080e-05, 7.0040e-05,\n 7.0886e-05, 4.6511e-05, 8.0976e-05, 5.0199e-05, 3.0312e-16, 4.9506e-05,\n 2.4318e-05, 2.2951e-05, 4.7425e-05, 4.6727e-05, 3.1517e-05, 2.9693e-05,\n 3.5274e-05, 4.0569e-05, 4.2380e-05, 4.6069e-05, 4.4049e-05, 3.0511e-05,\n 3.2072e-05, 1.2522e-04, 4.3142e-05, 3.1859e-05, 3.7656e-05, 3.6899e-05,\n 6.8747e-05, 7.0149e-05, 5.2374e-05, 4.6052e-05, 1.9679e-05, 8.0624e-05,\n 4.2464e-05, 2.9777e-05, 5.4937e-05, 5.3434e-05, 6.4609e-05, 9.2458e-05,\n 3.2806e-05, 3.5657e-05, 3.2811e-05, 3.6881e-05, 4.6099e-05, 3.1678e-05,\n 9.4718e-05, 8.2003e-05, 7.5082e-05, 4.8719e-05, 5.4157e-05, 4.6180e-05,\n 3.9679e-05, 5.8041e-05, 3.4158e-05, 4.5981e-05, 5.2789e-05, 5.0990e-05,\n 2.7603e-05, 3.8280e-05, 4.0326e-05, 6.8787e-05, 2.1261e-05, 5.8893e-05,\n 6.0417e-05, 4.0616e-05, 3.5664e-05, 4.8085e-05, 7.0015e-05, 6.8960e-05,\n 3.4587e-05, 3.2709e-05, 3.1780e-05, 4.3868e-05, 4.3926e-05, 5.7242e-05,\n 5.8618e-05, 9.1735e-05, 6.4008e-05, 3.6475e-05, 4.8394e-05, 2.7319e-05,\n 5.7377e-05, 6.9432e-05, 1.3329e-04, 3.4376e-05, 2.8908e-05, 2.9985e-05,\n 3.3200e-05, 5.0267e-05, 3.2349e-05, 8.2762e-05, 3.2164e-05, 6.7923e-05,\n 1.1265e-04, 3.3444e-05, 5.2630e-05, 4.8181e-05, 3.5488e-05, 4.7686e-05,\n 5.3100e-05, 4.0231e-05, 5.3241e-05, 7.1065e-05, 4.8894e-05, 6.3723e-05,\n 9.2231e-19, 9.9410e-05, 4.5448e-05, 2.7754e-05, 7.9177e-05, 6.2462e-05,\n 7.3639e-05, 4.8517e-18, 5.3026e-05, 9.0032e-05, 4.7520e-05, 1.3923e-04,\n 6.8188e-05, 6.5991e-05, 5.5170e-05, 2.6916e-05, 7.6861e-05, 5.6157e-05,\n 4.0632e-05, 4.3185e-05, 7.2909e-05, 3.7964e-05, 4.6210e-05, 8.8670e-05,\n 1.0920e-04, 2.8371e-05, 3.4707e-05, 6.6802e-05, 3.0692e-05, 5.2009e-05,\n 1.3551e-05, 4.1993e-05, 5.1364e-05, 9.4225e-05, 3.3348e-05, 7.1447e-05,\n 5.8195e-05, 1.3851e-04, 4.7310e-05, 5.3576e-05, 2.8469e-05, 4.4474e-05,\n 6.3869e-05, 5.9229e-05, 3.4737e-05, 2.3304e-05, 9.3664e-05, 7.0516e-05,\n 1.4044e-05, 3.1687e-05, 4.1063e-05, 1.2710e-04, 3.3257e-05, 3.9910e-05,\n 5.4216e-05, 4.4582e-05, 5.1462e-05, 7.7471e-05, 2.8665e-05, 3.8955e-05,\n 5.0193e-05, 6.1030e-05, 2.8930e-05, 1.3925e-04, 3.1101e-05, 7.4974e-05,\n 1.3613e-04, 5.4504e-05, 3.9630e-05, 8.1717e-05, 3.5747e-05, 2.7700e-05,\n 1.3352e-04, 3.1718e-05, 3.8097e-05, 3.1553e-05, 3.5318e-05, 5.2073e-05,\n 2.8417e-05, 4.6984e-05, 8.4943e-05, 3.5165e-05, 4.6278e-05, 1.1041e-18,\n 4.3339e-05, 2.6935e-05, 3.9481e-05, 4.1804e-05, 9.0874e-05, 4.5496e-05,\n 6.2686e-05, 3.9442e-05, 5.0358e-05, 6.9915e-05, 6.4814e-05, 6.0577e-05,\n 4.6537e-05, 9.7411e-05, 4.6990e-05, 2.7583e-05, 5.5707e-05, 3.0355e-05,\n 4.0021e-05, 3.0402e-05, 4.1950e-05, 7.4508e-05, 5.7125e-05, 6.2213e-05,\n 3.7717e-05, 8.2714e-05, 3.9863e-05, 6.9076e-05, 3.7311e-05, 4.4080e-05,\n 3.2403e-05, 3.7349e-05, 5.3819e-05, 1.5279e-05, 1.9971e-05, 6.1248e-05,\n 5.5721e-05, 6.6949e-05, 3.4000e-05, 7.0149e-05, 5.3210e-05, 7.2588e-05,\n 3.4356e-05, 4.0651e-05, 7.0593e-05, 8.1387e-05, 1.2293e-04, 3.7556e-05,\n 3.7314e-05, 3.2656e-05, 7.5555e-05, 7.9592e-05, 3.9609e-05, 5.8355e-05,\n 3.9358e-05, 5.3423e-05, 4.9393e-05, 2.7818e-05, 4.4421e-05, 9.9515e-05,\n 7.0944e-05, 6.4693e-05, 5.1065e-05, 6.9234e-05, 9.4923e-05, 8.0611e-05,\n 6.8947e-05, 6.6199e-05, 4.2003e-05, 9.4660e-05, 5.4329e-05, 6.2319e-05,\n 2.9434e-05, 4.7032e-05, 5.6269e-05, 3.8284e-05, 7.9256e-05, 5.1109e-05,\n 8.6820e-05, 7.4219e-05, 4.3445e-05, 9.8628e-05, 1.6852e-04, 4.1434e-05,\n 7.2128e-05, 2.8122e-05, 4.7608e-05, 4.7728e-05, 5.9142e-05, 3.6836e-05,\n 4.9163e-05, 6.0496e-05, 2.4264e-05, 3.1434e-05, 5.8573e-05, 6.6481e-05,\n 3.7973e-05, 7.5163e-05, 7.5579e-05, 3.4006e-05, 3.7156e-05, 5.0437e-05,\n 6.4908e-05, 3.9745e-05, 4.9284e-05, 8.1348e-05, 3.1844e-05, 6.4201e-05,\n 6.5243e-05, 4.4220e-05, 5.3492e-05, 4.6009e-05, 1.0146e-04, 3.4297e-05,\n 1.9813e-05, 4.2176e-05, 4.5435e-05, 3.7684e-05, 6.5871e-05, 2.9869e-05,\n 1.1879e-04, 3.6306e-05, 4.7866e-05, 3.8358e-05, 1.0300e-04, 4.9747e-05,\n 4.8210e-05, 4.7968e-05, 6.4283e-05, 6.9840e-05, 5.3791e-05, 2.6991e-05,\n 7.9532e-05, 4.9803e-05, 5.3409e-05, 3.8206e-05, 1.4008e-04, 5.0631e-05,\n 4.9671e-05, 4.9846e-05], device='cuda:0')"
24
+ },
25
+ "4": {
26
+ "step": "tensor(26278.)",
27
+ "exp_avg": "tensor([[-6.1191e-07, -5.6187e-06, -8.2004e-07, ..., 2.9905e-06,\n 9.7810e-06, 6.8934e-07],\n [ 3.7287e-06, 4.6018e-06, -4.1981e-06, ..., 8.5799e-06,\n -5.6872e-06, 4.7518e-06],\n [ 5.1553e-06, 5.3276e-06, -1.2665e-05, ..., -7.5437e-06,\n -3.6543e-06, 7.4008e-07],\n ...,\n [-1.9437e-05, -4.4917e-06, -5.8698e-06, ..., -1.9773e-06,\n -1.1838e-06, 2.9037e-05],\n [-1.5112e-05, -2.2403e-05, 1.3287e-05, ..., 7.5709e-07,\n -1.4767e-05, -8.1773e-06],\n [-3.3910e-05, 2.3188e-06, 8.5829e-06, ..., -3.7267e-06,\n -1.7280e-05, -1.5632e-05]], device='cuda:0')",
28
+ "exp_avg_sq": "tensor([[7.2464e-10, 2.1387e-09, 5.2963e-10, ..., 5.3833e-10, 8.7679e-10,\n 1.2396e-09],\n [1.0692e-09, 1.5238e-09, 2.2989e-09, ..., 1.1042e-09, 2.3745e-09,\n 2.1154e-09],\n [1.1829e-09, 2.0614e-09, 2.0157e-09, ..., 9.5384e-10, 2.0868e-09,\n 1.6516e-09],\n ...,\n [1.6977e-09, 3.2188e-09, 1.5765e-09, ..., 6.8041e-10, 3.8987e-09,\n 2.3001e-09],\n [8.9935e-10, 3.2860e-09, 2.2947e-09, ..., 9.1205e-10, 1.7873e-09,\n 2.0504e-09],\n [3.1364e-09, 2.8927e-09, 1.7250e-09, ..., 8.8625e-10, 1.9014e-09,\n 2.2855e-09]], device='cuda:0')"
29
+ },
30
+ "5": {
31
+ "step": "tensor(26278.)",
32
+ "exp_avg": "tensor([[ 8.4323e-07, -6.8700e-06, -4.3760e-06, ..., 2.9828e-07,\n -3.8857e-06, -6.2386e-06],\n [ 1.1167e-05, 9.0254e-06, 3.2592e-06, ..., 9.2075e-06,\n -2.5914e-06, 4.5409e-06],\n [ 1.0712e-05, 2.0713e-06, -1.4337e-05, ..., -1.1075e-05,\n -2.7006e-06, -2.9708e-06],\n ...,\n [ 7.2836e-06, 8.6519e-06, -1.6489e-06, ..., 9.4987e-07,\n -1.3917e-05, -2.1681e-05],\n [-9.0447e-06, -4.1253e-06, -8.7917e-06, ..., -2.1991e-06,\n -6.8379e-06, 1.6644e-05],\n [-1.5453e-07, 1.1127e-05, 1.3214e-05, ..., -2.2648e-06,\n 1.1429e-06, 4.9498e-06]], device='cuda:0')",
33
+ "exp_avg_sq": "tensor([[4.1972e-10, 1.0963e-09, 5.1746e-10, ..., 2.4637e-10, 4.1274e-10,\n 9.3735e-10],\n [1.0898e-09, 2.5815e-09, 7.7726e-10, ..., 7.5492e-10, 2.1526e-09,\n 1.5760e-09],\n [8.6842e-10, 1.3285e-09, 1.1462e-09, ..., 8.8886e-10, 1.8578e-09,\n 1.5696e-09],\n ...,\n [1.0121e-09, 1.2333e-09, 2.0042e-09, ..., 4.5798e-10, 3.7781e-09,\n 1.4017e-09],\n [1.3435e-09, 2.0598e-09, 2.4197e-09, ..., 1.0602e-09, 8.1259e-10,\n 1.1417e-09],\n [8.0923e-10, 2.6908e-09, 1.0349e-09, ..., 5.3287e-10, 7.2843e-10,\n 1.7737e-09]], device='cuda:0')"
34
+ },
35
+ "6": {
36
+ "step": "tensor(26278.)",
37
+ "exp_avg": "tensor([ 0.0006, -0.0006], device='cuda:0')",
38
+ "exp_avg_sq": "tensor([3.5854e-06, 3.5854e-06], device='cuda:0')"
39
+ }
40
+ },
41
+ "param_groups": [
42
+ {
43
+ "lr": 0.0020618676311637816,
44
+ "name": "shared",
45
+ "betas": [
46
+ 0.9,
47
+ 0.999
48
+ ],
49
+ "eps": 1e-08,
50
+ "weight_decay": 1e-05,
51
+ "amsgrad": false,
52
+ "maximize": false,
53
+ "foreach": null,
54
+ "capturable": false,
55
+ "differentiable": false,
56
+ "fused": null,
57
+ "decoupled_weight_decay": true,
58
+ "initial_lr": 0.01,
59
+ "params": [
60
+ 0,
61
+ 1,
62
+ 2,
63
+ 3
64
+ ]
65
+ },
66
+ {
67
+ "lr": 0.0020618676311637816,
68
+ "name": "scale_256",
69
+ "betas": [
70
+ 0.9,
71
+ 0.999
72
+ ],
73
+ "eps": 1e-08,
74
+ "weight_decay": 1e-05,
75
+ "amsgrad": false,
76
+ "maximize": false,
77
+ "foreach": null,
78
+ "capturable": false,
79
+ "differentiable": false,
80
+ "fused": null,
81
+ "decoupled_weight_decay": true,
82
+ "initial_lr": 0.01,
83
+ "params": [
84
+ 4
85
+ ]
86
+ },
87
+ {
88
+ "lr": 0.0020618676311637816,
89
+ "name": "scale_512",
90
+ "betas": [
91
+ 0.9,
92
+ 0.999
93
+ ],
94
+ "eps": 1e-08,
95
+ "weight_decay": 1e-05,
96
+ "amsgrad": false,
97
+ "maximize": false,
98
+ "foreach": null,
99
+ "capturable": false,
100
+ "differentiable": false,
101
+ "fused": null,
102
+ "decoupled_weight_decay": true,
103
+ "initial_lr": 0.01,
104
+ "params": [
105
+ 5
106
+ ]
107
+ },
108
+ {
109
+ "lr": 0.0010313307618949636,
110
+ "name": "fusion",
111
+ "betas": [
112
+ 0.9,
113
+ 0.999
114
+ ],
115
+ "eps": 1e-08,
116
+ "weight_decay": 1e-05,
117
+ "amsgrad": false,
118
+ "maximize": false,
119
+ "foreach": null,
120
+ "capturable": false,
121
+ "differentiable": false,
122
+ "fused": null,
123
+ "decoupled_weight_decay": true,
124
+ "initial_lr": 0.005,
125
+ "params": [
126
+ 6
127
+ ]
128
+ }
129
+ ]
130
+ },
131
+ "scheduler_state_dict": {
132
+ "T_0": 10,
133
+ "T_i": 10,
134
+ "T_mult": 2,
135
+ "eta_min": 1e-06,
136
+ "T_cur": 7,
137
+ "base_lrs": [
138
+ 0.01,
139
+ 0.01,
140
+ 0.01,
141
+ 0.005
142
+ ],
143
+ "last_epoch": 7,
144
+ "_step_count": 0,
145
+ "_is_initial": false,
146
+ "_get_lr_called_within_step": false,
147
+ "_last_lr": [
148
+ 0.0020618676311637816,
149
+ 0.0020618676311637816,
150
+ 0.0020618676311637816,
151
+ 0.0010313307618949636
152
+ ]
153
+ },
154
+ "metrics": {
155
+ "best_val_acc": 66.282,
156
+ "best_epoch": 6,
157
+ "scale_accuracies": {
158
+ "256": 65.70466666666667,
159
+ "512": 66.24
160
+ },
161
+ "training_history": {
162
+ "epochs": [
163
+ 1,
164
+ 2,
165
+ 3,
166
+ 4,
167
+ 5,
168
+ 6,
169
+ 7
170
+ ],
171
+ "train_loss": [
172
+ 5.311051666323785,
173
+ 4.462767010682684,
174
+ 4.340839946911445,
175
+ 4.262519323832187,
176
+ 4.204208532545754,
177
+ 4.159249462977202,
178
+ 4.114808493176228
179
+ ],
180
+ "train_acc": [
181
+ 54.91727464101089,
182
+ 60.04988680892759,
183
+ 61.02839572566782,
184
+ 61.696614622970046,
185
+ 62.27501957199959,
186
+ 62.741287175416375,
187
+ 63.16837175273273
188
+ ],
189
+ "val_acc": [
190
+ 63.041333333333334,
191
+ 64.17333333333333,
192
+ 64.75866666666667,
193
+ 65.36133333333333,
194
+ 65.65466666666667,
195
+ 66.04266666666666,
196
+ 66.282
197
+ ],
198
+ "scale_accs": {
199
+ "256": [
200
+ 62.11666666666667,
201
+ 63.38733333333333,
202
+ 63.992666666666665,
203
+ 64.614,
204
+ 64.958,
205
+ 65.344,
206
+ 65.70466666666667
207
+ ],
208
+ "512": [
209
+ 62.967333333333336,
210
+ 64.19266666666667,
211
+ 64.73066666666666,
212
+ 65.34666666666666,
213
+ 65.59266666666667,
214
+ 65.97266666666667,
215
+ 66.24
216
+ ]
217
+ },
218
+ "lr": [
219
+ 0.00975530705321762,
220
+ 0.00904518046337755,
221
+ 0.00793913236883622,
222
+ 0.00654543046337755,
223
+ 0.005000500000000001,
224
+ 0.0034555695366224513,
225
+ 0.0020618676311637816
226
+ ]
227
+ }
228
+ },
229
+ "train_config": {
230
+ "name": "david_training",
231
+ "run_id": "20251012_231445",
232
+ "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
233
+ "model_variant": [
234
+ "clip_vit_b16",
235
+ "clip_vit_laion_b32",
236
+ "clip_vit_b32"
237
+ ],
238
+ "num_classes": 1000,
239
+ "preset": "small_fast",
240
+ "custom_config_path": null,
241
+ "num_classes_override": null,
242
+ "use_belly_override": null,
243
+ "belly_expand_override": null,
244
+ "progressive_training_override": true,
245
+ "scale_warmup_epochs_override": {
246
+ "256": 0,
247
+ "512": 0
248
+ },
249
+ "num_epochs": 10,
250
+ "batch_size": 1024,
251
+ "learning_rate": 0.01,
252
+ "weight_decay": 1e-05,
253
+ "warmup_epochs": 3,
254
+ "use_rose_loss": true,
255
+ "rose_initial_weight": 0.2,
256
+ "rose_max_weight": 0.6,
257
+ "rose_weight_schedule": "adaptive",
258
+ "use_cayley_loss": false,
259
+ "cayley_weight": 0.01,
260
+ "scale_loss_balance": null,
261
+ "use_mixed_precision": false,
262
+ "gradient_clip": 5.0,
263
+ "scheduler_type": "cosine_restarts",
264
+ "min_lr": 1e-06,
265
+ "freeze_strategy": "never",
266
+ "freeze_threshold": 90.0,
267
+ "unfreeze_on_plateau": true,
268
+ "patience": 10,
269
+ "track_gradients": true,
270
+ "gradient_scale_threshold": 1e-05,
271
+ "gradient_scale_multiplier": 10.0,
272
+ "log_interval": 50,
273
+ "val_interval": 1,
274
+ "save_interval": 5,
275
+ "log_fusion_weights": true,
276
+ "log_loss_components": true,
277
+ "save_format": "safetensors",
278
+ "hf_repo": "AbstractPhil/david-shared-space",
279
+ "upload_to_hub": true,
280
+ "base_dir": "./david_training",
281
+ "num_workers": 10,
282
+ "pin_memory": true,
283
+ "prefetch_factor": 4,
284
+ "persistent_workers": true
285
+ }
286
+ }