Update best_model_acc71.73_metadata.json - Run 20251012_141246
Browse files
weights/David-fully_shared-weighted_sum/20251012_141246/best_model_acc71.73_metadata.json
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 9,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(12520.)",
|
| 7 |
+
"exp_avg": "tensor([[-1.0193e-03, -3.9272e-03, -1.5823e-04, ..., -2.6369e-04,\n -5.7864e-04, -7.2561e-04],\n [-2.2954e-04, -9.4550e-04, 9.4458e-04, ..., -1.8983e-05,\n 4.6150e-04, 1.7186e-04],\n [ 2.4122e-03, 1.5233e-03, 5.2615e-03, ..., 1.1379e-03,\n 1.0916e-04, 1.5103e-04],\n ...,\n [-4.5913e-04, 4.1391e-03, 2.5503e-03, ..., -4.5579e-04,\n 2.3861e-04, -1.2218e-03],\n [ 8.0811e-04, 3.0176e-03, -3.6676e-05, ..., -2.0293e-04,\n -6.3479e-04, 1.3269e-04],\n [-2.2015e-04, -1.3078e-03, 1.1851e-03, ..., -5.2010e-04,\n -1.2506e-04, 2.9933e-04]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[4.8175e-06, 3.0986e-05, 1.3620e-05, ..., 3.3631e-06, 2.3452e-06,\n 2.6317e-06],\n [1.0004e-05, 4.5502e-05, 1.8860e-05, ..., 4.5611e-06, 4.0402e-06,\n 3.6555e-06],\n [4.7375e-06, 2.4524e-05, 1.4721e-05, ..., 2.6622e-06, 2.1295e-06,\n 2.7396e-06],\n ...,\n [8.2641e-06, 4.3019e-05, 2.5758e-05, ..., 9.3965e-06, 3.2670e-06,\n 8.8751e-06],\n [3.3765e-06, 2.3847e-05, 1.4868e-05, ..., 3.0570e-06, 2.0195e-06,\n 2.2608e-06],\n [9.8345e-06, 4.7035e-05, 2.2059e-05, ..., 5.0497e-06, 3.4121e-06,\n 3.4242e-06]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(12520.)",
|
| 12 |
+
"exp_avg": "tensor([-4.9703e-02, -1.7859e-02, -3.3339e-02, -1.3414e-03, 3.4545e-02,\n 1.9017e-02, -1.2731e-02, 2.9561e-02, -8.5451e-03, 3.5937e-02,\n 1.3703e-02, 4.7768e-02, 1.2145e-02, -1.7309e-02, 1.7690e-03,\n -2.2219e-02, 1.1317e-02, 2.1110e-02, 1.3439e-02, 1.2014e-03,\n -8.4470e-03, 4.2558e-03, -2.9272e-03, -2.1347e-02, 4.6297e-03,\n -3.0310e-03, -2.7247e-02, 3.7147e-04, -3.5352e-02, -1.5328e-03,\n 2.6373e-02, 4.7613e-02, 1.9729e-02, -9.9012e-04, 5.3960e-02,\n -3.3687e-02, -2.6935e-02, 3.6678e-03, -3.7192e-03, 2.6175e-02,\n -1.1588e-02, 6.2963e-03, -1.5463e-02, -3.5438e-02, -1.3963e-03,\n 1.2298e-02, 1.2093e-02, -1.0914e-02, 4.3178e-02, -2.0362e-02,\n 2.1679e-04, 1.6105e-03, -5.1843e-03, -1.3798e-02, -7.9718e-03,\n -5.2390e-03, 7.2697e-03, -1.0998e-02, 5.8189e-02, 1.5406e-03,\n 4.5403e-02, 6.4548e-03, 1.3421e-02, 3.3535e-03, -3.1217e-02,\n 1.1657e-02, 6.8176e-03, -1.8686e-02, -1.2587e-02, 8.1082e-03,\n -1.7567e-02, -2.7073e-02, 3.2007e-02, 2.9051e-02, -1.7060e-03,\n -1.2963e-02, -1.1190e-02, 2.2761e-03, -3.2350e-03, 2.4414e-02,\n -3.3076e-02, -3.0831e-02, 5.7410e-03, 2.0274e-02, 4.7875e-02,\n 1.7453e-03, -3.8792e-03, 6.6895e-03, -1.4360e-02, -1.4219e-02,\n -3.4291e-02, 1.2638e-02, -5.5800e-02, 3.8152e-03, -2.1824e-03,\n -1.0928e-02, -9.2300e-03, 8.8051e-03, 2.2906e-02, -2.6843e-03,\n -1.2674e-03, -9.3169e-03, -1.4219e-03, -4.4086e-03, 9.7293e-03,\n -3.0485e-02, 4.2513e-02, -1.0672e-03, -1.3109e-02, -5.0764e-03,\n -1.1176e-02, -4.4431e-03, -1.3416e-02, 3.0360e-02, 8.0234e-03,\n 2.4983e-02, 9.2318e-03, -8.8241e-03, -1.3282e-02, -1.6089e-02,\n 3.2281e-03, 9.8265e-03, -4.8502e-02, -1.4068e-02, -4.7850e-03,\n 2.5528e-02, 1.5373e-03, 5.8712e-04, 2.5432e-02, -2.3121e-02,\n -4.2545e-03, -4.0277e-02, 6.6289e-03, 1.5404e-03, -2.6665e-02,\n -1.6105e-02, 1.9943e-02, 4.7342e-03, -3.6919e-03, -4.7197e-02,\n -2.7225e-02, 9.9290e-03, 5.7264e-04, -1.7051e-02, 1.2034e-02,\n 8.0999e-03, 1.6639e-02, -1.5262e-02, 3.9922e-02, -1.1900e-02,\n -1.0345e-02, 2.4429e-02, 3.0483e-02, -2.2913e-02, -9.2265e-03,\n 2.2993e-02, -2.6279e-02, 6.0952e-03, 1.3378e-02, 7.4427e-03,\n 5.6227e-03, -3.3159e-03, -7.6942e-03, 2.5696e-04, -5.6911e-03,\n 5.2631e-03, 1.4404e-03, -4.9403e-03, -1.8378e-02, 1.9436e-03,\n -1.1532e-02, 1.9465e-02, -5.9944e-02, -4.7005e-03, 2.5399e-02,\n -1.3714e-02, 3.2497e-02, 5.9436e-03, 1.5326e-03, -1.7804e-02,\n 3.9245e-03, -2.5322e-02, -7.6104e-03, -5.9242e-04, 3.0409e-02,\n 9.0816e-03, -9.2248e-02, 7.5686e-02, 4.1823e-02, -2.9851e-02,\n -1.5160e-02, -3.4332e-03, -6.4735e-03, -8.1361e-02, 1.6881e-02,\n -1.2082e-03, -3.3118e-03, -2.6764e-02, -4.6164e-04, 1.1022e-02,\n -1.4051e-02, -1.4151e-02, 1.6059e-02, -3.4576e-03, -3.3733e-02,\n -2.1667e-02, 1.7097e-02, 1.8038e-02, 1.6687e-02, -6.4005e-02,\n 7.5324e-03, -5.1882e-04, -2.4586e-02, -1.0087e-02, -6.8016e-02,\n 1.8813e-02, 1.4735e-02, -3.0618e-02, 2.6177e-02, 6.9380e-04,\n -3.3502e-03, -4.8358e-03, -1.0017e-02, 1.4018e-02, 5.2471e-02,\n 1.5472e-02, 7.6629e-03, -2.3570e-02, 3.7260e-02, 1.3464e-04,\n -3.6019e-03, 2.7480e-02, 2.3311e-02, 1.2863e-02, 2.6285e-02,\n -9.6491e-03, 1.1000e-02, -9.8927e-03, -2.2841e-02, -1.5093e-02,\n 7.6009e-04, -1.4443e-02, 1.7873e-02, -6.3593e-03, -6.2564e-02,\n 5.1140e-03, 1.9793e-02, 1.8376e-02, 1.8684e-02, 1.7836e-02,\n -1.4531e-02, -6.1798e-03, 9.6419e-03, 4.1045e-02, -3.3347e-03,\n 3.9695e-02, -5.7387e-03, -5.7357e-03, -3.5658e-02, 1.0514e-02,\n 3.6286e-03, 4.1179e-03, -1.4851e-02, 1.4852e-02, -3.3220e-02,\n -1.0200e-02, -6.6883e-04, -3.4417e-02, -2.2467e-02, 6.1605e-02,\n -1.5126e-02, 1.4589e-02, 1.8572e-02, 1.5571e-02, -1.2520e-02,\n 1.3532e-02, -4.5546e-03, 2.3759e-02, -1.3373e-02, -2.3115e-02,\n 6.9955e-03, 3.9246e-02, -1.2681e-03, 9.4552e-03, -1.1840e-02,\n 2.5156e-02, -2.3975e-02, -8.1251e-04, 2.3395e-02, 8.8698e-03,\n -2.0231e-02, 1.7868e-02, -5.4942e-02, 5.8737e-03, 2.1004e-02,\n 9.1513e-03, 9.1182e-03, 4.1792e-03, 6.6612e-03, 2.0673e-02,\n -1.1427e-03, -1.9380e-03, -1.7823e-02, 1.7798e-02, 1.0322e-02,\n 2.3060e-02, -1.8720e-02, -2.0595e-03, -1.9326e-02, -2.0382e-02,\n 2.1937e-02, -2.6470e-02, -9.2237e-03, -1.1964e-02, -1.0305e-03,\n -2.2473e-02, -2.9618e-03, 2.6682e-02, 3.3177e-02, -1.9284e-02,\n -5.8234e-03, -1.0979e-02, 1.1873e-02, -2.1987e-03, 1.8803e-02,\n 8.3817e-03, 1.1779e-02, 1.1705e-02, -6.9298e-03, -2.0914e-02,\n 2.8827e-02, -1.1707e-02, 1.0348e-02, -3.6553e-03, -1.0770e-02,\n 3.4258e-03, -1.9562e-02, 3.2797e-03, -5.0656e-03, 2.0330e-02,\n 8.7339e-03, 4.6241e-02, -7.6635e-03, -1.0513e-02, 2.6546e-02,\n 1.2827e-02, -1.0286e-02, 1.1086e-02, -3.0737e-02, -7.6194e-04,\n 1.5436e-02, 5.0965e-03, 1.6364e-02, -2.9667e-02, -1.0014e-02,\n 4.3811e-02, 1.6028e-02, -1.2909e-02, 1.8865e-03, -7.4355e-03,\n -2.4094e-02, -3.0941e-02, -8.8609e-03, -9.1083e-03, 2.3242e-02,\n 2.0226e-04, -9.5811e-03, 2.8283e-02, -2.6738e-02, -2.6784e-02,\n -9.7209e-03, -6.0425e-03, -3.8321e-03, 9.1857e-03, -1.3853e-02,\n -1.6442e-02, 2.2919e-02, 1.7862e-03, 3.1908e-03, 1.1635e-02,\n -1.3430e-02, -1.0587e-02, 8.0067e-03, 2.5577e-05, 1.5558e-02,\n 3.5322e-03, 7.4958e-02, 2.5128e-02, 1.8872e-02, 2.1498e-02,\n 1.9866e-02, 2.6312e-02, 1.6370e-02, 2.2606e-02, 1.0131e-02,\n 9.2258e-04, 6.6672e-03, -1.5002e-02, 1.2376e-02, 2.4676e-02,\n 7.5654e-03, -8.8712e-03, -2.1376e-02, 1.2794e-02, -1.0227e-02,\n -7.3344e-03, 2.9793e-03, -5.0934e-04, 2.1110e-02, 1.7151e-02,\n -8.0944e-03, 8.9908e-03, -3.3691e-02, 1.9736e-02, -2.5486e-02,\n -4.2898e-02, -2.0234e-02, -4.8331e-02, 2.9667e-02, -1.4585e-02,\n -4.3978e-02, -5.7860e-02, 1.6403e-02, 5.6316e-03, -6.4882e-02,\n 1.1834e-02, -2.7115e-02, 2.9686e-03, 1.2876e-02, -9.9546e-04,\n -1.6113e-02, -4.7741e-02, -6.6454e-04, -1.1206e-02, -1.9771e-03,\n -1.0625e-02, 1.5796e-02, 4.7027e-02, -1.3964e-03, -2.2486e-02,\n -2.0015e-03, -1.8101e-02, 8.5380e-03, 2.2448e-02, 5.0707e-02,\n 5.9743e-03, -3.9393e-02, 1.1312e-02, -2.3972e-02, 8.2880e-03,\n 3.1250e-02, 3.1875e-02, -3.4938e-02, 2.7211e-03, 1.3084e-02,\n -1.9125e-02, -1.9334e-02, -1.9733e-02, 2.5985e-02, -5.9557e-03,\n -7.8828e-03, 9.6630e-03, 6.2458e-03, -6.6385e-03, 2.4627e-02,\n -1.5185e-02, -1.4953e-02, 3.5917e-02, -3.2107e-03, 8.4770e-03,\n -6.5643e-03, 2.8805e-02, -3.6618e-03, -1.4122e-02, -4.9166e-03,\n -1.3990e-02, -4.2672e-02, 2.4505e-02, 3.7491e-03, 4.9925e-02,\n 8.0827e-03, -4.4023e-02, -1.2641e-02, -9.0188e-02, 4.6959e-02,\n 1.2931e-02, 2.2619e-02, 2.8009e-02, -4.3736e-03, 3.6495e-02,\n 7.1013e-03, -7.5271e-03, 3.3577e-02, 1.0479e-02, -2.7111e-02,\n 4.9822e-03, 1.8423e-02, -9.8045e-03, 2.5129e-02, -1.4757e-02,\n 3.2123e-02, 8.8985e-03, -1.8683e-03, 2.4873e-02, 4.1968e-03,\n -4.1777e-02, -1.1754e-02, 5.9716e-03, -5.0463e-03, -4.3165e-02,\n 3.7644e-02, -1.3895e-02], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([0.0034, 0.0058, 0.0029, 0.0024, 0.0034, 0.0053, 0.0022, 0.0036, 0.0042,\n 0.0045, 0.0025, 0.0035, 0.0063, 0.0023, 0.0055, 0.0067, 0.0043, 0.0040,\n 0.0034, 0.0046, 0.0032, 0.0069, 0.0047, 0.0035, 0.0019, 0.0062, 0.0038,\n 0.0050, 0.0056, 0.0044, 0.0041, 0.0082, 0.0029, 0.0101, 0.0032, 0.0055,\n 0.0035, 0.0032, 0.0041, 0.0057, 0.0035, 0.0042, 0.0041, 0.0034, 0.0065,\n 0.0039, 0.0026, 0.0033, 0.0044, 0.0040, 0.0050, 0.0030, 0.0029, 0.0034,\n 0.0035, 0.0033, 0.0035, 0.0020, 0.0029, 0.0036, 0.0052, 0.0029, 0.0070,\n 0.0045, 0.0058, 0.0063, 0.0032, 0.0035, 0.0037, 0.0033, 0.0047, 0.0024,\n 0.0035, 0.0078, 0.0051, 0.0021, 0.0062, 0.0032, 0.0038, 0.0054, 0.0049,\n 0.0066, 0.0024, 0.0059, 0.0043, 0.0053, 0.0036, 0.0066, 0.0050, 0.0029,\n 0.0079, 0.0050, 0.0036, 0.0036, 0.0048, 0.0021, 0.0027, 0.0039, 0.0021,\n 0.0030, 0.0037, 0.0046, 0.0037, 0.0034, 0.0027, 0.0053, 0.0061, 0.0024,\n 0.0042, 0.0054, 0.0041, 0.0042, 0.0058, 0.0029, 0.0039, 0.0028, 0.0020,\n 0.0027, 0.0071, 0.0053, 0.0046, 0.0092, 0.0064, 0.0051, 0.0067, 0.0052,\n 0.0026, 0.0018, 0.0059, 0.0059, 0.0057, 0.0034, 0.0045, 0.0035, 0.0024,\n 0.0014, 0.0031, 0.0045, 0.0014, 0.0056, 0.0049, 0.0036, 0.0048, 0.0034,\n 0.0045, 0.0039, 0.0031, 0.0029, 0.0023, 0.0044, 0.0041, 0.0029, 0.0043,\n 0.0035, 0.0034, 0.0024, 0.0041, 0.0038, 0.0048, 0.0022, 0.0035, 0.0026,\n 0.0030, 0.0035, 0.0036, 0.0033, 0.0028, 0.0030, 0.0056, 0.0063, 0.0062,\n 0.0046, 0.0061, 0.0039, 0.0040, 0.0026, 0.0059, 0.0037, 0.0043, 0.0039,\n 0.0050, 0.0030, 0.0024, 0.0041, 0.0042, 0.0054, 0.0062, 0.0049, 0.0051,\n 0.0042, 0.0043, 0.0027, 0.0033, 0.0046, 0.0038, 0.0069, 0.0025, 0.0032,\n 0.0026, 0.0036, 0.0024, 0.0021, 0.0039, 0.0044, 0.0051, 0.0046, 0.0091,\n 0.0039, 0.0017, 0.0039, 0.0032, 0.0021, 0.0063, 0.0026, 0.0043, 0.0050,\n 0.0036, 0.0046, 0.0029, 0.0045, 0.0033, 0.0042, 0.0049, 0.0043, 0.0055,\n 0.0027, 0.0015, 0.0036, 0.0055, 0.0027, 0.0028, 0.0031, 0.0044, 0.0057,\n 0.0043, 0.0019, 0.0027, 0.0033, 0.0044, 0.0038, 0.0013, 0.0031, 0.0044,\n 0.0048, 0.0048, 0.0032, 0.0033, 0.0038, 0.0040, 0.0054, 0.0015, 0.0027,\n 0.0048, 0.0043, 0.0039, 0.0066, 0.0030, 0.0043, 0.0035, 0.0063, 0.0040,\n 0.0048, 0.0046, 0.0050, 0.0032, 0.0024, 0.0037, 0.0026, 0.0030, 0.0069,\n 0.0045, 0.0038, 0.0035, 0.0051, 0.0046, 0.0023, 0.0030, 0.0035, 0.0054,\n 0.0046, 0.0027, 0.0064, 0.0059, 0.0018, 0.0014, 0.0036, 0.0033, 0.0044,\n 0.0024, 0.0044, 0.0025, 0.0022, 0.0042, 0.0036, 0.0034, 0.0045, 0.0031,\n 0.0017, 0.0050, 0.0075, 0.0031, 0.0022, 0.0030, 0.0040, 0.0048, 0.0050,\n 0.0048, 0.0037, 0.0057, 0.0033, 0.0067, 0.0039, 0.0055, 0.0045, 0.0043,\n 0.0040, 0.0038, 0.0049, 0.0035, 0.0045, 0.0034, 0.0043, 0.0030, 0.0042,\n 0.0029, 0.0066, 0.0041, 0.0034, 0.0032, 0.0037, 0.0035, 0.0040, 0.0032,\n 0.0037, 0.0060, 0.0043, 0.0024, 0.0017, 0.0040, 0.0045, 0.0054, 0.0080,\n 0.0021, 0.0034, 0.0042, 0.0050, 0.0032, 0.0026, 0.0040, 0.0026, 0.0021,\n 0.0048, 0.0031, 0.0029, 0.0027, 0.0028, 0.0023, 0.0042, 0.0045, 0.0027,\n 0.0033, 0.0080, 0.0024, 0.0100, 0.0040, 0.0044, 0.0015, 0.0030, 0.0032,\n 0.0031, 0.0043, 0.0040, 0.0038, 0.0028, 0.0024, 0.0053, 0.0047, 0.0041,\n 0.0066, 0.0041, 0.0031, 0.0015, 0.0024, 0.0020, 0.0047, 0.0069, 0.0049,\n 0.0030, 0.0030, 0.0079, 0.0028, 0.0031, 0.0024, 0.0036, 0.0048, 0.0038,\n 0.0034, 0.0035, 0.0062, 0.0105, 0.0022, 0.0029, 0.0025, 0.0033, 0.0033,\n 0.0062, 0.0033, 0.0022, 0.0036, 0.0038, 0.0030, 0.0027, 0.0021, 0.0047,\n 0.0046, 0.0057, 0.0036, 0.0081, 0.0037, 0.0065, 0.0057, 0.0037, 0.0073,\n 0.0026, 0.0053, 0.0026, 0.0029, 0.0040, 0.0023, 0.0031, 0.0034, 0.0093,\n 0.0063, 0.0035, 0.0039, 0.0026, 0.0061, 0.0077, 0.0026, 0.0038, 0.0068,\n 0.0038, 0.0023, 0.0040, 0.0070, 0.0030, 0.0023, 0.0030, 0.0054, 0.0028,\n 0.0039, 0.0049, 0.0039, 0.0036, 0.0040, 0.0052, 0.0055, 0.0023, 0.0043,\n 0.0038, 0.0023, 0.0042, 0.0039, 0.0043, 0.0062, 0.0021, 0.0034, 0.0067,\n 0.0042, 0.0062, 0.0032, 0.0036, 0.0033, 0.0035, 0.0050, 0.0020, 0.0072,\n 0.0022, 0.0031, 0.0040, 0.0044, 0.0044, 0.0039, 0.0092, 0.0056, 0.0036,\n 0.0033, 0.0067, 0.0045, 0.0047, 0.0030, 0.0044, 0.0032, 0.0032, 0.0040,\n 0.0063, 0.0031, 0.0066, 0.0034, 0.0040, 0.0037, 0.0019, 0.0023, 0.0034,\n 0.0020, 0.0028, 0.0039, 0.0007, 0.0044, 0.0069, 0.0030, 0.0044],\n device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(12520.)",
|
| 17 |
+
"exp_avg": "tensor([-1.1180e-02, -3.5444e-03, -7.5523e-03, -3.1396e-04, 8.3092e-03,\n 2.1934e-03, -7.2721e-03, 3.1607e-03, -9.3607e-04, 4.6839e-03,\n 2.7613e-03, 8.0027e-03, 1.4340e-03, -5.7757e-03, -7.6407e-04,\n -3.3509e-03, 3.1819e-03, 3.9284e-03, 3.3233e-03, -7.2177e-04,\n -6.1101e-04, 6.7832e-04, 2.7663e-03, -7.4092e-03, -4.0944e-04,\n -1.2691e-04, -7.1087e-03, -1.8791e-04, -5.1824e-03, 4.1880e-04,\n 3.7081e-03, 5.2575e-03, 3.3580e-03, -1.0954e-03, 1.3516e-02,\n -3.6586e-03, -4.9542e-03, 6.5436e-04, 4.3729e-05, 5.2623e-03,\n -3.1464e-03, 1.7913e-03, -5.5525e-03, -7.3201e-03, -5.0845e-04,\n 2.3535e-03, 2.6742e-03, -7.3928e-04, 4.9575e-03, -3.1135e-03,\n 6.1364e-04, 9.3869e-04, -1.0378e-04, -2.3639e-03, -2.6686e-03,\n -2.0390e-03, -1.2529e-04, -4.0472e-03, 2.4532e-02, -1.1118e-03,\n 6.8383e-03, 1.8850e-03, 2.6339e-03, 7.8248e-04, -4.8861e-03,\n 2.2835e-03, 1.2425e-03, -2.9064e-03, -2.2630e-03, 1.1097e-03,\n 5.3146e-04, -7.5587e-03, 5.5932e-03, 3.5940e-03, -2.2823e-04,\n -6.2207e-03, -1.2476e-03, 1.7873e-03, -1.6495e-04, 3.9088e-03,\n -5.5427e-03, -6.2092e-03, 1.8382e-03, 1.9948e-03, 1.2367e-02,\n -6.9902e-04, -5.0914e-04, -1.8373e-04, -1.4628e-03, -4.6816e-03,\n -3.2372e-03, 3.4847e-03, -5.7880e-03, 4.1611e-04, -1.6977e-04,\n -2.5412e-03, -2.4731e-03, 1.7565e-03, 8.2815e-03, -7.9405e-04,\n -2.6268e-04, -4.6616e-04, -2.4186e-04, -5.0113e-04, 3.1480e-03,\n -3.6497e-03, 7.9389e-03, -6.0516e-04, -1.5282e-03, -8.7467e-04,\n -2.2389e-03, -8.0566e-05, -1.7271e-03, 2.8552e-03, 6.0012e-04,\n 9.0318e-03, 1.9808e-03, -2.1478e-03, -2.8567e-03, -3.5022e-03,\n 9.3520e-04, 5.3256e-04, -6.6389e-03, -3.8099e-03, -2.5187e-04,\n 2.6019e-03, 2.3394e-04, -6.4515e-04, 3.3536e-03, -3.9837e-03,\n -3.6785e-04, -9.9746e-03, 4.4879e-04, 1.1014e-03, -7.1575e-03,\n -1.4154e-02, 4.5943e-03, 1.3575e-03, 4.9234e-04, -8.6894e-03,\n -6.5973e-03, 1.1993e-03, 4.1399e-04, -3.5593e-03, 2.5502e-03,\n 1.8519e-03, 2.8592e-03, -4.4508e-03, 1.8311e-02, -2.0946e-03,\n -2.5616e-03, 1.4600e-02, 7.0507e-03, -6.3042e-03, -1.7948e-03,\n 1.1530e-02, -3.2101e-03, 1.3660e-03, 2.2205e-03, 1.6632e-03,\n 9.2512e-04, -7.9409e-04, -1.8097e-03, 4.2547e-04, -1.5707e-03,\n 4.7725e-04, -1.0932e-04, -1.5900e-03, -2.4929e-03, -5.9703e-05,\n -1.9553e-03, 2.9791e-03, -5.2424e-03, -1.6863e-03, 3.3107e-03,\n -2.9658e-03, 5.1048e-03, 1.0336e-03, -2.1781e-04, -1.6472e-03,\n -1.3148e-03, -1.0807e-02, -1.0424e-03, 7.1391e-04, 6.6318e-03,\n 5.4243e-04, -1.0156e-02, 7.3770e-03, 9.4745e-03, -5.3446e-03,\n -2.5499e-03, -1.9064e-03, -5.1073e-04, -1.8293e-02, 4.1387e-03,\n -1.0319e-04, -1.3958e-03, -3.7334e-03, -1.0323e-03, 2.8522e-03,\n -3.6657e-03, -4.1255e-03, 3.0260e-03, -1.0174e-03, -4.7350e-03,\n -3.5538e-03, 1.7754e-03, 3.2708e-03, 8.3514e-03, -7.0776e-03,\n 1.2307e-03, 1.3676e-04, -2.8628e-03, -3.4789e-03, -1.2031e-02,\n 4.5171e-03, 1.0675e-03, -8.0993e-03, 8.9032e-03, 3.9801e-04,\n -1.7084e-04, -6.7317e-04, -1.7238e-03, 1.3948e-03, 9.9355e-03,\n 4.4692e-03, 6.8518e-03, -3.8938e-03, 4.3775e-03, -4.1053e-04,\n -4.3791e-04, 5.3487e-03, 2.5107e-03, 2.0787e-03, 3.6183e-03,\n -3.3919e-03, 3.3432e-03, -2.7832e-03, -3.5338e-03, -3.6294e-03,\n -9.8064e-04, -3.1487e-03, 3.4603e-03, -2.8228e-03, -1.2878e-02,\n 1.4082e-03, 7.3881e-03, 6.8876e-03, 3.1127e-03, 3.5155e-03,\n -1.1109e-02, -1.6721e-03, 1.4410e-03, 5.3125e-03, -7.7418e-04,\n 3.6284e-03, -2.3940e-03, -1.9046e-03, -1.1394e-02, 7.5793e-04,\n 8.6041e-04, 1.9388e-03, -2.4222e-03, 2.3913e-03, -5.1146e-03,\n -3.0179e-03, -1.5554e-04, -9.5646e-03, -6.1881e-03, 6.0466e-03,\n -2.7591e-03, 2.9475e-03, 3.7532e-03, 2.1972e-03, -1.4611e-03,\n 3.7652e-03, -1.8562e-03, 4.1938e-03, -2.2360e-03, -5.5710e-03,\n 2.3678e-03, 5.9842e-03, 3.9484e-04, 7.5572e-03, -8.1632e-03,\n 3.5008e-03, -5.8960e-03, -1.2240e-04, 6.5818e-03, 1.4321e-03,\n -5.8790e-03, 9.6106e-03, -7.6317e-03, -1.0740e-04, 4.0479e-03,\n 2.7601e-03, 1.1024e-03, 3.6984e-03, 1.1804e-03, 2.7603e-03,\n 2.9154e-03, -6.4779e-05, -7.1293e-03, 3.3901e-03, 1.2828e-03,\n 3.0808e-03, -4.4452e-03, 1.2396e-03, -3.1317e-03, -3.6915e-03,\n 3.5666e-03, -6.0522e-03, -1.5807e-03, -2.8906e-03, 8.9714e-06,\n -5.1965e-03, 9.3649e-04, 4.8702e-03, 7.3294e-03, -3.0377e-03,\n -1.3297e-03, -2.3694e-03, 1.6645e-03, -1.8852e-03, 3.3348e-03,\n 1.0153e-03, 4.0438e-03, 3.1492e-03, -1.9782e-03, -4.8265e-03,\n 6.7159e-03, -1.7107e-03, 1.0126e-03, -9.0692e-04, -2.3409e-03,\n 2.1992e-04, -5.6572e-03, 1.7308e-03, -3.2756e-04, 3.0308e-03,\n 1.0654e-03, 4.2442e-03, -5.3514e-03, -2.1844e-03, 5.5725e-03,\n 3.8837e-04, -1.6772e-03, 2.3128e-03, -4.3854e-03, -9.7492e-04,\n 6.1909e-03, 8.5757e-04, 4.6740e-03, -4.8544e-03, -2.1750e-04,\n 1.0046e-02, 4.3965e-03, -1.2274e-03, -9.0899e-04, -2.3756e-03,\n -6.3821e-03, -4.7182e-03, -5.8265e-03, -1.3583e-03, 4.4413e-03,\n 3.8997e-04, -1.4441e-02, 3.8992e-03, -8.2670e-03, -8.0414e-03,\n -2.0917e-03, 4.4220e-04, -6.4782e-04, -2.6763e-04, -2.0605e-03,\n -4.0206e-03, 2.1835e-03, -5.3440e-04, 2.6746e-04, 7.1215e-04,\n -3.2916e-03, -5.9156e-03, 1.9412e-03, 8.1923e-04, 1.1748e-03,\n -2.8107e-04, 7.7895e-03, 8.7235e-03, 6.3108e-03, 3.5338e-03,\n 4.8249e-03, 4.8356e-03, 5.1652e-03, 4.2728e-03, 1.3193e-03,\n -1.2651e-04, 2.2804e-03, -2.3097e-03, 5.5067e-04, 3.6729e-03,\n 3.5464e-03, -1.3064e-03, -6.8030e-03, 2.6288e-03, -2.3863e-03,\n -1.2934e-03, 1.0344e-03, -1.6342e-03, 2.9073e-03, 2.3095e-03,\n -1.4261e-03, 8.2179e-04, -9.0205e-03, 2.4381e-03, -5.1978e-03,\n -3.4890e-03, -3.0046e-03, -3.5992e-03, 4.3612e-03, -1.3081e-03,\n -4.1804e-03, -1.0918e-02, 3.0772e-03, 2.6523e-03, -8.9417e-03,\n 3.8307e-03, -6.3304e-03, 5.5723e-04, 1.6720e-03, -2.7034e-03,\n -4.0839e-03, -4.6264e-03, -1.0191e-03, -2.0276e-03, 6.4449e-04,\n -1.8876e-03, 1.4530e-03, 4.9138e-03, -8.0503e-04, -4.2316e-03,\n -2.7361e-04, -4.0336e-03, 2.5780e-03, 4.4277e-03, 6.1024e-03,\n 4.3336e-04, -6.8057e-03, 3.3254e-03, -4.2181e-03, 2.4340e-03,\n 4.4249e-03, 3.9499e-03, -8.3969e-03, 3.7351e-04, 2.5703e-03,\n -1.1077e-03, -4.4682e-03, -5.9056e-03, 4.1108e-03, -1.9352e-03,\n -3.3676e-03, 2.6245e-04, 9.0140e-04, -3.1480e-04, 4.9073e-03,\n -2.4652e-03, -2.1936e-03, 3.1118e-03, 3.6491e-04, 1.1020e-03,\n -3.7231e-04, 5.4302e-03, -1.9602e-03, -2.7188e-03, -2.2788e-03,\n -8.7818e-03, -4.2937e-03, 5.4648e-03, 6.4389e-05, 9.9780e-03,\n 1.5709e-03, -7.6470e-03, -3.0353e-03, -1.1005e-02, 4.2810e-03,\n 3.8448e-03, 5.5827e-03, 3.3306e-03, -6.0299e-04, 5.8831e-03,\n 1.2538e-03, -8.9997e-04, 1.1294e-02, 9.7651e-04, -6.7213e-03,\n 1.0850e-03, 6.9323e-03, -1.5865e-03, 3.7449e-03, -1.5945e-03,\n 6.9426e-03, 3.7173e-03, -9.7204e-04, 4.1612e-03, 1.0865e-03,\n -1.2129e-02, -2.5811e-03, -7.5798e-03, -1.7299e-03, -4.4406e-03,\n 1.3287e-02, -2.8542e-03], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([2.1662e-04, 9.9610e-05, 1.9793e-04, 1.6106e-04, 2.1323e-04, 1.2263e-04,\n 4.3911e-04, 6.4398e-05, 6.4934e-05, 1.0881e-04, 3.5256e-04, 9.0310e-05,\n 9.9103e-05, 1.8079e-04, 6.1111e-05, 1.2084e-04, 1.6156e-04, 1.1171e-04,\n 1.0953e-04, 1.6893e-04, 5.5737e-05, 9.0631e-05, 9.5286e-05, 3.1662e-04,\n 3.0057e-04, 1.0226e-04, 1.7513e-04, 1.3975e-04, 1.2567e-04, 1.4834e-04,\n 1.5826e-04, 1.1870e-04, 1.7425e-04, 1.2931e-04, 1.9826e-04, 6.1916e-05,\n 1.1361e-04, 1.1535e-04, 2.1788e-04, 2.7717e-04, 2.0558e-04, 2.0143e-04,\n 3.2457e-04, 1.3374e-04, 7.2545e-05, 2.4611e-04, 2.1587e-04, 1.7615e-04,\n 7.9717e-05, 1.1309e-04, 1.2541e-04, 2.0420e-04, 1.9856e-04, 7.9802e-05,\n 2.1259e-04, 2.4717e-04, 5.8234e-05, 2.7154e-04, 5.0008e-04, 2.3498e-04,\n 1.1442e-04, 2.3151e-04, 2.2771e-04, 1.3230e-04, 2.6064e-04, 1.9825e-04,\n 1.1768e-04, 1.1959e-04, 8.5278e-05, 1.1901e-04, 1.1095e-04, 1.9597e-04,\n 1.0094e-04, 8.2411e-05, 1.5396e-04, 3.4813e-04, 8.5922e-05, 3.5492e-04,\n 7.5784e-05, 6.4856e-05, 1.3445e-04, 2.1261e-04, 2.1245e-04, 5.4718e-05,\n 2.7176e-04, 1.4718e-04, 2.1884e-04, 9.2604e-05, 4.8861e-05, 1.8041e-04,\n 8.1846e-05, 2.0843e-04, 7.5179e-05, 1.7671e-04, 1.2993e-04, 4.1585e-04,\n 1.2773e-04, 8.8355e-05, 2.2937e-04, 5.3564e-05, 5.8601e-05, 1.2954e-04,\n 7.1637e-05, 2.0600e-04, 1.3530e-04, 7.5373e-05, 1.6875e-04, 1.1698e-04,\n 1.4079e-04, 1.0299e-04, 1.8214e-04, 6.1480e-05, 1.1268e-04, 7.6026e-05,\n 1.0728e-04, 3.9892e-04, 2.9670e-04, 1.1029e-04, 1.8956e-04, 2.0724e-04,\n 9.0006e-05, 8.4686e-05, 1.8163e-04, 3.0351e-04, 1.0207e-04, 1.3629e-04,\n 8.2415e-05, 2.3098e-04, 8.6688e-05, 1.4059e-04, 9.7301e-05, 1.4337e-04,\n 1.2049e-04, 2.1220e-04, 2.4741e-04, 1.0634e-03, 1.3210e-04, 1.0503e-04,\n 5.8267e-04, 2.0411e-04, 1.5076e-04, 6.7351e-05, 6.4223e-05, 1.4097e-04,\n 2.1520e-04, 2.3976e-04, 1.2737e-04, 2.5054e-04, 6.0851e-04, 2.0597e-04,\n 1.1757e-04, 1.0839e-03, 2.1223e-04, 2.6675e-04, 1.0591e-04, 5.8862e-04,\n 1.1370e-04, 3.6508e-04, 1.1528e-04, 1.0863e-04, 1.0575e-04, 4.2938e-04,\n 2.5326e-04, 1.8716e-04, 1.3120e-04, 1.4383e-04, 4.4551e-04, 1.6581e-04,\n 8.5254e-05, 1.0198e-04, 1.0335e-04, 7.6033e-05, 1.1561e-04, 2.0720e-04,\n 1.0451e-04, 2.2061e-04, 1.2716e-04, 7.7532e-05, 1.0719e-04, 1.0367e-04,\n 2.7400e-04, 2.6264e-04, 4.4992e-04, 1.0360e-04, 1.8070e-04, 7.0597e-05,\n 1.3010e-04, 6.7733e-05, 1.8310e-04, 1.1337e-04, 1.7605e-04, 7.3859e-05,\n 9.1261e-05, 2.8240e-04, 1.5843e-04, 6.4861e-05, 5.2224e-04, 7.9399e-05,\n 5.1881e-04, 1.4267e-04, 2.2770e-04, 1.6963e-04, 2.3984e-04, 9.4702e-05,\n 1.0443e-04, 1.0107e-04, 1.0504e-04, 7.9052e-05, 3.2396e-04, 8.2927e-05,\n 3.5732e-04, 4.2989e-04, 1.2092e-04, 2.2069e-04, 2.3105e-04, 1.9043e-04,\n 7.8439e-05, 2.6992e-04, 3.8716e-04, 5.9943e-05, 2.7654e-04, 2.3423e-04,\n 7.0129e-05, 6.6526e-05, 1.9704e-04, 5.6616e-04, 5.9158e-04, 1.0228e-04,\n 8.2570e-05, 1.3997e-04, 1.1161e-04, 1.2278e-04, 1.1020e-04, 7.3339e-05,\n 1.0887e-04, 3.3136e-04, 3.3003e-04, 2.1773e-04, 7.0781e-05, 1.4377e-04,\n 1.3802e-03, 1.4618e-04, 1.7856e-04, 2.5268e-04, 2.4219e-04, 2.1408e-04,\n 2.2963e-04, 2.5439e-04, 1.3844e-04, 1.3088e-04, 7.2493e-04, 3.3653e-04,\n 8.0284e-05, 1.0148e-04, 1.8286e-04, 1.0736e-04, 2.7016e-04, 2.8440e-04,\n 3.4460e-04, 1.6608e-04, 1.9445e-04, 1.5608e-04, 8.4017e-05, 2.1709e-04,\n 2.1517e-04, 2.7474e-04, 3.5552e-04, 3.5860e-04, 2.8846e-04, 6.7758e-05,\n 1.4089e-04, 2.4477e-04, 2.6057e-04, 4.9538e-05, 1.0457e-04, 2.4137e-04,\n 2.3395e-04, 1.0911e-04, 8.1589e-05, 4.1077e-04, 1.1340e-04, 1.2400e-04,\n 1.0337e-04, 7.4466e-04, 5.3197e-04, 8.8300e-05, 9.9812e-05, 9.8527e-05,\n 2.3411e-04, 4.1352e-04, 2.9294e-04, 5.9548e-04, 1.2493e-04, 1.1768e-04,\n 1.0649e-04, 1.4332e-04, 1.1550e-04, 7.3172e-04, 2.2278e-04, 1.1552e-04,\n 1.0980e-04, 1.8469e-04, 1.8446e-04, 1.4794e-04, 2.3535e-04, 6.6090e-05,\n 1.7952e-04, 1.2012e-04, 1.3588e-04, 1.1554e-04, 1.7365e-04, 2.2801e-04,\n 1.0919e-04, 3.0133e-04, 7.2609e-05, 1.9793e-04, 1.3066e-04, 1.2191e-04,\n 1.8132e-04, 7.1924e-05, 1.3512e-04, 1.7548e-04, 1.3216e-04, 1.8689e-04,\n 1.2063e-04, 1.1903e-04, 3.7374e-04, 3.6043e-04, 2.0809e-04, 2.6052e-04,\n 1.1055e-04, 1.2374e-04, 8.7139e-05, 1.4573e-04, 1.6610e-04, 4.6867e-05,\n 1.4282e-04, 3.5092e-04, 7.8190e-05, 1.2637e-04, 8.6752e-05, 1.0037e-04,\n 7.2115e-04, 1.2185e-04, 7.3007e-05, 7.7492e-05, 1.2505e-04, 1.5231e-04,\n 1.0641e-04, 4.4499e-04, 2.7578e-04, 9.9778e-05, 1.1459e-04, 1.3935e-04,\n 1.1706e-04, 2.1767e-04, 2.8349e-04, 1.9223e-04, 6.3614e-05, 2.9293e-04,\n 2.8205e-04, 1.4150e-04, 1.9727e-04, 1.1284e-04, 9.6297e-05, 7.8502e-05,\n 1.4450e-03, 7.1024e-05, 1.8766e-04, 3.0881e-04, 8.8324e-05, 7.7117e-05,\n 6.5159e-05, 2.2765e-04, 2.2411e-04, 9.7198e-05, 7.2703e-05, 1.6067e-04,\n 1.2053e-04, 1.1041e-04, 6.5134e-05, 6.3087e-04, 2.9163e-04, 1.8672e-04,\n 1.2218e-04, 7.0083e-05, 7.0071e-05, 3.2205e-04, 2.5784e-04, 1.3405e-04,\n 2.1786e-04, 1.1997e-04, 2.7266e-04, 1.2056e-04, 1.0158e-04, 1.4569e-04,\n 9.5382e-05, 1.2566e-04, 1.1132e-04, 1.9317e-04, 2.6112e-04, 1.0603e-04,\n 2.8294e-04, 7.8806e-05, 2.5466e-04, 8.9903e-05, 1.3298e-04, 3.0976e-04,\n 1.3626e-04, 1.5768e-04, 1.3433e-04, 1.2915e-04, 2.2107e-04, 8.9279e-05,\n 8.8730e-05, 6.9262e-05, 1.7540e-04, 9.4491e-05, 1.7249e-04, 1.1421e-04,\n 7.5536e-05, 1.3230e-04, 9.3897e-05, 3.9138e-04, 1.2827e-04, 2.9959e-04,\n 2.2672e-04, 1.8016e-04, 6.5076e-05, 3.4649e-04, 1.6824e-04, 7.8072e-05,\n 1.7569e-04, 1.7231e-04, 1.2665e-04, 9.3953e-05, 8.6530e-05, 1.5499e-04,\n 1.8630e-04, 1.2632e-04, 2.0043e-04, 1.5013e-04, 5.3469e-04, 1.6266e-04,\n 1.0437e-04, 1.2845e-04, 1.9841e-04, 1.5164e-04, 5.6477e-05, 2.6669e-04,\n 1.1006e-04, 8.8623e-05, 1.6216e-04, 1.8535e-04, 2.2631e-04, 5.3140e-05,\n 2.2873e-04, 2.4505e-04, 1.6450e-04, 2.4773e-04, 3.2124e-04, 3.1531e-04,\n 2.9189e-04, 9.8504e-05, 1.4272e-04, 9.6795e-05, 1.1928e-04, 8.0778e-05,\n 3.3497e-04, 1.0283e-04, 1.6285e-04, 8.9457e-05, 2.4455e-04, 1.8756e-04,\n 1.0095e-04, 6.6436e-04, 1.2774e-04, 1.3173e-04, 2.2144e-04, 2.2862e-04,\n 1.0136e-04, 9.3904e-05, 2.6081e-04, 3.0814e-04, 1.0480e-04, 3.8659e-04,\n 2.4903e-04, 1.3065e-04, 7.3117e-05, 1.5566e-04, 1.6721e-04, 8.4570e-05,\n 2.9091e-04, 1.4324e-04, 1.3869e-04, 2.1449e-04, 3.2882e-04, 6.5234e-05,\n 3.0541e-04, 7.3490e-05, 7.7218e-05, 4.5111e-04, 1.7537e-04, 2.1052e-04,\n 1.6772e-04, 3.0308e-04, 2.9887e-04, 3.2522e-01, 1.5590e-04, 1.0976e-04,\n 3.1493e-04, 1.0692e-04], device='cuda:0')"
|
| 19 |
+
},
|
| 20 |
+
"3": {
|
| 21 |
+
"step": "tensor(12520.)",
|
| 22 |
+
"exp_avg": "tensor([-1.0703e-02, -2.8942e-03, -8.3129e-03, 2.7173e-04, 8.1760e-03,\n 2.8985e-03, -3.9763e-03, 4.8547e-03, -2.0685e-03, 6.9446e-03,\n 3.3967e-03, 8.9961e-03, 1.9014e-03, -4.5341e-03, 1.1484e-03,\n -3.4111e-03, 2.6972e-03, 3.5820e-03, 2.5010e-03, -5.4804e-04,\n -1.5767e-03, 7.7475e-04, 1.0495e-03, -5.3573e-03, 5.2728e-04,\n -1.2325e-03, -6.5597e-03, -2.2363e-04, -6.4965e-03, -3.6123e-04,\n 5.0393e-03, 8.1327e-03, 4.2016e-03, -2.3379e-04, 1.3159e-02,\n -5.1751e-03, -5.8559e-03, 8.0799e-04, -4.7718e-04, 5.5059e-03,\n -2.4724e-03, 7.7387e-04, -3.9822e-03, -6.6663e-03, -6.3557e-04,\n 2.8095e-03, 2.8133e-03, -1.9373e-03, 7.3731e-03, -5.0276e-03,\n 4.7021e-04, 8.3201e-04, -1.0550e-03, -3.5117e-03, -1.8464e-03,\n -1.3946e-03, 1.1623e-03, -3.3599e-03, 1.6260e-02, 4.4351e-04,\n 7.9857e-03, 7.6321e-04, 3.1025e-03, 4.0213e-04, -5.5769e-03,\n 2.3584e-03, 1.1738e-03, -3.4983e-03, -2.4583e-03, 1.4639e-03,\n -2.4175e-03, -6.3119e-03, 6.0652e-03, 5.9590e-03, -3.7616e-04,\n -4.3248e-03, -1.0935e-03, 2.3824e-04, -7.7207e-04, 4.0206e-03,\n -6.0180e-03, -5.8550e-03, 1.1471e-03, 3.3303e-03, 1.0793e-02,\n 1.7789e-04, -9.5634e-04, 6.5487e-04, -2.5796e-03, -4.3566e-03,\n -4.2315e-03, 3.1062e-03, -8.3521e-03, 2.6802e-04, -1.9191e-04,\n -2.5805e-03, -2.1381e-03, 1.1273e-03, 5.9573e-03, -4.2724e-04,\n -3.4322e-04, -1.6162e-03, 3.6944e-04, -1.3045e-03, 2.0396e-03,\n -4.7372e-03, 7.7837e-03, -5.7304e-04, -2.5323e-03, -1.2659e-03,\n -3.1497e-03, -3.4246e-04, -2.9873e-03, 3.8862e-03, 1.1016e-03,\n 7.0671e-03, 2.6838e-03, -1.9266e-03, -2.5930e-03, -4.1672e-03,\n 9.5654e-04, 1.0769e-03, -1.0173e-02, -3.2908e-03, -1.1270e-03,\n 4.1682e-03, 1.9226e-04, -1.7840e-04, 4.6184e-03, -4.6771e-03,\n -9.0945e-04, -9.2761e-03, 1.5134e-03, 7.2063e-04, -5.6392e-03,\n -5.2411e-03, 4.4951e-03, 1.2504e-03, -5.7696e-04, -1.2105e-02,\n -6.1997e-03, 1.8727e-03, -2.6521e-05, -4.0427e-03, 2.4022e-03,\n 6.9718e-04, 3.4018e-03, -4.1952e-03, 1.1945e-02, -2.6208e-03,\n -2.6055e-03, 7.0616e-03, 7.0927e-03, -6.5486e-03, -1.6350e-03,\n 7.0306e-03, -6.0157e-03, 1.4211e-03, 2.0913e-03, 1.4870e-03,\n 1.0970e-03, -4.7729e-04, -2.0577e-03, -1.1362e-04, -1.8406e-03,\n 1.0405e-03, 1.0254e-04, -1.3505e-03, -2.3671e-03, -2.1612e-04,\n -1.9930e-03, 3.7271e-03, -1.0111e-02, -9.5926e-04, 4.5455e-03,\n -2.6094e-03, 6.2172e-03, 8.6249e-04, 1.1965e-03, -2.6064e-03,\n -2.0002e-04, -5.7317e-03, -2.7478e-03, 2.5890e-04, 6.9306e-03,\n 1.4068e-03, -1.4852e-02, 1.1811e-02, 8.0699e-03, -5.4762e-03,\n -4.6646e-03, -8.0628e-04, -1.2590e-03, -1.8741e-02, 3.3279e-03,\n -4.8891e-04, -7.0438e-04, -3.9380e-03, -2.2298e-04, 1.9198e-03,\n -3.2727e-03, -3.4590e-03, 3.1317e-03, -9.7189e-04, -6.6553e-03,\n -4.2725e-03, 2.7480e-03, 3.1999e-03, 5.0911e-03, -1.1162e-02,\n 1.8886e-03, -5.4695e-04, -4.7887e-03, -3.0211e-03, -1.1030e-02,\n 2.8673e-03, 2.3236e-03, -6.6595e-03, 6.9219e-03, 3.8587e-04,\n -6.0002e-04, -5.6606e-04, -1.9190e-03, 2.5231e-03, 1.1429e-02,\n 3.9664e-03, 3.2702e-03, -4.5098e-03, 5.6254e-03, -1.0413e-04,\n -8.1918e-04, 5.3777e-03, 3.7159e-03, 2.5227e-03, 4.9716e-03,\n -1.8867e-03, 3.2660e-03, -2.2347e-03, -2.3590e-03, -3.3353e-03,\n -6.8829e-04, -3.2706e-03, 3.0669e-03, -2.1248e-03, -1.2974e-02,\n 6.7792e-04, 5.0356e-03, 2.9887e-03, 3.6869e-03, 3.7039e-03,\n -5.1806e-03, -1.6891e-03, 1.7180e-03, 7.1891e-03, -1.1511e-03,\n 6.2446e-03, -1.3550e-03, -1.1970e-03, -9.4404e-03, 1.0251e-03,\n 9.1439e-04, 1.9823e-03, -2.6484e-03, 2.5180e-03, -5.8033e-03,\n -2.2991e-03, -1.4741e-04, -9.5937e-03, -4.9066e-03, 9.1486e-03,\n -3.5041e-03, 3.3677e-03, 3.7122e-03, 2.7381e-03, -1.9569e-03,\n 3.6743e-03, -1.0446e-03, 4.5596e-03, -2.5267e-03, -5.6266e-03,\n 2.0066e-03, 7.5434e-03, -2.7226e-05, 3.6538e-03, -4.4511e-03,\n 5.1005e-03, -4.9619e-03, -3.8770e-04, 6.2513e-03, 2.2494e-03,\n -4.2809e-03, 6.0151e-03, -1.0087e-02, 8.4354e-04, 3.8739e-03,\n 2.7189e-03, 2.1206e-03, 1.5951e-03, 1.8135e-03, 2.7971e-03,\n -5.3081e-05, -8.3236e-04, -4.2341e-03, 3.9377e-03, 2.1447e-03,\n 3.4220e-03, -3.3863e-03, -4.2358e-04, -3.6739e-03, -3.5900e-03,\n 3.9141e-03, -6.4126e-03, -1.7788e-03, -2.9647e-03, -6.3874e-04,\n -4.7976e-03, 2.4402e-04, 5.7557e-03, 6.3641e-03, -3.6487e-03,\n -8.4133e-04, -2.2269e-03, 2.1446e-03, -5.0522e-04, 3.8102e-03,\n 1.3508e-03, 3.5856e-03, 2.4221e-03, -2.4632e-03, -4.5232e-03,\n 5.2027e-03, -1.8329e-03, 1.5577e-03, -7.9813e-04, -2.6198e-03,\n 9.7329e-04, -4.5511e-03, 1.2243e-03, -4.7343e-04, 3.2673e-03,\n 1.5447e-03, 6.4523e-03, -2.5503e-03, -1.6978e-03, 5.2786e-03,\n 1.7378e-03, -1.3922e-03, 2.5030e-03, -5.5840e-03, -7.4377e-04,\n 4.4865e-03, 1.0271e-03, 3.9482e-03, -5.3763e-03, -1.9038e-03,\n 1.0850e-02, 3.8077e-03, -1.1295e-03, 2.5540e-04, -2.1112e-03,\n -3.7417e-03, -5.3091e-03, -2.1462e-03, -1.4453e-03, 4.4819e-03,\n 8.8914e-04, -4.1564e-03, 4.7144e-03, -6.8560e-03, -6.8304e-03,\n -1.7305e-03, -1.1043e-03, -7.0911e-04, 1.6244e-03, -3.1413e-03,\n -2.2929e-03, 3.4781e-03, 6.4268e-05, 7.4984e-04, 1.8693e-03,\n -2.7368e-03, -3.5811e-03, 1.8834e-03, -2.7200e-04, 2.7873e-03,\n 6.4068e-04, 1.2971e-02, 5.7999e-03, 4.4020e-03, 4.1933e-03,\n 4.6187e-03, 6.3620e-03, 3.8920e-03, 3.6246e-03, 1.4400e-03,\n -8.4925e-05, 8.5451e-04, -2.8243e-03, 1.3152e-03, 3.9966e-03,\n 2.0713e-03, -2.4593e-03, -5.6527e-03, 2.2849e-03, -2.5092e-03,\n -1.3601e-03, 5.0184e-04, -3.7558e-04, 4.1210e-03, 3.7273e-03,\n -1.7090e-03, 2.0484e-03, -7.7457e-03, 4.3197e-03, -3.6580e-03,\n -5.7812e-03, -3.6532e-03, -7.3131e-03, 4.8163e-03, -3.0999e-03,\n -6.8240e-03, -1.0567e-02, 2.7215e-03, 1.3402e-03, -1.3130e-02,\n 2.5690e-03, -5.7812e-03, 6.9343e-04, 2.1149e-03, 1.5452e-04,\n -3.2494e-03, -6.1115e-03, -7.2039e-04, -2.5362e-03, 1.0738e-04,\n -2.2308e-03, 2.6749e-03, 7.7404e-03, -3.2061e-04, -4.2533e-03,\n -8.8279e-04, -3.9667e-03, 1.5944e-03, 5.6925e-03, 9.0675e-03,\n 1.6649e-04, -7.1785e-03, 2.7649e-03, -3.0918e-03, 2.0589e-03,\n 6.4373e-03, 4.5509e-03, -8.5671e-03, 7.1672e-04, 2.6248e-03,\n -2.5318e-03, -5.0277e-03, -4.9177e-03, 4.9472e-03, -1.7538e-03,\n -2.1628e-03, 1.7985e-03, 8.8906e-04, -1.9905e-04, 5.2893e-03,\n -3.3894e-03, -2.5611e-03, 5.3512e-03, -5.4877e-04, 1.7048e-03,\n -1.5856e-03, 5.2233e-03, -4.6739e-04, -2.7061e-03, -9.3177e-04,\n -5.6086e-03, -6.5667e-03, 4.2224e-03, 9.1287e-04, 9.9510e-03,\n 1.8399e-03, -9.0556e-03, -2.9879e-03, -1.6419e-02, 7.6723e-03,\n 2.5744e-03, 4.3279e-03, 5.2202e-03, -1.0966e-03, 6.6483e-03,\n 7.5733e-04, -1.5106e-03, 8.2267e-03, 1.6876e-03, -6.4025e-03,\n 1.4304e-03, 4.6533e-03, -1.6324e-03, 5.9713e-03, -2.7134e-03,\n 6.3670e-03, 2.2764e-03, -2.5342e-04, 4.7187e-03, 6.6316e-04,\n -1.1014e-02, -3.0217e-03, 4.2655e-05, -1.1885e-03, -7.2697e-03,\n 9.5135e-03, -2.5884e-03], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([1.5745e-04, 1.4529e-04, 1.5292e-04, 1.3033e-04, 1.8859e-04, 1.6279e-04,\n 1.8144e-04, 1.1721e-04, 1.1326e-04, 1.4122e-04, 2.0831e-04, 1.2046e-04,\n 1.9255e-04, 1.2722e-04, 1.3680e-04, 2.1332e-04, 1.7108e-04, 1.2605e-04,\n 1.1987e-04, 1.8893e-04, 9.6206e-05, 1.7456e-04, 1.3720e-04, 2.1250e-04,\n 1.3790e-04, 1.9328e-04, 1.6693e-04, 2.0794e-04, 1.8553e-04, 1.9938e-04,\n 1.6751e-04, 2.6260e-04, 1.4503e-04, 2.9169e-04, 1.7758e-04, 1.2903e-04,\n 1.4471e-04, 1.2980e-04, 1.8128e-04, 3.1529e-04, 1.7771e-04, 2.1823e-04,\n 2.6005e-04, 1.3487e-04, 1.6835e-04, 1.9489e-04, 1.5176e-04, 1.5232e-04,\n 1.3766e-04, 1.7503e-04, 1.8289e-04, 1.6068e-04, 1.5238e-04, 1.2902e-04,\n 1.5406e-04, 1.8198e-04, 9.3063e-05, 1.3848e-04, 2.4896e-04, 1.6832e-04,\n 1.6952e-04, 1.4597e-04, 2.9420e-04, 1.6345e-04, 2.6953e-04, 2.7970e-04,\n 1.3493e-04, 1.2943e-04, 1.2878e-04, 1.3201e-04, 1.9452e-04, 1.4833e-04,\n 1.3724e-04, 1.9947e-04, 1.8299e-04, 1.7671e-04, 1.6507e-04, 1.7004e-04,\n 1.2566e-04, 1.2548e-04, 1.5752e-04, 2.3845e-04, 1.4065e-04, 1.4681e-04,\n 2.3527e-04, 1.8237e-04, 2.3011e-04, 1.7098e-04, 1.1440e-04, 1.7619e-04,\n 2.0849e-04, 2.2516e-04, 1.0626e-04, 1.8015e-04, 1.9887e-04, 1.6959e-04,\n 1.2250e-04, 1.4883e-04, 1.3636e-04, 8.1533e-05, 1.0602e-04, 1.6255e-04,\n 1.1546e-04, 1.6421e-04, 1.1829e-04, 1.3487e-04, 2.0303e-04, 1.1651e-04,\n 1.7395e-04, 1.7645e-04, 2.0376e-04, 9.7683e-05, 2.1458e-04, 1.0532e-04,\n 1.2797e-04, 2.1774e-04, 1.5033e-04, 1.2185e-04, 2.6982e-04, 2.4656e-04,\n 1.4309e-04, 1.6479e-04, 2.4369e-04, 2.1606e-04, 2.1422e-04, 1.9685e-04,\n 9.6082e-05, 1.1550e-04, 1.6778e-04, 2.3217e-04, 1.5461e-04, 1.6288e-04,\n 1.5442e-04, 1.5779e-04, 1.3037e-04, 1.9700e-04, 1.3077e-04, 1.5153e-04,\n 1.5856e-04, 2.7737e-04, 2.1083e-04, 1.0150e-04, 1.3371e-04, 1.5031e-04,\n 2.0365e-04, 1.8375e-04, 1.3153e-04, 1.6520e-04, 2.4064e-04, 1.9974e-04,\n 1.4634e-04, 3.1216e-04, 1.9203e-04, 2.3595e-04, 1.2167e-04, 2.4733e-04,\n 1.4030e-04, 2.3998e-04, 1.5661e-04, 9.1792e-05, 1.3092e-04, 1.9944e-04,\n 1.7897e-04, 1.6218e-04, 1.5095e-04, 1.3477e-04, 2.0704e-04, 1.2775e-04,\n 1.3255e-04, 1.8888e-04, 1.9514e-04, 1.4219e-04, 1.8696e-04, 1.8290e-04,\n 1.4988e-04, 1.4194e-04, 1.9438e-04, 1.1314e-04, 1.7247e-04, 1.3673e-04,\n 2.7896e-04, 1.9050e-04, 1.9462e-04, 1.3954e-04, 1.9986e-04, 1.3869e-04,\n 1.7574e-04, 1.2525e-04, 2.1117e-04, 1.5664e-04, 1.8441e-04, 9.6092e-05,\n 1.1624e-04, 2.1300e-04, 1.6456e-04, 1.5205e-04, 2.1448e-04, 1.0360e-04,\n 1.9119e-04, 1.4755e-04, 1.4549e-04, 1.1805e-04, 1.7541e-04, 1.3991e-04,\n 1.8431e-04, 1.7144e-04, 2.5290e-04, 1.1623e-04, 1.3388e-04, 1.3682e-04,\n 2.2393e-04, 1.8587e-04, 2.1437e-04, 1.3428e-04, 1.7580e-04, 1.9220e-04,\n 1.3124e-04, 2.4146e-04, 1.9965e-04, 9.9597e-05, 1.8829e-04, 1.8257e-04,\n 1.2500e-04, 1.1241e-04, 2.4688e-04, 2.1598e-04, 1.6765e-04, 1.2833e-04,\n 1.4622e-04, 1.3080e-04, 1.1390e-04, 1.1741e-04, 1.5909e-04, 1.4573e-04,\n 1.6534e-04, 1.4509e-04, 2.0396e-04, 1.8521e-04, 1.3144e-04, 1.5396e-04,\n 2.4084e-04, 1.4232e-04, 1.8797e-04, 2.3099e-04, 2.4763e-04, 1.8935e-04,\n 1.6838e-04, 1.8006e-04, 1.5746e-04, 1.9440e-04, 1.8909e-04, 2.1804e-04,\n 1.2470e-04, 1.5747e-04, 1.6557e-04, 1.7857e-04, 1.7467e-04, 2.1991e-04,\n 2.2832e-04, 2.1712e-04, 2.0918e-04, 1.9030e-04, 1.2259e-04, 2.6929e-04,\n 1.6075e-04, 1.4651e-04, 2.4348e-04, 1.9850e-04, 1.8794e-04, 1.5227e-04,\n 1.8637e-04, 1.7379e-04, 1.8184e-04, 9.9646e-05, 1.5052e-04, 1.4092e-04,\n 1.4998e-04, 1.2642e-04, 1.5144e-04, 2.8933e-04, 1.1810e-04, 2.2827e-04,\n 1.8493e-04, 1.9622e-04, 1.4822e-04, 1.3550e-04, 1.2487e-04, 1.6034e-04,\n 1.5129e-04, 2.8397e-04, 1.7743e-04, 1.8500e-04, 1.5693e-04, 1.4409e-04,\n 1.2630e-04, 1.8586e-04, 1.3881e-04, 1.9341e-04, 2.5129e-04, 2.1607e-04,\n 1.0825e-04, 1.3006e-04, 1.5088e-04, 1.6959e-04, 2.3667e-04, 1.2874e-04,\n 1.7184e-04, 1.4466e-04, 2.0946e-04, 1.3242e-04, 2.5636e-04, 2.0723e-04,\n 1.9206e-04, 2.4587e-04, 1.2777e-04, 1.8699e-04, 1.5173e-04, 1.8693e-04,\n 1.5134e-04, 1.2186e-04, 1.5570e-04, 1.6235e-04, 1.3731e-04, 1.7489e-04,\n 1.1497e-04, 2.2332e-04, 2.2914e-04, 2.2876e-04, 1.8453e-04, 2.1461e-04,\n 1.1817e-04, 1.4317e-04, 1.1464e-04, 1.5036e-04, 2.3096e-04, 9.6367e-05,\n 1.1252e-04, 1.2457e-04, 1.2619e-04, 1.5384e-04, 1.4826e-04, 1.8248e-04,\n 2.2916e-04, 1.4538e-04, 1.2793e-04, 1.3523e-04, 1.1485e-04, 1.3399e-04,\n 1.3993e-04, 2.0237e-04, 1.4887e-04, 1.7287e-04, 1.2180e-04, 1.2012e-04,\n 1.1181e-04, 1.7403e-04, 1.5155e-04, 1.7966e-04, 1.1309e-04, 1.7091e-04,\n 1.7961e-04, 2.7506e-04, 1.4700e-04, 2.6651e-04, 1.3820e-04, 1.5481e-04,\n 2.4703e-04, 1.0403e-04, 1.6211e-04, 1.9039e-04, 1.0435e-04, 1.2876e-04,\n 1.0964e-04, 1.6505e-04, 1.4885e-04, 1.7809e-04, 1.1406e-04, 1.6969e-04,\n 2.5562e-04, 1.7032e-04, 9.5438e-05, 1.6414e-04, 1.4774e-04, 1.2972e-04,\n 1.5495e-04, 1.6190e-04, 1.5378e-04, 2.0847e-04, 1.4997e-04, 2.7877e-04,\n 1.4804e-04, 1.3784e-04, 1.5609e-04, 1.2720e-04, 1.6641e-04, 1.7110e-04,\n 1.2015e-04, 1.3076e-04, 2.0483e-04, 3.4257e-04, 1.5742e-04, 1.1801e-04,\n 1.9702e-04, 9.1743e-05, 1.9451e-04, 1.7165e-04, 1.3599e-04, 1.5452e-04,\n 1.5009e-04, 1.9637e-04, 1.2511e-04, 1.2295e-04, 1.4064e-04, 1.6470e-04,\n 1.3096e-04, 1.4279e-04, 1.4079e-04, 2.1662e-04, 1.8270e-04, 2.3549e-04,\n 1.3121e-04, 1.3440e-04, 1.9125e-04, 2.0228e-04, 1.9981e-04, 1.5584e-04,\n 1.6800e-04, 1.5845e-04, 8.5878e-05, 1.8891e-04, 1.5176e-04, 2.0238e-04,\n 2.6004e-04, 1.6190e-04, 1.3962e-04, 1.0400e-04, 1.5591e-04, 2.5160e-04,\n 1.4145e-04, 1.2718e-04, 3.0373e-04, 1.5539e-04, 2.2985e-04, 1.9630e-04,\n 2.2426e-04, 1.2729e-04, 1.1904e-04, 1.3052e-04, 1.3163e-04, 1.5747e-04,\n 1.4241e-04, 1.1992e-04, 1.6318e-04, 1.7069e-04, 1.9632e-04, 1.0405e-04,\n 2.7521e-04, 1.4183e-04, 1.7021e-04, 1.9044e-04, 1.5309e-04, 2.3684e-04,\n 2.1963e-04, 1.4029e-04, 2.3325e-04, 9.3078e-05, 1.3347e-04, 1.5429e-04,\n 2.5466e-04, 1.7798e-04, 1.4661e-04, 1.1692e-04, 1.7653e-04, 1.7261e-04,\n 1.7487e-04, 2.0439e-04, 2.1310e-04, 1.0948e-04, 1.7874e-04, 2.1683e-04,\n 1.5911e-04, 1.6465e-04, 1.9187e-04, 3.9832e-04, 1.5268e-04, 2.2361e-04,\n 1.6521e-04, 2.1066e-04, 1.1710e-04, 1.6729e-04, 1.4511e-04, 1.1764e-04,\n 1.9667e-04, 1.3945e-04, 1.6816e-04, 2.2484e-04, 1.7005e-04, 1.4959e-04,\n 2.0270e-04, 1.2115e-04, 1.1794e-04, 1.5654e-04, 1.1855e-04, 1.6327e-04,\n 1.1855e-04, 1.9656e-04, 2.2485e-04, 8.5002e-04, 1.9084e-04, 2.2414e-04,\n 2.0356e-04, 1.4652e-04], device='cuda:0')"
|
| 24 |
+
},
|
| 25 |
+
"4": {
|
| 26 |
+
"step": "tensor(12520.)",
|
| 27 |
+
"exp_avg": "tensor([[-5.5106e-05, -1.0152e-05, -2.9147e-05, ..., 1.8254e-04,\n 5.8381e-05, 1.0848e-04],\n [ 4.3070e-04, 1.0480e-04, 8.1528e-05, ..., -1.6144e-04,\n -5.7774e-04, 7.3881e-05],\n [-2.2840e-04, 1.4989e-04, 7.8359e-05, ..., -1.5792e-04,\n -4.1823e-04, -4.3666e-04],\n ...,\n [-2.7423e-04, -2.4271e-04, 7.9358e-05, ..., 1.4911e-04,\n 4.6811e-05, 2.2030e-04],\n [ 4.7102e-05, -7.1363e-05, 5.0066e-05, ..., 9.5828e-05,\n 6.6547e-05, -4.6094e-05],\n [ 1.7751e-04, -1.5783e-04, -1.9104e-04, ..., 5.5701e-05,\n -2.5862e-04, -2.6955e-05]], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([[2.7374e-07, 1.3124e-07, 1.6542e-07, ..., 1.8294e-07, 2.4830e-07,\n 1.7589e-07],\n [4.4629e-07, 2.4384e-07, 2.2847e-07, ..., 5.0564e-07, 5.5865e-07,\n 7.3888e-07],\n [3.9625e-07, 3.2368e-07, 2.9385e-07, ..., 4.9627e-07, 5.2429e-07,\n 5.6006e-07],\n ...,\n [3.8774e-07, 6.2680e-07, 3.0069e-07, ..., 3.9994e-07, 5.9668e-07,\n 5.0718e-07],\n [2.5717e-07, 4.0021e-07, 2.9676e-07, ..., 5.5159e-07, 5.7863e-07,\n 5.4680e-07],\n [4.9592e-07, 4.2564e-07, 2.9534e-07, ..., 4.2484e-07, 5.6518e-07,\n 5.1931e-07]], device='cuda:0')"
|
| 29 |
+
},
|
| 30 |
+
"5": {
|
| 31 |
+
"step": "tensor(12520.)",
|
| 32 |
+
"exp_avg": "tensor([[ 1.0904e-04, 4.1566e-05, 9.3250e-05, ..., 1.2335e-04,\n -7.1549e-05, 1.6816e-04],\n [ 2.0987e-04, 3.5128e-05, -2.5475e-06, ..., -1.6237e-05,\n -9.5748e-05, 6.3477e-05],\n [-1.1149e-04, 9.4882e-05, 3.8091e-05, ..., -1.4232e-04,\n -1.9914e-04, -2.8290e-04],\n ...,\n [ 1.1539e-04, -1.2301e-04, 3.0498e-04, ..., -6.5792e-05,\n -1.5094e-04, -1.3551e-04],\n [-1.5197e-04, 4.5966e-05, -1.1011e-04, ..., -1.5446e-04,\n 1.5143e-04, -3.4747e-05],\n [-9.7458e-07, 6.6231e-07, 1.5338e-05, ..., -8.9043e-06,\n -1.8253e-04, 1.0116e-05]], device='cuda:0')",
|
| 33 |
+
"exp_avg_sq": "tensor([[7.0704e-08, 4.0568e-08, 6.0063e-08, ..., 4.3100e-08, 9.8600e-08,\n 6.5291e-08],\n [1.4195e-07, 8.6568e-08, 9.8495e-08, ..., 1.4824e-07, 1.8457e-07,\n 2.5019e-07],\n [1.4109e-07, 1.1460e-07, 1.1264e-07, ..., 2.0186e-07, 1.9340e-07,\n 2.3345e-07],\n ...,\n [1.3431e-07, 2.0559e-07, 2.0778e-07, ..., 1.6637e-07, 2.4385e-07,\n 1.4896e-07],\n [1.4315e-07, 1.2620e-07, 1.0244e-07, ..., 1.9626e-07, 2.6884e-07,\n 2.0085e-07],\n [1.1986e-07, 1.2491e-07, 1.4566e-07, ..., 1.6158e-07, 2.3473e-07,\n 2.5248e-07]], device='cuda:0')"
|
| 34 |
+
},
|
| 35 |
+
"6": {
|
| 36 |
+
"step": "tensor(12520.)",
|
| 37 |
+
"exp_avg": "tensor([-0.0009, 0.0009], device='cuda:0')",
|
| 38 |
+
"exp_avg_sq": "tensor([7.5451e-06, 7.5451e-06], device='cuda:0')"
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"param_groups": [
|
| 42 |
+
{
|
| 43 |
+
"lr": 0.001,
|
| 44 |
+
"name": "shared",
|
| 45 |
+
"betas": [
|
| 46 |
+
0.9,
|
| 47 |
+
0.999
|
| 48 |
+
],
|
| 49 |
+
"eps": 1e-08,
|
| 50 |
+
"weight_decay": 1e-05,
|
| 51 |
+
"amsgrad": false,
|
| 52 |
+
"maximize": false,
|
| 53 |
+
"foreach": null,
|
| 54 |
+
"capturable": false,
|
| 55 |
+
"differentiable": false,
|
| 56 |
+
"fused": null,
|
| 57 |
+
"decoupled_weight_decay": true,
|
| 58 |
+
"initial_lr": 0.001,
|
| 59 |
+
"params": [
|
| 60 |
+
0,
|
| 61 |
+
1,
|
| 62 |
+
2,
|
| 63 |
+
3
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"lr": 0.001,
|
| 68 |
+
"name": "scale_256",
|
| 69 |
+
"betas": [
|
| 70 |
+
0.9,
|
| 71 |
+
0.999
|
| 72 |
+
],
|
| 73 |
+
"eps": 1e-08,
|
| 74 |
+
"weight_decay": 1e-05,
|
| 75 |
+
"amsgrad": false,
|
| 76 |
+
"maximize": false,
|
| 77 |
+
"foreach": null,
|
| 78 |
+
"capturable": false,
|
| 79 |
+
"differentiable": false,
|
| 80 |
+
"fused": null,
|
| 81 |
+
"decoupled_weight_decay": true,
|
| 82 |
+
"initial_lr": 0.001,
|
| 83 |
+
"params": [
|
| 84 |
+
4
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"lr": 0.001,
|
| 89 |
+
"name": "scale_512",
|
| 90 |
+
"betas": [
|
| 91 |
+
0.9,
|
| 92 |
+
0.999
|
| 93 |
+
],
|
| 94 |
+
"eps": 1e-08,
|
| 95 |
+
"weight_decay": 1e-05,
|
| 96 |
+
"amsgrad": false,
|
| 97 |
+
"maximize": false,
|
| 98 |
+
"foreach": null,
|
| 99 |
+
"capturable": false,
|
| 100 |
+
"differentiable": false,
|
| 101 |
+
"fused": null,
|
| 102 |
+
"decoupled_weight_decay": true,
|
| 103 |
+
"initial_lr": 0.001,
|
| 104 |
+
"params": [
|
| 105 |
+
5
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"lr": 0.0005,
|
| 110 |
+
"name": "fusion",
|
| 111 |
+
"betas": [
|
| 112 |
+
0.9,
|
| 113 |
+
0.999
|
| 114 |
+
],
|
| 115 |
+
"eps": 1e-08,
|
| 116 |
+
"weight_decay": 1e-05,
|
| 117 |
+
"amsgrad": false,
|
| 118 |
+
"maximize": false,
|
| 119 |
+
"foreach": null,
|
| 120 |
+
"capturable": false,
|
| 121 |
+
"differentiable": false,
|
| 122 |
+
"fused": null,
|
| 123 |
+
"decoupled_weight_decay": true,
|
| 124 |
+
"initial_lr": 0.0005,
|
| 125 |
+
"params": [
|
| 126 |
+
6
|
| 127 |
+
]
|
| 128 |
+
}
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
"scheduler_state_dict": {
|
| 132 |
+
"T_0": 10,
|
| 133 |
+
"T_i": 20,
|
| 134 |
+
"T_mult": 2,
|
| 135 |
+
"eta_min": 1e-06,
|
| 136 |
+
"T_cur": 0,
|
| 137 |
+
"base_lrs": [
|
| 138 |
+
0.001,
|
| 139 |
+
0.001,
|
| 140 |
+
0.001,
|
| 141 |
+
0.0005
|
| 142 |
+
],
|
| 143 |
+
"last_epoch": 10,
|
| 144 |
+
"_step_count": 0,
|
| 145 |
+
"_is_initial": false,
|
| 146 |
+
"_get_lr_called_within_step": false,
|
| 147 |
+
"_last_lr": [
|
| 148 |
+
0.001,
|
| 149 |
+
0.001,
|
| 150 |
+
0.001,
|
| 151 |
+
0.0005
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
"metrics": {
|
| 155 |
+
"best_val_acc": 71.726,
|
| 156 |
+
"best_epoch": 9,
|
| 157 |
+
"scale_accuracies": {
|
| 158 |
+
"256": 71.258,
|
| 159 |
+
"512": 71.69
|
| 160 |
+
},
|
| 161 |
+
"training_history": {
|
| 162 |
+
"epochs": [
|
| 163 |
+
1,
|
| 164 |
+
2,
|
| 165 |
+
3,
|
| 166 |
+
4,
|
| 167 |
+
5,
|
| 168 |
+
6,
|
| 169 |
+
7,
|
| 170 |
+
8,
|
| 171 |
+
9,
|
| 172 |
+
10
|
| 173 |
+
],
|
| 174 |
+
"train_loss": [
|
| 175 |
+
5.60248446921571,
|
| 176 |
+
4.156974341351384,
|
| 177 |
+
3.7702821485531595,
|
| 178 |
+
3.570641661223512,
|
| 179 |
+
3.4472002215659656,
|
| 180 |
+
3.3609565016560663,
|
| 181 |
+
3.300025675433893,
|
| 182 |
+
3.2499928289709,
|
| 183 |
+
3.213850290440142,
|
| 184 |
+
3.193320405940278
|
| 185 |
+
],
|
| 186 |
+
"train_acc": [
|
| 187 |
+
63.38018384800733,
|
| 188 |
+
69.48813074329888,
|
| 189 |
+
70.23393515443342,
|
| 190 |
+
70.76774534467404,
|
| 191 |
+
71.32536195515495,
|
| 192 |
+
71.77877669343653,
|
| 193 |
+
72.23593801588707,
|
| 194 |
+
72.61926040867428,
|
| 195 |
+
72.93288072515136,
|
| 196 |
+
73.1782039343817
|
| 197 |
+
],
|
| 198 |
+
"val_acc": [
|
| 199 |
+
67.966,
|
| 200 |
+
69.586,
|
| 201 |
+
69.866,
|
| 202 |
+
70.47,
|
| 203 |
+
70.854,
|
| 204 |
+
71.1,
|
| 205 |
+
71.388,
|
| 206 |
+
71.674,
|
| 207 |
+
71.626,
|
| 208 |
+
71.726
|
| 209 |
+
],
|
| 210 |
+
"scale_accs": {
|
| 211 |
+
"256": [
|
| 212 |
+
66.908,
|
| 213 |
+
68.868,
|
| 214 |
+
69.194,
|
| 215 |
+
69.78,
|
| 216 |
+
70.214,
|
| 217 |
+
70.592,
|
| 218 |
+
70.794,
|
| 219 |
+
71.166,
|
| 220 |
+
71.074,
|
| 221 |
+
71.258
|
| 222 |
+
],
|
| 223 |
+
"512": [
|
| 224 |
+
67.774,
|
| 225 |
+
69.268,
|
| 226 |
+
69.844,
|
| 227 |
+
70.366,
|
| 228 |
+
70.82,
|
| 229 |
+
71.088,
|
| 230 |
+
71.292,
|
| 231 |
+
71.628,
|
| 232 |
+
71.684,
|
| 233 |
+
71.69
|
| 234 |
+
]
|
| 235 |
+
},
|
| 236 |
+
"lr": [
|
| 237 |
+
0.0009755527298894294,
|
| 238 |
+
0.0009046039886902864,
|
| 239 |
+
0.0007940987335200904,
|
| 240 |
+
0.0006548539886902864,
|
| 241 |
+
0.0005005000000000001,
|
| 242 |
+
0.0003461460113097139,
|
| 243 |
+
0.00020690126647990973,
|
| 244 |
+
9.639601130971382e-05,
|
| 245 |
+
2.5447270110570814e-05,
|
| 246 |
+
0.001
|
| 247 |
+
]
|
| 248 |
+
}
|
| 249 |
+
},
|
| 250 |
+
"train_config": {
|
| 251 |
+
"name": "david_training",
|
| 252 |
+
"run_id": "20251012_141246",
|
| 253 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 254 |
+
"model_variant": "clip_vit_laion_b32",
|
| 255 |
+
"num_classes": 1000,
|
| 256 |
+
"preset": "small_fast",
|
| 257 |
+
"custom_config_path": null,
|
| 258 |
+
"num_classes_override": null,
|
| 259 |
+
"use_belly_override": null,
|
| 260 |
+
"belly_expand_override": null,
|
| 261 |
+
"progressive_training_override": false,
|
| 262 |
+
"scale_warmup_epochs_override": null,
|
| 263 |
+
"num_epochs": 10,
|
| 264 |
+
"batch_size": 1024,
|
| 265 |
+
"learning_rate": 0.001,
|
| 266 |
+
"weight_decay": 1e-05,
|
| 267 |
+
"warmup_epochs": 3,
|
| 268 |
+
"use_rose_loss": true,
|
| 269 |
+
"rose_initial_weight": 0.1,
|
| 270 |
+
"rose_max_weight": 0.5,
|
| 271 |
+
"rose_weight_schedule": "adaptive",
|
| 272 |
+
"use_cayley_loss": false,
|
| 273 |
+
"cayley_weight": 0.001,
|
| 274 |
+
"scale_loss_balance": null,
|
| 275 |
+
"use_mixed_precision": true,
|
| 276 |
+
"gradient_clip": 10.0,
|
| 277 |
+
"scheduler_type": "cosine_restarts",
|
| 278 |
+
"min_lr": 1e-06,
|
| 279 |
+
"freeze_strategy": "never",
|
| 280 |
+
"freeze_threshold": 90.0,
|
| 281 |
+
"unfreeze_on_plateau": true,
|
| 282 |
+
"patience": 10,
|
| 283 |
+
"track_gradients": true,
|
| 284 |
+
"gradient_scale_threshold": 1e-05,
|
| 285 |
+
"gradient_scale_multiplier": 10.0,
|
| 286 |
+
"log_interval": 50,
|
| 287 |
+
"val_interval": 1,
|
| 288 |
+
"save_interval": 5,
|
| 289 |
+
"log_fusion_weights": true,
|
| 290 |
+
"log_loss_components": true,
|
| 291 |
+
"save_format": "safetensors",
|
| 292 |
+
"hf_repo": "AbstractPhil/gated-david",
|
| 293 |
+
"upload_to_hub": true,
|
| 294 |
+
"base_dir": "./david_training",
|
| 295 |
+
"num_workers": 10,
|
| 296 |
+
"pin_memory": true,
|
| 297 |
+
"prefetch_factor": 4,
|
| 298 |
+
"persistent_workers": true
|
| 299 |
+
}
|
| 300 |
+
}
|