AbstractPhil commited on
Commit
1d68fe2
·
verified ·
1 Parent(s): 35c14be

Update best_model_acc71.73_metadata.json - Run 20251012_141246

Browse files
weights/David-fully_shared-weighted_sum/20251012_141246/best_model_acc71.73_metadata.json ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9,
3
+ "optimizer_state_dict": {
4
+ "state": {
5
+ "0": {
6
+ "step": "tensor(12520.)",
7
+ "exp_avg": "tensor([[-1.0193e-03, -3.9272e-03, -1.5823e-04, ..., -2.6369e-04,\n -5.7864e-04, -7.2561e-04],\n [-2.2954e-04, -9.4550e-04, 9.4458e-04, ..., -1.8983e-05,\n 4.6150e-04, 1.7186e-04],\n [ 2.4122e-03, 1.5233e-03, 5.2615e-03, ..., 1.1379e-03,\n 1.0916e-04, 1.5103e-04],\n ...,\n [-4.5913e-04, 4.1391e-03, 2.5503e-03, ..., -4.5579e-04,\n 2.3861e-04, -1.2218e-03],\n [ 8.0811e-04, 3.0176e-03, -3.6676e-05, ..., -2.0293e-04,\n -6.3479e-04, 1.3269e-04],\n [-2.2015e-04, -1.3078e-03, 1.1851e-03, ..., -5.2010e-04,\n -1.2506e-04, 2.9933e-04]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[4.8175e-06, 3.0986e-05, 1.3620e-05, ..., 3.3631e-06, 2.3452e-06,\n 2.6317e-06],\n [1.0004e-05, 4.5502e-05, 1.8860e-05, ..., 4.5611e-06, 4.0402e-06,\n 3.6555e-06],\n [4.7375e-06, 2.4524e-05, 1.4721e-05, ..., 2.6622e-06, 2.1295e-06,\n 2.7396e-06],\n ...,\n [8.2641e-06, 4.3019e-05, 2.5758e-05, ..., 9.3965e-06, 3.2670e-06,\n 8.8751e-06],\n [3.3765e-06, 2.3847e-05, 1.4868e-05, ..., 3.0570e-06, 2.0195e-06,\n 2.2608e-06],\n [9.8345e-06, 4.7035e-05, 2.2059e-05, ..., 5.0497e-06, 3.4121e-06,\n 3.4242e-06]], device='cuda:0')"
9
+ },
10
+ "1": {
11
+ "step": "tensor(12520.)",
12
+ "exp_avg": "tensor([-4.9703e-02, -1.7859e-02, -3.3339e-02, -1.3414e-03, 3.4545e-02,\n 1.9017e-02, -1.2731e-02, 2.9561e-02, -8.5451e-03, 3.5937e-02,\n 1.3703e-02, 4.7768e-02, 1.2145e-02, -1.7309e-02, 1.7690e-03,\n -2.2219e-02, 1.1317e-02, 2.1110e-02, 1.3439e-02, 1.2014e-03,\n -8.4470e-03, 4.2558e-03, -2.9272e-03, -2.1347e-02, 4.6297e-03,\n -3.0310e-03, -2.7247e-02, 3.7147e-04, -3.5352e-02, -1.5328e-03,\n 2.6373e-02, 4.7613e-02, 1.9729e-02, -9.9012e-04, 5.3960e-02,\n -3.3687e-02, -2.6935e-02, 3.6678e-03, -3.7192e-03, 2.6175e-02,\n -1.1588e-02, 6.2963e-03, -1.5463e-02, -3.5438e-02, -1.3963e-03,\n 1.2298e-02, 1.2093e-02, -1.0914e-02, 4.3178e-02, -2.0362e-02,\n 2.1679e-04, 1.6105e-03, -5.1843e-03, -1.3798e-02, -7.9718e-03,\n -5.2390e-03, 7.2697e-03, -1.0998e-02, 5.8189e-02, 1.5406e-03,\n 4.5403e-02, 6.4548e-03, 1.3421e-02, 3.3535e-03, -3.1217e-02,\n 1.1657e-02, 6.8176e-03, -1.8686e-02, -1.2587e-02, 8.1082e-03,\n -1.7567e-02, -2.7073e-02, 3.2007e-02, 2.9051e-02, -1.7060e-03,\n -1.2963e-02, -1.1190e-02, 2.2761e-03, -3.2350e-03, 2.4414e-02,\n -3.3076e-02, -3.0831e-02, 5.7410e-03, 2.0274e-02, 4.7875e-02,\n 1.7453e-03, -3.8792e-03, 6.6895e-03, -1.4360e-02, -1.4219e-02,\n -3.4291e-02, 1.2638e-02, -5.5800e-02, 3.8152e-03, -2.1824e-03,\n -1.0928e-02, -9.2300e-03, 8.8051e-03, 2.2906e-02, -2.6843e-03,\n -1.2674e-03, -9.3169e-03, -1.4219e-03, -4.4086e-03, 9.7293e-03,\n -3.0485e-02, 4.2513e-02, -1.0672e-03, -1.3109e-02, -5.0764e-03,\n -1.1176e-02, -4.4431e-03, -1.3416e-02, 3.0360e-02, 8.0234e-03,\n 2.4983e-02, 9.2318e-03, -8.8241e-03, -1.3282e-02, -1.6089e-02,\n 3.2281e-03, 9.8265e-03, -4.8502e-02, -1.4068e-02, -4.7850e-03,\n 2.5528e-02, 1.5373e-03, 5.8712e-04, 2.5432e-02, -2.3121e-02,\n -4.2545e-03, -4.0277e-02, 6.6289e-03, 1.5404e-03, -2.6665e-02,\n -1.6105e-02, 1.9943e-02, 4.7342e-03, -3.6919e-03, -4.7197e-02,\n -2.7225e-02, 9.9290e-03, 5.7264e-04, -1.7051e-02, 1.2034e-02,\n 8.0999e-03, 1.6639e-02, -1.5262e-02, 3.9922e-02, -1.1900e-02,\n -1.0345e-02, 2.4429e-02, 3.0483e-02, -2.2913e-02, -9.2265e-03,\n 2.2993e-02, -2.6279e-02, 6.0952e-03, 1.3378e-02, 7.4427e-03,\n 5.6227e-03, -3.3159e-03, -7.6942e-03, 2.5696e-04, -5.6911e-03,\n 5.2631e-03, 1.4404e-03, -4.9403e-03, -1.8378e-02, 1.9436e-03,\n -1.1532e-02, 1.9465e-02, -5.9944e-02, -4.7005e-03, 2.5399e-02,\n -1.3714e-02, 3.2497e-02, 5.9436e-03, 1.5326e-03, -1.7804e-02,\n 3.9245e-03, -2.5322e-02, -7.6104e-03, -5.9242e-04, 3.0409e-02,\n 9.0816e-03, -9.2248e-02, 7.5686e-02, 4.1823e-02, -2.9851e-02,\n -1.5160e-02, -3.4332e-03, -6.4735e-03, -8.1361e-02, 1.6881e-02,\n -1.2082e-03, -3.3118e-03, -2.6764e-02, -4.6164e-04, 1.1022e-02,\n -1.4051e-02, -1.4151e-02, 1.6059e-02, -3.4576e-03, -3.3733e-02,\n -2.1667e-02, 1.7097e-02, 1.8038e-02, 1.6687e-02, -6.4005e-02,\n 7.5324e-03, -5.1882e-04, -2.4586e-02, -1.0087e-02, -6.8016e-02,\n 1.8813e-02, 1.4735e-02, -3.0618e-02, 2.6177e-02, 6.9380e-04,\n -3.3502e-03, -4.8358e-03, -1.0017e-02, 1.4018e-02, 5.2471e-02,\n 1.5472e-02, 7.6629e-03, -2.3570e-02, 3.7260e-02, 1.3464e-04,\n -3.6019e-03, 2.7480e-02, 2.3311e-02, 1.2863e-02, 2.6285e-02,\n -9.6491e-03, 1.1000e-02, -9.8927e-03, -2.2841e-02, -1.5093e-02,\n 7.6009e-04, -1.4443e-02, 1.7873e-02, -6.3593e-03, -6.2564e-02,\n 5.1140e-03, 1.9793e-02, 1.8376e-02, 1.8684e-02, 1.7836e-02,\n -1.4531e-02, -6.1798e-03, 9.6419e-03, 4.1045e-02, -3.3347e-03,\n 3.9695e-02, -5.7387e-03, -5.7357e-03, -3.5658e-02, 1.0514e-02,\n 3.6286e-03, 4.1179e-03, -1.4851e-02, 1.4852e-02, -3.3220e-02,\n -1.0200e-02, -6.6883e-04, -3.4417e-02, -2.2467e-02, 6.1605e-02,\n -1.5126e-02, 1.4589e-02, 1.8572e-02, 1.5571e-02, -1.2520e-02,\n 1.3532e-02, -4.5546e-03, 2.3759e-02, -1.3373e-02, -2.3115e-02,\n 6.9955e-03, 3.9246e-02, -1.2681e-03, 9.4552e-03, -1.1840e-02,\n 2.5156e-02, -2.3975e-02, -8.1251e-04, 2.3395e-02, 8.8698e-03,\n -2.0231e-02, 1.7868e-02, -5.4942e-02, 5.8737e-03, 2.1004e-02,\n 9.1513e-03, 9.1182e-03, 4.1792e-03, 6.6612e-03, 2.0673e-02,\n -1.1427e-03, -1.9380e-03, -1.7823e-02, 1.7798e-02, 1.0322e-02,\n 2.3060e-02, -1.8720e-02, -2.0595e-03, -1.9326e-02, -2.0382e-02,\n 2.1937e-02, -2.6470e-02, -9.2237e-03, -1.1964e-02, -1.0305e-03,\n -2.2473e-02, -2.9618e-03, 2.6682e-02, 3.3177e-02, -1.9284e-02,\n -5.8234e-03, -1.0979e-02, 1.1873e-02, -2.1987e-03, 1.8803e-02,\n 8.3817e-03, 1.1779e-02, 1.1705e-02, -6.9298e-03, -2.0914e-02,\n 2.8827e-02, -1.1707e-02, 1.0348e-02, -3.6553e-03, -1.0770e-02,\n 3.4258e-03, -1.9562e-02, 3.2797e-03, -5.0656e-03, 2.0330e-02,\n 8.7339e-03, 4.6241e-02, -7.6635e-03, -1.0513e-02, 2.6546e-02,\n 1.2827e-02, -1.0286e-02, 1.1086e-02, -3.0737e-02, -7.6194e-04,\n 1.5436e-02, 5.0965e-03, 1.6364e-02, -2.9667e-02, -1.0014e-02,\n 4.3811e-02, 1.6028e-02, -1.2909e-02, 1.8865e-03, -7.4355e-03,\n -2.4094e-02, -3.0941e-02, -8.8609e-03, -9.1083e-03, 2.3242e-02,\n 2.0226e-04, -9.5811e-03, 2.8283e-02, -2.6738e-02, -2.6784e-02,\n -9.7209e-03, -6.0425e-03, -3.8321e-03, 9.1857e-03, -1.3853e-02,\n -1.6442e-02, 2.2919e-02, 1.7862e-03, 3.1908e-03, 1.1635e-02,\n -1.3430e-02, -1.0587e-02, 8.0067e-03, 2.5577e-05, 1.5558e-02,\n 3.5322e-03, 7.4958e-02, 2.5128e-02, 1.8872e-02, 2.1498e-02,\n 1.9866e-02, 2.6312e-02, 1.6370e-02, 2.2606e-02, 1.0131e-02,\n 9.2258e-04, 6.6672e-03, -1.5002e-02, 1.2376e-02, 2.4676e-02,\n 7.5654e-03, -8.8712e-03, -2.1376e-02, 1.2794e-02, -1.0227e-02,\n -7.3344e-03, 2.9793e-03, -5.0934e-04, 2.1110e-02, 1.7151e-02,\n -8.0944e-03, 8.9908e-03, -3.3691e-02, 1.9736e-02, -2.5486e-02,\n -4.2898e-02, -2.0234e-02, -4.8331e-02, 2.9667e-02, -1.4585e-02,\n -4.3978e-02, -5.7860e-02, 1.6403e-02, 5.6316e-03, -6.4882e-02,\n 1.1834e-02, -2.7115e-02, 2.9686e-03, 1.2876e-02, -9.9546e-04,\n -1.6113e-02, -4.7741e-02, -6.6454e-04, -1.1206e-02, -1.9771e-03,\n -1.0625e-02, 1.5796e-02, 4.7027e-02, -1.3964e-03, -2.2486e-02,\n -2.0015e-03, -1.8101e-02, 8.5380e-03, 2.2448e-02, 5.0707e-02,\n 5.9743e-03, -3.9393e-02, 1.1312e-02, -2.3972e-02, 8.2880e-03,\n 3.1250e-02, 3.1875e-02, -3.4938e-02, 2.7211e-03, 1.3084e-02,\n -1.9125e-02, -1.9334e-02, -1.9733e-02, 2.5985e-02, -5.9557e-03,\n -7.8828e-03, 9.6630e-03, 6.2458e-03, -6.6385e-03, 2.4627e-02,\n -1.5185e-02, -1.4953e-02, 3.5917e-02, -3.2107e-03, 8.4770e-03,\n -6.5643e-03, 2.8805e-02, -3.6618e-03, -1.4122e-02, -4.9166e-03,\n -1.3990e-02, -4.2672e-02, 2.4505e-02, 3.7491e-03, 4.9925e-02,\n 8.0827e-03, -4.4023e-02, -1.2641e-02, -9.0188e-02, 4.6959e-02,\n 1.2931e-02, 2.2619e-02, 2.8009e-02, -4.3736e-03, 3.6495e-02,\n 7.1013e-03, -7.5271e-03, 3.3577e-02, 1.0479e-02, -2.7111e-02,\n 4.9822e-03, 1.8423e-02, -9.8045e-03, 2.5129e-02, -1.4757e-02,\n 3.2123e-02, 8.8985e-03, -1.8683e-03, 2.4873e-02, 4.1968e-03,\n -4.1777e-02, -1.1754e-02, 5.9716e-03, -5.0463e-03, -4.3165e-02,\n 3.7644e-02, -1.3895e-02], device='cuda:0')",
13
+ "exp_avg_sq": "tensor([0.0034, 0.0058, 0.0029, 0.0024, 0.0034, 0.0053, 0.0022, 0.0036, 0.0042,\n 0.0045, 0.0025, 0.0035, 0.0063, 0.0023, 0.0055, 0.0067, 0.0043, 0.0040,\n 0.0034, 0.0046, 0.0032, 0.0069, 0.0047, 0.0035, 0.0019, 0.0062, 0.0038,\n 0.0050, 0.0056, 0.0044, 0.0041, 0.0082, 0.0029, 0.0101, 0.0032, 0.0055,\n 0.0035, 0.0032, 0.0041, 0.0057, 0.0035, 0.0042, 0.0041, 0.0034, 0.0065,\n 0.0039, 0.0026, 0.0033, 0.0044, 0.0040, 0.0050, 0.0030, 0.0029, 0.0034,\n 0.0035, 0.0033, 0.0035, 0.0020, 0.0029, 0.0036, 0.0052, 0.0029, 0.0070,\n 0.0045, 0.0058, 0.0063, 0.0032, 0.0035, 0.0037, 0.0033, 0.0047, 0.0024,\n 0.0035, 0.0078, 0.0051, 0.0021, 0.0062, 0.0032, 0.0038, 0.0054, 0.0049,\n 0.0066, 0.0024, 0.0059, 0.0043, 0.0053, 0.0036, 0.0066, 0.0050, 0.0029,\n 0.0079, 0.0050, 0.0036, 0.0036, 0.0048, 0.0021, 0.0027, 0.0039, 0.0021,\n 0.0030, 0.0037, 0.0046, 0.0037, 0.0034, 0.0027, 0.0053, 0.0061, 0.0024,\n 0.0042, 0.0054, 0.0041, 0.0042, 0.0058, 0.0029, 0.0039, 0.0028, 0.0020,\n 0.0027, 0.0071, 0.0053, 0.0046, 0.0092, 0.0064, 0.0051, 0.0067, 0.0052,\n 0.0026, 0.0018, 0.0059, 0.0059, 0.0057, 0.0034, 0.0045, 0.0035, 0.0024,\n 0.0014, 0.0031, 0.0045, 0.0014, 0.0056, 0.0049, 0.0036, 0.0048, 0.0034,\n 0.0045, 0.0039, 0.0031, 0.0029, 0.0023, 0.0044, 0.0041, 0.0029, 0.0043,\n 0.0035, 0.0034, 0.0024, 0.0041, 0.0038, 0.0048, 0.0022, 0.0035, 0.0026,\n 0.0030, 0.0035, 0.0036, 0.0033, 0.0028, 0.0030, 0.0056, 0.0063, 0.0062,\n 0.0046, 0.0061, 0.0039, 0.0040, 0.0026, 0.0059, 0.0037, 0.0043, 0.0039,\n 0.0050, 0.0030, 0.0024, 0.0041, 0.0042, 0.0054, 0.0062, 0.0049, 0.0051,\n 0.0042, 0.0043, 0.0027, 0.0033, 0.0046, 0.0038, 0.0069, 0.0025, 0.0032,\n 0.0026, 0.0036, 0.0024, 0.0021, 0.0039, 0.0044, 0.0051, 0.0046, 0.0091,\n 0.0039, 0.0017, 0.0039, 0.0032, 0.0021, 0.0063, 0.0026, 0.0043, 0.0050,\n 0.0036, 0.0046, 0.0029, 0.0045, 0.0033, 0.0042, 0.0049, 0.0043, 0.0055,\n 0.0027, 0.0015, 0.0036, 0.0055, 0.0027, 0.0028, 0.0031, 0.0044, 0.0057,\n 0.0043, 0.0019, 0.0027, 0.0033, 0.0044, 0.0038, 0.0013, 0.0031, 0.0044,\n 0.0048, 0.0048, 0.0032, 0.0033, 0.0038, 0.0040, 0.0054, 0.0015, 0.0027,\n 0.0048, 0.0043, 0.0039, 0.0066, 0.0030, 0.0043, 0.0035, 0.0063, 0.0040,\n 0.0048, 0.0046, 0.0050, 0.0032, 0.0024, 0.0037, 0.0026, 0.0030, 0.0069,\n 0.0045, 0.0038, 0.0035, 0.0051, 0.0046, 0.0023, 0.0030, 0.0035, 0.0054,\n 0.0046, 0.0027, 0.0064, 0.0059, 0.0018, 0.0014, 0.0036, 0.0033, 0.0044,\n 0.0024, 0.0044, 0.0025, 0.0022, 0.0042, 0.0036, 0.0034, 0.0045, 0.0031,\n 0.0017, 0.0050, 0.0075, 0.0031, 0.0022, 0.0030, 0.0040, 0.0048, 0.0050,\n 0.0048, 0.0037, 0.0057, 0.0033, 0.0067, 0.0039, 0.0055, 0.0045, 0.0043,\n 0.0040, 0.0038, 0.0049, 0.0035, 0.0045, 0.0034, 0.0043, 0.0030, 0.0042,\n 0.0029, 0.0066, 0.0041, 0.0034, 0.0032, 0.0037, 0.0035, 0.0040, 0.0032,\n 0.0037, 0.0060, 0.0043, 0.0024, 0.0017, 0.0040, 0.0045, 0.0054, 0.0080,\n 0.0021, 0.0034, 0.0042, 0.0050, 0.0032, 0.0026, 0.0040, 0.0026, 0.0021,\n 0.0048, 0.0031, 0.0029, 0.0027, 0.0028, 0.0023, 0.0042, 0.0045, 0.0027,\n 0.0033, 0.0080, 0.0024, 0.0100, 0.0040, 0.0044, 0.0015, 0.0030, 0.0032,\n 0.0031, 0.0043, 0.0040, 0.0038, 0.0028, 0.0024, 0.0053, 0.0047, 0.0041,\n 0.0066, 0.0041, 0.0031, 0.0015, 0.0024, 0.0020, 0.0047, 0.0069, 0.0049,\n 0.0030, 0.0030, 0.0079, 0.0028, 0.0031, 0.0024, 0.0036, 0.0048, 0.0038,\n 0.0034, 0.0035, 0.0062, 0.0105, 0.0022, 0.0029, 0.0025, 0.0033, 0.0033,\n 0.0062, 0.0033, 0.0022, 0.0036, 0.0038, 0.0030, 0.0027, 0.0021, 0.0047,\n 0.0046, 0.0057, 0.0036, 0.0081, 0.0037, 0.0065, 0.0057, 0.0037, 0.0073,\n 0.0026, 0.0053, 0.0026, 0.0029, 0.0040, 0.0023, 0.0031, 0.0034, 0.0093,\n 0.0063, 0.0035, 0.0039, 0.0026, 0.0061, 0.0077, 0.0026, 0.0038, 0.0068,\n 0.0038, 0.0023, 0.0040, 0.0070, 0.0030, 0.0023, 0.0030, 0.0054, 0.0028,\n 0.0039, 0.0049, 0.0039, 0.0036, 0.0040, 0.0052, 0.0055, 0.0023, 0.0043,\n 0.0038, 0.0023, 0.0042, 0.0039, 0.0043, 0.0062, 0.0021, 0.0034, 0.0067,\n 0.0042, 0.0062, 0.0032, 0.0036, 0.0033, 0.0035, 0.0050, 0.0020, 0.0072,\n 0.0022, 0.0031, 0.0040, 0.0044, 0.0044, 0.0039, 0.0092, 0.0056, 0.0036,\n 0.0033, 0.0067, 0.0045, 0.0047, 0.0030, 0.0044, 0.0032, 0.0032, 0.0040,\n 0.0063, 0.0031, 0.0066, 0.0034, 0.0040, 0.0037, 0.0019, 0.0023, 0.0034,\n 0.0020, 0.0028, 0.0039, 0.0007, 0.0044, 0.0069, 0.0030, 0.0044],\n device='cuda:0')"
14
+ },
15
+ "2": {
16
+ "step": "tensor(12520.)",
17
+ "exp_avg": "tensor([-1.1180e-02, -3.5444e-03, -7.5523e-03, -3.1396e-04, 8.3092e-03,\n 2.1934e-03, -7.2721e-03, 3.1607e-03, -9.3607e-04, 4.6839e-03,\n 2.7613e-03, 8.0027e-03, 1.4340e-03, -5.7757e-03, -7.6407e-04,\n -3.3509e-03, 3.1819e-03, 3.9284e-03, 3.3233e-03, -7.2177e-04,\n -6.1101e-04, 6.7832e-04, 2.7663e-03, -7.4092e-03, -4.0944e-04,\n -1.2691e-04, -7.1087e-03, -1.8791e-04, -5.1824e-03, 4.1880e-04,\n 3.7081e-03, 5.2575e-03, 3.3580e-03, -1.0954e-03, 1.3516e-02,\n -3.6586e-03, -4.9542e-03, 6.5436e-04, 4.3729e-05, 5.2623e-03,\n -3.1464e-03, 1.7913e-03, -5.5525e-03, -7.3201e-03, -5.0845e-04,\n 2.3535e-03, 2.6742e-03, -7.3928e-04, 4.9575e-03, -3.1135e-03,\n 6.1364e-04, 9.3869e-04, -1.0378e-04, -2.3639e-03, -2.6686e-03,\n -2.0390e-03, -1.2529e-04, -4.0472e-03, 2.4532e-02, -1.1118e-03,\n 6.8383e-03, 1.8850e-03, 2.6339e-03, 7.8248e-04, -4.8861e-03,\n 2.2835e-03, 1.2425e-03, -2.9064e-03, -2.2630e-03, 1.1097e-03,\n 5.3146e-04, -7.5587e-03, 5.5932e-03, 3.5940e-03, -2.2823e-04,\n -6.2207e-03, -1.2476e-03, 1.7873e-03, -1.6495e-04, 3.9088e-03,\n -5.5427e-03, -6.2092e-03, 1.8382e-03, 1.9948e-03, 1.2367e-02,\n -6.9902e-04, -5.0914e-04, -1.8373e-04, -1.4628e-03, -4.6816e-03,\n -3.2372e-03, 3.4847e-03, -5.7880e-03, 4.1611e-04, -1.6977e-04,\n -2.5412e-03, -2.4731e-03, 1.7565e-03, 8.2815e-03, -7.9405e-04,\n -2.6268e-04, -4.6616e-04, -2.4186e-04, -5.0113e-04, 3.1480e-03,\n -3.6497e-03, 7.9389e-03, -6.0516e-04, -1.5282e-03, -8.7467e-04,\n -2.2389e-03, -8.0566e-05, -1.7271e-03, 2.8552e-03, 6.0012e-04,\n 9.0318e-03, 1.9808e-03, -2.1478e-03, -2.8567e-03, -3.5022e-03,\n 9.3520e-04, 5.3256e-04, -6.6389e-03, -3.8099e-03, -2.5187e-04,\n 2.6019e-03, 2.3394e-04, -6.4515e-04, 3.3536e-03, -3.9837e-03,\n -3.6785e-04, -9.9746e-03, 4.4879e-04, 1.1014e-03, -7.1575e-03,\n -1.4154e-02, 4.5943e-03, 1.3575e-03, 4.9234e-04, -8.6894e-03,\n -6.5973e-03, 1.1993e-03, 4.1399e-04, -3.5593e-03, 2.5502e-03,\n 1.8519e-03, 2.8592e-03, -4.4508e-03, 1.8311e-02, -2.0946e-03,\n -2.5616e-03, 1.4600e-02, 7.0507e-03, -6.3042e-03, -1.7948e-03,\n 1.1530e-02, -3.2101e-03, 1.3660e-03, 2.2205e-03, 1.6632e-03,\n 9.2512e-04, -7.9409e-04, -1.8097e-03, 4.2547e-04, -1.5707e-03,\n 4.7725e-04, -1.0932e-04, -1.5900e-03, -2.4929e-03, -5.9703e-05,\n -1.9553e-03, 2.9791e-03, -5.2424e-03, -1.6863e-03, 3.3107e-03,\n -2.9658e-03, 5.1048e-03, 1.0336e-03, -2.1781e-04, -1.6472e-03,\n -1.3148e-03, -1.0807e-02, -1.0424e-03, 7.1391e-04, 6.6318e-03,\n 5.4243e-04, -1.0156e-02, 7.3770e-03, 9.4745e-03, -5.3446e-03,\n -2.5499e-03, -1.9064e-03, -5.1073e-04, -1.8293e-02, 4.1387e-03,\n -1.0319e-04, -1.3958e-03, -3.7334e-03, -1.0323e-03, 2.8522e-03,\n -3.6657e-03, -4.1255e-03, 3.0260e-03, -1.0174e-03, -4.7350e-03,\n -3.5538e-03, 1.7754e-03, 3.2708e-03, 8.3514e-03, -7.0776e-03,\n 1.2307e-03, 1.3676e-04, -2.8628e-03, -3.4789e-03, -1.2031e-02,\n 4.5171e-03, 1.0675e-03, -8.0993e-03, 8.9032e-03, 3.9801e-04,\n -1.7084e-04, -6.7317e-04, -1.7238e-03, 1.3948e-03, 9.9355e-03,\n 4.4692e-03, 6.8518e-03, -3.8938e-03, 4.3775e-03, -4.1053e-04,\n -4.3791e-04, 5.3487e-03, 2.5107e-03, 2.0787e-03, 3.6183e-03,\n -3.3919e-03, 3.3432e-03, -2.7832e-03, -3.5338e-03, -3.6294e-03,\n -9.8064e-04, -3.1487e-03, 3.4603e-03, -2.8228e-03, -1.2878e-02,\n 1.4082e-03, 7.3881e-03, 6.8876e-03, 3.1127e-03, 3.5155e-03,\n -1.1109e-02, -1.6721e-03, 1.4410e-03, 5.3125e-03, -7.7418e-04,\n 3.6284e-03, -2.3940e-03, -1.9046e-03, -1.1394e-02, 7.5793e-04,\n 8.6041e-04, 1.9388e-03, -2.4222e-03, 2.3913e-03, -5.1146e-03,\n -3.0179e-03, -1.5554e-04, -9.5646e-03, -6.1881e-03, 6.0466e-03,\n -2.7591e-03, 2.9475e-03, 3.7532e-03, 2.1972e-03, -1.4611e-03,\n 3.7652e-03, -1.8562e-03, 4.1938e-03, -2.2360e-03, -5.5710e-03,\n 2.3678e-03, 5.9842e-03, 3.9484e-04, 7.5572e-03, -8.1632e-03,\n 3.5008e-03, -5.8960e-03, -1.2240e-04, 6.5818e-03, 1.4321e-03,\n -5.8790e-03, 9.6106e-03, -7.6317e-03, -1.0740e-04, 4.0479e-03,\n 2.7601e-03, 1.1024e-03, 3.6984e-03, 1.1804e-03, 2.7603e-03,\n 2.9154e-03, -6.4779e-05, -7.1293e-03, 3.3901e-03, 1.2828e-03,\n 3.0808e-03, -4.4452e-03, 1.2396e-03, -3.1317e-03, -3.6915e-03,\n 3.5666e-03, -6.0522e-03, -1.5807e-03, -2.8906e-03, 8.9714e-06,\n -5.1965e-03, 9.3649e-04, 4.8702e-03, 7.3294e-03, -3.0377e-03,\n -1.3297e-03, -2.3694e-03, 1.6645e-03, -1.8852e-03, 3.3348e-03,\n 1.0153e-03, 4.0438e-03, 3.1492e-03, -1.9782e-03, -4.8265e-03,\n 6.7159e-03, -1.7107e-03, 1.0126e-03, -9.0692e-04, -2.3409e-03,\n 2.1992e-04, -5.6572e-03, 1.7308e-03, -3.2756e-04, 3.0308e-03,\n 1.0654e-03, 4.2442e-03, -5.3514e-03, -2.1844e-03, 5.5725e-03,\n 3.8837e-04, -1.6772e-03, 2.3128e-03, -4.3854e-03, -9.7492e-04,\n 6.1909e-03, 8.5757e-04, 4.6740e-03, -4.8544e-03, -2.1750e-04,\n 1.0046e-02, 4.3965e-03, -1.2274e-03, -9.0899e-04, -2.3756e-03,\n -6.3821e-03, -4.7182e-03, -5.8265e-03, -1.3583e-03, 4.4413e-03,\n 3.8997e-04, -1.4441e-02, 3.8992e-03, -8.2670e-03, -8.0414e-03,\n -2.0917e-03, 4.4220e-04, -6.4782e-04, -2.6763e-04, -2.0605e-03,\n -4.0206e-03, 2.1835e-03, -5.3440e-04, 2.6746e-04, 7.1215e-04,\n -3.2916e-03, -5.9156e-03, 1.9412e-03, 8.1923e-04, 1.1748e-03,\n -2.8107e-04, 7.7895e-03, 8.7235e-03, 6.3108e-03, 3.5338e-03,\n 4.8249e-03, 4.8356e-03, 5.1652e-03, 4.2728e-03, 1.3193e-03,\n -1.2651e-04, 2.2804e-03, -2.3097e-03, 5.5067e-04, 3.6729e-03,\n 3.5464e-03, -1.3064e-03, -6.8030e-03, 2.6288e-03, -2.3863e-03,\n -1.2934e-03, 1.0344e-03, -1.6342e-03, 2.9073e-03, 2.3095e-03,\n -1.4261e-03, 8.2179e-04, -9.0205e-03, 2.4381e-03, -5.1978e-03,\n -3.4890e-03, -3.0046e-03, -3.5992e-03, 4.3612e-03, -1.3081e-03,\n -4.1804e-03, -1.0918e-02, 3.0772e-03, 2.6523e-03, -8.9417e-03,\n 3.8307e-03, -6.3304e-03, 5.5723e-04, 1.6720e-03, -2.7034e-03,\n -4.0839e-03, -4.6264e-03, -1.0191e-03, -2.0276e-03, 6.4449e-04,\n -1.8876e-03, 1.4530e-03, 4.9138e-03, -8.0503e-04, -4.2316e-03,\n -2.7361e-04, -4.0336e-03, 2.5780e-03, 4.4277e-03, 6.1024e-03,\n 4.3336e-04, -6.8057e-03, 3.3254e-03, -4.2181e-03, 2.4340e-03,\n 4.4249e-03, 3.9499e-03, -8.3969e-03, 3.7351e-04, 2.5703e-03,\n -1.1077e-03, -4.4682e-03, -5.9056e-03, 4.1108e-03, -1.9352e-03,\n -3.3676e-03, 2.6245e-04, 9.0140e-04, -3.1480e-04, 4.9073e-03,\n -2.4652e-03, -2.1936e-03, 3.1118e-03, 3.6491e-04, 1.1020e-03,\n -3.7231e-04, 5.4302e-03, -1.9602e-03, -2.7188e-03, -2.2788e-03,\n -8.7818e-03, -4.2937e-03, 5.4648e-03, 6.4389e-05, 9.9780e-03,\n 1.5709e-03, -7.6470e-03, -3.0353e-03, -1.1005e-02, 4.2810e-03,\n 3.8448e-03, 5.5827e-03, 3.3306e-03, -6.0299e-04, 5.8831e-03,\n 1.2538e-03, -8.9997e-04, 1.1294e-02, 9.7651e-04, -6.7213e-03,\n 1.0850e-03, 6.9323e-03, -1.5865e-03, 3.7449e-03, -1.5945e-03,\n 6.9426e-03, 3.7173e-03, -9.7204e-04, 4.1612e-03, 1.0865e-03,\n -1.2129e-02, -2.5811e-03, -7.5798e-03, -1.7299e-03, -4.4406e-03,\n 1.3287e-02, -2.8542e-03], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([2.1662e-04, 9.9610e-05, 1.9793e-04, 1.6106e-04, 2.1323e-04, 1.2263e-04,\n 4.3911e-04, 6.4398e-05, 6.4934e-05, 1.0881e-04, 3.5256e-04, 9.0310e-05,\n 9.9103e-05, 1.8079e-04, 6.1111e-05, 1.2084e-04, 1.6156e-04, 1.1171e-04,\n 1.0953e-04, 1.6893e-04, 5.5737e-05, 9.0631e-05, 9.5286e-05, 3.1662e-04,\n 3.0057e-04, 1.0226e-04, 1.7513e-04, 1.3975e-04, 1.2567e-04, 1.4834e-04,\n 1.5826e-04, 1.1870e-04, 1.7425e-04, 1.2931e-04, 1.9826e-04, 6.1916e-05,\n 1.1361e-04, 1.1535e-04, 2.1788e-04, 2.7717e-04, 2.0558e-04, 2.0143e-04,\n 3.2457e-04, 1.3374e-04, 7.2545e-05, 2.4611e-04, 2.1587e-04, 1.7615e-04,\n 7.9717e-05, 1.1309e-04, 1.2541e-04, 2.0420e-04, 1.9856e-04, 7.9802e-05,\n 2.1259e-04, 2.4717e-04, 5.8234e-05, 2.7154e-04, 5.0008e-04, 2.3498e-04,\n 1.1442e-04, 2.3151e-04, 2.2771e-04, 1.3230e-04, 2.6064e-04, 1.9825e-04,\n 1.1768e-04, 1.1959e-04, 8.5278e-05, 1.1901e-04, 1.1095e-04, 1.9597e-04,\n 1.0094e-04, 8.2411e-05, 1.5396e-04, 3.4813e-04, 8.5922e-05, 3.5492e-04,\n 7.5784e-05, 6.4856e-05, 1.3445e-04, 2.1261e-04, 2.1245e-04, 5.4718e-05,\n 2.7176e-04, 1.4718e-04, 2.1884e-04, 9.2604e-05, 4.8861e-05, 1.8041e-04,\n 8.1846e-05, 2.0843e-04, 7.5179e-05, 1.7671e-04, 1.2993e-04, 4.1585e-04,\n 1.2773e-04, 8.8355e-05, 2.2937e-04, 5.3564e-05, 5.8601e-05, 1.2954e-04,\n 7.1637e-05, 2.0600e-04, 1.3530e-04, 7.5373e-05, 1.6875e-04, 1.1698e-04,\n 1.4079e-04, 1.0299e-04, 1.8214e-04, 6.1480e-05, 1.1268e-04, 7.6026e-05,\n 1.0728e-04, 3.9892e-04, 2.9670e-04, 1.1029e-04, 1.8956e-04, 2.0724e-04,\n 9.0006e-05, 8.4686e-05, 1.8163e-04, 3.0351e-04, 1.0207e-04, 1.3629e-04,\n 8.2415e-05, 2.3098e-04, 8.6688e-05, 1.4059e-04, 9.7301e-05, 1.4337e-04,\n 1.2049e-04, 2.1220e-04, 2.4741e-04, 1.0634e-03, 1.3210e-04, 1.0503e-04,\n 5.8267e-04, 2.0411e-04, 1.5076e-04, 6.7351e-05, 6.4223e-05, 1.4097e-04,\n 2.1520e-04, 2.3976e-04, 1.2737e-04, 2.5054e-04, 6.0851e-04, 2.0597e-04,\n 1.1757e-04, 1.0839e-03, 2.1223e-04, 2.6675e-04, 1.0591e-04, 5.8862e-04,\n 1.1370e-04, 3.6508e-04, 1.1528e-04, 1.0863e-04, 1.0575e-04, 4.2938e-04,\n 2.5326e-04, 1.8716e-04, 1.3120e-04, 1.4383e-04, 4.4551e-04, 1.6581e-04,\n 8.5254e-05, 1.0198e-04, 1.0335e-04, 7.6033e-05, 1.1561e-04, 2.0720e-04,\n 1.0451e-04, 2.2061e-04, 1.2716e-04, 7.7532e-05, 1.0719e-04, 1.0367e-04,\n 2.7400e-04, 2.6264e-04, 4.4992e-04, 1.0360e-04, 1.8070e-04, 7.0597e-05,\n 1.3010e-04, 6.7733e-05, 1.8310e-04, 1.1337e-04, 1.7605e-04, 7.3859e-05,\n 9.1261e-05, 2.8240e-04, 1.5843e-04, 6.4861e-05, 5.2224e-04, 7.9399e-05,\n 5.1881e-04, 1.4267e-04, 2.2770e-04, 1.6963e-04, 2.3984e-04, 9.4702e-05,\n 1.0443e-04, 1.0107e-04, 1.0504e-04, 7.9052e-05, 3.2396e-04, 8.2927e-05,\n 3.5732e-04, 4.2989e-04, 1.2092e-04, 2.2069e-04, 2.3105e-04, 1.9043e-04,\n 7.8439e-05, 2.6992e-04, 3.8716e-04, 5.9943e-05, 2.7654e-04, 2.3423e-04,\n 7.0129e-05, 6.6526e-05, 1.9704e-04, 5.6616e-04, 5.9158e-04, 1.0228e-04,\n 8.2570e-05, 1.3997e-04, 1.1161e-04, 1.2278e-04, 1.1020e-04, 7.3339e-05,\n 1.0887e-04, 3.3136e-04, 3.3003e-04, 2.1773e-04, 7.0781e-05, 1.4377e-04,\n 1.3802e-03, 1.4618e-04, 1.7856e-04, 2.5268e-04, 2.4219e-04, 2.1408e-04,\n 2.2963e-04, 2.5439e-04, 1.3844e-04, 1.3088e-04, 7.2493e-04, 3.3653e-04,\n 8.0284e-05, 1.0148e-04, 1.8286e-04, 1.0736e-04, 2.7016e-04, 2.8440e-04,\n 3.4460e-04, 1.6608e-04, 1.9445e-04, 1.5608e-04, 8.4017e-05, 2.1709e-04,\n 2.1517e-04, 2.7474e-04, 3.5552e-04, 3.5860e-04, 2.8846e-04, 6.7758e-05,\n 1.4089e-04, 2.4477e-04, 2.6057e-04, 4.9538e-05, 1.0457e-04, 2.4137e-04,\n 2.3395e-04, 1.0911e-04, 8.1589e-05, 4.1077e-04, 1.1340e-04, 1.2400e-04,\n 1.0337e-04, 7.4466e-04, 5.3197e-04, 8.8300e-05, 9.9812e-05, 9.8527e-05,\n 2.3411e-04, 4.1352e-04, 2.9294e-04, 5.9548e-04, 1.2493e-04, 1.1768e-04,\n 1.0649e-04, 1.4332e-04, 1.1550e-04, 7.3172e-04, 2.2278e-04, 1.1552e-04,\n 1.0980e-04, 1.8469e-04, 1.8446e-04, 1.4794e-04, 2.3535e-04, 6.6090e-05,\n 1.7952e-04, 1.2012e-04, 1.3588e-04, 1.1554e-04, 1.7365e-04, 2.2801e-04,\n 1.0919e-04, 3.0133e-04, 7.2609e-05, 1.9793e-04, 1.3066e-04, 1.2191e-04,\n 1.8132e-04, 7.1924e-05, 1.3512e-04, 1.7548e-04, 1.3216e-04, 1.8689e-04,\n 1.2063e-04, 1.1903e-04, 3.7374e-04, 3.6043e-04, 2.0809e-04, 2.6052e-04,\n 1.1055e-04, 1.2374e-04, 8.7139e-05, 1.4573e-04, 1.6610e-04, 4.6867e-05,\n 1.4282e-04, 3.5092e-04, 7.8190e-05, 1.2637e-04, 8.6752e-05, 1.0037e-04,\n 7.2115e-04, 1.2185e-04, 7.3007e-05, 7.7492e-05, 1.2505e-04, 1.5231e-04,\n 1.0641e-04, 4.4499e-04, 2.7578e-04, 9.9778e-05, 1.1459e-04, 1.3935e-04,\n 1.1706e-04, 2.1767e-04, 2.8349e-04, 1.9223e-04, 6.3614e-05, 2.9293e-04,\n 2.8205e-04, 1.4150e-04, 1.9727e-04, 1.1284e-04, 9.6297e-05, 7.8502e-05,\n 1.4450e-03, 7.1024e-05, 1.8766e-04, 3.0881e-04, 8.8324e-05, 7.7117e-05,\n 6.5159e-05, 2.2765e-04, 2.2411e-04, 9.7198e-05, 7.2703e-05, 1.6067e-04,\n 1.2053e-04, 1.1041e-04, 6.5134e-05, 6.3087e-04, 2.9163e-04, 1.8672e-04,\n 1.2218e-04, 7.0083e-05, 7.0071e-05, 3.2205e-04, 2.5784e-04, 1.3405e-04,\n 2.1786e-04, 1.1997e-04, 2.7266e-04, 1.2056e-04, 1.0158e-04, 1.4569e-04,\n 9.5382e-05, 1.2566e-04, 1.1132e-04, 1.9317e-04, 2.6112e-04, 1.0603e-04,\n 2.8294e-04, 7.8806e-05, 2.5466e-04, 8.9903e-05, 1.3298e-04, 3.0976e-04,\n 1.3626e-04, 1.5768e-04, 1.3433e-04, 1.2915e-04, 2.2107e-04, 8.9279e-05,\n 8.8730e-05, 6.9262e-05, 1.7540e-04, 9.4491e-05, 1.7249e-04, 1.1421e-04,\n 7.5536e-05, 1.3230e-04, 9.3897e-05, 3.9138e-04, 1.2827e-04, 2.9959e-04,\n 2.2672e-04, 1.8016e-04, 6.5076e-05, 3.4649e-04, 1.6824e-04, 7.8072e-05,\n 1.7569e-04, 1.7231e-04, 1.2665e-04, 9.3953e-05, 8.6530e-05, 1.5499e-04,\n 1.8630e-04, 1.2632e-04, 2.0043e-04, 1.5013e-04, 5.3469e-04, 1.6266e-04,\n 1.0437e-04, 1.2845e-04, 1.9841e-04, 1.5164e-04, 5.6477e-05, 2.6669e-04,\n 1.1006e-04, 8.8623e-05, 1.6216e-04, 1.8535e-04, 2.2631e-04, 5.3140e-05,\n 2.2873e-04, 2.4505e-04, 1.6450e-04, 2.4773e-04, 3.2124e-04, 3.1531e-04,\n 2.9189e-04, 9.8504e-05, 1.4272e-04, 9.6795e-05, 1.1928e-04, 8.0778e-05,\n 3.3497e-04, 1.0283e-04, 1.6285e-04, 8.9457e-05, 2.4455e-04, 1.8756e-04,\n 1.0095e-04, 6.6436e-04, 1.2774e-04, 1.3173e-04, 2.2144e-04, 2.2862e-04,\n 1.0136e-04, 9.3904e-05, 2.6081e-04, 3.0814e-04, 1.0480e-04, 3.8659e-04,\n 2.4903e-04, 1.3065e-04, 7.3117e-05, 1.5566e-04, 1.6721e-04, 8.4570e-05,\n 2.9091e-04, 1.4324e-04, 1.3869e-04, 2.1449e-04, 3.2882e-04, 6.5234e-05,\n 3.0541e-04, 7.3490e-05, 7.7218e-05, 4.5111e-04, 1.7537e-04, 2.1052e-04,\n 1.6772e-04, 3.0308e-04, 2.9887e-04, 3.2522e-01, 1.5590e-04, 1.0976e-04,\n 3.1493e-04, 1.0692e-04], device='cuda:0')"
19
+ },
20
+ "3": {
21
+ "step": "tensor(12520.)",
22
+ "exp_avg": "tensor([-1.0703e-02, -2.8942e-03, -8.3129e-03, 2.7173e-04, 8.1760e-03,\n 2.8985e-03, -3.9763e-03, 4.8547e-03, -2.0685e-03, 6.9446e-03,\n 3.3967e-03, 8.9961e-03, 1.9014e-03, -4.5341e-03, 1.1484e-03,\n -3.4111e-03, 2.6972e-03, 3.5820e-03, 2.5010e-03, -5.4804e-04,\n -1.5767e-03, 7.7475e-04, 1.0495e-03, -5.3573e-03, 5.2728e-04,\n -1.2325e-03, -6.5597e-03, -2.2363e-04, -6.4965e-03, -3.6123e-04,\n 5.0393e-03, 8.1327e-03, 4.2016e-03, -2.3379e-04, 1.3159e-02,\n -5.1751e-03, -5.8559e-03, 8.0799e-04, -4.7718e-04, 5.5059e-03,\n -2.4724e-03, 7.7387e-04, -3.9822e-03, -6.6663e-03, -6.3557e-04,\n 2.8095e-03, 2.8133e-03, -1.9373e-03, 7.3731e-03, -5.0276e-03,\n 4.7021e-04, 8.3201e-04, -1.0550e-03, -3.5117e-03, -1.8464e-03,\n -1.3946e-03, 1.1623e-03, -3.3599e-03, 1.6260e-02, 4.4351e-04,\n 7.9857e-03, 7.6321e-04, 3.1025e-03, 4.0213e-04, -5.5769e-03,\n 2.3584e-03, 1.1738e-03, -3.4983e-03, -2.4583e-03, 1.4639e-03,\n -2.4175e-03, -6.3119e-03, 6.0652e-03, 5.9590e-03, -3.7616e-04,\n -4.3248e-03, -1.0935e-03, 2.3824e-04, -7.7207e-04, 4.0206e-03,\n -6.0180e-03, -5.8550e-03, 1.1471e-03, 3.3303e-03, 1.0793e-02,\n 1.7789e-04, -9.5634e-04, 6.5487e-04, -2.5796e-03, -4.3566e-03,\n -4.2315e-03, 3.1062e-03, -8.3521e-03, 2.6802e-04, -1.9191e-04,\n -2.5805e-03, -2.1381e-03, 1.1273e-03, 5.9573e-03, -4.2724e-04,\n -3.4322e-04, -1.6162e-03, 3.6944e-04, -1.3045e-03, 2.0396e-03,\n -4.7372e-03, 7.7837e-03, -5.7304e-04, -2.5323e-03, -1.2659e-03,\n -3.1497e-03, -3.4246e-04, -2.9873e-03, 3.8862e-03, 1.1016e-03,\n 7.0671e-03, 2.6838e-03, -1.9266e-03, -2.5930e-03, -4.1672e-03,\n 9.5654e-04, 1.0769e-03, -1.0173e-02, -3.2908e-03, -1.1270e-03,\n 4.1682e-03, 1.9226e-04, -1.7840e-04, 4.6184e-03, -4.6771e-03,\n -9.0945e-04, -9.2761e-03, 1.5134e-03, 7.2063e-04, -5.6392e-03,\n -5.2411e-03, 4.4951e-03, 1.2504e-03, -5.7696e-04, -1.2105e-02,\n -6.1997e-03, 1.8727e-03, -2.6521e-05, -4.0427e-03, 2.4022e-03,\n 6.9718e-04, 3.4018e-03, -4.1952e-03, 1.1945e-02, -2.6208e-03,\n -2.6055e-03, 7.0616e-03, 7.0927e-03, -6.5486e-03, -1.6350e-03,\n 7.0306e-03, -6.0157e-03, 1.4211e-03, 2.0913e-03, 1.4870e-03,\n 1.0970e-03, -4.7729e-04, -2.0577e-03, -1.1362e-04, -1.8406e-03,\n 1.0405e-03, 1.0254e-04, -1.3505e-03, -2.3671e-03, -2.1612e-04,\n -1.9930e-03, 3.7271e-03, -1.0111e-02, -9.5926e-04, 4.5455e-03,\n -2.6094e-03, 6.2172e-03, 8.6249e-04, 1.1965e-03, -2.6064e-03,\n -2.0002e-04, -5.7317e-03, -2.7478e-03, 2.5890e-04, 6.9306e-03,\n 1.4068e-03, -1.4852e-02, 1.1811e-02, 8.0699e-03, -5.4762e-03,\n -4.6646e-03, -8.0628e-04, -1.2590e-03, -1.8741e-02, 3.3279e-03,\n -4.8891e-04, -7.0438e-04, -3.9380e-03, -2.2298e-04, 1.9198e-03,\n -3.2727e-03, -3.4590e-03, 3.1317e-03, -9.7189e-04, -6.6553e-03,\n -4.2725e-03, 2.7480e-03, 3.1999e-03, 5.0911e-03, -1.1162e-02,\n 1.8886e-03, -5.4695e-04, -4.7887e-03, -3.0211e-03, -1.1030e-02,\n 2.8673e-03, 2.3236e-03, -6.6595e-03, 6.9219e-03, 3.8587e-04,\n -6.0002e-04, -5.6606e-04, -1.9190e-03, 2.5231e-03, 1.1429e-02,\n 3.9664e-03, 3.2702e-03, -4.5098e-03, 5.6254e-03, -1.0413e-04,\n -8.1918e-04, 5.3777e-03, 3.7159e-03, 2.5227e-03, 4.9716e-03,\n -1.8867e-03, 3.2660e-03, -2.2347e-03, -2.3590e-03, -3.3353e-03,\n -6.8829e-04, -3.2706e-03, 3.0669e-03, -2.1248e-03, -1.2974e-02,\n 6.7792e-04, 5.0356e-03, 2.9887e-03, 3.6869e-03, 3.7039e-03,\n -5.1806e-03, -1.6891e-03, 1.7180e-03, 7.1891e-03, -1.1511e-03,\n 6.2446e-03, -1.3550e-03, -1.1970e-03, -9.4404e-03, 1.0251e-03,\n 9.1439e-04, 1.9823e-03, -2.6484e-03, 2.5180e-03, -5.8033e-03,\n -2.2991e-03, -1.4741e-04, -9.5937e-03, -4.9066e-03, 9.1486e-03,\n -3.5041e-03, 3.3677e-03, 3.7122e-03, 2.7381e-03, -1.9569e-03,\n 3.6743e-03, -1.0446e-03, 4.5596e-03, -2.5267e-03, -5.6266e-03,\n 2.0066e-03, 7.5434e-03, -2.7226e-05, 3.6538e-03, -4.4511e-03,\n 5.1005e-03, -4.9619e-03, -3.8770e-04, 6.2513e-03, 2.2494e-03,\n -4.2809e-03, 6.0151e-03, -1.0087e-02, 8.4354e-04, 3.8739e-03,\n 2.7189e-03, 2.1206e-03, 1.5951e-03, 1.8135e-03, 2.7971e-03,\n -5.3081e-05, -8.3236e-04, -4.2341e-03, 3.9377e-03, 2.1447e-03,\n 3.4220e-03, -3.3863e-03, -4.2358e-04, -3.6739e-03, -3.5900e-03,\n 3.9141e-03, -6.4126e-03, -1.7788e-03, -2.9647e-03, -6.3874e-04,\n -4.7976e-03, 2.4402e-04, 5.7557e-03, 6.3641e-03, -3.6487e-03,\n -8.4133e-04, -2.2269e-03, 2.1446e-03, -5.0522e-04, 3.8102e-03,\n 1.3508e-03, 3.5856e-03, 2.4221e-03, -2.4632e-03, -4.5232e-03,\n 5.2027e-03, -1.8329e-03, 1.5577e-03, -7.9813e-04, -2.6198e-03,\n 9.7329e-04, -4.5511e-03, 1.2243e-03, -4.7343e-04, 3.2673e-03,\n 1.5447e-03, 6.4523e-03, -2.5503e-03, -1.6978e-03, 5.2786e-03,\n 1.7378e-03, -1.3922e-03, 2.5030e-03, -5.5840e-03, -7.4377e-04,\n 4.4865e-03, 1.0271e-03, 3.9482e-03, -5.3763e-03, -1.9038e-03,\n 1.0850e-02, 3.8077e-03, -1.1295e-03, 2.5540e-04, -2.1112e-03,\n -3.7417e-03, -5.3091e-03, -2.1462e-03, -1.4453e-03, 4.4819e-03,\n 8.8914e-04, -4.1564e-03, 4.7144e-03, -6.8560e-03, -6.8304e-03,\n -1.7305e-03, -1.1043e-03, -7.0911e-04, 1.6244e-03, -3.1413e-03,\n -2.2929e-03, 3.4781e-03, 6.4268e-05, 7.4984e-04, 1.8693e-03,\n -2.7368e-03, -3.5811e-03, 1.8834e-03, -2.7200e-04, 2.7873e-03,\n 6.4068e-04, 1.2971e-02, 5.7999e-03, 4.4020e-03, 4.1933e-03,\n 4.6187e-03, 6.3620e-03, 3.8920e-03, 3.6246e-03, 1.4400e-03,\n -8.4925e-05, 8.5451e-04, -2.8243e-03, 1.3152e-03, 3.9966e-03,\n 2.0713e-03, -2.4593e-03, -5.6527e-03, 2.2849e-03, -2.5092e-03,\n -1.3601e-03, 5.0184e-04, -3.7558e-04, 4.1210e-03, 3.7273e-03,\n -1.7090e-03, 2.0484e-03, -7.7457e-03, 4.3197e-03, -3.6580e-03,\n -5.7812e-03, -3.6532e-03, -7.3131e-03, 4.8163e-03, -3.0999e-03,\n -6.8240e-03, -1.0567e-02, 2.7215e-03, 1.3402e-03, -1.3130e-02,\n 2.5690e-03, -5.7812e-03, 6.9343e-04, 2.1149e-03, 1.5452e-04,\n -3.2494e-03, -6.1115e-03, -7.2039e-04, -2.5362e-03, 1.0738e-04,\n -2.2308e-03, 2.6749e-03, 7.7404e-03, -3.2061e-04, -4.2533e-03,\n -8.8279e-04, -3.9667e-03, 1.5944e-03, 5.6925e-03, 9.0675e-03,\n 1.6649e-04, -7.1785e-03, 2.7649e-03, -3.0918e-03, 2.0589e-03,\n 6.4373e-03, 4.5509e-03, -8.5671e-03, 7.1672e-04, 2.6248e-03,\n -2.5318e-03, -5.0277e-03, -4.9177e-03, 4.9472e-03, -1.7538e-03,\n -2.1628e-03, 1.7985e-03, 8.8906e-04, -1.9905e-04, 5.2893e-03,\n -3.3894e-03, -2.5611e-03, 5.3512e-03, -5.4877e-04, 1.7048e-03,\n -1.5856e-03, 5.2233e-03, -4.6739e-04, -2.7061e-03, -9.3177e-04,\n -5.6086e-03, -6.5667e-03, 4.2224e-03, 9.1287e-04, 9.9510e-03,\n 1.8399e-03, -9.0556e-03, -2.9879e-03, -1.6419e-02, 7.6723e-03,\n 2.5744e-03, 4.3279e-03, 5.2202e-03, -1.0966e-03, 6.6483e-03,\n 7.5733e-04, -1.5106e-03, 8.2267e-03, 1.6876e-03, -6.4025e-03,\n 1.4304e-03, 4.6533e-03, -1.6324e-03, 5.9713e-03, -2.7134e-03,\n 6.3670e-03, 2.2764e-03, -2.5342e-04, 4.7187e-03, 6.6316e-04,\n -1.1014e-02, -3.0217e-03, 4.2655e-05, -1.1885e-03, -7.2697e-03,\n 9.5135e-03, -2.5884e-03], device='cuda:0')",
23
+ "exp_avg_sq": "tensor([1.5745e-04, 1.4529e-04, 1.5292e-04, 1.3033e-04, 1.8859e-04, 1.6279e-04,\n 1.8144e-04, 1.1721e-04, 1.1326e-04, 1.4122e-04, 2.0831e-04, 1.2046e-04,\n 1.9255e-04, 1.2722e-04, 1.3680e-04, 2.1332e-04, 1.7108e-04, 1.2605e-04,\n 1.1987e-04, 1.8893e-04, 9.6206e-05, 1.7456e-04, 1.3720e-04, 2.1250e-04,\n 1.3790e-04, 1.9328e-04, 1.6693e-04, 2.0794e-04, 1.8553e-04, 1.9938e-04,\n 1.6751e-04, 2.6260e-04, 1.4503e-04, 2.9169e-04, 1.7758e-04, 1.2903e-04,\n 1.4471e-04, 1.2980e-04, 1.8128e-04, 3.1529e-04, 1.7771e-04, 2.1823e-04,\n 2.6005e-04, 1.3487e-04, 1.6835e-04, 1.9489e-04, 1.5176e-04, 1.5232e-04,\n 1.3766e-04, 1.7503e-04, 1.8289e-04, 1.6068e-04, 1.5238e-04, 1.2902e-04,\n 1.5406e-04, 1.8198e-04, 9.3063e-05, 1.3848e-04, 2.4896e-04, 1.6832e-04,\n 1.6952e-04, 1.4597e-04, 2.9420e-04, 1.6345e-04, 2.6953e-04, 2.7970e-04,\n 1.3493e-04, 1.2943e-04, 1.2878e-04, 1.3201e-04, 1.9452e-04, 1.4833e-04,\n 1.3724e-04, 1.9947e-04, 1.8299e-04, 1.7671e-04, 1.6507e-04, 1.7004e-04,\n 1.2566e-04, 1.2548e-04, 1.5752e-04, 2.3845e-04, 1.4065e-04, 1.4681e-04,\n 2.3527e-04, 1.8237e-04, 2.3011e-04, 1.7098e-04, 1.1440e-04, 1.7619e-04,\n 2.0849e-04, 2.2516e-04, 1.0626e-04, 1.8015e-04, 1.9887e-04, 1.6959e-04,\n 1.2250e-04, 1.4883e-04, 1.3636e-04, 8.1533e-05, 1.0602e-04, 1.6255e-04,\n 1.1546e-04, 1.6421e-04, 1.1829e-04, 1.3487e-04, 2.0303e-04, 1.1651e-04,\n 1.7395e-04, 1.7645e-04, 2.0376e-04, 9.7683e-05, 2.1458e-04, 1.0532e-04,\n 1.2797e-04, 2.1774e-04, 1.5033e-04, 1.2185e-04, 2.6982e-04, 2.4656e-04,\n 1.4309e-04, 1.6479e-04, 2.4369e-04, 2.1606e-04, 2.1422e-04, 1.9685e-04,\n 9.6082e-05, 1.1550e-04, 1.6778e-04, 2.3217e-04, 1.5461e-04, 1.6288e-04,\n 1.5442e-04, 1.5779e-04, 1.3037e-04, 1.9700e-04, 1.3077e-04, 1.5153e-04,\n 1.5856e-04, 2.7737e-04, 2.1083e-04, 1.0150e-04, 1.3371e-04, 1.5031e-04,\n 2.0365e-04, 1.8375e-04, 1.3153e-04, 1.6520e-04, 2.4064e-04, 1.9974e-04,\n 1.4634e-04, 3.1216e-04, 1.9203e-04, 2.3595e-04, 1.2167e-04, 2.4733e-04,\n 1.4030e-04, 2.3998e-04, 1.5661e-04, 9.1792e-05, 1.3092e-04, 1.9944e-04,\n 1.7897e-04, 1.6218e-04, 1.5095e-04, 1.3477e-04, 2.0704e-04, 1.2775e-04,\n 1.3255e-04, 1.8888e-04, 1.9514e-04, 1.4219e-04, 1.8696e-04, 1.8290e-04,\n 1.4988e-04, 1.4194e-04, 1.9438e-04, 1.1314e-04, 1.7247e-04, 1.3673e-04,\n 2.7896e-04, 1.9050e-04, 1.9462e-04, 1.3954e-04, 1.9986e-04, 1.3869e-04,\n 1.7574e-04, 1.2525e-04, 2.1117e-04, 1.5664e-04, 1.8441e-04, 9.6092e-05,\n 1.1624e-04, 2.1300e-04, 1.6456e-04, 1.5205e-04, 2.1448e-04, 1.0360e-04,\n 1.9119e-04, 1.4755e-04, 1.4549e-04, 1.1805e-04, 1.7541e-04, 1.3991e-04,\n 1.8431e-04, 1.7144e-04, 2.5290e-04, 1.1623e-04, 1.3388e-04, 1.3682e-04,\n 2.2393e-04, 1.8587e-04, 2.1437e-04, 1.3428e-04, 1.7580e-04, 1.9220e-04,\n 1.3124e-04, 2.4146e-04, 1.9965e-04, 9.9597e-05, 1.8829e-04, 1.8257e-04,\n 1.2500e-04, 1.1241e-04, 2.4688e-04, 2.1598e-04, 1.6765e-04, 1.2833e-04,\n 1.4622e-04, 1.3080e-04, 1.1390e-04, 1.1741e-04, 1.5909e-04, 1.4573e-04,\n 1.6534e-04, 1.4509e-04, 2.0396e-04, 1.8521e-04, 1.3144e-04, 1.5396e-04,\n 2.4084e-04, 1.4232e-04, 1.8797e-04, 2.3099e-04, 2.4763e-04, 1.8935e-04,\n 1.6838e-04, 1.8006e-04, 1.5746e-04, 1.9440e-04, 1.8909e-04, 2.1804e-04,\n 1.2470e-04, 1.5747e-04, 1.6557e-04, 1.7857e-04, 1.7467e-04, 2.1991e-04,\n 2.2832e-04, 2.1712e-04, 2.0918e-04, 1.9030e-04, 1.2259e-04, 2.6929e-04,\n 1.6075e-04, 1.4651e-04, 2.4348e-04, 1.9850e-04, 1.8794e-04, 1.5227e-04,\n 1.8637e-04, 1.7379e-04, 1.8184e-04, 9.9646e-05, 1.5052e-04, 1.4092e-04,\n 1.4998e-04, 1.2642e-04, 1.5144e-04, 2.8933e-04, 1.1810e-04, 2.2827e-04,\n 1.8493e-04, 1.9622e-04, 1.4822e-04, 1.3550e-04, 1.2487e-04, 1.6034e-04,\n 1.5129e-04, 2.8397e-04, 1.7743e-04, 1.8500e-04, 1.5693e-04, 1.4409e-04,\n 1.2630e-04, 1.8586e-04, 1.3881e-04, 1.9341e-04, 2.5129e-04, 2.1607e-04,\n 1.0825e-04, 1.3006e-04, 1.5088e-04, 1.6959e-04, 2.3667e-04, 1.2874e-04,\n 1.7184e-04, 1.4466e-04, 2.0946e-04, 1.3242e-04, 2.5636e-04, 2.0723e-04,\n 1.9206e-04, 2.4587e-04, 1.2777e-04, 1.8699e-04, 1.5173e-04, 1.8693e-04,\n 1.5134e-04, 1.2186e-04, 1.5570e-04, 1.6235e-04, 1.3731e-04, 1.7489e-04,\n 1.1497e-04, 2.2332e-04, 2.2914e-04, 2.2876e-04, 1.8453e-04, 2.1461e-04,\n 1.1817e-04, 1.4317e-04, 1.1464e-04, 1.5036e-04, 2.3096e-04, 9.6367e-05,\n 1.1252e-04, 1.2457e-04, 1.2619e-04, 1.5384e-04, 1.4826e-04, 1.8248e-04,\n 2.2916e-04, 1.4538e-04, 1.2793e-04, 1.3523e-04, 1.1485e-04, 1.3399e-04,\n 1.3993e-04, 2.0237e-04, 1.4887e-04, 1.7287e-04, 1.2180e-04, 1.2012e-04,\n 1.1181e-04, 1.7403e-04, 1.5155e-04, 1.7966e-04, 1.1309e-04, 1.7091e-04,\n 1.7961e-04, 2.7506e-04, 1.4700e-04, 2.6651e-04, 1.3820e-04, 1.5481e-04,\n 2.4703e-04, 1.0403e-04, 1.6211e-04, 1.9039e-04, 1.0435e-04, 1.2876e-04,\n 1.0964e-04, 1.6505e-04, 1.4885e-04, 1.7809e-04, 1.1406e-04, 1.6969e-04,\n 2.5562e-04, 1.7032e-04, 9.5438e-05, 1.6414e-04, 1.4774e-04, 1.2972e-04,\n 1.5495e-04, 1.6190e-04, 1.5378e-04, 2.0847e-04, 1.4997e-04, 2.7877e-04,\n 1.4804e-04, 1.3784e-04, 1.5609e-04, 1.2720e-04, 1.6641e-04, 1.7110e-04,\n 1.2015e-04, 1.3076e-04, 2.0483e-04, 3.4257e-04, 1.5742e-04, 1.1801e-04,\n 1.9702e-04, 9.1743e-05, 1.9451e-04, 1.7165e-04, 1.3599e-04, 1.5452e-04,\n 1.5009e-04, 1.9637e-04, 1.2511e-04, 1.2295e-04, 1.4064e-04, 1.6470e-04,\n 1.3096e-04, 1.4279e-04, 1.4079e-04, 2.1662e-04, 1.8270e-04, 2.3549e-04,\n 1.3121e-04, 1.3440e-04, 1.9125e-04, 2.0228e-04, 1.9981e-04, 1.5584e-04,\n 1.6800e-04, 1.5845e-04, 8.5878e-05, 1.8891e-04, 1.5176e-04, 2.0238e-04,\n 2.6004e-04, 1.6190e-04, 1.3962e-04, 1.0400e-04, 1.5591e-04, 2.5160e-04,\n 1.4145e-04, 1.2718e-04, 3.0373e-04, 1.5539e-04, 2.2985e-04, 1.9630e-04,\n 2.2426e-04, 1.2729e-04, 1.1904e-04, 1.3052e-04, 1.3163e-04, 1.5747e-04,\n 1.4241e-04, 1.1992e-04, 1.6318e-04, 1.7069e-04, 1.9632e-04, 1.0405e-04,\n 2.7521e-04, 1.4183e-04, 1.7021e-04, 1.9044e-04, 1.5309e-04, 2.3684e-04,\n 2.1963e-04, 1.4029e-04, 2.3325e-04, 9.3078e-05, 1.3347e-04, 1.5429e-04,\n 2.5466e-04, 1.7798e-04, 1.4661e-04, 1.1692e-04, 1.7653e-04, 1.7261e-04,\n 1.7487e-04, 2.0439e-04, 2.1310e-04, 1.0948e-04, 1.7874e-04, 2.1683e-04,\n 1.5911e-04, 1.6465e-04, 1.9187e-04, 3.9832e-04, 1.5268e-04, 2.2361e-04,\n 1.6521e-04, 2.1066e-04, 1.1710e-04, 1.6729e-04, 1.4511e-04, 1.1764e-04,\n 1.9667e-04, 1.3945e-04, 1.6816e-04, 2.2484e-04, 1.7005e-04, 1.4959e-04,\n 2.0270e-04, 1.2115e-04, 1.1794e-04, 1.5654e-04, 1.1855e-04, 1.6327e-04,\n 1.1855e-04, 1.9656e-04, 2.2485e-04, 8.5002e-04, 1.9084e-04, 2.2414e-04,\n 2.0356e-04, 1.4652e-04], device='cuda:0')"
24
+ },
25
+ "4": {
26
+ "step": "tensor(12520.)",
27
+ "exp_avg": "tensor([[-5.5106e-05, -1.0152e-05, -2.9147e-05, ..., 1.8254e-04,\n 5.8381e-05, 1.0848e-04],\n [ 4.3070e-04, 1.0480e-04, 8.1528e-05, ..., -1.6144e-04,\n -5.7774e-04, 7.3881e-05],\n [-2.2840e-04, 1.4989e-04, 7.8359e-05, ..., -1.5792e-04,\n -4.1823e-04, -4.3666e-04],\n ...,\n [-2.7423e-04, -2.4271e-04, 7.9358e-05, ..., 1.4911e-04,\n 4.6811e-05, 2.2030e-04],\n [ 4.7102e-05, -7.1363e-05, 5.0066e-05, ..., 9.5828e-05,\n 6.6547e-05, -4.6094e-05],\n [ 1.7751e-04, -1.5783e-04, -1.9104e-04, ..., 5.5701e-05,\n -2.5862e-04, -2.6955e-05]], device='cuda:0')",
28
+ "exp_avg_sq": "tensor([[2.7374e-07, 1.3124e-07, 1.6542e-07, ..., 1.8294e-07, 2.4830e-07,\n 1.7589e-07],\n [4.4629e-07, 2.4384e-07, 2.2847e-07, ..., 5.0564e-07, 5.5865e-07,\n 7.3888e-07],\n [3.9625e-07, 3.2368e-07, 2.9385e-07, ..., 4.9627e-07, 5.2429e-07,\n 5.6006e-07],\n ...,\n [3.8774e-07, 6.2680e-07, 3.0069e-07, ..., 3.9994e-07, 5.9668e-07,\n 5.0718e-07],\n [2.5717e-07, 4.0021e-07, 2.9676e-07, ..., 5.5159e-07, 5.7863e-07,\n 5.4680e-07],\n [4.9592e-07, 4.2564e-07, 2.9534e-07, ..., 4.2484e-07, 5.6518e-07,\n 5.1931e-07]], device='cuda:0')"
29
+ },
30
+ "5": {
31
+ "step": "tensor(12520.)",
32
+ "exp_avg": "tensor([[ 1.0904e-04, 4.1566e-05, 9.3250e-05, ..., 1.2335e-04,\n -7.1549e-05, 1.6816e-04],\n [ 2.0987e-04, 3.5128e-05, -2.5475e-06, ..., -1.6237e-05,\n -9.5748e-05, 6.3477e-05],\n [-1.1149e-04, 9.4882e-05, 3.8091e-05, ..., -1.4232e-04,\n -1.9914e-04, -2.8290e-04],\n ...,\n [ 1.1539e-04, -1.2301e-04, 3.0498e-04, ..., -6.5792e-05,\n -1.5094e-04, -1.3551e-04],\n [-1.5197e-04, 4.5966e-05, -1.1011e-04, ..., -1.5446e-04,\n 1.5143e-04, -3.4747e-05],\n [-9.7458e-07, 6.6231e-07, 1.5338e-05, ..., -8.9043e-06,\n -1.8253e-04, 1.0116e-05]], device='cuda:0')",
33
+ "exp_avg_sq": "tensor([[7.0704e-08, 4.0568e-08, 6.0063e-08, ..., 4.3100e-08, 9.8600e-08,\n 6.5291e-08],\n [1.4195e-07, 8.6568e-08, 9.8495e-08, ..., 1.4824e-07, 1.8457e-07,\n 2.5019e-07],\n [1.4109e-07, 1.1460e-07, 1.1264e-07, ..., 2.0186e-07, 1.9340e-07,\n 2.3345e-07],\n ...,\n [1.3431e-07, 2.0559e-07, 2.0778e-07, ..., 1.6637e-07, 2.4385e-07,\n 1.4896e-07],\n [1.4315e-07, 1.2620e-07, 1.0244e-07, ..., 1.9626e-07, 2.6884e-07,\n 2.0085e-07],\n [1.1986e-07, 1.2491e-07, 1.4566e-07, ..., 1.6158e-07, 2.3473e-07,\n 2.5248e-07]], device='cuda:0')"
34
+ },
35
+ "6": {
36
+ "step": "tensor(12520.)",
37
+ "exp_avg": "tensor([-0.0009, 0.0009], device='cuda:0')",
38
+ "exp_avg_sq": "tensor([7.5451e-06, 7.5451e-06], device='cuda:0')"
39
+ }
40
+ },
41
+ "param_groups": [
42
+ {
43
+ "lr": 0.001,
44
+ "name": "shared",
45
+ "betas": [
46
+ 0.9,
47
+ 0.999
48
+ ],
49
+ "eps": 1e-08,
50
+ "weight_decay": 1e-05,
51
+ "amsgrad": false,
52
+ "maximize": false,
53
+ "foreach": null,
54
+ "capturable": false,
55
+ "differentiable": false,
56
+ "fused": null,
57
+ "decoupled_weight_decay": true,
58
+ "initial_lr": 0.001,
59
+ "params": [
60
+ 0,
61
+ 1,
62
+ 2,
63
+ 3
64
+ ]
65
+ },
66
+ {
67
+ "lr": 0.001,
68
+ "name": "scale_256",
69
+ "betas": [
70
+ 0.9,
71
+ 0.999
72
+ ],
73
+ "eps": 1e-08,
74
+ "weight_decay": 1e-05,
75
+ "amsgrad": false,
76
+ "maximize": false,
77
+ "foreach": null,
78
+ "capturable": false,
79
+ "differentiable": false,
80
+ "fused": null,
81
+ "decoupled_weight_decay": true,
82
+ "initial_lr": 0.001,
83
+ "params": [
84
+ 4
85
+ ]
86
+ },
87
+ {
88
+ "lr": 0.001,
89
+ "name": "scale_512",
90
+ "betas": [
91
+ 0.9,
92
+ 0.999
93
+ ],
94
+ "eps": 1e-08,
95
+ "weight_decay": 1e-05,
96
+ "amsgrad": false,
97
+ "maximize": false,
98
+ "foreach": null,
99
+ "capturable": false,
100
+ "differentiable": false,
101
+ "fused": null,
102
+ "decoupled_weight_decay": true,
103
+ "initial_lr": 0.001,
104
+ "params": [
105
+ 5
106
+ ]
107
+ },
108
+ {
109
+ "lr": 0.0005,
110
+ "name": "fusion",
111
+ "betas": [
112
+ 0.9,
113
+ 0.999
114
+ ],
115
+ "eps": 1e-08,
116
+ "weight_decay": 1e-05,
117
+ "amsgrad": false,
118
+ "maximize": false,
119
+ "foreach": null,
120
+ "capturable": false,
121
+ "differentiable": false,
122
+ "fused": null,
123
+ "decoupled_weight_decay": true,
124
+ "initial_lr": 0.0005,
125
+ "params": [
126
+ 6
127
+ ]
128
+ }
129
+ ]
130
+ },
131
+ "scheduler_state_dict": {
132
+ "T_0": 10,
133
+ "T_i": 20,
134
+ "T_mult": 2,
135
+ "eta_min": 1e-06,
136
+ "T_cur": 0,
137
+ "base_lrs": [
138
+ 0.001,
139
+ 0.001,
140
+ 0.001,
141
+ 0.0005
142
+ ],
143
+ "last_epoch": 10,
144
+ "_step_count": 0,
145
+ "_is_initial": false,
146
+ "_get_lr_called_within_step": false,
147
+ "_last_lr": [
148
+ 0.001,
149
+ 0.001,
150
+ 0.001,
151
+ 0.0005
152
+ ]
153
+ },
154
+ "metrics": {
155
+ "best_val_acc": 71.726,
156
+ "best_epoch": 9,
157
+ "scale_accuracies": {
158
+ "256": 71.258,
159
+ "512": 71.69
160
+ },
161
+ "training_history": {
162
+ "epochs": [
163
+ 1,
164
+ 2,
165
+ 3,
166
+ 4,
167
+ 5,
168
+ 6,
169
+ 7,
170
+ 8,
171
+ 9,
172
+ 10
173
+ ],
174
+ "train_loss": [
175
+ 5.60248446921571,
176
+ 4.156974341351384,
177
+ 3.7702821485531595,
178
+ 3.570641661223512,
179
+ 3.4472002215659656,
180
+ 3.3609565016560663,
181
+ 3.300025675433893,
182
+ 3.2499928289709,
183
+ 3.213850290440142,
184
+ 3.193320405940278
185
+ ],
186
+ "train_acc": [
187
+ 63.38018384800733,
188
+ 69.48813074329888,
189
+ 70.23393515443342,
190
+ 70.76774534467404,
191
+ 71.32536195515495,
192
+ 71.77877669343653,
193
+ 72.23593801588707,
194
+ 72.61926040867428,
195
+ 72.93288072515136,
196
+ 73.1782039343817
197
+ ],
198
+ "val_acc": [
199
+ 67.966,
200
+ 69.586,
201
+ 69.866,
202
+ 70.47,
203
+ 70.854,
204
+ 71.1,
205
+ 71.388,
206
+ 71.674,
207
+ 71.626,
208
+ 71.726
209
+ ],
210
+ "scale_accs": {
211
+ "256": [
212
+ 66.908,
213
+ 68.868,
214
+ 69.194,
215
+ 69.78,
216
+ 70.214,
217
+ 70.592,
218
+ 70.794,
219
+ 71.166,
220
+ 71.074,
221
+ 71.258
222
+ ],
223
+ "512": [
224
+ 67.774,
225
+ 69.268,
226
+ 69.844,
227
+ 70.366,
228
+ 70.82,
229
+ 71.088,
230
+ 71.292,
231
+ 71.628,
232
+ 71.684,
233
+ 71.69
234
+ ]
235
+ },
236
+ "lr": [
237
+ 0.0009755527298894294,
238
+ 0.0009046039886902864,
239
+ 0.0007940987335200904,
240
+ 0.0006548539886902864,
241
+ 0.0005005000000000001,
242
+ 0.0003461460113097139,
243
+ 0.00020690126647990973,
244
+ 9.639601130971382e-05,
245
+ 2.5447270110570814e-05,
246
+ 0.001
247
+ ]
248
+ }
249
+ },
250
+ "train_config": {
251
+ "name": "david_training",
252
+ "run_id": "20251012_141246",
253
+ "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
254
+ "model_variant": "clip_vit_laion_b32",
255
+ "num_classes": 1000,
256
+ "preset": "small_fast",
257
+ "custom_config_path": null,
258
+ "num_classes_override": null,
259
+ "use_belly_override": null,
260
+ "belly_expand_override": null,
261
+ "progressive_training_override": false,
262
+ "scale_warmup_epochs_override": null,
263
+ "num_epochs": 10,
264
+ "batch_size": 1024,
265
+ "learning_rate": 0.001,
266
+ "weight_decay": 1e-05,
267
+ "warmup_epochs": 3,
268
+ "use_rose_loss": true,
269
+ "rose_initial_weight": 0.1,
270
+ "rose_max_weight": 0.5,
271
+ "rose_weight_schedule": "adaptive",
272
+ "use_cayley_loss": false,
273
+ "cayley_weight": 0.001,
274
+ "scale_loss_balance": null,
275
+ "use_mixed_precision": true,
276
+ "gradient_clip": 10.0,
277
+ "scheduler_type": "cosine_restarts",
278
+ "min_lr": 1e-06,
279
+ "freeze_strategy": "never",
280
+ "freeze_threshold": 90.0,
281
+ "unfreeze_on_plateau": true,
282
+ "patience": 10,
283
+ "track_gradients": true,
284
+ "gradient_scale_threshold": 1e-05,
285
+ "gradient_scale_multiplier": 10.0,
286
+ "log_interval": 50,
287
+ "val_interval": 1,
288
+ "save_interval": 5,
289
+ "log_fusion_weights": true,
290
+ "log_loss_components": true,
291
+ "save_format": "safetensors",
292
+ "hf_repo": "AbstractPhil/gated-david",
293
+ "upload_to_hub": true,
294
+ "base_dir": "./david_training",
295
+ "num_workers": 10,
296
+ "pin_memory": true,
297
+ "prefetch_factor": 4,
298
+ "persistent_workers": true
299
+ }
300
+ }