AbstractPhil commited on
Commit
501f962
·
verified ·
1 Parent(s): 11ba1eb

Update best_model_acc76.86_metadata.json - Run 20251012_145649

Browse files
weights/David-hierarchical-progressive/20251012_145649/best_model_acc76.86_metadata.json ADDED
@@ -0,0 +1,383 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 8,
3
+ "optimizer_state_dict": {
4
+ "state": {
5
+ "0": {
6
+ "step": "tensor(11268.)",
7
+ "exp_avg": "tensor([[-1.1685e-04, -2.3069e-04, 4.5324e-05, ..., 1.2603e-04,\n -1.5863e-04, 2.0243e-04],\n [-2.3048e-05, -3.2424e-05, 1.0282e-04, ..., -9.5718e-06,\n 2.1657e-04, -2.0288e-06],\n [-4.2647e-05, 1.0743e-03, 5.4178e-04, ..., -6.9786e-04,\n 9.1124e-05, -3.0852e-04],\n ...,\n [ 2.3188e-04, -1.2546e-04, -7.0373e-04, ..., 2.7944e-04,\n 1.2716e-04, 4.3343e-05],\n [-6.4416e-04, -2.5853e-03, -1.9541e-03, ..., -7.1879e-04,\n -7.4480e-04, -6.1803e-04],\n [ 2.6068e-05, -6.6333e-04, -2.0083e-04, ..., -1.2870e-04,\n -2.4984e-05, -1.0416e-04]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[7.7838e-07, 3.7546e-06, 1.7433e-06, ..., 9.3452e-07, 2.9582e-07,\n 1.1904e-06],\n [4.7161e-07, 4.5499e-06, 1.5017e-06, ..., 4.8986e-07, 2.8911e-07,\n 3.1873e-07],\n [7.7352e-07, 4.3406e-06, 2.6566e-06, ..., 5.6082e-07, 3.1232e-07,\n 7.9450e-07],\n ...,\n [7.5850e-07, 3.7649e-06, 2.5818e-06, ..., 6.5529e-07, 3.4038e-07,\n 6.6514e-07],\n [9.3902e-07, 8.3628e-06, 3.8473e-06, ..., 8.5546e-07, 5.2545e-07,\n 5.1933e-07],\n [6.0450e-07, 5.3777e-06, 2.6713e-06, ..., 3.8742e-07, 3.0988e-07,\n 3.9683e-07]], device='cuda:0')"
9
+ },
10
+ "1": {
11
+ "step": "tensor(11268.)",
12
+ "exp_avg": "tensor([ 4.1364e-03, 2.8036e-03, -9.4203e-03, 1.5231e-03, -5.2922e-03,\n 1.2152e-03, 5.1824e-03, 1.5238e-02, 6.3883e-03, 3.5643e-03,\n 5.0932e-03, 6.5141e-03, 1.0051e-02, 1.5875e-03, 3.2538e-03,\n 6.0873e-03, 7.4947e-03, 2.8245e-03, 2.8867e-03, -1.9659e-04,\n 2.8143e-03, -1.2023e-02, -1.8398e-02, 3.9656e-03, 9.5128e-03,\n -1.1704e-03, -1.5774e-02, -2.3675e-03, -2.7860e-03, -2.3242e-02,\n -5.7804e-03, 4.9137e-02, 1.9274e-02, 1.4887e-02, 3.0910e-04,\n -6.2885e-03, 6.7523e-03, 7.3748e-03, -1.3726e-03, 7.6143e-03,\n 5.2154e-03, 4.8698e-03, -2.3204e-03, -4.6776e-03, 2.3663e-03,\n -8.6751e-04, 7.5453e-03, 3.2881e-02, 7.8844e-03, -6.6477e-03,\n 9.2704e-03, -3.6995e-03, -3.8514e-03, -1.9090e-03, -6.7332e-03,\n -5.7924e-03, 4.4060e-04, -3.6406e-03, 3.1443e-03, 6.1026e-03,\n -4.9657e-03, -9.1092e-04, -1.8573e-02, -1.5623e-02, -2.3465e-03,\n 3.9067e-03, -9.5050e-03, 5.6054e-03, -1.3761e-03, -8.5125e-04,\n -6.9580e-03, -6.2318e-04, -6.4729e-03, -6.3770e-03, 2.9867e-03,\n -9.9869e-03, -6.5894e-03, -1.3645e-02, -4.1637e-04, -2.2616e-03,\n -9.8936e-03, -2.8302e-03, -5.0281e-03, 5.6383e-03, -7.9990e-03,\n -1.2430e-02, 4.8003e-03, 5.8021e-03, 5.6016e-03, 1.3685e-02,\n -8.2637e-04, -9.7777e-03, 2.3765e-03, -4.6388e-03, 2.0188e-03,\n -1.9676e-02, 1.1052e-02, 2.5352e-03, 1.2262e-02, 1.3188e-02,\n 6.6848e-03, 1.7180e-03, 1.4019e-03, -1.8566e-02, 2.8026e-03,\n 5.5085e-03, -1.7398e-03, 1.4767e-03, -6.0877e-03, -6.4535e-03,\n 5.1941e-03, 5.6465e-03, -1.6060e-04, 1.8758e-03, 1.1462e-03,\n -6.6983e-03, 4.1497e-03, -5.1648e-03, -1.0620e-02, -1.5086e-02,\n 5.1525e-03, -2.1600e-03, -1.2500e-02, 5.1317e-02, -1.0015e-02,\n 2.1209e-03, 3.1948e-03, -7.8129e-03, -2.9637e-03, -1.1631e-02,\n -1.8068e-02, -1.6510e-03, 2.5355e-02, -4.3960e-03, 1.1019e-05,\n -1.5948e-03, 2.2518e-02, -3.2556e-02, 5.2544e-03, -8.3215e-03,\n -2.1162e-03, 5.6176e-03, 1.1277e-02, 7.5551e-03, 1.0876e-03,\n -2.5473e-03, -6.4001e-03, -2.1926e-03, -6.4848e-03, -1.0838e-02,\n 1.5099e-03, 5.1866e-03, 6.1710e-03, -1.1773e-02, 3.6575e-04,\n 8.8548e-03, 8.9397e-03, 6.0125e-03, 2.0352e-03, 4.0846e-03,\n 5.5213e-04, -1.5899e-02, 4.8822e-04, 9.7190e-03, 1.2320e-02,\n -3.5152e-03, 4.1294e-03, 1.6484e-03, 5.2272e-03, -3.4636e-03,\n -1.9938e-03, 6.3080e-03, -7.8859e-04, -5.0946e-03, -4.7656e-03,\n -2.6497e-03, -1.8249e-03, 6.3370e-03, -1.6462e-02, 2.9639e-03,\n 6.3680e-03, 9.8619e-03, -8.2283e-03, 1.0116e-02, -1.7178e-02,\n -7.5244e-03, 8.7004e-03, -1.0514e-02, -9.3466e-03, -3.5568e-05,\n -6.7994e-03, -4.1739e-03, -8.9216e-03, 8.5050e-03, 5.0953e-04,\n -3.3791e-02, 5.2541e-03, -2.8191e-03, -3.4618e-03, 1.5097e-03,\n 7.5875e-03, 1.2367e-02, -1.5532e-03, -3.6307e-03, -1.0802e-02,\n -3.3814e-03, 2.1920e-02, -2.1401e-03, -4.0640e-03, 1.0281e-02,\n -4.0721e-03, 9.1303e-03, 6.0195e-03, -1.8835e-03, -2.7825e-03,\n 1.6806e-02, -3.6599e-03, 3.5774e-03, 1.0604e-02, -9.2866e-03,\n -3.0365e-03, -1.4241e-02, 1.3818e-02, -7.0783e-03, 4.2689e-03,\n -9.7542e-03, 7.7638e-04, -1.8233e-02, 1.3217e-02, 3.0171e-06,\n -4.4541e-04, -1.4679e-03, -6.9183e-04, -9.5381e-03, 1.4148e-03,\n 9.5721e-04, 5.0729e-03, 2.2235e-03, -2.3296e-03, 2.7614e-03,\n -4.2252e-04, -6.7427e-03, 3.2510e-03, -2.3510e-02, 2.9690e-03,\n -1.3953e-02, 1.1056e-03, 1.7319e-02, 1.0271e-02, 1.3966e-03,\n 3.3611e-03, 4.5024e-03, 1.5438e-02, -1.4151e-03, 1.5154e-02,\n 4.6808e-03, 8.8505e-03, 1.5637e-03, 8.8826e-04, -2.2813e-03,\n 3.1160e-03, 3.4559e-03, 3.9943e-03, -1.7325e-04, -7.9351e-03,\n -7.9499e-03, 2.1583e-02, -2.9617e-02, 2.6663e-03, -1.9385e-02,\n -6.2172e-03, -3.9279e-03, 7.8075e-03, -9.2989e-03, 2.4636e-02,\n 1.4007e-04, -1.1264e-02, 5.7918e-03, 3.5283e-03, 8.3409e-03,\n 3.8179e-03, -9.1356e-03, -6.2707e-03, -6.3993e-03, 3.6270e-03,\n -1.2253e-02, 1.3598e-02, -4.1658e-03, 9.0207e-04, 5.5752e-03,\n 5.6998e-03, 4.0438e-03, 9.7331e-03, 1.6812e-03, -1.5651e-02,\n 3.3011e-03, 3.3539e-03, 2.4141e-03, -8.6125e-04, 2.6393e-03,\n 4.7768e-03, 2.3589e-04, -1.9466e-02, -4.0362e-04, 2.5150e-03,\n -1.9464e-02, 2.4215e-02, -3.4188e-02, -2.1177e-04, -5.7024e-03,\n -1.9905e-03, -1.3386e-03, 2.0530e-03, -2.6258e-03, -4.0206e-02,\n -3.3523e-02, 1.2541e-02, -7.6734e-03, 3.1678e-03, 1.6228e-02,\n 3.0614e-03, 1.9318e-03, -3.5825e-03, 1.5005e-02, 1.3444e-02,\n -1.2516e-03, -6.2624e-05, 2.1621e-03, 1.2800e-02, -7.0336e-03,\n 2.0445e-03, 2.0985e-02, 5.6474e-03, -1.4783e-04, -3.8626e-04,\n 3.0232e-03, -2.2780e-05, -4.7585e-03, -4.0016e-04, 1.5453e-03,\n -5.8470e-04, 1.4028e-03, 7.1921e-03, -5.4611e-03, -5.0102e-03,\n -9.3383e-04, 9.4885e-03, -3.4038e-03, -7.9554e-03, 1.0037e-02,\n 3.3312e-03, -7.3519e-03, 5.0805e-03, 2.6105e-03, 3.6685e-03,\n 1.1407e-02, -9.3094e-03, 1.1988e-02, -8.7309e-03, -1.1294e-02,\n -1.1097e-02, -7.4746e-03, 3.0398e-04, -7.1617e-03, -3.2523e-02,\n -7.2682e-03, -3.3911e-04, -6.3377e-04, -5.6104e-03, -4.5035e-03,\n 7.8195e-05, -2.1109e-03, -9.6675e-03, 6.3618e-03, -3.6465e-03,\n -1.7168e-04, -7.1854e-03, 1.6412e-03, -1.9722e-02, 5.2361e-04,\n 7.8811e-02, 1.5404e-02, 4.0893e-03, 3.6069e-03, -2.3669e-03,\n 9.7648e-03, -8.5945e-03, -2.3564e-02, 3.3461e-03, 3.5866e-03,\n -7.1610e-03, -1.2985e-03, 4.9977e-03, 1.3620e-03, -2.6651e-02,\n -4.2360e-04, -1.3301e-02, -1.3010e-02, -4.4045e-03, -1.1769e-02,\n 1.2803e-05, 7.8561e-03, -9.2411e-03, -2.4750e-03, -3.0044e-02,\n 6.7805e-03, -3.7439e-03, -1.0615e-02, -6.3550e-03, -1.0117e-02,\n -1.9079e-02, -1.2339e-02, -5.2599e-04, -1.0749e-02, 6.8386e-03,\n -1.3470e-03, -5.0233e-03, -9.7264e-03, -1.0827e-02, 1.7839e-02,\n 7.9854e-03, 1.5069e-03, 8.8365e-03, 1.1377e-03, 1.7057e-02,\n 1.1555e-03, 5.6400e-05, -3.6280e-03, 1.1473e-02, 5.9707e-03,\n 1.8616e-02, 6.4415e-03, -4.5400e-03, -5.1533e-03, -1.9880e-03,\n 3.0032e-03, 1.5430e-02, 2.8405e-03, 4.3252e-03, 4.8663e-03,\n -1.6893e-03, 1.0844e-02, 2.3014e-02, 1.4174e-03, -1.8245e-02,\n 5.3498e-03, -7.0509e-03, 3.9498e-03, 3.7049e-03, 5.9945e-03,\n 3.2490e-03, 3.4216e-03, -3.1562e-03, 8.6309e-03, -1.5280e-02,\n -7.5804e-04, 2.3397e-03, 1.2579e-03, -4.2271e-03, 5.2033e-02,\n 2.3210e-03, 3.5760e-03, 2.5186e-03, 3.6379e-03, 1.6315e-02,\n 4.3294e-03, 4.2600e-04, 5.1046e-03, 5.3001e-03, -4.7600e-03,\n 5.5077e-03, -6.6134e-03, 1.0780e-02, -1.0336e-02, 8.3947e-03,\n -4.1472e-03, -1.6524e-02, -9.4112e-03, -4.3173e-03, -1.5720e-03,\n 6.4621e-03, 1.0422e-03, 5.3547e-04, 1.6364e-03, -3.8095e-02,\n 1.2536e-02, 6.5058e-03, -6.0915e-03, -9.2915e-03, 7.1662e-04,\n 2.3246e-03, 3.8525e-03, 7.9266e-03, -5.1085e-04, -8.4657e-03,\n -1.9401e-02, 8.9231e-05, -1.7087e-02, -3.6102e-03, 1.0281e-02,\n 7.7888e-03, -4.2051e-03, -1.5990e-02, -6.0856e-04, 4.3464e-04,\n 7.6156e-03, 2.7250e-04, 1.5257e-03, 1.4407e-02, 6.4820e-03,\n -1.3228e-02, 5.9666e-03], device='cuda:0')",
13
+ "exp_avg_sq": "tensor([0.0006, 0.0004, 0.0006, 0.0008, 0.0006, 0.0008, 0.0005, 0.0006, 0.0008,\n 0.0008, 0.0007, 0.0006, 0.0007, 0.0006, 0.0005, 0.0005, 0.0007, 0.0008,\n 0.0007, 0.0007, 0.0005, 0.0004, 0.0008, 0.0006, 0.0005, 0.0004, 0.0009,\n 0.0008, 0.0007, 0.0007, 0.0006, 0.0010, 0.0005, 0.0005, 0.0007, 0.0007,\n 0.0004, 0.0006, 0.0007, 0.0007, 0.0005, 0.0006, 0.0006, 0.0007, 0.0006,\n 0.0009, 0.0007, 0.0009, 0.0006, 0.0006, 0.0008, 0.0005, 0.0006, 0.0007,\n 0.0008, 0.0008, 0.0008, 0.0008, 0.0006, 0.0007, 0.0005, 0.0006, 0.0005,\n 0.0006, 0.0006, 0.0007, 0.0006, 0.0005, 0.0009, 0.0006, 0.0006, 0.0006,\n 0.0005, 0.0005, 0.0006, 0.0007, 0.0006, 0.0007, 0.0005, 0.0005, 0.0006,\n 0.0006, 0.0005, 0.0005, 0.0007, 0.0009, 0.0004, 0.0005, 0.0009, 0.0007,\n 0.0006, 0.0006, 0.0007, 0.0006, 0.0006, 0.0006, 0.0005, 0.0007, 0.0007,\n 0.0006, 0.0016, 0.0005, 0.0009, 0.0009, 0.0004, 0.0006, 0.0007, 0.0006,\n 0.0010, 0.0006, 0.0005, 0.0008, 0.0009, 0.0005, 0.0009, 0.0010, 0.0007,\n 0.0003, 0.0006, 0.0007, 0.0007, 0.0007, 0.0009, 0.0008, 0.0007, 0.0006,\n 0.0008, 0.0004, 0.0007, 0.0005, 0.0007, 0.0006, 0.0008, 0.0007, 0.0007,\n 0.0009, 0.0006, 0.0008, 0.0007, 0.0007, 0.0006, 0.0005, 0.0006, 0.0006,\n 0.0005, 0.0004, 0.0008, 0.0006, 0.0007, 0.0006, 0.0006, 0.0005, 0.0007,\n 0.0006, 0.0005, 0.0007, 0.0005, 0.0008, 0.0007, 0.0007, 0.0007, 0.0007,\n 0.0005, 0.0009, 0.0006, 0.0006, 0.0006, 0.0008, 0.0009, 0.0009, 0.0007,\n 0.0006, 0.0007, 0.0004, 0.0008, 0.0008, 0.0007, 0.0006, 0.0005, 0.0005,\n 0.0004, 0.0006, 0.0006, 0.0005, 0.0007, 0.0008, 0.0006, 0.0007, 0.0007,\n 0.0005, 0.0005, 0.0008, 0.0006, 0.0006, 0.0006, 0.0007, 0.0007, 0.0006,\n 0.0006, 0.0004, 0.0006, 0.0009, 0.0006, 0.0004, 0.0006, 0.0006, 0.0005,\n 0.0005, 0.0007, 0.0006, 0.0006, 0.0007, 0.0004, 0.0005, 0.0004, 0.0005,\n 0.0004, 0.0006, 0.0006, 0.0007, 0.0006, 0.0008, 0.0009, 0.0007, 0.0007,\n 0.0006, 0.0006, 0.0007, 0.0009, 0.0006, 0.0004, 0.0005, 0.0007, 0.0005,\n 0.0005, 0.0009, 0.0011, 0.0007, 0.0006, 0.0007, 0.0007, 0.0007, 0.0007,\n 0.0006, 0.0007, 0.0006, 0.0007, 0.0008, 0.0005, 0.0007, 0.0006, 0.0018,\n 0.0008, 0.0005, 0.0007, 0.0007, 0.0004, 0.0005, 0.0008, 0.0010, 0.0005,\n 0.0006, 0.0006, 0.0004, 0.0005, 0.0005, 0.0005, 0.0009, 0.0008, 0.0007,\n 0.0006, 0.0005, 0.0007, 0.0006, 0.0007, 0.0006, 0.0009, 0.0004, 0.0005,\n 0.0005, 0.0010, 0.0007, 0.0006, 0.0005, 0.0006, 0.0007, 0.0007, 0.0008,\n 0.0009, 0.0007, 0.0006, 0.0006, 0.0007, 0.0006, 0.0008, 0.0007, 0.0005,\n 0.0008, 0.0006, 0.0007, 0.0009, 0.0005, 0.0009, 0.0005, 0.0008, 0.0006,\n 0.0009, 0.0007, 0.0003, 0.0008, 0.0008, 0.0008, 0.0006, 0.0008, 0.0006,\n 0.0007, 0.0008, 0.0004, 0.0006, 0.0006, 0.0006, 0.0006, 0.0007, 0.0004,\n 0.0007, 0.0007, 0.0006, 0.0005, 0.0007, 0.0006, 0.0007, 0.0007, 0.0009,\n 0.0007, 0.0007, 0.0009, 0.0010, 0.0009, 0.0008, 0.0007, 0.0007, 0.0006,\n 0.0008, 0.0005, 0.0006, 0.0006, 0.0007, 0.0006, 0.0006, 0.0008, 0.0008,\n 0.0007, 0.0006, 0.0005, 0.0005, 0.0008, 0.0005, 0.0007, 0.0009, 0.0004,\n 0.0009, 0.0008, 0.0006, 0.0005, 0.0008, 0.0008, 0.0008, 0.0004, 0.0005,\n 0.0005, 0.0009, 0.0009, 0.0009, 0.0006, 0.0005, 0.0004, 0.0008, 0.0005,\n 0.0007, 0.0008, 0.0016, 0.0007, 0.0007, 0.0007, 0.0008, 0.0007, 0.0005,\n 0.0010, 0.0006, 0.0006, 0.0008, 0.0006, 0.0006, 0.0008, 0.0007, 0.0008,\n 0.0006, 0.0004, 0.0009, 0.0009, 0.0008, 0.0006, 0.0009, 0.0005, 0.0009,\n 0.0005, 0.0007, 0.0009, 0.0007, 0.0005, 0.0004, 0.0006, 0.0007, 0.0009,\n 0.0006, 0.0006, 0.0007, 0.0005, 0.0006, 0.0008, 0.0005, 0.0007, 0.0007,\n 0.0008, 0.0004, 0.0005, 0.0006, 0.0007, 0.0006, 0.0009, 0.0007, 0.0005,\n 0.0008, 0.0004, 0.0004, 0.0007, 0.0007, 0.0007, 0.0006, 0.0005, 0.0009,\n 0.0009, 0.0010, 0.0005, 0.0006, 0.0005, 0.0007, 0.0004, 0.0007, 0.0004,\n 0.0006, 0.0006, 0.0004, 0.0006, 0.0007, 0.0005, 0.0005, 0.0006, 0.0010,\n 0.0007, 0.0005, 0.0004, 0.0008, 0.0006, 0.0010, 0.0005, 0.0007, 0.0007,\n 0.0006, 0.0008, 0.0007, 0.0006, 0.0014, 0.0006, 0.0007, 0.0004, 0.0006,\n 0.0009, 0.0003, 0.0006, 0.0004, 0.0007, 0.0007, 0.0006, 0.0008, 0.0007,\n 0.0006, 0.0006, 0.0005, 0.0006, 0.0006, 0.0007, 0.0005, 0.0004, 0.0006,\n 0.0008, 0.0006, 0.0006, 0.0006, 0.0008, 0.0006, 0.0007, 0.0007, 0.0007,\n 0.0006, 0.0006, 0.0008, 0.0007, 0.0006, 0.0006, 0.0008, 0.0006],\n device='cuda:0')"
14
+ },
15
+ "2": {
16
+ "step": "tensor(11268.)",
17
+ "exp_avg": "tensor([[ 1.4461e-04, 5.8922e-06, 8.6287e-05, ..., -4.1095e-04,\n 3.0882e-04, 2.1959e-04],\n [-2.1989e-05, -1.2813e-04, -5.6369e-05, ..., -4.3828e-04,\n -7.0481e-05, 9.8500e-05],\n [ 9.1396e-06, 7.4801e-05, 4.8994e-05, ..., -3.6861e-04,\n -5.5868e-04, -1.1809e-04],\n ...,\n [ 3.1455e-04, -1.9038e-04, 1.4647e-04, ..., -3.0098e-04,\n 5.3132e-05, -1.2049e-04],\n [ 3.1231e-04, 4.7615e-05, 2.1041e-04, ..., -1.6836e-04,\n -1.8784e-04, 1.6200e-05],\n [ 1.2367e-04, 6.0291e-05, -1.0295e-04, ..., 4.4392e-05,\n -8.5706e-04, 1.6179e-05]], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([[2.8448e-07, 4.1767e-07, 2.0037e-07, ..., 5.4468e-07, 1.4398e-06,\n 8.1644e-07],\n [2.7267e-07, 3.6098e-07, 3.0731e-07, ..., 4.9010e-07, 2.9930e-06,\n 4.1595e-07],\n [2.9431e-07, 4.1193e-07, 2.9433e-07, ..., 4.3835e-07, 1.2463e-06,\n 4.5530e-07],\n ...,\n [3.0323e-07, 3.9045e-07, 2.1987e-07, ..., 4.7278e-07, 1.1290e-06,\n 4.2543e-07],\n [3.7625e-07, 3.8610e-07, 2.2957e-07, ..., 5.8276e-07, 1.8076e-06,\n 6.5429e-07],\n [3.3706e-07, 5.2069e-07, 2.6706e-07, ..., 4.9365e-07, 1.2029e-06,\n 4.7723e-07]], device='cuda:0')"
19
+ },
20
+ "3": {
21
+ "step": "tensor(11268.)",
22
+ "exp_avg": "tensor([[ 1.4493e-04, -4.5064e-05, 2.8791e-04, ..., -2.9881e-04,\n 3.8029e-05, -4.0278e-05],\n [-8.6815e-06, 4.4611e-06, -1.0120e-04, ..., 3.2682e-05,\n 1.2858e-04, -9.1267e-05],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 7.5058e-05, -7.1155e-05, 1.5281e-04, ..., 3.9989e-05,\n -3.3204e-05, 6.3434e-05],\n [ 1.3890e-04, 5.3086e-05, -1.3966e-05, ..., -9.5481e-06,\n -2.5225e-05, 1.5595e-05],\n [ 6.9145e-05, 2.2986e-06, 9.7444e-04, ..., -3.2240e-05,\n -5.0486e-05, 1.9500e-05]], device='cuda:0')",
23
+ "exp_avg_sq": "tensor([[5.8808e-07, 1.9347e-07, 6.2380e-07, ..., 2.1621e-06, 1.9692e-07,\n 1.4829e-06],\n [4.2872e-07, 3.0837e-07, 5.0626e-07, ..., 2.5355e-07, 5.0108e-07,\n 1.4686e-07],\n [1.0925e-14, 2.5781e-14, 4.8328e-15, ..., 3.2438e-15, 3.1809e-14,\n 4.4238e-16],\n ...,\n [4.1502e-07, 1.9271e-07, 9.4519e-08, ..., 1.8710e-07, 1.4597e-07,\n 4.1802e-07],\n [3.8234e-07, 2.1633e-07, 1.5505e-07, ..., 2.1013e-07, 1.4037e-06,\n 2.0529e-07],\n [1.1874e-06, 1.0973e-07, 1.2272e-06, ..., 1.4825e-07, 1.4089e-07,\n 3.9051e-07]], device='cuda:0')"
24
+ },
25
+ "4": {
26
+ "step": "tensor(11268.)",
27
+ "exp_avg": "tensor([-1.2687e-03, -6.3784e-03, 5.6052e-45, ..., 4.7665e-03,\n 1.0887e-03, 9.2544e-03], device='cuda:0')",
28
+ "exp_avg_sq": "tensor([1.9958e-04, 2.0819e-04, 7.1570e-11, ..., 2.0092e-04, 1.5116e-04,\n 2.3151e-04], device='cuda:0')"
29
+ },
30
+ "5": {
31
+ "step": "tensor(11268.)",
32
+ "exp_avg": "tensor([[ 5.2945e-05, 3.7541e-05, 5.6052e-45, ..., -7.6235e-05,\n -8.1562e-06, 6.1309e-05],\n [-7.8236e-05, 7.4647e-05, 5.6052e-45, ..., 2.0452e-06,\n -4.8421e-05, -2.9519e-05],\n [-4.4599e-05, -5.0931e-05, 5.6052e-45, ..., -6.6861e-06,\n 7.7820e-05, -3.3012e-05],\n ...,\n [ 3.7831e-05, -4.8087e-05, 5.6052e-45, ..., -2.6125e-05,\n 5.1937e-05, 6.0176e-05],\n [ 2.2033e-05, -5.8039e-06, -5.6052e-45, ..., -7.2977e-05,\n -9.7494e-06, -1.2131e-04],\n [-1.3946e-04, -1.5479e-05, -5.6052e-45, ..., 3.6275e-05,\n -5.6677e-05, -7.8871e-05]], device='cuda:0')",
33
+ "exp_avg_sq": "tensor([[7.0174e-08, 5.7951e-08, 6.2440e-15, ..., 6.5441e-08, 7.0005e-08,\n 9.4633e-08],\n [7.3546e-08, 7.6848e-08, 1.1581e-14, ..., 8.3659e-08, 7.6802e-08,\n 1.2848e-07],\n [1.2314e-07, 8.2620e-08, 2.0167e-14, ..., 1.9243e-07, 6.8471e-08,\n 1.0034e-07],\n ...,\n [1.0059e-07, 8.3188e-08, 8.9259e-15, ..., 6.3773e-08, 6.7138e-08,\n 1.0799e-07],\n [7.7532e-08, 9.5208e-08, 1.2048e-14, ..., 1.0944e-07, 5.1211e-08,\n 8.7159e-08],\n [1.1030e-07, 7.1395e-08, 2.5410e-14, ..., 7.2822e-08, 5.9976e-08,\n 1.1503e-07]], device='cuda:0')"
34
+ },
35
+ "6": {
36
+ "step": "tensor(11268.)",
37
+ "exp_avg": "tensor([[ 3.5001e-05, 1.7822e-04, -5.9103e-05, ..., 1.7095e-05,\n 4.0542e-05, -1.5836e-04],\n [-1.0040e-04, -3.3251e-04, 2.1872e-04, ..., -4.6738e-05,\n 1.8792e-05, 2.2905e-04],\n [ 6.9500e-05, -3.7838e-04, 2.4310e-04, ..., 1.8180e-04,\n 1.3547e-04, -1.5315e-04],\n ...,\n [ 3.7128e-05, 2.8219e-04, -3.5330e-04, ..., -5.4278e-05,\n 8.7148e-05, 7.9686e-05],\n [-1.9591e-04, 3.1997e-04, 2.2430e-04, ..., 2.1021e-04,\n -2.5169e-04, 5.2411e-05],\n [-7.5841e-05, 4.9654e-04, 4.8611e-04, ..., -1.2180e-04,\n -2.7089e-04, -7.7828e-05]], device='cuda:0')",
38
+ "exp_avg_sq": "tensor([[4.1246e-07, 8.9637e-07, 8.5357e-07, ..., 2.2403e-07, 1.6145e-07,\n 2.7297e-07],\n [4.1604e-07, 5.8915e-07, 1.0356e-06, ..., 1.8278e-07, 1.2903e-07,\n 2.8684e-07],\n [4.8465e-07, 8.4886e-07, 1.0642e-06, ..., 2.2491e-07, 1.8279e-07,\n 2.3127e-07],\n ...,\n [6.0516e-07, 9.9025e-07, 1.1142e-06, ..., 2.7677e-07, 1.8933e-07,\n 3.2706e-07],\n [4.9895e-07, 7.8077e-07, 7.0788e-07, ..., 1.8902e-07, 1.3465e-07,\n 1.7746e-07],\n [6.1349e-07, 1.2089e-06, 1.3608e-06, ..., 3.0307e-07, 2.4729e-07,\n 2.8169e-07]], device='cuda:0')"
39
+ },
40
+ "7": {
41
+ "step": "tensor(11268.)",
42
+ "exp_avg": "tensor([-2.8626e-03, 4.2887e-03, -7.1115e-03, -5.5399e-04, -1.6195e-04,\n -8.9988e-03, -2.3608e-03, -2.3189e-03, -2.5529e-03, 3.3662e-03,\n 1.2811e-03, -7.2326e-04, -1.7788e-03, -2.9055e-03, 7.0869e-03,\n -3.0375e-03, 1.0402e-02, -2.0283e-02, -2.6606e-03, 1.3185e-02,\n 9.7780e-03, 2.5158e-03, 4.2440e-03, -1.0192e-02, 2.2362e-03,\n 7.2798e-03, 8.0636e-03, 3.0448e-04, -6.9712e-03, 4.8059e-03,\n 9.1935e-04, -1.0211e-02, 5.5154e-03, 6.4450e-03, 3.0396e-03,\n -7.8074e-03, -3.6341e-03, 4.2218e-03, -3.0192e-04, 3.2170e-03,\n -1.8014e-03, 3.4845e-03, 8.0194e-03, -2.8213e-03, -6.7223e-04,\n 6.7199e-03, 2.9578e-03, -2.8908e-03, -5.4612e-03, 4.5918e-03,\n 1.3496e-03, 5.4750e-03, 1.0764e-04, 1.5614e-03, -6.3434e-03,\n 1.1546e-03, 3.1378e-03, 1.7676e-02, -3.0708e-04, -9.5971e-03,\n -6.7763e-03, 8.6770e-04, -7.3301e-03, 2.2432e-03, 1.8586e-05,\n -1.2944e-03, 8.1958e-03, -2.6839e-03, -3.4847e-03, 6.3340e-03,\n -7.8391e-03, -2.2081e-03, -1.2914e-02, -2.2318e-03, 3.7860e-03,\n 1.3807e-04, 9.0395e-04, 5.1350e-04, 1.0215e-02, -5.2659e-03,\n 1.6883e-03, -5.4989e-03, -2.1785e-03, 7.2376e-03, 1.2463e-02,\n -1.1679e-03, -8.8628e-03, -2.5592e-03, 3.4827e-03, -2.6498e-03,\n 2.5798e-03, -9.4972e-04, -2.7486e-03, 9.3637e-03, -2.1633e-03,\n -1.8448e-03, 1.3059e-03, -6.1259e-03, 3.2016e-03, 3.0311e-03,\n -1.4796e-03, -2.4563e-03, -3.7220e-03, 6.7485e-04, -4.9846e-03,\n -9.3463e-03, -8.9852e-03, 3.8179e-03, -1.0523e-02, -5.6999e-04,\n -8.3585e-03, 3.6076e-03, -4.1932e-04, 5.4689e-03, -1.1580e-03,\n 8.6820e-04, 8.7179e-03, 6.4600e-04, 8.3655e-03, 6.5264e-03,\n -6.9516e-04, -3.6393e-03, -1.9533e-03, 4.0427e-03, -7.7638e-03,\n -8.7861e-03, 4.1686e-03, -4.9423e-03, 3.0589e-03, -1.8493e-04,\n -1.0630e-03, -1.3999e-03, 1.4929e-04, 2.0753e-03, 2.2034e-03,\n 2.4634e-03, 2.7191e-04, -1.6230e-02, 2.6063e-03, -2.9658e-03,\n -2.9065e-03, -1.0887e-02, 2.2412e-02, -7.2590e-04, 5.4358e-03,\n 1.3109e-03, -2.9899e-03, -9.3299e-03, 9.1125e-03, -9.4361e-04,\n -7.0342e-04, 2.6778e-03, -1.6241e-02, -1.5078e-03, 8.0889e-03,\n -1.2412e-03, 5.7857e-03, 1.3068e-03, -6.2176e-03, -5.2615e-03,\n -3.0261e-04, 8.0319e-03, 2.4618e-03, -2.1664e-02, -1.3465e-03,\n -4.8608e-03, -8.4136e-03, -2.9955e-03, 1.6651e-03, -2.9116e-03,\n -4.8485e-04, -9.5121e-03, -6.8080e-03, -7.3199e-03, -1.4951e-03,\n 7.1787e-03, -3.0809e-03, -6.1113e-03, 5.4711e-04, 9.8089e-03,\n -8.6048e-03, 4.3872e-03, 1.3664e-03, 7.6411e-03, -2.3687e-03,\n 1.3346e-03, 5.2714e-03, 2.5839e-04, 2.5201e-03, -2.9883e-03,\n -2.5346e-03, -1.2910e-03, -1.2185e-03, 4.1390e-03, 1.9529e-03,\n -3.7292e-03, -7.0787e-04, 9.4130e-05, -6.8702e-03, -2.7437e-04,\n -5.0103e-03, 1.7699e-02, 3.0436e-03, -1.0293e-02, 1.7889e-03,\n 5.1549e-03, -5.7797e-03, 3.8427e-03, -2.2036e-03, -6.3546e-03,\n -7.3848e-05, 1.2204e-03, 3.7007e-03, -6.6175e-03, 2.0832e-03,\n 2.2489e-03, 4.5905e-03, 1.5432e-02, 1.5373e-03, 2.7539e-03,\n -2.7393e-03, -8.1036e-04, 6.5010e-03, -1.0456e-03, 4.7627e-03,\n -3.1818e-03, 5.7914e-03, 2.5188e-03, 1.6307e-02, 3.8147e-03,\n -5.9300e-03, 8.9938e-03, -4.8164e-03, 2.5701e-03, -3.3284e-03,\n -4.3932e-03, -4.0734e-03, 3.3015e-03, -1.2881e-03, 2.0812e-03,\n 9.1042e-03, 7.1003e-03, -3.4806e-03, -2.1532e-03, -3.3623e-03,\n -1.2974e-03, -1.2734e-04, -3.4311e-03, -2.9343e-03, 1.0149e-02,\n 5.7426e-03, 5.1537e-03, 4.7976e-03, 1.5256e-03, -3.4395e-04,\n -3.6141e-03, 5.1192e-03, -6.6022e-03, -7.6473e-03, -2.2726e-03,\n 4.7706e-03, 6.2029e-03, -1.8241e-02, 1.0282e-02, 8.3914e-03,\n 9.6000e-03, -7.5749e-03, -9.4951e-03, 1.9184e-03, 6.5756e-03,\n -5.1019e-03, -7.2113e-03, 5.4169e-03, -3.5489e-03, 3.6864e-03,\n -1.2212e-02, 2.1879e-03, -1.0012e-03, 1.1187e-02, 5.3525e-04,\n -2.6867e-03, 3.8446e-03, 5.3504e-03, -2.4368e-03, 1.9272e-03,\n 9.9024e-04, 2.0431e-03, -3.6331e-03, -8.4358e-03, 9.0972e-04,\n 3.2460e-03, 5.0361e-03, -2.3930e-03, 1.8383e-03, -8.4476e-03,\n -7.5504e-03, 7.3885e-03, 6.7424e-03, -4.9359e-03, 2.5688e-03,\n -7.6571e-03, 9.3905e-03, 4.5031e-03, 5.1218e-03, -9.0009e-03,\n 3.6676e-04, 6.3917e-03, -6.8906e-03, -7.3368e-03, 4.6198e-03,\n -1.4499e-03, -7.9494e-04, 1.0073e-03, 1.0296e-02, -1.0094e-02,\n 3.4964e-03, -5.7245e-03, 3.6541e-03, 4.8353e-04, -7.0122e-03,\n 9.2652e-04, -6.3684e-04, -4.7858e-04, 3.6567e-03, -1.2011e-02,\n -5.2637e-03, 2.7226e-03, -1.8028e-03, -6.0267e-03, 1.3020e-03,\n -3.2745e-03, 9.5525e-03, -1.1584e-02, 1.2375e-03, -4.3362e-03,\n -3.7866e-03, 1.3906e-03, -3.6153e-03, -2.3298e-04, -1.0045e-03,\n -5.0704e-03, 6.8880e-03, -3.6286e-03, -2.8452e-03, -1.3810e-02,\n -1.0007e-03, -7.2693e-03, -2.5156e-03, 6.8720e-04, 1.6139e-03,\n -3.4377e-03, -2.2105e-03, -3.8081e-03, -6.0146e-03, -8.9909e-03,\n 1.6778e-04, 7.7555e-03, 1.8111e-03, 1.0631e-02, -8.9038e-04,\n -4.4654e-03, 7.6696e-03, 8.1064e-03, 1.7049e-02, 6.7303e-03,\n -2.9966e-03, -3.3909e-03, -1.9361e-03, 5.8758e-03, 1.2887e-02,\n 1.2723e-03, 7.1627e-04, -1.8168e-02, -2.3371e-03, 2.9481e-03,\n 9.5505e-03, -1.8310e-03, -5.9751e-03, -2.1925e-03, 1.7734e-03,\n 9.4625e-03, -1.6845e-04, -9.1692e-03, -4.9847e-03, 9.8858e-03,\n 1.5427e-02, 2.4374e-03, -1.4788e-02, 1.3924e-03, 3.9667e-03,\n -3.7612e-04, 2.4357e-03, -1.2540e-02, 2.5904e-03, 1.6893e-03,\n -1.9397e-03, -5.3921e-03, -5.1996e-03, 5.9810e-03, -5.8691e-03,\n 1.9643e-03, 1.4722e-03, -2.6817e-04, 8.4779e-03, -1.8594e-03,\n -1.7568e-03, 2.5953e-03, -2.0548e-03, -3.3519e-04, -1.7819e-03,\n 1.1401e-02, 3.2611e-03, 4.3467e-03, 9.1219e-03, 1.9970e-03,\n -1.2798e-03, 3.0445e-03, -1.0800e-02, 2.1613e-03, -1.8163e-03,\n -5.1978e-03, -8.0887e-03, 6.2232e-04, -3.5761e-03, -1.6061e-03,\n 4.6974e-03, 4.0667e-03, -8.4550e-03, -1.1124e-04, -2.5472e-03,\n 5.7024e-03, 3.6210e-03, -4.3020e-03, 5.6581e-03, 4.5905e-03,\n -8.0993e-03, 8.2668e-03, 5.2170e-03, -1.0991e-03, 1.4470e-03,\n -3.5725e-03, 5.5958e-03, 1.1483e-03, 4.5982e-03, 1.6507e-04,\n -4.7229e-03, -1.1338e-02, 5.7506e-03, 5.3028e-03, 9.0936e-03,\n -2.4349e-03, -4.2274e-03, 4.3839e-03, 9.2736e-03, -6.5040e-03,\n -2.4929e-03, 8.7949e-03, -3.2130e-03, -8.2627e-03, -4.6228e-03,\n 5.8370e-03, 4.7023e-03, 5.0534e-03, 6.4438e-03, 1.6110e-04,\n -2.5300e-03, -4.9808e-03, 3.1606e-03, -6.8864e-03, -6.2209e-03,\n 1.6008e-02, 1.0673e-02, -5.6338e-03, -8.4255e-03, -1.0926e-02,\n -5.2321e-03, -7.2893e-04, 5.7968e-03, -1.3510e-02, 4.1856e-03,\n 1.3235e-03, -4.0250e-03, 1.1722e-02, 1.1790e-02, 4.4740e-04,\n 3.7140e-03, 1.4684e-04, -3.9648e-03, 7.7227e-03, -1.0976e-02,\n 3.2860e-03, -8.1139e-03, -1.6657e-03, -7.9335e-03, 1.3762e-03,\n -3.6077e-03, 1.3616e-03, -2.4939e-03, -6.2840e-03, -9.5954e-03,\n 6.2613e-03, -4.6534e-03, -4.6705e-03, -1.8821e-02, 6.8832e-03,\n 7.2442e-04, 6.3711e-03, -2.4542e-03, 4.6049e-03, 1.6123e-06,\n 5.7117e-03, 1.3220e-02], device='cuda:0')",
43
+ "exp_avg_sq": "tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002,\n 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003,\n 0.0002, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002,\n 0.0003, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0003,\n 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0002,\n 0.0004, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0003,\n 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002,\n 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002,\n 0.0002, 0.0003, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002,\n 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002,\n 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0002,\n 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0003,\n 0.0002, 0.0003, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0004, 0.0003,\n 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002,\n 0.0003, 0.0002, 0.0003, 0.0001, 0.0003, 0.0002, 0.0002, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0002, 0.0002, 0.0003, 0.0002, 0.0002, 0.0003, 0.0003,\n 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0001, 0.0002, 0.0003,\n 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002,\n 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002,\n 0.0002, 0.0003, 0.0002, 0.0002, 0.0001, 0.0003, 0.0002, 0.0002, 0.0002,\n 0.0002, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002,\n 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003,\n 0.0002, 0.0002, 0.0003, 0.0002, 0.0002, 0.0003, 0.0004, 0.0003, 0.0002,\n 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002,\n 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002,\n 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0004,\n 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0001, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0002, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003,\n 0.0002, 0.0003, 0.0001, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002, 0.0003,\n 0.0002, 0.0002, 0.0002, 0.0003, 0.0004, 0.0003, 0.0002, 0.0003, 0.0002,\n 0.0002, 0.0002, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002,\n 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0001, 0.0002, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003,\n 0.0002, 0.0002, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002,\n 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0002, 0.0003, 0.0003,\n 0.0002, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002, 0.0002, 0.0003, 0.0002,\n 0.0002, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002,\n 0.0003, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002,\n 0.0003, 0.0002, 0.0002, 0.0004, 0.0004, 0.0003, 0.0001, 0.0002, 0.0002,\n 0.0002, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003, 0.0003, 0.0002, 0.0002,\n 0.0002, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002,\n 0.0002, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003,\n 0.0002, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003,\n 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0003,\n 0.0002, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002,\n 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002,\n 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002,\n 0.0002, 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003,\n 0.0003, 0.0003, 0.0002, 0.0002, 0.0001, 0.0002, 0.0003, 0.0002, 0.0002,\n 0.0002, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002,\n 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0003, 0.0003, 0.0002,\n 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0002,\n 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002,\n 0.0003, 0.0002, 0.0001, 0.0002, 0.0002, 0.0003, 0.0003, 0.0002, 0.0003,\n 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0002, 0.0003],\n device='cuda:0')"
44
+ },
45
+ "8": {
46
+ "step": "tensor(11268.)",
47
+ "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-2.1726e-05, -2.0737e-05, 5.4676e-05, ..., -8.9502e-06,\n 5.8811e-06, -2.2299e-05],\n [ 3.4456e-05, -2.3864e-05, -1.7990e-05, ..., -4.5371e-05,\n -5.5688e-04, -4.6960e-05],\n ...,\n [ 7.3045e-05, -1.4396e-05, -7.2382e-05, ..., 1.4152e-05,\n 4.4425e-06, -7.2016e-05],\n [-2.1788e-05, -4.4295e-05, 1.3472e-05, ..., 2.4097e-05,\n 2.0921e-04, 7.7787e-06],\n [ 4.0601e-05, -2.6863e-05, -4.4024e-07, ..., -2.0676e-04,\n 2.2939e-04, -5.8245e-05]], device='cuda:0')",
48
+ "exp_avg_sq": "tensor([[1.3673e-16, 4.2759e-15, 2.4533e-16, ..., 1.8268e-14, 1.8304e-16,\n 6.4647e-17],\n [1.1339e-07, 1.8338e-07, 1.2011e-07, ..., 8.2151e-08, 2.5583e-08,\n 1.0864e-07],\n [5.4698e-08, 1.9423e-07, 7.4262e-08, ..., 6.1816e-08, 1.8420e-06,\n 1.9290e-07],\n ...,\n [6.6442e-07, 3.5122e-08, 1.6364e-07, ..., 2.5239e-07, 4.3154e-08,\n 3.9322e-07],\n [1.0466e-07, 1.1309e-07, 4.2107e-08, ..., 2.2913e-07, 1.3951e-06,\n 4.1551e-08],\n [1.1452e-07, 3.0372e-08, 1.1301e-08, ..., 1.1388e-07, 2.7448e-07,\n 4.4935e-07]], device='cuda:0')"
49
+ },
50
+ "9": {
51
+ "step": "tensor(11268.)",
52
+ "exp_avg": "tensor([ 5.6052e-45, 9.3823e-04, 1.2805e-04, ..., -2.0295e-04,\n -1.2530e-02, -2.7217e-04], device='cuda:0')",
53
+ "exp_avg_sq": "tensor([2.2505e-11, 9.8493e-05, 1.9850e-04, ..., 1.0752e-04, 1.3991e-04,\n 1.0475e-04], device='cuda:0')"
54
+ },
55
+ "10": {
56
+ "step": "tensor(11268.)",
57
+ "exp_avg": "tensor([[-5.6052e-45, -2.1821e-05, -3.0600e-06, ..., 8.3007e-05,\n -4.8164e-05, -1.2502e-05],\n [ 5.6052e-45, -2.6125e-05, 4.0042e-06, ..., -2.9611e-05,\n -2.7832e-05, -1.8229e-05],\n [ 5.6052e-45, 2.0598e-05, 3.2717e-05, ..., 3.8066e-05,\n -1.8209e-06, 5.3381e-05],\n ...,\n [-5.6052e-45, -6.9626e-06, 2.7558e-05, ..., 4.8448e-05,\n -1.3998e-04, 5.0081e-06],\n [-5.6052e-45, 1.3571e-05, 6.5693e-06, ..., -5.4018e-05,\n -4.2927e-05, 1.6900e-05],\n [ 5.6052e-45, -8.2615e-06, 6.0227e-05, ..., 1.5422e-04,\n -4.0207e-05, -6.5065e-06]], device='cuda:0')",
58
+ "exp_avg_sq": "tensor([[9.3656e-16, 1.7560e-08, 3.6847e-08, ..., 2.8607e-08, 2.3215e-08,\n 2.6294e-08],\n [1.1913e-15, 1.7392e-08, 8.5748e-08, ..., 4.2599e-08, 2.4294e-08,\n 2.6348e-08],\n [1.3329e-16, 2.4858e-08, 2.7031e-08, ..., 3.6301e-08, 2.6226e-08,\n 3.0348e-08],\n ...,\n [1.2086e-15, 2.9596e-08, 8.1072e-08, ..., 3.9924e-08, 3.2625e-08,\n 3.2216e-08],\n [1.6942e-16, 1.9515e-08, 2.3398e-07, ..., 3.5773e-08, 2.8556e-08,\n 3.4718e-08],\n [4.2664e-17, 2.9021e-08, 3.2040e-08, ..., 3.7590e-08, 3.1373e-08,\n 3.7765e-08]], device='cuda:0')"
59
+ },
60
+ "11": {
61
+ "step": "tensor(11268.)",
62
+ "exp_avg": "tensor([[ 1.9591e-05, -4.4255e-05, 1.4146e-05, ..., 1.0548e-06,\n 6.3409e-05, -1.4703e-05],\n [-2.3175e-05, 2.0619e-04, 3.9949e-05, ..., 4.5876e-05,\n -6.3685e-05, 4.9687e-05],\n [-1.2234e-04, -1.2767e-04, -1.4183e-04, ..., 4.6679e-05,\n -3.0980e-05, 5.5822e-05],\n ...,\n [-1.7407e-04, -5.1237e-05, 8.2549e-05, ..., -2.1028e-05,\n 2.3704e-05, -1.0726e-04],\n [ 1.2774e-04, -2.8176e-04, 7.6253e-05, ..., 1.0252e-04,\n -7.9473e-05, 5.9226e-05],\n [ 1.7500e-05, -9.9622e-05, 4.0144e-05, ..., -2.0196e-05,\n -2.5387e-05, -3.6720e-05]], device='cuda:0')",
63
+ "exp_avg_sq": "tensor([[1.3366e-07, 2.2290e-07, 4.8887e-07, ..., 1.1728e-07, 8.8583e-08,\n 1.4913e-07],\n [1.2916e-07, 2.1199e-07, 2.6583e-07, ..., 1.2961e-07, 7.9870e-08,\n 1.0002e-07],\n [9.4893e-08, 2.2935e-07, 1.4930e-07, ..., 9.8380e-08, 6.4310e-08,\n 7.9121e-08],\n ...,\n [1.5817e-07, 3.1925e-07, 3.0201e-07, ..., 1.2405e-07, 9.4205e-08,\n 1.3860e-07],\n [1.2477e-07, 2.3858e-07, 2.6895e-07, ..., 1.0876e-07, 6.6774e-08,\n 1.1689e-07],\n [1.2944e-07, 2.5648e-07, 2.5728e-07, ..., 1.2112e-07, 7.9251e-08,\n 1.1978e-07]], device='cuda:0')"
64
+ },
65
+ "12": {
66
+ "step": "tensor(11268.)",
67
+ "exp_avg": "tensor([ 3.9888e-04, 7.9467e-03, 6.7003e-03, -2.7448e-03, 3.2326e-03,\n 2.8952e-03, 2.9881e-03, -6.7294e-04, 7.1821e-03, -1.0105e-02,\n -1.2411e-03, -2.8311e-03, -7.7678e-04, 8.5863e-04, -4.2297e-03,\n 6.6721e-04, 3.7070e-03, 1.7800e-05, 2.7295e-03, -4.3256e-03,\n 1.2243e-03, 2.1820e-03, 1.2684e-03, 1.0907e-03, 1.6098e-03,\n 1.2710e-03, 1.5030e-04, 3.3085e-03, -2.9048e-04, -5.6359e-03,\n 1.6284e-04, -4.3971e-03, -1.4706e-04, 7.6509e-04, -5.2736e-04,\n -3.8128e-03, 3.5841e-04, 6.9302e-03, 3.0138e-03, -1.0100e-03,\n -7.2757e-05, 2.4625e-03, -1.7605e-03, -3.2637e-03, 1.8695e-03,\n -2.0455e-03, -1.9237e-03, 2.5855e-03, 1.4509e-03, -6.4965e-05,\n -5.0918e-03, -2.1450e-03, 4.6037e-04, -2.3641e-03, -2.9530e-03,\n -8.8683e-03, 1.4394e-03, -6.8392e-03, 4.3664e-03, -3.3110e-03,\n 4.9554e-03, -7.5782e-03, -8.3288e-04, -1.1032e-04, -4.3858e-03,\n 3.0159e-03, -2.6051e-03, 6.9854e-04, -2.0324e-03, -1.4355e-04,\n -2.2040e-04, 4.8863e-03, 1.5034e-02, 4.4508e-04, -3.0376e-03,\n -1.3404e-04, 3.3593e-03, -2.8454e-03, -6.1999e-03, -1.2064e-02,\n -1.3188e-03, -2.4659e-03, 2.8432e-03, 4.2169e-03, 2.9903e-04,\n 2.8665e-03, 8.3949e-03, 2.0767e-03, -2.8302e-03, 1.8778e-03,\n 3.9550e-03, -1.3771e-03, -1.9739e-03, -4.6033e-03, -1.7193e-03,\n -1.3829e-02, -8.6500e-04, -3.3551e-03, 5.3060e-03, -4.0949e-04,\n -2.3713e-03, 4.5799e-03, -1.2987e-02, -3.3888e-03, 2.4825e-03,\n -2.4103e-03, -5.7758e-03, -3.7289e-03, 2.0270e-03, 3.9634e-03,\n 5.6052e-45, 4.6268e-03, 2.9712e-03, 7.2240e-03, 4.2841e-03,\n -4.3930e-03, -7.4313e-03, 1.3878e-03, 1.6531e-03, 2.8327e-03,\n 6.9008e-03, -4.9628e-03, -6.1043e-03, 1.5843e-03, 2.8848e-03,\n -3.5008e-03, 1.7107e-03, -1.6936e-03, 3.4542e-03, -4.0412e-03,\n 1.4603e-03, -7.6474e-04, 4.5288e-03, 4.4704e-04, 8.0541e-03,\n -1.3461e-03, 4.8204e-03, -4.4052e-03, 2.4119e-03, -1.8190e-03,\n 5.4405e-04, 1.4450e-03, 4.9430e-03, -4.8663e-03, -1.2408e-02,\n -2.6154e-03, 1.6070e-03, -8.2412e-03, 7.5990e-03, -1.7013e-03,\n -6.7516e-04, -5.6697e-03, 3.0047e-03, -1.9310e-03, 3.5222e-03,\n -3.4397e-03, 3.0099e-03, -2.2921e-03, -8.9357e-04, -4.4558e-03,\n 3.8766e-04, -1.1626e-03, 3.9219e-03, 2.2599e-03, -4.1072e-04,\n -4.3463e-03, 2.4629e-03, 2.1734e-04, -2.3870e-03, -2.9868e-06,\n 9.7476e-04, 1.6423e-04, -3.6739e-03, -4.5326e-03, 3.3945e-03,\n 9.9968e-03, 8.6072e-04, -7.6817e-04, 3.8741e-03, 2.8087e-03,\n 3.2286e-10, 3.9387e-03, -1.4117e-03, 1.5503e-03, -5.4925e-04,\n 1.5278e-03, -3.1457e-03, -8.3902e-03, 5.0009e-04, 3.5426e-03,\n 3.9891e-03, 2.1968e-03, 3.6063e-03, 2.5497e-03, -5.1075e-03,\n 1.9254e-03, 8.5173e-03, -1.1836e-06, -1.9279e-03, 1.8746e-03,\n 5.8524e-04, -6.4941e-03, 1.9992e-03, 1.4079e-04, -1.6945e-03,\n -1.2908e-03, 3.2992e-04, 2.6757e-03, 4.9722e-03, 6.5761e-04,\n -4.1404e-03, 8.6042e-03, -2.3478e-03, -6.9166e-03, -3.7018e-04,\n 1.0240e-03, -4.1017e-03, -1.5103e-04, -1.0875e-03, 2.5911e-03,\n 5.4050e-03, 8.6570e-04, -5.7893e-03, -5.6558e-03, 1.2702e-03,\n 1.4263e-03, -3.0787e-03, 1.4611e-03, 4.4373e-03, 3.8061e-03,\n 7.8530e-05, 2.6800e-03, 5.3444e-03, -4.2675e-04, 5.6296e-03,\n -2.0435e-03, -4.7860e-03, 2.6947e-03, -4.2857e-03, 2.0802e-03,\n 6.7999e-03, -1.2131e-02, 5.8355e-04, -4.8525e-03, -1.6053e-04,\n -1.5726e-03, -3.1616e-03, 2.9481e-03, 3.8384e-03, 2.3803e-03,\n -2.0046e-04, 7.0719e-05, 1.0247e-02, -2.1549e-03, 3.1103e-03,\n 1.5176e-02, 1.9920e-03, -3.6220e-03, 2.4869e-03, -5.4227e-04,\n -6.1620e-03, 4.1837e-03, 4.7410e-03, -2.1671e-03, -8.5640e-04,\n -4.0522e-04, -1.6107e-03, -4.1920e-03, 1.0637e-02, -3.6593e-03,\n -1.4227e-03, 1.2755e-03, -1.3710e-03, -3.9421e-03, 1.0448e-03,\n 1.6742e-03, 3.3275e-04, 4.0197e-03, -1.9821e-04, 3.1201e-03,\n -6.9235e-03, -6.5108e-05, -9.2150e-04, -4.2524e-03, -1.0235e-02,\n 4.6723e-03, 1.8834e-03, 2.7684e-03, 2.9872e-03, -2.3744e-03,\n 2.7656e-03, 1.4523e-03, -9.3661e-05, 8.3346e-03, -2.6820e-03,\n -9.0777e-05, 1.8839e-03, 1.0353e-03, -2.3393e-03, -6.6759e-03,\n -7.3035e-04, 3.0417e-03, -3.9230e-04, -1.9463e-03, 2.0669e-03,\n -1.5656e-03, 8.2067e-03, -1.1039e-02, -3.0100e-03, -1.2807e-03,\n -3.7149e-03, 3.6632e-04, 5.7342e-03, -2.2828e-03, 1.0151e-02,\n -3.3616e-04, -1.7913e-04, 3.8551e-03, -5.5256e-03, 1.4548e-03,\n -6.2881e-03, 2.2104e-03, -4.4067e-03, 1.9897e-03, -1.0410e-02,\n 1.1331e-03, -2.4448e-03, 4.2343e-03, 2.8014e-03, 1.8964e-03,\n -1.1804e-03, 3.2102e-03, 6.5619e-04, -8.1570e-04, 5.1050e-03,\n -2.0179e-03, 6.9196e-04, 3.0226e-03, -3.7784e-03, -1.5496e-03,\n -9.1865e-05, 4.9191e-03, -6.4929e-03, 5.8389e-03, -1.3583e-03,\n -1.7993e-03, 4.3639e-03, -2.1823e-03, 1.3495e-03, -9.1996e-04,\n -7.3210e-03, 2.7094e-03, -2.1998e-03, 9.7316e-04, -3.3933e-03,\n -3.6946e-03, -3.4267e-03, -6.8026e-03, 4.8241e-03, 9.8790e-03,\n -4.9003e-04, -1.0372e-03, 2.9329e-03, -5.0202e-03, -2.9892e-03,\n 8.7469e-03, 1.4217e-03, 9.0084e-04, 4.7282e-04, 8.6479e-04,\n -4.6309e-03, 6.3886e-03, 8.0948e-04, 4.9197e-04, -3.0310e-03,\n 1.9473e-03, -9.8371e-04, -2.8947e-03, -4.5504e-03, -2.6896e-03,\n -1.4454e-03, 5.4660e-03, -7.4152e-04, -2.5708e-04, -1.4831e-04,\n -3.9670e-03, -3.3123e-03, -1.5713e-03, 4.6244e-03, 1.7501e-03,\n 3.5603e-03, -3.6508e-03, 2.1356e-05, -4.5331e-03, 3.2183e-03,\n -8.1073e-03, -8.9105e-03, -1.2926e-03, 7.1990e-04, 4.1537e-04,\n 4.0302e-04, -1.7227e-03, 1.3665e-03, -4.2204e-04, -3.2162e-03,\n 5.6490e-03, 7.8713e-04, -6.4784e-04, 6.9357e-04, -2.1671e-03,\n 4.7387e-03, 1.5575e-03, 1.5952e-03, -1.2261e-03, -2.6895e-03,\n 2.1686e-03, -1.9505e-03, -1.8585e-04, -1.4845e-03, 1.7198e-02,\n -1.1250e-03, -4.7860e-03, -5.9528e-03, -4.8569e-03, 5.1175e-03,\n 5.6643e-04, 1.5220e-03, 2.3298e-03, 1.3295e-02, -1.5463e-03,\n -6.3826e-04, 4.4198e-03, -4.5399e-03, 1.0775e-03, -1.6161e-03,\n 2.8795e-03, 5.9631e-03, 7.7031e-03, -4.4472e-03, 9.5864e-04,\n 7.1545e-04, 5.2503e-03, -6.0460e-03, 2.9251e-03, 4.0147e-03,\n -6.6726e-04, -1.5519e-03, -3.2926e-03, -1.5585e-03, 4.8742e-03,\n -3.7067e-04, 3.7534e-03, 5.0042e-03, -3.2044e-03, -7.7318e-04,\n -9.5690e-04, -1.5120e-03, -1.3983e-03, -8.8385e-04, -2.7792e-04,\n 1.0565e-02, -1.7463e-03, 5.1252e-03, -2.1297e-03, -9.9846e-04,\n 1.0632e-03, -8.2394e-03, 6.5699e-03, 2.1225e-03, 7.0765e-04,\n -1.2615e-03, 2.9144e-03, -4.1462e-03, 1.9709e-03, 8.1502e-06,\n -2.3707e-03, 9.4999e-04, -8.9363e-04, -5.2543e-03, 2.3234e-03,\n 6.0251e-03, -6.5128e-04, -8.0008e-03, -9.3588e-04, 4.5600e-03,\n 2.2001e-03, 2.7023e-03, 1.5642e-03, -3.4238e-03, 1.9436e-03,\n 3.5339e-03, -2.4574e-03, 7.0254e-03, -3.6188e-03, 9.6640e-04,\n 1.4338e-03, -1.5608e-04, -2.0122e-03, 4.5445e-03, -9.0947e-04,\n 3.9174e-03, -4.2029e-04, -7.5578e-04, 1.2051e-03, -4.2445e-04,\n -9.5738e-04, 1.0723e-03, 1.5198e-03, 1.4345e-03, 3.1255e-03,\n 2.7234e-03, -3.1540e-03, 3.9234e-03, 2.0083e-03, -1.6907e-03,\n 1.8327e-03, -2.4284e-04, 1.0203e-05, -1.2888e-03, -2.5539e-03,\n -1.3442e-03, -5.7779e-03, -1.9309e-03, 2.9982e-03, -5.1670e-03,\n 9.5829e-04, 1.3222e-02, 4.9001e-04, 2.8186e-03, 6.3497e-03,\n 2.8696e-03, -6.3793e-03, 2.8223e-03, 3.1514e-03, 1.1178e-03,\n 2.0018e-03, 1.1292e-03, 5.4351e-04, 4.7094e-03, -1.3518e-03,\n 2.7389e-03, 1.3584e-04, 3.2302e-03, 3.8091e-03, 2.0277e-03,\n -1.7565e-03, 5.6723e-06, 5.3214e-03, -4.4665e-03, -6.1631e-03,\n -9.3236e-03, 5.4928e-04, -4.7966e-03, -1.5203e-03, -1.2621e-04,\n -2.1698e-04, 4.8544e-03, -7.9772e-03, 3.9735e-03, 8.1280e-03,\n 1.2733e-02, 1.2252e-03, 5.6812e-03, -5.4328e-03, 2.1937e-03,\n -1.9667e-03, -3.7842e-03, -4.8939e-03, 1.2823e-03, 1.2035e-03,\n 3.0897e-03, 2.0942e-03, -1.3517e-03, -7.7183e-03, 1.0225e-04,\n -3.1535e-03, 7.3757e-03, 7.9603e-04, 5.6052e-45, -4.9662e-04,\n -3.6290e-04, 5.6052e-45, -2.1143e-03, -3.6255e-04, 5.2362e-04,\n -6.3891e-03, 9.5591e-03, 7.9045e-04, -1.6372e-02, 2.0203e-03,\n -6.5198e-03, 7.8321e-03, 1.2752e-04, -1.4014e-04, -4.6424e-03,\n -7.7256e-04, 3.3493e-03, 6.7388e-03, -6.6085e-03, -6.9472e-03,\n -6.7992e-03, -2.2767e-04, -1.0556e-03, 1.5633e-03, -3.5198e-03,\n -1.6988e-03, -1.4237e-03, 1.2409e-03, -3.1819e-03, -3.9401e-03,\n -5.0490e-04, -2.1695e-03, -2.2678e-03, 6.2743e-03, 9.6476e-05,\n -3.8944e-03, 2.1778e-03, -3.9064e-03, -3.4922e-03, 5.0118e-03,\n -2.5605e-03, -3.2283e-03, 5.8717e-03, 3.1598e-03, 3.7594e-04,\n -1.6771e-04, 7.4908e-03, -8.3100e-03, -2.9243e-03, -2.6559e-03,\n -3.0878e-03, -1.1234e-03, -3.9949e-03, -2.8770e-03, -1.8039e-03,\n -1.0069e-03, 3.7282e-03, -3.4997e-03, 1.8246e-02, 3.9451e-03,\n -5.4234e-03, -2.5736e-03, 1.2278e-03, 4.7277e-03, 3.9737e-03,\n 1.2361e-03, -1.8901e-03, -1.5723e-03, 6.6807e-03, -1.4246e-03,\n 2.5006e-03, -3.8893e-03, -5.4476e-04, 9.4698e-04, -9.2328e-03,\n -3.2344e-03, 6.4479e-03, -7.4415e-03, -4.5784e-03, -1.0577e-02,\n -1.2649e-03, -2.4881e-03, 3.1223e-04, 3.1392e-03, 3.2459e-04,\n 8.7006e-04, -6.3300e-03, -8.8118e-04, 8.1277e-03, -4.2568e-05,\n 2.3001e-03, 2.7580e-03, 1.5321e-03, -4.9927e-03, 4.7913e-03,\n 2.7164e-03, -5.5866e-03, -2.7971e-03, 3.2425e-03, 3.9796e-04,\n -3.5025e-03, -9.3133e-03, -7.5214e-04, 5.6302e-03, 3.1165e-04,\n -1.1092e-02, 9.2127e-03, 3.8949e-03, -2.2632e-03, -3.4423e-03,\n 1.0889e-04, 2.2690e-03, 2.4881e-03, -7.3377e-04, 1.2864e-03,\n -1.8800e-03, -2.6714e-03, 4.0528e-03, 2.8495e-03, 3.5760e-03,\n -8.8938e-04, -1.4312e-03, 1.1633e-03, 2.9857e-03, -6.2436e-03,\n 1.1324e-03, 1.9313e-03, 6.6734e-03, 3.4508e-04, -2.1894e-03,\n 5.2531e-03, -9.8011e-03, -3.6065e-03, 1.7223e-03, -4.9016e-03,\n 5.0966e-03, -5.4821e-03, -9.2639e-03, -2.5341e-03, 5.5931e-03,\n 6.6671e-03, -4.5535e-04, -1.7886e-03, 5.9281e-03, -8.1438e-04,\n 2.0315e-03, -3.7951e-03, 3.0807e-03, -3.0220e-03, 4.3572e-03,\n 1.1468e-03, 1.3349e-03, 1.0306e-03, 1.7657e-03, 1.3164e-03,\n -1.1141e-02, -2.2597e-04, -1.9380e-03, -1.9736e-04, 8.6100e-04,\n -9.5691e-03, -4.4067e-03, 1.7959e-03, -7.6922e-04, -6.7799e-03,\n -5.2038e-03, 4.4296e-03, 2.9576e-03, 7.2042e-03, -3.7882e-03,\n 1.1884e-03, 1.2886e-03, 1.5818e-03, -1.8153e-03, 1.0284e-02,\n 1.6479e-03, -7.4971e-04, 2.6200e-03, -4.7072e-03, -3.9603e-03,\n 4.6454e-04, 5.8630e-03, -6.4776e-04, 5.3020e-03, 7.5458e-03,\n -4.9092e-03, 1.8242e-03, 7.4415e-04], device='cuda:0')",
68
+ "exp_avg_sq": "tensor([1.2446e-04, 1.2208e-04, 9.1455e-05, 1.3154e-04, 1.6002e-04, 1.5470e-04,\n 1.1655e-04, 1.2643e-04, 1.4950e-04, 1.2548e-04, 1.3324e-04, 2.1421e-04,\n 1.2233e-04, 1.4240e-04, 1.0789e-04, 8.4426e-05, 1.0564e-04, 1.1650e-04,\n 1.6259e-04, 7.6154e-05, 1.2426e-04, 1.4849e-04, 1.1825e-04, 1.3535e-04,\n 1.2079e-04, 1.4615e-04, 9.3214e-05, 1.2609e-04, 1.0320e-04, 1.3157e-04,\n 1.3033e-04, 1.1148e-04, 1.5668e-04, 1.0402e-04, 1.2513e-04, 1.9417e-04,\n 1.3777e-04, 1.2482e-04, 1.2729e-04, 1.3028e-04, 1.3790e-04, 1.3503e-04,\n 1.3974e-04, 8.3699e-05, 7.9792e-05, 1.0932e-04, 3.8403e-05, 1.0384e-04,\n 1.3184e-04, 1.0492e-04, 8.9401e-05, 1.2681e-04, 1.1868e-04, 9.5428e-05,\n 1.5043e-04, 1.3626e-04, 7.8222e-05, 9.0595e-05, 1.5511e-04, 1.0134e-04,\n 1.5634e-04, 1.5770e-04, 1.4041e-04, 1.1162e-04, 1.2461e-04, 1.5051e-04,\n 1.6579e-04, 1.3593e-04, 1.4665e-04, 1.0137e-04, 1.1431e-04, 1.1693e-04,\n 1.2986e-04, 1.5234e-04, 1.2169e-04, 1.1682e-04, 1.2147e-04, 1.2175e-04,\n 1.2599e-04, 1.7388e-04, 1.2139e-04, 1.1017e-04, 1.5075e-04, 1.6496e-04,\n 1.1867e-04, 1.4213e-04, 1.2249e-04, 1.3943e-04, 1.2221e-04, 1.4624e-04,\n 1.1669e-04, 9.3950e-05, 1.4103e-04, 1.2662e-04, 9.5898e-05, 1.7251e-04,\n 1.0582e-04, 1.0915e-04, 1.3556e-04, 1.4249e-04, 1.2243e-04, 1.6663e-04,\n 1.3900e-04, 8.6287e-05, 1.3285e-04, 1.1123e-04, 1.3053e-04, 1.2011e-04,\n 1.2412e-04, 9.7012e-05, 1.7105e-11, 9.0944e-05, 1.4919e-04, 1.2167e-04,\n 1.0503e-04, 1.4942e-04, 1.5258e-04, 1.3031e-04, 1.3560e-04, 1.5388e-04,\n 1.1634e-04, 1.5658e-04, 1.5636e-04, 1.4326e-04, 1.6291e-04, 9.6502e-05,\n 1.4434e-04, 1.1993e-04, 1.2751e-04, 1.4729e-04, 1.0212e-04, 1.4662e-04,\n 1.2964e-04, 1.3913e-04, 1.4560e-04, 1.0621e-04, 1.2681e-04, 1.4352e-04,\n 1.1978e-04, 1.0087e-04, 9.3121e-05, 9.0596e-05, 1.0672e-04, 1.2772e-04,\n 1.3673e-04, 1.3748e-04, 9.3687e-05, 1.5764e-04, 1.1014e-04, 1.2810e-04,\n 1.3045e-04, 8.9558e-05, 1.1864e-04, 1.4742e-04, 1.5426e-04, 1.2449e-04,\n 1.5308e-04, 1.5127e-04, 1.1643e-04, 1.5769e-04, 1.4259e-04, 1.2401e-04,\n 1.3357e-04, 1.4615e-04, 1.0083e-04, 1.1281e-04, 1.2250e-04, 1.5328e-04,\n 1.3581e-04, 1.0140e-04, 1.4200e-04, 1.0888e-04, 1.2056e-04, 1.1032e-04,\n 1.0293e-04, 1.2037e-04, 1.0876e-04, 1.0462e-04, 1.2180e-04, 1.4336e-04,\n 2.3270e-09, 1.3157e-04, 1.2872e-04, 1.3613e-04, 1.7056e-04, 1.1308e-04,\n 2.0234e-04, 1.2347e-04, 1.2935e-04, 9.1255e-05, 1.1728e-04, 1.2989e-04,\n 1.5453e-04, 1.0055e-04, 1.0896e-04, 1.0192e-04, 1.5638e-04, 1.2507e-04,\n 9.8577e-05, 1.5694e-04, 1.1348e-04, 1.1678e-04, 1.2309e-04, 5.8826e-05,\n 1.0957e-04, 7.0900e-05, 8.7784e-05, 1.1943e-04, 9.9511e-05, 1.3166e-04,\n 1.4360e-04, 9.5072e-05, 1.3651e-04, 1.1517e-04, 6.3431e-05, 1.3952e-04,\n 1.1271e-04, 1.0683e-04, 1.4266e-04, 1.7283e-04, 1.7183e-04, 8.5000e-05,\n 9.4111e-05, 1.6165e-04, 1.1636e-04, 1.0014e-04, 9.4564e-05, 1.2139e-04,\n 1.4163e-04, 1.2353e-04, 1.1198e-04, 1.5085e-04, 1.4405e-04, 9.2514e-05,\n 1.4030e-04, 1.2718e-04, 1.2504e-04, 9.4740e-05, 1.6095e-04, 1.4799e-04,\n 1.0730e-04, 1.5828e-04, 1.4272e-04, 1.1264e-04, 1.1071e-04, 1.0282e-04,\n 1.5892e-04, 8.4255e-05, 1.6371e-04, 1.3641e-04, 1.4193e-04, 1.5548e-04,\n 1.3417e-04, 1.3395e-04, 9.9407e-05, 1.1679e-04, 1.3716e-04, 1.6394e-04,\n 9.8188e-05, 1.2582e-04, 1.5362e-04, 1.8158e-04, 9.8595e-05, 1.6332e-04,\n 1.1197e-04, 3.5488e-05, 1.0436e-04, 1.2389e-04, 1.5768e-04, 7.9429e-05,\n 1.1719e-04, 1.1558e-04, 1.3069e-04, 1.2080e-04, 1.0956e-04, 1.1972e-04,\n 1.2523e-04, 8.9861e-05, 1.3210e-04, 5.8582e-05, 1.5196e-04, 1.4425e-04,\n 1.0011e-04, 1.4353e-04, 1.4519e-04, 1.1650e-04, 7.2020e-05, 1.2623e-04,\n 1.3690e-04, 1.5089e-04, 1.0035e-04, 1.1073e-04, 1.2685e-04, 1.5444e-04,\n 1.2288e-04, 9.1566e-05, 1.5738e-04, 8.0953e-05, 1.0865e-04, 1.4241e-04,\n 1.0601e-04, 1.2820e-04, 1.2902e-04, 1.6559e-04, 1.0621e-04, 1.2527e-04,\n 1.3185e-04, 1.1701e-04, 1.4192e-04, 8.9534e-05, 1.6985e-04, 1.2400e-04,\n 1.3277e-04, 9.0971e-05, 1.2457e-04, 1.0185e-04, 1.5271e-04, 1.1605e-04,\n 1.3331e-04, 1.0965e-04, 1.3060e-04, 1.2004e-04, 1.1673e-04, 9.2804e-05,\n 1.2931e-04, 1.1369e-04, 1.2423e-04, 1.2046e-04, 1.1032e-04, 7.9252e-05,\n 1.3063e-04, 8.7433e-05, 1.1761e-04, 9.3385e-05, 1.3316e-04, 9.6777e-05,\n 1.4007e-04, 1.1355e-04, 1.3626e-04, 1.1621e-04, 1.2230e-04, 1.0435e-04,\n 1.3629e-04, 1.6411e-04, 1.3463e-04, 1.6175e-04, 1.2154e-04, 1.4834e-04,\n 9.7600e-05, 1.0693e-04, 1.4898e-04, 1.5619e-04, 8.2352e-05, 8.6799e-05,\n 1.2901e-04, 1.4816e-04, 1.1508e-04, 9.9853e-05, 9.9641e-05, 1.3027e-04,\n 8.5179e-05, 1.0273e-04, 1.6505e-04, 1.7506e-04, 7.8948e-05, 1.3154e-04,\n 8.4807e-05, 1.0695e-04, 1.3173e-04, 1.3840e-04, 1.0864e-04, 1.3280e-04,\n 1.3506e-04, 1.1223e-04, 1.7423e-04, 9.2786e-05, 7.0773e-05, 1.2776e-04,\n 1.3710e-04, 1.2046e-04, 8.0869e-05, 1.6351e-04, 1.2612e-04, 1.0018e-04,\n 1.2268e-04, 1.5915e-04, 1.3591e-04, 9.6057e-05, 1.0450e-04, 1.0847e-04,\n 1.6799e-04, 1.1223e-04, 1.0428e-04, 1.0575e-04, 1.0920e-04, 1.1205e-04,\n 1.0320e-04, 1.3685e-04, 1.3009e-04, 1.6210e-04, 1.2466e-04, 1.0844e-04,\n 1.0977e-04, 1.3714e-04, 1.4876e-04, 9.4657e-05, 1.0179e-04, 1.3210e-04,\n 1.4035e-04, 1.4687e-04, 1.5729e-04, 1.2607e-04, 1.4885e-04, 1.1470e-04,\n 1.4716e-04, 1.4051e-04, 7.6224e-05, 1.5465e-04, 1.2775e-04, 1.2341e-04,\n 1.2893e-04, 9.6718e-05, 1.3804e-04, 9.1615e-05, 1.0591e-04, 9.4002e-05,\n 8.1366e-05, 1.5899e-04, 1.3645e-04, 1.2753e-04, 1.3671e-04, 1.3024e-04,\n 9.1309e-05, 1.1127e-04, 7.7672e-05, 1.0706e-04, 1.1518e-04, 1.0645e-04,\n 1.2507e-04, 9.6011e-05, 1.5150e-04, 9.8351e-05, 1.0118e-04, 1.2649e-04,\n 1.4119e-04, 1.2846e-04, 1.0845e-04, 1.0771e-04, 1.4706e-04, 1.2997e-04,\n 1.4254e-04, 1.3018e-04, 1.1569e-04, 1.1610e-04, 1.4388e-04, 1.2491e-04,\n 8.7265e-05, 1.0695e-04, 1.6613e-04, 1.0144e-04, 1.0845e-04, 1.3987e-04,\n 1.0550e-04, 1.7310e-04, 9.0887e-05, 1.0937e-04, 1.3070e-04, 1.0858e-04,\n 1.0767e-04, 9.8916e-05, 9.7975e-05, 1.1598e-04, 1.2670e-04, 1.1570e-04,\n 9.6395e-05, 1.4398e-04, 1.4238e-04, 9.4603e-05, 1.2322e-04, 9.1944e-05,\n 1.0111e-04, 1.3942e-04, 9.2636e-05, 1.1555e-04, 1.5199e-04, 1.2308e-04,\n 1.0442e-04, 1.3397e-04, 9.7984e-05, 1.3663e-04, 1.8195e-04, 1.7734e-04,\n 1.2666e-04, 1.4242e-04, 1.1560e-04, 1.0583e-04, 1.4175e-04, 1.2864e-04,\n 1.4977e-04, 1.4269e-04, 1.0820e-04, 1.2556e-04, 1.0354e-04, 1.3170e-04,\n 1.4275e-04, 1.3288e-04, 1.0804e-04, 9.5981e-05, 1.4914e-04, 9.9455e-05,\n 1.3154e-04, 1.5966e-04, 1.4176e-04, 9.2047e-05, 1.1833e-04, 1.3689e-04,\n 1.3387e-04, 1.1043e-04, 1.4109e-04, 1.1581e-04, 1.0818e-04, 1.3714e-04,\n 1.2071e-04, 1.8464e-04, 1.9644e-04, 9.9638e-05, 1.4370e-04, 1.2558e-04,\n 1.3757e-04, 7.2586e-05, 1.1709e-04, 1.1990e-04, 1.3788e-04, 1.2337e-04,\n 1.4280e-04, 9.6809e-05, 1.5102e-04, 1.1942e-04, 1.2649e-04, 1.4973e-04,\n 1.2418e-04, 1.4054e-04, 9.4775e-05, 1.1290e-04, 1.5163e-04, 1.3437e-04,\n 1.1240e-04, 1.4836e-04, 1.4648e-04, 1.0140e-04, 1.5087e-04, 1.3113e-04,\n 1.0228e-04, 1.3901e-04, 9.9479e-05, 1.7849e-04, 1.4256e-04, 1.0483e-04,\n 1.3871e-04, 1.3858e-04, 1.5178e-04, 1.3885e-04, 1.3960e-04, 9.6109e-05,\n 8.0198e-05, 1.1219e-04, 1.1363e-04, 1.3064e-04, 1.0035e-04, 1.1665e-04,\n 1.1872e-04, 9.0757e-05, 1.1952e-04, 1.1549e-04, 1.2174e-04, 6.8687e-05,\n 1.2695e-04, 1.0494e-04, 3.7592e-12, 1.1086e-04, 6.7549e-05, 4.6594e-13,\n 1.0485e-04, 1.1185e-04, 1.1025e-04, 1.4888e-04, 1.1074e-04, 1.0390e-04,\n 1.2660e-04, 1.3487e-04, 8.9836e-05, 9.4254e-05, 9.8610e-05, 1.1476e-04,\n 1.2717e-04, 1.0563e-04, 1.2938e-04, 1.5953e-04, 1.5300e-04, 1.4807e-04,\n 1.1747e-04, 1.1195e-04, 9.9049e-05, 1.5111e-04, 1.4134e-04, 9.2760e-05,\n 1.2124e-04, 1.2707e-04, 1.2525e-04, 9.9253e-05, 1.2665e-04, 7.9710e-05,\n 6.3793e-05, 1.4529e-04, 8.1482e-05, 9.7861e-05, 1.3348e-04, 1.2620e-04,\n 1.1923e-04, 1.0932e-04, 1.2404e-04, 1.2849e-04, 1.1386e-04, 8.6516e-05,\n 9.6989e-05, 1.6091e-04, 8.8812e-05, 1.4341e-04, 1.1666e-04, 1.0461e-04,\n 1.4303e-04, 9.9923e-05, 1.1380e-04, 1.1602e-04, 1.2748e-04, 1.4503e-04,\n 1.5498e-04, 1.1341e-04, 1.6409e-04, 1.0365e-04, 1.0196e-04, 1.1245e-04,\n 9.3315e-05, 8.0228e-05, 1.2524e-04, 1.6048e-04, 1.5064e-04, 1.1370e-04,\n 1.0338e-04, 9.8764e-05, 7.7645e-05, 1.7383e-04, 1.3229e-04, 1.2844e-04,\n 1.2747e-04, 1.1279e-04, 1.1284e-04, 1.1659e-04, 1.4693e-04, 1.2345e-04,\n 1.3270e-04, 7.7600e-05, 1.0874e-04, 1.1744e-04, 1.2581e-04, 1.1110e-04,\n 8.2567e-05, 1.3018e-04, 9.5977e-05, 1.0786e-04, 1.4504e-04, 1.1028e-04,\n 1.2967e-04, 9.0734e-05, 1.5543e-04, 1.3025e-04, 1.2111e-04, 1.3755e-04,\n 9.3582e-05, 9.6410e-05, 1.1815e-04, 1.4391e-04, 1.2534e-04, 1.0676e-04,\n 1.0931e-04, 1.2245e-04, 1.0113e-04, 1.1392e-04, 1.4299e-04, 1.0741e-04,\n 1.3010e-04, 1.4654e-04, 1.3028e-04, 1.5005e-04, 1.0525e-04, 1.2845e-04,\n 1.4644e-04, 8.6857e-05, 1.3907e-04, 1.2073e-04, 1.0605e-04, 1.0496e-04,\n 1.5969e-04, 1.2917e-04, 1.0867e-04, 1.0062e-04, 9.0190e-05, 1.2047e-04,\n 1.2592e-04, 1.4100e-04, 1.2718e-04, 1.6109e-04, 1.0578e-04, 1.7978e-04,\n 1.0122e-04, 1.2561e-04, 1.3283e-04, 1.1605e-04, 1.1896e-04, 1.3086e-04,\n 1.0944e-04, 6.5445e-05, 1.4358e-04, 1.2483e-04, 8.8097e-05, 1.0810e-04,\n 9.5653e-05, 1.1543e-04, 1.4869e-04, 1.6229e-04, 1.4721e-04, 1.3802e-04,\n 9.9028e-05, 1.0213e-04, 1.2427e-04, 1.4633e-04, 1.2422e-04, 1.8827e-04,\n 1.2124e-04, 9.9293e-05, 1.7241e-04, 1.4839e-04, 9.9356e-05, 1.3643e-04,\n 1.0681e-04, 1.4543e-04, 1.1701e-04, 1.2717e-04, 1.5450e-04, 1.1037e-04,\n 1.3487e-04, 1.3645e-04, 9.5021e-05, 1.2357e-04, 1.2160e-04, 8.5564e-05,\n 1.5314e-04, 1.2916e-04, 1.3508e-04, 1.0250e-04, 1.1554e-04, 1.2494e-04,\n 1.7266e-04, 1.2439e-04, 1.6171e-04, 1.4591e-04, 1.0647e-04, 1.1932e-04],\n device='cuda:0')"
69
+ },
70
+ "13": {
71
+ "step": "tensor(11268.)",
72
+ "exp_avg": "tensor([[ 1.9974e-18, 5.7139e-19, 1.1098e-23, ..., 5.6052e-45,\n 5.6052e-45, 1.0073e-18],\n [-1.9355e-06, 2.0648e-08, 5.2149e-06, ..., 4.2318e-06,\n 2.0533e-07, 5.2118e-07],\n [ 8.4528e-08, 2.2367e-09, 6.3277e-11, ..., 1.3935e-07,\n -4.0658e-06, -6.4988e-07],\n ...,\n [ 8.1854e-07, 3.7627e-06, 3.2151e-06, ..., -4.2386e-06,\n -2.0279e-05, -1.6565e-07],\n [ 8.5994e-05, -7.7183e-05, -5.0272e-07, ..., 4.5689e-06,\n 1.0952e-05, 1.4333e-06],\n [ 5.2320e-07, -4.3710e-07, 2.7421e-06, ..., 9.5699e-07,\n 1.6284e-06, 1.2349e-06]], device='cuda:0')",
73
+ "exp_avg_sq": "tensor([[2.6120e-11, 7.5107e-10, 2.0433e-14, ..., 1.2097e-15, 4.8302e-15,\n 7.2752e-14],\n [3.9858e-09, 1.6953e-09, 1.8446e-09, ..., 4.9903e-09, 1.3859e-09,\n 1.0458e-10],\n [1.3211e-08, 2.3179e-09, 6.5851e-11, ..., 5.1894e-09, 2.9605e-09,\n 7.5860e-09],\n ...,\n [7.7498e-11, 1.8701e-09, 1.7608e-10, ..., 7.6505e-11, 5.6629e-09,\n 3.2515e-11],\n [8.8534e-08, 1.0122e-07, 2.1192e-09, ..., 7.0342e-08, 3.0113e-08,\n 3.6089e-08],\n [1.0458e-09, 1.6958e-09, 6.7809e-09, ..., 1.1848e-09, 1.1710e-09,\n 4.8454e-09]], device='cuda:0')"
74
+ },
75
+ "14": {
76
+ "step": "tensor(11268.)",
77
+ "exp_avg": "tensor([ 1.5581e-17, -2.1107e-05, -6.9515e-04, ..., -5.1993e-05,\n -9.6327e-04, -1.6186e-04], device='cuda:0')",
78
+ "exp_avg_sq": "tensor([2.4559e-08, 6.9683e-06, 1.6741e-05, ..., 7.7946e-07, 3.2152e-05,\n 3.9810e-06], device='cuda:0')"
79
+ },
80
+ "15": {
81
+ "step": "tensor(11268.)",
82
+ "exp_avg": "tensor([[-3.1674e-21, -1.1551e-07, 8.0739e-07, ..., 3.0362e-06,\n 5.2911e-06, 2.5750e-07],\n [ 6.1454e-21, 2.0117e-07, -1.2229e-06, ..., -2.3490e-07,\n 1.6169e-05, -1.1852e-06],\n [ 3.7618e-21, -3.0686e-07, -2.3180e-07, ..., -2.0061e-08,\n -2.5672e-05, -4.3518e-07],\n ...,\n [-6.0005e-22, -8.1999e-08, 1.8366e-07, ..., -8.4215e-07,\n -4.5364e-06, -8.9393e-07],\n [-6.9042e-21, 2.0620e-08, 4.1593e-07, ..., -1.4497e-06,\n -1.5961e-05, 1.3812e-07],\n [-1.0550e-21, -3.3770e-07, -2.7031e-06, ..., -2.0806e-06,\n -1.1968e-05, -2.3218e-08]], device='cuda:0')",
83
+ "exp_avg_sq": "tensor([[4.1883e-12, 1.2840e-11, 9.8590e-11, ..., 5.3101e-11, 1.9263e-09,\n 1.5467e-10],\n [1.6798e-11, 3.0447e-11, 2.0858e-10, ..., 5.4621e-11, 2.4854e-09,\n 1.7170e-10],\n [1.0269e-10, 5.8177e-11, 1.6286e-10, ..., 5.4092e-11, 2.7296e-09,\n 3.3870e-10],\n ...,\n [5.6518e-11, 5.6846e-11, 1.2048e-10, ..., 3.9702e-11, 2.2022e-09,\n 5.2707e-10],\n [8.0152e-11, 4.0328e-11, 2.6773e-10, ..., 4.5045e-11, 1.8606e-09,\n 2.9287e-10],\n [2.0879e-11, 4.1581e-11, 4.3272e-10, ..., 9.6462e-11, 2.4003e-09,\n 2.1527e-10]], device='cuda:0')"
84
+ },
85
+ "16": {
86
+ "step": "tensor(11268.)",
87
+ "exp_avg": "tensor([[ 8.6278e-05, 6.3030e-05, 2.2485e-04, ..., 1.9092e-05,\n 5.8393e-05, -5.9214e-05],\n [ 1.3269e-04, -9.7712e-05, -9.2853e-05, ..., -6.9220e-05,\n 1.2003e-04, 1.7855e-04],\n [ 2.3029e-05, -7.1720e-05, -8.5981e-06, ..., -4.4127e-05,\n 9.2161e-05, 7.3986e-05],\n ...,\n [ 1.7877e-05, -1.5422e-04, -4.0440e-05, ..., -2.4252e-05,\n 4.4831e-06, 3.8288e-05],\n [-1.6182e-05, -1.4197e-05, 1.2644e-06, ..., 3.4813e-05,\n 1.3512e-05, -4.1409e-05],\n [-1.0034e-04, -1.9697e-05, 1.1773e-04, ..., -4.7218e-05,\n -1.4400e-04, -7.1644e-05]], device='cuda:0')",
88
+ "exp_avg_sq": "tensor([[3.1619e-08, 7.0817e-08, 8.2981e-08, ..., 4.3544e-08, 3.2506e-08,\n 5.5898e-08],\n [2.8826e-08, 6.2605e-08, 4.4276e-08, ..., 5.2188e-08, 2.8441e-08,\n 5.1506e-08],\n [1.9433e-08, 4.6560e-08, 3.7912e-08, ..., 3.4149e-08, 2.8072e-08,\n 2.5545e-08],\n ...,\n [2.4932e-08, 6.5646e-08, 5.3131e-08, ..., 5.6716e-08, 2.9111e-08,\n 5.6584e-08],\n [3.1032e-08, 5.9331e-08, 6.5148e-08, ..., 5.0276e-08, 2.6827e-08,\n 4.6895e-08],\n [3.3547e-08, 6.1215e-08, 8.8630e-08, ..., 4.6714e-08, 2.8454e-08,\n 5.5895e-08]], device='cuda:0')"
89
+ },
90
+ "17": {
91
+ "step": "tensor(11268.)",
92
+ "exp_avg": "tensor([-0.0057, 0.0049, 0.0011, ..., -0.0013, -0.0012, -0.0052],\n device='cuda:0')",
93
+ "exp_avg_sq": "tensor([5.1315e-05, 5.2370e-05, 3.1940e-05, ..., 5.2027e-05, 5.0154e-05,\n 4.8541e-05], device='cuda:0')"
94
+ }
95
+ },
96
+ "param_groups": [
97
+ {
98
+ "lr": 2.5447270110570814e-05,
99
+ "name": "scale_256",
100
+ "betas": [
101
+ 0.9,
102
+ 0.999
103
+ ],
104
+ "eps": 1e-08,
105
+ "weight_decay": 1e-05,
106
+ "amsgrad": false,
107
+ "maximize": false,
108
+ "foreach": null,
109
+ "capturable": false,
110
+ "differentiable": false,
111
+ "fused": null,
112
+ "decoupled_weight_decay": true,
113
+ "initial_lr": 0.001,
114
+ "params": [
115
+ 0,
116
+ 1,
117
+ 2
118
+ ]
119
+ },
120
+ {
121
+ "lr": 2.5447270110570814e-05,
122
+ "name": "scale_512",
123
+ "betas": [
124
+ 0.9,
125
+ 0.999
126
+ ],
127
+ "eps": 1e-08,
128
+ "weight_decay": 1e-05,
129
+ "amsgrad": false,
130
+ "maximize": false,
131
+ "foreach": null,
132
+ "capturable": false,
133
+ "differentiable": false,
134
+ "fused": null,
135
+ "decoupled_weight_decay": true,
136
+ "initial_lr": 0.001,
137
+ "params": [
138
+ 3,
139
+ 4,
140
+ 5,
141
+ 6,
142
+ 7
143
+ ]
144
+ },
145
+ {
146
+ "lr": 2.5447270110570814e-05,
147
+ "name": "scale_768",
148
+ "betas": [
149
+ 0.9,
150
+ 0.999
151
+ ],
152
+ "eps": 1e-08,
153
+ "weight_decay": 1e-05,
154
+ "amsgrad": false,
155
+ "maximize": false,
156
+ "foreach": null,
157
+ "capturable": false,
158
+ "differentiable": false,
159
+ "fused": null,
160
+ "decoupled_weight_decay": true,
161
+ "initial_lr": 0.001,
162
+ "params": [
163
+ 8,
164
+ 9,
165
+ 10,
166
+ 11,
167
+ 12
168
+ ]
169
+ },
170
+ {
171
+ "lr": 2.5447270110570814e-05,
172
+ "name": "scale_1024",
173
+ "betas": [
174
+ 0.9,
175
+ 0.999
176
+ ],
177
+ "eps": 1e-08,
178
+ "weight_decay": 1e-05,
179
+ "amsgrad": false,
180
+ "maximize": false,
181
+ "foreach": null,
182
+ "capturable": false,
183
+ "differentiable": false,
184
+ "fused": null,
185
+ "decoupled_weight_decay": true,
186
+ "initial_lr": 0.001,
187
+ "params": [
188
+ 13,
189
+ 14,
190
+ 15,
191
+ 16,
192
+ 17
193
+ ]
194
+ }
195
+ ]
196
+ },
197
+ "scheduler_state_dict": {
198
+ "T_0": 10,
199
+ "T_i": 10,
200
+ "T_mult": 2,
201
+ "eta_min": 1e-06,
202
+ "T_cur": 9,
203
+ "base_lrs": [
204
+ 0.001,
205
+ 0.001,
206
+ 0.001,
207
+ 0.001
208
+ ],
209
+ "last_epoch": 9,
210
+ "_step_count": 0,
211
+ "_is_initial": false,
212
+ "_get_lr_called_within_step": false,
213
+ "_last_lr": [
214
+ 2.5447270110570814e-05,
215
+ 2.5447270110570814e-05,
216
+ 2.5447270110570814e-05,
217
+ 2.5447270110570814e-05
218
+ ]
219
+ },
220
+ "metrics": {
221
+ "best_val_acc": 76.862,
222
+ "best_epoch": 8,
223
+ "scale_accuracies": {
224
+ "256": 70.244,
225
+ "512": 74.392,
226
+ "768": 75.64,
227
+ "1024": 75.514
228
+ },
229
+ "training_history": {
230
+ "epochs": [
231
+ 1,
232
+ 2,
233
+ 3,
234
+ 4,
235
+ 5,
236
+ 6,
237
+ 7,
238
+ 8,
239
+ 9
240
+ ],
241
+ "train_loss": [
242
+ 3.9118613697850284,
243
+ 2.66607952194092,
244
+ 2.3952484759278954,
245
+ 2.201966982775222,
246
+ 2.026744091663117,
247
+ 1.8584000322575005,
248
+ 1.6992347222357131,
249
+ 1.5605441822221104,
250
+ 1.4593032695614874
251
+ ],
252
+ "train_acc": [
253
+ 68.33870994179526,
254
+ 76.85976925724749,
255
+ 79.21098498478341,
256
+ 81.16849715923061,
257
+ 83.13014618703104,
258
+ 85.12582668769957,
259
+ 87.11042354353492,
260
+ 88.98340341266986,
261
+ 90.4128813808036
262
+ ],
263
+ "val_acc": [
264
+ 72.328,
265
+ 74.248,
266
+ 74.928,
267
+ 75.464,
268
+ 75.994,
269
+ 76.29,
270
+ 76.452,
271
+ 76.812,
272
+ 76.862
273
+ ],
274
+ "scale_accs": {
275
+ "256": [
276
+ 65.922,
277
+ 67.866,
278
+ 68.668,
279
+ 69.028,
280
+ 69.476,
281
+ 69.894,
282
+ 70.05,
283
+ 70.048,
284
+ 70.244
285
+ ],
286
+ "512": [
287
+ 70.014,
288
+ 71.776,
289
+ 72.65,
290
+ 72.974,
291
+ 73.372,
292
+ 73.71,
293
+ 73.994,
294
+ 74.262,
295
+ 74.392
296
+ ],
297
+ "768": [
298
+ 71.312,
299
+ 73.326,
300
+ 74.046,
301
+ 74.52,
302
+ 74.848,
303
+ 75.304,
304
+ 75.51,
305
+ 75.674,
306
+ 75.64
307
+ ],
308
+ "1024": [
309
+ 71.288,
310
+ 73.572,
311
+ 74.36,
312
+ 74.86,
313
+ 75.24,
314
+ 75.4,
315
+ 75.462,
316
+ 75.564,
317
+ 75.514
318
+ ]
319
+ },
320
+ "lr": [
321
+ 0.0009755527298894294,
322
+ 0.0009046039886902864,
323
+ 0.0007940987335200904,
324
+ 0.0006548539886902864,
325
+ 0.0005005000000000001,
326
+ 0.0003461460113097139,
327
+ 0.00020690126647990973,
328
+ 9.639601130971382e-05,
329
+ 2.5447270110570814e-05
330
+ ]
331
+ }
332
+ },
333
+ "train_config": {
334
+ "name": "david_training",
335
+ "run_id": "20251012_145649",
336
+ "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
337
+ "model_variant": "clip_vit_laion_b32",
338
+ "num_classes": 1000,
339
+ "preset": "hierarchical_refinement",
340
+ "custom_config_path": null,
341
+ "num_classes_override": null,
342
+ "use_belly_override": null,
343
+ "belly_expand_override": null,
344
+ "progressive_training_override": false,
345
+ "scale_warmup_epochs_override": null,
346
+ "num_epochs": 10,
347
+ "batch_size": 1024,
348
+ "learning_rate": 0.001,
349
+ "weight_decay": 1e-05,
350
+ "warmup_epochs": 3,
351
+ "use_rose_loss": true,
352
+ "rose_initial_weight": 0.1,
353
+ "rose_max_weight": 0.5,
354
+ "rose_weight_schedule": "adaptive",
355
+ "use_cayley_loss": false,
356
+ "cayley_weight": 0.001,
357
+ "scale_loss_balance": null,
358
+ "use_mixed_precision": true,
359
+ "gradient_clip": 10.0,
360
+ "scheduler_type": "cosine_restarts",
361
+ "min_lr": 1e-06,
362
+ "freeze_strategy": "never",
363
+ "freeze_threshold": 90.0,
364
+ "unfreeze_on_plateau": true,
365
+ "patience": 10,
366
+ "track_gradients": true,
367
+ "gradient_scale_threshold": 1e-05,
368
+ "gradient_scale_multiplier": 10.0,
369
+ "log_interval": 50,
370
+ "val_interval": 1,
371
+ "save_interval": 5,
372
+ "log_fusion_weights": true,
373
+ "log_loss_components": true,
374
+ "save_format": "safetensors",
375
+ "hf_repo": "AbstractPhil/gated-david",
376
+ "upload_to_hub": true,
377
+ "base_dir": "./david_training",
378
+ "num_workers": 10,
379
+ "pin_memory": true,
380
+ "prefetch_factor": 4,
381
+ "persistent_workers": true
382
+ }
383
+ }