AbstractPhil commited on
Commit
7e6f31c
·
verified ·
1 Parent(s): 926d341

Update best_model_acc65.65_metadata.json - Run 20251012_231445

Browse files
weights/David-fully_shared-weighted_sum/20251012_231445/best_model_acc65.65_metadata.json ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4,
3
+ "optimizer_state_dict": {
4
+ "state": {
5
+ "0": {
6
+ "step": "tensor(18770.)",
7
+ "exp_avg": "tensor([[-2.3598e-05, -4.1862e-05, 6.6672e-05, ..., -2.3631e-05,\n -5.3385e-05, 3.0993e-06],\n [ 4.9752e-05, 4.8732e-05, -4.5940e-05, ..., -3.0536e-06,\n 4.2514e-06, -1.6109e-05],\n [-1.3085e-04, 9.1638e-05, 3.1982e-06, ..., 8.4330e-05,\n 4.2293e-05, 9.0224e-05],\n ...,\n [ 1.4814e-05, 1.6123e-06, -8.3805e-07, ..., 2.3295e-05,\n -9.9388e-06, -8.0618e-06],\n [ 1.2860e-04, -5.8304e-05, 1.4084e-05, ..., 6.1260e-05,\n 1.2668e-05, 3.8712e-06],\n [ 4.6011e-05, 1.0769e-04, -5.2284e-05, ..., -1.7131e-06,\n 5.4529e-06, -1.9977e-05]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[1.9260e-08, 5.0751e-08, 2.8953e-08, ..., 3.3327e-08, 2.5739e-08,\n 1.3199e-08],\n [6.9557e-08, 1.0142e-07, 4.4611e-08, ..., 4.0126e-08, 5.4617e-08,\n 2.1379e-08],\n [1.3436e-07, 1.5746e-07, 9.1527e-08, ..., 6.8132e-08, 3.3341e-08,\n 6.0998e-08],\n ...,\n [2.6424e-08, 1.0955e-07, 2.6864e-08, ..., 2.0759e-08, 1.4300e-08,\n 1.3503e-08],\n [3.5347e-08, 4.4317e-07, 4.2551e-08, ..., 8.3670e-08, 2.1340e-08,\n 2.6268e-08],\n [5.0012e-08, 7.6147e-08, 2.5532e-08, ..., 8.6602e-08, 2.3466e-08,\n 2.5566e-08]], device='cuda:0')"
9
+ },
10
+ "1": {
11
+ "step": "tensor(18770.)",
12
+ "exp_avg": "tensor([-1.1519e-03, 1.2287e-03, 9.1899e-04, -1.1498e-03, -6.1745e-04,\n -3.1006e-03, -1.1451e-03, 6.3585e-04, 9.4821e-04, 8.8022e-04,\n 1.4262e-03, 1.3401e-04, 6.7691e-04, -2.7368e-03, -4.6490e-04,\n -2.7945e-03, -1.2246e-03, -4.1366e-03, 1.6435e-03, -6.6395e-04,\n 6.2158e-04, -7.2233e-04, 2.2450e-03, -8.9202e-04, 4.4049e-04,\n 1.8873e-03, -2.1634e-03, -6.1460e-05, -2.2135e-03, -1.2571e-03,\n -7.4536e-04, -3.2018e-04, -7.7667e-05, 1.2174e-03, -1.4786e-03,\n 6.6395e-05, 1.8492e-04, -7.3125e-04, -2.4285e-03, -4.6080e-04,\n 8.6460e-05, 2.2753e-04, -7.1019e-04, -4.4443e-04, -1.2711e-03,\n 3.7088e-03, -6.7395e-04, 7.6243e-04, -1.7523e-03, 1.0514e-04,\n 4.4701e-04, -1.6286e-03, 2.3501e-03, 1.3886e-03, -3.9154e-04,\n -1.3957e-03, -3.8810e-03, -1.4327e-03, -5.8499e-04, -1.6007e-03,\n 2.6149e-04, 1.8563e-03, -5.1826e-04, -1.2495e-03, -1.7250e-03,\n -3.6553e-04, 4.5398e-04, -2.3444e-03, -1.3653e-03, 1.9898e-03,\n 6.2524e-04, -2.0556e-03, -1.0721e-03, -1.1043e-03, -7.8529e-04,\n -4.1276e-04, -1.6605e-03, 4.4836e-04, -4.1192e-03, 1.1903e-03,\n 9.6167e-04, 1.6015e-03, -6.5723e-06, -2.0720e-03, -8.5028e-04,\n 4.8692e-04, 5.6534e-04, 4.8223e-04, 1.4184e-03, 8.9990e-04,\n -3.6445e-04, 1.5089e-03, 7.6400e-04, 4.2694e-03, 2.1159e-03,\n -1.7653e-04, -3.1881e-03, 1.6231e-03, -2.0638e-03, 8.2317e-04,\n 2.4338e-04, -5.1865e-04, 6.5478e-04, 4.3378e-04, 1.5303e-03,\n 1.4479e-03, 9.9627e-04, 8.1216e-04, -2.0701e-03, -1.8747e-03,\n 3.9094e-03, -1.0827e-03, -1.6957e-03, 8.2060e-04, 4.4270e-04,\n -6.9416e-04, -1.2412e-03, -5.2348e-04, 1.2314e-04, -2.6425e-03,\n -1.6560e-03, 3.5972e-03, -3.8333e-03, -5.5609e-04, 2.1010e-04,\n 6.8649e-04, 1.0576e-03, -1.4431e-03, 1.2881e-03, 2.4479e-04,\n 8.3315e-04, -1.8088e-03, 4.5049e-03, 1.0305e-03, -3.4607e-05,\n -3.2403e-04, 2.5651e-03, -1.7153e-03, -6.8194e-04, -1.1801e-03,\n 2.1198e-03, -3.2220e-04, 4.9306e-04, -1.8793e-03, 5.1747e-04,\n 1.6823e-03, -2.9782e-03, 8.6237e-04, 1.2026e-03, 4.7349e-05,\n -5.5082e-04, 1.3869e-03, 2.1393e-03, 1.3357e-03, -8.2610e-04,\n 3.0376e-04, 3.3432e-04, 7.6170e-04, 9.0490e-04, 4.5019e-04,\n 2.2825e-03, 1.1392e-03, -2.5775e-03, -4.2742e-03, 1.1576e-03,\n 1.3365e-04, -3.4988e-03, 2.1254e-03, -2.6871e-03, -2.0464e-03,\n -3.1237e-03, 6.1248e-04, 1.9500e-04, -1.0822e-03, -1.6166e-04,\n -5.3842e-04, 1.6416e-03, 3.1098e-03, -8.8234e-05, 1.2932e-03,\n -1.8100e-04, 2.7417e-03, -3.4451e-03, -3.5462e-03, -3.4242e-04,\n -2.3220e-03, -1.5560e-03, 8.0334e-05, 1.2908e-03, -2.6598e-03,\n 3.7346e-04, 4.7516e-04, 6.6655e-04, 3.9349e-04, -2.3770e-03,\n 3.0011e-03, 1.0501e-04, -3.3560e-04, 2.5825e-03, -1.3562e-03,\n -6.8662e-04, -1.2385e-03, 2.5900e-03, 2.8195e-04, -4.3369e-05,\n -3.0090e-03, 2.7329e-03, -4.0958e-03, -8.1140e-04, -7.4984e-05,\n 1.9538e-03, 8.4012e-05, 1.0922e-03, 1.9033e-04, 3.9665e-04,\n 3.1963e-03, -1.5964e-03, -1.1090e-03, 3.1968e-03, -3.9344e-05,\n -1.6921e-03, -2.2646e-03, -2.1544e-03, -1.9762e-03, 1.4475e-03,\n -2.8835e-04, -5.1707e-04, -2.1651e-04, 3.8177e-03, 3.0818e-03,\n 1.4755e-03, 1.7188e-03, -1.1825e-03, 1.5145e-03, -2.3149e-03,\n 8.7820e-04, 6.0551e-04, 1.7137e-04, 2.1759e-04, 1.6036e-03,\n -6.9037e-05, 4.8093e-04, 2.3805e-03, 2.3973e-03, 1.7487e-03,\n 4.4715e-04, 1.8990e-04, 7.3177e-04, -9.1700e-04, -9.1033e-04,\n 1.1497e-03, -8.9501e-04, -1.4297e-03, 1.4092e-03, 1.8642e-03,\n -6.6689e-04, -2.9862e-03, -4.2890e-03, 8.1208e-04, 3.9320e-03,\n -4.2340e-04, 1.5518e-03, 7.1568e-04, 1.0041e-04, -1.4571e-03,\n -7.4957e-04, -7.7463e-04, -4.3149e-04, -4.9500e-04, 1.3368e-03,\n -4.3603e-04, -8.6377e-04, 5.9771e-04, 1.8326e-03, -1.1406e-03,\n 5.5974e-04, -2.0600e-03, 9.4960e-04, 1.0058e-03, 1.8263e-03,\n 9.1967e-04, 2.4289e-03, 5.4560e-04, 2.8551e-03, -9.8745e-04,\n 1.2092e-03, 1.2534e-03, 6.0544e-04, 8.8209e-05, -4.6730e-03,\n 2.6469e-04, 1.2943e-03, -1.3858e-03, 1.8318e-04, 1.8718e-03,\n 9.9793e-05, 6.8064e-04, 2.9098e-03, -2.1280e-03, 1.1649e-03,\n 1.8976e-03, -1.6025e-03, 1.3339e-03, -5.9784e-04, 2.6443e-04,\n 2.9122e-04, 6.6806e-04, -7.1712e-04, -2.7660e-04, -4.7844e-04,\n -1.7766e-03, 1.3711e-03, 4.2503e-03, -1.9391e-03, -4.6591e-04,\n -8.8136e-04, -1.1322e-03, -1.2903e-03, 1.1400e-04, 9.7026e-04,\n 3.7541e-04, 4.3438e-04, -1.8579e-04, -3.4527e-03, 7.4659e-05,\n -2.1643e-03, 5.1727e-04, -1.2672e-03, 8.8675e-04, -3.0976e-03,\n 9.1659e-04, 2.5272e-04, 4.8561e-04, 2.6028e-04, 2.1385e-03,\n -1.7628e-03, 9.1963e-04, -1.7048e-03, -2.5907e-03, -2.4277e-04,\n -7.3407e-04, 1.3906e-03, 1.0218e-03, -2.2323e-03, -3.7917e-03,\n 1.2342e-03, 2.0904e-03, 5.1414e-04, 1.3470e-03, -1.1200e-04,\n -1.1679e-03, 3.5701e-03, 6.9821e-04, 1.3621e-03, 3.2562e-03,\n -9.2282e-04, -4.8733e-03, -1.5702e-03, -6.7789e-04, 2.7383e-03,\n -2.7036e-04, 1.0075e-03, -2.4884e-03, -3.4628e-04, -2.5002e-03,\n -1.7502e-03, -2.4502e-04, -1.9461e-03, -4.3588e-03, -1.4305e-03,\n 2.6593e-03, -5.4657e-04, -4.8046e-05, 8.2477e-04, -1.9205e-03,\n -3.6857e-04, -2.2426e-03, -1.0530e-03, -2.0762e-03, -1.9166e-04,\n 8.9699e-04, 1.6626e-03, -9.6650e-04, -1.6467e-03, 4.4062e-04,\n 1.4982e-03, -1.4130e-03, 1.0626e-03, -1.4979e-03, -5.8324e-05,\n 1.6036e-04, 9.1087e-04, 2.7234e-03, -1.0908e-03, -4.2692e-04,\n 8.4190e-04, -9.9131e-04, -4.1497e-03, 7.3927e-04, 4.7177e-03,\n 1.9787e-03, 1.7823e-04, -9.2918e-04, 3.1811e-03, 1.5296e-03,\n -6.6216e-04, 2.3158e-03, 3.7617e-05, -1.2038e-03, 4.0595e-03,\n -1.1381e-03, 3.3152e-05, -2.5209e-04, 6.6084e-04, -4.6916e-04,\n -1.9128e-03, -5.4920e-04, 1.0876e-03, 1.8853e-03, -1.1086e-03,\n 4.8880e-04, -1.2109e-03, -2.0309e-03, 1.2557e-03, 2.6934e-04,\n 6.1639e-04, 1.3125e-03, -2.2940e-03, 5.9765e-06, 8.3618e-04,\n -1.2236e-03, 2.8362e-03, 1.3060e-03, -2.1139e-04, -1.1458e-03,\n -7.9419e-04, 2.6544e-04, 1.9553e-04, -2.7563e-04, 9.4142e-04,\n -1.3727e-04, 1.6002e-04, -2.1842e-04, 1.1947e-03, 8.0810e-04,\n -3.6959e-05, 1.6576e-03, 2.7807e-03, 1.8049e-03, 2.4515e-04,\n 2.0128e-03, 2.4503e-03, 1.2086e-03, 3.4692e-03, 1.8723e-03,\n 1.6929e-03, 2.3547e-03, -5.5228e-04, 5.6342e-04, -3.1889e-04,\n 2.6581e-03, 1.9466e-03, -1.3808e-03, -3.7530e-04, -7.1113e-05,\n -1.8853e-03, 5.0226e-05, -2.0805e-04, -1.3461e-03, 8.2916e-04,\n 2.3705e-03, -1.3128e-03, 1.5971e-03, 1.6481e-03, 1.1685e-03,\n 1.7381e-03, 1.1357e-03, 1.0348e-03, -7.4540e-05, -1.4974e-03,\n 8.2374e-04, -1.3952e-03, -7.8893e-04, 6.4415e-04, -1.9044e-03,\n 1.5655e-04, 2.1548e-04, -1.8152e-03, -2.8717e-03, -6.7385e-04,\n -1.8081e-03, 1.5885e-03, 2.1861e-03, -5.3723e-04, 2.0807e-04,\n -7.8998e-04, -5.7798e-04, -6.1160e-04, 4.3933e-04, 1.1794e-03,\n -2.3669e-03, 2.0789e-03, 1.1753e-03, -6.9719e-05, -5.3235e-04,\n -9.6049e-04, -2.3345e-03, -2.1531e-03, -3.4641e-03, 2.2894e-04,\n 3.4745e-03, -4.8162e-04], device='cuda:0')",
13
+ "exp_avg_sq": "tensor([2.0576e-05, 3.7258e-05, 5.5737e-05, 4.4188e-05, 3.1634e-05, 8.4819e-05,\n 7.5010e-05, 2.1627e-05, 3.4304e-05, 4.2478e-05, 5.5958e-05, 3.7651e-05,\n 2.5220e-05, 3.8641e-05, 8.7015e-05, 3.7446e-05, 4.9530e-05, 1.0154e-04,\n 1.4618e-05, 3.9471e-05, 3.0026e-05, 5.0138e-05, 6.9763e-05, 3.8470e-05,\n 2.3784e-05, 2.6154e-05, 7.1798e-05, 5.4594e-05, 4.6045e-05, 3.5173e-05,\n 3.7302e-05, 4.7331e-05, 1.8501e-05, 1.1070e-04, 5.7160e-05, 3.4372e-05,\n 2.6502e-05, 5.7220e-05, 6.2890e-05, 5.3513e-05, 3.2729e-05, 3.5801e-05,\n 4.5161e-05, 3.0700e-05, 4.1768e-05, 4.8745e-05, 3.0552e-05, 2.2105e-05,\n 2.4651e-05, 2.8191e-05, 3.5154e-05, 3.6375e-05, 9.5801e-05, 3.5672e-05,\n 6.0121e-05, 3.7146e-05, 3.9693e-05, 3.0327e-05, 3.9359e-05, 4.4637e-05,\n 2.0594e-05, 2.9863e-05, 1.9914e-05, 7.9109e-05, 8.4846e-05, 6.3174e-05,\n 4.1018e-05, 6.7867e-05, 3.1040e-05, 5.1082e-05, 3.5001e-05, 5.8963e-05,\n 4.0222e-05, 8.9539e-05, 3.7504e-05, 5.1184e-05, 8.0326e-05, 3.0069e-05,\n 4.4276e-05, 5.1824e-05, 4.0762e-05, 3.4946e-05, 3.2832e-05, 5.6633e-05,\n 4.9478e-05, 3.9967e-05, 1.2461e-04, 2.3171e-05, 4.4808e-05, 5.6148e-05,\n 4.1910e-05, 2.7919e-05, 1.0511e-04, 1.4452e-04, 4.4715e-05, 5.5599e-05,\n 5.4502e-05, 6.4082e-05, 4.6636e-05, 3.9637e-05, 3.9999e-05, 3.9269e-05,\n 2.5309e-05, 3.7568e-05, 2.8509e-05, 2.9009e-05, 3.8453e-05, 2.2622e-05,\n 4.4618e-05, 1.8688e-05, 9.2065e-05, 1.8417e-05, 6.5629e-05, 3.6339e-05,\n 3.5325e-05, 2.0588e-05, 3.8500e-05, 6.7299e-05, 3.2918e-05, 4.5162e-05,\n 4.4003e-05, 4.9683e-05, 6.6411e-05, 6.1157e-05, 4.4016e-05, 5.5938e-05,\n 3.9171e-05, 7.5931e-05, 5.8658e-05, 4.3627e-05, 4.1609e-05, 1.0656e-04,\n 5.8738e-05, 4.6466e-05, 7.9532e-05, 3.3279e-05, 3.0664e-05, 2.2016e-05,\n 6.4894e-05, 4.0134e-05, 3.0965e-05, 6.9081e-05, 4.4650e-05, 1.2026e-04,\n 3.7629e-05, 5.5964e-05, 5.3296e-05, 3.7494e-05, 1.0538e-04, 4.7232e-05,\n 3.8969e-05, 5.9526e-05, 5.0400e-05, 5.3201e-05, 4.1861e-05, 6.2759e-05,\n 2.2646e-05, 1.9718e-05, 4.9152e-05, 3.0400e-05, 5.4888e-05, 5.4618e-05,\n 5.5548e-05, 4.0474e-05, 5.4877e-05, 3.1515e-05, 5.4384e-05, 3.2507e-05,\n 6.7947e-05, 8.5715e-05, 3.1955e-05, 4.3742e-05, 2.9372e-05, 3.6305e-05,\n 3.2784e-05, 2.2086e-05, 3.8139e-05, 4.2895e-05, 3.4094e-05, 4.3431e-05,\n 4.5588e-05, 5.9323e-05, 1.3067e-04, 4.0157e-05, 1.8885e-05, 5.4813e-05,\n 5.6735e-05, 5.7322e-05, 6.1298e-05, 5.0052e-05, 3.9706e-06, 3.8401e-05,\n 1.5760e-05, 2.1118e-05, 2.9520e-05, 3.8537e-05, 2.7918e-05, 3.5741e-05,\n 4.4193e-05, 3.1292e-05, 4.8338e-05, 3.4650e-05, 3.9026e-05, 2.1206e-05,\n 2.7147e-05, 7.8365e-05, 3.6116e-05, 3.2373e-05, 3.6647e-05, 3.1728e-05,\n 4.3347e-05, 3.7406e-05, 3.5445e-05, 4.3214e-05, 3.9044e-05, 4.3964e-05,\n 4.3402e-05, 3.5119e-05, 3.9089e-05, 3.3995e-05, 5.3283e-05, 7.9382e-05,\n 2.9863e-05, 5.2271e-05, 3.1471e-05, 3.4681e-05, 4.2144e-05, 3.5506e-05,\n 6.1168e-05, 7.2640e-05, 4.9886e-05, 3.5334e-05, 3.8573e-05, 3.2271e-05,\n 2.9029e-05, 3.8095e-05, 2.1509e-05, 3.6334e-05, 4.8470e-05, 4.0177e-05,\n 2.5776e-05, 3.6443e-05, 2.2738e-05, 4.4342e-05, 1.6354e-05, 3.3994e-05,\n 8.0251e-05, 4.5221e-05, 3.4898e-05, 4.8062e-05, 5.9652e-05, 6.2077e-05,\n 2.3287e-05, 3.9923e-05, 3.5526e-05, 3.0703e-05, 3.2887e-05, 3.7450e-05,\n 3.8322e-05, 7.5055e-05, 3.9120e-05, 2.6259e-05, 4.0649e-05, 2.1353e-05,\n 4.7251e-05, 5.6772e-05, 6.6888e-05, 3.1434e-05, 2.2011e-05, 2.0070e-05,\n 3.2124e-05, 3.6270e-05, 2.6331e-05, 6.3744e-05, 2.6004e-05, 5.0179e-05,\n 8.5129e-05, 2.6864e-05, 5.0232e-05, 2.6063e-05, 4.0111e-05, 4.7791e-05,\n 3.8595e-05, 4.4831e-05, 3.9887e-05, 4.5394e-05, 6.8408e-05, 4.5810e-05,\n 3.1221e-06, 7.6964e-05, 4.9673e-05, 2.4865e-05, 5.2972e-05, 5.9838e-05,\n 4.5228e-05, 6.1169e-06, 3.6466e-05, 7.5621e-05, 4.6728e-05, 1.1372e-04,\n 6.1997e-05, 5.4139e-05, 4.6963e-05, 3.2605e-05, 5.8028e-05, 4.6359e-05,\n 2.9329e-05, 3.2149e-05, 5.3320e-05, 3.0286e-05, 5.6130e-05, 6.4420e-05,\n 8.3675e-05, 2.9200e-05, 3.5989e-05, 4.9505e-05, 3.7630e-05, 4.4380e-05,\n 1.7716e-05, 4.0266e-05, 5.0248e-05, 5.1427e-05, 3.4778e-05, 5.4164e-05,\n 6.5924e-05, 8.7292e-05, 3.3950e-05, 4.5370e-05, 1.8823e-05, 3.6188e-05,\n 3.5713e-05, 6.2752e-05, 3.7976e-05, 2.2223e-05, 9.1570e-05, 2.2141e-04,\n 1.1144e-05, 3.1830e-05, 3.3875e-05, 8.4517e-05, 2.5644e-05, 5.3520e-05,\n 4.8388e-05, 3.2831e-05, 5.3856e-05, 4.2700e-05, 3.5062e-05, 3.9447e-05,\n 4.7800e-05, 7.2313e-05, 2.3754e-05, 7.4051e-05, 2.4419e-05, 5.1182e-05,\n 9.2949e-05, 4.7346e-05, 4.6085e-05, 4.2867e-05, 3.1102e-05, 3.5863e-05,\n 7.7325e-05, 4.0611e-05, 3.1219e-05, 2.0609e-05, 3.3916e-05, 3.7695e-05,\n 2.1595e-05, 2.9031e-05, 7.7265e-05, 2.7424e-05, 4.1285e-05, 4.5197e-06,\n 3.1682e-05, 3.6247e-05, 4.6963e-05, 3.9542e-05, 8.7977e-05, 5.9174e-05,\n 5.7068e-05, 4.0334e-05, 6.1733e-05, 6.3871e-05, 3.4743e-05, 2.7326e-05,\n 3.6185e-05, 5.7595e-05, 3.2572e-05, 2.5801e-05, 3.6672e-05, 2.7763e-05,\n 2.1384e-05, 2.8090e-05, 3.4738e-05, 3.2797e-05, 3.8797e-05, 5.5375e-05,\n 4.0571e-05, 4.3444e-05, 2.8974e-05, 6.9212e-05, 3.7998e-05, 3.8342e-05,\n 3.4821e-05, 2.9850e-05, 3.4927e-05, 1.8290e-05, 1.9481e-05, 5.4499e-05,\n 5.5651e-05, 6.1712e-05, 3.1623e-05, 5.3106e-05, 5.7873e-05, 1.9993e-04,\n 4.1333e-05, 4.3333e-05, 5.5516e-05, 4.6650e-05, 4.8564e-05, 4.0907e-05,\n 3.3580e-05, 4.1567e-05, 4.1107e-05, 8.9185e-05, 3.4787e-05, 5.6162e-05,\n 3.0838e-05, 3.8264e-05, 4.2663e-05, 2.6113e-05, 4.6070e-05, 7.2799e-05,\n 5.0227e-05, 4.7189e-05, 4.8912e-05, 4.9516e-05, 4.9043e-05, 5.3400e-05,\n 5.3583e-05, 5.6019e-05, 1.9386e-05, 9.3046e-05, 4.7321e-05, 5.2483e-05,\n 3.0563e-05, 4.4297e-05, 4.2242e-05, 3.0095e-05, 5.9945e-05, 5.4075e-05,\n 5.3466e-05, 5.6778e-05, 3.2314e-05, 5.2408e-05, 1.3445e-04, 2.6958e-05,\n 4.8838e-05, 3.4340e-05, 3.8965e-05, 4.3486e-05, 5.8671e-05, 4.4908e-05,\n 4.4333e-05, 4.7145e-05, 2.1812e-05, 2.5202e-05, 3.6961e-05, 6.1297e-05,\n 3.0428e-05, 6.2389e-05, 6.7846e-05, 2.0735e-05, 2.5653e-05, 4.0371e-05,\n 5.3498e-05, 3.0943e-05, 3.9769e-05, 5.7271e-05, 3.3369e-05, 6.3957e-05,\n 6.3659e-05, 3.2872e-05, 2.3777e-05, 3.7030e-05, 7.0283e-05, 2.9520e-05,\n 2.0439e-05, 3.1686e-05, 5.3506e-05, 3.2880e-05, 3.4918e-05, 5.1103e-05,\n 1.3691e-04, 4.0187e-05, 4.2065e-05, 2.2019e-05, 6.5512e-05, 3.7336e-05,\n 4.2162e-05, 4.7965e-05, 3.6444e-05, 3.9919e-05, 4.9346e-05, 1.7653e-05,\n 5.6702e-05, 4.0349e-05, 5.3874e-05, 2.8754e-05, 5.7992e-05, 1.8961e-05,\n 4.1115e-05, 4.3514e-05], device='cuda:0')"
14
+ },
15
+ "2": {
16
+ "step": "tensor(18770.)",
17
+ "exp_avg": "tensor([-4.4971e-03, 3.1625e-03, 2.0781e-03, -1.6519e-03, -1.7546e-03,\n -6.5749e-03, -1.6709e-03, 1.2328e-03, 2.0504e-03, 1.0393e-04,\n 2.9509e-03, -8.9165e-05, 1.4189e-03, -6.9135e-03, -1.2871e-03,\n -1.0196e-02, -1.3093e-03, -8.2067e-03, 2.4916e-03, -1.0129e-03,\n 1.5358e-03, -2.9348e-03, 3.8551e-03, -1.8261e-03, 4.9999e-04,\n 4.6594e-03, -5.4376e-03, 4.4213e-04, -3.6981e-03, -2.8991e-03,\n -1.9758e-03, -1.5383e-03, -9.3958e-05, 4.2127e-03, -1.9071e-03,\n 6.2064e-04, 9.3452e-04, -2.2456e-03, -3.1199e-03, -1.2188e-03,\n 2.1826e-04, -2.4023e-03, -1.3137e-03, -9.3419e-04, -4.1243e-03,\n 1.0717e-02, -1.2651e-03, 2.3434e-03, -7.0332e-03, -4.1433e-04,\n -1.7000e-04, -6.4342e-03, 4.2850e-03, 2.5718e-03, -1.7669e-03,\n -3.6795e-03, -7.4482e-03, -3.3177e-03, -1.7435e-03, -4.5252e-03,\n 5.0837e-04, 3.9949e-03, -1.3096e-03, -2.8879e-03, -4.1657e-03,\n -1.2284e-03, 3.6492e-03, -3.5993e-03, -3.0759e-03, 4.2675e-03,\n 1.3786e-03, -4.0640e-03, -1.2719e-03, -3.1589e-03, -1.8066e-03,\n -1.1794e-03, -5.9840e-03, 1.4922e-03, -8.9880e-03, 5.5095e-04,\n 6.2291e-04, 3.2401e-03, 3.4990e-05, -3.8520e-03, -1.7548e-03,\n 2.2102e-03, -1.5361e-04, 8.1466e-04, 3.7686e-03, 3.3491e-03,\n -8.1337e-04, 6.1110e-03, 6.8410e-03, 5.8365e-03, 5.7421e-03,\n -6.8442e-04, -4.5701e-03, 2.4127e-03, -6.2568e-03, 3.2115e-03,\n 7.4840e-04, -1.2402e-03, 1.1442e-03, 4.9162e-04, 2.3061e-03,\n 4.3821e-03, 1.2925e-03, 6.8307e-04, -3.0497e-03, -5.0482e-03,\n 9.8042e-03, -2.9582e-03, -6.8780e-03, 1.2215e-03, 1.3140e-04,\n -1.4557e-03, -4.4363e-03, -2.6072e-03, -7.0360e-04, -6.1981e-03,\n -3.0704e-03, 5.6477e-03, -8.6565e-03, -2.4515e-03, 1.8838e-03,\n 8.3565e-04, 2.1020e-03, -2.8787e-03, 4.0462e-03, -1.0580e-03,\n 1.7338e-03, -2.1363e-03, 1.0413e-02, 2.9248e-03, -3.0701e-04,\n -1.5250e-03, 4.2742e-03, -3.6224e-03, -3.5495e-03, -2.5818e-03,\n 4.0992e-03, -8.2436e-05, 1.3384e-03, -6.2180e-03, 2.8879e-03,\n 3.6033e-03, -5.8713e-03, 1.9115e-03, 2.4881e-03, -9.9003e-04,\n -8.4656e-04, 2.1612e-03, 1.3196e-03, 2.6710e-03, -2.5856e-03,\n 1.0558e-04, 1.0150e-03, 2.7910e-03, 1.8504e-03, 3.6562e-03,\n 3.2925e-03, 1.5826e-03, -1.0643e-02, -8.7056e-03, 2.9054e-03,\n 1.1629e-03, -9.9060e-03, 2.2592e-03, -4.9950e-03, -3.7971e-03,\n -7.0965e-03, 5.2585e-04, 5.5509e-04, -3.1135e-03, -9.9056e-04,\n -1.9045e-03, 2.6383e-03, 5.4537e-03, -5.9655e-04, 4.0231e-03,\n -9.7167e-05, 6.3762e-03, -3.7984e-03, -8.3709e-03, -7.8890e-04,\n -3.6289e-03, -4.7407e-03, -5.0567e-05, 1.7103e-03, -2.8350e-03,\n 5.6052e-45, 8.6387e-04, 2.0852e-03, 1.0045e-03, -3.2185e-03,\n 7.7109e-03, 4.7762e-05, -9.7959e-04, 5.7425e-03, -3.5291e-03,\n -2.1234e-03, -1.8170e-03, 6.4563e-03, 3.9607e-04, -5.0004e-05,\n -6.5794e-03, 6.1312e-03, -7.7444e-03, -2.3667e-03, 5.6779e-04,\n 4.7789e-03, 1.2660e-04, 3.0446e-03, 3.6172e-04, -6.9881e-05,\n 7.8966e-03, -1.9121e-03, -2.2838e-03, 7.0121e-03, -1.9502e-05,\n -3.9183e-03, -4.9497e-03, -6.1043e-03, -1.6671e-03, 2.9850e-03,\n -7.3923e-04, -6.7756e-04, -7.3704e-04, 9.9548e-03, 4.5514e-03,\n 1.9488e-03, 3.4542e-03, -2.3618e-03, 3.0127e-03, -8.1235e-03,\n 2.0298e-03, 2.1139e-03, 1.4819e-05, -5.1465e-04, 3.9694e-03,\n -1.6621e-05, 1.6061e-03, 7.1659e-03, 6.6635e-03, 5.6631e-03,\n 6.0169e-04, -2.7567e-04, 1.7621e-03, -1.8043e-03, -3.1360e-03,\n 1.3932e-03, -2.6604e-03, -3.2421e-03, 1.9523e-03, 2.9626e-03,\n -2.0923e-03, -4.6712e-03, -9.1179e-03, 5.8926e-04, 7.1079e-03,\n -1.5166e-03, 3.4098e-03, 2.3174e-03, 4.0940e-04, -2.8562e-03,\n -8.5859e-04, -3.5476e-03, -7.2099e-04, -7.0393e-04, 3.3719e-03,\n -1.5447e-03, -1.0118e-03, 6.1132e-04, 2.7572e-03, -2.1663e-03,\n 2.7646e-03, -5.7268e-03, 2.6262e-03, 1.1126e-03, 5.0170e-03,\n 9.7309e-04, 4.6381e-03, 1.2347e-03, 6.8027e-03, -9.4671e-04,\n 2.3773e-03, 2.0090e-03, 1.2679e-03, 5.6052e-45, -1.0029e-02,\n 2.8506e-04, 2.9094e-03, -1.8260e-03, 9.9605e-04, 3.2873e-03,\n 5.6052e-45, 2.6433e-03, 5.7744e-03, -5.1514e-03, 2.0239e-03,\n 6.8727e-03, -4.2989e-03, 2.5963e-03, -7.5748e-04, -7.1264e-04,\n -8.5091e-05, 2.6254e-03, -3.7494e-03, -7.7317e-05, -1.5677e-03,\n -3.1326e-03, 4.4210e-03, 8.7605e-03, -2.8973e-03, -1.2600e-03,\n -3.1716e-03, -1.9290e-03, -3.8498e-03, 5.1039e-04, 1.0315e-03,\n -4.6509e-04, 1.0035e-03, -2.8778e-04, -7.2380e-03, -6.2279e-04,\n -5.9995e-03, 2.0039e-03, -2.3115e-03, 2.0540e-03, -9.0499e-03,\n 2.8069e-03, 7.6860e-04, -1.6163e-04, 4.3183e-04, 3.8054e-03,\n -1.9591e-03, 3.6800e-03, -2.8182e-03, -7.2970e-03, -2.8772e-03,\n -1.8370e-03, 3.1453e-03, 2.4820e-03, -6.5197e-03, -7.9566e-03,\n 2.5834e-03, 4.0337e-03, 5.2685e-04, 2.2129e-03, -1.3070e-04,\n -2.0668e-03, 1.0161e-02, 2.0743e-03, 2.6112e-03, 1.0546e-02,\n 1.1150e-04, -7.2627e-03, -3.9453e-03, -8.1611e-04, 4.8582e-03,\n -1.5963e-04, 2.2608e-03, -6.2034e-03, -1.0606e-03, -5.2159e-03,\n -3.7276e-03, -1.8816e-04, -4.9761e-03, -8.7898e-03, -3.5143e-03,\n 4.8825e-03, 5.6052e-45, -1.6595e-03, 1.3800e-03, -4.3986e-03,\n -3.1799e-04, -4.1500e-03, -1.8587e-03, -4.3707e-03, -1.7955e-04,\n 8.5400e-04, 3.2399e-03, -2.3188e-03, -4.3189e-03, 5.0781e-04,\n 3.2121e-03, -3.6817e-03, 2.6938e-03, -2.7352e-03, -9.5957e-04,\n 9.5363e-04, 1.5739e-03, 6.2950e-03, -2.5114e-03, 1.6273e-04,\n 1.6333e-03, -1.5843e-03, -1.2027e-02, 1.5189e-03, 9.9514e-03,\n 4.0330e-03, -1.1355e-05, -1.9350e-03, 6.8535e-03, 2.3089e-03,\n -9.5674e-04, 5.1358e-03, 6.5749e-04, -2.5023e-03, 8.0639e-03,\n -3.0424e-03, -2.7794e-04, -4.4937e-04, 1.6819e-03, -2.4216e-03,\n -1.7622e-03, -1.9814e-03, 3.8193e-03, 4.7499e-03, -2.9289e-03,\n 1.1175e-03, -1.4089e-03, -8.9386e-03, 1.3809e-03, 2.1046e-03,\n 1.2856e-03, 2.8907e-03, -3.1092e-03, -1.3405e-04, 2.1213e-03,\n -2.0657e-03, 6.0993e-03, 3.5079e-03, 4.0867e-05, -2.1170e-03,\n -1.1665e-03, -2.9520e-04, 4.5173e-04, -1.4341e-04, 2.5327e-03,\n 2.3673e-05, 1.3683e-03, -4.1092e-04, 2.5931e-03, 1.5985e-03,\n 1.1457e-03, 4.1881e-03, 8.2339e-03, 3.8488e-03, -1.2880e-04,\n 2.4704e-03, 5.5360e-03, 3.7306e-03, 9.5337e-03, 4.3893e-03,\n 3.5010e-03, 7.2497e-03, -2.4745e-04, -8.6558e-06, -9.7720e-04,\n 4.3668e-03, 4.4912e-03, -1.2154e-03, -1.2398e-03, 2.3861e-04,\n -4.7422e-03, 1.2705e-03, -1.2152e-04, -3.4683e-03, 2.0853e-03,\n 5.8682e-03, -3.0387e-03, 3.9757e-03, 4.0862e-03, 2.4532e-03,\n 5.0292e-03, 3.1154e-03, 1.5469e-03, -6.9708e-04, -3.0053e-03,\n 1.2238e-03, -3.9951e-03, -5.3558e-03, 2.0917e-03, -3.8210e-03,\n 5.4574e-04, 2.9318e-04, -6.2855e-03, -5.9646e-03, -3.1987e-03,\n -4.4657e-03, 2.0467e-03, 4.9281e-03, -3.9057e-04, 8.5482e-04,\n -1.5059e-03, -2.1712e-03, -1.5314e-03, 1.9668e-03, 2.3027e-03,\n -7.2786e-03, 3.2133e-03, 3.5144e-03, -9.1722e-04, -1.8087e-03,\n -2.5504e-03, -4.4881e-03, -6.2534e-03, -5.2542e-03, 6.2835e-04,\n 7.0863e-03, -2.3438e-04], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([4.6855e-04, 1.9985e-04, 1.9430e-04, 2.2960e-04, 1.6523e-04, 2.7466e-04,\n 2.3213e-04, 1.8479e-04, 1.1842e-04, 1.3597e-04, 2.7599e-04, 3.0473e-04,\n 1.5218e-04, 2.2822e-04, 2.5217e-04, 4.6823e-04, 1.0430e-04, 4.3486e-04,\n 4.6830e-05, 1.9523e-04, 2.0491e-04, 2.1558e-04, 2.1472e-04, 1.5130e-04,\n 2.7740e-04, 1.4431e-04, 4.8883e-04, 1.3697e-04, 2.0874e-04, 1.1388e-04,\n 2.3341e-04, 2.5722e-04, 1.0508e-04, 4.6014e-04, 1.4669e-04, 1.5778e-04,\n 1.4623e-04, 1.7674e-04, 2.5721e-04, 2.4368e-04, 2.8230e-04, 1.3256e-03,\n 1.1464e-04, 1.7858e-04, 2.3691e-04, 4.2395e-04, 2.8554e-04, 1.3065e-04,\n 1.9744e-04, 1.9819e-04, 1.6144e-04, 5.0914e-04, 2.8137e-04, 2.1769e-04,\n 4.0526e-04, 1.6175e-04, 2.1516e-04, 1.0290e-04, 1.6975e-04, 1.9262e-04,\n 1.6005e-04, 9.3432e-05, 2.3884e-04, 1.9207e-04, 2.7645e-04, 2.3100e-04,\n 2.7432e-04, 3.0454e-04, 1.6280e-04, 2.2108e-04, 1.2388e-04, 1.5945e-04,\n 1.3449e-04, 4.6865e-04, 1.5368e-04, 1.3111e-04, 5.4598e-04, 2.9435e-04,\n 2.2418e-04, 1.8921e-04, 1.7116e-04, 1.3142e-04, 1.3850e-04, 3.4889e-04,\n 1.5650e-04, 3.1231e-04, 2.6950e-04, 8.3040e-05, 2.7373e-04, 3.9281e-04,\n 1.4829e-04, 3.5236e-04, 7.4514e-04, 3.4772e-04, 1.4950e-04, 2.4961e-04,\n 1.3031e-04, 2.0416e-04, 2.6809e-04, 2.5833e-04, 2.5714e-04, 1.2391e-04,\n 1.3474e-04, 2.9256e-04, 7.3635e-05, 1.9133e-04, 1.6398e-04, 1.5541e-04,\n 9.8664e-05, 1.2169e-04, 4.0961e-04, 1.1846e-04, 8.2748e-04, 1.9748e-04,\n 8.9624e-05, 6.0764e-05, 3.2435e-04, 2.6357e-04, 1.1970e-04, 2.1450e-04,\n 1.5213e-04, 1.0206e-04, 2.9859e-04, 4.1860e-04, 2.1944e-04, 1.8891e-04,\n 1.0735e-04, 2.6200e-04, 3.7597e-04, 1.8835e-04, 1.4884e-04, 3.2037e-04,\n 2.7115e-04, 1.3918e-04, 3.5449e-04, 3.2439e-04, 1.2676e-04, 8.5785e-05,\n 5.0574e-04, 2.1920e-04, 1.1365e-04, 8.1121e-05, 1.6898e-04, 7.3435e-04,\n 2.6653e-04, 2.0004e-04, 2.6049e-04, 1.0869e-04, 3.1979e-04, 6.8342e-04,\n 1.2078e-04, 1.6349e-04, 5.4777e-05, 1.1856e-04, 2.8328e-04, 2.9299e-04,\n 1.6196e-04, 1.4851e-04, 3.2725e-04, 2.3831e-04, 1.5071e-04, 2.7107e-04,\n 5.0069e-04, 1.6178e-04, 2.2755e-04, 2.2841e-04, 3.5579e-04, 9.2373e-05,\n 1.9863e-04, 4.9591e-04, 2.4658e-04, 1.7764e-04, 1.2529e-04, 1.5647e-04,\n 3.0304e-04, 1.7203e-04, 1.3832e-04, 9.2999e-05, 9.9752e-05, 3.4891e-04,\n 1.6932e-04, 2.8185e-04, 2.9500e-04, 2.1784e-04, 1.2058e-04, 2.0466e-04,\n 3.3049e-04, 2.0851e-04, 2.3747e-04, 1.4423e-04, 1.0556e-12, 1.7277e-04,\n 1.2462e-04, 9.5317e-05, 1.6799e-04, 1.4727e-04, 1.3058e-04, 9.6117e-05,\n 1.3840e-04, 1.6914e-04, 1.1942e-04, 1.7241e-04, 2.6624e-04, 2.0107e-04,\n 1.0251e-04, 2.6316e-04, 1.8592e-04, 1.0903e-04, 1.5735e-04, 1.2015e-04,\n 2.9292e-04, 2.6367e-04, 2.1547e-04, 1.6575e-04, 2.7183e-05, 3.1483e-04,\n 1.1837e-04, 1.2779e-04, 2.2803e-04, 2.3414e-04, 2.9705e-04, 3.3477e-04,\n 1.7022e-04, 1.1287e-04, 9.9873e-05, 1.1493e-04, 1.6452e-04, 8.1079e-05,\n 4.8304e-04, 1.7186e-04, 2.5317e-04, 1.9399e-04, 2.1862e-04, 3.2838e-04,\n 2.1837e-04, 1.9880e-04, 2.0112e-04, 2.2473e-04, 1.5969e-04, 1.8711e-04,\n 1.1797e-04, 1.4061e-04, 2.0055e-04, 3.1955e-04, 1.3797e-04, 2.2816e-04,\n 1.7745e-04, 1.0939e-04, 1.3650e-04, 2.3751e-04, 1.8849e-04, 1.9494e-04,\n 1.3030e-04, 9.1387e-05, 7.3830e-05, 1.6252e-04, 1.3523e-04, 2.3552e-04,\n 1.7462e-04, 2.9969e-04, 3.8877e-04, 1.1384e-04, 2.2022e-04, 1.0321e-04,\n 2.7385e-04, 4.1349e-04, 9.2310e-04, 1.5067e-04, 1.6103e-04, 1.2194e-04,\n 1.5678e-04, 1.6726e-04, 1.0169e-04, 2.6322e-04, 1.6397e-04, 2.2962e-04,\n 3.4586e-04, 7.3543e-05, 1.5727e-04, 1.6131e-04, 1.3448e-04, 1.5738e-04,\n 1.9838e-04, 1.7856e-04, 1.8217e-04, 2.2880e-04, 1.7255e-04, 1.9447e-04,\n 2.0231e-14, 2.8627e-04, 1.4568e-04, 1.3276e-04, 2.5379e-04, 2.0780e-04,\n 2.3776e-04, 1.9834e-13, 2.3977e-04, 2.1222e-04, 2.6753e-04, 4.6754e-04,\n 4.2377e-04, 3.0277e-04, 1.5780e-04, 1.1245e-04, 2.0888e-04, 2.1510e-04,\n 2.1419e-04, 2.8417e-04, 3.9128e-04, 1.5785e-04, 1.5904e-04, 3.0330e-04,\n 3.9560e-04, 6.0775e-05, 7.7925e-05, 2.5517e-04, 1.1417e-04, 2.0283e-04,\n 4.6506e-05, 1.8638e-04, 1.3239e-04, 4.1249e-04, 9.3817e-05, 2.1556e-04,\n 2.0664e-04, 4.3908e-04, 1.3881e-04, 2.0445e-04, 1.5586e-04, 2.4031e-04,\n 2.5286e-04, 1.1532e-04, 1.3704e-04, 1.1610e-04, 3.3791e-04, 5.3524e-04,\n 1.1613e-04, 9.4805e-05, 2.2403e-04, 4.3963e-04, 1.6784e-04, 1.2980e-04,\n 1.8700e-04, 3.2772e-04, 2.3381e-04, 2.8200e-04, 9.1068e-05, 9.4180e-05,\n 1.6409e-04, 2.0224e-04, 1.1777e-04, 5.6191e-04, 1.0232e-04, 1.8159e-04,\n 8.2442e-04, 2.3011e-04, 2.0221e-04, 2.1689e-04, 1.5180e-04, 9.1005e-05,\n 4.1518e-04, 8.5682e-05, 1.8731e-04, 1.9255e-04, 1.2342e-04, 1.1802e-04,\n 1.8490e-04, 1.7698e-04, 2.5610e-04, 1.5593e-04, 1.6246e-04, 4.1704e-14,\n 3.3638e-04, 1.1752e-04, 1.4979e-04, 1.2962e-04, 2.7245e-04, 1.7257e-04,\n 2.6100e-04, 1.4884e-04, 2.0717e-04, 2.8935e-04, 2.2661e-04, 2.3231e-04,\n 1.2638e-04, 3.8311e-04, 2.2398e-04, 1.3942e-04, 2.0793e-04, 1.3017e-04,\n 1.6951e-04, 8.1877e-05, 1.5563e-04, 2.7697e-04, 2.0451e-04, 2.9776e-04,\n 1.2850e-04, 3.2847e-04, 1.4348e-04, 2.6763e-04, 1.2602e-04, 1.9445e-04,\n 1.2331e-04, 1.4201e-04, 2.9846e-04, 6.3668e-05, 7.3772e-05, 2.7233e-04,\n 1.8707e-04, 2.2827e-04, 1.2969e-04, 1.5241e-04, 2.8059e-04, 2.9636e-03,\n 1.1785e-04, 1.8251e-04, 2.7755e-04, 3.1994e-04, 4.2195e-04, 1.4932e-04,\n 1.7661e-04, 9.6536e-05, 4.6122e-04, 1.7544e-04, 1.8438e-04, 1.3423e-04,\n 1.4743e-04, 1.8639e-04, 1.6978e-04, 1.1629e-04, 1.5388e-04, 3.0941e-04,\n 2.7578e-04, 2.1654e-04, 2.4604e-04, 3.6857e-04, 4.2893e-04, 2.9947e-04,\n 2.9144e-04, 2.2757e-04, 2.0075e-04, 3.5572e-04, 1.4786e-04, 1.9069e-04,\n 2.0165e-04, 1.8510e-04, 2.3456e-04, 1.8505e-04, 2.6028e-04, 1.6058e-04,\n 9.8050e-05, 2.4741e-04, 2.3014e-04, 4.1747e-04, 4.2200e-04, 1.1573e-04,\n 4.5253e-04, 1.2022e-04, 1.2512e-04, 1.7210e-04, 1.7028e-04, 1.6534e-04,\n 1.5705e-04, 3.1308e-04, 1.4817e-04, 1.0154e-04, 3.0508e-04, 2.4667e-04,\n 1.4802e-04, 2.3371e-04, 2.9173e-04, 1.3450e-04, 1.3088e-04, 2.0726e-04,\n 1.6058e-04, 1.8309e-04, 2.2074e-04, 3.6293e-04, 1.1707e-04, 1.7181e-04,\n 3.3485e-04, 2.3822e-04, 6.8805e-04, 1.6865e-04, 3.2178e-04, 8.4551e-05,\n 8.7243e-05, 1.9359e-04, 2.3620e-04, 2.2515e-04, 3.0645e-04, 9.5266e-05,\n 4.3776e-04, 1.1796e-04, 1.4306e-04, 1.3123e-04, 3.5357e-04, 2.3120e-04,\n 1.8795e-04, 1.6196e-04, 3.5050e-04, 1.5998e-04, 3.2702e-04, 1.7491e-04,\n 3.9147e-04, 1.8281e-04, 2.2416e-04, 1.9583e-04, 1.0761e-04, 7.1712e-04,\n 1.8512e-04, 1.6976e-04], device='cuda:0')"
19
+ },
20
+ "3": {
21
+ "step": "tensor(18770.)",
22
+ "exp_avg": "tensor([-1.5321e-03, 2.0652e-03, 1.4511e-03, -1.0985e-03, -9.2322e-04,\n -3.2942e-03, -1.0019e-03, 1.1438e-03, 9.9846e-04, 5.6266e-04,\n 1.6348e-03, 3.5713e-04, 9.0898e-04, -3.6450e-03, -4.3980e-04,\n -3.5171e-03, -9.4008e-04, -4.3730e-03, 1.4733e-03, -6.7553e-04,\n 8.8403e-04, -7.4801e-04, 2.1726e-03, -1.0914e-03, 4.3583e-04,\n 2.3191e-03, -2.5121e-03, 1.6585e-04, -2.2398e-03, -9.8163e-04,\n -8.2726e-04, -4.6388e-04, -1.9678e-05, 1.4887e-03, -1.4079e-03,\n 2.3169e-04, 2.9284e-04, -1.2270e-03, -2.4439e-03, -5.8229e-04,\n -1.5848e-04, -5.4282e-04, -5.7555e-04, -7.9408e-04, -1.8222e-03,\n 5.0332e-03, -1.2083e-03, 9.1552e-04, -2.1715e-03, -8.7925e-05,\n 1.2113e-04, -2.4737e-03, 2.6157e-03, 1.7799e-03, -3.9576e-04,\n -1.3975e-03, -4.5035e-03, -1.6568e-03, -5.5566e-04, -2.2527e-03,\n 7.1018e-05, 1.9291e-03, -6.2902e-04, -1.5504e-03, -2.1446e-03,\n -6.7010e-04, 1.1722e-03, -2.0190e-03, -1.2848e-03, 1.7233e-03,\n 7.9941e-04, -2.0065e-03, -1.0146e-03, -1.3252e-03, -8.5731e-04,\n -5.2800e-04, -2.6964e-03, 6.3303e-04, -4.6121e-03, 8.2677e-04,\n 9.7141e-04, 1.5836e-03, -1.0927e-04, -2.1023e-03, -1.0035e-03,\n 7.5785e-04, 2.4098e-04, 8.5002e-04, 2.0042e-03, 1.9845e-03,\n -2.0677e-04, 1.9300e-03, 1.9206e-03, 3.6188e-03, 2.8134e-03,\n -3.0773e-04, -2.7461e-03, 1.5815e-03, -3.0038e-03, 1.2011e-03,\n 5.2815e-04, -7.4731e-04, 6.8472e-04, 6.5174e-04, 1.6770e-03,\n 1.8471e-03, 9.8306e-04, 8.0443e-04, -2.0953e-03, -2.4229e-03,\n 4.8076e-03, -1.4191e-03, -2.5364e-03, 8.6464e-04, 8.0550e-05,\n -5.7995e-04, -1.9048e-03, -1.2092e-03, 3.5385e-05, -3.7914e-03,\n -1.7623e-03, 3.2168e-03, -4.5696e-03, -8.4615e-04, 8.4859e-04,\n 7.6275e-04, 1.1310e-03, -1.8050e-03, 1.5309e-03, 5.5161e-06,\n 7.5714e-04, -9.5088e-04, 4.8279e-03, 1.1126e-03, 6.2979e-05,\n -6.0731e-04, 2.8402e-03, -2.3379e-03, -1.8404e-03, -1.4025e-03,\n 2.1719e-03, -1.3534e-04, 7.4375e-04, -2.7687e-03, 6.2131e-04,\n 2.2479e-03, -2.8707e-03, 7.7993e-04, 1.5169e-03, -5.6373e-05,\n -5.5903e-04, 1.3302e-03, 1.8610e-03, 1.7397e-03, -1.6596e-03,\n 3.0503e-04, 5.9881e-04, 1.1858e-03, 1.1063e-03, 9.0926e-04,\n 2.2738e-03, 8.5930e-04, -4.6474e-03, -4.4081e-03, 1.3913e-03,\n 3.7923e-04, -5.2004e-03, 2.1386e-03, -2.5987e-03, -2.3490e-03,\n -3.8686e-03, 8.1316e-04, -5.4149e-06, -1.4072e-03, -2.6771e-04,\n -7.5729e-04, 1.3526e-03, 2.9569e-03, -1.5585e-04, 1.8528e-03,\n -7.4159e-05, 3.4002e-03, -2.4947e-03, -3.2683e-03, -2.9375e-04,\n -2.1813e-03, -2.0295e-03, 2.0527e-04, 1.1894e-03, -2.4007e-03,\n 5.6052e-45, 3.9575e-04, 7.9128e-04, 5.2548e-04, -2.5042e-03,\n 3.9425e-03, -1.3929e-04, -3.9690e-04, 2.5585e-03, -1.6721e-03,\n -9.2283e-04, -8.5500e-04, 2.9592e-03, 2.9501e-04, 6.7839e-06,\n -4.0505e-03, 2.8915e-03, -4.4123e-03, -1.3431e-03, 1.8434e-04,\n 2.3953e-03, 2.2493e-05, 1.0772e-03, 1.2089e-04, 1.3860e-04,\n 4.1707e-03, -1.6863e-03, -7.5921e-04, 3.7958e-03, -6.1559e-05,\n -2.0740e-03, -2.3049e-03, -2.5924e-03, -1.8913e-03, 1.6001e-03,\n -1.9274e-04, -5.7292e-04, -2.9460e-04, 4.8079e-03, 3.3833e-03,\n 1.6929e-03, 1.8954e-03, -1.5418e-03, 1.3333e-03, -3.3745e-03,\n 1.0217e-03, 7.4153e-04, 3.0756e-04, 6.1032e-05, 1.9585e-03,\n 2.4753e-05, 5.7405e-04, 3.1248e-03, 2.7997e-03, 1.9916e-03,\n 4.8760e-04, 4.1660e-04, 8.2834e-04, -9.4382e-04, -1.3132e-03,\n 1.2497e-03, -1.0424e-03, -1.5856e-03, 1.4062e-03, 1.9510e-03,\n -3.9381e-04, -2.6845e-03, -5.0890e-03, 9.8861e-04, 4.0427e-03,\n -4.3481e-04, 1.9572e-03, 1.1890e-03, 4.3208e-05, -1.4428e-03,\n -8.3726e-04, -1.1634e-03, -1.8308e-04, -2.6465e-04, 1.8911e-03,\n -4.8420e-04, -9.5306e-04, 7.7886e-04, 2.0380e-03, -7.5093e-04,\n 9.9847e-04, -2.4634e-03, 1.3508e-03, 1.1210e-03, 2.4225e-03,\n 7.0794e-04, 2.7403e-03, 6.1209e-04, 3.4889e-03, -8.5780e-04,\n 1.6534e-03, 1.2266e-03, 6.8736e-04, 5.6052e-45, -5.6102e-03,\n 2.7526e-04, 1.5000e-03, -1.9118e-03, 2.1213e-04, 2.0491e-03,\n 5.6052e-45, 1.0513e-03, 3.6124e-03, -2.1093e-03, 1.4732e-03,\n 2.4026e-03, -1.9619e-03, 1.5619e-03, -5.3470e-04, 3.3852e-04,\n 1.3195e-04, 1.0463e-03, -8.9703e-04, -2.4244e-04, -6.9266e-04,\n -1.5749e-03, 1.9196e-03, 4.9866e-03, -2.0803e-03, -2.6445e-04,\n -1.8204e-03, -9.2556e-04, -1.4159e-03, 2.6608e-04, 1.2168e-03,\n 3.4341e-04, 8.7127e-04, -3.0664e-04, -4.0703e-03, 2.5549e-05,\n -3.0628e-03, 8.4321e-04, -1.0880e-03, 1.0331e-03, -3.6014e-03,\n 1.2227e-03, 5.4159e-04, 3.7619e-04, 2.9312e-04, 2.4761e-03,\n -1.8016e-03, 1.3130e-03, -1.6597e-03, -3.2925e-03, -6.4085e-04,\n -1.0191e-03, 1.4884e-03, 1.1239e-03, -2.3779e-03, -4.1419e-03,\n 1.4298e-03, 2.3712e-03, 3.9838e-04, 1.1760e-03, 5.9805e-05,\n -1.2195e-03, 5.5225e-03, 9.2116e-04, 1.4262e-03, 4.4252e-03,\n -4.3315e-04, -4.4774e-03, -2.2116e-03, -5.9281e-04, 2.5078e-03,\n -5.2227e-04, 1.0486e-03, -3.1236e-03, -3.1726e-04, -2.8884e-03,\n -2.4053e-03, -2.4500e-04, -2.7088e-03, -5.4627e-03, -2.0492e-03,\n 3.1905e-03, 5.6052e-45, -3.6042e-04, 7.4600e-04, -2.0561e-03,\n -2.7578e-04, -2.7837e-03, -7.8594e-04, -2.4918e-03, -9.1401e-05,\n 1.0075e-03, 1.7770e-03, -1.0216e-03, -2.6371e-03, 3.9360e-04,\n 1.5801e-03, -2.1972e-03, 1.1967e-03, -1.2623e-03, -3.2417e-04,\n 2.7017e-04, 9.3517e-04, 3.3808e-03, -1.4029e-03, 4.2956e-04,\n 7.5754e-04, -9.7354e-04, -5.8982e-03, 6.6510e-04, 4.8257e-03,\n 1.9673e-03, 5.7954e-04, -8.3651e-04, 3.8636e-03, 1.4368e-03,\n -6.1115e-04, 2.4381e-03, 6.4397e-04, -7.2200e-04, 4.4622e-03,\n -1.4083e-03, 2.3789e-04, -1.1192e-04, 1.5416e-03, -7.5622e-04,\n -1.2796e-03, -6.2882e-04, 1.3791e-03, 3.1411e-03, -8.9556e-04,\n 4.4840e-04, -1.0227e-03, -3.3539e-03, 1.1275e-03, 7.5168e-04,\n 7.6800e-04, 1.7529e-03, -2.7293e-03, -1.3135e-04, 1.1305e-03,\n -1.3941e-03, 3.5061e-03, 1.5410e-03, -2.8763e-04, -1.0703e-03,\n -5.6698e-04, -3.4867e-04, 1.1204e-04, -5.5054e-04, 1.5456e-03,\n 3.0123e-05, 1.5092e-04, -3.0448e-04, 1.4027e-03, 8.8405e-04,\n 1.2620e-04, 2.1865e-03, 3.3641e-03, 2.2200e-03, -1.8386e-04,\n 2.2703e-03, 2.9595e-03, 1.6343e-03, 4.8360e-03, 2.1905e-03,\n 1.8023e-03, 3.1618e-03, -2.3541e-04, 4.3434e-04, -5.2037e-04,\n 2.7763e-03, 2.1240e-03, -1.5086e-03, -4.5326e-04, 1.8460e-04,\n -2.3771e-03, 4.3870e-04, -6.7030e-05, -2.0051e-03, 1.0903e-03,\n 3.1688e-03, -1.7610e-03, 2.0241e-03, 1.9663e-03, 1.6865e-03,\n 2.1586e-03, 1.5928e-03, 9.5159e-04, -2.9609e-04, -1.6060e-03,\n 6.9696e-04, -1.7037e-03, -1.9234e-03, 8.7024e-04, -2.5207e-03,\n 3.6734e-04, 2.6730e-04, -2.7773e-03, -3.1197e-03, -5.4989e-04,\n -2.3627e-03, 1.4769e-03, 2.4651e-03, -6.5563e-04, 4.6831e-04,\n -1.0043e-03, -8.9794e-04, -5.9342e-04, 6.0091e-04, 1.3664e-03,\n -3.4834e-03, 2.1444e-03, 1.5902e-03, -2.9838e-04, -5.6167e-04,\n -6.9101e-04, -2.4171e-03, -2.3906e-03, -3.5404e-03, 2.2507e-04,\n 3.7119e-03, -2.0360e-04], device='cuda:0')",
23
+ "exp_avg_sq": "tensor([5.6799e-05, 6.8115e-05, 7.3785e-05, 5.8792e-05, 4.4456e-05, 9.6406e-05,\n 7.9839e-05, 3.6406e-05, 3.7770e-05, 4.0235e-05, 7.6724e-05, 6.0326e-05,\n 3.8376e-05, 7.0198e-05, 8.9208e-05, 7.8711e-05, 4.5018e-05, 1.4134e-04,\n 1.3908e-05, 6.2504e-05, 4.2196e-05, 6.0258e-05, 6.8293e-05, 4.8595e-05,\n 4.3859e-05, 3.7663e-05, 1.0830e-04, 5.6690e-05, 5.6629e-05, 3.6153e-05,\n 5.0065e-05, 7.3656e-05, 2.9242e-05, 1.2236e-04, 5.5214e-05, 4.0944e-05,\n 3.4055e-05, 6.5661e-05, 9.8250e-05, 5.9541e-05, 6.3798e-05, 1.5647e-04,\n 4.0364e-05, 3.9537e-05, 7.2481e-05, 8.8490e-05, 6.6649e-05, 2.9056e-05,\n 3.8438e-05, 4.0741e-05, 4.1973e-05, 6.6347e-05, 1.2315e-04, 5.0065e-05,\n 7.7950e-05, 4.0502e-05, 5.5952e-05, 3.3867e-05, 5.3219e-05, 5.5172e-05,\n 3.0743e-05, 2.7283e-05, 4.2626e-05, 6.7167e-05, 9.7029e-05, 8.3688e-05,\n 6.2457e-05, 7.7735e-05, 5.2003e-05, 5.4101e-05, 3.8798e-05, 5.2117e-05,\n 4.1388e-05, 1.2767e-04, 5.0100e-05, 4.0797e-05, 1.8781e-04, 4.5134e-05,\n 5.5199e-05, 5.2637e-05, 5.2759e-05, 3.5082e-05, 4.0388e-05, 7.0740e-05,\n 5.6685e-05, 5.9538e-05, 1.1004e-04, 2.7817e-05, 8.1466e-05, 7.5377e-05,\n 5.5639e-05, 4.8055e-05, 1.9903e-04, 1.4358e-04, 4.3317e-05, 5.2222e-05,\n 4.0230e-05, 6.4280e-05, 7.9574e-05, 5.6277e-05, 5.7151e-05, 4.5363e-05,\n 3.4749e-05, 7.3895e-05, 3.2788e-05, 4.7086e-05, 4.1494e-05, 3.0969e-05,\n 4.3398e-05, 3.2292e-05, 1.2677e-04, 3.2656e-05, 1.4985e-04, 4.0719e-05,\n 3.3715e-05, 2.1435e-05, 8.4781e-05, 1.0442e-04, 4.1275e-05, 6.7097e-05,\n 5.0256e-05, 3.8970e-05, 8.2034e-05, 9.3491e-05, 5.8420e-05, 5.9314e-05,\n 3.4745e-05, 8.0400e-05, 7.5840e-05, 6.6544e-05, 5.0349e-05, 1.1639e-04,\n 6.4914e-05, 3.7973e-05, 1.0806e-04, 5.6508e-05, 3.7139e-05, 2.9084e-05,\n 1.4502e-04, 6.1057e-05, 3.5032e-05, 4.5549e-05, 4.9786e-05, 1.9438e-04,\n 5.1173e-05, 7.1489e-05, 5.8921e-05, 3.2000e-05, 1.1852e-04, 1.2332e-04,\n 4.3993e-05, 5.2849e-05, 3.5591e-05, 5.8382e-05, 5.0412e-05, 6.8546e-05,\n 3.8639e-05, 3.2223e-05, 7.3996e-05, 4.8977e-05, 5.5599e-05, 5.3059e-05,\n 9.2045e-05, 4.6922e-05, 6.5806e-05, 6.1184e-05, 1.0908e-04, 3.1924e-05,\n 6.2930e-05, 1.5501e-04, 6.3155e-05, 6.4373e-05, 3.0921e-05, 4.4312e-05,\n 4.7977e-05, 3.8644e-05, 3.6852e-05, 4.0617e-05, 3.6731e-05, 8.8087e-05,\n 4.3229e-05, 7.3635e-05, 1.2906e-04, 4.4490e-05, 2.0576e-05, 7.2461e-05,\n 7.5172e-05, 4.6972e-05, 8.1481e-05, 5.0361e-05, 1.4028e-14, 5.0109e-05,\n 2.7611e-05, 2.3686e-05, 5.3675e-05, 4.6773e-05, 3.2110e-05, 3.1243e-05,\n 4.0919e-05, 4.5802e-05, 4.3036e-05, 4.7965e-05, 5.1465e-05, 3.0845e-05,\n 3.3111e-05, 1.1438e-04, 4.4246e-05, 3.6937e-05, 4.2218e-05, 4.0685e-05,\n 7.4055e-05, 6.0278e-05, 5.3075e-05, 4.8547e-05, 2.1446e-05, 8.3255e-05,\n 4.3468e-05, 3.0268e-05, 6.0165e-05, 5.7839e-05, 6.6655e-05, 1.0269e-04,\n 3.6582e-05, 4.4568e-05, 3.1708e-05, 4.0369e-05, 5.0673e-05, 2.9921e-05,\n 1.0411e-04, 8.0705e-05, 8.0090e-05, 5.5657e-05, 6.3484e-05, 5.3461e-05,\n 4.6865e-05, 6.1453e-05, 3.8126e-05, 4.7460e-05, 5.6240e-05, 4.8257e-05,\n 2.7225e-05, 4.1902e-05, 3.9849e-05, 7.4350e-05, 2.4949e-05, 6.2526e-05,\n 6.7995e-05, 5.2340e-05, 3.8151e-05, 5.7271e-05, 7.2368e-05, 7.2198e-05,\n 3.2672e-05, 3.4092e-05, 2.9871e-05, 4.5207e-05, 4.2413e-05, 6.1409e-05,\n 5.5642e-05, 9.4374e-05, 7.0129e-05, 3.6096e-05, 5.6673e-05, 2.8539e-05,\n 6.0786e-05, 8.1111e-05, 1.5362e-04, 3.7848e-05, 3.2976e-05, 3.2383e-05,\n 3.5770e-05, 5.1501e-05, 3.0617e-05, 8.7588e-05, 3.6144e-05, 6.9274e-05,\n 1.0921e-04, 3.2471e-05, 5.7919e-05, 4.8857e-05, 4.0862e-05, 5.0607e-05,\n 5.8161e-05, 5.0376e-05, 5.3678e-05, 7.2427e-05, 5.4319e-05, 7.4329e-05,\n 1.6871e-15, 1.0742e-04, 4.5730e-05, 3.2292e-05, 8.2981e-05, 6.1619e-05,\n 6.8196e-05, 8.8749e-15, 5.7355e-05, 9.9490e-05, 5.5289e-05, 1.6285e-04,\n 7.6623e-05, 6.4209e-05, 5.9618e-05, 3.0369e-05, 7.3956e-05, 5.8732e-05,\n 4.5289e-05, 4.7128e-05, 8.5579e-05, 3.9301e-05, 5.0012e-05, 9.9191e-05,\n 1.1479e-04, 3.0621e-05, 3.8323e-05, 7.1047e-05, 3.3354e-05, 5.5888e-05,\n 1.4776e-05, 4.6553e-05, 5.1717e-05, 9.6542e-05, 3.4440e-05, 7.0886e-05,\n 6.6444e-05, 1.4592e-04, 4.7598e-05, 5.5787e-05, 3.1834e-05, 4.7229e-05,\n 6.4547e-05, 5.7717e-05, 3.6146e-05, 2.4035e-05, 1.0809e-04, 2.7758e-04,\n 1.5516e-05, 3.3693e-05, 4.7008e-05, 1.4043e-04, 3.7118e-05, 4.2839e-05,\n 5.6169e-05, 5.0013e-05, 6.4412e-05, 7.7441e-05, 3.2462e-05, 3.7143e-05,\n 5.0515e-05, 6.2613e-05, 2.7612e-05, 1.3388e-04, 3.1066e-05, 5.7317e-05,\n 1.4713e-04, 6.2114e-05, 4.5880e-05, 7.3816e-05, 4.2464e-05, 2.9035e-05,\n 1.3762e-04, 3.6119e-05, 4.3087e-05, 3.4579e-05, 3.9492e-05, 5.0620e-05,\n 3.1708e-05, 4.9304e-05, 9.4826e-05, 3.9958e-05, 5.2583e-05, 2.0198e-15,\n 5.5974e-05, 2.6777e-05, 4.3579e-05, 4.1493e-05, 1.0041e-04, 5.3734e-05,\n 7.5165e-05, 3.9716e-05, 6.3376e-05, 7.9115e-05, 6.8059e-05, 6.0934e-05,\n 4.6289e-05, 1.0643e-04, 4.9061e-05, 2.8489e-05, 5.7712e-05, 3.5224e-05,\n 4.3288e-05, 2.7222e-05, 4.9979e-05, 7.2246e-05, 6.2573e-05, 6.7472e-05,\n 4.2017e-05, 8.0190e-05, 4.0690e-05, 7.2796e-05, 4.0899e-05, 4.6637e-05,\n 3.5415e-05, 3.7376e-05, 5.3672e-05, 1.6461e-05, 2.0135e-05, 6.6818e-05,\n 5.8948e-05, 6.5207e-05, 3.3421e-05, 6.3631e-05, 6.1079e-05, 5.6895e-04,\n 3.4855e-05, 4.2037e-05, 7.7045e-05, 7.8350e-05, 1.4313e-04, 4.3986e-05,\n 3.8669e-05, 3.6453e-05, 9.2108e-05, 7.4916e-05, 4.8979e-05, 6.0462e-05,\n 4.6281e-05, 5.9751e-05, 5.4369e-05, 3.0898e-05, 4.5229e-05, 1.0035e-04,\n 7.8532e-05, 6.6043e-05, 6.0915e-05, 7.4728e-05, 1.0312e-04, 8.1829e-05,\n 7.8227e-05, 6.9826e-05, 4.6071e-05, 1.0113e-04, 5.8890e-05, 6.7126e-05,\n 3.6007e-05, 5.0144e-05, 5.9397e-05, 3.7487e-05, 8.5176e-05, 5.8006e-05,\n 4.3172e-05, 7.2336e-05, 4.8600e-05, 1.0675e-04, 1.5391e-04, 3.8015e-05,\n 6.9805e-05, 2.6759e-05, 4.2420e-05, 4.7442e-05, 6.3957e-05, 4.1693e-05,\n 5.2164e-05, 6.6833e-05, 2.6772e-05, 3.1201e-05, 5.8914e-05, 6.2393e-05,\n 4.2430e-05, 7.4929e-05, 8.5697e-05, 3.5508e-05, 3.4683e-05, 5.7202e-05,\n 6.7495e-05, 4.6745e-05, 5.4629e-05, 8.2366e-05, 3.6758e-05, 6.5686e-05,\n 6.6090e-05, 5.2103e-05, 6.8119e-05, 4.6447e-05, 1.0343e-04, 3.4331e-05,\n 2.0775e-05, 4.7958e-05, 5.4560e-05, 3.7167e-05, 6.5622e-05, 3.3848e-05,\n 1.3498e-04, 3.7742e-05, 4.8769e-05, 3.6410e-05, 9.2083e-05, 5.2360e-05,\n 5.0906e-05, 5.2304e-05, 6.7465e-05, 4.8713e-05, 6.1592e-05, 2.9004e-05,\n 8.3448e-05, 4.8248e-05, 5.7551e-05, 3.7971e-05, 4.6549e-05, 5.1351e-05,\n 5.3090e-05, 5.1553e-05], device='cuda:0')"
24
+ },
25
+ "4": {
26
+ "step": "tensor(18770.)",
27
+ "exp_avg": "tensor([[ 1.4859e-05, -7.1145e-07, -2.5788e-06, ..., 9.4721e-07,\n -4.6220e-06, -8.2243e-06],\n [-5.5397e-06, 2.8778e-06, 1.0151e-05, ..., 1.1461e-05,\n -6.1212e-06, -1.4126e-05],\n [-1.1341e-05, 7.0675e-06, -4.2546e-06, ..., -7.8893e-06,\n 1.3332e-05, 8.6468e-08],\n ...,\n [ 1.3198e-05, 3.8076e-05, 5.9495e-07, ..., 1.6620e-06,\n 6.2613e-06, -4.2322e-06],\n [ 8.8065e-06, -1.6231e-05, -9.8003e-06, ..., -2.6918e-06,\n 1.1530e-05, 5.2474e-06],\n [-1.3391e-06, 1.2101e-05, -3.4356e-06, ..., -6.5187e-06,\n -7.8361e-06, -6.2296e-06]], device='cuda:0')",
28
+ "exp_avg_sq": "tensor([[8.7149e-10, 2.3571e-09, 5.8825e-10, ..., 5.4168e-10, 8.5290e-10,\n 1.4345e-09],\n [1.2179e-09, 1.5843e-09, 2.6557e-09, ..., 1.1383e-09, 2.5801e-09,\n 2.3357e-09],\n [1.4325e-09, 2.1056e-09, 2.4359e-09, ..., 1.0070e-09, 2.1563e-09,\n 2.1783e-09],\n ...,\n [1.8006e-09, 3.5653e-09, 1.7852e-09, ..., 7.0127e-10, 3.9491e-09,\n 2.5950e-09],\n [1.0566e-09, 3.9298e-09, 2.6403e-09, ..., 9.7863e-10, 1.9456e-09,\n 2.3064e-09],\n [3.3926e-09, 2.8506e-09, 1.8769e-09, ..., 8.9874e-10, 1.9499e-09,\n 2.7968e-09]], device='cuda:0')"
29
+ },
30
+ "5": {
31
+ "step": "tensor(18770.)",
32
+ "exp_avg": "tensor([[ 1.1463e-05, 8.6235e-07, -4.7868e-06, ..., 3.9872e-06,\n 1.5058e-06, 6.3083e-07],\n [-6.8725e-06, 1.3532e-05, 8.8116e-06, ..., 7.2247e-06,\n -2.5471e-05, -1.9116e-06],\n [-3.2090e-06, 4.6997e-06, -1.0896e-05, ..., -8.5589e-06,\n 1.2628e-06, -9.2848e-07],\n ...,\n [-1.8998e-06, -1.0929e-05, 4.9636e-06, ..., 3.7205e-06,\n 2.4972e-05, 1.0999e-05],\n [ 1.3324e-06, 2.1772e-05, -1.1627e-05, ..., -2.4648e-07,\n 6.3569e-06, -7.9189e-06],\n [-8.1993e-06, 2.2917e-05, 2.0612e-06, ..., -1.7892e-06,\n 1.3441e-05, 1.8959e-07]], device='cuda:0')",
33
+ "exp_avg_sq": "tensor([[4.9685e-10, 1.2471e-09, 5.6612e-10, ..., 2.5780e-10, 4.2546e-10,\n 1.0345e-09],\n [1.2401e-09, 2.9683e-09, 9.6686e-10, ..., 7.9107e-10, 2.3807e-09,\n 1.6582e-09],\n [1.0102e-09, 1.4224e-09, 1.3380e-09, ..., 8.4208e-10, 1.8765e-09,\n 1.8448e-09],\n ...,\n [1.2482e-09, 1.3141e-09, 2.3472e-09, ..., 4.7794e-10, 4.2125e-09,\n 1.5583e-09],\n [1.6246e-09, 2.3957e-09, 2.8515e-09, ..., 1.0192e-09, 8.9429e-10,\n 1.3557e-09],\n [8.9632e-10, 2.9988e-09, 1.1533e-09, ..., 5.7656e-10, 8.0032e-10,\n 1.9894e-09]], device='cuda:0')"
34
+ },
35
+ "6": {
36
+ "step": "tensor(18770.)",
37
+ "exp_avg": "tensor([ 0.0001, -0.0001], device='cuda:0')",
38
+ "exp_avg_sq": "tensor([3.9042e-06, 3.9042e-06], device='cuda:0')"
39
+ }
40
+ },
41
+ "param_groups": [
42
+ {
43
+ "lr": 0.005000500000000001,
44
+ "name": "shared",
45
+ "betas": [
46
+ 0.9,
47
+ 0.999
48
+ ],
49
+ "eps": 1e-08,
50
+ "weight_decay": 1e-05,
51
+ "amsgrad": false,
52
+ "maximize": false,
53
+ "foreach": null,
54
+ "capturable": false,
55
+ "differentiable": false,
56
+ "fused": null,
57
+ "decoupled_weight_decay": true,
58
+ "initial_lr": 0.01,
59
+ "params": [
60
+ 0,
61
+ 1,
62
+ 2,
63
+ 3
64
+ ]
65
+ },
66
+ {
67
+ "lr": 0.005000500000000001,
68
+ "name": "scale_256",
69
+ "betas": [
70
+ 0.9,
71
+ 0.999
72
+ ],
73
+ "eps": 1e-08,
74
+ "weight_decay": 1e-05,
75
+ "amsgrad": false,
76
+ "maximize": false,
77
+ "foreach": null,
78
+ "capturable": false,
79
+ "differentiable": false,
80
+ "fused": null,
81
+ "decoupled_weight_decay": true,
82
+ "initial_lr": 0.01,
83
+ "params": [
84
+ 4
85
+ ]
86
+ },
87
+ {
88
+ "lr": 0.005000500000000001,
89
+ "name": "scale_512",
90
+ "betas": [
91
+ 0.9,
92
+ 0.999
93
+ ],
94
+ "eps": 1e-08,
95
+ "weight_decay": 1e-05,
96
+ "amsgrad": false,
97
+ "maximize": false,
98
+ "foreach": null,
99
+ "capturable": false,
100
+ "differentiable": false,
101
+ "fused": null,
102
+ "decoupled_weight_decay": true,
103
+ "initial_lr": 0.01,
104
+ "params": [
105
+ 5
106
+ ]
107
+ },
108
+ {
109
+ "lr": 0.0025005,
110
+ "name": "fusion",
111
+ "betas": [
112
+ 0.9,
113
+ 0.999
114
+ ],
115
+ "eps": 1e-08,
116
+ "weight_decay": 1e-05,
117
+ "amsgrad": false,
118
+ "maximize": false,
119
+ "foreach": null,
120
+ "capturable": false,
121
+ "differentiable": false,
122
+ "fused": null,
123
+ "decoupled_weight_decay": true,
124
+ "initial_lr": 0.005,
125
+ "params": [
126
+ 6
127
+ ]
128
+ }
129
+ ]
130
+ },
131
+ "scheduler_state_dict": {
132
+ "T_0": 10,
133
+ "T_i": 10,
134
+ "T_mult": 2,
135
+ "eta_min": 1e-06,
136
+ "T_cur": 5,
137
+ "base_lrs": [
138
+ 0.01,
139
+ 0.01,
140
+ 0.01,
141
+ 0.005
142
+ ],
143
+ "last_epoch": 5,
144
+ "_step_count": 0,
145
+ "_is_initial": false,
146
+ "_get_lr_called_within_step": false,
147
+ "_last_lr": [
148
+ 0.005000500000000001,
149
+ 0.005000500000000001,
150
+ 0.005000500000000001,
151
+ 0.0025005
152
+ ]
153
+ },
154
+ "metrics": {
155
+ "best_val_acc": 65.65466666666667,
156
+ "best_epoch": 4,
157
+ "scale_accuracies": {
158
+ "256": 64.958,
159
+ "512": 65.59266666666667
160
+ },
161
+ "training_history": {
162
+ "epochs": [
163
+ 1,
164
+ 2,
165
+ 3,
166
+ 4,
167
+ 5
168
+ ],
169
+ "train_loss": [
170
+ 5.311051666323785,
171
+ 4.462767010682684,
172
+ 4.340839946911445,
173
+ 4.262519323832187,
174
+ 4.204208532545754
175
+ ],
176
+ "train_acc": [
177
+ 54.91727464101089,
178
+ 60.04988680892759,
179
+ 61.02839572566782,
180
+ 61.696614622970046,
181
+ 62.27501957199959
182
+ ],
183
+ "val_acc": [
184
+ 63.041333333333334,
185
+ 64.17333333333333,
186
+ 64.75866666666667,
187
+ 65.36133333333333,
188
+ 65.65466666666667
189
+ ],
190
+ "scale_accs": {
191
+ "256": [
192
+ 62.11666666666667,
193
+ 63.38733333333333,
194
+ 63.992666666666665,
195
+ 64.614,
196
+ 64.958
197
+ ],
198
+ "512": [
199
+ 62.967333333333336,
200
+ 64.19266666666667,
201
+ 64.73066666666666,
202
+ 65.34666666666666,
203
+ 65.59266666666667
204
+ ]
205
+ },
206
+ "lr": [
207
+ 0.00975530705321762,
208
+ 0.00904518046337755,
209
+ 0.00793913236883622,
210
+ 0.00654543046337755,
211
+ 0.005000500000000001
212
+ ]
213
+ }
214
+ },
215
+ "train_config": {
216
+ "name": "david_training",
217
+ "run_id": "20251012_231445",
218
+ "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
219
+ "model_variant": [
220
+ "clip_vit_b16",
221
+ "clip_vit_laion_b32",
222
+ "clip_vit_b32"
223
+ ],
224
+ "num_classes": 1000,
225
+ "preset": "small_fast",
226
+ "custom_config_path": null,
227
+ "num_classes_override": null,
228
+ "use_belly_override": null,
229
+ "belly_expand_override": null,
230
+ "progressive_training_override": true,
231
+ "scale_warmup_epochs_override": {
232
+ "256": 0,
233
+ "512": 0
234
+ },
235
+ "num_epochs": 10,
236
+ "batch_size": 1024,
237
+ "learning_rate": 0.01,
238
+ "weight_decay": 1e-05,
239
+ "warmup_epochs": 3,
240
+ "use_rose_loss": true,
241
+ "rose_initial_weight": 0.2,
242
+ "rose_max_weight": 0.6,
243
+ "rose_weight_schedule": "adaptive",
244
+ "use_cayley_loss": false,
245
+ "cayley_weight": 0.01,
246
+ "scale_loss_balance": null,
247
+ "use_mixed_precision": false,
248
+ "gradient_clip": 5.0,
249
+ "scheduler_type": "cosine_restarts",
250
+ "min_lr": 1e-06,
251
+ "freeze_strategy": "never",
252
+ "freeze_threshold": 90.0,
253
+ "unfreeze_on_plateau": true,
254
+ "patience": 10,
255
+ "track_gradients": true,
256
+ "gradient_scale_threshold": 1e-05,
257
+ "gradient_scale_multiplier": 10.0,
258
+ "log_interval": 50,
259
+ "val_interval": 1,
260
+ "save_interval": 5,
261
+ "log_fusion_weights": true,
262
+ "log_loss_components": true,
263
+ "save_format": "safetensors",
264
+ "hf_repo": "AbstractPhil/david-shared-space",
265
+ "upload_to_hub": true,
266
+ "base_dir": "./david_training",
267
+ "num_workers": 10,
268
+ "pin_memory": true,
269
+ "prefetch_factor": 4,
270
+ "persistent_workers": true
271
+ }
272
+ }