Update best_model_acc66.52_metadata.json - Run 20251012_235237
Browse files
weights/David-fully_shared-weighted_sum/20251012_235237/best_model_acc66.52_metadata.json
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 9,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(37540.)",
|
| 7 |
+
"exp_avg": "tensor([[ 7.6576e-06, -6.9571e-06, -2.7473e-05, ..., 7.9680e-05,\n 3.3979e-05, 5.1305e-05],\n [-5.0585e-05, 1.7962e-05, -3.7185e-05, ..., -1.2985e-04,\n 4.3566e-06, 7.6309e-06],\n [-2.8614e-05, 2.5399e-06, 3.9196e-05, ..., 3.7820e-05,\n 2.4588e-05, 1.4347e-05],\n ...,\n [ 1.3798e-05, 8.4456e-05, -6.9370e-07, ..., 5.8636e-05,\n -1.0476e-04, -7.7922e-05],\n [ 3.1067e-06, 1.9564e-05, 2.8571e-07, ..., 6.0912e-05,\n -2.9881e-05, 8.0718e-06],\n [ 5.3865e-06, 3.5067e-05, 2.4116e-05, ..., -2.3890e-05,\n -1.9414e-06, -1.2044e-05]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[1.2872e-07, 1.2258e-07, 5.7439e-08, ..., 6.3248e-08, 3.7421e-08,\n 2.8073e-08],\n [5.4002e-08, 1.6963e-07, 8.2916e-08, ..., 5.9740e-08, 2.5010e-08,\n 2.6429e-08],\n [2.4165e-08, 4.6023e-08, 2.9054e-08, ..., 5.5025e-08, 1.3227e-08,\n 1.7383e-08],\n ...,\n [5.0721e-08, 3.9234e-07, 4.7091e-08, ..., 7.4618e-08, 2.4082e-08,\n 3.6960e-08],\n [8.6590e-08, 1.6055e-07, 6.2575e-08, ..., 5.7677e-08, 2.9861e-08,\n 3.1624e-08],\n [4.8063e-09, 2.0990e-08, 1.0977e-08, ..., 4.3689e-09, 2.2217e-09,\n 4.1434e-09]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(37540.)",
|
| 12 |
+
"exp_avg": "tensor([ 2.0828e-03, -1.3818e-03, 8.4175e-04, -6.2577e-04, -1.2947e-03,\n 1.5775e-03, 1.5341e-03, -8.2825e-04, 1.8124e-03, 2.1929e-03,\n -6.1118e-03, -4.1619e-03, -4.4314e-03, -7.3333e-04, 1.3859e-03,\n 1.0612e-03, -2.0317e-03, 8.4101e-04, -2.7274e-03, 1.3828e-03,\n -9.1939e-04, 7.1183e-04, -1.0517e-03, -1.1868e-03, 2.0095e-03,\n -4.1077e-04, 3.9684e-03, -3.9769e-03, 1.1516e-03, 6.3854e-04,\n 4.5332e-04, 2.8640e-04, 1.3753e-04, 1.9701e-03, -3.3911e-04,\n -2.7360e-04, -2.8604e-03, 1.4674e-03, -1.4383e-03, 1.1805e-03,\n -3.4330e-03, -1.2544e-03, -4.5889e-05, -2.2365e-03, -2.6021e-05,\n 1.9396e-03, 1.0070e-03, 2.3618e-04, -5.3268e-04, 3.9703e-04,\n -2.3997e-03, -8.6844e-04, 8.9027e-05, -3.9681e-03, 4.0754e-03,\n 1.6797e-03, -9.2983e-04, 3.4796e-03, 8.5755e-04, 1.8915e-04,\n -5.0879e-03, 1.7293e-03, -9.8976e-05, 6.0543e-04, 6.6063e-04,\n -1.8174e-03, -1.2442e-03, 2.6525e-04, 1.2206e-03, 3.5056e-04,\n 2.3145e-03, -1.6461e-04, -1.4777e-03, 9.7918e-04, 3.5581e-04,\n 6.5640e-04, 4.6549e-04, -4.0793e-04, -2.9414e-04, 3.8608e-04,\n -1.9966e-04, -4.3793e-04, 1.0045e-03, -4.8426e-04, 5.3521e-05,\n -2.3052e-03, 1.4767e-03, 7.3011e-04, -8.3394e-04, -1.7215e-03,\n 4.0061e-04, 3.5872e-03, 1.3826e-03, 2.8955e-03, -3.0732e-04,\n -2.4054e-03, -7.7073e-05, 2.3782e-05, -9.0942e-04, 5.6911e-04,\n -1.9260e-03, -4.0992e-04, 8.8211e-04, 2.4451e-04, 1.5176e-03,\n -4.4504e-04, 8.3641e-04, -1.3563e-04, 7.1724e-04, 2.8308e-03,\n 3.2082e-03, -1.5316e-03, 1.1900e-03, -4.2741e-04, 2.6989e-04,\n -2.7190e-04, -3.4674e-03, 4.9413e-04, 1.0400e-03, -3.0033e-05,\n 9.3242e-04, 6.1488e-04, -3.8959e-03, -3.4540e-03, -9.6729e-04,\n -3.3049e-04, 9.7889e-04, -2.1902e-03, 1.3628e-03, 1.4654e-03,\n -5.2209e-03, 1.1012e-03, -4.5887e-03, 1.2484e-03, -2.7004e-03,\n -2.8681e-04, -1.4463e-03, -1.1449e-03, -9.3014e-05, -1.4716e-04,\n 1.3165e-04, -8.3069e-04, 8.4240e-04, 1.0779e-03, 2.8040e-03,\n 8.8335e-04, 7.8078e-04, 8.1659e-04, -1.9591e-03, 1.4092e-03,\n 2.0871e-03, 9.4042e-04, -1.2908e-03, -2.2922e-03, -1.7086e-03,\n -2.4621e-03, -3.7031e-03, -3.4892e-03, -8.5471e-04, 1.4778e-03,\n -3.4395e-03, -6.0359e-04, -2.3061e-03, -2.1914e-03, 2.2863e-04,\n 5.4481e-03, 7.1278e-04, 1.4031e-03, -6.7245e-04, 1.2933e-04,\n 3.2925e-03, 1.9162e-03, 2.8889e-04, 3.5266e-03, 1.6193e-03,\n 6.1240e-04, -1.9618e-03, 4.2723e-04, 7.0764e-04, -1.1101e-03,\n 4.8008e-04, 1.5577e-03, -5.8568e-04, 5.1556e-04, 1.1109e-03,\n 1.1113e-03, -3.9520e-03, 1.4696e-03, 7.5057e-04, -7.4523e-04,\n 2.0793e-04, 2.1303e-03, 4.6697e-04, 1.4304e-03, 8.6122e-04,\n 7.3995e-04, 5.2455e-04, 3.0909e-04, -3.6616e-04, 6.2833e-04,\n -1.3531e-03, -2.3259e-03, 6.7654e-04, -4.7576e-04, -2.5708e-04,\n 4.6576e-04, 3.6605e-04, 4.4406e-04, 4.8799e-04, -2.5066e-03,\n 1.3899e-04, 4.3924e-04, -1.1228e-03, 8.9172e-04, 8.6745e-04,\n 2.2086e-03, -1.6839e-03, -5.8684e-04, -7.4517e-04, -8.3195e-04,\n -2.9246e-04, -7.2848e-04, -1.0165e-03, -2.5217e-03, -3.6768e-04,\n 2.4808e-03, 2.0805e-03, -1.7514e-03, -1.4449e-04, 2.7338e-03,\n 1.3204e-03, 1.1672e-03, 1.6984e-03, 6.4572e-04, 1.9204e-03,\n 1.7828e-04, 2.7972e-03, 5.1969e-04, -1.9482e-03, -3.6402e-04,\n -9.6369e-04, -1.7706e-03, -5.1118e-04, 9.3309e-04, -3.4474e-03,\n -1.0113e-03, -7.9023e-04, 9.3176e-04, 6.5225e-04, 2.9523e-04,\n -2.8183e-03, -1.6664e-03, -2.2107e-04, 1.2650e-03, 1.6527e-03,\n 1.8641e-04, 1.6136e-03, 2.8795e-04, 5.9868e-04, 2.1921e-03,\n -2.6634e-04, 3.2861e-03, 9.2107e-04, 1.6345e-03, 1.2677e-03,\n 9.3148e-06, -8.9999e-04, -3.8243e-04, -7.9941e-04, -1.9659e-04,\n 5.1361e-04, 3.5258e-04, -2.6052e-04, 3.0516e-03, -4.1816e-04,\n 3.0755e-03, -1.8924e-03, -1.8077e-03, 1.2575e-03, 2.0539e-04,\n 1.2922e-03, 3.1493e-04, -1.3824e-03, -4.0586e-03, 1.9299e-03,\n -3.6527e-03, -9.7162e-04, 2.5632e-03, 3.6356e-04, 1.5055e-03,\n -1.1835e-03, -1.5456e-04, -2.0131e-03, 9.1751e-05, 1.1309e-03,\n -1.2788e-03, -3.2073e-03, 8.1501e-04, 6.2655e-03, 8.1703e-04,\n 1.9756e-03, 1.7181e-03, 1.3678e-03, 7.4011e-04, -9.7396e-04,\n 4.9104e-04, -1.5427e-03, 9.6301e-04, -1.8575e-03, -5.9130e-04,\n -1.2260e-03, -2.4176e-04, -1.4915e-04, -1.4156e-03, 3.9219e-04,\n -1.0586e-03, 6.4308e-04, -8.8937e-04, 1.7006e-03, -8.0248e-04,\n 2.9145e-03, -1.4652e-03, 1.1596e-03, -7.1254e-04, -6.5718e-05,\n 2.7462e-03, -9.9494e-04, 2.3007e-03, -5.8360e-04, 2.3505e-03,\n -2.0390e-03, -2.9013e-03, -1.9330e-04, 1.0796e-04, -1.0407e-03,\n -1.4175e-03, -1.1993e-03, -2.3483e-03, 2.8184e-03, 1.7901e-03,\n 9.5474e-04, 1.9350e-03, 2.5359e-03, -4.7810e-04, 1.9798e-03,\n -1.1991e-04, -2.7499e-03, 1.0760e-04, 5.4707e-04, 1.4022e-03,\n 8.4608e-04, -1.5302e-03, 1.0226e-03, -9.7653e-04, 1.7799e-03,\n -2.2068e-04, 5.8167e-04, -1.2402e-03, 1.1919e-03, -7.5970e-03,\n 1.9262e-03, -3.0209e-03, -2.3289e-03, -2.0373e-04, 2.6179e-05,\n 2.1588e-03, -2.1674e-04, -5.8041e-04, -7.9613e-04, -3.5368e-04,\n -1.9492e-04, 1.7312e-04, 1.0058e-03, 2.4837e-04, 3.2989e-03,\n 4.9424e-04, 4.2962e-05, -4.6112e-04, -6.4164e-04, 2.3049e-03,\n 1.3264e-03, 6.3839e-04, -1.8934e-03, -1.7395e-03, 7.3327e-04,\n -1.9281e-04, 6.0338e-06, 8.6310e-04, -2.4673e-03, 1.2883e-03,\n 8.2241e-04, 1.0306e-03, -9.6128e-04, -3.4643e-03, -2.0317e-03,\n 1.6700e-03, 7.7589e-04, -3.1568e-03, 1.3727e-03, 1.7698e-03,\n -3.5546e-03, 1.6904e-03, 1.8217e-04, -7.4159e-05, -9.2511e-04,\n 4.1851e-04, -1.0640e-03, -7.4788e-04, -1.4488e-03, 9.2512e-05,\n 2.2587e-04, 3.5288e-03, -2.9132e-03, -4.9767e-03, -2.2739e-03,\n -2.1567e-06, -1.9581e-03, -2.9829e-04, -2.9929e-03, -3.2346e-03,\n 5.0863e-04, -1.7549e-03, 2.0345e-03, 1.3449e-03, 2.0420e-03,\n -1.8154e-03, -3.3600e-03, -1.9756e-03, 1.1207e-03, -1.9644e-04,\n -1.6061e-03, 9.4379e-04, -1.6313e-03, 1.6141e-04, 4.3150e-04,\n -1.6898e-03, -1.5094e-03, 1.2210e-03, -2.3933e-03, 3.5683e-04,\n -1.0611e-03, 2.0193e-03, -3.0929e-04, -2.3582e-03, -5.4865e-04,\n -8.0780e-04, 6.2918e-04, 3.2679e-04, -9.9715e-05, 7.2321e-04,\n -1.1020e-03, 6.2546e-04, 3.9640e-04, -1.8043e-04, 3.6203e-04,\n 1.9012e-04, 2.7528e-03, -2.5694e-04, -1.2688e-03, 4.5258e-03,\n -5.5757e-04, -3.1280e-03, 1.3666e-03, 3.3265e-03, 3.7372e-03,\n 5.1235e-04, 1.8880e-03, 2.8227e-03, 7.9397e-04, 8.8246e-04,\n 6.1833e-04, 4.0185e-03, -2.3233e-03, 3.5111e-03, -2.3949e-03,\n -1.5921e-04, 1.0596e-03, 2.2493e-03, 9.8000e-05, -1.4094e-03,\n -3.6918e-04, 5.4164e-04, -7.7819e-04, 9.5695e-05, -7.9322e-04,\n -3.5190e-03, 3.5245e-04, 2.7021e-04, 3.6312e-03, 2.5377e-03,\n -1.2576e-03, 8.6967e-04, -1.3445e-03, 2.3327e-03, 4.6563e-04,\n 4.0375e-04, 2.3313e-03, -7.3763e-05, 1.6212e-03, 1.0962e-03,\n 2.3818e-04, -1.9685e-03, 1.6766e-03, -1.5333e-03, 2.8392e-04,\n 1.5734e-03, 6.5799e-04, -1.8383e-03, 5.8867e-04, 1.2631e-04,\n 1.0597e-03, -2.7452e-04], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([4.1533e-05, 4.0106e-05, 2.5756e-05, 2.8759e-05, 3.2301e-05, 6.5833e-05,\n 2.8265e-05, 5.5374e-05, 4.1935e-05, 4.2788e-05, 5.9959e-05, 6.5670e-05,\n 8.6217e-05, 2.4175e-05, 6.1062e-05, 4.7250e-05, 4.2808e-05, 3.9168e-05,\n 4.6981e-05, 5.1073e-05, 3.6213e-05, 2.8133e-05, 2.9469e-05, 5.9451e-05,\n 8.0680e-05, 3.1957e-05, 3.1025e-05, 6.3641e-05, 5.9739e-05, 4.0718e-05,\n 1.8221e-05, 3.0233e-05, 4.6621e-05, 3.7180e-05, 2.3444e-05, 4.4834e-05,\n 3.2331e-05, 2.6290e-05, 2.5726e-05, 2.4501e-05, 3.3787e-05, 2.4236e-05,\n 5.3484e-06, 6.7890e-05, 1.0125e-04, 3.2385e-05, 2.3719e-05, 2.5132e-05,\n 4.4920e-05, 7.1857e-05, 7.6576e-05, 2.5174e-05, 3.0711e-05, 3.9597e-05,\n 4.9977e-05, 3.7644e-05, 3.5502e-05, 9.7849e-05, 3.4117e-05, 3.2724e-05,\n 1.0564e-04, 2.5904e-05, 2.6079e-05, 3.0868e-05, 3.3190e-05, 2.7632e-05,\n 3.1698e-05, 2.9345e-05, 5.1058e-05, 3.9853e-05, 3.6045e-05, 3.2102e-05,\n 3.0493e-05, 3.3876e-05, 2.9935e-05, 3.4729e-05, 4.7083e-05, 3.8081e-05,\n 4.6957e-05, 3.4924e-05, 5.6772e-05, 3.6721e-05, 3.0593e-05, 2.1243e-05,\n 3.3301e-05, 4.0236e-05, 4.9993e-05, 2.9101e-05, 2.4955e-05, 4.3756e-05,\n 2.9178e-05, 6.5545e-05, 3.3100e-05, 3.5977e-05, 3.9343e-05, 4.3195e-05,\n 5.1027e-05, 4.0553e-05, 3.4759e-05, 2.0113e-05, 4.6093e-05, 6.7927e-05,\n 5.9611e-05, 3.3521e-05, 6.2302e-05, 5.6237e-05, 3.8513e-05, 8.4367e-05,\n 3.1969e-05, 4.1241e-05, 5.4297e-05, 5.1753e-05, 3.4767e-05, 4.9735e-05,\n 3.8052e-05, 4.3836e-05, 4.8570e-05, 4.6709e-05, 3.0541e-05, 3.8666e-05,\n 1.7085e-05, 4.8758e-05, 4.6079e-05, 6.0607e-05, 2.8746e-05, 4.2056e-05,\n 3.4779e-05, 4.2942e-05, 4.1739e-05, 7.4174e-05, 4.3782e-05, 3.6914e-05,\n 4.6066e-05, 3.3600e-05, 8.5718e-05, 3.4322e-05, 2.6886e-05, 3.7710e-05,\n 2.9527e-05, 4.6607e-05, 1.6752e-05, 2.9746e-05, 4.2277e-05, 8.6064e-05,\n 4.1203e-05, 3.9811e-05, 2.6085e-05, 5.5983e-05, 4.4363e-05, 8.4225e-05,\n 2.0317e-05, 4.5867e-05, 2.4020e-05, 6.6222e-05, 2.8388e-05, 3.3882e-05,\n 5.0765e-05, 3.4841e-05, 3.3394e-05, 2.6767e-05, 5.2567e-05, 1.3410e-05,\n 3.6314e-05, 6.8733e-05, 1.6766e-05, 5.5851e-05, 3.7195e-05, 1.9229e-05,\n 1.5981e-05, 4.1399e-05, 2.9559e-05, 4.9363e-05, 3.7845e-05, 3.2821e-05,\n 3.6524e-05, 2.4730e-05, 7.6940e-05, 2.2898e-05, 2.3153e-05, 4.0107e-05,\n 3.0541e-05, 2.7850e-05, 6.9661e-05, 3.8745e-05, 3.0138e-05, 3.5563e-05,\n 9.5519e-05, 2.1832e-05, 2.0531e-05, 1.5033e-05, 3.4751e-05, 4.6687e-05,\n 5.9150e-05, 5.3214e-05, 2.4045e-05, 1.8214e-05, 4.5739e-05, 2.5136e-05,\n 1.8412e-05, 2.1107e-05, 2.8301e-05, 4.3563e-05, 3.0194e-05, 2.0792e-05,\n 3.1863e-05, 3.7337e-05, 3.0960e-05, 3.0847e-05, 4.4928e-05, 1.9751e-05,\n 3.9260e-05, 5.6047e-05, 3.6553e-05, 2.5916e-05, 5.0772e-05, 1.5940e-05,\n 2.5506e-05, 2.3856e-05, 5.4651e-05, 5.3129e-05, 2.3111e-05, 1.7825e-05,\n 3.1220e-05, 1.6597e-05, 2.7899e-05, 5.5117e-05, 2.6588e-05, 1.9742e-05,\n 2.2492e-05, 5.1867e-05, 4.0791e-05, 2.1022e-05, 5.0644e-05, 3.5109e-05,\n 2.9478e-05, 3.4995e-05, 3.4606e-05, 5.9982e-06, 4.1946e-05, 1.1449e-04,\n 2.8412e-05, 6.6413e-05, 2.9238e-05, 5.2339e-05, 7.6674e-05, 5.1754e-05,\n 3.8525e-05, 4.5812e-05, 5.0357e-05, 3.2059e-05, 5.2623e-05, 2.6907e-05,\n 4.3430e-05, 4.6239e-05, 3.6272e-05, 3.4417e-05, 2.5971e-05, 4.7361e-05,\n 5.3919e-05, 2.8436e-05, 2.6047e-05, 6.5820e-05, 2.5870e-05, 4.9855e-05,\n 4.2009e-05, 2.5310e-05, 2.2690e-05, 3.3814e-05, 2.5800e-05, 3.1435e-05,\n 3.9853e-05, 3.6758e-05, 3.0130e-05, 3.0977e-05, 2.4337e-05, 5.0544e-05,\n 2.0844e-05, 6.0617e-05, 3.5190e-05, 2.5758e-05, 3.3746e-05, 2.4725e-05,\n 3.2547e-05, 4.5965e-05, 2.5059e-05, 5.5437e-05, 3.3471e-05, 3.2506e-05,\n 6.3523e-05, 4.8942e-05, 2.4767e-05, 3.2209e-05, 4.8878e-05, 2.6520e-05,\n 4.7398e-05, 2.3940e-05, 2.9282e-05, 5.5707e-05, 9.9697e-05, 3.1800e-05,\n 2.7124e-05, 3.1208e-05, 3.5041e-05, 2.8340e-05, 5.5141e-05, 2.3275e-05,\n 4.1985e-05, 3.5512e-05, 7.5819e-05, 3.9233e-05, 4.9741e-05, 5.3404e-05,\n 2.4130e-05, 2.4832e-05, 4.8956e-05, 2.9073e-05, 3.8615e-05, 1.6768e-05,\n 3.5446e-05, 4.5873e-05, 8.5762e-05, 3.3081e-05, 4.0840e-05, 4.1603e-05,\n 5.9463e-05, 9.7768e-05, 2.6678e-05, 5.0702e-05, 3.2602e-05, 3.0372e-05,\n 3.5253e-05, 5.1316e-05, 1.5487e-05, 2.4782e-05, 2.9727e-05, 3.3039e-05,\n 5.1503e-05, 2.6364e-05, 5.6619e-05, 4.5044e-05, 3.0662e-05, 3.0544e-05,\n 4.1495e-05, 2.4068e-05, 5.4455e-05, 5.0183e-05, 3.6978e-05, 2.4149e-05,\n 3.9488e-05, 4.3648e-05, 2.6877e-05, 3.8378e-05, 2.8558e-05, 2.9324e-05,\n 3.2517e-05, 2.5561e-05, 2.5484e-05, 3.1436e-05, 3.0725e-05, 2.6936e-05,\n 2.2451e-05, 8.4551e-05, 1.1465e-04, 3.0174e-05, 4.0916e-05, 2.9419e-05,\n 3.5588e-05, 4.6756e-05, 2.3208e-05, 3.1670e-05, 2.1140e-05, 3.4571e-05,\n 2.0963e-05, 4.0034e-05, 6.4976e-05, 3.2905e-05, 2.1562e-05, 1.5542e-05,\n 2.7826e-05, 3.2125e-05, 3.4984e-05, 4.0176e-05, 5.6534e-05, 3.3220e-05,\n 3.4938e-05, 1.0949e-04, 2.9952e-05, 4.9539e-05, 3.7387e-05, 3.8058e-05,\n 4.9342e-05, 5.4683e-05, 2.8112e-05, 3.1373e-05, 3.7454e-05, 2.3121e-05,\n 2.1541e-05, 5.7406e-05, 4.2257e-05, 6.9882e-05, 4.5626e-05, 2.8514e-05,\n 4.6789e-05, 3.4557e-05, 4.4934e-05, 3.2674e-05, 7.5436e-05, 1.7554e-05,\n 3.0581e-05, 4.6007e-05, 4.6645e-05, 4.9682e-05, 4.7196e-05, 4.7295e-05,\n 2.8695e-05, 2.3517e-05, 5.4798e-05, 2.1595e-05, 5.4607e-05, 6.5463e-05,\n 3.9874e-05, 2.9878e-05, 4.9702e-05, 4.1104e-05, 3.5839e-05, 3.5756e-05,\n 6.9484e-05, 3.9303e-05, 2.6938e-05, 3.6929e-05, 3.0146e-05, 1.8631e-05,\n 5.4071e-05, 4.0707e-05, 3.0393e-05, 2.9072e-05, 5.5333e-05, 4.0807e-05,\n 3.4518e-05, 3.6691e-05, 4.0549e-05, 6.1101e-05, 3.2392e-05, 1.1507e-04,\n 4.9610e-05, 3.6659e-05, 2.5292e-05, 3.5591e-05, 3.3644e-05, 3.5910e-05,\n 3.8795e-05, 3.6988e-05, 4.3633e-05, 6.5602e-05, 3.1722e-05, 2.0583e-05,\n 7.4752e-05, 3.9389e-05, 2.0539e-05, 4.4305e-05, 4.1567e-05, 3.0371e-05,\n 3.4780e-05, 3.2032e-05, 6.9404e-05, 2.8186e-05, 3.0391e-05, 3.5845e-05,\n 5.9941e-05, 4.7159e-05, 3.9158e-05, 6.7599e-05, 5.1807e-05, 3.8565e-05,\n 4.2487e-05, 4.4033e-05, 3.9015e-05, 3.3728e-05, 3.6343e-05, 2.6962e-05,\n 2.0360e-05, 3.4751e-05, 5.2868e-05, 3.7716e-05, 3.1294e-05, 3.5856e-05,\n 4.8819e-05, 3.5942e-05, 7.3747e-05, 3.5585e-05, 3.2374e-05, 3.7843e-05,\n 5.0826e-05, 2.6615e-05, 2.1271e-05, 3.1650e-05, 4.5108e-05, 2.4709e-05,\n 2.2627e-05, 2.9343e-05, 4.5309e-05, 2.8242e-05, 3.9530e-05, 2.9988e-05,\n 3.0453e-05, 2.3554e-05, 3.5145e-05, 3.8561e-05, 3.3230e-05, 4.1495e-05,\n 3.7800e-05, 3.3460e-06], device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(37540.)",
|
| 17 |
+
"exp_avg": "tensor([ 7.4649e-03, -2.2890e-03, 7.3713e-04, -2.6668e-03, -1.7782e-03,\n 3.2867e-03, 3.7610e-03, -1.5295e-03, 4.2056e-03, 5.9008e-03,\n -1.4550e-02, -9.5067e-03, -8.3128e-03, -2.3622e-03, 3.1756e-03,\n 4.8431e-03, -4.3019e-03, 3.7095e-03, -6.1456e-03, 4.1464e-03,\n -3.2417e-03, 6.0059e-04, -3.4692e-03, -3.1158e-03, 3.8791e-03,\n -1.9091e-03, 7.4878e-03, -1.1021e-02, 3.8632e-03, 1.3431e-03,\n -3.1316e-04, -2.2505e-05, -3.6499e-04, 4.6260e-03, -1.4126e-03,\n -4.2867e-04, -5.3905e-03, 4.8128e-03, -2.4077e-03, 2.9254e-03,\n -5.8221e-03, -3.3263e-03, -5.6052e-45, -4.2937e-03, 1.5277e-03,\n 3.2784e-03, 3.1743e-03, 1.2532e-03, -1.2590e-03, 1.7650e-03,\n -5.9158e-03, -2.4862e-03, 6.4528e-04, -1.0419e-02, 7.0987e-03,\n 4.8987e-03, -3.6480e-03, 4.6049e-03, 1.7760e-03, 1.1808e-04,\n -6.8446e-03, 4.5104e-03, 3.5065e-04, 1.0068e-03, 1.5264e-03,\n -4.0397e-03, -4.9358e-03, -8.2552e-05, 3.8520e-03, 1.1459e-03,\n 6.4587e-03, -3.2807e-04, -4.5499e-03, 2.6242e-03, 9.9314e-04,\n 1.8341e-03, 1.5544e-03, -1.3476e-03, -1.6614e-03, 5.9325e-04,\n 2.3710e-05, -1.4086e-03, 1.9448e-03, -9.1688e-04, 8.0331e-04,\n -6.3740e-03, 4.1547e-03, 1.7651e-03, -5.4890e-04, -5.9268e-03,\n 3.0804e-03, 6.8277e-03, 4.1226e-03, 7.8936e-03, 5.9336e-04,\n -6.8160e-03, -1.8229e-04, -1.0038e-03, -3.4146e-03, 3.0196e-03,\n -5.9953e-03, -5.9250e-04, 1.6116e-03, 9.8158e-04, 2.9443e-03,\n -5.2498e-03, 1.1602e-03, 1.2592e-03, 1.4311e-03, 5.8937e-03,\n 4.9222e-03, -4.5075e-03, 2.9831e-03, -1.8879e-04, -8.7321e-04,\n 9.7161e-05, -8.2328e-03, 4.8747e-04, 1.7999e-03, -3.8103e-04,\n 2.8947e-03, 1.4488e-03, -1.4523e-02, -8.3484e-03, -3.2035e-03,\n -1.1726e-03, 1.9945e-03, -5.0180e-03, 5.4974e-03, 5.7269e-03,\n -1.1998e-02, 1.8008e-03, -1.4449e-02, 1.6424e-03, -5.2390e-03,\n -8.8288e-04, -4.0179e-03, -3.0165e-03, 6.8797e-04, -7.5495e-04,\n 7.0360e-04, -1.4082e-03, 2.9128e-03, 1.7204e-03, 7.5205e-03,\n 1.7921e-03, 7.5497e-04, 2.7211e-03, -4.2982e-03, 1.8491e-03,\n 6.6861e-03, 3.4969e-03, -3.9917e-03, -4.4501e-03, -2.8432e-03,\n -4.8521e-03, -1.1606e-02, -9.9542e-03, -2.9988e-03, 3.9442e-03,\n -8.9220e-03, -1.6605e-03, -6.3311e-03, -3.6241e-03, 9.6560e-04,\n 1.7516e-02, 1.0688e-03, 3.7229e-03, -1.5898e-03, 1.6649e-04,\n 6.8577e-03, 5.9544e-03, -6.3483e-04, 1.3994e-02, 4.2951e-03,\n 8.2202e-04, -2.8488e-03, 1.4229e-03, 1.9326e-03, -3.3680e-03,\n 1.5087e-03, 4.0375e-03, -3.7547e-03, 9.6281e-04, 3.9076e-03,\n 2.2189e-03, -1.2529e-02, 3.8442e-03, 3.0467e-03, -2.5458e-03,\n -1.1176e-04, 5.2551e-03, 7.0100e-04, 2.6889e-03, 2.0916e-03,\n 2.8937e-03, 1.9250e-03, 1.1226e-04, -7.5371e-04, 1.1696e-03,\n -4.0508e-03, -4.9236e-03, 1.6752e-03, -8.0655e-04, -4.7810e-04,\n 1.3928e-03, 9.6464e-04, 1.6115e-04, -2.1011e-04, -5.1708e-03,\n -9.4520e-04, -3.4425e-04, -1.2385e-03, 1.2181e-03, 2.5004e-03,\n 7.4780e-03, -3.4304e-03, -2.8553e-03, -2.7742e-03, -2.2793e-03,\n -7.0767e-05, -2.0817e-03, -1.9261e-03, -7.3563e-03, 8.4649e-04,\n 7.5372e-03, 4.1436e-03, -3.4698e-03, -2.1081e-04, 5.7087e-03,\n 3.5468e-03, 2.7736e-03, 8.3431e-03, 2.1968e-03, 3.1999e-03,\n 6.0133e-04, 7.3917e-03, 5.6052e-45, -2.6250e-03, 3.3028e-05,\n -2.8862e-03, -3.0909e-03, -1.2029e-03, 1.8254e-03, -4.0535e-03,\n -4.4526e-03, -1.6630e-03, 2.0274e-03, 1.0858e-03, -1.5406e-04,\n -6.9267e-03, -2.5352e-03, 1.1257e-03, 3.5321e-03, 2.5583e-03,\n 2.7697e-04, 6.2415e-03, 6.4367e-04, 1.4071e-03, 6.5206e-03,\n -1.0961e-03, 7.0297e-03, 2.1768e-03, 2.8894e-03, 1.9288e-03,\n 1.0170e-03, -3.9570e-03, -4.8761e-04, -1.2136e-03, -1.4745e-03,\n 1.4869e-03, 1.0981e-03, -1.1356e-04, 8.9313e-03, -2.3004e-03,\n 6.3780e-03, -7.0968e-03, -1.9443e-03, 3.2546e-03, 8.3202e-04,\n 3.3282e-03, -6.8290e-04, -2.1151e-03, -6.8894e-03, 4.0719e-03,\n -6.9789e-03, -3.5867e-03, 5.4274e-03, 2.0238e-04, 2.7656e-03,\n -1.7511e-03, 6.1903e-04, -4.7169e-03, 1.6468e-04, 2.2103e-03,\n -2.9756e-03, -8.0809e-03, 1.6310e-03, 1.3781e-02, 1.1300e-03,\n 4.9262e-03, 2.8040e-03, 4.4187e-03, 2.3435e-03, -2.3800e-03,\n 7.8491e-04, -2.8898e-03, 1.5530e-03, -5.2461e-03, -2.9994e-03,\n -1.6557e-03, -4.9141e-04, -1.1528e-03, -4.5324e-03, -3.7147e-05,\n -3.8163e-03, 1.5781e-03, -2.6086e-03, 4.0413e-03, -1.4725e-03,\n 8.2543e-03, -2.6835e-03, 5.3613e-03, -2.7845e-03, -2.4669e-03,\n 5.5232e-03, -3.2562e-03, 7.9872e-03, -2.0347e-03, 7.9808e-03,\n -5.7973e-03, -6.9855e-03, -5.2733e-04, 1.1907e-03, -2.8515e-03,\n -3.1499e-03, -2.2759e-03, -5.8295e-03, 5.7313e-03, 4.1718e-03,\n 1.8066e-03, 4.2800e-03, 6.2820e-03, -2.0862e-03, 4.1258e-03,\n 3.7645e-04, -6.4674e-03, 5.5836e-04, 1.4193e-03, 3.5659e-03,\n 3.6565e-03, -3.2535e-03, 3.2836e-03, -3.2310e-03, 5.1434e-03,\n -8.5923e-04, 2.9613e-03, -3.6289e-03, 2.3261e-03, -1.6570e-02,\n 4.5668e-03, -6.4301e-03, -4.4650e-03, 2.8457e-04, 7.4346e-04,\n 5.4833e-03, -1.9485e-04, -1.9068e-03, -9.4865e-04, -1.0307e-03,\n -4.5339e-04, -7.1010e-04, 3.5788e-03, 1.6604e-03, 6.4609e-03,\n 2.5523e-04, 5.2654e-04, -7.4579e-04, -2.2760e-03, 3.5103e-03,\n 3.3968e-03, 1.2289e-03, -4.9360e-03, -5.1750e-03, 2.8522e-03,\n -2.7954e-04, -8.0101e-05, 1.5885e-03, -6.7710e-03, 3.0846e-03,\n 1.3970e-03, 1.7450e-03, -2.7442e-03, -1.7551e-02, -2.4786e-03,\n 3.4674e-03, 1.8076e-03, -9.5090e-03, 3.9049e-03, 3.0114e-03,\n -7.5897e-03, 3.6686e-03, -4.1534e-04, -1.1686e-04, -2.4691e-03,\n 5.2076e-04, -5.3531e-03, -1.1709e-03, -5.2876e-03, -3.1503e-04,\n 4.9609e-04, 4.8561e-03, -9.5977e-03, -1.0757e-02, -6.6833e-03,\n 5.8973e-04, -5.8896e-03, -6.7124e-04, -6.2149e-03, -5.5602e-03,\n 1.8334e-03, -3.3171e-03, 7.3769e-03, 4.4380e-03, 7.5819e-03,\n -5.0180e-03, -5.6029e-03, -3.2764e-03, 3.2793e-03, -2.7749e-04,\n -4.1234e-03, 1.9722e-03, -2.5982e-03, -7.9489e-04, 6.0103e-04,\n -4.8193e-03, -3.1872e-03, 3.5170e-03, -6.0963e-03, 4.5359e-04,\n -3.3489e-03, 4.5549e-03, -1.4004e-03, -4.3066e-03, -8.3372e-04,\n -2.5633e-03, 2.2133e-03, 3.0108e-04, -5.5332e-04, 2.0992e-03,\n -3.5813e-03, 1.0945e-03, 1.1749e-03, -2.8987e-03, 3.3331e-04,\n -3.0554e-05, 5.5687e-03, -1.1002e-03, -2.5489e-03, 9.9434e-03,\n -1.0214e-03, -6.5141e-03, 3.4860e-03, 7.0261e-03, 7.2993e-03,\n 1.7877e-03, 3.9104e-03, 7.2812e-03, 2.0563e-03, -5.7194e-04,\n 8.5302e-04, 8.6980e-03, -5.4169e-03, 5.5474e-03, -5.3561e-03,\n -1.3982e-04, 1.6156e-03, 5.8646e-03, -1.8502e-03, -2.0457e-03,\n -1.3697e-03, 2.3099e-03, -2.9158e-03, 2.6460e-04, -9.7487e-04,\n -4.8210e-03, 6.4455e-04, 8.2595e-04, 7.2607e-03, 4.1884e-03,\n -2.7972e-03, 2.4236e-03, -3.4667e-03, 4.0032e-03, 1.6760e-03,\n 1.6164e-03, 4.9940e-03, -1.3016e-03, 2.8567e-03, 2.4677e-03,\n 1.2946e-03, -3.6434e-03, 4.6591e-03, -4.6671e-03, 1.0785e-03,\n 5.2191e-03, 2.0822e-03, -3.1548e-03, 9.3035e-04, 3.2081e-04,\n 1.8656e-03, -5.6052e-45], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([3.7293e-04, 2.6854e-04, 1.0015e-04, 1.9611e-04, 1.3656e-04, 3.5621e-04,\n 1.9428e-04, 3.2394e-04, 2.6959e-04, 3.5335e-04, 4.3115e-04, 4.2359e-04,\n 3.4890e-04, 2.1481e-04, 2.2201e-04, 2.1240e-04, 2.2709e-04, 2.1182e-04,\n 2.2622e-04, 2.8223e-04, 2.7564e-04, 1.0632e-04, 1.3320e-04, 3.5158e-04,\n 5.5205e-04, 2.3381e-04, 1.9736e-04, 4.0580e-04, 2.6226e-04, 2.8375e-04,\n 1.8656e-04, 2.5466e-04, 1.9754e-04, 1.8340e-04, 1.4828e-04, 1.3879e-04,\n 1.4109e-04, 1.9176e-04, 1.5522e-04, 1.5624e-04, 1.2768e-04, 9.9213e-05,\n 3.6912e-20, 2.2851e-04, 5.0545e-04, 1.6952e-04, 1.6947e-04, 7.3742e-05,\n 4.6774e-04, 3.2111e-04, 3.4921e-04, 1.3869e-04, 1.5144e-04, 3.0726e-04,\n 2.0041e-04, 2.1543e-04, 2.7966e-04, 1.3877e-04, 1.7369e-04, 1.0239e-04,\n 2.3043e-04, 2.0436e-04, 1.9929e-04, 1.4614e-04, 1.2607e-04, 1.4947e-04,\n 2.4891e-04, 1.3866e-04, 2.6220e-04, 1.4707e-04, 2.5642e-04, 1.0121e-04,\n 2.6187e-04, 3.0523e-04, 1.7176e-04, 2.2014e-04, 2.3437e-04, 1.9552e-04,\n 2.3330e-04, 2.7009e-04, 2.6450e-04, 1.5959e-04, 1.6454e-04, 9.8305e-05,\n 1.3991e-04, 3.4060e-04, 4.9761e-04, 1.8604e-04, 9.7503e-05, 5.9859e-04,\n 2.4735e-04, 1.9274e-04, 2.3356e-04, 2.9934e-04, 3.0529e-04, 2.7187e-04,\n 2.2983e-04, 2.2614e-04, 3.0673e-04, 3.6311e-04, 2.7783e-04, 2.9505e-04,\n 2.5487e-04, 2.1943e-04, 2.9146e-04, 4.6033e-04, 1.7064e-04, 5.0494e-04,\n 2.2828e-04, 1.8133e-04, 1.8503e-04, 3.6644e-04, 2.5758e-04, 1.7548e-04,\n 2.0940e-04, 1.2390e-04, 2.0585e-04, 2.6955e-04, 1.6469e-04, 3.1632e-04,\n 1.7965e-04, 3.1074e-04, 4.7064e-04, 4.8807e-04, 3.1553e-04, 4.1966e-04,\n 2.4882e-04, 3.4313e-04, 4.0198e-04, 4.8481e-04, 2.7658e-04, 2.6039e-04,\n 3.4076e-04, 2.1873e-04, 4.5001e-04, 1.4050e-04, 2.0433e-04, 3.2944e-04,\n 1.9171e-04, 1.2529e-04, 1.7923e-04, 2.2379e-04, 2.7221e-04, 2.6585e-04,\n 1.9881e-04, 1.8211e-04, 1.0586e-04, 4.0554e-04, 2.4542e-04, 4.0636e-04,\n 1.0693e-04, 2.9948e-04, 1.4329e-04, 3.1782e-04, 1.0959e-04, 1.0638e-04,\n 4.7841e-04, 2.8489e-04, 2.6295e-04, 1.5382e-04, 3.8428e-04, 1.5145e-04,\n 4.0113e-04, 2.1039e-04, 1.0543e-04, 4.1555e-04, 2.7643e-04, 1.0833e-04,\n 8.1793e-05, 2.0580e-04, 1.0219e-04, 2.7655e-04, 2.3650e-04, 4.3707e-04,\n 2.7115e-04, 1.5169e-04, 2.7722e-04, 1.1447e-04, 1.6495e-04, 2.4604e-04,\n 2.0045e-04, 2.1404e-04, 6.4368e-04, 2.5072e-04, 5.6158e-04, 1.5058e-04,\n 7.0613e-04, 1.4218e-04, 1.8852e-04, 9.7072e-05, 1.8223e-04, 2.8137e-04,\n 1.6282e-04, 1.1411e-04, 1.4130e-04, 9.8994e-05, 2.4832e-04, 1.3530e-04,\n 8.4111e-05, 1.0909e-04, 1.5020e-04, 2.2285e-04, 1.6103e-04, 1.1537e-04,\n 6.3116e-05, 2.0463e-04, 1.5678e-04, 1.5755e-04, 3.1968e-04, 1.0703e-04,\n 2.9691e-04, 2.7389e-04, 3.4778e-05, 1.3234e-04, 2.8315e-04, 2.2393e-04,\n 1.0204e-04, 5.1430e-04, 2.7320e-04, 4.2407e-04, 1.6151e-04, 1.5865e-04,\n 2.1054e-04, 1.6601e-04, 2.4074e-04, 3.3254e-04, 1.1533e-04, 1.1592e-04,\n 8.1911e-05, 1.9760e-04, 1.9484e-04, 1.3387e-04, 6.9392e-04, 2.8958e-04,\n 1.0451e-04, 1.6907e-04, 1.8501e-04, 5.2357e-21, 1.6565e-04, 4.9505e-04,\n 1.2480e-04, 3.3325e-04, 4.2574e-04, 3.2424e-04, 1.9422e-04, 3.9685e-04,\n 1.3965e-04, 1.7018e-04, 3.0725e-04, 1.6598e-04, 3.0815e-04, 9.5429e-05,\n 4.9229e-04, 2.8664e-04, 1.4082e-04, 2.4042e-04, 1.9196e-04, 1.8179e-04,\n 1.2325e-04, 1.8156e-04, 2.3057e-04, 2.9329e-04, 1.7691e-04, 1.8327e-04,\n 2.0645e-04, 2.1203e-04, 2.1843e-04, 2.2670e-04, 2.1618e-04, 3.5827e-04,\n 3.7218e-04, 2.9734e-04, 6.2343e-05, 2.3273e-04, 1.3469e-04, 1.6466e-04,\n 1.9015e-04, 2.6616e-04, 2.4834e-04, 2.3746e-04, 2.3094e-04, 8.2261e-05,\n 1.5534e-04, 1.5285e-04, 1.6982e-04, 2.5078e-04, 3.0029e-04, 1.2612e-04,\n 5.7379e-04, 2.1974e-04, 1.3152e-04, 8.6080e-05, 1.6857e-04, 1.0397e-04,\n 2.3701e-04, 1.4910e-04, 2.2309e-04, 1.7470e-04, 5.0919e-04, 1.8685e-04,\n 1.3483e-04, 1.3954e-04, 3.6262e-04, 1.7442e-04, 2.1740e-04, 2.0291e-04,\n 2.0243e-04, 1.7679e-04, 8.0322e-04, 4.9150e-04, 4.9054e-04, 2.1939e-04,\n 1.6693e-04, 1.8758e-04, 1.9006e-04, 3.1727e-04, 1.7461e-04, 1.0326e-04,\n 2.1024e-04, 2.6830e-04, 6.8879e-04, 4.2311e-04, 2.2741e-04, 3.1328e-04,\n 4.1121e-04, 4.6997e-04, 2.2135e-04, 3.8410e-04, 1.5752e-04, 2.7992e-04,\n 4.3653e-04, 4.0695e-04, 7.4752e-05, 1.4982e-04, 1.8390e-04, 1.8128e-04,\n 8.7231e-05, 1.5013e-04, 2.5014e-04, 2.5574e-04, 1.7501e-04, 1.4308e-04,\n 2.1793e-04, 3.5583e-04, 2.0032e-04, 2.5637e-04, 1.9930e-04, 1.1388e-04,\n 1.6777e-04, 2.7371e-04, 2.3059e-04, 2.1852e-04, 2.2483e-04, 2.3662e-04,\n 2.6301e-04, 1.6299e-04, 2.2135e-04, 1.4548e-04, 1.4615e-04, 1.5034e-04,\n 1.1881e-04, 5.1101e-04, 4.6100e-04, 3.6164e-04, 2.0926e-04, 1.9930e-04,\n 7.7382e-05, 3.6586e-04, 1.0793e-04, 1.9110e-04, 1.1869e-04, 2.3691e-04,\n 3.1011e-04, 2.4526e-04, 1.7028e-04, 1.9371e-04, 1.3228e-04, 1.4016e-04,\n 2.6821e-04, 1.0347e-04, 2.4896e-04, 1.8030e-04, 3.4258e-04, 2.7466e-04,\n 2.2618e-04, 4.7828e-04, 1.6696e-04, 1.7953e-04, 2.5795e-04, 1.5322e-04,\n 1.6336e-04, 1.5822e-04, 1.7044e-04, 5.0364e-04, 1.5863e-04, 1.0205e-04,\n 1.6777e-04, 5.0493e-04, 2.2519e-04, 3.3477e-04, 2.1314e-04, 1.1443e-04,\n 4.6340e-04, 2.0505e-04, 2.3724e-04, 1.4120e-04, 6.2408e-04, 4.7897e-05,\n 3.0895e-04, 2.7851e-04, 3.0960e-04, 6.3664e-05, 3.4556e-04, 2.1116e-04,\n 2.1070e-04, 5.4246e-04, 4.6556e-04, 2.9468e-04, 2.8569e-04, 1.7059e-04,\n 2.2271e-04, 1.7695e-04, 4.4793e-04, 2.6640e-04, 5.9552e-04, 2.2076e-04,\n 2.2906e-04, 2.2036e-04, 3.0174e-04, 1.8041e-04, 1.8556e-04, 1.1332e-04,\n 1.8052e-04, 2.3965e-04, 3.1561e-04, 2.3224e-04, 2.7864e-04, 4.2261e-04,\n 2.3397e-04, 1.1920e-04, 2.9602e-04, 2.3204e-04, 1.7849e-04, 7.7175e-04,\n 1.9551e-04, 2.9520e-04, 3.0498e-04, 1.3052e-04, 2.3315e-04, 2.9927e-04,\n 2.8925e-04, 1.5309e-04, 1.9149e-04, 5.6916e-04, 2.9023e-04, 1.1442e-04,\n 3.2372e-04, 1.8358e-04, 1.1258e-04, 1.8443e-04, 2.1428e-04, 1.4148e-04,\n 2.5272e-04, 1.5668e-04, 1.9730e-04, 1.4082e-04, 1.2254e-04, 2.3561e-04,\n 4.1821e-04, 3.1596e-04, 2.7039e-04, 4.4715e-04, 2.4297e-04, 1.6631e-04,\n 2.5322e-04, 2.4010e-04, 2.2768e-04, 2.1600e-04, 3.9219e-04, 1.4547e-04,\n 8.8413e-05, 5.1652e-04, 2.7122e-04, 1.7136e-04, 2.0348e-04, 1.4006e-04,\n 1.2433e-04, 2.1051e-04, 4.1444e-04, 1.0535e-04, 1.7161e-04, 1.2728e-04,\n 2.9315e-04, 1.6218e-04, 1.7991e-04, 2.6256e-04, 2.2903e-04, 2.0051e-04,\n 1.2214e-04, 1.9033e-04, 2.0591e-04, 1.5912e-04, 1.8645e-04, 2.2093e-04,\n 1.2378e-04, 2.7316e-04, 1.2965e-04, 1.6620e-04, 8.2328e-05, 1.1669e-04,\n 1.0123e-04, 4.0359e-17], device='cuda:0')"
|
| 19 |
+
},
|
| 20 |
+
"3": {
|
| 21 |
+
"step": "tensor(37540.)",
|
| 22 |
+
"exp_avg": "tensor([ 2.9654e-03, -1.2500e-03, 6.7177e-04, -7.3940e-04, -7.5982e-04,\n 1.7752e-03, 1.7449e-03, -9.7975e-04, 2.0891e-03, 2.5210e-03,\n -6.2406e-03, -4.5456e-03, -4.5029e-03, -9.4336e-04, 1.5244e-03,\n 9.3528e-04, -2.0536e-03, 1.8397e-03, -2.7390e-03, 1.6341e-03,\n -1.2807e-03, 5.8737e-04, -1.0764e-03, -7.0466e-04, 2.3656e-03,\n -4.7143e-04, 3.5914e-03, -4.5747e-03, 2.0775e-03, 7.9802e-04,\n 2.2991e-04, 7.7350e-05, 5.3702e-05, 2.0075e-03, -3.2593e-04,\n -2.4859e-04, -2.7950e-03, 1.5548e-03, -1.4625e-03, 1.5546e-03,\n -2.8444e-03, -1.1736e-03, 5.6052e-45, -2.3945e-03, 1.5461e-04,\n 1.7329e-03, 1.4595e-03, 3.7237e-04, -7.9500e-04, 5.8678e-04,\n -3.1426e-03, -1.0700e-03, 4.0724e-04, -4.4253e-03, 4.6633e-03,\n 1.8100e-03, -1.6056e-03, 2.6262e-03, 8.3645e-04, 3.6053e-04,\n -4.0663e-03, 2.0107e-03, -9.1820e-05, 7.4408e-04, 4.1069e-04,\n -2.2284e-03, -1.5178e-03, 3.6400e-04, 1.2675e-03, 4.1979e-04,\n 3.0079e-03, -6.9357e-04, -1.5372e-03, 1.0079e-03, 4.9819e-04,\n 6.4724e-04, 4.6711e-04, -2.4747e-04, -4.7180e-04, 4.5655e-04,\n -9.9134e-05, -5.4234e-04, 9.4731e-04, -4.1930e-04, 1.5845e-04,\n -2.7185e-03, 1.9793e-03, 1.0406e-03, -5.8334e-04, -1.9598e-03,\n 8.4691e-04, 3.2130e-03, 2.1996e-03, 3.5836e-03, 8.0343e-05,\n -2.8376e-03, 4.8090e-04, -1.3876e-04, -1.1314e-03, 8.3842e-04,\n -2.0312e-03, 1.8013e-05, 1.0487e-03, 3.3339e-04, 1.3333e-03,\n -1.9556e-03, 6.6768e-04, 6.5251e-04, 7.7625e-04, 2.6057e-03,\n 3.1880e-03, -1.6231e-03, 1.4419e-03, -1.7826e-04, 6.1498e-04,\n -1.1591e-04, -4.0203e-03, 2.7931e-04, 8.3157e-04, -4.6821e-04,\n 1.2144e-03, 9.9537e-04, -5.9598e-03, -4.5128e-03, -1.0309e-03,\n -6.3044e-04, 1.1471e-03, -2.1561e-03, 2.3701e-03, 2.7188e-03,\n -5.5347e-03, 6.8213e-04, -6.1888e-03, 1.1841e-03, -3.4733e-03,\n -2.5422e-04, -1.8167e-03, -1.6240e-03, -7.3808e-05, -2.5507e-04,\n 3.7851e-04, -9.0110e-04, 1.0675e-03, 8.5626e-04, 2.9827e-03,\n 1.2385e-03, 5.6850e-04, 1.2467e-03, -1.6063e-03, 9.3219e-04,\n 2.5024e-03, 1.4797e-03, -1.7998e-03, -2.6286e-03, -1.8197e-03,\n -2.7006e-03, -4.9250e-03, -3.7959e-03, -8.8528e-04, 1.9198e-03,\n -3.7084e-03, -7.8276e-04, -2.8491e-03, -1.8720e-03, 4.1061e-04,\n 7.4819e-03, 7.5865e-04, 1.6720e-03, -6.4197e-04, 7.7431e-05,\n 3.3539e-03, 2.8197e-03, 1.5837e-05, 6.4552e-03, 2.0400e-03,\n 5.4777e-04, -1.8164e-03, 5.1335e-04, 8.0442e-04, -2.1017e-03,\n 6.8817e-04, 1.7089e-03, -1.1314e-03, 5.8812e-04, 2.0976e-03,\n 1.1133e-03, -4.8370e-03, 1.9565e-03, 1.3459e-03, -9.5166e-04,\n 1.5946e-04, 2.1475e-03, 5.8281e-04, 1.1419e-03, 8.7814e-04,\n 9.3691e-04, 8.8815e-04, 1.0692e-05, -3.1061e-04, 6.0746e-04,\n -1.9952e-03, -2.2172e-03, 8.9900e-04, -4.3706e-04, -6.2925e-05,\n 6.1800e-04, 4.5387e-04, 1.4552e-04, 5.8213e-04, -2.8191e-03,\n -1.2993e-04, 1.0536e-04, -8.5295e-04, 8.7465e-04, 8.4404e-04,\n 2.7448e-03, -1.5758e-03, -8.8245e-04, -1.1985e-03, -9.5383e-04,\n -1.1411e-04, -4.3344e-04, -9.2405e-04, -3.1591e-03, 2.9990e-04,\n 3.6385e-03, 2.2705e-03, -1.6511e-03, 1.5358e-05, 2.6517e-03,\n 1.3148e-03, 1.3778e-03, 3.0068e-03, 1.1435e-03, 2.0588e-03,\n 2.4534e-04, 3.1631e-03, 5.6052e-45, -1.8618e-03, -1.2365e-04,\n -7.9451e-04, -1.6664e-03, -6.7638e-04, 9.1296e-04, -3.2864e-03,\n -1.4733e-03, -7.0765e-04, 1.3328e-03, 4.2814e-04, 2.4013e-04,\n -3.6688e-03, -1.3003e-03, 1.4309e-04, 1.6047e-03, 1.0999e-03,\n 8.7496e-05, 3.0094e-03, 4.0373e-04, 7.2174e-04, 2.9557e-03,\n -3.8180e-04, 3.2846e-03, 8.2861e-04, 1.8516e-03, 9.8604e-04,\n 1.6333e-04, -1.1304e-03, -1.9388e-04, -4.3127e-04, -4.6903e-04,\n 8.7816e-04, 4.7973e-04, -1.2177e-04, 3.9604e-03, -5.2164e-04,\n 2.7361e-03, -3.6314e-03, -1.4577e-03, 1.4691e-03, 1.9558e-04,\n 2.0281e-03, 2.7971e-04, -1.2300e-03, -4.4149e-03, 2.2651e-03,\n -3.1918e-03, -1.3312e-03, 2.4841e-03, -1.3331e-04, 1.6157e-03,\n -9.2056e-04, -8.4416e-06, -2.0884e-03, -1.2038e-04, 1.0389e-03,\n -1.6404e-03, -4.1794e-03, 8.7600e-04, 7.7662e-03, 8.1827e-04,\n 2.2383e-03, 1.4495e-03, 1.7044e-03, 7.4430e-04, -1.2990e-03,\n 4.9508e-04, -1.4980e-03, 1.1855e-03, -2.9426e-03, -7.9709e-04,\n -1.2182e-03, -1.3378e-04, -1.8476e-05, -1.5872e-03, 1.8693e-04,\n -1.6129e-03, 8.8492e-04, -8.9101e-04, 1.5152e-03, -6.0780e-04,\n 3.6233e-03, -2.1165e-03, 1.6553e-03, -1.1075e-03, -6.5599e-04,\n 2.8118e-03, -1.1916e-03, 3.8338e-03, -1.0555e-03, 2.7312e-03,\n -3.3549e-03, -2.9604e-03, -2.4345e-04, 1.2171e-04, -1.4098e-03,\n -1.7763e-03, -1.0314e-03, -2.7714e-03, 2.8293e-03, 1.9046e-03,\n 1.0453e-03, 1.9885e-03, 2.6466e-03, -1.0093e-03, 2.3123e-03,\n -1.9669e-04, -3.9782e-03, 4.5685e-05, 6.9760e-04, 1.6071e-03,\n 1.2931e-03, -1.4338e-03, 1.4521e-03, -1.0253e-03, 2.4127e-03,\n -2.5139e-04, 1.1290e-03, -1.1508e-03, 1.0417e-03, -7.3506e-03,\n 2.1850e-03, -3.0121e-03, -1.7393e-03, -3.0228e-05, -6.1725e-05,\n 2.8277e-03, -2.3106e-04, -7.0824e-04, -6.3497e-04, -2.9783e-04,\n -2.4159e-04, 2.1017e-04, 1.4699e-03, 3.9408e-04, 3.1292e-03,\n 2.7912e-04, 1.5218e-04, -5.4368e-04, -8.5459e-04, 2.0251e-03,\n 1.7655e-03, 8.8089e-04, -2.2715e-03, -1.6771e-03, 1.3919e-03,\n -1.2667e-04, 8.8277e-05, 1.0224e-03, -2.7333e-03, 1.4267e-03,\n 8.7945e-04, 6.5099e-04, -1.2855e-03, -6.2671e-03, -1.3479e-03,\n 1.6314e-03, 6.0690e-04, -4.1661e-03, 1.8716e-03, 1.6917e-03,\n -3.3893e-03, 1.3876e-03, 2.0458e-04, -1.5693e-04, -1.0990e-03,\n 2.3409e-04, -2.0406e-03, -6.8134e-04, -2.7946e-03, 1.7381e-04,\n 2.2607e-04, 2.8298e-03, -4.3562e-03, -5.9843e-03, -2.4289e-03,\n -7.1835e-05, -2.5509e-03, -1.2985e-04, -3.5420e-03, -2.7050e-03,\n 5.2543e-04, -1.5168e-03, 2.7723e-03, 1.8018e-03, 3.0947e-03,\n -2.0555e-03, -2.6250e-03, -1.8687e-03, 1.6658e-03, -3.6694e-05,\n -1.7492e-03, 1.0643e-03, -1.1985e-03, 1.1827e-04, 9.3341e-04,\n -2.2989e-03, -1.4310e-03, 1.3751e-03, -2.8277e-03, 1.6538e-04,\n -9.3622e-04, 2.0760e-03, -3.5874e-04, -2.7065e-03, -6.0228e-04,\n -8.3831e-04, 8.0319e-04, 1.2792e-04, -2.3449e-04, 6.3809e-04,\n -1.3619e-03, 5.5142e-04, 2.2855e-04, -1.3032e-03, 2.0694e-04,\n 1.0944e-04, 3.1462e-03, -5.6520e-04, -1.1738e-03, 4.8348e-03,\n -4.2515e-04, -3.3240e-03, 1.4806e-03, 3.6558e-03, 2.9508e-03,\n 6.4904e-04, 1.8356e-03, 3.4222e-03, 9.6802e-04, 2.5404e-04,\n 3.0471e-04, 4.9416e-03, -2.5328e-03, 3.8956e-03, -3.2399e-03,\n -2.4516e-05, 1.2093e-03, 2.6821e-03, -4.7145e-04, -1.0480e-03,\n -6.4059e-04, 8.9256e-04, -1.0067e-03, 2.2841e-04, -6.9117e-04,\n -3.8462e-03, 2.7132e-04, 3.8398e-04, 3.8502e-03, 2.4841e-03,\n -1.1611e-03, 9.9348e-04, -1.5761e-03, 2.4114e-03, 4.6870e-04,\n 5.8476e-04, 2.7244e-03, -3.5715e-04, 1.5854e-03, 1.2101e-03,\n 4.1145e-04, -1.7686e-03, 1.7500e-03, -2.0469e-03, 3.9759e-04,\n 2.3834e-03, 9.0653e-04, -1.8492e-03, 7.4888e-04, 1.4753e-04,\n 9.3660e-04, 5.6052e-45], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([6.2729e-05, 4.4333e-05, 3.1077e-05, 3.4052e-05, 2.7279e-05, 7.2108e-05,\n 3.8686e-05, 8.2701e-05, 5.4433e-05, 6.5642e-05, 8.3970e-05, 7.9616e-05,\n 9.6740e-05, 4.2349e-05, 6.2963e-05, 5.7047e-05, 4.5995e-05, 5.3231e-05,\n 4.5730e-05, 5.9657e-05, 5.4596e-05, 2.4782e-05, 2.8948e-05, 6.8218e-05,\n 1.3977e-04, 6.1583e-05, 4.0711e-05, 7.6685e-05, 5.8043e-05, 5.4759e-05,\n 3.2369e-05, 3.8383e-05, 6.0910e-05, 4.6578e-05, 3.0328e-05, 4.4264e-05,\n 3.4206e-05, 3.1130e-05, 3.3753e-05, 2.8503e-05, 2.9537e-05, 2.8725e-05,\n 6.6022e-22, 7.8334e-05, 1.2460e-04, 3.2401e-05, 4.3032e-05, 1.6217e-05,\n 7.0750e-05, 1.1121e-04, 1.1187e-04, 2.7177e-05, 3.4294e-05, 5.0045e-05,\n 6.9973e-05, 3.9644e-05, 5.0079e-05, 5.4853e-05, 4.0343e-05, 2.6795e-05,\n 8.1389e-05, 3.5616e-05, 4.4176e-05, 3.4971e-05, 2.9663e-05, 3.2874e-05,\n 4.5097e-05, 3.0239e-05, 4.9672e-05, 3.8631e-05, 5.0455e-05, 4.4442e-05,\n 3.8013e-05, 4.3970e-05, 4.1420e-05, 4.3356e-05, 3.8724e-05, 4.0132e-05,\n 5.4084e-05, 6.1869e-05, 5.5120e-05, 4.0818e-05, 3.6132e-05, 2.3822e-05,\n 3.3047e-05, 6.4927e-05, 7.5361e-05, 3.7264e-05, 2.2884e-05, 7.0265e-05,\n 3.9706e-05, 5.0605e-05, 6.5522e-05, 5.2267e-05, 5.5707e-05, 4.5817e-05,\n 6.5833e-05, 4.6313e-05, 5.9784e-05, 4.1419e-05, 4.2074e-05, 5.9689e-05,\n 6.8973e-05, 4.7344e-05, 6.7258e-05, 1.0344e-04, 4.2145e-05, 1.4070e-04,\n 4.2273e-05, 3.7388e-05, 6.1570e-05, 6.4687e-05, 4.9012e-05, 4.7745e-05,\n 5.2351e-05, 3.6395e-05, 5.1096e-05, 7.3089e-05, 2.9041e-05, 5.0453e-05,\n 2.6555e-05, 6.1478e-05, 6.4660e-05, 1.1795e-04, 4.6701e-05, 6.9060e-05,\n 4.6775e-05, 6.5362e-05, 7.5110e-05, 1.3395e-04, 6.1148e-05, 3.7917e-05,\n 6.4346e-05, 4.0914e-05, 1.4192e-04, 3.5670e-05, 5.3494e-05, 5.1718e-05,\n 3.6596e-05, 3.0622e-05, 3.1177e-05, 4.5800e-05, 4.5308e-05, 8.4482e-05,\n 4.1655e-05, 4.5842e-05, 2.5966e-05, 8.2680e-05, 4.9106e-05, 1.1336e-04,\n 2.3651e-05, 7.1250e-05, 2.9957e-05, 7.9464e-05, 2.9119e-05, 3.7493e-05,\n 7.6670e-05, 4.9856e-05, 4.0373e-05, 3.6331e-05, 7.3471e-05, 2.0065e-05,\n 6.8743e-05, 4.6686e-05, 1.8784e-05, 8.8314e-05, 4.6652e-05, 2.3861e-05,\n 1.6942e-05, 5.1369e-05, 2.9892e-05, 6.7148e-05, 4.3932e-05, 9.8337e-05,\n 5.0750e-05, 2.3994e-05, 9.8718e-05, 2.5626e-05, 3.3166e-05, 6.8502e-05,\n 4.1349e-05, 3.7815e-05, 1.1930e-04, 4.9022e-05, 8.1295e-05, 3.4438e-05,\n 1.5617e-04, 3.7773e-05, 4.3864e-05, 1.8460e-05, 3.9487e-05, 5.2322e-05,\n 4.7456e-05, 3.4283e-05, 2.8629e-05, 2.0480e-05, 7.5258e-05, 2.9311e-05,\n 1.7304e-05, 2.4513e-05, 4.1889e-05, 5.0966e-05, 4.3860e-05, 2.5621e-05,\n 2.1996e-05, 4.2767e-05, 2.9744e-05, 3.4281e-05, 5.7915e-05, 2.7039e-05,\n 5.5558e-05, 7.3126e-05, 2.3439e-05, 3.1467e-05, 5.2092e-05, 2.8854e-05,\n 2.6403e-05, 5.2331e-05, 4.7859e-05, 7.5917e-05, 3.0112e-05, 2.9713e-05,\n 3.4628e-05, 2.7058e-05, 4.3500e-05, 7.8398e-05, 2.7954e-05, 2.3107e-05,\n 2.1178e-05, 4.0213e-05, 3.5305e-05, 3.0624e-05, 1.0679e-04, 5.1988e-05,\n 2.8095e-05, 4.0067e-05, 4.1488e-05, 1.8333e-22, 3.9406e-05, 1.1595e-04,\n 3.1886e-05, 8.5844e-05, 5.6648e-05, 7.2903e-05, 8.7551e-05, 6.5247e-05,\n 4.3422e-05, 5.6083e-05, 6.4127e-05, 3.5430e-05, 8.4163e-05, 2.4085e-05,\n 7.8642e-05, 6.9603e-05, 3.0707e-05, 4.8541e-05, 5.8803e-05, 5.2446e-05,\n 3.9207e-05, 4.5288e-05, 3.8448e-05, 6.5975e-05, 4.1035e-05, 6.2476e-05,\n 4.6488e-05, 4.5823e-05, 2.9559e-05, 4.9863e-05, 3.7482e-05, 6.2647e-05,\n 7.1853e-05, 5.2975e-05, 2.2530e-05, 5.0922e-05, 3.0176e-05, 3.8802e-05,\n 4.9941e-05, 7.4141e-05, 4.2046e-05, 4.3583e-05, 5.9335e-05, 2.6532e-05,\n 2.9991e-05, 4.7343e-05, 3.7144e-05, 5.3185e-05, 5.0314e-05, 2.7546e-05,\n 9.7004e-05, 4.8091e-05, 2.7099e-05, 2.8953e-05, 4.2027e-05, 2.5580e-05,\n 4.1548e-05, 2.9381e-05, 4.6479e-05, 5.1363e-05, 1.6340e-04, 4.2025e-05,\n 3.1336e-05, 3.0501e-05, 5.2541e-05, 3.5849e-05, 5.8174e-05, 3.2296e-05,\n 4.0301e-05, 4.6502e-05, 1.2870e-04, 6.3613e-05, 7.6914e-05, 4.6282e-05,\n 2.9231e-05, 2.9802e-05, 4.8734e-05, 4.5345e-05, 4.3091e-05, 2.1059e-05,\n 3.5905e-05, 6.6922e-05, 1.4839e-04, 8.2585e-05, 5.3239e-05, 6.1636e-05,\n 8.2469e-05, 1.0995e-04, 5.5022e-05, 8.6318e-05, 4.1882e-05, 4.6299e-05,\n 9.9336e-05, 6.4680e-05, 1.5178e-05, 3.6370e-05, 3.6156e-05, 4.1726e-05,\n 2.9769e-05, 2.6800e-05, 6.4327e-05, 4.4580e-05, 4.0738e-05, 3.2037e-05,\n 4.1723e-05, 7.5688e-05, 6.3735e-05, 8.1923e-05, 6.1372e-05, 2.9413e-05,\n 4.6944e-05, 7.1949e-05, 4.2955e-05, 4.3945e-05, 4.8260e-05, 3.2438e-05,\n 4.8290e-05, 3.6225e-05, 4.3701e-05, 2.6714e-05, 3.7597e-05, 2.7447e-05,\n 2.2366e-05, 1.2396e-04, 1.1124e-04, 5.4965e-05, 5.0020e-05, 3.7697e-05,\n 2.1370e-05, 6.5819e-05, 2.3125e-05, 3.0872e-05, 2.7789e-05, 4.6312e-05,\n 5.1536e-05, 5.6586e-05, 5.2378e-05, 3.5628e-05, 2.9161e-05, 2.5657e-05,\n 4.1491e-05, 3.0286e-05, 4.6605e-05, 4.0163e-05, 8.0634e-05, 3.8697e-05,\n 4.6295e-05, 1.4436e-04, 3.4686e-05, 4.5464e-05, 4.6173e-05, 4.0683e-05,\n 4.4187e-05, 3.9909e-05, 4.6628e-05, 7.1099e-05, 3.8339e-05, 2.0202e-05,\n 2.7379e-05, 7.7090e-05, 5.6108e-05, 8.0250e-05, 4.2780e-05, 2.6165e-05,\n 7.8597e-05, 3.4962e-05, 4.8522e-05, 3.6983e-05, 1.0894e-04, 1.2641e-05,\n 6.7530e-05, 7.3210e-05, 5.4789e-05, 3.2205e-05, 9.2534e-05, 6.2213e-05,\n 3.8130e-05, 4.9534e-05, 8.9462e-05, 7.3357e-05, 6.1491e-05, 4.6982e-05,\n 5.3087e-05, 3.6519e-05, 6.8387e-05, 4.9972e-05, 7.3743e-05, 4.3424e-05,\n 6.7101e-05, 4.3545e-05, 4.9133e-05, 3.8087e-05, 3.9684e-05, 2.8928e-05,\n 4.7111e-05, 5.3263e-05, 4.9296e-05, 4.4246e-05, 5.5517e-05, 6.7320e-05,\n 4.5732e-05, 2.8358e-05, 5.7240e-05, 5.3696e-05, 4.0234e-05, 2.0628e-04,\n 6.5698e-05, 4.6208e-05, 6.2890e-05, 4.0726e-05, 6.1134e-05, 5.0005e-05,\n 4.8057e-05, 3.4026e-05, 4.6762e-05, 1.2719e-04, 5.9624e-05, 2.3722e-05,\n 9.5827e-05, 4.0155e-05, 2.4926e-05, 4.3899e-05, 4.8791e-05, 3.8747e-05,\n 3.9857e-05, 4.4084e-05, 4.7526e-05, 3.0582e-05, 2.9260e-05, 5.5844e-05,\n 6.4050e-05, 6.3035e-05, 4.5747e-05, 1.5280e-04, 6.0207e-05, 5.5374e-05,\n 7.6647e-05, 4.9293e-05, 5.5278e-05, 4.5964e-05, 5.9702e-05, 2.9280e-05,\n 2.5010e-05, 7.4728e-05, 5.6662e-05, 3.1288e-05, 4.0234e-05, 4.6612e-05,\n 3.8262e-05, 4.2115e-05, 8.8063e-05, 3.7282e-05, 3.6479e-05, 3.5727e-05,\n 5.1877e-05, 4.3512e-05, 3.0860e-05, 3.7790e-05, 4.9322e-05, 4.4400e-05,\n 3.1234e-05, 4.0693e-05, 4.5039e-05, 3.1518e-05, 3.8174e-05, 4.1496e-05,\n 2.8074e-05, 5.4616e-05, 3.2260e-05, 4.2214e-05, 3.3350e-05, 5.6877e-05,\n 3.0903e-05, 6.4321e-19], device='cuda:0')"
|
| 24 |
+
},
|
| 25 |
+
"4": {
|
| 26 |
+
"step": "tensor(37540.)",
|
| 27 |
+
"exp_avg": "tensor([[ 1.9007e-06, 9.7005e-07, 7.0260e-06, ..., 4.5168e-06,\n -9.2743e-06, 5.6052e-45],\n [ 3.7199e-07, 6.2163e-06, 1.0900e-06, ..., 2.4610e-05,\n 7.3835e-06, 5.6052e-45],\n [-6.0830e-06, -4.6629e-06, 5.5125e-06, ..., -3.8044e-06,\n -3.9188e-06, -5.6052e-45],\n ...,\n [-3.2157e-06, -1.7265e-06, 6.4554e-06, ..., -1.2410e-05,\n 2.4787e-06, 5.6052e-45],\n [-3.2310e-06, -8.4949e-06, 1.3243e-06, ..., -9.6998e-06,\n -1.7772e-07, 5.6052e-45],\n [-6.6307e-06, 1.2358e-05, -1.9269e-05, ..., -1.2245e-05,\n 9.1702e-06, 5.6052e-45]], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([[7.0965e-10, 1.2184e-09, 6.5520e-10, ..., 1.5346e-09, 4.7218e-10,\n 1.7217e-23],\n [9.9317e-10, 2.1611e-09, 1.5295e-09, ..., 4.2201e-09, 2.2258e-09,\n 9.6818e-23],\n [9.1176e-10, 1.4187e-09, 2.2391e-09, ..., 3.8154e-09, 7.2815e-10,\n 8.8141e-23],\n ...,\n [1.4452e-09, 3.0005e-09, 1.2277e-09, ..., 5.2627e-09, 9.9567e-10,\n 9.8815e-23],\n [1.7322e-09, 4.1680e-09, 1.2010e-09, ..., 3.2851e-09, 1.7872e-09,\n 1.7587e-22],\n [1.9045e-09, 1.9465e-09, 1.1972e-09, ..., 3.6455e-09, 1.6017e-09,\n 1.0154e-22]], device='cuda:0')"
|
| 29 |
+
},
|
| 30 |
+
"5": {
|
| 31 |
+
"step": "tensor(30032.)",
|
| 32 |
+
"exp_avg": "tensor([[-2.9516e-06, -1.7066e-06, -2.6915e-07, ..., -6.1437e-06,\n 2.7092e-06, 5.6052e-45],\n [ 7.8299e-06, 1.0090e-05, 4.3490e-06, ..., 2.7084e-05,\n 1.3586e-05, 5.6052e-45],\n [ 1.8406e-06, -3.2558e-06, 7.3604e-06, ..., -5.5320e-06,\n 7.9952e-06, -5.6052e-45],\n ...,\n [-8.2202e-06, -1.6279e-05, -7.7270e-07, ..., 4.4149e-06,\n -4.4174e-06, 5.6052e-45],\n [ 1.3605e-07, -7.5159e-06, -7.6733e-06, ..., -1.3419e-05,\n 1.7526e-06, 5.6052e-45],\n [-1.2357e-05, -8.5573e-06, 1.0434e-06, ..., -7.6816e-06,\n -4.2162e-07, 5.6052e-45]], device='cuda:0')",
|
| 33 |
+
"exp_avg_sq": "tensor([[7.9581e-10, 1.0342e-09, 5.1092e-10, ..., 1.0601e-09, 6.5026e-10,\n 7.5295e-23],\n [6.8033e-10, 2.1816e-09, 1.3876e-09, ..., 5.0245e-09, 1.9835e-09,\n 4.9246e-22],\n [8.0926e-10, 1.8100e-09, 1.5147e-09, ..., 4.4298e-09, 1.0443e-09,\n 1.4400e-22],\n ...,\n [1.8006e-09, 1.4634e-09, 1.7682e-09, ..., 7.0203e-09, 1.2593e-09,\n 2.9922e-23],\n [9.6089e-10, 2.1370e-09, 9.7935e-10, ..., 2.5580e-09, 1.0563e-09,\n 1.0856e-21],\n [1.9346e-09, 1.4321e-09, 1.0958e-09, ..., 2.6224e-09, 1.9659e-09,\n 1.0386e-23]], device='cuda:0')"
|
| 34 |
+
},
|
| 35 |
+
"6": {
|
| 36 |
+
"step": "tensor(30032.)",
|
| 37 |
+
"exp_avg": "tensor([ 0.0016, -0.0016], device='cuda:0')",
|
| 38 |
+
"exp_avg_sq": "tensor([6.3627e-06, 6.3627e-06], device='cuda:0')"
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"param_groups": [
|
| 42 |
+
{
|
| 43 |
+
"lr": 0.01,
|
| 44 |
+
"name": "shared",
|
| 45 |
+
"betas": [
|
| 46 |
+
0.9,
|
| 47 |
+
0.999
|
| 48 |
+
],
|
| 49 |
+
"eps": 1e-08,
|
| 50 |
+
"weight_decay": 1e-05,
|
| 51 |
+
"amsgrad": false,
|
| 52 |
+
"maximize": false,
|
| 53 |
+
"foreach": null,
|
| 54 |
+
"capturable": false,
|
| 55 |
+
"differentiable": false,
|
| 56 |
+
"fused": null,
|
| 57 |
+
"decoupled_weight_decay": true,
|
| 58 |
+
"initial_lr": 0.01,
|
| 59 |
+
"params": [
|
| 60 |
+
0,
|
| 61 |
+
1,
|
| 62 |
+
2,
|
| 63 |
+
3
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"lr": 0.01,
|
| 68 |
+
"name": "scale_256",
|
| 69 |
+
"betas": [
|
| 70 |
+
0.9,
|
| 71 |
+
0.999
|
| 72 |
+
],
|
| 73 |
+
"eps": 1e-08,
|
| 74 |
+
"weight_decay": 1e-05,
|
| 75 |
+
"amsgrad": false,
|
| 76 |
+
"maximize": false,
|
| 77 |
+
"foreach": null,
|
| 78 |
+
"capturable": false,
|
| 79 |
+
"differentiable": false,
|
| 80 |
+
"fused": null,
|
| 81 |
+
"decoupled_weight_decay": true,
|
| 82 |
+
"initial_lr": 0.01,
|
| 83 |
+
"params": [
|
| 84 |
+
4
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"lr": 0.01,
|
| 89 |
+
"name": "scale_512",
|
| 90 |
+
"betas": [
|
| 91 |
+
0.9,
|
| 92 |
+
0.999
|
| 93 |
+
],
|
| 94 |
+
"eps": 1e-08,
|
| 95 |
+
"weight_decay": 1e-05,
|
| 96 |
+
"amsgrad": false,
|
| 97 |
+
"maximize": false,
|
| 98 |
+
"foreach": null,
|
| 99 |
+
"capturable": false,
|
| 100 |
+
"differentiable": false,
|
| 101 |
+
"fused": null,
|
| 102 |
+
"decoupled_weight_decay": true,
|
| 103 |
+
"initial_lr": 0.01,
|
| 104 |
+
"params": [
|
| 105 |
+
5
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"lr": 0.005,
|
| 110 |
+
"name": "fusion",
|
| 111 |
+
"betas": [
|
| 112 |
+
0.9,
|
| 113 |
+
0.999
|
| 114 |
+
],
|
| 115 |
+
"eps": 1e-08,
|
| 116 |
+
"weight_decay": 1e-05,
|
| 117 |
+
"amsgrad": false,
|
| 118 |
+
"maximize": false,
|
| 119 |
+
"foreach": null,
|
| 120 |
+
"capturable": false,
|
| 121 |
+
"differentiable": false,
|
| 122 |
+
"fused": null,
|
| 123 |
+
"decoupled_weight_decay": true,
|
| 124 |
+
"initial_lr": 0.005,
|
| 125 |
+
"params": [
|
| 126 |
+
6
|
| 127 |
+
]
|
| 128 |
+
}
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
"scheduler_state_dict": {
|
| 132 |
+
"T_0": 10,
|
| 133 |
+
"T_i": 20,
|
| 134 |
+
"T_mult": 2,
|
| 135 |
+
"eta_min": 1e-06,
|
| 136 |
+
"T_cur": 0,
|
| 137 |
+
"base_lrs": [
|
| 138 |
+
0.01,
|
| 139 |
+
0.01,
|
| 140 |
+
0.01,
|
| 141 |
+
0.005
|
| 142 |
+
],
|
| 143 |
+
"last_epoch": 10,
|
| 144 |
+
"_step_count": 0,
|
| 145 |
+
"_is_initial": false,
|
| 146 |
+
"_get_lr_called_within_step": false,
|
| 147 |
+
"_last_lr": [
|
| 148 |
+
0.01,
|
| 149 |
+
0.01,
|
| 150 |
+
0.01,
|
| 151 |
+
0.005
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
"metrics": {
|
| 155 |
+
"best_val_acc": 66.52333333333333,
|
| 156 |
+
"best_epoch": 9,
|
| 157 |
+
"scale_accuracies": {
|
| 158 |
+
"256": 65.962,
|
| 159 |
+
"512": 66.43466666666667
|
| 160 |
+
},
|
| 161 |
+
"training_history": {
|
| 162 |
+
"epochs": [
|
| 163 |
+
1,
|
| 164 |
+
2,
|
| 165 |
+
3,
|
| 166 |
+
4,
|
| 167 |
+
5,
|
| 168 |
+
6,
|
| 169 |
+
7,
|
| 170 |
+
8,
|
| 171 |
+
9,
|
| 172 |
+
10
|
| 173 |
+
],
|
| 174 |
+
"train_loss": [
|
| 175 |
+
3.9435249049420933,
|
| 176 |
+
3.3040703793567867,
|
| 177 |
+
4.3101251841734625,
|
| 178 |
+
4.185147669827233,
|
| 179 |
+
4.123004540650211,
|
| 180 |
+
4.076372152195373,
|
| 181 |
+
4.03838544134517,
|
| 182 |
+
4.0064857600531685,
|
| 183 |
+
3.9818663271297847,
|
| 184 |
+
3.9703641328395016
|
| 185 |
+
],
|
| 186 |
+
"train_acc": [
|
| 187 |
+
54.38726307083047,
|
| 188 |
+
59.31631083223343,
|
| 189 |
+
60.291879721118846,
|
| 190 |
+
61.30111583163371,
|
| 191 |
+
61.94625681117294,
|
| 192 |
+
62.46739626189768,
|
| 193 |
+
62.918183187671865,
|
| 194 |
+
63.32416721109218,
|
| 195 |
+
63.64023321445734,
|
| 196 |
+
63.87124655359788
|
| 197 |
+
],
|
| 198 |
+
"val_acc": [
|
| 199 |
+
61.635333333333335,
|
| 200 |
+
62.978,
|
| 201 |
+
64.12,
|
| 202 |
+
64.73133333333334,
|
| 203 |
+
65.312,
|
| 204 |
+
65.66133333333333,
|
| 205 |
+
66.03133333333334,
|
| 206 |
+
66.252,
|
| 207 |
+
66.38533333333334,
|
| 208 |
+
66.52333333333333
|
| 209 |
+
],
|
| 210 |
+
"scale_accs": {
|
| 211 |
+
"256": [
|
| 212 |
+
61.635333333333335,
|
| 213 |
+
62.978,
|
| 214 |
+
63.782,
|
| 215 |
+
64.34866666666667,
|
| 216 |
+
64.754,
|
| 217 |
+
65.17733333333334,
|
| 218 |
+
65.49933333333334,
|
| 219 |
+
65.74333333333334,
|
| 220 |
+
65.89133333333334,
|
| 221 |
+
65.962
|
| 222 |
+
],
|
| 223 |
+
"512": [
|
| 224 |
+
63.839333333333336,
|
| 225 |
+
64.522,
|
| 226 |
+
65.18466666666667,
|
| 227 |
+
65.52333333333333,
|
| 228 |
+
66.02266666666667,
|
| 229 |
+
66.17,
|
| 230 |
+
66.39533333333334,
|
| 231 |
+
66.43466666666667
|
| 232 |
+
]
|
| 233 |
+
},
|
| 234 |
+
"lr": [
|
| 235 |
+
0.00975530705321762,
|
| 236 |
+
0.00904518046337755,
|
| 237 |
+
0.00793913236883622,
|
| 238 |
+
0.00654543046337755,
|
| 239 |
+
0.005000500000000001,
|
| 240 |
+
0.0034555695366224513,
|
| 241 |
+
0.0020618676311637816,
|
| 242 |
+
0.0009558195366224509,
|
| 243 |
+
0.00024569294678237997,
|
| 244 |
+
0.01
|
| 245 |
+
]
|
| 246 |
+
}
|
| 247 |
+
},
|
| 248 |
+
"train_config": {
|
| 249 |
+
"name": "david_training",
|
| 250 |
+
"run_id": "20251012_235237",
|
| 251 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 252 |
+
"model_variant": [
|
| 253 |
+
"clip_vit_b16",
|
| 254 |
+
"clip_vit_laion_b32",
|
| 255 |
+
"clip_vit_b32"
|
| 256 |
+
],
|
| 257 |
+
"num_classes": 1000,
|
| 258 |
+
"preset": "small_fast",
|
| 259 |
+
"custom_config_path": null,
|
| 260 |
+
"num_classes_override": null,
|
| 261 |
+
"use_belly_override": null,
|
| 262 |
+
"belly_expand_override": null,
|
| 263 |
+
"progressive_training_override": true,
|
| 264 |
+
"scale_warmup_epochs_override": {
|
| 265 |
+
"256": 0,
|
| 266 |
+
"512": 2
|
| 267 |
+
},
|
| 268 |
+
"num_epochs": 10,
|
| 269 |
+
"batch_size": 1024,
|
| 270 |
+
"learning_rate": 0.01,
|
| 271 |
+
"weight_decay": 1e-05,
|
| 272 |
+
"warmup_epochs": 3,
|
| 273 |
+
"use_rose_loss": true,
|
| 274 |
+
"rose_initial_weight": 0.1,
|
| 275 |
+
"rose_max_weight": 0.8,
|
| 276 |
+
"rose_weight_schedule": "adaptive",
|
| 277 |
+
"use_cayley_loss": false,
|
| 278 |
+
"cayley_weight": 0.01,
|
| 279 |
+
"scale_loss_balance": null,
|
| 280 |
+
"use_mixed_precision": false,
|
| 281 |
+
"gradient_clip": 15.0,
|
| 282 |
+
"scheduler_type": "cosine_restarts",
|
| 283 |
+
"min_lr": 1e-06,
|
| 284 |
+
"freeze_strategy": "never",
|
| 285 |
+
"freeze_threshold": 90.0,
|
| 286 |
+
"unfreeze_on_plateau": true,
|
| 287 |
+
"patience": 10,
|
| 288 |
+
"track_gradients": true,
|
| 289 |
+
"gradient_scale_threshold": 1e-05,
|
| 290 |
+
"gradient_scale_multiplier": 10.0,
|
| 291 |
+
"log_interval": 50,
|
| 292 |
+
"val_interval": 1,
|
| 293 |
+
"save_interval": 5,
|
| 294 |
+
"log_fusion_weights": true,
|
| 295 |
+
"log_loss_components": true,
|
| 296 |
+
"save_format": "safetensors",
|
| 297 |
+
"hf_repo": "AbstractPhil/david-shared-space",
|
| 298 |
+
"upload_to_hub": true,
|
| 299 |
+
"base_dir": "./david_training",
|
| 300 |
+
"num_workers": 10,
|
| 301 |
+
"pin_memory": true,
|
| 302 |
+
"prefetch_factor": 4,
|
| 303 |
+
"persistent_workers": true
|
| 304 |
+
}
|
| 305 |
+
}
|