Update best_model_acc68.89_metadata.json - Run 20251012_191456
Browse files
weights/David-partial_shared-hierarchical_tree/20251012_191456/best_model_acc68.89_metadata.json
ADDED
|
@@ -0,0 +1,339 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 1,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(5006.)",
|
| 7 |
+
"exp_avg": "tensor([[ 2.4945e-05, 2.8583e-05, 5.7996e-06, ..., 4.9203e-05,\n 9.4608e-06, 1.0007e-05],\n [ 5.0163e-06, -1.0187e-05, 2.6593e-05, ..., -2.5859e-05,\n -5.9987e-06, -2.4653e-05],\n [-9.7157e-05, 6.3715e-05, 2.3175e-06, ..., 1.7711e-05,\n 1.7942e-05, 5.3639e-06],\n ...,\n [ 6.4589e-06, 3.2743e-05, -6.1635e-06, ..., -9.4605e-05,\n -4.5126e-05, -5.2149e-05],\n [-1.0949e-05, 2.5377e-06, -2.4514e-05, ..., 7.5178e-07,\n 1.8306e-05, -2.9749e-05],\n [ 6.2467e-05, 7.9811e-05, -1.9564e-07, ..., 1.9633e-05,\n -2.7604e-05, -1.6570e-05]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[9.3436e-09, 6.9791e-08, 1.5243e-08, ..., 4.7903e-08, 6.7782e-09,\n 8.0300e-09],\n [2.5165e-09, 2.8098e-09, 3.2698e-09, ..., 1.1725e-08, 1.4397e-09,\n 2.3582e-09],\n [1.8219e-08, 1.0245e-07, 3.6579e-08, ..., 1.1459e-08, 9.4262e-09,\n 1.1501e-08],\n ...,\n [1.8799e-08, 7.9136e-08, 2.0626e-08, ..., 6.0072e-08, 1.1901e-08,\n 1.4062e-08],\n [1.7674e-08, 6.4545e-08, 3.3053e-08, ..., 2.9884e-08, 8.2911e-09,\n 1.7677e-08],\n [2.0385e-08, 1.8839e-08, 1.1385e-08, ..., 5.7538e-08, 1.0799e-08,\n 1.5207e-08]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(5006.)",
|
| 12 |
+
"exp_avg": "tensor([ 6.5189e-04, -7.1051e-04, -1.4305e-03, 1.9414e-03, 5.2035e-04,\n 6.9420e-04, -2.9597e-04, 7.0099e-05, -5.2559e-05, 4.6450e-04,\n 8.6000e-04, -7.8125e-04, 2.1372e-04, 1.8099e-03, -1.0476e-03,\n -5.0883e-04, -1.1683e-04, -6.3845e-05, -2.5206e-04, 1.0121e-03,\n -2.2451e-03, -7.6322e-04, 3.8991e-04, -9.6693e-04, -1.1501e-04,\n -1.1887e-03, 3.6667e-04, 2.3975e-04, 1.3996e-03, -7.4921e-04,\n -1.2993e-03, 9.0697e-04, -5.3388e-05, -8.2591e-04, -2.1473e-03,\n 1.0016e-03, 1.0462e-03, 8.6768e-04, 7.5411e-04, -7.8278e-05,\n 5.5719e-04, -7.5977e-04, 1.4067e-03, -8.0512e-04, -1.1054e-05,\n 1.1898e-03, 1.2093e-03, 1.5809e-03, 5.6396e-04, 2.9676e-05,\n -1.1413e-03, -1.5573e-03, -3.0038e-04, -2.4875e-04, 3.1453e-04,\n 5.8076e-04, -8.7885e-04, -1.1203e-04, 1.2863e-03, 1.3307e-03,\n 2.3820e-03, 4.7987e-04, 1.0882e-04, 6.7568e-04, 1.5594e-05,\n -2.2197e-04, -1.3303e-04, 2.1896e-05, 2.7981e-04, -4.7938e-05,\n 7.9712e-04, -9.1202e-04, -1.2753e-03, -1.6697e-03, 1.1071e-03,\n 3.7477e-04, 4.4963e-04, 4.4479e-06, 1.8678e-03, -9.2108e-05,\n 4.1050e-04, -4.1316e-04, -1.2451e-04, 3.1338e-04, 5.6552e-04,\n -1.6674e-03, 1.0145e-04, -1.8505e-04, 1.4894e-03, 4.7282e-04,\n 5.8356e-04, 3.6385e-04, 1.8652e-04, 2.1698e-04, 4.1262e-03,\n 6.6601e-05, -7.6565e-04, 1.1419e-03, 7.2829e-04, -4.8682e-04,\n -7.0727e-04, 5.6052e-45, -1.3047e-04, 6.5306e-04, -6.6210e-04,\n -1.0467e-03, -3.9707e-04, -3.9044e-04, -2.4641e-04, 1.0740e-03,\n 1.0586e-03, 9.8950e-04, 2.0600e-03, 1.8854e-03, 1.1661e-04,\n -8.1481e-04, 4.5902e-04, -9.6471e-04, -2.2276e-05, 1.4548e-03,\n 1.2404e-03, 1.4142e-03, -2.6520e-04, -4.3335e-04, -1.6573e-03,\n -1.4431e-03, -5.4012e-04, 9.2068e-05, 5.6052e-45, -1.4128e-04,\n 1.1118e-03, 1.6925e-03, -1.6880e-03, -5.2946e-05, 4.7927e-04,\n -9.8447e-05, -7.2353e-04, 1.1960e-03, 4.1481e-05, 7.0459e-04,\n -1.2179e-03, 1.3286e-03, 7.1101e-05, -1.5891e-03, -2.1657e-06,\n -3.6245e-04, 4.8745e-04, 2.1289e-03, 1.2559e-05, -3.0457e-03,\n 1.1234e-03, 1.9658e-03, 2.0132e-03, -2.9459e-05, -1.3345e-03,\n -4.7598e-04, 3.2589e-04, -1.2442e-08, -1.5685e-04, 8.5466e-04,\n -2.0280e-04, 1.6825e-04, 5.0569e-04, 7.3244e-04, 6.9115e-05,\n 2.7672e-04, -1.7172e-03, -6.7847e-04, 4.6381e-04, -2.1091e-04,\n 8.2940e-04, 8.5688e-04, -1.0259e-03, 2.1065e-04, 1.4023e-04,\n -4.3696e-04, 4.0674e-04, 1.3580e-03, 1.0456e-05, 1.5495e-03,\n -2.6040e-03, 1.3985e-03, -5.4132e-04, -7.9450e-04, 2.1167e-03,\n 1.5349e-03, -1.1515e-03, -9.8597e-04, -8.9450e-04, 1.2456e-04,\n -9.0781e-04, -4.6420e-04, -8.1859e-04, 3.8945e-03, -1.4015e-04,\n -3.3920e-05, 1.1108e-04, -3.0562e-04, 1.4314e-04, 2.1522e-03,\n -1.1257e-03, -2.5942e-04, 1.1191e-03, -3.8017e-04, -3.4212e-04,\n 1.6151e-03, -1.7859e-04, -6.8698e-04, 1.3456e-04, -4.7191e-04,\n 3.3292e-04, -2.3099e-03, 1.5899e-03, 8.3514e-04, -1.9069e-04,\n -6.8321e-04, 7.6483e-04, 5.6293e-04, 1.2711e-03, 1.7713e-04,\n -1.2442e-04, 9.4057e-04, -9.2386e-04, 4.6319e-04, -9.0838e-04,\n -1.4721e-03, 5.1288e-04, -5.7677e-04, 2.6541e-04, -6.7415e-04,\n 1.4373e-03, 1.3000e-03, -7.3142e-05, -7.8408e-04, -1.6386e-03,\n -1.6526e-03, -9.5722e-05, -5.7263e-04, -2.3994e-03, -8.1021e-04,\n 1.0250e-03, -1.9659e-04, 6.1666e-04, 4.5573e-04, -4.3870e-04,\n 2.0935e-03, -4.8597e-04, -1.8064e-03, 1.3704e-03, 2.5407e-04,\n 2.3071e-04, 8.8840e-04, -2.1975e-07, -1.4479e-03, -2.1935e-03,\n -1.4187e-03, -4.9998e-04, 2.0937e-03, 4.5859e-04, 2.5223e-03,\n 7.2800e-04, 2.6186e-04, 2.8774e-05, 2.6032e-03, 2.0346e-03,\n 4.8668e-04, 4.6417e-04, -1.0419e-03, -1.7809e-03, -6.1811e-04,\n -1.8015e-04, 9.0251e-07, 8.7269e-04, 2.3964e-03, 4.3135e-04,\n 2.1743e-04, 4.0193e-04, -3.8224e-04, 1.0455e-03, -1.3756e-04,\n 3.1858e-04, -1.1087e-03, -1.9985e-05, -4.3221e-04, -7.2528e-04,\n -1.5845e-03, -1.7674e-04, 1.3734e-04, -5.3223e-04, -9.8870e-05,\n -1.6047e-04, -1.4818e-03, -1.2329e-03, 1.7255e-03, 1.4576e-03,\n -6.9947e-04, -2.6101e-04, -1.4707e-03, 1.0429e-03, -6.2400e-05,\n 2.7975e-03, 2.1640e-03, -9.2533e-04, -1.8589e-04, 2.8512e-04,\n 4.5956e-04, 3.6327e-03, 1.2006e-03, -1.3384e-03, 9.4675e-04,\n -1.8790e-03, 2.0817e-04, 4.4796e-04, 1.1094e-03, -1.1417e-03,\n -1.8372e-03, -1.4112e-04, 2.3386e-06, -3.9454e-04, 4.8592e-04,\n -9.4339e-04, -5.9765e-04, -1.8486e-03, -7.8984e-04, 1.4101e-03,\n -3.8516e-04, 2.0796e-05, 1.0199e-03, -1.6131e-03, -6.4597e-05,\n -1.5245e-03, 1.8736e-03, -2.0666e-04, -5.9829e-05, -2.5018e-03,\n -7.5196e-04, -1.2879e-03, -4.3388e-04, 6.8858e-04, -2.9177e-04,\n -6.0279e-04, -9.1825e-04, -1.3513e-04, -8.0038e-04, 7.8122e-05,\n 4.3521e-05, 2.3395e-04, -4.6801e-04, -7.1484e-04, 1.1958e-03,\n -9.1855e-04, 8.8337e-04, 2.7895e-04, 6.1369e-28, -1.6998e-04,\n -6.9151e-04, 9.2393e-05, 1.0324e-03, -1.9401e-04, -1.0470e-03,\n -4.7736e-05, -9.3115e-05, 1.0042e-03, 3.8688e-04, -9.6131e-04,\n -5.7601e-04, 4.5961e-04, 1.8876e-04, -5.5406e-04, 6.2793e-04,\n -2.2293e-04, 2.6269e-03, 1.0076e-03, -3.1629e-04, -7.6717e-05,\n -2.9771e-04, 7.2678e-04, 7.2881e-14, 7.0745e-04, -2.4006e-04,\n 4.0255e-04, 5.2918e-05, 1.0704e-03, 6.7928e-04, 5.6052e-45,\n -1.5777e-03, 1.3554e-03, 7.2343e-04, 4.3151e-04, 2.7892e-04,\n 8.0301e-04, 3.4783e-04, -1.5253e-04, 2.5509e-04, 1.3253e-03,\n 5.3646e-05, -1.0168e-03, 1.8952e-04, -8.7489e-05, 5.8136e-04,\n -1.2947e-03, 3.8990e-04, 1.5171e-04, 3.1552e-04, -7.2589e-04,\n 5.0418e-05, 5.6052e-45, 6.8908e-04, 1.0572e-03, -5.4851e-04,\n -1.9521e-03, 2.3649e-03, -9.3836e-04, 1.5906e-03, 4.6403e-04,\n 7.6945e-04, 7.8640e-04, -1.4185e-03, -4.8180e-04, 3.7332e-04,\n 3.5164e-04, 1.9074e-08, 1.8461e-05, -1.3031e-03, -6.3242e-04,\n 1.3381e-03, 6.8069e-08, -1.0791e-03, 1.0183e-03, 1.2875e-03,\n 7.6233e-05, -5.4052e-04, 2.2427e-04, 2.3588e-04, 6.4188e-05,\n -1.6505e-31, -1.1316e-03, 5.6672e-04, -1.9630e-04, 1.4817e-03,\n 6.9514e-04, 1.6017e-03, -1.2470e-03, 1.1512e-03, -1.2473e-04,\n -1.9821e-04, -2.9621e-03, -1.9568e-03, -1.0791e-04, 1.0993e-03,\n -8.4361e-05, -1.3636e-04, -1.0112e-04, 2.5217e-04, -8.1345e-04,\n -8.8385e-04, -2.3068e-04, 4.6726e-05, 7.7359e-04, -4.1774e-04,\n 2.2438e-03, -1.0630e-04, 1.6922e-03, 9.2605e-04, 8.8293e-04,\n 2.0475e-03, -5.9142e-04, -8.9083e-04, 2.8201e-03, 8.4123e-05,\n -1.2036e-03, -1.9306e-03, -6.7235e-04, -7.8752e-04, 9.0365e-04,\n 7.8674e-04, 2.2398e-04, 8.3169e-04, 1.3481e-03, 1.6370e-03,\n -1.7450e-03, -1.1667e-04, -1.2092e-03, 2.0910e-04, 5.1528e-04,\n -1.4521e-03, -1.2450e-03, -6.5476e-04, -2.0311e-04, -4.5985e-04,\n -6.6347e-04, -1.8412e-04, 3.0859e-04, 6.9281e-04, 2.0095e-03,\n 1.1197e-03, -1.3858e-03, -1.6818e-03, 1.8376e-04, 1.5341e-03,\n 7.5914e-04, -5.0937e-04, -1.8940e-03, -2.3800e-04, 2.6063e-04,\n 4.1237e-04, 2.6643e-04, -7.5102e-04, 1.1995e-03, -2.7288e-04,\n -1.1747e-03, 1.2808e-03, -9.3007e-05, -7.1923e-05, 4.3699e-04,\n -1.5616e-03, -1.1341e-03, -5.2708e-04, 1.3974e-04, 7.3514e-04,\n -2.1232e-03, 1.3575e-03, 3.1497e-04, 1.0942e-03, 1.3356e-03,\n -7.4756e-04, -1.4579e-04, 3.8552e-04, -6.0088e-04, 1.4417e-03,\n 1.4723e-03, -1.1006e-03, -1.5277e-03, 1.2828e-03, 3.1675e-04,\n -2.2979e-04, 4.3720e-04, 1.7474e-03, 1.6097e-03, 1.2459e-03,\n 2.7282e-04, -7.5149e-04, -2.3160e-04, -6.8068e-04, -6.3858e-04,\n 1.1431e-03, 9.0141e-04, 2.2351e-04, -7.5863e-04, -4.2522e-04,\n -4.4107e-04, 2.3071e-03, 1.2841e-04, -1.0844e-03, -1.5740e-03,\n 8.0811e-07, -1.3622e-03, -3.5392e-04, 1.5575e-03, 1.7245e-04,\n 6.6824e-04, -1.4506e-04, -7.3001e-04, -1.4978e-03, 1.9700e-03,\n -1.5599e-03, -5.2214e-04, -8.6114e-04, -5.4077e-04, 1.0560e-04,\n -1.1718e-05, -1.5254e-03, -2.0344e-03, -2.2467e-04, -2.1921e-03,\n -8.0442e-05, 3.1380e-04, -1.1894e-03, -1.3778e-03, 1.3122e-04,\n -1.4408e-03, -3.2851e-04, -1.8809e-04, -7.9899e-04, -1.0875e-04,\n -4.1589e-04, -5.3192e-04, -1.7143e-03, -1.3834e-03, 4.6131e-13,\n 8.3020e-04, -9.6248e-05, -1.3819e-04, -2.4561e-03, 2.0973e-04,\n 1.1817e-03, -8.3624e-04, 2.0075e-04, -9.9254e-04, -6.6476e-04,\n -9.5658e-04, -2.9242e-04, 1.8120e-03, -6.9308e-04, 9.6603e-06,\n -9.0515e-05, 3.2534e-03, 2.2501e-04, -2.3308e-03, 4.0607e-04,\n -3.3805e-04, 4.9326e-04, -1.9449e-03, 8.4684e-04, -1.7080e-03,\n 6.2298e-04, 4.4839e-03, -2.1967e-03, -1.8244e-04, 1.0756e-03,\n 4.6659e-04, -1.3493e-03, 6.1197e-25, -7.3935e-04, -2.2821e-03,\n -2.8836e-04, 1.7592e-03, 1.1080e-03, -1.1835e-03, 1.5744e-03,\n -3.5789e-04, 8.1785e-04, -3.4487e-04, -1.2563e-03, 5.7122e-05,\n 1.9147e-03, 6.4978e-04, -1.8645e-03, -8.1369e-04, 2.8914e-03,\n 1.0943e-03, 6.2041e-04, -1.6002e-03, 2.1477e-03, 1.0098e-03,\n 1.2841e-04, 1.0243e-03, 6.4684e-04, -1.6595e-03, -1.5281e-04,\n -1.1172e-03, -1.1165e-03, -3.8625e-04, -6.0410e-04, -8.2467e-05,\n -3.8791e-04, -1.2015e-03, -8.2917e-04, -1.9886e-03, -1.7386e-04,\n 4.8431e-04, -1.0896e-03, -7.4898e-04, -1.1886e-04, -1.2922e-03,\n 2.8019e-04, 1.3838e-03, -8.7716e-04, 7.2130e-04, 8.3694e-05,\n -1.9247e-05, -4.2464e-05, 2.8893e-05, 9.4955e-04, 2.0728e-04,\n 2.3361e-04, -4.4405e-04, -5.1006e-04, 5.8838e-04, -2.0349e-03,\n -1.9926e-03, 4.7516e-05, -6.7408e-04, 3.7281e-04, -2.3719e-03,\n 3.6137e-04, -7.3509e-04, -8.5889e-04, 2.6751e-04, -5.2877e-04,\n -4.0062e-04, 5.6052e-45, 4.2316e-04, 4.5598e-04, 9.0798e-04,\n 2.9711e-03, -6.2872e-04, 2.5224e-03, -1.8471e-03, -9.3842e-04,\n 1.0432e-04, -2.0437e-03, -4.2850e-04, -3.8544e-04, -1.2290e-03,\n -3.4325e-03, -5.0140e-04, -1.5459e-03, -6.6303e-04, 3.7786e-04,\n 8.2279e-04, 4.1820e-04, 1.0652e-03, -8.9979e-06, -5.8546e-04,\n 3.4113e-04, -2.7128e-04, 2.2699e-04, -3.3871e-03, 7.0352e-04,\n -1.6438e-04, 8.3765e-04, -1.5710e-04, -2.5436e-04, 1.0526e-03,\n -1.0777e-03, 6.8874e-05, 8.7034e-04, -6.1497e-04, -2.2648e-03,\n 1.1624e-03, -3.8385e-04, -7.1824e-04, 2.3287e-03, -1.9101e-03,\n -9.4836e-04, 1.0116e-03, 1.1643e-03, -9.3207e-07, -3.2647e-04,\n -4.0001e-04, 3.9780e-04, -1.0504e-03, -6.5143e-04, 3.5603e-04,\n -2.3368e-03, 6.4168e-04, 2.1366e-04, 3.8355e-04, 7.1101e-04,\n 5.9466e-05, 1.0771e-04, -8.0036e-04, -2.6961e-04, -3.8979e-04,\n 1.5843e-04, -6.4979e-04, -1.0303e-03, -4.5132e-04, 3.1903e-04,\n 5.5902e-04, -2.6467e-04, 9.4146e-04, 2.6229e-03, -2.2042e-05,\n -1.1573e-03, -2.1343e-04, 1.0607e-03], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([1.2022e-05, 3.6819e-06, 1.2650e-05, 1.6273e-05, 3.2740e-05, 1.2339e-05,\n 4.3362e-06, 1.9043e-05, 1.2937e-05, 1.1320e-05, 1.2540e-05, 8.5160e-06,\n 1.3647e-05, 1.7405e-05, 2.9764e-05, 1.6044e-05, 2.0970e-05, 1.2102e-05,\n 2.4108e-05, 3.3741e-06, 2.7689e-05, 8.9349e-06, 1.2543e-05, 1.8438e-05,\n 1.4399e-05, 3.3506e-05, 1.3002e-05, 1.1080e-05, 1.8602e-05, 1.8061e-05,\n 2.7153e-05, 3.5348e-06, 7.2857e-07, 2.5195e-05, 2.8016e-05, 3.2313e-05,\n 9.6414e-06, 2.2804e-05, 1.7779e-05, 2.3415e-05, 2.1854e-05, 3.5772e-05,\n 1.2718e-05, 2.0842e-05, 3.6642e-06, 1.8831e-05, 1.4481e-05, 2.7083e-05,\n 3.8891e-06, 2.3203e-05, 2.3648e-05, 1.6473e-05, 1.3247e-05, 6.4271e-06,\n 4.8756e-06, 2.7424e-05, 1.2030e-05, 1.2443e-05, 6.1226e-06, 3.0781e-05,\n 1.9958e-05, 2.1249e-05, 1.3699e-05, 1.4728e-05, 2.5772e-05, 2.9765e-05,\n 1.8318e-05, 3.4372e-06, 1.7454e-05, 9.6828e-06, 2.1834e-05, 2.0747e-05,\n 2.0574e-05, 2.5236e-05, 1.0742e-05, 1.4717e-05, 2.2619e-05, 1.4543e-05,\n 3.3246e-05, 9.2829e-06, 1.9477e-05, 1.0973e-05, 1.4365e-05, 1.3030e-05,\n 1.8639e-05, 2.9030e-05, 2.3346e-05, 3.0320e-05, 1.2668e-05, 1.5135e-05,\n 2.9724e-05, 3.2581e-05, 1.6054e-05, 2.2250e-05, 2.2298e-05, 2.7646e-05,\n 3.7392e-05, 2.2452e-05, 9.6044e-06, 1.8786e-05, 1.1634e-05, 1.1798e-09,\n 1.5515e-05, 3.3443e-05, 9.1911e-06, 2.8663e-05, 1.4070e-05, 8.8292e-06,\n 1.4197e-05, 1.4484e-05, 2.0374e-05, 2.6094e-05, 1.7833e-05, 2.3491e-05,\n 1.1049e-05, 1.8132e-05, 1.0360e-05, 2.2013e-05, 1.5970e-07, 1.2053e-05,\n 8.5358e-06, 1.5816e-05, 1.7799e-05, 2.0676e-05, 1.5829e-05, 8.4681e-06,\n 1.4420e-05, 1.6803e-05, 4.5679e-09, 8.3606e-06, 1.3111e-05, 8.3634e-06,\n 1.9280e-05, 7.0898e-06, 1.9905e-05, 1.2051e-05, 1.4779e-05, 1.8022e-05,\n 3.2394e-05, 2.3147e-05, 2.4242e-05, 1.7676e-05, 3.2637e-06, 1.9473e-05,\n 3.7704e-08, 3.0379e-05, 1.8222e-05, 4.9743e-05, 2.0280e-05, 2.0196e-05,\n 3.3516e-05, 1.0932e-05, 1.9352e-05, 1.8814e-05, 1.2582e-05, 3.4719e-05,\n 1.5129e-05, 1.5523e-07, 1.8434e-05, 1.1839e-05, 1.1970e-05, 2.0310e-05,\n 1.5046e-05, 3.0542e-05, 1.4641e-05, 9.9765e-06, 1.7882e-05, 1.7379e-05,\n 1.6219e-05, 1.8930e-05, 1.5319e-05, 2.1386e-05, 9.8368e-06, 1.3072e-05,\n 1.1243e-05, 2.0455e-05, 9.7873e-06, 2.1276e-05, 8.6488e-06, 2.9590e-05,\n 1.9124e-05, 1.4772e-05, 2.1883e-05, 1.4680e-05, 2.2097e-05, 2.1220e-05,\n 7.8563e-06, 1.5910e-05, 2.0347e-05, 1.5532e-05, 5.5845e-06, 1.5671e-05,\n 1.8511e-05, 3.7699e-05, 2.4830e-05, 1.0637e-05, 5.3438e-06, 2.6710e-06,\n 1.9203e-05, 2.6642e-05, 1.4076e-05, 1.4716e-05, 1.9165e-05, 6.9572e-06,\n 8.7780e-06, 1.1446e-05, 3.1394e-06, 3.0154e-05, 1.3209e-05, 2.6257e-05,\n 1.2075e-05, 1.7442e-05, 1.2952e-05, 3.6452e-05, 1.2965e-05, 2.2915e-05,\n 1.9539e-05, 2.5756e-05, 8.3171e-06, 2.2149e-05, 5.4618e-06, 1.9304e-05,\n 1.5631e-05, 1.5581e-05, 1.9963e-05, 1.7095e-05, 1.7399e-05, 2.3087e-05,\n 1.4008e-05, 1.8405e-05, 3.4143e-05, 2.3037e-05, 2.4612e-05, 3.8500e-06,\n 1.9938e-05, 1.4193e-05, 7.4275e-06, 3.0305e-05, 3.0038e-05, 5.5282e-06,\n 1.5569e-05, 1.4354e-05, 8.9599e-06, 1.2239e-05, 1.6838e-05, 2.7880e-05,\n 2.3402e-05, 2.6981e-05, 1.4783e-05, 3.1093e-05, 1.1766e-05, 3.1467e-05,\n 1.7855e-05, 1.5341e-05, 2.4188e-05, 2.1301e-05, 8.3754e-06, 2.7492e-05,\n 7.5920e-06, 2.7718e-05, 1.6199e-05, 3.5565e-05, 1.7763e-05, 2.7448e-05,\n 1.4892e-05, 2.2184e-05, 2.3179e-05, 1.5192e-05, 1.3533e-05, 7.6429e-06,\n 1.3293e-05, 4.3828e-05, 2.8093e-05, 1.4335e-05, 8.2035e-06, 2.8756e-05,\n 2.2892e-05, 7.1271e-06, 1.1559e-05, 2.8760e-07, 1.5228e-05, 2.2683e-05,\n 6.8804e-06, 2.1894e-05, 2.5657e-05, 1.1461e-05, 1.8380e-05, 2.1377e-06,\n 1.4391e-05, 6.1816e-06, 1.4164e-05, 2.5080e-05, 1.4185e-05, 2.4818e-05,\n 1.7370e-05, 2.8188e-05, 1.6919e-05, 9.5917e-06, 2.1113e-05, 1.5757e-05,\n 2.3364e-05, 1.5221e-05, 2.2079e-05, 9.0650e-06, 2.5046e-05, 1.9920e-05,\n 2.1267e-05, 1.7966e-05, 2.3037e-05, 1.0077e-05, 3.3351e-05, 3.1864e-05,\n 1.3462e-05, 8.0388e-06, 2.2534e-05, 2.2129e-05, 1.1229e-05, 1.0839e-05,\n 2.3554e-05, 1.9942e-05, 2.1259e-05, 2.3279e-05, 1.7923e-05, 1.4313e-05,\n 1.4858e-05, 1.1530e-05, 8.7391e-06, 1.4711e-05, 2.6972e-05, 2.1683e-05,\n 1.2079e-05, 3.4691e-05, 1.7196e-05, 2.2905e-05, 1.6267e-05, 2.2784e-05,\n 2.8263e-05, 2.6445e-05, 1.7823e-05, 2.2276e-05, 2.2295e-06, 1.5048e-05,\n 1.2162e-05, 2.7772e-05, 1.1880e-05, 2.0996e-05, 2.4323e-05, 9.3787e-06,\n 1.3292e-05, 2.7626e-05, 1.2641e-05, 9.7350e-06, 6.1021e-06, 1.1851e-09,\n 1.5190e-05, 1.9731e-05, 2.5435e-05, 2.3810e-05, 1.6774e-05, 1.1324e-05,\n 2.8031e-06, 5.4441e-06, 2.3089e-05, 1.0286e-05, 1.5804e-05, 3.7347e-05,\n 2.3359e-05, 9.4389e-06, 6.4246e-06, 1.1884e-05, 1.2904e-05, 1.7255e-05,\n 7.9202e-06, 1.2876e-05, 2.2040e-06, 1.7741e-05, 1.9306e-05, 6.7601e-08,\n 2.3425e-05, 1.1838e-05, 7.9613e-06, 2.2878e-05, 1.5537e-05, 2.5931e-05,\n 1.7469e-10, 3.7953e-05, 1.0606e-05, 1.2597e-05, 8.2103e-06, 1.2082e-05,\n 1.1903e-05, 2.1927e-05, 8.4119e-06, 2.6750e-05, 1.0659e-05, 2.0982e-05,\n 2.0454e-05, 2.8276e-05, 8.5198e-06, 1.3125e-05, 1.7716e-05, 1.1381e-05,\n 1.1153e-05, 1.1737e-05, 2.5830e-05, 2.4264e-05, 4.7180e-09, 8.2128e-06,\n 2.3932e-05, 1.3304e-05, 1.4727e-05, 1.4022e-05, 1.4702e-05, 1.9492e-05,\n 8.1973e-06, 1.2317e-05, 7.4890e-06, 1.1128e-05, 3.3998e-05, 1.4860e-05,\n 1.7347e-05, 1.3460e-09, 6.9946e-06, 2.0543e-05, 8.8776e-06, 2.4273e-05,\n 2.7117e-08, 1.1257e-05, 1.9774e-05, 6.4963e-06, 1.7455e-05, 1.2353e-05,\n 5.7655e-06, 6.8070e-06, 9.8656e-06, 1.1805e-08, 2.0010e-05, 1.5904e-05,\n 2.7432e-05, 2.0906e-05, 1.0642e-05, 3.0409e-05, 2.5541e-05, 2.4710e-05,\n 2.1400e-05, 1.9462e-05, 2.3783e-05, 3.5448e-05, 1.5288e-05, 3.0860e-05,\n 1.1831e-05, 1.5583e-05, 1.6391e-05, 1.0752e-05, 1.9762e-05, 2.3441e-05,\n 1.3160e-05, 7.0375e-06, 8.6226e-06, 9.4667e-06, 2.4755e-05, 2.9214e-05,\n 2.6253e-05, 1.4799e-05, 2.0199e-05, 3.5242e-05, 1.8081e-05, 7.0638e-06,\n 2.5496e-05, 1.4509e-05, 1.1963e-05, 1.5669e-05, 1.9799e-05, 3.2096e-05,\n 6.9180e-07, 1.3993e-05, 1.7766e-05, 1.6375e-05, 1.8753e-05, 1.6097e-05,\n 2.5791e-05, 2.0521e-05, 1.1484e-05, 2.8116e-05, 5.9154e-06, 2.1031e-05,\n 2.2607e-05, 1.8040e-05, 1.6707e-05, 6.1562e-06, 1.3320e-05, 1.5749e-05,\n 9.1274e-06, 1.7934e-05, 2.6557e-05, 1.1496e-05, 2.0528e-05, 2.2445e-05,\n 1.3241e-05, 1.8073e-05, 1.8836e-05, 1.3636e-05, 1.3631e-05, 2.1835e-05,\n 1.7937e-05, 9.0309e-06, 1.0633e-05, 1.3665e-05, 1.2301e-05, 3.0727e-05,\n 1.1979e-05, 1.2659e-05, 9.0791e-06, 2.8811e-05, 1.4881e-05, 5.9577e-06,\n 1.2322e-05, 1.1291e-05, 2.9928e-05, 7.9489e-06, 2.0313e-05, 1.3806e-05,\n 2.4677e-05, 1.7275e-05, 1.2081e-05, 2.5482e-05, 6.4264e-07, 2.1262e-05,\n 1.7625e-05, 2.2532e-05, 2.8941e-05, 2.5256e-05, 2.0192e-05, 4.8792e-06,\n 2.2608e-05, 1.4843e-05, 1.5832e-05, 4.2656e-05, 1.9753e-05, 2.1566e-05,\n 1.5789e-05, 1.9677e-05, 1.6469e-05, 9.5872e-06, 1.3842e-05, 1.7966e-05,\n 5.8351e-05, 2.2938e-05, 1.5886e-05, 8.1471e-06, 1.0953e-05, 2.3318e-05,\n 1.4873e-05, 7.7855e-06, 1.2671e-05, 2.9178e-08, 2.0792e-05, 1.5315e-05,\n 2.4542e-05, 1.0652e-05, 1.9634e-05, 1.6814e-05, 2.4219e-05, 1.3634e-05,\n 2.0871e-05, 1.4087e-05, 2.4534e-05, 1.4637e-05, 1.3002e-05, 1.1177e-05,\n 1.0329e-05, 1.3085e-05, 2.1136e-05, 1.8233e-05, 1.5571e-05, 1.8835e-05,\n 3.7834e-05, 1.1745e-05, 2.2476e-05, 2.7361e-05, 1.0419e-05, 1.8562e-05,\n 6.4055e-07, 1.9675e-05, 1.9661e-05, 1.6904e-05, 2.2329e-05, 2.9073e-05,\n 1.4614e-05, 8.3333e-08, 1.8173e-05, 1.9352e-05, 1.0512e-05, 1.0432e-05,\n 9.7717e-06, 2.5100e-05, 7.8766e-06, 1.3799e-05, 2.3770e-05, 2.2619e-05,\n 3.1063e-05, 1.4137e-05, 2.8293e-05, 1.0061e-05, 3.1470e-06, 8.3768e-06,\n 3.1037e-05, 2.6920e-05, 3.0913e-05, 1.3160e-05, 1.7630e-05, 1.7006e-05,\n 2.4732e-05, 2.2472e-05, 1.7226e-05, 1.9051e-05, 3.8870e-05, 1.0028e-05,\n 1.2548e-05, 1.6737e-05, 3.0568e-05, 1.1597e-05, 8.7844e-11, 8.9183e-06,\n 1.8944e-05, 4.1188e-06, 1.8935e-05, 9.2228e-06, 1.3191e-05, 3.0013e-05,\n 5.6897e-06, 2.8574e-06, 2.4666e-05, 1.7591e-05, 8.5987e-06, 1.5968e-05,\n 7.6827e-06, 1.6809e-05, 1.1127e-05, 4.3460e-05, 1.4652e-05, 1.2821e-05,\n 1.8792e-05, 2.9002e-05, 9.7967e-06, 3.5021e-05, 3.1314e-05, 2.4696e-05,\n 2.5003e-05, 2.6405e-05, 1.0759e-05, 1.9351e-05, 5.1916e-06, 1.7907e-05,\n 2.3219e-05, 1.6226e-05, 3.0252e-05, 2.6951e-05, 1.7671e-05, 6.9269e-06,\n 1.1052e-05, 1.7378e-05, 2.6662e-05, 2.0973e-05, 1.2955e-05, 6.9821e-06,\n 1.3625e-05, 2.6111e-05, 1.1460e-05, 1.3294e-05, 3.9375e-06, 2.0340e-06,\n 1.5368e-05, 3.0838e-05, 1.8137e-05, 3.1246e-05, 2.8045e-05, 1.6316e-05,\n 3.2102e-05, 4.6951e-05, 2.4129e-05, 1.9028e-06, 1.6929e-05, 1.1431e-05,\n 1.0707e-05, 1.4864e-05, 1.6160e-05, 3.3377e-05, 9.1921e-06, 1.0347e-05,\n 6.0415e-06, 2.5860e-09, 1.8995e-05, 2.4326e-05, 2.6798e-05, 3.3277e-05,\n 1.8074e-05, 3.3988e-05, 2.9932e-05, 9.2642e-06, 2.0008e-05, 2.1702e-05,\n 3.3090e-05, 1.1084e-05, 1.5175e-05, 3.5248e-05, 2.3054e-05, 1.2084e-05,\n 5.7557e-06, 1.3412e-05, 1.1627e-05, 1.5270e-05, 1.0401e-05, 1.7505e-05,\n 2.3964e-05, 1.1430e-05, 7.3177e-06, 2.4657e-05, 2.3078e-05, 7.6570e-06,\n 2.6297e-05, 2.3749e-05, 2.5672e-05, 2.0269e-05, 2.7706e-05, 1.4202e-05,\n 9.7931e-06, 2.1713e-05, 3.2980e-05, 3.4006e-05, 1.5760e-05, 2.5982e-05,\n 1.5497e-05, 2.4396e-05, 2.6878e-05, 3.0709e-05, 2.8150e-05, 9.3787e-06,\n 8.8625e-08, 2.6147e-05, 1.0880e-05, 1.3232e-05, 2.6865e-05, 2.6867e-05,\n 5.7033e-06, 1.1115e-05, 8.9384e-06, 1.1048e-05, 2.6226e-05, 2.7753e-05,\n 2.6962e-05, 3.7035e-07, 1.9316e-05, 1.0611e-05, 5.7988e-06, 2.1664e-05,\n 1.7839e-05, 1.9509e-05, 1.2280e-05, 7.8429e-06, 2.0201e-05, 1.9089e-05,\n 1.1030e-05, 2.6020e-05, 2.6970e-06, 2.5348e-05, 1.7891e-05, 2.3951e-05],\n device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(5006.)",
|
| 17 |
+
"exp_avg": "tensor([[-4.2654e-06, 2.0177e-06, 2.1165e-05, ..., -2.3385e-06,\n 1.1164e-07, -5.4115e-05],\n [-3.0244e-06, 4.8093e-05, -5.3032e-06, ..., -7.4752e-06,\n -6.3517e-06, 3.5492e-05],\n [ 1.3631e-06, -3.5314e-07, 2.3831e-06, ..., 1.6378e-05,\n -3.4406e-05, -7.0805e-05],\n ...,\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-2.1499e-06, -1.4017e-09, -1.2821e-07, ..., 2.5706e-05,\n 2.8746e-06, -2.4574e-05],\n [ 2.4244e-08, -2.9663e-06, 3.6721e-07, ..., 3.3575e-06,\n -1.8170e-06, -1.2693e-04]], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([[1.2510e-09, 3.9979e-09, 1.3634e-08, ..., 4.3256e-09, 4.2300e-09,\n 1.7008e-08],\n [2.4502e-08, 1.1603e-08, 1.4158e-08, ..., 7.9446e-09, 1.5514e-08,\n 3.0399e-08],\n [6.2807e-10, 3.2575e-10, 2.3403e-09, ..., 6.4384e-09, 7.0737e-08,\n 2.9378e-08],\n ...,\n [1.1127e-10, 3.6949e-10, 6.7984e-11, ..., 1.4251e-11, 5.2079e-11,\n 3.1500e-13],\n [1.4347e-10, 1.4498e-09, 1.6992e-09, ..., 1.1801e-09, 3.6796e-09,\n 4.7806e-09],\n [1.2315e-10, 2.1667e-09, 2.8359e-09, ..., 5.9585e-10, 2.3108e-09,\n 6.3255e-08]], device='cuda:0')"
|
| 19 |
+
},
|
| 20 |
+
"3": {
|
| 21 |
+
"step": "tensor(5006.)",
|
| 22 |
+
"exp_avg": "tensor([-1.6914e-04, -1.3846e-04, -3.0736e-04, 2.1999e-06, -4.9442e-04,\n 1.3126e-04, -1.9523e-04, -2.9477e-05, -4.9971e-04, 1.8584e-04,\n 1.5121e-04, 3.4259e-05, 4.8972e-04, 5.6052e-45, 6.7313e-05,\n -2.8921e-04, 7.0968e-04, 5.6052e-45, -3.4406e-05, 1.5345e-04,\n -9.9875e-05, 2.4446e-26, 8.8106e-05, 2.7422e-04, -4.0112e-04,\n 5.6052e-45, -5.5234e-04, 1.4112e-04, -3.5030e-04, 1.0590e-04,\n 3.0897e-04, -1.1636e-04, 5.6052e-45, -4.1456e-05, 2.5852e-04,\n 1.0127e-04, 5.6052e-45, 3.5372e-04, -1.2518e-04, -3.3297e-06,\n 6.3680e-05, 1.1699e-04, 4.8717e-06, -2.5123e-04, 5.6052e-45,\n 2.2528e-04, 1.2008e-04, 2.0113e-04, 3.2611e-04, 3.2284e-04,\n 5.6052e-45, -3.7101e-04, 9.4512e-05, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -4.6420e-04, 1.6997e-04, 3.6827e-04, 3.9849e-05,\n 5.9923e-04, 1.5421e-21, 3.7751e-04, 1.6345e-04, -1.9160e-04,\n 3.8697e-04, -3.2425e-04, 4.3412e-04, 1.4926e-04, -3.1496e-04,\n -5.6968e-05, 5.6052e-45, 2.0578e-04, 4.2898e-05, -8.0851e-04,\n 6.2204e-05, 5.6052e-45, 4.3796e-05, -4.1615e-04, 5.6052e-45,\n -4.5803e-05, -2.2870e-04, 1.8876e-04, -3.7879e-05, 1.6693e-04,\n 5.6052e-45, 3.1011e-04, -3.5226e-04, 1.9125e-04, -4.5528e-04,\n 3.3298e-04, 2.6043e-05, 3.3668e-04, -1.6687e-05, 1.0982e-04,\n 3.6732e-05, 3.1888e-05, -1.2684e-04, 1.2136e-04, 3.4212e-04,\n 2.4835e-05, 2.8892e-05, 5.6052e-45, 5.6052e-45, -4.5761e-04,\n -3.0225e-05, 5.6052e-45, 1.7655e-04, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -6.7725e-05, 4.4870e-05, -3.3499e-04, 3.0295e-04,\n -1.0064e-04, -6.0990e-04, -1.6929e-05, -5.2433e-05, 5.6052e-45,\n 1.4060e-05, 6.6755e-05, 1.5395e-04, 1.8254e-04, 3.7889e-05,\n 5.6052e-45, -2.2487e-04, 4.1612e-23, 2.6335e-04, 1.9337e-04,\n -1.8143e-04, 2.7297e-04, -8.1093e-05, 5.6052e-45, 8.3789e-06,\n 7.0090e-05, 5.6052e-45, -1.7692e-04, -3.5033e-04, -8.0288e-05,\n 5.6052e-45, 3.1009e-06, 2.2159e-04, -2.8624e-05, 5.6052e-45,\n 6.3633e-05, 5.6052e-45, 6.3769e-04, 4.8598e-25, 5.6052e-45,\n 2.2497e-06, -1.1756e-04, 1.0464e-04, -2.0601e-04, 5.6052e-45,\n 1.7059e-04, -9.6046e-05, 6.8870e-04, 5.6052e-45, 7.5122e-05,\n -1.4024e-04, -1.2713e-05, 3.1846e-04, -1.2416e-12, -1.7915e-05,\n 1.4724e-04, 2.2380e-04, 1.3025e-04, -1.9914e-04, 8.7290e-05,\n -2.3891e-04, 7.4651e-05, -4.5703e-04, -5.7909e-05, 1.4514e-04,\n 2.2249e-04, 5.6052e-45, -1.5932e-04, -2.5229e-04, 1.1854e-04,\n -7.0285e-04, 1.9647e-04, 9.0215e-35, 3.2940e-05, 1.1111e-04,\n 5.6052e-45, 3.0454e-04, 1.6058e-04, 5.6052e-45, 5.6052e-45,\n 5.0756e-05, -5.1008e-05, 1.9955e-04, -5.3164e-04, 2.7289e-04,\n 2.8628e-04, 5.6052e-45, -4.3485e-04, 2.3351e-04, -1.8463e-04,\n 5.6052e-45, 1.9326e-04, 1.9760e-04, 2.6806e-04, -1.5206e-05,\n 4.0339e-04, -3.9303e-06, -3.8583e-04, -2.1527e-05, 5.6052e-45,\n -1.5633e-06, 9.3734e-04, 9.8354e-05, -7.4808e-05, -8.8124e-05,\n -1.9487e-04, 5.0391e-04, -1.7606e-04, -3.9472e-04, 2.0575e-05,\n 1.2178e-04, -8.6517e-06, -4.2139e-05, -2.6594e-04, 1.3625e-04,\n 4.1786e-04, -1.2493e-04, -5.1758e-05, -2.4076e-04, -2.8636e-40,\n 8.9313e-05, 6.9209e-28, 1.2837e-04, 3.7009e-04, -8.3461e-05,\n 3.6473e-05, -4.3587e-05, 2.3058e-04, 4.5373e-04, 3.5758e-04,\n 1.7000e-04, 6.2728e-05, -1.3079e-05, 5.6052e-45, 5.2504e-04,\n 4.4847e-06, 9.9472e-05, -3.7209e-05, -2.7041e-04, 1.8380e-19,\n 1.6635e-04, 2.0502e-09, -1.4716e-05, -5.8145e-04, 2.2644e-05,\n -5.5505e-04, 1.1688e-09, -1.3748e-04, -3.0229e-04, 2.2614e-05,\n 1.0670e-32, -1.5175e-04, -3.2183e-05, 2.0335e-04, -8.1282e-05,\n 1.0757e-04, 3.0941e-04, 3.9421e-05, -1.0389e-05, 1.6291e-04,\n -2.0117e-05, 9.8414e-05, 3.7412e-04, -3.7479e-04, 5.6052e-45,\n -2.0314e-05, 5.6052e-45, 3.3953e-04, 2.2705e-04, 5.6052e-45,\n 5.6052e-45, -1.2338e-04, 5.6052e-45, 1.7722e-04, -8.9984e-05,\n -5.0547e-05, 9.5133e-05, -7.7506e-05, 1.1054e-04, 1.2774e-04,\n -1.0903e-05, 2.5311e-04, -1.8688e-04, 7.9231e-05, -1.3752e-04,\n -3.0550e-04, -1.5833e-04, 1.7080e-04, -4.7825e-04, -4.0088e-05,\n 2.3868e-04, -1.9145e-05, 5.6052e-45, -1.5035e-04, -1.0417e-03,\n 5.6052e-45, -1.1905e-04, 9.8826e-05, -2.5038e-04, 1.5360e-04,\n 1.2196e-04, -3.0988e-05, -1.6558e-05, 1.1935e-05, 5.2082e-05,\n 2.2815e-04, -1.6687e-04, -2.3787e-04, -1.0017e-04, -3.3969e-05,\n 2.9853e-04, -2.4387e-04, 1.2913e-04, -4.4062e-05, 2.2247e-04,\n -1.5067e-04, 1.2905e-04, 3.2292e-05, -1.6673e-04, -1.2241e-04,\n -1.1951e-04, 5.0698e-05, 2.2200e-04, 3.4545e-04, -2.1880e-04,\n -6.0628e-05, -9.2778e-05, 8.8988e-05, 5.2636e-04, -2.5012e-05,\n 1.2512e-04, -6.1142e-05, 2.1339e-24, 1.2339e-04, 3.6831e-04,\n 1.9932e-04, 5.6052e-45, -1.1274e-04, 2.0657e-04, -2.5419e-04,\n -2.9336e-05, 5.8524e-05, -2.1158e-04, -9.4561e-06, -1.7319e-04,\n 8.6049e-05, 8.5408e-05, -5.3011e-04, 1.9542e-04, 1.3091e-04,\n -2.3177e-07, -2.5632e-04, 1.5890e-04, 3.5668e-05, -3.2007e-05,\n -2.7076e-04, -2.5575e-04, -3.5435e-05, 1.6268e-04, -9.5400e-04,\n 6.6818e-05, -3.2715e-04, -2.2971e-04, 1.2236e-04, 5.0591e-27,\n 5.8506e-06, 1.3446e-04, 1.0843e-04, 5.6052e-45, 5.6052e-45,\n -4.9004e-05, 2.3012e-04, 5.6052e-45, 2.4289e-05, 3.5897e-05,\n -2.1407e-04, -8.9151e-06, 2.2384e-05, 1.0379e-04, -3.9616e-05,\n -1.0418e-04, 5.6052e-45, 1.0079e-04, -1.8703e-04, 5.6052e-45,\n 1.0396e-04, -2.6651e-04, 5.6052e-45, 4.7204e-04, 5.1480e-04,\n 4.9547e-04, -2.6297e-04, 2.5640e-04, -1.7451e-04, -1.9797e-04,\n 3.9822e-04, 1.3942e-04, 4.5302e-05, -1.0093e-04, -4.7685e-04,\n 5.6052e-45, -3.6551e-04, 4.1613e-04, 6.7213e-05, -8.3239e-05,\n 3.3713e-04, -5.3163e-04, 5.6052e-45, 2.7877e-04, -2.7160e-04,\n 1.6746e-04, 3.3642e-05, -1.8033e-04, -1.1382e-04, 5.6052e-45,\n -3.3521e-05, -2.5742e-04, 8.9675e-05, -9.7464e-06, -3.9040e-04,\n -4.6995e-05, 1.7879e-04, 1.3912e-04, 2.1982e-05, 3.0467e-04,\n 7.3955e-05, 5.6052e-45, 2.5123e-05, -5.6503e-04, 1.4859e-04,\n 5.6052e-45, 5.6052e-45, 1.4005e-04, -7.1421e-06, -5.3263e-04,\n -2.2463e-04, -4.7179e-05, 4.7563e-06, -7.1312e-04, 3.5125e-04,\n 3.0665e-04, 1.6435e-04, -3.2374e-04, 5.5769e-05, -1.6420e-04,\n 5.6052e-45, -6.6615e-06, 5.6052e-45, -9.4923e-06, -3.8099e-04,\n -2.6503e-04, -9.5019e-06, 1.2160e-04, 3.3106e-05, 5.6052e-45,\n -3.9939e-05, -1.6835e-04, 1.6076e-04, -3.7950e-05, 5.6052e-45,\n 5.6514e-06, 1.8643e-04, -2.4532e-04, 1.0515e-04, -2.1638e-04,\n -6.4213e-05, -9.2824e-05, 7.2637e-05, -6.4998e-04, 5.6052e-45,\n -2.2803e-04, -4.9723e-05, 5.6052e-45, -8.6547e-05, -4.2790e-04,\n -3.9141e-04, 5.8049e-05, -1.4955e-04, 1.1721e-04, 5.6052e-45,\n 5.6052e-45, 5.1655e-04, 5.6052e-45, -4.6748e-05, -4.5198e-05,\n 1.8874e-04, 1.5556e-04, -1.2025e-04, 1.3228e-04, 5.6052e-45,\n 5.6052e-45, -1.6797e-04, 2.7365e-04, 5.6052e-45, 1.3581e-04,\n 5.6052e-45, 3.2494e-04, -3.9271e-04, 2.7489e-04, 5.6052e-45,\n 1.2019e-04, 2.4058e-04], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([7.1166e-07, 2.0085e-06, 1.4297e-06, 2.5557e-07, 1.4950e-06, 9.1943e-07,\n 2.5708e-06, 1.0848e-06, 2.0812e-06, 6.2948e-07, 1.8849e-06, 2.5206e-06,\n 1.4074e-06, 4.4898e-09, 3.2528e-07, 2.0862e-06, 1.8433e-06, 4.3333e-15,\n 8.3380e-07, 6.0256e-08, 1.7468e-06, 9.1652e-11, 4.2293e-07, 1.3261e-06,\n 1.0510e-06, 1.8436e-06, 2.5452e-06, 1.6306e-06, 1.4328e-06, 1.3155e-06,\n 1.0946e-06, 9.1376e-07, 4.4701e-08, 1.6745e-06, 1.8178e-06, 7.4927e-07,\n 2.7067e-07, 1.0086e-06, 1.1372e-06, 3.2466e-06, 1.0396e-06, 1.0522e-06,\n 5.0958e-07, 1.3801e-06, 9.7587e-07, 1.1779e-06, 2.2137e-06, 1.0752e-06,\n 1.7142e-06, 1.4029e-06, 2.5576e-08, 1.3490e-06, 1.1940e-06, 5.4101e-07,\n 4.6369e-07, 1.6493e-08, 1.8497e-06, 1.6272e-06, 1.8391e-06, 7.5809e-07,\n 1.5780e-06, 5.6198e-08, 1.2366e-06, 1.0663e-06, 5.1982e-07, 7.3390e-07,\n 3.4040e-07, 2.2690e-06, 2.1665e-06, 1.0643e-06, 1.1867e-06, 7.4119e-08,\n 9.6215e-07, 1.7406e-06, 1.2422e-06, 1.1427e-06, 2.2437e-07, 1.2706e-06,\n 2.0729e-06, 5.3983e-11, 1.7239e-06, 1.8339e-06, 1.7296e-06, 1.4715e-07,\n 1.5425e-06, 1.4929e-11, 1.5650e-06, 7.6169e-07, 1.8172e-06, 1.7350e-06,\n 1.0851e-06, 5.2103e-07, 8.7589e-07, 4.2973e-07, 5.8462e-07, 1.0838e-06,\n 1.2771e-07, 1.8736e-06, 1.1526e-06, 3.2898e-06, 9.8297e-07, 1.2364e-06,\n 1.5037e-07, 1.6029e-07, 2.5416e-06, 1.0752e-06, 3.1255e-08, 7.8593e-07,\n 4.1312e-07, 3.0199e-10, 7.3764e-07, 1.1278e-06, 1.2756e-06, 1.1742e-06,\n 1.3534e-06, 7.9588e-07, 1.5788e-06, 6.3175e-07, 1.9693e-06, 1.7838e-06,\n 2.3920e-06, 2.2884e-06, 1.5358e-06, 7.0340e-07, 1.8726e-06, 3.9172e-07,\n 1.1157e-06, 4.2023e-08, 1.4368e-06, 2.2049e-06, 2.7943e-06, 2.4149e-07,\n 5.3853e-07, 1.1988e-06, 5.1937e-07, 1.0345e-06, 1.1854e-07, 1.1355e-06,\n 8.6342e-07, 1.1853e-06, 6.4071e-07, 1.8691e-06, 9.4858e-07, 1.0769e-06,\n 7.3924e-09, 3.2984e-07, 3.4086e-06, 1.6247e-06, 6.5536e-07, 4.5631e-10,\n 5.8938e-07, 8.2828e-07, 1.2383e-06, 1.4693e-06, 5.0221e-08, 1.6428e-06,\n 2.7747e-07, 1.2499e-06, 2.5220e-15, 1.1022e-06, 7.7664e-07, 1.2770e-06,\n 1.2950e-06, 1.8126e-06, 1.1288e-06, 7.2512e-07, 2.3306e-06, 1.3474e-06,\n 6.1632e-07, 8.1039e-07, 1.9222e-06, 1.4068e-06, 2.0725e-06, 1.3094e-06,\n 1.8151e-06, 1.9301e-06, 5.3819e-09, 9.4427e-07, 6.7095e-07, 1.7528e-07,\n 1.9116e-06, 8.7941e-07, 6.0159e-07, 1.0896e-06, 1.4606e-06, 3.6601e-08,\n 4.4980e-07, 6.3285e-07, 4.6710e-07, 1.5284e-06, 4.6611e-07, 1.1505e-06,\n 9.7264e-07, 2.0148e-06, 1.8362e-06, 9.0585e-07, 1.2882e-07, 2.8926e-07,\n 1.2580e-06, 8.5284e-07, 8.4180e-08, 1.3851e-06, 3.9803e-07, 1.0196e-06,\n 1.4634e-06, 1.7785e-06, 2.8627e-07, 1.7253e-06, 1.6363e-06, 3.8119e-08,\n 2.1901e-06, 1.6520e-06, 1.7286e-06, 1.0856e-06, 1.9994e-06, 1.0426e-06,\n 1.9450e-06, 8.2700e-07, 1.5945e-06, 5.7764e-07, 7.5959e-07, 7.0300e-07,\n 7.4413e-07, 9.0518e-07, 1.0104e-06, 1.1685e-06, 1.2085e-06, 1.2334e-06,\n 1.7703e-06, 4.5744e-07, 6.1857e-07, 4.6954e-09, 1.0541e-06, 1.7773e-06,\n 1.2361e-06, 3.0192e-07, 1.3870e-06, 1.7686e-06, 1.0732e-06, 9.9854e-07,\n 1.6635e-06, 2.3666e-06, 9.4896e-07, 3.1492e-07, 1.2609e-06, 3.0879e-06,\n 9.7464e-07, 2.4648e-07, 6.6621e-07, 7.1970e-07, 1.3288e-06, 5.3477e-08,\n 1.4991e-08, 2.1227e-06, 9.3769e-08, 1.3227e-06, 8.4576e-07, 2.1177e-06,\n 1.5563e-06, 1.3665e-06, 1.2561e-06, 1.7883e-06, 1.0342e-06, 9.5909e-07,\n 9.1177e-07, 6.9023e-07, 1.7806e-06, 2.4417e-06, 1.9733e-07, 1.1633e-06,\n 1.8234e-06, 1.0850e-06, 2.1415e-06, 9.4484e-07, 1.4268e-10, 5.4649e-07,\n 6.7251e-07, 2.0862e-06, 7.7599e-07, 7.1615e-10, 8.9997e-07, 5.2031e-07,\n 1.8565e-06, 1.5096e-06, 1.0982e-06, 1.5103e-06, 1.1874e-06, 1.6380e-06,\n 1.3335e-06, 1.2090e-06, 1.5070e-06, 2.6086e-06, 1.3447e-06, 1.1516e-06,\n 1.3241e-06, 9.7643e-07, 4.4349e-07, 1.0083e-06, 1.1521e-06, 1.6093e-06,\n 9.7231e-07, 6.8416e-07, 7.7083e-08, 6.3253e-07, 1.0230e-06, 2.5447e-07,\n 1.1960e-06, 1.2505e-06, 1.5912e-06, 1.4537e-06, 5.2729e-07, 1.8852e-06,\n 8.6153e-07, 5.3792e-07, 1.9343e-06, 8.3606e-07, 1.6692e-06, 2.3435e-06,\n 1.1785e-07, 9.1581e-07, 1.2233e-06, 2.1900e-06, 1.8152e-06, 2.4246e-06,\n 7.8957e-07, 2.7383e-06, 1.1076e-07, 9.8419e-07, 1.6841e-06, 8.5930e-07,\n 1.3693e-06, 7.4561e-07, 6.0615e-07, 1.8833e-06, 1.2918e-06, 1.4603e-06,\n 1.6949e-06, 2.0586e-06, 1.4947e-06, 5.9190e-07, 1.2080e-06, 1.1269e-06,\n 2.8451e-07, 9.9222e-07, 1.5359e-06, 1.3216e-06, 2.1104e-09, 9.9202e-07,\n 1.0193e-06, 9.0340e-07, 3.5068e-07, 1.1622e-06, 1.2046e-06, 9.9508e-07,\n 1.0631e-06, 1.3799e-06, 9.4450e-07, 1.5884e-06, 1.9462e-06, 8.2739e-07,\n 4.9031e-07, 1.4382e-06, 1.5026e-06, 8.4915e-07, 2.1233e-06, 1.4964e-06,\n 9.4371e-07, 7.7710e-07, 5.4402e-07, 1.5389e-06, 5.4319e-07, 1.3392e-06,\n 1.1852e-06, 1.9022e-06, 1.6600e-07, 5.4061e-07, 4.5401e-07, 2.4802e-06,\n 8.0788e-07, 3.4486e-07, 1.6800e-06, 6.6655e-08, 5.5090e-10, 1.3828e-06,\n 1.3611e-06, 1.1020e-06, 6.7482e-07, 1.0114e-06, 2.1796e-06, 1.6190e-06,\n 6.8439e-07, 2.0722e-06, 7.3926e-07, 1.6480e-06, 1.7707e-06, 1.2785e-06,\n 1.2330e-06, 2.6703e-07, 1.5777e-06, 7.2053e-07, 1.8453e-06, 1.2045e-06,\n 1.6436e-06, 1.2673e-06, 1.6459e-06, 1.5832e-06, 2.6645e-06, 1.0088e-06,\n 8.7596e-07, 2.1007e-06, 3.8836e-07, 2.3339e-06, 1.5553e-06, 1.9864e-06,\n 7.0166e-07, 8.9852e-07, 2.2356e-06, 5.3147e-07, 1.0620e-06, 2.8970e-06,\n 1.7015e-06, 8.7673e-07, 3.7629e-07, 2.0995e-06, 9.1798e-10, 2.2186e-06,\n 1.7622e-06, 1.1165e-06, 2.4626e-06, 9.7958e-07, 6.9640e-07, 5.3213e-07,\n 4.6206e-07, 2.3416e-06, 1.6992e-06, 9.5378e-07, 8.1002e-07, 1.0446e-06,\n 1.2994e-06, 2.2497e-06, 2.2979e-07, 6.2631e-07, 5.8383e-07, 8.4185e-07,\n 1.6233e-06, 1.3761e-06, 3.5558e-08, 1.1350e-06, 1.8264e-06, 2.2019e-06,\n 9.5332e-07, 2.2350e-06, 2.2476e-06, 1.1269e-06, 8.8035e-07, 1.2565e-06,\n 1.1257e-06, 1.1554e-07, 1.3915e-06, 6.0398e-07, 7.1402e-07, 1.6731e-06,\n 1.1987e-07, 1.4866e-06, 1.2055e-06, 9.3428e-07, 4.2002e-07, 1.7014e-06,\n 1.5406e-06, 1.9308e-07, 1.6368e-06, 1.1496e-06, 1.5440e-06, 8.3728e-07,\n 1.7608e-06, 8.9899e-07, 5.6004e-07, 6.4384e-07, 1.3867e-06, 2.1003e-09,\n 1.1940e-06, 1.0293e-06, 1.6908e-06, 4.1159e-07, 2.6406e-06, 1.7412e-06,\n 1.1276e-06, 1.5070e-06, 7.1512e-07, 5.3756e-07, 2.5277e-07, 2.1766e-06,\n 2.8318e-09, 7.9215e-07, 1.7217e-06, 1.5493e-06, 1.6578e-06, 1.6262e-06,\n 8.0839e-07, 8.0432e-07, 4.9314e-07, 3.7485e-07, 1.7146e-06, 5.4418e-08,\n 1.4085e-06, 1.3242e-06, 9.6938e-07, 1.9862e-06, 1.0173e-06, 2.3300e-07,\n 6.4794e-07, 3.7468e-07], device='cuda:0')"
|
| 24 |
+
},
|
| 25 |
+
"4": {
|
| 26 |
+
"step": "tensor(5006.)",
|
| 27 |
+
"exp_avg": "tensor([[-1.0191e-05, -2.3862e-05, -2.0066e-05, ..., -5.6052e-45,\n -1.2449e-05, 4.3192e-05],\n [ 2.3125e-05, 1.7456e-05, -2.5477e-05, ..., -5.6052e-45,\n -7.9273e-06, -1.5544e-05],\n [-2.8539e-07, -7.6041e-05, -1.5765e-05, ..., 5.6052e-45,\n 5.6248e-06, 6.7996e-07],\n ...,\n [-1.5983e-05, -2.6525e-05, -4.6440e-05, ..., -5.6052e-45,\n -9.1668e-06, -6.2310e-07],\n [-1.7159e-06, 9.0749e-06, -6.0235e-05, ..., 5.6052e-45,\n -8.6626e-06, -1.4002e-05],\n [ 3.5457e-06, 2.2865e-05, 3.7342e-07, ..., 5.6052e-45,\n 2.3803e-06, 3.0554e-05]], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([[5.4992e-09, 2.6154e-08, 3.0295e-08, ..., 5.0509e-12, 5.1191e-10,\n 6.0166e-09],\n [7.4403e-09, 5.2875e-08, 4.0281e-08, ..., 4.7158e-11, 1.5623e-09,\n 1.8193e-08],\n [4.8063e-09, 6.7197e-08, 3.2656e-08, ..., 4.7716e-12, 8.4714e-10,\n 5.1334e-09],\n ...,\n [7.5777e-09, 6.2487e-08, 4.8800e-08, ..., 6.5247e-11, 1.5783e-09,\n 7.6164e-09],\n [7.4855e-09, 6.4401e-08, 4.4227e-08, ..., 6.8465e-11, 1.7110e-09,\n 5.4274e-09],\n [6.9514e-09, 8.0998e-08, 4.3390e-08, ..., 2.5503e-12, 1.4980e-09,\n 2.7034e-08]], device='cuda:0')"
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
"param_groups": [
|
| 32 |
+
{
|
| 33 |
+
"lr": 0.00904518046337755,
|
| 34 |
+
"name": "shared",
|
| 35 |
+
"betas": [
|
| 36 |
+
0.9,
|
| 37 |
+
0.999
|
| 38 |
+
],
|
| 39 |
+
"eps": 1e-08,
|
| 40 |
+
"weight_decay": 1e-05,
|
| 41 |
+
"amsgrad": false,
|
| 42 |
+
"maximize": false,
|
| 43 |
+
"foreach": null,
|
| 44 |
+
"capturable": false,
|
| 45 |
+
"differentiable": false,
|
| 46 |
+
"fused": null,
|
| 47 |
+
"decoupled_weight_decay": true,
|
| 48 |
+
"initial_lr": 0.01,
|
| 49 |
+
"params": [
|
| 50 |
+
0,
|
| 51 |
+
1
|
| 52 |
+
]
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"lr": 0.00904518046337755,
|
| 56 |
+
"name": "scale_256",
|
| 57 |
+
"betas": [
|
| 58 |
+
0.9,
|
| 59 |
+
0.999
|
| 60 |
+
],
|
| 61 |
+
"eps": 1e-08,
|
| 62 |
+
"weight_decay": 1e-05,
|
| 63 |
+
"amsgrad": false,
|
| 64 |
+
"maximize": false,
|
| 65 |
+
"foreach": null,
|
| 66 |
+
"capturable": false,
|
| 67 |
+
"differentiable": false,
|
| 68 |
+
"fused": null,
|
| 69 |
+
"decoupled_weight_decay": true,
|
| 70 |
+
"initial_lr": 0.01,
|
| 71 |
+
"params": [
|
| 72 |
+
2,
|
| 73 |
+
3,
|
| 74 |
+
4
|
| 75 |
+
]
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"lr": 0.00904518046337755,
|
| 79 |
+
"name": "scale_512",
|
| 80 |
+
"betas": [
|
| 81 |
+
0.9,
|
| 82 |
+
0.999
|
| 83 |
+
],
|
| 84 |
+
"eps": 1e-08,
|
| 85 |
+
"weight_decay": 1e-05,
|
| 86 |
+
"amsgrad": false,
|
| 87 |
+
"maximize": false,
|
| 88 |
+
"foreach": null,
|
| 89 |
+
"capturable": false,
|
| 90 |
+
"differentiable": false,
|
| 91 |
+
"fused": null,
|
| 92 |
+
"decoupled_weight_decay": true,
|
| 93 |
+
"initial_lr": 0.01,
|
| 94 |
+
"params": [
|
| 95 |
+
5,
|
| 96 |
+
6,
|
| 97 |
+
7
|
| 98 |
+
]
|
| 99 |
+
},
|
| 100 |
+
{
|
| 101 |
+
"lr": 0.00904518046337755,
|
| 102 |
+
"name": "scale_768",
|
| 103 |
+
"betas": [
|
| 104 |
+
0.9,
|
| 105 |
+
0.999
|
| 106 |
+
],
|
| 107 |
+
"eps": 1e-08,
|
| 108 |
+
"weight_decay": 1e-05,
|
| 109 |
+
"amsgrad": false,
|
| 110 |
+
"maximize": false,
|
| 111 |
+
"foreach": null,
|
| 112 |
+
"capturable": false,
|
| 113 |
+
"differentiable": false,
|
| 114 |
+
"fused": null,
|
| 115 |
+
"decoupled_weight_decay": true,
|
| 116 |
+
"initial_lr": 0.01,
|
| 117 |
+
"params": [
|
| 118 |
+
8,
|
| 119 |
+
9,
|
| 120 |
+
10
|
| 121 |
+
]
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"lr": 0.00904518046337755,
|
| 125 |
+
"name": "scale_1024",
|
| 126 |
+
"betas": [
|
| 127 |
+
0.9,
|
| 128 |
+
0.999
|
| 129 |
+
],
|
| 130 |
+
"eps": 1e-08,
|
| 131 |
+
"weight_decay": 1e-05,
|
| 132 |
+
"amsgrad": false,
|
| 133 |
+
"maximize": false,
|
| 134 |
+
"foreach": null,
|
| 135 |
+
"capturable": false,
|
| 136 |
+
"differentiable": false,
|
| 137 |
+
"fused": null,
|
| 138 |
+
"decoupled_weight_decay": true,
|
| 139 |
+
"initial_lr": 0.01,
|
| 140 |
+
"params": [
|
| 141 |
+
11,
|
| 142 |
+
12,
|
| 143 |
+
13
|
| 144 |
+
]
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"lr": 0.004522637977440181,
|
| 148 |
+
"name": "fusion",
|
| 149 |
+
"betas": [
|
| 150 |
+
0.9,
|
| 151 |
+
0.999
|
| 152 |
+
],
|
| 153 |
+
"eps": 1e-08,
|
| 154 |
+
"weight_decay": 1e-05,
|
| 155 |
+
"amsgrad": false,
|
| 156 |
+
"maximize": false,
|
| 157 |
+
"foreach": null,
|
| 158 |
+
"capturable": false,
|
| 159 |
+
"differentiable": false,
|
| 160 |
+
"fused": null,
|
| 161 |
+
"decoupled_weight_decay": true,
|
| 162 |
+
"initial_lr": 0.005,
|
| 163 |
+
"params": [
|
| 164 |
+
14,
|
| 165 |
+
15,
|
| 166 |
+
16,
|
| 167 |
+
17,
|
| 168 |
+
18,
|
| 169 |
+
19,
|
| 170 |
+
20,
|
| 171 |
+
21,
|
| 172 |
+
22,
|
| 173 |
+
23,
|
| 174 |
+
24,
|
| 175 |
+
25,
|
| 176 |
+
26,
|
| 177 |
+
27,
|
| 178 |
+
28,
|
| 179 |
+
29,
|
| 180 |
+
30,
|
| 181 |
+
31,
|
| 182 |
+
32,
|
| 183 |
+
33,
|
| 184 |
+
34,
|
| 185 |
+
35,
|
| 186 |
+
36,
|
| 187 |
+
37,
|
| 188 |
+
38,
|
| 189 |
+
39,
|
| 190 |
+
40,
|
| 191 |
+
41,
|
| 192 |
+
42,
|
| 193 |
+
43,
|
| 194 |
+
44,
|
| 195 |
+
45,
|
| 196 |
+
46,
|
| 197 |
+
47,
|
| 198 |
+
48,
|
| 199 |
+
49,
|
| 200 |
+
50,
|
| 201 |
+
51,
|
| 202 |
+
52,
|
| 203 |
+
53,
|
| 204 |
+
54,
|
| 205 |
+
55,
|
| 206 |
+
56,
|
| 207 |
+
57,
|
| 208 |
+
58,
|
| 209 |
+
59,
|
| 210 |
+
60,
|
| 211 |
+
61,
|
| 212 |
+
62,
|
| 213 |
+
63,
|
| 214 |
+
64
|
| 215 |
+
]
|
| 216 |
+
}
|
| 217 |
+
]
|
| 218 |
+
},
|
| 219 |
+
"scheduler_state_dict": {
|
| 220 |
+
"T_0": 10,
|
| 221 |
+
"T_i": 10,
|
| 222 |
+
"T_mult": 2,
|
| 223 |
+
"eta_min": 1e-06,
|
| 224 |
+
"T_cur": 2,
|
| 225 |
+
"base_lrs": [
|
| 226 |
+
0.01,
|
| 227 |
+
0.01,
|
| 228 |
+
0.01,
|
| 229 |
+
0.01,
|
| 230 |
+
0.01,
|
| 231 |
+
0.005
|
| 232 |
+
],
|
| 233 |
+
"last_epoch": 2,
|
| 234 |
+
"_step_count": 0,
|
| 235 |
+
"_is_initial": false,
|
| 236 |
+
"_get_lr_called_within_step": false,
|
| 237 |
+
"_last_lr": [
|
| 238 |
+
0.00904518046337755,
|
| 239 |
+
0.00904518046337755,
|
| 240 |
+
0.00904518046337755,
|
| 241 |
+
0.00904518046337755,
|
| 242 |
+
0.00904518046337755,
|
| 243 |
+
0.004522637977440181
|
| 244 |
+
]
|
| 245 |
+
},
|
| 246 |
+
"metrics": {
|
| 247 |
+
"best_val_acc": 68.889,
|
| 248 |
+
"best_epoch": 1,
|
| 249 |
+
"scale_accuracies": {
|
| 250 |
+
"256": 68.889
|
| 251 |
+
},
|
| 252 |
+
"training_history": {
|
| 253 |
+
"epochs": [
|
| 254 |
+
1,
|
| 255 |
+
2
|
| 256 |
+
],
|
| 257 |
+
"train_loss": [
|
| 258 |
+
3.4310503170769358,
|
| 259 |
+
2.1717353230040883
|
| 260 |
+
],
|
| 261 |
+
"train_acc": [
|
| 262 |
+
54.52540535308824,
|
| 263 |
+
69.02296890257085
|
| 264 |
+
],
|
| 265 |
+
"val_acc": [
|
| 266 |
+
66.689,
|
| 267 |
+
68.889
|
| 268 |
+
],
|
| 269 |
+
"scale_accs": {
|
| 270 |
+
"256": [
|
| 271 |
+
66.689,
|
| 272 |
+
68.889
|
| 273 |
+
]
|
| 274 |
+
},
|
| 275 |
+
"lr": [
|
| 276 |
+
0.00975530705321762,
|
| 277 |
+
0.00904518046337755
|
| 278 |
+
]
|
| 279 |
+
}
|
| 280 |
+
},
|
| 281 |
+
"train_config": {
|
| 282 |
+
"name": "david_training",
|
| 283 |
+
"run_id": "20251012_191456",
|
| 284 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 285 |
+
"model_variant": [
|
| 286 |
+
"clip_vit_b32",
|
| 287 |
+
"clip_vit_laion_b32"
|
| 288 |
+
],
|
| 289 |
+
"num_classes": 1000,
|
| 290 |
+
"preset": "balanced",
|
| 291 |
+
"custom_config_path": null,
|
| 292 |
+
"num_classes_override": null,
|
| 293 |
+
"use_belly_override": null,
|
| 294 |
+
"belly_expand_override": null,
|
| 295 |
+
"progressive_training_override": true,
|
| 296 |
+
"scale_warmup_epochs_override": {
|
| 297 |
+
"256": 0,
|
| 298 |
+
"512": 2,
|
| 299 |
+
"768": 5,
|
| 300 |
+
"1024": 8
|
| 301 |
+
},
|
| 302 |
+
"num_epochs": 10,
|
| 303 |
+
"batch_size": 1024,
|
| 304 |
+
"learning_rate": 0.01,
|
| 305 |
+
"weight_decay": 1e-05,
|
| 306 |
+
"warmup_epochs": 3,
|
| 307 |
+
"use_rose_loss": true,
|
| 308 |
+
"rose_initial_weight": 0.2,
|
| 309 |
+
"rose_max_weight": 0.8,
|
| 310 |
+
"rose_weight_schedule": "adaptive",
|
| 311 |
+
"use_cayley_loss": true,
|
| 312 |
+
"cayley_weight": 0.01,
|
| 313 |
+
"scale_loss_balance": null,
|
| 314 |
+
"use_mixed_precision": false,
|
| 315 |
+
"gradient_clip": 10.0,
|
| 316 |
+
"scheduler_type": "cosine_restarts",
|
| 317 |
+
"min_lr": 1e-06,
|
| 318 |
+
"freeze_strategy": "never",
|
| 319 |
+
"freeze_threshold": 90.0,
|
| 320 |
+
"unfreeze_on_plateau": true,
|
| 321 |
+
"patience": 10,
|
| 322 |
+
"track_gradients": true,
|
| 323 |
+
"gradient_scale_threshold": 1e-05,
|
| 324 |
+
"gradient_scale_multiplier": 10.0,
|
| 325 |
+
"log_interval": 50,
|
| 326 |
+
"val_interval": 1,
|
| 327 |
+
"save_interval": 5,
|
| 328 |
+
"log_fusion_weights": true,
|
| 329 |
+
"log_loss_components": true,
|
| 330 |
+
"save_format": "safetensors",
|
| 331 |
+
"hf_repo": "AbstractPhil/david-shared-space",
|
| 332 |
+
"upload_to_hub": true,
|
| 333 |
+
"base_dir": "./david_training",
|
| 334 |
+
"num_workers": 10,
|
| 335 |
+
"pin_memory": true,
|
| 336 |
+
"prefetch_factor": 4,
|
| 337 |
+
"persistent_workers": true
|
| 338 |
+
}
|
| 339 |
+
}
|