Update best_model_acc66.69_metadata.json - Run 20251012_191456
Browse files
weights/David-partial_shared-hierarchical_tree/20251012_191456/best_model_acc66.69_metadata.json
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 0,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(2503.)",
|
| 7 |
+
"exp_avg": "tensor([[-7.9923e-06, 8.1822e-05, 1.4995e-05, ..., -4.7118e-05,\n 2.0594e-05, 1.6409e-05],\n [-2.9694e-05, 2.0150e-05, -8.1958e-06, ..., 1.8414e-05,\n -1.5185e-05, -7.9566e-06],\n [-1.9915e-05, 7.8212e-05, 7.7970e-05, ..., 1.1656e-05,\n -5.1447e-06, 6.6105e-06],\n ...,\n [-6.3423e-05, -1.2289e-05, 4.6927e-05, ..., 8.0601e-06,\n -8.8350e-05, 2.6056e-05],\n [ 6.4603e-05, 4.9392e-05, 2.2317e-05, ..., -4.0054e-05,\n 5.8970e-06, -1.4432e-05],\n [ 1.3055e-05, -3.1709e-06, 4.2902e-05, ..., 3.5429e-05,\n -1.7405e-05, -4.5160e-05]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[1.1449e-08, 8.4523e-08, 1.7553e-08, ..., 5.2554e-08, 8.4301e-09,\n 9.4886e-09],\n [2.8141e-09, 2.7041e-09, 3.0642e-09, ..., 8.0863e-09, 1.4065e-09,\n 1.9903e-09],\n [1.8452e-08, 1.2612e-07, 4.2249e-08, ..., 1.3192e-08, 1.1087e-08,\n 1.2235e-08],\n ...,\n [2.5265e-08, 1.2174e-07, 2.7970e-08, ..., 8.0522e-08, 1.5674e-08,\n 2.0899e-08],\n [2.3545e-08, 8.9706e-08, 4.3532e-08, ..., 3.7666e-08, 1.1704e-08,\n 2.0746e-08],\n [3.1488e-08, 3.1398e-08, 2.0223e-08, ..., 9.5903e-08, 1.7684e-08,\n 2.3249e-08]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(2503.)",
|
| 12 |
+
"exp_avg": "tensor([-2.7397e-04, 4.9872e-04, 1.4055e-04, -5.2997e-04, 1.1590e-03,\n -3.8368e-04, -1.8709e-04, 4.2653e-04, 1.2722e-03, -5.6705e-04,\n -4.1328e-04, -3.0537e-04, -5.8794e-04, -1.4259e-03, -1.2735e-03,\n -1.2047e-04, 3.6669e-04, -3.8176e-05, -1.5573e-03, -4.2052e-05,\n 1.8901e-03, 2.9344e-05, -1.1576e-03, -9.9120e-04, -2.2957e-03,\n 3.1708e-03, -1.1918e-03, -9.2778e-05, -2.8566e-04, -9.5876e-05,\n 7.2994e-04, -3.6246e-04, 1.0611e-06, 1.8346e-03, -8.0173e-04,\n -6.1536e-04, 9.8982e-04, -1.9392e-03, 5.2304e-04, -3.1672e-04,\n -2.0714e-03, -3.3987e-04, 1.2974e-03, -1.6558e-03, 3.9636e-04,\n 1.4541e-03, 3.6941e-04, 1.7832e-03, -2.4420e-03, -1.1299e-03,\n -3.7569e-04, 8.3327e-04, 1.6418e-03, 2.8452e-04, 1.2817e-04,\n 1.3835e-03, -4.6377e-04, -8.8429e-04, 1.9152e-04, 4.1255e-04,\n -2.4617e-03, -5.8755e-04, -2.4713e-03, -1.8216e-03, 5.6761e-04,\n 1.5461e-03, -8.3481e-04, 3.1386e-04, -1.0235e-03, 1.3344e-04,\n 1.1669e-03, 2.3012e-04, 8.5053e-04, -1.7771e-04, -4.7999e-04,\n -2.0209e-03, -1.9307e-03, -1.2438e-03, 1.3523e-03, 2.8166e-04,\n 3.7614e-03, 2.0112e-03, -9.8823e-04, 1.4678e-03, -1.9142e-03,\n -6.2796e-04, -1.0339e-03, -4.4594e-04, -1.9888e-04, 9.5888e-04,\n -2.6023e-03, -4.7618e-04, 2.4314e-03, -1.8317e-03, -2.0447e-03,\n -1.2566e-03, -1.0118e-03, 1.0525e-03, -2.0819e-03, 1.6004e-03,\n -6.9728e-04, 1.5915e-18, -2.0259e-04, -3.3036e-03, -1.0502e-03,\n 1.5693e-03, -2.3398e-04, 8.0618e-04, -7.6297e-04, 1.5901e-04,\n -8.8177e-04, 7.8197e-04, -6.5566e-04, -1.0760e-03, 6.7568e-04,\n -1.8660e-03, 2.3659e-04, 1.3092e-03, -1.4192e-05, 6.0931e-05,\n -2.0430e-04, -7.5269e-04, 5.2116e-04, -2.2075e-03, -8.4150e-04,\n 1.2081e-03, -9.8338e-04, 5.1654e-04, 1.2325e-27, -2.4151e-05,\n -3.5934e-04, -8.2776e-04, 4.3520e-04, 3.5103e-04, -1.0441e-04,\n 1.1883e-03, 1.8546e-03, -2.3217e-03, 2.2631e-03, 1.6228e-03,\n -7.8825e-04, 1.1220e-03, 1.2664e-04, 4.0765e-04, 1.1490e-08,\n -2.5743e-03, -1.6248e-03, -1.8734e-03, -7.4008e-04, -5.1470e-04,\n -2.2074e-03, -9.4391e-04, 1.7462e-03, 3.5120e-03, 9.7996e-04,\n -1.7665e-03, 2.2076e-03, -9.0782e-11, -9.2035e-05, -6.6064e-04,\n -5.0901e-04, 8.0235e-04, 7.7143e-04, -1.3067e-03, 1.3017e-05,\n -6.7678e-04, 3.9681e-04, -1.1594e-04, -2.7800e-03, 5.2649e-05,\n 3.2996e-04, 5.9510e-04, -1.5633e-03, 1.0842e-03, 1.3109e-03,\n 1.2718e-03, -1.0564e-03, 6.9049e-04, -3.3910e-04, -2.4405e-04,\n -6.8366e-04, -3.1119e-04, 4.4138e-04, 8.0032e-04, -2.4781e-04,\n -1.3493e-03, -8.9461e-04, 3.3923e-04, 3.7696e-03, 2.6393e-05,\n 1.1429e-04, 8.5777e-04, -1.4281e-03, 5.8274e-05, -2.5051e-04,\n -5.6224e-04, 6.1182e-07, 3.1712e-04, 1.6338e-03, 7.4114e-04,\n -1.1137e-03, -7.1886e-05, 1.1669e-03, 4.5515e-04, 1.1904e-03,\n -1.8541e-04, -1.1219e-04, 8.8473e-04, 2.6873e-04, 3.9970e-05,\n 2.8636e-04, 2.2836e-03, 5.9780e-04, -2.2716e-03, -1.4932e-03,\n 3.9962e-04, 2.7283e-03, 2.7247e-03, -3.2626e-04, 1.9748e-04,\n 2.0134e-04, 4.7888e-04, -1.1508e-03, 1.3760e-03, 1.1892e-03,\n -3.3387e-04, 7.8096e-04, 5.1256e-04, -8.0812e-04, 3.4787e-04,\n 7.8888e-04, -2.8998e-04, 8.4602e-06, 6.0819e-04, 1.9865e-03,\n 1.4342e-03, 8.6522e-04, 1.0813e-03, 2.1550e-03, -7.7866e-04,\n 8.3278e-04, 7.7327e-04, 1.2659e-03, -4.0274e-04, -2.9060e-03,\n 1.7176e-03, 2.0665e-03, 1.0942e-03, -8.8371e-04, -2.4042e-03,\n 3.5087e-04, 2.5629e-03, -3.2119e-03, -3.8474e-04, -1.9302e-04,\n 2.7286e-03, 7.3030e-04, -2.8650e-04, 2.5327e-04, -1.0656e-03,\n 1.4366e-03, -2.1309e-03, -1.8947e-05, -2.6470e-03, 9.4414e-04,\n -1.8494e-04, 4.0894e-04, 1.2493e-03, -5.6178e-04, 7.6039e-04,\n 5.6810e-04, 1.3139e-03, -8.6493e-04, 1.3332e-03, 1.0920e-03,\n -8.0039e-04, 1.1863e-03, -2.9767e-04, -8.3965e-04, 2.0126e-06,\n -2.3780e-04, -2.5572e-04, -1.1347e-04, -1.4785e-04, -2.4642e-03,\n -1.5233e-03, -7.1060e-04, 2.5860e-04, 3.4668e-04, 4.6865e-04,\n -9.5036e-04, -6.6675e-04, 4.8465e-04, -6.0799e-04, 5.8983e-04,\n -1.7824e-04, -7.7429e-04, 3.3100e-04, -1.5631e-03, -2.3187e-03,\n -1.6261e-03, 4.9923e-04, -7.3279e-04, 2.0555e-05, 4.0586e-04,\n -2.5493e-03, -3.0006e-04, -5.0317e-04, 1.7176e-04, -4.9827e-04,\n -1.2079e-03, -1.0533e-03, 9.5118e-04, 1.4830e-03, 1.5082e-03,\n 2.3110e-03, 1.0889e-03, 2.6374e-04, -1.5416e-03, 6.8677e-04,\n 1.9080e-04, 7.2776e-04, 1.2382e-04, -6.7332e-04, -2.8223e-03,\n 8.6150e-04, 4.0973e-04, -2.7206e-04, -5.5385e-04, 2.3110e-03,\n 8.1295e-05, -2.4157e-03, -8.0260e-04, 2.2068e-03, -7.7052e-05,\n 1.3356e-03, -1.5968e-03, -1.1953e-04, 2.4669e-04, 1.2874e-03,\n -4.6566e-04, -5.4541e-04, -1.3366e-03, 2.5600e-04, -2.4649e-03,\n -5.9491e-04, -3.4545e-04, 1.4168e-03, 2.4977e-03, -6.8726e-04,\n -6.3073e-05, -7.1581e-04, 4.4461e-04, 1.9470e-10, -9.2506e-04,\n -7.8879e-04, -7.6950e-04, 3.4878e-04, 1.3016e-03, 7.1974e-04,\n 3.7055e-04, -2.1634e-03, -1.4721e-03, -1.4421e-03, 1.3735e-04,\n 7.6526e-04, 4.5716e-04, -1.6555e-04, 1.0200e-03, -9.1071e-05,\n -3.9470e-04, -3.5339e-04, 6.4999e-04, 3.8122e-04, 5.5279e-05,\n -6.2678e-05, 9.7998e-04, 6.9320e-12, -5.7711e-05, 4.6507e-04,\n 7.8871e-04, 1.9152e-03, 1.4921e-03, 2.2900e-03, 5.6052e-45,\n -2.6286e-03, -1.3992e-03, -2.4372e-04, -1.0549e-03, 1.4655e-03,\n 1.2083e-03, 2.4293e-05, 1.2523e-03, 3.0799e-03, 9.5804e-04,\n 1.3408e-03, 5.8852e-04, -2.9059e-04, -8.3456e-04, 1.1084e-03,\n 5.7804e-04, 1.0133e-03, -5.3327e-04, 2.8786e-04, 1.7573e-03,\n 4.6782e-03, 7.9004e-12, -1.9110e-04, 6.4289e-04, 4.3033e-05,\n 3.1434e-03, -1.9546e-03, -8.0125e-04, 2.6687e-03, -5.5255e-04,\n 5.7637e-04, 1.6266e-04, -2.2837e-04, -1.1962e-03, 2.2472e-03,\n 5.3501e-04, 1.1212e-14, 3.4725e-04, 7.1261e-04, 1.3200e-04,\n -8.5970e-05, 2.4065e-08, 7.5355e-04, -1.1130e-03, 1.7753e-04,\n -8.2613e-04, 8.4758e-04, 1.4441e-04, -7.1188e-04, 2.0192e-05,\n 3.5067e-07, -5.2312e-04, -2.1175e-04, -5.8680e-04, -3.0340e-04,\n 7.6739e-04, -1.8309e-03, -5.7869e-04, 8.9784e-04, 2.4418e-03,\n 5.3725e-04, -1.0865e-03, -5.0063e-04, -8.8597e-04, -4.8094e-04,\n -8.3147e-04, -2.4741e-04, 1.7408e-03, -1.1693e-03, -9.4726e-05,\n 3.6653e-04, 5.0909e-04, 1.7642e-04, -3.1274e-03, 4.9282e-04,\n -6.4965e-04, -9.7179e-05, -2.9536e-04, 3.9888e-04, -1.3379e-03,\n 9.4296e-04, 4.7307e-05, -6.4166e-04, 2.7498e-03, 3.1472e-03,\n -1.5255e-04, -5.2102e-04, -1.2462e-03, -2.9272e-04, 4.2536e-08,\n 2.4048e-04, 1.6439e-03, 1.7110e-04, -1.6127e-04, 6.3095e-04,\n 1.4304e-03, 3.4042e-04, -2.5156e-03, -3.4785e-03, 7.2689e-04,\n 2.8097e-03, 1.8454e-03, -3.3400e-03, 4.8123e-04, 2.7095e-04,\n -8.6917e-04, 2.8502e-04, -1.1447e-04, -3.0350e-04, 2.9906e-04,\n 1.1720e-03, 1.3399e-03, 1.8824e-03, -3.5511e-04, -1.9156e-04,\n 1.2064e-03, -3.4031e-05, -8.8409e-04, 1.2309e-03, -5.0683e-04,\n -1.2947e-03, -4.4993e-04, -6.1646e-04, -2.6502e-04, -4.8142e-04,\n 9.8627e-06, -2.9450e-04, -2.1406e-03, 1.4446e-03, -1.4635e-03,\n -1.5567e-04, -2.0861e-03, -2.4399e-06, 8.8926e-04, -3.9586e-04,\n 3.5676e-05, 2.3447e-03, -3.7394e-04, -2.1861e-04, -1.4346e-03,\n -1.1775e-03, 2.0456e-06, -9.3904e-04, 6.4579e-04, 1.2035e-03,\n -1.9204e-03, -7.3071e-04, 2.6979e-03, 8.1999e-04, 7.6412e-04,\n 4.3225e-05, 4.4558e-04, 1.2704e-03, -6.6245e-04, -5.2947e-04,\n -1.4848e-03, 6.5103e-04, -1.1794e-03, 7.3091e-04, -1.5176e-04,\n 1.0910e-03, 5.4563e-04, -1.6732e-03, -7.6339e-04, -2.2352e-04,\n -9.7683e-04, -1.7297e-03, -7.3328e-04, -2.2311e-04, -2.9391e-03,\n -7.3374e-10, 1.5492e-03, -1.8828e-03, 1.8228e-03, 1.2282e-03,\n -1.6178e-03, 3.3670e-04, -6.8068e-04, -1.8562e-03, -1.5763e-03,\n 4.5021e-04, -6.1336e-04, 5.6208e-04, -2.0969e-03, -4.6995e-04,\n 1.7462e-05, 1.0416e-03, 8.0144e-04, -1.8010e-03, 1.4412e-03,\n 2.4658e-04, 2.4834e-04, -1.8629e-04, 1.9888e-03, 4.1321e-04,\n 9.4088e-04, 2.9329e-04, 1.2397e-06, 1.7473e-04, -5.3336e-04,\n 1.6619e-03, -2.5799e-03, 6.9241e-04, -1.4001e-03, 5.8637e-07,\n 5.4332e-04, -6.8814e-04, 2.8637e-03, 2.7836e-04, -1.1587e-03,\n 2.7440e-04, 6.6385e-04, 9.7886e-04, -4.9716e-03, -3.3140e-07,\n -6.9159e-05, -6.1805e-04, 1.5383e-03, -3.0831e-04, -8.7241e-04,\n -5.7133e-04, 1.3154e-03, -1.6331e-03, 1.9168e-03, 3.2104e-04,\n -4.0081e-04, -1.7028e-03, 3.3388e-04, 2.6223e-03, -1.2148e-03,\n -2.3953e-03, 7.5212e-04, 5.0004e-04, -1.4136e-03, 6.7064e-04,\n -1.1903e-03, -2.2290e-04, 5.6052e-45, 1.1522e-03, -5.4559e-04,\n 3.4360e-04, 7.0575e-04, -4.3509e-04, 6.7863e-04, 3.1497e-04,\n -2.2958e-04, 5.8414e-04, -8.3584e-04, -1.3708e-04, -3.8279e-04,\n 1.5387e-04, 4.7351e-04, 7.8224e-04, 7.9845e-04, 2.4722e-03,\n -1.8838e-04, 1.5110e-03, -1.4500e-03, 6.3231e-04, 3.8374e-04,\n 5.4293e-04, -1.6440e-04, -1.1475e-03, 6.2828e-04, -2.2315e-04,\n -2.4629e-04, -2.7128e-04, -1.9805e-04, 2.0066e-03, 1.8233e-04,\n -7.8697e-04, -1.1666e-03, -2.3119e-03, -1.1945e-03, -5.8130e-04,\n -1.2740e-03, -9.1989e-04, 1.2626e-03, -1.1604e-03, -1.2305e-03,\n 5.4770e-04, -6.2061e-04, 1.9631e-03, -1.1539e-03, -2.4235e-04,\n 9.2330e-04, 4.5695e-04, 9.3278e-04, -8.3946e-04, -1.7956e-03,\n 2.3032e-03, -7.2942e-04, 9.9436e-04, 2.0778e-03, -1.3922e-03,\n 1.5204e-03, -1.0042e-04, 3.5709e-03, -1.0404e-03, 1.4014e-03,\n 8.5028e-04, -2.5015e-03, -1.7669e-03, -1.2653e-03, 6.7387e-04,\n 4.7097e-04, 2.2524e-07, -3.4363e-04, 4.3337e-05, 2.2153e-04,\n 2.9609e-03, -1.7217e-03, 1.3317e-03, 5.2233e-04, 1.0736e-04,\n -3.9493e-04, 1.5705e-03, 3.4615e-04, 6.0481e-04, -2.6935e-03,\n 3.3022e-03, 1.3800e-03, -2.0538e-03, 4.4383e-04, -9.5681e-04,\n 1.7732e-03, 2.1073e-03, 5.5165e-04, -2.0231e-04, 7.9497e-05,\n 7.8565e-04, -8.2807e-04, 2.8634e-04, -6.6183e-05, 1.1276e-04,\n 6.5932e-04, 1.8806e-03, -2.7049e-03, 1.1238e-03, 2.5714e-04,\n 3.0085e-04, -5.8747e-04, 8.0860e-05, -1.3437e-04, 2.4778e-03,\n 6.2933e-04, -5.0370e-04, -9.3128e-04, -2.8988e-04, -1.9585e-03,\n 7.8267e-04, -3.5712e-04, -2.4485e-04, -1.3078e-06, -7.2766e-04,\n 4.1649e-04, 5.9118e-04, 2.5786e-03, -1.3444e-03, -1.7123e-04,\n 4.5704e-05, 4.8396e-05, -7.8942e-05, 2.8272e-03, 3.0765e-03,\n 1.9910e-03, 4.1118e-06, 1.3209e-04, -1.0838e-03, -7.5399e-04,\n -8.3305e-04, -9.2255e-04, -1.8434e-05, -1.2533e-03, -4.6511e-04,\n 2.9655e-04, 2.2889e-04, -2.5923e-04, -1.3906e-03, -5.4164e-04,\n -1.4647e-03, 1.1445e-04, 2.1953e-04], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([1.4944e-05, 3.0751e-06, 1.3539e-05, 1.9657e-05, 5.3869e-05, 1.5152e-05,\n 1.4183e-06, 2.9735e-05, 2.1051e-05, 1.9028e-05, 1.5825e-05, 1.2482e-05,\n 1.9447e-05, 2.3602e-05, 4.2710e-05, 2.0649e-05, 2.8824e-05, 1.7355e-05,\n 3.2213e-05, 2.5784e-06, 4.2802e-05, 1.3005e-05, 9.3618e-06, 2.8711e-05,\n 1.5668e-05, 4.9173e-05, 1.7183e-05, 1.4368e-05, 3.1982e-05, 2.7693e-05,\n 4.7454e-05, 4.8373e-06, 1.7481e-07, 3.8738e-05, 4.1347e-05, 5.8897e-05,\n 1.3676e-05, 3.2398e-05, 2.6750e-05, 3.7434e-05, 3.5237e-05, 4.6467e-05,\n 1.9504e-05, 2.5834e-05, 2.7952e-06, 2.6620e-05, 2.4000e-05, 3.2540e-05,\n 3.2705e-06, 3.2732e-05, 3.5050e-05, 2.1831e-05, 2.0621e-05, 7.0473e-06,\n 5.0332e-06, 4.0393e-05, 1.9522e-05, 1.5082e-05, 7.8391e-06, 5.1891e-05,\n 2.4399e-05, 2.9468e-05, 2.0967e-05, 2.4108e-05, 3.8815e-05, 5.2132e-05,\n 3.3050e-05, 5.1087e-06, 2.6620e-05, 1.5274e-05, 2.5597e-05, 2.7670e-05,\n 2.8755e-05, 3.5752e-05, 1.6373e-05, 2.2578e-05, 3.4783e-05, 1.9758e-05,\n 4.7367e-05, 1.2633e-05, 3.1330e-05, 1.5384e-05, 2.1365e-05, 1.7633e-05,\n 3.4345e-05, 4.3877e-05, 3.8517e-05, 4.9225e-05, 1.6947e-05, 1.9481e-05,\n 3.9346e-05, 5.3731e-05, 1.6491e-05, 3.0101e-05, 3.1504e-05, 3.9026e-05,\n 5.9023e-05, 3.3119e-05, 1.1203e-05, 2.1680e-05, 1.4839e-05, 1.4433e-08,\n 2.0974e-05, 5.2758e-05, 1.0726e-05, 3.9827e-05, 1.5841e-05, 1.2528e-05,\n 1.9993e-05, 1.8093e-05, 2.4371e-05, 3.5725e-05, 2.6178e-05, 3.8419e-05,\n 8.9289e-06, 2.4645e-05, 1.5203e-05, 3.1992e-05, 7.4040e-08, 1.9993e-05,\n 7.1512e-06, 1.8822e-05, 2.2267e-05, 3.2228e-05, 2.3503e-05, 1.3286e-05,\n 2.3547e-05, 2.6326e-05, 5.4852e-08, 1.3030e-05, 2.2320e-05, 8.9296e-06,\n 2.7849e-05, 8.8463e-06, 2.6574e-05, 1.8438e-05, 2.1611e-05, 2.5116e-05,\n 4.7182e-05, 3.9215e-05, 3.9975e-05, 2.2827e-05, 2.3148e-06, 3.0532e-05,\n 1.9941e-07, 3.7507e-05, 2.3891e-05, 7.2040e-05, 2.9812e-05, 3.1055e-05,\n 6.0503e-05, 1.5215e-05, 2.4580e-05, 2.6196e-05, 2.3394e-05, 5.6804e-05,\n 1.5952e-05, 7.7143e-07, 2.5762e-05, 1.7085e-05, 1.7727e-05, 2.8126e-05,\n 2.0248e-05, 5.0355e-05, 2.2199e-05, 1.4843e-05, 2.5263e-05, 2.5394e-05,\n 2.0088e-05, 3.1254e-05, 2.4360e-05, 3.2598e-05, 1.2729e-05, 2.0049e-05,\n 1.5803e-05, 2.4226e-05, 1.4002e-05, 2.6658e-05, 9.1322e-06, 4.6875e-05,\n 3.1873e-05, 2.1621e-05, 3.0556e-05, 2.2219e-05, 3.3073e-05, 3.0005e-05,\n 8.9483e-06, 2.3359e-05, 2.9144e-05, 2.2518e-05, 6.1800e-06, 2.2677e-05,\n 2.3603e-05, 6.0985e-05, 3.1210e-05, 1.4799e-05, 5.5746e-06, 2.7784e-06,\n 2.3127e-05, 3.7585e-05, 1.6136e-05, 1.5495e-05, 2.8400e-05, 9.3805e-06,\n 1.0976e-05, 1.7006e-05, 7.2636e-07, 5.2058e-05, 1.6256e-05, 4.5267e-05,\n 1.6143e-05, 2.2727e-05, 1.7938e-05, 5.5413e-05, 2.0488e-05, 3.4802e-05,\n 2.7719e-05, 4.3403e-05, 1.2318e-05, 2.6796e-05, 8.1957e-06, 2.7575e-05,\n 1.8338e-05, 2.1603e-05, 2.9700e-05, 2.2201e-05, 2.2161e-05, 4.0235e-05,\n 2.1939e-05, 2.9780e-05, 5.5316e-05, 3.1898e-05, 3.2737e-05, 3.6410e-06,\n 2.7990e-05, 1.9040e-05, 1.1385e-05, 4.0003e-05, 4.5006e-05, 7.7536e-06,\n 1.9857e-05, 2.3756e-05, 8.9401e-06, 1.7300e-05, 2.0628e-05, 3.8870e-05,\n 3.7655e-05, 5.1044e-05, 1.8298e-05, 5.2376e-05, 1.4225e-05, 4.6484e-05,\n 2.1304e-05, 2.3944e-05, 3.3064e-05, 3.4846e-05, 1.0328e-05, 3.9421e-05,\n 5.3382e-06, 4.3918e-05, 2.5586e-05, 5.4532e-05, 2.1291e-05, 4.4265e-05,\n 1.9668e-05, 3.2593e-05, 3.4480e-05, 2.7135e-05, 1.8682e-05, 9.9302e-06,\n 1.8642e-05, 6.3250e-05, 3.9613e-05, 1.7660e-05, 1.3202e-05, 4.4259e-05,\n 3.4809e-05, 5.6221e-06, 1.5013e-05, 1.6401e-06, 1.9306e-05, 4.0653e-05,\n 6.1921e-06, 2.8253e-05, 3.2449e-05, 1.4777e-05, 2.6465e-05, 1.4060e-06,\n 2.3227e-05, 6.8856e-06, 2.0059e-05, 3.2697e-05, 1.8922e-05, 3.0082e-05,\n 2.5204e-05, 5.0039e-05, 2.3864e-05, 1.3041e-05, 2.7496e-05, 2.5667e-05,\n 3.0114e-05, 2.7285e-05, 2.5446e-05, 1.4478e-05, 4.0311e-05, 2.6630e-05,\n 2.9299e-05, 2.7675e-05, 3.0013e-05, 1.1935e-05, 5.6046e-05, 5.0709e-05,\n 2.2014e-05, 9.5978e-06, 3.9028e-05, 3.3110e-05, 1.8083e-05, 1.7433e-05,\n 3.7735e-05, 2.6133e-05, 2.9744e-05, 3.6985e-05, 2.4554e-05, 2.2749e-05,\n 2.5188e-05, 1.6161e-05, 7.1178e-06, 1.9757e-05, 4.5593e-05, 2.4971e-05,\n 1.8708e-05, 5.2325e-05, 2.4556e-05, 3.8705e-05, 2.0591e-05, 2.9568e-05,\n 4.1596e-05, 3.2918e-05, 2.4298e-05, 2.8607e-05, 6.3864e-07, 1.8460e-05,\n 1.7189e-05, 3.8184e-05, 1.5625e-05, 2.9334e-05, 3.2671e-05, 1.3952e-05,\n 1.9619e-05, 4.1809e-05, 1.5479e-05, 1.3013e-05, 8.8145e-06, 8.3452e-09,\n 1.7761e-05, 3.2974e-05, 3.4636e-05, 3.7187e-05, 2.8038e-05, 1.3611e-05,\n 2.9439e-06, 8.6152e-06, 3.8641e-05, 1.1532e-05, 1.8027e-05, 5.7228e-05,\n 4.1810e-05, 1.4206e-05, 8.1338e-06, 1.6286e-05, 1.5622e-05, 2.4279e-05,\n 5.7712e-06, 1.6025e-05, 2.4311e-06, 3.0301e-05, 3.2032e-05, 4.4544e-07,\n 3.8086e-05, 1.5157e-05, 1.0680e-05, 3.4759e-05, 2.4571e-05, 4.0423e-05,\n 2.1371e-09, 5.8264e-05, 1.6267e-05, 1.6019e-05, 1.0769e-05, 1.5775e-05,\n 1.6383e-05, 3.2276e-05, 1.0632e-05, 3.6039e-05, 1.2039e-05, 3.3577e-05,\n 3.0134e-05, 3.4325e-05, 9.3197e-06, 1.6851e-05, 2.7968e-05, 1.5952e-05,\n 1.7361e-05, 1.7823e-05, 3.8829e-05, 3.7986e-05, 5.7694e-08, 8.8934e-06,\n 4.1512e-05, 1.3586e-05, 2.3205e-05, 1.6513e-05, 1.7998e-05, 3.2963e-05,\n 1.2563e-05, 1.8826e-05, 6.5522e-06, 1.4550e-05, 4.4782e-05, 2.3613e-05,\n 3.1305e-05, 1.3924e-08, 7.9707e-06, 3.6110e-05, 1.0417e-05, 3.1121e-05,\n 1.4584e-07, 1.4184e-05, 2.9511e-05, 1.0104e-05, 2.0777e-05, 1.8691e-05,\n 8.0129e-06, 7.9956e-06, 1.0944e-05, 1.2789e-07, 3.3639e-05, 1.7623e-05,\n 4.1344e-05, 2.7817e-05, 1.1160e-05, 4.7629e-05, 3.0005e-05, 2.9420e-05,\n 3.0727e-05, 2.4969e-05, 3.6783e-05, 5.3449e-05, 2.5217e-05, 5.0802e-05,\n 1.7081e-05, 1.8649e-05, 2.0015e-05, 1.1965e-05, 2.9783e-05, 2.7073e-05,\n 2.0480e-05, 6.0168e-06, 1.3270e-05, 1.2279e-05, 2.7660e-05, 4.6787e-05,\n 3.9654e-05, 1.8375e-05, 2.8326e-05, 6.9543e-05, 2.7011e-05, 8.2943e-06,\n 3.6753e-05, 2.3653e-05, 1.5920e-05, 2.3785e-05, 2.7710e-05, 4.5605e-05,\n 1.3467e-07, 2.7322e-05, 2.7256e-05, 2.5195e-05, 2.7543e-05, 2.6566e-05,\n 4.0176e-05, 3.7388e-05, 1.4687e-05, 4.5311e-05, 7.5516e-06, 2.3699e-05,\n 3.1853e-05, 2.6055e-05, 2.7246e-05, 8.2471e-06, 1.6604e-05, 2.3714e-05,\n 1.3196e-05, 2.6898e-05, 3.1795e-05, 1.6592e-05, 3.5076e-05, 3.1082e-05,\n 1.9293e-05, 2.2254e-05, 2.5906e-05, 2.1658e-05, 1.7750e-05, 2.8049e-05,\n 2.8371e-05, 1.2221e-05, 1.3329e-05, 1.7468e-05, 1.8165e-05, 4.4542e-05,\n 1.6765e-05, 1.7940e-05, 1.3301e-05, 3.5760e-05, 1.9639e-05, 6.2283e-06,\n 2.1707e-05, 1.4506e-05, 5.4453e-05, 1.1904e-05, 2.7894e-05, 1.9580e-05,\n 4.0634e-05, 2.3377e-05, 1.3768e-05, 4.2668e-05, 4.9277e-07, 2.9906e-05,\n 2.5380e-05, 3.6759e-05, 4.2988e-05, 3.4441e-05, 3.4752e-05, 5.9202e-06,\n 3.3717e-05, 2.0114e-05, 1.7030e-05, 5.4219e-05, 2.6161e-05, 2.6288e-05,\n 2.0424e-05, 2.6455e-05, 2.0613e-05, 1.4755e-05, 1.9378e-05, 2.8112e-05,\n 8.6184e-05, 3.4443e-05, 2.0711e-05, 1.2132e-05, 1.2987e-05, 3.8710e-05,\n 2.4204e-05, 1.0601e-05, 1.5981e-05, 4.9594e-08, 3.6932e-05, 2.3209e-05,\n 3.1545e-05, 1.6675e-05, 2.7410e-05, 1.8132e-05, 2.8973e-05, 1.5516e-05,\n 3.7769e-05, 1.9840e-05, 3.1927e-05, 1.9670e-05, 1.7327e-05, 1.5711e-05,\n 1.3538e-05, 2.0385e-05, 3.0782e-05, 3.0337e-05, 2.2688e-05, 2.8616e-05,\n 6.4013e-05, 1.8557e-05, 3.4592e-05, 3.8573e-05, 1.4800e-05, 2.1029e-05,\n 4.9707e-08, 2.3830e-05, 2.7492e-05, 2.2986e-05, 2.7239e-05, 4.2759e-05,\n 2.6316e-05, 8.6012e-08, 2.4511e-05, 2.9173e-05, 1.3376e-05, 1.3282e-05,\n 1.5196e-05, 3.5741e-05, 9.2755e-06, 1.6745e-05, 3.3085e-05, 3.1786e-05,\n 5.9005e-05, 1.9349e-05, 3.9370e-05, 1.3029e-05, 4.6217e-06, 9.1631e-06,\n 4.3533e-05, 4.4868e-05, 4.8140e-05, 1.3398e-05, 2.3626e-05, 2.7538e-05,\n 3.1680e-05, 3.1382e-05, 2.5465e-05, 2.7486e-05, 6.9744e-05, 9.9815e-06,\n 1.8758e-05, 2.4342e-05, 4.6862e-05, 1.3160e-05, 1.0214e-09, 1.2695e-05,\n 2.5649e-05, 5.5241e-06, 2.7559e-05, 1.2195e-05, 1.3341e-05, 4.5246e-05,\n 6.7843e-06, 2.4269e-06, 3.9910e-05, 2.4286e-05, 1.2387e-05, 2.9194e-05,\n 9.4934e-06, 2.5661e-05, 1.5040e-05, 7.8852e-05, 2.1723e-05, 1.8449e-05,\n 2.3169e-05, 4.6989e-05, 8.5388e-06, 5.7439e-05, 4.5947e-05, 3.5424e-05,\n 3.0027e-05, 4.1983e-05, 1.6062e-05, 2.4842e-05, 4.4049e-06, 2.8597e-05,\n 3.4565e-05, 2.2723e-05, 4.1973e-05, 3.8252e-05, 2.5178e-05, 8.7301e-06,\n 1.8508e-05, 2.3194e-05, 4.5793e-05, 3.4617e-05, 1.9658e-05, 7.1596e-06,\n 2.2318e-05, 4.0236e-05, 1.6923e-05, 2.1328e-05, 4.5493e-06, 1.7769e-06,\n 2.1227e-05, 4.2489e-05, 2.5320e-05, 4.2089e-05, 4.4451e-05, 2.1199e-05,\n 5.0022e-05, 7.0743e-05, 3.7996e-05, 1.4145e-06, 1.8356e-05, 1.1616e-05,\n 1.1676e-05, 1.8997e-05, 3.0532e-05, 5.3497e-05, 1.0680e-05, 1.4619e-05,\n 6.9931e-06, 3.1495e-08, 2.4369e-05, 3.2770e-05, 3.7650e-05, 5.3649e-05,\n 2.6329e-05, 4.5175e-05, 4.3633e-05, 1.2868e-05, 2.2790e-05, 3.6434e-05,\n 4.4949e-05, 1.5530e-05, 1.9653e-05, 4.4412e-05, 3.6589e-05, 1.6897e-05,\n 7.8387e-06, 1.9680e-05, 1.5592e-05, 2.1560e-05, 1.4632e-05, 3.0075e-05,\n 4.2585e-05, 1.5566e-05, 8.2010e-06, 3.8954e-05, 3.3865e-05, 8.4350e-06,\n 3.8812e-05, 2.9901e-05, 4.4438e-05, 3.0793e-05, 3.6577e-05, 1.9666e-05,\n 1.6187e-05, 2.9707e-05, 5.0608e-05, 5.8699e-05, 2.5754e-05, 4.0235e-05,\n 2.0275e-05, 3.3652e-05, 4.4434e-05, 4.5319e-05, 3.7886e-05, 1.1729e-05,\n 9.9707e-08, 3.5502e-05, 1.4748e-05, 2.0319e-05, 4.6248e-05, 5.0228e-05,\n 6.6088e-06, 1.4029e-05, 8.6241e-06, 1.3401e-05, 3.6690e-05, 4.5754e-05,\n 4.2430e-05, 1.7586e-08, 3.0169e-05, 1.6284e-05, 5.6896e-06, 2.3782e-05,\n 2.1798e-05, 2.9290e-05, 1.4695e-05, 6.4673e-06, 2.9216e-05, 2.2154e-05,\n 1.6224e-05, 3.1621e-05, 1.9073e-06, 3.5479e-05, 2.4344e-05, 3.9017e-05],\n device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(2503.)",
|
| 17 |
+
"exp_avg": "tensor([[-5.1918e-07, 4.7724e-06, 2.9329e-06, ..., 8.4271e-06,\n -1.4940e-05, -9.3978e-05],\n [-3.6704e-05, -2.0822e-06, 4.1368e-05, ..., -1.3933e-05,\n -5.1272e-05, 2.9777e-05],\n [-4.8715e-07, -1.9632e-06, 1.8008e-05, ..., -2.1298e-05,\n 2.4451e-04, -7.3349e-06],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -1.3613e-32, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.3062e-09, -1.9002e-06, 3.3396e-06, ..., -1.2642e-05,\n -1.3222e-06, -4.1389e-05]], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([[3.9297e-09, 2.4564e-09, 1.7988e-08, ..., 7.3066e-09, 3.8860e-09,\n 1.2767e-08],\n [5.9513e-08, 1.6939e-08, 4.5162e-08, ..., 3.2033e-08, 2.7687e-08,\n 8.7035e-08],\n [5.6499e-10, 6.9031e-10, 2.5330e-09, ..., 1.1215e-08, 1.2707e-07,\n 4.7835e-08],\n ...,\n [1.3613e-09, 4.5204e-09, 8.3171e-10, ..., 1.7435e-10, 6.3713e-10,\n 3.8537e-12],\n [6.8473e-10, 1.7737e-08, 2.0094e-08, ..., 4.4051e-17, 8.4332e-09,\n 1.1670e-10],\n [2.3977e-10, 2.7735e-09, 1.1570e-09, ..., 7.0437e-10, 2.2358e-09,\n 6.0572e-08]], device='cuda:0')"
|
| 19 |
+
},
|
| 20 |
+
"3": {
|
| 21 |
+
"step": "tensor(2503.)",
|
| 22 |
+
"exp_avg": "tensor([ 3.6330e-06, -1.3455e-06, 7.9947e-04, -5.5653e-09, 3.3201e-05,\n -1.8794e-04, -2.4044e-05, -2.2052e-04, 4.3166e-04, 3.0227e-05,\n -4.9014e-04, -5.9625e-04, -3.8748e-04, 1.9305e-30, -1.8756e-04,\n 2.7113e-04, 7.0065e-04, 5.6052e-45, 2.4138e-04, -6.0276e-11,\n -1.1086e-04, 5.6052e-45, 4.4983e-05, 1.4292e-04, 2.8360e-05,\n 5.6052e-45, 3.1000e-04, -1.2114e-03, -3.9675e-04, -5.4859e-05,\n -4.0179e-04, 4.8463e-04, 2.9895e-10, -1.7000e-04, 1.0984e-04,\n -2.0634e-05, 5.6052e-45, 4.2855e-04, -9.2613e-05, 2.0870e-04,\n -2.4956e-04, 1.7384e-05, -4.6310e-05, 6.3174e-05, 5.6052e-45,\n -2.2846e-04, -1.2879e-04, -6.8177e-06, 3.4749e-04, -1.4057e-04,\n 5.6052e-45, -5.9086e-05, 5.5541e-04, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -2.3407e-04, 1.6856e-04, 4.9958e-04, 2.2700e-05,\n 2.0760e-05, 6.5232e-28, 4.5841e-04, 1.3431e-04, -2.2657e-07,\n 1.1458e-04, 1.9196e-07, 3.9819e-04, 2.0331e-04, 2.5090e-04,\n -1.8464e-05, 1.3019e-30, 5.7714e-04, -2.2914e-05, -1.1386e-04,\n 4.2855e-04, 4.2735e-40, -3.2202e-04, -4.2190e-04, 5.6052e-45,\n -5.4978e-04, 2.2133e-04, -2.0268e-04, -6.5691e-34, 3.9738e-04,\n 2.4779e-29, -6.8391e-05, 2.7927e-04, 2.7445e-04, -1.5746e-04,\n 1.8946e-04, -3.3092e-11, -3.1826e-04, 4.4998e-06, -8.7370e-05,\n -3.0187e-04, 5.3487e-15, 5.9760e-04, 3.7656e-04, 2.7263e-04,\n -3.0536e-04, -1.4256e-03, 1.9342e-22, 3.3392e-13, 7.7895e-05,\n 2.5045e-06, 5.6052e-45, -2.4636e-04, 1.1085e-15, 5.6052e-45,\n 5.6052e-45, 4.3671e-04, 3.1274e-04, -1.2121e-05, -3.8758e-04,\n 2.5753e-04, -4.9816e-04, -2.2748e-04, 1.8347e-04, 5.6052e-45,\n -7.3609e-05, 2.1657e-04, 2.3208e-04, -1.1133e-09, 3.9092e-04,\n 5.6052e-45, -2.3604e-04, 1.8565e-16, 3.5077e-05, 4.1950e-04,\n -3.8371e-04, 5.6052e-45, -3.7131e-05, 8.2699e-37, 5.6052e-45,\n -1.1618e-04, 5.6052e-45, -8.2890e-04, 7.6794e-05, -2.8126e-04,\n -4.4237e-12, -2.2338e-04, 3.2088e-04, -2.2894e-04, 5.6052e-45,\n -4.8721e-06, 5.6052e-45, 1.6175e-04, 1.9477e-10, 3.3981e-41,\n 1.3157e-08, -8.7624e-05, -1.7706e-04, -2.2788e-04, 5.6052e-45,\n 9.6942e-05, 5.6052e-45, 3.1686e-04, 5.6052e-45, -4.6560e-04,\n -8.6004e-05, 1.6146e-04, -1.4672e-04, 5.6052e-45, -4.1421e-05,\n 4.2792e-04, -1.0546e-04, 4.4735e-04, 3.7017e-12, -2.0824e-04,\n 3.4135e-04, 5.6052e-45, 5.9793e-04, 2.0516e-04, 6.1380e-04,\n -5.7627e-04, 7.7548e-14, -2.8127e-05, -2.2511e-05, 5.6052e-45,\n -3.6764e-04, 4.2067e-04, 5.6052e-45, -3.7714e-09, -6.3389e-04,\n 5.6052e-45, 8.3649e-23, 2.1115e-04, 6.1283e-08, 5.6052e-45,\n -8.5306e-04, -8.1024e-05, 5.7127e-04, -2.2898e-05, -5.3693e-04,\n -1.5505e-04, 5.6052e-45, 5.6052e-45, 1.1477e-04, -4.1576e-04,\n 1.6274e-18, -2.0731e-04, -3.5512e-04, 3.2274e-04, 3.4382e-04,\n -1.7731e-04, 1.6697e-12, 1.6744e-04, 1.2921e-04, 5.6052e-45,\n -1.9260e-04, -5.1068e-04, 2.4304e-04, -1.1473e-04, 2.0854e-04,\n -2.8111e-04, -9.9954e-04, -8.6139e-05, 7.4884e-04, -1.6624e-04,\n 5.6052e-45, -4.6577e-04, 5.6052e-45, -2.7702e-04, -2.3965e-04,\n -2.4780e-04, -3.2531e-04, 1.6045e-04, 7.3194e-04, 5.6052e-45,\n -8.3115e-05, 5.6052e-45, -1.4219e-04, 2.9548e-04, -2.6717e-04,\n -1.6342e-04, -1.1596e-04, -8.5329e-05, 2.5095e-04, -3.3724e-04,\n -1.9155e-04, -8.0960e-04, 1.2676e-04, 5.6052e-45, -4.1343e-04,\n 1.9979e-04, 4.5240e-04, 1.3985e-09, 1.1619e-04, 5.6052e-45,\n 2.8762e-04, 1.2249e-08, 5.0138e-42, -6.5588e-04, 5.6052e-45,\n 3.8129e-04, 5.6052e-45, -2.2795e-04, -2.7987e-04, 2.5401e-04,\n 5.6052e-45, -5.8999e-04, -1.7458e-04, 5.0373e-04, -8.6360e-05,\n -8.6776e-05, 4.1970e-04, 3.8270e-05, 3.2702e-05, -3.6368e-04,\n 4.6122e-04, -8.8980e-04, 4.1933e-05, -9.2620e-05, 5.6052e-45,\n -3.1534e-05, 5.6052e-45, -1.5225e-04, -1.7992e-04, 5.6052e-45,\n 4.2418e-35, -1.2236e-04, 5.6052e-45, 2.9034e-04, 5.6052e-45,\n 6.6078e-04, -4.6309e-05, 3.9569e-04, 1.9518e-05, 2.2225e-04,\n 3.3124e-04, -1.7969e-04, 7.3496e-05, -3.1186e-04, 1.8136e-05,\n -2.7641e-04, -1.4376e-04, 2.2047e-04, 2.0172e-04, -1.1829e-04,\n 6.0818e-04, -1.0523e-04, 5.6052e-45, 4.4407e-05, 1.6915e-04,\n 5.6052e-45, 3.9991e-06, -3.4887e-04, 1.1029e-03, 1.0448e-04,\n 9.9710e-07, 1.1727e-04, 1.1812e-04, 2.3229e-04, -1.5733e-05,\n 9.8480e-05, 1.1638e-04, 5.7910e-05, 1.8446e-13, -1.8417e-04,\n -4.5532e-04, -1.4683e-04, -2.0754e-04, -5.2797e-04, -5.7177e-04,\n 5.6052e-45, 1.6641e-07, -3.2644e-04, -2.9681e-04, -1.1775e-04,\n 4.1196e-05, 2.5545e-04, 3.3353e-05, 1.3486e-04, -1.8469e-04,\n 2.5489e-04, -2.1201e-04, 2.9613e-04, -9.6837e-05, 1.4814e-04,\n -1.0895e-04, 2.3080e-05, 5.6052e-45, 1.0033e-04, 2.8013e-04,\n 3.2370e-04, 5.6052e-45, -2.7170e-04, -4.4578e-04, -7.9803e-05,\n 5.6052e-45, -3.5428e-05, 1.7888e-04, -1.1634e-03, -1.3006e-04,\n -2.4274e-04, 2.0183e-04, -6.1782e-04, 4.1042e-04, -3.4928e-04,\n -2.5585e-04, -6.3453e-04, -1.9040e-05, 1.7646e-04, -5.7581e-05,\n 3.7722e-04, 2.1204e-05, 3.4070e-04, -1.0974e-10, 2.0593e-04,\n -3.4589e-04, -2.4588e-04, -6.1726e-05, 2.3384e-05, -2.8158e-12,\n -2.8740e-04, 3.9417e-04, -8.1535e-04, 5.6052e-45, 5.6052e-45,\n 3.5410e-04, 2.4122e-33, 5.6052e-45, -4.7871e-04, -4.0039e-04,\n 1.0464e-04, 5.5783e-04, 1.9804e-04, 2.4533e-04, 2.9930e-04,\n 2.3203e-04, 5.6052e-45, -1.3147e-04, 1.6630e-04, 5.6052e-45,\n 3.7408e-04, -5.0047e-04, 5.6052e-45, 5.1515e-05, -9.5626e-05,\n -2.8756e-04, -6.2313e-05, -3.7017e-04, 4.1791e-05, 1.6530e-04,\n -8.6849e-04, -4.8631e-04, -2.5266e-04, 1.2741e-04, -1.9653e-05,\n 5.6052e-45, 3.6410e-04, -1.4256e-04, 4.3003e-04, -5.4770e-04,\n -3.0256e-04, -1.9819e-04, 5.6052e-45, -3.5285e-05, 2.6259e-04,\n -5.4880e-04, 2.0616e-04, -1.2258e-14, 1.2437e-03, 5.6052e-45,\n 1.3331e-04, 1.1009e-04, -6.3048e-04, 2.2500e-04, 2.8150e-04,\n -5.1933e-04, 3.3559e-04, 1.1745e-04, 2.8877e-04, -3.0927e-04,\n -1.5132e-04, 5.6052e-45, -3.4868e-05, 4.5547e-04, 1.5179e-04,\n 5.6052e-45, 5.6052e-45, -4.2672e-04, 4.2437e-04, 2.0406e-04,\n 2.3277e-04, 8.9461e-15, -5.5270e-04, -3.9560e-06, 4.7707e-04,\n 6.2458e-04, 4.0952e-05, 3.1815e-04, 5.1588e-04, 1.0568e-05,\n 5.6052e-45, -1.3758e-04, 5.6052e-45, -2.3332e-04, 4.5003e-04,\n -2.5491e-04, -2.1700e-04, 4.3955e-10, -1.3581e-08, 5.6052e-45,\n 4.7675e-05, -6.0455e-05, 2.8547e-04, 3.5748e-04, 1.2587e-12,\n 2.2528e-04, -1.0380e-04, 1.3976e-04, 8.1625e-05, -2.9823e-04,\n -1.9655e-04, 4.2761e-04, -2.9493e-05, 1.5554e-04, 5.6052e-45,\n -5.0857e-04, 5.4698e-04, 5.6052e-45, -2.3731e-11, -4.8022e-04,\n -1.1687e-04, 2.5572e-04, -1.1758e-04, -2.5603e-04, 5.6052e-45,\n 5.6052e-45, -3.8664e-04, 5.6052e-45, -1.6985e-04, 2.7613e-04,\n -4.0273e-04, -3.5267e-04, 2.6496e-04, -7.3022e-04, 5.6052e-45,\n 5.6052e-45, -3.0131e-04, 5.2447e-04, 5.6052e-45, 4.1463e-04,\n 5.6052e-45, 1.3609e-04, -5.7789e-04, 8.4427e-05, -3.4815e-32,\n 5.6052e-45, 1.1387e-04], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([1.4380e-06, 8.3631e-06, 4.4962e-06, 1.7302e-08, 5.7660e-06, 1.0279e-06,\n 2.1282e-05, 1.8849e-06, 6.2961e-06, 1.2296e-06, 6.8068e-06, 1.0554e-05,\n 5.6511e-06, 5.4928e-08, 1.1171e-06, 5.7104e-06, 7.0881e-06, 5.3014e-14,\n 3.3985e-06, 4.1147e-08, 5.7112e-06, 5.5107e-10, 5.1020e-07, 5.8354e-06,\n 3.5231e-06, 2.2555e-05, 1.3698e-05, 5.9692e-06, 6.0668e-06, 4.4153e-06,\n 4.6211e-06, 1.4144e-06, 5.4687e-07, 1.2432e-05, 5.1886e-06, 1.3893e-06,\n 3.3114e-06, 2.9759e-06, 3.4748e-06, 3.1175e-05, 2.3854e-06, 3.7644e-06,\n 3.7412e-07, 4.9527e-06, 1.1939e-05, 7.2011e-06, 6.6511e-06, 2.9851e-06,\n 1.1128e-05, 5.2746e-06, 3.1290e-07, 5.0303e-06, 3.7667e-06, 6.6187e-06,\n 5.6728e-06, 2.0177e-07, 8.5652e-06, 6.8065e-06, 4.5091e-06, 1.8137e-06,\n 4.9790e-06, 6.8753e-07, 3.3155e-06, 3.3611e-06, 1.0330e-06, 5.8528e-07,\n 1.3115e-06, 1.1647e-05, 1.1610e-05, 3.8691e-06, 7.3824e-06, 9.0678e-07,\n 2.4772e-06, 6.1188e-06, 6.5537e-06, 2.9431e-06, 2.7449e-06, 4.4177e-06,\n 8.2162e-06, 6.6042e-10, 4.9940e-06, 7.4294e-06, 8.6804e-06, 6.5523e-07,\n 8.0948e-06, 1.8218e-10, 4.6218e-06, 3.4316e-06, 1.4030e-05, 7.3414e-06,\n 3.5412e-06, 1.0784e-06, 1.3151e-06, 4.2660e-07, 9.2367e-07, 4.2178e-06,\n 1.0701e-08, 1.0242e-05, 6.8185e-06, 2.7178e-05, 3.4178e-06, 5.2940e-06,\n 1.8396e-06, 1.9610e-06, 1.6040e-05, 8.8529e-06, 3.8238e-07, 1.5434e-06,\n 5.0541e-06, 3.6945e-09, 9.0243e-06, 2.3795e-06, 4.2427e-06, 3.7128e-06,\n 6.3348e-06, 2.4526e-06, 5.0691e-06, 1.0646e-06, 8.3457e-06, 2.1823e-05,\n 6.8952e-06, 7.4046e-06, 5.4394e-06, 1.2381e-06, 8.5946e-06, 4.7923e-06,\n 2.4898e-06, 5.1411e-07, 4.4400e-06, 8.5800e-06, 2.6728e-05, 2.0670e-08,\n 3.2834e-07, 1.4666e-05, 6.3141e-06, 5.4866e-06, 1.4502e-06, 3.5331e-06,\n 1.4002e-06, 4.0004e-06, 7.8385e-06, 1.2410e-05, 2.3434e-06, 1.9009e-06,\n 9.0438e-08, 1.0046e-06, 4.1700e-05, 5.9602e-06, 8.0177e-06, 5.5821e-09,\n 1.3088e-06, 2.1453e-06, 4.4219e-06, 3.6269e-06, 6.1440e-07, 7.8722e-06,\n 1.5172e-07, 3.1855e-06, 3.0854e-14, 3.7947e-06, 2.0539e-06, 3.3933e-06,\n 3.6090e-06, 2.2175e-05, 2.9944e-06, 2.4458e-06, 1.1243e-05, 4.2765e-06,\n 2.1097e-07, 2.2249e-06, 5.5997e-06, 1.3273e-05, 8.5479e-06, 3.9201e-06,\n 7.2668e-06, 7.4453e-06, 6.5842e-08, 3.0191e-06, 1.7104e-06, 2.2515e-07,\n 5.3405e-06, 3.0071e-06, 7.3598e-06, 7.8488e-06, 6.3647e-06, 4.4777e-07,\n 3.5970e-07, 1.1813e-06, 5.7146e-06, 1.8699e-05, 9.0375e-07, 3.1572e-06,\n 1.9854e-06, 6.1165e-06, 6.7750e-06, 2.2824e-06, 1.5760e-06, 9.9494e-09,\n 3.0694e-06, 3.6736e-06, 1.0299e-06, 5.5396e-06, 7.6725e-07, 2.7353e-06,\n 6.3464e-06, 1.0597e-05, 3.4115e-06, 6.4604e-06, 8.2838e-06, 4.6634e-07,\n 1.6545e-05, 4.2974e-06, 4.9017e-06, 2.9869e-06, 1.2784e-05, 2.2916e-06,\n 8.9581e-06, 4.8328e-06, 7.1311e-06, 1.4465e-06, 5.0578e-06, 2.0492e-06,\n 9.0961e-06, 2.7503e-06, 3.4120e-06, 3.7548e-06, 4.0209e-06, 4.2755e-06,\n 5.9645e-06, 5.5964e-06, 8.4815e-07, 5.7444e-08, 2.2725e-06, 7.0637e-06,\n 4.7129e-06, 4.0926e-07, 4.4473e-06, 7.0278e-06, 2.4453e-06, 2.8171e-06,\n 5.8953e-06, 9.1281e-06, 4.9412e-06, 3.8527e-06, 3.9687e-06, 2.9053e-05,\n 2.6992e-06, 3.2215e-07, 2.4578e-06, 8.8048e-06, 2.6687e-06, 6.5424e-07,\n 2.9571e-09, 6.1665e-06, 8.8917e-09, 4.2931e-06, 1.0347e-05, 1.3405e-05,\n 4.0199e-06, 4.4375e-06, 1.5368e-05, 6.6338e-06, 3.5504e-06, 5.1828e-06,\n 1.8503e-06, 1.4463e-07, 7.1280e-06, 9.0295e-06, 1.1357e-07, 2.2807e-06,\n 4.3836e-06, 3.9459e-06, 7.9211e-06, 2.7792e-06, 1.7455e-09, 6.7998e-07,\n 8.2275e-06, 9.0773e-06, 1.5366e-06, 8.7614e-09, 1.1010e-05, 9.6445e-07,\n 2.2713e-05, 8.5931e-06, 1.2674e-05, 5.2680e-06, 8.7159e-06, 5.1294e-06,\n 4.2245e-06, 7.1952e-06, 5.4771e-06, 1.2874e-05, 7.6034e-06, 1.9718e-06,\n 3.0180e-06, 2.1435e-06, 4.8268e-07, 3.9675e-06, 4.5351e-06, 6.2543e-06,\n 2.3037e-06, 2.9713e-06, 9.4303e-07, 1.5098e-06, 2.7162e-06, 3.1131e-06,\n 1.1634e-05, 4.1649e-06, 6.0481e-06, 6.4588e-06, 7.3978e-07, 6.1103e-06,\n 2.7848e-06, 9.9317e-07, 1.6855e-05, 5.6597e-07, 9.7796e-06, 9.7489e-06,\n 1.3516e-08, 1.6916e-06, 3.5423e-06, 1.6225e-05, 5.8675e-06, 1.0506e-05,\n 1.6273e-06, 3.0747e-05, 3.4268e-07, 2.4973e-06, 5.8307e-06, 2.4655e-06,\n 3.3249e-06, 1.9439e-06, 2.0428e-06, 9.7775e-06, 6.1782e-06, 4.2259e-06,\n 6.9502e-06, 1.6029e-05, 4.7664e-06, 1.1190e-06, 3.2239e-06, 3.0494e-06,\n 3.4807e-06, 1.3263e-06, 4.4299e-06, 2.9454e-06, 2.5817e-08, 2.9351e-06,\n 1.7678e-06, 1.7746e-06, 1.5648e-08, 3.2180e-06, 3.1677e-06, 1.7360e-06,\n 3.1195e-06, 6.1651e-06, 5.3044e-06, 7.0266e-06, 5.6747e-06, 8.8611e-07,\n 3.0379e-07, 6.3226e-06, 7.0802e-06, 1.3007e-06, 1.3268e-05, 7.3512e-06,\n 3.0282e-06, 1.8816e-06, 4.2065e-07, 5.3132e-06, 1.5075e-06, 5.6215e-06,\n 3.7625e-06, 1.0921e-05, 2.0309e-06, 1.5039e-06, 1.4831e-07, 1.2279e-05,\n 9.8836e-06, 4.2190e-06, 5.2285e-06, 1.1717e-07, 6.7397e-09, 4.3822e-06,\n 5.3583e-06, 2.7530e-06, 2.2971e-06, 2.6062e-06, 9.9320e-06, 4.9795e-06,\n 1.3720e-06, 2.5352e-05, 7.6242e-07, 4.1771e-06, 2.1663e-05, 4.8679e-06,\n 4.5018e-06, 3.2668e-06, 9.5605e-06, 1.7113e-06, 7.1976e-06, 6.2427e-06,\n 1.0902e-05, 4.7270e-06, 6.5225e-06, 4.4038e-06, 1.0386e-05, 3.4593e-06,\n 2.3424e-06, 1.0230e-05, 4.7512e-06, 8.4843e-06, 4.8730e-06, 8.3657e-06,\n 4.2281e-07, 2.8322e-06, 8.8127e-06, 6.5020e-06, 3.1832e-06, 1.5829e-05,\n 5.5335e-06, 4.2399e-06, 4.6577e-07, 5.8970e-06, 1.1229e-08, 9.9992e-06,\n 1.0755e-05, 3.5820e-06, 1.7469e-05, 3.8679e-06, 2.0544e-06, 1.1516e-06,\n 1.1723e-06, 1.2563e-05, 6.8046e-06, 2.2805e-06, 9.9098e-06, 2.3997e-06,\n 3.9066e-06, 1.0392e-05, 2.8112e-06, 7.6622e-06, 4.8451e-07, 3.3319e-06,\n 4.9159e-06, 4.7332e-06, 1.6929e-07, 3.3356e-06, 7.0660e-06, 1.0039e-05,\n 3.2036e-06, 1.0906e-05, 1.6303e-05, 2.8311e-06, 2.4813e-06, 1.5372e-05,\n 2.2796e-06, 1.4135e-06, 3.8190e-06, 6.3804e-07, 9.0508e-07, 5.9800e-06,\n 1.1281e-06, 1.4708e-05, 1.4748e-05, 2.7183e-06, 3.9569e-07, 1.1606e-05,\n 5.5563e-06, 2.3621e-06, 4.9337e-06, 2.5584e-06, 4.1454e-06, 4.8147e-06,\n 7.7303e-06, 2.9155e-06, 7.9601e-07, 1.5112e-06, 5.1944e-06, 2.5695e-08,\n 4.2998e-06, 5.5181e-06, 2.0685e-05, 1.4593e-06, 1.9463e-05, 1.5477e-05,\n 4.3381e-06, 7.4607e-06, 1.5446e-06, 6.5765e-06, 3.0924e-06, 1.0866e-05,\n 3.4644e-08, 1.4716e-06, 5.3329e-06, 5.6884e-06, 6.4205e-06, 7.0139e-06,\n 2.1531e-06, 9.8401e-06, 6.0331e-06, 8.2722e-08, 4.1829e-06, 6.6575e-07,\n 4.6804e-06, 1.6201e-05, 5.6368e-06, 7.2327e-06, 3.6164e-06, 2.8505e-06,\n 6.2775e-06, 5.3090e-07], device='cuda:0')"
|
| 24 |
+
},
|
| 25 |
+
"4": {
|
| 26 |
+
"step": "tensor(2503.)",
|
| 27 |
+
"exp_avg": "tensor([[ 4.8494e-05, 4.2713e-05, -4.9695e-05, ..., 3.9060e-33,\n 5.6052e-45, 2.2442e-06],\n [ 3.9726e-05, 1.2099e-06, -1.0968e-05, ..., -1.5017e-32,\n -5.6052e-45, -6.6520e-05],\n [ 1.0212e-05, -1.1635e-04, -3.8091e-05, ..., -2.6897e-32,\n -5.6052e-45, 8.1960e-06],\n ...,\n [-1.9141e-05, -9.7089e-05, 5.3461e-05, ..., 3.6027e-33,\n 5.6052e-45, -4.7683e-05],\n [ 6.0304e-05, 1.8637e-04, -4.2282e-05, ..., -1.0159e-32,\n 5.6052e-45, -7.3768e-06],\n [-3.9957e-06, 3.4668e-05, -1.1653e-05, ..., -2.9639e-32,\n -5.6052e-45, 1.2030e-04]], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([[8.5716e-09, 9.7148e-08, 1.0039e-07, ..., 6.1793e-11, 8.9151e-11,\n 7.1480e-09],\n [1.2965e-08, 1.8908e-07, 1.2804e-07, ..., 5.7694e-10, 3.4768e-11,\n 2.3434e-08],\n [7.2775e-09, 2.2664e-07, 1.1368e-07, ..., 5.8373e-11, 1.8469e-10,\n 7.0799e-09],\n ...,\n [1.0377e-08, 2.3219e-07, 1.5635e-07, ..., 7.9823e-10, 9.6002e-10,\n 1.4029e-08],\n [1.1623e-08, 2.3192e-07, 1.5124e-07, ..., 8.3760e-10, 3.9992e-10,\n 1.5621e-08],\n [1.0687e-08, 3.0626e-07, 1.5733e-07, ..., 3.1200e-11, 4.0007e-11,\n 4.3269e-08]], device='cuda:0')"
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
"param_groups": [
|
| 32 |
+
{
|
| 33 |
+
"lr": 0.00975530705321762,
|
| 34 |
+
"name": "shared",
|
| 35 |
+
"betas": [
|
| 36 |
+
0.9,
|
| 37 |
+
0.999
|
| 38 |
+
],
|
| 39 |
+
"eps": 1e-08,
|
| 40 |
+
"weight_decay": 1e-05,
|
| 41 |
+
"amsgrad": false,
|
| 42 |
+
"maximize": false,
|
| 43 |
+
"foreach": null,
|
| 44 |
+
"capturable": false,
|
| 45 |
+
"differentiable": false,
|
| 46 |
+
"fused": null,
|
| 47 |
+
"decoupled_weight_decay": true,
|
| 48 |
+
"initial_lr": 0.01,
|
| 49 |
+
"params": [
|
| 50 |
+
0,
|
| 51 |
+
1
|
| 52 |
+
]
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"lr": 0.00975530705321762,
|
| 56 |
+
"name": "scale_256",
|
| 57 |
+
"betas": [
|
| 58 |
+
0.9,
|
| 59 |
+
0.999
|
| 60 |
+
],
|
| 61 |
+
"eps": 1e-08,
|
| 62 |
+
"weight_decay": 1e-05,
|
| 63 |
+
"amsgrad": false,
|
| 64 |
+
"maximize": false,
|
| 65 |
+
"foreach": null,
|
| 66 |
+
"capturable": false,
|
| 67 |
+
"differentiable": false,
|
| 68 |
+
"fused": null,
|
| 69 |
+
"decoupled_weight_decay": true,
|
| 70 |
+
"initial_lr": 0.01,
|
| 71 |
+
"params": [
|
| 72 |
+
2,
|
| 73 |
+
3,
|
| 74 |
+
4
|
| 75 |
+
]
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"lr": 0.00975530705321762,
|
| 79 |
+
"name": "scale_512",
|
| 80 |
+
"betas": [
|
| 81 |
+
0.9,
|
| 82 |
+
0.999
|
| 83 |
+
],
|
| 84 |
+
"eps": 1e-08,
|
| 85 |
+
"weight_decay": 1e-05,
|
| 86 |
+
"amsgrad": false,
|
| 87 |
+
"maximize": false,
|
| 88 |
+
"foreach": null,
|
| 89 |
+
"capturable": false,
|
| 90 |
+
"differentiable": false,
|
| 91 |
+
"fused": null,
|
| 92 |
+
"decoupled_weight_decay": true,
|
| 93 |
+
"initial_lr": 0.01,
|
| 94 |
+
"params": [
|
| 95 |
+
5,
|
| 96 |
+
6,
|
| 97 |
+
7
|
| 98 |
+
]
|
| 99 |
+
},
|
| 100 |
+
{
|
| 101 |
+
"lr": 0.00975530705321762,
|
| 102 |
+
"name": "scale_768",
|
| 103 |
+
"betas": [
|
| 104 |
+
0.9,
|
| 105 |
+
0.999
|
| 106 |
+
],
|
| 107 |
+
"eps": 1e-08,
|
| 108 |
+
"weight_decay": 1e-05,
|
| 109 |
+
"amsgrad": false,
|
| 110 |
+
"maximize": false,
|
| 111 |
+
"foreach": null,
|
| 112 |
+
"capturable": false,
|
| 113 |
+
"differentiable": false,
|
| 114 |
+
"fused": null,
|
| 115 |
+
"decoupled_weight_decay": true,
|
| 116 |
+
"initial_lr": 0.01,
|
| 117 |
+
"params": [
|
| 118 |
+
8,
|
| 119 |
+
9,
|
| 120 |
+
10
|
| 121 |
+
]
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"lr": 0.00975530705321762,
|
| 125 |
+
"name": "scale_1024",
|
| 126 |
+
"betas": [
|
| 127 |
+
0.9,
|
| 128 |
+
0.999
|
| 129 |
+
],
|
| 130 |
+
"eps": 1e-08,
|
| 131 |
+
"weight_decay": 1e-05,
|
| 132 |
+
"amsgrad": false,
|
| 133 |
+
"maximize": false,
|
| 134 |
+
"foreach": null,
|
| 135 |
+
"capturable": false,
|
| 136 |
+
"differentiable": false,
|
| 137 |
+
"fused": null,
|
| 138 |
+
"decoupled_weight_decay": true,
|
| 139 |
+
"initial_lr": 0.01,
|
| 140 |
+
"params": [
|
| 141 |
+
11,
|
| 142 |
+
12,
|
| 143 |
+
13
|
| 144 |
+
]
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"lr": 0.004877665762479736,
|
| 148 |
+
"name": "fusion",
|
| 149 |
+
"betas": [
|
| 150 |
+
0.9,
|
| 151 |
+
0.999
|
| 152 |
+
],
|
| 153 |
+
"eps": 1e-08,
|
| 154 |
+
"weight_decay": 1e-05,
|
| 155 |
+
"amsgrad": false,
|
| 156 |
+
"maximize": false,
|
| 157 |
+
"foreach": null,
|
| 158 |
+
"capturable": false,
|
| 159 |
+
"differentiable": false,
|
| 160 |
+
"fused": null,
|
| 161 |
+
"decoupled_weight_decay": true,
|
| 162 |
+
"initial_lr": 0.005,
|
| 163 |
+
"params": [
|
| 164 |
+
14,
|
| 165 |
+
15,
|
| 166 |
+
16,
|
| 167 |
+
17,
|
| 168 |
+
18,
|
| 169 |
+
19,
|
| 170 |
+
20,
|
| 171 |
+
21,
|
| 172 |
+
22,
|
| 173 |
+
23,
|
| 174 |
+
24,
|
| 175 |
+
25,
|
| 176 |
+
26,
|
| 177 |
+
27,
|
| 178 |
+
28,
|
| 179 |
+
29,
|
| 180 |
+
30,
|
| 181 |
+
31,
|
| 182 |
+
32,
|
| 183 |
+
33,
|
| 184 |
+
34,
|
| 185 |
+
35,
|
| 186 |
+
36,
|
| 187 |
+
37,
|
| 188 |
+
38,
|
| 189 |
+
39,
|
| 190 |
+
40,
|
| 191 |
+
41,
|
| 192 |
+
42,
|
| 193 |
+
43,
|
| 194 |
+
44,
|
| 195 |
+
45,
|
| 196 |
+
46,
|
| 197 |
+
47,
|
| 198 |
+
48,
|
| 199 |
+
49,
|
| 200 |
+
50,
|
| 201 |
+
51,
|
| 202 |
+
52,
|
| 203 |
+
53,
|
| 204 |
+
54,
|
| 205 |
+
55,
|
| 206 |
+
56,
|
| 207 |
+
57,
|
| 208 |
+
58,
|
| 209 |
+
59,
|
| 210 |
+
60,
|
| 211 |
+
61,
|
| 212 |
+
62,
|
| 213 |
+
63,
|
| 214 |
+
64
|
| 215 |
+
]
|
| 216 |
+
}
|
| 217 |
+
]
|
| 218 |
+
},
|
| 219 |
+
"scheduler_state_dict": {
|
| 220 |
+
"T_0": 10,
|
| 221 |
+
"T_i": 10,
|
| 222 |
+
"T_mult": 2,
|
| 223 |
+
"eta_min": 1e-06,
|
| 224 |
+
"T_cur": 1,
|
| 225 |
+
"base_lrs": [
|
| 226 |
+
0.01,
|
| 227 |
+
0.01,
|
| 228 |
+
0.01,
|
| 229 |
+
0.01,
|
| 230 |
+
0.01,
|
| 231 |
+
0.005
|
| 232 |
+
],
|
| 233 |
+
"last_epoch": 1,
|
| 234 |
+
"_step_count": 0,
|
| 235 |
+
"_is_initial": false,
|
| 236 |
+
"_get_lr_called_within_step": false,
|
| 237 |
+
"_last_lr": [
|
| 238 |
+
0.00975530705321762,
|
| 239 |
+
0.00975530705321762,
|
| 240 |
+
0.00975530705321762,
|
| 241 |
+
0.00975530705321762,
|
| 242 |
+
0.00975530705321762,
|
| 243 |
+
0.004877665762479736
|
| 244 |
+
]
|
| 245 |
+
},
|
| 246 |
+
"metrics": {
|
| 247 |
+
"best_val_acc": 66.689,
|
| 248 |
+
"best_epoch": 0,
|
| 249 |
+
"scale_accuracies": {
|
| 250 |
+
"256": 66.689
|
| 251 |
+
},
|
| 252 |
+
"training_history": {
|
| 253 |
+
"epochs": [
|
| 254 |
+
1
|
| 255 |
+
],
|
| 256 |
+
"train_loss": [
|
| 257 |
+
3.4310503170769358
|
| 258 |
+
],
|
| 259 |
+
"train_acc": [
|
| 260 |
+
54.52540535308824
|
| 261 |
+
],
|
| 262 |
+
"val_acc": [
|
| 263 |
+
66.689
|
| 264 |
+
],
|
| 265 |
+
"scale_accs": {
|
| 266 |
+
"256": [
|
| 267 |
+
66.689
|
| 268 |
+
]
|
| 269 |
+
},
|
| 270 |
+
"lr": [
|
| 271 |
+
0.00975530705321762
|
| 272 |
+
]
|
| 273 |
+
}
|
| 274 |
+
},
|
| 275 |
+
"train_config": {
|
| 276 |
+
"name": "david_training",
|
| 277 |
+
"run_id": "20251012_191456",
|
| 278 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 279 |
+
"model_variant": [
|
| 280 |
+
"clip_vit_b32",
|
| 281 |
+
"clip_vit_laion_b32"
|
| 282 |
+
],
|
| 283 |
+
"num_classes": 1000,
|
| 284 |
+
"preset": "balanced",
|
| 285 |
+
"custom_config_path": null,
|
| 286 |
+
"num_classes_override": null,
|
| 287 |
+
"use_belly_override": null,
|
| 288 |
+
"belly_expand_override": null,
|
| 289 |
+
"progressive_training_override": true,
|
| 290 |
+
"scale_warmup_epochs_override": {
|
| 291 |
+
"256": 0,
|
| 292 |
+
"512": 2,
|
| 293 |
+
"768": 5,
|
| 294 |
+
"1024": 8
|
| 295 |
+
},
|
| 296 |
+
"num_epochs": 10,
|
| 297 |
+
"batch_size": 1024,
|
| 298 |
+
"learning_rate": 0.01,
|
| 299 |
+
"weight_decay": 1e-05,
|
| 300 |
+
"warmup_epochs": 3,
|
| 301 |
+
"use_rose_loss": true,
|
| 302 |
+
"rose_initial_weight": 0.2,
|
| 303 |
+
"rose_max_weight": 0.8,
|
| 304 |
+
"rose_weight_schedule": "adaptive",
|
| 305 |
+
"use_cayley_loss": true,
|
| 306 |
+
"cayley_weight": 0.01,
|
| 307 |
+
"scale_loss_balance": null,
|
| 308 |
+
"use_mixed_precision": false,
|
| 309 |
+
"gradient_clip": 10.0,
|
| 310 |
+
"scheduler_type": "cosine_restarts",
|
| 311 |
+
"min_lr": 1e-06,
|
| 312 |
+
"freeze_strategy": "never",
|
| 313 |
+
"freeze_threshold": 90.0,
|
| 314 |
+
"unfreeze_on_plateau": true,
|
| 315 |
+
"patience": 10,
|
| 316 |
+
"track_gradients": true,
|
| 317 |
+
"gradient_scale_threshold": 1e-05,
|
| 318 |
+
"gradient_scale_multiplier": 10.0,
|
| 319 |
+
"log_interval": 50,
|
| 320 |
+
"val_interval": 1,
|
| 321 |
+
"save_interval": 5,
|
| 322 |
+
"log_fusion_weights": true,
|
| 323 |
+
"log_loss_components": true,
|
| 324 |
+
"save_format": "safetensors",
|
| 325 |
+
"hf_repo": "AbstractPhil/david-shared-space",
|
| 326 |
+
"upload_to_hub": true,
|
| 327 |
+
"base_dir": "./david_training",
|
| 328 |
+
"num_workers": 10,
|
| 329 |
+
"pin_memory": true,
|
| 330 |
+
"prefetch_factor": 4,
|
| 331 |
+
"persistent_workers": true
|
| 332 |
+
}
|
| 333 |
+
}
|