Update best_model_acc62.52_metadata.json - Run 20251012_221046
Browse files
weights/David-decoupled-deep_efficiency/20251012_221046/best_model_acc62.52_metadata.json
ADDED
|
@@ -0,0 +1,309 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 0,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(3754.)",
|
| 7 |
+
"exp_avg": "tensor([[-4.4749e-05, 1.4725e-04, 4.0209e-05, ..., -5.3002e-06,\n -1.4165e-05, 2.8785e-05],\n [ 5.5528e-05, 1.3407e-04, -7.4307e-05, ..., 3.2654e-05,\n 4.3700e-05, 7.5850e-05],\n [-7.6965e-05, 9.2139e-05, 1.8860e-05, ..., 2.1867e-05,\n -1.3094e-05, 3.8954e-06],\n ...,\n [ 7.6618e-05, 8.5101e-05, 1.0000e-04, ..., 2.8659e-05,\n -1.3026e-05, 1.7126e-05],\n [-1.2214e-05, 6.4223e-05, -2.3335e-05, ..., 1.3158e-04,\n 5.7863e-06, 1.3439e-05],\n [-1.0400e-04, -2.0184e-04, 9.1752e-05, ..., 2.5940e-06,\n 4.6468e-05, -1.0303e-04]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[2.7359e-08, 2.3284e-07, 7.0138e-08, ..., 2.2038e-08, 1.2536e-08,\n 1.4616e-08],\n [9.6217e-08, 3.8915e-07, 9.2744e-08, ..., 1.5339e-07, 4.1522e-08,\n 4.9984e-08],\n [1.7404e-07, 1.1525e-07, 2.7388e-08, ..., 4.0486e-08, 2.5376e-08,\n 2.6671e-08],\n ...,\n [4.9063e-08, 1.7711e-07, 8.7920e-08, ..., 4.4341e-08, 1.5060e-08,\n 5.5431e-08],\n [1.5269e-07, 2.0826e-07, 8.2987e-08, ..., 1.4950e-07, 3.8942e-08,\n 4.1262e-08],\n [2.1312e-08, 1.2214e-07, 9.2706e-08, ..., 1.6309e-08, 7.3698e-09,\n 2.1436e-08]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(3754.)",
|
| 12 |
+
"exp_avg": "tensor([-8.1367e-04, -4.9727e-04, -1.9642e-04, 4.1624e-04, -7.8312e-09,\n 1.8762e-03, -5.8328e-04, 8.3392e-05, 3.3990e-04, -4.0941e-04,\n 1.2410e-03, 5.6052e-45, -5.2167e-03, 1.9487e-04, 3.2701e-03,\n 9.8769e-04, 1.1269e-04, 4.4264e-04, -1.4434e-03, 5.7081e-04,\n -1.4201e-03, -1.2336e-03, 1.8030e-03, 2.4552e-04, -4.9408e-04,\n -8.1328e-04, -1.7902e-03, -7.4724e-04, 1.9403e-03, 4.9805e-04,\n -5.4805e-04, -1.9834e-03, -1.7212e-03, -1.6541e-03, 1.3718e-03,\n 1.3493e-04, -6.3605e-05, 1.1535e-03, 3.2156e-04, -1.3517e-03,\n 1.2956e-03, -1.2730e-03, -3.8118e-04, -8.4489e-04, -8.9233e-04,\n 3.3987e-04, -8.0669e-04, 3.0355e-04, -4.2855e-04, 9.5174e-04,\n 9.5905e-04, -4.4701e-04, -2.0572e-04, -2.7839e-03, 6.7109e-04,\n -2.6372e-03, 1.3612e-03, -4.3315e-05, 2.4029e-03, 1.0660e-04,\n 2.0569e-03, 2.1287e-03, 4.0652e-04, -3.2699e-11, -3.5535e-03,\n 2.0943e-03, 6.8498e-04, -4.5304e-12, 3.5901e-03, 8.4772e-04,\n -3.3510e-05, -8.0424e-06, -3.8541e-03, 1.8668e-04, -6.0599e-04,\n -2.6157e-03, 3.8743e-04, -1.4900e-03, 1.7907e-03, 2.5035e-03,\n 1.9545e-03, 3.1201e-03, -2.0438e-03, -2.4916e-03, -3.7299e-03,\n -3.4377e-03, 1.0809e-04, 1.9242e-03, -3.6899e-03, 5.6052e-45,\n -1.0445e-03, 2.5261e-04, -2.3670e-03, -1.0501e-03, -1.1143e-03,\n -1.0533e-03, -2.4643e-04, 3.8127e-04, -6.9685e-04, 5.5370e-04,\n 4.9100e-04, 3.7193e-04, 3.0040e-03, 6.6864e-04, 2.1311e-04,\n -1.3562e-04, -2.0445e-03, 1.0353e-03, 1.4375e-03, 2.4195e-03,\n 5.0358e-04, 6.5302e-04, 2.6690e-03, 1.2135e-03, 1.1476e-03,\n -1.1151e-03, -1.1266e-03, -1.3055e-04, 2.5802e-03, -3.9729e-04,\n 1.6163e-04, 7.6585e-04, -8.7372e-05, 1.0230e-03, -8.6288e-04,\n -1.3835e-03, -1.1270e-03, 1.0610e-03, 1.4084e-03, -1.9976e-03,\n 2.2596e-03, -6.2884e-04, -2.5123e-04, -2.1554e-03, -1.6450e-04,\n -1.2985e-04, 5.2091e-03, 2.3489e-04, 4.8430e-04, 4.6869e-04,\n -4.3170e-04, -2.5162e-04, -6.8161e-05, -2.3293e-04, -9.6389e-04,\n 7.9047e-04, 3.1049e-04, -2.0034e-04, 2.4829e-04, -9.2371e-04,\n 3.5966e-03, -5.6832e-04, -5.0496e-04, 7.4216e-04, 1.1504e-03,\n 1.3737e-03, 6.6952e-04, -1.8899e-04, 1.6868e-03, 1.6146e-03,\n 2.8243e-04, 2.5944e-03, -6.6431e-04, 3.5935e-04, 6.3384e-05,\n -9.9373e-04, -6.2823e-04, 3.2152e-04, 5.4877e-04, 4.7079e-04,\n -2.3184e-03, 5.3538e-04, 1.6515e-03, -1.0744e-03, 7.8039e-05,\n 7.9547e-04, -5.0773e-05, 3.6923e-03, 1.7267e-03, 4.3681e-04,\n -2.1966e-04, -4.6968e-04, -1.0880e-04, -2.0722e-03, 6.3926e-04,\n 4.7019e-04, 1.7456e-04, -5.4411e-04, 7.6683e-04, 9.2771e-04,\n -3.6417e-03, -4.1295e-05, -9.5306e-04, 7.0555e-04, 1.8279e-03,\n 8.8248e-04, 2.0808e-03, -1.5755e-03, -5.0833e-05, 2.6237e-03,\n -2.1856e-03, 1.7919e-03, -2.5416e-03, -1.7525e-03, -2.0636e-03,\n -1.5252e-04, 2.3727e-03, 5.6052e-45, -2.1118e-03, -7.5673e-04,\n -1.4113e-03, 2.8449e-03, 1.1457e-03, 1.2253e-03, -1.4174e-03,\n 2.6625e-03, -4.7899e-04, -1.4394e-03, 8.7591e-04, 1.4092e-04,\n -9.5176e-05, 9.3961e-04, -1.0062e-03, 1.7913e-03, -1.7141e-03,\n -4.7101e-05, 5.8291e-04, -2.2839e-04, 3.1676e-03, 6.5609e-04,\n -6.4759e-04, -2.9385e-04, 1.9144e-03, 2.9592e-03, 1.7372e-03,\n 9.8980e-04, -1.3639e-03, 1.5438e-04, -1.2169e-03, -1.9853e-03,\n 3.5970e-03, -4.1487e-04, 4.2076e-04, 1.1652e-03, 9.7023e-04,\n 2.1867e-03, 1.4974e-04, 4.0961e-03, -1.3125e-04, 5.8265e-04,\n -2.2352e-03, -7.6680e-04, -1.5706e-04, 8.5366e-04, 4.2754e-04,\n -1.2114e-04, 9.5753e-05, -1.7046e-03, 2.6724e-04, 8.3027e-04,\n -1.7633e-03, 8.0649e-04, 6.4147e-04, -6.0678e-04, 9.5589e-04,\n -8.6376e-04, 9.5832e-04, 1.5135e-03, -1.2679e-04, 2.3524e-04,\n -2.7402e-03, 1.9261e-03, -1.5786e-03, 3.2450e-03, 1.1703e-03,\n -6.5746e-05, 3.6863e-05, 4.8643e-05, -1.3448e-04, 5.5564e-04,\n 8.9180e-04, -2.1294e-03, -9.7765e-04, -1.2002e-03, 5.6416e-04,\n 6.2707e-04, 8.6123e-04, 7.9437e-04, -8.4291e-04, -4.0692e-03,\n -2.9456e-05, -1.9278e-03, 1.5858e-03, -2.0333e-03, -1.1549e-04,\n -1.3178e-04, 1.1006e-03, 1.2868e-03, 3.3223e-04, -8.9190e-04,\n 7.7729e-04, -1.9359e-03, 5.6172e-05, -3.3262e-03, 1.0916e-03,\n -1.6061e-04, 5.5256e-04, -1.3285e-03, -4.9060e-04, 1.8463e-03,\n 2.0704e-04, -4.8362e-03, -2.7467e-05, 2.4457e-03, 2.3183e-04,\n -2.0011e-03, 2.5882e-03, 5.4843e-05, 3.4988e-03, -1.0774e-03,\n -2.8791e-03, -2.1049e-03, -1.1404e-03, -1.0179e-03, 2.0403e-04,\n -1.1020e-03, 1.9153e-03, -5.0879e-03, 8.7456e-04, 7.7336e-04,\n -8.6910e-04, 1.5670e-03, -1.0073e-03, -5.3445e-04, -2.7277e-03,\n 3.1433e-03, 5.5835e-04, 1.9155e-03, 2.3089e-04, -2.7299e-03,\n 1.1266e-03, -1.6356e-04, 3.0958e-04, -4.0072e-03, -1.7838e-03,\n 2.3742e-03, 1.2951e-04, -3.0488e-03, 2.5931e-04, -7.9527e-04,\n 2.4933e-03, 7.0156e-04, -1.0500e-03, -5.1042e-04, -3.4115e-03,\n -2.8097e-04, 2.9949e-03, -1.8634e-05, 8.0695e-04, 2.2839e-03,\n 2.2516e-04, -6.1616e-03, -2.0914e-03, -1.2352e-03, 3.8066e-03,\n 7.8859e-04, -2.9602e-04, 9.6411e-04, -4.0930e-04, 3.4579e-03,\n 7.0849e-04, 3.3980e-04, -4.7161e-04, -3.3371e-04, -2.6211e-03,\n -1.2608e-03, -1.3335e-03, -8.5335e-05, -3.4740e-03, 4.8508e-04,\n -1.1350e-03, 2.0014e-03, 4.6344e-04, -1.0450e-03, 1.1318e-03,\n -1.6881e-03, 2.5003e-03, -9.1399e-04, -1.2158e-03, -4.9908e-05,\n -1.9117e-03, 1.9589e-03, 2.3057e-03, 1.8100e-03, -8.3746e-04,\n 3.8921e-04, -3.7045e-04, 6.4259e-04, -1.3137e-05, -2.2742e-03,\n -1.6005e-03, 1.8838e-04, -1.6177e-03, 2.8799e-03, 3.3492e-03,\n -1.8542e-03, -2.4921e-04, -1.5754e-03, -5.2379e-04, 1.3332e-03,\n -1.5188e-04, 4.4111e-04, -1.4944e-03, -1.2012e-04, -4.7634e-04,\n -4.9768e-04, 2.8108e-04, -7.7396e-04, -1.7094e-03, 2.9795e-04,\n -1.0855e-03, -1.1367e-03, 1.0956e-03, 3.5483e-06, -3.0103e-04,\n 1.3493e-03, -3.6295e-03, 2.1419e-03, -1.4176e-03, 3.3886e-04,\n 8.0136e-04, 3.2464e-03, 5.6052e-45, 1.7250e-03, -1.0690e-03,\n -8.1598e-06, 5.0956e-03, -4.4878e-04, -1.4565e-03, -3.6110e-03,\n 2.9139e-04, 8.7195e-04, 2.5014e-03, -1.4494e-03, -9.7636e-04,\n -2.6361e-04, 3.3479e-03, 1.0007e-03, 5.9731e-04, -7.6476e-04,\n 1.1284e-04, -4.6298e-04, -1.0221e-03, -1.1098e-03, 1.1489e-04,\n 2.2521e-03, 2.2030e-03, -1.1021e-03, 1.6674e-03, 1.5575e-03,\n 5.6052e-45, -5.6660e-04, -1.0405e-03, -2.5626e-03, 1.0611e-03,\n 1.0676e-03, -2.4274e-03, 2.1316e-03, 9.1119e-04, 1.3404e-03,\n 7.2306e-04, -2.5358e-03, -1.1410e-03, -1.9789e-03, -5.0153e-04,\n -4.5727e-03, -1.2091e-04, 8.4857e-04, -1.3097e-03, 2.1005e-03,\n 6.9048e-04, 1.1576e-03, 4.2954e-04, -3.2762e-03, 3.4072e-03,\n -2.2647e-03, -1.4537e-03, 3.3851e-04, 6.7889e-04, -2.3878e-03,\n -1.3313e-03, 9.3333e-04, 1.5462e-03, 1.9939e-04, 2.7098e-03,\n -2.2441e-03, 1.8803e-03, -1.2065e-03, -5.0322e-04, -3.5094e-03,\n 9.3891e-05, 2.4084e-03, -2.4659e-03, 2.5327e-03, 1.3607e-03,\n -1.7305e-03, -1.1718e-03, -4.6362e-04, -1.9883e-04, -5.2899e-04,\n 1.2901e-03, 5.7018e-04, -2.9603e-03, 4.4879e-04, -2.8203e-03,\n 1.2655e-03, -1.1335e-03, 2.9538e-03, 2.5082e-03, -9.8844e-04,\n -1.0733e-03, 3.5881e-03, -1.0556e-03, -3.7764e-03, -4.2339e-04,\n 1.6562e-03, 3.0200e-03, 1.2011e-03, 2.3737e-04, -1.4873e-04,\n 4.0332e-04, -3.8080e-03, 6.8526e-04, -1.8363e-03, 7.9228e-04,\n -4.1291e-03, 9.3338e-05, 5.1121e-04, 2.8580e-03, -3.2570e-03,\n 4.1448e-04, 1.4502e-03, 2.0674e-04, 7.0930e-04, -8.6215e-04,\n 2.9365e-03, 8.3438e-04, 1.8299e-03, 1.8106e-03, -1.6737e-03,\n 2.8248e-04, 5.2771e-04, 2.9061e-04, -1.0569e-03, 1.1242e-03,\n 2.1906e-04, 3.1560e-03, -7.6455e-05, 6.2387e-05, 1.2929e-04,\n 6.6199e-04, 1.7683e-03, -6.2497e-04, -1.9280e-03, 1.1437e-03,\n 3.6147e-04, -6.9456e-04, -1.2706e-03, 7.3737e-04, -9.6973e-04,\n 1.5202e-03, -1.0515e-04, 2.2749e-03, -2.3923e-04, 1.6866e-03,\n -3.1259e-03, -6.7030e-04, 9.1899e-04, -2.3185e-03, 1.7068e-03,\n -1.0078e-03, -7.0923e-04, 1.4189e-03, 1.1298e-03, -7.7039e-04,\n -4.0293e-04, 3.6223e-04, 2.1090e-04, 1.4420e-03, 1.8279e-04,\n -1.0262e-03, 1.0139e-03, -7.4034e-04, 1.6338e-03, 2.8892e-03,\n 1.2747e-03, 1.6528e-03, 3.6505e-04, 9.2641e-04, -2.3019e-04,\n 5.0369e-04, -4.1351e-05, 2.4120e-04, -2.1856e-03, 5.4232e-04,\n 1.0482e-03, -1.0749e-03, 3.3277e-03, 1.0134e-04, 1.1085e-03,\n -7.7962e-04, 1.2008e-03, -9.3034e-04, -3.0980e-03, -1.5012e-03,\n -5.2173e-05, 1.7408e-04, 1.9751e-03, 3.1282e-04, 1.6241e-03,\n -7.6673e-04, 3.4086e-04, -2.6384e-03, -4.8475e-03, -2.4161e-03,\n 1.1833e-03, -1.5770e-03, 1.5403e-04, -9.1986e-04, 1.0524e-03,\n -1.9944e-03, -7.0337e-04, 1.8231e-03, -2.6331e-04, 6.2168e-04,\n -2.2057e-05, -1.4172e-04, 9.7819e-04, 2.1654e-03, -2.1981e-03],\n device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([2.0154e-05, 8.2802e-05, 4.1377e-05, 5.8201e-05, 2.7666e-07, 1.3371e-04,\n 6.3215e-05, 7.5467e-05, 6.0311e-05, 2.5713e-05, 6.4536e-05, 9.8062e-08,\n 1.2011e-04, 2.5607e-05, 9.6979e-05, 2.6946e-05, 7.4975e-05, 6.6907e-05,\n 7.2662e-05, 9.0838e-05, 3.8164e-05, 2.2901e-05, 5.5431e-05, 7.3976e-05,\n 6.0370e-05, 2.5836e-05, 5.7701e-05, 2.7076e-05, 7.4471e-05, 5.1125e-05,\n 5.3557e-05, 9.6772e-05, 7.3849e-05, 3.0324e-05, 5.3788e-05, 3.3623e-05,\n 6.3435e-05, 1.4947e-05, 1.0229e-04, 5.6720e-05, 5.7996e-05, 3.1101e-05,\n 3.6252e-05, 2.1665e-05, 9.5999e-05, 1.2943e-04, 4.6959e-05, 1.4945e-05,\n 4.3585e-05, 8.5697e-05, 6.8489e-05, 3.6036e-05, 1.7792e-05, 9.1431e-05,\n 8.5387e-05, 1.4333e-04, 5.6405e-05, 7.0258e-05, 3.6230e-05, 4.2322e-05,\n 5.9743e-05, 9.8703e-05, 2.8072e-05, 2.3357e-07, 8.3855e-05, 1.1213e-04,\n 2.0748e-05, 4.1405e-08, 5.7863e-05, 8.0638e-05, 2.3171e-05, 7.0682e-05,\n 1.2494e-04, 3.5240e-05, 4.0836e-05, 4.4009e-05, 1.0840e-05, 6.2528e-05,\n 7.3188e-05, 5.3101e-05, 2.8500e-05, 4.8022e-05, 1.3270e-04, 7.1345e-05,\n 9.8039e-05, 4.9880e-05, 7.4288e-05, 7.5197e-05, 1.2188e-04, 6.2601e-09,\n 4.7078e-05, 4.5902e-05, 8.0037e-05, 4.5587e-05, 6.4892e-05, 5.7550e-05,\n 3.9802e-05, 8.4359e-05, 4.4066e-05, 6.1808e-05, 1.3663e-05, 1.2267e-04,\n 1.5534e-04, 2.7016e-05, 2.4059e-05, 5.5049e-05, 5.8961e-05, 5.2122e-05,\n 4.9386e-05, 6.1050e-05, 5.1711e-05, 7.4730e-05, 6.7354e-05, 3.2563e-05,\n 1.8646e-05, 6.6369e-05, 8.4351e-06, 2.6946e-05, 6.5055e-05, 2.6992e-05,\n 7.2243e-05, 4.6647e-05, 2.3662e-05, 2.3980e-05, 4.0668e-05, 3.4866e-05,\n 9.6333e-05, 8.3457e-05, 6.5208e-05, 7.9298e-05, 3.9412e-05, 3.7890e-05,\n 1.7838e-05, 6.4214e-05, 5.1830e-05, 2.0711e-05, 8.2491e-05, 3.8297e-05,\n 7.6388e-05, 6.5668e-05, 5.8247e-05, 2.6719e-05, 3.0070e-05, 4.6429e-05,\n 2.0942e-05, 5.2430e-05, 3.9129e-05, 3.4937e-05, 2.9151e-05, 8.2376e-05,\n 1.2240e-04, 2.2993e-05, 5.8033e-06, 2.3480e-05, 7.5797e-05, 5.0620e-05,\n 3.0101e-05, 1.5111e-05, 5.9141e-05, 1.1922e-04, 4.3985e-05, 7.0778e-05,\n 2.1452e-05, 2.9974e-05, 5.9602e-05, 7.0787e-05, 1.0572e-05, 5.9085e-05,\n 4.8838e-05, 3.3771e-05, 1.4795e-03, 2.8343e-05, 6.7596e-05, 2.5200e-05,\n 1.1581e-04, 7.6340e-05, 1.8768e-05, 7.8852e-05, 4.3319e-05, 4.3013e-05,\n 6.0303e-05, 1.0362e-04, 3.6780e-05, 6.6169e-05, 4.8046e-05, 8.7605e-05,\n 8.5679e-05, 2.4497e-05, 3.8737e-05, 5.7655e-05, 1.1327e-04, 2.0221e-05,\n 2.5678e-05, 3.9036e-05, 5.9405e-05, 7.4863e-05, 1.3981e-04, 4.9663e-05,\n 6.7746e-05, 9.4101e-05, 2.5562e-05, 5.0306e-05, 5.7220e-05, 5.8151e-05,\n 9.1796e-05, 2.5666e-05, 8.7783e-05, 1.1497e-07, 6.5675e-05, 5.7744e-05,\n 3.5179e-05, 6.6260e-05, 1.0486e-04, 2.4144e-05, 2.4743e-05, 8.4151e-05,\n 4.0905e-05, 7.6226e-06, 4.5574e-05, 1.8996e-05, 3.8531e-05, 4.9739e-05,\n 1.6613e-05, 4.3763e-05, 8.9930e-05, 3.9020e-05, 2.0102e-05, 3.1018e-05,\n 4.9381e-05, 2.1246e-05, 1.3685e-05, 3.9440e-05, 5.4019e-05, 4.4976e-05,\n 2.5335e-05, 3.5664e-05, 6.5777e-05, 1.5524e-05, 3.3335e-05, 7.4262e-05,\n 6.0234e-05, 3.4098e-05, 7.6233e-05, 7.9724e-05, 4.2272e-05, 3.9750e-05,\n 6.4428e-05, 4.7021e-05, 2.2019e-05, 9.3759e-05, 1.0903e-04, 6.2591e-05,\n 4.2635e-05, 7.1636e-05, 3.0634e-05, 9.3350e-05, 9.6588e-05, 3.3782e-05,\n 2.4793e-05, 1.7948e-05, 5.6969e-05, 3.0138e-05, 1.7028e-04, 2.0664e-05,\n 5.7346e-05, 9.7025e-05, 7.4213e-05, 4.9465e-05, 4.2274e-05, 2.0708e-05,\n 9.8778e-05, 4.6467e-05, 1.0464e-04, 8.2468e-05, 1.1371e-04, 1.9350e-05,\n 3.8852e-05, 6.6679e-05, 2.2131e-05, 4.3382e-05, 8.4914e-05, 5.0501e-05,\n 5.8088e-05, 6.3481e-05, 7.3401e-05, 3.2443e-05, 2.5234e-05, 1.5137e-05,\n 1.3448e-05, 5.4638e-05, 2.2688e-05, 7.5122e-05, 1.4985e-05, 4.4604e-05,\n 3.6136e-05, 2.1323e-05, 5.6948e-05, 1.3312e-04, 5.7456e-05, 1.9938e-05,\n 6.3773e-05, 5.8517e-05, 4.4364e-05, 7.9548e-05, 3.1828e-05, 2.7609e-05,\n 1.3751e-05, 8.3910e-05, 7.5579e-05, 1.6606e-04, 2.7323e-05, 9.2713e-05,\n 1.8283e-05, 4.1909e-05, 5.8471e-05, 9.0405e-05, 5.5889e-05, 3.7126e-05,\n 6.3142e-05, 1.3276e-04, 1.3477e-04, 3.4052e-05, 3.9605e-05, 3.9283e-05,\n 5.4168e-05, 1.5002e-05, 9.8671e-05, 8.7662e-05, 3.8084e-05, 8.2566e-05,\n 4.6938e-05, 3.6747e-05, 3.4818e-05, 5.3940e-05, 9.5945e-05, 5.8190e-05,\n 9.2539e-05, 6.1749e-05, 7.7528e-05, 9.4096e-05, 9.1491e-05, 1.8077e-05,\n 7.2332e-05, 6.9511e-05, 8.0511e-05, 6.8578e-05, 9.9081e-05, 1.0740e-04,\n 1.4787e-05, 4.2240e-05, 6.8638e-05, 7.0029e-05, 5.8209e-05, 7.0207e-05,\n 1.2905e-04, 5.1791e-05, 1.3230e-04, 3.1328e-05, 3.7703e-05, 8.7016e-05,\n 2.7134e-05, 5.1551e-05, 3.4037e-05, 9.6508e-05, 5.0870e-05, 1.5455e-05,\n 1.1583e-05, 3.4642e-05, 4.9443e-05, 1.3413e-04, 8.9876e-05, 7.4853e-05,\n 3.3722e-05, 5.2104e-05, 3.0012e-05, 6.1226e-05, 4.5965e-05, 2.0755e-05,\n 6.0192e-05, 1.6045e-04, 4.2866e-05, 7.2082e-05, 4.9185e-05, 2.7262e-05,\n 2.3920e-05, 4.6540e-05, 9.5091e-05, 4.8232e-05, 5.8458e-05, 9.3518e-06,\n 2.1562e-05, 4.6629e-05, 2.5563e-05, 3.2983e-05, 4.1256e-05, 5.3837e-05,\n 1.9471e-05, 2.6884e-05, 2.1591e-05, 4.7164e-05, 1.1849e-04, 1.2639e-04,\n 5.9991e-05, 6.1959e-05, 5.2603e-05, 8.3766e-05, 1.7410e-04, 1.1270e-04,\n 4.3404e-05, 4.2885e-05, 7.8638e-05, 3.2916e-05, 9.5563e-05, 3.0930e-05,\n 7.1301e-06, 6.1772e-05, 7.1582e-05, 6.4861e-05, 1.5175e-05, 4.7819e-05,\n 8.0532e-05, 4.7885e-05, 4.6217e-05, 5.2978e-05, 1.5163e-05, 5.7309e-05,\n 5.0127e-05, 5.6288e-05, 7.5206e-05, 3.0704e-05, 5.6026e-05, 7.0860e-05,\n 9.4935e-08, 3.8167e-05, 1.7378e-05, 1.1109e-04, 8.9561e-05, 1.0129e-04,\n 5.2488e-05, 1.9742e-04, 4.7105e-05, 4.1449e-05, 8.4381e-05, 8.5691e-05,\n 5.8117e-05, 7.7153e-05, 8.3289e-05, 9.4483e-05, 6.0720e-05, 4.3580e-05,\n 4.8150e-05, 7.2913e-05, 3.3791e-05, 3.9347e-05, 7.4294e-05, 7.9486e-05,\n 3.8494e-05, 5.4305e-05, 3.4612e-05, 2.2176e-05, 3.2542e-08, 4.3827e-05,\n 4.6214e-05, 2.2613e-05, 8.6411e-05, 7.4949e-05, 1.0844e-04, 4.8134e-05,\n 2.5165e-05, 5.1375e-05, 4.4508e-05, 6.4718e-05, 2.3473e-05, 6.8435e-05,\n 3.0449e-05, 1.6608e-04, 4.7180e-05, 4.0033e-05, 4.8861e-05, 7.8811e-05,\n 1.1130e-04, 5.9936e-05, 3.7847e-05, 1.2143e-04, 1.0472e-04, 3.4603e-05,\n 6.8928e-05, 3.9774e-05, 5.2932e-05, 8.9824e-05, 7.6342e-05, 3.7765e-05,\n 5.9049e-05, 4.7249e-05, 4.2584e-05, 6.6110e-05, 3.2549e-05, 4.6470e-05,\n 1.6718e-05, 9.8433e-05, 3.9022e-05, 4.3184e-05, 2.5180e-05, 1.0893e-04,\n 6.1968e-05, 1.0441e-04, 6.4247e-05, 6.1753e-05, 2.6640e-05, 4.1650e-05,\n 4.2180e-05, 5.0685e-05, 6.3948e-05, 5.8474e-05, 1.8964e-04, 8.8770e-05,\n 4.5382e-05, 6.4377e-05, 9.1724e-05, 2.1386e-05, 1.3245e-04, 7.1046e-05,\n 9.4482e-05, 3.4258e-05, 1.6250e-04, 2.5424e-05, 9.6437e-05, 2.9211e-05,\n 5.8509e-05, 1.1030e-05, 8.0235e-05, 7.5738e-05, 3.1076e-05, 3.6277e-05,\n 3.8421e-05, 9.6752e-05, 8.3197e-05, 9.7976e-05, 1.7442e-04, 1.2302e-04,\n 8.3615e-05, 7.3013e-05, 4.0972e-05, 8.6529e-05, 4.4829e-05, 1.0420e-04,\n 1.5958e-05, 3.2191e-05, 4.7364e-05, 4.5041e-05, 2.6920e-05, 1.0833e-04,\n 3.3251e-05, 7.6877e-05, 9.5259e-05, 4.3558e-05, 4.5480e-05, 1.0422e-04,\n 9.5622e-05, 7.3834e-05, 2.5551e-05, 8.4178e-05, 8.3270e-05, 1.3172e-04,\n 1.5012e-05, 2.9435e-05, 5.2833e-05, 7.8708e-05, 2.8242e-05, 1.4015e-05,\n 3.4560e-05, 2.4896e-05, 7.3177e-05, 4.1156e-05, 4.3540e-05, 5.5396e-05,\n 5.9097e-05, 2.8384e-05, 5.0072e-05, 3.1305e-05, 2.2703e-05, 1.3928e-05,\n 4.4112e-05, 6.3030e-05, 5.6137e-05, 7.4179e-05, 2.9836e-05, 6.9072e-05,\n 9.2407e-05, 1.2298e-04, 3.2462e-05, 2.0662e-05, 2.2276e-05, 3.6505e-05,\n 6.7992e-05, 8.7684e-05, 7.4997e-05, 7.6316e-05, 2.8529e-05, 4.6959e-05,\n 3.7555e-05, 8.9426e-05, 4.5815e-05, 2.5308e-05, 2.4237e-05, 2.2442e-05,\n 3.2402e-05, 1.5918e-04, 4.5283e-05, 6.3682e-05, 7.3752e-05, 4.3061e-05,\n 5.2020e-05, 1.4130e-04, 6.8139e-05, 4.7804e-05, 2.5954e-05, 2.1221e-05,\n 2.9560e-05, 3.9971e-05, 2.7097e-05, 6.2107e-05, 1.0919e-04, 7.1817e-05,\n 7.7619e-05, 4.3607e-05, 1.0661e-04, 5.4150e-05, 2.6929e-05, 3.0819e-05,\n 6.0811e-05, 5.6102e-05, 6.6251e-05, 6.3621e-05, 4.6197e-05, 3.9022e-05,\n 6.5501e-05, 3.2605e-05, 7.9737e-05, 1.5649e-05], device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(3754.)",
|
| 17 |
+
"exp_avg": "tensor([[-5.7145e-06, 1.1063e-05, 2.0852e-05, ..., -3.4128e-05,\n -1.9831e-05, 8.1359e-06],\n [-2.8538e-05, 3.3111e-05, 1.1532e-05, ..., 2.4668e-06,\n -6.9036e-05, 2.2730e-05],\n [ 1.2580e-05, -3.5312e-05, 2.8274e-05, ..., -1.0950e-05,\n -7.7000e-06, 1.3544e-06],\n ...,\n [ 1.4112e-05, 5.2714e-05, -3.8939e-06, ..., -2.0392e-05,\n 4.5299e-05, 4.7248e-06],\n [-4.3884e-05, -1.8464e-05, 2.4305e-05, ..., -2.0412e-05,\n -1.4639e-06, -2.3918e-05],\n [-3.3159e-05, -2.7091e-05, -4.7088e-05, ..., 4.6097e-05,\n 1.4003e-05, -1.4675e-06]], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([[7.9731e-09, 1.5778e-08, 8.4568e-09, ..., 8.5942e-09, 1.9911e-08,\n 8.4072e-09],\n [2.4406e-08, 3.0611e-08, 2.3650e-08, ..., 2.4743e-08, 2.6419e-08,\n 2.0760e-08],\n [2.9345e-08, 2.1444e-08, 2.9055e-08, ..., 1.7540e-08, 2.5749e-08,\n 1.3699e-08],\n ...,\n [1.3579e-08, 3.5188e-08, 6.4344e-08, ..., 1.5934e-08, 3.1071e-08,\n 1.6421e-08],\n [2.4944e-08, 3.0704e-08, 4.8105e-08, ..., 1.7044e-08, 2.0681e-08,\n 2.2106e-08],\n [1.7889e-08, 2.6658e-08, 2.3520e-08, ..., 2.2739e-08, 2.7569e-08,\n 8.4465e-09]], device='cuda:0')"
|
| 19 |
+
}
|
| 20 |
+
},
|
| 21 |
+
"param_groups": [
|
| 22 |
+
{
|
| 23 |
+
"lr": 0.00975530705321762,
|
| 24 |
+
"name": "scale_256",
|
| 25 |
+
"betas": [
|
| 26 |
+
0.9,
|
| 27 |
+
0.999
|
| 28 |
+
],
|
| 29 |
+
"eps": 1e-08,
|
| 30 |
+
"weight_decay": 1e-05,
|
| 31 |
+
"amsgrad": false,
|
| 32 |
+
"maximize": false,
|
| 33 |
+
"foreach": null,
|
| 34 |
+
"capturable": false,
|
| 35 |
+
"differentiable": false,
|
| 36 |
+
"fused": null,
|
| 37 |
+
"decoupled_weight_decay": true,
|
| 38 |
+
"initial_lr": 0.01,
|
| 39 |
+
"params": [
|
| 40 |
+
0,
|
| 41 |
+
1,
|
| 42 |
+
2
|
| 43 |
+
]
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"lr": 0.00975530705321762,
|
| 47 |
+
"name": "scale_512",
|
| 48 |
+
"betas": [
|
| 49 |
+
0.9,
|
| 50 |
+
0.999
|
| 51 |
+
],
|
| 52 |
+
"eps": 1e-08,
|
| 53 |
+
"weight_decay": 1e-05,
|
| 54 |
+
"amsgrad": false,
|
| 55 |
+
"maximize": false,
|
| 56 |
+
"foreach": null,
|
| 57 |
+
"capturable": false,
|
| 58 |
+
"differentiable": false,
|
| 59 |
+
"fused": null,
|
| 60 |
+
"decoupled_weight_decay": true,
|
| 61 |
+
"initial_lr": 0.01,
|
| 62 |
+
"params": [
|
| 63 |
+
3,
|
| 64 |
+
4,
|
| 65 |
+
5
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"lr": 0.00975530705321762,
|
| 70 |
+
"name": "scale_768",
|
| 71 |
+
"betas": [
|
| 72 |
+
0.9,
|
| 73 |
+
0.999
|
| 74 |
+
],
|
| 75 |
+
"eps": 1e-08,
|
| 76 |
+
"weight_decay": 1e-05,
|
| 77 |
+
"amsgrad": false,
|
| 78 |
+
"maximize": false,
|
| 79 |
+
"foreach": null,
|
| 80 |
+
"capturable": false,
|
| 81 |
+
"differentiable": false,
|
| 82 |
+
"fused": null,
|
| 83 |
+
"decoupled_weight_decay": true,
|
| 84 |
+
"initial_lr": 0.01,
|
| 85 |
+
"params": [
|
| 86 |
+
6,
|
| 87 |
+
7,
|
| 88 |
+
8
|
| 89 |
+
]
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"lr": 0.00975530705321762,
|
| 93 |
+
"name": "scale_1024",
|
| 94 |
+
"betas": [
|
| 95 |
+
0.9,
|
| 96 |
+
0.999
|
| 97 |
+
],
|
| 98 |
+
"eps": 1e-08,
|
| 99 |
+
"weight_decay": 1e-05,
|
| 100 |
+
"amsgrad": false,
|
| 101 |
+
"maximize": false,
|
| 102 |
+
"foreach": null,
|
| 103 |
+
"capturable": false,
|
| 104 |
+
"differentiable": false,
|
| 105 |
+
"fused": null,
|
| 106 |
+
"decoupled_weight_decay": true,
|
| 107 |
+
"initial_lr": 0.01,
|
| 108 |
+
"params": [
|
| 109 |
+
9,
|
| 110 |
+
10,
|
| 111 |
+
11
|
| 112 |
+
]
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"lr": 0.00975530705321762,
|
| 116 |
+
"name": "scale_1280",
|
| 117 |
+
"betas": [
|
| 118 |
+
0.9,
|
| 119 |
+
0.999
|
| 120 |
+
],
|
| 121 |
+
"eps": 1e-08,
|
| 122 |
+
"weight_decay": 1e-05,
|
| 123 |
+
"amsgrad": false,
|
| 124 |
+
"maximize": false,
|
| 125 |
+
"foreach": null,
|
| 126 |
+
"capturable": false,
|
| 127 |
+
"differentiable": false,
|
| 128 |
+
"fused": null,
|
| 129 |
+
"decoupled_weight_decay": true,
|
| 130 |
+
"initial_lr": 0.01,
|
| 131 |
+
"params": [
|
| 132 |
+
12,
|
| 133 |
+
13,
|
| 134 |
+
14
|
| 135 |
+
]
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"lr": 0.004877665762479736,
|
| 139 |
+
"name": "fusion",
|
| 140 |
+
"betas": [
|
| 141 |
+
0.9,
|
| 142 |
+
0.999
|
| 143 |
+
],
|
| 144 |
+
"eps": 1e-08,
|
| 145 |
+
"weight_decay": 1e-05,
|
| 146 |
+
"amsgrad": false,
|
| 147 |
+
"maximize": false,
|
| 148 |
+
"foreach": null,
|
| 149 |
+
"capturable": false,
|
| 150 |
+
"differentiable": false,
|
| 151 |
+
"fused": null,
|
| 152 |
+
"decoupled_weight_decay": true,
|
| 153 |
+
"initial_lr": 0.005,
|
| 154 |
+
"params": [
|
| 155 |
+
15,
|
| 156 |
+
16,
|
| 157 |
+
17,
|
| 158 |
+
18,
|
| 159 |
+
19,
|
| 160 |
+
20,
|
| 161 |
+
21,
|
| 162 |
+
22,
|
| 163 |
+
23,
|
| 164 |
+
24,
|
| 165 |
+
25,
|
| 166 |
+
26,
|
| 167 |
+
27,
|
| 168 |
+
28,
|
| 169 |
+
29,
|
| 170 |
+
30,
|
| 171 |
+
31,
|
| 172 |
+
32,
|
| 173 |
+
33,
|
| 174 |
+
34,
|
| 175 |
+
35,
|
| 176 |
+
36,
|
| 177 |
+
37,
|
| 178 |
+
38,
|
| 179 |
+
39,
|
| 180 |
+
40,
|
| 181 |
+
41,
|
| 182 |
+
42,
|
| 183 |
+
43,
|
| 184 |
+
44,
|
| 185 |
+
45,
|
| 186 |
+
46,
|
| 187 |
+
47,
|
| 188 |
+
48
|
| 189 |
+
]
|
| 190 |
+
}
|
| 191 |
+
]
|
| 192 |
+
},
|
| 193 |
+
"scheduler_state_dict": {
|
| 194 |
+
"T_0": 10,
|
| 195 |
+
"T_i": 10,
|
| 196 |
+
"T_mult": 2,
|
| 197 |
+
"eta_min": 1e-06,
|
| 198 |
+
"T_cur": 1,
|
| 199 |
+
"base_lrs": [
|
| 200 |
+
0.01,
|
| 201 |
+
0.01,
|
| 202 |
+
0.01,
|
| 203 |
+
0.01,
|
| 204 |
+
0.01,
|
| 205 |
+
0.005
|
| 206 |
+
],
|
| 207 |
+
"last_epoch": 1,
|
| 208 |
+
"_step_count": 0,
|
| 209 |
+
"_is_initial": false,
|
| 210 |
+
"_get_lr_called_within_step": false,
|
| 211 |
+
"_last_lr": [
|
| 212 |
+
0.00975530705321762,
|
| 213 |
+
0.00975530705321762,
|
| 214 |
+
0.00975530705321762,
|
| 215 |
+
0.00975530705321762,
|
| 216 |
+
0.00975530705321762,
|
| 217 |
+
0.004877665762479736
|
| 218 |
+
]
|
| 219 |
+
},
|
| 220 |
+
"metrics": {
|
| 221 |
+
"best_val_acc": 62.524,
|
| 222 |
+
"best_epoch": 0,
|
| 223 |
+
"scale_accuracies": {
|
| 224 |
+
"256": 62.524
|
| 225 |
+
},
|
| 226 |
+
"training_history": {
|
| 227 |
+
"epochs": [
|
| 228 |
+
1
|
| 229 |
+
],
|
| 230 |
+
"train_loss": [
|
| 231 |
+
2.9751985085156605
|
| 232 |
+
],
|
| 233 |
+
"train_acc": [
|
| 234 |
+
56.42811072509152
|
| 235 |
+
],
|
| 236 |
+
"val_acc": [
|
| 237 |
+
62.524
|
| 238 |
+
],
|
| 239 |
+
"scale_accs": {
|
| 240 |
+
"256": [
|
| 241 |
+
62.524
|
| 242 |
+
]
|
| 243 |
+
},
|
| 244 |
+
"lr": [
|
| 245 |
+
0.00975530705321762
|
| 246 |
+
]
|
| 247 |
+
}
|
| 248 |
+
},
|
| 249 |
+
"train_config": {
|
| 250 |
+
"name": "david_training",
|
| 251 |
+
"run_id": "20251012_221046",
|
| 252 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 253 |
+
"model_variant": [
|
| 254 |
+
"clip_vit_b16",
|
| 255 |
+
"clip_vit_laion_b32",
|
| 256 |
+
"clip_vit_b32"
|
| 257 |
+
],
|
| 258 |
+
"num_classes": 1000,
|
| 259 |
+
"preset": "high_accuracy",
|
| 260 |
+
"custom_config_path": null,
|
| 261 |
+
"num_classes_override": null,
|
| 262 |
+
"use_belly_override": null,
|
| 263 |
+
"belly_expand_override": null,
|
| 264 |
+
"progressive_training_override": true,
|
| 265 |
+
"scale_warmup_epochs_override": {
|
| 266 |
+
"256": 0,
|
| 267 |
+
"512": 1,
|
| 268 |
+
"768": 2,
|
| 269 |
+
"1024": 3,
|
| 270 |
+
"1280": 4
|
| 271 |
+
},
|
| 272 |
+
"num_epochs": 10,
|
| 273 |
+
"batch_size": 1024,
|
| 274 |
+
"learning_rate": 0.01,
|
| 275 |
+
"weight_decay": 1e-05,
|
| 276 |
+
"warmup_epochs": 3,
|
| 277 |
+
"use_rose_loss": true,
|
| 278 |
+
"rose_initial_weight": 0.2,
|
| 279 |
+
"rose_max_weight": 0.8,
|
| 280 |
+
"rose_weight_schedule": "adaptive",
|
| 281 |
+
"use_cayley_loss": false,
|
| 282 |
+
"cayley_weight": 0.01,
|
| 283 |
+
"scale_loss_balance": null,
|
| 284 |
+
"use_mixed_precision": false,
|
| 285 |
+
"gradient_clip": 10.0,
|
| 286 |
+
"scheduler_type": "cosine_restarts",
|
| 287 |
+
"min_lr": 1e-06,
|
| 288 |
+
"freeze_strategy": "never",
|
| 289 |
+
"freeze_threshold": 90.0,
|
| 290 |
+
"unfreeze_on_plateau": true,
|
| 291 |
+
"patience": 10,
|
| 292 |
+
"track_gradients": true,
|
| 293 |
+
"gradient_scale_threshold": 1e-05,
|
| 294 |
+
"gradient_scale_multiplier": 10.0,
|
| 295 |
+
"log_interval": 50,
|
| 296 |
+
"val_interval": 1,
|
| 297 |
+
"save_interval": 5,
|
| 298 |
+
"log_fusion_weights": true,
|
| 299 |
+
"log_loss_components": true,
|
| 300 |
+
"save_format": "safetensors",
|
| 301 |
+
"hf_repo": "AbstractPhil/david-shared-space",
|
| 302 |
+
"upload_to_hub": true,
|
| 303 |
+
"base_dir": "./david_training",
|
| 304 |
+
"num_workers": 10,
|
| 305 |
+
"pin_memory": true,
|
| 306 |
+
"prefetch_factor": 4,
|
| 307 |
+
"persistent_workers": true
|
| 308 |
+
}
|
| 309 |
+
}
|