AbstractPhil commited on
Commit
037b8e8
·
verified ·
1 Parent(s): c62a4c5

Update best_model_acc66.43_metadata.json - Run 20251012_210041

Browse files
weights/David-partial_shared-hierarchical_tree/20251012_210041/best_model_acc66.43_metadata.json ADDED
@@ -0,0 +1,334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0,
3
+ "optimizer_state_dict": {
4
+ "state": {
5
+ "0": {
6
+ "step": "tensor(3754.)",
7
+ "exp_avg": "tensor([[ 6.4262e-05, -8.6894e-05, -1.2100e-05, ..., -9.9387e-05,\n 2.9941e-05, -1.9936e-05],\n [-3.8908e-05, -4.6575e-05, 4.8782e-05, ..., -2.9407e-07,\n -3.4625e-06, -3.3108e-05],\n [-4.7162e-05, 1.2523e-04, 1.3178e-05, ..., -9.5158e-07,\n -5.9426e-05, -2.3028e-05],\n ...,\n [ 3.7748e-05, 3.6945e-05, -5.1337e-05, ..., 1.2867e-04,\n 2.7052e-05, -1.8245e-05],\n [ 3.3787e-05, 1.8787e-05, -3.3004e-05, ..., 7.0335e-05,\n -2.9664e-05, 1.2666e-05],\n [ 1.0799e-04, -1.1550e-04, 1.5898e-05, ..., 9.3136e-06,\n -1.2529e-05, 1.1595e-05]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[1.3095e-08, 2.2631e-08, 1.1284e-08, ..., 6.0851e-08, 9.4719e-09,\n 1.3825e-08],\n [2.0988e-08, 1.9430e-08, 1.4248e-08, ..., 1.0167e-07, 1.4430e-08,\n 1.7325e-08],\n [1.1690e-07, 1.0180e-07, 1.9399e-08, ..., 1.8719e-08, 2.1559e-08,\n 1.6780e-08],\n ...,\n [1.9821e-08, 1.9600e-08, 1.4153e-08, ..., 8.6875e-08, 1.2080e-08,\n 1.4721e-08],\n [7.4260e-08, 1.5441e-07, 6.0123e-08, ..., 2.2493e-08, 2.0407e-08,\n 1.7176e-08],\n [8.4096e-08, 8.8266e-08, 4.0683e-08, ..., 4.0201e-08, 2.4109e-08,\n 1.7965e-08]], device='cuda:0')"
9
+ },
10
+ "1": {
11
+ "step": "tensor(3754.)",
12
+ "exp_avg": "tensor([-1.3120e-03, 4.9841e-04, -7.2686e-04, -2.2451e-05, -1.5959e-03,\n 1.5283e-03, 1.2459e-03, -1.9497e-03, 9.2653e-05, 6.6716e-04,\n 4.3605e-04, -1.1578e-03, -3.4612e-04, -3.9959e-04, 9.1409e-04,\n 6.0234e-04, 8.0772e-04, 9.7099e-04, -1.8793e-04, 6.9359e-04,\n -1.2908e-03, 9.8313e-04, -2.5570e-03, 2.3437e-04, 8.8799e-04,\n 1.2099e-04, -1.8960e-03, -2.4187e-03, 4.8296e-04, 1.5857e-03,\n 6.6567e-04, 1.1216e-03, 7.0138e-04, -5.2227e-04, 2.0375e-03,\n 6.0644e-04, 5.8554e-04, -8.5620e-04, -7.9124e-04, -1.7564e-05,\n 1.4031e-03, 4.9310e-04, -7.1622e-04, 3.0777e-04, 5.0693e-04,\n 9.9634e-04, 1.7722e-03, -1.2932e-03, -2.9913e-04, 4.7445e-04,\n 8.5169e-04, -1.6426e-03, 7.9294e-04, 8.3894e-04, -1.2646e-04,\n -3.9255e-04, 1.3146e-04, -1.2534e-03, 6.5862e-04, -1.7282e-04,\n -4.4854e-04, -7.9446e-04, -1.7830e-03, 3.7343e-04, 2.2005e-03,\n -4.1711e-04, 2.1193e-04, 2.4958e-03, -3.0670e-03, 9.1157e-04,\n -9.2169e-04, 2.6449e-04, -2.8742e-04, -1.1037e-03, 2.8178e-04,\n 4.2692e-04, 1.5844e-04, 2.0052e-03, -8.4151e-04, -7.6795e-04,\n 1.1693e-03, -5.7863e-04, -1.5231e-03, 9.1751e-04, -8.0194e-04,\n 1.0114e-04, 6.1798e-05, -1.1398e-03, -1.9073e-03, -3.5391e-04,\n 4.1612e-04, 5.2556e-04, -2.0961e-03, 3.0967e-04, 8.5466e-05,\n 1.0156e-03, 1.2626e-03, -1.3908e-04, -9.6134e-04, 1.6097e-03,\n -6.8470e-04, 1.3213e-03, -4.6042e-04, -8.7000e-04, -3.0142e-04,\n 4.7949e-04, 1.4687e-03, -1.2768e-03, -4.7928e-04, -3.8558e-04,\n -9.0933e-05, -8.6967e-04, -5.3188e-04, 1.4431e-03, -4.1883e-04,\n 1.2389e-03, 7.5317e-04, -5.9185e-04, -6.4775e-04, -1.4747e-03,\n -1.0558e-04, -5.0285e-04, 1.3575e-03, -6.0391e-05, -1.2416e-04,\n 9.3283e-05, -2.3910e-08, -4.8027e-04, 3.2106e-04, -9.1479e-04,\n 6.8058e-06, 5.7545e-04, -1.2717e-03, -5.2738e-04, -7.5607e-04,\n -7.7971e-05, 2.0301e-03, 1.3559e-04, -1.3262e-03, 8.9601e-04,\n 2.9234e-04, 6.1021e-04, 1.5527e-03, -9.3117e-04, -1.0273e-04,\n -4.8272e-04, 1.7094e-04, 1.7388e-03, 1.5457e-05, -9.4098e-04,\n 2.0317e-03, 2.3926e-03, 1.2262e-04, -3.1608e-03, 3.3656e-04,\n 3.7583e-03, 1.7960e-03, -4.7688e-04, -1.0913e-03, 8.9063e-04,\n 1.5557e-03, 1.1706e-04, -9.9001e-04, 6.8384e-04, 2.3629e-03,\n 3.3351e-04, 8.4936e-04, -2.9385e-03, 1.0584e-03, 6.8517e-04,\n -1.9574e-04, -1.4790e-03, -8.0607e-07, -3.6050e-04, 2.2749e-05,\n -1.6665e-03, 1.2876e-03, -1.0609e-03, -1.1323e-04, 1.6027e-04,\n 2.1217e-03, 1.2846e-03, -6.2706e-04, 1.0366e-03, -1.5507e-04,\n 1.8721e-03, 1.7942e-04, -7.5994e-05, -1.5701e-04, 1.6809e-03,\n -1.6934e-03, 3.2430e-04, 1.1745e-03, -9.0790e-04, 1.2763e-03,\n -5.0475e-05, -2.2593e-04, -1.7841e-03, 1.0593e-03, 5.2956e-06,\n -6.9613e-04, -1.0935e-03, -5.8378e-04, 4.2165e-04, 2.3559e-03,\n -9.7191e-04, 6.0388e-04, -4.0465e-04, -5.8604e-04, 6.3396e-04,\n -7.6341e-04, -4.2598e-04, -1.7363e-04, -6.0236e-06, 1.7975e-04,\n 1.5796e-03, -3.0398e-04, -4.5605e-04, -1.2142e-03, -5.5193e-04,\n 1.1068e-05, 6.3786e-06, -4.3727e-04, -7.9194e-04, -5.2852e-04,\n -2.3905e-03, -8.4193e-04, -3.9701e-04, -1.4959e-04, 1.4861e-04,\n -1.1523e-04, 7.3462e-04, 2.5997e-03, 5.9732e-04, 6.9707e-05,\n -5.2324e-04, -5.7124e-04, -6.4347e-04, 1.6059e-03, 2.9774e-04,\n -2.4193e-03, -3.7492e-05, 1.6667e-03, -6.2603e-04, -3.3442e-04,\n 3.5534e-04, -4.1860e-04, 6.7122e-05, -6.6189e-04, 1.1760e-03,\n -5.7717e-06, 8.0166e-04, 1.3023e-04, 9.1386e-04, 4.8996e-04,\n -2.8884e-04, -2.8704e-04, -1.8329e-03, -1.7503e-03, -1.0529e-03,\n -7.3971e-05, -5.4724e-04, 1.2648e-03, 5.6052e-45, 2.5099e-04,\n 6.3271e-04, 2.2118e-04, 5.6395e-04, 1.0143e-03, -1.3474e-03,\n -2.0507e-03, 7.3624e-04, 5.9858e-04, 1.8462e-05, -7.4670e-04,\n -6.7246e-04, 4.7454e-04, -2.2755e-03, 4.7345e-04, -1.0183e-03,\n -1.1981e-03, 4.2699e-04, 7.3664e-04, 6.9695e-04, -2.7144e-03,\n 4.7768e-05, 4.1807e-04, 4.2617e-04, -2.3149e-03, -8.1693e-04,\n -6.0155e-04, 1.3679e-03, -1.2944e-03, -6.9830e-04, 2.7356e-03,\n -4.6196e-04, -1.7206e-03, -8.9215e-04, -7.0483e-04, 7.6692e-05,\n -7.2550e-04, -7.2335e-04, -5.3244e-04, -3.6794e-04, -1.4521e-03,\n -7.0849e-04, 1.8791e-04, 4.5511e-04, 5.7155e-04, 3.6927e-05,\n 4.2779e-04, 1.8217e-03, 1.6890e-03, 8.2222e-04, -9.8572e-04,\n 6.5983e-04, 4.0440e-04, 1.9121e-04, -5.6095e-04, -7.9284e-04,\n -6.8550e-04, 1.2744e-03, 3.4695e-04, -1.3334e-03, -2.4604e-04,\n -9.3608e-04, -8.5349e-05, -2.6899e-04, 6.0673e-04, 2.7006e-04,\n -5.5532e-04, -1.5435e-03, -2.2207e-03, 1.2125e-04, 2.6392e-03,\n 4.3836e-07, 9.4169e-05, 1.2660e-03, -7.2202e-04, -8.8026e-04,\n -7.9350e-04, 5.8545e-04, 1.8975e-04, -4.0200e-05, 4.8168e-03,\n -2.2198e-13, -1.6682e-04, -2.0652e-04, 1.3972e-04, 1.2035e-03,\n -1.2259e-03, 4.7175e-05, 9.1467e-04, -1.0835e-04, -1.0709e-03,\n 4.6433e-04, 5.8079e-04, -7.0403e-04, -1.1905e-03, -1.8993e-03,\n -1.5491e-03, -4.6218e-04, -3.2737e-03, -2.6591e-04, 1.3453e-03,\n -4.8551e-04, 4.0584e-04, -6.1602e-04, -8.0723e-04, 2.5071e-04,\n -1.0160e-03, 7.3548e-04, -1.8734e-03, 8.2182e-05, -8.6889e-04,\n -4.5218e-04, -1.2020e-04, 6.9185e-04, -1.6538e-04, -4.4976e-04,\n 1.4196e-03, 6.2765e-04, -4.8830e-04, 7.6615e-04, 1.2031e-03,\n 1.0876e-04, 4.1267e-04, 1.2118e-03, 3.8457e-04, -1.5514e-04,\n -8.1685e-04, -9.5236e-04, -2.0100e-04, -1.6478e-03, -1.2561e-03,\n 5.2424e-04, -3.1405e-04, 7.5221e-04, 2.7281e-04, -1.0434e-03,\n -4.9355e-04, 7.4956e-04, -4.2972e-04, 7.7182e-04, -1.0631e-03,\n 2.0706e-03, 4.2381e-04, -1.0364e-03, 2.2164e-05, 6.0623e-04,\n 1.4283e-03, 3.6608e-04, 2.1639e-03, 2.2202e-04, -5.5188e-04,\n 2.2756e-04, -1.0223e-03, -2.3965e-03, -2.0103e-03, 7.9131e-04,\n 2.3403e-04, 6.8754e-04, 1.4194e-03, 1.0460e-03, 1.6491e-03,\n -1.5717e-03, 1.4432e-03, -1.8832e-03, 1.6681e-03, 7.6177e-05,\n -1.9791e-03, 2.2841e-04, 5.3761e-04, -6.0228e-06, 2.0779e-03,\n 1.2058e-03, 2.8800e-04, -1.6243e-03, -1.1245e-03, -1.2457e-06,\n 2.3990e-04, -2.2101e-03, -1.5338e-04, 4.3511e-04, 1.3281e-04,\n -4.7138e-04, 9.0856e-04, 8.1182e-04, 2.1372e-05, -9.0988e-05,\n -6.7654e-04, -1.3857e-05, -7.1471e-05, 1.0018e-03, 7.3250e-04,\n 5.0697e-04, 6.9857e-04, 1.3875e-03, -5.2408e-05, -6.1244e-04,\n 5.3837e-05, -5.8232e-06, 7.6490e-04, -9.2098e-04, 9.2010e-04,\n -6.8464e-04, -1.5153e-03, 2.6640e-03, -1.4656e-03, -7.6737e-04,\n 3.1769e-03, -1.3069e-05, 1.1701e-03, -1.6124e-03, -1.0659e-03,\n -2.4221e-04, 1.1554e-03, 4.4352e-04, 2.8130e-04, 1.3193e-04,\n -1.4548e-03, 1.4571e-03, -1.2432e-03, 1.6350e-03, -8.9248e-04,\n 2.0337e-04, -1.1455e-03, 1.2258e-03, 7.5951e-04, 5.8380e-04,\n 3.9911e-05, 1.5329e-03, 6.0869e-04, 4.9346e-04, -5.0452e-04,\n 9.2521e-04, -1.1165e-03, 1.2984e-03, -4.1798e-04, -2.2371e-03,\n 1.5326e-04, -1.8119e-03, -1.9654e-04, 1.0185e-03, -9.1514e-04,\n -2.9633e-04, 3.7527e-04, 2.5405e-03, 1.7551e-04, 6.5590e-04,\n -4.1922e-04, -3.0210e-04, -3.6863e-04, -1.7394e-03, 9.3426e-04,\n -2.0396e-04, 7.7838e-04, 8.6222e-04, 1.8995e-04, -2.8609e-04,\n 2.6846e-04, -3.4180e-05, 1.9451e-03, -5.4436e-04, -1.3234e-04,\n -1.1514e-03, -9.1617e-04, -3.6290e-04, 1.7782e-03, 6.6617e-04,\n 6.9576e-04, -1.3654e-03, 4.0890e-04, 3.0889e-04, -7.1621e-04,\n -1.8115e-03, 7.0870e-04, 1.2155e-03, 3.1072e-04, -3.6156e-04,\n 1.4115e-03, 8.1918e-04, -6.3362e-04, -6.8763e-04, 1.7872e-03,\n 9.4439e-04, 3.8448e-04, 2.8310e-04, 3.6675e-04, 6.6634e-04,\n -5.1434e-04, -3.0576e-04, 3.2674e-04, -1.6553e-04, 8.9446e-04,\n 2.4584e-04, 3.0878e-03, 4.9051e-04, 1.0795e-03, -1.3463e-03,\n 9.7571e-04, 1.1820e-03, -1.3396e-03, 1.8084e-03, -6.2742e-04,\n 4.6601e-05, -1.6293e-03, -1.4900e-03, 3.7929e-04, 3.2177e-04,\n -2.7568e-04, -2.0540e-03, -2.7910e-05, 9.1237e-04, 5.1372e-04,\n -2.6598e-03, 1.0485e-03, 6.3924e-05, -8.0280e-05, 7.3452e-05,\n 6.3778e-04, 3.9552e-04, 2.8462e-03, 1.1162e-03, -3.5320e-04,\n 2.0916e-04, -1.5322e-04, 4.1858e-05, -9.5183e-04, -2.0288e-04,\n 1.2508e-03, -3.5717e-04, -4.7713e-04, 1.9132e-03, 2.3420e-03,\n 2.3170e-03, -4.1532e-04, 1.0284e-03, 7.7939e-04, 5.7323e-04,\n 1.2005e-03, -1.5245e-03, -3.3495e-04, 3.3415e-04, -8.6547e-04,\n -1.2842e-04, 2.6814e-04, 2.3335e-04, -1.3016e-03, -2.3380e-04,\n 3.3397e-03, -7.9543e-04, 5.2367e-04, -1.3844e-03, -1.8095e-03,\n 4.1218e-04, 1.9837e-04, 8.8360e-04, 7.9050e-04, 1.8489e-03,\n 2.5243e-04, -8.1932e-04, 1.0707e-03, 2.3565e-04, 3.3899e-04,\n 6.8101e-04, -2.6226e-05, -1.2586e-03, -1.1396e-03, 1.5842e-04,\n 4.8556e-04, -4.0283e-04, 5.0953e-05, 1.7208e-03, -3.4494e-04,\n 1.8227e-04, -1.0297e-03, -7.1028e-04, 2.1206e-04, 5.4008e-04,\n 6.5833e-04, -1.4290e-03, 1.6950e-03, -6.2151e-04, 9.3428e-04,\n -1.1700e-03, 1.4598e-03, 5.3405e-04, 2.7821e-04, -1.8741e-03,\n 9.4742e-04, -2.4718e-03, 5.2826e-04, 1.5790e-04, -1.8995e-05,\n -3.2494e-04, 5.8415e-04, -8.6480e-04, 2.5573e-03, -1.8982e-03,\n -1.3009e-03, 3.5909e-04, 1.5136e-05, 8.6550e-04, 8.7825e-04,\n 2.5362e-04, -8.1598e-04, -1.6919e-03, 4.7398e-04, 7.1627e-04,\n -1.1153e-03, -2.7808e-03, 2.1626e-04, -1.7875e-03, 1.3804e-04,\n -3.7882e-04, -6.6523e-04, 6.7853e-04, -2.0749e-03, -1.1993e-03,\n 1.5290e-03, -2.1812e-04, -9.9246e-04, 2.1869e-03, -1.9063e-04,\n -7.7859e-04, -7.2674e-04, 5.0557e-04, 1.1844e-03, -1.4824e-04,\n -2.3905e-03, 6.4551e-04, -1.1101e-04, -8.6918e-04, 1.4870e-04,\n 1.3545e-03, 2.2912e-04, 2.0660e-03, -1.6954e-03, -2.8568e-04,\n -3.5097e-04, -1.2742e-03, 7.2298e-06, -9.1915e-05, 6.0659e-04,\n -8.2506e-04, -9.1537e-04, 7.0247e-04, -1.0689e-04, -6.2171e-04,\n -9.3364e-05, 6.2397e-04, 8.5854e-04, -1.3174e-03, 3.7487e-05,\n 3.4289e-04, 8.9561e-04, 2.6278e-04, 9.1791e-04, -6.9758e-04,\n -1.0862e-03, 9.0681e-05, 1.1911e-03, -1.4856e-03, 1.1389e-03,\n -1.6590e-03, -1.6117e-03, 7.6935e-04, 3.8245e-04, 5.8999e-04,\n 1.2726e-06, 5.6088e-05, 7.8331e-04, 2.2003e-04, 1.3278e-03,\n 1.3167e-03, -8.3319e-05, -1.1695e-03, -3.3443e-04, -4.8769e-04,\n -1.0139e-03, 1.3522e-03, 2.9868e-04, 4.7354e-05, -1.8150e-04,\n -2.6566e-04, 3.2334e-04, 2.3722e-04, -1.2191e-03, 1.1356e-25,\n 4.7904e-04, 1.9237e-03, 9.1337e-04, -6.4859e-04, -5.8757e-04,\n 1.8473e-03, -6.1950e-04, -2.1259e-04, 7.5539e-04, -2.6734e-05,\n -1.4108e-03, -1.0953e-04, -2.7148e-04, -3.3987e-04, -1.4196e-03,\n 3.3351e-03, -3.2884e-05, 1.7113e-03], device='cuda:0')",
13
+ "exp_avg_sq": "tensor([2.0282e-05, 2.9672e-05, 2.8581e-05, 1.7444e-05, 1.5033e-05, 1.3385e-05,\n 3.1368e-05, 2.6972e-05, 2.3035e-05, 1.0332e-05, 3.6024e-06, 8.6182e-06,\n 2.2577e-05, 1.7439e-05, 1.0944e-05, 7.3915e-05, 2.2222e-05, 2.9698e-05,\n 2.6165e-05, 3.9284e-05, 2.0801e-05, 3.0452e-05, 4.5225e-05, 1.4516e-05,\n 4.1751e-05, 1.0003e-05, 2.1964e-05, 1.6770e-05, 4.2318e-05, 1.5521e-05,\n 3.3857e-05, 1.9576e-05, 1.3706e-05, 1.1057e-05, 1.5073e-05, 1.1441e-05,\n 9.6896e-06, 2.0188e-05, 4.4483e-05, 1.8975e-05, 4.1231e-05, 1.1090e-05,\n 3.1196e-05, 2.2881e-05, 2.2236e-05, 2.7617e-05, 1.3234e-05, 3.6655e-05,\n 5.7945e-05, 4.0603e-05, 3.8567e-05, 2.2443e-05, 2.9603e-05, 2.3925e-05,\n 1.5242e-05, 2.1958e-05, 2.3489e-05, 3.0312e-05, 2.9769e-05, 1.0758e-05,\n 2.0150e-05, 1.2939e-05, 2.4051e-05, 2.8003e-05, 2.5516e-05, 3.0715e-05,\n 2.0121e-05, 3.6681e-05, 2.9552e-05, 3.3163e-05, 1.8022e-05, 2.2966e-05,\n 1.6352e-05, 2.9012e-05, 2.8385e-05, 1.6002e-05, 2.2012e-05, 2.9028e-05,\n 2.4683e-05, 2.0049e-05, 3.4059e-05, 3.3204e-05, 2.1795e-05, 1.1217e-05,\n 2.0703e-05, 1.5067e-05, 4.2079e-05, 3.8295e-05, 1.6604e-05, 2.1890e-06,\n 1.2733e-05, 3.0914e-05, 2.9315e-05, 4.6058e-05, 2.0906e-05, 2.0295e-05,\n 1.8534e-05, 1.3261e-05, 2.0763e-05, 1.2772e-05, 2.4350e-05, 3.3861e-05,\n 1.1332e-05, 1.0110e-05, 5.1146e-06, 8.8128e-06, 2.3693e-05, 4.0848e-05,\n 7.2254e-06, 1.5329e-05, 4.6235e-05, 5.0611e-05, 2.1825e-05, 2.9912e-05,\n 1.3310e-05, 3.6440e-05, 2.2838e-05, 2.3186e-05, 2.5717e-05, 2.2062e-05,\n 1.2978e-05, 1.6018e-05, 7.2225e-06, 2.6828e-05, 5.2368e-05, 3.3683e-05,\n 2.8132e-07, 2.5774e-05, 1.4747e-05, 9.5594e-06, 1.4906e-05, 3.8143e-05,\n 2.6968e-05, 1.0721e-05, 1.2369e-05, 3.3028e-05, 1.6248e-05, 1.5545e-05,\n 3.0462e-05, 1.2150e-05, 2.5572e-05, 2.2803e-05, 2.3532e-05, 2.7275e-05,\n 7.2236e-06, 2.8570e-05, 1.4567e-05, 9.5214e-06, 2.8334e-05, 2.3888e-05,\n 2.9650e-05, 2.4388e-05, 2.2592e-05, 3.1215e-05, 2.3701e-05, 5.7197e-05,\n 4.2221e-05, 3.5370e-05, 3.6768e-05, 1.0591e-05, 2.6774e-05, 1.8956e-05,\n 1.5792e-05, 2.7075e-05, 5.9092e-05, 1.3897e-05, 2.7266e-05, 3.2276e-05,\n 2.6155e-05, 4.2971e-05, 2.5280e-05, 3.0821e-05, 8.9934e-06, 1.6578e-05,\n 1.5946e-05, 3.4278e-05, 3.6691e-05, 1.7456e-05, 8.0220e-06, 2.6929e-05,\n 3.0735e-05, 1.5124e-05, 9.7022e-06, 1.2382e-05, 2.5136e-05, 3.8136e-05,\n 2.3811e-05, 9.4409e-06, 2.0667e-05, 3.2734e-05, 1.3024e-05, 7.8869e-06,\n 1.0942e-05, 2.9888e-05, 3.8635e-05, 1.9457e-05, 1.7913e-05, 3.6151e-05,\n 4.6615e-05, 2.0524e-05, 1.4248e-05, 4.0816e-06, 1.2892e-05, 2.3868e-05,\n 1.4755e-05, 2.1987e-05, 3.2990e-05, 1.5482e-05, 3.3845e-05, 3.0223e-05,\n 1.6095e-05, 3.0741e-05, 1.7940e-05, 2.4297e-05, 2.3186e-05, 1.6371e-05,\n 2.3040e-05, 2.9986e-05, 3.6649e-05, 1.0963e-05, 3.6364e-05, 3.2778e-05,\n 3.0874e-05, 2.1095e-05, 3.1419e-05, 2.7726e-05, 2.0604e-05, 1.1830e-05,\n 1.5296e-05, 1.0127e-05, 1.8313e-05, 8.6933e-06, 1.9533e-05, 1.7363e-05,\n 3.4553e-05, 4.8300e-05, 2.3720e-05, 2.7128e-05, 2.1096e-05, 2.0064e-05,\n 2.7196e-05, 5.5933e-06, 1.0704e-05, 3.1042e-05, 1.4875e-05, 1.1096e-05,\n 1.4554e-05, 1.1286e-05, 2.7833e-05, 1.1387e-05, 1.2866e-05, 6.0496e-06,\n 2.1426e-05, 1.7449e-05, 2.4296e-05, 2.6924e-05, 1.9459e-05, 3.7619e-05,\n 3.6093e-05, 1.4762e-05, 2.1842e-05, 1.9518e-05, 1.7509e-05, 8.1102e-09,\n 2.1914e-05, 2.2185e-05, 2.1801e-05, 1.9750e-05, 1.6886e-05, 1.2684e-05,\n 3.1683e-05, 3.2026e-05, 2.5884e-05, 1.1233e-05, 1.4176e-05, 1.6831e-05,\n 1.4826e-05, 3.2396e-05, 2.2224e-05, 3.5122e-05, 2.9955e-05, 6.9381e-06,\n 2.5026e-05, 2.8682e-05, 3.5364e-05, 2.5226e-05, 1.9610e-05, 8.5886e-06,\n 1.9279e-05, 2.9883e-05, 3.9718e-06, 4.0133e-05, 2.6504e-05, 2.6739e-05,\n 3.1578e-05, 3.2813e-05, 2.1197e-05, 1.4725e-05, 1.7569e-05, 3.5091e-05,\n 3.8026e-06, 2.0109e-05, 5.0551e-05, 1.8444e-05, 3.5892e-05, 7.3139e-06,\n 1.1788e-05, 1.3413e-05, 1.8392e-05, 1.6437e-05, 2.2343e-05, 2.5784e-05,\n 1.9246e-05, 2.2566e-05, 1.5433e-05, 3.4154e-05, 2.9543e-05, 1.5514e-05,\n 2.4608e-05, 1.9921e-05, 4.5948e-05, 3.1195e-05, 1.3652e-05, 2.9310e-05,\n 1.9818e-05, 4.7275e-05, 3.6805e-05, 2.1017e-05, 1.2242e-05, 2.7872e-05,\n 1.4197e-05, 3.4074e-05, 3.2696e-05, 2.4056e-05, 3.5072e-05, 5.0814e-08,\n 2.6340e-05, 3.2375e-05, 1.2778e-05, 3.6368e-05, 4.7698e-05, 1.2769e-05,\n 1.0910e-05, 1.3130e-05, 4.7708e-05, 3.0779e-08, 2.1012e-05, 1.9214e-05,\n 2.4634e-05, 1.2921e-05, 1.2800e-05, 9.6584e-06, 4.3239e-05, 3.2418e-05,\n 2.9992e-05, 2.3137e-05, 5.2767e-05, 2.2846e-05, 3.1600e-05, 3.0335e-05,\n 1.4500e-05, 1.7048e-05, 3.8885e-05, 2.0512e-05, 3.1066e-05, 3.4173e-05,\n 1.1233e-05, 2.7850e-05, 1.9797e-05, 2.0836e-05, 2.8499e-05, 2.8161e-05,\n 4.1167e-05, 2.3869e-05, 2.3894e-05, 2.4634e-05, 1.6984e-05, 2.0191e-05,\n 2.4604e-05, 3.0306e-05, 1.1489e-05, 3.2578e-05, 6.9854e-06, 2.6708e-05,\n 1.7448e-05, 7.1908e-05, 1.4237e-05, 2.9761e-05, 2.4485e-05, 2.8455e-05,\n 3.4216e-05, 1.7152e-05, 1.7390e-05, 2.6786e-05, 3.6138e-05, 2.3077e-05,\n 7.3943e-06, 4.5449e-05, 1.7197e-05, 2.9935e-05, 5.0663e-05, 5.6990e-06,\n 3.4499e-05, 3.6791e-05, 1.2300e-05, 4.0353e-05, 1.9561e-05, 3.3732e-05,\n 3.0524e-05, 1.0737e-05, 2.1725e-05, 1.9029e-05, 2.1322e-05, 2.1325e-05,\n 1.1854e-05, 3.7971e-05, 3.8431e-05, 3.8589e-05, 2.2186e-05, 8.7726e-06,\n 1.5606e-05, 2.5558e-05, 2.9819e-05, 4.5471e-05, 2.4446e-05, 2.2150e-05,\n 3.1257e-05, 2.9346e-05, 2.4818e-05, 1.9817e-05, 1.0846e-05, 4.5860e-06,\n 1.2797e-05, 1.5809e-05, 2.5166e-05, 2.4998e-05, 1.0304e-06, 1.2537e-05,\n 3.6790e-05, 1.5639e-05, 2.3831e-05, 3.1523e-05, 7.7834e-06, 1.9717e-05,\n 3.0122e-05, 1.3191e-05, 4.5488e-05, 2.6868e-05, 1.2375e-05, 1.3444e-05,\n 1.0625e-05, 2.1328e-05, 2.7791e-05, 4.7399e-05, 2.4822e-05, 1.5055e-05,\n 1.1974e-05, 3.1169e-05, 1.8710e-05, 6.9066e-06, 3.0694e-05, 1.9778e-05,\n 1.7494e-05, 2.7859e-05, 3.1333e-05, 1.1893e-05, 2.7991e-05, 3.1436e-05,\n 3.0125e-05, 2.2045e-05, 2.7697e-05, 1.9777e-05, 1.6190e-05, 2.2730e-05,\n 1.5919e-05, 1.4377e-05, 4.2829e-05, 1.8082e-05, 2.4496e-05, 6.0589e-06,\n 2.1227e-05, 2.4818e-05, 3.9302e-05, 3.2474e-05, 3.4489e-05, 2.0251e-05,\n 3.0040e-05, 2.4902e-05, 2.7153e-05, 3.3075e-05, 2.6196e-05, 3.8712e-05,\n 4.6517e-06, 1.0865e-05, 2.2810e-05, 1.4894e-05, 3.8013e-05, 2.7499e-05,\n 3.3399e-05, 1.1090e-05, 4.0100e-05, 3.2452e-05, 2.3996e-05, 3.2102e-05,\n 2.0977e-05, 1.0636e-05, 9.8392e-06, 3.6512e-05, 1.8797e-05, 1.5976e-05,\n 2.3326e-05, 6.0562e-06, 6.6117e-06, 3.4776e-05, 2.0272e-05, 1.1378e-05,\n 2.3493e-05, 2.9061e-05, 1.7163e-05, 3.9823e-05, 3.3450e-05, 2.1469e-05,\n 2.1162e-05, 2.1059e-05, 3.7684e-05, 1.9205e-05, 1.2165e-05, 2.5866e-05,\n 2.0283e-05, 2.5103e-05, 2.2133e-05, 1.7136e-05, 1.3026e-05, 3.4159e-05,\n 3.3516e-05, 3.0549e-05, 2.0374e-05, 4.8992e-05, 9.8115e-06, 2.4847e-05,\n 3.6152e-05, 4.1138e-05, 2.0427e-05, 1.1749e-05, 2.0062e-05, 2.4960e-05,\n 1.6299e-05, 2.8445e-05, 3.5071e-05, 2.9040e-05, 3.0934e-05, 3.1789e-05,\n 2.3788e-05, 1.3568e-05, 2.2951e-05, 1.8338e-05, 1.8689e-05, 2.9147e-05,\n 4.9813e-05, 5.1704e-05, 2.2691e-05, 1.0278e-05, 1.4997e-05, 4.0519e-05,\n 2.4457e-05, 1.2073e-05, 2.1046e-05, 4.0220e-05, 1.3367e-05, 1.5218e-05,\n 2.5332e-05, 1.6566e-05, 9.2125e-08, 2.4021e-05, 2.9874e-05, 1.3583e-05,\n 1.8839e-05, 2.3115e-05, 1.1260e-05, 2.3099e-05, 1.2316e-05, 1.4488e-05,\n 3.6554e-05, 2.5718e-05, 1.8548e-05, 1.4360e-06, 2.3515e-05, 1.7448e-05,\n 3.3167e-05, 6.0871e-06, 1.1906e-05, 1.2754e-05, 1.4187e-05, 2.5777e-05,\n 1.9218e-05, 3.0570e-05, 8.4994e-06, 2.2117e-05, 1.6671e-05, 2.9800e-05,\n 3.8163e-05, 2.6396e-05, 1.6467e-05, 2.9351e-05, 3.2264e-05, 1.4670e-05,\n 3.0418e-05, 4.4219e-05, 2.6895e-05, 1.1345e-05, 3.3926e-05, 2.6769e-05,\n 9.5236e-06, 2.6323e-05, 4.1952e-05, 3.0334e-05, 1.6989e-05, 1.8623e-05,\n 2.4381e-05, 3.7519e-05, 2.0863e-05, 1.6170e-05, 1.8146e-05, 1.1321e-05,\n 3.0031e-05, 1.4734e-05, 7.5514e-06, 1.2638e-05, 2.2460e-05, 2.7339e-05,\n 1.5815e-05, 1.9188e-05, 3.4221e-05, 2.0869e-05, 3.9242e-05, 2.5732e-05,\n 3.4209e-05, 1.8856e-05, 1.5710e-05, 3.0452e-05, 2.0655e-05, 2.3714e-05,\n 3.6277e-05, 1.2188e-05, 1.4041e-05, 5.4915e-05, 3.5228e-05, 2.4272e-05,\n 3.9950e-05, 2.3763e-05, 1.7906e-05, 3.9688e-05, 9.5621e-06, 2.7981e-05,\n 5.3504e-05, 2.1000e-05, 1.1888e-05, 3.1270e-05, 4.7125e-05, 8.3585e-06,\n 3.8213e-05, 1.8848e-05, 3.5066e-05, 3.1162e-05, 1.5403e-05, 3.6596e-05,\n 2.3934e-05, 3.2594e-05, 2.6262e-05, 3.0751e-05, 2.7141e-05, 2.4946e-05,\n 5.9609e-06, 1.6353e-05, 5.9002e-05, 2.7955e-05, 2.2742e-05, 1.9121e-05,\n 2.3111e-05, 2.7048e-05, 1.0487e-05, 9.2650e-06, 3.7016e-05, 3.5080e-05,\n 1.7537e-05, 1.2440e-05, 2.4942e-05, 3.4423e-05, 2.6644e-05, 1.9898e-05,\n 2.9537e-05, 1.9340e-05, 1.5048e-05, 1.5310e-05, 2.7240e-05, 1.8409e-05,\n 1.5024e-05, 1.6114e-05, 2.2503e-05, 1.9514e-05, 1.7877e-05, 2.2483e-05,\n 3.2126e-05, 1.8675e-05, 1.3264e-05, 2.5264e-05, 1.3106e-05, 2.6173e-05,\n 1.3107e-05, 1.5119e-05, 3.1269e-05, 3.4443e-05, 1.3763e-05, 4.8562e-05,\n 1.1605e-05, 1.4466e-05, 3.2900e-05, 1.1747e-05, 1.4035e-05, 1.4059e-05,\n 1.7925e-05, 3.5089e-05, 2.1519e-05, 1.8283e-05, 1.1957e-05, 2.6463e-05,\n 1.6158e-05, 4.0550e-05, 1.8179e-05, 1.8059e-05, 2.7402e-05, 1.4917e-05,\n 3.1073e-05, 2.0145e-05, 2.3959e-05, 3.5580e-05, 1.8223e-05, 9.1900e-06,\n 2.8195e-05, 1.3036e-05, 1.3789e-05, 2.2120e-05, 1.3886e-05, 1.0146e-05,\n 2.0631e-05, 2.0873e-05, 2.4382e-05, 2.1868e-05, 2.3808e-05, 1.5222e-08,\n 1.5623e-05, 4.2484e-05, 4.3895e-05, 1.7701e-05, 2.5238e-05, 2.4502e-05,\n 2.8533e-05, 1.2974e-05, 2.0531e-05, 1.5076e-05, 2.7308e-05, 2.2324e-05,\n 2.6290e-05, 2.2141e-05, 2.3484e-05, 2.8886e-05, 2.8298e-05, 3.0643e-05],\n device='cuda:0')"
14
+ },
15
+ "2": {
16
+ "step": "tensor(3754.)",
17
+ "exp_avg": "tensor([[ 2.3355e-06, 4.9595e-06, 3.4703e-05, ..., -1.0920e-05,\n 2.0886e-05, 4.2438e-06],\n [-4.4386e-05, -1.3516e-05, 4.7667e-05, ..., -1.1011e-04,\n 5.7531e-06, -5.8810e-06],\n [-6.2509e-05, -1.7580e-04, -1.1950e-04, ..., -4.2188e-04,\n -1.5665e-05, -7.9945e-06],\n ...,\n [-5.9022e-07, 2.6911e-05, -1.4782e-05, ..., 4.2458e-05,\n -2.9368e-06, 6.6720e-05],\n [ 7.5664e-06, 3.4130e-05, -9.0864e-05, ..., 1.1416e-05,\n -5.0397e-06, 1.6401e-04],\n [-1.7475e-06, 3.0808e-05, -3.3601e-05, ..., 1.4059e-05,\n 3.1749e-05, -3.1917e-04]], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([[1.2161e-08, 4.0446e-08, 8.7949e-09, ..., 7.6581e-09, 6.2202e-09,\n 3.0900e-09],\n [3.3999e-08, 6.5474e-08, 3.5022e-08, ..., 1.3853e-06, 6.6348e-09,\n 1.8448e-09],\n [1.7849e-08, 3.5999e-08, 1.1949e-07, ..., 6.8543e-08, 1.2304e-08,\n 5.2857e-09],\n ...,\n [2.2302e-09, 2.0952e-08, 2.3148e-08, ..., 2.0714e-08, 2.4971e-08,\n 5.3408e-08],\n [1.6792e-09, 1.1626e-08, 4.3204e-08, ..., 5.0676e-10, 6.4763e-09,\n 3.0195e-07],\n [3.9752e-09, 1.4315e-08, 5.4752e-08, ..., 1.0305e-08, 3.0896e-08,\n 7.6162e-07]], device='cuda:0')"
19
+ },
20
+ "3": {
21
+ "step": "tensor(3754.)",
22
+ "exp_avg": "tensor([ 5.6272e-06, 1.4324e-04, -9.6138e-04, -4.2194e-05, 5.6052e-45,\n -2.9109e-04, 5.6052e-45, 5.8350e-04, -1.1950e-05, -1.0076e-05,\n 1.9506e-04, -1.3929e-04, -4.1736e-05, -1.6482e-04, 3.7054e-05,\n 7.5422e-05, 5.6052e-45, 8.9200e-05, -1.1088e-04, -5.7281e-05,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -4.5359e-04, -1.0449e-04,\n 4.2399e-04, 6.6566e-05, 1.9314e-05, 3.7871e-04, 5.6052e-45,\n -2.6224e-04, 5.6052e-45, 2.4347e-05, -1.2905e-04, 8.2636e-05,\n -2.6617e-05, 7.8713e-30, 5.6052e-45, 5.6052e-45, -6.9772e-04,\n 1.4426e-04, 5.6052e-45, -1.9586e-04, 5.6052e-45, 5.6052e-45,\n -8.6149e-05, -4.5421e-04, 2.1798e-05, 5.8129e-05, 5.0594e-04,\n 5.6052e-45, 5.9249e-04, 5.6052e-45, -3.6829e-05, 1.8027e-04,\n -4.0336e-05, -5.7017e-05, 5.6052e-45, -1.7320e-04, 5.6052e-45,\n -1.4483e-04, -5.7216e-04, -5.0434e-04, -4.5116e-05, 2.4180e-04,\n -3.6491e-05, -1.0538e-04, 1.5636e-04, 2.1724e-04, 1.6251e-04,\n 1.3577e-04, 2.4072e-04, 9.8949e-06, -2.7878e-04, -4.4851e-04,\n 1.1646e-04, 3.2430e-04, 5.6052e-45, 5.6052e-45, 1.6028e-04,\n -1.8102e-04, 4.3741e-04, -2.7021e-04, 1.3047e-04, -1.0971e-04,\n -2.4955e-09, 5.6052e-45, 2.0992e-04, -8.7155e-05, 5.6052e-45,\n -1.7808e-04, -1.2461e-05, 5.6052e-45, 7.0089e-05, -2.2038e-04,\n 5.6052e-45, 3.5962e-04, 3.3614e-04, -3.1647e-05, -9.3285e-05,\n 2.5412e-04, 1.3413e-19, -4.4582e-04, -3.0293e-05, 5.3834e-05,\n 2.7179e-04, -4.7318e-04, -8.7258e-07, -2.1021e-04, -5.1733e-04,\n 2.4572e-06, -1.3539e-04, 3.9113e-04, -2.1156e-04, -4.1321e-04,\n 6.2564e-05, 5.6052e-45, 9.2505e-05, 5.6052e-45, 2.9174e-04,\n -3.3821e-05, 2.2137e-04, 7.6767e-05, -3.3735e-05, -3.1721e-05,\n 5.6052e-45, -3.4284e-04, -5.7969e-04, 2.8476e-04, 3.6874e-05,\n 6.1760e-40, 2.1546e-19, -6.5746e-04, -1.1769e-06, 5.6052e-45,\n 5.0669e-05, 3.1565e-05, -1.1138e-04, -1.9872e-05, 1.3430e-04,\n -4.0199e-04, 5.6052e-45, -1.1814e-23, 3.1336e-04, 5.6052e-45,\n 5.6052e-45, 1.3972e-07, 5.6052e-45, 5.6052e-45, 1.9660e-04,\n -6.1120e-06, -8.8248e-05, 2.3486e-04, 5.6052e-45, -1.7840e-04,\n -5.6052e-45, 1.7743e-04, -7.4123e-05, 1.1570e-04, -3.6594e-04,\n -4.1496e-04, 4.8072e-05, 2.1763e-04, 5.6052e-45, 3.7791e-05,\n 1.3440e-34, 5.6052e-45, 5.6052e-45, -6.5231e-05, 2.1453e-13,\n -1.7340e-04, 2.5299e-04, 7.6243e-12, 3.1202e-21, 1.3487e-04,\n -4.7089e-05, 5.6052e-45, 5.6052e-45, -3.3888e-05, 5.4447e-04,\n 2.5162e-29, 4.0061e-04, -3.1796e-04, -3.2999e-04, -1.9451e-04,\n -6.8641e-04, 5.6052e-45, 5.6052e-45, -8.9112e-05, -5.8201e-05,\n 3.9230e-06, -3.3623e-04, 4.0802e-04, 5.6052e-45, -3.5423e-04,\n -1.6897e-04, 1.2142e-04, 2.9614e-04, 1.4289e-04, 5.6052e-45,\n 5.6052e-45, 1.8803e-04, 1.4560e-04, -5.4246e-05, -1.9680e-04,\n -6.4062e-05, 2.5257e-36, 2.8026e-44, -9.2116e-05, 5.1397e-04,\n 1.9804e-04, -1.1902e-04, -3.3097e-04, 5.6653e-04, -1.0388e-04,\n 1.7121e-05, -2.0096e-07, -3.8878e-04, 5.6052e-45, 5.6052e-45,\n -2.8368e-04, -3.3635e-05, 5.6052e-45, -3.8859e-05, -7.5280e-05,\n -2.4128e-04, -4.4479e-04, 1.8917e-04, 5.6052e-45, 8.8330e-05,\n -2.2438e-05, 1.4879e-04, 5.6052e-45, -4.0739e-04, -2.9654e-05,\n -1.5534e-04, 1.4433e-43, 1.0394e-04, 5.6052e-45, 2.1927e-04,\n 5.6052e-45, -8.3075e-05, 2.7324e-04, 7.2442e-05, -1.1700e-05,\n 5.6052e-45, -1.8772e-04, -1.6442e-04, -1.1957e-04, 1.3160e-04,\n -5.6768e-04, 5.6239e-05, 3.5925e-04, 1.7248e-04, -8.4066e-05,\n 7.1952e-05, 2.7381e-04, 5.6052e-45, -1.3955e-04, 3.8433e-04,\n -5.7897e-05, -1.9833e-05, 2.2031e-04, -2.0840e-04, 1.3458e-04,\n 3.1048e-04, -7.7804e-05, 5.6052e-45, 5.6052e-45, 1.7343e-04,\n 3.0288e-04, 5.6052e-45, 2.0220e-04, 4.9302e-07, 2.5061e-04,\n 3.3882e-04, -1.5377e-04, 2.4659e-04, 3.2215e-04, 2.8014e-06,\n 5.6052e-45, 1.7010e-04, 5.6052e-45, 5.6052e-45, -6.2701e-04,\n -1.5174e-04, -2.1984e-04, -8.0152e-04, 6.2763e-04, 1.8159e-07,\n -1.7347e-04, -8.8369e-04, -5.8262e-05, -4.7729e-05, 2.7264e-05,\n -8.5969e-10, 1.2180e-04, 5.6052e-45, 5.6052e-45, -1.3060e-04,\n -8.7907e-05, 4.0247e-04, -1.0678e-04, -2.3003e-04, 9.4256e-29,\n -1.9304e-05, -1.7731e-04, 5.6052e-45, 1.8360e-04, -1.5924e-04,\n -5.5851e-05, -5.8028e-04, 3.9599e-04, -1.3759e-04, 1.3678e-19,\n -3.6172e-04, 2.1436e-04, 5.6052e-45, 5.6052e-45, 2.2664e-04,\n -6.1424e-05, 6.7258e-05, -9.9860e-05, -5.4810e-05, 5.6052e-45,\n -3.5277e-04, 2.6284e-04, 5.1855e-04, 1.8190e-04, 1.3982e-04,\n -1.9597e-04, 5.6052e-45, -6.5290e-05, -1.2079e-04, 1.0151e-38,\n -2.0555e-04, -7.3948e-05, 5.6052e-45, 1.3000e-11, -3.9276e-04,\n 6.6620e-05, -1.9563e-04, 1.7505e-04, 3.6891e-05, 5.6052e-45,\n 2.2111e-04, 2.5618e-04, 5.6052e-45, -1.9100e-05, 5.6052e-45,\n 8.4068e-05, 1.0157e-04, 1.6541e-15, 5.6052e-45, 6.8204e-05,\n 1.7907e-04, -1.4165e-04, 2.8702e-04, -2.5998e-04, -6.5313e-04,\n -1.1341e-04, 5.6052e-45, 2.1537e-05, 2.8009e-04, -2.3795e-05,\n 6.6035e-05, -9.0331e-06, 4.9766e-04, 5.6052e-45, 5.6052e-45,\n 2.4819e-15, -1.5501e-04, 1.7606e-06, 5.6052e-45, 2.2034e-05,\n 5.7967e-04, -7.9582e-05, -2.1628e-04, 4.1762e-05, -1.9805e-04,\n 1.1654e-05, -3.1980e-07, 3.5819e-05, 5.6052e-45, -3.0141e-05,\n 1.7734e-04, 5.6052e-45, 5.6052e-45, 1.8236e-04, 5.6052e-45,\n 3.4626e-05, -3.9084e-04, 2.4896e-04, 5.6052e-45, -1.8680e-04,\n -1.0590e-04, -2.5886e-04, -5.3881e-06, 5.6052e-45, 5.6052e-45,\n 3.2270e-04, 2.1160e-04, -1.8238e-04, 1.5163e-05, -1.7831e-04,\n -3.3498e-04, -1.9937e-04, -5.4484e-04, -3.9381e-04, -9.1203e-05,\n 4.4522e-05, 6.9669e-05, -7.7005e-04, 5.6052e-45, -3.0612e-05,\n -4.2541e-05, 1.9604e-04, -1.3364e-04, -3.0795e-04, 9.1369e-05,\n -1.6080e-05, -1.1731e-04, 4.3704e-04, -1.7498e-04, 5.6052e-45,\n 1.0810e-04, 9.0245e-05, 5.6052e-45, -2.9346e-06, 1.0154e-04,\n 4.7601e-05, 1.8532e-04, -2.8926e-06, 5.6052e-45, 1.2551e-04,\n -3.3261e-04, 1.2621e-04, -1.6012e-04, -4.1903e-05, 4.7873e-05,\n -6.9876e-05, -1.4155e-04, -3.6484e-04, -1.6568e-04, 5.6052e-45,\n 3.5125e-04, 3.7484e-04, -5.7701e-05, 1.5894e-04, -4.7177e-04,\n -3.1945e-04, 5.6052e-45, 3.4045e-04, 9.5024e-05, -4.6507e-04,\n -2.5372e-05, -5.6052e-45, -1.2062e-05, 1.1627e-04, -7.9008e-05,\n 4.0956e-04, 6.8322e-05, 5.6052e-45, 3.9892e-04, 5.6052e-45,\n 6.3207e-05, 7.4186e-05, 5.6052e-45, 1.1233e-04, 1.3662e-04,\n 2.0729e-04, 1.3120e-04, 1.3584e-04, 7.7979e-05, 2.8968e-04,\n -3.1929e-05, 3.2799e-04, 1.5914e-04, 1.0316e-04, 2.7709e-11,\n -3.8804e-05, -4.5594e-04, 6.1451e-05, -1.5666e-04, -4.9043e-09,\n -1.4102e-04, 2.9184e-04, 2.4546e-09, 5.4187e-05, 1.6741e-05,\n 2.0063e-04, 5.6052e-45, 5.1748e-05, 5.4690e-05, 5.6052e-45,\n -3.4663e-04, 3.1042e-04, 5.6052e-45, 2.1383e-05, 1.2450e-04,\n 5.6052e-45, 7.7881e-05, -1.3585e-04, -5.0002e-04, 6.7744e-04,\n -1.9025e-04, -1.2821e-04, 5.6052e-45, -3.8773e-04, 9.6609e-06,\n 6.9866e-05, -2.5620e-05], device='cuda:0')",
23
+ "exp_avg_sq": "tensor([1.1631e-06, 4.4718e-06, 5.6942e-06, 4.3096e-06, 2.8252e-06, 2.3752e-06,\n 1.7203e-07, 3.7008e-06, 1.5346e-06, 2.3304e-06, 4.4775e-06, 1.8863e-06,\n 1.1211e-06, 1.3059e-05, 3.3312e-06, 2.2865e-06, 2.1970e-06, 2.3821e-06,\n 2.0358e-06, 2.5055e-06, 2.5498e-06, 2.9596e-08, 1.0631e-06, 4.7244e-06,\n 3.6529e-06, 2.4480e-06, 2.5095e-07, 3.8187e-06, 1.4350e-06, 2.2365e-08,\n 3.3136e-06, 2.9756e-06, 1.2202e-06, 4.6051e-06, 2.0496e-06, 3.3603e-06,\n 5.2713e-07, 5.0638e-07, 2.8450e-08, 2.5313e-06, 7.3817e-07, 1.1762e-09,\n 2.7395e-06, 1.8409e-07, 4.0182e-08, 1.0905e-06, 2.6932e-06, 2.2486e-07,\n 2.7480e-06, 2.1917e-06, 1.2644e-07, 4.6288e-06, 4.0002e-07, 1.7099e-06,\n 1.6956e-06, 2.7769e-06, 1.8654e-06, 8.7253e-11, 3.6152e-06, 1.5455e-08,\n 1.4523e-06, 1.4407e-06, 4.6004e-06, 1.7740e-06, 1.8458e-06, 1.1970e-06,\n 5.3782e-07, 2.0484e-06, 2.6061e-06, 3.5440e-06, 1.4398e-06, 1.9648e-06,\n 1.0046e-06, 3.8571e-06, 3.6194e-06, 3.2468e-06, 2.3309e-06, 1.2762e-11,\n 1.6100e-07, 1.9823e-06, 9.7735e-07, 5.0361e-06, 2.4896e-06, 2.7593e-06,\n 3.5813e-06, 5.7915e-07, 8.3171e-08, 3.3530e-06, 7.4807e-07, 3.2898e-07,\n 3.4755e-06, 1.1275e-06, 1.3718e-06, 2.1566e-06, 4.4366e-06, 2.2303e-08,\n 2.3639e-06, 1.4789e-06, 1.7156e-07, 1.7011e-06, 8.4606e-07, 1.3895e-08,\n 3.3671e-06, 3.0202e-06, 7.9644e-07, 3.1133e-06, 3.1321e-06, 2.4649e-06,\n 8.6158e-07, 3.0609e-06, 3.8099e-06, 3.9730e-06, 4.2647e-06, 1.4942e-06,\n 7.6438e-06, 1.9663e-06, 4.7964e-07, 5.1509e-06, 2.4433e-07, 2.2457e-06,\n 4.9904e-07, 2.8755e-06, 2.5051e-06, 2.5922e-06, 6.3381e-06, 2.1758e-08,\n 1.0546e-06, 4.4735e-06, 1.0591e-06, 3.8850e-06, 9.0899e-08, 7.6783e-09,\n 2.5229e-06, 2.4109e-06, 2.4437e-06, 2.0094e-06, 4.9532e-09, 2.3722e-06,\n 1.9210e-06, 3.2150e-06, 1.5398e-06, 3.0173e-06, 1.7937e-07, 2.0051e-07,\n 2.7833e-07, 1.3694e-06, 4.2619e-09, 5.5451e-07, 1.8032e-08, 2.7211e-06,\n 4.5724e-07, 3.5447e-06, 9.0140e-07, 8.0469e-07, 1.6886e-06, 7.4390e-09,\n 2.3597e-06, 1.0042e-06, 2.3085e-07, 2.5544e-06, 1.0914e-06, 3.6027e-06,\n 1.8517e-06, 1.6381e-09, 1.2940e-06, 7.4787e-07, 5.1479e-06, 5.3029e-08,\n 2.2345e-06, 3.3798e-07, 2.1348e-06, 1.2986e-06, 4.1630e-07, 7.2850e-09,\n 1.7236e-06, 1.6095e-06, 5.4258e-07, 2.3243e-06, 1.4565e-07, 2.8150e-06,\n 1.0313e-06, 1.9300e-06, 2.8206e-06, 5.1609e-06, 2.6226e-06, 4.0349e-06,\n 1.0407e-08, 7.7207e-08, 3.7927e-06, 2.1027e-06, 1.4960e-06, 2.7157e-06,\n 2.7305e-06, 1.3838e-06, 1.7208e-06, 2.9595e-06, 8.5094e-07, 1.8255e-06,\n 2.4075e-06, 2.0531e-06, 5.1867e-07, 1.6688e-06, 3.3160e-06, 1.9109e-06,\n 3.1202e-06, 7.5040e-07, 1.2853e-06, 2.1625e-07, 1.8026e-06, 2.9265e-06,\n 5.5041e-06, 2.7586e-06, 2.8505e-06, 4.3227e-06, 1.5686e-06, 1.7777e-06,\n 1.2218e-07, 1.1289e-06, 2.4931e-07, 3.8177e-06, 3.2175e-06, 1.2802e-06,\n 4.3107e-06, 2.0523e-06, 1.8624e-06, 1.8656e-06, 2.1787e-06, 2.6779e-06,\n 1.8259e-09, 2.6488e-06, 3.1801e-06, 2.9909e-06, 7.0439e-07, 3.5045e-06,\n 1.3897e-06, 1.9982e-06, 1.6309e-07, 4.0882e-06, 5.6770e-07, 2.0995e-06,\n 9.4712e-07, 5.8565e-07, 2.1506e-06, 3.6408e-06, 1.7963e-06, 1.9560e-06,\n 8.1327e-07, 2.3894e-06, 9.4935e-07, 6.7366e-07, 4.1348e-06, 4.1933e-06,\n 3.2559e-06, 4.5089e-06, 3.7894e-06, 3.4444e-06, 2.4962e-06, 5.1368e-07,\n 1.9622e-06, 7.7965e-07, 2.1357e-06, 3.7347e-06, 1.6454e-06, 4.2803e-07,\n 1.8575e-06, 3.4388e-06, 3.3139e-06, 8.0237e-09, 4.7508e-08, 1.9110e-06,\n 8.0912e-07, 7.0407e-09, 4.3368e-06, 4.9129e-07, 3.5398e-07, 3.4904e-06,\n 2.0164e-06, 3.5262e-06, 5.0930e-06, 3.7946e-06, 7.4074e-07, 2.0800e-06,\n 1.6301e-06, 1.7665e-08, 3.0209e-06, 4.3426e-06, 2.4562e-06, 5.2085e-06,\n 4.7727e-06, 2.6421e-08, 1.4390e-06, 2.6943e-06, 1.2562e-06, 2.3853e-06,\n 3.4172e-06, 6.1562e-07, 3.2133e-06, 1.6937e-07, 8.7638e-11, 3.1101e-06,\n 2.4798e-06, 2.2119e-06, 1.4035e-06, 3.1632e-06, 4.1684e-09, 3.3019e-06,\n 3.1351e-06, 9.7691e-08, 2.0646e-06, 2.8850e-08, 1.7578e-06, 3.6306e-06,\n 2.5261e-06, 3.5952e-06, 5.0462e-08, 1.8914e-06, 1.0926e-06, 4.1194e-07,\n 1.3023e-06, 2.5746e-06, 9.1001e-07, 1.9123e-06, 2.8201e-06, 2.8109e-06,\n 1.6583e-06, 2.2515e-06, 5.2414e-06, 3.3412e-06, 7.3529e-06, 2.2955e-06,\n 1.7766e-06, 1.5360e-09, 1.7130e-06, 2.4530e-06, 7.5433e-08, 2.9350e-06,\n 1.3773e-06, 1.6909e-07, 1.1685e-07, 3.1291e-06, 1.2269e-07, 1.5514e-06,\n 9.1362e-07, 3.6007e-06, 2.9433e-07, 6.7155e-06, 5.4381e-07, 1.3764e-06,\n 2.5398e-06, 2.0968e-07, 6.3327e-07, 5.3690e-06, 4.0634e-08, 1.0365e-07,\n 1.3390e-06, 2.8585e-06, 2.7721e-06, 1.7475e-06, 3.7503e-06, 5.8045e-06,\n 1.7770e-06, 1.2855e-07, 2.1387e-06, 5.9029e-07, 4.1168e-07, 8.6679e-08,\n 1.3654e-06, 3.0758e-06, 1.6561e-07, 1.1094e-06, 2.0388e-06, 3.4602e-06,\n 8.8125e-07, 8.5531e-07, 4.3838e-07, 1.7088e-06, 1.0706e-06, 3.5302e-06,\n 1.9446e-06, 2.4145e-06, 2.7367e-06, 1.7883e-08, 2.7368e-06, 1.6621e-06,\n 7.3592e-06, 3.2106e-06, 4.3009e-08, 1.4138e-07, 3.1604e-06, 1.2844e-06,\n 1.9679e-06, 3.2109e-06, 4.6811e-06, 2.3248e-07, 4.4461e-06, 1.2150e-06,\n 2.6964e-06, 2.8752e-08, 8.7559e-08, 2.1825e-06, 2.5033e-06, 1.4454e-06,\n 6.7238e-06, 3.7222e-06, 1.0268e-06, 2.6162e-06, 1.6318e-06, 5.1604e-06,\n 2.0250e-06, 2.3106e-06, 1.8992e-06, 2.4622e-06, 1.7897e-06, 6.1006e-06,\n 1.5219e-06, 5.1442e-06, 2.4549e-06, 3.1122e-06, 3.9305e-06, 2.9354e-06,\n 5.1725e-06, 2.8822e-06, 3.9861e-06, 2.6178e-06, 1.0601e-09, 3.7703e-06,\n 3.7325e-06, 3.1130e-07, 1.1008e-06, 1.8036e-06, 7.8295e-07, 1.9572e-06,\n 1.8011e-07, 2.3482e-07, 2.9773e-06, 3.5543e-06, 3.4440e-06, 2.4435e-06,\n 4.1822e-06, 5.4895e-06, 9.9352e-07, 3.4888e-06, 2.1956e-06, 2.4142e-06,\n 7.5035e-06, 1.8838e-06, 2.5941e-06, 4.1388e-06, 2.3103e-06, 1.7092e-06,\n 5.3931e-06, 3.6533e-07, 2.3653e-06, 3.7317e-06, 2.4983e-06, 1.5452e-06,\n 2.9539e-07, 3.5518e-06, 3.6316e-06, 1.4133e-06, 3.9325e-06, 3.1890e-06,\n 2.2109e-08, 2.7904e-06, 1.6077e-06, 2.2098e-06, 3.1053e-06, 6.5751e-07,\n 1.5763e-06, 2.7205e-06, 1.0109e-06, 5.9767e-07, 3.4312e-06, 2.1198e-06,\n 1.3844e-06, 3.9915e-06, 3.8451e-06, 2.8602e-06, 4.3617e-07, 7.9262e-07,\n 9.2316e-07, 3.8784e-06, 4.2328e-06, 2.7728e-06, 4.6064e-07, 4.0461e-06,\n 2.4706e-06, 2.2370e-08, 1.9841e-06, 2.1871e-06, 1.0820e-06, 5.9129e-06,\n 2.2602e-06, 1.9646e-06, 3.0437e-07, 1.7991e-06, 2.4048e-06, 1.0605e-06,\n 2.0742e-06, 2.2684e-07, 3.3489e-06, 3.4936e-06, 2.9849e-06, 3.6442e-06,\n 2.6973e-06, 1.7019e-06, 1.4285e-06, 1.9331e-07, 2.5386e-06, 2.7714e-06,\n 6.9658e-07, 2.5970e-06], device='cuda:0')"
24
+ },
25
+ "4": {
26
+ "step": "tensor(3754.)",
27
+ "exp_avg": "tensor([[-2.5816e-05, -4.1583e-05, -6.2645e-05, ..., 3.2304e-05,\n -9.6427e-06, -1.1348e-05],\n [-1.4375e-05, 9.1768e-06, 1.8871e-05, ..., -4.5550e-06,\n -7.9488e-06, 4.3768e-05],\n [-6.8588e-06, -7.0814e-05, 2.4236e-05, ..., -3.1594e-05,\n 1.0344e-05, 3.1625e-05],\n ...,\n [-1.4207e-05, 4.5682e-05, 5.7196e-05, ..., -2.2782e-06,\n -2.0900e-06, 6.0137e-05],\n [ 3.3753e-06, -1.9380e-04, -7.6259e-05, ..., 1.6913e-05,\n -5.7851e-07, 1.0358e-05],\n [ 2.7173e-05, -1.2475e-04, -1.3241e-04, ..., -1.5037e-05,\n -1.8117e-05, 1.0170e-04]], device='cuda:0')",
28
+ "exp_avg_sq": "tensor([[1.7119e-08, 5.7205e-08, 3.3274e-08, ..., 1.3501e-08, 1.2205e-09,\n 2.2339e-08],\n [1.0626e-08, 1.3163e-07, 3.2224e-08, ..., 3.0851e-08, 4.2813e-09,\n 2.4235e-08],\n [1.1523e-08, 1.5539e-07, 2.8430e-08, ..., 4.0512e-08, 2.4608e-09,\n 1.7105e-08],\n ...,\n [1.2496e-08, 1.7766e-07, 4.3628e-08, ..., 8.0864e-08, 5.7689e-09,\n 4.7564e-08],\n [1.3666e-08, 1.4218e-07, 5.6629e-08, ..., 5.9775e-08, 4.0868e-09,\n 4.6036e-08],\n [1.7677e-08, 1.5832e-07, 3.7623e-08, ..., 3.2245e-08, 3.2544e-09,\n 4.5285e-08]], device='cuda:0')"
29
+ }
30
+ },
31
+ "param_groups": [
32
+ {
33
+ "lr": 0.00975530705321762,
34
+ "name": "shared",
35
+ "betas": [
36
+ 0.9,
37
+ 0.999
38
+ ],
39
+ "eps": 1e-08,
40
+ "weight_decay": 1e-05,
41
+ "amsgrad": false,
42
+ "maximize": false,
43
+ "foreach": null,
44
+ "capturable": false,
45
+ "differentiable": false,
46
+ "fused": null,
47
+ "decoupled_weight_decay": true,
48
+ "initial_lr": 0.01,
49
+ "params": [
50
+ 0,
51
+ 1
52
+ ]
53
+ },
54
+ {
55
+ "lr": 0.00975530705321762,
56
+ "name": "scale_256",
57
+ "betas": [
58
+ 0.9,
59
+ 0.999
60
+ ],
61
+ "eps": 1e-08,
62
+ "weight_decay": 1e-05,
63
+ "amsgrad": false,
64
+ "maximize": false,
65
+ "foreach": null,
66
+ "capturable": false,
67
+ "differentiable": false,
68
+ "fused": null,
69
+ "decoupled_weight_decay": true,
70
+ "initial_lr": 0.01,
71
+ "params": [
72
+ 2,
73
+ 3,
74
+ 4
75
+ ]
76
+ },
77
+ {
78
+ "lr": 0.00975530705321762,
79
+ "name": "scale_512",
80
+ "betas": [
81
+ 0.9,
82
+ 0.999
83
+ ],
84
+ "eps": 1e-08,
85
+ "weight_decay": 1e-05,
86
+ "amsgrad": false,
87
+ "maximize": false,
88
+ "foreach": null,
89
+ "capturable": false,
90
+ "differentiable": false,
91
+ "fused": null,
92
+ "decoupled_weight_decay": true,
93
+ "initial_lr": 0.01,
94
+ "params": [
95
+ 5,
96
+ 6,
97
+ 7
98
+ ]
99
+ },
100
+ {
101
+ "lr": 0.00975530705321762,
102
+ "name": "scale_768",
103
+ "betas": [
104
+ 0.9,
105
+ 0.999
106
+ ],
107
+ "eps": 1e-08,
108
+ "weight_decay": 1e-05,
109
+ "amsgrad": false,
110
+ "maximize": false,
111
+ "foreach": null,
112
+ "capturable": false,
113
+ "differentiable": false,
114
+ "fused": null,
115
+ "decoupled_weight_decay": true,
116
+ "initial_lr": 0.01,
117
+ "params": [
118
+ 8,
119
+ 9,
120
+ 10
121
+ ]
122
+ },
123
+ {
124
+ "lr": 0.00975530705321762,
125
+ "name": "scale_1024",
126
+ "betas": [
127
+ 0.9,
128
+ 0.999
129
+ ],
130
+ "eps": 1e-08,
131
+ "weight_decay": 1e-05,
132
+ "amsgrad": false,
133
+ "maximize": false,
134
+ "foreach": null,
135
+ "capturable": false,
136
+ "differentiable": false,
137
+ "fused": null,
138
+ "decoupled_weight_decay": true,
139
+ "initial_lr": 0.01,
140
+ "params": [
141
+ 11,
142
+ 12,
143
+ 13
144
+ ]
145
+ },
146
+ {
147
+ "lr": 0.004877665762479736,
148
+ "name": "fusion",
149
+ "betas": [
150
+ 0.9,
151
+ 0.999
152
+ ],
153
+ "eps": 1e-08,
154
+ "weight_decay": 1e-05,
155
+ "amsgrad": false,
156
+ "maximize": false,
157
+ "foreach": null,
158
+ "capturable": false,
159
+ "differentiable": false,
160
+ "fused": null,
161
+ "decoupled_weight_decay": true,
162
+ "initial_lr": 0.005,
163
+ "params": [
164
+ 14,
165
+ 15,
166
+ 16,
167
+ 17,
168
+ 18,
169
+ 19,
170
+ 20,
171
+ 21,
172
+ 22,
173
+ 23,
174
+ 24,
175
+ 25,
176
+ 26,
177
+ 27,
178
+ 28,
179
+ 29,
180
+ 30,
181
+ 31,
182
+ 32,
183
+ 33,
184
+ 34,
185
+ 35,
186
+ 36,
187
+ 37,
188
+ 38,
189
+ 39,
190
+ 40,
191
+ 41,
192
+ 42,
193
+ 43,
194
+ 44,
195
+ 45,
196
+ 46,
197
+ 47,
198
+ 48,
199
+ 49,
200
+ 50,
201
+ 51,
202
+ 52,
203
+ 53,
204
+ 54,
205
+ 55,
206
+ 56,
207
+ 57,
208
+ 58,
209
+ 59,
210
+ 60,
211
+ 61,
212
+ 62,
213
+ 63,
214
+ 64
215
+ ]
216
+ }
217
+ ]
218
+ },
219
+ "scheduler_state_dict": {
220
+ "T_0": 10,
221
+ "T_i": 10,
222
+ "T_mult": 2,
223
+ "eta_min": 1e-06,
224
+ "T_cur": 1,
225
+ "base_lrs": [
226
+ 0.01,
227
+ 0.01,
228
+ 0.01,
229
+ 0.01,
230
+ 0.01,
231
+ 0.005
232
+ ],
233
+ "last_epoch": 1,
234
+ "_step_count": 0,
235
+ "_is_initial": false,
236
+ "_get_lr_called_within_step": false,
237
+ "_last_lr": [
238
+ 0.00975530705321762,
239
+ 0.00975530705321762,
240
+ 0.00975530705321762,
241
+ 0.00975530705321762,
242
+ 0.00975530705321762,
243
+ 0.004877665762479736
244
+ ]
245
+ },
246
+ "metrics": {
247
+ "best_val_acc": 66.428,
248
+ "best_epoch": 0,
249
+ "scale_accuracies": {
250
+ "256": 66.428
251
+ },
252
+ "training_history": {
253
+ "epochs": [
254
+ 1
255
+ ],
256
+ "train_loss": [
257
+ 3.2117288923670015
258
+ ],
259
+ "train_acc": [
260
+ 56.118471154293964
261
+ ],
262
+ "val_acc": [
263
+ 66.428
264
+ ],
265
+ "scale_accs": {
266
+ "256": [
267
+ 66.428
268
+ ]
269
+ },
270
+ "lr": [
271
+ 0.00975530705321762
272
+ ]
273
+ }
274
+ },
275
+ "train_config": {
276
+ "name": "david_training",
277
+ "run_id": "20251012_210041",
278
+ "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
279
+ "model_variant": [
280
+ "clip_vit_b16",
281
+ "clip_vit_laion_b32",
282
+ "clip_vit_b32"
283
+ ],
284
+ "num_classes": 1000,
285
+ "preset": "balanced",
286
+ "custom_config_path": null,
287
+ "num_classes_override": null,
288
+ "use_belly_override": null,
289
+ "belly_expand_override": null,
290
+ "progressive_training_override": true,
291
+ "scale_warmup_epochs_override": {
292
+ "256": 0,
293
+ "512": 2,
294
+ "768": 5,
295
+ "1024": 8
296
+ },
297
+ "num_epochs": 10,
298
+ "batch_size": 1024,
299
+ "learning_rate": 0.01,
300
+ "weight_decay": 1e-05,
301
+ "warmup_epochs": 3,
302
+ "use_rose_loss": true,
303
+ "rose_initial_weight": 0.2,
304
+ "rose_max_weight": 0.8,
305
+ "rose_weight_schedule": "adaptive",
306
+ "use_cayley_loss": false,
307
+ "cayley_weight": 0.01,
308
+ "scale_loss_balance": null,
309
+ "use_mixed_precision": false,
310
+ "gradient_clip": 10.0,
311
+ "scheduler_type": "cosine_restarts",
312
+ "min_lr": 1e-06,
313
+ "freeze_strategy": "never",
314
+ "freeze_threshold": 90.0,
315
+ "unfreeze_on_plateau": true,
316
+ "patience": 10,
317
+ "track_gradients": true,
318
+ "gradient_scale_threshold": 1e-05,
319
+ "gradient_scale_multiplier": 10.0,
320
+ "log_interval": 50,
321
+ "val_interval": 1,
322
+ "save_interval": 5,
323
+ "log_fusion_weights": true,
324
+ "log_loss_components": true,
325
+ "save_format": "safetensors",
326
+ "hf_repo": "AbstractPhil/david-shared-space",
327
+ "upload_to_hub": true,
328
+ "base_dir": "./david_training",
329
+ "num_workers": 10,
330
+ "pin_memory": true,
331
+ "prefetch_factor": 4,
332
+ "persistent_workers": true
333
+ }
334
+ }