AbstractPhil commited on
Commit
2abe4bd
·
verified ·
1 Parent(s): 618337d

Update best_model_acc66.04_metadata.json - Run 20251012_231445

Browse files
weights/David-fully_shared-weighted_sum/20251012_231445/best_model_acc66.04_metadata.json ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5,
3
+ "optimizer_state_dict": {
4
+ "state": {
5
+ "0": {
6
+ "step": "tensor(22524.)",
7
+ "exp_avg": "tensor([[-2.0140e-05, 4.3557e-05, -2.8514e-05, ..., 9.7695e-05,\n 7.0913e-05, 1.7317e-05],\n [-2.7719e-05, -2.0841e-05, 1.1140e-04, ..., 1.2438e-05,\n -2.4036e-06, 5.9651e-06],\n [-4.8976e-05, 2.0057e-05, 4.7017e-05, ..., 6.2136e-05,\n -5.8781e-07, 6.0841e-05],\n ...,\n [-1.8867e-05, 1.8745e-05, 1.9236e-05, ..., -2.5041e-05,\n -1.5164e-05, -7.4327e-06],\n [-1.8311e-05, 4.6939e-05, 4.5506e-05, ..., 6.9831e-05,\n -8.2595e-06, 4.2299e-05],\n [-7.6802e-07, -8.9338e-05, -5.4552e-06, ..., -2.8225e-06,\n 3.9479e-05, 6.0676e-05]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[1.9250e-08, 4.4959e-08, 2.7241e-08, ..., 3.0378e-08, 2.3461e-08,\n 1.1804e-08],\n [6.0786e-08, 9.4591e-08, 4.4552e-08, ..., 3.5624e-08, 4.9340e-08,\n 1.9230e-08],\n [1.2164e-07, 2.2247e-07, 8.9121e-08, ..., 7.1067e-08, 4.1405e-08,\n 6.5837e-08],\n ...,\n [2.8363e-08, 1.3389e-07, 2.5333e-08, ..., 2.3687e-08, 1.8828e-08,\n 1.5936e-08],\n [3.1565e-08, 4.0499e-07, 3.9968e-08, ..., 8.5467e-08, 2.0214e-08,\n 2.6741e-08],\n [4.7097e-08, 6.7904e-08, 2.2807e-08, ..., 7.8811e-08, 2.2292e-08,\n 2.2321e-08]], device='cuda:0')"
9
+ },
10
+ "1": {
11
+ "step": "tensor(22524.)",
12
+ "exp_avg": "tensor([ 2.1376e-03, -2.2651e-03, 2.8685e-03, -2.8431e-03, -1.6321e-03,\n 7.8815e-04, 1.6387e-03, 1.2387e-05, 3.6277e-04, 4.5087e-04,\n 4.3780e-05, 7.5553e-04, 1.0313e-03, 5.9576e-04, 3.0034e-04,\n 1.7034e-03, -9.6667e-04, 2.2824e-03, -3.4367e-04, 1.4513e-03,\n -8.9234e-04, 5.6150e-04, 2.6569e-03, -1.5060e-04, -5.1357e-04,\n 1.6759e-03, 2.0599e-03, -2.0380e-03, 1.0908e-05, 1.9212e-04,\n -8.4541e-04, 1.0466e-03, -1.0820e-03, 1.3699e-04, 4.0851e-04,\n -2.4621e-04, 3.0267e-04, -3.9513e-03, 2.7451e-04, -1.6239e-03,\n 6.7864e-04, 2.0453e-03, 3.5591e-04, 7.5208e-04, -1.9814e-03,\n 8.6160e-04, 1.3622e-03, 1.7238e-03, -4.2296e-04, 2.9388e-04,\n -1.1369e-04, -8.3572e-05, 1.7607e-03, 1.6794e-03, -7.4012e-04,\n 1.6398e-03, 3.6522e-04, -8.5275e-04, -8.4582e-04, -1.0823e-03,\n 1.0837e-03, -1.3106e-03, -6.7725e-04, 1.3276e-03, -6.4744e-04,\n 3.9003e-03, 1.4390e-03, -1.3133e-03, 2.4365e-03, -3.6793e-05,\n -4.5788e-04, -9.4165e-04, -1.9424e-03, -9.9246e-04, 6.0387e-04,\n -2.1663e-03, -3.5401e-03, 4.0461e-04, 8.2538e-04, -2.1473e-03,\n 2.5253e-05, -5.0508e-04, 1.5659e-04, 1.3025e-03, -1.3195e-03,\n 1.4294e-03, -1.3516e-03, -2.4169e-04, -5.5611e-04, -7.1308e-04,\n 6.5990e-05, 1.1077e-04, 2.7549e-04, -7.0821e-03, 6.0666e-04,\n -7.9619e-04, -7.7560e-04, 1.2178e-03, 9.9380e-04, -1.8328e-03,\n 1.5025e-03, 2.5420e-03, 1.7241e-03, -6.7005e-04, 8.4671e-04,\n 2.3014e-03, 1.2412e-03, 1.4852e-03, 4.4687e-03, -1.0093e-03,\n 2.2956e-04, 1.1287e-03, -5.0017e-05, -1.6696e-03, 1.2922e-03,\n 2.7189e-03, -2.0813e-04, 5.1662e-04, -2.5945e-04, -1.7800e-04,\n -9.4261e-04, -5.8652e-05, 5.3813e-04, -5.2580e-05, -3.6293e-03,\n -2.0566e-03, -3.9847e-03, 2.9403e-03, 4.8696e-04, -2.4422e-03,\n 2.1161e-03, 5.3971e-04, -2.6274e-03, -4.8542e-04, -2.4314e-03,\n 4.6077e-04, -8.4083e-04, 2.0931e-03, 1.7771e-03, 3.2861e-03,\n -1.5090e-05, 1.3559e-03, 3.7177e-04, -2.5513e-03, -6.2061e-04,\n -1.1101e-03, 2.3231e-03, 1.5473e-03, -1.6749e-03, 6.7770e-04,\n -8.0645e-04, -4.3188e-05, -1.5304e-04, -2.6369e-03, 6.8345e-04,\n -5.8489e-04, -1.4593e-03, -9.4170e-04, -8.3568e-04, 8.2584e-04,\n 1.7839e-03, -1.3194e-04, 4.6670e-04, 3.4957e-03, -4.0857e-04,\n -2.2286e-04, -4.7952e-05, -3.4911e-03, 8.6175e-04, -6.8295e-04,\n -9.6191e-04, 1.2061e-03, 6.0783e-04, 5.3741e-04, -2.9470e-04,\n 7.7392e-04, -3.2803e-04, 1.5302e-03, 3.3940e-04, -9.3148e-04,\n -2.4179e-03, -3.5141e-03, -2.1488e-03, 2.7552e-04, 1.0881e-03,\n -5.6396e-03, -1.6671e-03, -1.0502e-03, 1.3829e-03, -2.5085e-03,\n -1.9296e-04, -1.7224e-03, -8.6649e-04, 9.7650e-04, 6.1659e-04,\n -1.3211e-03, 6.4591e-04, -5.7492e-04, -1.8364e-04, 1.7145e-03,\n 7.7657e-04, 1.7483e-03, -1.0740e-03, 9.1059e-04, 3.4227e-05,\n 3.3511e-03, -1.6909e-03, -1.6561e-03, 3.2764e-04, -1.0173e-03,\n 5.4345e-04, -1.9102e-04, 7.5116e-04, -1.4175e-03, -8.7607e-05,\n 3.9110e-03, 2.9755e-04, 7.1132e-04, -6.1028e-05, 7.4066e-04,\n 5.0740e-04, -5.5343e-04, 7.6526e-04, 1.2970e-03, -1.8354e-04,\n -1.9359e-03, 1.4730e-03, 4.2738e-04, 7.8788e-04, 1.7649e-03,\n -6.3476e-03, -2.2953e-04, 3.1683e-05, -2.1200e-03, 1.0032e-03,\n -8.4210e-04, 4.7379e-04, -5.2246e-05, 4.4540e-03, -1.0210e-03,\n 1.1336e-03, 1.0389e-03, 9.2207e-05, 2.1199e-03, -1.5610e-03,\n -1.2441e-04, 3.2760e-03, 1.0805e-03, 1.2863e-03, -1.2957e-03,\n 2.9566e-03, 1.6668e-03, -1.6922e-03, -1.7609e-03, -2.1026e-03,\n -2.8165e-04, 2.4395e-04, -3.1778e-03, -6.4804e-04, -5.2624e-04,\n -7.1395e-04, 5.4393e-04, 4.5366e-04, -9.2640e-05, -7.6603e-04,\n -2.0820e-03, -3.3794e-03, 6.7738e-04, -9.3760e-04, 2.1149e-03,\n -7.4598e-04, 2.5261e-03, 2.8991e-04, 3.0155e-04, 9.5037e-04,\n -1.4645e-03, 4.1133e-03, 2.1414e-03, 2.3044e-05, -7.8123e-05,\n 1.5990e-03, 2.2384e-03, -6.0330e-04, -4.4955e-04, -2.6639e-03,\n 1.9834e-04, -3.1950e-03, -6.0653e-04, -1.5799e-04, 1.9187e-04,\n -4.7210e-04, -7.8325e-04, 9.3262e-04, -3.7603e-03, 5.4993e-04,\n -1.7438e-04, -9.7520e-04, 1.0215e-03, -1.5495e-03, -3.0029e-03,\n -8.5315e-04, -3.4874e-04, 1.1944e-03, 1.0109e-03, -1.1722e-04,\n -7.4466e-04, -2.4575e-04, -2.7689e-03, 1.7356e-04, -3.7343e-04,\n 4.8652e-04, -1.5498e-03, 2.0602e-03, 4.6359e-06, -2.5741e-04,\n 2.0586e-03, 3.1772e-04, 1.0982e-03, 2.3244e-04, 1.2549e-03,\n 1.9806e-04, 4.0777e-04, -7.6837e-04, 1.8571e-03, -2.8390e-03,\n -3.5671e-03, 1.6501e-03, -1.2618e-04, 1.9727e-04, 2.7243e-03,\n -6.3145e-04, -3.3306e-03, -6.9748e-04, 9.5845e-04, 7.1380e-04,\n 3.2548e-03, 6.2928e-05, 1.2205e-03, -1.0771e-03, -4.9318e-04,\n 1.3775e-04, 1.7804e-03, 1.1291e-03, -6.1719e-04, 1.1511e-03,\n -2.4976e-03, 1.9294e-03, -3.9694e-04, 1.7808e-04, -1.6467e-04,\n -6.6760e-04, -3.4766e-05, -4.2042e-04, 7.8547e-04, 2.9366e-04,\n -1.9425e-03, 1.2850e-03, -2.1854e-03, -2.0835e-03, -3.3827e-04,\n 1.4510e-03, -3.3289e-04, 3.0176e-03, -1.3277e-03, -1.6884e-03,\n 2.0908e-03, 1.4249e-04, -4.5804e-04, -3.1227e-03, -8.9837e-04,\n 6.6252e-04, -6.0143e-04, 1.9291e-03, -5.7859e-05, 3.5034e-04,\n 8.6443e-04, -6.0061e-04, 1.2204e-03, -2.2328e-03, 1.2206e-03,\n -1.7621e-03, 1.0864e-03, 7.1073e-04, 1.7892e-03, 1.0850e-03,\n 6.1533e-04, -1.4896e-03, 9.1295e-07, -1.9165e-03, 7.4694e-04,\n -8.5028e-04, 1.6359e-03, 1.4988e-03, -8.1795e-04, 2.5070e-03,\n 6.7605e-04, -3.1058e-04, 1.8595e-03, -1.3826e-03, -5.4404e-04,\n 8.3408e-04, 3.5417e-04, 4.7209e-04, 1.4689e-03, -1.6089e-03,\n 6.4831e-04, -3.4443e-05, 1.5539e-03, 1.8180e-03, 3.2155e-05,\n -8.8386e-04, 6.2744e-04, -7.6172e-03, -1.5185e-03, 1.9569e-03,\n -2.6428e-03, -6.4335e-04, -3.3117e-04, 2.9207e-04, 2.0101e-03,\n 7.6723e-04, -1.2366e-03, 3.3578e-04, -8.6700e-04, 9.3778e-04,\n -2.2675e-03, 2.5049e-03, 1.2642e-03, -2.7096e-04, -2.4045e-04,\n -2.4126e-03, 1.2662e-03, 9.4277e-05, -3.7308e-03, -7.7613e-04,\n -1.1789e-03, 4.1334e-04, 1.0558e-03, -1.7680e-03, -3.6487e-03,\n 8.9844e-05, 1.9536e-03, 2.4407e-03, 8.0992e-04, -2.4445e-03,\n -6.4517e-04, 6.1561e-04, 2.0714e-04, -8.4138e-06, 1.2040e-03,\n 1.2326e-03, 1.2718e-04, -1.2015e-03, 1.0049e-03, 1.6421e-04,\n -3.1568e-04, -1.2035e-03, -5.0512e-04, -4.3212e-04, -4.4936e-04,\n 7.7285e-04, 2.0988e-03, 3.3464e-04, 1.0907e-03, 2.0626e-03,\n -3.6913e-04, 7.1518e-05, -5.5855e-03, 8.1888e-05, 3.7374e-03,\n 1.2333e-03, 8.3523e-04, 1.9212e-03, -1.2502e-03, 1.1413e-03,\n -3.1367e-04, -3.2162e-03, 1.9608e-03, -1.4394e-03, -2.8573e-03,\n 2.1110e-03, -9.1269e-04, 8.8021e-04, 2.6475e-04, 1.5587e-03,\n -8.4683e-04, -1.2547e-03, -1.7869e-03, -2.0549e-03, -5.6506e-04,\n 2.4620e-03, -1.3491e-03, 7.1741e-03, -2.3090e-03, -1.0610e-03,\n -4.5026e-04, -9.3126e-04, 1.1022e-03, 2.2261e-03, -6.4497e-04,\n 1.0501e-03, -1.4118e-04, 4.6880e-04, 3.5457e-04, 5.2603e-04,\n -8.7201e-04, 1.2082e-04, 3.7515e-05, 5.0507e-04, -7.2588e-04,\n 1.4690e-03, 3.4182e-05], device='cuda:0')",
13
+ "exp_avg_sq": "tensor([1.9541e-05, 3.4729e-05, 5.5257e-05, 4.3988e-05, 3.1036e-05, 6.8888e-05,\n 6.7775e-05, 1.9730e-05, 3.4958e-05, 4.1988e-05, 5.1169e-05, 3.5103e-05,\n 2.5918e-05, 3.5373e-05, 9.0967e-05, 2.3552e-05, 4.5542e-05, 9.1313e-05,\n 1.4246e-05, 3.5422e-05, 2.8645e-05, 4.3656e-05, 6.3245e-05, 3.4413e-05,\n 2.0399e-05, 2.4020e-05, 6.8984e-05, 5.5448e-05, 4.2887e-05, 3.2594e-05,\n 3.5270e-05, 3.6584e-05, 1.7542e-05, 1.0673e-04, 5.1421e-05, 3.2465e-05,\n 2.5253e-05, 5.2642e-05, 5.8367e-05, 4.9715e-05, 3.3048e-05, 3.7049e-05,\n 3.8360e-05, 2.8605e-05, 4.1696e-05, 4.2523e-05, 2.7236e-05, 2.1045e-05,\n 2.2786e-05, 2.5806e-05, 2.7622e-05, 3.2325e-05, 9.0367e-05, 3.6415e-05,\n 6.1584e-05, 3.8444e-05, 3.4661e-05, 2.5982e-05, 3.6333e-05, 3.8929e-05,\n 2.1195e-05, 3.2106e-05, 1.9860e-05, 7.8446e-05, 7.9669e-05, 6.1922e-05,\n 3.8358e-05, 6.6074e-05, 2.7980e-05, 4.9350e-05, 3.2019e-05, 5.7897e-05,\n 4.5791e-05, 8.6452e-05, 3.4290e-05, 5.1996e-05, 6.6070e-05, 2.6022e-05,\n 3.9313e-05, 4.9484e-05, 3.8536e-05, 3.4691e-05, 2.8852e-05, 5.0987e-05,\n 4.6117e-05, 3.8387e-05, 1.1044e-04, 2.1757e-05, 3.5608e-05, 5.4719e-05,\n 3.9780e-05, 2.5492e-05, 7.5805e-05, 1.2771e-04, 4.4895e-05, 5.1289e-05,\n 4.7034e-05, 6.5502e-05, 4.3393e-05, 3.4421e-05, 3.8484e-05, 3.9415e-05,\n 2.2973e-05, 3.1796e-05, 2.6869e-05, 2.8061e-05, 3.5652e-05, 2.2028e-05,\n 4.6548e-05, 1.7768e-05, 8.5628e-05, 1.9485e-05, 5.9349e-05, 3.4481e-05,\n 3.4490e-05, 1.9928e-05, 3.6550e-05, 7.4507e-05, 3.0950e-05, 4.2206e-05,\n 4.2379e-05, 4.7049e-05, 6.2958e-05, 5.3074e-05, 3.7576e-05, 5.2955e-05,\n 4.0559e-05, 6.5156e-05, 5.6412e-05, 4.0633e-05, 3.9991e-05, 9.8220e-05,\n 5.3666e-05, 4.3170e-05, 7.3131e-05, 2.6742e-05, 2.6171e-05, 2.1095e-05,\n 6.1115e-05, 4.6175e-05, 2.8739e-05, 5.8605e-05, 4.0659e-05, 1.0229e-04,\n 3.6105e-05, 5.2633e-05, 5.0634e-05, 3.3265e-05, 8.8678e-05, 4.0951e-05,\n 3.7226e-05, 5.0360e-05, 4.3963e-05, 4.7758e-05, 3.8660e-05, 5.7645e-05,\n 2.2539e-05, 2.0044e-05, 4.6704e-05, 2.6216e-05, 4.7184e-05, 5.2090e-05,\n 5.3403e-05, 3.6252e-05, 5.6915e-05, 3.0101e-05, 4.7846e-05, 3.2458e-05,\n 6.5640e-05, 8.5715e-05, 2.7241e-05, 4.6013e-05, 2.8544e-05, 3.9170e-05,\n 2.7471e-05, 1.7925e-05, 3.3982e-05, 3.9365e-05, 3.6190e-05, 4.0189e-05,\n 8.9500e-05, 5.6088e-05, 1.2077e-04, 3.5949e-05, 1.6867e-05, 5.8158e-05,\n 5.2029e-05, 5.6640e-05, 5.7629e-05, 4.6950e-05, 3.6787e-06, 3.6199e-05,\n 1.4829e-05, 2.0508e-05, 2.7178e-05, 3.6857e-05, 2.6555e-05, 3.2512e-05,\n 3.5005e-05, 2.8432e-05, 4.3641e-05, 3.4148e-05, 3.5630e-05, 2.0503e-05,\n 2.8933e-05, 7.8778e-05, 3.5278e-05, 2.8269e-05, 3.0306e-05, 3.1817e-05,\n 4.3016e-05, 3.3646e-05, 3.3264e-05, 3.8665e-05, 3.7309e-05, 4.1410e-05,\n 4.6055e-05, 3.1837e-05, 3.5630e-05, 3.1888e-05, 5.2181e-05, 6.5605e-05,\n 2.8649e-05, 5.5544e-05, 2.8657e-05, 3.1145e-05, 3.6563e-05, 3.5282e-05,\n 5.6758e-05, 7.1830e-05, 5.3441e-05, 3.0323e-05, 3.2803e-05, 2.9539e-05,\n 2.3941e-05, 4.0274e-05, 2.1575e-05, 3.5510e-05, 4.9999e-05, 4.0535e-05,\n 2.4518e-05, 3.2886e-05, 2.2093e-05, 4.3427e-05, 1.2957e-05, 3.1469e-05,\n 7.8420e-05, 3.6436e-05, 3.0968e-05, 4.0762e-05, 6.0518e-05, 5.6563e-05,\n 2.3062e-05, 3.7819e-05, 3.4960e-05, 3.0505e-05, 3.1945e-05, 3.6083e-05,\n 3.6295e-05, 7.6118e-05, 3.6961e-05, 2.9085e-05, 4.8510e-05, 2.0098e-05,\n 4.5080e-05, 4.6692e-05, 5.7857e-05, 2.6658e-05, 1.9928e-05, 1.9316e-05,\n 2.9041e-05, 3.5335e-05, 2.8904e-05, 5.6362e-05, 2.3719e-05, 4.9468e-05,\n 8.4407e-05, 2.7179e-05, 4.8943e-05, 2.5990e-05, 3.7055e-05, 4.3703e-05,\n 3.2353e-05, 3.7056e-05, 3.8611e-05, 4.0675e-05, 6.4762e-05, 4.1186e-05,\n 2.9849e-06, 7.6074e-05, 4.7514e-05, 2.3642e-05, 4.7942e-05, 5.3840e-05,\n 4.1286e-05, 6.1806e-06, 3.1142e-05, 6.1073e-05, 4.3276e-05, 9.9647e-05,\n 5.9839e-05, 5.5611e-05, 4.5273e-05, 3.1101e-05, 6.0621e-05, 4.7165e-05,\n 2.5919e-05, 2.9835e-05, 5.0436e-05, 2.8824e-05, 5.2123e-05, 5.4728e-05,\n 7.5560e-05, 2.6383e-05, 3.4320e-05, 4.2807e-05, 3.4213e-05, 4.1701e-05,\n 1.5997e-05, 3.9751e-05, 4.9182e-05, 4.7039e-05, 3.1228e-05, 5.3203e-05,\n 6.0506e-05, 7.7002e-05, 3.0813e-05, 4.0144e-05, 1.8938e-05, 4.1508e-05,\n 3.1972e-05, 5.7163e-05, 3.6518e-05, 1.9455e-05, 8.1556e-05, 5.8516e-05,\n 1.1427e-05, 3.1902e-05, 3.1476e-05, 7.4706e-05, 2.2839e-05, 4.9465e-05,\n 4.8923e-05, 3.0160e-05, 5.1225e-05, 3.6927e-05, 3.2100e-05, 4.0008e-05,\n 4.6443e-05, 6.7024e-05, 2.3091e-05, 7.0006e-05, 2.3336e-05, 5.3587e-05,\n 8.4416e-05, 4.4995e-05, 3.7909e-05, 4.2821e-05, 2.6264e-05, 3.6843e-05,\n 7.7002e-05, 3.3121e-05, 2.8422e-05, 1.9061e-05, 3.2750e-05, 3.4235e-05,\n 1.8179e-05, 2.9522e-05, 7.1547e-05, 2.4539e-05, 3.9075e-05, 4.4444e-06,\n 2.8680e-05, 3.4987e-05, 4.0383e-05, 3.7881e-05, 7.9966e-05, 5.2856e-05,\n 4.5591e-05, 4.1405e-05, 5.1298e-05, 5.7517e-05, 3.1981e-05, 2.7876e-05,\n 3.5030e-05, 5.4217e-05, 3.1311e-05, 2.5415e-05, 3.5631e-05, 2.2913e-05,\n 2.0009e-05, 2.6482e-05, 3.1146e-05, 3.3170e-05, 3.7748e-05, 5.4476e-05,\n 3.7624e-05, 4.0688e-05, 2.8184e-05, 6.0321e-05, 3.0419e-05, 3.3632e-05,\n 3.2989e-05, 2.8652e-05, 3.4050e-05, 1.5849e-05, 1.9483e-05, 4.9454e-05,\n 4.9168e-05, 5.9707e-05, 3.2353e-05, 4.8764e-05, 5.2941e-05, 3.2344e-05,\n 4.2624e-05, 4.2666e-05, 5.6268e-05, 4.2297e-05, 4.3316e-05, 3.6673e-05,\n 3.6167e-05, 3.7546e-05, 3.4882e-05, 8.3504e-05, 2.8580e-05, 5.3198e-05,\n 2.5649e-05, 3.7355e-05, 3.9800e-05, 2.3870e-05, 4.5961e-05, 6.6630e-05,\n 4.3523e-05, 4.4442e-05, 4.3631e-05, 4.5667e-05, 4.3417e-05, 5.3330e-05,\n 4.9138e-05, 4.6626e-05, 1.9000e-05, 7.8950e-05, 4.0815e-05, 4.9983e-05,\n 2.5966e-05, 4.2081e-05, 4.1385e-05, 3.2074e-05, 5.5050e-05, 4.9641e-05,\n 4.9982e-05, 5.5452e-05, 2.7734e-05, 5.2531e-05, 1.3991e-04, 2.9784e-05,\n 4.9395e-05, 3.5933e-05, 4.0647e-05, 4.6430e-05, 5.3163e-05, 4.1073e-05,\n 4.0329e-05, 3.9654e-05, 2.1128e-05, 2.4416e-05, 3.5829e-05, 6.3281e-05,\n 2.7386e-05, 6.0578e-05, 6.5993e-05, 1.8841e-05, 2.7154e-05, 3.5613e-05,\n 5.2657e-05, 2.5073e-05, 4.0000e-05, 5.5013e-05, 2.9792e-05, 5.5147e-05,\n 5.9438e-05, 2.9099e-05, 2.3663e-05, 3.3062e-05, 6.7236e-05, 2.8031e-05,\n 2.0452e-05, 2.8996e-05, 5.1841e-05, 3.5131e-05, 3.7260e-05, 4.6474e-05,\n 1.2573e-04, 3.8582e-05, 3.8511e-05, 1.9904e-05, 6.0424e-05, 3.4848e-05,\n 3.7424e-05, 4.4362e-05, 3.6553e-05, 3.6239e-05, 4.4037e-05, 2.4689e-05,\n 5.0611e-05, 3.9555e-05, 5.0953e-05, 2.8931e-05, 5.3728e-05, 2.0796e-05,\n 3.9141e-05, 3.9550e-05], device='cuda:0')"
14
+ },
15
+ "2": {
16
+ "step": "tensor(22524.)",
17
+ "exp_avg": "tensor([ 6.4502e-03, -6.1902e-03, 6.0827e-03, -5.3119e-03, -4.0026e-03,\n 2.4208e-03, 2.8125e-03, 9.0161e-04, 1.4245e-03, 6.2019e-04,\n -6.4395e-04, 3.2703e-03, 2.9564e-03, 3.9612e-04, 9.3844e-05,\n 5.6890e-03, -1.7307e-03, 5.2097e-03, -6.7058e-04, 2.7854e-03,\n -2.7223e-03, 5.0484e-04, 5.0567e-03, 9.8819e-04, -2.0932e-03,\n 4.7078e-03, 5.1645e-03, -3.4414e-03, 6.7059e-04, 9.8011e-05,\n -1.6283e-03, 2.7625e-03, -2.7324e-03, 1.5256e-03, 1.6427e-04,\n -7.0528e-04, 1.3750e-03, -7.5897e-03, 1.1326e-03, -1.9955e-03,\n 2.6894e-03, 1.0549e-02, 6.7742e-04, 9.7563e-04, -5.4365e-03,\n 3.2400e-03, 5.0459e-03, 4.0842e-03, -1.9006e-03, 6.6028e-04,\n -6.6681e-04, 8.9623e-04, 2.7312e-03, 4.5182e-03, -1.6385e-03,\n 4.0565e-03, 1.3265e-03, -1.0571e-03, -2.1604e-03, -2.0820e-03,\n 2.9211e-03, -2.2994e-03, -2.4959e-03, 1.9406e-03, -1.9707e-03,\n 7.6474e-03, 3.4542e-03, -4.0609e-03, 5.6641e-03, 2.4849e-04,\n -1.2064e-03, -1.4677e-03, -5.5939e-03, -4.5238e-03, 1.6914e-03,\n -2.8720e-03, -9.1055e-03, -6.9519e-04, 2.2519e-03, -4.0584e-03,\n -2.7155e-04, -1.2181e-03, 1.2962e-03, 3.7188e-03, -2.2348e-03,\n 5.5363e-03, -2.9249e-03, -2.2731e-04, -2.5890e-03, -1.2498e-03,\n -3.3028e-04, 2.0814e-03, 1.2396e-03, -1.0061e-02, 8.9861e-04,\n -1.6956e-03, -1.2010e-03, 1.0900e-03, 2.1476e-03, -3.2365e-03,\n 4.4733e-03, 4.5941e-03, 3.2397e-03, -1.4102e-03, 2.1822e-03,\n 5.6162e-03, 3.3767e-03, 3.9598e-03, 7.8239e-03, -2.5686e-03,\n -2.1883e-04, 1.6983e-03, 2.7236e-04, -4.1817e-03, 2.9788e-03,\n 5.5591e-03, -1.1120e-03, 8.4918e-04, 1.6485e-04, -1.7108e-03,\n -2.0895e-03, -1.6440e-04, 7.3289e-04, -2.8576e-04, -6.7985e-03,\n -2.9081e-03, -6.2938e-03, 5.7370e-03, 2.6094e-03, -5.0102e-03,\n 3.0447e-03, 1.8602e-03, -6.0653e-03, -4.6246e-04, -3.5579e-03,\n 2.8258e-03, -1.5160e-03, 4.0679e-03, 3.0832e-03, 7.8727e-03,\n 9.7139e-04, 1.2137e-03, 1.4522e-04, -6.3759e-03, -7.1618e-04,\n -2.3071e-03, 4.9626e-03, 3.0520e-03, -2.7422e-03, 4.2411e-03,\n -1.2810e-03, 3.4790e-05, -1.9578e-04, -4.2784e-03, 1.6720e-03,\n -1.7613e-03, -4.9809e-03, -1.1846e-03, -1.2927e-03, 2.5331e-03,\n 3.0488e-03, -6.6385e-05, 2.7756e-03, 6.5318e-03, 1.7456e-05,\n -9.8614e-04, -3.2979e-04, -4.5262e-03, 9.7844e-04, -2.3680e-03,\n -3.8028e-03, 1.7447e-03, 4.1887e-04, 2.6506e-03, -1.6061e-03,\n 2.0766e-03, -7.6637e-04, 1.3036e-03, -2.5122e-04, -2.2136e-03,\n -3.8122e-03, -1.0333e-02, -4.8829e-03, 8.0991e-04, 3.1130e-03,\n -1.0985e-02, -3.6740e-03, -3.3549e-03, 2.2539e-03, -2.8345e-03,\n -5.6052e-45, -2.5641e-03, -4.0843e-03, 2.4049e-03, 1.6645e-03,\n -2.6491e-03, 1.1406e-03, -8.7265e-04, 2.7295e-05, 4.1519e-03,\n 9.4354e-04, 3.3471e-03, -4.1897e-03, 2.3061e-03, -6.1075e-04,\n 5.5672e-03, -4.6432e-03, -3.0692e-03, 6.9697e-04, -2.0546e-03,\n 3.4360e-04, 2.1235e-04, 1.1218e-03, -3.0486e-03, -5.2239e-05,\n 9.3140e-03, 8.3694e-04, 1.5182e-03, -8.3907e-04, 1.6255e-03,\n 1.5023e-03, 5.4642e-04, 7.3924e-04, 1.7927e-03, -3.6443e-04,\n -3.1563e-03, 2.9150e-03, 1.7218e-03, 2.5564e-03, 3.7025e-03,\n -1.6032e-02, 1.3406e-03, -9.2100e-04, -7.1484e-03, 2.3701e-03,\n -1.2711e-03, 1.9308e-03, -3.3226e-04, 9.5202e-03, -1.4469e-03,\n 2.3833e-03, 3.2629e-03, 1.4282e-03, 6.1254e-03, -5.2946e-03,\n 4.9983e-04, 5.9679e-03, 1.8216e-03, 2.6784e-03, -1.6081e-03,\n 5.7274e-03, 3.8011e-03, -4.2472e-03, -2.4480e-03, -2.9235e-03,\n -4.2837e-04, 3.2905e-04, -9.3679e-03, -5.7677e-04, -4.2738e-04,\n -3.5476e-03, 2.7161e-04, 2.2610e-03, -5.2143e-04, -3.0517e-03,\n -5.8230e-03, -1.1112e-02, 1.7774e-03, -1.6907e-03, 4.2091e-03,\n -1.1394e-03, 5.7296e-03, 1.9741e-04, -7.5018e-04, 2.6691e-03,\n -2.5849e-03, 8.1455e-03, 3.5560e-03, 3.4325e-04, 4.5690e-05,\n 2.9162e-03, 3.9910e-03, -2.8289e-03, -4.1727e-04, -7.0623e-03,\n -2.0688e-04, -6.0949e-03, -9.3245e-04, 5.6052e-45, 1.7637e-03,\n -1.4445e-03, -4.6074e-04, 2.6359e-03, -7.3095e-03, 2.5086e-03,\n 5.6052e-45, -4.0097e-03, 2.8763e-03, -3.5094e-03, -6.2326e-03,\n -9.4967e-04, -7.3594e-05, 2.1766e-03, 2.3049e-03, -8.1167e-04,\n -1.3464e-03, -8.4114e-05, -8.6349e-03, -1.5466e-03, -2.4895e-03,\n 1.3798e-03, -4.8893e-03, 4.0331e-03, -4.7020e-04, -1.3397e-03,\n 5.3947e-03, 8.7893e-04, 2.6922e-03, 5.9542e-04, 2.5239e-03,\n -9.3478e-04, 1.5200e-03, -1.3454e-03, 3.7670e-03, -7.0680e-03,\n -8.1598e-03, 3.5471e-03, 2.0949e-04, 7.5159e-04, 6.9493e-03,\n -1.6200e-03, -5.0073e-03, -1.3403e-03, 2.3509e-03, 1.2467e-03,\n 7.3145e-03, 7.3629e-04, 2.3755e-03, -3.3865e-03, -1.0648e-03,\n 1.7827e-04, 3.1723e-03, 3.1805e-03, -2.8532e-03, 3.3864e-03,\n -5.8641e-03, 3.4273e-03, -8.2033e-04, 3.9449e-04, -6.7182e-04,\n -1.2978e-03, -1.9287e-04, -8.1248e-04, 8.7981e-04, 2.8805e-03,\n -4.5118e-03, 3.0572e-03, -5.8271e-03, -5.6698e-03, -2.7079e-04,\n 4.4279e-03, -1.0923e-04, 8.0071e-03, -5.8774e-03, -3.6611e-03,\n 2.9848e-03, -3.3420e-04, -1.1462e-03, -6.5962e-03, -4.8037e-03,\n 5.7127e-04, 5.6052e-45, 5.9833e-03, -3.2073e-04, 1.3190e-03,\n 2.8909e-03, -2.0196e-03, 2.2378e-03, -4.2160e-03, 2.4690e-03,\n -2.9725e-03, 2.6444e-03, 1.7268e-03, 4.1439e-03, 3.0826e-03,\n 1.9302e-03, -5.2008e-03, 3.7464e-04, -5.3344e-03, 2.2662e-03,\n -2.7704e-03, 3.7590e-03, 2.3633e-03, -7.4918e-04, 6.3568e-03,\n 4.1282e-03, -4.9612e-04, 5.0661e-03, -3.2772e-03, -1.9494e-03,\n 2.2323e-03, 1.0148e-03, 8.0640e-04, 4.2568e-03, -4.9046e-03,\n 9.1283e-04, 2.7897e-05, 3.9680e-03, 2.7306e-03, -2.5979e-04,\n -1.6770e-03, 6.0748e-04, -1.5211e-02, -6.9864e-03, 3.4369e-03,\n -6.4410e-03, -4.8431e-04, 1.7465e-03, 2.5015e-03, 3.5349e-03,\n 1.9295e-03, -2.3066e-03, 1.8448e-03, -9.2538e-04, 2.0754e-03,\n -4.3194e-03, 5.2902e-03, 2.8777e-03, 2.1383e-04, -2.7332e-04,\n -5.9147e-03, 2.3482e-03, -2.2081e-04, -8.4431e-03, -1.7587e-03,\n -4.7531e-03, 2.7343e-03, 2.4534e-03, -4.4928e-03, -7.7049e-03,\n 8.5835e-05, 3.9653e-03, 4.5489e-03, 4.1125e-04, -6.8719e-03,\n -2.3227e-03, 1.7342e-03, -5.6072e-04, 1.3139e-03, 2.4618e-03,\n 2.3485e-03, 2.2177e-04, -3.4604e-03, 2.4399e-03, 2.3809e-05,\n 3.7148e-04, -4.2408e-03, -8.2553e-04, -7.5044e-04, -1.5947e-04,\n 5.3916e-04, 5.5583e-03, 2.1913e-03, 2.7486e-03, 7.1440e-03,\n -1.2276e-03, -5.0207e-05, -1.1741e-02, -6.6671e-04, 6.7450e-03,\n 2.3684e-03, 2.9662e-03, 3.4334e-03, -1.0819e-03, 1.7576e-03,\n 3.0174e-04, -6.3791e-03, 5.3256e-03, -2.8578e-03, -4.0886e-03,\n 5.0692e-03, -2.6763e-03, 4.8758e-03, 1.9269e-03, 4.6436e-03,\n -1.1421e-03, -2.6899e-03, -1.1245e-03, -4.1878e-03, -2.9200e-05,\n 8.1306e-03, -1.9177e-03, 1.6282e-02, -5.8939e-03, -1.2287e-03,\n -1.2115e-03, -2.1905e-03, 4.5711e-03, 4.6706e-03, -1.3782e-03,\n 3.3834e-03, -1.0965e-03, 2.4567e-03, 1.8122e-03, 1.5597e-03,\n -1.9973e-03, 1.0119e-03, -8.2820e-05, 7.0541e-04, -3.0698e-03,\n 3.3739e-03, 1.1459e-04], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([4.3295e-04, 1.9478e-04, 2.0048e-04, 2.3219e-04, 1.6735e-04, 2.6036e-04,\n 2.3472e-04, 1.7228e-04, 1.2952e-04, 1.4267e-04, 2.6118e-04, 3.2182e-04,\n 1.5421e-04, 2.2578e-04, 2.9185e-04, 3.1262e-04, 1.0607e-04, 4.3123e-04,\n 5.1502e-05, 1.7573e-04, 2.0573e-04, 1.9921e-04, 2.1443e-04, 1.5360e-04,\n 2.4619e-04, 1.4778e-04, 4.9889e-04, 1.5450e-04, 2.1426e-04, 1.1355e-04,\n 2.3437e-04, 2.3217e-04, 1.0069e-04, 4.8074e-04, 1.4672e-04, 1.5819e-04,\n 1.4411e-04, 1.7369e-04, 2.5051e-04, 2.3639e-04, 2.9429e-04, 1.2344e-03,\n 1.0622e-04, 1.6858e-04, 2.4100e-04, 3.8897e-04, 2.7589e-04, 1.2537e-04,\n 1.9065e-04, 1.8989e-04, 1.5361e-04, 4.5608e-04, 2.9446e-04, 2.3652e-04,\n 4.3622e-04, 1.7495e-04, 1.9466e-04, 9.8559e-05, 1.6805e-04, 1.7488e-04,\n 1.6173e-04, 1.0375e-04, 2.3469e-04, 2.0636e-04, 2.5909e-04, 2.4054e-04,\n 2.6967e-04, 3.3564e-04, 1.5866e-04, 2.4334e-04, 1.2344e-04, 1.8614e-04,\n 1.5358e-04, 4.5400e-04, 1.6228e-04, 1.3896e-04, 5.0194e-04, 2.5584e-04,\n 2.0949e-04, 2.0231e-04, 1.5857e-04, 1.4025e-04, 1.3290e-04, 3.4199e-04,\n 1.6668e-04, 3.1449e-04, 2.6941e-04, 8.0676e-05, 2.4024e-04, 4.0972e-04,\n 1.3756e-04, 3.0366e-04, 6.3034e-04, 3.7676e-04, 1.7596e-04, 2.4060e-04,\n 1.2503e-04, 2.3006e-04, 2.7536e-04, 2.6158e-04, 2.5941e-04, 1.2735e-04,\n 1.4004e-04, 2.6373e-04, 7.4831e-05, 2.0155e-04, 1.5524e-04, 1.6339e-04,\n 1.0425e-04, 1.2090e-04, 4.0944e-04, 1.3391e-04, 7.5956e-04, 1.9596e-04,\n 9.9281e-05, 6.1901e-05, 3.1779e-04, 3.0137e-04, 1.2485e-04, 2.0708e-04,\n 1.5399e-04, 1.0104e-04, 3.0949e-04, 4.1623e-04, 2.1269e-04, 1.8761e-04,\n 1.1215e-04, 2.5063e-04, 3.9108e-04, 1.9785e-04, 1.5031e-04, 3.0406e-04,\n 2.8283e-04, 1.3688e-04, 3.3825e-04, 2.6926e-04, 1.0892e-04, 8.8227e-05,\n 4.7529e-04, 2.4395e-04, 1.1022e-04, 7.8842e-05, 1.6841e-04, 6.8507e-04,\n 2.7682e-04, 2.1086e-04, 2.4899e-04, 1.0729e-04, 3.0958e-04, 5.9091e-04,\n 1.1583e-04, 1.4597e-04, 5.5268e-05, 1.2204e-04, 2.5366e-04, 2.7419e-04,\n 1.6790e-04, 1.6848e-04, 3.3306e-04, 2.0669e-04, 1.3830e-04, 2.6549e-04,\n 5.0228e-04, 1.6400e-04, 2.6207e-04, 2.1899e-04, 3.3493e-04, 9.6583e-05,\n 1.9872e-04, 4.8702e-04, 2.1230e-04, 1.9794e-04, 1.3400e-04, 1.7624e-04,\n 2.6868e-04, 1.5693e-04, 1.2838e-04, 9.0206e-05, 1.0799e-04, 3.3857e-04,\n 2.5267e-04, 2.7565e-04, 3.1598e-04, 2.2350e-04, 1.0840e-04, 2.3546e-04,\n 3.3866e-04, 2.2545e-04, 2.4937e-04, 1.3395e-04, 7.4858e-13, 1.8287e-04,\n 1.1615e-04, 9.4738e-05, 1.5323e-04, 1.5119e-04, 1.2788e-04, 9.5566e-05,\n 1.2224e-04, 1.7388e-04, 1.2443e-04, 1.8446e-04, 2.6062e-04, 2.0402e-04,\n 1.1359e-04, 2.9016e-04, 1.9080e-04, 9.7279e-05, 1.4950e-04, 1.2152e-04,\n 2.8562e-04, 2.6087e-04, 2.1403e-04, 1.6005e-04, 2.7349e-05, 2.8912e-04,\n 1.2674e-04, 1.3001e-04, 2.1723e-04, 2.4586e-04, 3.2822e-04, 2.9240e-04,\n 1.6297e-04, 1.2016e-04, 9.2569e-05, 1.1296e-04, 1.5193e-04, 9.0149e-05,\n 4.5876e-04, 1.7243e-04, 2.8683e-04, 1.8900e-04, 1.9403e-04, 2.9655e-04,\n 1.9187e-04, 1.9172e-04, 2.0414e-04, 2.3418e-04, 1.7508e-04, 2.0469e-04,\n 1.1173e-04, 1.3526e-04, 2.0176e-04, 3.5370e-04, 1.2890e-04, 2.2066e-04,\n 1.8485e-04, 9.5073e-05, 1.3388e-04, 2.2354e-04, 1.9373e-04, 2.0360e-04,\n 1.3129e-04, 9.0410e-05, 8.2800e-05, 1.6939e-04, 1.4113e-04, 2.4764e-04,\n 1.7737e-04, 3.1367e-04, 4.0507e-04, 1.2763e-04, 2.6344e-04, 1.1262e-04,\n 2.6893e-04, 3.8614e-04, 8.1248e-04, 1.4510e-04, 1.5531e-04, 1.3323e-04,\n 1.4759e-04, 1.6616e-04, 1.1624e-04, 2.5712e-04, 1.5634e-04, 2.5078e-04,\n 3.7176e-04, 7.7944e-05, 1.6218e-04, 1.7792e-04, 1.2243e-04, 1.5746e-04,\n 1.8812e-04, 1.5970e-04, 1.9235e-04, 2.2500e-04, 1.8305e-04, 1.7259e-04,\n 4.7302e-16, 3.0363e-04, 1.4952e-04, 1.2527e-04, 2.5930e-04, 2.1025e-04,\n 2.3286e-04, 4.6374e-15, 2.3102e-04, 1.9374e-04, 2.4278e-04, 4.6487e-04,\n 4.3451e-04, 3.4627e-04, 1.6208e-04, 1.1392e-04, 2.4028e-04, 2.2387e-04,\n 2.0018e-04, 2.8274e-04, 3.8510e-04, 1.6717e-04, 1.7374e-04, 3.0944e-04,\n 3.8304e-04, 5.6918e-05, 7.8285e-05, 2.4171e-04, 1.1896e-04, 2.0643e-04,\n 4.9479e-05, 1.8561e-04, 1.4425e-04, 4.1522e-04, 9.2367e-05, 2.2277e-04,\n 2.0255e-04, 4.1985e-04, 1.4084e-04, 2.0207e-04, 1.5576e-04, 2.7142e-04,\n 2.5107e-04, 1.2083e-04, 1.3944e-04, 1.1353e-04, 3.3009e-04, 2.5442e-04,\n 1.1739e-04, 1.0066e-04, 2.1739e-04, 4.2241e-04, 1.5730e-04, 1.2794e-04,\n 1.9598e-04, 3.2459e-04, 2.2773e-04, 2.6371e-04, 9.0183e-05, 1.0348e-04,\n 1.7844e-04, 2.0374e-04, 1.1875e-04, 5.3002e-04, 1.0254e-04, 1.9780e-04,\n 8.4214e-04, 2.4151e-04, 1.9810e-04, 2.1979e-04, 1.4082e-04, 9.9574e-05,\n 4.4148e-04, 7.2630e-05, 1.7341e-04, 1.9478e-04, 1.2063e-04, 1.1017e-04,\n 1.6474e-04, 1.7992e-04, 2.5961e-04, 1.6169e-04, 1.6609e-04, 9.7508e-16,\n 2.8776e-04, 1.2052e-04, 1.3981e-04, 1.3551e-04, 2.8983e-04, 1.7364e-04,\n 2.3622e-04, 1.6094e-04, 1.8741e-04, 2.7324e-04, 2.1464e-04, 2.3708e-04,\n 1.3928e-04, 3.9409e-04, 2.3333e-04, 1.4290e-04, 2.0970e-04, 1.1652e-04,\n 1.7324e-04, 8.2082e-05, 1.3984e-04, 2.9517e-04, 2.1838e-04, 3.1012e-04,\n 1.2641e-04, 3.2453e-04, 1.5557e-04, 2.5339e-04, 1.1554e-04, 1.7078e-04,\n 1.2021e-04, 1.4350e-04, 3.1226e-04, 5.8069e-05, 7.5522e-05, 2.7719e-04,\n 1.7101e-04, 2.4811e-04, 1.3680e-04, 1.5306e-04, 2.8798e-04, 6.1621e-04,\n 1.2939e-04, 1.9492e-04, 2.9167e-04, 3.1760e-04, 3.9826e-04, 1.4504e-04,\n 2.1172e-04, 1.0293e-04, 3.9997e-04, 1.7664e-04, 1.6316e-04, 1.2731e-04,\n 1.2888e-04, 1.7418e-04, 1.7354e-04, 1.1233e-04, 1.7233e-04, 3.1346e-04,\n 2.6762e-04, 2.1549e-04, 2.3483e-04, 3.4922e-04, 4.0492e-04, 3.2391e-04,\n 2.8895e-04, 2.0641e-04, 1.9773e-04, 3.4122e-04, 1.4145e-04, 2.1025e-04,\n 1.8165e-04, 1.8284e-04, 2.6709e-04, 1.9525e-04, 2.5764e-04, 1.7034e-04,\n 9.4616e-05, 2.6519e-04, 2.1836e-04, 4.3600e-04, 5.1933e-04, 1.3716e-04,\n 4.9601e-04, 1.4326e-04, 1.3552e-04, 1.8115e-04, 1.6999e-04, 1.6700e-04,\n 1.5511e-04, 2.9500e-04, 1.5308e-04, 1.0761e-04, 3.1355e-04, 2.7500e-04,\n 1.3005e-04, 2.3933e-04, 2.9594e-04, 1.3745e-04, 1.3850e-04, 1.8521e-04,\n 1.6927e-04, 1.5893e-04, 2.1585e-04, 3.7634e-04, 1.0573e-04, 1.6816e-04,\n 3.6736e-04, 2.1680e-04, 5.8053e-04, 1.6319e-04, 3.5092e-04, 8.8197e-05,\n 9.0212e-05, 1.6711e-04, 2.5825e-04, 2.5285e-04, 3.4638e-04, 9.4499e-05,\n 4.5430e-04, 1.2325e-04, 1.4985e-04, 1.2559e-04, 3.7444e-04, 2.2847e-04,\n 1.9068e-04, 1.5436e-04, 3.7584e-04, 1.6090e-04, 2.9784e-04, 2.4550e-04,\n 4.1009e-04, 1.8465e-04, 2.2552e-04, 2.1145e-04, 1.0643e-04, 7.3375e-04,\n 1.8254e-04, 1.7810e-04], device='cuda:0')"
19
+ },
20
+ "3": {
21
+ "step": "tensor(22524.)",
22
+ "exp_avg": "tensor([ 3.2807e-03, -3.2828e-03, 3.8114e-03, -2.7683e-03, -1.9745e-03,\n 1.3865e-03, 1.5925e-03, 2.3504e-04, 6.0481e-04, 3.5871e-04,\n 2.5662e-04, 1.1974e-03, 1.2103e-03, 6.4549e-04, 4.0880e-04,\n 2.4565e-03, -9.7317e-04, 2.7078e-03, -3.4928e-04, 2.0422e-03,\n -9.6480e-04, 3.9088e-04, 2.6810e-03, 2.8120e-04, -6.5090e-04,\n 2.3607e-03, 3.0652e-03, -2.1845e-03, 9.4396e-05, 3.0889e-04,\n -7.9335e-04, 1.3423e-03, -1.4846e-03, 9.8462e-04, 4.3004e-04,\n -2.5067e-04, 5.0138e-04, -4.4420e-03, 5.6570e-04, -1.7506e-03,\n 1.5436e-03, 4.1145e-03, 2.7322e-04, 9.8661e-04, -2.6162e-03,\n 6.8119e-04, 2.1451e-03, 1.9301e-03, -6.9232e-04, 4.6129e-04,\n -7.0959e-05, -3.7780e-05, 1.8323e-03, 2.2546e-03, -7.5655e-04,\n 1.4078e-03, 4.8622e-04, -8.2963e-04, -7.4195e-04, -1.1622e-03,\n 1.3214e-03, -1.3199e-03, -8.5324e-04, 1.2469e-03, -8.7602e-04,\n 4.0504e-03, 1.7321e-03, -2.1634e-03, 2.9053e-03, -9.6672e-05,\n -5.2518e-04, -7.0794e-04, -2.3737e-03, -1.1654e-03, 8.1185e-04,\n -1.9091e-03, -4.9985e-03, 2.0848e-04, 1.2176e-03, -1.9484e-03,\n 2.7091e-04, -8.0830e-04, 6.8258e-04, 1.7561e-03, -1.4676e-03,\n 2.1279e-03, -1.5919e-03, -3.2611e-04, -8.2062e-04, -4.5056e-04,\n -6.7343e-04, 5.9218e-04, 1.2889e-03, -6.1953e-03, 9.4236e-04,\n -7.9231e-04, -5.7195e-04, 7.7222e-04, 1.1382e-03, -1.8642e-03,\n 2.0263e-03, 2.7166e-03, 1.8310e-03, -8.4957e-04, 1.0652e-03,\n 2.8437e-03, 1.4296e-03, 1.8512e-03, 4.6711e-03, -1.2803e-03,\n -4.2888e-04, 1.2179e-03, 3.2445e-04, -1.6559e-03, 1.3744e-03,\n 2.9295e-03, -5.5274e-04, 2.9930e-04, 9.9883e-05, -2.8612e-04,\n -8.3048e-04, 1.2108e-04, 1.2173e-04, -9.9231e-07, -3.5308e-03,\n -1.6462e-03, -4.6892e-03, 3.1227e-03, 5.8723e-04, -3.1571e-03,\n 2.0714e-03, 5.1727e-04, -2.8636e-03, -3.6358e-04, -2.6415e-03,\n 1.0252e-03, -7.8779e-04, 2.3086e-03, 2.9483e-03, 3.8351e-03,\n 2.1432e-04, 1.2538e-03, 1.7312e-04, -3.9308e-03, -2.0100e-04,\n -9.6755e-04, 2.3480e-03, 1.4665e-03, -1.4812e-03, 1.5809e-03,\n -8.0591e-04, 1.9204e-04, -3.5848e-04, -2.4643e-03, 8.8799e-04,\n -6.8003e-04, -1.6082e-03, -1.0282e-03, -6.0051e-04, 1.1639e-03,\n 1.8209e-03, 1.4796e-05, 1.1746e-03, 3.3968e-03, -5.8436e-04,\n -6.0593e-04, 7.4857e-05, -3.2750e-03, 9.3390e-04, -1.1875e-03,\n -1.4888e-03, 1.3927e-03, 6.8243e-04, 1.2045e-03, -6.1668e-04,\n 8.9898e-04, -1.3396e-04, 1.5205e-03, 1.1152e-04, -1.3579e-03,\n -1.9055e-03, -4.4492e-03, -2.5061e-03, 6.3086e-04, 1.2258e-03,\n -6.8381e-03, -1.9941e-03, -1.2220e-03, 1.2816e-03, -1.8556e-03,\n 5.6052e-45, -1.4757e-03, -1.5216e-03, 1.1337e-03, 8.7307e-04,\n -1.4587e-03, 6.6521e-04, -4.4493e-04, 7.8679e-05, 2.3998e-03,\n 5.0914e-04, 2.1851e-03, -1.4370e-03, 1.0183e-03, 1.2914e-04,\n 3.6883e-03, -2.0676e-03, -1.6225e-03, 5.5281e-04, -1.0042e-03,\n 8.6810e-04, 1.0603e-05, 1.0621e-03, -1.6801e-03, -6.8424e-05,\n 4.8618e-03, 4.5536e-04, 6.4988e-04, -3.6441e-05, 1.2048e-03,\n 4.8837e-04, -3.3687e-04, 5.7962e-04, 1.3134e-03, -9.8439e-05,\n -1.9854e-03, 1.4277e-03, 7.2341e-04, 1.0193e-03, 1.9558e-03,\n -8.4714e-03, 4.1463e-04, -2.8629e-04, -2.7142e-03, 1.3911e-03,\n -8.1920e-04, 7.0743e-04, 1.6117e-04, 5.2951e-03, -7.9439e-04,\n 1.2913e-03, 1.4019e-03, 5.2970e-04, 2.6958e-03, -1.8374e-03,\n 5.2435e-04, 2.9174e-03, 1.1630e-03, 1.2316e-03, -9.8256e-04,\n 3.6630e-03, 1.9817e-03, -1.9909e-03, -1.5567e-03, -1.6090e-03,\n -2.1643e-04, 2.0060e-04, -4.4170e-03, -2.5322e-04, -4.2741e-04,\n -9.2455e-04, 4.1225e-04, 1.2251e-03, 5.5747e-05, -1.1785e-03,\n -2.7429e-03, -4.9737e-03, 1.0036e-03, -8.5909e-04, 2.4576e-03,\n -5.3384e-04, 2.8890e-03, 2.3450e-04, -1.4620e-04, 1.0603e-03,\n -1.5008e-03, 4.7204e-03, 1.9562e-03, -5.7028e-05, 7.6520e-05,\n 1.5813e-03, 2.1471e-03, -9.0158e-04, -4.4590e-04, -3.6361e-03,\n 2.6553e-04, -3.0257e-03, -7.1627e-04, 5.6052e-45, 6.9602e-04,\n -4.3742e-04, -5.1348e-04, 1.4196e-03, -3.9513e-03, 1.2376e-03,\n 5.6052e-45, -1.4901e-03, 1.4082e-03, -1.3978e-03, -3.5278e-03,\n -3.3838e-04, -2.8287e-04, 1.2193e-03, 1.0811e-03, -2.6372e-04,\n -5.6863e-04, -1.1409e-04, -3.3159e-03, -4.3618e-04, -1.0104e-03,\n 1.0136e-03, -1.9152e-03, 2.4708e-03, 5.8727e-05, -6.8687e-04,\n 2.8290e-03, 4.6923e-04, 1.3784e-03, 2.8047e-04, 1.3549e-03,\n 2.6521e-04, 8.7788e-04, -7.7594e-04, 1.9300e-03, -3.4846e-03,\n -5.2638e-03, 1.8836e-03, -4.1415e-05, 3.9091e-04, 2.9518e-03,\n -8.4125e-04, -3.0479e-03, -7.3626e-04, 1.0335e-03, 5.9377e-04,\n 3.7944e-03, 1.9434e-04, 1.4332e-03, -1.2550e-03, 1.7970e-04,\n 1.5668e-04, 1.6233e-03, 1.2259e-03, -8.4847e-04, 1.3272e-03,\n -3.5063e-03, 2.1124e-03, -3.3427e-04, -1.5218e-04, -1.5100e-04,\n -5.7330e-04, 2.5431e-04, -4.2478e-04, 6.4468e-04, 1.3994e-03,\n -2.3233e-03, 1.4860e-03, -2.5366e-03, -2.6557e-03, -3.3183e-04,\n 2.4730e-03, -2.2016e-04, 3.7522e-03, -2.0476e-03, -1.4190e-03,\n 2.0849e-03, 4.7466e-05, -5.2502e-04, -3.4870e-03, -9.7351e-04,\n 5.9064e-04, 5.6052e-45, 2.3913e-03, -1.0346e-04, 3.0962e-04,\n 1.3846e-03, -1.0456e-03, 1.0647e-03, -2.7910e-03, 1.3847e-03,\n -1.8054e-03, 1.3715e-03, 7.1980e-04, 2.4934e-03, 1.5075e-03,\n 1.2763e-03, -1.8201e-03, 1.9880e-04, -2.3729e-03, 1.2059e-03,\n -1.6683e-03, 1.9650e-03, 1.6636e-03, -6.8222e-04, 3.5156e-03,\n 1.4586e-03, -4.1207e-04, 2.1562e-03, -1.8025e-03, -4.2908e-04,\n 1.0209e-03, 3.0681e-04, 5.8687e-04, 1.8707e-03, -2.1773e-03,\n 6.1041e-04, -1.0454e-05, 2.0579e-03, 1.7278e-03, 1.3400e-04,\n -7.6081e-04, 5.6013e-04, -7.2455e-03, -1.5875e-03, 1.9387e-03,\n -2.8686e-03, -5.3375e-04, 3.2453e-04, 1.6432e-03, 2.1241e-03,\n 9.7443e-04, -1.2243e-03, 8.9196e-04, -6.4085e-04, 1.0762e-03,\n -2.5058e-03, 3.1322e-03, 1.7152e-03, -1.1550e-04, -1.5844e-04,\n -2.2340e-03, 1.3255e-03, 4.8957e-05, -4.6770e-03, -4.2738e-04,\n -1.9821e-03, 1.5157e-03, 1.3381e-03, -1.9592e-03, -3.7833e-03,\n 2.4213e-04, 2.0887e-03, 2.6059e-03, 6.4190e-04, -3.0572e-03,\n -2.8503e-04, 1.2837e-03, 7.2339e-05, 5.6098e-04, 1.4106e-03,\n 7.5691e-04, 2.3951e-04, -1.4110e-03, 7.7631e-04, 3.8401e-04,\n -2.8548e-04, -1.5927e-03, -4.2102e-04, -5.2085e-04, -1.2372e-05,\n 8.4782e-04, 2.3274e-03, 8.7911e-04, 1.1605e-03, 2.5793e-03,\n -4.5679e-04, 3.2279e-04, -5.5961e-03, -4.9361e-04, 3.9539e-03,\n 1.3364e-03, 1.4875e-03, 1.8549e-03, -1.1325e-03, 1.5790e-03,\n -9.5663e-05, -3.6491e-03, 2.2528e-03, -1.5074e-03, -2.2567e-03,\n 2.1016e-03, -1.0082e-03, 1.7638e-03, 3.9093e-04, 2.4237e-03,\n -1.3639e-03, -1.0592e-03, -8.1280e-04, -1.9407e-03, 9.2902e-05,\n 3.3634e-03, -8.6240e-04, 7.5795e-03, -2.6429e-03, -9.0034e-04,\n -5.3294e-04, -9.7734e-04, 1.6505e-03, 2.4359e-03, -4.7409e-04,\n 1.7283e-03, -1.5643e-04, 7.1449e-04, 8.4121e-04, 9.6428e-04,\n -7.3220e-04, 1.7863e-05, 1.3102e-04, 7.9748e-04, -8.8584e-04,\n 1.5931e-03, -4.5979e-05], device='cuda:0')",
23
+ "exp_avg_sq": "tensor([5.2773e-05, 6.4649e-05, 7.3033e-05, 5.6759e-05, 4.3472e-05, 8.2499e-05,\n 7.5264e-05, 3.2822e-05, 3.8571e-05, 4.0243e-05, 7.1248e-05, 5.8033e-05,\n 3.7180e-05, 6.6452e-05, 9.7120e-05, 4.7534e-05, 4.2353e-05, 1.3382e-04,\n 1.4310e-05, 5.3798e-05, 4.0764e-05, 5.3074e-05, 6.3390e-05, 4.5331e-05,\n 3.7611e-05, 3.6507e-05, 1.0428e-04, 5.9129e-05, 5.4710e-05, 3.5072e-05,\n 4.7080e-05, 5.7601e-05, 2.6828e-05, 1.1967e-04, 5.1653e-05, 3.8882e-05,\n 3.3035e-05, 6.0181e-05, 9.2097e-05, 5.6208e-05, 6.3016e-05, 1.5294e-04,\n 3.5732e-05, 3.6870e-05, 7.2829e-05, 7.7653e-05, 6.3025e-05, 2.7617e-05,\n 3.6440e-05, 3.8170e-05, 3.4795e-05, 5.7284e-05, 1.1754e-04, 5.0625e-05,\n 7.7983e-05, 4.1802e-05, 5.0085e-05, 3.0253e-05, 5.0833e-05, 4.7804e-05,\n 3.1058e-05, 2.9186e-05, 4.2403e-05, 6.8501e-05, 8.8283e-05, 8.1912e-05,\n 5.8226e-05, 7.7118e-05, 4.7085e-05, 5.3867e-05, 3.5821e-05, 5.4357e-05,\n 4.6285e-05, 1.2011e-04, 4.6968e-05, 4.2060e-05, 1.5651e-04, 3.8312e-05,\n 4.9745e-05, 5.1751e-05, 4.9587e-05, 3.4992e-05, 3.5164e-05, 6.4134e-05,\n 5.5198e-05, 5.7888e-05, 1.0216e-04, 2.6091e-05, 6.7716e-05, 7.5992e-05,\n 5.1483e-05, 4.2846e-05, 1.4506e-04, 1.3464e-04, 4.5087e-05, 4.8605e-05,\n 3.4982e-05, 6.7659e-05, 7.5929e-05, 4.9488e-05, 5.5395e-05, 4.4736e-05,\n 3.2708e-05, 6.1790e-05, 3.2453e-05, 4.7303e-05, 3.7443e-05, 3.1195e-05,\n 4.5860e-05, 3.1183e-05, 1.1988e-04, 3.6360e-05, 1.3664e-04, 4.0076e-05,\n 3.3906e-05, 2.0835e-05, 8.2391e-05, 1.1333e-04, 3.9776e-05, 6.1602e-05,\n 5.0001e-05, 3.6202e-05, 7.9359e-05, 8.5512e-05, 5.0345e-05, 5.7571e-05,\n 3.6755e-05, 7.1365e-05, 7.3146e-05, 6.4996e-05, 4.8922e-05, 1.0657e-04,\n 6.1433e-05, 3.5758e-05, 1.0030e-04, 4.6313e-05, 3.1151e-05, 2.8920e-05,\n 1.3471e-04, 6.7233e-05, 3.3019e-05, 4.0270e-05, 4.5864e-05, 1.7103e-04,\n 4.9977e-05, 6.9418e-05, 5.4111e-05, 2.8813e-05, 1.0375e-04, 1.0683e-04,\n 4.2450e-05, 4.4435e-05, 3.3568e-05, 5.4164e-05, 4.5251e-05, 6.5709e-05,\n 3.8159e-05, 3.3140e-05, 7.1798e-05, 4.1568e-05, 4.9151e-05, 5.1272e-05,\n 8.8527e-05, 4.4301e-05, 7.1782e-05, 5.9506e-05, 9.7436e-05, 3.1916e-05,\n 6.1218e-05, 1.5242e-04, 5.3360e-05, 6.8143e-05, 3.1154e-05, 4.7175e-05,\n 4.0972e-05, 3.1853e-05, 3.2537e-05, 3.7822e-05, 3.9047e-05, 8.1632e-05,\n 7.4711e-05, 6.7962e-05, 1.2658e-04, 4.0639e-05, 1.8013e-05, 7.8705e-05,\n 7.0965e-05, 4.7321e-05, 7.7426e-05, 4.7423e-05, 1.2964e-14, 4.9716e-05,\n 2.5216e-05, 2.2692e-05, 4.8337e-05, 4.5431e-05, 3.0305e-05, 2.8887e-05,\n 3.3665e-05, 4.2091e-05, 4.0545e-05, 4.7661e-05, 4.7024e-05, 3.0225e-05,\n 3.5581e-05, 1.1527e-04, 4.4845e-05, 3.2617e-05, 3.6817e-05, 4.0226e-05,\n 7.2703e-05, 5.5452e-05, 5.0566e-05, 4.4493e-05, 2.0990e-05, 7.6664e-05,\n 4.4510e-05, 2.8546e-05, 5.5571e-05, 5.4766e-05, 6.7182e-05, 8.4179e-05,\n 3.4864e-05, 5.2000e-05, 2.9591e-05, 3.7888e-05, 4.3795e-05, 3.0851e-05,\n 9.6451e-05, 8.3329e-05, 8.8462e-05, 5.0587e-05, 5.4204e-05, 4.8342e-05,\n 3.8789e-05, 6.0795e-05, 3.7684e-05, 4.6456e-05, 5.9575e-05, 5.0000e-05,\n 2.5383e-05, 3.8798e-05, 3.8575e-05, 7.6749e-05, 2.1561e-05, 5.8984e-05,\n 6.7053e-05, 4.2894e-05, 3.4870e-05, 4.9768e-05, 7.2568e-05, 7.0442e-05,\n 3.2187e-05, 3.1741e-05, 3.3345e-05, 4.4422e-05, 4.2354e-05, 6.0792e-05,\n 5.2810e-05, 9.4407e-05, 6.8396e-05, 3.9456e-05, 7.1231e-05, 2.9051e-05,\n 5.7332e-05, 6.8913e-05, 1.3430e-04, 3.3985e-05, 3.0326e-05, 3.1238e-05,\n 3.2223e-05, 4.9651e-05, 3.3984e-05, 8.0173e-05, 3.3073e-05, 6.8775e-05,\n 1.1161e-04, 3.2143e-05, 5.7423e-05, 4.9232e-05, 3.7120e-05, 4.7707e-05,\n 5.2074e-05, 4.2600e-05, 5.5271e-05, 6.7346e-05, 5.2657e-05, 6.5157e-05,\n 3.9447e-17, 1.0719e-04, 4.5060e-05, 2.9660e-05, 7.7615e-05, 5.8179e-05,\n 6.2483e-05, 2.0750e-16, 5.1555e-05, 8.3847e-05, 4.9902e-05, 1.4910e-04,\n 7.5559e-05, 6.8351e-05, 5.7462e-05, 2.9123e-05, 7.8936e-05, 5.8965e-05,\n 4.0948e-05, 4.4241e-05, 7.9677e-05, 3.8370e-05, 4.8205e-05, 8.7888e-05,\n 1.0295e-04, 2.8690e-05, 3.7098e-05, 6.3503e-05, 3.1065e-05, 5.4267e-05,\n 1.4255e-05, 4.5750e-05, 5.2359e-05, 9.0514e-05, 3.2846e-05, 7.1336e-05,\n 6.3094e-05, 1.3312e-04, 4.5241e-05, 5.1593e-05, 3.1549e-05, 5.4071e-05,\n 5.8039e-05, 5.5378e-05, 3.5044e-05, 2.1480e-05, 9.8334e-05, 7.9839e-05,\n 1.5501e-05, 3.3863e-05, 4.3794e-05, 1.2877e-04, 3.3616e-05, 4.1504e-05,\n 5.8088e-05, 4.7591e-05, 5.9747e-05, 6.8131e-05, 3.0185e-05, 3.8995e-05,\n 5.0888e-05, 5.9555e-05, 2.7331e-05, 1.2562e-04, 3.0117e-05, 5.7638e-05,\n 1.4008e-04, 5.9125e-05, 4.0760e-05, 7.4495e-05, 3.4850e-05, 3.0781e-05,\n 1.4006e-04, 2.9652e-05, 4.1068e-05, 3.2676e-05, 3.8529e-05, 4.6843e-05,\n 2.7504e-05, 4.9192e-05, 8.6774e-05, 3.7844e-05, 5.0748e-05, 4.7224e-17,\n 4.8577e-05, 2.5942e-05, 3.8754e-05, 4.0961e-05, 9.4822e-05, 4.9940e-05,\n 6.2242e-05, 4.1348e-05, 5.1311e-05, 7.2669e-05, 6.1872e-05, 6.3521e-05,\n 4.7260e-05, 1.0379e-04, 4.7130e-05, 2.8232e-05, 5.5567e-05, 3.0126e-05,\n 4.0566e-05, 2.6005e-05, 4.4272e-05, 7.4349e-05, 6.1813e-05, 6.6366e-05,\n 4.0465e-05, 7.4979e-05, 4.2285e-05, 6.4144e-05, 3.3170e-05, 4.2565e-05,\n 3.3256e-05, 3.5404e-05, 5.2433e-05, 1.4437e-05, 2.1181e-05, 6.2200e-05,\n 5.2139e-05, 6.6682e-05, 3.4995e-05, 5.8910e-05, 5.7711e-05, 8.9271e-05,\n 3.7898e-05, 4.1932e-05, 7.6567e-05, 7.3177e-05, 1.3149e-04, 3.9997e-05,\n 4.3827e-05, 3.5064e-05, 7.7276e-05, 7.0168e-05, 4.0311e-05, 5.6796e-05,\n 3.7991e-05, 5.7233e-05, 5.0820e-05, 2.8700e-05, 4.7113e-05, 9.4640e-05,\n 6.9593e-05, 6.1649e-05, 5.6064e-05, 6.7216e-05, 9.2408e-05, 8.1739e-05,\n 7.5602e-05, 6.0772e-05, 4.4088e-05, 8.8177e-05, 5.2473e-05, 6.5683e-05,\n 3.1025e-05, 4.7094e-05, 6.0533e-05, 3.8835e-05, 8.0590e-05, 5.7105e-05,\n 4.0988e-05, 7.4285e-05, 4.3336e-05, 1.0705e-04, 1.6755e-04, 4.2283e-05,\n 7.2995e-05, 2.9383e-05, 4.4612e-05, 4.9837e-05, 6.0066e-05, 3.9372e-05,\n 4.9641e-05, 5.7976e-05, 2.6203e-05, 3.0873e-05, 5.7122e-05, 6.5834e-05,\n 3.8261e-05, 7.3353e-05, 8.2491e-05, 3.2900e-05, 3.6291e-05, 4.9517e-05,\n 6.7411e-05, 3.8200e-05, 5.3546e-05, 7.9690e-05, 3.1872e-05, 5.9038e-05,\n 6.4970e-05, 4.5714e-05, 6.1277e-05, 4.2630e-05, 1.0589e-04, 3.4912e-05,\n 2.0575e-05, 4.0757e-05, 5.5198e-05, 4.0109e-05, 6.9597e-05, 3.0332e-05,\n 1.2650e-04, 3.7437e-05, 4.5907e-05, 3.3315e-05, 8.8798e-05, 4.8923e-05,\n 4.6678e-05, 4.7279e-05, 6.9069e-05, 4.5211e-05, 5.4202e-05, 4.4942e-05,\n 7.8952e-05, 4.7591e-05, 5.5766e-05, 3.7862e-05, 4.4305e-05, 5.4362e-05,\n 5.1673e-05, 4.8119e-05], device='cuda:0')"
24
+ },
25
+ "4": {
26
+ "step": "tensor(22524.)",
27
+ "exp_avg": "tensor([[-2.5226e-06, 2.1407e-05, -3.2046e-06, ..., 3.5007e-06,\n 6.1405e-06, -1.9853e-05],\n [ 1.6754e-05, -3.1676e-06, -3.0679e-05, ..., 6.5196e-06,\n 6.9206e-06, -1.9575e-05],\n [ 1.0025e-05, 2.4222e-06, 3.2123e-06, ..., 1.3197e-06,\n -1.1925e-05, 6.3741e-06],\n ...,\n [-5.0460e-06, 9.4846e-06, 1.1757e-05, ..., 4.9519e-06,\n 2.2655e-05, 1.7917e-05],\n [-1.7195e-05, 6.4237e-06, -9.3630e-06, ..., -4.1148e-06,\n -1.1995e-05, 1.2330e-05],\n [-3.2906e-05, 1.9526e-06, 2.2616e-05, ..., 2.0173e-06,\n 3.6783e-06, -1.7183e-06]], device='cuda:0')",
28
+ "exp_avg_sq": "tensor([[8.1058e-10, 2.0639e-09, 5.0102e-10, ..., 5.4216e-10, 8.5680e-10,\n 1.3325e-09],\n [1.2081e-09, 1.4564e-09, 2.4197e-09, ..., 1.0332e-09, 2.2055e-09,\n 2.1108e-09],\n [1.3115e-09, 1.8501e-09, 2.1574e-09, ..., 9.3915e-10, 2.1048e-09,\n 1.9682e-09],\n ...,\n [1.7973e-09, 3.1058e-09, 1.6144e-09, ..., 6.1038e-10, 3.6373e-09,\n 2.2591e-09],\n [1.0156e-09, 3.3110e-09, 2.4421e-09, ..., 8.6766e-10, 1.8078e-09,\n 2.0037e-09],\n [3.2131e-09, 2.7303e-09, 1.5983e-09, ..., 8.5261e-10, 1.7677e-09,\n 2.2815e-09]], device='cuda:0')"
29
+ },
30
+ "5": {
31
+ "step": "tensor(22524.)",
32
+ "exp_avg": "tensor([[-8.7352e-07, 3.6673e-06, 1.3712e-07, ..., 4.6857e-06,\n 6.1842e-06, -1.0624e-05],\n [ 1.6126e-05, -2.0474e-05, -1.1829e-05, ..., 1.0541e-05,\n -4.1957e-06, -1.3415e-05],\n [ 1.5571e-05, 9.9198e-06, 2.7930e-07, ..., -6.8663e-06,\n -8.1234e-06, 1.0349e-05],\n ...,\n [ 1.0575e-05, 2.1788e-06, -4.1562e-06, ..., -3.9001e-06,\n 2.2660e-05, 8.4891e-06],\n [-1.5159e-06, 1.6723e-06, -1.5422e-05, ..., -5.4248e-06,\n -6.3673e-06, 4.0683e-06],\n [ 6.2518e-06, -1.6416e-05, 1.3138e-06, ..., 3.5694e-06,\n -1.0068e-05, -6.2243e-07]], device='cuda:0')",
33
+ "exp_avg_sq": "tensor([[4.9140e-10, 1.0605e-09, 5.0755e-10, ..., 2.2803e-10, 3.9137e-10,\n 8.7826e-10],\n [1.3068e-09, 2.5900e-09, 8.7481e-10, ..., 6.8415e-10, 2.1037e-09,\n 1.5015e-09],\n [1.0070e-09, 1.2466e-09, 1.2976e-09, ..., 7.9164e-10, 1.8541e-09,\n 1.6133e-09],\n ...,\n [1.2507e-09, 1.1632e-09, 2.1594e-09, ..., 4.0824e-10, 3.5681e-09,\n 1.3514e-09],\n [1.5852e-09, 2.0863e-09, 2.5312e-09, ..., 1.0191e-09, 8.2293e-10,\n 1.2336e-09],\n [9.0551e-10, 2.4795e-09, 1.0524e-09, ..., 4.9561e-10, 6.6428e-10,\n 1.7251e-09]], device='cuda:0')"
34
+ },
35
+ "6": {
36
+ "step": "tensor(22524.)",
37
+ "exp_avg": "tensor([-0.0002, 0.0002], device='cuda:0')",
38
+ "exp_avg_sq": "tensor([3.5515e-06, 3.5515e-06], device='cuda:0')"
39
+ }
40
+ },
41
+ "param_groups": [
42
+ {
43
+ "lr": 0.0034555695366224513,
44
+ "name": "shared",
45
+ "betas": [
46
+ 0.9,
47
+ 0.999
48
+ ],
49
+ "eps": 1e-08,
50
+ "weight_decay": 1e-05,
51
+ "amsgrad": false,
52
+ "maximize": false,
53
+ "foreach": null,
54
+ "capturable": false,
55
+ "differentiable": false,
56
+ "fused": null,
57
+ "decoupled_weight_decay": true,
58
+ "initial_lr": 0.01,
59
+ "params": [
60
+ 0,
61
+ 1,
62
+ 2,
63
+ 3
64
+ ]
65
+ },
66
+ {
67
+ "lr": 0.0034555695366224513,
68
+ "name": "scale_256",
69
+ "betas": [
70
+ 0.9,
71
+ 0.999
72
+ ],
73
+ "eps": 1e-08,
74
+ "weight_decay": 1e-05,
75
+ "amsgrad": false,
76
+ "maximize": false,
77
+ "foreach": null,
78
+ "capturable": false,
79
+ "differentiable": false,
80
+ "fused": null,
81
+ "decoupled_weight_decay": true,
82
+ "initial_lr": 0.01,
83
+ "params": [
84
+ 4
85
+ ]
86
+ },
87
+ {
88
+ "lr": 0.0034555695366224513,
89
+ "name": "scale_512",
90
+ "betas": [
91
+ 0.9,
92
+ 0.999
93
+ ],
94
+ "eps": 1e-08,
95
+ "weight_decay": 1e-05,
96
+ "amsgrad": false,
97
+ "maximize": false,
98
+ "foreach": null,
99
+ "capturable": false,
100
+ "differentiable": false,
101
+ "fused": null,
102
+ "decoupled_weight_decay": true,
103
+ "initial_lr": 0.01,
104
+ "params": [
105
+ 5
106
+ ]
107
+ },
108
+ {
109
+ "lr": 0.001728112022559819,
110
+ "name": "fusion",
111
+ "betas": [
112
+ 0.9,
113
+ 0.999
114
+ ],
115
+ "eps": 1e-08,
116
+ "weight_decay": 1e-05,
117
+ "amsgrad": false,
118
+ "maximize": false,
119
+ "foreach": null,
120
+ "capturable": false,
121
+ "differentiable": false,
122
+ "fused": null,
123
+ "decoupled_weight_decay": true,
124
+ "initial_lr": 0.005,
125
+ "params": [
126
+ 6
127
+ ]
128
+ }
129
+ ]
130
+ },
131
+ "scheduler_state_dict": {
132
+ "T_0": 10,
133
+ "T_i": 10,
134
+ "T_mult": 2,
135
+ "eta_min": 1e-06,
136
+ "T_cur": 6,
137
+ "base_lrs": [
138
+ 0.01,
139
+ 0.01,
140
+ 0.01,
141
+ 0.005
142
+ ],
143
+ "last_epoch": 6,
144
+ "_step_count": 0,
145
+ "_is_initial": false,
146
+ "_get_lr_called_within_step": false,
147
+ "_last_lr": [
148
+ 0.0034555695366224513,
149
+ 0.0034555695366224513,
150
+ 0.0034555695366224513,
151
+ 0.001728112022559819
152
+ ]
153
+ },
154
+ "metrics": {
155
+ "best_val_acc": 66.04266666666666,
156
+ "best_epoch": 5,
157
+ "scale_accuracies": {
158
+ "256": 65.344,
159
+ "512": 65.97266666666667
160
+ },
161
+ "training_history": {
162
+ "epochs": [
163
+ 1,
164
+ 2,
165
+ 3,
166
+ 4,
167
+ 5,
168
+ 6
169
+ ],
170
+ "train_loss": [
171
+ 5.311051666323785,
172
+ 4.462767010682684,
173
+ 4.340839946911445,
174
+ 4.262519323832187,
175
+ 4.204208532545754,
176
+ 4.159249462977202
177
+ ],
178
+ "train_acc": [
179
+ 54.91727464101089,
180
+ 60.04988680892759,
181
+ 61.02839572566782,
182
+ 61.696614622970046,
183
+ 62.27501957199959,
184
+ 62.741287175416375
185
+ ],
186
+ "val_acc": [
187
+ 63.041333333333334,
188
+ 64.17333333333333,
189
+ 64.75866666666667,
190
+ 65.36133333333333,
191
+ 65.65466666666667,
192
+ 66.04266666666666
193
+ ],
194
+ "scale_accs": {
195
+ "256": [
196
+ 62.11666666666667,
197
+ 63.38733333333333,
198
+ 63.992666666666665,
199
+ 64.614,
200
+ 64.958,
201
+ 65.344
202
+ ],
203
+ "512": [
204
+ 62.967333333333336,
205
+ 64.19266666666667,
206
+ 64.73066666666666,
207
+ 65.34666666666666,
208
+ 65.59266666666667,
209
+ 65.97266666666667
210
+ ]
211
+ },
212
+ "lr": [
213
+ 0.00975530705321762,
214
+ 0.00904518046337755,
215
+ 0.00793913236883622,
216
+ 0.00654543046337755,
217
+ 0.005000500000000001,
218
+ 0.0034555695366224513
219
+ ]
220
+ }
221
+ },
222
+ "train_config": {
223
+ "name": "david_training",
224
+ "run_id": "20251012_231445",
225
+ "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
226
+ "model_variant": [
227
+ "clip_vit_b16",
228
+ "clip_vit_laion_b32",
229
+ "clip_vit_b32"
230
+ ],
231
+ "num_classes": 1000,
232
+ "preset": "small_fast",
233
+ "custom_config_path": null,
234
+ "num_classes_override": null,
235
+ "use_belly_override": null,
236
+ "belly_expand_override": null,
237
+ "progressive_training_override": true,
238
+ "scale_warmup_epochs_override": {
239
+ "256": 0,
240
+ "512": 0
241
+ },
242
+ "num_epochs": 10,
243
+ "batch_size": 1024,
244
+ "learning_rate": 0.01,
245
+ "weight_decay": 1e-05,
246
+ "warmup_epochs": 3,
247
+ "use_rose_loss": true,
248
+ "rose_initial_weight": 0.2,
249
+ "rose_max_weight": 0.6,
250
+ "rose_weight_schedule": "adaptive",
251
+ "use_cayley_loss": false,
252
+ "cayley_weight": 0.01,
253
+ "scale_loss_balance": null,
254
+ "use_mixed_precision": false,
255
+ "gradient_clip": 5.0,
256
+ "scheduler_type": "cosine_restarts",
257
+ "min_lr": 1e-06,
258
+ "freeze_strategy": "never",
259
+ "freeze_threshold": 90.0,
260
+ "unfreeze_on_plateau": true,
261
+ "patience": 10,
262
+ "track_gradients": true,
263
+ "gradient_scale_threshold": 1e-05,
264
+ "gradient_scale_multiplier": 10.0,
265
+ "log_interval": 50,
266
+ "val_interval": 1,
267
+ "save_interval": 5,
268
+ "log_fusion_weights": true,
269
+ "log_loss_components": true,
270
+ "save_format": "safetensors",
271
+ "hf_repo": "AbstractPhil/david-shared-space",
272
+ "upload_to_hub": true,
273
+ "base_dir": "./david_training",
274
+ "num_workers": 10,
275
+ "pin_memory": true,
276
+ "prefetch_factor": 4,
277
+ "persistent_workers": true
278
+ }
279
+ }