| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.4831883251890403, |
| "eval_steps": 500, |
| "global_step": 112000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.996689311774132e-05, |
| "loss": 2.8505, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9933786235482635e-05, |
| "loss": 2.0552, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.990067935322395e-05, |
| "loss": 1.8823, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.986757247096527e-05, |
| "loss": 1.7906, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.9834465588706583e-05, |
| "loss": 1.7442, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.98013587064479e-05, |
| "loss": 1.6741, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9768251824189216e-05, |
| "loss": 1.6635, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9735144941930525e-05, |
| "loss": 1.6282, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.970203805967185e-05, |
| "loss": 1.5507, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.9668931177413165e-05, |
| "loss": 1.5326, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.963582429515448e-05, |
| "loss": 1.5385, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.96027174128958e-05, |
| "loss": 1.5191, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.956961053063711e-05, |
| "loss": 1.5043, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.953650364837843e-05, |
| "loss": 1.4498, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.950339676611974e-05, |
| "loss": 1.4116, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.947028988386106e-05, |
| "loss": 1.4536, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.943718300160238e-05, |
| "loss": 1.4486, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.940407611934369e-05, |
| "loss": 1.3881, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.937096923708501e-05, |
| "loss": 1.3976, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.933786235482633e-05, |
| "loss": 1.4139, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.9304755472567636e-05, |
| "loss": 1.3606, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.927164859030895e-05, |
| "loss": 1.3726, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9238541708050275e-05, |
| "loss": 1.3495, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.9205434825791585e-05, |
| "loss": 1.3682, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.91723279435329e-05, |
| "loss": 1.3387, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.9139221061274224e-05, |
| "loss": 1.3478, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.910611417901554e-05, |
| "loss": 1.3471, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.907300729675685e-05, |
| "loss": 1.3502, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.9039900414498166e-05, |
| "loss": 1.3192, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.900679353223949e-05, |
| "loss": 1.3058, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.89736866499808e-05, |
| "loss": 1.2715, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.8940579767722115e-05, |
| "loss": 1.2697, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.890747288546344e-05, |
| "loss": 1.2916, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.887436600320475e-05, |
| "loss": 1.3412, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.884125912094606e-05, |
| "loss": 1.2475, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.880815223868738e-05, |
| "loss": 1.2941, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.8775045356428696e-05, |
| "loss": 1.2782, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.874193847417001e-05, |
| "loss": 1.2175, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.870883159191133e-05, |
| "loss": 1.2491, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.8675724709652644e-05, |
| "loss": 1.2523, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.864261782739396e-05, |
| "loss": 1.2281, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 4.860951094513528e-05, |
| "loss": 1.2957, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 4.857640406287659e-05, |
| "loss": 1.2399, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 4.854329718061791e-05, |
| "loss": 1.1924, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 4.8510190298359225e-05, |
| "loss": 1.2297, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 4.847708341610054e-05, |
| "loss": 1.2049, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.844397653384186e-05, |
| "loss": 1.239, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 4.8410869651583174e-05, |
| "loss": 1.2048, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 4.837776276932449e-05, |
| "loss": 1.2137, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 4.8344655887065806e-05, |
| "loss": 1.2287, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 4.831154900480712e-05, |
| "loss": 1.2014, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 4.827844212254844e-05, |
| "loss": 1.1585, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 4.8245335240289755e-05, |
| "loss": 1.1711, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 4.821222835803107e-05, |
| "loss": 1.1586, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 4.817912147577239e-05, |
| "loss": 1.2042, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.81460145935137e-05, |
| "loss": 1.1517, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.811290771125502e-05, |
| "loss": 1.1824, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.8079800828996336e-05, |
| "loss": 1.1687, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.8046693946737646e-05, |
| "loss": 1.1349, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.801358706447897e-05, |
| "loss": 1.178, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.7980480182220285e-05, |
| "loss": 1.1487, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.79473732999616e-05, |
| "loss": 1.1274, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.791426641770291e-05, |
| "loss": 1.1583, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.7881159535444233e-05, |
| "loss": 1.0724, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 4.784805265318555e-05, |
| "loss": 1.1455, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.781494577092686e-05, |
| "loss": 1.0937, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.778183888866818e-05, |
| "loss": 1.1291, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 4.77487320064095e-05, |
| "loss": 1.0853, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.771562512415081e-05, |
| "loss": 1.1579, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.7682518241892124e-05, |
| "loss": 1.1043, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.764941135963345e-05, |
| "loss": 1.1075, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.7616304477374756e-05, |
| "loss": 1.0889, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.758319759511607e-05, |
| "loss": 1.0683, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.7550090712857396e-05, |
| "loss": 1.088, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.7516983830598705e-05, |
| "loss": 1.0876, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.748387694834002e-05, |
| "loss": 1.0683, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 4.745077006608134e-05, |
| "loss": 1.1057, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.7417663183822654e-05, |
| "loss": 1.0698, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.738455630156397e-05, |
| "loss": 1.0771, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.7351449419305286e-05, |
| "loss": 1.0594, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.731834253704661e-05, |
| "loss": 1.0968, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.728523565478792e-05, |
| "loss": 1.0522, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.7252128772529235e-05, |
| "loss": 1.0144, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.721902189027055e-05, |
| "loss": 1.1111, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.718591500801187e-05, |
| "loss": 1.0307, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.715280812575318e-05, |
| "loss": 1.0248, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.71197012434945e-05, |
| "loss": 1.0452, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.7086594361235816e-05, |
| "loss": 1.0108, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 4.705348747897713e-05, |
| "loss": 0.9867, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 4.702038059671845e-05, |
| "loss": 0.9852, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 4.6987273714459764e-05, |
| "loss": 1.023, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 4.695416683220108e-05, |
| "loss": 1.0288, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 4.69210599499424e-05, |
| "loss": 0.9871, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 4.688795306768371e-05, |
| "loss": 0.9374, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 4.685484618542503e-05, |
| "loss": 0.9943, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 4.6821739303166346e-05, |
| "loss": 1.0175, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 4.678863242090766e-05, |
| "loss": 1.0241, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 4.675552553864898e-05, |
| "loss": 0.9686, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 4.6722418656390294e-05, |
| "loss": 0.9604, |
| "step": 49500 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 4.668931177413161e-05, |
| "loss": 0.9428, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 4.6656204891872927e-05, |
| "loss": 0.9422, |
| "step": 50500 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.662309800961424e-05, |
| "loss": 0.9291, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.658999112735556e-05, |
| "loss": 0.9709, |
| "step": 51500 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.655688424509687e-05, |
| "loss": 0.8939, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.652377736283819e-05, |
| "loss": 0.9091, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.649067048057951e-05, |
| "loss": 0.8881, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.645756359832082e-05, |
| "loss": 0.9033, |
| "step": 53500 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 4.642445671606214e-05, |
| "loss": 0.9164, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 4.6391349833803456e-05, |
| "loss": 0.8774, |
| "step": 54500 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 4.6358242951544766e-05, |
| "loss": 0.8821, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 4.632513606928608e-05, |
| "loss": 0.9005, |
| "step": 55500 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 4.6292029187027405e-05, |
| "loss": 0.8919, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 4.6258922304768714e-05, |
| "loss": 0.8867, |
| "step": 56500 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 4.622581542251003e-05, |
| "loss": 0.8683, |
| "step": 57000 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 4.6192708540251354e-05, |
| "loss": 0.8654, |
| "step": 57500 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 4.615960165799267e-05, |
| "loss": 0.8778, |
| "step": 58000 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 4.612649477573398e-05, |
| "loss": 0.8472, |
| "step": 58500 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 4.6093387893475295e-05, |
| "loss": 0.8738, |
| "step": 59000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 4.606028101121662e-05, |
| "loss": 0.8645, |
| "step": 59500 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 4.602717412895793e-05, |
| "loss": 0.8331, |
| "step": 60000 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 4.5994067246699244e-05, |
| "loss": 0.8553, |
| "step": 60500 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 4.596096036444057e-05, |
| "loss": 0.8358, |
| "step": 61000 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 4.5927853482181877e-05, |
| "loss": 0.8024, |
| "step": 61500 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 4.589474659992319e-05, |
| "loss": 0.7889, |
| "step": 62000 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 4.586163971766451e-05, |
| "loss": 0.8082, |
| "step": 62500 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 4.5828532835405825e-05, |
| "loss": 0.8247, |
| "step": 63000 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 4.579542595314714e-05, |
| "loss": 0.8027, |
| "step": 63500 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 4.576231907088846e-05, |
| "loss": 0.7913, |
| "step": 64000 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 4.5729212188629774e-05, |
| "loss": 0.7875, |
| "step": 64500 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 4.569610530637109e-05, |
| "loss": 0.7853, |
| "step": 65000 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.5662998424112406e-05, |
| "loss": 0.809, |
| "step": 65500 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.562989154185372e-05, |
| "loss": 0.7814, |
| "step": 66000 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 4.559678465959504e-05, |
| "loss": 0.7879, |
| "step": 66500 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 4.5563677777336355e-05, |
| "loss": 0.7588, |
| "step": 67000 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 4.553057089507767e-05, |
| "loss": 0.7654, |
| "step": 67500 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 4.549746401281899e-05, |
| "loss": 0.7426, |
| "step": 68000 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.5464357130560304e-05, |
| "loss": 0.7339, |
| "step": 68500 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.543125024830162e-05, |
| "loss": 0.7302, |
| "step": 69000 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 4.5398143366042936e-05, |
| "loss": 0.7671, |
| "step": 69500 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 4.536503648378425e-05, |
| "loss": 0.741, |
| "step": 70000 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 4.533192960152557e-05, |
| "loss": 0.7094, |
| "step": 70500 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 4.5298822719266885e-05, |
| "loss": 0.6589, |
| "step": 71000 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 4.52657158370082e-05, |
| "loss": 0.7308, |
| "step": 71500 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 4.523260895474952e-05, |
| "loss": 0.7093, |
| "step": 72000 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 4.5199502072490827e-05, |
| "loss": 0.6963, |
| "step": 72500 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 4.516639519023215e-05, |
| "loss": 0.6853, |
| "step": 73000 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 4.5133288307973466e-05, |
| "loss": 0.7045, |
| "step": 73500 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 4.5100181425714775e-05, |
| "loss": 0.6737, |
| "step": 74000 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 4.50670745434561e-05, |
| "loss": 0.6671, |
| "step": 74500 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 4.5033967661197414e-05, |
| "loss": 0.7017, |
| "step": 75000 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.500086077893873e-05, |
| "loss": 0.6869, |
| "step": 75500 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.496775389668004e-05, |
| "loss": 0.6791, |
| "step": 76000 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.493464701442136e-05, |
| "loss": 0.6988, |
| "step": 76500 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.490154013216268e-05, |
| "loss": 0.6329, |
| "step": 77000 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 4.486843324990399e-05, |
| "loss": 0.6474, |
| "step": 77500 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 4.483532636764531e-05, |
| "loss": 0.6792, |
| "step": 78000 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 4.480221948538663e-05, |
| "loss": 0.6872, |
| "step": 78500 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 4.476911260312794e-05, |
| "loss": 0.6592, |
| "step": 79000 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 4.4736005720869254e-05, |
| "loss": 0.6289, |
| "step": 79500 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 4.4702898838610577e-05, |
| "loss": 0.6262, |
| "step": 80000 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 4.4669791956351886e-05, |
| "loss": 0.6517, |
| "step": 80500 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 4.46366850740932e-05, |
| "loss": 0.6525, |
| "step": 81000 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 4.4603578191834525e-05, |
| "loss": 0.6421, |
| "step": 81500 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 4.4570471309575835e-05, |
| "loss": 0.6109, |
| "step": 82000 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 4.453736442731715e-05, |
| "loss": 0.6616, |
| "step": 82500 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 4.450425754505847e-05, |
| "loss": 0.6213, |
| "step": 83000 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 4.447115066279979e-05, |
| "loss": 0.623, |
| "step": 83500 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 4.44380437805411e-05, |
| "loss": 0.6034, |
| "step": 84000 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 4.4404936898282416e-05, |
| "loss": 0.589, |
| "step": 84500 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 4.437183001602374e-05, |
| "loss": 0.6605, |
| "step": 85000 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 4.433872313376505e-05, |
| "loss": 0.6419, |
| "step": 85500 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 4.4305616251506364e-05, |
| "loss": 0.636, |
| "step": 86000 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 4.427250936924768e-05, |
| "loss": 0.6244, |
| "step": 86500 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 4.4239402486989e-05, |
| "loss": 0.62, |
| "step": 87000 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 4.420629560473031e-05, |
| "loss": 0.5971, |
| "step": 87500 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 4.417318872247163e-05, |
| "loss": 0.6287, |
| "step": 88000 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 4.4140081840212945e-05, |
| "loss": 0.5951, |
| "step": 88500 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 4.410697495795426e-05, |
| "loss": 0.5648, |
| "step": 89000 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 4.407386807569558e-05, |
| "loss": 0.6369, |
| "step": 89500 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 4.4040761193436894e-05, |
| "loss": 0.6173, |
| "step": 90000 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 4.400765431117821e-05, |
| "loss": 0.5884, |
| "step": 90500 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 4.3974547428919526e-05, |
| "loss": 0.5841, |
| "step": 91000 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 4.3941440546660836e-05, |
| "loss": 0.5925, |
| "step": 91500 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 4.390833366440216e-05, |
| "loss": 0.5721, |
| "step": 92000 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 4.3875226782143475e-05, |
| "loss": 0.5779, |
| "step": 92500 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 4.384211989988479e-05, |
| "loss": 0.562, |
| "step": 93000 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 4.380901301762611e-05, |
| "loss": 0.5614, |
| "step": 93500 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 4.3775906135367424e-05, |
| "loss": 0.5836, |
| "step": 94000 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 4.374279925310874e-05, |
| "loss": 0.5902, |
| "step": 94500 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 4.370969237085005e-05, |
| "loss": 0.5607, |
| "step": 95000 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 4.367658548859137e-05, |
| "loss": 0.5557, |
| "step": 95500 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 4.364347860633269e-05, |
| "loss": 0.5492, |
| "step": 96000 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 4.3610371724074e-05, |
| "loss": 0.5815, |
| "step": 96500 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 4.357726484181532e-05, |
| "loss": 0.5609, |
| "step": 97000 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 4.354415795955664e-05, |
| "loss": 0.5671, |
| "step": 97500 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 4.351105107729795e-05, |
| "loss": 0.5487, |
| "step": 98000 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 4.347794419503926e-05, |
| "loss": 0.5282, |
| "step": 98500 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 4.3444837312780586e-05, |
| "loss": 0.5501, |
| "step": 99000 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 4.3411730430521895e-05, |
| "loss": 0.5601, |
| "step": 99500 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 4.337862354826321e-05, |
| "loss": 0.5708, |
| "step": 100000 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 4.3345516666004535e-05, |
| "loss": 0.5609, |
| "step": 100500 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 4.331240978374585e-05, |
| "loss": 0.5328, |
| "step": 101000 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 4.327930290148716e-05, |
| "loss": 0.5491, |
| "step": 101500 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 4.324619601922848e-05, |
| "loss": 0.5452, |
| "step": 102000 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 4.32130891369698e-05, |
| "loss": 0.5396, |
| "step": 102500 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 4.317998225471111e-05, |
| "loss": 0.5432, |
| "step": 103000 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 4.3146875372452425e-05, |
| "loss": 0.5524, |
| "step": 103500 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 4.311376849019375e-05, |
| "loss": 0.521, |
| "step": 104000 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 4.308066160793506e-05, |
| "loss": 0.5557, |
| "step": 104500 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 4.3047554725676374e-05, |
| "loss": 0.5342, |
| "step": 105000 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 4.30144478434177e-05, |
| "loss": 0.535, |
| "step": 105500 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 4.2981340961159006e-05, |
| "loss": 0.5467, |
| "step": 106000 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 4.294823407890032e-05, |
| "loss": 0.5082, |
| "step": 106500 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 4.291512719664164e-05, |
| "loss": 0.5267, |
| "step": 107000 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 4.2882020314382955e-05, |
| "loss": 0.5532, |
| "step": 107500 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 4.284891343212427e-05, |
| "loss": 0.5548, |
| "step": 108000 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 4.281580654986559e-05, |
| "loss": 0.5538, |
| "step": 108500 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 4.2782699667606903e-05, |
| "loss": 0.5501, |
| "step": 109000 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 4.274959278534822e-05, |
| "loss": 0.525, |
| "step": 109500 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 4.2716485903089536e-05, |
| "loss": 0.5237, |
| "step": 110000 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 4.268337902083085e-05, |
| "loss": 0.4852, |
| "step": 110500 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 4.265027213857217e-05, |
| "loss": 0.507, |
| "step": 111000 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 4.2617165256313485e-05, |
| "loss": 0.5217, |
| "step": 111500 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 4.25840583740548e-05, |
| "loss": 0.5229, |
| "step": 112000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 755130, |
| "num_train_epochs": 10, |
| "save_steps": 2000, |
| "total_flos": 4.16057911148544e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|