| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 24.955603327413776, | |
| "eval_steps": 500, | |
| "global_step": 267000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9906533320871115e-05, | |
| "loss": 6.5864, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.981306664174222e-05, | |
| "loss": 5.4802, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9719599962613325e-05, | |
| "loss": 4.7221, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.962613328348444e-05, | |
| "loss": 4.2025, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.953266660435555e-05, | |
| "loss": 3.8489, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.943919992522666e-05, | |
| "loss": 3.5856, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.934573324609777e-05, | |
| "loss": 3.3865, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.925226656696888e-05, | |
| "loss": 3.2125, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.9158799887839984e-05, | |
| "loss": 3.0721, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.9065333208711096e-05, | |
| "loss": 2.9681, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.897186652958221e-05, | |
| "loss": 2.8801, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.8878399850453314e-05, | |
| "loss": 2.782, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.8784933171324426e-05, | |
| "loss": 2.7237, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.869146649219554e-05, | |
| "loss": 2.6715, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.859799981306664e-05, | |
| "loss": 2.637, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.850453313393775e-05, | |
| "loss": 2.5817, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.841106645480886e-05, | |
| "loss": 2.5346, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.831759977567997e-05, | |
| "loss": 2.49, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.8224133096551085e-05, | |
| "loss": 2.4597, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.813066641742219e-05, | |
| "loss": 2.4381, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.8037199738293296e-05, | |
| "loss": 2.3995, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.794373305916441e-05, | |
| "loss": 2.385, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 4.785026638003552e-05, | |
| "loss": 2.3463, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 4.775679970090663e-05, | |
| "loss": 2.3033, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.766333302177774e-05, | |
| "loss": 2.2915, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 4.756986634264885e-05, | |
| "loss": 2.2722, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 4.7476399663519955e-05, | |
| "loss": 2.2443, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 4.738293298439107e-05, | |
| "loss": 2.2412, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 4.728946630526218e-05, | |
| "loss": 2.22, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 4.7195999626133284e-05, | |
| "loss": 2.19, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 4.7102532947004396e-05, | |
| "loss": 2.1646, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 4.700906626787551e-05, | |
| "loss": 2.1665, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 4.6915599588746614e-05, | |
| "loss": 2.1406, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 4.682213290961772e-05, | |
| "loss": 2.1191, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 4.672866623048883e-05, | |
| "loss": 2.1071, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 4.663519955135994e-05, | |
| "loss": 2.0976, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 4.6541732872231055e-05, | |
| "loss": 2.0935, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 4.644826619310216e-05, | |
| "loss": 2.0682, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 4.6354799513973266e-05, | |
| "loss": 2.0487, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 4.626133283484438e-05, | |
| "loss": 2.0384, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 4.616786615571549e-05, | |
| "loss": 2.0184, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 4.60743994765866e-05, | |
| "loss": 2.0046, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 4.598093279745771e-05, | |
| "loss": 1.9972, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 4.588746611832882e-05, | |
| "loss": 1.9796, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 4.5793999439199925e-05, | |
| "loss": 1.9909, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 4.570053276007104e-05, | |
| "loss": 1.9776, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 4.560706608094214e-05, | |
| "loss": 1.9556, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 4.5513599401813255e-05, | |
| "loss": 1.9427, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 4.542013272268437e-05, | |
| "loss": 1.9452, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 4.532666604355548e-05, | |
| "loss": 1.9365, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 4.5233199364426584e-05, | |
| "loss": 1.925, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 4.513973268529769e-05, | |
| "loss": 1.9045, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 4.50462660061688e-05, | |
| "loss": 1.8886, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 4.4952799327039914e-05, | |
| "loss": 1.9086, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 4.4859332647911026e-05, | |
| "loss": 1.882, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 4.476586596878213e-05, | |
| "loss": 1.8831, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 4.4672399289653237e-05, | |
| "loss": 1.8745, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 4.457893261052435e-05, | |
| "loss": 1.8645, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 4.448546593139546e-05, | |
| "loss": 1.8481, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 4.439199925226657e-05, | |
| "loss": 1.8524, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 4.429853257313768e-05, | |
| "loss": 1.8299, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 4.420506589400879e-05, | |
| "loss": 1.835, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 4.4111599214879896e-05, | |
| "loss": 1.8246, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 4.401813253575101e-05, | |
| "loss": 1.8154, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 4.392466585662211e-05, | |
| "loss": 1.8097, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 4.3831199177493225e-05, | |
| "loss": 1.7977, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 4.373773249836434e-05, | |
| "loss": 1.7857, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 4.364426581923545e-05, | |
| "loss": 1.7933, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 4.3550799140106555e-05, | |
| "loss": 1.774, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 4.345733246097766e-05, | |
| "loss": 1.7747, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 4.336386578184877e-05, | |
| "loss": 1.7663, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 4.3270399102719884e-05, | |
| "loss": 1.7746, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 4.3176932423590996e-05, | |
| "loss": 1.7586, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 4.30834657444621e-05, | |
| "loss": 1.7545, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 4.298999906533321e-05, | |
| "loss": 1.7424, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 4.289653238620432e-05, | |
| "loss": 1.7472, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 4.280306570707543e-05, | |
| "loss": 1.7587, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 4.2709599027946537e-05, | |
| "loss": 1.7486, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 4.261613234881765e-05, | |
| "loss": 1.7288, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 4.252266566968876e-05, | |
| "loss": 1.7361, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 4.2429198990559866e-05, | |
| "loss": 1.7089, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 4.233573231143098e-05, | |
| "loss": 1.7253, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 4.2242265632302084e-05, | |
| "loss": 1.7134, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 4.2148798953173196e-05, | |
| "loss": 1.6936, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 4.205533227404431e-05, | |
| "loss": 1.7115, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 4.196186559491542e-05, | |
| "loss": 1.6981, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 4.1868398915786525e-05, | |
| "loss": 1.6913, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 4.177493223665763e-05, | |
| "loss": 1.6916, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 4.168146555752874e-05, | |
| "loss": 1.6953, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 4.1587998878399855e-05, | |
| "loss": 1.6778, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 4.149453219927097e-05, | |
| "loss": 1.6706, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 4.140106552014207e-05, | |
| "loss": 1.6703, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 4.130759884101318e-05, | |
| "loss": 1.6639, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "learning_rate": 4.121413216188429e-05, | |
| "loss": 1.6728, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 4.11206654827554e-05, | |
| "loss": 1.6553, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 4.102719880362651e-05, | |
| "loss": 1.6433, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 4.093373212449762e-05, | |
| "loss": 1.6499, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 4.084026544536873e-05, | |
| "loss": 1.6553, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 4.0746798766239837e-05, | |
| "loss": 1.6401, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 4.065333208711095e-05, | |
| "loss": 1.6444, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 4.0559865407982054e-05, | |
| "loss": 1.6398, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 4.0466398728853166e-05, | |
| "loss": 1.6338, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 4.037293204972428e-05, | |
| "loss": 1.6194, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 4.027946537059539e-05, | |
| "loss": 1.6327, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 4.018599869146649e-05, | |
| "loss": 1.6232, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 4.00925320123376e-05, | |
| "loss": 1.6296, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 3.999906533320871e-05, | |
| "loss": 1.6152, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 3.9905598654079825e-05, | |
| "loss": 1.6012, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "learning_rate": 3.981213197495093e-05, | |
| "loss": 1.6087, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 3.971866529582204e-05, | |
| "loss": 1.5971, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 3.962519861669315e-05, | |
| "loss": 1.5956, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 3.953173193756426e-05, | |
| "loss": 1.5947, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 3.943826525843537e-05, | |
| "loss": 1.5993, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 3.934479857930648e-05, | |
| "loss": 1.5816, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "learning_rate": 3.925133190017759e-05, | |
| "loss": 1.5837, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 3.91578652210487e-05, | |
| "loss": 1.5854, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 3.906439854191981e-05, | |
| "loss": 1.5734, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 3.897093186279092e-05, | |
| "loss": 1.578, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 3.8877465183662024e-05, | |
| "loss": 1.5817, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "learning_rate": 3.8783998504533137e-05, | |
| "loss": 1.578, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 3.869053182540425e-05, | |
| "loss": 1.5732, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 3.8597065146275354e-05, | |
| "loss": 1.5606, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 3.850359846714646e-05, | |
| "loss": 1.5709, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 3.841013178801757e-05, | |
| "loss": 1.5607, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 3.8316665108888684e-05, | |
| "loss": 1.5687, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 3.8223198429759796e-05, | |
| "loss": 1.5488, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 3.81297317506309e-05, | |
| "loss": 1.5601, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 3.803626507150201e-05, | |
| "loss": 1.5611, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "learning_rate": 3.794279839237312e-05, | |
| "loss": 1.5515, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 3.784933171324423e-05, | |
| "loss": 1.5412, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 3.775586503411534e-05, | |
| "loss": 1.5434, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 3.766239835498645e-05, | |
| "loss": 1.5372, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "learning_rate": 3.756893167585756e-05, | |
| "loss": 1.5293, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 3.7475464996728665e-05, | |
| "loss": 1.5335, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "learning_rate": 3.738199831759978e-05, | |
| "loss": 1.5296, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "learning_rate": 3.728853163847088e-05, | |
| "loss": 1.5238, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 3.7195064959341995e-05, | |
| "loss": 1.5269, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 3.710159828021311e-05, | |
| "loss": 1.5233, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 3.700813160108422e-05, | |
| "loss": 1.5234, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 3.6914664921955325e-05, | |
| "loss": 1.5277, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "learning_rate": 3.682119824282643e-05, | |
| "loss": 1.5185, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "learning_rate": 3.672773156369754e-05, | |
| "loss": 1.5185, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 3.6634264884568654e-05, | |
| "loss": 1.5316, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "learning_rate": 3.6540798205439766e-05, | |
| "loss": 1.5165, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "learning_rate": 3.644733152631087e-05, | |
| "loss": 1.5067, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "learning_rate": 3.635386484718198e-05, | |
| "loss": 1.5108, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 6.87, | |
| "learning_rate": 3.626039816805309e-05, | |
| "loss": 1.4999, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "learning_rate": 3.61669314889242e-05, | |
| "loss": 1.4997, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 3.607346480979531e-05, | |
| "loss": 1.513, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "learning_rate": 3.597999813066642e-05, | |
| "loss": 1.5012, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 3.588653145153753e-05, | |
| "loss": 1.4837, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 3.5793064772408636e-05, | |
| "loss": 1.4824, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "learning_rate": 3.569959809327975e-05, | |
| "loss": 1.4896, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 3.560613141415085e-05, | |
| "loss": 1.4865, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 7.24, | |
| "learning_rate": 3.5512664735021965e-05, | |
| "loss": 1.4937, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "learning_rate": 3.541919805589308e-05, | |
| "loss": 1.4828, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "learning_rate": 3.532573137676419e-05, | |
| "loss": 1.4767, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 7.38, | |
| "learning_rate": 3.5232264697635295e-05, | |
| "loss": 1.4878, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 7.43, | |
| "learning_rate": 3.51387980185064e-05, | |
| "loss": 1.4946, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "learning_rate": 3.504533133937751e-05, | |
| "loss": 1.4756, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "learning_rate": 3.4951864660248625e-05, | |
| "loss": 1.464, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "learning_rate": 3.485839798111974e-05, | |
| "loss": 1.4754, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 7.62, | |
| "learning_rate": 3.476493130199084e-05, | |
| "loss": 1.472, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "learning_rate": 3.467146462286195e-05, | |
| "loss": 1.4716, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "learning_rate": 3.457799794373306e-05, | |
| "loss": 1.4888, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "learning_rate": 3.448453126460417e-05, | |
| "loss": 1.4678, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 7.8, | |
| "learning_rate": 3.439106458547528e-05, | |
| "loss": 1.4694, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "learning_rate": 3.429759790634639e-05, | |
| "loss": 1.4729, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 3.42041312272175e-05, | |
| "loss": 1.4558, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "learning_rate": 3.4110664548088606e-05, | |
| "loss": 1.4597, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 7.99, | |
| "learning_rate": 3.401719786895972e-05, | |
| "loss": 1.449, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "learning_rate": 3.3923731189830824e-05, | |
| "loss": 1.4543, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "learning_rate": 3.3830264510701936e-05, | |
| "loss": 1.4437, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 8.13, | |
| "learning_rate": 3.373679783157305e-05, | |
| "loss": 1.4423, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 8.18, | |
| "learning_rate": 3.364333115244416e-05, | |
| "loss": 1.4417, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 8.23, | |
| "learning_rate": 3.3549864473315265e-05, | |
| "loss": 1.4505, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "learning_rate": 3.345639779418637e-05, | |
| "loss": 1.4426, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "learning_rate": 3.336293111505748e-05, | |
| "loss": 1.4359, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 8.37, | |
| "learning_rate": 3.3269464435928595e-05, | |
| "loss": 1.4463, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 8.41, | |
| "learning_rate": 3.317599775679971e-05, | |
| "loss": 1.4395, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "learning_rate": 3.308253107767081e-05, | |
| "loss": 1.4388, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 8.51, | |
| "learning_rate": 3.298906439854192e-05, | |
| "loss": 1.4395, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 8.55, | |
| "learning_rate": 3.289559771941303e-05, | |
| "loss": 1.4335, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "learning_rate": 3.280213104028414e-05, | |
| "loss": 1.4334, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 8.65, | |
| "learning_rate": 3.270866436115525e-05, | |
| "loss": 1.4457, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 8.69, | |
| "learning_rate": 3.261519768202636e-05, | |
| "loss": 1.4371, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 8.74, | |
| "learning_rate": 3.252173100289747e-05, | |
| "loss": 1.4299, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 8.79, | |
| "learning_rate": 3.242826432376858e-05, | |
| "loss": 1.4414, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 8.83, | |
| "learning_rate": 3.233479764463969e-05, | |
| "loss": 1.4191, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "learning_rate": 3.2241330965510794e-05, | |
| "loss": 1.4261, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 8.93, | |
| "learning_rate": 3.2147864286381906e-05, | |
| "loss": 1.4292, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 8.97, | |
| "learning_rate": 3.205439760725302e-05, | |
| "loss": 1.4245, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "learning_rate": 3.196093092812413e-05, | |
| "loss": 1.4169, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 9.07, | |
| "learning_rate": 3.186746424899523e-05, | |
| "loss": 1.4099, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 9.11, | |
| "learning_rate": 3.177399756986634e-05, | |
| "loss": 1.4087, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 9.16, | |
| "learning_rate": 3.168053089073745e-05, | |
| "loss": 1.4289, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 9.21, | |
| "learning_rate": 3.1587064211608565e-05, | |
| "loss": 1.4251, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "learning_rate": 3.149359753247967e-05, | |
| "loss": 1.4228, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 9.3, | |
| "learning_rate": 3.140013085335078e-05, | |
| "loss": 1.4062, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 9.35, | |
| "learning_rate": 3.130666417422189e-05, | |
| "loss": 1.4032, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 9.39, | |
| "learning_rate": 3.1213197495093e-05, | |
| "loss": 1.4143, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 9.44, | |
| "learning_rate": 3.111973081596411e-05, | |
| "loss": 1.4038, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 9.49, | |
| "learning_rate": 3.102626413683522e-05, | |
| "loss": 1.3984, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 9.53, | |
| "learning_rate": 3.093279745770633e-05, | |
| "loss": 1.4098, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "learning_rate": 3.083933077857744e-05, | |
| "loss": 1.4021, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 9.63, | |
| "learning_rate": 3.074586409944855e-05, | |
| "loss": 1.4041, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "learning_rate": 3.065239742031966e-05, | |
| "loss": 1.3972, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 9.72, | |
| "learning_rate": 3.0558930741190765e-05, | |
| "loss": 1.3955, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 9.77, | |
| "learning_rate": 3.0465464062061877e-05, | |
| "loss": 1.4066, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 9.81, | |
| "learning_rate": 3.037199738293299e-05, | |
| "loss": 1.4019, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 9.86, | |
| "learning_rate": 3.0278530703804098e-05, | |
| "loss": 1.3893, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 9.91, | |
| "learning_rate": 3.0185064024675203e-05, | |
| "loss": 1.3995, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 9.95, | |
| "learning_rate": 3.0091597345546312e-05, | |
| "loss": 1.3958, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 2.9998130666417424e-05, | |
| "loss": 1.3839, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 10.05, | |
| "learning_rate": 2.9904663987288533e-05, | |
| "loss": 1.3878, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 10.09, | |
| "learning_rate": 2.9811197308159645e-05, | |
| "loss": 1.3848, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 10.14, | |
| "learning_rate": 2.9717730629030753e-05, | |
| "loss": 1.3804, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 10.19, | |
| "learning_rate": 2.962426394990186e-05, | |
| "loss": 1.3841, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 10.23, | |
| "learning_rate": 2.953079727077297e-05, | |
| "loss": 1.3878, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 10.28, | |
| "learning_rate": 2.943733059164408e-05, | |
| "loss": 1.3662, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 10.33, | |
| "learning_rate": 2.934386391251519e-05, | |
| "loss": 1.3775, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 10.37, | |
| "learning_rate": 2.92503972333863e-05, | |
| "loss": 1.3757, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 10.42, | |
| "learning_rate": 2.9156930554257412e-05, | |
| "loss": 1.3816, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 10.47, | |
| "learning_rate": 2.9063463875128514e-05, | |
| "loss": 1.3769, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 10.52, | |
| "learning_rate": 2.8969997195999626e-05, | |
| "loss": 1.3824, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 10.56, | |
| "learning_rate": 2.8876530516870735e-05, | |
| "loss": 1.3753, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 10.61, | |
| "learning_rate": 2.8783063837741847e-05, | |
| "loss": 1.3728, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 10.66, | |
| "learning_rate": 2.8689597158612956e-05, | |
| "loss": 1.3699, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 10.7, | |
| "learning_rate": 2.8596130479484068e-05, | |
| "loss": 1.3758, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 10.75, | |
| "learning_rate": 2.8502663800355173e-05, | |
| "loss": 1.3763, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 10.8, | |
| "learning_rate": 2.8409197121226282e-05, | |
| "loss": 1.3677, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 10.84, | |
| "learning_rate": 2.8315730442097394e-05, | |
| "loss": 1.3578, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 10.89, | |
| "learning_rate": 2.8222263762968503e-05, | |
| "loss": 1.3693, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 10.94, | |
| "learning_rate": 2.8128797083839615e-05, | |
| "loss": 1.3726, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 10.98, | |
| "learning_rate": 2.8035330404710724e-05, | |
| "loss": 1.3772, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 11.03, | |
| "learning_rate": 2.794186372558183e-05, | |
| "loss": 1.3472, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 11.08, | |
| "learning_rate": 2.784839704645294e-05, | |
| "loss": 1.3638, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 11.12, | |
| "learning_rate": 2.775493036732405e-05, | |
| "loss": 1.3556, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 11.17, | |
| "learning_rate": 2.7661463688195162e-05, | |
| "loss": 1.3598, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 11.22, | |
| "learning_rate": 2.756799700906627e-05, | |
| "loss": 1.344, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 11.26, | |
| "learning_rate": 2.7474530329937383e-05, | |
| "loss": 1.3532, | |
| "step": 120500 | |
| }, | |
| { | |
| "epoch": 11.31, | |
| "learning_rate": 2.7381063650808485e-05, | |
| "loss": 1.351, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 11.36, | |
| "learning_rate": 2.7287596971679597e-05, | |
| "loss": 1.3555, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 11.4, | |
| "learning_rate": 2.7194130292550706e-05, | |
| "loss": 1.361, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 11.45, | |
| "learning_rate": 2.7100663613421818e-05, | |
| "loss": 1.3472, | |
| "step": 122500 | |
| }, | |
| { | |
| "epoch": 11.5, | |
| "learning_rate": 2.7007196934292926e-05, | |
| "loss": 1.3462, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 11.54, | |
| "learning_rate": 2.691373025516404e-05, | |
| "loss": 1.3539, | |
| "step": 123500 | |
| }, | |
| { | |
| "epoch": 11.59, | |
| "learning_rate": 2.6820263576035144e-05, | |
| "loss": 1.3493, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 11.64, | |
| "learning_rate": 2.6726796896906253e-05, | |
| "loss": 1.3504, | |
| "step": 124500 | |
| }, | |
| { | |
| "epoch": 11.68, | |
| "learning_rate": 2.6633330217777365e-05, | |
| "loss": 1.3548, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 11.73, | |
| "learning_rate": 2.6539863538648473e-05, | |
| "loss": 1.3373, | |
| "step": 125500 | |
| }, | |
| { | |
| "epoch": 11.78, | |
| "learning_rate": 2.6446396859519586e-05, | |
| "loss": 1.3506, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 11.82, | |
| "learning_rate": 2.6352930180390694e-05, | |
| "loss": 1.3431, | |
| "step": 126500 | |
| }, | |
| { | |
| "epoch": 11.87, | |
| "learning_rate": 2.62594635012618e-05, | |
| "loss": 1.3458, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 11.92, | |
| "learning_rate": 2.616599682213291e-05, | |
| "loss": 1.345, | |
| "step": 127500 | |
| }, | |
| { | |
| "epoch": 11.96, | |
| "learning_rate": 2.607253014300402e-05, | |
| "loss": 1.3478, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 12.01, | |
| "learning_rate": 2.597906346387513e-05, | |
| "loss": 1.3453, | |
| "step": 128500 | |
| }, | |
| { | |
| "epoch": 12.06, | |
| "learning_rate": 2.588559678474624e-05, | |
| "loss": 1.3339, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 12.1, | |
| "learning_rate": 2.5792130105617347e-05, | |
| "loss": 1.3325, | |
| "step": 129500 | |
| }, | |
| { | |
| "epoch": 12.15, | |
| "learning_rate": 2.5698663426488455e-05, | |
| "loss": 1.339, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 12.2, | |
| "learning_rate": 2.5605196747359567e-05, | |
| "loss": 1.3329, | |
| "step": 130500 | |
| }, | |
| { | |
| "epoch": 12.24, | |
| "learning_rate": 2.5511730068230676e-05, | |
| "loss": 1.3341, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 12.29, | |
| "learning_rate": 2.5418263389101788e-05, | |
| "loss": 1.3396, | |
| "step": 131500 | |
| }, | |
| { | |
| "epoch": 12.34, | |
| "learning_rate": 2.5324796709972897e-05, | |
| "loss": 1.3341, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 12.38, | |
| "learning_rate": 2.5231330030844002e-05, | |
| "loss": 1.3358, | |
| "step": 132500 | |
| }, | |
| { | |
| "epoch": 12.43, | |
| "learning_rate": 2.5137863351715114e-05, | |
| "loss": 1.3294, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 12.48, | |
| "learning_rate": 2.5044396672586223e-05, | |
| "loss": 1.3339, | |
| "step": 133500 | |
| }, | |
| { | |
| "epoch": 12.52, | |
| "learning_rate": 2.4950929993457335e-05, | |
| "loss": 1.3338, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 12.57, | |
| "learning_rate": 2.4857463314328444e-05, | |
| "loss": 1.324, | |
| "step": 134500 | |
| }, | |
| { | |
| "epoch": 12.62, | |
| "learning_rate": 2.4763996635199553e-05, | |
| "loss": 1.3188, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 12.66, | |
| "learning_rate": 2.467052995607066e-05, | |
| "loss": 1.3244, | |
| "step": 135500 | |
| }, | |
| { | |
| "epoch": 12.71, | |
| "learning_rate": 2.4577063276941773e-05, | |
| "loss": 1.3296, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 12.76, | |
| "learning_rate": 2.448359659781288e-05, | |
| "loss": 1.3148, | |
| "step": 136500 | |
| }, | |
| { | |
| "epoch": 12.8, | |
| "learning_rate": 2.439012991868399e-05, | |
| "loss": 1.3261, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 12.85, | |
| "learning_rate": 2.42966632395551e-05, | |
| "loss": 1.3166, | |
| "step": 137500 | |
| }, | |
| { | |
| "epoch": 12.9, | |
| "learning_rate": 2.420319656042621e-05, | |
| "loss": 1.3137, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 12.95, | |
| "learning_rate": 2.410972988129732e-05, | |
| "loss": 1.3217, | |
| "step": 138500 | |
| }, | |
| { | |
| "epoch": 12.99, | |
| "learning_rate": 2.401626320216843e-05, | |
| "loss": 1.3341, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 13.04, | |
| "learning_rate": 2.3922796523039538e-05, | |
| "loss": 1.3248, | |
| "step": 139500 | |
| }, | |
| { | |
| "epoch": 13.09, | |
| "learning_rate": 2.3829329843910647e-05, | |
| "loss": 1.3087, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 13.13, | |
| "learning_rate": 2.373586316478176e-05, | |
| "loss": 1.3049, | |
| "step": 140500 | |
| }, | |
| { | |
| "epoch": 13.18, | |
| "learning_rate": 2.3642396485652864e-05, | |
| "loss": 1.3074, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 13.23, | |
| "learning_rate": 2.3548929806523976e-05, | |
| "loss": 1.3133, | |
| "step": 141500 | |
| }, | |
| { | |
| "epoch": 13.27, | |
| "learning_rate": 2.3455463127395085e-05, | |
| "loss": 1.3221, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 13.32, | |
| "learning_rate": 2.3361996448266194e-05, | |
| "loss": 1.3113, | |
| "step": 142500 | |
| }, | |
| { | |
| "epoch": 13.37, | |
| "learning_rate": 2.3268529769137302e-05, | |
| "loss": 1.3138, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 13.41, | |
| "learning_rate": 2.3175063090008414e-05, | |
| "loss": 1.3091, | |
| "step": 143500 | |
| }, | |
| { | |
| "epoch": 13.46, | |
| "learning_rate": 2.3081596410879523e-05, | |
| "loss": 1.3132, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 13.51, | |
| "learning_rate": 2.2988129731750632e-05, | |
| "loss": 1.3095, | |
| "step": 144500 | |
| }, | |
| { | |
| "epoch": 13.55, | |
| "learning_rate": 2.2894663052621744e-05, | |
| "loss": 1.3046, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 13.6, | |
| "learning_rate": 2.280119637349285e-05, | |
| "loss": 1.3136, | |
| "step": 145500 | |
| }, | |
| { | |
| "epoch": 13.65, | |
| "learning_rate": 2.270772969436396e-05, | |
| "loss": 1.3067, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 13.69, | |
| "learning_rate": 2.261426301523507e-05, | |
| "loss": 1.3025, | |
| "step": 146500 | |
| }, | |
| { | |
| "epoch": 13.74, | |
| "learning_rate": 2.252079633610618e-05, | |
| "loss": 1.3085, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 13.79, | |
| "learning_rate": 2.2427329656977288e-05, | |
| "loss": 1.2976, | |
| "step": 147500 | |
| }, | |
| { | |
| "epoch": 13.83, | |
| "learning_rate": 2.23338629778484e-05, | |
| "loss": 1.3007, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 13.88, | |
| "learning_rate": 2.224039629871951e-05, | |
| "loss": 1.3138, | |
| "step": 148500 | |
| }, | |
| { | |
| "epoch": 13.93, | |
| "learning_rate": 2.2146929619590617e-05, | |
| "loss": 1.3143, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 13.97, | |
| "learning_rate": 2.205346294046173e-05, | |
| "loss": 1.3029, | |
| "step": 149500 | |
| }, | |
| { | |
| "epoch": 14.02, | |
| "learning_rate": 2.1959996261332835e-05, | |
| "loss": 1.2919, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 14.07, | |
| "learning_rate": 2.1866529582203947e-05, | |
| "loss": 1.2982, | |
| "step": 150500 | |
| }, | |
| { | |
| "epoch": 14.11, | |
| "learning_rate": 2.1773062903075055e-05, | |
| "loss": 1.3012, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 14.16, | |
| "learning_rate": 2.1679596223946164e-05, | |
| "loss": 1.2841, | |
| "step": 151500 | |
| }, | |
| { | |
| "epoch": 14.21, | |
| "learning_rate": 2.1586129544817273e-05, | |
| "loss": 1.3044, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 14.25, | |
| "learning_rate": 2.1492662865688385e-05, | |
| "loss": 1.2973, | |
| "step": 152500 | |
| }, | |
| { | |
| "epoch": 14.3, | |
| "learning_rate": 2.1399196186559494e-05, | |
| "loss": 1.2884, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 14.35, | |
| "learning_rate": 2.1305729507430602e-05, | |
| "loss": 1.2883, | |
| "step": 153500 | |
| }, | |
| { | |
| "epoch": 14.39, | |
| "learning_rate": 2.1212262828301714e-05, | |
| "loss": 1.2993, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 14.44, | |
| "learning_rate": 2.111879614917282e-05, | |
| "loss": 1.2919, | |
| "step": 154500 | |
| }, | |
| { | |
| "epoch": 14.49, | |
| "learning_rate": 2.1025329470043932e-05, | |
| "loss": 1.3026, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 14.53, | |
| "learning_rate": 2.093186279091504e-05, | |
| "loss": 1.2882, | |
| "step": 155500 | |
| }, | |
| { | |
| "epoch": 14.58, | |
| "learning_rate": 2.083839611178615e-05, | |
| "loss": 1.289, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 14.63, | |
| "learning_rate": 2.0744929432657258e-05, | |
| "loss": 1.2917, | |
| "step": 156500 | |
| }, | |
| { | |
| "epoch": 14.67, | |
| "learning_rate": 2.065146275352837e-05, | |
| "loss": 1.2897, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 14.72, | |
| "learning_rate": 2.0557996074399475e-05, | |
| "loss": 1.2859, | |
| "step": 157500 | |
| }, | |
| { | |
| "epoch": 14.77, | |
| "learning_rate": 2.0464529395270588e-05, | |
| "loss": 1.2924, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 14.81, | |
| "learning_rate": 2.0371062716141696e-05, | |
| "loss": 1.2873, | |
| "step": 158500 | |
| }, | |
| { | |
| "epoch": 14.86, | |
| "learning_rate": 2.0277596037012805e-05, | |
| "loss": 1.29, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 14.91, | |
| "learning_rate": 2.0184129357883917e-05, | |
| "loss": 1.2848, | |
| "step": 159500 | |
| }, | |
| { | |
| "epoch": 14.95, | |
| "learning_rate": 2.0090662678755026e-05, | |
| "loss": 1.2831, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "learning_rate": 1.9997195999626135e-05, | |
| "loss": 1.2841, | |
| "step": 160500 | |
| }, | |
| { | |
| "epoch": 15.05, | |
| "learning_rate": 1.9903729320497243e-05, | |
| "loss": 1.2716, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 15.09, | |
| "learning_rate": 1.9810262641368352e-05, | |
| "loss": 1.2795, | |
| "step": 161500 | |
| }, | |
| { | |
| "epoch": 15.14, | |
| "learning_rate": 1.971679596223946e-05, | |
| "loss": 1.2836, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 15.19, | |
| "learning_rate": 1.9623329283110573e-05, | |
| "loss": 1.2854, | |
| "step": 162500 | |
| }, | |
| { | |
| "epoch": 15.24, | |
| "learning_rate": 1.952986260398168e-05, | |
| "loss": 1.2819, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 15.28, | |
| "learning_rate": 1.943639592485279e-05, | |
| "loss": 1.2762, | |
| "step": 163500 | |
| }, | |
| { | |
| "epoch": 15.33, | |
| "learning_rate": 1.9342929245723902e-05, | |
| "loss": 1.2638, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 15.38, | |
| "learning_rate": 1.9249462566595008e-05, | |
| "loss": 1.269, | |
| "step": 164500 | |
| }, | |
| { | |
| "epoch": 15.42, | |
| "learning_rate": 1.915599588746612e-05, | |
| "loss": 1.2691, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 15.47, | |
| "learning_rate": 1.906252920833723e-05, | |
| "loss": 1.2802, | |
| "step": 165500 | |
| }, | |
| { | |
| "epoch": 15.52, | |
| "learning_rate": 1.8969062529208337e-05, | |
| "loss": 1.275, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 15.56, | |
| "learning_rate": 1.8875595850079446e-05, | |
| "loss": 1.278, | |
| "step": 166500 | |
| }, | |
| { | |
| "epoch": 15.61, | |
| "learning_rate": 1.8782129170950558e-05, | |
| "loss": 1.2768, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 15.66, | |
| "learning_rate": 1.8688662491821667e-05, | |
| "loss": 1.2761, | |
| "step": 167500 | |
| }, | |
| { | |
| "epoch": 15.7, | |
| "learning_rate": 1.8595195812692775e-05, | |
| "loss": 1.271, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 15.75, | |
| "learning_rate": 1.8501729133563888e-05, | |
| "loss": 1.2687, | |
| "step": 168500 | |
| }, | |
| { | |
| "epoch": 15.8, | |
| "learning_rate": 1.8408262454434993e-05, | |
| "loss": 1.2644, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 15.84, | |
| "learning_rate": 1.8314795775306105e-05, | |
| "loss": 1.2732, | |
| "step": 169500 | |
| }, | |
| { | |
| "epoch": 15.89, | |
| "learning_rate": 1.8221329096177214e-05, | |
| "loss": 1.2742, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 15.94, | |
| "learning_rate": 1.8127862417048322e-05, | |
| "loss": 1.266, | |
| "step": 170500 | |
| }, | |
| { | |
| "epoch": 15.98, | |
| "learning_rate": 1.803439573791943e-05, | |
| "loss": 1.27, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 16.03, | |
| "learning_rate": 1.7940929058790543e-05, | |
| "loss": 1.2682, | |
| "step": 171500 | |
| }, | |
| { | |
| "epoch": 16.08, | |
| "learning_rate": 1.784746237966165e-05, | |
| "loss": 1.2584, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 16.12, | |
| "learning_rate": 1.775399570053276e-05, | |
| "loss": 1.2702, | |
| "step": 172500 | |
| }, | |
| { | |
| "epoch": 16.17, | |
| "learning_rate": 1.766052902140387e-05, | |
| "loss": 1.2602, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 16.22, | |
| "learning_rate": 1.7567062342274978e-05, | |
| "loss": 1.2595, | |
| "step": 173500 | |
| }, | |
| { | |
| "epoch": 16.26, | |
| "learning_rate": 1.747359566314609e-05, | |
| "loss": 1.261, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 16.31, | |
| "learning_rate": 1.73801289840172e-05, | |
| "loss": 1.2556, | |
| "step": 174500 | |
| }, | |
| { | |
| "epoch": 16.36, | |
| "learning_rate": 1.7286662304888308e-05, | |
| "loss": 1.2722, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 16.4, | |
| "learning_rate": 1.7193195625759416e-05, | |
| "loss": 1.2553, | |
| "step": 175500 | |
| }, | |
| { | |
| "epoch": 16.45, | |
| "learning_rate": 1.709972894663053e-05, | |
| "loss": 1.2577, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 16.5, | |
| "learning_rate": 1.7006262267501634e-05, | |
| "loss": 1.2607, | |
| "step": 176500 | |
| }, | |
| { | |
| "epoch": 16.54, | |
| "learning_rate": 1.6912795588372746e-05, | |
| "loss": 1.2646, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 16.59, | |
| "learning_rate": 1.6819328909243855e-05, | |
| "loss": 1.267, | |
| "step": 177500 | |
| }, | |
| { | |
| "epoch": 16.64, | |
| "learning_rate": 1.6725862230114963e-05, | |
| "loss": 1.2596, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 16.68, | |
| "learning_rate": 1.6632395550986075e-05, | |
| "loss": 1.2553, | |
| "step": 178500 | |
| }, | |
| { | |
| "epoch": 16.73, | |
| "learning_rate": 1.6538928871857184e-05, | |
| "loss": 1.2538, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 16.78, | |
| "learning_rate": 1.6445462192728293e-05, | |
| "loss": 1.2626, | |
| "step": 179500 | |
| }, | |
| { | |
| "epoch": 16.82, | |
| "learning_rate": 1.63519955135994e-05, | |
| "loss": 1.2551, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 16.87, | |
| "learning_rate": 1.6258528834470514e-05, | |
| "loss": 1.2569, | |
| "step": 180500 | |
| }, | |
| { | |
| "epoch": 16.92, | |
| "learning_rate": 1.616506215534162e-05, | |
| "loss": 1.2535, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 16.96, | |
| "learning_rate": 1.607159547621273e-05, | |
| "loss": 1.2591, | |
| "step": 181500 | |
| }, | |
| { | |
| "epoch": 17.01, | |
| "learning_rate": 1.597812879708384e-05, | |
| "loss": 1.2508, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 17.06, | |
| "learning_rate": 1.588466211795495e-05, | |
| "loss": 1.2517, | |
| "step": 182500 | |
| }, | |
| { | |
| "epoch": 17.1, | |
| "learning_rate": 1.579119543882606e-05, | |
| "loss": 1.2546, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 17.15, | |
| "learning_rate": 1.569772875969717e-05, | |
| "loss": 1.241, | |
| "step": 183500 | |
| }, | |
| { | |
| "epoch": 17.2, | |
| "learning_rate": 1.5604262080568278e-05, | |
| "loss": 1.2421, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 17.24, | |
| "learning_rate": 1.5510795401439387e-05, | |
| "loss": 1.243, | |
| "step": 184500 | |
| }, | |
| { | |
| "epoch": 17.29, | |
| "learning_rate": 1.54173287223105e-05, | |
| "loss": 1.2459, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 17.34, | |
| "learning_rate": 1.5323862043181604e-05, | |
| "loss": 1.2497, | |
| "step": 185500 | |
| }, | |
| { | |
| "epoch": 17.38, | |
| "learning_rate": 1.5230395364052716e-05, | |
| "loss": 1.2433, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 17.43, | |
| "learning_rate": 1.5136928684923827e-05, | |
| "loss": 1.2497, | |
| "step": 186500 | |
| }, | |
| { | |
| "epoch": 17.48, | |
| "learning_rate": 1.5043462005794934e-05, | |
| "loss": 1.2416, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 17.53, | |
| "learning_rate": 1.4949995326666044e-05, | |
| "loss": 1.2457, | |
| "step": 187500 | |
| }, | |
| { | |
| "epoch": 17.57, | |
| "learning_rate": 1.4856528647537155e-05, | |
| "loss": 1.2516, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 17.62, | |
| "learning_rate": 1.4763061968408262e-05, | |
| "loss": 1.2492, | |
| "step": 188500 | |
| }, | |
| { | |
| "epoch": 17.67, | |
| "learning_rate": 1.4669595289279372e-05, | |
| "loss": 1.2462, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 17.71, | |
| "learning_rate": 1.4576128610150482e-05, | |
| "loss": 1.2485, | |
| "step": 189500 | |
| }, | |
| { | |
| "epoch": 17.76, | |
| "learning_rate": 1.4482661931021591e-05, | |
| "loss": 1.2398, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 17.81, | |
| "learning_rate": 1.4389195251892702e-05, | |
| "loss": 1.2464, | |
| "step": 190500 | |
| }, | |
| { | |
| "epoch": 17.85, | |
| "learning_rate": 1.4295728572763812e-05, | |
| "loss": 1.2444, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 17.9, | |
| "learning_rate": 1.4202261893634919e-05, | |
| "loss": 1.2464, | |
| "step": 191500 | |
| }, | |
| { | |
| "epoch": 17.95, | |
| "learning_rate": 1.410879521450603e-05, | |
| "loss": 1.2507, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 17.99, | |
| "learning_rate": 1.401532853537714e-05, | |
| "loss": 1.2374, | |
| "step": 192500 | |
| }, | |
| { | |
| "epoch": 18.04, | |
| "learning_rate": 1.3921861856248247e-05, | |
| "loss": 1.2439, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 18.09, | |
| "learning_rate": 1.3828395177119357e-05, | |
| "loss": 1.2455, | |
| "step": 193500 | |
| }, | |
| { | |
| "epoch": 18.13, | |
| "learning_rate": 1.3734928497990468e-05, | |
| "loss": 1.2368, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 18.18, | |
| "learning_rate": 1.3641461818861575e-05, | |
| "loss": 1.2434, | |
| "step": 194500 | |
| }, | |
| { | |
| "epoch": 18.23, | |
| "learning_rate": 1.3547995139732685e-05, | |
| "loss": 1.2292, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 18.27, | |
| "learning_rate": 1.3454528460603796e-05, | |
| "loss": 1.2316, | |
| "step": 195500 | |
| }, | |
| { | |
| "epoch": 18.32, | |
| "learning_rate": 1.3361061781474904e-05, | |
| "loss": 1.2312, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 18.37, | |
| "learning_rate": 1.3267595102346015e-05, | |
| "loss": 1.229, | |
| "step": 196500 | |
| }, | |
| { | |
| "epoch": 18.41, | |
| "learning_rate": 1.3174128423217125e-05, | |
| "loss": 1.2375, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 18.46, | |
| "learning_rate": 1.3080661744088232e-05, | |
| "loss": 1.2346, | |
| "step": 197500 | |
| }, | |
| { | |
| "epoch": 18.51, | |
| "learning_rate": 1.2987195064959343e-05, | |
| "loss": 1.2334, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 18.55, | |
| "learning_rate": 1.2893728385830453e-05, | |
| "loss": 1.2317, | |
| "step": 198500 | |
| }, | |
| { | |
| "epoch": 18.6, | |
| "learning_rate": 1.280026170670156e-05, | |
| "loss": 1.2342, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 18.65, | |
| "learning_rate": 1.270679502757267e-05, | |
| "loss": 1.2327, | |
| "step": 199500 | |
| }, | |
| { | |
| "epoch": 18.69, | |
| "learning_rate": 1.261332834844378e-05, | |
| "loss": 1.2303, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 18.74, | |
| "learning_rate": 1.251986166931489e-05, | |
| "loss": 1.2406, | |
| "step": 200500 | |
| }, | |
| { | |
| "epoch": 18.79, | |
| "learning_rate": 1.2426394990186e-05, | |
| "loss": 1.2306, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 18.83, | |
| "learning_rate": 1.2332928311057109e-05, | |
| "loss": 1.2289, | |
| "step": 201500 | |
| }, | |
| { | |
| "epoch": 18.88, | |
| "learning_rate": 1.2239461631928219e-05, | |
| "loss": 1.231, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 18.93, | |
| "learning_rate": 1.2145994952799328e-05, | |
| "loss": 1.227, | |
| "step": 202500 | |
| }, | |
| { | |
| "epoch": 18.97, | |
| "learning_rate": 1.2052528273670437e-05, | |
| "loss": 1.2329, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 19.02, | |
| "learning_rate": 1.1959061594541547e-05, | |
| "loss": 1.2278, | |
| "step": 203500 | |
| }, | |
| { | |
| "epoch": 19.07, | |
| "learning_rate": 1.1865594915412656e-05, | |
| "loss": 1.2342, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 19.11, | |
| "learning_rate": 1.1772128236283764e-05, | |
| "loss": 1.2174, | |
| "step": 204500 | |
| }, | |
| { | |
| "epoch": 19.16, | |
| "learning_rate": 1.1678661557154875e-05, | |
| "loss": 1.2299, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 19.21, | |
| "learning_rate": 1.1585194878025985e-05, | |
| "loss": 1.2276, | |
| "step": 205500 | |
| }, | |
| { | |
| "epoch": 19.25, | |
| "learning_rate": 1.1491728198897094e-05, | |
| "loss": 1.2266, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 19.3, | |
| "learning_rate": 1.1398261519768204e-05, | |
| "loss": 1.2229, | |
| "step": 206500 | |
| }, | |
| { | |
| "epoch": 19.35, | |
| "learning_rate": 1.1304794840639313e-05, | |
| "loss": 1.2258, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 19.39, | |
| "learning_rate": 1.1211328161510422e-05, | |
| "loss": 1.2275, | |
| "step": 207500 | |
| }, | |
| { | |
| "epoch": 19.44, | |
| "learning_rate": 1.1117861482381532e-05, | |
| "loss": 1.2148, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 19.49, | |
| "learning_rate": 1.1024394803252641e-05, | |
| "loss": 1.2229, | |
| "step": 208500 | |
| }, | |
| { | |
| "epoch": 19.53, | |
| "learning_rate": 1.093092812412375e-05, | |
| "loss": 1.2218, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 19.58, | |
| "learning_rate": 1.083746144499486e-05, | |
| "loss": 1.2114, | |
| "step": 209500 | |
| }, | |
| { | |
| "epoch": 19.63, | |
| "learning_rate": 1.0743994765865969e-05, | |
| "loss": 1.2243, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 19.67, | |
| "learning_rate": 1.0650528086737079e-05, | |
| "loss": 1.2211, | |
| "step": 210500 | |
| }, | |
| { | |
| "epoch": 19.72, | |
| "learning_rate": 1.0557061407608188e-05, | |
| "loss": 1.2223, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 19.77, | |
| "learning_rate": 1.0463594728479298e-05, | |
| "loss": 1.2263, | |
| "step": 211500 | |
| }, | |
| { | |
| "epoch": 19.81, | |
| "learning_rate": 1.0370128049350407e-05, | |
| "loss": 1.2184, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 19.86, | |
| "learning_rate": 1.0276661370221516e-05, | |
| "loss": 1.2193, | |
| "step": 212500 | |
| }, | |
| { | |
| "epoch": 19.91, | |
| "learning_rate": 1.0183194691092626e-05, | |
| "loss": 1.2147, | |
| "step": 213000 | |
| }, | |
| { | |
| "epoch": 19.96, | |
| "learning_rate": 1.0089728011963735e-05, | |
| "loss": 1.2179, | |
| "step": 213500 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 9.996261332834844e-06, | |
| "loss": 1.2093, | |
| "step": 214000 | |
| }, | |
| { | |
| "epoch": 20.05, | |
| "learning_rate": 9.902794653705954e-06, | |
| "loss": 1.2069, | |
| "step": 214500 | |
| }, | |
| { | |
| "epoch": 20.1, | |
| "learning_rate": 9.809327974577064e-06, | |
| "loss": 1.2147, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 20.14, | |
| "learning_rate": 9.715861295448173e-06, | |
| "loss": 1.2125, | |
| "step": 215500 | |
| }, | |
| { | |
| "epoch": 20.19, | |
| "learning_rate": 9.622394616319283e-06, | |
| "loss": 1.2221, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 20.24, | |
| "learning_rate": 9.528927937190392e-06, | |
| "loss": 1.2056, | |
| "step": 216500 | |
| }, | |
| { | |
| "epoch": 20.28, | |
| "learning_rate": 9.435461258061501e-06, | |
| "loss": 1.211, | |
| "step": 217000 | |
| }, | |
| { | |
| "epoch": 20.33, | |
| "learning_rate": 9.341994578932611e-06, | |
| "loss": 1.2146, | |
| "step": 217500 | |
| }, | |
| { | |
| "epoch": 20.38, | |
| "learning_rate": 9.24852789980372e-06, | |
| "loss": 1.2152, | |
| "step": 218000 | |
| }, | |
| { | |
| "epoch": 20.42, | |
| "learning_rate": 9.155061220674829e-06, | |
| "loss": 1.2146, | |
| "step": 218500 | |
| }, | |
| { | |
| "epoch": 20.47, | |
| "learning_rate": 9.06159454154594e-06, | |
| "loss": 1.2256, | |
| "step": 219000 | |
| }, | |
| { | |
| "epoch": 20.52, | |
| "learning_rate": 8.968127862417048e-06, | |
| "loss": 1.2058, | |
| "step": 219500 | |
| }, | |
| { | |
| "epoch": 20.56, | |
| "learning_rate": 8.874661183288158e-06, | |
| "loss": 1.2128, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 20.61, | |
| "learning_rate": 8.781194504159269e-06, | |
| "loss": 1.2137, | |
| "step": 220500 | |
| }, | |
| { | |
| "epoch": 20.66, | |
| "learning_rate": 8.687727825030377e-06, | |
| "loss": 1.2129, | |
| "step": 221000 | |
| }, | |
| { | |
| "epoch": 20.7, | |
| "learning_rate": 8.594261145901486e-06, | |
| "loss": 1.2181, | |
| "step": 221500 | |
| }, | |
| { | |
| "epoch": 20.75, | |
| "learning_rate": 8.500794466772597e-06, | |
| "loss": 1.201, | |
| "step": 222000 | |
| }, | |
| { | |
| "epoch": 20.8, | |
| "learning_rate": 8.407327787643705e-06, | |
| "loss": 1.2162, | |
| "step": 222500 | |
| }, | |
| { | |
| "epoch": 20.84, | |
| "learning_rate": 8.313861108514814e-06, | |
| "loss": 1.2077, | |
| "step": 223000 | |
| }, | |
| { | |
| "epoch": 20.89, | |
| "learning_rate": 8.220394429385924e-06, | |
| "loss": 1.2098, | |
| "step": 223500 | |
| }, | |
| { | |
| "epoch": 20.94, | |
| "learning_rate": 8.126927750257033e-06, | |
| "loss": 1.2092, | |
| "step": 224000 | |
| }, | |
| { | |
| "epoch": 20.98, | |
| "learning_rate": 8.033461071128144e-06, | |
| "loss": 1.2132, | |
| "step": 224500 | |
| }, | |
| { | |
| "epoch": 21.03, | |
| "learning_rate": 7.939994391999252e-06, | |
| "loss": 1.2166, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 21.08, | |
| "learning_rate": 7.846527712870363e-06, | |
| "loss": 1.2063, | |
| "step": 225500 | |
| }, | |
| { | |
| "epoch": 21.12, | |
| "learning_rate": 7.753061033741471e-06, | |
| "loss": 1.2029, | |
| "step": 226000 | |
| }, | |
| { | |
| "epoch": 21.17, | |
| "learning_rate": 7.659594354612582e-06, | |
| "loss": 1.201, | |
| "step": 226500 | |
| }, | |
| { | |
| "epoch": 21.22, | |
| "learning_rate": 7.5661276754836905e-06, | |
| "loss": 1.2006, | |
| "step": 227000 | |
| }, | |
| { | |
| "epoch": 21.26, | |
| "learning_rate": 7.472660996354799e-06, | |
| "loss": 1.2028, | |
| "step": 227500 | |
| }, | |
| { | |
| "epoch": 21.31, | |
| "learning_rate": 7.37919431722591e-06, | |
| "loss": 1.1959, | |
| "step": 228000 | |
| }, | |
| { | |
| "epoch": 21.36, | |
| "learning_rate": 7.285727638097019e-06, | |
| "loss": 1.2126, | |
| "step": 228500 | |
| }, | |
| { | |
| "epoch": 21.4, | |
| "learning_rate": 7.192260958968128e-06, | |
| "loss": 1.208, | |
| "step": 229000 | |
| }, | |
| { | |
| "epoch": 21.45, | |
| "learning_rate": 7.098794279839238e-06, | |
| "loss": 1.2026, | |
| "step": 229500 | |
| }, | |
| { | |
| "epoch": 21.5, | |
| "learning_rate": 7.005327600710347e-06, | |
| "loss": 1.2069, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 21.54, | |
| "learning_rate": 6.911860921581456e-06, | |
| "loss": 1.2042, | |
| "step": 230500 | |
| }, | |
| { | |
| "epoch": 21.59, | |
| "learning_rate": 6.818394242452566e-06, | |
| "loss": 1.2057, | |
| "step": 231000 | |
| }, | |
| { | |
| "epoch": 21.64, | |
| "learning_rate": 6.724927563323676e-06, | |
| "loss": 1.212, | |
| "step": 231500 | |
| }, | |
| { | |
| "epoch": 21.68, | |
| "learning_rate": 6.6314608841947845e-06, | |
| "loss": 1.2012, | |
| "step": 232000 | |
| }, | |
| { | |
| "epoch": 21.73, | |
| "learning_rate": 6.537994205065895e-06, | |
| "loss": 1.2066, | |
| "step": 232500 | |
| }, | |
| { | |
| "epoch": 21.78, | |
| "learning_rate": 6.444527525937004e-06, | |
| "loss": 1.2042, | |
| "step": 233000 | |
| }, | |
| { | |
| "epoch": 21.82, | |
| "learning_rate": 6.351060846808113e-06, | |
| "loss": 1.2101, | |
| "step": 233500 | |
| }, | |
| { | |
| "epoch": 21.87, | |
| "learning_rate": 6.257594167679224e-06, | |
| "loss": 1.1961, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 21.92, | |
| "learning_rate": 6.164127488550332e-06, | |
| "loss": 1.1994, | |
| "step": 234500 | |
| }, | |
| { | |
| "epoch": 21.96, | |
| "learning_rate": 6.070660809421441e-06, | |
| "loss": 1.2034, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 22.01, | |
| "learning_rate": 5.977194130292551e-06, | |
| "loss": 1.1977, | |
| "step": 235500 | |
| }, | |
| { | |
| "epoch": 22.06, | |
| "learning_rate": 5.883727451163661e-06, | |
| "loss": 1.1961, | |
| "step": 236000 | |
| }, | |
| { | |
| "epoch": 22.1, | |
| "learning_rate": 5.79026077203477e-06, | |
| "loss": 1.1929, | |
| "step": 236500 | |
| }, | |
| { | |
| "epoch": 22.15, | |
| "learning_rate": 5.696794092905879e-06, | |
| "loss": 1.1968, | |
| "step": 237000 | |
| }, | |
| { | |
| "epoch": 22.2, | |
| "learning_rate": 5.603327413776989e-06, | |
| "loss": 1.1946, | |
| "step": 237500 | |
| }, | |
| { | |
| "epoch": 22.25, | |
| "learning_rate": 5.5098607346480976e-06, | |
| "loss": 1.1971, | |
| "step": 238000 | |
| }, | |
| { | |
| "epoch": 22.29, | |
| "learning_rate": 5.416394055519208e-06, | |
| "loss": 1.2018, | |
| "step": 238500 | |
| }, | |
| { | |
| "epoch": 22.34, | |
| "learning_rate": 5.3229273763903175e-06, | |
| "loss": 1.1986, | |
| "step": 239000 | |
| }, | |
| { | |
| "epoch": 22.39, | |
| "learning_rate": 5.229460697261426e-06, | |
| "loss": 1.1877, | |
| "step": 239500 | |
| }, | |
| { | |
| "epoch": 22.43, | |
| "learning_rate": 5.135994018132536e-06, | |
| "loss": 1.1955, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 22.48, | |
| "learning_rate": 5.042527339003645e-06, | |
| "loss": 1.1989, | |
| "step": 240500 | |
| }, | |
| { | |
| "epoch": 22.53, | |
| "learning_rate": 4.949060659874755e-06, | |
| "loss": 1.1888, | |
| "step": 241000 | |
| }, | |
| { | |
| "epoch": 22.57, | |
| "learning_rate": 4.8555939807458645e-06, | |
| "loss": 1.2003, | |
| "step": 241500 | |
| }, | |
| { | |
| "epoch": 22.62, | |
| "learning_rate": 4.762127301616974e-06, | |
| "loss": 1.1945, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 22.67, | |
| "learning_rate": 4.668660622488083e-06, | |
| "loss": 1.1833, | |
| "step": 242500 | |
| }, | |
| { | |
| "epoch": 22.71, | |
| "learning_rate": 4.575193943359192e-06, | |
| "loss": 1.1866, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 22.76, | |
| "learning_rate": 4.481727264230303e-06, | |
| "loss": 1.1924, | |
| "step": 243500 | |
| }, | |
| { | |
| "epoch": 22.81, | |
| "learning_rate": 4.3882605851014115e-06, | |
| "loss": 1.1924, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 22.85, | |
| "learning_rate": 4.294793905972521e-06, | |
| "loss": 1.1894, | |
| "step": 244500 | |
| }, | |
| { | |
| "epoch": 22.9, | |
| "learning_rate": 4.201327226843631e-06, | |
| "loss": 1.2008, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 22.95, | |
| "learning_rate": 4.107860547714739e-06, | |
| "loss": 1.1959, | |
| "step": 245500 | |
| }, | |
| { | |
| "epoch": 22.99, | |
| "learning_rate": 4.01439386858585e-06, | |
| "loss": 1.1996, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 23.04, | |
| "learning_rate": 3.920927189456959e-06, | |
| "loss": 1.1928, | |
| "step": 246500 | |
| }, | |
| { | |
| "epoch": 23.09, | |
| "learning_rate": 3.827460510328068e-06, | |
| "loss": 1.195, | |
| "step": 247000 | |
| }, | |
| { | |
| "epoch": 23.13, | |
| "learning_rate": 3.7339938311991776e-06, | |
| "loss": 1.1873, | |
| "step": 247500 | |
| }, | |
| { | |
| "epoch": 23.18, | |
| "learning_rate": 3.6405271520702876e-06, | |
| "loss": 1.1977, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 23.23, | |
| "learning_rate": 3.5470604729413963e-06, | |
| "loss": 1.1909, | |
| "step": 248500 | |
| }, | |
| { | |
| "epoch": 23.27, | |
| "learning_rate": 3.4535937938125063e-06, | |
| "loss": 1.1881, | |
| "step": 249000 | |
| }, | |
| { | |
| "epoch": 23.32, | |
| "learning_rate": 3.360127114683616e-06, | |
| "loss": 1.1914, | |
| "step": 249500 | |
| }, | |
| { | |
| "epoch": 23.37, | |
| "learning_rate": 3.266660435554725e-06, | |
| "loss": 1.1813, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 23.41, | |
| "learning_rate": 3.1731937564258346e-06, | |
| "loss": 1.2004, | |
| "step": 250500 | |
| }, | |
| { | |
| "epoch": 23.46, | |
| "learning_rate": 3.0797270772969437e-06, | |
| "loss": 1.1867, | |
| "step": 251000 | |
| }, | |
| { | |
| "epoch": 23.51, | |
| "learning_rate": 2.9862603981680533e-06, | |
| "loss": 1.1933, | |
| "step": 251500 | |
| }, | |
| { | |
| "epoch": 23.55, | |
| "learning_rate": 2.892793719039163e-06, | |
| "loss": 1.1834, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 23.6, | |
| "learning_rate": 2.799327039910272e-06, | |
| "loss": 1.18, | |
| "step": 252500 | |
| }, | |
| { | |
| "epoch": 23.65, | |
| "learning_rate": 2.7058603607813815e-06, | |
| "loss": 1.1923, | |
| "step": 253000 | |
| }, | |
| { | |
| "epoch": 23.69, | |
| "learning_rate": 2.612393681652491e-06, | |
| "loss": 1.1912, | |
| "step": 253500 | |
| }, | |
| { | |
| "epoch": 23.74, | |
| "learning_rate": 2.5189270025236007e-06, | |
| "loss": 1.1888, | |
| "step": 254000 | |
| }, | |
| { | |
| "epoch": 23.79, | |
| "learning_rate": 2.42546032339471e-06, | |
| "loss": 1.1843, | |
| "step": 254500 | |
| }, | |
| { | |
| "epoch": 23.83, | |
| "learning_rate": 2.3319936442658194e-06, | |
| "loss": 1.1911, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 23.88, | |
| "learning_rate": 2.238526965136929e-06, | |
| "loss": 1.1943, | |
| "step": 255500 | |
| }, | |
| { | |
| "epoch": 23.93, | |
| "learning_rate": 2.145060286008038e-06, | |
| "loss": 1.1811, | |
| "step": 256000 | |
| }, | |
| { | |
| "epoch": 23.97, | |
| "learning_rate": 2.0515936068791476e-06, | |
| "loss": 1.1904, | |
| "step": 256500 | |
| }, | |
| { | |
| "epoch": 24.02, | |
| "learning_rate": 1.958126927750257e-06, | |
| "loss": 1.1752, | |
| "step": 257000 | |
| }, | |
| { | |
| "epoch": 24.07, | |
| "learning_rate": 1.8646602486213666e-06, | |
| "loss": 1.1851, | |
| "step": 257500 | |
| }, | |
| { | |
| "epoch": 24.11, | |
| "learning_rate": 1.771193569492476e-06, | |
| "loss": 1.1874, | |
| "step": 258000 | |
| }, | |
| { | |
| "epoch": 24.16, | |
| "learning_rate": 1.6777268903635857e-06, | |
| "loss": 1.1904, | |
| "step": 258500 | |
| }, | |
| { | |
| "epoch": 24.21, | |
| "learning_rate": 1.5842602112346948e-06, | |
| "loss": 1.188, | |
| "step": 259000 | |
| }, | |
| { | |
| "epoch": 24.25, | |
| "learning_rate": 1.4907935321058044e-06, | |
| "loss": 1.1808, | |
| "step": 259500 | |
| }, | |
| { | |
| "epoch": 24.3, | |
| "learning_rate": 1.3973268529769137e-06, | |
| "loss": 1.1855, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 24.35, | |
| "learning_rate": 1.3038601738480233e-06, | |
| "loss": 1.1856, | |
| "step": 260500 | |
| }, | |
| { | |
| "epoch": 24.39, | |
| "learning_rate": 1.2103934947191327e-06, | |
| "loss": 1.1943, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 24.44, | |
| "learning_rate": 1.1169268155902422e-06, | |
| "loss": 1.19, | |
| "step": 261500 | |
| }, | |
| { | |
| "epoch": 24.49, | |
| "learning_rate": 1.0234601364613516e-06, | |
| "loss": 1.1819, | |
| "step": 262000 | |
| }, | |
| { | |
| "epoch": 24.54, | |
| "learning_rate": 9.299934573324609e-07, | |
| "loss": 1.1836, | |
| "step": 262500 | |
| }, | |
| { | |
| "epoch": 24.58, | |
| "learning_rate": 8.365267782035705e-07, | |
| "loss": 1.182, | |
| "step": 263000 | |
| }, | |
| { | |
| "epoch": 24.63, | |
| "learning_rate": 7.4306009907468e-07, | |
| "loss": 1.1844, | |
| "step": 263500 | |
| }, | |
| { | |
| "epoch": 24.68, | |
| "learning_rate": 6.495934199457893e-07, | |
| "loss": 1.1928, | |
| "step": 264000 | |
| }, | |
| { | |
| "epoch": 24.72, | |
| "learning_rate": 5.561267408168989e-07, | |
| "loss": 1.1838, | |
| "step": 264500 | |
| }, | |
| { | |
| "epoch": 24.77, | |
| "learning_rate": 4.626600616880083e-07, | |
| "loss": 1.1849, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 24.82, | |
| "learning_rate": 3.691933825591177e-07, | |
| "loss": 1.1851, | |
| "step": 265500 | |
| }, | |
| { | |
| "epoch": 24.86, | |
| "learning_rate": 2.7572670343022714e-07, | |
| "loss": 1.1919, | |
| "step": 266000 | |
| }, | |
| { | |
| "epoch": 24.91, | |
| "learning_rate": 1.8226002430133658e-07, | |
| "loss": 1.1752, | |
| "step": 266500 | |
| }, | |
| { | |
| "epoch": 24.96, | |
| "learning_rate": 8.879334517244602e-08, | |
| "loss": 1.1807, | |
| "step": 267000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 267475, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 25, | |
| "save_steps": 500, | |
| "total_flos": 1.1243414742110208e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |