| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.0, |
| "eval_steps": 500, |
| "global_step": 590, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0851063829787234, |
| "grad_norm": 1.7628610134124756, |
| "learning_rate": 8e-05, |
| "loss": 1.8622, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.1702127659574468, |
| "grad_norm": 1.8386660814285278, |
| "learning_rate": 0.00018, |
| "loss": 0.9707, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.2553191489361702, |
| "grad_norm": 1.295538067817688, |
| "learning_rate": 0.00019862068965517243, |
| "loss": 0.5712, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.3404255319148936, |
| "grad_norm": 0.5233324766159058, |
| "learning_rate": 0.00019689655172413795, |
| "loss": 0.4687, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "grad_norm": 0.7628127336502075, |
| "learning_rate": 0.00019517241379310345, |
| "loss": 0.5601, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.5106382978723404, |
| "grad_norm": 0.5182906985282898, |
| "learning_rate": 0.00019344827586206898, |
| "loss": 0.4466, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.5957446808510638, |
| "grad_norm": 0.3616418242454529, |
| "learning_rate": 0.0001917241379310345, |
| "loss": 0.5812, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.6808510638297872, |
| "grad_norm": 0.48968932032585144, |
| "learning_rate": 0.00019, |
| "loss": 0.3477, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.7659574468085106, |
| "grad_norm": 0.8994768261909485, |
| "learning_rate": 0.00018827586206896554, |
| "loss": 0.3383, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "grad_norm": 0.387116402387619, |
| "learning_rate": 0.00018655172413793104, |
| "loss": 0.4003, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.9361702127659575, |
| "grad_norm": 0.8058916926383972, |
| "learning_rate": 0.00018482758620689654, |
| "loss": 0.3992, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.0170212765957447, |
| "grad_norm": 0.4688344895839691, |
| "learning_rate": 0.00018310344827586207, |
| "loss": 0.3998, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.102127659574468, |
| "grad_norm": 0.5960966944694519, |
| "learning_rate": 0.0001813793103448276, |
| "loss": 0.2818, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.1872340425531915, |
| "grad_norm": 0.49853241443634033, |
| "learning_rate": 0.0001796551724137931, |
| "loss": 0.2186, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.2723404255319148, |
| "grad_norm": 0.521017849445343, |
| "learning_rate": 0.00017793103448275862, |
| "loss": 0.322, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.3574468085106384, |
| "grad_norm": 0.35342177748680115, |
| "learning_rate": 0.00017620689655172415, |
| "loss": 0.1905, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.4425531914893617, |
| "grad_norm": 0.37390947341918945, |
| "learning_rate": 0.00017448275862068965, |
| "loss": 0.2582, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.527659574468085, |
| "grad_norm": 0.42308491468429565, |
| "learning_rate": 0.00017275862068965518, |
| "loss": 0.2337, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.6127659574468085, |
| "grad_norm": 0.8888659477233887, |
| "learning_rate": 0.0001710344827586207, |
| "loss": 0.2465, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.697872340425532, |
| "grad_norm": 0.40344128012657166, |
| "learning_rate": 0.0001693103448275862, |
| "loss": 0.2591, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.7829787234042553, |
| "grad_norm": 0.45671504735946655, |
| "learning_rate": 0.00016758620689655173, |
| "loss": 0.2269, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.8680851063829786, |
| "grad_norm": 0.4616524577140808, |
| "learning_rate": 0.00016586206896551726, |
| "loss": 0.2033, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.9531914893617022, |
| "grad_norm": 0.4471171200275421, |
| "learning_rate": 0.00016413793103448276, |
| "loss": 0.266, |
| "step": 115 |
| }, |
| { |
| "epoch": 2.0340425531914894, |
| "grad_norm": 0.7043401002883911, |
| "learning_rate": 0.0001624137931034483, |
| "loss": 0.1607, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.119148936170213, |
| "grad_norm": 0.4361288845539093, |
| "learning_rate": 0.00016068965517241382, |
| "loss": 0.1493, |
| "step": 125 |
| }, |
| { |
| "epoch": 2.204255319148936, |
| "grad_norm": 0.45557570457458496, |
| "learning_rate": 0.00015896551724137932, |
| "loss": 0.1011, |
| "step": 130 |
| }, |
| { |
| "epoch": 2.2893617021276595, |
| "grad_norm": 0.3149248957633972, |
| "learning_rate": 0.00015724137931034485, |
| "loss": 0.1112, |
| "step": 135 |
| }, |
| { |
| "epoch": 2.374468085106383, |
| "grad_norm": 0.2783677279949188, |
| "learning_rate": 0.00015551724137931037, |
| "loss": 0.1278, |
| "step": 140 |
| }, |
| { |
| "epoch": 2.4595744680851066, |
| "grad_norm": 0.46953991055488586, |
| "learning_rate": 0.00015379310344827587, |
| "loss": 0.1157, |
| "step": 145 |
| }, |
| { |
| "epoch": 2.5446808510638297, |
| "grad_norm": 0.5183996558189392, |
| "learning_rate": 0.0001520689655172414, |
| "loss": 0.107, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.629787234042553, |
| "grad_norm": 0.46928611397743225, |
| "learning_rate": 0.0001503448275862069, |
| "loss": 0.12, |
| "step": 155 |
| }, |
| { |
| "epoch": 2.7148936170212767, |
| "grad_norm": 0.4748971462249756, |
| "learning_rate": 0.00014862068965517243, |
| "loss": 0.1406, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.7183696627616882, |
| "learning_rate": 0.00014689655172413793, |
| "loss": 0.1533, |
| "step": 165 |
| }, |
| { |
| "epoch": 2.8851063829787233, |
| "grad_norm": 0.575391948223114, |
| "learning_rate": 0.00014517241379310346, |
| "loss": 0.1365, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.970212765957447, |
| "grad_norm": 0.33236995339393616, |
| "learning_rate": 0.00014344827586206896, |
| "loss": 0.1369, |
| "step": 175 |
| }, |
| { |
| "epoch": 3.051063829787234, |
| "grad_norm": 0.17017032206058502, |
| "learning_rate": 0.0001417241379310345, |
| "loss": 0.093, |
| "step": 180 |
| }, |
| { |
| "epoch": 3.1361702127659576, |
| "grad_norm": 0.8167087435722351, |
| "learning_rate": 0.00014, |
| "loss": 0.1297, |
| "step": 185 |
| }, |
| { |
| "epoch": 3.2212765957446807, |
| "grad_norm": 0.4695407748222351, |
| "learning_rate": 0.00013827586206896552, |
| "loss": 0.0794, |
| "step": 190 |
| }, |
| { |
| "epoch": 3.3063829787234043, |
| "grad_norm": 0.46323472261428833, |
| "learning_rate": 0.00013655172413793104, |
| "loss": 0.0909, |
| "step": 195 |
| }, |
| { |
| "epoch": 3.391489361702128, |
| "grad_norm": 0.4290667772293091, |
| "learning_rate": 0.00013482758620689654, |
| "loss": 0.0728, |
| "step": 200 |
| }, |
| { |
| "epoch": 3.476595744680851, |
| "grad_norm": 0.3602962791919708, |
| "learning_rate": 0.00013310344827586207, |
| "loss": 0.0713, |
| "step": 205 |
| }, |
| { |
| "epoch": 3.5617021276595744, |
| "grad_norm": 0.3129134476184845, |
| "learning_rate": 0.0001313793103448276, |
| "loss": 0.0878, |
| "step": 210 |
| }, |
| { |
| "epoch": 3.646808510638298, |
| "grad_norm": 0.4205905795097351, |
| "learning_rate": 0.0001296551724137931, |
| "loss": 0.0815, |
| "step": 215 |
| }, |
| { |
| "epoch": 3.731914893617021, |
| "grad_norm": 0.47979313135147095, |
| "learning_rate": 0.00012793103448275863, |
| "loss": 0.0696, |
| "step": 220 |
| }, |
| { |
| "epoch": 3.8170212765957445, |
| "grad_norm": 0.4063044786453247, |
| "learning_rate": 0.00012620689655172415, |
| "loss": 0.0661, |
| "step": 225 |
| }, |
| { |
| "epoch": 3.902127659574468, |
| "grad_norm": 0.363741934299469, |
| "learning_rate": 0.00012448275862068966, |
| "loss": 0.0972, |
| "step": 230 |
| }, |
| { |
| "epoch": 3.9872340425531916, |
| "grad_norm": 0.2973483204841614, |
| "learning_rate": 0.00012275862068965518, |
| "loss": 0.0807, |
| "step": 235 |
| }, |
| { |
| "epoch": 4.068085106382979, |
| "grad_norm": 0.481488436460495, |
| "learning_rate": 0.00012103448275862071, |
| "loss": 0.0678, |
| "step": 240 |
| }, |
| { |
| "epoch": 4.153191489361702, |
| "grad_norm": 0.18169716000556946, |
| "learning_rate": 0.00011931034482758621, |
| "loss": 0.0555, |
| "step": 245 |
| }, |
| { |
| "epoch": 4.238297872340426, |
| "grad_norm": 0.3210119307041168, |
| "learning_rate": 0.00011758620689655173, |
| "loss": 0.0506, |
| "step": 250 |
| }, |
| { |
| "epoch": 4.323404255319149, |
| "grad_norm": 0.3670145273208618, |
| "learning_rate": 0.00011586206896551725, |
| "loss": 0.0685, |
| "step": 255 |
| }, |
| { |
| "epoch": 4.408510638297872, |
| "grad_norm": 0.2257293462753296, |
| "learning_rate": 0.00011413793103448275, |
| "loss": 0.0592, |
| "step": 260 |
| }, |
| { |
| "epoch": 4.493617021276596, |
| "grad_norm": 0.16985096037387848, |
| "learning_rate": 0.00011241379310344828, |
| "loss": 0.0745, |
| "step": 265 |
| }, |
| { |
| "epoch": 4.578723404255319, |
| "grad_norm": 0.31408607959747314, |
| "learning_rate": 0.00011068965517241381, |
| "loss": 0.0866, |
| "step": 270 |
| }, |
| { |
| "epoch": 4.663829787234042, |
| "grad_norm": 0.2598167657852173, |
| "learning_rate": 0.00010896551724137931, |
| "loss": 0.0494, |
| "step": 275 |
| }, |
| { |
| "epoch": 4.748936170212766, |
| "grad_norm": 0.4023381471633911, |
| "learning_rate": 0.00010724137931034484, |
| "loss": 0.0632, |
| "step": 280 |
| }, |
| { |
| "epoch": 4.834042553191489, |
| "grad_norm": 0.24716579914093018, |
| "learning_rate": 0.00010551724137931037, |
| "loss": 0.0499, |
| "step": 285 |
| }, |
| { |
| "epoch": 4.919148936170213, |
| "grad_norm": 0.39397132396698, |
| "learning_rate": 0.00010379310344827587, |
| "loss": 0.0575, |
| "step": 290 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.09666766971349716, |
| "learning_rate": 0.0001020689655172414, |
| "loss": 0.0609, |
| "step": 295 |
| }, |
| { |
| "epoch": 5.085106382978723, |
| "grad_norm": 0.1626937985420227, |
| "learning_rate": 0.0001003448275862069, |
| "loss": 0.0455, |
| "step": 300 |
| }, |
| { |
| "epoch": 5.170212765957447, |
| "grad_norm": 0.2916521728038788, |
| "learning_rate": 9.862068965517242e-05, |
| "loss": 0.0437, |
| "step": 305 |
| }, |
| { |
| "epoch": 5.25531914893617, |
| "grad_norm": 0.3213741183280945, |
| "learning_rate": 9.689655172413794e-05, |
| "loss": 0.0509, |
| "step": 310 |
| }, |
| { |
| "epoch": 5.340425531914893, |
| "grad_norm": 0.25438761711120605, |
| "learning_rate": 9.517241379310345e-05, |
| "loss": 0.0483, |
| "step": 315 |
| }, |
| { |
| "epoch": 5.425531914893617, |
| "grad_norm": 0.35173889994621277, |
| "learning_rate": 9.344827586206896e-05, |
| "loss": 0.0394, |
| "step": 320 |
| }, |
| { |
| "epoch": 5.51063829787234, |
| "grad_norm": 0.2214491218328476, |
| "learning_rate": 9.172413793103448e-05, |
| "loss": 0.0615, |
| "step": 325 |
| }, |
| { |
| "epoch": 5.595744680851064, |
| "grad_norm": 0.1461336314678192, |
| "learning_rate": 9e-05, |
| "loss": 0.0552, |
| "step": 330 |
| }, |
| { |
| "epoch": 5.680851063829787, |
| "grad_norm": 0.2651642858982086, |
| "learning_rate": 8.827586206896552e-05, |
| "loss": 0.0417, |
| "step": 335 |
| }, |
| { |
| "epoch": 5.76595744680851, |
| "grad_norm": 0.3055964708328247, |
| "learning_rate": 8.655172413793103e-05, |
| "loss": 0.0502, |
| "step": 340 |
| }, |
| { |
| "epoch": 5.851063829787234, |
| "grad_norm": 0.19299010932445526, |
| "learning_rate": 8.482758620689656e-05, |
| "loss": 0.044, |
| "step": 345 |
| }, |
| { |
| "epoch": 5.9361702127659575, |
| "grad_norm": 0.120607890188694, |
| "learning_rate": 8.310344827586208e-05, |
| "loss": 0.0407, |
| "step": 350 |
| }, |
| { |
| "epoch": 6.017021276595744, |
| "grad_norm": 0.13520629703998566, |
| "learning_rate": 8.137931034482759e-05, |
| "loss": 0.0512, |
| "step": 355 |
| }, |
| { |
| "epoch": 6.102127659574468, |
| "grad_norm": 0.20564086735248566, |
| "learning_rate": 7.965517241379312e-05, |
| "loss": 0.0416, |
| "step": 360 |
| }, |
| { |
| "epoch": 6.187234042553191, |
| "grad_norm": 0.115450419485569, |
| "learning_rate": 7.793103448275862e-05, |
| "loss": 0.0424, |
| "step": 365 |
| }, |
| { |
| "epoch": 6.272340425531915, |
| "grad_norm": 0.1926778256893158, |
| "learning_rate": 7.620689655172413e-05, |
| "loss": 0.0378, |
| "step": 370 |
| }, |
| { |
| "epoch": 6.357446808510638, |
| "grad_norm": 0.25700807571411133, |
| "learning_rate": 7.448275862068966e-05, |
| "loss": 0.0453, |
| "step": 375 |
| }, |
| { |
| "epoch": 6.4425531914893615, |
| "grad_norm": 0.10704706609249115, |
| "learning_rate": 7.275862068965517e-05, |
| "loss": 0.0323, |
| "step": 380 |
| }, |
| { |
| "epoch": 6.527659574468085, |
| "grad_norm": 0.18126443028450012, |
| "learning_rate": 7.103448275862069e-05, |
| "loss": 0.0393, |
| "step": 385 |
| }, |
| { |
| "epoch": 6.6127659574468085, |
| "grad_norm": 0.13170169293880463, |
| "learning_rate": 6.931034482758622e-05, |
| "loss": 0.0458, |
| "step": 390 |
| }, |
| { |
| "epoch": 6.697872340425532, |
| "grad_norm": 0.10165558010339737, |
| "learning_rate": 6.758620689655173e-05, |
| "loss": 0.0359, |
| "step": 395 |
| }, |
| { |
| "epoch": 6.782978723404256, |
| "grad_norm": 0.20285525918006897, |
| "learning_rate": 6.586206896551724e-05, |
| "loss": 0.0411, |
| "step": 400 |
| }, |
| { |
| "epoch": 6.868085106382979, |
| "grad_norm": 0.17869718372821808, |
| "learning_rate": 6.413793103448276e-05, |
| "loss": 0.0454, |
| "step": 405 |
| }, |
| { |
| "epoch": 6.953191489361702, |
| "grad_norm": 0.1431434452533722, |
| "learning_rate": 6.241379310344829e-05, |
| "loss": 0.041, |
| "step": 410 |
| }, |
| { |
| "epoch": 7.034042553191489, |
| "grad_norm": 0.1481233537197113, |
| "learning_rate": 6.068965517241379e-05, |
| "loss": 0.0505, |
| "step": 415 |
| }, |
| { |
| "epoch": 7.1191489361702125, |
| "grad_norm": 0.10545721650123596, |
| "learning_rate": 5.896551724137931e-05, |
| "loss": 0.0361, |
| "step": 420 |
| }, |
| { |
| "epoch": 7.2042553191489365, |
| "grad_norm": 0.13733087480068207, |
| "learning_rate": 5.7241379310344835e-05, |
| "loss": 0.0361, |
| "step": 425 |
| }, |
| { |
| "epoch": 7.2893617021276595, |
| "grad_norm": 0.09701387584209442, |
| "learning_rate": 5.551724137931035e-05, |
| "loss": 0.0363, |
| "step": 430 |
| }, |
| { |
| "epoch": 7.374468085106383, |
| "grad_norm": 0.12478837370872498, |
| "learning_rate": 5.379310344827586e-05, |
| "loss": 0.0385, |
| "step": 435 |
| }, |
| { |
| "epoch": 7.459574468085107, |
| "grad_norm": 0.18108202517032623, |
| "learning_rate": 5.2068965517241384e-05, |
| "loss": 0.0307, |
| "step": 440 |
| }, |
| { |
| "epoch": 7.54468085106383, |
| "grad_norm": 0.1741178184747696, |
| "learning_rate": 5.03448275862069e-05, |
| "loss": 0.0349, |
| "step": 445 |
| }, |
| { |
| "epoch": 7.629787234042553, |
| "grad_norm": 0.18830710649490356, |
| "learning_rate": 4.862068965517241e-05, |
| "loss": 0.035, |
| "step": 450 |
| }, |
| { |
| "epoch": 7.714893617021277, |
| "grad_norm": 0.29267263412475586, |
| "learning_rate": 4.689655172413793e-05, |
| "loss": 0.0396, |
| "step": 455 |
| }, |
| { |
| "epoch": 7.8, |
| "grad_norm": 0.1248115673661232, |
| "learning_rate": 4.5172413793103454e-05, |
| "loss": 0.0417, |
| "step": 460 |
| }, |
| { |
| "epoch": 7.885106382978723, |
| "grad_norm": 0.12826257944107056, |
| "learning_rate": 4.344827586206897e-05, |
| "loss": 0.0352, |
| "step": 465 |
| }, |
| { |
| "epoch": 7.970212765957447, |
| "grad_norm": 0.10185350477695465, |
| "learning_rate": 4.172413793103448e-05, |
| "loss": 0.0383, |
| "step": 470 |
| }, |
| { |
| "epoch": 8.051063829787234, |
| "grad_norm": 0.11523312330245972, |
| "learning_rate": 4e-05, |
| "loss": 0.0372, |
| "step": 475 |
| }, |
| { |
| "epoch": 8.136170212765958, |
| "grad_norm": 0.07264875620603561, |
| "learning_rate": 3.827586206896552e-05, |
| "loss": 0.0291, |
| "step": 480 |
| }, |
| { |
| "epoch": 8.221276595744682, |
| "grad_norm": 0.1880428045988083, |
| "learning_rate": 3.655172413793104e-05, |
| "loss": 0.029, |
| "step": 485 |
| }, |
| { |
| "epoch": 8.306382978723404, |
| "grad_norm": 0.19860832393169403, |
| "learning_rate": 3.482758620689655e-05, |
| "loss": 0.0298, |
| "step": 490 |
| }, |
| { |
| "epoch": 8.391489361702128, |
| "grad_norm": 0.1690095216035843, |
| "learning_rate": 3.310344827586207e-05, |
| "loss": 0.0371, |
| "step": 495 |
| }, |
| { |
| "epoch": 8.476595744680852, |
| "grad_norm": 0.171220064163208, |
| "learning_rate": 3.137931034482759e-05, |
| "loss": 0.0294, |
| "step": 500 |
| }, |
| { |
| "epoch": 8.561702127659574, |
| "grad_norm": 0.12849068641662598, |
| "learning_rate": 2.96551724137931e-05, |
| "loss": 0.0339, |
| "step": 505 |
| }, |
| { |
| "epoch": 8.646808510638298, |
| "grad_norm": 0.1075233593583107, |
| "learning_rate": 2.7931034482758622e-05, |
| "loss": 0.0346, |
| "step": 510 |
| }, |
| { |
| "epoch": 8.731914893617022, |
| "grad_norm": 0.14006929099559784, |
| "learning_rate": 2.620689655172414e-05, |
| "loss": 0.03, |
| "step": 515 |
| }, |
| { |
| "epoch": 8.817021276595744, |
| "grad_norm": 0.1683836579322815, |
| "learning_rate": 2.4482758620689654e-05, |
| "loss": 0.0321, |
| "step": 520 |
| }, |
| { |
| "epoch": 8.902127659574468, |
| "grad_norm": 0.12975075840950012, |
| "learning_rate": 2.2758620689655175e-05, |
| "loss": 0.0384, |
| "step": 525 |
| }, |
| { |
| "epoch": 8.987234042553192, |
| "grad_norm": 0.1698654145002365, |
| "learning_rate": 2.1034482758620692e-05, |
| "loss": 0.037, |
| "step": 530 |
| }, |
| { |
| "epoch": 9.068085106382979, |
| "grad_norm": 0.1573844850063324, |
| "learning_rate": 1.9310344827586207e-05, |
| "loss": 0.0328, |
| "step": 535 |
| }, |
| { |
| "epoch": 9.153191489361703, |
| "grad_norm": 0.14986495673656464, |
| "learning_rate": 1.7586206896551724e-05, |
| "loss": 0.0313, |
| "step": 540 |
| }, |
| { |
| "epoch": 9.238297872340425, |
| "grad_norm": 0.13991227746009827, |
| "learning_rate": 1.586206896551724e-05, |
| "loss": 0.0283, |
| "step": 545 |
| }, |
| { |
| "epoch": 9.323404255319149, |
| "grad_norm": 0.12908954918384552, |
| "learning_rate": 1.4137931034482759e-05, |
| "loss": 0.0283, |
| "step": 550 |
| }, |
| { |
| "epoch": 9.408510638297873, |
| "grad_norm": 0.1915467232465744, |
| "learning_rate": 1.2413793103448277e-05, |
| "loss": 0.0233, |
| "step": 555 |
| }, |
| { |
| "epoch": 9.493617021276595, |
| "grad_norm": 0.16341650485992432, |
| "learning_rate": 1.0689655172413794e-05, |
| "loss": 0.0265, |
| "step": 560 |
| }, |
| { |
| "epoch": 9.578723404255319, |
| "grad_norm": 0.1922847330570221, |
| "learning_rate": 8.96551724137931e-06, |
| "loss": 0.0314, |
| "step": 565 |
| }, |
| { |
| "epoch": 9.663829787234043, |
| "grad_norm": 0.2411348968744278, |
| "learning_rate": 7.241379310344828e-06, |
| "loss": 0.0334, |
| "step": 570 |
| }, |
| { |
| "epoch": 9.748936170212765, |
| "grad_norm": 0.20369961857795715, |
| "learning_rate": 5.517241379310345e-06, |
| "loss": 0.0336, |
| "step": 575 |
| }, |
| { |
| "epoch": 9.83404255319149, |
| "grad_norm": 0.19569233059883118, |
| "learning_rate": 3.793103448275862e-06, |
| "loss": 0.0286, |
| "step": 580 |
| }, |
| { |
| "epoch": 9.919148936170213, |
| "grad_norm": 0.19629313051700592, |
| "learning_rate": 2.0689655172413796e-06, |
| "loss": 0.029, |
| "step": 585 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.18082977831363678, |
| "learning_rate": 3.4482758620689656e-07, |
| "loss": 0.0283, |
| "step": 590 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 590, |
| "total_flos": 3.2063167604911104e+16, |
| "train_loss": 0.1322159972483829, |
| "train_runtime": 1074.5919, |
| "train_samples_per_second": 4.374, |
| "train_steps_per_second": 0.549 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 590, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.2063167604911104e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|