| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9995203836930455, | |
| "eval_steps": 500, | |
| "global_step": 1042, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0009592326139088729, | |
| "grad_norm": 3.8003539568068034, | |
| "learning_rate": 1.9047619047619051e-06, | |
| "loss": 1.3148, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.004796163069544364, | |
| "grad_norm": 1.3427176668530907, | |
| "learning_rate": 9.523809523809523e-06, | |
| "loss": 1.3465, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.009592326139088728, | |
| "grad_norm": 0.6129061923124927, | |
| "learning_rate": 1.9047619047619046e-05, | |
| "loss": 1.3062, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.014388489208633094, | |
| "grad_norm": 0.5006461190956731, | |
| "learning_rate": 2.857142857142857e-05, | |
| "loss": 1.258, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.019184652278177457, | |
| "grad_norm": 0.3919323663241635, | |
| "learning_rate": 3.809523809523809e-05, | |
| "loss": 1.2099, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.023980815347721823, | |
| "grad_norm": 0.33016517225439684, | |
| "learning_rate": 4.761904761904762e-05, | |
| "loss": 1.1774, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.02877697841726619, | |
| "grad_norm": 0.24241904389090518, | |
| "learning_rate": 5.714285714285714e-05, | |
| "loss": 1.1464, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03357314148681055, | |
| "grad_norm": 0.21825210432654424, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 1.1551, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.03836930455635491, | |
| "grad_norm": 0.2357580892216174, | |
| "learning_rate": 7.619047619047618e-05, | |
| "loss": 1.1329, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04316546762589928, | |
| "grad_norm": 0.20958683389560634, | |
| "learning_rate": 8.571428571428571e-05, | |
| "loss": 1.1255, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.047961630695443645, | |
| "grad_norm": 0.2313801330004238, | |
| "learning_rate": 9.523809523809524e-05, | |
| "loss": 1.126, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05275779376498801, | |
| "grad_norm": 0.2081771832083775, | |
| "learning_rate": 0.00010476190476190477, | |
| "loss": 1.0935, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.05755395683453238, | |
| "grad_norm": 0.2039705643041609, | |
| "learning_rate": 0.00011428571428571428, | |
| "loss": 1.1394, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06235011990407674, | |
| "grad_norm": 0.2148039800819315, | |
| "learning_rate": 0.0001238095238095238, | |
| "loss": 1.128, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.0671462829736211, | |
| "grad_norm": 0.18570989413688033, | |
| "learning_rate": 0.00013333333333333334, | |
| "loss": 1.1474, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07194244604316546, | |
| "grad_norm": 0.18151401221015465, | |
| "learning_rate": 0.00014285714285714287, | |
| "loss": 1.1297, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.07673860911270983, | |
| "grad_norm": 0.18983827314633334, | |
| "learning_rate": 0.00015238095238095237, | |
| "loss": 1.0947, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.0815347721822542, | |
| "grad_norm": 0.18321896415592545, | |
| "learning_rate": 0.00016190476190476192, | |
| "loss": 1.0993, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.08633093525179857, | |
| "grad_norm": 0.18106666815994735, | |
| "learning_rate": 0.00017142857142857143, | |
| "loss": 1.1169, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.09112709832134293, | |
| "grad_norm": 0.17126918097791768, | |
| "learning_rate": 0.00018095238095238095, | |
| "loss": 1.1336, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.09592326139088729, | |
| "grad_norm": 0.1787566440192351, | |
| "learning_rate": 0.00019047619047619048, | |
| "loss": 1.1086, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.10071942446043165, | |
| "grad_norm": 0.18251300143969484, | |
| "learning_rate": 0.0002, | |
| "loss": 1.0947, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.10551558752997602, | |
| "grad_norm": 0.19966595269166043, | |
| "learning_rate": 0.00019998594857383755, | |
| "loss": 1.1381, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.11031175059952038, | |
| "grad_norm": 0.18103343210950873, | |
| "learning_rate": 0.0001999437982442017, | |
| "loss": 1.1246, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.11510791366906475, | |
| "grad_norm": 0.17391603743469916, | |
| "learning_rate": 0.00019987356085653736, | |
| "loss": 1.1109, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.11990407673860912, | |
| "grad_norm": 0.18378902142023112, | |
| "learning_rate": 0.00019977525614955387, | |
| "loss": 1.0974, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.12470023980815348, | |
| "grad_norm": 0.18844867723630976, | |
| "learning_rate": 0.00019964891174967784, | |
| "loss": 1.0954, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.12949640287769784, | |
| "grad_norm": 0.17258678109282075, | |
| "learning_rate": 0.0001994945631632894, | |
| "loss": 1.1045, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.1342925659472422, | |
| "grad_norm": 0.19501601610290073, | |
| "learning_rate": 0.00019931225376674388, | |
| "loss": 1.1151, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.13908872901678657, | |
| "grad_norm": 0.16557441103633752, | |
| "learning_rate": 0.00019910203479418172, | |
| "loss": 1.0931, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.14388489208633093, | |
| "grad_norm": 0.1808422208750127, | |
| "learning_rate": 0.00019886396532313032, | |
| "loss": 1.1439, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1486810551558753, | |
| "grad_norm": 0.17153212310391777, | |
| "learning_rate": 0.00019859811225790162, | |
| "loss": 1.1238, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.15347721822541965, | |
| "grad_norm": 0.1725494178552086, | |
| "learning_rate": 0.00019830455031078992, | |
| "loss": 1.0819, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.15827338129496402, | |
| "grad_norm": 0.17821716654615946, | |
| "learning_rate": 0.00019798336198107567, | |
| "loss": 1.106, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.1630695443645084, | |
| "grad_norm": 0.17454115891711316, | |
| "learning_rate": 0.0001976346375318409, | |
| "loss": 1.1176, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.16786570743405277, | |
| "grad_norm": 0.17165723260861443, | |
| "learning_rate": 0.00019725847496460257, | |
| "loss": 1.104, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.17266187050359713, | |
| "grad_norm": 0.16959903827879758, | |
| "learning_rate": 0.00019685497999177146, | |
| "loss": 1.1032, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.1774580335731415, | |
| "grad_norm": 0.16370292319935667, | |
| "learning_rate": 0.00019642426600694396, | |
| "loss": 1.1058, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.18225419664268586, | |
| "grad_norm": 0.17362596041949555, | |
| "learning_rate": 0.00019596645405303508, | |
| "loss": 1.0897, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.18705035971223022, | |
| "grad_norm": 0.1705391917273308, | |
| "learning_rate": 0.00019548167278826223, | |
| "loss": 1.0793, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.19184652278177458, | |
| "grad_norm": 0.16987370700655263, | |
| "learning_rate": 0.00019497005844998835, | |
| "loss": 1.0825, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.19664268585131894, | |
| "grad_norm": 0.1741514081542761, | |
| "learning_rate": 0.00019443175481643533, | |
| "loss": 1.0971, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.2014388489208633, | |
| "grad_norm": 0.1697720073525996, | |
| "learning_rate": 0.00019386691316627846, | |
| "loss": 1.1004, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.20623501199040767, | |
| "grad_norm": 0.16941098489834122, | |
| "learning_rate": 0.00019327569223613248, | |
| "loss": 1.1078, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.21103117505995203, | |
| "grad_norm": 0.16231625897366397, | |
| "learning_rate": 0.0001926582581759423, | |
| "loss": 1.0941, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.2158273381294964, | |
| "grad_norm": 0.16520366738739287, | |
| "learning_rate": 0.00019201478450229012, | |
| "loss": 1.0868, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.22062350119904076, | |
| "grad_norm": 0.16835416401080175, | |
| "learning_rate": 0.00019134545204963212, | |
| "loss": 1.1037, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.22541966426858512, | |
| "grad_norm": 0.16657613469605134, | |
| "learning_rate": 0.0001906504489194791, | |
| "loss": 1.093, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.2302158273381295, | |
| "grad_norm": 0.1746269506944523, | |
| "learning_rate": 0.00018992997042753434, | |
| "loss": 1.1124, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.23501199040767387, | |
| "grad_norm": 0.16885168676581888, | |
| "learning_rate": 0.0001891842190488045, | |
| "loss": 1.0972, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.23980815347721823, | |
| "grad_norm": 0.16263325756495298, | |
| "learning_rate": 0.00018841340436069826, | |
| "loss": 1.0725, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2446043165467626, | |
| "grad_norm": 0.16758205657577824, | |
| "learning_rate": 0.00018761774298412903, | |
| "loss": 1.1012, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.24940047961630696, | |
| "grad_norm": 0.1592358008735748, | |
| "learning_rate": 0.00018679745852263858, | |
| "loss": 1.1163, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.2541966426858513, | |
| "grad_norm": 0.18307373045740924, | |
| "learning_rate": 0.0001859527814995577, | |
| "loss": 1.0883, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.2589928057553957, | |
| "grad_norm": 0.1603879751504812, | |
| "learning_rate": 0.00018508394929322286, | |
| "loss": 1.0837, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.2637889688249401, | |
| "grad_norm": 0.16169604116219924, | |
| "learning_rate": 0.0001841912060702659, | |
| "loss": 1.1059, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.2685851318944844, | |
| "grad_norm": 0.21284544973156141, | |
| "learning_rate": 0.00018327480271699645, | |
| "loss": 1.1197, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.2733812949640288, | |
| "grad_norm": 0.16556421777414207, | |
| "learning_rate": 0.00018233499676889556, | |
| "loss": 1.0857, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.27817745803357313, | |
| "grad_norm": 0.17311293794732555, | |
| "learning_rate": 0.00018137205233824098, | |
| "loss": 1.1215, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.2829736211031175, | |
| "grad_norm": 0.16045436773084543, | |
| "learning_rate": 0.00018038624003988404, | |
| "loss": 1.1102, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.28776978417266186, | |
| "grad_norm": 0.16385963917512852, | |
| "learning_rate": 0.0001793778369151991, | |
| "loss": 1.1237, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.29256594724220625, | |
| "grad_norm": 0.16336813988660637, | |
| "learning_rate": 0.00017834712635422716, | |
| "loss": 1.07, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.2973621103117506, | |
| "grad_norm": 0.17112565194779492, | |
| "learning_rate": 0.0001772943980160351, | |
| "loss": 1.1009, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.302158273381295, | |
| "grad_norm": 0.16697464006830123, | |
| "learning_rate": 0.0001762199477473131, | |
| "loss": 1.0972, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.3069544364508393, | |
| "grad_norm": 0.16453710745761058, | |
| "learning_rate": 0.0001751240774992336, | |
| "loss": 1.074, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.3117505995203837, | |
| "grad_norm": 0.17019017682628554, | |
| "learning_rate": 0.000174007095242594, | |
| "loss": 1.0605, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.31654676258992803, | |
| "grad_norm": 0.17847410199181396, | |
| "learning_rate": 0.00017286931488126839, | |
| "loss": 1.1109, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.3213429256594724, | |
| "grad_norm": 0.1628727862316133, | |
| "learning_rate": 0.00017171105616399152, | |
| "loss": 1.1024, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.3261390887290168, | |
| "grad_norm": 0.16567336419547818, | |
| "learning_rate": 0.0001705326445945002, | |
| "loss": 1.072, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.33093525179856115, | |
| "grad_norm": 0.16722439948774648, | |
| "learning_rate": 0.0001693344113400577, | |
| "loss": 1.0873, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.33573141486810554, | |
| "grad_norm": 0.16122286760160678, | |
| "learning_rate": 0.0001681166931383859, | |
| "loss": 1.0938, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.3405275779376499, | |
| "grad_norm": 0.15969633584876602, | |
| "learning_rate": 0.00016687983220303282, | |
| "loss": 1.107, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.34532374100719426, | |
| "grad_norm": 0.20032537034543332, | |
| "learning_rate": 0.00016562417612720054, | |
| "loss": 1.0925, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.3501199040767386, | |
| "grad_norm": 0.1636161155593432, | |
| "learning_rate": 0.00016435007778606178, | |
| "loss": 1.0925, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.354916067146283, | |
| "grad_norm": 0.17509644211474343, | |
| "learning_rate": 0.00016305789523759186, | |
| "loss": 1.0853, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.3597122302158273, | |
| "grad_norm": 0.15845793640793487, | |
| "learning_rate": 0.00016174799162194407, | |
| "loss": 1.074, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.3645083932853717, | |
| "grad_norm": 0.1643115007590844, | |
| "learning_rate": 0.00016042073505939718, | |
| "loss": 1.0904, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.36930455635491605, | |
| "grad_norm": 0.159834353020724, | |
| "learning_rate": 0.0001590764985469029, | |
| "loss": 1.0913, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.37410071942446044, | |
| "grad_norm": 0.15671259478172928, | |
| "learning_rate": 0.00015771565985326323, | |
| "loss": 1.0949, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.37889688249400477, | |
| "grad_norm": 0.15892776737127523, | |
| "learning_rate": 0.0001563386014129667, | |
| "loss": 1.0882, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.38369304556354916, | |
| "grad_norm": 0.1554951930431914, | |
| "learning_rate": 0.00015494571021871308, | |
| "loss": 1.0849, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.38848920863309355, | |
| "grad_norm": 0.15596140521064988, | |
| "learning_rate": 0.00015353737771265787, | |
| "loss": 1.0892, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.3932853717026379, | |
| "grad_norm": 0.16030122057012702, | |
| "learning_rate": 0.00015211399967640537, | |
| "loss": 1.073, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.3980815347721823, | |
| "grad_norm": 0.15896611447165424, | |
| "learning_rate": 0.00015067597611978327, | |
| "loss": 1.1113, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.4028776978417266, | |
| "grad_norm": 0.1532650010931279, | |
| "learning_rate": 0.000149223711168428, | |
| "loss": 1.0881, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.407673860911271, | |
| "grad_norm": 0.16086089606757772, | |
| "learning_rate": 0.00014775761295021417, | |
| "loss": 1.0887, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.41247002398081534, | |
| "grad_norm": 0.15347896448970985, | |
| "learning_rate": 0.00014627809348055908, | |
| "loss": 1.0838, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.4172661870503597, | |
| "grad_norm": 0.16005243796029608, | |
| "learning_rate": 0.00014478556854663434, | |
| "loss": 1.1036, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.42206235011990406, | |
| "grad_norm": 0.23613805183753908, | |
| "learning_rate": 0.00014328045759051805, | |
| "loss": 1.0886, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.42685851318944845, | |
| "grad_norm": 0.15961403055430387, | |
| "learning_rate": 0.00014176318359131955, | |
| "loss": 1.0807, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.4316546762589928, | |
| "grad_norm": 0.15677042464881533, | |
| "learning_rate": 0.00014023417294631017, | |
| "loss": 1.1133, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.4364508393285372, | |
| "grad_norm": 0.14974169551675579, | |
| "learning_rate": 0.0001386938553510936, | |
| "loss": 1.0799, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.4412470023980815, | |
| "grad_norm": 0.15803754347469678, | |
| "learning_rate": 0.00013714266367884884, | |
| "loss": 1.0735, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.4460431654676259, | |
| "grad_norm": 0.1534708691023063, | |
| "learning_rate": 0.00013558103385868085, | |
| "loss": 1.0941, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.45083932853717024, | |
| "grad_norm": 0.15911138328699584, | |
| "learning_rate": 0.00013400940475311192, | |
| "loss": 1.1036, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.4556354916067146, | |
| "grad_norm": 0.1535927522203036, | |
| "learning_rate": 0.0001324282180347486, | |
| "loss": 1.077, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.460431654676259, | |
| "grad_norm": 0.15031744415995577, | |
| "learning_rate": 0.00013083791806215938, | |
| "loss": 1.1175, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.46522781774580335, | |
| "grad_norm": 0.15368131899023496, | |
| "learning_rate": 0.0001292389517549971, | |
| "loss": 1.0858, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.47002398081534774, | |
| "grad_norm": 0.15817599988761602, | |
| "learning_rate": 0.0001276317684684017, | |
| "loss": 1.1105, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.4748201438848921, | |
| "grad_norm": 0.1544524844304045, | |
| "learning_rate": 0.0001260168198667189, | |
| "loss": 1.0675, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.47961630695443647, | |
| "grad_norm": 0.153601743822322, | |
| "learning_rate": 0.00012439455979656932, | |
| "loss": 1.0792, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4844124700239808, | |
| "grad_norm": 0.15310739937604212, | |
| "learning_rate": 0.00012276544415930476, | |
| "loss": 1.1037, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.4892086330935252, | |
| "grad_norm": 0.15940815066320235, | |
| "learning_rate": 0.00012112993078288702, | |
| "loss": 1.0893, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.4940047961630695, | |
| "grad_norm": 0.15593480397437104, | |
| "learning_rate": 0.00011948847929322497, | |
| "loss": 1.0736, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.4988009592326139, | |
| "grad_norm": 0.15351285356914193, | |
| "learning_rate": 0.00011784155098500682, | |
| "loss": 1.0734, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5035971223021583, | |
| "grad_norm": 0.1648474397335208, | |
| "learning_rate": 0.00011618960869206285, | |
| "loss": 1.0902, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.5083932853717026, | |
| "grad_norm": 0.15294486843196387, | |
| "learning_rate": 0.00011453311665729618, | |
| "loss": 1.0963, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.513189448441247, | |
| "grad_norm": 0.148639402509462, | |
| "learning_rate": 0.0001128725404022171, | |
| "loss": 1.0796, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.5179856115107914, | |
| "grad_norm": 0.15562736350879572, | |
| "learning_rate": 0.00011120834659611831, | |
| "loss": 1.0906, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5227817745803357, | |
| "grad_norm": 0.1502074864032036, | |
| "learning_rate": 0.00010954100292492757, | |
| "loss": 1.0711, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.5275779376498801, | |
| "grad_norm": 0.157682125372101, | |
| "learning_rate": 0.00010787097795977448, | |
| "loss": 1.0728, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.5323741007194245, | |
| "grad_norm": 0.1516246607062371, | |
| "learning_rate": 0.00010619874102530885, | |
| "loss": 1.0833, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.5371702637889688, | |
| "grad_norm": 0.14890743598378445, | |
| "learning_rate": 0.00010452476206780685, | |
| "loss": 1.1037, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5419664268585132, | |
| "grad_norm": 0.1544250948669629, | |
| "learning_rate": 0.00010284951152310292, | |
| "loss": 1.0882, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.5467625899280576, | |
| "grad_norm": 0.15154065813090323, | |
| "learning_rate": 0.00010117346018438367, | |
| "loss": 1.0601, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.5515587529976019, | |
| "grad_norm": 0.1541132913924603, | |
| "learning_rate": 9.949707906988165e-05, | |
| "loss": 1.0825, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.5563549160671463, | |
| "grad_norm": 0.15035087453783752, | |
| "learning_rate": 9.7820839290506e-05, | |
| "loss": 1.0893, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.5611510791366906, | |
| "grad_norm": 0.1566481016467638, | |
| "learning_rate": 9.614521191744644e-05, | |
| "loss": 1.0717, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.565947242206235, | |
| "grad_norm": 0.14690623261948202, | |
| "learning_rate": 9.447066784978914e-05, | |
| "loss": 1.0843, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.5707434052757794, | |
| "grad_norm": 0.14805971637155899, | |
| "learning_rate": 9.279767768218057e-05, | |
| "loss": 1.0639, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.5755395683453237, | |
| "grad_norm": 0.15165832191817782, | |
| "learning_rate": 9.112671157257698e-05, | |
| "loss": 1.0762, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.580335731414868, | |
| "grad_norm": 0.16254973894493494, | |
| "learning_rate": 8.945823911011648e-05, | |
| "loss": 1.0627, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.5851318944844125, | |
| "grad_norm": 0.15231765571558437, | |
| "learning_rate": 8.779272918315134e-05, | |
| "loss": 1.1191, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.5899280575539568, | |
| "grad_norm": 0.15662286211747672, | |
| "learning_rate": 8.613064984747672e-05, | |
| "loss": 1.0814, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.5947242206235012, | |
| "grad_norm": 0.15604291232878043, | |
| "learning_rate": 8.44724681947939e-05, | |
| "loss": 1.062, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.5995203836930456, | |
| "grad_norm": 0.17656148460505006, | |
| "learning_rate": 8.281865022144402e-05, | |
| "loss": 1.0877, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.60431654676259, | |
| "grad_norm": 0.15407547139459882, | |
| "learning_rate": 8.116966069744987e-05, | |
| "loss": 1.0883, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.6091127098321343, | |
| "grad_norm": 0.15686792104917918, | |
| "learning_rate": 7.952596303590214e-05, | |
| "loss": 1.0726, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.6139088729016786, | |
| "grad_norm": 0.14900379281026824, | |
| "learning_rate": 7.788801916272739e-05, | |
| "loss": 1.0873, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6187050359712231, | |
| "grad_norm": 0.15399136244929024, | |
| "learning_rate": 7.625628938687348e-05, | |
| "loss": 1.0673, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.6235011990407674, | |
| "grad_norm": 0.1454561721402151, | |
| "learning_rate": 7.463123227094961e-05, | |
| "loss": 1.0648, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.6282973621103117, | |
| "grad_norm": 0.15049453308757393, | |
| "learning_rate": 7.301330450235733e-05, | |
| "loss": 1.0952, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.6330935251798561, | |
| "grad_norm": 0.14976244376291453, | |
| "learning_rate": 7.140296076494809e-05, | |
| "loss": 1.096, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.6378896882494005, | |
| "grad_norm": 0.15150823609919292, | |
| "learning_rate": 6.980065361124437e-05, | |
| "loss": 1.0973, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.6426858513189448, | |
| "grad_norm": 0.1471164569431601, | |
| "learning_rate": 6.820683333525942e-05, | |
| "loss": 1.0886, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.6474820143884892, | |
| "grad_norm": 0.1478399832545343, | |
| "learning_rate": 6.662194784595164e-05, | |
| "loss": 1.0778, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.6522781774580336, | |
| "grad_norm": 0.14733730171420292, | |
| "learning_rate": 6.504644254134969e-05, | |
| "loss": 1.0622, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.657074340527578, | |
| "grad_norm": 0.15089732572854292, | |
| "learning_rate": 6.34807601833826e-05, | |
| "loss": 1.054, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.6618705035971223, | |
| "grad_norm": 0.15257943560973228, | |
| "learning_rate": 6.19253407734514e-05, | |
| "loss": 1.0653, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.1555839364213723, | |
| "learning_rate": 6.038062142877583e-05, | |
| "loss": 1.0743, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.6714628297362111, | |
| "grad_norm": 0.15847863555269973, | |
| "learning_rate": 5.884703625955219e-05, | |
| "loss": 1.0714, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.6762589928057554, | |
| "grad_norm": 0.14915505074346633, | |
| "learning_rate": 5.73250162469559e-05, | |
| "loss": 1.0777, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.6810551558752997, | |
| "grad_norm": 0.1558291126854066, | |
| "learning_rate": 5.581498912202339e-05, | |
| "loss": 1.0946, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.6858513189448441, | |
| "grad_norm": 0.15124530700511218, | |
| "learning_rate": 5.431737924544763e-05, | |
| "loss": 1.0802, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.6906474820143885, | |
| "grad_norm": 0.151312999497745, | |
| "learning_rate": 5.283260748832072e-05, | |
| "loss": 1.0809, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.6954436450839329, | |
| "grad_norm": 0.14330547504302646, | |
| "learning_rate": 5.1361091113856875e-05, | |
| "loss": 1.0801, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.7002398081534772, | |
| "grad_norm": 0.1480298490267667, | |
| "learning_rate": 4.990324366012977e-05, | |
| "loss": 1.0553, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.7050359712230215, | |
| "grad_norm": 0.153500392941282, | |
| "learning_rate": 4.845947482385645e-05, | |
| "loss": 1.0694, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.709832134292566, | |
| "grad_norm": 0.1446376081330043, | |
| "learning_rate": 4.7030190345260816e-05, | |
| "loss": 1.0684, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.7146282973621103, | |
| "grad_norm": 0.15009181859267848, | |
| "learning_rate": 4.5615791894049286e-05, | |
| "loss": 1.0812, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.7194244604316546, | |
| "grad_norm": 0.1489429463837929, | |
| "learning_rate": 4.4216676956529866e-05, | |
| "loss": 1.1022, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.7242206235011991, | |
| "grad_norm": 0.14571898395542737, | |
| "learning_rate": 4.2833238723907275e-05, | |
| "loss": 1.0446, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.7290167865707434, | |
| "grad_norm": 0.1613600673322617, | |
| "learning_rate": 4.146586598178506e-05, | |
| "loss": 1.0452, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.7338129496402878, | |
| "grad_norm": 0.14371687625758103, | |
| "learning_rate": 4.011494300090565e-05, | |
| "loss": 1.0848, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.7386091127098321, | |
| "grad_norm": 0.1540465353357514, | |
| "learning_rate": 3.878084942915936e-05, | |
| "loss": 1.0866, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.7434052757793765, | |
| "grad_norm": 0.14845273692539365, | |
| "learning_rate": 3.746396018489261e-05, | |
| "loss": 1.064, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.7482014388489209, | |
| "grad_norm": 0.1510032414720602, | |
| "learning_rate": 3.616464535154496e-05, | |
| "loss": 1.0775, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.7529976019184652, | |
| "grad_norm": 0.14414705671457814, | |
| "learning_rate": 3.488327007364525e-05, | |
| "loss": 1.0664, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.7577937649880095, | |
| "grad_norm": 0.14609563936342954, | |
| "learning_rate": 3.3620194454195564e-05, | |
| "loss": 1.0732, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.762589928057554, | |
| "grad_norm": 0.15124924277730611, | |
| "learning_rate": 3.237577345347196e-05, | |
| "loss": 1.0796, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.7673860911270983, | |
| "grad_norm": 0.14663622293812884, | |
| "learning_rate": 3.115035678927063e-05, | |
| "loss": 1.0789, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.7721822541966427, | |
| "grad_norm": 0.14712216022696742, | |
| "learning_rate": 2.9944288838627054e-05, | |
| "loss": 1.0812, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.7769784172661871, | |
| "grad_norm": 0.1466540809367841, | |
| "learning_rate": 2.875790854103634e-05, | |
| "loss": 1.0371, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.7817745803357314, | |
| "grad_norm": 0.15209652176203295, | |
| "learning_rate": 2.7591549303201514e-05, | |
| "loss": 1.0666, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.7865707434052758, | |
| "grad_norm": 0.15265531709332175, | |
| "learning_rate": 2.6445538905336763e-05, | |
| "loss": 1.0716, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.7913669064748201, | |
| "grad_norm": 0.1472763184905413, | |
| "learning_rate": 2.532019940905186e-05, | |
| "loss": 1.0774, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.7961630695443646, | |
| "grad_norm": 0.14917213471816188, | |
| "learning_rate": 2.421584706684359e-05, | |
| "loss": 1.0585, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.8009592326139089, | |
| "grad_norm": 0.1487175153721826, | |
| "learning_rate": 2.3132792233219813e-05, | |
| "loss": 1.0845, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.8057553956834532, | |
| "grad_norm": 0.14774839089244932, | |
| "learning_rate": 2.207133927748104e-05, | |
| "loss": 1.0676, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.8105515587529976, | |
| "grad_norm": 0.14579576803524566, | |
| "learning_rate": 2.103178649818387e-05, | |
| "loss": 1.0618, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.815347721822542, | |
| "grad_norm": 0.15161345454257397, | |
| "learning_rate": 2.0014426039310786e-05, | |
| "loss": 1.0647, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.8201438848920863, | |
| "grad_norm": 0.15041577767342024, | |
| "learning_rate": 1.9019543808169115e-05, | |
| "loss": 1.0634, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.8249400479616307, | |
| "grad_norm": 0.14832965984424024, | |
| "learning_rate": 1.8047419395043086e-05, | |
| "loss": 1.078, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.829736211031175, | |
| "grad_norm": 0.14605268717157927, | |
| "learning_rate": 1.7098325994620934e-05, | |
| "loss": 1.0763, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.8345323741007195, | |
| "grad_norm": 0.14667368288338628, | |
| "learning_rate": 1.6172530329219416e-05, | |
| "loss": 1.0792, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.8393285371702638, | |
| "grad_norm": 0.1468054846510282, | |
| "learning_rate": 1.5270292573827173e-05, | |
| "loss": 1.0643, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.8441247002398081, | |
| "grad_norm": 0.14221973189330656, | |
| "learning_rate": 1.4391866282988265e-05, | |
| "loss": 1.0448, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.8489208633093526, | |
| "grad_norm": 0.14260459844091208, | |
| "learning_rate": 1.3537498319545983e-05, | |
| "loss": 1.0729, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.8537170263788969, | |
| "grad_norm": 0.15011630591055675, | |
| "learning_rate": 1.2707428785267394e-05, | |
| "loss": 1.0737, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.8585131894484412, | |
| "grad_norm": 0.1406245046991364, | |
| "learning_rate": 1.1901890953367911e-05, | |
| "loss": 1.0542, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.8633093525179856, | |
| "grad_norm": 0.1452217620527171, | |
| "learning_rate": 1.1121111202954836e-05, | |
| "loss": 1.0404, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.86810551558753, | |
| "grad_norm": 0.14762449970761843, | |
| "learning_rate": 1.0365308955408459e-05, | |
| "loss": 1.0668, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.8729016786570744, | |
| "grad_norm": 0.14280442405170934, | |
| "learning_rate": 9.634696612718242e-06, | |
| "loss": 1.0589, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.8776978417266187, | |
| "grad_norm": 0.14727617951380836, | |
| "learning_rate": 8.929479497791926e-06, | |
| "loss": 1.0521, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.882494004796163, | |
| "grad_norm": 0.14230938752904093, | |
| "learning_rate": 8.24985579675388e-06, | |
| "loss": 1.0615, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.8872901678657075, | |
| "grad_norm": 0.14834529182832706, | |
| "learning_rate": 7.59601650324917e-06, | |
| "loss": 1.0596, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.8920863309352518, | |
| "grad_norm": 0.15376739416260338, | |
| "learning_rate": 6.96814536476893e-06, | |
| "loss": 1.0596, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.8968824940047961, | |
| "grad_norm": 0.15726124144994705, | |
| "learning_rate": 6.366418831011956e-06, | |
| "loss": 1.0639, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.9016786570743405, | |
| "grad_norm": 0.14356439831596873, | |
| "learning_rate": 5.79100600429745e-06, | |
| "loss": 1.09, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.9064748201438849, | |
| "grad_norm": 0.1418205242753867, | |
| "learning_rate": 5.242068592042349e-06, | |
| "loss": 1.0747, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.9112709832134293, | |
| "grad_norm": 0.14390931236295634, | |
| "learning_rate": 4.7197608613169685e-06, | |
| "loss": 1.0718, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.9160671462829736, | |
| "grad_norm": 0.1538309152736152, | |
| "learning_rate": 4.224229595491591e-06, | |
| "loss": 1.0717, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.920863309352518, | |
| "grad_norm": 0.14425154981452512, | |
| "learning_rate": 3.7556140529860563e-06, | |
| "loss": 1.1077, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.9256594724220624, | |
| "grad_norm": 0.14548690711150744, | |
| "learning_rate": 3.314045928134224e-06, | |
| "loss": 1.0682, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.9304556354916067, | |
| "grad_norm": 0.14192393205318143, | |
| "learning_rate": 2.8996493141741687e-06, | |
| "loss": 1.0754, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.935251798561151, | |
| "grad_norm": 0.1483650093466018, | |
| "learning_rate": 2.5125406683743414e-06, | |
| "loss": 1.0496, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.9400479616306955, | |
| "grad_norm": 0.14456885419782456, | |
| "learning_rate": 2.152828779305793e-06, | |
| "loss": 1.0789, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.9448441247002398, | |
| "grad_norm": 0.14797777926339026, | |
| "learning_rate": 1.8206147362695213e-06, | |
| "loss": 1.0694, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.9496402877697842, | |
| "grad_norm": 0.1533259331869082, | |
| "learning_rate": 1.5159919008874369e-06, | |
| "loss": 1.0581, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.9544364508393285, | |
| "grad_norm": 0.14794187907454043, | |
| "learning_rate": 1.2390458808651083e-06, | |
| "loss": 1.0805, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.9592326139088729, | |
| "grad_norm": 0.14668802282059784, | |
| "learning_rate": 9.898545059335852e-07, | |
| "loss": 1.056, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.9640287769784173, | |
| "grad_norm": 0.14996161626389323, | |
| "learning_rate": 7.684878059769363e-07, | |
| "loss": 1.0528, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.9688249400479616, | |
| "grad_norm": 0.14311589381161047, | |
| "learning_rate": 5.750079913519835e-07, | |
| "loss": 1.0712, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.973621103117506, | |
| "grad_norm": 0.14129817073562634, | |
| "learning_rate": 4.094694354052742e-07, | |
| "loss": 1.0547, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.9784172661870504, | |
| "grad_norm": 0.14555078065518706, | |
| "learning_rate": 2.7191865919276026e-07, | |
| "loss": 1.053, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.9832134292565947, | |
| "grad_norm": 0.1507162059547356, | |
| "learning_rate": 1.623943184059229e-07, | |
| "loss": 1.0469, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.988009592326139, | |
| "grad_norm": 0.14248345329368778, | |
| "learning_rate": 8.092719250853975e-08, | |
| "loss": 1.0614, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.9928057553956835, | |
| "grad_norm": 0.1444089044813381, | |
| "learning_rate": 2.7540176086671144e-08, | |
| "loss": 1.081, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.9976019184652278, | |
| "grad_norm": 0.14756744866469018, | |
| "learning_rate": 2.2482724147177005e-09, | |
| "loss": 1.0966, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.9995203836930455, | |
| "eval_loss": 1.0771065950393677, | |
| "eval_runtime": 1906.9852, | |
| "eval_samples_per_second": 3.517, | |
| "eval_steps_per_second": 0.879, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.9995203836930455, | |
| "step": 1042, | |
| "total_flos": 2151475014795264.0, | |
| "train_loss": 1.0904040080343236, | |
| "train_runtime": 22004.2311, | |
| "train_samples_per_second": 3.032, | |
| "train_steps_per_second": 0.047 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1042, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2151475014795264.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |