| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 2361, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.012706480304955527, |
| "grad_norm": 29.66147254458293, |
| "learning_rate": 5.070422535211268e-07, |
| "loss": 2.2184, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.025412960609911054, |
| "grad_norm": 13.392595629867637, |
| "learning_rate": 1.0704225352112677e-06, |
| "loss": 1.8971, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.03811944091486658, |
| "grad_norm": 11.776335522019831, |
| "learning_rate": 1.6338028169014086e-06, |
| "loss": 1.4505, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.05082592121982211, |
| "grad_norm": 9.270920607469225, |
| "learning_rate": 2.19718309859155e-06, |
| "loss": 1.2166, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.06353240152477764, |
| "grad_norm": 8.813337405138961, |
| "learning_rate": 2.7605633802816906e-06, |
| "loss": 1.1232, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.07623888182973317, |
| "grad_norm": 8.759370998993289, |
| "learning_rate": 3.3239436619718313e-06, |
| "loss": 1.1167, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.08894536213468869, |
| "grad_norm": 9.774431896253528, |
| "learning_rate": 3.887323943661972e-06, |
| "loss": 1.0426, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.10165184243964422, |
| "grad_norm": 6.934105141250302, |
| "learning_rate": 4.450704225352113e-06, |
| "loss": 0.9485, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.11435832274459974, |
| "grad_norm": 9.541786088857878, |
| "learning_rate": 5.014084507042254e-06, |
| "loss": 0.971, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.12706480304955528, |
| "grad_norm": 8.771108246775684, |
| "learning_rate": 5.577464788732395e-06, |
| "loss": 1.0133, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1397712833545108, |
| "grad_norm": 8.314820650161352, |
| "learning_rate": 6.1408450704225356e-06, |
| "loss": 0.9729, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.15247776365946633, |
| "grad_norm": 8.539055316805458, |
| "learning_rate": 6.704225352112676e-06, |
| "loss": 0.9679, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.16518424396442186, |
| "grad_norm": 7.751785436062567, |
| "learning_rate": 7.267605633802818e-06, |
| "loss": 0.9634, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.17789072426937738, |
| "grad_norm": 8.338219679133466, |
| "learning_rate": 7.830985915492958e-06, |
| "loss": 0.9674, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.1905972045743329, |
| "grad_norm": 6.970312776486159, |
| "learning_rate": 8.3943661971831e-06, |
| "loss": 0.9323, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.20330368487928843, |
| "grad_norm": 8.934334887580613, |
| "learning_rate": 8.957746478873241e-06, |
| "loss": 0.9296, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.21601016518424396, |
| "grad_norm": 8.855402068732014, |
| "learning_rate": 9.521126760563381e-06, |
| "loss": 0.8979, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.22871664548919948, |
| "grad_norm": 7.627448152527667, |
| "learning_rate": 1.0084507042253523e-05, |
| "loss": 0.9398, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.241423125794155, |
| "grad_norm": 8.293311252648705, |
| "learning_rate": 1.0647887323943662e-05, |
| "loss": 0.9903, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.25412960609911056, |
| "grad_norm": 7.860820039563278, |
| "learning_rate": 1.1211267605633804e-05, |
| "loss": 0.953, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2668360864040661, |
| "grad_norm": 12.101780056709387, |
| "learning_rate": 1.1774647887323944e-05, |
| "loss": 0.9014, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.2795425667090216, |
| "grad_norm": 8.180635063012854, |
| "learning_rate": 1.2338028169014084e-05, |
| "loss": 0.9612, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.29224904701397714, |
| "grad_norm": 7.043614458189797, |
| "learning_rate": 1.2901408450704227e-05, |
| "loss": 0.9492, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.30495552731893266, |
| "grad_norm": 6.22917625029958, |
| "learning_rate": 1.3464788732394367e-05, |
| "loss": 0.9073, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.3176620076238882, |
| "grad_norm": 6.158034885075652, |
| "learning_rate": 1.4028169014084507e-05, |
| "loss": 0.899, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.3303684879288437, |
| "grad_norm": 6.511044998547078, |
| "learning_rate": 1.459154929577465e-05, |
| "loss": 0.9147, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.34307496823379924, |
| "grad_norm": 7.2764171631878405, |
| "learning_rate": 1.515492957746479e-05, |
| "loss": 0.9637, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.35578144853875476, |
| "grad_norm": 7.6464115804873405, |
| "learning_rate": 1.571830985915493e-05, |
| "loss": 0.9521, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.3684879288437103, |
| "grad_norm": 7.027026272110715, |
| "learning_rate": 1.6281690140845072e-05, |
| "loss": 0.9738, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.3811944091486658, |
| "grad_norm": 7.0177338278091765, |
| "learning_rate": 1.6845070422535213e-05, |
| "loss": 1.0046, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.39390088945362134, |
| "grad_norm": 6.988788493899054, |
| "learning_rate": 1.740845070422535e-05, |
| "loss": 0.9481, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.40660736975857686, |
| "grad_norm": 6.592900845765771, |
| "learning_rate": 1.7971830985915497e-05, |
| "loss": 0.9588, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.4193138500635324, |
| "grad_norm": 5.836583770100009, |
| "learning_rate": 1.8535211267605635e-05, |
| "loss": 0.9404, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.4320203303684879, |
| "grad_norm": 6.69521882642329, |
| "learning_rate": 1.9098591549295776e-05, |
| "loss": 0.9385, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.44472681067344344, |
| "grad_norm": 5.945281536047344, |
| "learning_rate": 1.9661971830985918e-05, |
| "loss": 0.9394, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.45743329097839897, |
| "grad_norm": 5.686864112756425, |
| "learning_rate": 1.9999803787597817e-05, |
| "loss": 0.9764, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.4701397712833545, |
| "grad_norm": 5.459918154381771, |
| "learning_rate": 1.9997596486500402e-05, |
| "loss": 0.9827, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.48284625158831, |
| "grad_norm": 5.164520519383337, |
| "learning_rate": 1.999293716197302e-05, |
| "loss": 0.9291, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.49555273189326554, |
| "grad_norm": 6.177104322189275, |
| "learning_rate": 1.998582695676762e-05, |
| "loss": 1.0364, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.5082592121982211, |
| "grad_norm": 5.346131678285112, |
| "learning_rate": 1.997626761474232e-05, |
| "loss": 0.9728, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5209656925031766, |
| "grad_norm": 5.075630186929998, |
| "learning_rate": 1.99642614804337e-05, |
| "loss": 0.957, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.5336721728081322, |
| "grad_norm": 5.766342613636245, |
| "learning_rate": 1.9949811498481763e-05, |
| "loss": 0.9856, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.5463786531130876, |
| "grad_norm": 4.773734435548506, |
| "learning_rate": 1.9932921212907753e-05, |
| "loss": 1.0065, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.5590851334180432, |
| "grad_norm": 4.8306675684219105, |
| "learning_rate": 1.991359476624493e-05, |
| "loss": 0.9175, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.5717916137229987, |
| "grad_norm": 4.8928892220094236, |
| "learning_rate": 1.9891836898522566e-05, |
| "loss": 0.9014, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.5844980940279543, |
| "grad_norm": 4.466687752161082, |
| "learning_rate": 1.9867652946103413e-05, |
| "loss": 0.9324, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.5972045743329097, |
| "grad_norm": 4.706203130518702, |
| "learning_rate": 1.9841048840374885e-05, |
| "loss": 0.9311, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.6099110546378653, |
| "grad_norm": 4.776570809357277, |
| "learning_rate": 1.9812031106294314e-05, |
| "loss": 0.952, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.6226175349428208, |
| "grad_norm": 5.443465949868151, |
| "learning_rate": 1.978060686078866e-05, |
| "loss": 0.9067, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.6353240152477764, |
| "grad_norm": 4.985579208130221, |
| "learning_rate": 1.974678381100896e-05, |
| "loss": 0.9559, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6480304955527318, |
| "grad_norm": 4.406803792781764, |
| "learning_rate": 1.9710570252440106e-05, |
| "loss": 0.9082, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.6607369758576874, |
| "grad_norm": 4.620905177890474, |
| "learning_rate": 1.9671975066866254e-05, |
| "loss": 0.9241, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.6734434561626429, |
| "grad_norm": 3.9984494959651533, |
| "learning_rate": 1.9631007720192475e-05, |
| "loss": 0.8811, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.6861499364675985, |
| "grad_norm": 4.760193821922472, |
| "learning_rate": 1.9587678260123146e-05, |
| "loss": 0.9314, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.6988564167725541, |
| "grad_norm": 4.4328809578626895, |
| "learning_rate": 1.9541997313697614e-05, |
| "loss": 0.9018, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.7115628970775095, |
| "grad_norm": 3.785123304702001, |
| "learning_rate": 1.9493976084683814e-05, |
| "loss": 0.9349, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.7242693773824651, |
| "grad_norm": 4.623522007776074, |
| "learning_rate": 1.9443626350830417e-05, |
| "loss": 0.9283, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.7369758576874206, |
| "grad_norm": 4.367382745999128, |
| "learning_rate": 1.9390960460978188e-05, |
| "loss": 0.8936, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.7496823379923762, |
| "grad_norm": 4.640745350515662, |
| "learning_rate": 1.933599133203131e-05, |
| "loss": 0.9529, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.7623888182973316, |
| "grad_norm": 4.0946471225054974, |
| "learning_rate": 1.9278732445789364e-05, |
| "loss": 0.8961, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.7750952986022872, |
| "grad_norm": 4.250614479191838, |
| "learning_rate": 1.9219197845640766e-05, |
| "loss": 0.9028, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.7878017789072427, |
| "grad_norm": 4.147828875270731, |
| "learning_rate": 1.9157402133118454e-05, |
| "loss": 0.9302, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.8005082592121983, |
| "grad_norm": 5.568981020266887, |
| "learning_rate": 1.909336046431871e-05, |
| "loss": 0.9233, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.8132147395171537, |
| "grad_norm": 4.311517125711432, |
| "learning_rate": 1.9027088546183968e-05, |
| "loss": 0.9694, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.8259212198221093, |
| "grad_norm": 5.556351823725932, |
| "learning_rate": 1.8958602632650474e-05, |
| "loss": 0.9003, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.8386277001270648, |
| "grad_norm": 4.269905367926679, |
| "learning_rate": 1.8887919520661867e-05, |
| "loss": 0.8805, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.8513341804320204, |
| "grad_norm": 3.863721835826297, |
| "learning_rate": 1.8815056546049505e-05, |
| "loss": 0.9158, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.8640406607369758, |
| "grad_norm": 3.963824200874715, |
| "learning_rate": 1.8740031579280667e-05, |
| "loss": 0.8835, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.8767471410419314, |
| "grad_norm": 3.680960497113959, |
| "learning_rate": 1.8662863021075632e-05, |
| "loss": 0.898, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.8894536213468869, |
| "grad_norm": 3.7414803428899606, |
| "learning_rate": 1.8583569797894673e-05, |
| "loss": 0.9253, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.9021601016518425, |
| "grad_norm": 4.680988801232008, |
| "learning_rate": 1.8502171357296144e-05, |
| "loss": 0.848, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.9148665819567979, |
| "grad_norm": 3.9671267724005785, |
| "learning_rate": 1.8418687663166745e-05, |
| "loss": 0.8965, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.9275730622617535, |
| "grad_norm": 4.137039499686447, |
| "learning_rate": 1.833313919082515e-05, |
| "loss": 0.8553, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.940279542566709, |
| "grad_norm": 4.784766455706121, |
| "learning_rate": 1.8245546922000207e-05, |
| "loss": 0.8695, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.9529860228716646, |
| "grad_norm": 4.418195979726905, |
| "learning_rate": 1.815593233968492e-05, |
| "loss": 0.8497, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.96569250317662, |
| "grad_norm": 4.103893841492413, |
| "learning_rate": 1.806431742286752e-05, |
| "loss": 0.8746, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.9783989834815756, |
| "grad_norm": 3.798164417492566, |
| "learning_rate": 1.7970724641140864e-05, |
| "loss": 0.8708, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.9911054637865311, |
| "grad_norm": 4.623760315878684, |
| "learning_rate": 1.7875176949191506e-05, |
| "loss": 0.94, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.0038119440914866, |
| "grad_norm": 3.381126634985229, |
| "learning_rate": 1.7777697781169813e-05, |
| "loss": 0.8297, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.0165184243964422, |
| "grad_norm": 4.072031882597377, |
| "learning_rate": 1.7678311044942464e-05, |
| "loss": 0.6761, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.0292249047013977, |
| "grad_norm": 4.666218927514245, |
| "learning_rate": 1.757704111622878e-05, |
| "loss": 0.6868, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.0419313850063532, |
| "grad_norm": 3.010389554548932, |
| "learning_rate": 1.747391283262231e-05, |
| "loss": 0.6994, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.0546378653113089, |
| "grad_norm": 4.690411895539488, |
| "learning_rate": 1.736895148749911e-05, |
| "loss": 0.7141, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.0673443456162643, |
| "grad_norm": 4.135758513727204, |
| "learning_rate": 1.7262182823814297e-05, |
| "loss": 0.6941, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.0800508259212198, |
| "grad_norm": 4.378977675253243, |
| "learning_rate": 1.7153633027788252e-05, |
| "loss": 0.6662, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.0927573062261753, |
| "grad_norm": 3.8569291056754498, |
| "learning_rate": 1.704332872248418e-05, |
| "loss": 0.6575, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.105463786531131, |
| "grad_norm": 3.9650953005920666, |
| "learning_rate": 1.69312969612785e-05, |
| "loss": 0.6959, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.1181702668360864, |
| "grad_norm": 3.349386344864765, |
| "learning_rate": 1.6817565221225698e-05, |
| "loss": 0.6701, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.130876747141042, |
| "grad_norm": 4.530446985368436, |
| "learning_rate": 1.6702161396319266e-05, |
| "loss": 0.7168, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.1435832274459974, |
| "grad_norm": 4.048659358174538, |
| "learning_rate": 1.658511379065039e-05, |
| "loss": 0.7087, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.156289707750953, |
| "grad_norm": 3.897340539186477, |
| "learning_rate": 1.6466451111466044e-05, |
| "loss": 0.7509, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.1689961880559085, |
| "grad_norm": 3.106349799248209, |
| "learning_rate": 1.6346202462128228e-05, |
| "loss": 0.6793, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.181702668360864, |
| "grad_norm": 3.7338218401998753, |
| "learning_rate": 1.6224397334976023e-05, |
| "loss": 0.7172, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.1944091486658195, |
| "grad_norm": 5.11718627522725, |
| "learning_rate": 1.610106560409227e-05, |
| "loss": 0.6759, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.2071156289707752, |
| "grad_norm": 3.6889308944466177, |
| "learning_rate": 1.597623751797662e-05, |
| "loss": 0.6822, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.2198221092757306, |
| "grad_norm": 3.6223318506400135, |
| "learning_rate": 1.584994369212673e-05, |
| "loss": 0.7034, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.2325285895806861, |
| "grad_norm": 3.3333910693718662, |
| "learning_rate": 1.572221510152949e-05, |
| "loss": 0.767, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.2452350698856416, |
| "grad_norm": 4.265447578007238, |
| "learning_rate": 1.5593083073064037e-05, |
| "loss": 0.7358, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.2579415501905973, |
| "grad_norm": 3.874622904654225, |
| "learning_rate": 1.5462579277818498e-05, |
| "loss": 0.7336, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.2706480304955527, |
| "grad_norm": 3.925758808832438, |
| "learning_rate": 1.5330735723322282e-05, |
| "loss": 0.7102, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.2833545108005082, |
| "grad_norm": 4.212874894353556, |
| "learning_rate": 1.5197584745695904e-05, |
| "loss": 0.7053, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.2960609911054637, |
| "grad_norm": 3.7288496569236154, |
| "learning_rate": 1.506315900172014e-05, |
| "loss": 0.7223, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.3087674714104194, |
| "grad_norm": 3.79413472563588, |
| "learning_rate": 1.4927491460826626e-05, |
| "loss": 0.7185, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.3214739517153749, |
| "grad_norm": 4.197391869723048, |
| "learning_rate": 1.4790615397011703e-05, |
| "loss": 0.6293, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.3341804320203303, |
| "grad_norm": 3.3274802014296254, |
| "learning_rate": 1.4652564380675616e-05, |
| "loss": 0.7111, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.346886912325286, |
| "grad_norm": 3.984633199779957, |
| "learning_rate": 1.4513372270388967e-05, |
| "loss": 0.6926, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.3595933926302415, |
| "grad_norm": 4.32141196403412, |
| "learning_rate": 1.4373073204588556e-05, |
| "loss": 0.7126, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.372299872935197, |
| "grad_norm": 3.7790442182857302, |
| "learning_rate": 1.42317015932045e-05, |
| "loss": 0.6873, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.3850063532401524, |
| "grad_norm": 4.2661658978513355, |
| "learning_rate": 1.4089292109220852e-05, |
| "loss": 0.7642, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.397712833545108, |
| "grad_norm": 4.2591149854567645, |
| "learning_rate": 1.394587968017162e-05, |
| "loss": 0.6799, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.4104193138500636, |
| "grad_norm": 3.689601844022756, |
| "learning_rate": 1.3801499479574431e-05, |
| "loss": 0.6536, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.423125794155019, |
| "grad_norm": 4.289242494025662, |
| "learning_rate": 1.3656186918303804e-05, |
| "loss": 0.7092, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.4358322744599745, |
| "grad_norm": 3.891766076099888, |
| "learning_rate": 1.3509977635906241e-05, |
| "loss": 0.6536, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.4485387547649302, |
| "grad_norm": 3.4313665664745465, |
| "learning_rate": 1.3362907491859227e-05, |
| "loss": 0.6474, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.4612452350698857, |
| "grad_norm": 4.303628344639665, |
| "learning_rate": 1.3215012556776287e-05, |
| "loss": 0.715, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.4739517153748412, |
| "grad_norm": 4.009317272354951, |
| "learning_rate": 1.3066329103560267e-05, |
| "loss": 0.715, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.4866581956797966, |
| "grad_norm": 3.171330560062687, |
| "learning_rate": 1.2916893598506981e-05, |
| "loss": 0.6217, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.499364675984752, |
| "grad_norm": 3.3926952435565676, |
| "learning_rate": 1.276674269236145e-05, |
| "loss": 0.7366, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.5120711562897078, |
| "grad_norm": 3.8316403134343537, |
| "learning_rate": 1.2615913211328894e-05, |
| "loss": 0.6939, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.5247776365946633, |
| "grad_norm": 4.868361745818093, |
| "learning_rate": 1.2464442148042679e-05, |
| "loss": 0.6919, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.537484116899619, |
| "grad_norm": 3.5185484888328644, |
| "learning_rate": 1.2312366652491476e-05, |
| "loss": 0.6791, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.5501905972045744, |
| "grad_norm": 3.543401291583064, |
| "learning_rate": 1.2159724022907786e-05, |
| "loss": 0.6574, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.5628970775095299, |
| "grad_norm": 3.6437779582291063, |
| "learning_rate": 1.2006551696620135e-05, |
| "loss": 0.701, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.5756035578144854, |
| "grad_norm": 3.2559101294982025, |
| "learning_rate": 1.1852887240871145e-05, |
| "loss": 0.6546, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.5883100381194408, |
| "grad_norm": 3.9272330209126634, |
| "learning_rate": 1.1698768343603753e-05, |
| "loss": 0.6643, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.6010165184243963, |
| "grad_norm": 4.624643945291569, |
| "learning_rate": 1.1544232804217805e-05, |
| "loss": 0.6982, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.613722998729352, |
| "grad_norm": 3.7368581014964803, |
| "learning_rate": 1.1389318524299332e-05, |
| "loss": 0.6591, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.6264294790343075, |
| "grad_norm": 3.4323757873137177, |
| "learning_rate": 1.1234063498324764e-05, |
| "loss": 0.6743, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.6391359593392631, |
| "grad_norm": 4.208550713330492, |
| "learning_rate": 1.1078505804342327e-05, |
| "loss": 0.7147, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.6518424396442186, |
| "grad_norm": 2.978768874310465, |
| "learning_rate": 1.092268359463302e-05, |
| "loss": 0.671, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.664548919949174, |
| "grad_norm": 3.5924777944521606, |
| "learning_rate": 1.0766635086353298e-05, |
| "loss": 0.6713, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.6772554002541296, |
| "grad_norm": 3.495623048824376, |
| "learning_rate": 1.06103985521619e-05, |
| "loss": 0.6629, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.689961880559085, |
| "grad_norm": 4.086638389260075, |
| "learning_rate": 1.0454012310833034e-05, |
| "loss": 0.7035, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.7026683608640405, |
| "grad_norm": 3.475772078501932, |
| "learning_rate": 1.0297514717858286e-05, |
| "loss": 0.6631, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.7153748411689962, |
| "grad_norm": 3.5510342885210164, |
| "learning_rate": 1.0140944156039481e-05, |
| "loss": 0.685, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.7280813214739519, |
| "grad_norm": 3.5594852661382634, |
| "learning_rate": 9.984339026074881e-06, |
| "loss": 0.6549, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.7407878017789074, |
| "grad_norm": 3.3395635194008415, |
| "learning_rate": 9.827737737140983e-06, |
| "loss": 0.6467, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.7534942820838628, |
| "grad_norm": 3.219821540782638, |
| "learning_rate": 9.671178697472217e-06, |
| "loss": 0.6543, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.7662007623888183, |
| "grad_norm": 3.384594388965041, |
| "learning_rate": 9.514700304940901e-06, |
| "loss": 0.6922, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.7789072426937738, |
| "grad_norm": 3.64590250632275, |
| "learning_rate": 9.358340937639746e-06, |
| "loss": 0.6557, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.7916137229987292, |
| "grad_norm": 3.765353121248252, |
| "learning_rate": 9.202138944469168e-06, |
| "loss": 0.688, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.804320203303685, |
| "grad_norm": 3.7449398399867624, |
| "learning_rate": 9.046132635731816e-06, |
| "loss": 0.6675, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.8170266836086404, |
| "grad_norm": 3.942030599345544, |
| "learning_rate": 8.890360273736504e-06, |
| "loss": 0.6584, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.829733163913596, |
| "grad_norm": 4.037931457583538, |
| "learning_rate": 8.734860063413974e-06, |
| "loss": 0.6735, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.8424396442185516, |
| "grad_norm": 3.6205476660211247, |
| "learning_rate": 8.579670142946701e-06, |
| "loss": 0.7102, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.855146124523507, |
| "grad_norm": 3.821487835967331, |
| "learning_rate": 8.42482857441506e-06, |
| "loss": 0.6749, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.8678526048284625, |
| "grad_norm": 3.3623194464637574, |
| "learning_rate": 8.270373334462193e-06, |
| "loss": 0.672, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.880559085133418, |
| "grad_norm": 4.020841970961885, |
| "learning_rate": 8.116342304979783e-06, |
| "loss": 0.6863, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.8932655654383734, |
| "grad_norm": 4.08254040286643, |
| "learning_rate": 7.962773263817114e-06, |
| "loss": 0.6815, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.9059720457433291, |
| "grad_norm": 4.148274894889353, |
| "learning_rate": 7.809703875515613e-06, |
| "loss": 0.6417, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.9186785260482846, |
| "grad_norm": 4.640824882446659, |
| "learning_rate": 7.657171682071198e-06, |
| "loss": 0.62, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.9313850063532403, |
| "grad_norm": 4.7797510297359835, |
| "learning_rate": 7.505214093726692e-06, |
| "loss": 0.6439, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.9440914866581958, |
| "grad_norm": 3.613563186875674, |
| "learning_rate": 7.353868379796518e-06, |
| "loss": 0.6705, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.9567979669631512, |
| "grad_norm": 3.271201239131824, |
| "learning_rate": 7.203171659526e-06, |
| "loss": 0.6324, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.9695044472681067, |
| "grad_norm": 3.89489541610708, |
| "learning_rate": 7.053160892987434e-06, |
| "loss": 0.6757, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.9822109275730622, |
| "grad_norm": 3.701828258351079, |
| "learning_rate": 6.903872872015209e-06, |
| "loss": 0.6456, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.9949174078780176, |
| "grad_norm": 3.5373164710070957, |
| "learning_rate": 6.755344211182221e-06, |
| "loss": 0.6166, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.007623888182973, |
| "grad_norm": 2.425760113176382, |
| "learning_rate": 6.607611338819697e-06, |
| "loss": 0.5016, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.020330368487929, |
| "grad_norm": 3.427501282817139, |
| "learning_rate": 6.460710488082774e-06, |
| "loss": 0.374, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.0330368487928845, |
| "grad_norm": 3.4855149165350636, |
| "learning_rate": 6.31467768806388e-06, |
| "loss": 0.3524, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.04574332909784, |
| "grad_norm": 3.5473678303457996, |
| "learning_rate": 6.169548754956201e-06, |
| "loss": 0.3485, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.0584498094027954, |
| "grad_norm": 3.2554977371598466, |
| "learning_rate": 6.025359283269363e-06, |
| "loss": 0.348, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.071156289707751, |
| "grad_norm": 3.4222657332943376, |
| "learning_rate": 5.882144637099465e-06, |
| "loss": 0.3753, |
| "step": 1630 |
| }, |
| { |
| "epoch": 2.0838627700127064, |
| "grad_norm": 2.9777568505895675, |
| "learning_rate": 5.739939941455644e-06, |
| "loss": 0.3526, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.096569250317662, |
| "grad_norm": 3.7955516489911805, |
| "learning_rate": 5.598780073645267e-06, |
| "loss": 0.3543, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.1092757306226178, |
| "grad_norm": 3.8406500166667885, |
| "learning_rate": 5.458699654719873e-06, |
| "loss": 0.3642, |
| "step": 1660 |
| }, |
| { |
| "epoch": 2.121982210927573, |
| "grad_norm": 3.813395969645494, |
| "learning_rate": 5.319733040983972e-06, |
| "loss": 0.3428, |
| "step": 1670 |
| }, |
| { |
| "epoch": 2.1346886912325287, |
| "grad_norm": 3.7266891839301763, |
| "learning_rate": 5.181914315568782e-06, |
| "loss": 0.3403, |
| "step": 1680 |
| }, |
| { |
| "epoch": 2.147395171537484, |
| "grad_norm": 3.688709734552298, |
| "learning_rate": 5.0452772800729375e-06, |
| "loss": 0.3469, |
| "step": 1690 |
| }, |
| { |
| "epoch": 2.1601016518424396, |
| "grad_norm": 3.6629109337292403, |
| "learning_rate": 4.909855446272288e-06, |
| "loss": 0.3454, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.172808132147395, |
| "grad_norm": 3.7085182263998555, |
| "learning_rate": 4.775682027900739e-06, |
| "loss": 0.341, |
| "step": 1710 |
| }, |
| { |
| "epoch": 2.1855146124523506, |
| "grad_norm": 3.481723946532174, |
| "learning_rate": 4.6427899325042135e-06, |
| "loss": 0.3352, |
| "step": 1720 |
| }, |
| { |
| "epoch": 2.198221092757306, |
| "grad_norm": 3.2839395610983027, |
| "learning_rate": 4.511211753369712e-06, |
| "loss": 0.3447, |
| "step": 1730 |
| }, |
| { |
| "epoch": 2.210927573062262, |
| "grad_norm": 3.6755308055006464, |
| "learning_rate": 4.380979761531431e-06, |
| "loss": 0.3531, |
| "step": 1740 |
| }, |
| { |
| "epoch": 2.2236340533672174, |
| "grad_norm": 3.7905960831955916, |
| "learning_rate": 4.2521258978559324e-06, |
| "loss": 0.356, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.236340533672173, |
| "grad_norm": 3.627875927246556, |
| "learning_rate": 4.124681765208286e-06, |
| "loss": 0.3266, |
| "step": 1760 |
| }, |
| { |
| "epoch": 2.2490470139771284, |
| "grad_norm": 3.3246186092589447, |
| "learning_rate": 3.998678620701102e-06, |
| "loss": 0.3386, |
| "step": 1770 |
| }, |
| { |
| "epoch": 2.261753494282084, |
| "grad_norm": 3.804007286983282, |
| "learning_rate": 3.874147368028396e-06, |
| "loss": 0.3544, |
| "step": 1780 |
| }, |
| { |
| "epoch": 2.2744599745870393, |
| "grad_norm": 3.143040423820396, |
| "learning_rate": 3.751118549886065e-06, |
| "loss": 0.3227, |
| "step": 1790 |
| }, |
| { |
| "epoch": 2.2871664548919948, |
| "grad_norm": 3.352132852945674, |
| "learning_rate": 3.6296223404809903e-06, |
| "loss": 0.3399, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.2998729351969507, |
| "grad_norm": 4.043987038976339, |
| "learning_rate": 3.509688538130448e-06, |
| "loss": 0.3369, |
| "step": 1810 |
| }, |
| { |
| "epoch": 2.312579415501906, |
| "grad_norm": 3.954856965708331, |
| "learning_rate": 3.39134655795374e-06, |
| "loss": 0.341, |
| "step": 1820 |
| }, |
| { |
| "epoch": 2.3252858958068616, |
| "grad_norm": 3.5214147520563626, |
| "learning_rate": 3.2746254246578167e-06, |
| "loss": 0.3365, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.337992376111817, |
| "grad_norm": 3.218428553726758, |
| "learning_rate": 3.1595537654186114e-06, |
| "loss": 0.3546, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.3506988564167726, |
| "grad_norm": 3.163287967416541, |
| "learning_rate": 3.0461598028599305e-06, |
| "loss": 0.3431, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.363405336721728, |
| "grad_norm": 3.0988204272069573, |
| "learning_rate": 2.9344713481315225e-06, |
| "loss": 0.3303, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.3761118170266835, |
| "grad_norm": 3.9034586935786395, |
| "learning_rate": 2.8245157940880784e-06, |
| "loss": 0.3337, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.388818297331639, |
| "grad_norm": 3.5690630552722786, |
| "learning_rate": 2.7163201085708424e-06, |
| "loss": 0.3223, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.4015247776365944, |
| "grad_norm": 3.163806174642701, |
| "learning_rate": 2.6099108277934105e-06, |
| "loss": 0.3398, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.4142312579415504, |
| "grad_norm": 3.7465583268537275, |
| "learning_rate": 2.505314049833457e-06, |
| "loss": 0.3483, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.426937738246506, |
| "grad_norm": 3.516374761436456, |
| "learning_rate": 2.402555428231872e-06, |
| "loss": 0.3273, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.4396442185514613, |
| "grad_norm": 3.5353549798113284, |
| "learning_rate": 2.3016601657009364e-06, |
| "loss": 0.3374, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.4523506988564168, |
| "grad_norm": 3.357432157861631, |
| "learning_rate": 2.202653007943093e-06, |
| "loss": 0.3464, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.4650571791613722, |
| "grad_norm": 3.6506743298663675, |
| "learning_rate": 2.1055582375817475e-06, |
| "loss": 0.325, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.4777636594663277, |
| "grad_norm": 3.907282101797735, |
| "learning_rate": 2.0103996682057235e-06, |
| "loss": 0.3255, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.490470139771283, |
| "grad_norm": 3.711785490906897, |
| "learning_rate": 1.9172006385286723e-06, |
| "loss": 0.3391, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.503176620076239, |
| "grad_norm": 3.2473323176322135, |
| "learning_rate": 1.8259840066650136e-06, |
| "loss": 0.3389, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.5158831003811946, |
| "grad_norm": 3.6433209864443916, |
| "learning_rate": 1.7367721445237285e-06, |
| "loss": 0.3258, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.52858958068615, |
| "grad_norm": 4.12961794749056, |
| "learning_rate": 1.6495869323213654e-06, |
| "loss": 0.3185, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.5412960609911055, |
| "grad_norm": 4.1376649833602865, |
| "learning_rate": 1.564449753215711e-06, |
| "loss": 0.3247, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.554002541296061, |
| "grad_norm": 4.441583691608097, |
| "learning_rate": 1.4813814880612942e-06, |
| "loss": 0.3198, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.5667090216010164, |
| "grad_norm": 2.7847992176083047, |
| "learning_rate": 1.4004025102881402e-06, |
| "loss": 0.3143, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.579415501905972, |
| "grad_norm": 3.1337243741428473, |
| "learning_rate": 1.321532680904959e-06, |
| "loss": 0.3312, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.5921219822109274, |
| "grad_norm": 3.3653692372757225, |
| "learning_rate": 1.2447913436279879e-06, |
| "loss": 0.3129, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.604828462515883, |
| "grad_norm": 3.7337029805672635, |
| "learning_rate": 1.1701973201367544e-06, |
| "loss": 0.3253, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.6175349428208388, |
| "grad_norm": 4.214904637605022, |
| "learning_rate": 1.09776890545782e-06, |
| "loss": 0.3531, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.6302414231257942, |
| "grad_norm": 3.3613800857078764, |
| "learning_rate": 1.0275238634777441e-06, |
| "loss": 0.3105, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.6429479034307497, |
| "grad_norm": 3.7555272174929595, |
| "learning_rate": 9.594794225862692e-07, |
| "loss": 0.3331, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.655654383735705, |
| "grad_norm": 3.6364434124366, |
| "learning_rate": 8.936522714508678e-07, |
| "loss": 0.3336, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.6683608640406606, |
| "grad_norm": 3.684690863431174, |
| "learning_rate": 8.300585549236773e-07, |
| "loss": 0.3232, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.681067344345616, |
| "grad_norm": 4.307296056522447, |
| "learning_rate": 7.687138700817598e-07, |
| "loss": 0.3165, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.693773824650572, |
| "grad_norm": 3.418062959790304, |
| "learning_rate": 7.096332624017755e-07, |
| "loss": 0.3126, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.7064803049555275, |
| "grad_norm": 3.5508346766299397, |
| "learning_rate": 6.528312220698885e-07, |
| "loss": 0.3303, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.719186785260483, |
| "grad_norm": 4.518044935525217, |
| "learning_rate": 5.983216804278869e-07, |
| "loss": 0.3191, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.7318932655654384, |
| "grad_norm": 3.2181606746612044, |
| "learning_rate": 5.461180065563787e-07, |
| "loss": 0.3059, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.744599745870394, |
| "grad_norm": 3.8710207257234144, |
| "learning_rate": 4.962330039958585e-07, |
| "loss": 0.3194, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.7573062261753494, |
| "grad_norm": 3.6523179671049215, |
| "learning_rate": 4.486789076064968e-07, |
| "loss": 0.3148, |
| "step": 2170 |
| }, |
| { |
| "epoch": 2.770012706480305, |
| "grad_norm": 3.5451882545264053, |
| "learning_rate": 4.034673805674116e-07, |
| "loss": 0.3285, |
| "step": 2180 |
| }, |
| { |
| "epoch": 2.7827191867852603, |
| "grad_norm": 3.0920265120158827, |
| "learning_rate": 3.606095115161279e-07, |
| "loss": 0.3172, |
| "step": 2190 |
| }, |
| { |
| "epoch": 2.795425667090216, |
| "grad_norm": 3.496745467546405, |
| "learning_rate": 3.201158118289793e-07, |
| "loss": 0.3183, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.8081321473951717, |
| "grad_norm": 3.361865186046809, |
| "learning_rate": 2.8199621304306425e-07, |
| "loss": 0.3209, |
| "step": 2210 |
| }, |
| { |
| "epoch": 2.820838627700127, |
| "grad_norm": 3.951234253131063, |
| "learning_rate": 2.46260064420426e-07, |
| "loss": 0.3165, |
| "step": 2220 |
| }, |
| { |
| "epoch": 2.8335451080050826, |
| "grad_norm": 4.033338515766138, |
| "learning_rate": 2.1291613065504313e-07, |
| "loss": 0.3233, |
| "step": 2230 |
| }, |
| { |
| "epoch": 2.846251588310038, |
| "grad_norm": 3.8424683095800303, |
| "learning_rate": 1.819725897231872e-07, |
| "loss": 0.318, |
| "step": 2240 |
| }, |
| { |
| "epoch": 2.8589580686149936, |
| "grad_norm": 3.6002718533091, |
| "learning_rate": 1.5343703087768225e-07, |
| "loss": 0.3323, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.871664548919949, |
| "grad_norm": 3.392955644057836, |
| "learning_rate": 1.2731645278655448e-07, |
| "loss": 0.3088, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.884371029224905, |
| "grad_norm": 3.9060628771881842, |
| "learning_rate": 1.0361726181653209e-07, |
| "loss": 0.3213, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.8970775095298604, |
| "grad_norm": 3.6937829161145515, |
| "learning_rate": 8.234527046180885e-08, |
| "loss": 0.3193, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.909783989834816, |
| "grad_norm": 3.7501161450474036, |
| "learning_rate": 6.350569591846434e-08, |
| "loss": 0.3334, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.9224904701397714, |
| "grad_norm": 3.4784424672902605, |
| "learning_rate": 4.710315880489091e-08, |
| "loss": 0.3273, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.935196950444727, |
| "grad_norm": 3.4126846620318707, |
| "learning_rate": 3.31416820285313e-08, |
| "loss": 0.3177, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.9479034307496823, |
| "grad_norm": 3.9848361195981785, |
| "learning_rate": 2.1624689799214503e-08, |
| "loss": 0.322, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.9606099110546378, |
| "grad_norm": 3.6028548305792203, |
| "learning_rate": 1.2555006789334301e-08, |
| "loss": 0.3038, |
| "step": 2330 |
| }, |
| { |
| "epoch": 2.9733163913595932, |
| "grad_norm": 4.107748523282802, |
| "learning_rate": 5.934857441062258e-09, |
| "loss": 0.313, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.9860228716645487, |
| "grad_norm": 4.057112734457165, |
| "learning_rate": 1.765865420779722e-09, |
| "loss": 0.315, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.998729351969504, |
| "grad_norm": 3.491454630886239, |
| "learning_rate": 4.9053220856354335e-11, |
| "loss": 0.328, |
| "step": 2360 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 2361, |
| "total_flos": 121880697913344.0, |
| "train_loss": 0.6645450910134418, |
| "train_runtime": 16043.0595, |
| "train_samples_per_second": 1.177, |
| "train_steps_per_second": 0.147 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 2361, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 121880697913344.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|