{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.30526315789473685, "eval_steps": 500, "global_step": 2900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0010526315789473684, "grad_norm": 0.34683918952941895, "learning_rate": 4.815e-06, "loss": 1.7081634521484375, "step": 10 }, { "epoch": 0.002105263157894737, "grad_norm": 0.3531605899333954, "learning_rate": 1.0165e-05, "loss": 1.656758689880371, "step": 20 }, { "epoch": 0.003157894736842105, "grad_norm": 0.3394385576248169, "learning_rate": 1.5515e-05, "loss": 1.633415985107422, "step": 30 }, { "epoch": 0.004210526315789474, "grad_norm": 0.37144365906715393, "learning_rate": 2.0865e-05, "loss": 1.6722015380859374, "step": 40 }, { "epoch": 0.005263157894736842, "grad_norm": 0.3785368800163269, "learning_rate": 2.6215e-05, "loss": 1.6771835327148437, "step": 50 }, { "epoch": 0.00631578947368421, "grad_norm": 0.3667239546775818, "learning_rate": 3.1565e-05, "loss": 1.6657798767089844, "step": 60 }, { "epoch": 0.007368421052631579, "grad_norm": 0.35704323649406433, "learning_rate": 3.6914999999999995e-05, "loss": 1.637792205810547, "step": 70 }, { "epoch": 0.008421052631578947, "grad_norm": 0.3614155352115631, "learning_rate": 4.2265e-05, "loss": 1.6456287384033204, "step": 80 }, { "epoch": 0.009473684210526316, "grad_norm": 0.3477347195148468, "learning_rate": 4.7615e-05, "loss": 1.6376474380493165, "step": 90 }, { "epoch": 0.010526315789473684, "grad_norm": 0.3474464416503906, "learning_rate": 5.2965e-05, "loss": 1.6883708953857421, "step": 100 }, { "epoch": 0.011578947368421053, "grad_norm": 0.3608642518520355, "learning_rate": 5.831500000000001e-05, "loss": 1.7032821655273438, "step": 110 }, { "epoch": 0.01263157894736842, "grad_norm": 0.35934099555015564, "learning_rate": 6.3665e-05, "loss": 1.597799301147461, "step": 120 }, { "epoch": 0.01368421052631579, "grad_norm": 0.38500702381134033, "learning_rate": 6.9015e-05, "loss": 1.6558387756347657, "step": 130 }, { "epoch": 0.014736842105263158, "grad_norm": 0.3602914810180664, "learning_rate": 7.4365e-05, "loss": 1.6937145233154296, "step": 140 }, { "epoch": 0.015789473684210527, "grad_norm": 0.36331596970558167, "learning_rate": 7.9715e-05, "loss": 1.5696943283081055, "step": 150 }, { "epoch": 0.016842105263157894, "grad_norm": 0.3533744215965271, "learning_rate": 8.5065e-05, "loss": 1.702765655517578, "step": 160 }, { "epoch": 0.017894736842105262, "grad_norm": 0.3546121120452881, "learning_rate": 9.0415e-05, "loss": 1.6325836181640625, "step": 170 }, { "epoch": 0.018947368421052633, "grad_norm": 0.3867342472076416, "learning_rate": 9.5765e-05, "loss": 1.636269760131836, "step": 180 }, { "epoch": 0.02, "grad_norm": 0.3859454393386841, "learning_rate": 0.000101115, "loss": 1.7112407684326172, "step": 190 }, { "epoch": 0.021052631578947368, "grad_norm": 0.3688015937805176, "learning_rate": 0.000106465, "loss": 1.6455875396728517, "step": 200 }, { "epoch": 0.022105263157894735, "grad_norm": 0.3693976402282715, "learning_rate": 0.00010699975274657343, "loss": 1.6878833770751953, "step": 210 }, { "epoch": 0.023157894736842106, "grad_norm": 0.3748058080673218, "learning_rate": 0.00010699889804630456, "loss": 1.6435226440429687, "step": 220 }, { "epoch": 0.024210526315789474, "grad_norm": 0.3806576430797577, "learning_rate": 0.00010699743285643286, "loss": 1.7004669189453125, "step": 230 }, { "epoch": 0.02526315789473684, "grad_norm": 0.3829317092895508, "learning_rate": 0.00010699535719367796, "loss": 1.6831859588623046, "step": 240 }, { "epoch": 0.02631578947368421, "grad_norm": 0.4069920480251312, "learning_rate": 0.00010699267108172577, "loss": 1.6417667388916015, "step": 250 }, { "epoch": 0.02736842105263158, "grad_norm": 0.37535834312438965, "learning_rate": 0.00010698937455122825, "loss": 1.640174102783203, "step": 260 }, { "epoch": 0.028421052631578948, "grad_norm": 0.3992610573768616, "learning_rate": 0.0001069854676398029, "loss": 1.6665351867675782, "step": 270 }, { "epoch": 0.029473684210526315, "grad_norm": 0.3680964708328247, "learning_rate": 0.0001069809503920325, "loss": 1.7111568450927734, "step": 280 }, { "epoch": 0.030526315789473683, "grad_norm": 0.4049525856971741, "learning_rate": 0.00010697582285946452, "loss": 1.6817201614379882, "step": 290 }, { "epoch": 0.031578947368421054, "grad_norm": 0.38598954677581787, "learning_rate": 0.00010697008510061057, "loss": 1.6445945739746093, "step": 300 }, { "epoch": 0.03263157894736842, "grad_norm": 0.39688920974731445, "learning_rate": 0.00010696373718094565, "loss": 1.688629150390625, "step": 310 }, { "epoch": 0.03368421052631579, "grad_norm": 0.3762621581554413, "learning_rate": 0.00010695677917290751, "loss": 1.6273818969726563, "step": 320 }, { "epoch": 0.034736842105263156, "grad_norm": 0.3470601737499237, "learning_rate": 0.00010694921115589574, "loss": 1.690780258178711, "step": 330 }, { "epoch": 0.035789473684210524, "grad_norm": 0.38783422112464905, "learning_rate": 0.00010694103321627094, "loss": 1.6885700225830078, "step": 340 }, { "epoch": 0.03684210526315789, "grad_norm": 0.3837421238422394, "learning_rate": 0.00010693224544735366, "loss": 1.670220184326172, "step": 350 }, { "epoch": 0.037894736842105266, "grad_norm": 0.3634503185749054, "learning_rate": 0.00010692284794942337, "loss": 1.6357498168945312, "step": 360 }, { "epoch": 0.03894736842105263, "grad_norm": 0.39452844858169556, "learning_rate": 0.00010691284082971734, "loss": 1.6791454315185548, "step": 370 }, { "epoch": 0.04, "grad_norm": 0.38304150104522705, "learning_rate": 0.00010690222420242937, "loss": 1.6702400207519532, "step": 380 }, { "epoch": 0.04105263157894737, "grad_norm": 0.3755001723766327, "learning_rate": 0.00010689099818870848, "loss": 1.6558124542236328, "step": 390 }, { "epoch": 0.042105263157894736, "grad_norm": 0.3776380121707916, "learning_rate": 0.0001068791629166576, "loss": 1.6616518020629882, "step": 400 }, { "epoch": 0.0431578947368421, "grad_norm": 0.3697650134563446, "learning_rate": 0.00010686671852133208, "loss": 1.6540897369384766, "step": 410 }, { "epoch": 0.04421052631578947, "grad_norm": 0.3718468248844147, "learning_rate": 0.00010685366514473802, "loss": 1.6041250228881836, "step": 420 }, { "epoch": 0.045263157894736845, "grad_norm": 0.38397344946861267, "learning_rate": 0.0001068400029358309, "loss": 1.677585983276367, "step": 430 }, { "epoch": 0.04631578947368421, "grad_norm": 0.37290486693382263, "learning_rate": 0.00010682573205051367, "loss": 1.6698143005371093, "step": 440 }, { "epoch": 0.04736842105263158, "grad_norm": 0.37734609842300415, "learning_rate": 0.00010681085265163504, "loss": 1.6791515350341797, "step": 450 }, { "epoch": 0.04842105263157895, "grad_norm": 0.354443222284317, "learning_rate": 0.00010679536490898761, "loss": 1.6450014114379883, "step": 460 }, { "epoch": 0.049473684210526316, "grad_norm": 0.3799300491809845, "learning_rate": 0.00010677926899930603, "loss": 1.6635103225708008, "step": 470 }, { "epoch": 0.05052631578947368, "grad_norm": 0.3844967484474182, "learning_rate": 0.00010676256510626478, "loss": 1.6978870391845704, "step": 480 }, { "epoch": 0.05157894736842105, "grad_norm": 0.38755500316619873, "learning_rate": 0.00010674525342047629, "loss": 1.6842260360717773, "step": 490 }, { "epoch": 0.05263157894736842, "grad_norm": 0.39443737268447876, "learning_rate": 0.00010672733413948862, "loss": 1.6408458709716798, "step": 500 }, { "epoch": 0.05368421052631579, "grad_norm": 0.4008043110370636, "learning_rate": 0.00010670880746778328, "loss": 1.61962833404541, "step": 510 }, { "epoch": 0.05473684210526316, "grad_norm": 0.3917809724807739, "learning_rate": 0.00010668967361677283, "loss": 1.718182373046875, "step": 520 }, { "epoch": 0.05578947368421053, "grad_norm": 0.364409476518631, "learning_rate": 0.00010666993280479856, "loss": 1.7204322814941406, "step": 530 }, { "epoch": 0.056842105263157895, "grad_norm": 0.39319396018981934, "learning_rate": 0.00010664958525712792, "loss": 1.6448682785034179, "step": 540 }, { "epoch": 0.05789473684210526, "grad_norm": 0.3864227533340454, "learning_rate": 0.00010662863120595196, "loss": 1.7400585174560548, "step": 550 }, { "epoch": 0.05894736842105263, "grad_norm": 0.37699612975120544, "learning_rate": 0.00010660707089038273, "loss": 1.6591960906982421, "step": 560 }, { "epoch": 0.06, "grad_norm": 0.3808913230895996, "learning_rate": 0.00010658490455645052, "loss": 1.63150634765625, "step": 570 }, { "epoch": 0.061052631578947365, "grad_norm": 0.38882365822792053, "learning_rate": 0.00010656213245710098, "loss": 1.6896860122680664, "step": 580 }, { "epoch": 0.06210526315789474, "grad_norm": 0.3772079050540924, "learning_rate": 0.0001065387548521924, "loss": 1.7085845947265625, "step": 590 }, { "epoch": 0.06315789473684211, "grad_norm": 0.37180712819099426, "learning_rate": 0.00010651477200849263, "loss": 1.7532657623291015, "step": 600 }, { "epoch": 0.06421052631578947, "grad_norm": 0.3878546357154846, "learning_rate": 0.00010649018419967597, "loss": 1.6636667251586914, "step": 610 }, { "epoch": 0.06526315789473684, "grad_norm": 0.39751365780830383, "learning_rate": 0.00010646499170632023, "loss": 1.6579233169555665, "step": 620 }, { "epoch": 0.06631578947368422, "grad_norm": 0.3831867277622223, "learning_rate": 0.00010643919481590337, "loss": 1.6426708221435546, "step": 630 }, { "epoch": 0.06736842105263158, "grad_norm": 0.3749397099018097, "learning_rate": 0.00010641279382280032, "loss": 1.7154060363769532, "step": 640 }, { "epoch": 0.06842105263157895, "grad_norm": 0.37839797139167786, "learning_rate": 0.00010638578902827957, "loss": 1.7217548370361329, "step": 650 }, { "epoch": 0.06947368421052631, "grad_norm": 0.3703754246234894, "learning_rate": 0.00010635818074049972, "loss": 1.7110353469848634, "step": 660 }, { "epoch": 0.07052631578947369, "grad_norm": 0.36747097969055176, "learning_rate": 0.00010632996927450597, "loss": 1.651369857788086, "step": 670 }, { "epoch": 0.07157894736842105, "grad_norm": 0.36606892943382263, "learning_rate": 0.00010630115495222664, "loss": 1.6909339904785157, "step": 680 }, { "epoch": 0.07263157894736842, "grad_norm": 0.3871472179889679, "learning_rate": 0.00010627173810246927, "loss": 1.6740509033203126, "step": 690 }, { "epoch": 0.07368421052631578, "grad_norm": 0.3820892572402954, "learning_rate": 0.00010624171906091708, "loss": 1.7049301147460938, "step": 700 }, { "epoch": 0.07473684210526316, "grad_norm": 0.38060277700424194, "learning_rate": 0.00010621109817012501, "loss": 1.7255819320678711, "step": 710 }, { "epoch": 0.07578947368421053, "grad_norm": 0.37024298310279846, "learning_rate": 0.00010617987577951588, "loss": 1.707390594482422, "step": 720 }, { "epoch": 0.07684210526315789, "grad_norm": 0.3976726233959198, "learning_rate": 0.0001061480522453764, "loss": 1.6445907592773437, "step": 730 }, { "epoch": 0.07789473684210527, "grad_norm": 0.3904809057712555, "learning_rate": 0.00010611562793085301, "loss": 1.7427913665771484, "step": 740 }, { "epoch": 0.07894736842105263, "grad_norm": 0.37776583433151245, "learning_rate": 0.00010608260320594787, "loss": 1.6211050033569336, "step": 750 }, { "epoch": 0.08, "grad_norm": 0.382707804441452, "learning_rate": 0.00010604897844751458, "loss": 1.6817436218261719, "step": 760 }, { "epoch": 0.08105263157894736, "grad_norm": 0.3894830048084259, "learning_rate": 0.00010601475403925381, "loss": 1.747372817993164, "step": 770 }, { "epoch": 0.08210526315789474, "grad_norm": 0.38454341888427734, "learning_rate": 0.00010597993037170907, "loss": 1.667810821533203, "step": 780 }, { "epoch": 0.08315789473684211, "grad_norm": 0.3924828767776489, "learning_rate": 0.00010594450784226211, "loss": 1.689559555053711, "step": 790 }, { "epoch": 0.08421052631578947, "grad_norm": 0.390747994184494, "learning_rate": 0.0001059084868551285, "loss": 1.687558364868164, "step": 800 }, { "epoch": 0.08526315789473685, "grad_norm": 0.38002100586891174, "learning_rate": 0.0001058718678213529, "loss": 1.7372432708740235, "step": 810 }, { "epoch": 0.0863157894736842, "grad_norm": 0.3947979509830475, "learning_rate": 0.00010583465115880448, "loss": 1.7141420364379882, "step": 820 }, { "epoch": 0.08736842105263158, "grad_norm": 0.38964593410491943, "learning_rate": 0.0001057968372921721, "loss": 1.6732599258422851, "step": 830 }, { "epoch": 0.08842105263157894, "grad_norm": 0.3914567828178406, "learning_rate": 0.00010575842665295942, "loss": 1.697699737548828, "step": 840 }, { "epoch": 0.08947368421052632, "grad_norm": 0.3780556917190552, "learning_rate": 0.00010571941967948013, "loss": 1.6859580993652343, "step": 850 }, { "epoch": 0.09052631578947369, "grad_norm": 0.3804113268852234, "learning_rate": 0.00010567981681685271, "loss": 1.630574607849121, "step": 860 }, { "epoch": 0.09157894736842105, "grad_norm": 0.3920338451862335, "learning_rate": 0.0001056396185169956, "loss": 1.701805877685547, "step": 870 }, { "epoch": 0.09263157894736843, "grad_norm": 0.3645232021808624, "learning_rate": 0.00010559882523862185, "loss": 1.6626638412475585, "step": 880 }, { "epoch": 0.09368421052631579, "grad_norm": 0.39647483825683594, "learning_rate": 0.000105557437447234, "loss": 1.657071876525879, "step": 890 }, { "epoch": 0.09473684210526316, "grad_norm": 0.3784042298793793, "learning_rate": 0.00010551545561511872, "loss": 1.6789131164550781, "step": 900 }, { "epoch": 0.09578947368421052, "grad_norm": 0.3799436390399933, "learning_rate": 0.00010547288022134141, "loss": 1.6874401092529296, "step": 910 }, { "epoch": 0.0968421052631579, "grad_norm": 0.3979872465133667, "learning_rate": 0.00010542971175174078, "loss": 1.7372554779052733, "step": 920 }, { "epoch": 0.09789473684210526, "grad_norm": 0.3869173228740692, "learning_rate": 0.0001053859506989233, "loss": 1.6965164184570312, "step": 930 }, { "epoch": 0.09894736842105263, "grad_norm": 0.38553228974342346, "learning_rate": 0.0001053415975622575, "loss": 1.6804073333740235, "step": 940 }, { "epoch": 0.1, "grad_norm": 0.37855857610702515, "learning_rate": 0.00010529665284786835, "loss": 1.7479766845703124, "step": 950 }, { "epoch": 0.10105263157894737, "grad_norm": 0.36974212527275085, "learning_rate": 0.00010525111706863153, "loss": 1.6555421829223633, "step": 960 }, { "epoch": 0.10210526315789474, "grad_norm": 0.3829262852668762, "learning_rate": 0.00010520499074416742, "loss": 1.7271907806396485, "step": 970 }, { "epoch": 0.1031578947368421, "grad_norm": 0.3871605396270752, "learning_rate": 0.0001051582744008353, "loss": 1.6716243743896484, "step": 980 }, { "epoch": 0.10421052631578948, "grad_norm": 0.3923998475074768, "learning_rate": 0.00010511096857172731, "loss": 1.6450519561767578, "step": 990 }, { "epoch": 0.10526315789473684, "grad_norm": 0.38333484530448914, "learning_rate": 0.00010506307379666238, "loss": 1.6865043640136719, "step": 1000 }, { "epoch": 0.10631578947368421, "grad_norm": 0.38256773352622986, "learning_rate": 0.00010501459062218, "loss": 1.6601579666137696, "step": 1010 }, { "epoch": 0.10736842105263159, "grad_norm": 0.3737237751483917, "learning_rate": 0.00010496551960153409, "loss": 1.6208690643310546, "step": 1020 }, { "epoch": 0.10842105263157895, "grad_norm": 0.366969496011734, "learning_rate": 0.00010491586129468662, "loss": 1.6808839797973634, "step": 1030 }, { "epoch": 0.10947368421052632, "grad_norm": 0.3720376193523407, "learning_rate": 0.0001048656162683012, "loss": 1.6338840484619142, "step": 1040 }, { "epoch": 0.11052631578947368, "grad_norm": 0.39924025535583496, "learning_rate": 0.00010481478509573669, "loss": 1.652592086791992, "step": 1050 }, { "epoch": 0.11157894736842106, "grad_norm": 0.37709176540374756, "learning_rate": 0.00010476336835704059, "loss": 1.6794198989868163, "step": 1060 }, { "epoch": 0.11263157894736842, "grad_norm": 0.382405161857605, "learning_rate": 0.00010471136663894244, "loss": 1.702239990234375, "step": 1070 }, { "epoch": 0.11368421052631579, "grad_norm": 0.3955666720867157, "learning_rate": 0.00010465878053484715, "loss": 1.625558090209961, "step": 1080 }, { "epoch": 0.11473684210526315, "grad_norm": 0.3984505534172058, "learning_rate": 0.0001046056106448282, "loss": 1.7061031341552735, "step": 1090 }, { "epoch": 0.11578947368421053, "grad_norm": 0.37337619066238403, "learning_rate": 0.00010455185757562081, "loss": 1.6474536895751952, "step": 1100 }, { "epoch": 0.1168421052631579, "grad_norm": 0.4265633225440979, "learning_rate": 0.00010449752194061497, "loss": 1.6948539733886718, "step": 1110 }, { "epoch": 0.11789473684210526, "grad_norm": 0.39065343141555786, "learning_rate": 0.0001044426043598485, "loss": 1.6905693054199218, "step": 1120 }, { "epoch": 0.11894736842105263, "grad_norm": 0.3910517692565918, "learning_rate": 0.00010438710545999999, "loss": 1.6512699127197266, "step": 1130 }, { "epoch": 0.12, "grad_norm": 0.41286537051200867, "learning_rate": 0.00010433102587438154, "loss": 1.6904163360595703, "step": 1140 }, { "epoch": 0.12105263157894737, "grad_norm": 0.39058077335357666, "learning_rate": 0.00010427436624293164, "loss": 1.6889778137207032, "step": 1150 }, { "epoch": 0.12210526315789473, "grad_norm": 0.40376579761505127, "learning_rate": 0.00010421712721220786, "loss": 1.6660743713378907, "step": 1160 }, { "epoch": 0.1231578947368421, "grad_norm": 0.4065842628479004, "learning_rate": 0.00010415930943537937, "loss": 1.7282680511474608, "step": 1170 }, { "epoch": 0.12421052631578948, "grad_norm": 0.3935592770576477, "learning_rate": 0.00010410091357221965, "loss": 1.7208686828613282, "step": 1180 }, { "epoch": 0.12526315789473685, "grad_norm": 0.3769897520542145, "learning_rate": 0.00010404194028909876, "loss": 1.6730665206909179, "step": 1190 }, { "epoch": 0.12631578947368421, "grad_norm": 0.37976640462875366, "learning_rate": 0.00010398239025897598, "loss": 1.7071300506591798, "step": 1200 }, { "epoch": 0.12736842105263158, "grad_norm": 0.38293200731277466, "learning_rate": 0.0001039222641613919, "loss": 1.7225513458251953, "step": 1210 }, { "epoch": 0.12842105263157894, "grad_norm": 0.3943805694580078, "learning_rate": 0.00010386156268246077, "loss": 1.6900711059570312, "step": 1220 }, { "epoch": 0.12947368421052632, "grad_norm": 0.402694970369339, "learning_rate": 0.00010380028651486271, "loss": 1.6741355895996093, "step": 1230 }, { "epoch": 0.13052631578947368, "grad_norm": 0.4034770429134369, "learning_rate": 0.00010373843635783572, "loss": 1.7251928329467774, "step": 1240 }, { "epoch": 0.13157894736842105, "grad_norm": 0.4223957359790802, "learning_rate": 0.00010367601291716777, "loss": 1.7350204467773438, "step": 1250 }, { "epoch": 0.13263157894736843, "grad_norm": 0.3636983633041382, "learning_rate": 0.0001036130169051887, "loss": 1.6685359954833985, "step": 1260 }, { "epoch": 0.1336842105263158, "grad_norm": 0.36913859844207764, "learning_rate": 0.00010354944904076209, "loss": 1.6918949127197265, "step": 1270 }, { "epoch": 0.13473684210526315, "grad_norm": 0.3916381597518921, "learning_rate": 0.00010348531004927711, "loss": 1.6259313583374024, "step": 1280 }, { "epoch": 0.13578947368421052, "grad_norm": 0.38772350549697876, "learning_rate": 0.00010342060066264016, "loss": 1.7148677825927734, "step": 1290 }, { "epoch": 0.1368421052631579, "grad_norm": 0.38373488187789917, "learning_rate": 0.00010335532161926664, "loss": 1.6328174591064453, "step": 1300 }, { "epoch": 0.13789473684210526, "grad_norm": 0.3877631723880768, "learning_rate": 0.00010328947366407237, "loss": 1.646784210205078, "step": 1310 }, { "epoch": 0.13894736842105262, "grad_norm": 0.39882156252861023, "learning_rate": 0.00010322305754846519, "loss": 1.6600376129150392, "step": 1320 }, { "epoch": 0.14, "grad_norm": 0.40457776188850403, "learning_rate": 0.00010315607403033641, "loss": 1.669814109802246, "step": 1330 }, { "epoch": 0.14105263157894737, "grad_norm": 0.3948962688446045, "learning_rate": 0.00010308852387405208, "loss": 1.715940284729004, "step": 1340 }, { "epoch": 0.14210526315789473, "grad_norm": 0.3921595513820648, "learning_rate": 0.00010302040785044425, "loss": 1.6944934844970703, "step": 1350 }, { "epoch": 0.1431578947368421, "grad_norm": 0.3857240676879883, "learning_rate": 0.00010295172673680234, "loss": 1.6900419235229491, "step": 1360 }, { "epoch": 0.14421052631578948, "grad_norm": 0.38249680399894714, "learning_rate": 0.00010288248131686406, "loss": 1.7138862609863281, "step": 1370 }, { "epoch": 0.14526315789473684, "grad_norm": 0.40845534205436707, "learning_rate": 0.00010281267238080664, "loss": 1.7212867736816406, "step": 1380 }, { "epoch": 0.1463157894736842, "grad_norm": 0.3911115229129791, "learning_rate": 0.00010274230072523764, "loss": 1.7087575912475585, "step": 1390 }, { "epoch": 0.14736842105263157, "grad_norm": 0.3967211842536926, "learning_rate": 0.00010267136715318605, "loss": 1.675175094604492, "step": 1400 }, { "epoch": 0.14842105263157895, "grad_norm": 0.3820992410182953, "learning_rate": 0.00010259987247409298, "loss": 1.665155792236328, "step": 1410 }, { "epoch": 0.14947368421052631, "grad_norm": 0.40317046642303467, "learning_rate": 0.00010252781750380252, "loss": 1.6777839660644531, "step": 1420 }, { "epoch": 0.15052631578947367, "grad_norm": 0.39026641845703125, "learning_rate": 0.00010245520306455232, "loss": 1.6641407012939453, "step": 1430 }, { "epoch": 0.15157894736842106, "grad_norm": 0.38703930377960205, "learning_rate": 0.00010238202998496432, "loss": 1.7006916046142577, "step": 1440 }, { "epoch": 0.15263157894736842, "grad_norm": 0.3920949697494507, "learning_rate": 0.00010230829910003525, "loss": 1.6237125396728516, "step": 1450 }, { "epoch": 0.15368421052631578, "grad_norm": 0.40310102701187134, "learning_rate": 0.00010223401125112709, "loss": 1.693703842163086, "step": 1460 }, { "epoch": 0.15473684210526314, "grad_norm": 0.3895237147808075, "learning_rate": 0.00010215916728595746, "loss": 1.6554393768310547, "step": 1470 }, { "epoch": 0.15578947368421053, "grad_norm": 0.3830355703830719, "learning_rate": 0.00010208376805858997, "loss": 1.6817665100097656, "step": 1480 }, { "epoch": 0.1568421052631579, "grad_norm": 0.4044099450111389, "learning_rate": 0.00010200781442942451, "loss": 1.740530776977539, "step": 1490 }, { "epoch": 0.15789473684210525, "grad_norm": 0.37278082966804504, "learning_rate": 0.00010193130726518736, "loss": 1.7269683837890626, "step": 1500 }, { "epoch": 0.15894736842105264, "grad_norm": 0.3909358084201813, "learning_rate": 0.00010185424743892131, "loss": 1.674229049682617, "step": 1510 }, { "epoch": 0.16, "grad_norm": 0.3877439796924591, "learning_rate": 0.00010177663582997574, "loss": 1.6566276550292969, "step": 1520 }, { "epoch": 0.16105263157894736, "grad_norm": 0.3673596978187561, "learning_rate": 0.00010169847332399658, "loss": 1.6969722747802733, "step": 1530 }, { "epoch": 0.16210526315789472, "grad_norm": 0.428408145904541, "learning_rate": 0.00010161976081291614, "loss": 1.6617691040039062, "step": 1540 }, { "epoch": 0.1631578947368421, "grad_norm": 0.38442328572273254, "learning_rate": 0.00010154049919494305, "loss": 1.7180919647216797, "step": 1550 }, { "epoch": 0.16421052631578947, "grad_norm": 0.41423359513282776, "learning_rate": 0.00010146068937455184, "loss": 1.7110111236572265, "step": 1560 }, { "epoch": 0.16526315789473683, "grad_norm": 0.3815020024776459, "learning_rate": 0.00010138033226247282, "loss": 1.6620532989501953, "step": 1570 }, { "epoch": 0.16631578947368422, "grad_norm": 0.38987597823143005, "learning_rate": 0.00010129942877568153, "loss": 1.6376758575439454, "step": 1580 }, { "epoch": 0.16736842105263158, "grad_norm": 0.37103158235549927, "learning_rate": 0.00010121797983738831, "loss": 1.6269058227539062, "step": 1590 }, { "epoch": 0.16842105263157894, "grad_norm": 0.39582741260528564, "learning_rate": 0.00010113598637702785, "loss": 1.6544437408447266, "step": 1600 }, { "epoch": 0.1694736842105263, "grad_norm": 0.3875832259654999, "learning_rate": 0.0001010534493302485, "loss": 1.69503173828125, "step": 1610 }, { "epoch": 0.1705263157894737, "grad_norm": 0.40506550669670105, "learning_rate": 0.00010097036963890156, "loss": 1.6826278686523437, "step": 1620 }, { "epoch": 0.17157894736842105, "grad_norm": 0.39827048778533936, "learning_rate": 0.00010088674825103067, "loss": 1.6500736236572267, "step": 1630 }, { "epoch": 0.1726315789473684, "grad_norm": 0.3786768913269043, "learning_rate": 0.00010080258612086083, "loss": 1.6809326171875, "step": 1640 }, { "epoch": 0.1736842105263158, "grad_norm": 0.40326225757598877, "learning_rate": 0.00010071788420878764, "loss": 1.7387603759765624, "step": 1650 }, { "epoch": 0.17473684210526316, "grad_norm": 0.3733818829059601, "learning_rate": 0.00010063264348136629, "loss": 1.6930301666259766, "step": 1660 }, { "epoch": 0.17578947368421052, "grad_norm": 0.4019014239311218, "learning_rate": 0.00010054686491130048, "loss": 1.665353012084961, "step": 1670 }, { "epoch": 0.17684210526315788, "grad_norm": 0.3994007110595703, "learning_rate": 0.00010046054947743142, "loss": 1.7481708526611328, "step": 1680 }, { "epoch": 0.17789473684210527, "grad_norm": 0.40046176314353943, "learning_rate": 0.00010037369816472658, "loss": 1.6684654235839844, "step": 1690 }, { "epoch": 0.17894736842105263, "grad_norm": 0.39062178134918213, "learning_rate": 0.00010028631196426851, "loss": 1.6636728286743163, "step": 1700 }, { "epoch": 0.18, "grad_norm": 0.40030282735824585, "learning_rate": 0.0001001983918732435, "loss": 1.6382123947143554, "step": 1710 }, { "epoch": 0.18105263157894738, "grad_norm": 0.38396012783050537, "learning_rate": 0.00010010993889493013, "loss": 1.6094409942626953, "step": 1720 }, { "epoch": 0.18210526315789474, "grad_norm": 0.3969299793243408, "learning_rate": 0.000100020954038688, "loss": 1.6550315856933593, "step": 1730 }, { "epoch": 0.1831578947368421, "grad_norm": 0.39174884557724, "learning_rate": 9.993143831994603e-05, "loss": 1.7123249053955079, "step": 1740 }, { "epoch": 0.18421052631578946, "grad_norm": 0.38760584592819214, "learning_rate": 9.984139276019098e-05, "loss": 1.6742156982421874, "step": 1750 }, { "epoch": 0.18526315789473685, "grad_norm": 0.3817841410636902, "learning_rate": 9.975081838695576e-05, "loss": 1.641263771057129, "step": 1760 }, { "epoch": 0.1863157894736842, "grad_norm": 0.4085705578327179, "learning_rate": 9.965971623380768e-05, "loss": 1.7673213958740235, "step": 1770 }, { "epoch": 0.18736842105263157, "grad_norm": 0.38965287804603577, "learning_rate": 9.956808734033671e-05, "loss": 1.770319366455078, "step": 1780 }, { "epoch": 0.18842105263157893, "grad_norm": 0.3770400881767273, "learning_rate": 9.947593275214358e-05, "loss": 1.6587142944335938, "step": 1790 }, { "epoch": 0.18947368421052632, "grad_norm": 0.40959247946739197, "learning_rate": 9.938325352082786e-05, "loss": 1.6820697784423828, "step": 1800 }, { "epoch": 0.19052631578947368, "grad_norm": 0.37764784693717957, "learning_rate": 9.929005070397595e-05, "loss": 1.6965087890625, "step": 1810 }, { "epoch": 0.19157894736842104, "grad_norm": 0.37487778067588806, "learning_rate": 9.9196325365149e-05, "loss": 1.6261119842529297, "step": 1820 }, { "epoch": 0.19263157894736843, "grad_norm": 0.4048542380332947, "learning_rate": 9.910207857387085e-05, "loss": 1.7076032638549805, "step": 1830 }, { "epoch": 0.1936842105263158, "grad_norm": 0.37118133902549744, "learning_rate": 9.90073114056157e-05, "loss": 1.70123233795166, "step": 1840 }, { "epoch": 0.19473684210526315, "grad_norm": 0.38945528864860535, "learning_rate": 9.891202494179595e-05, "loss": 1.7137296676635743, "step": 1850 }, { "epoch": 0.1957894736842105, "grad_norm": 0.39081960916519165, "learning_rate": 9.881622026974978e-05, "loss": 1.6556056976318358, "step": 1860 }, { "epoch": 0.1968421052631579, "grad_norm": 0.4000365436077118, "learning_rate": 9.871989848272882e-05, "loss": 1.708022689819336, "step": 1870 }, { "epoch": 0.19789473684210526, "grad_norm": 0.38972243666648865, "learning_rate": 9.86230606798856e-05, "loss": 1.6936985015869142, "step": 1880 }, { "epoch": 0.19894736842105262, "grad_norm": 0.4023416340351105, "learning_rate": 9.852570796626104e-05, "loss": 1.6013282775878905, "step": 1890 }, { "epoch": 0.2, "grad_norm": 0.37790361046791077, "learning_rate": 9.842784145277185e-05, "loss": 1.678757095336914, "step": 1900 }, { "epoch": 0.20105263157894737, "grad_norm": 0.4072909653186798, "learning_rate": 9.832946225619782e-05, "loss": 1.6550043106079102, "step": 1910 }, { "epoch": 0.20210526315789473, "grad_norm": 0.4222109317779541, "learning_rate": 9.823057149916913e-05, "loss": 1.6794788360595703, "step": 1920 }, { "epoch": 0.2031578947368421, "grad_norm": 0.3997038006782532, "learning_rate": 9.813117031015348e-05, "loss": 1.708123779296875, "step": 1930 }, { "epoch": 0.20421052631578948, "grad_norm": 0.387678861618042, "learning_rate": 9.803125982344328e-05, "loss": 1.694279098510742, "step": 1940 }, { "epoch": 0.20526315789473684, "grad_norm": 0.41388800740242004, "learning_rate": 9.793084117914258e-05, "loss": 1.698614501953125, "step": 1950 }, { "epoch": 0.2063157894736842, "grad_norm": 0.38706713914871216, "learning_rate": 9.782991552315424e-05, "loss": 1.702214813232422, "step": 1960 }, { "epoch": 0.2073684210526316, "grad_norm": 0.3965074419975281, "learning_rate": 9.772848400716673e-05, "loss": 1.6214000701904296, "step": 1970 }, { "epoch": 0.20842105263157895, "grad_norm": 0.39218032360076904, "learning_rate": 9.762654778864099e-05, "loss": 1.681211280822754, "step": 1980 }, { "epoch": 0.2094736842105263, "grad_norm": 0.4117305874824524, "learning_rate": 9.752410803079726e-05, "loss": 1.6745601654052735, "step": 1990 }, { "epoch": 0.21052631578947367, "grad_norm": 0.3973471224308014, "learning_rate": 9.742116590260185e-05, "loss": 1.6459293365478516, "step": 2000 }, { "epoch": 0.21157894736842106, "grad_norm": 0.3847576975822449, "learning_rate": 9.731772257875366e-05, "loss": 1.6581769943237306, "step": 2010 }, { "epoch": 0.21263157894736842, "grad_norm": 0.4136882424354553, "learning_rate": 9.721377923967092e-05, "loss": 1.7314947128295899, "step": 2020 }, { "epoch": 0.21368421052631578, "grad_norm": 0.37820902466773987, "learning_rate": 9.710933707147764e-05, "loss": 1.7070299148559571, "step": 2030 }, { "epoch": 0.21473684210526317, "grad_norm": 0.39630916714668274, "learning_rate": 9.700439726599012e-05, "loss": 1.6553241729736328, "step": 2040 }, { "epoch": 0.21578947368421053, "grad_norm": 0.3991798758506775, "learning_rate": 9.68989610207033e-05, "loss": 1.7385829925537108, "step": 2050 }, { "epoch": 0.2168421052631579, "grad_norm": 0.4119565188884735, "learning_rate": 9.679302953877712e-05, "loss": 1.71380615234375, "step": 2060 }, { "epoch": 0.21789473684210525, "grad_norm": 0.40724804997444153, "learning_rate": 9.66866040290228e-05, "loss": 1.6676467895507812, "step": 2070 }, { "epoch": 0.21894736842105264, "grad_norm": 0.4088967442512512, "learning_rate": 9.657968570588905e-05, "loss": 1.674250030517578, "step": 2080 }, { "epoch": 0.22, "grad_norm": 0.40387439727783203, "learning_rate": 9.64722757894482e-05, "loss": 1.676458740234375, "step": 2090 }, { "epoch": 0.22105263157894736, "grad_norm": 0.4028227925300598, "learning_rate": 9.636437550538226e-05, "loss": 1.6708587646484374, "step": 2100 }, { "epoch": 0.22210526315789475, "grad_norm": 0.40027210116386414, "learning_rate": 9.625598608496895e-05, "loss": 1.6314043045043944, "step": 2110 }, { "epoch": 0.2231578947368421, "grad_norm": 0.386688768863678, "learning_rate": 9.614710876506763e-05, "loss": 1.725076675415039, "step": 2120 }, { "epoch": 0.22421052631578947, "grad_norm": 0.4061787724494934, "learning_rate": 9.603774478810528e-05, "loss": 1.6826349258422852, "step": 2130 }, { "epoch": 0.22526315789473683, "grad_norm": 0.40370142459869385, "learning_rate": 9.592789540206218e-05, "loss": 1.649374771118164, "step": 2140 }, { "epoch": 0.22631578947368422, "grad_norm": 0.40586093068122864, "learning_rate": 9.581756186045777e-05, "loss": 1.6614540100097657, "step": 2150 }, { "epoch": 0.22736842105263158, "grad_norm": 0.3933681547641754, "learning_rate": 9.570674542233628e-05, "loss": 1.6946598052978517, "step": 2160 }, { "epoch": 0.22842105263157894, "grad_norm": 0.3825010359287262, "learning_rate": 9.559544735225242e-05, "loss": 1.6574283599853517, "step": 2170 }, { "epoch": 0.2294736842105263, "grad_norm": 0.4000436067581177, "learning_rate": 9.548366892025693e-05, "loss": 1.673634910583496, "step": 2180 }, { "epoch": 0.2305263157894737, "grad_norm": 0.3942500054836273, "learning_rate": 9.537141140188206e-05, "loss": 1.621174430847168, "step": 2190 }, { "epoch": 0.23157894736842105, "grad_norm": 0.3846987783908844, "learning_rate": 9.525867607812708e-05, "loss": 1.6244104385375977, "step": 2200 }, { "epoch": 0.2326315789473684, "grad_norm": 0.38483455777168274, "learning_rate": 9.514546423544357e-05, "loss": 1.687708282470703, "step": 2210 }, { "epoch": 0.2336842105263158, "grad_norm": 0.4134112000465393, "learning_rate": 9.503177716572082e-05, "loss": 1.7054229736328126, "step": 2220 }, { "epoch": 0.23473684210526316, "grad_norm": 0.3780292868614197, "learning_rate": 9.491761616627101e-05, "loss": 1.6283729553222657, "step": 2230 }, { "epoch": 0.23578947368421052, "grad_norm": 0.40246784687042236, "learning_rate": 9.480298253981456e-05, "loss": 1.7036407470703125, "step": 2240 }, { "epoch": 0.23684210526315788, "grad_norm": 0.4002091884613037, "learning_rate": 9.468787759446502e-05, "loss": 1.7064756393432616, "step": 2250 }, { "epoch": 0.23789473684210527, "grad_norm": 0.40926146507263184, "learning_rate": 9.457230264371439e-05, "loss": 1.6858642578125, "step": 2260 }, { "epoch": 0.23894736842105263, "grad_norm": 0.41373902559280396, "learning_rate": 9.445625900641796e-05, "loss": 1.655508804321289, "step": 2270 }, { "epoch": 0.24, "grad_norm": 0.38966718316078186, "learning_rate": 9.433974800677935e-05, "loss": 1.6741256713867188, "step": 2280 }, { "epoch": 0.24105263157894738, "grad_norm": 0.4069412648677826, "learning_rate": 9.422277097433537e-05, "loss": 1.6685916900634765, "step": 2290 }, { "epoch": 0.24210526315789474, "grad_norm": 0.3916907012462616, "learning_rate": 9.410532924394083e-05, "loss": 1.6491849899291993, "step": 2300 }, { "epoch": 0.2431578947368421, "grad_norm": 0.39959436655044556, "learning_rate": 9.398742415575336e-05, "loss": 1.670114517211914, "step": 2310 }, { "epoch": 0.24421052631578946, "grad_norm": 0.3950902223587036, "learning_rate": 9.386905705521803e-05, "loss": 1.6907678604125977, "step": 2320 }, { "epoch": 0.24526315789473685, "grad_norm": 0.38667526841163635, "learning_rate": 9.375022929305213e-05, "loss": 1.669590377807617, "step": 2330 }, { "epoch": 0.2463157894736842, "grad_norm": 0.39125263690948486, "learning_rate": 9.363094222522958e-05, "loss": 1.6502418518066406, "step": 2340 }, { "epoch": 0.24736842105263157, "grad_norm": 0.38178369402885437, "learning_rate": 9.351119721296566e-05, "loss": 1.7035490036010743, "step": 2350 }, { "epoch": 0.24842105263157896, "grad_norm": 0.37467339634895325, "learning_rate": 9.339099562270128e-05, "loss": 1.6536640167236327, "step": 2360 }, { "epoch": 0.24947368421052632, "grad_norm": 0.41233041882514954, "learning_rate": 9.327033882608754e-05, "loss": 1.6268924713134765, "step": 2370 }, { "epoch": 0.2505263157894737, "grad_norm": 0.3746933937072754, "learning_rate": 9.314922819996997e-05, "loss": 1.6240985870361329, "step": 2380 }, { "epoch": 0.25157894736842107, "grad_norm": 0.3932549059391022, "learning_rate": 9.302766512637293e-05, "loss": 1.6809700012207032, "step": 2390 }, { "epoch": 0.25263157894736843, "grad_norm": 0.4058087468147278, "learning_rate": 9.290565099248368e-05, "loss": 1.6474214553833009, "step": 2400 }, { "epoch": 0.2536842105263158, "grad_norm": 0.3873753547668457, "learning_rate": 9.278318719063673e-05, "loss": 1.6398870468139648, "step": 2410 }, { "epoch": 0.25473684210526315, "grad_norm": 0.41126886010169983, "learning_rate": 9.26602751182978e-05, "loss": 1.6111644744873046, "step": 2420 }, { "epoch": 0.2557894736842105, "grad_norm": 0.40002816915512085, "learning_rate": 9.2536916178048e-05, "loss": 1.6024229049682617, "step": 2430 }, { "epoch": 0.25684210526315787, "grad_norm": 0.4194015562534332, "learning_rate": 9.241311177756771e-05, "loss": 1.6467687606811523, "step": 2440 }, { "epoch": 0.2578947368421053, "grad_norm": 0.4181770980358124, "learning_rate": 9.228886332962062e-05, "loss": 1.6439130783081055, "step": 2450 }, { "epoch": 0.25894736842105265, "grad_norm": 0.40925332903862, "learning_rate": 9.216417225203754e-05, "loss": 1.6347824096679688, "step": 2460 }, { "epoch": 0.26, "grad_norm": 0.40195897221565247, "learning_rate": 9.203903996770019e-05, "loss": 1.6572818756103516, "step": 2470 }, { "epoch": 0.26105263157894737, "grad_norm": 0.4277157485485077, "learning_rate": 9.191346790452509e-05, "loss": 1.6013570785522462, "step": 2480 }, { "epoch": 0.26210526315789473, "grad_norm": 0.3951636552810669, "learning_rate": 9.178745749544716e-05, "loss": 1.694039535522461, "step": 2490 }, { "epoch": 0.2631578947368421, "grad_norm": 0.3961932957172394, "learning_rate": 9.166101017840337e-05, "loss": 1.6311038970947265, "step": 2500 }, { "epoch": 0.26421052631578945, "grad_norm": 0.40256279706954956, "learning_rate": 9.15341273963164e-05, "loss": 1.7131736755371094, "step": 2510 }, { "epoch": 0.26526315789473687, "grad_norm": 0.40076208114624023, "learning_rate": 9.14068105970781e-05, "loss": 1.659266471862793, "step": 2520 }, { "epoch": 0.26631578947368423, "grad_norm": 0.39892420172691345, "learning_rate": 9.127906123353305e-05, "loss": 1.6891080856323242, "step": 2530 }, { "epoch": 0.2673684210526316, "grad_norm": 0.39453125, "learning_rate": 9.115088076346184e-05, "loss": 1.6869060516357421, "step": 2540 }, { "epoch": 0.26842105263157895, "grad_norm": 0.3876430094242096, "learning_rate": 9.102227064956465e-05, "loss": 1.623502540588379, "step": 2550 }, { "epoch": 0.2694736842105263, "grad_norm": 0.3828693628311157, "learning_rate": 9.08932323594443e-05, "loss": 1.6787071228027344, "step": 2560 }, { "epoch": 0.27052631578947367, "grad_norm": 0.3757915198802948, "learning_rate": 9.076376736558976e-05, "loss": 1.7229637145996093, "step": 2570 }, { "epoch": 0.27157894736842103, "grad_norm": 0.3994489312171936, "learning_rate": 9.063387714535916e-05, "loss": 1.6279123306274415, "step": 2580 }, { "epoch": 0.27263157894736845, "grad_norm": 0.40050971508026123, "learning_rate": 9.0503563180963e-05, "loss": 1.667708969116211, "step": 2590 }, { "epoch": 0.2736842105263158, "grad_norm": 0.4005604684352875, "learning_rate": 9.037282695944726e-05, "loss": 1.6468616485595704, "step": 2600 }, { "epoch": 0.27473684210526317, "grad_norm": 0.40057310461997986, "learning_rate": 9.024166997267636e-05, "loss": 1.6907684326171875, "step": 2610 }, { "epoch": 0.27578947368421053, "grad_norm": 0.4074793756008148, "learning_rate": 9.011009371731623e-05, "loss": 1.6792390823364258, "step": 2620 }, { "epoch": 0.2768421052631579, "grad_norm": 0.4014405310153961, "learning_rate": 8.997809969481715e-05, "loss": 1.640324592590332, "step": 2630 }, { "epoch": 0.27789473684210525, "grad_norm": 0.42860186100006104, "learning_rate": 8.984568941139665e-05, "loss": 1.6390762329101562, "step": 2640 }, { "epoch": 0.2789473684210526, "grad_norm": 0.41278424859046936, "learning_rate": 8.971286437802235e-05, "loss": 1.7043113708496094, "step": 2650 }, { "epoch": 0.28, "grad_norm": 0.38656142354011536, "learning_rate": 8.957962611039464e-05, "loss": 1.7256532669067384, "step": 2660 }, { "epoch": 0.2810526315789474, "grad_norm": 0.3984103202819824, "learning_rate": 8.944597612892944e-05, "loss": 1.6301074981689454, "step": 2670 }, { "epoch": 0.28210526315789475, "grad_norm": 0.3937322795391083, "learning_rate": 8.93119159587409e-05, "loss": 1.6612771987915038, "step": 2680 }, { "epoch": 0.2831578947368421, "grad_norm": 0.39241543412208557, "learning_rate": 8.917744712962387e-05, "loss": 1.6962703704833983, "step": 2690 }, { "epoch": 0.28421052631578947, "grad_norm": 0.407466858625412, "learning_rate": 8.904257117603653e-05, "loss": 1.721807861328125, "step": 2700 }, { "epoch": 0.28526315789473683, "grad_norm": 0.3965199589729309, "learning_rate": 8.890728963708288e-05, "loss": 1.6854072570800782, "step": 2710 }, { "epoch": 0.2863157894736842, "grad_norm": 0.3866688013076782, "learning_rate": 8.877160405649515e-05, "loss": 1.678403663635254, "step": 2720 }, { "epoch": 0.2873684210526316, "grad_norm": 0.40115654468536377, "learning_rate": 8.863551598261618e-05, "loss": 1.688330078125, "step": 2730 }, { "epoch": 0.28842105263157897, "grad_norm": 0.41881707310676575, "learning_rate": 8.849902696838176e-05, "loss": 1.685501480102539, "step": 2740 }, { "epoch": 0.2894736842105263, "grad_norm": 0.3956238329410553, "learning_rate": 8.836213857130296e-05, "loss": 1.6521308898925782, "step": 2750 }, { "epoch": 0.2905263157894737, "grad_norm": 0.3809671700000763, "learning_rate": 8.822485235344825e-05, "loss": 1.6597816467285156, "step": 2760 }, { "epoch": 0.29157894736842105, "grad_norm": 0.39534077048301697, "learning_rate": 8.808716988142575e-05, "loss": 1.6627084732055664, "step": 2770 }, { "epoch": 0.2926315789473684, "grad_norm": 0.37715721130371094, "learning_rate": 8.794909272636537e-05, "loss": 1.6618637084960937, "step": 2780 }, { "epoch": 0.29368421052631577, "grad_norm": 0.4065514802932739, "learning_rate": 8.781062246390083e-05, "loss": 1.6399276733398438, "step": 2790 }, { "epoch": 0.29473684210526313, "grad_norm": 0.3923916220664978, "learning_rate": 8.767176067415169e-05, "loss": 1.668557357788086, "step": 2800 }, { "epoch": 0.29578947368421055, "grad_norm": 0.3970358967781067, "learning_rate": 8.75325089417053e-05, "loss": 1.6664169311523438, "step": 2810 }, { "epoch": 0.2968421052631579, "grad_norm": 0.4063076078891754, "learning_rate": 8.739286885559882e-05, "loss": 1.718800163269043, "step": 2820 }, { "epoch": 0.29789473684210527, "grad_norm": 0.41235899925231934, "learning_rate": 8.725284200930096e-05, "loss": 1.6484018325805665, "step": 2830 }, { "epoch": 0.29894736842105263, "grad_norm": 0.41001883149147034, "learning_rate": 8.711243000069387e-05, "loss": 1.6729150772094727, "step": 2840 }, { "epoch": 0.3, "grad_norm": 0.40411022305488586, "learning_rate": 8.697163443205486e-05, "loss": 1.6615083694458008, "step": 2850 }, { "epoch": 0.30105263157894735, "grad_norm": 0.3862515389919281, "learning_rate": 8.683045691003816e-05, "loss": 1.6196592330932618, "step": 2860 }, { "epoch": 0.3021052631578947, "grad_norm": 0.385047972202301, "learning_rate": 8.668889904565657e-05, "loss": 1.6499458312988282, "step": 2870 }, { "epoch": 0.3031578947368421, "grad_norm": 0.385885626077652, "learning_rate": 8.654696245426309e-05, "loss": 1.6544832229614257, "step": 2880 }, { "epoch": 0.3042105263157895, "grad_norm": 0.39182907342910767, "learning_rate": 8.640464875553244e-05, "loss": 1.6151403427124023, "step": 2890 }, { "epoch": 0.30526315789473685, "grad_norm": 0.37692710757255554, "learning_rate": 8.626195957344259e-05, "loss": 1.7116943359375, "step": 2900 } ], "logging_steps": 10, "max_steps": 9500, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.884603437744128e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }