| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.30526315789473685, |
| "eval_steps": 500, |
| "global_step": 2900, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0010526315789473684, |
| "grad_norm": 0.34683918952941895, |
| "learning_rate": 4.815e-06, |
| "loss": 1.7081634521484375, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.002105263157894737, |
| "grad_norm": 0.3531605899333954, |
| "learning_rate": 1.0165e-05, |
| "loss": 1.656758689880371, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.003157894736842105, |
| "grad_norm": 0.3394385576248169, |
| "learning_rate": 1.5515e-05, |
| "loss": 1.633415985107422, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.004210526315789474, |
| "grad_norm": 0.37144365906715393, |
| "learning_rate": 2.0865e-05, |
| "loss": 1.6722015380859374, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.005263157894736842, |
| "grad_norm": 0.3785368800163269, |
| "learning_rate": 2.6215e-05, |
| "loss": 1.6771835327148437, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.00631578947368421, |
| "grad_norm": 0.3667239546775818, |
| "learning_rate": 3.1565e-05, |
| "loss": 1.6657798767089844, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.007368421052631579, |
| "grad_norm": 0.35704323649406433, |
| "learning_rate": 3.6914999999999995e-05, |
| "loss": 1.637792205810547, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.008421052631578947, |
| "grad_norm": 0.3614155352115631, |
| "learning_rate": 4.2265e-05, |
| "loss": 1.6456287384033204, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.009473684210526316, |
| "grad_norm": 0.3477347195148468, |
| "learning_rate": 4.7615e-05, |
| "loss": 1.6376474380493165, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.010526315789473684, |
| "grad_norm": 0.3474464416503906, |
| "learning_rate": 5.2965e-05, |
| "loss": 1.6883708953857421, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.011578947368421053, |
| "grad_norm": 0.3608642518520355, |
| "learning_rate": 5.831500000000001e-05, |
| "loss": 1.7032821655273438, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.01263157894736842, |
| "grad_norm": 0.35934099555015564, |
| "learning_rate": 6.3665e-05, |
| "loss": 1.597799301147461, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.01368421052631579, |
| "grad_norm": 0.38500702381134033, |
| "learning_rate": 6.9015e-05, |
| "loss": 1.6558387756347657, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.014736842105263158, |
| "grad_norm": 0.3602914810180664, |
| "learning_rate": 7.4365e-05, |
| "loss": 1.6937145233154296, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.015789473684210527, |
| "grad_norm": 0.36331596970558167, |
| "learning_rate": 7.9715e-05, |
| "loss": 1.5696943283081055, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.016842105263157894, |
| "grad_norm": 0.3533744215965271, |
| "learning_rate": 8.5065e-05, |
| "loss": 1.702765655517578, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.017894736842105262, |
| "grad_norm": 0.3546121120452881, |
| "learning_rate": 9.0415e-05, |
| "loss": 1.6325836181640625, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.018947368421052633, |
| "grad_norm": 0.3867342472076416, |
| "learning_rate": 9.5765e-05, |
| "loss": 1.636269760131836, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.3859454393386841, |
| "learning_rate": 0.000101115, |
| "loss": 1.7112407684326172, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.021052631578947368, |
| "grad_norm": 0.3688015937805176, |
| "learning_rate": 0.000106465, |
| "loss": 1.6455875396728517, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.022105263157894735, |
| "grad_norm": 0.3693976402282715, |
| "learning_rate": 0.00010699975274657343, |
| "loss": 1.6878833770751953, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.023157894736842106, |
| "grad_norm": 0.3748058080673218, |
| "learning_rate": 0.00010699889804630456, |
| "loss": 1.6435226440429687, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.024210526315789474, |
| "grad_norm": 0.3806576430797577, |
| "learning_rate": 0.00010699743285643286, |
| "loss": 1.7004669189453125, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.02526315789473684, |
| "grad_norm": 0.3829317092895508, |
| "learning_rate": 0.00010699535719367796, |
| "loss": 1.6831859588623046, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.02631578947368421, |
| "grad_norm": 0.4069920480251312, |
| "learning_rate": 0.00010699267108172577, |
| "loss": 1.6417667388916015, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.02736842105263158, |
| "grad_norm": 0.37535834312438965, |
| "learning_rate": 0.00010698937455122825, |
| "loss": 1.640174102783203, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.028421052631578948, |
| "grad_norm": 0.3992610573768616, |
| "learning_rate": 0.0001069854676398029, |
| "loss": 1.6665351867675782, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.029473684210526315, |
| "grad_norm": 0.3680964708328247, |
| "learning_rate": 0.0001069809503920325, |
| "loss": 1.7111568450927734, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.030526315789473683, |
| "grad_norm": 0.4049525856971741, |
| "learning_rate": 0.00010697582285946452, |
| "loss": 1.6817201614379882, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.031578947368421054, |
| "grad_norm": 0.38598954677581787, |
| "learning_rate": 0.00010697008510061057, |
| "loss": 1.6445945739746093, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.03263157894736842, |
| "grad_norm": 0.39688920974731445, |
| "learning_rate": 0.00010696373718094565, |
| "loss": 1.688629150390625, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.03368421052631579, |
| "grad_norm": 0.3762621581554413, |
| "learning_rate": 0.00010695677917290751, |
| "loss": 1.6273818969726563, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.034736842105263156, |
| "grad_norm": 0.3470601737499237, |
| "learning_rate": 0.00010694921115589574, |
| "loss": 1.690780258178711, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.035789473684210524, |
| "grad_norm": 0.38783422112464905, |
| "learning_rate": 0.00010694103321627094, |
| "loss": 1.6885700225830078, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.03684210526315789, |
| "grad_norm": 0.3837421238422394, |
| "learning_rate": 0.00010693224544735366, |
| "loss": 1.670220184326172, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.037894736842105266, |
| "grad_norm": 0.3634503185749054, |
| "learning_rate": 0.00010692284794942337, |
| "loss": 1.6357498168945312, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.03894736842105263, |
| "grad_norm": 0.39452844858169556, |
| "learning_rate": 0.00010691284082971734, |
| "loss": 1.6791454315185548, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.38304150104522705, |
| "learning_rate": 0.00010690222420242937, |
| "loss": 1.6702400207519532, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.04105263157894737, |
| "grad_norm": 0.3755001723766327, |
| "learning_rate": 0.00010689099818870848, |
| "loss": 1.6558124542236328, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.042105263157894736, |
| "grad_norm": 0.3776380121707916, |
| "learning_rate": 0.0001068791629166576, |
| "loss": 1.6616518020629882, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.0431578947368421, |
| "grad_norm": 0.3697650134563446, |
| "learning_rate": 0.00010686671852133208, |
| "loss": 1.6540897369384766, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.04421052631578947, |
| "grad_norm": 0.3718468248844147, |
| "learning_rate": 0.00010685366514473802, |
| "loss": 1.6041250228881836, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.045263157894736845, |
| "grad_norm": 0.38397344946861267, |
| "learning_rate": 0.0001068400029358309, |
| "loss": 1.677585983276367, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.04631578947368421, |
| "grad_norm": 0.37290486693382263, |
| "learning_rate": 0.00010682573205051367, |
| "loss": 1.6698143005371093, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.04736842105263158, |
| "grad_norm": 0.37734609842300415, |
| "learning_rate": 0.00010681085265163504, |
| "loss": 1.6791515350341797, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.04842105263157895, |
| "grad_norm": 0.354443222284317, |
| "learning_rate": 0.00010679536490898761, |
| "loss": 1.6450014114379883, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.049473684210526316, |
| "grad_norm": 0.3799300491809845, |
| "learning_rate": 0.00010677926899930603, |
| "loss": 1.6635103225708008, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.05052631578947368, |
| "grad_norm": 0.3844967484474182, |
| "learning_rate": 0.00010676256510626478, |
| "loss": 1.6978870391845704, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.05157894736842105, |
| "grad_norm": 0.38755500316619873, |
| "learning_rate": 0.00010674525342047629, |
| "loss": 1.6842260360717773, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.05263157894736842, |
| "grad_norm": 0.39443737268447876, |
| "learning_rate": 0.00010672733413948862, |
| "loss": 1.6408458709716798, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05368421052631579, |
| "grad_norm": 0.4008043110370636, |
| "learning_rate": 0.00010670880746778328, |
| "loss": 1.61962833404541, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.05473684210526316, |
| "grad_norm": 0.3917809724807739, |
| "learning_rate": 0.00010668967361677283, |
| "loss": 1.718182373046875, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.05578947368421053, |
| "grad_norm": 0.364409476518631, |
| "learning_rate": 0.00010666993280479856, |
| "loss": 1.7204322814941406, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.056842105263157895, |
| "grad_norm": 0.39319396018981934, |
| "learning_rate": 0.00010664958525712792, |
| "loss": 1.6448682785034179, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.05789473684210526, |
| "grad_norm": 0.3864227533340454, |
| "learning_rate": 0.00010662863120595196, |
| "loss": 1.7400585174560548, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.05894736842105263, |
| "grad_norm": 0.37699612975120544, |
| "learning_rate": 0.00010660707089038273, |
| "loss": 1.6591960906982421, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.3808913230895996, |
| "learning_rate": 0.00010658490455645052, |
| "loss": 1.63150634765625, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.061052631578947365, |
| "grad_norm": 0.38882365822792053, |
| "learning_rate": 0.00010656213245710098, |
| "loss": 1.6896860122680664, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.06210526315789474, |
| "grad_norm": 0.3772079050540924, |
| "learning_rate": 0.0001065387548521924, |
| "loss": 1.7085845947265625, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.06315789473684211, |
| "grad_norm": 0.37180712819099426, |
| "learning_rate": 0.00010651477200849263, |
| "loss": 1.7532657623291015, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.06421052631578947, |
| "grad_norm": 0.3878546357154846, |
| "learning_rate": 0.00010649018419967597, |
| "loss": 1.6636667251586914, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.06526315789473684, |
| "grad_norm": 0.39751365780830383, |
| "learning_rate": 0.00010646499170632023, |
| "loss": 1.6579233169555665, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.06631578947368422, |
| "grad_norm": 0.3831867277622223, |
| "learning_rate": 0.00010643919481590337, |
| "loss": 1.6426708221435546, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.06736842105263158, |
| "grad_norm": 0.3749397099018097, |
| "learning_rate": 0.00010641279382280032, |
| "loss": 1.7154060363769532, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.06842105263157895, |
| "grad_norm": 0.37839797139167786, |
| "learning_rate": 0.00010638578902827957, |
| "loss": 1.7217548370361329, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.06947368421052631, |
| "grad_norm": 0.3703754246234894, |
| "learning_rate": 0.00010635818074049972, |
| "loss": 1.7110353469848634, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.07052631578947369, |
| "grad_norm": 0.36747097969055176, |
| "learning_rate": 0.00010632996927450597, |
| "loss": 1.651369857788086, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.07157894736842105, |
| "grad_norm": 0.36606892943382263, |
| "learning_rate": 0.00010630115495222664, |
| "loss": 1.6909339904785157, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.07263157894736842, |
| "grad_norm": 0.3871472179889679, |
| "learning_rate": 0.00010627173810246927, |
| "loss": 1.6740509033203126, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.07368421052631578, |
| "grad_norm": 0.3820892572402954, |
| "learning_rate": 0.00010624171906091708, |
| "loss": 1.7049301147460938, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.07473684210526316, |
| "grad_norm": 0.38060277700424194, |
| "learning_rate": 0.00010621109817012501, |
| "loss": 1.7255819320678711, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.07578947368421053, |
| "grad_norm": 0.37024298310279846, |
| "learning_rate": 0.00010617987577951588, |
| "loss": 1.707390594482422, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.07684210526315789, |
| "grad_norm": 0.3976726233959198, |
| "learning_rate": 0.0001061480522453764, |
| "loss": 1.6445907592773437, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.07789473684210527, |
| "grad_norm": 0.3904809057712555, |
| "learning_rate": 0.00010611562793085301, |
| "loss": 1.7427913665771484, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.07894736842105263, |
| "grad_norm": 0.37776583433151245, |
| "learning_rate": 0.00010608260320594787, |
| "loss": 1.6211050033569336, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.382707804441452, |
| "learning_rate": 0.00010604897844751458, |
| "loss": 1.6817436218261719, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.08105263157894736, |
| "grad_norm": 0.3894830048084259, |
| "learning_rate": 0.00010601475403925381, |
| "loss": 1.747372817993164, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.08210526315789474, |
| "grad_norm": 0.38454341888427734, |
| "learning_rate": 0.00010597993037170907, |
| "loss": 1.667810821533203, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.08315789473684211, |
| "grad_norm": 0.3924828767776489, |
| "learning_rate": 0.00010594450784226211, |
| "loss": 1.689559555053711, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.08421052631578947, |
| "grad_norm": 0.390747994184494, |
| "learning_rate": 0.0001059084868551285, |
| "loss": 1.687558364868164, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.08526315789473685, |
| "grad_norm": 0.38002100586891174, |
| "learning_rate": 0.0001058718678213529, |
| "loss": 1.7372432708740235, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.0863157894736842, |
| "grad_norm": 0.3947979509830475, |
| "learning_rate": 0.00010583465115880448, |
| "loss": 1.7141420364379882, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.08736842105263158, |
| "grad_norm": 0.38964593410491943, |
| "learning_rate": 0.0001057968372921721, |
| "loss": 1.6732599258422851, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.08842105263157894, |
| "grad_norm": 0.3914567828178406, |
| "learning_rate": 0.00010575842665295942, |
| "loss": 1.697699737548828, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.08947368421052632, |
| "grad_norm": 0.3780556917190552, |
| "learning_rate": 0.00010571941967948013, |
| "loss": 1.6859580993652343, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.09052631578947369, |
| "grad_norm": 0.3804113268852234, |
| "learning_rate": 0.00010567981681685271, |
| "loss": 1.630574607849121, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.09157894736842105, |
| "grad_norm": 0.3920338451862335, |
| "learning_rate": 0.0001056396185169956, |
| "loss": 1.701805877685547, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.09263157894736843, |
| "grad_norm": 0.3645232021808624, |
| "learning_rate": 0.00010559882523862185, |
| "loss": 1.6626638412475585, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.09368421052631579, |
| "grad_norm": 0.39647483825683594, |
| "learning_rate": 0.000105557437447234, |
| "loss": 1.657071876525879, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.09473684210526316, |
| "grad_norm": 0.3784042298793793, |
| "learning_rate": 0.00010551545561511872, |
| "loss": 1.6789131164550781, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.09578947368421052, |
| "grad_norm": 0.3799436390399933, |
| "learning_rate": 0.00010547288022134141, |
| "loss": 1.6874401092529296, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.0968421052631579, |
| "grad_norm": 0.3979872465133667, |
| "learning_rate": 0.00010542971175174078, |
| "loss": 1.7372554779052733, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.09789473684210526, |
| "grad_norm": 0.3869173228740692, |
| "learning_rate": 0.0001053859506989233, |
| "loss": 1.6965164184570312, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.09894736842105263, |
| "grad_norm": 0.38553228974342346, |
| "learning_rate": 0.0001053415975622575, |
| "loss": 1.6804073333740235, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.37855857610702515, |
| "learning_rate": 0.00010529665284786835, |
| "loss": 1.7479766845703124, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.10105263157894737, |
| "grad_norm": 0.36974212527275085, |
| "learning_rate": 0.00010525111706863153, |
| "loss": 1.6555421829223633, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.10210526315789474, |
| "grad_norm": 0.3829262852668762, |
| "learning_rate": 0.00010520499074416742, |
| "loss": 1.7271907806396485, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.1031578947368421, |
| "grad_norm": 0.3871605396270752, |
| "learning_rate": 0.0001051582744008353, |
| "loss": 1.6716243743896484, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.10421052631578948, |
| "grad_norm": 0.3923998475074768, |
| "learning_rate": 0.00010511096857172731, |
| "loss": 1.6450519561767578, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.10526315789473684, |
| "grad_norm": 0.38333484530448914, |
| "learning_rate": 0.00010506307379666238, |
| "loss": 1.6865043640136719, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.10631578947368421, |
| "grad_norm": 0.38256773352622986, |
| "learning_rate": 0.00010501459062218, |
| "loss": 1.6601579666137696, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.10736842105263159, |
| "grad_norm": 0.3737237751483917, |
| "learning_rate": 0.00010496551960153409, |
| "loss": 1.6208690643310546, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.10842105263157895, |
| "grad_norm": 0.366969496011734, |
| "learning_rate": 0.00010491586129468662, |
| "loss": 1.6808839797973634, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.10947368421052632, |
| "grad_norm": 0.3720376193523407, |
| "learning_rate": 0.0001048656162683012, |
| "loss": 1.6338840484619142, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.11052631578947368, |
| "grad_norm": 0.39924025535583496, |
| "learning_rate": 0.00010481478509573669, |
| "loss": 1.652592086791992, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.11157894736842106, |
| "grad_norm": 0.37709176540374756, |
| "learning_rate": 0.00010476336835704059, |
| "loss": 1.6794198989868163, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.11263157894736842, |
| "grad_norm": 0.382405161857605, |
| "learning_rate": 0.00010471136663894244, |
| "loss": 1.702239990234375, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.11368421052631579, |
| "grad_norm": 0.3955666720867157, |
| "learning_rate": 0.00010465878053484715, |
| "loss": 1.625558090209961, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.11473684210526315, |
| "grad_norm": 0.3984505534172058, |
| "learning_rate": 0.0001046056106448282, |
| "loss": 1.7061031341552735, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.11578947368421053, |
| "grad_norm": 0.37337619066238403, |
| "learning_rate": 0.00010455185757562081, |
| "loss": 1.6474536895751952, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.1168421052631579, |
| "grad_norm": 0.4265633225440979, |
| "learning_rate": 0.00010449752194061497, |
| "loss": 1.6948539733886718, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.11789473684210526, |
| "grad_norm": 0.39065343141555786, |
| "learning_rate": 0.0001044426043598485, |
| "loss": 1.6905693054199218, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.11894736842105263, |
| "grad_norm": 0.3910517692565918, |
| "learning_rate": 0.00010438710545999999, |
| "loss": 1.6512699127197266, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.41286537051200867, |
| "learning_rate": 0.00010433102587438154, |
| "loss": 1.6904163360595703, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.12105263157894737, |
| "grad_norm": 0.39058077335357666, |
| "learning_rate": 0.00010427436624293164, |
| "loss": 1.6889778137207032, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.12210526315789473, |
| "grad_norm": 0.40376579761505127, |
| "learning_rate": 0.00010421712721220786, |
| "loss": 1.6660743713378907, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.1231578947368421, |
| "grad_norm": 0.4065842628479004, |
| "learning_rate": 0.00010415930943537937, |
| "loss": 1.7282680511474608, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.12421052631578948, |
| "grad_norm": 0.3935592770576477, |
| "learning_rate": 0.00010410091357221965, |
| "loss": 1.7208686828613282, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.12526315789473685, |
| "grad_norm": 0.3769897520542145, |
| "learning_rate": 0.00010404194028909876, |
| "loss": 1.6730665206909179, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.12631578947368421, |
| "grad_norm": 0.37976640462875366, |
| "learning_rate": 0.00010398239025897598, |
| "loss": 1.7071300506591798, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.12736842105263158, |
| "grad_norm": 0.38293200731277466, |
| "learning_rate": 0.0001039222641613919, |
| "loss": 1.7225513458251953, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.12842105263157894, |
| "grad_norm": 0.3943805694580078, |
| "learning_rate": 0.00010386156268246077, |
| "loss": 1.6900711059570312, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.12947368421052632, |
| "grad_norm": 0.402694970369339, |
| "learning_rate": 0.00010380028651486271, |
| "loss": 1.6741355895996093, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.13052631578947368, |
| "grad_norm": 0.4034770429134369, |
| "learning_rate": 0.00010373843635783572, |
| "loss": 1.7251928329467774, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.13157894736842105, |
| "grad_norm": 0.4223957359790802, |
| "learning_rate": 0.00010367601291716777, |
| "loss": 1.7350204467773438, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.13263157894736843, |
| "grad_norm": 0.3636983633041382, |
| "learning_rate": 0.0001036130169051887, |
| "loss": 1.6685359954833985, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.1336842105263158, |
| "grad_norm": 0.36913859844207764, |
| "learning_rate": 0.00010354944904076209, |
| "loss": 1.6918949127197265, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.13473684210526315, |
| "grad_norm": 0.3916381597518921, |
| "learning_rate": 0.00010348531004927711, |
| "loss": 1.6259313583374024, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.13578947368421052, |
| "grad_norm": 0.38772350549697876, |
| "learning_rate": 0.00010342060066264016, |
| "loss": 1.7148677825927734, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.1368421052631579, |
| "grad_norm": 0.38373488187789917, |
| "learning_rate": 0.00010335532161926664, |
| "loss": 1.6328174591064453, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.13789473684210526, |
| "grad_norm": 0.3877631723880768, |
| "learning_rate": 0.00010328947366407237, |
| "loss": 1.646784210205078, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.13894736842105262, |
| "grad_norm": 0.39882156252861023, |
| "learning_rate": 0.00010322305754846519, |
| "loss": 1.6600376129150392, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.40457776188850403, |
| "learning_rate": 0.00010315607403033641, |
| "loss": 1.669814109802246, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.14105263157894737, |
| "grad_norm": 0.3948962688446045, |
| "learning_rate": 0.00010308852387405208, |
| "loss": 1.715940284729004, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.14210526315789473, |
| "grad_norm": 0.3921595513820648, |
| "learning_rate": 0.00010302040785044425, |
| "loss": 1.6944934844970703, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.1431578947368421, |
| "grad_norm": 0.3857240676879883, |
| "learning_rate": 0.00010295172673680234, |
| "loss": 1.6900419235229491, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.14421052631578948, |
| "grad_norm": 0.38249680399894714, |
| "learning_rate": 0.00010288248131686406, |
| "loss": 1.7138862609863281, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.14526315789473684, |
| "grad_norm": 0.40845534205436707, |
| "learning_rate": 0.00010281267238080664, |
| "loss": 1.7212867736816406, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.1463157894736842, |
| "grad_norm": 0.3911115229129791, |
| "learning_rate": 0.00010274230072523764, |
| "loss": 1.7087575912475585, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.14736842105263157, |
| "grad_norm": 0.3967211842536926, |
| "learning_rate": 0.00010267136715318605, |
| "loss": 1.675175094604492, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.14842105263157895, |
| "grad_norm": 0.3820992410182953, |
| "learning_rate": 0.00010259987247409298, |
| "loss": 1.665155792236328, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.14947368421052631, |
| "grad_norm": 0.40317046642303467, |
| "learning_rate": 0.00010252781750380252, |
| "loss": 1.6777839660644531, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.15052631578947367, |
| "grad_norm": 0.39026641845703125, |
| "learning_rate": 0.00010245520306455232, |
| "loss": 1.6641407012939453, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.15157894736842106, |
| "grad_norm": 0.38703930377960205, |
| "learning_rate": 0.00010238202998496432, |
| "loss": 1.7006916046142577, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.15263157894736842, |
| "grad_norm": 0.3920949697494507, |
| "learning_rate": 0.00010230829910003525, |
| "loss": 1.6237125396728516, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.15368421052631578, |
| "grad_norm": 0.40310102701187134, |
| "learning_rate": 0.00010223401125112709, |
| "loss": 1.693703842163086, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.15473684210526314, |
| "grad_norm": 0.3895237147808075, |
| "learning_rate": 0.00010215916728595746, |
| "loss": 1.6554393768310547, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.15578947368421053, |
| "grad_norm": 0.3830355703830719, |
| "learning_rate": 0.00010208376805858997, |
| "loss": 1.6817665100097656, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.1568421052631579, |
| "grad_norm": 0.4044099450111389, |
| "learning_rate": 0.00010200781442942451, |
| "loss": 1.740530776977539, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.15789473684210525, |
| "grad_norm": 0.37278082966804504, |
| "learning_rate": 0.00010193130726518736, |
| "loss": 1.7269683837890626, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.15894736842105264, |
| "grad_norm": 0.3909358084201813, |
| "learning_rate": 0.00010185424743892131, |
| "loss": 1.674229049682617, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.3877439796924591, |
| "learning_rate": 0.00010177663582997574, |
| "loss": 1.6566276550292969, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.16105263157894736, |
| "grad_norm": 0.3673596978187561, |
| "learning_rate": 0.00010169847332399658, |
| "loss": 1.6969722747802733, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.16210526315789472, |
| "grad_norm": 0.428408145904541, |
| "learning_rate": 0.00010161976081291614, |
| "loss": 1.6617691040039062, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.1631578947368421, |
| "grad_norm": 0.38442328572273254, |
| "learning_rate": 0.00010154049919494305, |
| "loss": 1.7180919647216797, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.16421052631578947, |
| "grad_norm": 0.41423359513282776, |
| "learning_rate": 0.00010146068937455184, |
| "loss": 1.7110111236572265, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.16526315789473683, |
| "grad_norm": 0.3815020024776459, |
| "learning_rate": 0.00010138033226247282, |
| "loss": 1.6620532989501953, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.16631578947368422, |
| "grad_norm": 0.38987597823143005, |
| "learning_rate": 0.00010129942877568153, |
| "loss": 1.6376758575439454, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.16736842105263158, |
| "grad_norm": 0.37103158235549927, |
| "learning_rate": 0.00010121797983738831, |
| "loss": 1.6269058227539062, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.16842105263157894, |
| "grad_norm": 0.39582741260528564, |
| "learning_rate": 0.00010113598637702785, |
| "loss": 1.6544437408447266, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.1694736842105263, |
| "grad_norm": 0.3875832259654999, |
| "learning_rate": 0.0001010534493302485, |
| "loss": 1.69503173828125, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.1705263157894737, |
| "grad_norm": 0.40506550669670105, |
| "learning_rate": 0.00010097036963890156, |
| "loss": 1.6826278686523437, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.17157894736842105, |
| "grad_norm": 0.39827048778533936, |
| "learning_rate": 0.00010088674825103067, |
| "loss": 1.6500736236572267, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.1726315789473684, |
| "grad_norm": 0.3786768913269043, |
| "learning_rate": 0.00010080258612086083, |
| "loss": 1.6809326171875, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.1736842105263158, |
| "grad_norm": 0.40326225757598877, |
| "learning_rate": 0.00010071788420878764, |
| "loss": 1.7387603759765624, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.17473684210526316, |
| "grad_norm": 0.3733818829059601, |
| "learning_rate": 0.00010063264348136629, |
| "loss": 1.6930301666259766, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.17578947368421052, |
| "grad_norm": 0.4019014239311218, |
| "learning_rate": 0.00010054686491130048, |
| "loss": 1.665353012084961, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.17684210526315788, |
| "grad_norm": 0.3994007110595703, |
| "learning_rate": 0.00010046054947743142, |
| "loss": 1.7481708526611328, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.17789473684210527, |
| "grad_norm": 0.40046176314353943, |
| "learning_rate": 0.00010037369816472658, |
| "loss": 1.6684654235839844, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.17894736842105263, |
| "grad_norm": 0.39062178134918213, |
| "learning_rate": 0.00010028631196426851, |
| "loss": 1.6636728286743163, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.40030282735824585, |
| "learning_rate": 0.0001001983918732435, |
| "loss": 1.6382123947143554, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.18105263157894738, |
| "grad_norm": 0.38396012783050537, |
| "learning_rate": 0.00010010993889493013, |
| "loss": 1.6094409942626953, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.18210526315789474, |
| "grad_norm": 0.3969299793243408, |
| "learning_rate": 0.000100020954038688, |
| "loss": 1.6550315856933593, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.1831578947368421, |
| "grad_norm": 0.39174884557724, |
| "learning_rate": 9.993143831994603e-05, |
| "loss": 1.7123249053955079, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.18421052631578946, |
| "grad_norm": 0.38760584592819214, |
| "learning_rate": 9.984139276019098e-05, |
| "loss": 1.6742156982421874, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.18526315789473685, |
| "grad_norm": 0.3817841410636902, |
| "learning_rate": 9.975081838695576e-05, |
| "loss": 1.641263771057129, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.1863157894736842, |
| "grad_norm": 0.4085705578327179, |
| "learning_rate": 9.965971623380768e-05, |
| "loss": 1.7673213958740235, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.18736842105263157, |
| "grad_norm": 0.38965287804603577, |
| "learning_rate": 9.956808734033671e-05, |
| "loss": 1.770319366455078, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.18842105263157893, |
| "grad_norm": 0.3770400881767273, |
| "learning_rate": 9.947593275214358e-05, |
| "loss": 1.6587142944335938, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.18947368421052632, |
| "grad_norm": 0.40959247946739197, |
| "learning_rate": 9.938325352082786e-05, |
| "loss": 1.6820697784423828, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.19052631578947368, |
| "grad_norm": 0.37764784693717957, |
| "learning_rate": 9.929005070397595e-05, |
| "loss": 1.6965087890625, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.19157894736842104, |
| "grad_norm": 0.37487778067588806, |
| "learning_rate": 9.9196325365149e-05, |
| "loss": 1.6261119842529297, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.19263157894736843, |
| "grad_norm": 0.4048542380332947, |
| "learning_rate": 9.910207857387085e-05, |
| "loss": 1.7076032638549805, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.1936842105263158, |
| "grad_norm": 0.37118133902549744, |
| "learning_rate": 9.90073114056157e-05, |
| "loss": 1.70123233795166, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.19473684210526315, |
| "grad_norm": 0.38945528864860535, |
| "learning_rate": 9.891202494179595e-05, |
| "loss": 1.7137296676635743, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.1957894736842105, |
| "grad_norm": 0.39081960916519165, |
| "learning_rate": 9.881622026974978e-05, |
| "loss": 1.6556056976318358, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.1968421052631579, |
| "grad_norm": 0.4000365436077118, |
| "learning_rate": 9.871989848272882e-05, |
| "loss": 1.708022689819336, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.19789473684210526, |
| "grad_norm": 0.38972243666648865, |
| "learning_rate": 9.86230606798856e-05, |
| "loss": 1.6936985015869142, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.19894736842105262, |
| "grad_norm": 0.4023416340351105, |
| "learning_rate": 9.852570796626104e-05, |
| "loss": 1.6013282775878905, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.37790361046791077, |
| "learning_rate": 9.842784145277185e-05, |
| "loss": 1.678757095336914, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.20105263157894737, |
| "grad_norm": 0.4072909653186798, |
| "learning_rate": 9.832946225619782e-05, |
| "loss": 1.6550043106079102, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.20210526315789473, |
| "grad_norm": 0.4222109317779541, |
| "learning_rate": 9.823057149916913e-05, |
| "loss": 1.6794788360595703, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.2031578947368421, |
| "grad_norm": 0.3997038006782532, |
| "learning_rate": 9.813117031015348e-05, |
| "loss": 1.708123779296875, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.20421052631578948, |
| "grad_norm": 0.387678861618042, |
| "learning_rate": 9.803125982344328e-05, |
| "loss": 1.694279098510742, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.20526315789473684, |
| "grad_norm": 0.41388800740242004, |
| "learning_rate": 9.793084117914258e-05, |
| "loss": 1.698614501953125, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.2063157894736842, |
| "grad_norm": 0.38706713914871216, |
| "learning_rate": 9.782991552315424e-05, |
| "loss": 1.702214813232422, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.2073684210526316, |
| "grad_norm": 0.3965074419975281, |
| "learning_rate": 9.772848400716673e-05, |
| "loss": 1.6214000701904296, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.20842105263157895, |
| "grad_norm": 0.39218032360076904, |
| "learning_rate": 9.762654778864099e-05, |
| "loss": 1.681211280822754, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.2094736842105263, |
| "grad_norm": 0.4117305874824524, |
| "learning_rate": 9.752410803079726e-05, |
| "loss": 1.6745601654052735, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.21052631578947367, |
| "grad_norm": 0.3973471224308014, |
| "learning_rate": 9.742116590260185e-05, |
| "loss": 1.6459293365478516, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.21157894736842106, |
| "grad_norm": 0.3847576975822449, |
| "learning_rate": 9.731772257875366e-05, |
| "loss": 1.6581769943237306, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.21263157894736842, |
| "grad_norm": 0.4136882424354553, |
| "learning_rate": 9.721377923967092e-05, |
| "loss": 1.7314947128295899, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.21368421052631578, |
| "grad_norm": 0.37820902466773987, |
| "learning_rate": 9.710933707147764e-05, |
| "loss": 1.7070299148559571, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.21473684210526317, |
| "grad_norm": 0.39630916714668274, |
| "learning_rate": 9.700439726599012e-05, |
| "loss": 1.6553241729736328, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.21578947368421053, |
| "grad_norm": 0.3991798758506775, |
| "learning_rate": 9.68989610207033e-05, |
| "loss": 1.7385829925537108, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.2168421052631579, |
| "grad_norm": 0.4119565188884735, |
| "learning_rate": 9.679302953877712e-05, |
| "loss": 1.71380615234375, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.21789473684210525, |
| "grad_norm": 0.40724804997444153, |
| "learning_rate": 9.66866040290228e-05, |
| "loss": 1.6676467895507812, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.21894736842105264, |
| "grad_norm": 0.4088967442512512, |
| "learning_rate": 9.657968570588905e-05, |
| "loss": 1.674250030517578, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.40387439727783203, |
| "learning_rate": 9.64722757894482e-05, |
| "loss": 1.676458740234375, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.22105263157894736, |
| "grad_norm": 0.4028227925300598, |
| "learning_rate": 9.636437550538226e-05, |
| "loss": 1.6708587646484374, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.22210526315789475, |
| "grad_norm": 0.40027210116386414, |
| "learning_rate": 9.625598608496895e-05, |
| "loss": 1.6314043045043944, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.2231578947368421, |
| "grad_norm": 0.386688768863678, |
| "learning_rate": 9.614710876506763e-05, |
| "loss": 1.725076675415039, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.22421052631578947, |
| "grad_norm": 0.4061787724494934, |
| "learning_rate": 9.603774478810528e-05, |
| "loss": 1.6826349258422852, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.22526315789473683, |
| "grad_norm": 0.40370142459869385, |
| "learning_rate": 9.592789540206218e-05, |
| "loss": 1.649374771118164, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.22631578947368422, |
| "grad_norm": 0.40586093068122864, |
| "learning_rate": 9.581756186045777e-05, |
| "loss": 1.6614540100097657, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.22736842105263158, |
| "grad_norm": 0.3933681547641754, |
| "learning_rate": 9.570674542233628e-05, |
| "loss": 1.6946598052978517, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.22842105263157894, |
| "grad_norm": 0.3825010359287262, |
| "learning_rate": 9.559544735225242e-05, |
| "loss": 1.6574283599853517, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.2294736842105263, |
| "grad_norm": 0.4000436067581177, |
| "learning_rate": 9.548366892025693e-05, |
| "loss": 1.673634910583496, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.2305263157894737, |
| "grad_norm": 0.3942500054836273, |
| "learning_rate": 9.537141140188206e-05, |
| "loss": 1.621174430847168, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.23157894736842105, |
| "grad_norm": 0.3846987783908844, |
| "learning_rate": 9.525867607812708e-05, |
| "loss": 1.6244104385375977, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.2326315789473684, |
| "grad_norm": 0.38483455777168274, |
| "learning_rate": 9.514546423544357e-05, |
| "loss": 1.687708282470703, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.2336842105263158, |
| "grad_norm": 0.4134112000465393, |
| "learning_rate": 9.503177716572082e-05, |
| "loss": 1.7054229736328126, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.23473684210526316, |
| "grad_norm": 0.3780292868614197, |
| "learning_rate": 9.491761616627101e-05, |
| "loss": 1.6283729553222657, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.23578947368421052, |
| "grad_norm": 0.40246784687042236, |
| "learning_rate": 9.480298253981456e-05, |
| "loss": 1.7036407470703125, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.23684210526315788, |
| "grad_norm": 0.4002091884613037, |
| "learning_rate": 9.468787759446502e-05, |
| "loss": 1.7064756393432616, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.23789473684210527, |
| "grad_norm": 0.40926146507263184, |
| "learning_rate": 9.457230264371439e-05, |
| "loss": 1.6858642578125, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.23894736842105263, |
| "grad_norm": 0.41373902559280396, |
| "learning_rate": 9.445625900641796e-05, |
| "loss": 1.655508804321289, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.38966718316078186, |
| "learning_rate": 9.433974800677935e-05, |
| "loss": 1.6741256713867188, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.24105263157894738, |
| "grad_norm": 0.4069412648677826, |
| "learning_rate": 9.422277097433537e-05, |
| "loss": 1.6685916900634765, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.24210526315789474, |
| "grad_norm": 0.3916907012462616, |
| "learning_rate": 9.410532924394083e-05, |
| "loss": 1.6491849899291993, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.2431578947368421, |
| "grad_norm": 0.39959436655044556, |
| "learning_rate": 9.398742415575336e-05, |
| "loss": 1.670114517211914, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.24421052631578946, |
| "grad_norm": 0.3950902223587036, |
| "learning_rate": 9.386905705521803e-05, |
| "loss": 1.6907678604125977, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.24526315789473685, |
| "grad_norm": 0.38667526841163635, |
| "learning_rate": 9.375022929305213e-05, |
| "loss": 1.669590377807617, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.2463157894736842, |
| "grad_norm": 0.39125263690948486, |
| "learning_rate": 9.363094222522958e-05, |
| "loss": 1.6502418518066406, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.24736842105263157, |
| "grad_norm": 0.38178369402885437, |
| "learning_rate": 9.351119721296566e-05, |
| "loss": 1.7035490036010743, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.24842105263157896, |
| "grad_norm": 0.37467339634895325, |
| "learning_rate": 9.339099562270128e-05, |
| "loss": 1.6536640167236327, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.24947368421052632, |
| "grad_norm": 0.41233041882514954, |
| "learning_rate": 9.327033882608754e-05, |
| "loss": 1.6268924713134765, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.2505263157894737, |
| "grad_norm": 0.3746933937072754, |
| "learning_rate": 9.314922819996997e-05, |
| "loss": 1.6240985870361329, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.25157894736842107, |
| "grad_norm": 0.3932549059391022, |
| "learning_rate": 9.302766512637293e-05, |
| "loss": 1.6809700012207032, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.25263157894736843, |
| "grad_norm": 0.4058087468147278, |
| "learning_rate": 9.290565099248368e-05, |
| "loss": 1.6474214553833009, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.2536842105263158, |
| "grad_norm": 0.3873753547668457, |
| "learning_rate": 9.278318719063673e-05, |
| "loss": 1.6398870468139648, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.25473684210526315, |
| "grad_norm": 0.41126886010169983, |
| "learning_rate": 9.26602751182978e-05, |
| "loss": 1.6111644744873046, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.2557894736842105, |
| "grad_norm": 0.40002816915512085, |
| "learning_rate": 9.2536916178048e-05, |
| "loss": 1.6024229049682617, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.25684210526315787, |
| "grad_norm": 0.4194015562534332, |
| "learning_rate": 9.241311177756771e-05, |
| "loss": 1.6467687606811523, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.2578947368421053, |
| "grad_norm": 0.4181770980358124, |
| "learning_rate": 9.228886332962062e-05, |
| "loss": 1.6439130783081055, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.25894736842105265, |
| "grad_norm": 0.40925332903862, |
| "learning_rate": 9.216417225203754e-05, |
| "loss": 1.6347824096679688, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.40195897221565247, |
| "learning_rate": 9.203903996770019e-05, |
| "loss": 1.6572818756103516, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.26105263157894737, |
| "grad_norm": 0.4277157485485077, |
| "learning_rate": 9.191346790452509e-05, |
| "loss": 1.6013570785522462, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.26210526315789473, |
| "grad_norm": 0.3951636552810669, |
| "learning_rate": 9.178745749544716e-05, |
| "loss": 1.694039535522461, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.2631578947368421, |
| "grad_norm": 0.3961932957172394, |
| "learning_rate": 9.166101017840337e-05, |
| "loss": 1.6311038970947265, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.26421052631578945, |
| "grad_norm": 0.40256279706954956, |
| "learning_rate": 9.15341273963164e-05, |
| "loss": 1.7131736755371094, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.26526315789473687, |
| "grad_norm": 0.40076208114624023, |
| "learning_rate": 9.14068105970781e-05, |
| "loss": 1.659266471862793, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.26631578947368423, |
| "grad_norm": 0.39892420172691345, |
| "learning_rate": 9.127906123353305e-05, |
| "loss": 1.6891080856323242, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.2673684210526316, |
| "grad_norm": 0.39453125, |
| "learning_rate": 9.115088076346184e-05, |
| "loss": 1.6869060516357421, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.26842105263157895, |
| "grad_norm": 0.3876430094242096, |
| "learning_rate": 9.102227064956465e-05, |
| "loss": 1.623502540588379, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.2694736842105263, |
| "grad_norm": 0.3828693628311157, |
| "learning_rate": 9.08932323594443e-05, |
| "loss": 1.6787071228027344, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.27052631578947367, |
| "grad_norm": 0.3757915198802948, |
| "learning_rate": 9.076376736558976e-05, |
| "loss": 1.7229637145996093, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.27157894736842103, |
| "grad_norm": 0.3994489312171936, |
| "learning_rate": 9.063387714535916e-05, |
| "loss": 1.6279123306274415, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.27263157894736845, |
| "grad_norm": 0.40050971508026123, |
| "learning_rate": 9.0503563180963e-05, |
| "loss": 1.667708969116211, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.2736842105263158, |
| "grad_norm": 0.4005604684352875, |
| "learning_rate": 9.037282695944726e-05, |
| "loss": 1.6468616485595704, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.27473684210526317, |
| "grad_norm": 0.40057310461997986, |
| "learning_rate": 9.024166997267636e-05, |
| "loss": 1.6907684326171875, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.27578947368421053, |
| "grad_norm": 0.4074793756008148, |
| "learning_rate": 9.011009371731623e-05, |
| "loss": 1.6792390823364258, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.2768421052631579, |
| "grad_norm": 0.4014405310153961, |
| "learning_rate": 8.997809969481715e-05, |
| "loss": 1.640324592590332, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.27789473684210525, |
| "grad_norm": 0.42860186100006104, |
| "learning_rate": 8.984568941139665e-05, |
| "loss": 1.6390762329101562, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.2789473684210526, |
| "grad_norm": 0.41278424859046936, |
| "learning_rate": 8.971286437802235e-05, |
| "loss": 1.7043113708496094, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.38656142354011536, |
| "learning_rate": 8.957962611039464e-05, |
| "loss": 1.7256532669067384, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.2810526315789474, |
| "grad_norm": 0.3984103202819824, |
| "learning_rate": 8.944597612892944e-05, |
| "loss": 1.6301074981689454, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.28210526315789475, |
| "grad_norm": 0.3937322795391083, |
| "learning_rate": 8.93119159587409e-05, |
| "loss": 1.6612771987915038, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.2831578947368421, |
| "grad_norm": 0.39241543412208557, |
| "learning_rate": 8.917744712962387e-05, |
| "loss": 1.6962703704833983, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.28421052631578947, |
| "grad_norm": 0.407466858625412, |
| "learning_rate": 8.904257117603653e-05, |
| "loss": 1.721807861328125, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.28526315789473683, |
| "grad_norm": 0.3965199589729309, |
| "learning_rate": 8.890728963708288e-05, |
| "loss": 1.6854072570800782, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.2863157894736842, |
| "grad_norm": 0.3866688013076782, |
| "learning_rate": 8.877160405649515e-05, |
| "loss": 1.678403663635254, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.2873684210526316, |
| "grad_norm": 0.40115654468536377, |
| "learning_rate": 8.863551598261618e-05, |
| "loss": 1.688330078125, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.28842105263157897, |
| "grad_norm": 0.41881707310676575, |
| "learning_rate": 8.849902696838176e-05, |
| "loss": 1.685501480102539, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.2894736842105263, |
| "grad_norm": 0.3956238329410553, |
| "learning_rate": 8.836213857130296e-05, |
| "loss": 1.6521308898925782, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.2905263157894737, |
| "grad_norm": 0.3809671700000763, |
| "learning_rate": 8.822485235344825e-05, |
| "loss": 1.6597816467285156, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.29157894736842105, |
| "grad_norm": 0.39534077048301697, |
| "learning_rate": 8.808716988142575e-05, |
| "loss": 1.6627084732055664, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.2926315789473684, |
| "grad_norm": 0.37715721130371094, |
| "learning_rate": 8.794909272636537e-05, |
| "loss": 1.6618637084960937, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.29368421052631577, |
| "grad_norm": 0.4065514802932739, |
| "learning_rate": 8.781062246390083e-05, |
| "loss": 1.6399276733398438, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.29473684210526313, |
| "grad_norm": 0.3923916220664978, |
| "learning_rate": 8.767176067415169e-05, |
| "loss": 1.668557357788086, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.29578947368421055, |
| "grad_norm": 0.3970358967781067, |
| "learning_rate": 8.75325089417053e-05, |
| "loss": 1.6664169311523438, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.2968421052631579, |
| "grad_norm": 0.4063076078891754, |
| "learning_rate": 8.739286885559882e-05, |
| "loss": 1.718800163269043, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.29789473684210527, |
| "grad_norm": 0.41235899925231934, |
| "learning_rate": 8.725284200930096e-05, |
| "loss": 1.6484018325805665, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.29894736842105263, |
| "grad_norm": 0.41001883149147034, |
| "learning_rate": 8.711243000069387e-05, |
| "loss": 1.6729150772094727, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.40411022305488586, |
| "learning_rate": 8.697163443205486e-05, |
| "loss": 1.6615083694458008, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.30105263157894735, |
| "grad_norm": 0.3862515389919281, |
| "learning_rate": 8.683045691003816e-05, |
| "loss": 1.6196592330932618, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.3021052631578947, |
| "grad_norm": 0.385047972202301, |
| "learning_rate": 8.668889904565657e-05, |
| "loss": 1.6499458312988282, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.3031578947368421, |
| "grad_norm": 0.385885626077652, |
| "learning_rate": 8.654696245426309e-05, |
| "loss": 1.6544832229614257, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.3042105263157895, |
| "grad_norm": 0.39182907342910767, |
| "learning_rate": 8.640464875553244e-05, |
| "loss": 1.6151403427124023, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.30526315789473685, |
| "grad_norm": 0.37692710757255554, |
| "learning_rate": 8.626195957344259e-05, |
| "loss": 1.7116943359375, |
| "step": 2900 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 9500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.884603437744128e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|