{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.30526315789473685,
"eval_steps": 500,
"global_step": 2900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0010526315789473684,
"grad_norm": 0.34683918952941895,
"learning_rate": 4.815e-06,
"loss": 1.7081634521484375,
"step": 10
},
{
"epoch": 0.002105263157894737,
"grad_norm": 0.3531605899333954,
"learning_rate": 1.0165e-05,
"loss": 1.656758689880371,
"step": 20
},
{
"epoch": 0.003157894736842105,
"grad_norm": 0.3394385576248169,
"learning_rate": 1.5515e-05,
"loss": 1.633415985107422,
"step": 30
},
{
"epoch": 0.004210526315789474,
"grad_norm": 0.37144365906715393,
"learning_rate": 2.0865e-05,
"loss": 1.6722015380859374,
"step": 40
},
{
"epoch": 0.005263157894736842,
"grad_norm": 0.3785368800163269,
"learning_rate": 2.6215e-05,
"loss": 1.6771835327148437,
"step": 50
},
{
"epoch": 0.00631578947368421,
"grad_norm": 0.3667239546775818,
"learning_rate": 3.1565e-05,
"loss": 1.6657798767089844,
"step": 60
},
{
"epoch": 0.007368421052631579,
"grad_norm": 0.35704323649406433,
"learning_rate": 3.6914999999999995e-05,
"loss": 1.637792205810547,
"step": 70
},
{
"epoch": 0.008421052631578947,
"grad_norm": 0.3614155352115631,
"learning_rate": 4.2265e-05,
"loss": 1.6456287384033204,
"step": 80
},
{
"epoch": 0.009473684210526316,
"grad_norm": 0.3477347195148468,
"learning_rate": 4.7615e-05,
"loss": 1.6376474380493165,
"step": 90
},
{
"epoch": 0.010526315789473684,
"grad_norm": 0.3474464416503906,
"learning_rate": 5.2965e-05,
"loss": 1.6883708953857421,
"step": 100
},
{
"epoch": 0.011578947368421053,
"grad_norm": 0.3608642518520355,
"learning_rate": 5.831500000000001e-05,
"loss": 1.7032821655273438,
"step": 110
},
{
"epoch": 0.01263157894736842,
"grad_norm": 0.35934099555015564,
"learning_rate": 6.3665e-05,
"loss": 1.597799301147461,
"step": 120
},
{
"epoch": 0.01368421052631579,
"grad_norm": 0.38500702381134033,
"learning_rate": 6.9015e-05,
"loss": 1.6558387756347657,
"step": 130
},
{
"epoch": 0.014736842105263158,
"grad_norm": 0.3602914810180664,
"learning_rate": 7.4365e-05,
"loss": 1.6937145233154296,
"step": 140
},
{
"epoch": 0.015789473684210527,
"grad_norm": 0.36331596970558167,
"learning_rate": 7.9715e-05,
"loss": 1.5696943283081055,
"step": 150
},
{
"epoch": 0.016842105263157894,
"grad_norm": 0.3533744215965271,
"learning_rate": 8.5065e-05,
"loss": 1.702765655517578,
"step": 160
},
{
"epoch": 0.017894736842105262,
"grad_norm": 0.3546121120452881,
"learning_rate": 9.0415e-05,
"loss": 1.6325836181640625,
"step": 170
},
{
"epoch": 0.018947368421052633,
"grad_norm": 0.3867342472076416,
"learning_rate": 9.5765e-05,
"loss": 1.636269760131836,
"step": 180
},
{
"epoch": 0.02,
"grad_norm": 0.3859454393386841,
"learning_rate": 0.000101115,
"loss": 1.7112407684326172,
"step": 190
},
{
"epoch": 0.021052631578947368,
"grad_norm": 0.3688015937805176,
"learning_rate": 0.000106465,
"loss": 1.6455875396728517,
"step": 200
},
{
"epoch": 0.022105263157894735,
"grad_norm": 0.3693976402282715,
"learning_rate": 0.00010699975274657343,
"loss": 1.6878833770751953,
"step": 210
},
{
"epoch": 0.023157894736842106,
"grad_norm": 0.3748058080673218,
"learning_rate": 0.00010699889804630456,
"loss": 1.6435226440429687,
"step": 220
},
{
"epoch": 0.024210526315789474,
"grad_norm": 0.3806576430797577,
"learning_rate": 0.00010699743285643286,
"loss": 1.7004669189453125,
"step": 230
},
{
"epoch": 0.02526315789473684,
"grad_norm": 0.3829317092895508,
"learning_rate": 0.00010699535719367796,
"loss": 1.6831859588623046,
"step": 240
},
{
"epoch": 0.02631578947368421,
"grad_norm": 0.4069920480251312,
"learning_rate": 0.00010699267108172577,
"loss": 1.6417667388916015,
"step": 250
},
{
"epoch": 0.02736842105263158,
"grad_norm": 0.37535834312438965,
"learning_rate": 0.00010698937455122825,
"loss": 1.640174102783203,
"step": 260
},
{
"epoch": 0.028421052631578948,
"grad_norm": 0.3992610573768616,
"learning_rate": 0.0001069854676398029,
"loss": 1.6665351867675782,
"step": 270
},
{
"epoch": 0.029473684210526315,
"grad_norm": 0.3680964708328247,
"learning_rate": 0.0001069809503920325,
"loss": 1.7111568450927734,
"step": 280
},
{
"epoch": 0.030526315789473683,
"grad_norm": 0.4049525856971741,
"learning_rate": 0.00010697582285946452,
"loss": 1.6817201614379882,
"step": 290
},
{
"epoch": 0.031578947368421054,
"grad_norm": 0.38598954677581787,
"learning_rate": 0.00010697008510061057,
"loss": 1.6445945739746093,
"step": 300
},
{
"epoch": 0.03263157894736842,
"grad_norm": 0.39688920974731445,
"learning_rate": 0.00010696373718094565,
"loss": 1.688629150390625,
"step": 310
},
{
"epoch": 0.03368421052631579,
"grad_norm": 0.3762621581554413,
"learning_rate": 0.00010695677917290751,
"loss": 1.6273818969726563,
"step": 320
},
{
"epoch": 0.034736842105263156,
"grad_norm": 0.3470601737499237,
"learning_rate": 0.00010694921115589574,
"loss": 1.690780258178711,
"step": 330
},
{
"epoch": 0.035789473684210524,
"grad_norm": 0.38783422112464905,
"learning_rate": 0.00010694103321627094,
"loss": 1.6885700225830078,
"step": 340
},
{
"epoch": 0.03684210526315789,
"grad_norm": 0.3837421238422394,
"learning_rate": 0.00010693224544735366,
"loss": 1.670220184326172,
"step": 350
},
{
"epoch": 0.037894736842105266,
"grad_norm": 0.3634503185749054,
"learning_rate": 0.00010692284794942337,
"loss": 1.6357498168945312,
"step": 360
},
{
"epoch": 0.03894736842105263,
"grad_norm": 0.39452844858169556,
"learning_rate": 0.00010691284082971734,
"loss": 1.6791454315185548,
"step": 370
},
{
"epoch": 0.04,
"grad_norm": 0.38304150104522705,
"learning_rate": 0.00010690222420242937,
"loss": 1.6702400207519532,
"step": 380
},
{
"epoch": 0.04105263157894737,
"grad_norm": 0.3755001723766327,
"learning_rate": 0.00010689099818870848,
"loss": 1.6558124542236328,
"step": 390
},
{
"epoch": 0.042105263157894736,
"grad_norm": 0.3776380121707916,
"learning_rate": 0.0001068791629166576,
"loss": 1.6616518020629882,
"step": 400
},
{
"epoch": 0.0431578947368421,
"grad_norm": 0.3697650134563446,
"learning_rate": 0.00010686671852133208,
"loss": 1.6540897369384766,
"step": 410
},
{
"epoch": 0.04421052631578947,
"grad_norm": 0.3718468248844147,
"learning_rate": 0.00010685366514473802,
"loss": 1.6041250228881836,
"step": 420
},
{
"epoch": 0.045263157894736845,
"grad_norm": 0.38397344946861267,
"learning_rate": 0.0001068400029358309,
"loss": 1.677585983276367,
"step": 430
},
{
"epoch": 0.04631578947368421,
"grad_norm": 0.37290486693382263,
"learning_rate": 0.00010682573205051367,
"loss": 1.6698143005371093,
"step": 440
},
{
"epoch": 0.04736842105263158,
"grad_norm": 0.37734609842300415,
"learning_rate": 0.00010681085265163504,
"loss": 1.6791515350341797,
"step": 450
},
{
"epoch": 0.04842105263157895,
"grad_norm": 0.354443222284317,
"learning_rate": 0.00010679536490898761,
"loss": 1.6450014114379883,
"step": 460
},
{
"epoch": 0.049473684210526316,
"grad_norm": 0.3799300491809845,
"learning_rate": 0.00010677926899930603,
"loss": 1.6635103225708008,
"step": 470
},
{
"epoch": 0.05052631578947368,
"grad_norm": 0.3844967484474182,
"learning_rate": 0.00010676256510626478,
"loss": 1.6978870391845704,
"step": 480
},
{
"epoch": 0.05157894736842105,
"grad_norm": 0.38755500316619873,
"learning_rate": 0.00010674525342047629,
"loss": 1.6842260360717773,
"step": 490
},
{
"epoch": 0.05263157894736842,
"grad_norm": 0.39443737268447876,
"learning_rate": 0.00010672733413948862,
"loss": 1.6408458709716798,
"step": 500
},
{
"epoch": 0.05368421052631579,
"grad_norm": 0.4008043110370636,
"learning_rate": 0.00010670880746778328,
"loss": 1.61962833404541,
"step": 510
},
{
"epoch": 0.05473684210526316,
"grad_norm": 0.3917809724807739,
"learning_rate": 0.00010668967361677283,
"loss": 1.718182373046875,
"step": 520
},
{
"epoch": 0.05578947368421053,
"grad_norm": 0.364409476518631,
"learning_rate": 0.00010666993280479856,
"loss": 1.7204322814941406,
"step": 530
},
{
"epoch": 0.056842105263157895,
"grad_norm": 0.39319396018981934,
"learning_rate": 0.00010664958525712792,
"loss": 1.6448682785034179,
"step": 540
},
{
"epoch": 0.05789473684210526,
"grad_norm": 0.3864227533340454,
"learning_rate": 0.00010662863120595196,
"loss": 1.7400585174560548,
"step": 550
},
{
"epoch": 0.05894736842105263,
"grad_norm": 0.37699612975120544,
"learning_rate": 0.00010660707089038273,
"loss": 1.6591960906982421,
"step": 560
},
{
"epoch": 0.06,
"grad_norm": 0.3808913230895996,
"learning_rate": 0.00010658490455645052,
"loss": 1.63150634765625,
"step": 570
},
{
"epoch": 0.061052631578947365,
"grad_norm": 0.38882365822792053,
"learning_rate": 0.00010656213245710098,
"loss": 1.6896860122680664,
"step": 580
},
{
"epoch": 0.06210526315789474,
"grad_norm": 0.3772079050540924,
"learning_rate": 0.0001065387548521924,
"loss": 1.7085845947265625,
"step": 590
},
{
"epoch": 0.06315789473684211,
"grad_norm": 0.37180712819099426,
"learning_rate": 0.00010651477200849263,
"loss": 1.7532657623291015,
"step": 600
},
{
"epoch": 0.06421052631578947,
"grad_norm": 0.3878546357154846,
"learning_rate": 0.00010649018419967597,
"loss": 1.6636667251586914,
"step": 610
},
{
"epoch": 0.06526315789473684,
"grad_norm": 0.39751365780830383,
"learning_rate": 0.00010646499170632023,
"loss": 1.6579233169555665,
"step": 620
},
{
"epoch": 0.06631578947368422,
"grad_norm": 0.3831867277622223,
"learning_rate": 0.00010643919481590337,
"loss": 1.6426708221435546,
"step": 630
},
{
"epoch": 0.06736842105263158,
"grad_norm": 0.3749397099018097,
"learning_rate": 0.00010641279382280032,
"loss": 1.7154060363769532,
"step": 640
},
{
"epoch": 0.06842105263157895,
"grad_norm": 0.37839797139167786,
"learning_rate": 0.00010638578902827957,
"loss": 1.7217548370361329,
"step": 650
},
{
"epoch": 0.06947368421052631,
"grad_norm": 0.3703754246234894,
"learning_rate": 0.00010635818074049972,
"loss": 1.7110353469848634,
"step": 660
},
{
"epoch": 0.07052631578947369,
"grad_norm": 0.36747097969055176,
"learning_rate": 0.00010632996927450597,
"loss": 1.651369857788086,
"step": 670
},
{
"epoch": 0.07157894736842105,
"grad_norm": 0.36606892943382263,
"learning_rate": 0.00010630115495222664,
"loss": 1.6909339904785157,
"step": 680
},
{
"epoch": 0.07263157894736842,
"grad_norm": 0.3871472179889679,
"learning_rate": 0.00010627173810246927,
"loss": 1.6740509033203126,
"step": 690
},
{
"epoch": 0.07368421052631578,
"grad_norm": 0.3820892572402954,
"learning_rate": 0.00010624171906091708,
"loss": 1.7049301147460938,
"step": 700
},
{
"epoch": 0.07473684210526316,
"grad_norm": 0.38060277700424194,
"learning_rate": 0.00010621109817012501,
"loss": 1.7255819320678711,
"step": 710
},
{
"epoch": 0.07578947368421053,
"grad_norm": 0.37024298310279846,
"learning_rate": 0.00010617987577951588,
"loss": 1.707390594482422,
"step": 720
},
{
"epoch": 0.07684210526315789,
"grad_norm": 0.3976726233959198,
"learning_rate": 0.0001061480522453764,
"loss": 1.6445907592773437,
"step": 730
},
{
"epoch": 0.07789473684210527,
"grad_norm": 0.3904809057712555,
"learning_rate": 0.00010611562793085301,
"loss": 1.7427913665771484,
"step": 740
},
{
"epoch": 0.07894736842105263,
"grad_norm": 0.37776583433151245,
"learning_rate": 0.00010608260320594787,
"loss": 1.6211050033569336,
"step": 750
},
{
"epoch": 0.08,
"grad_norm": 0.382707804441452,
"learning_rate": 0.00010604897844751458,
"loss": 1.6817436218261719,
"step": 760
},
{
"epoch": 0.08105263157894736,
"grad_norm": 0.3894830048084259,
"learning_rate": 0.00010601475403925381,
"loss": 1.747372817993164,
"step": 770
},
{
"epoch": 0.08210526315789474,
"grad_norm": 0.38454341888427734,
"learning_rate": 0.00010597993037170907,
"loss": 1.667810821533203,
"step": 780
},
{
"epoch": 0.08315789473684211,
"grad_norm": 0.3924828767776489,
"learning_rate": 0.00010594450784226211,
"loss": 1.689559555053711,
"step": 790
},
{
"epoch": 0.08421052631578947,
"grad_norm": 0.390747994184494,
"learning_rate": 0.0001059084868551285,
"loss": 1.687558364868164,
"step": 800
},
{
"epoch": 0.08526315789473685,
"grad_norm": 0.38002100586891174,
"learning_rate": 0.0001058718678213529,
"loss": 1.7372432708740235,
"step": 810
},
{
"epoch": 0.0863157894736842,
"grad_norm": 0.3947979509830475,
"learning_rate": 0.00010583465115880448,
"loss": 1.7141420364379882,
"step": 820
},
{
"epoch": 0.08736842105263158,
"grad_norm": 0.38964593410491943,
"learning_rate": 0.0001057968372921721,
"loss": 1.6732599258422851,
"step": 830
},
{
"epoch": 0.08842105263157894,
"grad_norm": 0.3914567828178406,
"learning_rate": 0.00010575842665295942,
"loss": 1.697699737548828,
"step": 840
},
{
"epoch": 0.08947368421052632,
"grad_norm": 0.3780556917190552,
"learning_rate": 0.00010571941967948013,
"loss": 1.6859580993652343,
"step": 850
},
{
"epoch": 0.09052631578947369,
"grad_norm": 0.3804113268852234,
"learning_rate": 0.00010567981681685271,
"loss": 1.630574607849121,
"step": 860
},
{
"epoch": 0.09157894736842105,
"grad_norm": 0.3920338451862335,
"learning_rate": 0.0001056396185169956,
"loss": 1.701805877685547,
"step": 870
},
{
"epoch": 0.09263157894736843,
"grad_norm": 0.3645232021808624,
"learning_rate": 0.00010559882523862185,
"loss": 1.6626638412475585,
"step": 880
},
{
"epoch": 0.09368421052631579,
"grad_norm": 0.39647483825683594,
"learning_rate": 0.000105557437447234,
"loss": 1.657071876525879,
"step": 890
},
{
"epoch": 0.09473684210526316,
"grad_norm": 0.3784042298793793,
"learning_rate": 0.00010551545561511872,
"loss": 1.6789131164550781,
"step": 900
},
{
"epoch": 0.09578947368421052,
"grad_norm": 0.3799436390399933,
"learning_rate": 0.00010547288022134141,
"loss": 1.6874401092529296,
"step": 910
},
{
"epoch": 0.0968421052631579,
"grad_norm": 0.3979872465133667,
"learning_rate": 0.00010542971175174078,
"loss": 1.7372554779052733,
"step": 920
},
{
"epoch": 0.09789473684210526,
"grad_norm": 0.3869173228740692,
"learning_rate": 0.0001053859506989233,
"loss": 1.6965164184570312,
"step": 930
},
{
"epoch": 0.09894736842105263,
"grad_norm": 0.38553228974342346,
"learning_rate": 0.0001053415975622575,
"loss": 1.6804073333740235,
"step": 940
},
{
"epoch": 0.1,
"grad_norm": 0.37855857610702515,
"learning_rate": 0.00010529665284786835,
"loss": 1.7479766845703124,
"step": 950
},
{
"epoch": 0.10105263157894737,
"grad_norm": 0.36974212527275085,
"learning_rate": 0.00010525111706863153,
"loss": 1.6555421829223633,
"step": 960
},
{
"epoch": 0.10210526315789474,
"grad_norm": 0.3829262852668762,
"learning_rate": 0.00010520499074416742,
"loss": 1.7271907806396485,
"step": 970
},
{
"epoch": 0.1031578947368421,
"grad_norm": 0.3871605396270752,
"learning_rate": 0.0001051582744008353,
"loss": 1.6716243743896484,
"step": 980
},
{
"epoch": 0.10421052631578948,
"grad_norm": 0.3923998475074768,
"learning_rate": 0.00010511096857172731,
"loss": 1.6450519561767578,
"step": 990
},
{
"epoch": 0.10526315789473684,
"grad_norm": 0.38333484530448914,
"learning_rate": 0.00010506307379666238,
"loss": 1.6865043640136719,
"step": 1000
},
{
"epoch": 0.10631578947368421,
"grad_norm": 0.38256773352622986,
"learning_rate": 0.00010501459062218,
"loss": 1.6601579666137696,
"step": 1010
},
{
"epoch": 0.10736842105263159,
"grad_norm": 0.3737237751483917,
"learning_rate": 0.00010496551960153409,
"loss": 1.6208690643310546,
"step": 1020
},
{
"epoch": 0.10842105263157895,
"grad_norm": 0.366969496011734,
"learning_rate": 0.00010491586129468662,
"loss": 1.6808839797973634,
"step": 1030
},
{
"epoch": 0.10947368421052632,
"grad_norm": 0.3720376193523407,
"learning_rate": 0.0001048656162683012,
"loss": 1.6338840484619142,
"step": 1040
},
{
"epoch": 0.11052631578947368,
"grad_norm": 0.39924025535583496,
"learning_rate": 0.00010481478509573669,
"loss": 1.652592086791992,
"step": 1050
},
{
"epoch": 0.11157894736842106,
"grad_norm": 0.37709176540374756,
"learning_rate": 0.00010476336835704059,
"loss": 1.6794198989868163,
"step": 1060
},
{
"epoch": 0.11263157894736842,
"grad_norm": 0.382405161857605,
"learning_rate": 0.00010471136663894244,
"loss": 1.702239990234375,
"step": 1070
},
{
"epoch": 0.11368421052631579,
"grad_norm": 0.3955666720867157,
"learning_rate": 0.00010465878053484715,
"loss": 1.625558090209961,
"step": 1080
},
{
"epoch": 0.11473684210526315,
"grad_norm": 0.3984505534172058,
"learning_rate": 0.0001046056106448282,
"loss": 1.7061031341552735,
"step": 1090
},
{
"epoch": 0.11578947368421053,
"grad_norm": 0.37337619066238403,
"learning_rate": 0.00010455185757562081,
"loss": 1.6474536895751952,
"step": 1100
},
{
"epoch": 0.1168421052631579,
"grad_norm": 0.4265633225440979,
"learning_rate": 0.00010449752194061497,
"loss": 1.6948539733886718,
"step": 1110
},
{
"epoch": 0.11789473684210526,
"grad_norm": 0.39065343141555786,
"learning_rate": 0.0001044426043598485,
"loss": 1.6905693054199218,
"step": 1120
},
{
"epoch": 0.11894736842105263,
"grad_norm": 0.3910517692565918,
"learning_rate": 0.00010438710545999999,
"loss": 1.6512699127197266,
"step": 1130
},
{
"epoch": 0.12,
"grad_norm": 0.41286537051200867,
"learning_rate": 0.00010433102587438154,
"loss": 1.6904163360595703,
"step": 1140
},
{
"epoch": 0.12105263157894737,
"grad_norm": 0.39058077335357666,
"learning_rate": 0.00010427436624293164,
"loss": 1.6889778137207032,
"step": 1150
},
{
"epoch": 0.12210526315789473,
"grad_norm": 0.40376579761505127,
"learning_rate": 0.00010421712721220786,
"loss": 1.6660743713378907,
"step": 1160
},
{
"epoch": 0.1231578947368421,
"grad_norm": 0.4065842628479004,
"learning_rate": 0.00010415930943537937,
"loss": 1.7282680511474608,
"step": 1170
},
{
"epoch": 0.12421052631578948,
"grad_norm": 0.3935592770576477,
"learning_rate": 0.00010410091357221965,
"loss": 1.7208686828613282,
"step": 1180
},
{
"epoch": 0.12526315789473685,
"grad_norm": 0.3769897520542145,
"learning_rate": 0.00010404194028909876,
"loss": 1.6730665206909179,
"step": 1190
},
{
"epoch": 0.12631578947368421,
"grad_norm": 0.37976640462875366,
"learning_rate": 0.00010398239025897598,
"loss": 1.7071300506591798,
"step": 1200
},
{
"epoch": 0.12736842105263158,
"grad_norm": 0.38293200731277466,
"learning_rate": 0.0001039222641613919,
"loss": 1.7225513458251953,
"step": 1210
},
{
"epoch": 0.12842105263157894,
"grad_norm": 0.3943805694580078,
"learning_rate": 0.00010386156268246077,
"loss": 1.6900711059570312,
"step": 1220
},
{
"epoch": 0.12947368421052632,
"grad_norm": 0.402694970369339,
"learning_rate": 0.00010380028651486271,
"loss": 1.6741355895996093,
"step": 1230
},
{
"epoch": 0.13052631578947368,
"grad_norm": 0.4034770429134369,
"learning_rate": 0.00010373843635783572,
"loss": 1.7251928329467774,
"step": 1240
},
{
"epoch": 0.13157894736842105,
"grad_norm": 0.4223957359790802,
"learning_rate": 0.00010367601291716777,
"loss": 1.7350204467773438,
"step": 1250
},
{
"epoch": 0.13263157894736843,
"grad_norm": 0.3636983633041382,
"learning_rate": 0.0001036130169051887,
"loss": 1.6685359954833985,
"step": 1260
},
{
"epoch": 0.1336842105263158,
"grad_norm": 0.36913859844207764,
"learning_rate": 0.00010354944904076209,
"loss": 1.6918949127197265,
"step": 1270
},
{
"epoch": 0.13473684210526315,
"grad_norm": 0.3916381597518921,
"learning_rate": 0.00010348531004927711,
"loss": 1.6259313583374024,
"step": 1280
},
{
"epoch": 0.13578947368421052,
"grad_norm": 0.38772350549697876,
"learning_rate": 0.00010342060066264016,
"loss": 1.7148677825927734,
"step": 1290
},
{
"epoch": 0.1368421052631579,
"grad_norm": 0.38373488187789917,
"learning_rate": 0.00010335532161926664,
"loss": 1.6328174591064453,
"step": 1300
},
{
"epoch": 0.13789473684210526,
"grad_norm": 0.3877631723880768,
"learning_rate": 0.00010328947366407237,
"loss": 1.646784210205078,
"step": 1310
},
{
"epoch": 0.13894736842105262,
"grad_norm": 0.39882156252861023,
"learning_rate": 0.00010322305754846519,
"loss": 1.6600376129150392,
"step": 1320
},
{
"epoch": 0.14,
"grad_norm": 0.40457776188850403,
"learning_rate": 0.00010315607403033641,
"loss": 1.669814109802246,
"step": 1330
},
{
"epoch": 0.14105263157894737,
"grad_norm": 0.3948962688446045,
"learning_rate": 0.00010308852387405208,
"loss": 1.715940284729004,
"step": 1340
},
{
"epoch": 0.14210526315789473,
"grad_norm": 0.3921595513820648,
"learning_rate": 0.00010302040785044425,
"loss": 1.6944934844970703,
"step": 1350
},
{
"epoch": 0.1431578947368421,
"grad_norm": 0.3857240676879883,
"learning_rate": 0.00010295172673680234,
"loss": 1.6900419235229491,
"step": 1360
},
{
"epoch": 0.14421052631578948,
"grad_norm": 0.38249680399894714,
"learning_rate": 0.00010288248131686406,
"loss": 1.7138862609863281,
"step": 1370
},
{
"epoch": 0.14526315789473684,
"grad_norm": 0.40845534205436707,
"learning_rate": 0.00010281267238080664,
"loss": 1.7212867736816406,
"step": 1380
},
{
"epoch": 0.1463157894736842,
"grad_norm": 0.3911115229129791,
"learning_rate": 0.00010274230072523764,
"loss": 1.7087575912475585,
"step": 1390
},
{
"epoch": 0.14736842105263157,
"grad_norm": 0.3967211842536926,
"learning_rate": 0.00010267136715318605,
"loss": 1.675175094604492,
"step": 1400
},
{
"epoch": 0.14842105263157895,
"grad_norm": 0.3820992410182953,
"learning_rate": 0.00010259987247409298,
"loss": 1.665155792236328,
"step": 1410
},
{
"epoch": 0.14947368421052631,
"grad_norm": 0.40317046642303467,
"learning_rate": 0.00010252781750380252,
"loss": 1.6777839660644531,
"step": 1420
},
{
"epoch": 0.15052631578947367,
"grad_norm": 0.39026641845703125,
"learning_rate": 0.00010245520306455232,
"loss": 1.6641407012939453,
"step": 1430
},
{
"epoch": 0.15157894736842106,
"grad_norm": 0.38703930377960205,
"learning_rate": 0.00010238202998496432,
"loss": 1.7006916046142577,
"step": 1440
},
{
"epoch": 0.15263157894736842,
"grad_norm": 0.3920949697494507,
"learning_rate": 0.00010230829910003525,
"loss": 1.6237125396728516,
"step": 1450
},
{
"epoch": 0.15368421052631578,
"grad_norm": 0.40310102701187134,
"learning_rate": 0.00010223401125112709,
"loss": 1.693703842163086,
"step": 1460
},
{
"epoch": 0.15473684210526314,
"grad_norm": 0.3895237147808075,
"learning_rate": 0.00010215916728595746,
"loss": 1.6554393768310547,
"step": 1470
},
{
"epoch": 0.15578947368421053,
"grad_norm": 0.3830355703830719,
"learning_rate": 0.00010208376805858997,
"loss": 1.6817665100097656,
"step": 1480
},
{
"epoch": 0.1568421052631579,
"grad_norm": 0.4044099450111389,
"learning_rate": 0.00010200781442942451,
"loss": 1.740530776977539,
"step": 1490
},
{
"epoch": 0.15789473684210525,
"grad_norm": 0.37278082966804504,
"learning_rate": 0.00010193130726518736,
"loss": 1.7269683837890626,
"step": 1500
},
{
"epoch": 0.15894736842105264,
"grad_norm": 0.3909358084201813,
"learning_rate": 0.00010185424743892131,
"loss": 1.674229049682617,
"step": 1510
},
{
"epoch": 0.16,
"grad_norm": 0.3877439796924591,
"learning_rate": 0.00010177663582997574,
"loss": 1.6566276550292969,
"step": 1520
},
{
"epoch": 0.16105263157894736,
"grad_norm": 0.3673596978187561,
"learning_rate": 0.00010169847332399658,
"loss": 1.6969722747802733,
"step": 1530
},
{
"epoch": 0.16210526315789472,
"grad_norm": 0.428408145904541,
"learning_rate": 0.00010161976081291614,
"loss": 1.6617691040039062,
"step": 1540
},
{
"epoch": 0.1631578947368421,
"grad_norm": 0.38442328572273254,
"learning_rate": 0.00010154049919494305,
"loss": 1.7180919647216797,
"step": 1550
},
{
"epoch": 0.16421052631578947,
"grad_norm": 0.41423359513282776,
"learning_rate": 0.00010146068937455184,
"loss": 1.7110111236572265,
"step": 1560
},
{
"epoch": 0.16526315789473683,
"grad_norm": 0.3815020024776459,
"learning_rate": 0.00010138033226247282,
"loss": 1.6620532989501953,
"step": 1570
},
{
"epoch": 0.16631578947368422,
"grad_norm": 0.38987597823143005,
"learning_rate": 0.00010129942877568153,
"loss": 1.6376758575439454,
"step": 1580
},
{
"epoch": 0.16736842105263158,
"grad_norm": 0.37103158235549927,
"learning_rate": 0.00010121797983738831,
"loss": 1.6269058227539062,
"step": 1590
},
{
"epoch": 0.16842105263157894,
"grad_norm": 0.39582741260528564,
"learning_rate": 0.00010113598637702785,
"loss": 1.6544437408447266,
"step": 1600
},
{
"epoch": 0.1694736842105263,
"grad_norm": 0.3875832259654999,
"learning_rate": 0.0001010534493302485,
"loss": 1.69503173828125,
"step": 1610
},
{
"epoch": 0.1705263157894737,
"grad_norm": 0.40506550669670105,
"learning_rate": 0.00010097036963890156,
"loss": 1.6826278686523437,
"step": 1620
},
{
"epoch": 0.17157894736842105,
"grad_norm": 0.39827048778533936,
"learning_rate": 0.00010088674825103067,
"loss": 1.6500736236572267,
"step": 1630
},
{
"epoch": 0.1726315789473684,
"grad_norm": 0.3786768913269043,
"learning_rate": 0.00010080258612086083,
"loss": 1.6809326171875,
"step": 1640
},
{
"epoch": 0.1736842105263158,
"grad_norm": 0.40326225757598877,
"learning_rate": 0.00010071788420878764,
"loss": 1.7387603759765624,
"step": 1650
},
{
"epoch": 0.17473684210526316,
"grad_norm": 0.3733818829059601,
"learning_rate": 0.00010063264348136629,
"loss": 1.6930301666259766,
"step": 1660
},
{
"epoch": 0.17578947368421052,
"grad_norm": 0.4019014239311218,
"learning_rate": 0.00010054686491130048,
"loss": 1.665353012084961,
"step": 1670
},
{
"epoch": 0.17684210526315788,
"grad_norm": 0.3994007110595703,
"learning_rate": 0.00010046054947743142,
"loss": 1.7481708526611328,
"step": 1680
},
{
"epoch": 0.17789473684210527,
"grad_norm": 0.40046176314353943,
"learning_rate": 0.00010037369816472658,
"loss": 1.6684654235839844,
"step": 1690
},
{
"epoch": 0.17894736842105263,
"grad_norm": 0.39062178134918213,
"learning_rate": 0.00010028631196426851,
"loss": 1.6636728286743163,
"step": 1700
},
{
"epoch": 0.18,
"grad_norm": 0.40030282735824585,
"learning_rate": 0.0001001983918732435,
"loss": 1.6382123947143554,
"step": 1710
},
{
"epoch": 0.18105263157894738,
"grad_norm": 0.38396012783050537,
"learning_rate": 0.00010010993889493013,
"loss": 1.6094409942626953,
"step": 1720
},
{
"epoch": 0.18210526315789474,
"grad_norm": 0.3969299793243408,
"learning_rate": 0.000100020954038688,
"loss": 1.6550315856933593,
"step": 1730
},
{
"epoch": 0.1831578947368421,
"grad_norm": 0.39174884557724,
"learning_rate": 9.993143831994603e-05,
"loss": 1.7123249053955079,
"step": 1740
},
{
"epoch": 0.18421052631578946,
"grad_norm": 0.38760584592819214,
"learning_rate": 9.984139276019098e-05,
"loss": 1.6742156982421874,
"step": 1750
},
{
"epoch": 0.18526315789473685,
"grad_norm": 0.3817841410636902,
"learning_rate": 9.975081838695576e-05,
"loss": 1.641263771057129,
"step": 1760
},
{
"epoch": 0.1863157894736842,
"grad_norm": 0.4085705578327179,
"learning_rate": 9.965971623380768e-05,
"loss": 1.7673213958740235,
"step": 1770
},
{
"epoch": 0.18736842105263157,
"grad_norm": 0.38965287804603577,
"learning_rate": 9.956808734033671e-05,
"loss": 1.770319366455078,
"step": 1780
},
{
"epoch": 0.18842105263157893,
"grad_norm": 0.3770400881767273,
"learning_rate": 9.947593275214358e-05,
"loss": 1.6587142944335938,
"step": 1790
},
{
"epoch": 0.18947368421052632,
"grad_norm": 0.40959247946739197,
"learning_rate": 9.938325352082786e-05,
"loss": 1.6820697784423828,
"step": 1800
},
{
"epoch": 0.19052631578947368,
"grad_norm": 0.37764784693717957,
"learning_rate": 9.929005070397595e-05,
"loss": 1.6965087890625,
"step": 1810
},
{
"epoch": 0.19157894736842104,
"grad_norm": 0.37487778067588806,
"learning_rate": 9.9196325365149e-05,
"loss": 1.6261119842529297,
"step": 1820
},
{
"epoch": 0.19263157894736843,
"grad_norm": 0.4048542380332947,
"learning_rate": 9.910207857387085e-05,
"loss": 1.7076032638549805,
"step": 1830
},
{
"epoch": 0.1936842105263158,
"grad_norm": 0.37118133902549744,
"learning_rate": 9.90073114056157e-05,
"loss": 1.70123233795166,
"step": 1840
},
{
"epoch": 0.19473684210526315,
"grad_norm": 0.38945528864860535,
"learning_rate": 9.891202494179595e-05,
"loss": 1.7137296676635743,
"step": 1850
},
{
"epoch": 0.1957894736842105,
"grad_norm": 0.39081960916519165,
"learning_rate": 9.881622026974978e-05,
"loss": 1.6556056976318358,
"step": 1860
},
{
"epoch": 0.1968421052631579,
"grad_norm": 0.4000365436077118,
"learning_rate": 9.871989848272882e-05,
"loss": 1.708022689819336,
"step": 1870
},
{
"epoch": 0.19789473684210526,
"grad_norm": 0.38972243666648865,
"learning_rate": 9.86230606798856e-05,
"loss": 1.6936985015869142,
"step": 1880
},
{
"epoch": 0.19894736842105262,
"grad_norm": 0.4023416340351105,
"learning_rate": 9.852570796626104e-05,
"loss": 1.6013282775878905,
"step": 1890
},
{
"epoch": 0.2,
"grad_norm": 0.37790361046791077,
"learning_rate": 9.842784145277185e-05,
"loss": 1.678757095336914,
"step": 1900
},
{
"epoch": 0.20105263157894737,
"grad_norm": 0.4072909653186798,
"learning_rate": 9.832946225619782e-05,
"loss": 1.6550043106079102,
"step": 1910
},
{
"epoch": 0.20210526315789473,
"grad_norm": 0.4222109317779541,
"learning_rate": 9.823057149916913e-05,
"loss": 1.6794788360595703,
"step": 1920
},
{
"epoch": 0.2031578947368421,
"grad_norm": 0.3997038006782532,
"learning_rate": 9.813117031015348e-05,
"loss": 1.708123779296875,
"step": 1930
},
{
"epoch": 0.20421052631578948,
"grad_norm": 0.387678861618042,
"learning_rate": 9.803125982344328e-05,
"loss": 1.694279098510742,
"step": 1940
},
{
"epoch": 0.20526315789473684,
"grad_norm": 0.41388800740242004,
"learning_rate": 9.793084117914258e-05,
"loss": 1.698614501953125,
"step": 1950
},
{
"epoch": 0.2063157894736842,
"grad_norm": 0.38706713914871216,
"learning_rate": 9.782991552315424e-05,
"loss": 1.702214813232422,
"step": 1960
},
{
"epoch": 0.2073684210526316,
"grad_norm": 0.3965074419975281,
"learning_rate": 9.772848400716673e-05,
"loss": 1.6214000701904296,
"step": 1970
},
{
"epoch": 0.20842105263157895,
"grad_norm": 0.39218032360076904,
"learning_rate": 9.762654778864099e-05,
"loss": 1.681211280822754,
"step": 1980
},
{
"epoch": 0.2094736842105263,
"grad_norm": 0.4117305874824524,
"learning_rate": 9.752410803079726e-05,
"loss": 1.6745601654052735,
"step": 1990
},
{
"epoch": 0.21052631578947367,
"grad_norm": 0.3973471224308014,
"learning_rate": 9.742116590260185e-05,
"loss": 1.6459293365478516,
"step": 2000
},
{
"epoch": 0.21157894736842106,
"grad_norm": 0.3847576975822449,
"learning_rate": 9.731772257875366e-05,
"loss": 1.6581769943237306,
"step": 2010
},
{
"epoch": 0.21263157894736842,
"grad_norm": 0.4136882424354553,
"learning_rate": 9.721377923967092e-05,
"loss": 1.7314947128295899,
"step": 2020
},
{
"epoch": 0.21368421052631578,
"grad_norm": 0.37820902466773987,
"learning_rate": 9.710933707147764e-05,
"loss": 1.7070299148559571,
"step": 2030
},
{
"epoch": 0.21473684210526317,
"grad_norm": 0.39630916714668274,
"learning_rate": 9.700439726599012e-05,
"loss": 1.6553241729736328,
"step": 2040
},
{
"epoch": 0.21578947368421053,
"grad_norm": 0.3991798758506775,
"learning_rate": 9.68989610207033e-05,
"loss": 1.7385829925537108,
"step": 2050
},
{
"epoch": 0.2168421052631579,
"grad_norm": 0.4119565188884735,
"learning_rate": 9.679302953877712e-05,
"loss": 1.71380615234375,
"step": 2060
},
{
"epoch": 0.21789473684210525,
"grad_norm": 0.40724804997444153,
"learning_rate": 9.66866040290228e-05,
"loss": 1.6676467895507812,
"step": 2070
},
{
"epoch": 0.21894736842105264,
"grad_norm": 0.4088967442512512,
"learning_rate": 9.657968570588905e-05,
"loss": 1.674250030517578,
"step": 2080
},
{
"epoch": 0.22,
"grad_norm": 0.40387439727783203,
"learning_rate": 9.64722757894482e-05,
"loss": 1.676458740234375,
"step": 2090
},
{
"epoch": 0.22105263157894736,
"grad_norm": 0.4028227925300598,
"learning_rate": 9.636437550538226e-05,
"loss": 1.6708587646484374,
"step": 2100
},
{
"epoch": 0.22210526315789475,
"grad_norm": 0.40027210116386414,
"learning_rate": 9.625598608496895e-05,
"loss": 1.6314043045043944,
"step": 2110
},
{
"epoch": 0.2231578947368421,
"grad_norm": 0.386688768863678,
"learning_rate": 9.614710876506763e-05,
"loss": 1.725076675415039,
"step": 2120
},
{
"epoch": 0.22421052631578947,
"grad_norm": 0.4061787724494934,
"learning_rate": 9.603774478810528e-05,
"loss": 1.6826349258422852,
"step": 2130
},
{
"epoch": 0.22526315789473683,
"grad_norm": 0.40370142459869385,
"learning_rate": 9.592789540206218e-05,
"loss": 1.649374771118164,
"step": 2140
},
{
"epoch": 0.22631578947368422,
"grad_norm": 0.40586093068122864,
"learning_rate": 9.581756186045777e-05,
"loss": 1.6614540100097657,
"step": 2150
},
{
"epoch": 0.22736842105263158,
"grad_norm": 0.3933681547641754,
"learning_rate": 9.570674542233628e-05,
"loss": 1.6946598052978517,
"step": 2160
},
{
"epoch": 0.22842105263157894,
"grad_norm": 0.3825010359287262,
"learning_rate": 9.559544735225242e-05,
"loss": 1.6574283599853517,
"step": 2170
},
{
"epoch": 0.2294736842105263,
"grad_norm": 0.4000436067581177,
"learning_rate": 9.548366892025693e-05,
"loss": 1.673634910583496,
"step": 2180
},
{
"epoch": 0.2305263157894737,
"grad_norm": 0.3942500054836273,
"learning_rate": 9.537141140188206e-05,
"loss": 1.621174430847168,
"step": 2190
},
{
"epoch": 0.23157894736842105,
"grad_norm": 0.3846987783908844,
"learning_rate": 9.525867607812708e-05,
"loss": 1.6244104385375977,
"step": 2200
},
{
"epoch": 0.2326315789473684,
"grad_norm": 0.38483455777168274,
"learning_rate": 9.514546423544357e-05,
"loss": 1.687708282470703,
"step": 2210
},
{
"epoch": 0.2336842105263158,
"grad_norm": 0.4134112000465393,
"learning_rate": 9.503177716572082e-05,
"loss": 1.7054229736328126,
"step": 2220
},
{
"epoch": 0.23473684210526316,
"grad_norm": 0.3780292868614197,
"learning_rate": 9.491761616627101e-05,
"loss": 1.6283729553222657,
"step": 2230
},
{
"epoch": 0.23578947368421052,
"grad_norm": 0.40246784687042236,
"learning_rate": 9.480298253981456e-05,
"loss": 1.7036407470703125,
"step": 2240
},
{
"epoch": 0.23684210526315788,
"grad_norm": 0.4002091884613037,
"learning_rate": 9.468787759446502e-05,
"loss": 1.7064756393432616,
"step": 2250
},
{
"epoch": 0.23789473684210527,
"grad_norm": 0.40926146507263184,
"learning_rate": 9.457230264371439e-05,
"loss": 1.6858642578125,
"step": 2260
},
{
"epoch": 0.23894736842105263,
"grad_norm": 0.41373902559280396,
"learning_rate": 9.445625900641796e-05,
"loss": 1.655508804321289,
"step": 2270
},
{
"epoch": 0.24,
"grad_norm": 0.38966718316078186,
"learning_rate": 9.433974800677935e-05,
"loss": 1.6741256713867188,
"step": 2280
},
{
"epoch": 0.24105263157894738,
"grad_norm": 0.4069412648677826,
"learning_rate": 9.422277097433537e-05,
"loss": 1.6685916900634765,
"step": 2290
},
{
"epoch": 0.24210526315789474,
"grad_norm": 0.3916907012462616,
"learning_rate": 9.410532924394083e-05,
"loss": 1.6491849899291993,
"step": 2300
},
{
"epoch": 0.2431578947368421,
"grad_norm": 0.39959436655044556,
"learning_rate": 9.398742415575336e-05,
"loss": 1.670114517211914,
"step": 2310
},
{
"epoch": 0.24421052631578946,
"grad_norm": 0.3950902223587036,
"learning_rate": 9.386905705521803e-05,
"loss": 1.6907678604125977,
"step": 2320
},
{
"epoch": 0.24526315789473685,
"grad_norm": 0.38667526841163635,
"learning_rate": 9.375022929305213e-05,
"loss": 1.669590377807617,
"step": 2330
},
{
"epoch": 0.2463157894736842,
"grad_norm": 0.39125263690948486,
"learning_rate": 9.363094222522958e-05,
"loss": 1.6502418518066406,
"step": 2340
},
{
"epoch": 0.24736842105263157,
"grad_norm": 0.38178369402885437,
"learning_rate": 9.351119721296566e-05,
"loss": 1.7035490036010743,
"step": 2350
},
{
"epoch": 0.24842105263157896,
"grad_norm": 0.37467339634895325,
"learning_rate": 9.339099562270128e-05,
"loss": 1.6536640167236327,
"step": 2360
},
{
"epoch": 0.24947368421052632,
"grad_norm": 0.41233041882514954,
"learning_rate": 9.327033882608754e-05,
"loss": 1.6268924713134765,
"step": 2370
},
{
"epoch": 0.2505263157894737,
"grad_norm": 0.3746933937072754,
"learning_rate": 9.314922819996997e-05,
"loss": 1.6240985870361329,
"step": 2380
},
{
"epoch": 0.25157894736842107,
"grad_norm": 0.3932549059391022,
"learning_rate": 9.302766512637293e-05,
"loss": 1.6809700012207032,
"step": 2390
},
{
"epoch": 0.25263157894736843,
"grad_norm": 0.4058087468147278,
"learning_rate": 9.290565099248368e-05,
"loss": 1.6474214553833009,
"step": 2400
},
{
"epoch": 0.2536842105263158,
"grad_norm": 0.3873753547668457,
"learning_rate": 9.278318719063673e-05,
"loss": 1.6398870468139648,
"step": 2410
},
{
"epoch": 0.25473684210526315,
"grad_norm": 0.41126886010169983,
"learning_rate": 9.26602751182978e-05,
"loss": 1.6111644744873046,
"step": 2420
},
{
"epoch": 0.2557894736842105,
"grad_norm": 0.40002816915512085,
"learning_rate": 9.2536916178048e-05,
"loss": 1.6024229049682617,
"step": 2430
},
{
"epoch": 0.25684210526315787,
"grad_norm": 0.4194015562534332,
"learning_rate": 9.241311177756771e-05,
"loss": 1.6467687606811523,
"step": 2440
},
{
"epoch": 0.2578947368421053,
"grad_norm": 0.4181770980358124,
"learning_rate": 9.228886332962062e-05,
"loss": 1.6439130783081055,
"step": 2450
},
{
"epoch": 0.25894736842105265,
"grad_norm": 0.40925332903862,
"learning_rate": 9.216417225203754e-05,
"loss": 1.6347824096679688,
"step": 2460
},
{
"epoch": 0.26,
"grad_norm": 0.40195897221565247,
"learning_rate": 9.203903996770019e-05,
"loss": 1.6572818756103516,
"step": 2470
},
{
"epoch": 0.26105263157894737,
"grad_norm": 0.4277157485485077,
"learning_rate": 9.191346790452509e-05,
"loss": 1.6013570785522462,
"step": 2480
},
{
"epoch": 0.26210526315789473,
"grad_norm": 0.3951636552810669,
"learning_rate": 9.178745749544716e-05,
"loss": 1.694039535522461,
"step": 2490
},
{
"epoch": 0.2631578947368421,
"grad_norm": 0.3961932957172394,
"learning_rate": 9.166101017840337e-05,
"loss": 1.6311038970947265,
"step": 2500
},
{
"epoch": 0.26421052631578945,
"grad_norm": 0.40256279706954956,
"learning_rate": 9.15341273963164e-05,
"loss": 1.7131736755371094,
"step": 2510
},
{
"epoch": 0.26526315789473687,
"grad_norm": 0.40076208114624023,
"learning_rate": 9.14068105970781e-05,
"loss": 1.659266471862793,
"step": 2520
},
{
"epoch": 0.26631578947368423,
"grad_norm": 0.39892420172691345,
"learning_rate": 9.127906123353305e-05,
"loss": 1.6891080856323242,
"step": 2530
},
{
"epoch": 0.2673684210526316,
"grad_norm": 0.39453125,
"learning_rate": 9.115088076346184e-05,
"loss": 1.6869060516357421,
"step": 2540
},
{
"epoch": 0.26842105263157895,
"grad_norm": 0.3876430094242096,
"learning_rate": 9.102227064956465e-05,
"loss": 1.623502540588379,
"step": 2550
},
{
"epoch": 0.2694736842105263,
"grad_norm": 0.3828693628311157,
"learning_rate": 9.08932323594443e-05,
"loss": 1.6787071228027344,
"step": 2560
},
{
"epoch": 0.27052631578947367,
"grad_norm": 0.3757915198802948,
"learning_rate": 9.076376736558976e-05,
"loss": 1.7229637145996093,
"step": 2570
},
{
"epoch": 0.27157894736842103,
"grad_norm": 0.3994489312171936,
"learning_rate": 9.063387714535916e-05,
"loss": 1.6279123306274415,
"step": 2580
},
{
"epoch": 0.27263157894736845,
"grad_norm": 0.40050971508026123,
"learning_rate": 9.0503563180963e-05,
"loss": 1.667708969116211,
"step": 2590
},
{
"epoch": 0.2736842105263158,
"grad_norm": 0.4005604684352875,
"learning_rate": 9.037282695944726e-05,
"loss": 1.6468616485595704,
"step": 2600
},
{
"epoch": 0.27473684210526317,
"grad_norm": 0.40057310461997986,
"learning_rate": 9.024166997267636e-05,
"loss": 1.6907684326171875,
"step": 2610
},
{
"epoch": 0.27578947368421053,
"grad_norm": 0.4074793756008148,
"learning_rate": 9.011009371731623e-05,
"loss": 1.6792390823364258,
"step": 2620
},
{
"epoch": 0.2768421052631579,
"grad_norm": 0.4014405310153961,
"learning_rate": 8.997809969481715e-05,
"loss": 1.640324592590332,
"step": 2630
},
{
"epoch": 0.27789473684210525,
"grad_norm": 0.42860186100006104,
"learning_rate": 8.984568941139665e-05,
"loss": 1.6390762329101562,
"step": 2640
},
{
"epoch": 0.2789473684210526,
"grad_norm": 0.41278424859046936,
"learning_rate": 8.971286437802235e-05,
"loss": 1.7043113708496094,
"step": 2650
},
{
"epoch": 0.28,
"grad_norm": 0.38656142354011536,
"learning_rate": 8.957962611039464e-05,
"loss": 1.7256532669067384,
"step": 2660
},
{
"epoch": 0.2810526315789474,
"grad_norm": 0.3984103202819824,
"learning_rate": 8.944597612892944e-05,
"loss": 1.6301074981689454,
"step": 2670
},
{
"epoch": 0.28210526315789475,
"grad_norm": 0.3937322795391083,
"learning_rate": 8.93119159587409e-05,
"loss": 1.6612771987915038,
"step": 2680
},
{
"epoch": 0.2831578947368421,
"grad_norm": 0.39241543412208557,
"learning_rate": 8.917744712962387e-05,
"loss": 1.6962703704833983,
"step": 2690
},
{
"epoch": 0.28421052631578947,
"grad_norm": 0.407466858625412,
"learning_rate": 8.904257117603653e-05,
"loss": 1.721807861328125,
"step": 2700
},
{
"epoch": 0.28526315789473683,
"grad_norm": 0.3965199589729309,
"learning_rate": 8.890728963708288e-05,
"loss": 1.6854072570800782,
"step": 2710
},
{
"epoch": 0.2863157894736842,
"grad_norm": 0.3866688013076782,
"learning_rate": 8.877160405649515e-05,
"loss": 1.678403663635254,
"step": 2720
},
{
"epoch": 0.2873684210526316,
"grad_norm": 0.40115654468536377,
"learning_rate": 8.863551598261618e-05,
"loss": 1.688330078125,
"step": 2730
},
{
"epoch": 0.28842105263157897,
"grad_norm": 0.41881707310676575,
"learning_rate": 8.849902696838176e-05,
"loss": 1.685501480102539,
"step": 2740
},
{
"epoch": 0.2894736842105263,
"grad_norm": 0.3956238329410553,
"learning_rate": 8.836213857130296e-05,
"loss": 1.6521308898925782,
"step": 2750
},
{
"epoch": 0.2905263157894737,
"grad_norm": 0.3809671700000763,
"learning_rate": 8.822485235344825e-05,
"loss": 1.6597816467285156,
"step": 2760
},
{
"epoch": 0.29157894736842105,
"grad_norm": 0.39534077048301697,
"learning_rate": 8.808716988142575e-05,
"loss": 1.6627084732055664,
"step": 2770
},
{
"epoch": 0.2926315789473684,
"grad_norm": 0.37715721130371094,
"learning_rate": 8.794909272636537e-05,
"loss": 1.6618637084960937,
"step": 2780
},
{
"epoch": 0.29368421052631577,
"grad_norm": 0.4065514802932739,
"learning_rate": 8.781062246390083e-05,
"loss": 1.6399276733398438,
"step": 2790
},
{
"epoch": 0.29473684210526313,
"grad_norm": 0.3923916220664978,
"learning_rate": 8.767176067415169e-05,
"loss": 1.668557357788086,
"step": 2800
},
{
"epoch": 0.29578947368421055,
"grad_norm": 0.3970358967781067,
"learning_rate": 8.75325089417053e-05,
"loss": 1.6664169311523438,
"step": 2810
},
{
"epoch": 0.2968421052631579,
"grad_norm": 0.4063076078891754,
"learning_rate": 8.739286885559882e-05,
"loss": 1.718800163269043,
"step": 2820
},
{
"epoch": 0.29789473684210527,
"grad_norm": 0.41235899925231934,
"learning_rate": 8.725284200930096e-05,
"loss": 1.6484018325805665,
"step": 2830
},
{
"epoch": 0.29894736842105263,
"grad_norm": 0.41001883149147034,
"learning_rate": 8.711243000069387e-05,
"loss": 1.6729150772094727,
"step": 2840
},
{
"epoch": 0.3,
"grad_norm": 0.40411022305488586,
"learning_rate": 8.697163443205486e-05,
"loss": 1.6615083694458008,
"step": 2850
},
{
"epoch": 0.30105263157894735,
"grad_norm": 0.3862515389919281,
"learning_rate": 8.683045691003816e-05,
"loss": 1.6196592330932618,
"step": 2860
},
{
"epoch": 0.3021052631578947,
"grad_norm": 0.385047972202301,
"learning_rate": 8.668889904565657e-05,
"loss": 1.6499458312988282,
"step": 2870
},
{
"epoch": 0.3031578947368421,
"grad_norm": 0.385885626077652,
"learning_rate": 8.654696245426309e-05,
"loss": 1.6544832229614257,
"step": 2880
},
{
"epoch": 0.3042105263157895,
"grad_norm": 0.39182907342910767,
"learning_rate": 8.640464875553244e-05,
"loss": 1.6151403427124023,
"step": 2890
},
{
"epoch": 0.30526315789473685,
"grad_norm": 0.37692710757255554,
"learning_rate": 8.626195957344259e-05,
"loss": 1.7116943359375,
"step": 2900
}
],
"logging_steps": 10,
"max_steps": 9500,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.884603437744128e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}