{
"best_global_step": 1800,
"best_metric": 0.00229549,
"best_model_checkpoint": "/mnt/beegfs3/liying/zhangfanhao/output1125/v1-20251125-231025/checkpoint-1800",
"epoch": 3.0354280894137493,
"eval_steps": 100,
"global_step": 1800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.001687051876845213,
"grad_norm": 0.5938383277366354,
"learning_rate": 5.999998316002012e-06,
"loss": 0.380859375,
"step": 1
},
{
"epoch": 0.008435259384226065,
"grad_norm": 0.432332139447319,
"learning_rate": 5.999957900144816e-06,
"loss": 0.3326416015625,
"step": 5
},
{
"epoch": 0.01687051876845213,
"grad_norm": 0.24245712798777588,
"learning_rate": 5.99983160176086e-06,
"loss": 0.2187744140625,
"step": 10
},
{
"epoch": 0.025305778152678194,
"grad_norm": 0.14967602144842607,
"learning_rate": 5.999621108392896e-06,
"loss": 0.1771240234375,
"step": 15
},
{
"epoch": 0.03374103753690426,
"grad_norm": 0.11965916268612647,
"learning_rate": 5.9993264259487505e-06,
"loss": 0.14423828125,
"step": 20
},
{
"epoch": 0.04217629692113033,
"grad_norm": 0.09039362542123534,
"learning_rate": 5.998947562699149e-06,
"loss": 0.1184326171875,
"step": 25
},
{
"epoch": 0.05061155630535639,
"grad_norm": 0.0761794885482189,
"learning_rate": 5.998484529277483e-06,
"loss": 0.108642578125,
"step": 30
},
{
"epoch": 0.059046815689582456,
"grad_norm": 0.07436752367684027,
"learning_rate": 5.997937338679513e-06,
"loss": 0.09638671875,
"step": 35
},
{
"epoch": 0.06748207507380852,
"grad_norm": 0.06054003854062884,
"learning_rate": 5.997306006263003e-06,
"loss": 0.1025146484375,
"step": 40
},
{
"epoch": 0.07591733445803459,
"grad_norm": 0.05715450839425674,
"learning_rate": 5.996590549747288e-06,
"loss": 0.0909912109375,
"step": 45
},
{
"epoch": 0.08435259384226065,
"grad_norm": 0.0562159873926997,
"learning_rate": 5.995790989212777e-06,
"loss": 0.0900390625,
"step": 50
},
{
"epoch": 0.09278785322648671,
"grad_norm": 0.054733644360014155,
"learning_rate": 5.994907347100393e-06,
"loss": 0.08599853515625,
"step": 55
},
{
"epoch": 0.10122311261071278,
"grad_norm": 0.04945430208391664,
"learning_rate": 5.99393964821094e-06,
"loss": 0.08861083984375,
"step": 60
},
{
"epoch": 0.10965837199493884,
"grad_norm": 0.060375343186170424,
"learning_rate": 5.992887919704406e-06,
"loss": 0.08037109375,
"step": 65
},
{
"epoch": 0.11809363137916491,
"grad_norm": 0.05113371142226039,
"learning_rate": 5.991752191099203e-06,
"loss": 0.07867431640625,
"step": 70
},
{
"epoch": 0.12652889076339097,
"grad_norm": 0.060073186423122656,
"learning_rate": 5.990532494271337e-06,
"loss": 0.07816162109375,
"step": 75
},
{
"epoch": 0.13496415014761703,
"grad_norm": 0.058832653609599356,
"learning_rate": 5.989228863453515e-06,
"loss": 0.08001708984375,
"step": 80
},
{
"epoch": 0.1433994095318431,
"grad_norm": 0.06587176624760811,
"learning_rate": 5.987841335234184e-06,
"loss": 0.074359130859375,
"step": 85
},
{
"epoch": 0.15183466891606917,
"grad_norm": 0.06491166432460505,
"learning_rate": 5.9863699485565e-06,
"loss": 0.0674072265625,
"step": 90
},
{
"epoch": 0.16026992830029524,
"grad_norm": 0.056198676389375694,
"learning_rate": 5.984814744717241e-06,
"loss": 0.0659912109375,
"step": 95
},
{
"epoch": 0.1687051876845213,
"grad_norm": 0.0673764252680421,
"learning_rate": 5.983175767365646e-06,
"loss": 0.063623046875,
"step": 100
},
{
"epoch": 0.1687051876845213,
"eval_loss": 0.0162808820605278,
"eval_margin": -0.020074697267714766,
"eval_mean_neg": 0.6548054814338684,
"eval_mean_pos": 0.8441178202629089,
"eval_runtime": 367.938,
"eval_samples_per_second": 21.713,
"eval_steps_per_second": 0.34,
"step": 100
},
{
"epoch": 0.17714044706874738,
"grad_norm": 0.06055978762872105,
"learning_rate": 5.981453062502185e-06,
"loss": 0.060498046875,
"step": 105
},
{
"epoch": 0.18557570645297342,
"grad_norm": 0.06213709235940642,
"learning_rate": 5.979646678477277e-06,
"loss": 0.056640625,
"step": 110
},
{
"epoch": 0.19401096583719948,
"grad_norm": 0.0659729457413995,
"learning_rate": 5.977756665989925e-06,
"loss": 0.05919189453125,
"step": 115
},
{
"epoch": 0.20244622522142555,
"grad_norm": 0.05897713608413389,
"learning_rate": 5.9757830780862985e-06,
"loss": 0.0628662109375,
"step": 120
},
{
"epoch": 0.21088148460565162,
"grad_norm": 0.05642517065149083,
"learning_rate": 5.973725970158239e-06,
"loss": 0.05245361328125,
"step": 125
},
{
"epoch": 0.2193167439898777,
"grad_norm": 0.06579611075607034,
"learning_rate": 5.9715853999417115e-06,
"loss": 0.05848388671875,
"step": 130
},
{
"epoch": 0.22775200337410376,
"grad_norm": 0.07448489445734133,
"learning_rate": 5.969361427515179e-06,
"loss": 0.0573974609375,
"step": 135
},
{
"epoch": 0.23618726275832982,
"grad_norm": 0.05975086799089143,
"learning_rate": 5.9670541152979215e-06,
"loss": 0.05091552734375,
"step": 140
},
{
"epoch": 0.2446225221425559,
"grad_norm": 0.06504159374670346,
"learning_rate": 5.964663528048276e-06,
"loss": 0.047943115234375,
"step": 145
},
{
"epoch": 0.25305778152678193,
"grad_norm": 0.060584307441235295,
"learning_rate": 5.96218973286183e-06,
"loss": 0.0493896484375,
"step": 150
},
{
"epoch": 0.261493040911008,
"grad_norm": 0.06234398910972033,
"learning_rate": 5.959632799169529e-06,
"loss": 0.04854736328125,
"step": 155
},
{
"epoch": 0.26992830029523407,
"grad_norm": 0.07326440644425879,
"learning_rate": 5.9569927987357305e-06,
"loss": 0.0443359375,
"step": 160
},
{
"epoch": 0.27836355967946014,
"grad_norm": 0.059873291821439245,
"learning_rate": 5.954269805656194e-06,
"loss": 0.04698486328125,
"step": 165
},
{
"epoch": 0.2867988190636862,
"grad_norm": 0.07214278446872342,
"learning_rate": 5.951463896355993e-06,
"loss": 0.0474639892578125,
"step": 170
},
{
"epoch": 0.2952340784479123,
"grad_norm": 0.06530184393433881,
"learning_rate": 5.94857514958738e-06,
"loss": 0.043914794921875,
"step": 175
},
{
"epoch": 0.30366933783213834,
"grad_norm": 0.06214586771199744,
"learning_rate": 5.945603646427567e-06,
"loss": 0.043475341796875,
"step": 180
},
{
"epoch": 0.3121045972163644,
"grad_norm": 0.06764874450241058,
"learning_rate": 5.9425494702764575e-06,
"loss": 0.04755859375,
"step": 185
},
{
"epoch": 0.3205398566005905,
"grad_norm": 0.06523200399348678,
"learning_rate": 5.939412706854299e-06,
"loss": 0.044635009765625,
"step": 190
},
{
"epoch": 0.32897511598481655,
"grad_norm": 0.060102318432770876,
"learning_rate": 5.9361934441992835e-06,
"loss": 0.042364501953125,
"step": 195
},
{
"epoch": 0.3374103753690426,
"grad_norm": 0.06678207500644712,
"learning_rate": 5.9328917726650706e-06,
"loss": 0.04183349609375,
"step": 200
},
{
"epoch": 0.3374103753690426,
"eval_loss": 0.010425936430692673,
"eval_margin": -0.016463442112229044,
"eval_mean_neg": 0.5932909250259399,
"eval_mean_pos": 0.8196097016334534,
"eval_runtime": 365.6666,
"eval_samples_per_second": 21.848,
"eval_steps_per_second": 0.342,
"step": 200
},
{
"epoch": 0.3458456347532687,
"grad_norm": 0.05626492604909855,
"learning_rate": 5.929507784918257e-06,
"loss": 0.040447998046875,
"step": 205
},
{
"epoch": 0.35428089413749475,
"grad_norm": 0.054176681030320105,
"learning_rate": 5.926041575935772e-06,
"loss": 0.037396240234375,
"step": 210
},
{
"epoch": 0.3627161535217208,
"grad_norm": 0.06953999336709471,
"learning_rate": 5.922493243002212e-06,
"loss": 0.042828369140625,
"step": 215
},
{
"epoch": 0.37115141290594683,
"grad_norm": 0.05589129508252642,
"learning_rate": 5.918862885707113e-06,
"loss": 0.034979248046875,
"step": 220
},
{
"epoch": 0.3795866722901729,
"grad_norm": 0.07078214617147234,
"learning_rate": 5.915150605942153e-06,
"loss": 0.035723876953125,
"step": 225
},
{
"epoch": 0.38802193167439897,
"grad_norm": 0.06815732953530805,
"learning_rate": 5.911356507898291e-06,
"loss": 0.041973876953125,
"step": 230
},
{
"epoch": 0.39645719105862504,
"grad_norm": 0.062472935047014386,
"learning_rate": 5.907480698062848e-06,
"loss": 0.0356689453125,
"step": 235
},
{
"epoch": 0.4048924504428511,
"grad_norm": 0.06665756879409568,
"learning_rate": 5.90352328521651e-06,
"loss": 0.036456298828125,
"step": 240
},
{
"epoch": 0.41332770982707717,
"grad_norm": 0.06681598226193439,
"learning_rate": 5.899484380430284e-06,
"loss": 0.0343994140625,
"step": 245
},
{
"epoch": 0.42176296921130324,
"grad_norm": 0.060740413400477374,
"learning_rate": 5.895364097062374e-06,
"loss": 0.0318511962890625,
"step": 250
},
{
"epoch": 0.4301982285955293,
"grad_norm": 0.06545743307605277,
"learning_rate": 5.8911625507550015e-06,
"loss": 0.034765625,
"step": 255
},
{
"epoch": 0.4386334879797554,
"grad_norm": 0.0673664786591912,
"learning_rate": 5.88687985943116e-06,
"loss": 0.03580322265625,
"step": 260
},
{
"epoch": 0.44706874736398144,
"grad_norm": 0.06937993663032453,
"learning_rate": 5.882516143291308e-06,
"loss": 0.036236572265625,
"step": 265
},
{
"epoch": 0.4555040067482075,
"grad_norm": 0.0639250177544625,
"learning_rate": 5.878071524809988e-06,
"loss": 0.0317962646484375,
"step": 270
},
{
"epoch": 0.4639392661324336,
"grad_norm": 0.06037822600018219,
"learning_rate": 5.873546128732399e-06,
"loss": 0.0323699951171875,
"step": 275
},
{
"epoch": 0.47237452551665965,
"grad_norm": 0.060357976056049485,
"learning_rate": 5.868940082070885e-06,
"loss": 0.033660888671875,
"step": 280
},
{
"epoch": 0.4808097849008857,
"grad_norm": 0.061037172126093234,
"learning_rate": 5.8642535141013785e-06,
"loss": 0.0297515869140625,
"step": 285
},
{
"epoch": 0.4892450442851118,
"grad_norm": 0.0524126813526148,
"learning_rate": 5.859486556359768e-06,
"loss": 0.028472900390625,
"step": 290
},
{
"epoch": 0.49768030366933785,
"grad_norm": 0.062026009465912704,
"learning_rate": 5.854639342638208e-06,
"loss": 0.030718994140625,
"step": 295
},
{
"epoch": 0.5061155630535639,
"grad_norm": 0.05866098788599579,
"learning_rate": 5.849712008981361e-06,
"loss": 0.032916259765625,
"step": 300
},
{
"epoch": 0.5061155630535639,
"eval_loss": 0.007868120446801186,
"eval_margin": -0.013977996595654517,
"eval_mean_neg": 0.5548827648162842,
"eval_mean_pos": 0.793705403804779,
"eval_runtime": 364.6437,
"eval_samples_per_second": 21.909,
"eval_steps_per_second": 0.343,
"step": 300
},
{
"epoch": 0.5145508224377899,
"grad_norm": 0.05968132039231295,
"learning_rate": 5.844704693682583e-06,
"loss": 0.0292724609375,
"step": 305
},
{
"epoch": 0.522986081822016,
"grad_norm": 0.06038138238675174,
"learning_rate": 5.8396175372800405e-06,
"loss": 0.030743408203125,
"step": 310
},
{
"epoch": 0.5314213412062421,
"grad_norm": 0.06052295196543659,
"learning_rate": 5.834450682552765e-06,
"loss": 0.030194091796875,
"step": 315
},
{
"epoch": 0.5398566005904681,
"grad_norm": 0.05539528727202974,
"learning_rate": 5.829204274516648e-06,
"loss": 0.0312774658203125,
"step": 320
},
{
"epoch": 0.5482918599746942,
"grad_norm": 0.052508369724972796,
"learning_rate": 5.823878460420366e-06,
"loss": 0.0295318603515625,
"step": 325
},
{
"epoch": 0.5567271193589203,
"grad_norm": 0.05151880865825463,
"learning_rate": 5.8184733897412565e-06,
"loss": 0.028912353515625,
"step": 330
},
{
"epoch": 0.5651623787431463,
"grad_norm": 0.0624220665428448,
"learning_rate": 5.812989214181113e-06,
"loss": 0.027313232421875,
"step": 335
},
{
"epoch": 0.5735976381273724,
"grad_norm": 0.06481057308539884,
"learning_rate": 5.807426087661934e-06,
"loss": 0.02608642578125,
"step": 340
},
{
"epoch": 0.5820328975115985,
"grad_norm": 0.06109467057046473,
"learning_rate": 5.8017841663216e-06,
"loss": 0.0282989501953125,
"step": 345
},
{
"epoch": 0.5904681568958245,
"grad_norm": 0.062107444796084835,
"learning_rate": 5.796063608509493e-06,
"loss": 0.0277069091796875,
"step": 350
},
{
"epoch": 0.5989034162800506,
"grad_norm": 0.0552072139581444,
"learning_rate": 5.7902645747820485e-06,
"loss": 0.028399658203125,
"step": 355
},
{
"epoch": 0.6073386756642767,
"grad_norm": 0.06047980839414296,
"learning_rate": 5.784387227898254e-06,
"loss": 0.0281524658203125,
"step": 360
},
{
"epoch": 0.6157739350485028,
"grad_norm": 0.05336288606895412,
"learning_rate": 5.778431732815078e-06,
"loss": 0.02484130859375,
"step": 365
},
{
"epoch": 0.6242091944327288,
"grad_norm": 0.060745200996401724,
"learning_rate": 5.77239825668284e-06,
"loss": 0.02640380859375,
"step": 370
},
{
"epoch": 0.6326444538169549,
"grad_norm": 0.048268694566304324,
"learning_rate": 5.766286968840522e-06,
"loss": 0.0278717041015625,
"step": 375
},
{
"epoch": 0.641079713201181,
"grad_norm": 0.05424806603710711,
"learning_rate": 5.760098040811012e-06,
"loss": 0.0271453857421875,
"step": 380
},
{
"epoch": 0.649514972585407,
"grad_norm": 0.054535443289609395,
"learning_rate": 5.7538316462962935e-06,
"loss": 0.026611328125,
"step": 385
},
{
"epoch": 0.6579502319696331,
"grad_norm": 0.06967389025087475,
"learning_rate": 5.7474879611725655e-06,
"loss": 0.02589111328125,
"step": 390
},
{
"epoch": 0.6663854913538592,
"grad_norm": 0.06024092137696802,
"learning_rate": 5.741067163485314e-06,
"loss": 0.0193756103515625,
"step": 395
},
{
"epoch": 0.6748207507380852,
"grad_norm": 0.05981804001044263,
"learning_rate": 5.7345694334443066e-06,
"loss": 0.0205718994140625,
"step": 400
},
{
"epoch": 0.6748207507380852,
"eval_loss": 0.006414474919438362,
"eval_margin": -0.013447051244457402,
"eval_mean_neg": 0.5324161052703857,
"eval_mean_pos": 0.7909372448921204,
"eval_runtime": 365.0145,
"eval_samples_per_second": 21.887,
"eval_steps_per_second": 0.342,
"step": 400
},
{
"epoch": 0.6832560101223113,
"grad_norm": 0.06692561927901217,
"learning_rate": 5.727994953418538e-06,
"loss": 0.022021484375,
"step": 405
},
{
"epoch": 0.6916912695065374,
"grad_norm": 0.06609269963808409,
"learning_rate": 5.721343907931114e-06,
"loss": 0.02950592041015625,
"step": 410
},
{
"epoch": 0.7001265288907634,
"grad_norm": 0.052617111357424175,
"learning_rate": 5.71461648365407e-06,
"loss": 0.025189208984375,
"step": 415
},
{
"epoch": 0.7085617882749895,
"grad_norm": 0.04860971480260525,
"learning_rate": 5.707812869403128e-06,
"loss": 0.022052001953125,
"step": 420
},
{
"epoch": 0.7169970476592156,
"grad_norm": 0.06030454097987917,
"learning_rate": 5.7009332561324085e-06,
"loss": 0.0219390869140625,
"step": 425
},
{
"epoch": 0.7254323070434416,
"grad_norm": 0.06837586048390999,
"learning_rate": 5.693977836929057e-06,
"loss": 0.0270172119140625,
"step": 430
},
{
"epoch": 0.7338675664276677,
"grad_norm": 0.05197492190608033,
"learning_rate": 5.686946807007834e-06,
"loss": 0.02206878662109375,
"step": 435
},
{
"epoch": 0.7423028258118937,
"grad_norm": 0.06128713786873146,
"learning_rate": 5.679840363705637e-06,
"loss": 0.0244720458984375,
"step": 440
},
{
"epoch": 0.7507380851961197,
"grad_norm": 0.0629198604819534,
"learning_rate": 5.672658706475953e-06,
"loss": 0.0194488525390625,
"step": 445
},
{
"epoch": 0.7591733445803458,
"grad_norm": 0.05502172045134509,
"learning_rate": 5.665402036883267e-06,
"loss": 0.0225250244140625,
"step": 450
},
{
"epoch": 0.7676086039645719,
"grad_norm": 0.06119000768724386,
"learning_rate": 5.658070558597408e-06,
"loss": 0.01928558349609375,
"step": 455
},
{
"epoch": 0.7760438633487979,
"grad_norm": 0.058834092769235756,
"learning_rate": 5.650664477387824e-06,
"loss": 0.02149658203125,
"step": 460
},
{
"epoch": 0.784479122733024,
"grad_norm": 0.06942758384696321,
"learning_rate": 5.643184001117811e-06,
"loss": 0.0266326904296875,
"step": 465
},
{
"epoch": 0.7929143821172501,
"grad_norm": 0.05395397336586372,
"learning_rate": 5.6356293397386836e-06,
"loss": 0.0206085205078125,
"step": 470
},
{
"epoch": 0.8013496415014761,
"grad_norm": 0.057301086470950384,
"learning_rate": 5.628000705283873e-06,
"loss": 0.021770477294921875,
"step": 475
},
{
"epoch": 0.8097849008857022,
"grad_norm": 0.058618795566843934,
"learning_rate": 5.620298311862985e-06,
"loss": 0.0174072265625,
"step": 480
},
{
"epoch": 0.8182201602699283,
"grad_norm": 0.053997897902853975,
"learning_rate": 5.612522375655783e-06,
"loss": 0.0246124267578125,
"step": 485
},
{
"epoch": 0.8266554196541543,
"grad_norm": 0.058293384553658546,
"learning_rate": 5.604673114906126e-06,
"loss": 0.0239288330078125,
"step": 490
},
{
"epoch": 0.8350906790383804,
"grad_norm": 0.062099166751088966,
"learning_rate": 5.596750749915842e-06,
"loss": 0.023724365234375,
"step": 495
},
{
"epoch": 0.8435259384226065,
"grad_norm": 0.0518337334475497,
"learning_rate": 5.588755503038543e-06,
"loss": 0.01995849609375,
"step": 500
},
{
"epoch": 0.8435259384226065,
"eval_loss": 0.00542406877502799,
"eval_margin": -0.010786364688688228,
"eval_mean_neg": 0.5346763134002686,
"eval_mean_pos": 0.7906754016876221,
"eval_runtime": 367.5462,
"eval_samples_per_second": 21.736,
"eval_steps_per_second": 0.34,
"step": 500
},
{
"epoch": 0.8519611978068325,
"grad_norm": 0.0579368996460804,
"learning_rate": 5.580687598673387e-06,
"loss": 0.02121734619140625,
"step": 505
},
{
"epoch": 0.8603964571910586,
"grad_norm": 0.05214070956939639,
"learning_rate": 5.572547263258776e-06,
"loss": 0.0197113037109375,
"step": 510
},
{
"epoch": 0.8688317165752847,
"grad_norm": 0.0687906199565583,
"learning_rate": 5.564334725266006e-06,
"loss": 0.0217254638671875,
"step": 515
},
{
"epoch": 0.8772669759595108,
"grad_norm": 0.051621267659708626,
"learning_rate": 5.55605021519285e-06,
"loss": 0.019158935546875,
"step": 520
},
{
"epoch": 0.8857022353437368,
"grad_norm": 0.05599957001213385,
"learning_rate": 5.547693965557092e-06,
"loss": 0.0195770263671875,
"step": 525
},
{
"epoch": 0.8941374947279629,
"grad_norm": 0.06034671456944424,
"learning_rate": 5.539266210889997e-06,
"loss": 0.0231231689453125,
"step": 530
},
{
"epoch": 0.902572754112189,
"grad_norm": 0.04518349407201743,
"learning_rate": 5.5307671877297326e-06,
"loss": 0.0208709716796875,
"step": 535
},
{
"epoch": 0.911008013496415,
"grad_norm": 0.0503478793140038,
"learning_rate": 5.522197134614728e-06,
"loss": 0.0209930419921875,
"step": 540
},
{
"epoch": 0.9194432728806411,
"grad_norm": 0.046047217532892024,
"learning_rate": 5.513556292076981e-06,
"loss": 0.0175750732421875,
"step": 545
},
{
"epoch": 0.9278785322648672,
"grad_norm": 0.05575253759567789,
"learning_rate": 5.504844902635303e-06,
"loss": 0.0171112060546875,
"step": 550
},
{
"epoch": 0.9363137916490932,
"grad_norm": 0.04687503220455111,
"learning_rate": 5.496063210788519e-06,
"loss": 0.0167633056640625,
"step": 555
},
{
"epoch": 0.9447490510333193,
"grad_norm": 0.04891593875536363,
"learning_rate": 5.487211463008597e-06,
"loss": 0.019036865234375,
"step": 560
},
{
"epoch": 0.9531843104175454,
"grad_norm": 0.04841249311058062,
"learning_rate": 5.478289907733738e-06,
"loss": 0.01807098388671875,
"step": 565
},
{
"epoch": 0.9616195698017714,
"grad_norm": 0.060373651634708765,
"learning_rate": 5.469298795361397e-06,
"loss": 0.015673828125,
"step": 570
},
{
"epoch": 0.9700548291859975,
"grad_norm": 0.051868174671481436,
"learning_rate": 5.460238378241262e-06,
"loss": 0.01802978515625,
"step": 575
},
{
"epoch": 0.9784900885702236,
"grad_norm": 0.051146316151485995,
"learning_rate": 5.451108910668163e-06,
"loss": 0.01664581298828125,
"step": 580
},
{
"epoch": 0.9869253479544496,
"grad_norm": 0.04017649470362814,
"learning_rate": 5.441910648874945e-06,
"loss": 0.016483306884765625,
"step": 585
},
{
"epoch": 0.9953606073386757,
"grad_norm": 0.04457228909606784,
"learning_rate": 5.4326438510252655e-06,
"loss": 0.0192718505859375,
"step": 590
},
{
"epoch": 1.0033741037536905,
"grad_norm": 0.053005736672298354,
"learning_rate": 5.423308777206357e-06,
"loss": 0.015604400634765625,
"step": 595
},
{
"epoch": 1.0118093631379166,
"grad_norm": 0.05754347568157857,
"learning_rate": 5.413905689421722e-06,
"loss": 0.0159515380859375,
"step": 600
},
{
"epoch": 1.0118093631379166,
"eval_loss": 0.004593910649418831,
"eval_margin": -0.011103880922159842,
"eval_mean_neg": 0.5064941644668579,
"eval_mean_pos": 0.7831713557243347,
"eval_runtime": 364.5429,
"eval_samples_per_second": 21.915,
"eval_steps_per_second": 0.343,
"step": 600
},
{
"epoch": 1.0202446225221427,
"grad_norm": 0.04962686662442784,
"learning_rate": 5.404434851583785e-06,
"loss": 0.01360015869140625,
"step": 605
},
{
"epoch": 1.0286798819063687,
"grad_norm": 0.046926535788142015,
"learning_rate": 5.394896529506479e-06,
"loss": 0.01566925048828125,
"step": 610
},
{
"epoch": 1.0371151412905948,
"grad_norm": 0.043042108440633,
"learning_rate": 5.38529099089779e-06,
"loss": 0.0128326416015625,
"step": 615
},
{
"epoch": 1.0455504006748209,
"grad_norm": 0.049749099013614635,
"learning_rate": 5.375618505352241e-06,
"loss": 0.0136383056640625,
"step": 620
},
{
"epoch": 1.053985660059047,
"grad_norm": 0.04945151693616336,
"learning_rate": 5.365879344343326e-06,
"loss": 0.01544036865234375,
"step": 625
},
{
"epoch": 1.062420919443273,
"grad_norm": 0.04626935309793636,
"learning_rate": 5.35607378121589e-06,
"loss": 0.0143829345703125,
"step": 630
},
{
"epoch": 1.070856178827499,
"grad_norm": 0.04580735975264899,
"learning_rate": 5.346202091178459e-06,
"loss": 0.014122772216796874,
"step": 635
},
{
"epoch": 1.079291438211725,
"grad_norm": 0.046216725385350446,
"learning_rate": 5.336264551295512e-06,
"loss": 0.014672088623046874,
"step": 640
},
{
"epoch": 1.087726697595951,
"grad_norm": 0.04564303944680029,
"learning_rate": 5.326261440479709e-06,
"loss": 0.0136993408203125,
"step": 645
},
{
"epoch": 1.096161956980177,
"grad_norm": 0.05114495970312972,
"learning_rate": 5.316193039484063e-06,
"loss": 0.0147705078125,
"step": 650
},
{
"epoch": 1.1045972163644031,
"grad_norm": 0.044105955284847585,
"learning_rate": 5.306059630894056e-06,
"loss": 0.015480804443359374,
"step": 655
},
{
"epoch": 1.1130324757486292,
"grad_norm": 0.045182200484827885,
"learning_rate": 5.295861499119711e-06,
"loss": 0.013404083251953126,
"step": 660
},
{
"epoch": 1.1214677351328552,
"grad_norm": 0.04324759296793784,
"learning_rate": 5.2855989303876065e-06,
"loss": 0.01672821044921875,
"step": 665
},
{
"epoch": 1.1299029945170813,
"grad_norm": 0.03328038907845692,
"learning_rate": 5.275272212732849e-06,
"loss": 0.01335906982421875,
"step": 670
},
{
"epoch": 1.1383382539013074,
"grad_norm": 0.044225327184826406,
"learning_rate": 5.264881635990984e-06,
"loss": 0.012935638427734375,
"step": 675
},
{
"epoch": 1.1467735132855335,
"grad_norm": 0.04645591264342837,
"learning_rate": 5.2544274917898615e-06,
"loss": 0.01385498046875,
"step": 680
},
{
"epoch": 1.1552087726697595,
"grad_norm": 0.05453216622664439,
"learning_rate": 5.243910073541454e-06,
"loss": 0.016290283203125,
"step": 685
},
{
"epoch": 1.1636440320539856,
"grad_norm": 0.057731965028177075,
"learning_rate": 5.233329676433617e-06,
"loss": 0.0145355224609375,
"step": 690
},
{
"epoch": 1.1720792914382117,
"grad_norm": 0.05145183297720149,
"learning_rate": 5.222686597421808e-06,
"loss": 0.01390838623046875,
"step": 695
},
{
"epoch": 1.1805145508224377,
"grad_norm": 0.04021056012812571,
"learning_rate": 5.211981135220751e-06,
"loss": 0.01344757080078125,
"step": 700
},
{
"epoch": 1.1805145508224377,
"eval_loss": 0.004191060084849596,
"eval_margin": -0.010819014589933137,
"eval_mean_neg": 0.4851545989513397,
"eval_mean_pos": 0.7733471989631653,
"eval_runtime": 359.0481,
"eval_samples_per_second": 22.251,
"eval_steps_per_second": 0.348,
"step": 700
},
{
"epoch": 1.1889498102066638,
"grad_norm": 0.0416204676277527,
"learning_rate": 5.201213590296052e-06,
"loss": 0.014748382568359374,
"step": 705
},
{
"epoch": 1.1973850695908899,
"grad_norm": 0.05633713089091016,
"learning_rate": 5.190384264855764e-06,
"loss": 0.014013671875,
"step": 710
},
{
"epoch": 1.205820328975116,
"grad_norm": 0.05143948467095745,
"learning_rate": 5.1794934628419104e-06,
"loss": 0.015460205078125,
"step": 715
},
{
"epoch": 1.214255588359342,
"grad_norm": 0.05227911954680101,
"learning_rate": 5.168541489921949e-06,
"loss": 0.01507415771484375,
"step": 720
},
{
"epoch": 1.222690847743568,
"grad_norm": 0.058608960783147375,
"learning_rate": 5.1575286534801955e-06,
"loss": 0.01417236328125,
"step": 725
},
{
"epoch": 1.2311261071277941,
"grad_norm": 0.04818858161693878,
"learning_rate": 5.146455262609197e-06,
"loss": 0.013425445556640625,
"step": 730
},
{
"epoch": 1.2395613665120202,
"grad_norm": 0.05406749848988645,
"learning_rate": 5.1353216281010535e-06,
"loss": 0.013022613525390626,
"step": 735
},
{
"epoch": 1.2479966258962463,
"grad_norm": 0.044408669007062154,
"learning_rate": 5.1241280624387e-06,
"loss": 0.01393585205078125,
"step": 740
},
{
"epoch": 1.2564318852804723,
"grad_norm": 0.04519048638967848,
"learning_rate": 5.1128748797871314e-06,
"loss": 0.013826751708984375,
"step": 745
},
{
"epoch": 1.2648671446646984,
"grad_norm": 0.0491460974626283,
"learning_rate": 5.101562395984587e-06,
"loss": 0.01336212158203125,
"step": 750
},
{
"epoch": 1.2733024040489245,
"grad_norm": 0.04356609182045035,
"learning_rate": 5.090190928533689e-06,
"loss": 0.01492156982421875,
"step": 755
},
{
"epoch": 1.2817376634331505,
"grad_norm": 0.03556136795064142,
"learning_rate": 5.078760796592524e-06,
"loss": 0.0125732421875,
"step": 760
},
{
"epoch": 1.2901729228173766,
"grad_norm": 0.04189977738590891,
"learning_rate": 5.067272320965692e-06,
"loss": 0.0149322509765625,
"step": 765
},
{
"epoch": 1.2986081822016027,
"grad_norm": 0.05051201336701144,
"learning_rate": 5.055725824095301e-06,
"loss": 0.01419525146484375,
"step": 770
},
{
"epoch": 1.3070434415858287,
"grad_norm": 0.0416942039130722,
"learning_rate": 5.0441216300519126e-06,
"loss": 0.01274261474609375,
"step": 775
},
{
"epoch": 1.3154787009700548,
"grad_norm": 0.04629875001130603,
"learning_rate": 5.032460064525455e-06,
"loss": 0.01363525390625,
"step": 780
},
{
"epoch": 1.3239139603542809,
"grad_norm": 0.03704688355237128,
"learning_rate": 5.020741454816074e-06,
"loss": 0.01301422119140625,
"step": 785
},
{
"epoch": 1.332349219738507,
"grad_norm": 0.03742406408262459,
"learning_rate": 5.00896612982495e-06,
"loss": 0.01353302001953125,
"step": 790
},
{
"epoch": 1.340784479122733,
"grad_norm": 0.050480726423335516,
"learning_rate": 4.99713442004507e-06,
"loss": 0.01196746826171875,
"step": 795
},
{
"epoch": 1.349219738506959,
"grad_norm": 0.03808846024736694,
"learning_rate": 4.985246657551943e-06,
"loss": 0.0110015869140625,
"step": 800
},
{
"epoch": 1.349219738506959,
"eval_loss": 0.003908403683453798,
"eval_margin": -0.010123856463319352,
"eval_mean_neg": 0.49688851833343506,
"eval_mean_pos": 0.7784863114356995,
"eval_runtime": 364.0137,
"eval_samples_per_second": 21.947,
"eval_steps_per_second": 0.343,
"step": 800
},
{
"epoch": 1.3576549978911852,
"grad_norm": 0.04637758927467518,
"learning_rate": 4.973303175994289e-06,
"loss": 0.013458251953125,
"step": 805
},
{
"epoch": 1.3660902572754112,
"grad_norm": 0.05066098296531039,
"learning_rate": 4.961304310584674e-06,
"loss": 0.01515960693359375,
"step": 810
},
{
"epoch": 1.3745255166596373,
"grad_norm": 0.038530384714911596,
"learning_rate": 4.949250398090092e-06,
"loss": 0.011260223388671876,
"step": 815
},
{
"epoch": 1.3829607760438634,
"grad_norm": 0.040188601844867354,
"learning_rate": 4.937141776822525e-06,
"loss": 0.0158447265625,
"step": 820
},
{
"epoch": 1.3913960354280894,
"grad_norm": 0.03574613677300634,
"learning_rate": 4.92497878662944e-06,
"loss": 0.011143875122070313,
"step": 825
},
{
"epoch": 1.3998312948123155,
"grad_norm": 0.05019423126073816,
"learning_rate": 4.912761768884255e-06,
"loss": 0.01179351806640625,
"step": 830
},
{
"epoch": 1.4082665541965416,
"grad_norm": 0.04311116805857567,
"learning_rate": 4.9004910664767545e-06,
"loss": 0.01372833251953125,
"step": 835
},
{
"epoch": 1.4167018135807676,
"grad_norm": 0.04928580588462512,
"learning_rate": 4.888167023803468e-06,
"loss": 0.01297607421875,
"step": 840
},
{
"epoch": 1.4251370729649937,
"grad_norm": 0.054968450905918724,
"learning_rate": 4.8757899867580046e-06,
"loss": 0.014654541015625,
"step": 845
},
{
"epoch": 1.4335723323492198,
"grad_norm": 0.050366347428194534,
"learning_rate": 4.86336030272134e-06,
"loss": 0.011295318603515625,
"step": 850
},
{
"epoch": 1.4420075917334458,
"grad_norm": 0.05107215089989217,
"learning_rate": 4.850878320552076e-06,
"loss": 0.01334228515625,
"step": 855
},
{
"epoch": 1.450442851117672,
"grad_norm": 0.0391963683003482,
"learning_rate": 4.838344390576638e-06,
"loss": 0.01104736328125,
"step": 860
},
{
"epoch": 1.458878110501898,
"grad_norm": 0.03985676744245212,
"learning_rate": 4.825758864579452e-06,
"loss": 0.013307952880859375,
"step": 865
},
{
"epoch": 1.467313369886124,
"grad_norm": 0.04852757651119817,
"learning_rate": 4.813122095793066e-06,
"loss": 0.014328384399414062,
"step": 870
},
{
"epoch": 1.47574862927035,
"grad_norm": 0.0454254941425111,
"learning_rate": 4.800434438888235e-06,
"loss": 0.012960052490234375,
"step": 875
},
{
"epoch": 1.4841838886545762,
"grad_norm": 0.03868230007157653,
"learning_rate": 4.787696249963974e-06,
"loss": 0.01402740478515625,
"step": 880
},
{
"epoch": 1.4926191480388022,
"grad_norm": 0.05289135869423979,
"learning_rate": 4.774907886537553e-06,
"loss": 0.013831901550292968,
"step": 885
},
{
"epoch": 1.501054407423028,
"grad_norm": 0.04594308680556284,
"learning_rate": 4.7620697075344736e-06,
"loss": 0.012446975708007813,
"step": 890
},
{
"epoch": 1.5094896668072542,
"grad_norm": 0.048917845490978454,
"learning_rate": 4.7491820732783866e-06,
"loss": 0.011295318603515625,
"step": 895
},
{
"epoch": 1.5179249261914802,
"grad_norm": 0.043266255463378436,
"learning_rate": 4.73624534548098e-06,
"loss": 0.01407012939453125,
"step": 900
},
{
"epoch": 1.5179249261914802,
"eval_loss": 0.0036048581823706627,
"eval_margin": -0.009617562525935711,
"eval_mean_neg": 0.4904225468635559,
"eval_mean_pos": 0.7793014049530029,
"eval_runtime": 363.5397,
"eval_samples_per_second": 21.976,
"eval_steps_per_second": 0.344,
"step": 900
},
{
"epoch": 1.5263601855757063,
"grad_norm": 0.04363576408467007,
"learning_rate": 4.723259887231835e-06,
"loss": 0.0138519287109375,
"step": 905
},
{
"epoch": 1.5347954449599324,
"grad_norm": 0.04199459687850267,
"learning_rate": 4.710226062988223e-06,
"loss": 0.01312255859375,
"step": 910
},
{
"epoch": 1.5432307043441584,
"grad_norm": 0.047436231412077354,
"learning_rate": 4.697144238564889e-06,
"loss": 0.01208648681640625,
"step": 915
},
{
"epoch": 1.5516659637283845,
"grad_norm": 0.04092453404900873,
"learning_rate": 4.684014781123775e-06,
"loss": 0.012505340576171874,
"step": 920
},
{
"epoch": 1.5601012231126106,
"grad_norm": 0.045645370405214956,
"learning_rate": 4.6708380591637166e-06,
"loss": 0.0120208740234375,
"step": 925
},
{
"epoch": 1.5685364824968366,
"grad_norm": 0.04911154284719614,
"learning_rate": 4.6576144425101076e-06,
"loss": 0.013311767578125,
"step": 930
},
{
"epoch": 1.5769717418810627,
"grad_norm": 0.045881762593597546,
"learning_rate": 4.64434430230451e-06,
"loss": 0.012969207763671876,
"step": 935
},
{
"epoch": 1.5854070012652888,
"grad_norm": 0.04728445094523914,
"learning_rate": 4.631028010994245e-06,
"loss": 0.01099395751953125,
"step": 940
},
{
"epoch": 1.5938422606495148,
"grad_norm": 0.03903116673162643,
"learning_rate": 4.617665942321937e-06,
"loss": 0.0129608154296875,
"step": 945
},
{
"epoch": 1.602277520033741,
"grad_norm": 0.040499425484585065,
"learning_rate": 4.6042584713150225e-06,
"loss": 0.009827423095703124,
"step": 950
},
{
"epoch": 1.610712779417967,
"grad_norm": 0.047017092872005554,
"learning_rate": 4.590805974275228e-06,
"loss": 0.01045989990234375,
"step": 955
},
{
"epoch": 1.619148038802193,
"grad_norm": 0.03869016761931018,
"learning_rate": 4.577308828768005e-06,
"loss": 0.011346435546875,
"step": 960
},
{
"epoch": 1.6275832981864191,
"grad_norm": 0.05726216064413269,
"learning_rate": 4.563767413611932e-06,
"loss": 0.01296844482421875,
"step": 965
},
{
"epoch": 1.6360185575706452,
"grad_norm": 0.034971593802495975,
"learning_rate": 4.550182108868089e-06,
"loss": 0.01379852294921875,
"step": 970
},
{
"epoch": 1.6444538169548713,
"grad_norm": 0.04877425067250454,
"learning_rate": 4.536553295829384e-06,
"loss": 0.012924957275390624,
"step": 975
},
{
"epoch": 1.6528890763390973,
"grad_norm": 0.03927648322180213,
"learning_rate": 4.522881357009853e-06,
"loss": 0.01293792724609375,
"step": 980
},
{
"epoch": 1.6613243357233234,
"grad_norm": 0.024976847462424127,
"learning_rate": 4.5091666761339275e-06,
"loss": 0.009877777099609375,
"step": 985
},
{
"epoch": 1.6697595951075495,
"grad_norm": 0.03945379802090875,
"learning_rate": 4.495409638125657e-06,
"loss": 0.01130523681640625,
"step": 990
},
{
"epoch": 1.6781948544917755,
"grad_norm": 0.03430320161614481,
"learning_rate": 4.481610629097917e-06,
"loss": 0.009923553466796875,
"step": 995
},
{
"epoch": 1.6866301138760016,
"grad_norm": 0.03895065600017937,
"learning_rate": 4.46777003634156e-06,
"loss": 0.01330413818359375,
"step": 1000
},
{
"epoch": 1.6866301138760016,
"eval_loss": 0.0034073551651090384,
"eval_margin": -0.009528953300398444,
"eval_mean_neg": 0.4926661550998688,
"eval_mean_pos": 0.7842009663581848,
"eval_runtime": 367.0219,
"eval_samples_per_second": 21.767,
"eval_steps_per_second": 0.341,
"step": 1000
},
{
"epoch": 1.6950653732602277,
"grad_norm": 0.04302786223265218,
"learning_rate": 4.453888248314553e-06,
"loss": 0.01107330322265625,
"step": 1005
},
{
"epoch": 1.7035006326444537,
"grad_norm": 0.04002206909489744,
"learning_rate": 4.439965654631073e-06,
"loss": 0.0105499267578125,
"step": 1010
},
{
"epoch": 1.7119358920286798,
"grad_norm": 0.04439497813433074,
"learning_rate": 4.426002646050574e-06,
"loss": 0.010544586181640624,
"step": 1015
},
{
"epoch": 1.7203711514129059,
"grad_norm": 0.043341839034531496,
"learning_rate": 4.411999614466812e-06,
"loss": 0.0125335693359375,
"step": 1020
},
{
"epoch": 1.728806410797132,
"grad_norm": 0.03449321841295583,
"learning_rate": 4.397956952896858e-06,
"loss": 0.010623550415039063,
"step": 1025
},
{
"epoch": 1.737241670181358,
"grad_norm": 0.041185961783139574,
"learning_rate": 4.383875055470055e-06,
"loss": 0.01031951904296875,
"step": 1030
},
{
"epoch": 1.745676929565584,
"grad_norm": 0.04627446953615271,
"learning_rate": 4.3697543174169675e-06,
"loss": 0.01590385437011719,
"step": 1035
},
{
"epoch": 1.7541121889498101,
"grad_norm": 0.04582345634360075,
"learning_rate": 4.355595135058278e-06,
"loss": 0.0119537353515625,
"step": 1040
},
{
"epoch": 1.7625474483340362,
"grad_norm": 0.033580437424405536,
"learning_rate": 4.3413979057936715e-06,
"loss": 0.01235198974609375,
"step": 1045
},
{
"epoch": 1.7709827077182623,
"grad_norm": 0.03545606353671419,
"learning_rate": 4.32716302809068e-06,
"loss": 0.012863922119140624,
"step": 1050
},
{
"epoch": 1.7794179671024883,
"grad_norm": 0.03491571698794484,
"learning_rate": 4.312890901473496e-06,
"loss": 0.01035614013671875,
"step": 1055
},
{
"epoch": 1.7878532264867144,
"grad_norm": 0.04391496148899165,
"learning_rate": 4.29858192651176e-06,
"loss": 0.011370468139648437,
"step": 1060
},
{
"epoch": 1.7962884858709405,
"grad_norm": 0.049338016603549396,
"learning_rate": 4.284236504809324e-06,
"loss": 0.011846160888671875,
"step": 1065
},
{
"epoch": 1.8047237452551665,
"grad_norm": 0.035387852478552806,
"learning_rate": 4.269855038992971e-06,
"loss": 0.011142349243164063,
"step": 1070
},
{
"epoch": 1.8131590046393926,
"grad_norm": 0.043891210942711104,
"learning_rate": 4.2554379327011196e-06,
"loss": 0.011545944213867187,
"step": 1075
},
{
"epoch": 1.8215942640236187,
"grad_norm": 0.040327331551499056,
"learning_rate": 4.240985590572496e-06,
"loss": 0.00897674560546875,
"step": 1080
},
{
"epoch": 1.8300295234078447,
"grad_norm": 0.03274271686886844,
"learning_rate": 4.226498418234771e-06,
"loss": 0.01215667724609375,
"step": 1085
},
{
"epoch": 1.8384647827920708,
"grad_norm": 0.04375742422856697,
"learning_rate": 4.2119768222931865e-06,
"loss": 0.0109588623046875,
"step": 1090
},
{
"epoch": 1.8469000421762969,
"grad_norm": 0.036163256401816654,
"learning_rate": 4.19742121031913e-06,
"loss": 0.012054443359375,
"step": 1095
},
{
"epoch": 1.855335301560523,
"grad_norm": 0.04078407955383746,
"learning_rate": 4.182831990838709e-06,
"loss": 0.0132843017578125,
"step": 1100
},
{
"epoch": 1.855335301560523,
"eval_loss": 0.003225065069273114,
"eval_margin": -0.008749207222623932,
"eval_mean_neg": 0.49084940552711487,
"eval_mean_pos": 0.7849159836769104,
"eval_runtime": 366.687,
"eval_samples_per_second": 21.787,
"eval_steps_per_second": 0.341,
"step": 1100
},
{
"epoch": 1.863770560944749,
"grad_norm": 0.047827239751426935,
"learning_rate": 4.168209573321271e-06,
"loss": 0.0133697509765625,
"step": 1105
},
{
"epoch": 1.872205820328975,
"grad_norm": 0.0274823880547768,
"learning_rate": 4.153554368167927e-06,
"loss": 0.010877227783203125,
"step": 1110
},
{
"epoch": 1.8806410797132012,
"grad_norm": 0.052787755841206804,
"learning_rate": 4.138866786700016e-06,
"loss": 0.0139434814453125,
"step": 1115
},
{
"epoch": 1.8890763390974272,
"grad_norm": 0.029629846825489692,
"learning_rate": 4.124147241147577e-06,
"loss": 0.011189651489257813,
"step": 1120
},
{
"epoch": 1.8975115984816533,
"grad_norm": 0.039855575258898726,
"learning_rate": 4.109396144637764e-06,
"loss": 0.010993194580078126,
"step": 1125
},
{
"epoch": 1.9059468578658794,
"grad_norm": 0.03789188882991695,
"learning_rate": 4.094613911183265e-06,
"loss": 0.01313323974609375,
"step": 1130
},
{
"epoch": 1.9143821172501054,
"grad_norm": 0.03482605825228896,
"learning_rate": 4.0798009556706685e-06,
"loss": 0.008492279052734374,
"step": 1135
},
{
"epoch": 1.9228173766343315,
"grad_norm": 0.0395626147511318,
"learning_rate": 4.064957693848831e-06,
"loss": 0.011167144775390625,
"step": 1140
},
{
"epoch": 1.9312526360185576,
"grad_norm": 0.026910728579180684,
"learning_rate": 4.050084542317201e-06,
"loss": 0.0124908447265625,
"step": 1145
},
{
"epoch": 1.9396878954027836,
"grad_norm": 0.05111929237613795,
"learning_rate": 4.0351819185141284e-06,
"loss": 0.01279144287109375,
"step": 1150
},
{
"epoch": 1.9481231547870097,
"grad_norm": 0.031631097839140386,
"learning_rate": 4.02025024070515e-06,
"loss": 0.010783004760742187,
"step": 1155
},
{
"epoch": 1.9565584141712358,
"grad_norm": 0.03921591693735718,
"learning_rate": 4.005289927971248e-06,
"loss": 0.009867095947265625,
"step": 1160
},
{
"epoch": 1.9649936735554618,
"grad_norm": 0.03786979993880419,
"learning_rate": 3.990301400197088e-06,
"loss": 0.010943603515625,
"step": 1165
},
{
"epoch": 1.973428932939688,
"grad_norm": 0.033688024912648086,
"learning_rate": 3.9752850780592366e-06,
"loss": 0.010836410522460937,
"step": 1170
},
{
"epoch": 1.981864192323914,
"grad_norm": 0.0473160707405277,
"learning_rate": 3.960241383014353e-06,
"loss": 0.011658477783203124,
"step": 1175
},
{
"epoch": 1.99029945170814,
"grad_norm": 0.034470209590808834,
"learning_rate": 3.945170737287356e-06,
"loss": 0.0096588134765625,
"step": 1180
},
{
"epoch": 1.9987347110923661,
"grad_norm": 0.04035006428036731,
"learning_rate": 3.930073563859583e-06,
"loss": 0.013312530517578126,
"step": 1185
},
{
"epoch": 2.006748207507381,
"grad_norm": 0.03443773853658945,
"learning_rate": 3.914950286456911e-06,
"loss": 0.0104766845703125,
"step": 1190
},
{
"epoch": 2.015183466891607,
"grad_norm": 0.02321269258461312,
"learning_rate": 3.899801329537865e-06,
"loss": 0.008111572265625,
"step": 1195
},
{
"epoch": 2.023618726275833,
"grad_norm": 0.02427731911492366,
"learning_rate": 3.884627118281706e-06,
"loss": 0.009668731689453125,
"step": 1200
},
{
"epoch": 2.023618726275833,
"eval_loss": 0.0028827113565057516,
"eval_margin": -0.008073512017877111,
"eval_mean_neg": 0.5066258907318115,
"eval_mean_pos": 0.7934735417366028,
"eval_runtime": 365.1842,
"eval_samples_per_second": 21.877,
"eval_steps_per_second": 0.342,
"step": 1200
},
{
"epoch": 2.0320539856600592,
"grad_norm": 0.033776934236771874,
"learning_rate": 3.869428078576498e-06,
"loss": 0.00937347412109375,
"step": 1205
},
{
"epoch": 2.0404892450442853,
"grad_norm": 0.05070270762284893,
"learning_rate": 3.8542046370071575e-06,
"loss": 0.008733367919921875,
"step": 1210
},
{
"epoch": 2.0489245044285114,
"grad_norm": 0.028063560546546604,
"learning_rate": 3.838957220843472e-06,
"loss": 0.00914459228515625,
"step": 1215
},
{
"epoch": 2.0573597638127374,
"grad_norm": 0.041287537117132886,
"learning_rate": 3.8236862580281175e-06,
"loss": 0.010516357421875,
"step": 1220
},
{
"epoch": 2.0657950231969635,
"grad_norm": 0.03173632436563901,
"learning_rate": 3.808392177164642e-06,
"loss": 0.010186767578125,
"step": 1225
},
{
"epoch": 2.0742302825811896,
"grad_norm": 0.03149301964970768,
"learning_rate": 3.7930754075054406e-06,
"loss": 0.010378265380859375,
"step": 1230
},
{
"epoch": 2.0826655419654156,
"grad_norm": 0.03183747792195117,
"learning_rate": 3.7777363789397004e-06,
"loss": 0.009032630920410156,
"step": 1235
},
{
"epoch": 2.0911008013496417,
"grad_norm": 0.0353065686803631,
"learning_rate": 3.7623755219813442e-06,
"loss": 0.0096771240234375,
"step": 1240
},
{
"epoch": 2.0995360607338673,
"grad_norm": 0.040800577074973816,
"learning_rate": 3.746993267756939e-06,
"loss": 0.009685516357421875,
"step": 1245
},
{
"epoch": 2.107971320118094,
"grad_norm": 0.04064182954953987,
"learning_rate": 3.7315900479936044e-06,
"loss": 0.010097503662109375,
"step": 1250
},
{
"epoch": 2.1164065795023195,
"grad_norm": 0.04908593416403285,
"learning_rate": 3.7161662950068846e-06,
"loss": 0.009412384033203125,
"step": 1255
},
{
"epoch": 2.124841838886546,
"grad_norm": 0.0284060145446946,
"learning_rate": 3.7007224416886276e-06,
"loss": 0.00821533203125,
"step": 1260
},
{
"epoch": 2.1332770982707716,
"grad_norm": 0.030842726867602113,
"learning_rate": 3.685258921494824e-06,
"loss": 0.009014129638671875,
"step": 1265
},
{
"epoch": 2.141712357654998,
"grad_norm": 0.036273158990138075,
"learning_rate": 3.6697761684334466e-06,
"loss": 0.010558700561523438,
"step": 1270
},
{
"epoch": 2.1501476170392237,
"grad_norm": 0.03693819496482909,
"learning_rate": 3.6542746170522717e-06,
"loss": 0.010668182373046875,
"step": 1275
},
{
"epoch": 2.15858287642345,
"grad_norm": 0.03797267942950567,
"learning_rate": 3.638754702426678e-06,
"loss": 0.008889389038085938,
"step": 1280
},
{
"epoch": 2.167018135807676,
"grad_norm": 0.03341108305444907,
"learning_rate": 3.6232168601474363e-06,
"loss": 0.006923675537109375,
"step": 1285
},
{
"epoch": 2.175453395191902,
"grad_norm": 0.03454779917085028,
"learning_rate": 3.607661526308488e-06,
"loss": 0.00969085693359375,
"step": 1290
},
{
"epoch": 2.183888654576128,
"grad_norm": 0.035727285557249105,
"learning_rate": 3.5920891374947005e-06,
"loss": 0.00997161865234375,
"step": 1295
},
{
"epoch": 2.192323913960354,
"grad_norm": 0.031320211315080816,
"learning_rate": 3.5765001307696152e-06,
"loss": 0.007769393920898438,
"step": 1300
},
{
"epoch": 2.192323913960354,
"eval_loss": 0.0026897923089563847,
"eval_margin": -0.0077478337221808975,
"eval_mean_neg": 0.49285975098609924,
"eval_mean_pos": 0.7862820625305176,
"eval_runtime": 362.3777,
"eval_samples_per_second": 22.046,
"eval_steps_per_second": 0.345,
"step": 1300
},
{
"epoch": 2.20075917334458,
"grad_norm": 0.023952498523963275,
"learning_rate": 3.560894943663185e-06,
"loss": 0.009902191162109376,
"step": 1305
},
{
"epoch": 2.2091944327288062,
"grad_norm": 0.029616458459003896,
"learning_rate": 3.545274014159486e-06,
"loss": 0.008718109130859375,
"step": 1310
},
{
"epoch": 2.2176296921130323,
"grad_norm": 0.026768679077660198,
"learning_rate": 3.5296377806844334e-06,
"loss": 0.006624603271484375,
"step": 1315
},
{
"epoch": 2.2260649514972584,
"grad_norm": 0.03723135315427558,
"learning_rate": 3.5139866820934687e-06,
"loss": 0.010486793518066407,
"step": 1320
},
{
"epoch": 2.2345002108814844,
"grad_norm": 0.030973900207479872,
"learning_rate": 3.498321157659248e-06,
"loss": 0.00841064453125,
"step": 1325
},
{
"epoch": 2.2429354702657105,
"grad_norm": 0.042522927349784224,
"learning_rate": 3.482641647059313e-06,
"loss": 0.010484886169433594,
"step": 1330
},
{
"epoch": 2.2513707296499366,
"grad_norm": 0.036298357689256384,
"learning_rate": 3.4669485903637452e-06,
"loss": 0.010845947265625,
"step": 1335
},
{
"epoch": 2.2598059890341626,
"grad_norm": 0.04210885166855473,
"learning_rate": 3.4512424280228227e-06,
"loss": 0.009656906127929688,
"step": 1340
},
{
"epoch": 2.2682412484183887,
"grad_norm": 0.037852259539673916,
"learning_rate": 3.435523600854652e-06,
"loss": 0.009561920166015625,
"step": 1345
},
{
"epoch": 2.2766765078026148,
"grad_norm": 0.03972030283651443,
"learning_rate": 3.4197925500327973e-06,
"loss": 0.00974578857421875,
"step": 1350
},
{
"epoch": 2.285111767186841,
"grad_norm": 0.03864567979018308,
"learning_rate": 3.4040497170739e-06,
"loss": 0.009082794189453125,
"step": 1355
},
{
"epoch": 2.293547026571067,
"grad_norm": 0.03547766099076331,
"learning_rate": 3.3882955438252852e-06,
"loss": 0.008104705810546875,
"step": 1360
},
{
"epoch": 2.301982285955293,
"grad_norm": 0.042069666240123815,
"learning_rate": 3.372530472452561e-06,
"loss": 0.010825538635253906,
"step": 1365
},
{
"epoch": 2.310417545339519,
"grad_norm": 0.030187240942476403,
"learning_rate": 3.356754945427209e-06,
"loss": 0.010921478271484375,
"step": 1370
},
{
"epoch": 2.318852804723745,
"grad_norm": 0.03775236120881388,
"learning_rate": 3.3409694055141636e-06,
"loss": 0.00971527099609375,
"step": 1375
},
{
"epoch": 2.327288064107971,
"grad_norm": 0.04517333042895106,
"learning_rate": 3.3251742957593896e-06,
"loss": 0.010394287109375,
"step": 1380
},
{
"epoch": 2.3357233234921972,
"grad_norm": 0.03441694727754078,
"learning_rate": 3.3093700594774415e-06,
"loss": 0.008525848388671875,
"step": 1385
},
{
"epoch": 2.3441585828764233,
"grad_norm": 0.039855958024762626,
"learning_rate": 3.2935571402390243e-06,
"loss": 0.01035003662109375,
"step": 1390
},
{
"epoch": 2.3525938422606494,
"grad_norm": 0.036912654679360425,
"learning_rate": 3.2777359818585453e-06,
"loss": 0.01036224365234375,
"step": 1395
},
{
"epoch": 2.3610291016448754,
"grad_norm": 0.02819486898709386,
"learning_rate": 3.2619070283816567e-06,
"loss": 0.008788299560546876,
"step": 1400
},
{
"epoch": 2.3610291016448754,
"eval_loss": 0.002835027640685439,
"eval_margin": -0.006975951657119778,
"eval_mean_neg": 0.4969240725040436,
"eval_mean_pos": 0.7925288081169128,
"eval_runtime": 364.0594,
"eval_samples_per_second": 21.944,
"eval_steps_per_second": 0.343,
"step": 1400
},
{
"epoch": 2.3694643610291015,
"grad_norm": 0.0347736325148637,
"learning_rate": 3.24607072407279e-06,
"loss": 0.00931854248046875,
"step": 1405
},
{
"epoch": 2.3778996204133276,
"grad_norm": 0.02843547221351205,
"learning_rate": 3.2302275134026902e-06,
"loss": 0.008514404296875,
"step": 1410
},
{
"epoch": 2.3863348797975537,
"grad_norm": 0.035527939183407756,
"learning_rate": 3.2143778410359414e-06,
"loss": 0.009189605712890625,
"step": 1415
},
{
"epoch": 2.3947701391817797,
"grad_norm": 0.02697400462877436,
"learning_rate": 3.1985221518184845e-06,
"loss": 0.008056259155273438,
"step": 1420
},
{
"epoch": 2.403205398566006,
"grad_norm": 0.02974726363919492,
"learning_rate": 3.1826608907651327e-06,
"loss": 0.008675384521484374,
"step": 1425
},
{
"epoch": 2.411640657950232,
"grad_norm": 0.04279127831329293,
"learning_rate": 3.1667945030470815e-06,
"loss": 0.009341812133789063,
"step": 1430
},
{
"epoch": 2.420075917334458,
"grad_norm": 0.039837807919925805,
"learning_rate": 3.1509234339794144e-06,
"loss": 0.010208892822265624,
"step": 1435
},
{
"epoch": 2.428511176718684,
"grad_norm": 0.03024657864136027,
"learning_rate": 3.1350481290086038e-06,
"loss": 0.008173370361328125,
"step": 1440
},
{
"epoch": 2.43694643610291,
"grad_norm": 0.03564318900525913,
"learning_rate": 3.119169033700011e-06,
"loss": 0.00924224853515625,
"step": 1445
},
{
"epoch": 2.445381695487136,
"grad_norm": 0.028913985964356455,
"learning_rate": 3.103286593725377e-06,
"loss": 0.008563995361328125,
"step": 1450
},
{
"epoch": 2.453816954871362,
"grad_norm": 0.035875161756803144,
"learning_rate": 3.0874012548503173e-06,
"loss": 0.009112548828125,
"step": 1455
},
{
"epoch": 2.4622522142555883,
"grad_norm": 0.03817913502015442,
"learning_rate": 3.0715134629218095e-06,
"loss": 0.007489013671875,
"step": 1460
},
{
"epoch": 2.4706874736398143,
"grad_norm": 0.03470677728941542,
"learning_rate": 3.0556236638556803e-06,
"loss": 0.012370681762695313,
"step": 1465
},
{
"epoch": 2.4791227330240404,
"grad_norm": 0.042966141209856486,
"learning_rate": 3.0397323036240886e-06,
"loss": 0.0088165283203125,
"step": 1470
},
{
"epoch": 2.4875579924082665,
"grad_norm": 0.03434953324492014,
"learning_rate": 3.023839828243012e-06,
"loss": 0.008261871337890626,
"step": 1475
},
{
"epoch": 2.4959932517924925,
"grad_norm": 0.03061507966476803,
"learning_rate": 3.007946683759723e-06,
"loss": 0.008873748779296874,
"step": 1480
},
{
"epoch": 2.5044285111767186,
"grad_norm": 0.027732115444419583,
"learning_rate": 2.9920533162402776e-06,
"loss": 0.008371734619140625,
"step": 1485
},
{
"epoch": 2.5128637705609447,
"grad_norm": 0.029951392389848317,
"learning_rate": 2.9761601717569896e-06,
"loss": 0.00865478515625,
"step": 1490
},
{
"epoch": 2.5212990299451707,
"grad_norm": 0.033652436341082566,
"learning_rate": 2.960267696375911e-06,
"loss": 0.009691619873046875,
"step": 1495
},
{
"epoch": 2.529734289329397,
"grad_norm": 0.024511774862390433,
"learning_rate": 2.9443763361443203e-06,
"loss": 0.010028076171875,
"step": 1500
},
{
"epoch": 2.529734289329397,
"eval_loss": 0.002559108193963766,
"eval_margin": -0.006972289358776423,
"eval_mean_neg": 0.4843982458114624,
"eval_mean_pos": 0.7881345152854919,
"eval_runtime": 365.5243,
"eval_samples_per_second": 21.856,
"eval_steps_per_second": 0.342,
"step": 1500
},
{
"epoch": 2.538169548713623,
"grad_norm": 0.034627287332041165,
"learning_rate": 2.9284865370781906e-06,
"loss": 0.00982513427734375,
"step": 1505
},
{
"epoch": 2.546604808097849,
"grad_norm": 0.03482839500691478,
"learning_rate": 2.9125987451496837e-06,
"loss": 0.00842742919921875,
"step": 1510
},
{
"epoch": 2.555040067482075,
"grad_norm": 0.034040276652772095,
"learning_rate": 2.8967134062746236e-06,
"loss": 0.008990859985351563,
"step": 1515
},
{
"epoch": 2.563475326866301,
"grad_norm": 0.03868034786852329,
"learning_rate": 2.8808309662999897e-06,
"loss": 0.007648468017578125,
"step": 1520
},
{
"epoch": 2.571910586250527,
"grad_norm": 0.0419385930164125,
"learning_rate": 2.864951870991397e-06,
"loss": 0.009268951416015626,
"step": 1525
},
{
"epoch": 2.580345845634753,
"grad_norm": 0.03690945718603307,
"learning_rate": 2.8490765660205857e-06,
"loss": 0.00864715576171875,
"step": 1530
},
{
"epoch": 2.5887811050189793,
"grad_norm": 0.02493335874585237,
"learning_rate": 2.833205496952919e-06,
"loss": 0.00865478515625,
"step": 1535
},
{
"epoch": 2.5972163644032054,
"grad_norm": 0.0355467734297459,
"learning_rate": 2.817339109234868e-06,
"loss": 0.009038543701171875,
"step": 1540
},
{
"epoch": 2.6056516237874314,
"grad_norm": 0.035706551369837684,
"learning_rate": 2.801477848181517e-06,
"loss": 0.008769607543945313,
"step": 1545
},
{
"epoch": 2.6140868831716575,
"grad_norm": 0.030590948482880534,
"learning_rate": 2.7856221589640584e-06,
"loss": 0.010419464111328125,
"step": 1550
},
{
"epoch": 2.6225221425558836,
"grad_norm": 0.031187166990055255,
"learning_rate": 2.7697724865973103e-06,
"loss": 0.008966064453125,
"step": 1555
},
{
"epoch": 2.6309574019401096,
"grad_norm": 0.03195446103788609,
"learning_rate": 2.753929275927211e-06,
"loss": 0.00810089111328125,
"step": 1560
},
{
"epoch": 2.6393926613243357,
"grad_norm": 0.03124766219549955,
"learning_rate": 2.7380929716183448e-06,
"loss": 0.00867919921875,
"step": 1565
},
{
"epoch": 2.6478279207085618,
"grad_norm": 0.04158743972175772,
"learning_rate": 2.722264018141455e-06,
"loss": 0.008811187744140626,
"step": 1570
},
{
"epoch": 2.656263180092788,
"grad_norm": 0.042358151513616535,
"learning_rate": 2.706442859760976e-06,
"loss": 0.008480644226074219,
"step": 1575
},
{
"epoch": 2.664698439477014,
"grad_norm": 0.02876853915749735,
"learning_rate": 2.6906299405225595e-06,
"loss": 0.009603309631347656,
"step": 1580
},
{
"epoch": 2.67313369886124,
"grad_norm": 0.031452133973887623,
"learning_rate": 2.6748257042406114e-06,
"loss": 0.008524322509765625,
"step": 1585
},
{
"epoch": 2.681568958245466,
"grad_norm": 0.03502577600676223,
"learning_rate": 2.659030594485836e-06,
"loss": 0.007845306396484375,
"step": 1590
},
{
"epoch": 2.690004217629692,
"grad_norm": 0.029358280910616305,
"learning_rate": 2.6432450545727913e-06,
"loss": 0.008304595947265625,
"step": 1595
},
{
"epoch": 2.698439477013918,
"grad_norm": 0.037226468621806945,
"learning_rate": 2.62746952754744e-06,
"loss": 0.0089141845703125,
"step": 1600
},
{
"epoch": 2.698439477013918,
"eval_loss": 0.002468662802129984,
"eval_margin": -0.006652700444383006,
"eval_mean_neg": 0.5055871605873108,
"eval_mean_pos": 0.8004181981086731,
"eval_runtime": 363.3432,
"eval_samples_per_second": 21.987,
"eval_steps_per_second": 0.344,
"step": 1600
},
{
"epoch": 2.7068747363981442,
"grad_norm": 0.029979441347867175,
"learning_rate": 2.6117044561747145e-06,
"loss": 0.007899856567382813,
"step": 1605
},
{
"epoch": 2.7153099957823703,
"grad_norm": 0.04117264280378634,
"learning_rate": 2.5959502829261e-06,
"loss": 0.009801483154296875,
"step": 1610
},
{
"epoch": 2.7237452551665964,
"grad_norm": 0.02874139529420723,
"learning_rate": 2.5802074499672033e-06,
"loss": 0.007126617431640625,
"step": 1615
},
{
"epoch": 2.7321805145508224,
"grad_norm": 0.032009387593884574,
"learning_rate": 2.564476399145349e-06,
"loss": 0.007319259643554688,
"step": 1620
},
{
"epoch": 2.7406157739350485,
"grad_norm": 0.0343660828009257,
"learning_rate": 2.5487575719771774e-06,
"loss": 0.010648345947265625,
"step": 1625
},
{
"epoch": 2.7490510333192746,
"grad_norm": 0.033859872264591424,
"learning_rate": 2.533051409636255e-06,
"loss": 0.007244110107421875,
"step": 1630
},
{
"epoch": 2.7574862927035007,
"grad_norm": 0.032973506044290384,
"learning_rate": 2.517358352940688e-06,
"loss": 0.008284759521484376,
"step": 1635
},
{
"epoch": 2.7659215520877267,
"grad_norm": 0.03481146191160576,
"learning_rate": 2.501678842340753e-06,
"loss": 0.00882110595703125,
"step": 1640
},
{
"epoch": 2.774356811471953,
"grad_norm": 0.03862588539253724,
"learning_rate": 2.4860133179065323e-06,
"loss": 0.00964202880859375,
"step": 1645
},
{
"epoch": 2.782792070856179,
"grad_norm": 0.02979780702601001,
"learning_rate": 2.4703622193155676e-06,
"loss": 0.009095001220703124,
"step": 1650
},
{
"epoch": 2.791227330240405,
"grad_norm": 0.02658002258647219,
"learning_rate": 2.4547259858405147e-06,
"loss": 0.008580398559570313,
"step": 1655
},
{
"epoch": 2.799662589624631,
"grad_norm": 0.03237100489547251,
"learning_rate": 2.439105056336816e-06,
"loss": 0.006137275695800781,
"step": 1660
},
{
"epoch": 2.808097849008857,
"grad_norm": 0.035925961611001624,
"learning_rate": 2.423499869230385e-06,
"loss": 0.006979179382324219,
"step": 1665
},
{
"epoch": 2.816533108393083,
"grad_norm": 0.028925897672990208,
"learning_rate": 2.4079108625053e-06,
"loss": 0.007439422607421875,
"step": 1670
},
{
"epoch": 2.824968367777309,
"grad_norm": 0.02643424196739614,
"learning_rate": 2.392338473691513e-06,
"loss": 0.007563400268554688,
"step": 1675
},
{
"epoch": 2.8334036271615353,
"grad_norm": 0.029469931037551172,
"learning_rate": 2.376783139852564e-06,
"loss": 0.00782928466796875,
"step": 1680
},
{
"epoch": 2.8418388865457613,
"grad_norm": 0.03519097117769341,
"learning_rate": 2.3612452975733225e-06,
"loss": 0.0081695556640625,
"step": 1685
},
{
"epoch": 2.8502741459299874,
"grad_norm": 0.041842720836538394,
"learning_rate": 2.3457253829477284e-06,
"loss": 0.00938720703125,
"step": 1690
},
{
"epoch": 2.8587094053142135,
"grad_norm": 0.02803118980318521,
"learning_rate": 2.3302238315665544e-06,
"loss": 0.007602310180664063,
"step": 1695
},
{
"epoch": 2.8671446646984395,
"grad_norm": 0.06643247372472408,
"learning_rate": 2.314741078505177e-06,
"loss": 0.009275436401367188,
"step": 1700
},
{
"epoch": 2.8671446646984395,
"eval_loss": 0.002381447935476899,
"eval_margin": -0.0063614378337778395,
"eval_mean_neg": 0.4982295334339142,
"eval_mean_pos": 0.7957465648651123,
"eval_runtime": 364.3957,
"eval_samples_per_second": 21.924,
"eval_steps_per_second": 0.343,
"step": 1700
},
{
"epoch": 2.8755799240826656,
"grad_norm": 0.04573493937998368,
"learning_rate": 2.299277558311373e-06,
"loss": 0.008275604248046875,
"step": 1705
},
{
"epoch": 2.8840151834668917,
"grad_norm": 0.030855319414577996,
"learning_rate": 2.283833704993116e-06,
"loss": 0.008497047424316406,
"step": 1710
},
{
"epoch": 2.8924504428511177,
"grad_norm": 0.03287831217925721,
"learning_rate": 2.268409952006397e-06,
"loss": 0.006939697265625,
"step": 1715
},
{
"epoch": 2.900885702235344,
"grad_norm": 0.03738971418410914,
"learning_rate": 2.253006732243061e-06,
"loss": 0.00982208251953125,
"step": 1720
},
{
"epoch": 2.90932096161957,
"grad_norm": 0.02295281003302144,
"learning_rate": 2.237624478018656e-06,
"loss": 0.00743560791015625,
"step": 1725
},
{
"epoch": 2.917756221003796,
"grad_norm": 0.03960242549923526,
"learning_rate": 2.2222636210603002e-06,
"loss": 0.008847427368164063,
"step": 1730
},
{
"epoch": 2.926191480388022,
"grad_norm": 0.04741800625952587,
"learning_rate": 2.2069245924945604e-06,
"loss": 0.009384918212890624,
"step": 1735
},
{
"epoch": 2.934626739772248,
"grad_norm": 0.03409532340357435,
"learning_rate": 2.191607822835357e-06,
"loss": 0.0076019287109375,
"step": 1740
},
{
"epoch": 2.943061999156474,
"grad_norm": 0.0239211291682541,
"learning_rate": 2.1763137419718826e-06,
"loss": 0.007954025268554687,
"step": 1745
},
{
"epoch": 2.9514972585407,
"grad_norm": 0.0255275562880085,
"learning_rate": 2.161042779156529e-06,
"loss": 0.007129669189453125,
"step": 1750
},
{
"epoch": 2.9599325179249263,
"grad_norm": 0.026100931475016395,
"learning_rate": 2.1457953629928426e-06,
"loss": 0.007111358642578125,
"step": 1755
},
{
"epoch": 2.9683677773091524,
"grad_norm": 0.03040565516608014,
"learning_rate": 2.1305719214235017e-06,
"loss": 0.00856170654296875,
"step": 1760
},
{
"epoch": 2.9768030366933784,
"grad_norm": 0.031725391054917944,
"learning_rate": 2.115372881718295e-06,
"loss": 0.00930938720703125,
"step": 1765
},
{
"epoch": 2.9852382960776045,
"grad_norm": 0.025864373534585865,
"learning_rate": 2.100198670462137e-06,
"loss": 0.007320022583007813,
"step": 1770
},
{
"epoch": 2.9936735554618306,
"grad_norm": 0.013680490985647303,
"learning_rate": 2.0850497135430897e-06,
"loss": 0.007777786254882813,
"step": 1775
},
{
"epoch": 3.001687051876845,
"grad_norm": 0.03143671946142631,
"learning_rate": 2.0699264361404174e-06,
"loss": 0.008609771728515625,
"step": 1780
},
{
"epoch": 3.010122311261071,
"grad_norm": 0.021237532660395856,
"learning_rate": 2.054829262712645e-06,
"loss": 0.007422637939453125,
"step": 1785
},
{
"epoch": 3.018557570645297,
"grad_norm": 0.02970629169587053,
"learning_rate": 2.0397586169856488e-06,
"loss": 0.008047866821289062,
"step": 1790
},
{
"epoch": 3.0269928300295232,
"grad_norm": 0.0280079357370666,
"learning_rate": 2.024714921940763e-06,
"loss": 0.008725738525390625,
"step": 1795
},
{
"epoch": 3.0354280894137493,
"grad_norm": 0.05178206206651836,
"learning_rate": 2.0096985998029124e-06,
"loss": 0.007384490966796875,
"step": 1800
},
{
"epoch": 3.0354280894137493,
"eval_loss": 0.0022954940795898438,
"eval_margin": -0.005535545939159009,
"eval_mean_neg": 0.49804064631462097,
"eval_mean_pos": 0.7978142499923706,
"eval_runtime": 362.5441,
"eval_samples_per_second": 22.036,
"eval_steps_per_second": 0.345,
"step": 1800
}
],
"logging_steps": 5,
"max_steps": 2965,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2008625567629312.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}