| { | |
| "best_global_step": 1800, | |
| "best_metric": 0.00229549, | |
| "best_model_checkpoint": "/mnt/beegfs3/liying/zhangfanhao/output1125/v1-20251125-231025/checkpoint-1800", | |
| "epoch": 3.0354280894137493, | |
| "eval_steps": 100, | |
| "global_step": 1800, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.001687051876845213, | |
| "grad_norm": 0.5938383277366354, | |
| "learning_rate": 5.999998316002012e-06, | |
| "loss": 0.380859375, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.008435259384226065, | |
| "grad_norm": 0.432332139447319, | |
| "learning_rate": 5.999957900144816e-06, | |
| "loss": 0.3326416015625, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.01687051876845213, | |
| "grad_norm": 0.24245712798777588, | |
| "learning_rate": 5.99983160176086e-06, | |
| "loss": 0.2187744140625, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.025305778152678194, | |
| "grad_norm": 0.14967602144842607, | |
| "learning_rate": 5.999621108392896e-06, | |
| "loss": 0.1771240234375, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.03374103753690426, | |
| "grad_norm": 0.11965916268612647, | |
| "learning_rate": 5.9993264259487505e-06, | |
| "loss": 0.14423828125, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04217629692113033, | |
| "grad_norm": 0.09039362542123534, | |
| "learning_rate": 5.998947562699149e-06, | |
| "loss": 0.1184326171875, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.05061155630535639, | |
| "grad_norm": 0.0761794885482189, | |
| "learning_rate": 5.998484529277483e-06, | |
| "loss": 0.108642578125, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.059046815689582456, | |
| "grad_norm": 0.07436752367684027, | |
| "learning_rate": 5.997937338679513e-06, | |
| "loss": 0.09638671875, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.06748207507380852, | |
| "grad_norm": 0.06054003854062884, | |
| "learning_rate": 5.997306006263003e-06, | |
| "loss": 0.1025146484375, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.07591733445803459, | |
| "grad_norm": 0.05715450839425674, | |
| "learning_rate": 5.996590549747288e-06, | |
| "loss": 0.0909912109375, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.08435259384226065, | |
| "grad_norm": 0.0562159873926997, | |
| "learning_rate": 5.995790989212777e-06, | |
| "loss": 0.0900390625, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.09278785322648671, | |
| "grad_norm": 0.054733644360014155, | |
| "learning_rate": 5.994907347100393e-06, | |
| "loss": 0.08599853515625, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.10122311261071278, | |
| "grad_norm": 0.04945430208391664, | |
| "learning_rate": 5.99393964821094e-06, | |
| "loss": 0.08861083984375, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.10965837199493884, | |
| "grad_norm": 0.060375343186170424, | |
| "learning_rate": 5.992887919704406e-06, | |
| "loss": 0.08037109375, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.11809363137916491, | |
| "grad_norm": 0.05113371142226039, | |
| "learning_rate": 5.991752191099203e-06, | |
| "loss": 0.07867431640625, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.12652889076339097, | |
| "grad_norm": 0.060073186423122656, | |
| "learning_rate": 5.990532494271337e-06, | |
| "loss": 0.07816162109375, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.13496415014761703, | |
| "grad_norm": 0.058832653609599356, | |
| "learning_rate": 5.989228863453515e-06, | |
| "loss": 0.08001708984375, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.1433994095318431, | |
| "grad_norm": 0.06587176624760811, | |
| "learning_rate": 5.987841335234184e-06, | |
| "loss": 0.074359130859375, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.15183466891606917, | |
| "grad_norm": 0.06491166432460505, | |
| "learning_rate": 5.9863699485565e-06, | |
| "loss": 0.0674072265625, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.16026992830029524, | |
| "grad_norm": 0.056198676389375694, | |
| "learning_rate": 5.984814744717241e-06, | |
| "loss": 0.0659912109375, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.1687051876845213, | |
| "grad_norm": 0.0673764252680421, | |
| "learning_rate": 5.983175767365646e-06, | |
| "loss": 0.063623046875, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.1687051876845213, | |
| "eval_loss": 0.0162808820605278, | |
| "eval_margin": -0.020074697267714766, | |
| "eval_mean_neg": 0.6548054814338684, | |
| "eval_mean_pos": 0.8441178202629089, | |
| "eval_runtime": 367.938, | |
| "eval_samples_per_second": 21.713, | |
| "eval_steps_per_second": 0.34, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.17714044706874738, | |
| "grad_norm": 0.06055978762872105, | |
| "learning_rate": 5.981453062502185e-06, | |
| "loss": 0.060498046875, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.18557570645297342, | |
| "grad_norm": 0.06213709235940642, | |
| "learning_rate": 5.979646678477277e-06, | |
| "loss": 0.056640625, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.19401096583719948, | |
| "grad_norm": 0.0659729457413995, | |
| "learning_rate": 5.977756665989925e-06, | |
| "loss": 0.05919189453125, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.20244622522142555, | |
| "grad_norm": 0.05897713608413389, | |
| "learning_rate": 5.9757830780862985e-06, | |
| "loss": 0.0628662109375, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.21088148460565162, | |
| "grad_norm": 0.05642517065149083, | |
| "learning_rate": 5.973725970158239e-06, | |
| "loss": 0.05245361328125, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.2193167439898777, | |
| "grad_norm": 0.06579611075607034, | |
| "learning_rate": 5.9715853999417115e-06, | |
| "loss": 0.05848388671875, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.22775200337410376, | |
| "grad_norm": 0.07448489445734133, | |
| "learning_rate": 5.969361427515179e-06, | |
| "loss": 0.0573974609375, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.23618726275832982, | |
| "grad_norm": 0.05975086799089143, | |
| "learning_rate": 5.9670541152979215e-06, | |
| "loss": 0.05091552734375, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.2446225221425559, | |
| "grad_norm": 0.06504159374670346, | |
| "learning_rate": 5.964663528048276e-06, | |
| "loss": 0.047943115234375, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.25305778152678193, | |
| "grad_norm": 0.060584307441235295, | |
| "learning_rate": 5.96218973286183e-06, | |
| "loss": 0.0493896484375, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.261493040911008, | |
| "grad_norm": 0.06234398910972033, | |
| "learning_rate": 5.959632799169529e-06, | |
| "loss": 0.04854736328125, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.26992830029523407, | |
| "grad_norm": 0.07326440644425879, | |
| "learning_rate": 5.9569927987357305e-06, | |
| "loss": 0.0443359375, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.27836355967946014, | |
| "grad_norm": 0.059873291821439245, | |
| "learning_rate": 5.954269805656194e-06, | |
| "loss": 0.04698486328125, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.2867988190636862, | |
| "grad_norm": 0.07214278446872342, | |
| "learning_rate": 5.951463896355993e-06, | |
| "loss": 0.0474639892578125, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.2952340784479123, | |
| "grad_norm": 0.06530184393433881, | |
| "learning_rate": 5.94857514958738e-06, | |
| "loss": 0.043914794921875, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.30366933783213834, | |
| "grad_norm": 0.06214586771199744, | |
| "learning_rate": 5.945603646427567e-06, | |
| "loss": 0.043475341796875, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3121045972163644, | |
| "grad_norm": 0.06764874450241058, | |
| "learning_rate": 5.9425494702764575e-06, | |
| "loss": 0.04755859375, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.3205398566005905, | |
| "grad_norm": 0.06523200399348678, | |
| "learning_rate": 5.939412706854299e-06, | |
| "loss": 0.044635009765625, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.32897511598481655, | |
| "grad_norm": 0.060102318432770876, | |
| "learning_rate": 5.9361934441992835e-06, | |
| "loss": 0.042364501953125, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.3374103753690426, | |
| "grad_norm": 0.06678207500644712, | |
| "learning_rate": 5.9328917726650706e-06, | |
| "loss": 0.04183349609375, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3374103753690426, | |
| "eval_loss": 0.010425936430692673, | |
| "eval_margin": -0.016463442112229044, | |
| "eval_mean_neg": 0.5932909250259399, | |
| "eval_mean_pos": 0.8196097016334534, | |
| "eval_runtime": 365.6666, | |
| "eval_samples_per_second": 21.848, | |
| "eval_steps_per_second": 0.342, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3458456347532687, | |
| "grad_norm": 0.05626492604909855, | |
| "learning_rate": 5.929507784918257e-06, | |
| "loss": 0.040447998046875, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.35428089413749475, | |
| "grad_norm": 0.054176681030320105, | |
| "learning_rate": 5.926041575935772e-06, | |
| "loss": 0.037396240234375, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.3627161535217208, | |
| "grad_norm": 0.06953999336709471, | |
| "learning_rate": 5.922493243002212e-06, | |
| "loss": 0.042828369140625, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.37115141290594683, | |
| "grad_norm": 0.05589129508252642, | |
| "learning_rate": 5.918862885707113e-06, | |
| "loss": 0.034979248046875, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.3795866722901729, | |
| "grad_norm": 0.07078214617147234, | |
| "learning_rate": 5.915150605942153e-06, | |
| "loss": 0.035723876953125, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.38802193167439897, | |
| "grad_norm": 0.06815732953530805, | |
| "learning_rate": 5.911356507898291e-06, | |
| "loss": 0.041973876953125, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.39645719105862504, | |
| "grad_norm": 0.062472935047014386, | |
| "learning_rate": 5.907480698062848e-06, | |
| "loss": 0.0356689453125, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.4048924504428511, | |
| "grad_norm": 0.06665756879409568, | |
| "learning_rate": 5.90352328521651e-06, | |
| "loss": 0.036456298828125, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.41332770982707717, | |
| "grad_norm": 0.06681598226193439, | |
| "learning_rate": 5.899484380430284e-06, | |
| "loss": 0.0343994140625, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.42176296921130324, | |
| "grad_norm": 0.060740413400477374, | |
| "learning_rate": 5.895364097062374e-06, | |
| "loss": 0.0318511962890625, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.4301982285955293, | |
| "grad_norm": 0.06545743307605277, | |
| "learning_rate": 5.8911625507550015e-06, | |
| "loss": 0.034765625, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.4386334879797554, | |
| "grad_norm": 0.0673664786591912, | |
| "learning_rate": 5.88687985943116e-06, | |
| "loss": 0.03580322265625, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.44706874736398144, | |
| "grad_norm": 0.06937993663032453, | |
| "learning_rate": 5.882516143291308e-06, | |
| "loss": 0.036236572265625, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.4555040067482075, | |
| "grad_norm": 0.0639250177544625, | |
| "learning_rate": 5.878071524809988e-06, | |
| "loss": 0.0317962646484375, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.4639392661324336, | |
| "grad_norm": 0.06037822600018219, | |
| "learning_rate": 5.873546128732399e-06, | |
| "loss": 0.0323699951171875, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.47237452551665965, | |
| "grad_norm": 0.060357976056049485, | |
| "learning_rate": 5.868940082070885e-06, | |
| "loss": 0.033660888671875, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.4808097849008857, | |
| "grad_norm": 0.061037172126093234, | |
| "learning_rate": 5.8642535141013785e-06, | |
| "loss": 0.0297515869140625, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.4892450442851118, | |
| "grad_norm": 0.0524126813526148, | |
| "learning_rate": 5.859486556359768e-06, | |
| "loss": 0.028472900390625, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.49768030366933785, | |
| "grad_norm": 0.062026009465912704, | |
| "learning_rate": 5.854639342638208e-06, | |
| "loss": 0.030718994140625, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.5061155630535639, | |
| "grad_norm": 0.05866098788599579, | |
| "learning_rate": 5.849712008981361e-06, | |
| "loss": 0.032916259765625, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5061155630535639, | |
| "eval_loss": 0.007868120446801186, | |
| "eval_margin": -0.013977996595654517, | |
| "eval_mean_neg": 0.5548827648162842, | |
| "eval_mean_pos": 0.793705403804779, | |
| "eval_runtime": 364.6437, | |
| "eval_samples_per_second": 21.909, | |
| "eval_steps_per_second": 0.343, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5145508224377899, | |
| "grad_norm": 0.05968132039231295, | |
| "learning_rate": 5.844704693682583e-06, | |
| "loss": 0.0292724609375, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.522986081822016, | |
| "grad_norm": 0.06038138238675174, | |
| "learning_rate": 5.8396175372800405e-06, | |
| "loss": 0.030743408203125, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.5314213412062421, | |
| "grad_norm": 0.06052295196543659, | |
| "learning_rate": 5.834450682552765e-06, | |
| "loss": 0.030194091796875, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.5398566005904681, | |
| "grad_norm": 0.05539528727202974, | |
| "learning_rate": 5.829204274516648e-06, | |
| "loss": 0.0312774658203125, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.5482918599746942, | |
| "grad_norm": 0.052508369724972796, | |
| "learning_rate": 5.823878460420366e-06, | |
| "loss": 0.0295318603515625, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.5567271193589203, | |
| "grad_norm": 0.05151880865825463, | |
| "learning_rate": 5.8184733897412565e-06, | |
| "loss": 0.028912353515625, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.5651623787431463, | |
| "grad_norm": 0.0624220665428448, | |
| "learning_rate": 5.812989214181113e-06, | |
| "loss": 0.027313232421875, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.5735976381273724, | |
| "grad_norm": 0.06481057308539884, | |
| "learning_rate": 5.807426087661934e-06, | |
| "loss": 0.02608642578125, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.5820328975115985, | |
| "grad_norm": 0.06109467057046473, | |
| "learning_rate": 5.8017841663216e-06, | |
| "loss": 0.0282989501953125, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.5904681568958245, | |
| "grad_norm": 0.062107444796084835, | |
| "learning_rate": 5.796063608509493e-06, | |
| "loss": 0.0277069091796875, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5989034162800506, | |
| "grad_norm": 0.0552072139581444, | |
| "learning_rate": 5.7902645747820485e-06, | |
| "loss": 0.028399658203125, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.6073386756642767, | |
| "grad_norm": 0.06047980839414296, | |
| "learning_rate": 5.784387227898254e-06, | |
| "loss": 0.0281524658203125, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.6157739350485028, | |
| "grad_norm": 0.05336288606895412, | |
| "learning_rate": 5.778431732815078e-06, | |
| "loss": 0.02484130859375, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.6242091944327288, | |
| "grad_norm": 0.060745200996401724, | |
| "learning_rate": 5.77239825668284e-06, | |
| "loss": 0.02640380859375, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.6326444538169549, | |
| "grad_norm": 0.048268694566304324, | |
| "learning_rate": 5.766286968840522e-06, | |
| "loss": 0.0278717041015625, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.641079713201181, | |
| "grad_norm": 0.05424806603710711, | |
| "learning_rate": 5.760098040811012e-06, | |
| "loss": 0.0271453857421875, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.649514972585407, | |
| "grad_norm": 0.054535443289609395, | |
| "learning_rate": 5.7538316462962935e-06, | |
| "loss": 0.026611328125, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.6579502319696331, | |
| "grad_norm": 0.06967389025087475, | |
| "learning_rate": 5.7474879611725655e-06, | |
| "loss": 0.02589111328125, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.6663854913538592, | |
| "grad_norm": 0.06024092137696802, | |
| "learning_rate": 5.741067163485314e-06, | |
| "loss": 0.0193756103515625, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.6748207507380852, | |
| "grad_norm": 0.05981804001044263, | |
| "learning_rate": 5.7345694334443066e-06, | |
| "loss": 0.0205718994140625, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6748207507380852, | |
| "eval_loss": 0.006414474919438362, | |
| "eval_margin": -0.013447051244457402, | |
| "eval_mean_neg": 0.5324161052703857, | |
| "eval_mean_pos": 0.7909372448921204, | |
| "eval_runtime": 365.0145, | |
| "eval_samples_per_second": 21.887, | |
| "eval_steps_per_second": 0.342, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6832560101223113, | |
| "grad_norm": 0.06692561927901217, | |
| "learning_rate": 5.727994953418538e-06, | |
| "loss": 0.022021484375, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.6916912695065374, | |
| "grad_norm": 0.06609269963808409, | |
| "learning_rate": 5.721343907931114e-06, | |
| "loss": 0.02950592041015625, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.7001265288907634, | |
| "grad_norm": 0.052617111357424175, | |
| "learning_rate": 5.71461648365407e-06, | |
| "loss": 0.025189208984375, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.7085617882749895, | |
| "grad_norm": 0.04860971480260525, | |
| "learning_rate": 5.707812869403128e-06, | |
| "loss": 0.022052001953125, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.7169970476592156, | |
| "grad_norm": 0.06030454097987917, | |
| "learning_rate": 5.7009332561324085e-06, | |
| "loss": 0.0219390869140625, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.7254323070434416, | |
| "grad_norm": 0.06837586048390999, | |
| "learning_rate": 5.693977836929057e-06, | |
| "loss": 0.0270172119140625, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.7338675664276677, | |
| "grad_norm": 0.05197492190608033, | |
| "learning_rate": 5.686946807007834e-06, | |
| "loss": 0.02206878662109375, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.7423028258118937, | |
| "grad_norm": 0.06128713786873146, | |
| "learning_rate": 5.679840363705637e-06, | |
| "loss": 0.0244720458984375, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.7507380851961197, | |
| "grad_norm": 0.0629198604819534, | |
| "learning_rate": 5.672658706475953e-06, | |
| "loss": 0.0194488525390625, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.7591733445803458, | |
| "grad_norm": 0.05502172045134509, | |
| "learning_rate": 5.665402036883267e-06, | |
| "loss": 0.0225250244140625, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.7676086039645719, | |
| "grad_norm": 0.06119000768724386, | |
| "learning_rate": 5.658070558597408e-06, | |
| "loss": 0.01928558349609375, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.7760438633487979, | |
| "grad_norm": 0.058834092769235756, | |
| "learning_rate": 5.650664477387824e-06, | |
| "loss": 0.02149658203125, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.784479122733024, | |
| "grad_norm": 0.06942758384696321, | |
| "learning_rate": 5.643184001117811e-06, | |
| "loss": 0.0266326904296875, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.7929143821172501, | |
| "grad_norm": 0.05395397336586372, | |
| "learning_rate": 5.6356293397386836e-06, | |
| "loss": 0.0206085205078125, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.8013496415014761, | |
| "grad_norm": 0.057301086470950384, | |
| "learning_rate": 5.628000705283873e-06, | |
| "loss": 0.021770477294921875, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.8097849008857022, | |
| "grad_norm": 0.058618795566843934, | |
| "learning_rate": 5.620298311862985e-06, | |
| "loss": 0.0174072265625, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.8182201602699283, | |
| "grad_norm": 0.053997897902853975, | |
| "learning_rate": 5.612522375655783e-06, | |
| "loss": 0.0246124267578125, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.8266554196541543, | |
| "grad_norm": 0.058293384553658546, | |
| "learning_rate": 5.604673114906126e-06, | |
| "loss": 0.0239288330078125, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.8350906790383804, | |
| "grad_norm": 0.062099166751088966, | |
| "learning_rate": 5.596750749915842e-06, | |
| "loss": 0.023724365234375, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.8435259384226065, | |
| "grad_norm": 0.0518337334475497, | |
| "learning_rate": 5.588755503038543e-06, | |
| "loss": 0.01995849609375, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8435259384226065, | |
| "eval_loss": 0.00542406877502799, | |
| "eval_margin": -0.010786364688688228, | |
| "eval_mean_neg": 0.5346763134002686, | |
| "eval_mean_pos": 0.7906754016876221, | |
| "eval_runtime": 367.5462, | |
| "eval_samples_per_second": 21.736, | |
| "eval_steps_per_second": 0.34, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8519611978068325, | |
| "grad_norm": 0.0579368996460804, | |
| "learning_rate": 5.580687598673387e-06, | |
| "loss": 0.02121734619140625, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.8603964571910586, | |
| "grad_norm": 0.05214070956939639, | |
| "learning_rate": 5.572547263258776e-06, | |
| "loss": 0.0197113037109375, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.8688317165752847, | |
| "grad_norm": 0.0687906199565583, | |
| "learning_rate": 5.564334725266006e-06, | |
| "loss": 0.0217254638671875, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.8772669759595108, | |
| "grad_norm": 0.051621267659708626, | |
| "learning_rate": 5.55605021519285e-06, | |
| "loss": 0.019158935546875, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.8857022353437368, | |
| "grad_norm": 0.05599957001213385, | |
| "learning_rate": 5.547693965557092e-06, | |
| "loss": 0.0195770263671875, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.8941374947279629, | |
| "grad_norm": 0.06034671456944424, | |
| "learning_rate": 5.539266210889997e-06, | |
| "loss": 0.0231231689453125, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.902572754112189, | |
| "grad_norm": 0.04518349407201743, | |
| "learning_rate": 5.5307671877297326e-06, | |
| "loss": 0.0208709716796875, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.911008013496415, | |
| "grad_norm": 0.0503478793140038, | |
| "learning_rate": 5.522197134614728e-06, | |
| "loss": 0.0209930419921875, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.9194432728806411, | |
| "grad_norm": 0.046047217532892024, | |
| "learning_rate": 5.513556292076981e-06, | |
| "loss": 0.0175750732421875, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.9278785322648672, | |
| "grad_norm": 0.05575253759567789, | |
| "learning_rate": 5.504844902635303e-06, | |
| "loss": 0.0171112060546875, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.9363137916490932, | |
| "grad_norm": 0.04687503220455111, | |
| "learning_rate": 5.496063210788519e-06, | |
| "loss": 0.0167633056640625, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.9447490510333193, | |
| "grad_norm": 0.04891593875536363, | |
| "learning_rate": 5.487211463008597e-06, | |
| "loss": 0.019036865234375, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.9531843104175454, | |
| "grad_norm": 0.04841249311058062, | |
| "learning_rate": 5.478289907733738e-06, | |
| "loss": 0.01807098388671875, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.9616195698017714, | |
| "grad_norm": 0.060373651634708765, | |
| "learning_rate": 5.469298795361397e-06, | |
| "loss": 0.015673828125, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.9700548291859975, | |
| "grad_norm": 0.051868174671481436, | |
| "learning_rate": 5.460238378241262e-06, | |
| "loss": 0.01802978515625, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.9784900885702236, | |
| "grad_norm": 0.051146316151485995, | |
| "learning_rate": 5.451108910668163e-06, | |
| "loss": 0.01664581298828125, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.9869253479544496, | |
| "grad_norm": 0.04017649470362814, | |
| "learning_rate": 5.441910648874945e-06, | |
| "loss": 0.016483306884765625, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.9953606073386757, | |
| "grad_norm": 0.04457228909606784, | |
| "learning_rate": 5.4326438510252655e-06, | |
| "loss": 0.0192718505859375, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.0033741037536905, | |
| "grad_norm": 0.053005736672298354, | |
| "learning_rate": 5.423308777206357e-06, | |
| "loss": 0.015604400634765625, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.0118093631379166, | |
| "grad_norm": 0.05754347568157857, | |
| "learning_rate": 5.413905689421722e-06, | |
| "loss": 0.0159515380859375, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.0118093631379166, | |
| "eval_loss": 0.004593910649418831, | |
| "eval_margin": -0.011103880922159842, | |
| "eval_mean_neg": 0.5064941644668579, | |
| "eval_mean_pos": 0.7831713557243347, | |
| "eval_runtime": 364.5429, | |
| "eval_samples_per_second": 21.915, | |
| "eval_steps_per_second": 0.343, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.0202446225221427, | |
| "grad_norm": 0.04962686662442784, | |
| "learning_rate": 5.404434851583785e-06, | |
| "loss": 0.01360015869140625, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.0286798819063687, | |
| "grad_norm": 0.046926535788142015, | |
| "learning_rate": 5.394896529506479e-06, | |
| "loss": 0.01566925048828125, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.0371151412905948, | |
| "grad_norm": 0.043042108440633, | |
| "learning_rate": 5.38529099089779e-06, | |
| "loss": 0.0128326416015625, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.0455504006748209, | |
| "grad_norm": 0.049749099013614635, | |
| "learning_rate": 5.375618505352241e-06, | |
| "loss": 0.0136383056640625, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.053985660059047, | |
| "grad_norm": 0.04945151693616336, | |
| "learning_rate": 5.365879344343326e-06, | |
| "loss": 0.01544036865234375, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.062420919443273, | |
| "grad_norm": 0.04626935309793636, | |
| "learning_rate": 5.35607378121589e-06, | |
| "loss": 0.0143829345703125, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.070856178827499, | |
| "grad_norm": 0.04580735975264899, | |
| "learning_rate": 5.346202091178459e-06, | |
| "loss": 0.014122772216796874, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.079291438211725, | |
| "grad_norm": 0.046216725385350446, | |
| "learning_rate": 5.336264551295512e-06, | |
| "loss": 0.014672088623046874, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.087726697595951, | |
| "grad_norm": 0.04564303944680029, | |
| "learning_rate": 5.326261440479709e-06, | |
| "loss": 0.0136993408203125, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.096161956980177, | |
| "grad_norm": 0.05114495970312972, | |
| "learning_rate": 5.316193039484063e-06, | |
| "loss": 0.0147705078125, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.1045972163644031, | |
| "grad_norm": 0.044105955284847585, | |
| "learning_rate": 5.306059630894056e-06, | |
| "loss": 0.015480804443359374, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.1130324757486292, | |
| "grad_norm": 0.045182200484827885, | |
| "learning_rate": 5.295861499119711e-06, | |
| "loss": 0.013404083251953126, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.1214677351328552, | |
| "grad_norm": 0.04324759296793784, | |
| "learning_rate": 5.2855989303876065e-06, | |
| "loss": 0.01672821044921875, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.1299029945170813, | |
| "grad_norm": 0.03328038907845692, | |
| "learning_rate": 5.275272212732849e-06, | |
| "loss": 0.01335906982421875, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.1383382539013074, | |
| "grad_norm": 0.044225327184826406, | |
| "learning_rate": 5.264881635990984e-06, | |
| "loss": 0.012935638427734375, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.1467735132855335, | |
| "grad_norm": 0.04645591264342837, | |
| "learning_rate": 5.2544274917898615e-06, | |
| "loss": 0.01385498046875, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.1552087726697595, | |
| "grad_norm": 0.05453216622664439, | |
| "learning_rate": 5.243910073541454e-06, | |
| "loss": 0.016290283203125, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.1636440320539856, | |
| "grad_norm": 0.057731965028177075, | |
| "learning_rate": 5.233329676433617e-06, | |
| "loss": 0.0145355224609375, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.1720792914382117, | |
| "grad_norm": 0.05145183297720149, | |
| "learning_rate": 5.222686597421808e-06, | |
| "loss": 0.01390838623046875, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.1805145508224377, | |
| "grad_norm": 0.04021056012812571, | |
| "learning_rate": 5.211981135220751e-06, | |
| "loss": 0.01344757080078125, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.1805145508224377, | |
| "eval_loss": 0.004191060084849596, | |
| "eval_margin": -0.010819014589933137, | |
| "eval_mean_neg": 0.4851545989513397, | |
| "eval_mean_pos": 0.7733471989631653, | |
| "eval_runtime": 359.0481, | |
| "eval_samples_per_second": 22.251, | |
| "eval_steps_per_second": 0.348, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.1889498102066638, | |
| "grad_norm": 0.0416204676277527, | |
| "learning_rate": 5.201213590296052e-06, | |
| "loss": 0.014748382568359374, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.1973850695908899, | |
| "grad_norm": 0.05633713089091016, | |
| "learning_rate": 5.190384264855764e-06, | |
| "loss": 0.014013671875, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.205820328975116, | |
| "grad_norm": 0.05143948467095745, | |
| "learning_rate": 5.1794934628419104e-06, | |
| "loss": 0.015460205078125, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.214255588359342, | |
| "grad_norm": 0.05227911954680101, | |
| "learning_rate": 5.168541489921949e-06, | |
| "loss": 0.01507415771484375, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.222690847743568, | |
| "grad_norm": 0.058608960783147375, | |
| "learning_rate": 5.1575286534801955e-06, | |
| "loss": 0.01417236328125, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.2311261071277941, | |
| "grad_norm": 0.04818858161693878, | |
| "learning_rate": 5.146455262609197e-06, | |
| "loss": 0.013425445556640625, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.2395613665120202, | |
| "grad_norm": 0.05406749848988645, | |
| "learning_rate": 5.1353216281010535e-06, | |
| "loss": 0.013022613525390626, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.2479966258962463, | |
| "grad_norm": 0.044408669007062154, | |
| "learning_rate": 5.1241280624387e-06, | |
| "loss": 0.01393585205078125, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.2564318852804723, | |
| "grad_norm": 0.04519048638967848, | |
| "learning_rate": 5.1128748797871314e-06, | |
| "loss": 0.013826751708984375, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.2648671446646984, | |
| "grad_norm": 0.0491460974626283, | |
| "learning_rate": 5.101562395984587e-06, | |
| "loss": 0.01336212158203125, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.2733024040489245, | |
| "grad_norm": 0.04356609182045035, | |
| "learning_rate": 5.090190928533689e-06, | |
| "loss": 0.01492156982421875, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.2817376634331505, | |
| "grad_norm": 0.03556136795064142, | |
| "learning_rate": 5.078760796592524e-06, | |
| "loss": 0.0125732421875, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.2901729228173766, | |
| "grad_norm": 0.04189977738590891, | |
| "learning_rate": 5.067272320965692e-06, | |
| "loss": 0.0149322509765625, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.2986081822016027, | |
| "grad_norm": 0.05051201336701144, | |
| "learning_rate": 5.055725824095301e-06, | |
| "loss": 0.01419525146484375, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.3070434415858287, | |
| "grad_norm": 0.0416942039130722, | |
| "learning_rate": 5.0441216300519126e-06, | |
| "loss": 0.01274261474609375, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.3154787009700548, | |
| "grad_norm": 0.04629875001130603, | |
| "learning_rate": 5.032460064525455e-06, | |
| "loss": 0.01363525390625, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.3239139603542809, | |
| "grad_norm": 0.03704688355237128, | |
| "learning_rate": 5.020741454816074e-06, | |
| "loss": 0.01301422119140625, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.332349219738507, | |
| "grad_norm": 0.03742406408262459, | |
| "learning_rate": 5.00896612982495e-06, | |
| "loss": 0.01353302001953125, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.340784479122733, | |
| "grad_norm": 0.050480726423335516, | |
| "learning_rate": 4.99713442004507e-06, | |
| "loss": 0.01196746826171875, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.349219738506959, | |
| "grad_norm": 0.03808846024736694, | |
| "learning_rate": 4.985246657551943e-06, | |
| "loss": 0.0110015869140625, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.349219738506959, | |
| "eval_loss": 0.003908403683453798, | |
| "eval_margin": -0.010123856463319352, | |
| "eval_mean_neg": 0.49688851833343506, | |
| "eval_mean_pos": 0.7784863114356995, | |
| "eval_runtime": 364.0137, | |
| "eval_samples_per_second": 21.947, | |
| "eval_steps_per_second": 0.343, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.3576549978911852, | |
| "grad_norm": 0.04637758927467518, | |
| "learning_rate": 4.973303175994289e-06, | |
| "loss": 0.013458251953125, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.3660902572754112, | |
| "grad_norm": 0.05066098296531039, | |
| "learning_rate": 4.961304310584674e-06, | |
| "loss": 0.01515960693359375, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.3745255166596373, | |
| "grad_norm": 0.038530384714911596, | |
| "learning_rate": 4.949250398090092e-06, | |
| "loss": 0.011260223388671876, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.3829607760438634, | |
| "grad_norm": 0.040188601844867354, | |
| "learning_rate": 4.937141776822525e-06, | |
| "loss": 0.0158447265625, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.3913960354280894, | |
| "grad_norm": 0.03574613677300634, | |
| "learning_rate": 4.92497878662944e-06, | |
| "loss": 0.011143875122070313, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.3998312948123155, | |
| "grad_norm": 0.05019423126073816, | |
| "learning_rate": 4.912761768884255e-06, | |
| "loss": 0.01179351806640625, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.4082665541965416, | |
| "grad_norm": 0.04311116805857567, | |
| "learning_rate": 4.9004910664767545e-06, | |
| "loss": 0.01372833251953125, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.4167018135807676, | |
| "grad_norm": 0.04928580588462512, | |
| "learning_rate": 4.888167023803468e-06, | |
| "loss": 0.01297607421875, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.4251370729649937, | |
| "grad_norm": 0.054968450905918724, | |
| "learning_rate": 4.8757899867580046e-06, | |
| "loss": 0.014654541015625, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.4335723323492198, | |
| "grad_norm": 0.050366347428194534, | |
| "learning_rate": 4.86336030272134e-06, | |
| "loss": 0.011295318603515625, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.4420075917334458, | |
| "grad_norm": 0.05107215089989217, | |
| "learning_rate": 4.850878320552076e-06, | |
| "loss": 0.01334228515625, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.450442851117672, | |
| "grad_norm": 0.0391963683003482, | |
| "learning_rate": 4.838344390576638e-06, | |
| "loss": 0.01104736328125, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.458878110501898, | |
| "grad_norm": 0.03985676744245212, | |
| "learning_rate": 4.825758864579452e-06, | |
| "loss": 0.013307952880859375, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.467313369886124, | |
| "grad_norm": 0.04852757651119817, | |
| "learning_rate": 4.813122095793066e-06, | |
| "loss": 0.014328384399414062, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.47574862927035, | |
| "grad_norm": 0.0454254941425111, | |
| "learning_rate": 4.800434438888235e-06, | |
| "loss": 0.012960052490234375, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.4841838886545762, | |
| "grad_norm": 0.03868230007157653, | |
| "learning_rate": 4.787696249963974e-06, | |
| "loss": 0.01402740478515625, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.4926191480388022, | |
| "grad_norm": 0.05289135869423979, | |
| "learning_rate": 4.774907886537553e-06, | |
| "loss": 0.013831901550292968, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.501054407423028, | |
| "grad_norm": 0.04594308680556284, | |
| "learning_rate": 4.7620697075344736e-06, | |
| "loss": 0.012446975708007813, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.5094896668072542, | |
| "grad_norm": 0.048917845490978454, | |
| "learning_rate": 4.7491820732783866e-06, | |
| "loss": 0.011295318603515625, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.5179249261914802, | |
| "grad_norm": 0.043266255463378436, | |
| "learning_rate": 4.73624534548098e-06, | |
| "loss": 0.01407012939453125, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.5179249261914802, | |
| "eval_loss": 0.0036048581823706627, | |
| "eval_margin": -0.009617562525935711, | |
| "eval_mean_neg": 0.4904225468635559, | |
| "eval_mean_pos": 0.7793014049530029, | |
| "eval_runtime": 363.5397, | |
| "eval_samples_per_second": 21.976, | |
| "eval_steps_per_second": 0.344, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.5263601855757063, | |
| "grad_norm": 0.04363576408467007, | |
| "learning_rate": 4.723259887231835e-06, | |
| "loss": 0.0138519287109375, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.5347954449599324, | |
| "grad_norm": 0.04199459687850267, | |
| "learning_rate": 4.710226062988223e-06, | |
| "loss": 0.01312255859375, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.5432307043441584, | |
| "grad_norm": 0.047436231412077354, | |
| "learning_rate": 4.697144238564889e-06, | |
| "loss": 0.01208648681640625, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.5516659637283845, | |
| "grad_norm": 0.04092453404900873, | |
| "learning_rate": 4.684014781123775e-06, | |
| "loss": 0.012505340576171874, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.5601012231126106, | |
| "grad_norm": 0.045645370405214956, | |
| "learning_rate": 4.6708380591637166e-06, | |
| "loss": 0.0120208740234375, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.5685364824968366, | |
| "grad_norm": 0.04911154284719614, | |
| "learning_rate": 4.6576144425101076e-06, | |
| "loss": 0.013311767578125, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.5769717418810627, | |
| "grad_norm": 0.045881762593597546, | |
| "learning_rate": 4.64434430230451e-06, | |
| "loss": 0.012969207763671876, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 1.5854070012652888, | |
| "grad_norm": 0.04728445094523914, | |
| "learning_rate": 4.631028010994245e-06, | |
| "loss": 0.01099395751953125, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.5938422606495148, | |
| "grad_norm": 0.03903116673162643, | |
| "learning_rate": 4.617665942321937e-06, | |
| "loss": 0.0129608154296875, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 1.602277520033741, | |
| "grad_norm": 0.040499425484585065, | |
| "learning_rate": 4.6042584713150225e-06, | |
| "loss": 0.009827423095703124, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.610712779417967, | |
| "grad_norm": 0.047017092872005554, | |
| "learning_rate": 4.590805974275228e-06, | |
| "loss": 0.01045989990234375, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 1.619148038802193, | |
| "grad_norm": 0.03869016761931018, | |
| "learning_rate": 4.577308828768005e-06, | |
| "loss": 0.011346435546875, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.6275832981864191, | |
| "grad_norm": 0.05726216064413269, | |
| "learning_rate": 4.563767413611932e-06, | |
| "loss": 0.01296844482421875, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 1.6360185575706452, | |
| "grad_norm": 0.034971593802495975, | |
| "learning_rate": 4.550182108868089e-06, | |
| "loss": 0.01379852294921875, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.6444538169548713, | |
| "grad_norm": 0.04877425067250454, | |
| "learning_rate": 4.536553295829384e-06, | |
| "loss": 0.012924957275390624, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 1.6528890763390973, | |
| "grad_norm": 0.03927648322180213, | |
| "learning_rate": 4.522881357009853e-06, | |
| "loss": 0.01293792724609375, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.6613243357233234, | |
| "grad_norm": 0.024976847462424127, | |
| "learning_rate": 4.5091666761339275e-06, | |
| "loss": 0.009877777099609375, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 1.6697595951075495, | |
| "grad_norm": 0.03945379802090875, | |
| "learning_rate": 4.495409638125657e-06, | |
| "loss": 0.01130523681640625, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.6781948544917755, | |
| "grad_norm": 0.03430320161614481, | |
| "learning_rate": 4.481610629097917e-06, | |
| "loss": 0.009923553466796875, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 1.6866301138760016, | |
| "grad_norm": 0.03895065600017937, | |
| "learning_rate": 4.46777003634156e-06, | |
| "loss": 0.01330413818359375, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.6866301138760016, | |
| "eval_loss": 0.0034073551651090384, | |
| "eval_margin": -0.009528953300398444, | |
| "eval_mean_neg": 0.4926661550998688, | |
| "eval_mean_pos": 0.7842009663581848, | |
| "eval_runtime": 367.0219, | |
| "eval_samples_per_second": 21.767, | |
| "eval_steps_per_second": 0.341, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.6950653732602277, | |
| "grad_norm": 0.04302786223265218, | |
| "learning_rate": 4.453888248314553e-06, | |
| "loss": 0.01107330322265625, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 1.7035006326444537, | |
| "grad_norm": 0.04002206909489744, | |
| "learning_rate": 4.439965654631073e-06, | |
| "loss": 0.0105499267578125, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.7119358920286798, | |
| "grad_norm": 0.04439497813433074, | |
| "learning_rate": 4.426002646050574e-06, | |
| "loss": 0.010544586181640624, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 1.7203711514129059, | |
| "grad_norm": 0.043341839034531496, | |
| "learning_rate": 4.411999614466812e-06, | |
| "loss": 0.0125335693359375, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.728806410797132, | |
| "grad_norm": 0.03449321841295583, | |
| "learning_rate": 4.397956952896858e-06, | |
| "loss": 0.010623550415039063, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 1.737241670181358, | |
| "grad_norm": 0.041185961783139574, | |
| "learning_rate": 4.383875055470055e-06, | |
| "loss": 0.01031951904296875, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.745676929565584, | |
| "grad_norm": 0.04627446953615271, | |
| "learning_rate": 4.3697543174169675e-06, | |
| "loss": 0.01590385437011719, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 1.7541121889498101, | |
| "grad_norm": 0.04582345634360075, | |
| "learning_rate": 4.355595135058278e-06, | |
| "loss": 0.0119537353515625, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.7625474483340362, | |
| "grad_norm": 0.033580437424405536, | |
| "learning_rate": 4.3413979057936715e-06, | |
| "loss": 0.01235198974609375, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 1.7709827077182623, | |
| "grad_norm": 0.03545606353671419, | |
| "learning_rate": 4.32716302809068e-06, | |
| "loss": 0.012863922119140624, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.7794179671024883, | |
| "grad_norm": 0.03491571698794484, | |
| "learning_rate": 4.312890901473496e-06, | |
| "loss": 0.01035614013671875, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 1.7878532264867144, | |
| "grad_norm": 0.04391496148899165, | |
| "learning_rate": 4.29858192651176e-06, | |
| "loss": 0.011370468139648437, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.7962884858709405, | |
| "grad_norm": 0.049338016603549396, | |
| "learning_rate": 4.284236504809324e-06, | |
| "loss": 0.011846160888671875, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 1.8047237452551665, | |
| "grad_norm": 0.035387852478552806, | |
| "learning_rate": 4.269855038992971e-06, | |
| "loss": 0.011142349243164063, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.8131590046393926, | |
| "grad_norm": 0.043891210942711104, | |
| "learning_rate": 4.2554379327011196e-06, | |
| "loss": 0.011545944213867187, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 1.8215942640236187, | |
| "grad_norm": 0.040327331551499056, | |
| "learning_rate": 4.240985590572496e-06, | |
| "loss": 0.00897674560546875, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.8300295234078447, | |
| "grad_norm": 0.03274271686886844, | |
| "learning_rate": 4.226498418234771e-06, | |
| "loss": 0.01215667724609375, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 1.8384647827920708, | |
| "grad_norm": 0.04375742422856697, | |
| "learning_rate": 4.2119768222931865e-06, | |
| "loss": 0.0109588623046875, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.8469000421762969, | |
| "grad_norm": 0.036163256401816654, | |
| "learning_rate": 4.19742121031913e-06, | |
| "loss": 0.012054443359375, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 1.855335301560523, | |
| "grad_norm": 0.04078407955383746, | |
| "learning_rate": 4.182831990838709e-06, | |
| "loss": 0.0132843017578125, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.855335301560523, | |
| "eval_loss": 0.003225065069273114, | |
| "eval_margin": -0.008749207222623932, | |
| "eval_mean_neg": 0.49084940552711487, | |
| "eval_mean_pos": 0.7849159836769104, | |
| "eval_runtime": 366.687, | |
| "eval_samples_per_second": 21.787, | |
| "eval_steps_per_second": 0.341, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.863770560944749, | |
| "grad_norm": 0.047827239751426935, | |
| "learning_rate": 4.168209573321271e-06, | |
| "loss": 0.0133697509765625, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 1.872205820328975, | |
| "grad_norm": 0.0274823880547768, | |
| "learning_rate": 4.153554368167927e-06, | |
| "loss": 0.010877227783203125, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.8806410797132012, | |
| "grad_norm": 0.052787755841206804, | |
| "learning_rate": 4.138866786700016e-06, | |
| "loss": 0.0139434814453125, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 1.8890763390974272, | |
| "grad_norm": 0.029629846825489692, | |
| "learning_rate": 4.124147241147577e-06, | |
| "loss": 0.011189651489257813, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.8975115984816533, | |
| "grad_norm": 0.039855575258898726, | |
| "learning_rate": 4.109396144637764e-06, | |
| "loss": 0.010993194580078126, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 1.9059468578658794, | |
| "grad_norm": 0.03789188882991695, | |
| "learning_rate": 4.094613911183265e-06, | |
| "loss": 0.01313323974609375, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.9143821172501054, | |
| "grad_norm": 0.03482605825228896, | |
| "learning_rate": 4.0798009556706685e-06, | |
| "loss": 0.008492279052734374, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 1.9228173766343315, | |
| "grad_norm": 0.0395626147511318, | |
| "learning_rate": 4.064957693848831e-06, | |
| "loss": 0.011167144775390625, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.9312526360185576, | |
| "grad_norm": 0.026910728579180684, | |
| "learning_rate": 4.050084542317201e-06, | |
| "loss": 0.0124908447265625, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 1.9396878954027836, | |
| "grad_norm": 0.05111929237613795, | |
| "learning_rate": 4.0351819185141284e-06, | |
| "loss": 0.01279144287109375, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.9481231547870097, | |
| "grad_norm": 0.031631097839140386, | |
| "learning_rate": 4.02025024070515e-06, | |
| "loss": 0.010783004760742187, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 1.9565584141712358, | |
| "grad_norm": 0.03921591693735718, | |
| "learning_rate": 4.005289927971248e-06, | |
| "loss": 0.009867095947265625, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.9649936735554618, | |
| "grad_norm": 0.03786979993880419, | |
| "learning_rate": 3.990301400197088e-06, | |
| "loss": 0.010943603515625, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 1.973428932939688, | |
| "grad_norm": 0.033688024912648086, | |
| "learning_rate": 3.9752850780592366e-06, | |
| "loss": 0.010836410522460937, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.981864192323914, | |
| "grad_norm": 0.0473160707405277, | |
| "learning_rate": 3.960241383014353e-06, | |
| "loss": 0.011658477783203124, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 1.99029945170814, | |
| "grad_norm": 0.034470209590808834, | |
| "learning_rate": 3.945170737287356e-06, | |
| "loss": 0.0096588134765625, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.9987347110923661, | |
| "grad_norm": 0.04035006428036731, | |
| "learning_rate": 3.930073563859583e-06, | |
| "loss": 0.013312530517578126, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 2.006748207507381, | |
| "grad_norm": 0.03443773853658945, | |
| "learning_rate": 3.914950286456911e-06, | |
| "loss": 0.0104766845703125, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.015183466891607, | |
| "grad_norm": 0.02321269258461312, | |
| "learning_rate": 3.899801329537865e-06, | |
| "loss": 0.008111572265625, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 2.023618726275833, | |
| "grad_norm": 0.02427731911492366, | |
| "learning_rate": 3.884627118281706e-06, | |
| "loss": 0.009668731689453125, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.023618726275833, | |
| "eval_loss": 0.0028827113565057516, | |
| "eval_margin": -0.008073512017877111, | |
| "eval_mean_neg": 0.5066258907318115, | |
| "eval_mean_pos": 0.7934735417366028, | |
| "eval_runtime": 365.1842, | |
| "eval_samples_per_second": 21.877, | |
| "eval_steps_per_second": 0.342, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.0320539856600592, | |
| "grad_norm": 0.033776934236771874, | |
| "learning_rate": 3.869428078576498e-06, | |
| "loss": 0.00937347412109375, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 2.0404892450442853, | |
| "grad_norm": 0.05070270762284893, | |
| "learning_rate": 3.8542046370071575e-06, | |
| "loss": 0.008733367919921875, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.0489245044285114, | |
| "grad_norm": 0.028063560546546604, | |
| "learning_rate": 3.838957220843472e-06, | |
| "loss": 0.00914459228515625, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 2.0573597638127374, | |
| "grad_norm": 0.041287537117132886, | |
| "learning_rate": 3.8236862580281175e-06, | |
| "loss": 0.010516357421875, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.0657950231969635, | |
| "grad_norm": 0.03173632436563901, | |
| "learning_rate": 3.808392177164642e-06, | |
| "loss": 0.010186767578125, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 2.0742302825811896, | |
| "grad_norm": 0.03149301964970768, | |
| "learning_rate": 3.7930754075054406e-06, | |
| "loss": 0.010378265380859375, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.0826655419654156, | |
| "grad_norm": 0.03183747792195117, | |
| "learning_rate": 3.7777363789397004e-06, | |
| "loss": 0.009032630920410156, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 2.0911008013496417, | |
| "grad_norm": 0.0353065686803631, | |
| "learning_rate": 3.7623755219813442e-06, | |
| "loss": 0.0096771240234375, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.0995360607338673, | |
| "grad_norm": 0.040800577074973816, | |
| "learning_rate": 3.746993267756939e-06, | |
| "loss": 0.009685516357421875, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 2.107971320118094, | |
| "grad_norm": 0.04064182954953987, | |
| "learning_rate": 3.7315900479936044e-06, | |
| "loss": 0.010097503662109375, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.1164065795023195, | |
| "grad_norm": 0.04908593416403285, | |
| "learning_rate": 3.7161662950068846e-06, | |
| "loss": 0.009412384033203125, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 2.124841838886546, | |
| "grad_norm": 0.0284060145446946, | |
| "learning_rate": 3.7007224416886276e-06, | |
| "loss": 0.00821533203125, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.1332770982707716, | |
| "grad_norm": 0.030842726867602113, | |
| "learning_rate": 3.685258921494824e-06, | |
| "loss": 0.009014129638671875, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 2.141712357654998, | |
| "grad_norm": 0.036273158990138075, | |
| "learning_rate": 3.6697761684334466e-06, | |
| "loss": 0.010558700561523438, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.1501476170392237, | |
| "grad_norm": 0.03693819496482909, | |
| "learning_rate": 3.6542746170522717e-06, | |
| "loss": 0.010668182373046875, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 2.15858287642345, | |
| "grad_norm": 0.03797267942950567, | |
| "learning_rate": 3.638754702426678e-06, | |
| "loss": 0.008889389038085938, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.167018135807676, | |
| "grad_norm": 0.03341108305444907, | |
| "learning_rate": 3.6232168601474363e-06, | |
| "loss": 0.006923675537109375, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 2.175453395191902, | |
| "grad_norm": 0.03454779917085028, | |
| "learning_rate": 3.607661526308488e-06, | |
| "loss": 0.00969085693359375, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.183888654576128, | |
| "grad_norm": 0.035727285557249105, | |
| "learning_rate": 3.5920891374947005e-06, | |
| "loss": 0.00997161865234375, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 2.192323913960354, | |
| "grad_norm": 0.031320211315080816, | |
| "learning_rate": 3.5765001307696152e-06, | |
| "loss": 0.007769393920898438, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.192323913960354, | |
| "eval_loss": 0.0026897923089563847, | |
| "eval_margin": -0.0077478337221808975, | |
| "eval_mean_neg": 0.49285975098609924, | |
| "eval_mean_pos": 0.7862820625305176, | |
| "eval_runtime": 362.3777, | |
| "eval_samples_per_second": 22.046, | |
| "eval_steps_per_second": 0.345, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.20075917334458, | |
| "grad_norm": 0.023952498523963275, | |
| "learning_rate": 3.560894943663185e-06, | |
| "loss": 0.009902191162109376, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 2.2091944327288062, | |
| "grad_norm": 0.029616458459003896, | |
| "learning_rate": 3.545274014159486e-06, | |
| "loss": 0.008718109130859375, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.2176296921130323, | |
| "grad_norm": 0.026768679077660198, | |
| "learning_rate": 3.5296377806844334e-06, | |
| "loss": 0.006624603271484375, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 2.2260649514972584, | |
| "grad_norm": 0.03723135315427558, | |
| "learning_rate": 3.5139866820934687e-06, | |
| "loss": 0.010486793518066407, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.2345002108814844, | |
| "grad_norm": 0.030973900207479872, | |
| "learning_rate": 3.498321157659248e-06, | |
| "loss": 0.00841064453125, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 2.2429354702657105, | |
| "grad_norm": 0.042522927349784224, | |
| "learning_rate": 3.482641647059313e-06, | |
| "loss": 0.010484886169433594, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.2513707296499366, | |
| "grad_norm": 0.036298357689256384, | |
| "learning_rate": 3.4669485903637452e-06, | |
| "loss": 0.010845947265625, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 2.2598059890341626, | |
| "grad_norm": 0.04210885166855473, | |
| "learning_rate": 3.4512424280228227e-06, | |
| "loss": 0.009656906127929688, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.2682412484183887, | |
| "grad_norm": 0.037852259539673916, | |
| "learning_rate": 3.435523600854652e-06, | |
| "loss": 0.009561920166015625, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 2.2766765078026148, | |
| "grad_norm": 0.03972030283651443, | |
| "learning_rate": 3.4197925500327973e-06, | |
| "loss": 0.00974578857421875, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.285111767186841, | |
| "grad_norm": 0.03864567979018308, | |
| "learning_rate": 3.4040497170739e-06, | |
| "loss": 0.009082794189453125, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 2.293547026571067, | |
| "grad_norm": 0.03547766099076331, | |
| "learning_rate": 3.3882955438252852e-06, | |
| "loss": 0.008104705810546875, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.301982285955293, | |
| "grad_norm": 0.042069666240123815, | |
| "learning_rate": 3.372530472452561e-06, | |
| "loss": 0.010825538635253906, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 2.310417545339519, | |
| "grad_norm": 0.030187240942476403, | |
| "learning_rate": 3.356754945427209e-06, | |
| "loss": 0.010921478271484375, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.318852804723745, | |
| "grad_norm": 0.03775236120881388, | |
| "learning_rate": 3.3409694055141636e-06, | |
| "loss": 0.00971527099609375, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 2.327288064107971, | |
| "grad_norm": 0.04517333042895106, | |
| "learning_rate": 3.3251742957593896e-06, | |
| "loss": 0.010394287109375, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.3357233234921972, | |
| "grad_norm": 0.03441694727754078, | |
| "learning_rate": 3.3093700594774415e-06, | |
| "loss": 0.008525848388671875, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 2.3441585828764233, | |
| "grad_norm": 0.039855958024762626, | |
| "learning_rate": 3.2935571402390243e-06, | |
| "loss": 0.01035003662109375, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.3525938422606494, | |
| "grad_norm": 0.036912654679360425, | |
| "learning_rate": 3.2777359818585453e-06, | |
| "loss": 0.01036224365234375, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 2.3610291016448754, | |
| "grad_norm": 0.02819486898709386, | |
| "learning_rate": 3.2619070283816567e-06, | |
| "loss": 0.008788299560546876, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.3610291016448754, | |
| "eval_loss": 0.002835027640685439, | |
| "eval_margin": -0.006975951657119778, | |
| "eval_mean_neg": 0.4969240725040436, | |
| "eval_mean_pos": 0.7925288081169128, | |
| "eval_runtime": 364.0594, | |
| "eval_samples_per_second": 21.944, | |
| "eval_steps_per_second": 0.343, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.3694643610291015, | |
| "grad_norm": 0.0347736325148637, | |
| "learning_rate": 3.24607072407279e-06, | |
| "loss": 0.00931854248046875, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 2.3778996204133276, | |
| "grad_norm": 0.02843547221351205, | |
| "learning_rate": 3.2302275134026902e-06, | |
| "loss": 0.008514404296875, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.3863348797975537, | |
| "grad_norm": 0.035527939183407756, | |
| "learning_rate": 3.2143778410359414e-06, | |
| "loss": 0.009189605712890625, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 2.3947701391817797, | |
| "grad_norm": 0.02697400462877436, | |
| "learning_rate": 3.1985221518184845e-06, | |
| "loss": 0.008056259155273438, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.403205398566006, | |
| "grad_norm": 0.02974726363919492, | |
| "learning_rate": 3.1826608907651327e-06, | |
| "loss": 0.008675384521484374, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 2.411640657950232, | |
| "grad_norm": 0.04279127831329293, | |
| "learning_rate": 3.1667945030470815e-06, | |
| "loss": 0.009341812133789063, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.420075917334458, | |
| "grad_norm": 0.039837807919925805, | |
| "learning_rate": 3.1509234339794144e-06, | |
| "loss": 0.010208892822265624, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 2.428511176718684, | |
| "grad_norm": 0.03024657864136027, | |
| "learning_rate": 3.1350481290086038e-06, | |
| "loss": 0.008173370361328125, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.43694643610291, | |
| "grad_norm": 0.03564318900525913, | |
| "learning_rate": 3.119169033700011e-06, | |
| "loss": 0.00924224853515625, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 2.445381695487136, | |
| "grad_norm": 0.028913985964356455, | |
| "learning_rate": 3.103286593725377e-06, | |
| "loss": 0.008563995361328125, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.453816954871362, | |
| "grad_norm": 0.035875161756803144, | |
| "learning_rate": 3.0874012548503173e-06, | |
| "loss": 0.009112548828125, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 2.4622522142555883, | |
| "grad_norm": 0.03817913502015442, | |
| "learning_rate": 3.0715134629218095e-06, | |
| "loss": 0.007489013671875, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.4706874736398143, | |
| "grad_norm": 0.03470677728941542, | |
| "learning_rate": 3.0556236638556803e-06, | |
| "loss": 0.012370681762695313, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 2.4791227330240404, | |
| "grad_norm": 0.042966141209856486, | |
| "learning_rate": 3.0397323036240886e-06, | |
| "loss": 0.0088165283203125, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.4875579924082665, | |
| "grad_norm": 0.03434953324492014, | |
| "learning_rate": 3.023839828243012e-06, | |
| "loss": 0.008261871337890626, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 2.4959932517924925, | |
| "grad_norm": 0.03061507966476803, | |
| "learning_rate": 3.007946683759723e-06, | |
| "loss": 0.008873748779296874, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.5044285111767186, | |
| "grad_norm": 0.027732115444419583, | |
| "learning_rate": 2.9920533162402776e-06, | |
| "loss": 0.008371734619140625, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 2.5128637705609447, | |
| "grad_norm": 0.029951392389848317, | |
| "learning_rate": 2.9761601717569896e-06, | |
| "loss": 0.00865478515625, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.5212990299451707, | |
| "grad_norm": 0.033652436341082566, | |
| "learning_rate": 2.960267696375911e-06, | |
| "loss": 0.009691619873046875, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 2.529734289329397, | |
| "grad_norm": 0.024511774862390433, | |
| "learning_rate": 2.9443763361443203e-06, | |
| "loss": 0.010028076171875, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.529734289329397, | |
| "eval_loss": 0.002559108193963766, | |
| "eval_margin": -0.006972289358776423, | |
| "eval_mean_neg": 0.4843982458114624, | |
| "eval_mean_pos": 0.7881345152854919, | |
| "eval_runtime": 365.5243, | |
| "eval_samples_per_second": 21.856, | |
| "eval_steps_per_second": 0.342, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.538169548713623, | |
| "grad_norm": 0.034627287332041165, | |
| "learning_rate": 2.9284865370781906e-06, | |
| "loss": 0.00982513427734375, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 2.546604808097849, | |
| "grad_norm": 0.03482839500691478, | |
| "learning_rate": 2.9125987451496837e-06, | |
| "loss": 0.00842742919921875, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 2.555040067482075, | |
| "grad_norm": 0.034040276652772095, | |
| "learning_rate": 2.8967134062746236e-06, | |
| "loss": 0.008990859985351563, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 2.563475326866301, | |
| "grad_norm": 0.03868034786852329, | |
| "learning_rate": 2.8808309662999897e-06, | |
| "loss": 0.007648468017578125, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.571910586250527, | |
| "grad_norm": 0.0419385930164125, | |
| "learning_rate": 2.864951870991397e-06, | |
| "loss": 0.009268951416015626, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 2.580345845634753, | |
| "grad_norm": 0.03690945718603307, | |
| "learning_rate": 2.8490765660205857e-06, | |
| "loss": 0.00864715576171875, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 2.5887811050189793, | |
| "grad_norm": 0.02493335874585237, | |
| "learning_rate": 2.833205496952919e-06, | |
| "loss": 0.00865478515625, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 2.5972163644032054, | |
| "grad_norm": 0.0355467734297459, | |
| "learning_rate": 2.817339109234868e-06, | |
| "loss": 0.009038543701171875, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.6056516237874314, | |
| "grad_norm": 0.035706551369837684, | |
| "learning_rate": 2.801477848181517e-06, | |
| "loss": 0.008769607543945313, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 2.6140868831716575, | |
| "grad_norm": 0.030590948482880534, | |
| "learning_rate": 2.7856221589640584e-06, | |
| "loss": 0.010419464111328125, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.6225221425558836, | |
| "grad_norm": 0.031187166990055255, | |
| "learning_rate": 2.7697724865973103e-06, | |
| "loss": 0.008966064453125, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 2.6309574019401096, | |
| "grad_norm": 0.03195446103788609, | |
| "learning_rate": 2.753929275927211e-06, | |
| "loss": 0.00810089111328125, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.6393926613243357, | |
| "grad_norm": 0.03124766219549955, | |
| "learning_rate": 2.7380929716183448e-06, | |
| "loss": 0.00867919921875, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 2.6478279207085618, | |
| "grad_norm": 0.04158743972175772, | |
| "learning_rate": 2.722264018141455e-06, | |
| "loss": 0.008811187744140626, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.656263180092788, | |
| "grad_norm": 0.042358151513616535, | |
| "learning_rate": 2.706442859760976e-06, | |
| "loss": 0.008480644226074219, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 2.664698439477014, | |
| "grad_norm": 0.02876853915749735, | |
| "learning_rate": 2.6906299405225595e-06, | |
| "loss": 0.009603309631347656, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.67313369886124, | |
| "grad_norm": 0.031452133973887623, | |
| "learning_rate": 2.6748257042406114e-06, | |
| "loss": 0.008524322509765625, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 2.681568958245466, | |
| "grad_norm": 0.03502577600676223, | |
| "learning_rate": 2.659030594485836e-06, | |
| "loss": 0.007845306396484375, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 2.690004217629692, | |
| "grad_norm": 0.029358280910616305, | |
| "learning_rate": 2.6432450545727913e-06, | |
| "loss": 0.008304595947265625, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 2.698439477013918, | |
| "grad_norm": 0.037226468621806945, | |
| "learning_rate": 2.62746952754744e-06, | |
| "loss": 0.0089141845703125, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.698439477013918, | |
| "eval_loss": 0.002468662802129984, | |
| "eval_margin": -0.006652700444383006, | |
| "eval_mean_neg": 0.5055871605873108, | |
| "eval_mean_pos": 0.8004181981086731, | |
| "eval_runtime": 363.3432, | |
| "eval_samples_per_second": 21.987, | |
| "eval_steps_per_second": 0.344, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.7068747363981442, | |
| "grad_norm": 0.029979441347867175, | |
| "learning_rate": 2.6117044561747145e-06, | |
| "loss": 0.007899856567382813, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 2.7153099957823703, | |
| "grad_norm": 0.04117264280378634, | |
| "learning_rate": 2.5959502829261e-06, | |
| "loss": 0.009801483154296875, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.7237452551665964, | |
| "grad_norm": 0.02874139529420723, | |
| "learning_rate": 2.5802074499672033e-06, | |
| "loss": 0.007126617431640625, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 2.7321805145508224, | |
| "grad_norm": 0.032009387593884574, | |
| "learning_rate": 2.564476399145349e-06, | |
| "loss": 0.007319259643554688, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.7406157739350485, | |
| "grad_norm": 0.0343660828009257, | |
| "learning_rate": 2.5487575719771774e-06, | |
| "loss": 0.010648345947265625, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 2.7490510333192746, | |
| "grad_norm": 0.033859872264591424, | |
| "learning_rate": 2.533051409636255e-06, | |
| "loss": 0.007244110107421875, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.7574862927035007, | |
| "grad_norm": 0.032973506044290384, | |
| "learning_rate": 2.517358352940688e-06, | |
| "loss": 0.008284759521484376, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 2.7659215520877267, | |
| "grad_norm": 0.03481146191160576, | |
| "learning_rate": 2.501678842340753e-06, | |
| "loss": 0.00882110595703125, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.774356811471953, | |
| "grad_norm": 0.03862588539253724, | |
| "learning_rate": 2.4860133179065323e-06, | |
| "loss": 0.00964202880859375, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 2.782792070856179, | |
| "grad_norm": 0.02979780702601001, | |
| "learning_rate": 2.4703622193155676e-06, | |
| "loss": 0.009095001220703124, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.791227330240405, | |
| "grad_norm": 0.02658002258647219, | |
| "learning_rate": 2.4547259858405147e-06, | |
| "loss": 0.008580398559570313, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 2.799662589624631, | |
| "grad_norm": 0.03237100489547251, | |
| "learning_rate": 2.439105056336816e-06, | |
| "loss": 0.006137275695800781, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.808097849008857, | |
| "grad_norm": 0.035925961611001624, | |
| "learning_rate": 2.423499869230385e-06, | |
| "loss": 0.006979179382324219, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 2.816533108393083, | |
| "grad_norm": 0.028925897672990208, | |
| "learning_rate": 2.4079108625053e-06, | |
| "loss": 0.007439422607421875, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.824968367777309, | |
| "grad_norm": 0.02643424196739614, | |
| "learning_rate": 2.392338473691513e-06, | |
| "loss": 0.007563400268554688, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 2.8334036271615353, | |
| "grad_norm": 0.029469931037551172, | |
| "learning_rate": 2.376783139852564e-06, | |
| "loss": 0.00782928466796875, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.8418388865457613, | |
| "grad_norm": 0.03519097117769341, | |
| "learning_rate": 2.3612452975733225e-06, | |
| "loss": 0.0081695556640625, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 2.8502741459299874, | |
| "grad_norm": 0.041842720836538394, | |
| "learning_rate": 2.3457253829477284e-06, | |
| "loss": 0.00938720703125, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.8587094053142135, | |
| "grad_norm": 0.02803118980318521, | |
| "learning_rate": 2.3302238315665544e-06, | |
| "loss": 0.007602310180664063, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 2.8671446646984395, | |
| "grad_norm": 0.06643247372472408, | |
| "learning_rate": 2.314741078505177e-06, | |
| "loss": 0.009275436401367188, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.8671446646984395, | |
| "eval_loss": 0.002381447935476899, | |
| "eval_margin": -0.0063614378337778395, | |
| "eval_mean_neg": 0.4982295334339142, | |
| "eval_mean_pos": 0.7957465648651123, | |
| "eval_runtime": 364.3957, | |
| "eval_samples_per_second": 21.924, | |
| "eval_steps_per_second": 0.343, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.8755799240826656, | |
| "grad_norm": 0.04573493937998368, | |
| "learning_rate": 2.299277558311373e-06, | |
| "loss": 0.008275604248046875, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 2.8840151834668917, | |
| "grad_norm": 0.030855319414577996, | |
| "learning_rate": 2.283833704993116e-06, | |
| "loss": 0.008497047424316406, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.8924504428511177, | |
| "grad_norm": 0.03287831217925721, | |
| "learning_rate": 2.268409952006397e-06, | |
| "loss": 0.006939697265625, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 2.900885702235344, | |
| "grad_norm": 0.03738971418410914, | |
| "learning_rate": 2.253006732243061e-06, | |
| "loss": 0.00982208251953125, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.90932096161957, | |
| "grad_norm": 0.02295281003302144, | |
| "learning_rate": 2.237624478018656e-06, | |
| "loss": 0.00743560791015625, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 2.917756221003796, | |
| "grad_norm": 0.03960242549923526, | |
| "learning_rate": 2.2222636210603002e-06, | |
| "loss": 0.008847427368164063, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.926191480388022, | |
| "grad_norm": 0.04741800625952587, | |
| "learning_rate": 2.2069245924945604e-06, | |
| "loss": 0.009384918212890624, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 2.934626739772248, | |
| "grad_norm": 0.03409532340357435, | |
| "learning_rate": 2.191607822835357e-06, | |
| "loss": 0.0076019287109375, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.943061999156474, | |
| "grad_norm": 0.0239211291682541, | |
| "learning_rate": 2.1763137419718826e-06, | |
| "loss": 0.007954025268554687, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 2.9514972585407, | |
| "grad_norm": 0.0255275562880085, | |
| "learning_rate": 2.161042779156529e-06, | |
| "loss": 0.007129669189453125, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.9599325179249263, | |
| "grad_norm": 0.026100931475016395, | |
| "learning_rate": 2.1457953629928426e-06, | |
| "loss": 0.007111358642578125, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 2.9683677773091524, | |
| "grad_norm": 0.03040565516608014, | |
| "learning_rate": 2.1305719214235017e-06, | |
| "loss": 0.00856170654296875, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.9768030366933784, | |
| "grad_norm": 0.031725391054917944, | |
| "learning_rate": 2.115372881718295e-06, | |
| "loss": 0.00930938720703125, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 2.9852382960776045, | |
| "grad_norm": 0.025864373534585865, | |
| "learning_rate": 2.100198670462137e-06, | |
| "loss": 0.007320022583007813, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.9936735554618306, | |
| "grad_norm": 0.013680490985647303, | |
| "learning_rate": 2.0850497135430897e-06, | |
| "loss": 0.007777786254882813, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 3.001687051876845, | |
| "grad_norm": 0.03143671946142631, | |
| "learning_rate": 2.0699264361404174e-06, | |
| "loss": 0.008609771728515625, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 3.010122311261071, | |
| "grad_norm": 0.021237532660395856, | |
| "learning_rate": 2.054829262712645e-06, | |
| "loss": 0.007422637939453125, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 3.018557570645297, | |
| "grad_norm": 0.02970629169587053, | |
| "learning_rate": 2.0397586169856488e-06, | |
| "loss": 0.008047866821289062, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 3.0269928300295232, | |
| "grad_norm": 0.0280079357370666, | |
| "learning_rate": 2.024714921940763e-06, | |
| "loss": 0.008725738525390625, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 3.0354280894137493, | |
| "grad_norm": 0.05178206206651836, | |
| "learning_rate": 2.0096985998029124e-06, | |
| "loss": 0.007384490966796875, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.0354280894137493, | |
| "eval_loss": 0.0022954940795898438, | |
| "eval_margin": -0.005535545939159009, | |
| "eval_mean_neg": 0.49804064631462097, | |
| "eval_mean_pos": 0.7978142499923706, | |
| "eval_runtime": 362.5441, | |
| "eval_samples_per_second": 22.036, | |
| "eval_steps_per_second": 0.345, | |
| "step": 1800 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 2965, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2008625567629312.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |