| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 710, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.014104372355430184, |
| "grad_norm": 1.0829510688781738, |
| "learning_rate": 1.348314606741573e-06, |
| "loss": 1.3063, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.028208744710860368, |
| "grad_norm": 0.91203373670578, |
| "learning_rate": 3.033707865168539e-06, |
| "loss": 1.2577, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04231311706629055, |
| "grad_norm": 0.6704057455062866, |
| "learning_rate": 4.719101123595506e-06, |
| "loss": 1.334, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.056417489421720736, |
| "grad_norm": 0.5428482294082642, |
| "learning_rate": 6.404494382022472e-06, |
| "loss": 1.2117, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.07052186177715092, |
| "grad_norm": 0.4570103585720062, |
| "learning_rate": 8.089887640449438e-06, |
| "loss": 1.1806, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0846262341325811, |
| "grad_norm": 0.5431040525436401, |
| "learning_rate": 9.775280898876405e-06, |
| "loss": 1.1791, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.09873060648801128, |
| "grad_norm": 0.6045286059379578, |
| "learning_rate": 1.146067415730337e-05, |
| "loss": 1.1561, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.11283497884344147, |
| "grad_norm": 0.816319465637207, |
| "learning_rate": 1.3146067415730338e-05, |
| "loss": 1.1815, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.12693935119887165, |
| "grad_norm": 0.41476839780807495, |
| "learning_rate": 1.4831460674157303e-05, |
| "loss": 1.1569, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.14104372355430184, |
| "grad_norm": 0.45171546936035156, |
| "learning_rate": 1.651685393258427e-05, |
| "loss": 1.1442, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.15514809590973203, |
| "grad_norm": 0.5023919343948364, |
| "learning_rate": 1.8202247191011237e-05, |
| "loss": 1.0983, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.1692524682651622, |
| "grad_norm": 0.54413241147995, |
| "learning_rate": 1.98876404494382e-05, |
| "loss": 1.1343, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.18335684062059238, |
| "grad_norm": 0.40992623567581177, |
| "learning_rate": 2.1573033707865168e-05, |
| "loss": 1.1189, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.19746121297602257, |
| "grad_norm": 0.5576924085617065, |
| "learning_rate": 2.3258426966292135e-05, |
| "loss": 1.1297, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.21156558533145275, |
| "grad_norm": 0.439005047082901, |
| "learning_rate": 2.4943820224719103e-05, |
| "loss": 1.1228, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.22566995768688294, |
| "grad_norm": 0.45460307598114014, |
| "learning_rate": 2.6629213483146066e-05, |
| "loss": 1.105, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2397743300423131, |
| "grad_norm": 0.5392889976501465, |
| "learning_rate": 2.8314606741573034e-05, |
| "loss": 1.0453, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.2538787023977433, |
| "grad_norm": 0.49128258228302, |
| "learning_rate": 3e-05, |
| "loss": 1.0606, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2679830747531735, |
| "grad_norm": 0.574536144733429, |
| "learning_rate": 2.9999348997381465e-05, |
| "loss": 1.0693, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.2820874471086037, |
| "grad_norm": 0.5417886972427368, |
| "learning_rate": 2.999739604603311e-05, |
| "loss": 1.0346, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.29619181946403383, |
| "grad_norm": 0.6563856601715088, |
| "learning_rate": 2.9994141315471794e-05, |
| "loss": 0.9918, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.31029619181946405, |
| "grad_norm": 0.5459888577461243, |
| "learning_rate": 2.998958508820927e-05, |
| "loss": 1.0295, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3244005641748942, |
| "grad_norm": 0.5069293975830078, |
| "learning_rate": 2.998372775972765e-05, |
| "loss": 1.0381, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.3385049365303244, |
| "grad_norm": 0.5660542249679565, |
| "learning_rate": 2.9976569838445096e-05, |
| "loss": 0.9919, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.3526093088857546, |
| "grad_norm": 0.5492711663246155, |
| "learning_rate": 2.9968111945671674e-05, |
| "loss": 1.0029, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.36671368124118475, |
| "grad_norm": 0.6453714370727539, |
| "learning_rate": 2.9958354815555426e-05, |
| "loss": 0.9913, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.38081805359661497, |
| "grad_norm": 0.534812867641449, |
| "learning_rate": 2.9947299295018656e-05, |
| "loss": 0.9343, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.39492242595204513, |
| "grad_norm": 0.5858612060546875, |
| "learning_rate": 2.9934946343684404e-05, |
| "loss": 0.9805, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.4090267983074753, |
| "grad_norm": 0.5869054198265076, |
| "learning_rate": 2.9921297033793158e-05, |
| "loss": 1.0044, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.4231311706629055, |
| "grad_norm": 0.7152486443519592, |
| "learning_rate": 2.9906352550109787e-05, |
| "loss": 0.9804, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.43723554301833567, |
| "grad_norm": 0.6668853163719177, |
| "learning_rate": 2.989011418982069e-05, |
| "loss": 0.8677, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.4513399153737659, |
| "grad_norm": 0.7731665968894958, |
| "learning_rate": 2.9872583362421203e-05, |
| "loss": 0.9194, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.46544428772919605, |
| "grad_norm": 0.7152626514434814, |
| "learning_rate": 2.985376158959328e-05, |
| "loss": 0.8777, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.4795486600846262, |
| "grad_norm": 0.6192904114723206, |
| "learning_rate": 2.983365050507336e-05, |
| "loss": 0.9021, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.4936530324400564, |
| "grad_norm": 0.6604310274124146, |
| "learning_rate": 2.9812251854510603e-05, |
| "loss": 0.9059, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.5077574047954866, |
| "grad_norm": 0.7105734348297119, |
| "learning_rate": 2.9789567495315357e-05, |
| "loss": 0.9273, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5218617771509168, |
| "grad_norm": 0.7766591906547546, |
| "learning_rate": 2.976559939649791e-05, |
| "loss": 0.8934, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.535966149506347, |
| "grad_norm": 0.9151214361190796, |
| "learning_rate": 2.9740349638497614e-05, |
| "loss": 0.9166, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5500705218617772, |
| "grad_norm": 0.8262761831283569, |
| "learning_rate": 2.971382041300228e-05, |
| "loss": 0.8532, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.5641748942172073, |
| "grad_norm": 0.6876681447029114, |
| "learning_rate": 2.9686014022757937e-05, |
| "loss": 0.8347, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5782792665726375, |
| "grad_norm": 0.7912429571151733, |
| "learning_rate": 2.965693288136897e-05, |
| "loss": 0.8992, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.5923836389280677, |
| "grad_norm": 0.7762657403945923, |
| "learning_rate": 2.9626579513088606e-05, |
| "loss": 0.85, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6064880112834978, |
| "grad_norm": 0.8784447312355042, |
| "learning_rate": 2.959495655259981e-05, |
| "loss": 0.7934, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.6205923836389281, |
| "grad_norm": 0.8992597460746765, |
| "learning_rate": 2.9562066744786587e-05, |
| "loss": 0.8254, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6346967559943583, |
| "grad_norm": 0.7901438474655151, |
| "learning_rate": 2.9527912944495748e-05, |
| "loss": 0.8078, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.6488011283497884, |
| "grad_norm": 0.8354968428611755, |
| "learning_rate": 2.9492498116289072e-05, |
| "loss": 0.856, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6629055007052186, |
| "grad_norm": 1.1308631896972656, |
| "learning_rate": 2.9455825334186023e-05, |
| "loss": 0.7691, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.6770098730606487, |
| "grad_norm": 0.8235520124435425, |
| "learning_rate": 2.9417897781396883e-05, |
| "loss": 0.7801, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.691114245416079, |
| "grad_norm": 0.7662566900253296, |
| "learning_rate": 2.937871875004648e-05, |
| "loss": 0.7438, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.7052186177715092, |
| "grad_norm": 0.8254992961883545, |
| "learning_rate": 2.9338291640888413e-05, |
| "loss": 0.8526, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.7193229901269393, |
| "grad_norm": 0.783316433429718, |
| "learning_rate": 2.9296619963009866e-05, |
| "loss": 0.782, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.7334273624823695, |
| "grad_norm": 0.910808801651001, |
| "learning_rate": 2.925370733352704e-05, |
| "loss": 0.7768, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.7475317348377997, |
| "grad_norm": 0.8336752653121948, |
| "learning_rate": 2.920955747727115e-05, |
| "loss": 0.7485, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.7616361071932299, |
| "grad_norm": 0.9764995574951172, |
| "learning_rate": 2.9164174226465134e-05, |
| "loss": 0.7457, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.7757404795486601, |
| "grad_norm": 0.9447649717330933, |
| "learning_rate": 2.9117561520391002e-05, |
| "loss": 0.7196, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.7898448519040903, |
| "grad_norm": 0.8971619606018066, |
| "learning_rate": 2.9069723405047923e-05, |
| "loss": 0.7608, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8039492242595204, |
| "grad_norm": 0.9162759184837341, |
| "learning_rate": 2.902066403280101e-05, |
| "loss": 0.7038, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.8180535966149506, |
| "grad_norm": 1.049428105354309, |
| "learning_rate": 2.8970387662020898e-05, |
| "loss": 0.705, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.8321579689703809, |
| "grad_norm": 0.8928109407424927, |
| "learning_rate": 2.8918898656714127e-05, |
| "loss": 0.7331, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.846262341325811, |
| "grad_norm": 0.928637683391571, |
| "learning_rate": 2.8866201486144336e-05, |
| "loss": 0.7034, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.8603667136812412, |
| "grad_norm": 0.9384335279464722, |
| "learning_rate": 2.881230072444432e-05, |
| "loss": 0.735, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.8744710860366713, |
| "grad_norm": 0.879400908946991, |
| "learning_rate": 2.8757201050219027e-05, |
| "loss": 0.7082, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.8885754583921015, |
| "grad_norm": 0.9233940839767456, |
| "learning_rate": 2.8700907246139413e-05, |
| "loss": 0.6922, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.9026798307475318, |
| "grad_norm": 1.0604772567749023, |
| "learning_rate": 2.8643424198527314e-05, |
| "loss": 0.7058, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.9167842031029619, |
| "grad_norm": 0.9149603843688965, |
| "learning_rate": 2.858475689693135e-05, |
| "loss": 0.6531, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.9308885754583921, |
| "grad_norm": 0.9202451109886169, |
| "learning_rate": 2.852491043369377e-05, |
| "loss": 0.6501, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.9449929478138223, |
| "grad_norm": 1.123238205909729, |
| "learning_rate": 2.8463890003508488e-05, |
| "loss": 0.6438, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.9590973201692524, |
| "grad_norm": 0.9209488034248352, |
| "learning_rate": 2.840170090297014e-05, |
| "loss": 0.6568, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.9732016925246827, |
| "grad_norm": 1.0271188020706177, |
| "learning_rate": 2.833834853011437e-05, |
| "loss": 0.6731, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.9873060648801129, |
| "grad_norm": 1.0789600610733032, |
| "learning_rate": 2.827383838394926e-05, |
| "loss": 0.6798, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.23028564453125, |
| "learning_rate": 2.8208176063978018e-05, |
| "loss": 0.6558, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.0141043723554302, |
| "grad_norm": 0.9986002445220947, |
| "learning_rate": 2.814136726971294e-05, |
| "loss": 0.5642, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.0282087447108603, |
| "grad_norm": 1.0259571075439453, |
| "learning_rate": 2.8073417800180707e-05, |
| "loss": 0.564, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.0423131170662905, |
| "grad_norm": 0.9642548561096191, |
| "learning_rate": 2.800433355341898e-05, |
| "loss": 0.5423, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.0564174894217206, |
| "grad_norm": 0.9903756380081177, |
| "learning_rate": 2.793412052596451e-05, |
| "loss": 0.5246, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.0705218617771508, |
| "grad_norm": 0.9968107342720032, |
| "learning_rate": 2.7862784812332592e-05, |
| "loss": 0.5371, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.0846262341325812, |
| "grad_norm": 1.2498723268508911, |
| "learning_rate": 2.779033260448807e-05, |
| "loss": 0.5538, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.0987306064880114, |
| "grad_norm": 1.061000943183899, |
| "learning_rate": 2.7716770191307887e-05, |
| "loss": 0.5467, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.1128349788434415, |
| "grad_norm": 0.9731029272079468, |
| "learning_rate": 2.7642103958035188e-05, |
| "loss": 0.4912, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.1269393511988717, |
| "grad_norm": 0.9743561744689941, |
| "learning_rate": 2.756634038572509e-05, |
| "loss": 0.5527, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.1410437235543018, |
| "grad_norm": 1.0373893976211548, |
| "learning_rate": 2.748948605068212e-05, |
| "loss": 0.5109, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.155148095909732, |
| "grad_norm": 1.0031832456588745, |
| "learning_rate": 2.7411547623889397e-05, |
| "loss": 0.5417, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.1692524682651622, |
| "grad_norm": 1.2156072854995728, |
| "learning_rate": 2.7332531870429574e-05, |
| "loss": 0.4838, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.1833568406205923, |
| "grad_norm": 0.9628976583480835, |
| "learning_rate": 2.7252445648897643e-05, |
| "loss": 0.4965, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.1974612129760225, |
| "grad_norm": 1.100713849067688, |
| "learning_rate": 2.7171295910805585e-05, |
| "loss": 0.4919, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.2115655853314529, |
| "grad_norm": 1.0750036239624023, |
| "learning_rate": 2.708908969997901e-05, |
| "loss": 0.5031, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.225669957686883, |
| "grad_norm": 1.0613516569137573, |
| "learning_rate": 2.7005834151945708e-05, |
| "loss": 0.5087, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.2397743300423132, |
| "grad_norm": 1.2760698795318604, |
| "learning_rate": 2.6921536493316327e-05, |
| "loss": 0.5021, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.2538787023977433, |
| "grad_norm": 0.9473477005958557, |
| "learning_rate": 2.683620404115706e-05, |
| "loss": 0.5002, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.2679830747531735, |
| "grad_norm": 1.1720986366271973, |
| "learning_rate": 2.674984420235455e-05, |
| "loss": 0.5148, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.2820874471086037, |
| "grad_norm": 1.0552853345870972, |
| "learning_rate": 2.6662464472972958e-05, |
| "loss": 0.5056, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.2961918194640338, |
| "grad_norm": 1.0545684099197388, |
| "learning_rate": 2.65740724376033e-05, |
| "loss": 0.4976, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.310296191819464, |
| "grad_norm": 0.8876795172691345, |
| "learning_rate": 2.6484675768705102e-05, |
| "loss": 0.4761, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.3244005641748942, |
| "grad_norm": 0.9161580204963684, |
| "learning_rate": 2.6394282225940445e-05, |
| "loss": 0.4589, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.3385049365303243, |
| "grad_norm": 1.0101513862609863, |
| "learning_rate": 2.63028996555004e-05, |
| "loss": 0.4852, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.3526093088857545, |
| "grad_norm": 1.0795925855636597, |
| "learning_rate": 2.6210535989423978e-05, |
| "loss": 0.4633, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.3667136812411846, |
| "grad_norm": 1.032175064086914, |
| "learning_rate": 2.6117199244909655e-05, |
| "loss": 0.4791, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.380818053596615, |
| "grad_norm": 0.9439494013786316, |
| "learning_rate": 2.6022897523619423e-05, |
| "loss": 0.4717, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.3949224259520452, |
| "grad_norm": 1.0860165357589722, |
| "learning_rate": 2.592763901097564e-05, |
| "loss": 0.4829, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.4090267983074753, |
| "grad_norm": 0.9606081247329712, |
| "learning_rate": 2.583143197545044e-05, |
| "loss": 0.4861, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.4231311706629055, |
| "grad_norm": 1.1352707147598267, |
| "learning_rate": 2.5734284767848108e-05, |
| "loss": 0.4769, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.4372355430183357, |
| "grad_norm": 1.0786997079849243, |
| "learning_rate": 2.5636205820580173e-05, |
| "loss": 0.4504, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.4513399153737658, |
| "grad_norm": 1.1144368648529053, |
| "learning_rate": 2.553720364693351e-05, |
| "loss": 0.4326, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.465444287729196, |
| "grad_norm": 1.0776797533035278, |
| "learning_rate": 2.543728684033135e-05, |
| "loss": 0.4121, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.4795486600846262, |
| "grad_norm": 1.1324986219406128, |
| "learning_rate": 2.5336464073587395e-05, |
| "loss": 0.3999, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.4936530324400565, |
| "grad_norm": 0.951062023639679, |
| "learning_rate": 2.5234744098153e-05, |
| "loss": 0.4736, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.5077574047954867, |
| "grad_norm": 1.1760550737380981, |
| "learning_rate": 2.5132135743357546e-05, |
| "loss": 0.4411, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.5218617771509169, |
| "grad_norm": 1.1214802265167236, |
| "learning_rate": 2.502864791564205e-05, |
| "loss": 0.43, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.535966149506347, |
| "grad_norm": 1.0796761512756348, |
| "learning_rate": 2.492428959778609e-05, |
| "loss": 0.455, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.5500705218617772, |
| "grad_norm": 1.148000717163086, |
| "learning_rate": 2.48190698481281e-05, |
| "loss": 0.4235, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.5641748942172073, |
| "grad_norm": 1.0369551181793213, |
| "learning_rate": 2.4712997799779077e-05, |
| "loss": 0.4066, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.5782792665726375, |
| "grad_norm": 1.153380036354065, |
| "learning_rate": 2.4606082659829852e-05, |
| "loss": 0.4079, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.5923836389280677, |
| "grad_norm": 1.2209539413452148, |
| "learning_rate": 2.4498333708551906e-05, |
| "loss": 0.4335, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.6064880112834978, |
| "grad_norm": 1.097364068031311, |
| "learning_rate": 2.4389760298591825e-05, |
| "loss": 0.4187, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.620592383638928, |
| "grad_norm": 1.0410133600234985, |
| "learning_rate": 2.4280371854159502e-05, |
| "loss": 0.4305, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.6346967559943582, |
| "grad_norm": 1.2560479640960693, |
| "learning_rate": 2.417017787021011e-05, |
| "loss": 0.3681, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.6488011283497883, |
| "grad_norm": 1.1253535747528076, |
| "learning_rate": 2.405918791161992e-05, |
| "loss": 0.416, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.6629055007052185, |
| "grad_norm": 1.1615822315216064, |
| "learning_rate": 2.3947411612356093e-05, |
| "loss": 0.4539, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.6770098730606486, |
| "grad_norm": 1.0008732080459595, |
| "learning_rate": 2.3834858674640434e-05, |
| "loss": 0.4258, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.691114245416079, |
| "grad_norm": 0.9982715845108032, |
| "learning_rate": 2.3721538868107226e-05, |
| "loss": 0.4154, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.7052186177715092, |
| "grad_norm": 1.1458238363265991, |
| "learning_rate": 2.3607462028955245e-05, |
| "loss": 0.3803, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.7193229901269393, |
| "grad_norm": 1.0373518466949463, |
| "learning_rate": 2.3492638059093958e-05, |
| "loss": 0.3826, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.7334273624823695, |
| "grad_norm": 1.3240524530410767, |
| "learning_rate": 2.3377076925284037e-05, |
| "loss": 0.4086, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.7475317348377997, |
| "grad_norm": 1.0962055921554565, |
| "learning_rate": 2.3260788658272244e-05, |
| "loss": 0.3553, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.76163610719323, |
| "grad_norm": 1.075670838356018, |
| "learning_rate": 2.3143783351920753e-05, |
| "loss": 0.3749, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.7757404795486602, |
| "grad_norm": 0.9956583976745605, |
| "learning_rate": 2.3026071162331012e-05, |
| "loss": 0.376, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.7898448519040904, |
| "grad_norm": 1.0385911464691162, |
| "learning_rate": 2.2907662306962176e-05, |
| "loss": 0.3529, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.8039492242595205, |
| "grad_norm": 1.147087574005127, |
| "learning_rate": 2.278856706374422e-05, |
| "loss": 0.3979, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.8180535966149507, |
| "grad_norm": 1.094008445739746, |
| "learning_rate": 2.266879577018585e-05, |
| "loss": 0.3979, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.8321579689703809, |
| "grad_norm": 1.116995930671692, |
| "learning_rate": 2.2548358822477158e-05, |
| "loss": 0.3869, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.846262341325811, |
| "grad_norm": 0.991368293762207, |
| "learning_rate": 2.242726667458726e-05, |
| "loss": 0.343, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.8603667136812412, |
| "grad_norm": 1.0833584070205688, |
| "learning_rate": 2.2305529837356857e-05, |
| "loss": 0.3563, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.8744710860366713, |
| "grad_norm": 1.0229146480560303, |
| "learning_rate": 2.2183158877585937e-05, |
| "loss": 0.3327, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.8885754583921015, |
| "grad_norm": 0.9877346754074097, |
| "learning_rate": 2.206016441711652e-05, |
| "loss": 0.3832, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.9026798307475317, |
| "grad_norm": 1.1531109809875488, |
| "learning_rate": 2.1936557131910733e-05, |
| "loss": 0.3904, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.9167842031029618, |
| "grad_norm": 1.0331673622131348, |
| "learning_rate": 2.1812347751124074e-05, |
| "loss": 0.331, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.930888575458392, |
| "grad_norm": 1.2358472347259521, |
| "learning_rate": 2.1687547056174172e-05, |
| "loss": 0.3152, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.9449929478138221, |
| "grad_norm": 1.2115956544876099, |
| "learning_rate": 2.156216587980491e-05, |
| "loss": 0.3641, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.9590973201692523, |
| "grad_norm": 1.191973328590393, |
| "learning_rate": 2.1436215105146178e-05, |
| "loss": 0.3296, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.9732016925246827, |
| "grad_norm": 1.093503475189209, |
| "learning_rate": 2.1309705664769198e-05, |
| "loss": 0.3374, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.9873060648801129, |
| "grad_norm": 1.0156272649765015, |
| "learning_rate": 2.1182648539737547e-05, |
| "loss": 0.3241, |
| "step": 705 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.5677207708358765, |
| "learning_rate": 2.1055054758654053e-05, |
| "loss": 0.3256, |
| "step": 710 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 1775, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1044507413821522e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|