{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9989187240944313,
  "eval_steps": 500,
  "global_step": 5547,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.005406379527842855,
      "grad_norm": 5.67321238470604,
      "learning_rate": 1.801801801801802e-07,
      "loss": 0.8785,
      "step": 10
    },
    {
      "epoch": 0.01081275905568571,
      "grad_norm": 5.2575759647356906,
      "learning_rate": 3.603603603603604e-07,
      "loss": 0.8654,
      "step": 20
    },
    {
      "epoch": 0.016219138583528563,
      "grad_norm": 3.8360253130807958,
      "learning_rate": 5.405405405405406e-07,
      "loss": 0.8205,
      "step": 30
    },
    {
      "epoch": 0.02162551811137142,
      "grad_norm": 1.722668988638544,
      "learning_rate": 7.207207207207208e-07,
      "loss": 0.778,
      "step": 40
    },
    {
      "epoch": 0.027031897639214274,
      "grad_norm": 1.3197714991034968,
      "learning_rate": 9.00900900900901e-07,
      "loss": 0.7286,
      "step": 50
    },
    {
      "epoch": 0.03243827716705713,
      "grad_norm": 0.8474482237034886,
      "learning_rate": 1.0810810810810812e-06,
      "loss": 0.6968,
      "step": 60
    },
    {
      "epoch": 0.03784465669489998,
      "grad_norm": 0.5645420283585227,
      "learning_rate": 1.2612612612612613e-06,
      "loss": 0.6689,
      "step": 70
    },
    {
      "epoch": 0.04325103622274284,
      "grad_norm": 0.43605656948964683,
      "learning_rate": 1.4414414414414416e-06,
      "loss": 0.6408,
      "step": 80
    },
    {
      "epoch": 0.04865741575058569,
      "grad_norm": 0.4339497028480959,
      "learning_rate": 1.6216216216216219e-06,
      "loss": 0.6153,
      "step": 90
    },
    {
      "epoch": 0.05406379527842855,
      "grad_norm": 0.3843592033040236,
      "learning_rate": 1.801801801801802e-06,
      "loss": 0.6082,
      "step": 100
    },
    {
      "epoch": 0.0594701748062714,
      "grad_norm": 0.37685068673558353,
      "learning_rate": 1.9819819819819822e-06,
      "loss": 0.6049,
      "step": 110
    },
    {
      "epoch": 0.06487655433411425,
      "grad_norm": 0.4392453448959536,
      "learning_rate": 2.1621621621621623e-06,
      "loss": 0.5889,
      "step": 120
    },
    {
      "epoch": 0.07028293386195711,
      "grad_norm": 0.4212233804351266,
      "learning_rate": 2.3423423423423424e-06,
      "loss": 0.5842,
      "step": 130
    },
    {
      "epoch": 0.07568931338979996,
      "grad_norm": 0.38709432000579613,
      "learning_rate": 2.5225225225225225e-06,
      "loss": 0.592,
      "step": 140
    },
    {
      "epoch": 0.08109569291764282,
      "grad_norm": 0.3988233764060424,
      "learning_rate": 2.702702702702703e-06,
      "loss": 0.5732,
      "step": 150
    },
    {
      "epoch": 0.08650207244548568,
      "grad_norm": 0.41395637177292804,
      "learning_rate": 2.882882882882883e-06,
      "loss": 0.5679,
      "step": 160
    },
    {
      "epoch": 0.09190845197332853,
      "grad_norm": 0.37677030114794524,
      "learning_rate": 3.063063063063063e-06,
      "loss": 0.5583,
      "step": 170
    },
    {
      "epoch": 0.09731483150117139,
      "grad_norm": 0.38451911721974225,
      "learning_rate": 3.2432432432432437e-06,
      "loss": 0.5658,
      "step": 180
    },
    {
      "epoch": 0.10272121102901424,
      "grad_norm": 0.36190379869625294,
      "learning_rate": 3.423423423423424e-06,
      "loss": 0.5554,
      "step": 190
    },
    {
      "epoch": 0.1081275905568571,
      "grad_norm": 0.3927866832932917,
      "learning_rate": 3.603603603603604e-06,
      "loss": 0.5534,
      "step": 200
    },
    {
      "epoch": 0.11353397008469994,
      "grad_norm": 0.4109637951464883,
      "learning_rate": 3.7837837837837844e-06,
      "loss": 0.5527,
      "step": 210
    },
    {
      "epoch": 0.1189403496125428,
      "grad_norm": 0.4189875109517182,
      "learning_rate": 3.9639639639639645e-06,
      "loss": 0.5521,
      "step": 220
    },
    {
      "epoch": 0.12434672914038565,
      "grad_norm": 0.44103289873218365,
      "learning_rate": 4.1441441441441446e-06,
      "loss": 0.55,
      "step": 230
    },
    {
      "epoch": 0.1297531086682285,
      "grad_norm": 0.47624121719255225,
      "learning_rate": 4.324324324324325e-06,
      "loss": 0.5455,
      "step": 240
    },
    {
      "epoch": 0.13515948819607138,
      "grad_norm": 0.4127382950104387,
      "learning_rate": 4.504504504504505e-06,
      "loss": 0.5392,
      "step": 250
    },
    {
      "epoch": 0.14056586772391422,
      "grad_norm": 0.42849081039324655,
      "learning_rate": 4.684684684684685e-06,
      "loss": 0.5317,
      "step": 260
    },
    {
      "epoch": 0.1459722472517571,
      "grad_norm": 0.4104060308344588,
      "learning_rate": 4.864864864864866e-06,
      "loss": 0.5317,
      "step": 270
    },
    {
      "epoch": 0.15137862677959993,
      "grad_norm": 0.5046982359974199,
      "learning_rate": 5.045045045045045e-06,
      "loss": 0.5342,
      "step": 280
    },
    {
      "epoch": 0.15678500630744277,
      "grad_norm": 0.4507880118410215,
      "learning_rate": 5.225225225225226e-06,
      "loss": 0.5325,
      "step": 290
    },
    {
      "epoch": 0.16219138583528564,
      "grad_norm": 0.42877102726223915,
      "learning_rate": 5.405405405405406e-06,
      "loss": 0.5236,
      "step": 300
    },
    {
      "epoch": 0.16759776536312848,
      "grad_norm": 0.5283894117116334,
      "learning_rate": 5.585585585585585e-06,
      "loss": 0.5316,
      "step": 310
    },
    {
      "epoch": 0.17300414489097135,
      "grad_norm": 0.45448942603717846,
      "learning_rate": 5.765765765765766e-06,
      "loss": 0.5304,
      "step": 320
    },
    {
      "epoch": 0.1784105244188142,
      "grad_norm": 0.4459611601163911,
      "learning_rate": 5.945945945945947e-06,
      "loss": 0.5307,
      "step": 330
    },
    {
      "epoch": 0.18381690394665706,
      "grad_norm": 0.4167802385045301,
      "learning_rate": 6.126126126126126e-06,
      "loss": 0.5142,
      "step": 340
    },
    {
      "epoch": 0.1892232834744999,
      "grad_norm": 0.45167071134408077,
      "learning_rate": 6.3063063063063065e-06,
      "loss": 0.5252,
      "step": 350
    },
    {
      "epoch": 0.19462966300234277,
      "grad_norm": 0.3815004250489287,
      "learning_rate": 6.486486486486487e-06,
      "loss": 0.5203,
      "step": 360
    },
    {
      "epoch": 0.20003604253018561,
      "grad_norm": 0.4189611440474181,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.5198,
      "step": 370
    },
    {
      "epoch": 0.20544242205802848,
      "grad_norm": 0.4356383135556994,
      "learning_rate": 6.846846846846848e-06,
      "loss": 0.5164,
      "step": 380
    },
    {
      "epoch": 0.21084880158587133,
      "grad_norm": 0.4146665581812368,
      "learning_rate": 7.027027027027028e-06,
      "loss": 0.5201,
      "step": 390
    },
    {
      "epoch": 0.2162551811137142,
      "grad_norm": 0.46098403607909094,
      "learning_rate": 7.207207207207208e-06,
      "loss": 0.5241,
      "step": 400
    },
    {
      "epoch": 0.22166156064155704,
      "grad_norm": 0.4173832279688485,
      "learning_rate": 7.387387387387388e-06,
      "loss": 0.5141,
      "step": 410
    },
    {
      "epoch": 0.22706794016939988,
      "grad_norm": 0.45342411753034784,
      "learning_rate": 7.567567567567569e-06,
      "loss": 0.5058,
      "step": 420
    },
    {
      "epoch": 0.23247431969724275,
      "grad_norm": 0.5556218847582134,
      "learning_rate": 7.747747747747749e-06,
      "loss": 0.5132,
      "step": 430
    },
    {
      "epoch": 0.2378806992250856,
      "grad_norm": 0.4159604294450067,
      "learning_rate": 7.927927927927929e-06,
      "loss": 0.5116,
      "step": 440
    },
    {
      "epoch": 0.24328707875292846,
      "grad_norm": 0.5011827344554423,
      "learning_rate": 8.108108108108109e-06,
      "loss": 0.5168,
      "step": 450
    },
    {
      "epoch": 0.2486934582807713,
      "grad_norm": 0.4837033851909487,
      "learning_rate": 8.288288288288289e-06,
      "loss": 0.5078,
      "step": 460
    },
    {
      "epoch": 0.25409983780861417,
      "grad_norm": 0.43704376571990733,
      "learning_rate": 8.46846846846847e-06,
      "loss": 0.5033,
      "step": 470
    },
    {
      "epoch": 0.259506217336457,
      "grad_norm": 0.3998543920237395,
      "learning_rate": 8.64864864864865e-06,
      "loss": 0.5023,
      "step": 480
    },
    {
      "epoch": 0.26491259686429985,
      "grad_norm": 0.5026204387708488,
      "learning_rate": 8.82882882882883e-06,
      "loss": 0.5101,
      "step": 490
    },
    {
      "epoch": 0.27031897639214275,
      "grad_norm": 0.5354755864920291,
      "learning_rate": 9.00900900900901e-06,
      "loss": 0.508,
      "step": 500
    },
    {
      "epoch": 0.2757253559199856,
      "grad_norm": 0.4703091181508223,
      "learning_rate": 9.189189189189191e-06,
      "loss": 0.5057,
      "step": 510
    },
    {
      "epoch": 0.28113173544782843,
      "grad_norm": 0.5066877793509437,
      "learning_rate": 9.36936936936937e-06,
      "loss": 0.5026,
      "step": 520
    },
    {
      "epoch": 0.2865381149756713,
      "grad_norm": 0.46090960041448786,
      "learning_rate": 9.54954954954955e-06,
      "loss": 0.5106,
      "step": 530
    },
    {
      "epoch": 0.2919444945035142,
      "grad_norm": 0.48562395925030005,
      "learning_rate": 9.729729729729732e-06,
      "loss": 0.4974,
      "step": 540
    },
    {
      "epoch": 0.297350874031357,
      "grad_norm": 0.4646077201771921,
      "learning_rate": 9.90990990990991e-06,
      "loss": 0.4999,
      "step": 550
    },
    {
      "epoch": 0.30275725355919986,
      "grad_norm": 0.4546070354869126,
      "learning_rate": 9.999975246862685e-06,
      "loss": 0.5103,
      "step": 560
    },
    {
      "epoch": 0.3081636330870427,
      "grad_norm": 0.4529892857679444,
      "learning_rate": 9.999777223234682e-06,
      "loss": 0.5015,
      "step": 570
    },
    {
      "epoch": 0.31357001261488554,
      "grad_norm": 0.42533238661448763,
      "learning_rate": 9.999381183821387e-06,
      "loss": 0.5079,
      "step": 580
    },
    {
      "epoch": 0.31897639214272844,
      "grad_norm": 0.4319966793689572,
      "learning_rate": 9.998787144307906e-06,
      "loss": 0.4946,
      "step": 590
    },
    {
      "epoch": 0.3243827716705713,
      "grad_norm": 0.5664739889982127,
      "learning_rate": 9.997995128221131e-06,
      "loss": 0.4963,
      "step": 600
    },
    {
      "epoch": 0.3297891511984141,
      "grad_norm": 0.4571640893613164,
      "learning_rate": 9.9970051669288e-06,
      "loss": 0.4937,
      "step": 610
    },
    {
      "epoch": 0.33519553072625696,
      "grad_norm": 0.46148944851299945,
      "learning_rate": 9.995817299638244e-06,
      "loss": 0.5002,
      "step": 620
    },
    {
      "epoch": 0.34060191025409986,
      "grad_norm": 0.4844168889608816,
      "learning_rate": 9.994431573394861e-06,
      "loss": 0.5029,
      "step": 630
    },
    {
      "epoch": 0.3460082897819427,
      "grad_norm": 0.4279693386473206,
      "learning_rate": 9.99284804308023e-06,
      "loss": 0.4952,
      "step": 640
    },
    {
      "epoch": 0.35141466930978554,
      "grad_norm": 0.5233101609153901,
      "learning_rate": 9.991066771409941e-06,
      "loss": 0.4915,
      "step": 650
    },
    {
      "epoch": 0.3568210488376284,
      "grad_norm": 0.4633208414221673,
      "learning_rate": 9.989087828931121e-06,
      "loss": 0.4981,
      "step": 660
    },
    {
      "epoch": 0.3622274283654713,
      "grad_norm": 0.450997223108701,
      "learning_rate": 9.986911294019631e-06,
      "loss": 0.4975,
      "step": 670
    },
    {
      "epoch": 0.3676338078933141,
      "grad_norm": 0.42452529740346523,
      "learning_rate": 9.984537252876969e-06,
      "loss": 0.4908,
      "step": 680
    },
    {
      "epoch": 0.37304018742115697,
      "grad_norm": 0.46365207035760786,
      "learning_rate": 9.981965799526846e-06,
      "loss": 0.5016,
      "step": 690
    },
    {
      "epoch": 0.3784465669489998,
      "grad_norm": 0.5296232726547591,
      "learning_rate": 9.97919703581147e-06,
      "loss": 0.4876,
      "step": 700
    },
    {
      "epoch": 0.38385294647684265,
      "grad_norm": 0.401880074927354,
      "learning_rate": 9.976231071387513e-06,
      "loss": 0.4903,
      "step": 710
    },
    {
      "epoch": 0.38925932600468555,
      "grad_norm": 0.42396559048043103,
      "learning_rate": 9.973068023721761e-06,
      "loss": 0.4898,
      "step": 720
    },
    {
      "epoch": 0.3946657055325284,
      "grad_norm": 0.46944427807049693,
      "learning_rate": 9.969708018086472e-06,
      "loss": 0.4881,
      "step": 730
    },
    {
      "epoch": 0.40007208506037123,
      "grad_norm": 0.4333253518146232,
      "learning_rate": 9.966151187554403e-06,
      "loss": 0.4895,
      "step": 740
    },
    {
      "epoch": 0.40547846458821407,
      "grad_norm": 0.37661719489991125,
      "learning_rate": 9.962397672993552e-06,
      "loss": 0.487,
      "step": 750
    },
    {
      "epoch": 0.41088484411605697,
      "grad_norm": 0.4603392631171023,
      "learning_rate": 9.958447623061564e-06,
      "loss": 0.4872,
      "step": 760
    },
    {
      "epoch": 0.4162912236438998,
      "grad_norm": 0.3927558003883759,
      "learning_rate": 9.954301194199864e-06,
      "loss": 0.4903,
      "step": 770
    },
    {
      "epoch": 0.42169760317174265,
      "grad_norm": 0.42897879593990096,
      "learning_rate": 9.949958550627436e-06,
      "loss": 0.4885,
      "step": 780
    },
    {
      "epoch": 0.4271039826995855,
      "grad_norm": 0.4924374446694773,
      "learning_rate": 9.945419864334344e-06,
      "loss": 0.4774,
      "step": 790
    },
    {
      "epoch": 0.4325103622274284,
      "grad_norm": 0.42518945879483444,
      "learning_rate": 9.940685315074898e-06,
      "loss": 0.4754,
      "step": 800
    },
    {
      "epoch": 0.43791674175527123,
      "grad_norm": 0.399260485682431,
      "learning_rate": 9.935755090360554e-06,
      "loss": 0.4765,
      "step": 810
    },
    {
      "epoch": 0.4433231212831141,
      "grad_norm": 0.37083672732602235,
      "learning_rate": 9.930629385452475e-06,
      "loss": 0.4757,
      "step": 820
    },
    {
      "epoch": 0.4487295008109569,
      "grad_norm": 0.41759222116367195,
      "learning_rate": 9.925308403353801e-06,
      "loss": 0.4871,
      "step": 830
    },
    {
      "epoch": 0.45413588033879976,
      "grad_norm": 0.4969932090759188,
      "learning_rate": 9.919792354801614e-06,
      "loss": 0.4792,
      "step": 840
    },
    {
      "epoch": 0.45954225986664266,
      "grad_norm": 0.5029960802938596,
      "learning_rate": 9.914081458258582e-06,
      "loss": 0.4896,
      "step": 850
    },
    {
      "epoch": 0.4649486393944855,
      "grad_norm": 0.40244747307174517,
      "learning_rate": 9.908175939904317e-06,
      "loss": 0.492,
      "step": 860
    },
    {
      "epoch": 0.47035501892232834,
      "grad_norm": 0.4109529990790928,
      "learning_rate": 9.902076033626409e-06,
      "loss": 0.4863,
      "step": 870
    },
    {
      "epoch": 0.4757613984501712,
      "grad_norm": 0.4151789891424962,
      "learning_rate": 9.89578198101117e-06,
      "loss": 0.48,
      "step": 880
    },
    {
      "epoch": 0.4811677779780141,
      "grad_norm": 0.4884869421566706,
      "learning_rate": 9.88929403133406e-06,
      "loss": 0.4875,
      "step": 890
    },
    {
      "epoch": 0.4865741575058569,
      "grad_norm": 0.39469839728031286,
      "learning_rate": 9.882612441549817e-06,
      "loss": 0.4886,
      "step": 900
    },
    {
      "epoch": 0.49198053703369976,
      "grad_norm": 0.41142281651530643,
      "learning_rate": 9.875737476282283e-06,
      "loss": 0.4837,
      "step": 910
    },
    {
      "epoch": 0.4973869165615426,
      "grad_norm": 0.4420691443729092,
      "learning_rate": 9.868669407813919e-06,
      "loss": 0.4877,
      "step": 920
    },
    {
      "epoch": 0.5027932960893855,
      "grad_norm": 0.37836126000922937,
      "learning_rate": 9.86140851607502e-06,
      "loss": 0.4826,
      "step": 930
    },
    {
      "epoch": 0.5081996756172283,
      "grad_norm": 0.42066137745562854,
      "learning_rate": 9.85395508863264e-06,
      "loss": 0.4827,
      "step": 940
    },
    {
      "epoch": 0.5136060551450712,
      "grad_norm": 0.45522508321704436,
      "learning_rate": 9.846309420679181e-06,
      "loss": 0.4807,
      "step": 950
    },
    {
      "epoch": 0.519012434672914,
      "grad_norm": 0.424109403832704,
      "learning_rate": 9.838471815020731e-06,
      "loss": 0.483,
      "step": 960
    },
    {
      "epoch": 0.5244188142007569,
      "grad_norm": 0.4571075574503357,
      "learning_rate": 9.830442582065046e-06,
      "loss": 0.4847,
      "step": 970
    },
    {
      "epoch": 0.5298251937285997,
      "grad_norm": 0.39544147521974715,
      "learning_rate": 9.822222039809265e-06,
      "loss": 0.4894,
      "step": 980
    },
    {
      "epoch": 0.5352315732564425,
      "grad_norm": 0.41512982878770877,
      "learning_rate": 9.813810513827324e-06,
      "loss": 0.4757,
      "step": 990
    },
    {
      "epoch": 0.5406379527842855,
      "grad_norm": 0.44241530882704766,
      "learning_rate": 9.805208337257048e-06,
      "loss": 0.4844,
      "step": 1000
    },
    {
      "epoch": 0.5460443323121283,
      "grad_norm": 0.39829234416158904,
      "learning_rate": 9.79641585078697e-06,
      "loss": 0.4712,
      "step": 1010
    },
    {
      "epoch": 0.5514507118399712,
      "grad_norm": 0.37741532471866907,
      "learning_rate": 9.787433402642823e-06,
      "loss": 0.4793,
      "step": 1020
    },
    {
      "epoch": 0.556857091367814,
      "grad_norm": 0.4148300916885638,
      "learning_rate": 9.778261348573766e-06,
      "loss": 0.4838,
      "step": 1030
    },
    {
      "epoch": 0.5622634708956569,
      "grad_norm": 0.4432803310345476,
      "learning_rate": 9.76890005183828e-06,
      "loss": 0.4808,
      "step": 1040
    },
    {
      "epoch": 0.5676698504234997,
      "grad_norm": 0.44053440283249773,
      "learning_rate": 9.759349883189788e-06,
      "loss": 0.4855,
      "step": 1050
    },
    {
      "epoch": 0.5730762299513426,
      "grad_norm": 0.47129417304470445,
      "learning_rate": 9.749611220861975e-06,
      "loss": 0.4825,
      "step": 1060
    },
    {
      "epoch": 0.5784826094791854,
      "grad_norm": 0.3519052622952217,
      "learning_rate": 9.739684450553796e-06,
      "loss": 0.4672,
      "step": 1070
    },
    {
      "epoch": 0.5838889890070283,
      "grad_norm": 0.41946435282373756,
      "learning_rate": 9.729569965414214e-06,
      "loss": 0.4749,
      "step": 1080
    },
    {
      "epoch": 0.5892953685348712,
      "grad_norm": 0.40367405116733107,
      "learning_rate": 9.719268166026619e-06,
      "loss": 0.4714,
      "step": 1090
    },
    {
      "epoch": 0.594701748062714,
      "grad_norm": 0.389163994716956,
      "learning_rate": 9.70877946039297e-06,
      "loss": 0.4762,
      "step": 1100
    },
    {
      "epoch": 0.6001081275905569,
      "grad_norm": 0.3924144038563765,
      "learning_rate": 9.698104263917632e-06,
      "loss": 0.479,
      "step": 1110
    },
    {
      "epoch": 0.6055145071183997,
      "grad_norm": 0.38077440580004723,
      "learning_rate": 9.687242999390923e-06,
      "loss": 0.4743,
      "step": 1120
    },
    {
      "epoch": 0.6109208866462426,
      "grad_norm": 0.4144915670436874,
      "learning_rate": 9.676196096972375e-06,
      "loss": 0.4831,
      "step": 1130
    },
    {
      "epoch": 0.6163272661740854,
      "grad_norm": 0.4019523099418982,
      "learning_rate": 9.664963994173695e-06,
      "loss": 0.4811,
      "step": 1140
    },
    {
      "epoch": 0.6217336457019282,
      "grad_norm": 0.3870772083799463,
      "learning_rate": 9.653547135841432e-06,
      "loss": 0.482,
      "step": 1150
    },
    {
      "epoch": 0.6271400252297711,
      "grad_norm": 0.3774486403943126,
      "learning_rate": 9.641945974139368e-06,
      "loss": 0.4808,
      "step": 1160
    },
    {
      "epoch": 0.632546404757614,
      "grad_norm": 0.3669418201630717,
      "learning_rate": 9.630160968530601e-06,
      "loss": 0.4742,
      "step": 1170
    },
    {
      "epoch": 0.6379527842854569,
      "grad_norm": 0.3767330377559856,
      "learning_rate": 9.618192585759358e-06,
      "loss": 0.4793,
      "step": 1180
    },
    {
      "epoch": 0.6433591638132997,
      "grad_norm": 0.4109728050110914,
      "learning_rate": 9.606041299832499e-06,
      "loss": 0.476,
      "step": 1190
    },
    {
      "epoch": 0.6487655433411426,
      "grad_norm": 0.42214280261521075,
      "learning_rate": 9.593707592000751e-06,
      "loss": 0.4719,
      "step": 1200
    },
    {
      "epoch": 0.6541719228689854,
      "grad_norm": 0.40015675805718526,
      "learning_rate": 9.581191950739651e-06,
      "loss": 0.4802,
      "step": 1210
    },
    {
      "epoch": 0.6595783023968282,
      "grad_norm": 0.3652325798758447,
      "learning_rate": 9.568494871730184e-06,
      "loss": 0.4751,
      "step": 1220
    },
    {
      "epoch": 0.6649846819246711,
      "grad_norm": 0.4758040665812572,
      "learning_rate": 9.555616857839171e-06,
      "loss": 0.476,
      "step": 1230
    },
    {
      "epoch": 0.6703910614525139,
      "grad_norm": 0.4088256926011169,
      "learning_rate": 9.542558419099348e-06,
      "loss": 0.4671,
      "step": 1240
    },
    {
      "epoch": 0.6757974409803568,
      "grad_norm": 0.3777516778350075,
      "learning_rate": 9.529320072689157e-06,
      "loss": 0.4663,
      "step": 1250
    },
    {
      "epoch": 0.6812038205081997,
      "grad_norm": 0.40279858714603456,
      "learning_rate": 9.515902342912268e-06,
      "loss": 0.4696,
      "step": 1260
    },
    {
      "epoch": 0.6866102000360426,
      "grad_norm": 0.4553420901856075,
      "learning_rate": 9.50230576117682e-06,
      "loss": 0.4742,
      "step": 1270
    },
    {
      "epoch": 0.6920165795638854,
      "grad_norm": 0.4339586123054069,
      "learning_rate": 9.488530865974365e-06,
      "loss": 0.4701,
      "step": 1280
    },
    {
      "epoch": 0.6974229590917282,
      "grad_norm": 0.4249972919470697,
      "learning_rate": 9.47457820285855e-06,
      "loss": 0.4701,
      "step": 1290
    },
    {
      "epoch": 0.7028293386195711,
      "grad_norm": 0.5108244833979698,
      "learning_rate": 9.460448324423508e-06,
      "loss": 0.4767,
      "step": 1300
    },
    {
      "epoch": 0.7082357181474139,
      "grad_norm": 0.41029950466124815,
      "learning_rate": 9.446141790281961e-06,
      "loss": 0.4757,
      "step": 1310
    },
    {
      "epoch": 0.7136420976752568,
      "grad_norm": 0.395665406767247,
      "learning_rate": 9.431659167043079e-06,
      "loss": 0.4657,
      "step": 1320
    },
    {
      "epoch": 0.7190484772030996,
      "grad_norm": 0.3916187354896928,
      "learning_rate": 9.417001028290019e-06,
      "loss": 0.47,
      "step": 1330
    },
    {
      "epoch": 0.7244548567309426,
      "grad_norm": 0.3841663885450239,
      "learning_rate": 9.402167954557218e-06,
      "loss": 0.4622,
      "step": 1340
    },
    {
      "epoch": 0.7298612362587854,
      "grad_norm": 0.33000158409293234,
      "learning_rate": 9.387160533307398e-06,
      "loss": 0.4735,
      "step": 1350
    },
    {
      "epoch": 0.7352676157866282,
      "grad_norm": 0.35110054752545317,
      "learning_rate": 9.371979358908302e-06,
      "loss": 0.4647,
      "step": 1360
    },
    {
      "epoch": 0.7406739953144711,
      "grad_norm": 0.4060026085740451,
      "learning_rate": 9.356625032609157e-06,
      "loss": 0.4716,
      "step": 1370
    },
    {
      "epoch": 0.7460803748423139,
      "grad_norm": 0.4014001214789219,
      "learning_rate": 9.341098162516848e-06,
      "loss": 0.4753,
      "step": 1380
    },
    {
      "epoch": 0.7514867543701568,
      "grad_norm": 0.4466537387424745,
      "learning_rate": 9.325399363571853e-06,
      "loss": 0.4637,
      "step": 1390
    },
    {
      "epoch": 0.7568931338979996,
      "grad_norm": 0.3789496760613153,
      "learning_rate": 9.309529257523873e-06,
      "loss": 0.4833,
      "step": 1400
    },
    {
      "epoch": 0.7622995134258425,
      "grad_norm": 0.3871711262176569,
      "learning_rate": 9.293488472907213e-06,
      "loss": 0.4741,
      "step": 1410
    },
    {
      "epoch": 0.7677058929536853,
      "grad_norm": 0.33522935773230744,
      "learning_rate": 9.277277645015895e-06,
      "loss": 0.4645,
      "step": 1420
    },
    {
      "epoch": 0.7731122724815283,
      "grad_norm": 0.36926574454217775,
      "learning_rate": 9.260897415878484e-06,
      "loss": 0.4737,
      "step": 1430
    },
    {
      "epoch": 0.7785186520093711,
      "grad_norm": 0.38628683202935965,
      "learning_rate": 9.244348434232676e-06,
      "loss": 0.4807,
      "step": 1440
    },
    {
      "epoch": 0.7839250315372139,
      "grad_norm": 0.3723802508008121,
      "learning_rate": 9.227631355499588e-06,
      "loss": 0.4711,
      "step": 1450
    },
    {
      "epoch": 0.7893314110650568,
      "grad_norm": 0.43275316141725356,
      "learning_rate": 9.210746841757816e-06,
      "loss": 0.4606,
      "step": 1460
    },
    {
      "epoch": 0.7947377905928996,
      "grad_norm": 0.36470233384616396,
      "learning_rate": 9.193695561717207e-06,
      "loss": 0.4789,
      "step": 1470
    },
    {
      "epoch": 0.8001441701207425,
      "grad_norm": 0.39548085338311784,
      "learning_rate": 9.176478190692369e-06,
      "loss": 0.4713,
      "step": 1480
    },
    {
      "epoch": 0.8055505496485853,
      "grad_norm": 0.3553750033222167,
      "learning_rate": 9.159095410575931e-06,
      "loss": 0.4725,
      "step": 1490
    },
    {
      "epoch": 0.8109569291764281,
      "grad_norm": 0.3637209745858356,
      "learning_rate": 9.14154790981154e-06,
      "loss": 0.4594,
      "step": 1500
    },
    {
      "epoch": 0.816363308704271,
      "grad_norm": 0.3827679215177506,
      "learning_rate": 9.12383638336659e-06,
      "loss": 0.4731,
      "step": 1510
    },
    {
      "epoch": 0.8217696882321139,
      "grad_norm": 0.3932319357502074,
      "learning_rate": 9.105961532704695e-06,
      "loss": 0.4744,
      "step": 1520
    },
    {
      "epoch": 0.8271760677599568,
      "grad_norm": 0.37420610924572006,
      "learning_rate": 9.08792406575792e-06,
      "loss": 0.4596,
      "step": 1530
    },
    {
      "epoch": 0.8325824472877996,
      "grad_norm": 0.36958869694379687,
      "learning_rate": 9.069724696898727e-06,
      "loss": 0.4644,
      "step": 1540
    },
    {
      "epoch": 0.8379888268156425,
      "grad_norm": 0.4296266126218128,
      "learning_rate": 9.051364146911696e-06,
      "loss": 0.4695,
      "step": 1550
    },
    {
      "epoch": 0.8433952063434853,
      "grad_norm": 0.3552866307907092,
      "learning_rate": 9.03284314296497e-06,
      "loss": 0.4699,
      "step": 1560
    },
    {
      "epoch": 0.8488015858713281,
      "grad_norm": 0.36327016829544306,
      "learning_rate": 9.01416241858146e-06,
      "loss": 0.4669,
      "step": 1570
    },
    {
      "epoch": 0.854207965399171,
      "grad_norm": 0.375420429355353,
      "learning_rate": 8.995322713609792e-06,
      "loss": 0.4672,
      "step": 1580
    },
    {
      "epoch": 0.8596143449270138,
      "grad_norm": 0.5173900256611019,
      "learning_rate": 8.976324774195005e-06,
      "loss": 0.4683,
      "step": 1590
    },
    {
      "epoch": 0.8650207244548568,
      "grad_norm": 0.39427484151317893,
      "learning_rate": 8.957169352749005e-06,
      "loss": 0.4652,
      "step": 1600
    },
    {
      "epoch": 0.8704271039826996,
      "grad_norm": 0.4127231026821577,
      "learning_rate": 8.937857207920751e-06,
      "loss": 0.4693,
      "step": 1610
    },
    {
      "epoch": 0.8758334835105425,
      "grad_norm": 0.3557084122875894,
      "learning_rate": 8.918389104566232e-06,
      "loss": 0.4653,
      "step": 1620
    },
    {
      "epoch": 0.8812398630383853,
      "grad_norm": 0.32279027303173025,
      "learning_rate": 8.898765813718155e-06,
      "loss": 0.4575,
      "step": 1630
    },
    {
      "epoch": 0.8866462425662281,
      "grad_norm": 0.3597815860403744,
      "learning_rate": 8.878988112555415e-06,
      "loss": 0.4635,
      "step": 1640
    },
    {
      "epoch": 0.892052622094071,
      "grad_norm": 0.3672011391559523,
      "learning_rate": 8.85905678437232e-06,
      "loss": 0.4637,
      "step": 1650
    },
    {
      "epoch": 0.8974590016219138,
      "grad_norm": 0.39802107641409196,
      "learning_rate": 8.838972618547561e-06,
      "loss": 0.4668,
      "step": 1660
    },
    {
      "epoch": 0.9028653811497567,
      "grad_norm": 0.35901725656975336,
      "learning_rate": 8.81873641051295e-06,
      "loss": 0.4626,
      "step": 1670
    },
    {
      "epoch": 0.9082717606775995,
      "grad_norm": 0.45574284613082794,
      "learning_rate": 8.798348961721925e-06,
      "loss": 0.4618,
      "step": 1680
    },
    {
      "epoch": 0.9136781402054425,
      "grad_norm": 0.33960849857370073,
      "learning_rate": 8.777811079617793e-06,
      "loss": 0.4735,
      "step": 1690
    },
    {
      "epoch": 0.9190845197332853,
      "grad_norm": 0.36806947123886746,
      "learning_rate": 8.757123577601771e-06,
      "loss": 0.4642,
      "step": 1700
    },
    {
      "epoch": 0.9244908992611282,
      "grad_norm": 0.36728162811734544,
      "learning_rate": 8.736287275000755e-06,
      "loss": 0.465,
      "step": 1710
    },
    {
      "epoch": 0.929897278788971,
      "grad_norm": 0.38164336488797146,
      "learning_rate": 8.715302997034876e-06,
      "loss": 0.4702,
      "step": 1720
    },
    {
      "epoch": 0.9353036583168138,
      "grad_norm": 0.34605322849280384,
      "learning_rate": 8.694171574784818e-06,
      "loss": 0.4674,
      "step": 1730
    },
    {
      "epoch": 0.9407100378446567,
      "grad_norm": 0.3353439147558085,
      "learning_rate": 8.672893845158908e-06,
      "loss": 0.4701,
      "step": 1740
    },
    {
      "epoch": 0.9461164173724995,
      "grad_norm": 0.3437002297587831,
      "learning_rate": 8.651470650859955e-06,
      "loss": 0.4599,
      "step": 1750
    },
    {
      "epoch": 0.9515227969003424,
      "grad_norm": 0.3431363969879203,
      "learning_rate": 8.629902840351898e-06,
      "loss": 0.4637,
      "step": 1760
    },
    {
      "epoch": 0.9569291764281853,
      "grad_norm": 0.3765462141591892,
      "learning_rate": 8.608191267826179e-06,
      "loss": 0.4694,
      "step": 1770
    },
    {
      "epoch": 0.9623355559560282,
      "grad_norm": 0.420048049416004,
      "learning_rate": 8.586336793167926e-06,
      "loss": 0.4641,
      "step": 1780
    },
    {
      "epoch": 0.967741935483871,
      "grad_norm": 0.412279889648995,
      "learning_rate": 8.5643402819219e-06,
      "loss": 0.4566,
      "step": 1790
    },
    {
      "epoch": 0.9731483150117138,
      "grad_norm": 0.3299568555620076,
      "learning_rate": 8.542202605258204e-06,
      "loss": 0.463,
      "step": 1800
    },
    {
      "epoch": 0.9785546945395567,
      "grad_norm": 0.32198105439404867,
      "learning_rate": 8.519924639937786e-06,
      "loss": 0.4617,
      "step": 1810
    },
    {
      "epoch": 0.9839610740673995,
      "grad_norm": 0.3549245136848414,
      "learning_rate": 8.49750726827772e-06,
      "loss": 0.4565,
      "step": 1820
    },
    {
      "epoch": 0.9893674535952424,
      "grad_norm": 0.3392271575380573,
      "learning_rate": 8.474951378116253e-06,
      "loss": 0.4639,
      "step": 1830
    },
    {
      "epoch": 0.9947738331230852,
      "grad_norm": 0.3208227345701,
      "learning_rate": 8.452257862777653e-06,
      "loss": 0.4546,
      "step": 1840
    },
    {
      "epoch": 1.000180212650928,
      "grad_norm": 0.4559641919273857,
      "learning_rate": 8.42942762103681e-06,
      "loss": 0.4837,
      "step": 1850
    },
    {
      "epoch": 1.005586592178771,
      "grad_norm": 0.3598410288175877,
      "learning_rate": 8.406461557083666e-06,
      "loss": 0.4404,
      "step": 1860
    },
    {
      "epoch": 1.0109929717066137,
      "grad_norm": 0.3857145460836866,
      "learning_rate": 8.383360580487378e-06,
      "loss": 0.4393,
      "step": 1870
    },
    {
      "epoch": 1.0163993512344567,
      "grad_norm": 0.34505752597289024,
      "learning_rate": 8.360125606160323e-06,
      "loss": 0.4422,
      "step": 1880
    },
    {
      "epoch": 1.0218057307622994,
      "grad_norm": 0.3739277339941646,
      "learning_rate": 8.336757554321832e-06,
      "loss": 0.4424,
      "step": 1890
    },
    {
      "epoch": 1.0272121102901424,
      "grad_norm": 0.3968787668713752,
      "learning_rate": 8.313257350461774e-06,
      "loss": 0.4376,
      "step": 1900
    },
    {
      "epoch": 1.0326184898179853,
      "grad_norm": 0.3451897271410753,
      "learning_rate": 8.289625925303877e-06,
      "loss": 0.4425,
      "step": 1910
    },
    {
      "epoch": 1.038024869345828,
      "grad_norm": 0.40010047495902706,
      "learning_rate": 8.265864214768883e-06,
      "loss": 0.4503,
      "step": 1920
    },
    {
      "epoch": 1.043431248873671,
      "grad_norm": 0.3736188460908676,
      "learning_rate": 8.241973159937482e-06,
      "loss": 0.4406,
      "step": 1930
    },
    {
      "epoch": 1.0488376284015137,
      "grad_norm": 0.3394542766186862,
      "learning_rate": 8.217953707013025e-06,
      "loss": 0.4393,
      "step": 1940
    },
    {
      "epoch": 1.0542440079293567,
      "grad_norm": 0.35077872709329283,
      "learning_rate": 8.193806807284064e-06,
      "loss": 0.4383,
      "step": 1950
    },
    {
      "epoch": 1.0596503874571994,
      "grad_norm": 0.3441941331677373,
      "learning_rate": 8.169533417086673e-06,
      "loss": 0.4286,
      "step": 1960
    },
    {
      "epoch": 1.0650567669850424,
      "grad_norm": 0.34884852607611294,
      "learning_rate": 8.145134497766566e-06,
      "loss": 0.4467,
      "step": 1970
    },
    {
      "epoch": 1.070463146512885,
      "grad_norm": 0.40097746242132437,
      "learning_rate": 8.120611015641036e-06,
      "loss": 0.4363,
      "step": 1980
    },
    {
      "epoch": 1.075869526040728,
      "grad_norm": 0.33184835023647064,
      "learning_rate": 8.095963941960667e-06,
      "loss": 0.437,
      "step": 1990
    },
    {
      "epoch": 1.081275905568571,
      "grad_norm": 0.394546885758411,
      "learning_rate": 8.071194252870887e-06,
      "loss": 0.432,
      "step": 2000
    },
    {
      "epoch": 1.0866822850964137,
      "grad_norm": 0.472784994513626,
      "learning_rate": 8.046302929373286e-06,
      "loss": 0.4367,
      "step": 2010
    },
    {
      "epoch": 1.0920886646242567,
      "grad_norm": 0.3602670786653786,
      "learning_rate": 8.021290957286787e-06,
      "loss": 0.4352,
      "step": 2020
    },
    {
      "epoch": 1.0974950441520994,
      "grad_norm": 0.3963387130392289,
      "learning_rate": 7.996159327208581e-06,
      "loss": 0.4434,
      "step": 2030
    },
    {
      "epoch": 1.1029014236799424,
      "grad_norm": 0.37403782295160953,
      "learning_rate": 7.97090903447491e-06,
      "loss": 0.4326,
      "step": 2040
    },
    {
      "epoch": 1.108307803207785,
      "grad_norm": 0.37350913921356577,
      "learning_rate": 7.945541079121642e-06,
      "loss": 0.4485,
      "step": 2050
    },
    {
      "epoch": 1.113714182735628,
      "grad_norm": 0.3661212920976343,
      "learning_rate": 7.920056465844658e-06,
      "loss": 0.4328,
      "step": 2060
    },
    {
      "epoch": 1.119120562263471,
      "grad_norm": 0.3507951321263283,
      "learning_rate": 7.894456203960075e-06,
      "loss": 0.4339,
      "step": 2070
    },
    {
      "epoch": 1.1245269417913137,
      "grad_norm": 0.31935101139873434,
      "learning_rate": 7.868741307364255e-06,
      "loss": 0.4307,
      "step": 2080
    },
    {
      "epoch": 1.1299333213191567,
      "grad_norm": 0.3240469373544592,
      "learning_rate": 7.842912794493667e-06,
      "loss": 0.4357,
      "step": 2090
    },
    {
      "epoch": 1.1353397008469994,
      "grad_norm": 0.4024576218630106,
      "learning_rate": 7.81697168828454e-06,
      "loss": 0.4429,
      "step": 2100
    },
    {
      "epoch": 1.1407460803748424,
      "grad_norm": 0.4057186928939639,
      "learning_rate": 7.790919016132351e-06,
      "loss": 0.4435,
      "step": 2110
    },
    {
      "epoch": 1.146152459902685,
      "grad_norm": 0.4339123108369387,
      "learning_rate": 7.764755809851141e-06,
      "loss": 0.4375,
      "step": 2120
    },
    {
      "epoch": 1.151558839430528,
      "grad_norm": 0.3423301493159426,
      "learning_rate": 7.738483105632644e-06,
      "loss": 0.4408,
      "step": 2130
    },
    {
      "epoch": 1.1569652189583708,
      "grad_norm": 0.3049599421413694,
      "learning_rate": 7.712101944005256e-06,
      "loss": 0.442,
      "step": 2140
    },
    {
      "epoch": 1.1623715984862137,
      "grad_norm": 0.3235699906736669,
      "learning_rate": 7.685613369792815e-06,
      "loss": 0.4389,
      "step": 2150
    },
    {
      "epoch": 1.1677779780140565,
      "grad_norm": 0.38824198475727123,
      "learning_rate": 7.65901843207323e-06,
      "loss": 0.4372,
      "step": 2160
    },
    {
      "epoch": 1.1731843575418994,
      "grad_norm": 0.3485465278129701,
      "learning_rate": 7.63231818413692e-06,
      "loss": 0.4313,
      "step": 2170
    },
    {
      "epoch": 1.1785907370697424,
      "grad_norm": 0.3607061695090595,
      "learning_rate": 7.605513683445118e-06,
      "loss": 0.433,
      "step": 2180
    },
    {
      "epoch": 1.183997116597585,
      "grad_norm": 0.35864049794241826,
      "learning_rate": 7.578605991587974e-06,
      "loss": 0.43,
      "step": 2190
    },
    {
      "epoch": 1.189403496125428,
      "grad_norm": 0.3622129404816991,
      "learning_rate": 7.5515961742425146e-06,
      "loss": 0.4357,
      "step": 2200
    },
    {
      "epoch": 1.1948098756532708,
      "grad_norm": 0.37719764002603634,
      "learning_rate": 7.524485301130443e-06,
      "loss": 0.4363,
      "step": 2210
    },
    {
      "epoch": 1.2002162551811137,
      "grad_norm": 0.32038054153975193,
      "learning_rate": 7.497274445975762e-06,
      "loss": 0.4283,
      "step": 2220
    },
    {
      "epoch": 1.2056226347089565,
      "grad_norm": 0.3897896894072551,
      "learning_rate": 7.469964686462261e-06,
      "loss": 0.4416,
      "step": 2230
    },
    {
      "epoch": 1.2110290142367994,
      "grad_norm": 0.32144151391797593,
      "learning_rate": 7.4425571041908254e-06,
      "loss": 0.4388,
      "step": 2240
    },
    {
      "epoch": 1.2164353937646424,
      "grad_norm": 0.3553047783046372,
      "learning_rate": 7.415052784636603e-06,
      "loss": 0.4401,
      "step": 2250
    },
    {
      "epoch": 1.2218417732924851,
      "grad_norm": 0.31787401750902194,
      "learning_rate": 7.387452817106017e-06,
      "loss": 0.4313,
      "step": 2260
    },
    {
      "epoch": 1.227248152820328,
      "grad_norm": 0.3736244875654426,
      "learning_rate": 7.359758294693618e-06,
      "loss": 0.4392,
      "step": 2270
    },
    {
      "epoch": 1.2326545323481708,
      "grad_norm": 0.34863542131710556,
      "learning_rate": 7.331970314238799e-06,
      "loss": 0.4405,
      "step": 2280
    },
    {
      "epoch": 1.2380609118760137,
      "grad_norm": 0.414690288534652,
      "learning_rate": 7.304089976282348e-06,
      "loss": 0.4401,
      "step": 2290
    },
    {
      "epoch": 1.2434672914038565,
      "grad_norm": 0.356866165228421,
      "learning_rate": 7.276118385022865e-06,
      "loss": 0.4241,
      "step": 2300
    },
    {
      "epoch": 1.2488736709316994,
      "grad_norm": 0.33264484884680307,
      "learning_rate": 7.248056648273034e-06,
      "loss": 0.4425,
      "step": 2310
    },
    {
      "epoch": 1.2542800504595424,
      "grad_norm": 0.4175310788334551,
      "learning_rate": 7.2199058774157375e-06,
      "loss": 0.4276,
      "step": 2320
    },
    {
      "epoch": 1.2596864299873851,
      "grad_norm": 0.38229588901030637,
      "learning_rate": 7.1916671873600515e-06,
      "loss": 0.4312,
      "step": 2330
    },
    {
      "epoch": 1.2650928095152278,
      "grad_norm": 0.338696312422094,
      "learning_rate": 7.163341696497084e-06,
      "loss": 0.4405,
      "step": 2340
    },
    {
      "epoch": 1.2704991890430708,
      "grad_norm": 0.32136223620818055,
      "learning_rate": 7.134930526655679e-06,
      "loss": 0.4347,
      "step": 2350
    },
    {
      "epoch": 1.2759055685709138,
      "grad_norm": 0.3590441906111087,
      "learning_rate": 7.106434803057998e-06,
      "loss": 0.4392,
      "step": 2360
    },
    {
      "epoch": 1.2813119480987565,
      "grad_norm": 0.3822900334441054,
      "learning_rate": 7.077855654274939e-06,
      "loss": 0.4329,
      "step": 2370
    },
    {
      "epoch": 1.2867183276265994,
      "grad_norm": 0.4150924729603716,
      "learning_rate": 7.04919421218145e-06,
      "loss": 0.4344,
      "step": 2380
    },
    {
      "epoch": 1.2921247071544422,
      "grad_norm": 0.31977805162237566,
      "learning_rate": 7.020451611911703e-06,
      "loss": 0.4274,
      "step": 2390
    },
    {
      "epoch": 1.2975310866822851,
      "grad_norm": 0.4042413750463481,
      "learning_rate": 6.9916289918141265e-06,
      "loss": 0.4383,
      "step": 2400
    },
    {
      "epoch": 1.3029374662101278,
      "grad_norm": 0.32750161889881924,
      "learning_rate": 6.962727493406335e-06,
      "loss": 0.4363,
      "step": 2410
    },
    {
      "epoch": 1.3083438457379708,
      "grad_norm": 0.34681784503652924,
      "learning_rate": 6.9337482613299065e-06,
      "loss": 0.4251,
      "step": 2420
    },
    {
      "epoch": 1.3137502252658138,
      "grad_norm": 0.31392667825247955,
      "learning_rate": 6.904692443305059e-06,
      "loss": 0.439,
      "step": 2430
    },
    {
      "epoch": 1.3191566047936565,
      "grad_norm": 0.3080535811767778,
      "learning_rate": 6.87556119008519e-06,
      "loss": 0.4268,
      "step": 2440
    },
    {
      "epoch": 1.3245629843214994,
      "grad_norm": 0.37030845399385603,
      "learning_rate": 6.8463556554113005e-06,
      "loss": 0.4353,
      "step": 2450
    },
    {
      "epoch": 1.3299693638493422,
      "grad_norm": 0.3473034342384458,
      "learning_rate": 6.8170769959663045e-06,
      "loss": 0.4292,
      "step": 2460
    },
    {
      "epoch": 1.3353757433771851,
      "grad_norm": 0.322256198293079,
      "learning_rate": 6.787726371329214e-06,
      "loss": 0.4402,
      "step": 2470
    },
    {
      "epoch": 1.3407821229050279,
      "grad_norm": 0.3907219151376363,
      "learning_rate": 6.7583049439292205e-06,
      "loss": 0.4369,
      "step": 2480
    },
    {
      "epoch": 1.3461885024328708,
      "grad_norm": 0.34928113227903806,
      "learning_rate": 6.728813878999652e-06,
      "loss": 0.4377,
      "step": 2490
    },
    {
      "epoch": 1.3515948819607138,
      "grad_norm": 0.35544626757027864,
      "learning_rate": 6.699254344531821e-06,
      "loss": 0.4309,
      "step": 2500
    },
    {
      "epoch": 1.3570012614885565,
      "grad_norm": 0.366218747083373,
      "learning_rate": 6.669627511228778e-06,
      "loss": 0.434,
      "step": 2510
    },
    {
      "epoch": 1.3624076410163992,
      "grad_norm": 0.3580871935273299,
      "learning_rate": 6.6399345524589366e-06,
      "loss": 0.4401,
      "step": 2520
    },
    {
      "epoch": 1.3678140205442422,
      "grad_norm": 0.29886314913995143,
      "learning_rate": 6.610176644209602e-06,
      "loss": 0.4266,
      "step": 2530
    },
    {
      "epoch": 1.3732204000720851,
      "grad_norm": 0.3571328312104908,
      "learning_rate": 6.580354965040396e-06,
      "loss": 0.4393,
      "step": 2540
    },
    {
      "epoch": 1.3786267795999279,
      "grad_norm": 0.3568154757493318,
      "learning_rate": 6.550470696036591e-06,
      "loss": 0.4276,
      "step": 2550
    },
    {
      "epoch": 1.3840331591277708,
      "grad_norm": 0.3020834353942124,
      "learning_rate": 6.520525020762318e-06,
      "loss": 0.4374,
      "step": 2560
    },
    {
      "epoch": 1.3894395386556138,
      "grad_norm": 0.4345861239807074,
      "learning_rate": 6.490519125213701e-06,
      "loss": 0.44,
      "step": 2570
    },
    {
      "epoch": 1.3948459181834565,
      "grad_norm": 0.4164116140474957,
      "learning_rate": 6.460454197771881e-06,
      "loss": 0.4347,
      "step": 2580
    },
    {
      "epoch": 1.4002522977112992,
      "grad_norm": 0.3698597319632245,
      "learning_rate": 6.430331429155956e-06,
      "loss": 0.4398,
      "step": 2590
    },
    {
      "epoch": 1.4056586772391422,
      "grad_norm": 0.3557941383592286,
      "learning_rate": 6.400152012375818e-06,
      "loss": 0.4361,
      "step": 2600
    },
    {
      "epoch": 1.4110650567669851,
      "grad_norm": 0.3703620913980966,
      "learning_rate": 6.3699171426849036e-06,
      "loss": 0.433,
      "step": 2610
    },
    {
      "epoch": 1.4164714362948279,
      "grad_norm": 0.312372238883981,
      "learning_rate": 6.339628017532858e-06,
      "loss": 0.4305,
      "step": 2620
    },
    {
      "epoch": 1.4218778158226708,
      "grad_norm": 0.32819677760603516,
      "learning_rate": 6.309285836518113e-06,
      "loss": 0.4289,
      "step": 2630
    },
    {
      "epoch": 1.4272841953505135,
      "grad_norm": 0.34835896987461035,
      "learning_rate": 6.2788918013403695e-06,
      "loss": 0.4312,
      "step": 2640
    },
    {
      "epoch": 1.4326905748783565,
      "grad_norm": 0.34043287674955064,
      "learning_rate": 6.248447115753009e-06,
      "loss": 0.4327,
      "step": 2650
    },
    {
      "epoch": 1.4380969544061992,
      "grad_norm": 0.32777806734674225,
      "learning_rate": 6.21795298551542e-06,
      "loss": 0.4206,
      "step": 2660
    },
    {
      "epoch": 1.4435033339340422,
      "grad_norm": 0.2839690869238431,
      "learning_rate": 6.187410618345241e-06,
      "loss": 0.4337,
      "step": 2670
    },
    {
      "epoch": 1.4489097134618851,
      "grad_norm": 0.2845491198333412,
      "learning_rate": 6.156821223870533e-06,
      "loss": 0.428,
      "step": 2680
    },
    {
      "epoch": 1.4543160929897279,
      "grad_norm": 0.3381278947086419,
      "learning_rate": 6.126186013581868e-06,
      "loss": 0.4442,
      "step": 2690
    },
    {
      "epoch": 1.4597224725175708,
      "grad_norm": 0.2678673584947001,
      "learning_rate": 6.095506200784349e-06,
      "loss": 0.4313,
      "step": 2700
    },
    {
      "epoch": 1.4651288520454135,
      "grad_norm": 0.32064492812884415,
      "learning_rate": 6.06478300054956e-06,
      "loss": 0.4443,
      "step": 2710
    },
    {
      "epoch": 1.4705352315732565,
      "grad_norm": 0.33114310721210843,
      "learning_rate": 6.034017629667439e-06,
      "loss": 0.4321,
      "step": 2720
    },
    {
      "epoch": 1.4759416111010992,
      "grad_norm": 0.3407274170049336,
      "learning_rate": 6.003211306598089e-06,
      "loss": 0.4302,
      "step": 2730
    },
    {
      "epoch": 1.4813479906289422,
      "grad_norm": 0.3655959799961016,
      "learning_rate": 5.972365251423521e-06,
      "loss": 0.4331,
      "step": 2740
    },
    {
      "epoch": 1.4867543701567851,
      "grad_norm": 0.3707027911602118,
      "learning_rate": 5.941480685799338e-06,
      "loss": 0.433,
      "step": 2750
    },
    {
      "epoch": 1.4921607496846279,
      "grad_norm": 0.30224309374010494,
      "learning_rate": 5.910558832906341e-06,
      "loss": 0.4378,
      "step": 2760
    },
    {
      "epoch": 1.4975671292124706,
      "grad_norm": 0.3421553953269554,
      "learning_rate": 5.879600917402089e-06,
      "loss": 0.4322,
      "step": 2770
    },
    {
      "epoch": 1.5029735087403135,
      "grad_norm": 0.33381909956811917,
      "learning_rate": 5.848608165372403e-06,
      "loss": 0.425,
      "step": 2780
    },
    {
      "epoch": 1.5083798882681565,
      "grad_norm": 0.3189833875248174,
      "learning_rate": 5.8175818042828e-06,
      "loss": 0.4357,
      "step": 2790
    },
    {
      "epoch": 1.5137862677959992,
      "grad_norm": 0.36173513055424256,
      "learning_rate": 5.78652306292988e-06,
      "loss": 0.4395,
      "step": 2800
    },
    {
      "epoch": 1.5191926473238422,
      "grad_norm": 0.3265416603091211,
      "learning_rate": 5.75543317139266e-06,
      "loss": 0.4426,
      "step": 2810
    },
    {
      "epoch": 1.5245990268516851,
      "grad_norm": 0.33495795652653004,
      "learning_rate": 5.724313360983859e-06,
      "loss": 0.4335,
      "step": 2820
    },
    {
      "epoch": 1.5300054063795279,
      "grad_norm": 0.35637908471545576,
      "learning_rate": 5.693164864201134e-06,
      "loss": 0.4343,
      "step": 2830
    },
    {
      "epoch": 1.5354117859073706,
      "grad_norm": 0.3422755476029069,
      "learning_rate": 5.661988914678257e-06,
      "loss": 0.4201,
      "step": 2840
    },
    {
      "epoch": 1.5408181654352135,
      "grad_norm": 0.29401423880776295,
      "learning_rate": 5.630786747136269e-06,
      "loss": 0.4263,
      "step": 2850
    },
    {
      "epoch": 1.5462245449630565,
      "grad_norm": 0.35559246067713574,
      "learning_rate": 5.599559597334568e-06,
      "loss": 0.4327,
      "step": 2860
    },
    {
      "epoch": 1.5516309244908992,
      "grad_norm": 0.3234026109207772,
      "learning_rate": 5.56830870202198e-06,
      "loss": 0.4284,
      "step": 2870
    },
    {
      "epoch": 1.557037304018742,
      "grad_norm": 0.3041181368480941,
      "learning_rate": 5.537035298887764e-06,
      "loss": 0.4291,
      "step": 2880
    },
    {
      "epoch": 1.562443683546585,
      "grad_norm": 0.4152034967270183,
      "learning_rate": 5.505740626512601e-06,
      "loss": 0.4333,
      "step": 2890
    },
    {
      "epoch": 1.5678500630744279,
      "grad_norm": 0.32189843480023705,
      "learning_rate": 5.474425924319538e-06,
      "loss": 0.4313,
      "step": 2900
    },
    {
      "epoch": 1.5732564426022706,
      "grad_norm": 0.3400408960358337,
      "learning_rate": 5.443092432524906e-06,
      "loss": 0.4446,
      "step": 2910
    },
    {
      "epoch": 1.5786628221301136,
      "grad_norm": 0.3253331216756115,
      "learning_rate": 5.411741392089192e-06,
      "loss": 0.4276,
      "step": 2920
    },
    {
      "epoch": 1.5840692016579565,
      "grad_norm": 0.34364169352732366,
      "learning_rate": 5.380374044667896e-06,
      "loss": 0.4363,
      "step": 2930
    },
    {
      "epoch": 1.5894755811857992,
      "grad_norm": 0.2993302543547276,
      "learning_rate": 5.348991632562355e-06,
      "loss": 0.4347,
      "step": 2940
    },
    {
      "epoch": 1.594881960713642,
      "grad_norm": 0.31140003151111195,
      "learning_rate": 5.317595398670543e-06,
      "loss": 0.4203,
      "step": 2950
    },
    {
      "epoch": 1.600288340241485,
      "grad_norm": 0.34917215566088183,
      "learning_rate": 5.286186586437845e-06,
      "loss": 0.4394,
      "step": 2960
    },
    {
      "epoch": 1.6056947197693279,
      "grad_norm": 0.3099678473182354,
      "learning_rate": 5.254766439807807e-06,
      "loss": 0.4224,
      "step": 2970
    },
    {
      "epoch": 1.6111010992971706,
      "grad_norm": 0.32027842285858055,
      "learning_rate": 5.223336203172874e-06,
      "loss": 0.4289,
      "step": 2980
    },
    {
      "epoch": 1.6165074788250136,
      "grad_norm": 0.29377503624337103,
      "learning_rate": 5.191897121325111e-06,
      "loss": 0.43,
      "step": 2990
    },
    {
      "epoch": 1.6219138583528565,
      "grad_norm": 0.3286814138894788,
      "learning_rate": 5.16045043940689e-06,
      "loss": 0.4344,
      "step": 3000
    },
    {
      "epoch": 1.6273202378806992,
      "grad_norm": 0.35588674616258936,
      "learning_rate": 5.128997402861584e-06,
      "loss": 0.4306,
      "step": 3010
    },
    {
      "epoch": 1.632726617408542,
      "grad_norm": 0.33501603495492577,
      "learning_rate": 5.09753925738424e-06,
      "loss": 0.4154,
      "step": 3020
    },
    {
      "epoch": 1.638132996936385,
      "grad_norm": 0.3011476898703049,
      "learning_rate": 5.06607724887225e-06,
      "loss": 0.4314,
      "step": 3030
    },
    {
      "epoch": 1.6435393764642279,
      "grad_norm": 0.3879201939655995,
      "learning_rate": 5.034612623375993e-06,
      "loss": 0.4412,
      "step": 3040
    },
    {
      "epoch": 1.6489457559920706,
      "grad_norm": 0.3426764786646151,
      "learning_rate": 5.003146627049499e-06,
      "loss": 0.4295,
      "step": 3050
    },
    {
      "epoch": 1.6543521355199133,
      "grad_norm": 0.3408786770769329,
      "learning_rate": 4.971680506101086e-06,
      "loss": 0.4259,
      "step": 3060
    },
    {
      "epoch": 1.6597585150477565,
      "grad_norm": 0.3689333373771858,
      "learning_rate": 4.940215506744011e-06,
      "loss": 0.4254,
      "step": 3070
    },
    {
      "epoch": 1.6651648945755992,
      "grad_norm": 0.33725311763702437,
      "learning_rate": 4.90875287514711e-06,
      "loss": 0.4286,
      "step": 3080
    },
    {
      "epoch": 1.670571274103442,
      "grad_norm": 0.3106105413402686,
      "learning_rate": 4.87729385738544e-06,
      "loss": 0.426,
      "step": 3090
    },
    {
      "epoch": 1.675977653631285,
      "grad_norm": 0.361491556160267,
      "learning_rate": 4.845839699390936e-06,
      "loss": 0.4229,
      "step": 3100
    },
    {
      "epoch": 1.6813840331591279,
      "grad_norm": 0.3012437306295753,
      "learning_rate": 4.814391646903063e-06,
      "loss": 0.4296,
      "step": 3110
    },
    {
      "epoch": 1.6867904126869706,
      "grad_norm": 0.3142934287582159,
      "learning_rate": 4.782950945419475e-06,
      "loss": 0.4304,
      "step": 3120
    },
    {
      "epoch": 1.6921967922148133,
      "grad_norm": 0.3024864799296645,
      "learning_rate": 4.751518840146695e-06,
      "loss": 0.4329,
      "step": 3130
    },
    {
      "epoch": 1.6976031717426563,
      "grad_norm": 0.3081924919099197,
      "learning_rate": 4.720096575950784e-06,
      "loss": 0.4319,
      "step": 3140
    },
    {
      "epoch": 1.7030095512704992,
      "grad_norm": 0.32189094915170496,
      "learning_rate": 4.688685397308061e-06,
      "loss": 0.42,
      "step": 3150
    },
    {
      "epoch": 1.708415930798342,
      "grad_norm": 0.33972262308693657,
      "learning_rate": 4.657286548255789e-06,
      "loss": 0.4369,
      "step": 3160
    },
    {
      "epoch": 1.713822310326185,
      "grad_norm": 0.30741331028975344,
      "learning_rate": 4.6259012723429285e-06,
      "loss": 0.4274,
      "step": 3170
    },
    {
      "epoch": 1.7192286898540279,
      "grad_norm": 0.28971622178653267,
      "learning_rate": 4.594530812580876e-06,
      "loss": 0.4216,
      "step": 3180
    },
    {
      "epoch": 1.7246350693818706,
      "grad_norm": 0.2792098363578085,
      "learning_rate": 4.563176411394229e-06,
      "loss": 0.4238,
      "step": 3190
    },
    {
      "epoch": 1.7300414489097133,
      "grad_norm": 0.29274514837335597,
      "learning_rate": 4.531839310571595e-06,
      "loss": 0.4291,
      "step": 3200
    },
    {
      "epoch": 1.7354478284375563,
      "grad_norm": 0.32996912353874136,
      "learning_rate": 4.5005207512163914e-06,
      "loss": 0.4388,
      "step": 3210
    },
    {
      "epoch": 1.7408542079653992,
      "grad_norm": 0.34282857698540753,
      "learning_rate": 4.469221973697714e-06,
      "loss": 0.4373,
      "step": 3220
    },
    {
      "epoch": 1.746260587493242,
      "grad_norm": 0.3147983795136612,
      "learning_rate": 4.43794421760119e-06,
      "loss": 0.4291,
      "step": 3230
    },
    {
      "epoch": 1.751666967021085,
      "grad_norm": 0.2953517288607898,
      "learning_rate": 4.4066887216799055e-06,
      "loss": 0.4219,
      "step": 3240
    },
    {
      "epoch": 1.7570733465489279,
      "grad_norm": 0.30489564567587807,
      "learning_rate": 4.375456723805321e-06,
      "loss": 0.4308,
      "step": 3250
    },
    {
      "epoch": 1.7624797260767706,
      "grad_norm": 0.30950501632812377,
      "learning_rate": 4.344249460918271e-06,
      "loss": 0.4213,
      "step": 3260
    },
    {
      "epoch": 1.7678861056046133,
      "grad_norm": 0.30230325895579757,
      "learning_rate": 4.313068168979957e-06,
      "loss": 0.4364,
      "step": 3270
    },
    {
      "epoch": 1.7732924851324563,
      "grad_norm": 0.30774095159515363,
      "learning_rate": 4.281914082923002e-06,
      "loss": 0.4165,
      "step": 3280
    },
    {
      "epoch": 1.7786988646602993,
      "grad_norm": 0.3275433264912912,
      "learning_rate": 4.250788436602548e-06,
      "loss": 0.4269,
      "step": 3290
    },
    {
      "epoch": 1.784105244188142,
      "grad_norm": 0.3270523212461865,
      "learning_rate": 4.2196924627473715e-06,
      "loss": 0.4304,
      "step": 3300
    },
    {
      "epoch": 1.7895116237159847,
      "grad_norm": 0.28953105726529316,
      "learning_rate": 4.188627392911091e-06,
      "loss": 0.4281,
      "step": 3310
    },
    {
      "epoch": 1.7949180032438277,
      "grad_norm": 0.34157770345495453,
      "learning_rate": 4.157594457423357e-06,
      "loss": 0.432,
      "step": 3320
    },
    {
      "epoch": 1.8003243827716706,
      "grad_norm": 0.2952227481543905,
      "learning_rate": 4.1265948853411506e-06,
      "loss": 0.427,
      "step": 3330
    },
    {
      "epoch": 1.8057307622995133,
      "grad_norm": 0.3058432699391948,
      "learning_rate": 4.095629904400097e-06,
      "loss": 0.4268,
      "step": 3340
    },
    {
      "epoch": 1.8111371418273563,
      "grad_norm": 0.32888818257409286,
      "learning_rate": 4.06470074096584e-06,
      "loss": 0.4334,
      "step": 3350
    },
    {
      "epoch": 1.8165435213551993,
      "grad_norm": 0.29929296938295863,
      "learning_rate": 4.0338086199854765e-06,
      "loss": 0.4248,
      "step": 3360
    },
    {
      "epoch": 1.821949900883042,
      "grad_norm": 0.33418978699429813,
      "learning_rate": 4.0029547649390346e-06,
      "loss": 0.4307,
      "step": 3370
    },
    {
      "epoch": 1.8273562804108847,
      "grad_norm": 0.2991040804166494,
      "learning_rate": 3.97214039779103e-06,
      "loss": 0.435,
      "step": 3380
    },
    {
      "epoch": 1.8327626599387277,
| "grad_norm": 0.2829911428105187, | |
| "learning_rate": 3.941366738942058e-06, | |
| "loss": 0.4246, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.8381690394665706, | |
| "grad_norm": 0.2990384176756561, | |
| "learning_rate": 3.910635007180468e-06, | |
| "loss": 0.4394, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.8435754189944134, | |
| "grad_norm": 0.28487793163600966, | |
| "learning_rate": 3.879946419634087e-06, | |
| "loss": 0.4268, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.8489817985222563, | |
| "grad_norm": 0.30066911074015307, | |
| "learning_rate": 3.8493021917220225e-06, | |
| "loss": 0.4289, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.8543881780500993, | |
| "grad_norm": 0.3145700146426358, | |
| "learning_rate": 3.818703537106522e-06, | |
| "loss": 0.427, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.859794557577942, | |
| "grad_norm": 0.3121437364875441, | |
| "learning_rate": 3.7881516676449014e-06, | |
| "loss": 0.4334, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.8652009371057847, | |
| "grad_norm": 0.2914138429548545, | |
| "learning_rate": 3.7576477933415612e-06, | |
| "loss": 0.4358, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.8706073166336277, | |
| "grad_norm": 0.3263366427961882, | |
| "learning_rate": 3.7271931223000507e-06, | |
| "loss": 0.4294, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.8760136961614706, | |
| "grad_norm": 0.3181986581808925, | |
| "learning_rate": 3.6967888606752345e-06, | |
| "loss": 0.433, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.8814200756893134, | |
| "grad_norm": 0.31837041508546626, | |
| "learning_rate": 3.6664362126255087e-06, | |
| "loss": 0.4283, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.886826455217156, | |
| "grad_norm": 0.2876960972161682, | |
| "learning_rate": 3.636136380265124e-06, | |
| "loss": 0.4189, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.8922328347449993, | |
| "grad_norm": 0.30867320900321366, | |
| "learning_rate": 3.6058905636165674e-06, | |
| "loss": 0.4309, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.897639214272842, | |
| "grad_norm": 0.29104980848951667, | |
| "learning_rate": 3.575699960563038e-06, | |
| "loss": 0.4184, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.9030455938006847, | |
| "grad_norm": 0.2859389528274554, | |
| "learning_rate": 3.5455657668010057e-06, | |
| "loss": 0.4253, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.9084519733285277, | |
| "grad_norm": 0.30910611127718657, | |
| "learning_rate": 3.5154891757928523e-06, | |
| "loss": 0.4257, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.9138583528563706, | |
| "grad_norm": 0.31381289055858025, | |
| "learning_rate": 3.4854713787196105e-06, | |
| "loss": 0.4324, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.9192647323842134, | |
| "grad_norm": 0.33654431291917486, | |
| "learning_rate": 3.4555135644337803e-06, | |
| "loss": 0.4262, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.924671111912056, | |
| "grad_norm": 0.30712399081960845, | |
| "learning_rate": 3.42561691941225e-06, | |
| "loss": 0.4344, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.930077491439899, | |
| "grad_norm": 0.2989668977037765, | |
| "learning_rate": 3.3957826277093074e-06, | |
| "loss": 0.4278, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.935483870967742, | |
| "grad_norm": 0.3259516671848096, | |
| "learning_rate": 3.3660118709097347e-06, | |
| "loss": 0.4242, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.9408902504955847, | |
| "grad_norm": 0.29719187591192203, | |
| "learning_rate": 3.336305828082024e-06, | |
| "loss": 0.4319, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.9462966300234277, | |
| "grad_norm": 0.3250815058947025, | |
| "learning_rate": 3.306665675731674e-06, | |
| "loss": 0.4324, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.9517030095512706, | |
| "grad_norm": 0.3196705993035981, | |
| "learning_rate": 3.277092587754598e-06, | |
| "loss": 0.4283, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.9571093890791134, | |
| "grad_norm": 0.2836241969868925, | |
| "learning_rate": 3.247587735390628e-06, | |
| "loss": 0.4285, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.962515768606956, | |
| "grad_norm": 0.2963451307813687, | |
| "learning_rate": 3.218152287177133e-06, | |
| "loss": 0.4233, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.967922148134799, | |
| "grad_norm": 0.32162438964611967, | |
| "learning_rate": 3.1887874089027304e-06, | |
| "loss": 0.4275, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.973328527662642, | |
| "grad_norm": 0.2858747270839711, | |
| "learning_rate": 3.159494263561126e-06, | |
| "loss": 0.429, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.9787349071904847, | |
| "grad_norm": 0.294205581889964, | |
| "learning_rate": 3.130274011305047e-06, | |
| "loss": 0.4261, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.9841412867183277, | |
| "grad_norm": 0.3271655262933234, | |
| "learning_rate": 3.1011278094002928e-06, | |
| "loss": 0.4352, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.9895476662461706, | |
| "grad_norm": 0.3151321646815863, | |
| "learning_rate": 3.0720568121799105e-06, | |
| "loss": 0.4302, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.9949540457740134, | |
| "grad_norm": 0.3069606817223593, | |
| "learning_rate": 3.043062170998464e-06, | |
| "loss": 0.4274, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 2.000360425301856, | |
| "grad_norm": 0.3418886732932903, | |
| "learning_rate": 3.0141450341864486e-06, | |
| "loss": 0.4368, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.005766804829699, | |
| "grad_norm": 0.28231273100784204, | |
| "learning_rate": 2.9853065470048016e-06, | |
| "loss": 0.4084, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 2.011173184357542, | |
| "grad_norm": 0.27285411121752895, | |
| "learning_rate": 2.956547851599548e-06, | |
| "loss": 0.3899, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 2.0165795638853847, | |
| "grad_norm": 0.31740692003997667, | |
| "learning_rate": 2.9278700869565713e-06, | |
| "loss": 0.406, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 2.0219859434132275, | |
| "grad_norm": 0.32723222207620034, | |
| "learning_rate": 2.8992743888564886e-06, | |
| "loss": 0.4107, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 2.0273923229410706, | |
| "grad_norm": 0.3293876655149398, | |
| "learning_rate": 2.8707618898296864e-06, | |
| "loss": 0.4052, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.0327987024689134, | |
| "grad_norm": 0.26473497263074053, | |
| "learning_rate": 2.8423337191114495e-06, | |
| "loss": 0.402, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 2.038205081996756, | |
| "grad_norm": 0.31910999655360905, | |
| "learning_rate": 2.8139910025972622e-06, | |
| "loss": 0.4134, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 2.043611461524599, | |
| "grad_norm": 0.29154253424627524, | |
| "learning_rate": 2.785734862798184e-06, | |
| "loss": 0.4086, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 2.049017841052442, | |
| "grad_norm": 0.2910125618297838, | |
| "learning_rate": 2.7575664187964236e-06, | |
| "loss": 0.4007, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 2.0544242205802847, | |
| "grad_norm": 0.28793585101610353, | |
| "learning_rate": 2.7294867862009937e-06, | |
| "loss": 0.4053, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.0598306001081275, | |
| "grad_norm": 0.2731032601573403, | |
| "learning_rate": 2.7014970771035474e-06, | |
| "loss": 0.4138, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 2.0652369796359706, | |
| "grad_norm": 0.29876809472359783, | |
| "learning_rate": 2.6735984000343216e-06, | |
| "loss": 0.4156, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 2.0706433591638134, | |
| "grad_norm": 0.3100743441240049, | |
| "learning_rate": 2.645791859918234e-06, | |
| "loss": 0.4089, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 2.076049738691656, | |
| "grad_norm": 0.34676569440909566, | |
| "learning_rate": 2.6180785580311284e-06, | |
| "loss": 0.3998, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 2.081456118219499, | |
| "grad_norm": 0.28331404223893575, | |
| "learning_rate": 2.5904595919561563e-06, | |
| "loss": 0.3935, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.086862497747342, | |
| "grad_norm": 0.2892120423588288, | |
| "learning_rate": 2.562936055540307e-06, | |
| "loss": 0.411, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 2.0922688772751847, | |
| "grad_norm": 0.29210558202813347, | |
| "learning_rate": 2.5355090388510806e-06, | |
| "loss": 0.4108, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 2.0976752568030275, | |
| "grad_norm": 0.29027866503096267, | |
| "learning_rate": 2.508179628133326e-06, | |
| "loss": 0.4016, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 2.1030816363308706, | |
| "grad_norm": 0.2876065349136538, | |
| "learning_rate": 2.4809489057662168e-06, | |
| "loss": 0.4101, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 2.1084880158587134, | |
| "grad_norm": 0.3135899601532618, | |
| "learning_rate": 2.4538179502203753e-06, | |
| "loss": 0.4001, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.113894395386556, | |
| "grad_norm": 0.30848425065584256, | |
| "learning_rate": 2.4267878360151747e-06, | |
| "loss": 0.3997, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 2.119300774914399, | |
| "grad_norm": 0.2923032276510183, | |
| "learning_rate": 2.399859633676165e-06, | |
| "loss": 0.4049, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 2.124707154442242, | |
| "grad_norm": 0.29055776768248115, | |
| "learning_rate": 2.3730344096926974e-06, | |
| "loss": 0.3981, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 2.1301135339700847, | |
| "grad_norm": 0.3161385412337821, | |
| "learning_rate": 2.3463132264756617e-06, | |
| "loss": 0.4075, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 2.1355199134979275, | |
| "grad_norm": 0.2828900068372096, | |
| "learning_rate": 2.319697142315428e-06, | |
| "loss": 0.3906, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.14092629302577, | |
| "grad_norm": 0.26292390614915356, | |
| "learning_rate": 2.293187211339926e-06, | |
| "loss": 0.3991, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 2.1463326725536134, | |
| "grad_norm": 0.2987394527032652, | |
| "learning_rate": 2.2667844834728923e-06, | |
| "loss": 0.3999, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 2.151739052081456, | |
| "grad_norm": 0.27915670540136367, | |
| "learning_rate": 2.2404900043922996e-06, | |
| "loss": 0.3995, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 2.157145431609299, | |
| "grad_norm": 0.2818164391888048, | |
| "learning_rate": 2.2143048154889272e-06, | |
| "loss": 0.4015, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 2.162551811137142, | |
| "grad_norm": 0.26044900685376793, | |
| "learning_rate": 2.1882299538251352e-06, | |
| "loss": 0.4003, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.1679581906649847, | |
| "grad_norm": 0.27297932069072756, | |
| "learning_rate": 2.162266452093774e-06, | |
| "loss": 0.4149, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 2.1733645701928275, | |
| "grad_norm": 0.2978434115081757, | |
| "learning_rate": 2.1364153385773007e-06, | |
| "loss": 0.4018, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 2.17877094972067, | |
| "grad_norm": 0.31586609932366294, | |
| "learning_rate": 2.110677637107036e-06, | |
| "loss": 0.4053, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 2.1841773292485134, | |
| "grad_norm": 0.29030802044428805, | |
| "learning_rate": 2.0850543670226318e-06, | |
| "loss": 0.4065, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 2.189583708776356, | |
| "grad_norm": 0.3365802334808058, | |
| "learning_rate": 2.059546543131696e-06, | |
| "loss": 0.405, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.194990088304199, | |
| "grad_norm": 0.2995355365322975, | |
| "learning_rate": 2.034155175669592e-06, | |
| "loss": 0.4044, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 2.200396467832042, | |
| "grad_norm": 0.2868235821916637, | |
| "learning_rate": 2.0088812702594424e-06, | |
| "loss": 0.4023, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 2.2058028473598847, | |
| "grad_norm": 0.29532698621262965, | |
| "learning_rate": 1.9837258278722855e-06, | |
| "loss": 0.413, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 2.2112092268877275, | |
| "grad_norm": 0.282345122194298, | |
| "learning_rate": 1.9586898447874543e-06, | |
| "loss": 0.4033, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 2.21661560641557, | |
| "grad_norm": 0.28744059302390934, | |
| "learning_rate": 1.933774312553092e-06, | |
| "loss": 0.4002, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.2220219859434134, | |
| "grad_norm": 0.29637974416632634, | |
| "learning_rate": 1.9089802179469036e-06, | |
| "loss": 0.397, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 2.227428365471256, | |
| "grad_norm": 0.29136812414474506, | |
| "learning_rate": 1.884308542937065e-06, | |
| "loss": 0.4198, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 2.232834744999099, | |
| "grad_norm": 0.28845833396948634, | |
| "learning_rate": 1.8597602646433294e-06, | |
| "loss": 0.4012, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 2.238241124526942, | |
| "grad_norm": 0.31515767696033387, | |
| "learning_rate": 1.8353363552983382e-06, | |
| "loss": 0.4084, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 2.2436475040547847, | |
| "grad_norm": 0.2852056906534805, | |
| "learning_rate": 1.8110377822091057e-06, | |
| "loss": 0.4129, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.2490538835826275, | |
| "grad_norm": 0.2961534698999477, | |
| "learning_rate": 1.7868655077187175e-06, | |
| "loss": 0.404, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 2.25446026311047, | |
| "grad_norm": 0.3026130823215708, | |
| "learning_rate": 1.76282048916821e-06, | |
| "loss": 0.4105, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 2.2598666426383134, | |
| "grad_norm": 0.295103201693147, | |
| "learning_rate": 1.7389036788586627e-06, | |
| "loss": 0.4057, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 2.265273022166156, | |
| "grad_norm": 0.26979492433946, | |
| "learning_rate": 1.7151160240134702e-06, | |
| "loss": 0.4027, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 2.270679401693999, | |
| "grad_norm": 0.3069718829915049, | |
| "learning_rate": 1.6914584667408408e-06, | |
| "loss": 0.407, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.276085781221842, | |
| "grad_norm": 0.2582555297518662, | |
| "learning_rate": 1.6679319439964797e-06, | |
| "loss": 0.3943, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 2.2814921607496847, | |
| "grad_norm": 0.30300112933414725, | |
| "learning_rate": 1.6445373875464738e-06, | |
| "loss": 0.4073, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 2.2868985402775275, | |
| "grad_norm": 0.27640155584834986, | |
| "learning_rate": 1.6212757239304e-06, | |
| "loss": 0.4074, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 2.29230491980537, | |
| "grad_norm": 0.288482277273483, | |
| "learning_rate": 1.5981478744246242e-06, | |
| "loss": 0.3961, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 2.297711299333213, | |
| "grad_norm": 0.2968944260811366, | |
| "learning_rate": 1.575154755005816e-06, | |
| "loss": 0.403, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.303117678861056, | |
| "grad_norm": 0.29278471655933946, | |
| "learning_rate": 1.5522972763146653e-06, | |
| "loss": 0.4019, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 2.308524058388899, | |
| "grad_norm": 0.2729883421366084, | |
| "learning_rate": 1.5295763436198274e-06, | |
| "loss": 0.4148, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 2.3139304379167416, | |
| "grad_norm": 0.30284845140590294, | |
| "learning_rate": 1.5069928567820635e-06, | |
| "loss": 0.4016, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 2.3193368174445848, | |
| "grad_norm": 0.3044664985270554, | |
| "learning_rate": 1.4845477102185974e-06, | |
| "loss": 0.4092, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 2.3247431969724275, | |
| "grad_norm": 0.30467048506977945, | |
| "learning_rate": 1.4622417928677034e-06, | |
| "loss": 0.3997, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.33014957650027, | |
| "grad_norm": 0.25546815283849933, | |
| "learning_rate": 1.4400759881534886e-06, | |
| "loss": 0.3988, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 2.335555956028113, | |
| "grad_norm": 0.2852027186621198, | |
| "learning_rate": 1.418051173950914e-06, | |
| "loss": 0.4124, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 2.340962335555956, | |
| "grad_norm": 0.28906302811953016, | |
| "learning_rate": 1.3961682225510203e-06, | |
| "loss": 0.3993, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 2.346368715083799, | |
| "grad_norm": 0.27197836639387235, | |
| "learning_rate": 1.3744280006263839e-06, | |
| "loss": 0.408, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 2.3517750946116416, | |
| "grad_norm": 0.2668399923208869, | |
| "learning_rate": 1.3528313691967926e-06, | |
| "loss": 0.4134, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.3571814741394848, | |
| "grad_norm": 0.2872848077693314, | |
| "learning_rate": 1.3313791835951396e-06, | |
| "loss": 0.4045, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 2.3625878536673275, | |
| "grad_norm": 0.29802601615160446, | |
| "learning_rate": 1.310072293433558e-06, | |
| "loss": 0.4014, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 2.36799423319517, | |
| "grad_norm": 0.25723071187565805, | |
| "learning_rate": 1.2889115425697612e-06, | |
| "loss": 0.399, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 2.373400612723013, | |
| "grad_norm": 0.2842104581531295, | |
| "learning_rate": 1.2678977690736311e-06, | |
| "loss": 0.4015, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 2.378806992250856, | |
| "grad_norm": 0.2813179130833351, | |
| "learning_rate": 1.2470318051940205e-06, | |
| "loss": 0.4026, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.384213371778699, | |
| "grad_norm": 0.27762098429764004, | |
| "learning_rate": 1.2263144773257967e-06, | |
| "loss": 0.4068, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 2.3896197513065416, | |
| "grad_norm": 0.27848678899943174, | |
| "learning_rate": 1.2057466059771035e-06, | |
| "loss": 0.4006, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 2.3950261308343848, | |
| "grad_norm": 0.27875535013460345, | |
| "learning_rate": 1.1853290057368754e-06, | |
| "loss": 0.4088, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 2.4004325103622275, | |
| "grad_norm": 0.2662344684523685, | |
| "learning_rate": 1.165062485242574e-06, | |
| "loss": 0.4019, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 2.40583888989007, | |
| "grad_norm": 0.3005215328293971, | |
| "learning_rate": 1.1449478471481512e-06, | |
| "loss": 0.411, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.411245269417913, | |
| "grad_norm": 0.2712567161403629, | |
| "learning_rate": 1.1249858880922771e-06, | |
| "loss": 0.4059, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 2.416651648945756, | |
| "grad_norm": 0.26211955276644977, | |
| "learning_rate": 1.1051773986667735e-06, | |
| "loss": 0.4051, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 2.422058028473599, | |
| "grad_norm": 0.26165210615685336, | |
| "learning_rate": 1.0855231633853137e-06, | |
| "loss": 0.4068, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 2.4274644080014416, | |
| "grad_norm": 0.2765363606523804, | |
| "learning_rate": 1.0660239606523466e-06, | |
| "loss": 0.4128, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 2.4328707875292848, | |
| "grad_norm": 0.2770223660740028, | |
| "learning_rate": 1.0466805627322685e-06, | |
| "loss": 0.4055, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.4382771670571275, | |
| "grad_norm": 0.266013699998984, | |
| "learning_rate": 1.0274937357188414e-06, | |
| "loss": 0.4049, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 2.4436835465849702, | |
| "grad_norm": 0.25683355130670393, | |
| "learning_rate": 1.0084642395048428e-06, | |
| "loss": 0.4078, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 2.449089926112813, | |
| "grad_norm": 0.2811697424270643, | |
| "learning_rate": 9.895928277519822e-07, | |
| "loss": 0.4092, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 2.454496305640656, | |
| "grad_norm": 0.2836256278223854, | |
| "learning_rate": 9.708802478610413e-07, | |
| "loss": 0.4059, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 2.459902685168499, | |
| "grad_norm": 0.2771952071252828, | |
| "learning_rate": 9.523272409422829e-07, | |
| "loss": 0.4112, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.4653090646963416, | |
| "grad_norm": 0.2965292468618203, | |
| "learning_rate": 9.339345417860918e-07, | |
| "loss": 0.4028, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 2.4707154442241848, | |
| "grad_norm": 0.307263683184186, | |
| "learning_rate": 9.157028788338795e-07, | |
| "loss": 0.4029, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 2.4761218237520275, | |
| "grad_norm": 0.2922545833760392, | |
| "learning_rate": 8.976329741492262e-07, | |
| "loss": 0.3939, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 2.4815282032798702, | |
| "grad_norm": 0.29211120065069335, | |
| "learning_rate": 8.797255433892926e-07, | |
| "loss": 0.4086, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 2.486934582807713, | |
| "grad_norm": 0.28634400793358533, | |
| "learning_rate": 8.619812957764729e-07, | |
| "loss": 0.4059, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.492340962335556, | |
| "grad_norm": 0.2646272575948771, | |
| "learning_rate": 8.444009340703008e-07, | |
| "loss": 0.398, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 2.497747341863399, | |
| "grad_norm": 0.29066647888917396, | |
| "learning_rate": 8.269851545396279e-07, | |
| "loss": 0.4025, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 2.5031537213912416, | |
| "grad_norm": 0.28424280479329644, | |
| "learning_rate": 8.097346469350348e-07, | |
| "loss": 0.4013, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 2.5085601009190848, | |
| "grad_norm": 0.2896529003620974, | |
| "learning_rate": 7.926500944615267e-07, | |
| "loss": 0.4108, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 2.5139664804469275, | |
| "grad_norm": 0.27346406286896946, | |
| "learning_rate": 7.757321737514645e-07, | |
| "loss": 0.3941, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.5193728599747702, | |
| "grad_norm": 0.26882609264045565, | |
| "learning_rate": 7.589815548377738e-07, | |
| "loss": 0.4035, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 2.524779239502613, | |
| "grad_norm": 0.27733293233890505, | |
| "learning_rate": 7.423989011274052e-07, | |
| "loss": 0.4085, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 2.5301856190304557, | |
| "grad_norm": 0.25627085107348396, | |
| "learning_rate": 7.259848693750582e-07, | |
| "loss": 0.4017, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 2.535591998558299, | |
| "grad_norm": 0.2691243234604463, | |
| "learning_rate": 7.097401096571765e-07, | |
| "loss": 0.3996, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 2.5409983780861416, | |
| "grad_norm": 0.2764529789534093, | |
| "learning_rate": 6.936652653461939e-07, | |
| "loss": 0.4145, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.5464047576139848, | |
| "grad_norm": 0.2902741811813119, | |
| "learning_rate": 6.777609730850615e-07, | |
| "loss": 0.4007, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 2.5518111371418275, | |
| "grad_norm": 0.265969991168333, | |
| "learning_rate": 6.620278627620286e-07, | |
| "loss": 0.402, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 2.5572175166696702, | |
| "grad_norm": 0.259196836837019, | |
| "learning_rate": 6.464665574856977e-07, | |
| "loss": 0.4124, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 2.562623896197513, | |
| "grad_norm": 0.2829926842253021, | |
| "learning_rate": 6.310776735603452e-07, | |
| "loss": 0.3989, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 2.5680302757253557, | |
| "grad_norm": 0.2694529736291035, | |
| "learning_rate": 6.158618204615119e-07, | |
| "loss": 0.4032, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.573436655253199, | |
| "grad_norm": 0.2630102431201598, | |
| "learning_rate": 6.008196008118705e-07, | |
| "loss": 0.407, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 2.5788430347810416, | |
| "grad_norm": 0.27146999027694685, | |
| "learning_rate": 5.859516103573492e-07, | |
| "loss": 0.3982, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 2.5842494143088843, | |
| "grad_norm": 0.28346284777141134, | |
| "learning_rate": 5.712584379435482e-07, | |
| "loss": 0.3984, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 2.5896557938367275, | |
| "grad_norm": 0.28197172604169823, | |
| "learning_rate": 5.567406654924074e-07, | |
| "loss": 0.3988, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 2.5950621733645702, | |
| "grad_norm": 0.2717022634001503, | |
| "learning_rate": 5.423988679791686e-07, | |
| "loss": 0.4098, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.600468552892413, | |
| "grad_norm": 0.276903744178795, | |
| "learning_rate": 5.282336134095994e-07, | |
| "loss": 0.4043, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 2.6058749324202557, | |
| "grad_norm": 0.25453566586188486, | |
| "learning_rate": 5.142454627974969e-07, | |
| "loss": 0.3976, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 2.611281311948099, | |
| "grad_norm": 0.2784736093310705, | |
| "learning_rate": 5.00434970142471e-07, | |
| "loss": 0.4062, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 2.6166876914759416, | |
| "grad_norm": 0.24784017038474418, | |
| "learning_rate": 4.868026824080008e-07, | |
| "loss": 0.4061, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 2.6220940710037843, | |
| "grad_norm": 0.2807417719405863, | |
| "learning_rate": 4.7334913949977526e-07, | |
| "loss": 0.4075, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 2.6275004505316275, | |
| "grad_norm": 0.25346910500895187, | |
| "learning_rate": 4.6007487424430565e-07, | |
| "loss": 0.3964, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 2.6329068300594702, | |
| "grad_norm": 0.27364761903392193, | |
| "learning_rate": 4.46980412367829e-07, | |
| "loss": 0.3938, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 2.638313209587313, | |
| "grad_norm": 0.2765709048501121, | |
| "learning_rate": 4.3406627247548184e-07, | |
| "loss": 0.4074, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 2.6437195891151557, | |
| "grad_norm": 0.2776500402889704, | |
| "learning_rate": 4.21332966030763e-07, | |
| "loss": 0.3994, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 2.649125968642999, | |
| "grad_norm": 0.26079072827311783, | |
| "learning_rate": 4.08780997335278e-07, | |
| "loss": 0.4045, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.6545323481708416, | |
| "grad_norm": 0.2397016051949167, | |
| "learning_rate": 3.9641086350876155e-07, | |
| "loss": 0.4029, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 2.6599387276986843, | |
| "grad_norm": 0.29754617724142174, | |
| "learning_rate": 3.84223054469397e-07, | |
| "loss": 0.4018, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 2.6653451072265275, | |
| "grad_norm": 0.27568276310419043, | |
| "learning_rate": 3.722180529144054e-07, | |
| "loss": 0.4096, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 2.6707514867543702, | |
| "grad_norm": 0.25544292907340554, | |
| "learning_rate": 3.6039633430093367e-07, | |
| "loss": 0.4006, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 2.676157866282213, | |
| "grad_norm": 0.2904302979415872, | |
| "learning_rate": 3.4875836682722096e-07, | |
| "loss": 0.4093, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 2.6815642458100557, | |
| "grad_norm": 0.2796446372356396, | |
| "learning_rate": 3.373046114140571e-07, | |
| "loss": 0.4037, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 2.686970625337899, | |
| "grad_norm": 0.2690617997319961, | |
| "learning_rate": 3.260355216865291e-07, | |
| "loss": 0.4058, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 2.6923770048657416, | |
| "grad_norm": 0.27708751977237855, | |
| "learning_rate": 3.149515439560524e-07, | |
| "loss": 0.4084, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 2.6977833843935843, | |
| "grad_norm": 0.25923770611284674, | |
| "learning_rate": 3.040531172026978e-07, | |
| "loss": 0.4035, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 2.7031897639214275, | |
| "grad_norm": 0.2503752240400745, | |
| "learning_rate": 2.933406730578009e-07, | |
| "loss": 0.4094, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.7085961434492702, | |
| "grad_norm": 0.27256002841564525, | |
| "learning_rate": 2.828146357868755e-07, | |
| "loss": 0.4049, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 2.714002522977113, | |
| "grad_norm": 0.262526407381437, | |
| "learning_rate": 2.7247542227280155e-07, | |
| "loss": 0.399, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 2.7194089025049557, | |
| "grad_norm": 0.26889496739047675, | |
| "learning_rate": 2.6232344199932034e-07, | |
| "loss": 0.3974, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 2.7248152820327984, | |
| "grad_norm": 0.2581699169174531, | |
| "learning_rate": 2.523590970348166e-07, | |
| "loss": 0.4078, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 2.7302216615606416, | |
| "grad_norm": 0.2681313769671267, | |
| "learning_rate": 2.4258278201639117e-07, | |
| "loss": 0.4083, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 2.7356280410884843, | |
| "grad_norm": 0.2583458633767275, | |
| "learning_rate": 2.3299488413423554e-07, | |
| "loss": 0.4033, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 2.7410344206163275, | |
| "grad_norm": 0.27176652448537475, | |
| "learning_rate": 2.2359578311629272e-07, | |
| "loss": 0.41, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 2.7464408001441702, | |
| "grad_norm": 0.2651677980954859, | |
| "learning_rate": 2.1438585121322465e-07, | |
| "loss": 0.4048, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 2.751847179672013, | |
| "grad_norm": 0.26468667998207535, | |
| "learning_rate": 2.0536545318366018e-07, | |
| "loss": 0.4089, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 2.7572535591998557, | |
| "grad_norm": 0.2682578170402083, | |
| "learning_rate": 1.9653494627975888e-07, | |
| "loss": 0.404, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.7626599387276984, | |
| "grad_norm": 0.27087994511441277, | |
| "learning_rate": 1.8789468023305334e-07, | |
| "loss": 0.4033, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 2.7680663182555416, | |
| "grad_norm": 0.25252752081120117, | |
| "learning_rate": 1.7944499724060484e-07, | |
| "loss": 0.4086, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 2.7734726977833843, | |
| "grad_norm": 0.2765603337180068, | |
| "learning_rate": 1.711862319514457e-07, | |
| "loss": 0.4058, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 2.7788790773112275, | |
| "grad_norm": 0.2662570880480703, | |
| "learning_rate": 1.6311871145332836e-07, | |
| "loss": 0.4016, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 2.7842854568390702, | |
| "grad_norm": 0.26536562491010973, | |
| "learning_rate": 1.5524275525977073e-07, | |
| "loss": 0.3961, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 2.789691836366913, | |
| "grad_norm": 0.2696933797225792, | |
| "learning_rate": 1.4755867529740064e-07, | |
| "loss": 0.402, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 2.7950982158947557, | |
| "grad_norm": 0.26230277928432566, | |
| "learning_rate": 1.4006677589360307e-07, | |
| "loss": 0.4006, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 2.8005045954225984, | |
| "grad_norm": 0.2618189445881308, | |
| "learning_rate": 1.3276735376446693e-07, | |
| "loss": 0.4101, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 2.8059109749504416, | |
| "grad_norm": 0.26154419260033057, | |
| "learning_rate": 1.2566069800303393e-07, | |
| "loss": 0.4007, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 2.8113173544782843, | |
| "grad_norm": 0.26129803510244903, | |
| "learning_rate": 1.1874709006784891e-07, | |
| "loss": 0.4108, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.816723734006127, | |
| "grad_norm": 0.2755262239215911, | |
| "learning_rate": 1.1202680377181252e-07, | |
| "loss": 0.4081, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 2.8221301135339703, | |
| "grad_norm": 0.27615467193849846, | |
| "learning_rate": 1.055001052713378e-07, | |
| "loss": 0.4057, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 2.827536493061813, | |
| "grad_norm": 0.2565394448779921, | |
| "learning_rate": 9.916725305580632e-08, | |
| "loss": 0.4074, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 2.8329428725896557, | |
| "grad_norm": 0.29481883515723867, | |
| "learning_rate": 9.302849793733526e-08, | |
| "loss": 0.4037, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 2.8383492521174984, | |
| "grad_norm": 0.2628737439763179, | |
| "learning_rate": 8.708408304083927e-08, | |
| "loss": 0.3982, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 2.8437556316453416, | |
| "grad_norm": 0.2856973586242492, | |
| "learning_rate": 8.133424379440535e-08, | |
| "loss": 0.4098, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 2.8491620111731844, | |
| "grad_norm": 0.2573191532815954, | |
| "learning_rate": 7.577920791996595e-08, | |
| "loss": 0.4021, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 2.854568390701027, | |
| "grad_norm": 0.2671924144995498, | |
| "learning_rate": 7.041919542428221e-08, | |
| "loss": 0.4046, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 2.8599747702288703, | |
| "grad_norm": 0.27125026996972024, | |
| "learning_rate": 6.525441859022873e-08, | |
| "loss": 0.3996, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 2.865381149756713, | |
| "grad_norm": 0.2597885306736867, | |
| "learning_rate": 6.028508196838811e-08, | |
| "loss": 0.3991, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.8707875292845557, | |
| "grad_norm": 0.2661065612840173, | |
| "learning_rate": 5.551138236894793e-08, | |
| "loss": 0.4082, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 2.8761939088123984, | |
| "grad_norm": 0.27596106902272594, | |
| "learning_rate": 5.093350885390591e-08, | |
| "loss": 0.4092, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 2.8816002883402416, | |
| "grad_norm": 0.2798778899386736, | |
| "learning_rate": 4.655164272958534e-08, | |
| "loss": 0.3935, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 2.8870066678680844, | |
| "grad_norm": 0.2675281011170649, | |
| "learning_rate": 4.236595753944972e-08, | |
| "loss": 0.4049, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 2.892413047395927, | |
| "grad_norm": 0.24219018671622744, | |
| "learning_rate": 3.837661905723378e-08, | |
| "loss": 0.4061, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.8978194269237703, | |
| "grad_norm": 0.26852051522723963, | |
| "learning_rate": 3.458378528037598e-08, | |
| "loss": 0.3982, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 2.903225806451613, | |
| "grad_norm": 0.2598218760743794, | |
| "learning_rate": 3.0987606423759644e-08, | |
| "loss": 0.3978, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 2.9086321859794557, | |
| "grad_norm": 0.24224454585639746, | |
| "learning_rate": 2.7588224913768225e-08, | |
| "loss": 0.4056, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 2.9140385655072985, | |
| "grad_norm": 0.28293842876891173, | |
| "learning_rate": 2.438577538263931e-08, | |
| "loss": 0.4041, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 2.9194449450351416, | |
| "grad_norm": 0.24273867782068695, | |
| "learning_rate": 2.1380384663135523e-08, | |
| "loss": 0.4046, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.9248513245629844, | |
| "grad_norm": 0.2589867572465761, | |
| "learning_rate": 1.8572171783521885e-08, | |
| "loss": 0.4016, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 2.930257704090827, | |
| "grad_norm": 0.26040920179163585, | |
| "learning_rate": 1.596124796284848e-08, | |
| "loss": 0.4048, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 2.9356640836186703, | |
| "grad_norm": 0.28129280293565423, | |
| "learning_rate": 1.3547716606548967e-08, | |
| "loss": 0.4082, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 2.941070463146513, | |
| "grad_norm": 0.27263421805264343, | |
| "learning_rate": 1.133167330234386e-08, | |
| "loss": 0.3957, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 2.9464768426743557, | |
| "grad_norm": 0.27306797377575853, | |
| "learning_rate": 9.313205816454674e-09, | |
| "loss": 0.4097, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.9518832222021985, | |
| "grad_norm": 0.26535989264790094, | |
| "learning_rate": 7.492394090128364e-09, | |
| "loss": 0.4091, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 2.957289601730041, | |
| "grad_norm": 0.26682062170730547, | |
| "learning_rate": 5.8693102364698604e-09, | |
| "loss": 0.3975, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 2.9626959812578844, | |
| "grad_norm": 0.2848285894683682, | |
| "learning_rate": 4.444018537588801e-09, | |
| "loss": 0.4075, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 2.968102360785727, | |
| "grad_norm": 0.2853108418534249, | |
| "learning_rate": 3.2165754420510063e-09, | |
| "loss": 0.4107, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 2.9735087403135703, | |
| "grad_norm": 0.26447810990716136, | |
| "learning_rate": 2.1870295626441607e-09, | |
| "loss": 0.4022, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.978915119841413, | |
| "grad_norm": 0.2661971477507847, | |
| "learning_rate": 1.3554216744521287e-09, | |
| "loss": 0.4041, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 2.9843214993692557, | |
| "grad_norm": 0.25582504114161564, | |
| "learning_rate": 7.217847132401367e-10, | |
| "loss": 0.4064, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 2.9897278788970985, | |
| "grad_norm": 0.26069476073784237, | |
| "learning_rate": 2.861437741508155e-10, | |
| "loss": 0.4115, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 2.995134258424941, | |
| "grad_norm": 0.27554755453273777, | |
| "learning_rate": 4.851611070832984e-11, | |
| "loss": 0.4016, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 2.9989187240944313, | |
| "step": 5547, | |
| "total_flos": 8484146955288576.0, | |
| "train_loss": 0.44718967426225087, | |
| "train_runtime": 93872.001, | |
| "train_samples_per_second": 5.675, | |
| "train_steps_per_second": 0.059 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 5547, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8484146955288576.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
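
The JSON above follows the `trainer_state.json` layout written by the Hugging Face `transformers` `Trainer`: a top-level `log_history` list whose per-step entries carry `step`, `epoch`, `loss`, `grad_norm`, and `learning_rate`, closed by a summary entry and run-level metadata. As a minimal sketch of how to consume it, the snippet below parses that structure and plots the loss and learning-rate curves; the file path `trainer_state.json` and output name `training_curves.png` are assumptions for illustration, not recorded anywhere in the state file itself.

```python
# Minimal sketch: plot loss and learning rate from a Trainer state dump.
# Assumes the JSON above is saved locally as "trainer_state.json"
# (hypothetical path) with the structure shown in this document.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# The final summary entry (train_loss, train_runtime, ...) has no
# per-step metrics, so keep only entries that actually logged a loss.
logs = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in logs]
loss = [e["loss"] for e in logs]
lr = [e["learning_rate"] for e in logs]

fig, ax1 = plt.subplots()
ax1.plot(steps, loss, label="train loss")
ax1.set_xlabel("step")
ax1.set_ylabel("loss")

ax2 = ax1.twinx()  # second y-axis, since loss and LR differ by orders of magnitude
ax2.plot(steps, lr, color="tab:orange", label="learning rate")
ax2.set_ylabel("learning rate")

fig.tight_layout()
plt.savefig("training_curves.png")
```

The summary fields are also internally consistent and let a few run parameters be inferred: `train_steps_per_second` (0.059) times `train_runtime` (93,872 s) gives roughly 5,539 steps, matching `global_step` 5547; and `train_samples_per_second` divided by `train_steps_per_second` (5.675 / 0.059) is roughly 96 samples per optimizer step, which with `train_batch_size` of 1 suggests gradient accumulation and/or data parallelism made up the effective batch, though the state file itself does not record those settings.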