| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 9213, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0162813415825464, | |
| "grad_norm": 1.3799059391021729, | |
| "learning_rate": 4.974492564854011e-05, | |
| "loss": 4.366279296875, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0325626831650928, | |
| "grad_norm": null, | |
| "learning_rate": 4.9473569955497666e-05, | |
| "loss": 3.2682159423828123, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04884402474763921, | |
| "grad_norm": 1.1759626865386963, | |
| "learning_rate": 4.9207641376316076e-05, | |
| "loss": 2.121284637451172, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0651253663301856, | |
| "grad_norm": 1.299229383468628, | |
| "learning_rate": 4.8936285683273635e-05, | |
| "loss": 1.8733770751953125, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.08140670791273201, | |
| "grad_norm": 1.3079231977462769, | |
| "learning_rate": 4.8664929990231194e-05, | |
| "loss": 1.8073320007324218, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.09768804949527841, | |
| "grad_norm": 1.5056711435317993, | |
| "learning_rate": 4.839357429718876e-05, | |
| "loss": 1.7036862182617187, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.11396939107782482, | |
| "grad_norm": 1.5221471786499023, | |
| "learning_rate": 4.812221860414632e-05, | |
| "loss": 1.6605093383789062, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.1302507326603712, | |
| "grad_norm": 1.4612085819244385, | |
| "learning_rate": 4.785086291110388e-05, | |
| "loss": 1.582379608154297, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.14653207424291761, | |
| "grad_norm": 1.3166944980621338, | |
| "learning_rate": 4.7579507218061436e-05, | |
| "loss": 1.5711520385742188, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.16281341582546402, | |
| "grad_norm": 1.8040547370910645, | |
| "learning_rate": 4.7308151525018995e-05, | |
| "loss": 1.436278076171875, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.17909475740801042, | |
| "grad_norm": 1.7718613147735596, | |
| "learning_rate": 4.7036795831976553e-05, | |
| "loss": 1.4956285095214843, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.19537609899055683, | |
| "grad_norm": 2.499027967453003, | |
| "learning_rate": 4.676544013893412e-05, | |
| "loss": 1.3423948669433594, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.21165744057310323, | |
| "grad_norm": 1.7684857845306396, | |
| "learning_rate": 4.649408444589168e-05, | |
| "loss": 1.358212432861328, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.22793878215564964, | |
| "grad_norm": 1.8858190774917603, | |
| "learning_rate": 4.622815586671009e-05, | |
| "loss": 1.3155609130859376, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.24422012373819602, | |
| "grad_norm": 1.708154559135437, | |
| "learning_rate": 4.5956800173667646e-05, | |
| "loss": 1.204995346069336, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2605014653207424, | |
| "grad_norm": 2.377797842025757, | |
| "learning_rate": 4.5685444480625205e-05, | |
| "loss": 1.2155376434326173, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.2767828069032888, | |
| "grad_norm": 2.3532145023345947, | |
| "learning_rate": 4.5414088787582764e-05, | |
| "loss": 1.2571015930175782, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.29306414848583523, | |
| "grad_norm": 2.745908498764038, | |
| "learning_rate": 4.514273309454032e-05, | |
| "loss": 1.1259475708007813, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.30934549006838163, | |
| "grad_norm": 4.180660247802734, | |
| "learning_rate": 4.487137740149789e-05, | |
| "loss": 1.1778811645507812, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.32562683165092804, | |
| "grad_norm": 2.554922103881836, | |
| "learning_rate": 4.460002170845545e-05, | |
| "loss": 1.144913787841797, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.34190817323347444, | |
| "grad_norm": 2.6831798553466797, | |
| "learning_rate": 4.4328666015413006e-05, | |
| "loss": 1.1192340850830078, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.35818951481602085, | |
| "grad_norm": 2.5000758171081543, | |
| "learning_rate": 4.4057310322370565e-05, | |
| "loss": 1.0886085510253907, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.37447085639856725, | |
| "grad_norm": 2.5406346321105957, | |
| "learning_rate": 4.3785954629328124e-05, | |
| "loss": 1.0647865295410157, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.39075219798111366, | |
| "grad_norm": 2.5966973304748535, | |
| "learning_rate": 4.351459893628568e-05, | |
| "loss": 1.0138130187988281, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.40703353956366006, | |
| "grad_norm": 2.9423012733459473, | |
| "learning_rate": 4.324324324324325e-05, | |
| "loss": 0.971071548461914, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.42331488114620647, | |
| "grad_norm": 2.9763288497924805, | |
| "learning_rate": 4.297188755020081e-05, | |
| "loss": 0.9740264129638672, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.4395962227287529, | |
| "grad_norm": 2.0831127166748047, | |
| "learning_rate": 4.270595897101922e-05, | |
| "loss": 1.0225084686279298, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.4558775643112993, | |
| "grad_norm": 3.093662977218628, | |
| "learning_rate": 4.2434603277976776e-05, | |
| "loss": 0.9085057830810547, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.4721589058938456, | |
| "grad_norm": 3.1048061847686768, | |
| "learning_rate": 4.2163247584934335e-05, | |
| "loss": 0.9657279968261718, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.48844024747639203, | |
| "grad_norm": 89.80404663085938, | |
| "learning_rate": 4.189189189189189e-05, | |
| "loss": 0.9195979309082031, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.5047215890589385, | |
| "grad_norm": 2.7518820762634277, | |
| "learning_rate": 4.162053619884945e-05, | |
| "loss": 0.8844217681884765, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.5210029306414848, | |
| "grad_norm": 2.216895818710327, | |
| "learning_rate": 4.134918050580701e-05, | |
| "loss": 0.9437327575683594, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.5372842722240313, | |
| "grad_norm": 2.756894826889038, | |
| "learning_rate": 4.1077824812764577e-05, | |
| "loss": 0.9072888946533203, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.5535656138065776, | |
| "grad_norm": 2.630861759185791, | |
| "learning_rate": 4.0806469119722135e-05, | |
| "loss": 0.9133613586425782, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.5698469553891241, | |
| "grad_norm": 2.3018958568573, | |
| "learning_rate": 4.0535113426679694e-05, | |
| "loss": 0.9179753875732422, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.5861282969716705, | |
| "grad_norm": 2.2267212867736816, | |
| "learning_rate": 4.026375773363725e-05, | |
| "loss": 0.8736819458007813, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.6024096385542169, | |
| "grad_norm": 3.817021369934082, | |
| "learning_rate": 3.999240204059481e-05, | |
| "loss": 0.8818047332763672, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.6186909801367633, | |
| "grad_norm": 2.8244123458862305, | |
| "learning_rate": 3.972104634755237e-05, | |
| "loss": 0.8710990142822266, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.6349723217193096, | |
| "grad_norm": 2.787409782409668, | |
| "learning_rate": 3.9449690654509936e-05, | |
| "loss": 0.791876220703125, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.6512536633018561, | |
| "grad_norm": 2.5339832305908203, | |
| "learning_rate": 3.9178334961467495e-05, | |
| "loss": 0.8330724334716797, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.6675350048844024, | |
| "grad_norm": 2.2571518421173096, | |
| "learning_rate": 3.8906979268425054e-05, | |
| "loss": 0.8065113067626953, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.6838163464669489, | |
| "grad_norm": 3.3255224227905273, | |
| "learning_rate": 3.863562357538261e-05, | |
| "loss": 0.7839543151855469, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.7000976880494952, | |
| "grad_norm": 2.493654727935791, | |
| "learning_rate": 3.836426788234017e-05, | |
| "loss": 0.7902137756347656, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.7163790296320417, | |
| "grad_norm": 2.943366527557373, | |
| "learning_rate": 3.809291218929774e-05, | |
| "loss": 0.9532376861572266, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.732660371214588, | |
| "grad_norm": 2.404705762863159, | |
| "learning_rate": 3.7821556496255296e-05, | |
| "loss": 0.8227187347412109, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.7489417127971345, | |
| "grad_norm": 8.06905460357666, | |
| "learning_rate": 3.7550200803212855e-05, | |
| "loss": 0.7640556335449219, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.7652230543796809, | |
| "grad_norm": 3.540977954864502, | |
| "learning_rate": 3.7278845110170414e-05, | |
| "loss": 0.8362091064453125, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.7815043959622273, | |
| "grad_norm": 2.233323574066162, | |
| "learning_rate": 3.700748941712797e-05, | |
| "loss": 0.6893608093261718, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.7977857375447737, | |
| "grad_norm": 2.947315216064453, | |
| "learning_rate": 3.673613372408553e-05, | |
| "loss": 0.7564961242675782, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.8140670791273201, | |
| "grad_norm": 2.9839603900909424, | |
| "learning_rate": 3.64647780310431e-05, | |
| "loss": 0.7726463317871094, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.8303484207098665, | |
| "grad_norm": 2.638998508453369, | |
| "learning_rate": 3.6193422338000656e-05, | |
| "loss": 0.7850227355957031, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.8466297622924129, | |
| "grad_norm": 2.203768730163574, | |
| "learning_rate": 3.5922066644958215e-05, | |
| "loss": 0.7540821838378906, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.8629111038749593, | |
| "grad_norm": 2.7057082653045654, | |
| "learning_rate": 3.565071095191577e-05, | |
| "loss": 0.6677760314941407, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.8791924454575057, | |
| "grad_norm": 3.2892088890075684, | |
| "learning_rate": 3.537935525887333e-05, | |
| "loss": 0.74295654296875, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.8954737870400521, | |
| "grad_norm": 2.8778061866760254, | |
| "learning_rate": 3.510799956583089e-05, | |
| "loss": 0.7150550842285156, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.9117551286225986, | |
| "grad_norm": 1.9023234844207764, | |
| "learning_rate": 3.483664387278846e-05, | |
| "loss": 0.7367278289794922, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.9280364702051449, | |
| "grad_norm": 3.3899879455566406, | |
| "learning_rate": 3.4565288179746015e-05, | |
| "loss": 0.7095525360107422, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.9443178117876913, | |
| "grad_norm": 3.202036142349243, | |
| "learning_rate": 3.4293932486703574e-05, | |
| "loss": 0.7237194061279297, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.9605991533702377, | |
| "grad_norm": 2.44712233543396, | |
| "learning_rate": 3.402257679366113e-05, | |
| "loss": 0.710773696899414, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.9768804949527841, | |
| "grad_norm": 3.5873775482177734, | |
| "learning_rate": 3.375122110061869e-05, | |
| "loss": 0.6593586730957032, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.9931618365353305, | |
| "grad_norm": 2.8714234828948975, | |
| "learning_rate": 3.347986540757626e-05, | |
| "loss": 0.7627605438232422, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_bertscore_f1": 0.9653369394064688, | |
| "eval_bleu": 0.6270834635129311, | |
| "eval_loss": 0.48991522192955017, | |
| "eval_meteor": 0.7251021230424122, | |
| "eval_rouge1": 0.8465042416762141, | |
| "eval_rouge2": 0.738163460778114, | |
| "eval_runtime": 68.0737, | |
| "eval_samples_per_second": 18.979, | |
| "eval_steps_per_second": 2.38, | |
| "step": 3071 | |
| }, | |
| { | |
| "epoch": 1.009443178117877, | |
| "grad_norm": 3.2640202045440674, | |
| "learning_rate": 3.3208509714533816e-05, | |
| "loss": 0.5927775573730468, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.0257245197004232, | |
| "grad_norm": 3.130765914916992, | |
| "learning_rate": 3.2937154021491375e-05, | |
| "loss": 0.5853068161010743, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.0420058612829697, | |
| "grad_norm": 3.2238473892211914, | |
| "learning_rate": 3.2665798328448934e-05, | |
| "loss": 0.6931375885009765, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.0582872028655161, | |
| "grad_norm": 4.1798176765441895, | |
| "learning_rate": 3.239444263540649e-05, | |
| "loss": 0.6535150146484375, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.0745685444480626, | |
| "grad_norm": 3.4835116863250732, | |
| "learning_rate": 3.212308694236405e-05, | |
| "loss": 0.6570293426513671, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.0908498860306088, | |
| "grad_norm": 3.2468245029449463, | |
| "learning_rate": 3.185173124932162e-05, | |
| "loss": 0.6235344696044922, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.1071312276131553, | |
| "grad_norm": 2.503023862838745, | |
| "learning_rate": 3.1580375556279176e-05, | |
| "loss": 0.6021703720092774, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.1234125691957018, | |
| "grad_norm": 3.5487520694732666, | |
| "learning_rate": 3.1309019863236735e-05, | |
| "loss": 0.6459141540527343, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.1396939107782482, | |
| "grad_norm": 2.8496859073638916, | |
| "learning_rate": 3.1037664170194294e-05, | |
| "loss": 0.5954633712768554, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.1559752523607945, | |
| "grad_norm": 2.746445894241333, | |
| "learning_rate": 3.076630847715185e-05, | |
| "loss": 0.5743134689331054, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.172256593943341, | |
| "grad_norm": 3.843780517578125, | |
| "learning_rate": 3.0494952784109408e-05, | |
| "loss": 0.7025726318359375, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.1885379355258874, | |
| "grad_norm": 2.3990111351013184, | |
| "learning_rate": 3.0223597091066974e-05, | |
| "loss": 0.6482646942138672, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.2048192771084336, | |
| "grad_norm": 3.495655059814453, | |
| "learning_rate": 2.9952241398024532e-05, | |
| "loss": 0.6225572967529297, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.22110061869098, | |
| "grad_norm": 3.0918631553649902, | |
| "learning_rate": 2.968088570498209e-05, | |
| "loss": 0.6018490982055664, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.2373819602735265, | |
| "grad_norm": 3.54016375541687, | |
| "learning_rate": 2.940953001193965e-05, | |
| "loss": 0.6204871749877929, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.253663301856073, | |
| "grad_norm": 3.330631971359253, | |
| "learning_rate": 2.913817431889721e-05, | |
| "loss": 0.5625830459594726, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.2699446434386195, | |
| "grad_norm": 3.4091968536376953, | |
| "learning_rate": 2.8866818625854774e-05, | |
| "loss": 0.6275486755371094, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.2862259850211657, | |
| "grad_norm": 3.535207748413086, | |
| "learning_rate": 2.8595462932812333e-05, | |
| "loss": 0.6113796997070312, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.3025073266037122, | |
| "grad_norm": 2.739208459854126, | |
| "learning_rate": 2.8324107239769892e-05, | |
| "loss": 0.6166405487060547, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.3187886681862586, | |
| "grad_norm": 2.3887178897857666, | |
| "learning_rate": 2.805275154672745e-05, | |
| "loss": 0.6348526000976562, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.3350700097688049, | |
| "grad_norm": 3.2300209999084473, | |
| "learning_rate": 2.778139585368501e-05, | |
| "loss": 0.6592056274414062, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.3513513513513513, | |
| "grad_norm": 2.4417901039123535, | |
| "learning_rate": 2.751004016064257e-05, | |
| "loss": 0.5736191177368164, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.3676326929338978, | |
| "grad_norm": 4.1886467933654785, | |
| "learning_rate": 2.7238684467600134e-05, | |
| "loss": 0.5781734466552735, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.3839140345164442, | |
| "grad_norm": 2.7025551795959473, | |
| "learning_rate": 2.6967328774557693e-05, | |
| "loss": 0.5421427917480469, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.4001953760989905, | |
| "grad_norm": 3.4467735290527344, | |
| "learning_rate": 2.6695973081515252e-05, | |
| "loss": 0.6328504180908203, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.416476717681537, | |
| "grad_norm": 2.252255916595459, | |
| "learning_rate": 2.642461738847281e-05, | |
| "loss": 0.565279884338379, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.4327580592640834, | |
| "grad_norm": 2.3594324588775635, | |
| "learning_rate": 2.615326169543037e-05, | |
| "loss": 0.585950927734375, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.4490394008466296, | |
| "grad_norm": 3.1787843704223633, | |
| "learning_rate": 2.5881906002387928e-05, | |
| "loss": 0.6461568450927735, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.465320742429176, | |
| "grad_norm": 9.052631378173828, | |
| "learning_rate": 2.5610550309345494e-05, | |
| "loss": 0.5787173461914062, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.4816020840117226, | |
| "grad_norm": 3.1000287532806396, | |
| "learning_rate": 2.5339194616303053e-05, | |
| "loss": 0.5753350830078126, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.497883425594269, | |
| "grad_norm": 2.160932779312134, | |
| "learning_rate": 2.506783892326061e-05, | |
| "loss": 0.6055181503295899, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.5141647671768155, | |
| "grad_norm": 5.498105525970459, | |
| "learning_rate": 2.479648323021817e-05, | |
| "loss": 0.5424030303955079, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.530446108759362, | |
| "grad_norm": 2.4782474040985107, | |
| "learning_rate": 2.4525127537175733e-05, | |
| "loss": 0.6082788848876953, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.5467274503419082, | |
| "grad_norm": 2.7400150299072266, | |
| "learning_rate": 2.425377184413329e-05, | |
| "loss": 0.5984983444213867, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.5630087919244544, | |
| "grad_norm": 3.0426690578460693, | |
| "learning_rate": 2.398241615109085e-05, | |
| "loss": 0.6066116333007813, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.5792901335070009, | |
| "grad_norm": 3.5095133781433105, | |
| "learning_rate": 2.3711060458048412e-05, | |
| "loss": 0.605382080078125, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.5955714750895473, | |
| "grad_norm": 3.64323091506958, | |
| "learning_rate": 2.343970476500597e-05, | |
| "loss": 0.5372691726684571, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.6118528166720938, | |
| "grad_norm": 6.410864353179932, | |
| "learning_rate": 2.316834907196353e-05, | |
| "loss": 0.4930916976928711, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.6281341582546403, | |
| "grad_norm": 2.9752631187438965, | |
| "learning_rate": 2.2896993378921092e-05, | |
| "loss": 0.49088024139404296, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.6444154998371867, | |
| "grad_norm": 2.8982131481170654, | |
| "learning_rate": 2.262563768587865e-05, | |
| "loss": 0.5840103912353516, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 1.660696841419733, | |
| "grad_norm": 3.7222821712493896, | |
| "learning_rate": 2.235428199283621e-05, | |
| "loss": 0.5301944732666015, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.6769781830022794, | |
| "grad_norm": 3.526601791381836, | |
| "learning_rate": 2.2082926299793772e-05, | |
| "loss": 0.4781329345703125, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 1.6932595245848256, | |
| "grad_norm": 3.4005913734436035, | |
| "learning_rate": 2.181157060675133e-05, | |
| "loss": 0.5219943237304687, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.709540866167372, | |
| "grad_norm": 3.9888486862182617, | |
| "learning_rate": 2.154021491370889e-05, | |
| "loss": 0.5756942367553711, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 1.7258222077499186, | |
| "grad_norm": 3.6952855587005615, | |
| "learning_rate": 2.1268859220666452e-05, | |
| "loss": 0.5279730606079102, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.742103549332465, | |
| "grad_norm": 3.1715617179870605, | |
| "learning_rate": 2.099750352762401e-05, | |
| "loss": 0.5441674423217774, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 1.7583848909150115, | |
| "grad_norm": 3.5982584953308105, | |
| "learning_rate": 2.0726147834581573e-05, | |
| "loss": 0.46869205474853515, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.774666232497558, | |
| "grad_norm": 3.594470977783203, | |
| "learning_rate": 2.0454792141539132e-05, | |
| "loss": 0.5004570388793945, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 1.7909475740801042, | |
| "grad_norm": 3.198012351989746, | |
| "learning_rate": 2.018343644849669e-05, | |
| "loss": 0.49389095306396485, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.8072289156626506, | |
| "grad_norm": 2.3895151615142822, | |
| "learning_rate": 1.9912080755454253e-05, | |
| "loss": 0.5188541793823243, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.8235102572451969, | |
| "grad_norm": 2.874993085861206, | |
| "learning_rate": 1.964072506241181e-05, | |
| "loss": 0.4755914306640625, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.8397915988277433, | |
| "grad_norm": 4.330140590667725, | |
| "learning_rate": 1.936936936936937e-05, | |
| "loss": 0.49986125946044924, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 1.8560729404102898, | |
| "grad_norm": 3.2301809787750244, | |
| "learning_rate": 1.9098013676326933e-05, | |
| "loss": 0.5472452163696289, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.8723542819928363, | |
| "grad_norm": 2.056736946105957, | |
| "learning_rate": 1.883208509714534e-05, | |
| "loss": 0.5061603164672852, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 1.8886356235753827, | |
| "grad_norm": 4.6902031898498535, | |
| "learning_rate": 1.85607294041029e-05, | |
| "loss": 0.4669316101074219, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.904916965157929, | |
| "grad_norm": 3.790092945098877, | |
| "learning_rate": 1.828937371106046e-05, | |
| "loss": 0.561137809753418, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 1.9211983067404754, | |
| "grad_norm": 4.152039527893066, | |
| "learning_rate": 1.801801801801802e-05, | |
| "loss": 0.4813918304443359, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.9374796483230217, | |
| "grad_norm": 3.3476598262786865, | |
| "learning_rate": 1.774666232497558e-05, | |
| "loss": 0.5630344390869141, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 1.9537609899055681, | |
| "grad_norm": 4.2672810554504395, | |
| "learning_rate": 1.747530663193314e-05, | |
| "loss": 0.48508411407470703, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.9700423314881146, | |
| "grad_norm": 4.236985206604004, | |
| "learning_rate": 1.72039509388907e-05, | |
| "loss": 0.5445558929443359, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 1.986323673070661, | |
| "grad_norm": 2.686180591583252, | |
| "learning_rate": 1.693259524584826e-05, | |
| "loss": 0.5194969558715821, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_bertscore_f1": 0.9755530517905858, | |
| "eval_bleu": 0.7363057302997511, | |
| "eval_loss": 0.3618590235710144, | |
| "eval_meteor": 0.813260581053782, | |
| "eval_rouge1": 0.8844645577727277, | |
| "eval_rouge2": 0.8050353100012327, | |
| "eval_runtime": 70.0732, | |
| "eval_samples_per_second": 18.438, | |
| "eval_steps_per_second": 2.312, | |
| "step": 6142 | |
| }, | |
| { | |
| "epoch": 2.0026050146532075, | |
| "grad_norm": 2.022204637527466, | |
| "learning_rate": 1.666123955280582e-05, | |
| "loss": 0.48952743530273435, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 2.018886356235754, | |
| "grad_norm": 4.96242094039917, | |
| "learning_rate": 1.638988385976338e-05, | |
| "loss": 0.5839331436157227, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 2.0351676978183004, | |
| "grad_norm": 3.4074771404266357, | |
| "learning_rate": 1.611852816672094e-05, | |
| "loss": 0.5070013427734374, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 2.0514490394008464, | |
| "grad_norm": 3.10239577293396, | |
| "learning_rate": 1.58471724736785e-05, | |
| "loss": 0.4913197708129883, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 2.067730380983393, | |
| "grad_norm": 3.764558792114258, | |
| "learning_rate": 1.557581678063606e-05, | |
| "loss": 0.4683738327026367, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 2.0840117225659394, | |
| "grad_norm": 4.150667667388916, | |
| "learning_rate": 1.5304461087593617e-05, | |
| "loss": 0.4650471878051758, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 2.100293064148486, | |
| "grad_norm": 3.9944324493408203, | |
| "learning_rate": 1.5033105394551178e-05, | |
| "loss": 0.5024824905395507, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 2.1165744057310323, | |
| "grad_norm": 2.410952568054199, | |
| "learning_rate": 1.476174970150874e-05, | |
| "loss": 0.5205254745483399, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.1328557473135787, | |
| "grad_norm": 4.4830098152160645, | |
| "learning_rate": 1.4490394008466299e-05, | |
| "loss": 0.5458049011230469, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 2.149137088896125, | |
| "grad_norm": 3.420327663421631, | |
| "learning_rate": 1.4219038315423858e-05, | |
| "loss": 0.544830436706543, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 2.165418430478671, | |
| "grad_norm": 4.262825012207031, | |
| "learning_rate": 1.394768262238142e-05, | |
| "loss": 0.4901109313964844, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 2.1816997720612177, | |
| "grad_norm": 2.969730854034424, | |
| "learning_rate": 1.3676326929338979e-05, | |
| "loss": 0.48183216094970704, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 2.197981113643764, | |
| "grad_norm": 2.7617075443267822, | |
| "learning_rate": 1.3404971236296538e-05, | |
| "loss": 0.5208282470703125, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 2.2142624552263106, | |
| "grad_norm": 2.8121178150177, | |
| "learning_rate": 1.31336155432541e-05, | |
| "loss": 0.47464847564697266, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 2.230543796808857, | |
| "grad_norm": 2.1643424034118652, | |
| "learning_rate": 1.2862259850211659e-05, | |
| "loss": 0.5135415267944335, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 2.2468251383914035, | |
| "grad_norm": 3.0597665309906006, | |
| "learning_rate": 1.2590904157169217e-05, | |
| "loss": 0.48383502960205077, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 2.26310647997395, | |
| "grad_norm": 3.4192488193511963, | |
| "learning_rate": 1.2319548464126778e-05, | |
| "loss": 0.5295528411865235, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 2.2793878215564964, | |
| "grad_norm": 3.485333204269409, | |
| "learning_rate": 1.2048192771084338e-05, | |
| "loss": 0.5490006637573243, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.295669163139043, | |
| "grad_norm": 3.5061099529266357, | |
| "learning_rate": 1.1776837078041899e-05, | |
| "loss": 0.444782600402832, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 2.311950504721589, | |
| "grad_norm": 4.059643745422363, | |
| "learning_rate": 1.1505481384999458e-05, | |
| "loss": 0.4735762786865234, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 2.3282318463041354, | |
| "grad_norm": 3.1162891387939453, | |
| "learning_rate": 1.1234125691957018e-05, | |
| "loss": 0.5211288452148437, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 2.344513187886682, | |
| "grad_norm": 1.198476791381836, | |
| "learning_rate": 1.0962769998914577e-05, | |
| "loss": 0.5121672439575196, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 2.3607945294692283, | |
| "grad_norm": 3.9411354064941406, | |
| "learning_rate": 1.0691414305872138e-05, | |
| "loss": 0.5504902267456054, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 2.3770758710517748, | |
| "grad_norm": 3.590696334838867, | |
| "learning_rate": 1.0420058612829696e-05, | |
| "loss": 0.4592051315307617, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 2.393357212634321, | |
| "grad_norm": 2.1098175048828125, | |
| "learning_rate": 1.0148702919787257e-05, | |
| "loss": 0.4932923126220703, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 2.4096385542168672, | |
| "grad_norm": 4.837367057800293, | |
| "learning_rate": 9.877347226744818e-06, | |
| "loss": 0.45726318359375, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 2.4259198957994137, | |
| "grad_norm": 2.808544874191284, | |
| "learning_rate": 9.605991533702376e-06, | |
| "loss": 0.4931900787353516, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 2.44220123738196, | |
| "grad_norm": 2.6487984657287598, | |
| "learning_rate": 9.334635840659937e-06, | |
| "loss": 0.4715615844726562, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.4584825789645066, | |
| "grad_norm": 4.251109600067139, | |
| "learning_rate": 9.063280147617497e-06, | |
| "loss": 0.5373792266845703, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 2.474763920547053, | |
| "grad_norm": 3.84010648727417, | |
| "learning_rate": 8.791924454575056e-06, | |
| "loss": 0.44632495880126954, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 2.4910452621295995, | |
| "grad_norm": 1.9418392181396484, | |
| "learning_rate": 8.520568761532617e-06, | |
| "loss": 0.48151702880859376, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 2.507326603712146, | |
| "grad_norm": 4.140622138977051, | |
| "learning_rate": 8.249213068490177e-06, | |
| "loss": 0.4063055419921875, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 2.5236079452946925, | |
| "grad_norm": 3.0216522216796875, | |
| "learning_rate": 7.977857375447738e-06, | |
| "loss": 0.4796050262451172, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 2.539889286877239, | |
| "grad_norm": 4.727103233337402, | |
| "learning_rate": 7.706501682405297e-06, | |
| "loss": 0.46068046569824217, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 2.556170628459785, | |
| "grad_norm": 4.281773567199707, | |
| "learning_rate": 7.435145989362857e-06, | |
| "loss": 0.44071575164794924, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 2.5724519700423314, | |
| "grad_norm": 3.134763479232788, | |
| "learning_rate": 7.163790296320418e-06, | |
| "loss": 0.4763399887084961, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 2.588733311624878, | |
| "grad_norm": 3.584044933319092, | |
| "learning_rate": 6.8924346032779764e-06, | |
| "loss": 0.4629644012451172, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 2.6050146532074243, | |
| "grad_norm": 2.601400852203369, | |
| "learning_rate": 6.621078910235537e-06, | |
| "loss": 0.4727302551269531, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.6212959947899708, | |
| "grad_norm": 3.5354995727539062, | |
| "learning_rate": 6.3497232171930975e-06, | |
| "loss": 0.42160026550292967, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 2.6375773363725172, | |
| "grad_norm": 2.9206888675689697, | |
| "learning_rate": 6.078367524150657e-06, | |
| "loss": 0.4754294204711914, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 2.6538586779550632, | |
| "grad_norm": 2.4927732944488525, | |
| "learning_rate": 5.807011831108217e-06, | |
| "loss": 0.5114262390136719, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 2.6701400195376097, | |
| "grad_norm": 4.378971099853516, | |
| "learning_rate": 5.535656138065777e-06, | |
| "loss": 0.5084254837036133, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 2.686421361120156, | |
| "grad_norm": 2.4034016132354736, | |
| "learning_rate": 5.264300445023337e-06, | |
| "loss": 0.527303466796875, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 2.7027027027027026, | |
| "grad_norm": 3.7141177654266357, | |
| "learning_rate": 4.9929447519808975e-06, | |
| "loss": 0.4662747573852539, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 2.718984044285249, | |
| "grad_norm": 3.871277332305908, | |
| "learning_rate": 4.721589058938457e-06, | |
| "loss": 0.5126468276977539, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 2.7352653858677956, | |
| "grad_norm": 2.500791072845459, | |
| "learning_rate": 4.450233365896017e-06, | |
| "loss": 0.47957534790039064, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 2.751546727450342, | |
| "grad_norm": 5.441941738128662, | |
| "learning_rate": 4.1788776728535765e-06, | |
| "loss": 0.38029510498046876, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 2.7678280690328885, | |
| "grad_norm": 3.3940446376800537, | |
| "learning_rate": 3.907521979811136e-06, | |
| "loss": 0.4626531219482422, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.784109410615435, | |
| "grad_norm": 4.125059127807617, | |
| "learning_rate": 3.6361662867686967e-06, | |
| "loss": 0.4890303039550781, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 2.800390752197981, | |
| "grad_norm": 2.758863687515259, | |
| "learning_rate": 3.3648105937262564e-06, | |
| "loss": 0.4689041519165039, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 2.8166720937805274, | |
| "grad_norm": 4.864498138427734, | |
| "learning_rate": 3.0934549006838165e-06, | |
| "loss": 0.46032047271728516, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 2.832953435363074, | |
| "grad_norm": 3.3108010292053223, | |
| "learning_rate": 2.8220992076413766e-06, | |
| "loss": 0.43362377166748045, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 2.8492347769456203, | |
| "grad_norm": 2.3421084880828857, | |
| "learning_rate": 2.5507435145989362e-06, | |
| "loss": 0.44478134155273436, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 2.865516118528167, | |
| "grad_norm": 3.283203601837158, | |
| "learning_rate": 2.2793878215564963e-06, | |
| "loss": 0.5047480392456055, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 2.8817974601107132, | |
| "grad_norm": 2.0124731063842773, | |
| "learning_rate": 2.0080321285140564e-06, | |
| "loss": 0.4658950424194336, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 2.8980788016932593, | |
| "grad_norm": 3.839552879333496, | |
| "learning_rate": 1.7366764354716163e-06, | |
| "loss": 0.45034191131591794, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 2.9143601432758057, | |
| "grad_norm": 4.701524257659912, | |
| "learning_rate": 1.4653207424291762e-06, | |
| "loss": 0.47517498016357423, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 2.930641484858352, | |
| "grad_norm": 6.58011531829834, | |
| "learning_rate": 1.1939650493867363e-06, | |
| "loss": 0.44451316833496096, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.9469228264408986, | |
| "grad_norm": 2.9627132415771484, | |
| "learning_rate": 9.226093563442963e-07, | |
| "loss": 0.41320926666259766, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 2.963204168023445, | |
| "grad_norm": 3.003753185272217, | |
| "learning_rate": 6.51253663301856e-07, | |
| "loss": 0.3974274444580078, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 2.9794855096059916, | |
| "grad_norm": 2.0012876987457275, | |
| "learning_rate": 3.7989797025941607e-07, | |
| "loss": 0.42885406494140627, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 2.995766851188538, | |
| "grad_norm": 3.7651121616363525, | |
| "learning_rate": 1.0854227721697602e-07, | |
| "loss": 0.4800850296020508, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_bertscore_f1": 0.9785511039727982, | |
| "eval_bleu": 0.7645620244248046, | |
| "eval_loss": 0.3346184194087982, | |
| "eval_meteor": 0.8355226256477348, | |
| "eval_rouge1": 0.8968326891869934, | |
| "eval_rouge2": 0.8250429516845066, | |
| "eval_runtime": 68.0941, | |
| "eval_samples_per_second": 18.974, | |
| "eval_steps_per_second": 2.379, | |
| "step": 9213 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 9213, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.263887217557504e+16, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |