| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 9213, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0162813415825464, |
| "grad_norm": 3.7931034564971924, |
| "learning_rate": 4.975035276240096e-05, |
| "loss": 1.7502801513671875, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0325626831650928, |
| "grad_norm": 3.5913758277893066, |
| "learning_rate": 4.947899706935852e-05, |
| "loss": 0.8483324432373047, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04884402474763921, |
| "grad_norm": 3.3178822994232178, |
| "learning_rate": 4.9207641376316076e-05, |
| "loss": 0.5748957061767578, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0651253663301856, |
| "grad_norm": 2.392831802368164, |
| "learning_rate": 4.8936285683273635e-05, |
| "loss": 0.4335686492919922, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.08140670791273201, |
| "grad_norm": 2.411132335662842, |
| "learning_rate": 4.8664929990231194e-05, |
| "loss": 0.45588829040527346, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.09768804949527841, |
| "grad_norm": 3.383033275604248, |
| "learning_rate": 4.839357429718876e-05, |
| "loss": 0.38454761505126955, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.11396939107782482, |
| "grad_norm": 1.5641525983810425, |
| "learning_rate": 4.812221860414632e-05, |
| "loss": 0.31118762969970704, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.1302507326603712, |
| "grad_norm": 1.962287425994873, |
| "learning_rate": 4.785086291110388e-05, |
| "loss": 0.32715892791748047, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.14653207424291761, |
| "grad_norm": 2.539684534072876, |
| "learning_rate": 4.7579507218061436e-05, |
| "loss": 0.3269093704223633, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.16281341582546402, |
| "grad_norm": 3.244333267211914, |
| "learning_rate": 4.7308151525018995e-05, |
| "loss": 0.2726271057128906, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.17909475740801042, |
| "grad_norm": 2.2011330127716064, |
| "learning_rate": 4.7036795831976553e-05, |
| "loss": 0.3161302185058594, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.19537609899055683, |
| "grad_norm": 5.027646541595459, |
| "learning_rate": 4.676544013893412e-05, |
| "loss": 0.22250593185424805, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.21165744057310323, |
| "grad_norm": 0.6281399726867676, |
| "learning_rate": 4.649408444589168e-05, |
| "loss": 0.26152374267578127, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.22793878215564964, |
| "grad_norm": 3.362748622894287, |
| "learning_rate": 4.622272875284924e-05, |
| "loss": 0.2384391975402832, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.24422012373819602, |
| "grad_norm": 0.9307177066802979, |
| "learning_rate": 4.5951373059806795e-05, |
| "loss": 0.19612070083618163, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.2605014653207424, |
| "grad_norm": 3.107837438583374, |
| "learning_rate": 4.5680017366764354e-05, |
| "loss": 0.22400428771972655, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2767828069032888, |
| "grad_norm": 2.3350419998168945, |
| "learning_rate": 4.540866167372192e-05, |
| "loss": 0.246726131439209, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.29306414848583523, |
| "grad_norm": 1.3348891735076904, |
| "learning_rate": 4.513730598067948e-05, |
| "loss": 0.17960617065429688, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.30934549006838163, |
| "grad_norm": 1.4406858682632446, |
| "learning_rate": 4.486595028763704e-05, |
| "loss": 0.19865007400512696, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.32562683165092804, |
| "grad_norm": 2.17195200920105, |
| "learning_rate": 4.4594594594594596e-05, |
| "loss": 0.19141647338867188, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.34190817323347444, |
| "grad_norm": 2.63667893409729, |
| "learning_rate": 4.4323238901552155e-05, |
| "loss": 0.1807699966430664, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.35818951481602085, |
| "grad_norm": 1.1883361339569092, |
| "learning_rate": 4.4051883208509714e-05, |
| "loss": 0.19377944946289063, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.37447085639856725, |
| "grad_norm": 0.6610957384109497, |
| "learning_rate": 4.378052751546728e-05, |
| "loss": 0.18130062103271485, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.39075219798111366, |
| "grad_norm": 1.802565336227417, |
| "learning_rate": 4.350917182242484e-05, |
| "loss": 0.17183830261230468, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.40703353956366006, |
| "grad_norm": 1.6211966276168823, |
| "learning_rate": 4.32378161293824e-05, |
| "loss": 0.1512114906311035, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.42331488114620647, |
| "grad_norm": 0.8947325944900513, |
| "learning_rate": 4.2966460436339956e-05, |
| "loss": 0.1733652877807617, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.4395962227287529, |
| "grad_norm": 0.31800374388694763, |
| "learning_rate": 4.2695104743297515e-05, |
| "loss": 0.1839361572265625, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.4558775643112993, |
| "grad_norm": 0.9736223220825195, |
| "learning_rate": 4.2423749050255074e-05, |
| "loss": 0.12980345726013184, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.4721589058938456, |
| "grad_norm": 1.7321406602859497, |
| "learning_rate": 4.215239335721264e-05, |
| "loss": 0.14502116203308105, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.48844024747639203, |
| "grad_norm": 0.47778311371803284, |
| "learning_rate": 4.18810376641702e-05, |
| "loss": 0.15735553741455077, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5047215890589385, |
| "grad_norm": 0.43138086795806885, |
| "learning_rate": 4.160968197112776e-05, |
| "loss": 0.13693093299865722, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.5210029306414848, |
| "grad_norm": 0.7121404409408569, |
| "learning_rate": 4.1338326278085316e-05, |
| "loss": 0.13991880416870117, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5372842722240313, |
| "grad_norm": 1.8167650699615479, |
| "learning_rate": 4.1066970585042875e-05, |
| "loss": 0.13732372283935546, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.5535656138065776, |
| "grad_norm": 1.801047921180725, |
| "learning_rate": 4.079561489200044e-05, |
| "loss": 0.1461949062347412, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5698469553891241, |
| "grad_norm": 1.2010151147842407, |
| "learning_rate": 4.0524259198958e-05, |
| "loss": 0.16667499542236328, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.5861282969716705, |
| "grad_norm": 0.31175410747528076, |
| "learning_rate": 4.025290350591556e-05, |
| "loss": 0.13411394119262696, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6024096385542169, |
| "grad_norm": 1.0062410831451416, |
| "learning_rate": 3.998154781287312e-05, |
| "loss": 0.141832914352417, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.6186909801367633, |
| "grad_norm": 0.5772050619125366, |
| "learning_rate": 3.9710192119830675e-05, |
| "loss": 0.14646322250366212, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6349723217193096, |
| "grad_norm": 1.1436623334884644, |
| "learning_rate": 3.9444263540649085e-05, |
| "loss": 0.11982306480407715, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.6512536633018561, |
| "grad_norm": 0.6914354562759399, |
| "learning_rate": 3.9172907847606644e-05, |
| "loss": 0.13149891853332518, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6675350048844024, |
| "grad_norm": 0.5716465711593628, |
| "learning_rate": 3.89015521545642e-05, |
| "loss": 0.1392893123626709, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.6838163464669489, |
| "grad_norm": 1.4796607494354248, |
| "learning_rate": 3.863019646152177e-05, |
| "loss": 0.1265252685546875, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7000976880494952, |
| "grad_norm": 0.849554717540741, |
| "learning_rate": 3.835884076847933e-05, |
| "loss": 0.1300504207611084, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.7163790296320417, |
| "grad_norm": 0.9229751825332642, |
| "learning_rate": 3.8087485075436886e-05, |
| "loss": 0.144088077545166, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.732660371214588, |
| "grad_norm": 0.6000483632087708, |
| "learning_rate": 3.7816129382394445e-05, |
| "loss": 0.1267460823059082, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.7489417127971345, |
| "grad_norm": 1.584933876991272, |
| "learning_rate": 3.7544773689352004e-05, |
| "loss": 0.12461037635803222, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.7652230543796809, |
| "grad_norm": 0.7694635987281799, |
| "learning_rate": 3.727341799630956e-05, |
| "loss": 0.1397037124633789, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.7815043959622273, |
| "grad_norm": 0.9538297653198242, |
| "learning_rate": 3.700206230326713e-05, |
| "loss": 0.09744812965393067, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.7977857375447737, |
| "grad_norm": 1.10379159450531, |
| "learning_rate": 3.673070661022469e-05, |
| "loss": 0.14414773941040038, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.8140670791273201, |
| "grad_norm": 1.6340835094451904, |
| "learning_rate": 3.6459350917182246e-05, |
| "loss": 0.11836291313171386, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8303484207098665, |
| "grad_norm": 0.06909910589456558, |
| "learning_rate": 3.6187995224139805e-05, |
| "loss": 0.13456206321716307, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.8466297622924129, |
| "grad_norm": 0.11458413302898407, |
| "learning_rate": 3.5916639531097364e-05, |
| "loss": 0.12975069046020507, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.8629111038749593, |
| "grad_norm": 0.3121241331100464, |
| "learning_rate": 3.564528383805492e-05, |
| "loss": 0.10294739723205566, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.8791924454575057, |
| "grad_norm": 0.7833127975463867, |
| "learning_rate": 3.537392814501249e-05, |
| "loss": 0.1058332633972168, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.8954737870400521, |
| "grad_norm": 1.0220922231674194, |
| "learning_rate": 3.510257245197005e-05, |
| "loss": 0.11729028701782226, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.9117551286225986, |
| "grad_norm": 0.6296119093894958, |
| "learning_rate": 3.4831216758927606e-05, |
| "loss": 0.12148540496826171, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.9280364702051449, |
| "grad_norm": 0.8129004240036011, |
| "learning_rate": 3.4559861065885164e-05, |
| "loss": 0.09763257980346679, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.9443178117876913, |
| "grad_norm": 0.6814725399017334, |
| "learning_rate": 3.428850537284272e-05, |
| "loss": 0.10192323684692382, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.9605991533702377, |
| "grad_norm": 0.19898249208927155, |
| "learning_rate": 3.401714967980029e-05, |
| "loss": 0.11552732467651367, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.9768804949527841, |
| "grad_norm": 0.7032152414321899, |
| "learning_rate": 3.374579398675785e-05, |
| "loss": 0.09218964576721192, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.9931618365353305, |
| "grad_norm": 0.5327423214912415, |
| "learning_rate": 3.3474438293715407e-05, |
| "loss": 0.11835557937622071, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_bertscore_f1": 0.9908905607812545, |
| "eval_bleu": 0.8857676606120443, |
| "eval_loss": 0.08205162733793259, |
| "eval_meteor": 0.9292767478739071, |
| "eval_rouge1": 0.9454800565736884, |
| "eval_rouge2": 0.9108168851120266, |
| "eval_runtime": 61.8203, |
| "eval_samples_per_second": 20.899, |
| "eval_steps_per_second": 2.621, |
| "step": 3071 |
| }, |
| { |
| "epoch": 1.009443178117877, |
| "grad_norm": 0.7608644366264343, |
| "learning_rate": 3.3203082600672965e-05, |
| "loss": 0.08838626861572266, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.0257245197004232, |
| "grad_norm": 0.6126351952552795, |
| "learning_rate": 3.2931726907630524e-05, |
| "loss": 0.07393273830413818, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.0420058612829697, |
| "grad_norm": 0.9907364845275879, |
| "learning_rate": 3.266037121458808e-05, |
| "loss": 0.1005620002746582, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.0582872028655161, |
| "grad_norm": 1.0079267024993896, |
| "learning_rate": 3.238901552154565e-05, |
| "loss": 0.0909033203125, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.0745685444480626, |
| "grad_norm": 1.661521315574646, |
| "learning_rate": 3.211765982850321e-05, |
| "loss": 0.07444488525390625, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.0908498860306088, |
| "grad_norm": 0.5184240341186523, |
| "learning_rate": 3.1846304135460766e-05, |
| "loss": 0.08309778213500976, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.1071312276131553, |
| "grad_norm": 1.1483348608016968, |
| "learning_rate": 3.1574948442418325e-05, |
| "loss": 0.07855434417724609, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.1234125691957018, |
| "grad_norm": 1.0581797361373901, |
| "learning_rate": 3.1303592749375884e-05, |
| "loss": 0.0779510498046875, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.1396939107782482, |
| "grad_norm": 0.3960680663585663, |
| "learning_rate": 3.103223705633344e-05, |
| "loss": 0.07558696269989014, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.1559752523607945, |
| "grad_norm": 0.7705583572387695, |
| "learning_rate": 3.076088136329101e-05, |
| "loss": 0.07015891551971436, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.172256593943341, |
| "grad_norm": 0.9814662933349609, |
| "learning_rate": 3.0489525670248564e-05, |
| "loss": 0.09184465408325196, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.1885379355258874, |
| "grad_norm": 0.16037984192371368, |
| "learning_rate": 3.0218169977206123e-05, |
| "loss": 0.10087477684020996, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.2048192771084336, |
| "grad_norm": 0.4658585488796234, |
| "learning_rate": 2.994681428416368e-05, |
| "loss": 0.0878927993774414, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.22110061869098, |
| "grad_norm": 0.6178460717201233, |
| "learning_rate": 2.967545859112124e-05, |
| "loss": 0.08248810768127442, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.2373819602735265, |
| "grad_norm": 0.8095784783363342, |
| "learning_rate": 2.9404102898078806e-05, |
| "loss": 0.07741629600524902, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.253663301856073, |
| "grad_norm": 0.7121015191078186, |
| "learning_rate": 2.9132747205036365e-05, |
| "loss": 0.06926633358001709, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.2699446434386195, |
| "grad_norm": 0.9626070857048035, |
| "learning_rate": 2.8861391511993923e-05, |
| "loss": 0.08737580299377441, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.2862259850211657, |
| "grad_norm": 1.617689847946167, |
| "learning_rate": 2.8590035818951482e-05, |
| "loss": 0.08954649925231933, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.3025073266037122, |
| "grad_norm": 0.17025412619113922, |
| "learning_rate": 2.831868012590904e-05, |
| "loss": 0.07303418159484863, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.3187886681862586, |
| "grad_norm": 0.8474647402763367, |
| "learning_rate": 2.80473244328666e-05, |
| "loss": 0.10014421463012696, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.3350700097688049, |
| "grad_norm": 1.1335641145706177, |
| "learning_rate": 2.7775968739824165e-05, |
| "loss": 0.09378931999206543, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.3513513513513513, |
| "grad_norm": 0.05914885550737381, |
| "learning_rate": 2.7504613046781724e-05, |
| "loss": 0.0685378360748291, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.3676326929338978, |
| "grad_norm": 0.49404996633529663, |
| "learning_rate": 2.7233257353739283e-05, |
| "loss": 0.0691972017288208, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.3839140345164442, |
| "grad_norm": 0.49692803621292114, |
| "learning_rate": 2.6961901660696842e-05, |
| "loss": 0.07013116836547852, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.4001953760989905, |
| "grad_norm": 1.2489663362503052, |
| "learning_rate": 2.66905459676544e-05, |
| "loss": 0.06815986156463623, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.416476717681537, |
| "grad_norm": 1.234505534172058, |
| "learning_rate": 2.641919027461196e-05, |
| "loss": 0.07438003540039062, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.4327580592640834, |
| "grad_norm": 0.5595135688781738, |
| "learning_rate": 2.6147834581569525e-05, |
| "loss": 0.08010281562805176, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.4490394008466296, |
| "grad_norm": 0.713994026184082, |
| "learning_rate": 2.5876478888527084e-05, |
| "loss": 0.08089996337890625, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.465320742429176, |
| "grad_norm": 0.41522467136383057, |
| "learning_rate": 2.5605123195484643e-05, |
| "loss": 0.07183042049407959, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.4816020840117226, |
| "grad_norm": 0.4079296290874481, |
| "learning_rate": 2.53337675024422e-05, |
| "loss": 0.07589399337768554, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.497883425594269, |
| "grad_norm": 0.3075660169124603, |
| "learning_rate": 2.506241180939976e-05, |
| "loss": 0.07919666767120362, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.5141647671768155, |
| "grad_norm": 1.5832964181900024, |
| "learning_rate": 2.4791056116357323e-05, |
| "loss": 0.06133227825164795, |
| "step": 4650 |
| }, |
| { |
| "epoch": 1.530446108759362, |
| "grad_norm": 0.32941189408302307, |
| "learning_rate": 2.451970042331488e-05, |
| "loss": 0.07278666496276856, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.5467274503419082, |
| "grad_norm": 0.5237034559249878, |
| "learning_rate": 2.4248344730272444e-05, |
| "loss": 0.07373996734619141, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.5630087919244544, |
| "grad_norm": 0.056225214153528214, |
| "learning_rate": 2.3976989037230003e-05, |
| "loss": 0.08032115936279297, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.5792901335070009, |
| "grad_norm": 0.6325415372848511, |
| "learning_rate": 2.370563334418756e-05, |
| "loss": 0.08559741973876953, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.5955714750895473, |
| "grad_norm": 1.230356216430664, |
| "learning_rate": 2.3434277651145124e-05, |
| "loss": 0.07180691242218018, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.6118528166720938, |
| "grad_norm": 3.57700252532959, |
| "learning_rate": 2.3162921958102682e-05, |
| "loss": 0.06951488494873047, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.6281341582546403, |
| "grad_norm": 1.004461646080017, |
| "learning_rate": 2.289156626506024e-05, |
| "loss": 0.057218775749206544, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.6444154998371867, |
| "grad_norm": 0.44509896636009216, |
| "learning_rate": 2.2620210572017803e-05, |
| "loss": 0.08383867263793945, |
| "step": 5050 |
| }, |
| { |
| "epoch": 1.660696841419733, |
| "grad_norm": 0.6665693521499634, |
| "learning_rate": 2.2348854878975362e-05, |
| "loss": 0.0708467960357666, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.6769781830022794, |
| "grad_norm": 0.520028293132782, |
| "learning_rate": 2.207749918593292e-05, |
| "loss": 0.07018136024475098, |
| "step": 5150 |
| }, |
| { |
| "epoch": 1.6932595245848256, |
| "grad_norm": 0.7581444382667542, |
| "learning_rate": 2.1806143492890483e-05, |
| "loss": 0.06788209915161132, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.709540866167372, |
| "grad_norm": 0.34040266275405884, |
| "learning_rate": 2.1534787799848042e-05, |
| "loss": 0.08334577560424805, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.7258222077499186, |
| "grad_norm": 0.5161302089691162, |
| "learning_rate": 2.1263432106805604e-05, |
| "loss": 0.06911201477050781, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.742103549332465, |
| "grad_norm": 0.8025581240653992, |
| "learning_rate": 2.0992076413763163e-05, |
| "loss": 0.06495306968688964, |
| "step": 5350 |
| }, |
| { |
| "epoch": 1.7583848909150115, |
| "grad_norm": 1.0504302978515625, |
| "learning_rate": 2.0720720720720722e-05, |
| "loss": 0.06523369789123536, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.774666232497558, |
| "grad_norm": 1.5722064971923828, |
| "learning_rate": 2.0449365027678284e-05, |
| "loss": 0.06998776435852051, |
| "step": 5450 |
| }, |
| { |
| "epoch": 1.7909475740801042, |
| "grad_norm": 1.4498728513717651, |
| "learning_rate": 2.0178009334635843e-05, |
| "loss": 0.07263383388519287, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.8072289156626506, |
| "grad_norm": 0.1697084903717041, |
| "learning_rate": 1.9906653641593402e-05, |
| "loss": 0.06083515644073487, |
| "step": 5550 |
| }, |
| { |
| "epoch": 1.8235102572451969, |
| "grad_norm": 0.043431248515844345, |
| "learning_rate": 1.9635297948550964e-05, |
| "loss": 0.0591968297958374, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.8397915988277433, |
| "grad_norm": 0.9290309548377991, |
| "learning_rate": 1.9363942255508523e-05, |
| "loss": 0.060645227432250974, |
| "step": 5650 |
| }, |
| { |
| "epoch": 1.8560729404102898, |
| "grad_norm": 1.0422381162643433, |
| "learning_rate": 1.9092586562466082e-05, |
| "loss": 0.07442611217498779, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.8723542819928363, |
| "grad_norm": 0.3466901183128357, |
| "learning_rate": 1.8821230869423644e-05, |
| "loss": 0.07767025470733642, |
| "step": 5750 |
| }, |
| { |
| "epoch": 1.8886356235753827, |
| "grad_norm": 0.39657458662986755, |
| "learning_rate": 1.8549875176381203e-05, |
| "loss": 0.06347317218780518, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.904916965157929, |
| "grad_norm": 1.00450599193573, |
| "learning_rate": 1.827851948333876e-05, |
| "loss": 0.06967205524444581, |
| "step": 5850 |
| }, |
| { |
| "epoch": 1.9211983067404754, |
| "grad_norm": 1.1727004051208496, |
| "learning_rate": 1.800716379029632e-05, |
| "loss": 0.06747759819030762, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.9374796483230217, |
| "grad_norm": 0.8829087615013123, |
| "learning_rate": 1.7735808097253883e-05, |
| "loss": 0.07414731979370118, |
| "step": 5950 |
| }, |
| { |
| "epoch": 1.9537609899055681, |
| "grad_norm": 1.3967463970184326, |
| "learning_rate": 1.746445240421144e-05, |
| "loss": 0.06446901321411133, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.9700423314881146, |
| "grad_norm": 0.9375430345535278, |
| "learning_rate": 1.7193096711169e-05, |
| "loss": 0.06800864696502686, |
| "step": 6050 |
| }, |
| { |
| "epoch": 1.986323673070661, |
| "grad_norm": 0.9516276717185974, |
| "learning_rate": 1.692174101812656e-05, |
| "loss": 0.08866607666015625, |
| "step": 6100 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_bertscore_f1": 0.9923369143584934, |
| "eval_bleu": 0.8956281706064034, |
| "eval_loss": 0.06535279005765915, |
| "eval_meteor": 0.938086576675145, |
| "eval_rouge1": 0.950788798151768, |
| "eval_rouge2": 0.9176212368118313, |
| "eval_runtime": 58.3038, |
| "eval_samples_per_second": 22.16, |
| "eval_steps_per_second": 2.779, |
| "step": 6142 |
| }, |
| { |
| "epoch": 2.0026050146532075, |
| "grad_norm": 0.3171500861644745, |
| "learning_rate": 1.665038532508412e-05, |
| "loss": 0.06093011379241944, |
| "step": 6150 |
| }, |
| { |
| "epoch": 2.018886356235754, |
| "grad_norm": 0.3844246566295624, |
| "learning_rate": 1.637902963204168e-05, |
| "loss": 0.05403701782226562, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.0351676978183004, |
| "grad_norm": 1.5091606378555298, |
| "learning_rate": 1.610767393899924e-05, |
| "loss": 0.06063016414642334, |
| "step": 6250 |
| }, |
| { |
| "epoch": 2.0514490394008464, |
| "grad_norm": 0.5060765743255615, |
| "learning_rate": 1.58363182459568e-05, |
| "loss": 0.06424860954284668, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.067730380983393, |
| "grad_norm": 0.5501185059547424, |
| "learning_rate": 1.556496255291436e-05, |
| "loss": 0.052588853836059574, |
| "step": 6350 |
| }, |
| { |
| "epoch": 2.0840117225659394, |
| "grad_norm": 0.5140529274940491, |
| "learning_rate": 1.529360685987192e-05, |
| "loss": 0.05470933437347412, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.100293064148486, |
| "grad_norm": 0.13059721887111664, |
| "learning_rate": 1.5022251166829483e-05, |
| "loss": 0.05880857944488525, |
| "step": 6450 |
| }, |
| { |
| "epoch": 2.1165744057310323, |
| "grad_norm": 0.5545864701271057, |
| "learning_rate": 1.4750895473787041e-05, |
| "loss": 0.05454400062561035, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.1328557473135787, |
| "grad_norm": 0.7566473484039307, |
| "learning_rate": 1.44795397807446e-05, |
| "loss": 0.05996315956115723, |
| "step": 6550 |
| }, |
| { |
| "epoch": 2.149137088896125, |
| "grad_norm": 0.6309687495231628, |
| "learning_rate": 1.420818408770216e-05, |
| "loss": 0.06270824909210206, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.165418430478671, |
| "grad_norm": 0.6882494688034058, |
| "learning_rate": 1.3936828394659721e-05, |
| "loss": 0.0579791259765625, |
| "step": 6650 |
| }, |
| { |
| "epoch": 2.1816997720612177, |
| "grad_norm": 0.5102435946464539, |
| "learning_rate": 1.366547270161728e-05, |
| "loss": 0.05909278869628906, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.197981113643764, |
| "grad_norm": 0.5612519979476929, |
| "learning_rate": 1.339411700857484e-05, |
| "loss": 0.0631598711013794, |
| "step": 6750 |
| }, |
| { |
| "epoch": 2.2142624552263106, |
| "grad_norm": 0.5335197448730469, |
| "learning_rate": 1.31227613155324e-05, |
| "loss": 0.061668686866760254, |
| "step": 6800 |
| }, |
| { |
| "epoch": 2.230543796808857, |
| "grad_norm": 0.26907965540885925, |
| "learning_rate": 1.285140562248996e-05, |
| "loss": 0.05813938617706299, |
| "step": 6850 |
| }, |
| { |
| "epoch": 2.2468251383914035, |
| "grad_norm": 0.9871731996536255, |
| "learning_rate": 1.258004992944752e-05, |
| "loss": 0.06166846752166748, |
| "step": 6900 |
| }, |
| { |
| "epoch": 2.26310647997395, |
| "grad_norm": 0.7092576622962952, |
| "learning_rate": 1.230869423640508e-05, |
| "loss": 0.05214274883270264, |
| "step": 6950 |
| }, |
| { |
| "epoch": 2.2793878215564964, |
| "grad_norm": 0.6084023714065552, |
| "learning_rate": 1.203733854336264e-05, |
| "loss": 0.06180807590484619, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.295669163139043, |
| "grad_norm": 1.0545355081558228, |
| "learning_rate": 1.17659828503202e-05, |
| "loss": 0.05810202598571777, |
| "step": 7050 |
| }, |
| { |
| "epoch": 2.311950504721589, |
| "grad_norm": 0.9563855528831482, |
| "learning_rate": 1.149462715727776e-05, |
| "loss": 0.05618003368377686, |
| "step": 7100 |
| }, |
| { |
| "epoch": 2.3282318463041354, |
| "grad_norm": 0.6173250079154968, |
| "learning_rate": 1.122327146423532e-05, |
| "loss": 0.06320930480957031, |
| "step": 7150 |
| }, |
| { |
| "epoch": 2.344513187886682, |
| "grad_norm": 0.08047935366630554, |
| "learning_rate": 1.095191577119288e-05, |
| "loss": 0.058518905639648434, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.3607945294692283, |
| "grad_norm": 0.6877385973930359, |
| "learning_rate": 1.0680560078150439e-05, |
| "loss": 0.07214242458343506, |
| "step": 7250 |
| }, |
| { |
| "epoch": 2.3770758710517748, |
| "grad_norm": 0.7921647429466248, |
| "learning_rate": 1.0409204385108e-05, |
| "loss": 0.0554658842086792, |
| "step": 7300 |
| }, |
| { |
| "epoch": 2.393357212634321, |
| "grad_norm": 0.23214460909366608, |
| "learning_rate": 1.013784869206556e-05, |
| "loss": 0.05945809364318848, |
| "step": 7350 |
| }, |
| { |
| "epoch": 2.4096385542168672, |
| "grad_norm": 0.23501083254814148, |
| "learning_rate": 9.86649299902312e-06, |
| "loss": 0.04184418678283691, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.4259198957994137, |
| "grad_norm": 0.19243040680885315, |
| "learning_rate": 9.59513730598068e-06, |
| "loss": 0.0685301399230957, |
| "step": 7450 |
| }, |
| { |
| "epoch": 2.44220123738196, |
| "grad_norm": 0.03571745380759239, |
| "learning_rate": 9.32378161293824e-06, |
| "loss": 0.05060723781585694, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.4584825789645066, |
| "grad_norm": 0.06310860812664032, |
| "learning_rate": 9.0524259198958e-06, |
| "loss": 0.06717358589172363, |
| "step": 7550 |
| }, |
| { |
| "epoch": 2.474763920547053, |
| "grad_norm": 0.4403184950351715, |
| "learning_rate": 8.78107022685336e-06, |
| "loss": 0.0508097505569458, |
| "step": 7600 |
| }, |
| { |
| "epoch": 2.4910452621295995, |
| "grad_norm": 0.1725953370332718, |
| "learning_rate": 8.50971453381092e-06, |
| "loss": 0.06436698913574218, |
| "step": 7650 |
| }, |
| { |
| "epoch": 2.507326603712146, |
| "grad_norm": 0.560205340385437, |
| "learning_rate": 8.23835884076848e-06, |
| "loss": 0.05466559410095215, |
| "step": 7700 |
| }, |
| { |
| "epoch": 2.5236079452946925, |
| "grad_norm": 0.8589635491371155, |
| "learning_rate": 7.96700314772604e-06, |
| "loss": 0.052462191581726075, |
| "step": 7750 |
| }, |
| { |
| "epoch": 2.539889286877239, |
| "grad_norm": 0.40856632590293884, |
| "learning_rate": 7.6956474546836e-06, |
| "loss": 0.057110257148742676, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.556170628459785, |
| "grad_norm": 0.2351612001657486, |
| "learning_rate": 7.424291761641159e-06, |
| "loss": 0.049067635536193845, |
| "step": 7850 |
| }, |
| { |
| "epoch": 2.5724519700423314, |
| "grad_norm": 0.3204529583454132, |
| "learning_rate": 7.15293606859872e-06, |
| "loss": 0.049645824432373045, |
| "step": 7900 |
| }, |
| { |
| "epoch": 2.588733311624878, |
| "grad_norm": 0.43326708674430847, |
| "learning_rate": 6.881580375556279e-06, |
| "loss": 0.050102224349975584, |
| "step": 7950 |
| }, |
| { |
| "epoch": 2.6050146532074243, |
| "grad_norm": 1.065234899520874, |
| "learning_rate": 6.610224682513839e-06, |
| "loss": 0.06275768280029297, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.6212959947899708, |
| "grad_norm": 0.26160168647766113, |
| "learning_rate": 6.3388689894714e-06, |
| "loss": 0.04881012439727783, |
| "step": 8050 |
| }, |
| { |
| "epoch": 2.6375773363725172, |
| "grad_norm": 0.6686789989471436, |
| "learning_rate": 6.067513296428959e-06, |
| "loss": 0.058712401390075684, |
| "step": 8100 |
| }, |
| { |
| "epoch": 2.6538586779550632, |
| "grad_norm": 0.4735671281814575, |
| "learning_rate": 5.796157603386519e-06, |
| "loss": 0.05793766498565674, |
| "step": 8150 |
| }, |
| { |
| "epoch": 2.6701400195376097, |
| "grad_norm": 0.9112767577171326, |
| "learning_rate": 5.5248019103440796e-06, |
| "loss": 0.05646980285644531, |
| "step": 8200 |
| }, |
| { |
| "epoch": 2.686421361120156, |
| "grad_norm": 0.3665359914302826, |
| "learning_rate": 5.253446217301639e-06, |
| "loss": 0.05863104820251465, |
| "step": 8250 |
| }, |
| { |
| "epoch": 2.7027027027027026, |
| "grad_norm": 0.39087387919425964, |
| "learning_rate": 4.982090524259199e-06, |
| "loss": 0.04811685085296631, |
| "step": 8300 |
| }, |
| { |
| "epoch": 2.718984044285249, |
| "grad_norm": 0.7103152871131897, |
| "learning_rate": 4.7107348312167594e-06, |
| "loss": 0.0660721492767334, |
| "step": 8350 |
| }, |
| { |
| "epoch": 2.7352653858677956, |
| "grad_norm": 0.30644118785858154, |
| "learning_rate": 4.439379138174319e-06, |
| "loss": 0.061232595443725585, |
| "step": 8400 |
| }, |
| { |
| "epoch": 2.751546727450342, |
| "grad_norm": 0.6912480592727661, |
| "learning_rate": 4.16802344513188e-06, |
| "loss": 0.0465062952041626, |
| "step": 8450 |
| }, |
| { |
| "epoch": 2.7678280690328885, |
| "grad_norm": 0.2372223436832428, |
| "learning_rate": 3.896667752089439e-06, |
| "loss": 0.05613251686096191, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.784109410615435, |
| "grad_norm": 0.3588544428348541, |
| "learning_rate": 3.6253120590469985e-06, |
| "loss": 0.06228278636932373, |
| "step": 8550 |
| }, |
| { |
| "epoch": 2.800390752197981, |
| "grad_norm": 0.8760668039321899, |
| "learning_rate": 3.353956366004559e-06, |
| "loss": 0.058021135330200195, |
| "step": 8600 |
| }, |
| { |
| "epoch": 2.8166720937805274, |
| "grad_norm": 0.39105167984962463, |
| "learning_rate": 3.0826006729621187e-06, |
| "loss": 0.05042066097259521, |
| "step": 8650 |
| }, |
| { |
| "epoch": 2.832953435363074, |
| "grad_norm": 0.8453779816627502, |
| "learning_rate": 2.811244979919679e-06, |
| "loss": 0.05310141086578369, |
| "step": 8700 |
| }, |
| { |
| "epoch": 2.8492347769456203, |
| "grad_norm": 0.3931414484977722, |
| "learning_rate": 2.539889286877239e-06, |
| "loss": 0.04613284111022949, |
| "step": 8750 |
| }, |
| { |
| "epoch": 2.865516118528167, |
| "grad_norm": 0.601372480392456, |
| "learning_rate": 2.268533593834799e-06, |
| "loss": 0.055337414741516114, |
| "step": 8800 |
| }, |
| { |
| "epoch": 2.8817974601107132, |
| "grad_norm": 0.40091976523399353, |
| "learning_rate": 1.9971779007923587e-06, |
| "loss": 0.05075720310211182, |
| "step": 8850 |
| }, |
| { |
| "epoch": 2.8980788016932593, |
| "grad_norm": 0.9332064986228943, |
| "learning_rate": 1.7258222077499185e-06, |
| "loss": 0.04717796325683594, |
| "step": 8900 |
| }, |
| { |
| "epoch": 2.9143601432758057, |
| "grad_norm": 0.7941976189613342, |
| "learning_rate": 1.4544665147074786e-06, |
| "loss": 0.07562547206878661, |
| "step": 8950 |
| }, |
| { |
| "epoch": 2.930641484858352, |
| "grad_norm": 0.6598140597343445, |
| "learning_rate": 1.1831108216650385e-06, |
| "loss": 0.05916054248809814, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.9469228264408986, |
| "grad_norm": 0.07086297869682312, |
| "learning_rate": 9.117551286225986e-07, |
| "loss": 0.05104278087615967, |
| "step": 9050 |
| }, |
| { |
| "epoch": 2.963204168023445, |
| "grad_norm": 0.5035263299942017, |
| "learning_rate": 6.403994355801585e-07, |
| "loss": 0.04357606887817383, |
| "step": 9100 |
| }, |
| { |
| "epoch": 2.9794855096059916, |
| "grad_norm": 0.28602153062820435, |
| "learning_rate": 3.690437425377185e-07, |
| "loss": 0.04314669132232666, |
| "step": 9150 |
| }, |
| { |
| "epoch": 2.995766851188538, |
| "grad_norm": 0.24384021759033203, |
| "learning_rate": 9.768804949527842e-08, |
| "loss": 0.04970499038696289, |
| "step": 9200 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_bertscore_f1": 0.9924511363724068, |
| "eval_bleu": 0.9035076605970417, |
| "eval_loss": 0.058707889169454575, |
| "eval_meteor": 0.9399283468508673, |
| "eval_rouge1": 0.9525840238092467, |
| "eval_rouge2": 0.9200012975801428, |
| "eval_runtime": 58.5972, |
| "eval_samples_per_second": 22.049, |
| "eval_steps_per_second": 2.765, |
| "step": 9213 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 9213, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.24385758724096e+16, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|