{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 9213, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0162813415825464, "grad_norm": 3.7931034564971924, "learning_rate": 4.975035276240096e-05, "loss": 1.7502801513671875, "step": 50 }, { "epoch": 0.0325626831650928, "grad_norm": 3.5913758277893066, "learning_rate": 4.947899706935852e-05, "loss": 0.8483324432373047, "step": 100 }, { "epoch": 0.04884402474763921, "grad_norm": 3.3178822994232178, "learning_rate": 4.9207641376316076e-05, "loss": 0.5748957061767578, "step": 150 }, { "epoch": 0.0651253663301856, "grad_norm": 2.392831802368164, "learning_rate": 4.8936285683273635e-05, "loss": 0.4335686492919922, "step": 200 }, { "epoch": 0.08140670791273201, "grad_norm": 2.411132335662842, "learning_rate": 4.8664929990231194e-05, "loss": 0.45588829040527346, "step": 250 }, { "epoch": 0.09768804949527841, "grad_norm": 3.383033275604248, "learning_rate": 4.839357429718876e-05, "loss": 0.38454761505126955, "step": 300 }, { "epoch": 0.11396939107782482, "grad_norm": 1.5641525983810425, "learning_rate": 4.812221860414632e-05, "loss": 0.31118762969970704, "step": 350 }, { "epoch": 0.1302507326603712, "grad_norm": 1.962287425994873, "learning_rate": 4.785086291110388e-05, "loss": 0.32715892791748047, "step": 400 }, { "epoch": 0.14653207424291761, "grad_norm": 2.539684534072876, "learning_rate": 4.7579507218061436e-05, "loss": 0.3269093704223633, "step": 450 }, { "epoch": 0.16281341582546402, "grad_norm": 3.244333267211914, "learning_rate": 4.7308151525018995e-05, "loss": 0.2726271057128906, "step": 500 }, { "epoch": 0.17909475740801042, "grad_norm": 2.2011330127716064, "learning_rate": 4.7036795831976553e-05, "loss": 0.3161302185058594, "step": 550 }, { "epoch": 0.19537609899055683, "grad_norm": 5.027646541595459, "learning_rate": 4.676544013893412e-05, "loss": 0.22250593185424805, "step": 600 }, { "epoch": 0.21165744057310323, "grad_norm": 0.6281399726867676, "learning_rate": 4.649408444589168e-05, "loss": 0.26152374267578127, "step": 650 }, { "epoch": 0.22793878215564964, "grad_norm": 3.362748622894287, "learning_rate": 4.622272875284924e-05, "loss": 0.2384391975402832, "step": 700 }, { "epoch": 0.24422012373819602, "grad_norm": 0.9307177066802979, "learning_rate": 4.5951373059806795e-05, "loss": 0.19612070083618163, "step": 750 }, { "epoch": 0.2605014653207424, "grad_norm": 3.107837438583374, "learning_rate": 4.5680017366764354e-05, "loss": 0.22400428771972655, "step": 800 }, { "epoch": 0.2767828069032888, "grad_norm": 2.3350419998168945, "learning_rate": 4.540866167372192e-05, "loss": 0.246726131439209, "step": 850 }, { "epoch": 0.29306414848583523, "grad_norm": 1.3348891735076904, "learning_rate": 4.513730598067948e-05, "loss": 0.17960617065429688, "step": 900 }, { "epoch": 0.30934549006838163, "grad_norm": 1.4406858682632446, "learning_rate": 4.486595028763704e-05, "loss": 0.19865007400512696, "step": 950 }, { "epoch": 0.32562683165092804, "grad_norm": 2.17195200920105, "learning_rate": 4.4594594594594596e-05, "loss": 0.19141647338867188, "step": 1000 }, { "epoch": 0.34190817323347444, "grad_norm": 2.63667893409729, "learning_rate": 4.4323238901552155e-05, "loss": 0.1807699966430664, "step": 1050 }, { "epoch": 0.35818951481602085, "grad_norm": 1.1883361339569092, "learning_rate": 4.4051883208509714e-05, "loss": 0.19377944946289063, "step": 1100 }, { "epoch": 0.37447085639856725, "grad_norm": 0.6610957384109497, "learning_rate": 4.378052751546728e-05, "loss": 0.18130062103271485, "step": 1150 }, { "epoch": 0.39075219798111366, "grad_norm": 1.802565336227417, "learning_rate": 4.350917182242484e-05, "loss": 0.17183830261230468, "step": 1200 }, { "epoch": 0.40703353956366006, "grad_norm": 1.6211966276168823, "learning_rate": 4.32378161293824e-05, "loss": 0.1512114906311035, "step": 1250 }, { "epoch": 0.42331488114620647, "grad_norm": 0.8947325944900513, "learning_rate": 4.2966460436339956e-05, "loss": 0.1733652877807617, "step": 1300 }, { "epoch": 0.4395962227287529, "grad_norm": 0.31800374388694763, "learning_rate": 4.2695104743297515e-05, "loss": 0.1839361572265625, "step": 1350 }, { "epoch": 0.4558775643112993, "grad_norm": 0.9736223220825195, "learning_rate": 4.2423749050255074e-05, "loss": 0.12980345726013184, "step": 1400 }, { "epoch": 0.4721589058938456, "grad_norm": 1.7321406602859497, "learning_rate": 4.215239335721264e-05, "loss": 0.14502116203308105, "step": 1450 }, { "epoch": 0.48844024747639203, "grad_norm": 0.47778311371803284, "learning_rate": 4.18810376641702e-05, "loss": 0.15735553741455077, "step": 1500 }, { "epoch": 0.5047215890589385, "grad_norm": 0.43138086795806885, "learning_rate": 4.160968197112776e-05, "loss": 0.13693093299865722, "step": 1550 }, { "epoch": 0.5210029306414848, "grad_norm": 0.7121404409408569, "learning_rate": 4.1338326278085316e-05, "loss": 0.13991880416870117, "step": 1600 }, { "epoch": 0.5372842722240313, "grad_norm": 1.8167650699615479, "learning_rate": 4.1066970585042875e-05, "loss": 0.13732372283935546, "step": 1650 }, { "epoch": 0.5535656138065776, "grad_norm": 1.801047921180725, "learning_rate": 4.079561489200044e-05, "loss": 0.1461949062347412, "step": 1700 }, { "epoch": 0.5698469553891241, "grad_norm": 1.2010151147842407, "learning_rate": 4.0524259198958e-05, "loss": 0.16667499542236328, "step": 1750 }, { "epoch": 0.5861282969716705, "grad_norm": 0.31175410747528076, "learning_rate": 4.025290350591556e-05, "loss": 0.13411394119262696, "step": 1800 }, { "epoch": 0.6024096385542169, "grad_norm": 1.0062410831451416, "learning_rate": 3.998154781287312e-05, "loss": 0.141832914352417, "step": 1850 }, { "epoch": 0.6186909801367633, "grad_norm": 0.5772050619125366, "learning_rate": 3.9710192119830675e-05, "loss": 0.14646322250366212, "step": 1900 }, { "epoch": 0.6349723217193096, "grad_norm": 1.1436623334884644, "learning_rate": 3.9444263540649085e-05, "loss": 0.11982306480407715, "step": 1950 }, { "epoch": 0.6512536633018561, "grad_norm": 0.6914354562759399, "learning_rate": 3.9172907847606644e-05, "loss": 0.13149891853332518, "step": 2000 }, { "epoch": 0.6675350048844024, "grad_norm": 0.5716465711593628, "learning_rate": 3.89015521545642e-05, "loss": 0.1392893123626709, "step": 2050 }, { "epoch": 0.6838163464669489, "grad_norm": 1.4796607494354248, "learning_rate": 3.863019646152177e-05, "loss": 0.1265252685546875, "step": 2100 }, { "epoch": 0.7000976880494952, "grad_norm": 0.849554717540741, "learning_rate": 3.835884076847933e-05, "loss": 0.1300504207611084, "step": 2150 }, { "epoch": 0.7163790296320417, "grad_norm": 0.9229751825332642, "learning_rate": 3.8087485075436886e-05, "loss": 0.144088077545166, "step": 2200 }, { "epoch": 0.732660371214588, "grad_norm": 0.6000483632087708, "learning_rate": 3.7816129382394445e-05, "loss": 0.1267460823059082, "step": 2250 }, { "epoch": 0.7489417127971345, "grad_norm": 1.584933876991272, "learning_rate": 3.7544773689352004e-05, "loss": 0.12461037635803222, "step": 2300 }, { "epoch": 0.7652230543796809, "grad_norm": 0.7694635987281799, "learning_rate": 3.727341799630956e-05, "loss": 0.1397037124633789, "step": 2350 }, { "epoch": 0.7815043959622273, "grad_norm": 0.9538297653198242, "learning_rate": 3.700206230326713e-05, "loss": 0.09744812965393067, "step": 2400 }, { "epoch": 0.7977857375447737, "grad_norm": 1.10379159450531, "learning_rate": 3.673070661022469e-05, "loss": 0.14414773941040038, "step": 2450 }, { "epoch": 0.8140670791273201, "grad_norm": 1.6340835094451904, "learning_rate": 3.6459350917182246e-05, "loss": 0.11836291313171386, "step": 2500 }, { "epoch": 0.8303484207098665, "grad_norm": 0.06909910589456558, "learning_rate": 3.6187995224139805e-05, "loss": 0.13456206321716307, "step": 2550 }, { "epoch": 0.8466297622924129, "grad_norm": 0.11458413302898407, "learning_rate": 3.5916639531097364e-05, "loss": 0.12975069046020507, "step": 2600 }, { "epoch": 0.8629111038749593, "grad_norm": 0.3121241331100464, "learning_rate": 3.564528383805492e-05, "loss": 0.10294739723205566, "step": 2650 }, { "epoch": 0.8791924454575057, "grad_norm": 0.7833127975463867, "learning_rate": 3.537392814501249e-05, "loss": 0.1058332633972168, "step": 2700 }, { "epoch": 0.8954737870400521, "grad_norm": 1.0220922231674194, "learning_rate": 3.510257245197005e-05, "loss": 0.11729028701782226, "step": 2750 }, { "epoch": 0.9117551286225986, "grad_norm": 0.6296119093894958, "learning_rate": 3.4831216758927606e-05, "loss": 0.12148540496826171, "step": 2800 }, { "epoch": 0.9280364702051449, "grad_norm": 0.8129004240036011, "learning_rate": 3.4559861065885164e-05, "loss": 0.09763257980346679, "step": 2850 }, { "epoch": 0.9443178117876913, "grad_norm": 0.6814725399017334, "learning_rate": 3.428850537284272e-05, "loss": 0.10192323684692382, "step": 2900 }, { "epoch": 0.9605991533702377, "grad_norm": 0.19898249208927155, "learning_rate": 3.401714967980029e-05, "loss": 0.11552732467651367, "step": 2950 }, { "epoch": 0.9768804949527841, "grad_norm": 0.7032152414321899, "learning_rate": 3.374579398675785e-05, "loss": 0.09218964576721192, "step": 3000 }, { "epoch": 0.9931618365353305, "grad_norm": 0.5327423214912415, "learning_rate": 3.3474438293715407e-05, "loss": 0.11835557937622071, "step": 3050 }, { "epoch": 1.0, "eval_bertscore_f1": 0.9908905607812545, "eval_bleu": 0.8857676606120443, "eval_loss": 0.08205162733793259, "eval_meteor": 0.9292767478739071, "eval_rouge1": 0.9454800565736884, "eval_rouge2": 0.9108168851120266, "eval_runtime": 61.8203, "eval_samples_per_second": 20.899, "eval_steps_per_second": 2.621, "step": 3071 }, { "epoch": 1.009443178117877, "grad_norm": 0.7608644366264343, "learning_rate": 3.3203082600672965e-05, "loss": 0.08838626861572266, "step": 3100 }, { "epoch": 1.0257245197004232, "grad_norm": 0.6126351952552795, "learning_rate": 3.2931726907630524e-05, "loss": 0.07393273830413818, "step": 3150 }, { "epoch": 1.0420058612829697, "grad_norm": 0.9907364845275879, "learning_rate": 3.266037121458808e-05, "loss": 0.1005620002746582, "step": 3200 }, { "epoch": 1.0582872028655161, "grad_norm": 1.0079267024993896, "learning_rate": 3.238901552154565e-05, "loss": 0.0909033203125, "step": 3250 }, { "epoch": 1.0745685444480626, "grad_norm": 1.661521315574646, "learning_rate": 3.211765982850321e-05, "loss": 0.07444488525390625, "step": 3300 }, { "epoch": 1.0908498860306088, "grad_norm": 0.5184240341186523, "learning_rate": 3.1846304135460766e-05, "loss": 0.08309778213500976, "step": 3350 }, { "epoch": 1.1071312276131553, "grad_norm": 1.1483348608016968, "learning_rate": 3.1574948442418325e-05, "loss": 0.07855434417724609, "step": 3400 }, { "epoch": 1.1234125691957018, "grad_norm": 1.0581797361373901, "learning_rate": 3.1303592749375884e-05, "loss": 0.0779510498046875, "step": 3450 }, { "epoch": 1.1396939107782482, "grad_norm": 0.3960680663585663, "learning_rate": 3.103223705633344e-05, "loss": 0.07558696269989014, "step": 3500 }, { "epoch": 1.1559752523607945, "grad_norm": 0.7705583572387695, "learning_rate": 3.076088136329101e-05, "loss": 0.07015891551971436, "step": 3550 }, { "epoch": 1.172256593943341, "grad_norm": 0.9814662933349609, "learning_rate": 3.0489525670248564e-05, "loss": 0.09184465408325196, "step": 3600 }, { "epoch": 1.1885379355258874, "grad_norm": 0.16037984192371368, "learning_rate": 3.0218169977206123e-05, "loss": 0.10087477684020996, "step": 3650 }, { "epoch": 1.2048192771084336, "grad_norm": 0.4658585488796234, "learning_rate": 2.994681428416368e-05, "loss": 0.0878927993774414, "step": 3700 }, { "epoch": 1.22110061869098, "grad_norm": 0.6178460717201233, "learning_rate": 2.967545859112124e-05, "loss": 0.08248810768127442, "step": 3750 }, { "epoch": 1.2373819602735265, "grad_norm": 0.8095784783363342, "learning_rate": 2.9404102898078806e-05, "loss": 0.07741629600524902, "step": 3800 }, { "epoch": 1.253663301856073, "grad_norm": 0.7121015191078186, "learning_rate": 2.9132747205036365e-05, "loss": 0.06926633358001709, "step": 3850 }, { "epoch": 1.2699446434386195, "grad_norm": 0.9626070857048035, "learning_rate": 2.8861391511993923e-05, "loss": 0.08737580299377441, "step": 3900 }, { "epoch": 1.2862259850211657, "grad_norm": 1.617689847946167, "learning_rate": 2.8590035818951482e-05, "loss": 0.08954649925231933, "step": 3950 }, { "epoch": 1.3025073266037122, "grad_norm": 0.17025412619113922, "learning_rate": 2.831868012590904e-05, "loss": 0.07303418159484863, "step": 4000 }, { "epoch": 1.3187886681862586, "grad_norm": 0.8474647402763367, "learning_rate": 2.80473244328666e-05, "loss": 0.10014421463012696, "step": 4050 }, { "epoch": 1.3350700097688049, "grad_norm": 1.1335641145706177, "learning_rate": 2.7775968739824165e-05, "loss": 0.09378931999206543, "step": 4100 }, { "epoch": 1.3513513513513513, "grad_norm": 0.05914885550737381, "learning_rate": 2.7504613046781724e-05, "loss": 0.0685378360748291, "step": 4150 }, { "epoch": 1.3676326929338978, "grad_norm": 0.49404996633529663, "learning_rate": 2.7233257353739283e-05, "loss": 0.0691972017288208, "step": 4200 }, { "epoch": 1.3839140345164442, "grad_norm": 0.49692803621292114, "learning_rate": 2.6961901660696842e-05, "loss": 0.07013116836547852, "step": 4250 }, { "epoch": 1.4001953760989905, "grad_norm": 1.2489663362503052, "learning_rate": 2.66905459676544e-05, "loss": 0.06815986156463623, "step": 4300 }, { "epoch": 1.416476717681537, "grad_norm": 1.234505534172058, "learning_rate": 2.641919027461196e-05, "loss": 0.07438003540039062, "step": 4350 }, { "epoch": 1.4327580592640834, "grad_norm": 0.5595135688781738, "learning_rate": 2.6147834581569525e-05, "loss": 0.08010281562805176, "step": 4400 }, { "epoch": 1.4490394008466296, "grad_norm": 0.713994026184082, "learning_rate": 2.5876478888527084e-05, "loss": 0.08089996337890625, "step": 4450 }, { "epoch": 1.465320742429176, "grad_norm": 0.41522467136383057, "learning_rate": 2.5605123195484643e-05, "loss": 0.07183042049407959, "step": 4500 }, { "epoch": 1.4816020840117226, "grad_norm": 0.4079296290874481, "learning_rate": 2.53337675024422e-05, "loss": 0.07589399337768554, "step": 4550 }, { "epoch": 1.497883425594269, "grad_norm": 0.3075660169124603, "learning_rate": 2.506241180939976e-05, "loss": 0.07919666767120362, "step": 4600 }, { "epoch": 1.5141647671768155, "grad_norm": 1.5832964181900024, "learning_rate": 2.4791056116357323e-05, "loss": 0.06133227825164795, "step": 4650 }, { "epoch": 1.530446108759362, "grad_norm": 0.32941189408302307, "learning_rate": 2.451970042331488e-05, "loss": 0.07278666496276856, "step": 4700 }, { "epoch": 1.5467274503419082, "grad_norm": 0.5237034559249878, "learning_rate": 2.4248344730272444e-05, "loss": 0.07373996734619141, "step": 4750 }, { "epoch": 1.5630087919244544, "grad_norm": 0.056225214153528214, "learning_rate": 2.3976989037230003e-05, "loss": 0.08032115936279297, "step": 4800 }, { "epoch": 1.5792901335070009, "grad_norm": 0.6325415372848511, "learning_rate": 2.370563334418756e-05, "loss": 0.08559741973876953, "step": 4850 }, { "epoch": 1.5955714750895473, "grad_norm": 1.230356216430664, "learning_rate": 2.3434277651145124e-05, "loss": 0.07180691242218018, "step": 4900 }, { "epoch": 1.6118528166720938, "grad_norm": 3.57700252532959, "learning_rate": 2.3162921958102682e-05, "loss": 0.06951488494873047, "step": 4950 }, { "epoch": 1.6281341582546403, "grad_norm": 1.004461646080017, "learning_rate": 2.289156626506024e-05, "loss": 0.057218775749206544, "step": 5000 }, { "epoch": 1.6444154998371867, "grad_norm": 0.44509896636009216, "learning_rate": 2.2620210572017803e-05, "loss": 0.08383867263793945, "step": 5050 }, { "epoch": 1.660696841419733, "grad_norm": 0.6665693521499634, "learning_rate": 2.2348854878975362e-05, "loss": 0.0708467960357666, "step": 5100 }, { "epoch": 1.6769781830022794, "grad_norm": 0.520028293132782, "learning_rate": 2.207749918593292e-05, "loss": 0.07018136024475098, "step": 5150 }, { "epoch": 1.6932595245848256, "grad_norm": 0.7581444382667542, "learning_rate": 2.1806143492890483e-05, "loss": 0.06788209915161132, "step": 5200 }, { "epoch": 1.709540866167372, "grad_norm": 0.34040266275405884, "learning_rate": 2.1534787799848042e-05, "loss": 0.08334577560424805, "step": 5250 }, { "epoch": 1.7258222077499186, "grad_norm": 0.5161302089691162, "learning_rate": 2.1263432106805604e-05, "loss": 0.06911201477050781, "step": 5300 }, { "epoch": 1.742103549332465, "grad_norm": 0.8025581240653992, "learning_rate": 2.0992076413763163e-05, "loss": 0.06495306968688964, "step": 5350 }, { "epoch": 1.7583848909150115, "grad_norm": 1.0504302978515625, "learning_rate": 2.0720720720720722e-05, "loss": 0.06523369789123536, "step": 5400 }, { "epoch": 1.774666232497558, "grad_norm": 1.5722064971923828, "learning_rate": 2.0449365027678284e-05, "loss": 0.06998776435852051, "step": 5450 }, { "epoch": 1.7909475740801042, "grad_norm": 1.4498728513717651, "learning_rate": 2.0178009334635843e-05, "loss": 0.07263383388519287, "step": 5500 }, { "epoch": 1.8072289156626506, "grad_norm": 0.1697084903717041, "learning_rate": 1.9906653641593402e-05, "loss": 0.06083515644073487, "step": 5550 }, { "epoch": 1.8235102572451969, "grad_norm": 0.043431248515844345, "learning_rate": 1.9635297948550964e-05, "loss": 0.0591968297958374, "step": 5600 }, { "epoch": 1.8397915988277433, "grad_norm": 0.9290309548377991, "learning_rate": 1.9363942255508523e-05, "loss": 0.060645227432250974, "step": 5650 }, { "epoch": 1.8560729404102898, "grad_norm": 1.0422381162643433, "learning_rate": 1.9092586562466082e-05, "loss": 0.07442611217498779, "step": 5700 }, { "epoch": 1.8723542819928363, "grad_norm": 0.3466901183128357, "learning_rate": 1.8821230869423644e-05, "loss": 0.07767025470733642, "step": 5750 }, { "epoch": 1.8886356235753827, "grad_norm": 0.39657458662986755, "learning_rate": 1.8549875176381203e-05, "loss": 0.06347317218780518, "step": 5800 }, { "epoch": 1.904916965157929, "grad_norm": 1.00450599193573, "learning_rate": 1.827851948333876e-05, "loss": 0.06967205524444581, "step": 5850 }, { "epoch": 1.9211983067404754, "grad_norm": 1.1727004051208496, "learning_rate": 1.800716379029632e-05, "loss": 0.06747759819030762, "step": 5900 }, { "epoch": 1.9374796483230217, "grad_norm": 0.8829087615013123, "learning_rate": 1.7735808097253883e-05, "loss": 0.07414731979370118, "step": 5950 }, { "epoch": 1.9537609899055681, "grad_norm": 1.3967463970184326, "learning_rate": 1.746445240421144e-05, "loss": 0.06446901321411133, "step": 6000 }, { "epoch": 1.9700423314881146, "grad_norm": 0.9375430345535278, "learning_rate": 1.7193096711169e-05, "loss": 0.06800864696502686, "step": 6050 }, { "epoch": 1.986323673070661, "grad_norm": 0.9516276717185974, "learning_rate": 1.692174101812656e-05, "loss": 0.08866607666015625, "step": 6100 }, { "epoch": 2.0, "eval_bertscore_f1": 0.9923369143584934, "eval_bleu": 0.8956281706064034, "eval_loss": 0.06535279005765915, "eval_meteor": 0.938086576675145, "eval_rouge1": 0.950788798151768, "eval_rouge2": 0.9176212368118313, "eval_runtime": 58.3038, "eval_samples_per_second": 22.16, "eval_steps_per_second": 2.779, "step": 6142 }, { "epoch": 2.0026050146532075, "grad_norm": 0.3171500861644745, "learning_rate": 1.665038532508412e-05, "loss": 0.06093011379241944, "step": 6150 }, { "epoch": 2.018886356235754, "grad_norm": 0.3844246566295624, "learning_rate": 1.637902963204168e-05, "loss": 0.05403701782226562, "step": 6200 }, { "epoch": 2.0351676978183004, "grad_norm": 1.5091606378555298, "learning_rate": 1.610767393899924e-05, "loss": 0.06063016414642334, "step": 6250 }, { "epoch": 2.0514490394008464, "grad_norm": 0.5060765743255615, "learning_rate": 1.58363182459568e-05, "loss": 0.06424860954284668, "step": 6300 }, { "epoch": 2.067730380983393, "grad_norm": 0.5501185059547424, "learning_rate": 1.556496255291436e-05, "loss": 0.052588853836059574, "step": 6350 }, { "epoch": 2.0840117225659394, "grad_norm": 0.5140529274940491, "learning_rate": 1.529360685987192e-05, "loss": 0.05470933437347412, "step": 6400 }, { "epoch": 2.100293064148486, "grad_norm": 0.13059721887111664, "learning_rate": 1.5022251166829483e-05, "loss": 0.05880857944488525, "step": 6450 }, { "epoch": 2.1165744057310323, "grad_norm": 0.5545864701271057, "learning_rate": 1.4750895473787041e-05, "loss": 0.05454400062561035, "step": 6500 }, { "epoch": 2.1328557473135787, "grad_norm": 0.7566473484039307, "learning_rate": 1.44795397807446e-05, "loss": 0.05996315956115723, "step": 6550 }, { "epoch": 2.149137088896125, "grad_norm": 0.6309687495231628, "learning_rate": 1.420818408770216e-05, "loss": 0.06270824909210206, "step": 6600 }, { "epoch": 2.165418430478671, "grad_norm": 0.6882494688034058, "learning_rate": 1.3936828394659721e-05, "loss": 0.0579791259765625, "step": 6650 }, { "epoch": 2.1816997720612177, "grad_norm": 0.5102435946464539, "learning_rate": 1.366547270161728e-05, "loss": 0.05909278869628906, "step": 6700 }, { "epoch": 2.197981113643764, "grad_norm": 0.5612519979476929, "learning_rate": 1.339411700857484e-05, "loss": 0.0631598711013794, "step": 6750 }, { "epoch": 2.2142624552263106, "grad_norm": 0.5335197448730469, "learning_rate": 1.31227613155324e-05, "loss": 0.061668686866760254, "step": 6800 }, { "epoch": 2.230543796808857, "grad_norm": 0.26907965540885925, "learning_rate": 1.285140562248996e-05, "loss": 0.05813938617706299, "step": 6850 }, { "epoch": 2.2468251383914035, "grad_norm": 0.9871731996536255, "learning_rate": 1.258004992944752e-05, "loss": 0.06166846752166748, "step": 6900 }, { "epoch": 2.26310647997395, "grad_norm": 0.7092576622962952, "learning_rate": 1.230869423640508e-05, "loss": 0.05214274883270264, "step": 6950 }, { "epoch": 2.2793878215564964, "grad_norm": 0.6084023714065552, "learning_rate": 1.203733854336264e-05, "loss": 0.06180807590484619, "step": 7000 }, { "epoch": 2.295669163139043, "grad_norm": 1.0545355081558228, "learning_rate": 1.17659828503202e-05, "loss": 0.05810202598571777, "step": 7050 }, { "epoch": 2.311950504721589, "grad_norm": 0.9563855528831482, "learning_rate": 1.149462715727776e-05, "loss": 0.05618003368377686, "step": 7100 }, { "epoch": 2.3282318463041354, "grad_norm": 0.6173250079154968, "learning_rate": 1.122327146423532e-05, "loss": 0.06320930480957031, "step": 7150 }, { "epoch": 2.344513187886682, "grad_norm": 0.08047935366630554, "learning_rate": 1.095191577119288e-05, "loss": 0.058518905639648434, "step": 7200 }, { "epoch": 2.3607945294692283, "grad_norm": 0.6877385973930359, "learning_rate": 1.0680560078150439e-05, "loss": 0.07214242458343506, "step": 7250 }, { "epoch": 2.3770758710517748, "grad_norm": 0.7921647429466248, "learning_rate": 1.0409204385108e-05, "loss": 0.0554658842086792, "step": 7300 }, { "epoch": 2.393357212634321, "grad_norm": 0.23214460909366608, "learning_rate": 1.013784869206556e-05, "loss": 0.05945809364318848, "step": 7350 }, { "epoch": 2.4096385542168672, "grad_norm": 0.23501083254814148, "learning_rate": 9.86649299902312e-06, "loss": 0.04184418678283691, "step": 7400 }, { "epoch": 2.4259198957994137, "grad_norm": 0.19243040680885315, "learning_rate": 9.59513730598068e-06, "loss": 0.0685301399230957, "step": 7450 }, { "epoch": 2.44220123738196, "grad_norm": 0.03571745380759239, "learning_rate": 9.32378161293824e-06, "loss": 0.05060723781585694, "step": 7500 }, { "epoch": 2.4584825789645066, "grad_norm": 0.06310860812664032, "learning_rate": 9.0524259198958e-06, "loss": 0.06717358589172363, "step": 7550 }, { "epoch": 2.474763920547053, "grad_norm": 0.4403184950351715, "learning_rate": 8.78107022685336e-06, "loss": 0.0508097505569458, "step": 7600 }, { "epoch": 2.4910452621295995, "grad_norm": 0.1725953370332718, "learning_rate": 8.50971453381092e-06, "loss": 0.06436698913574218, "step": 7650 }, { "epoch": 2.507326603712146, "grad_norm": 0.560205340385437, "learning_rate": 8.23835884076848e-06, "loss": 0.05466559410095215, "step": 7700 }, { "epoch": 2.5236079452946925, "grad_norm": 0.8589635491371155, "learning_rate": 7.96700314772604e-06, "loss": 0.052462191581726075, "step": 7750 }, { "epoch": 2.539889286877239, "grad_norm": 0.40856632590293884, "learning_rate": 7.6956474546836e-06, "loss": 0.057110257148742676, "step": 7800 }, { "epoch": 2.556170628459785, "grad_norm": 0.2351612001657486, "learning_rate": 7.424291761641159e-06, "loss": 0.049067635536193845, "step": 7850 }, { "epoch": 2.5724519700423314, "grad_norm": 0.3204529583454132, "learning_rate": 7.15293606859872e-06, "loss": 0.049645824432373045, "step": 7900 }, { "epoch": 2.588733311624878, "grad_norm": 0.43326708674430847, "learning_rate": 6.881580375556279e-06, "loss": 0.050102224349975584, "step": 7950 }, { "epoch": 2.6050146532074243, "grad_norm": 1.065234899520874, "learning_rate": 6.610224682513839e-06, "loss": 0.06275768280029297, "step": 8000 }, { "epoch": 2.6212959947899708, "grad_norm": 0.26160168647766113, "learning_rate": 6.3388689894714e-06, "loss": 0.04881012439727783, "step": 8050 }, { "epoch": 2.6375773363725172, "grad_norm": 0.6686789989471436, "learning_rate": 6.067513296428959e-06, "loss": 0.058712401390075684, "step": 8100 }, { "epoch": 2.6538586779550632, "grad_norm": 0.4735671281814575, "learning_rate": 5.796157603386519e-06, "loss": 0.05793766498565674, "step": 8150 }, { "epoch": 2.6701400195376097, "grad_norm": 0.9112767577171326, "learning_rate": 5.5248019103440796e-06, "loss": 0.05646980285644531, "step": 8200 }, { "epoch": 2.686421361120156, "grad_norm": 0.3665359914302826, "learning_rate": 5.253446217301639e-06, "loss": 0.05863104820251465, "step": 8250 }, { "epoch": 2.7027027027027026, "grad_norm": 0.39087387919425964, "learning_rate": 4.982090524259199e-06, "loss": 0.04811685085296631, "step": 8300 }, { "epoch": 2.718984044285249, "grad_norm": 0.7103152871131897, "learning_rate": 4.7107348312167594e-06, "loss": 0.0660721492767334, "step": 8350 }, { "epoch": 2.7352653858677956, "grad_norm": 0.30644118785858154, "learning_rate": 4.439379138174319e-06, "loss": 0.061232595443725585, "step": 8400 }, { "epoch": 2.751546727450342, "grad_norm": 0.6912480592727661, "learning_rate": 4.16802344513188e-06, "loss": 0.0465062952041626, "step": 8450 }, { "epoch": 2.7678280690328885, "grad_norm": 0.2372223436832428, "learning_rate": 3.896667752089439e-06, "loss": 0.05613251686096191, "step": 8500 }, { "epoch": 2.784109410615435, "grad_norm": 0.3588544428348541, "learning_rate": 3.6253120590469985e-06, "loss": 0.06228278636932373, "step": 8550 }, { "epoch": 2.800390752197981, "grad_norm": 0.8760668039321899, "learning_rate": 3.353956366004559e-06, "loss": 0.058021135330200195, "step": 8600 }, { "epoch": 2.8166720937805274, "grad_norm": 0.39105167984962463, "learning_rate": 3.0826006729621187e-06, "loss": 0.05042066097259521, "step": 8650 }, { "epoch": 2.832953435363074, "grad_norm": 0.8453779816627502, "learning_rate": 2.811244979919679e-06, "loss": 0.05310141086578369, "step": 8700 }, { "epoch": 2.8492347769456203, "grad_norm": 0.3931414484977722, "learning_rate": 2.539889286877239e-06, "loss": 0.04613284111022949, "step": 8750 }, { "epoch": 2.865516118528167, "grad_norm": 0.601372480392456, "learning_rate": 2.268533593834799e-06, "loss": 0.055337414741516114, "step": 8800 }, { "epoch": 2.8817974601107132, "grad_norm": 0.40091976523399353, "learning_rate": 1.9971779007923587e-06, "loss": 0.05075720310211182, "step": 8850 }, { "epoch": 2.8980788016932593, "grad_norm": 0.9332064986228943, "learning_rate": 1.7258222077499185e-06, "loss": 0.04717796325683594, "step": 8900 }, { "epoch": 2.9143601432758057, "grad_norm": 0.7941976189613342, "learning_rate": 1.4544665147074786e-06, "loss": 0.07562547206878661, "step": 8950 }, { "epoch": 2.930641484858352, "grad_norm": 0.6598140597343445, "learning_rate": 1.1831108216650385e-06, "loss": 0.05916054248809814, "step": 9000 }, { "epoch": 2.9469228264408986, "grad_norm": 0.07086297869682312, "learning_rate": 9.117551286225986e-07, "loss": 0.05104278087615967, "step": 9050 }, { "epoch": 2.963204168023445, "grad_norm": 0.5035263299942017, "learning_rate": 6.403994355801585e-07, "loss": 0.04357606887817383, "step": 9100 }, { "epoch": 2.9794855096059916, "grad_norm": 0.28602153062820435, "learning_rate": 3.690437425377185e-07, "loss": 0.04314669132232666, "step": 9150 }, { "epoch": 2.995766851188538, "grad_norm": 0.24384021759033203, "learning_rate": 9.768804949527842e-08, "loss": 0.04970499038696289, "step": 9200 }, { "epoch": 3.0, "eval_bertscore_f1": 0.9924511363724068, "eval_bleu": 0.9035076605970417, "eval_loss": 0.058707889169454575, "eval_meteor": 0.9399283468508673, "eval_rouge1": 0.9525840238092467, "eval_rouge2": 0.9200012975801428, "eval_runtime": 58.5972, "eval_samples_per_second": 22.049, "eval_steps_per_second": 2.765, "step": 9213 } ], "logging_steps": 50, "max_steps": 9213, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.24385758724096e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }