{ "best_global_step": 8487, "best_metric": 0.9710737692450385, "best_model_checkpoint": "./codet5-qlora-k8s/checkpoint-8487", "epoch": 3.0, "eval_steps": 500, "global_step": 8487, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.017674089784376106, "grad_norm": 1.1207759380340576, "learning_rate": 0.00029898197242841994, "loss": 3.2886, "step": 50 }, { "epoch": 0.03534817956875221, "grad_norm": 1.2978123426437378, "learning_rate": 0.0002979215270413573, "loss": 1.8567, "step": 100 }, { "epoch": 0.053022269353128315, "grad_norm": 1.624740719795227, "learning_rate": 0.0002968610816542948, "loss": 1.5695, "step": 150 }, { "epoch": 0.07069635913750442, "grad_norm": 1.7711330652236938, "learning_rate": 0.0002958006362672322, "loss": 1.4205, "step": 200 }, { "epoch": 0.08837044892188052, "grad_norm": 1.62517511844635, "learning_rate": 0.0002947401908801697, "loss": 1.2732, "step": 250 }, { "epoch": 0.10604453870625663, "grad_norm": 2.038139820098877, "learning_rate": 0.00029367974549310706, "loss": 1.1913, "step": 300 }, { "epoch": 0.12371862849063273, "grad_norm": 2.262789487838745, "learning_rate": 0.00029264050901378576, "loss": 1.117, "step": 350 }, { "epoch": 0.14139271827500885, "grad_norm": 3.121687650680542, "learning_rate": 0.0002915800636267232, "loss": 1.0202, "step": 400 }, { "epoch": 0.15906680805938495, "grad_norm": 2.0951812267303467, "learning_rate": 0.0002905196182396606, "loss": 0.9499, "step": 450 }, { "epoch": 0.17674089784376104, "grad_norm": 2.670121192932129, "learning_rate": 0.00028945917285259806, "loss": 0.9707, "step": 500 }, { "epoch": 0.19441498762813716, "grad_norm": 2.3631107807159424, "learning_rate": 0.00028841993637327676, "loss": 0.7961, "step": 550 }, { "epoch": 0.21208907741251326, "grad_norm": 2.10772705078125, "learning_rate": 0.0002873594909862142, "loss": 0.8912, "step": 600 }, { "epoch": 0.22976316719688936, "grad_norm": 2.360686779022217, "learning_rate": 0.00028629904559915163, "loss": 0.871, "step": 650 }, { "epoch": 0.24743725698126545, "grad_norm": 2.191119432449341, "learning_rate": 0.0002852598091198303, "loss": 0.758, "step": 700 }, { "epoch": 0.2651113467656416, "grad_norm": 1.7646818161010742, "learning_rate": 0.00028419936373276776, "loss": 0.8244, "step": 750 }, { "epoch": 0.2827854365500177, "grad_norm": 2.3776354789733887, "learning_rate": 0.00028313891834570514, "loss": 0.7664, "step": 800 }, { "epoch": 0.30045952633439377, "grad_norm": 2.8682475090026855, "learning_rate": 0.00028207847295864263, "loss": 0.6942, "step": 850 }, { "epoch": 0.3181336161187699, "grad_norm": 2.353091239929199, "learning_rate": 0.00028101802757158, "loss": 0.7323, "step": 900 }, { "epoch": 0.335807705903146, "grad_norm": 1.9457337856292725, "learning_rate": 0.00027995758218451745, "loss": 0.6474, "step": 950 }, { "epoch": 0.3534817956875221, "grad_norm": 2.510075330734253, "learning_rate": 0.00027889713679745494, "loss": 0.6801, "step": 1000 }, { "epoch": 0.3711558854718982, "grad_norm": 1.7497014999389648, "learning_rate": 0.0002778366914103923, "loss": 0.656, "step": 1050 }, { "epoch": 0.38882997525627433, "grad_norm": 2.862682342529297, "learning_rate": 0.0002767762460233298, "loss": 0.6238, "step": 1100 }, { "epoch": 0.4065040650406504, "grad_norm": 1.998961091041565, "learning_rate": 0.0002757158006362672, "loss": 0.6306, "step": 1150 }, { "epoch": 0.4241781548250265, "grad_norm": 1.854942798614502, "learning_rate": 0.0002746553552492047, "loss": 0.5689, "step": 1200 }, { "epoch": 0.4418522446094026, "grad_norm": 1.8994203805923462, "learning_rate": 0.00027359490986214206, "loss": 0.6595, "step": 1250 }, { "epoch": 0.4595263343937787, "grad_norm": 1.6235908269882202, "learning_rate": 0.0002725344644750795, "loss": 0.5665, "step": 1300 }, { "epoch": 0.47720042417815484, "grad_norm": 2.291989803314209, "learning_rate": 0.00027147401908801693, "loss": 0.5761, "step": 1350 }, { "epoch": 0.4948745139625309, "grad_norm": 1.4632915258407593, "learning_rate": 0.00027041357370095437, "loss": 0.5171, "step": 1400 }, { "epoch": 0.512548603746907, "grad_norm": 2.1687259674072266, "learning_rate": 0.0002693531283138918, "loss": 0.6183, "step": 1450 }, { "epoch": 0.5302226935312832, "grad_norm": 1.734108805656433, "learning_rate": 0.00026829268292682924, "loss": 0.5411, "step": 1500 }, { "epoch": 0.5478967833156593, "grad_norm": 1.3890644311904907, "learning_rate": 0.00026723223753976667, "loss": 0.5092, "step": 1550 }, { "epoch": 0.5655708731000354, "grad_norm": 1.98700749874115, "learning_rate": 0.0002661717921527041, "loss": 0.4804, "step": 1600 }, { "epoch": 0.5832449628844114, "grad_norm": 1.1181468963623047, "learning_rate": 0.00026511134676564154, "loss": 0.5148, "step": 1650 }, { "epoch": 0.6009190526687875, "grad_norm": 1.7994420528411865, "learning_rate": 0.000264050901378579, "loss": 0.4231, "step": 1700 }, { "epoch": 0.6185931424531637, "grad_norm": 2.032198667526245, "learning_rate": 0.0002629904559915164, "loss": 0.5106, "step": 1750 }, { "epoch": 0.6362672322375398, "grad_norm": 3.585948944091797, "learning_rate": 0.00026193001060445385, "loss": 0.4717, "step": 1800 }, { "epoch": 0.6539413220219159, "grad_norm": 1.8610371351242065, "learning_rate": 0.0002608695652173913, "loss": 0.4765, "step": 1850 }, { "epoch": 0.671615411806292, "grad_norm": 1.2324624061584473, "learning_rate": 0.0002598091198303287, "loss": 0.4643, "step": 1900 }, { "epoch": 0.689289501590668, "grad_norm": 2.391714572906494, "learning_rate": 0.00025874867444326615, "loss": 0.4512, "step": 1950 }, { "epoch": 0.7069635913750442, "grad_norm": 1.8863242864608765, "learning_rate": 0.0002576882290562036, "loss": 0.4115, "step": 2000 }, { "epoch": 0.7246376811594203, "grad_norm": 0.7850649356842041, "learning_rate": 0.000256627783669141, "loss": 0.4341, "step": 2050 }, { "epoch": 0.7423117709437964, "grad_norm": 1.5869959592819214, "learning_rate": 0.00025556733828207846, "loss": 0.4172, "step": 2100 }, { "epoch": 0.7599858607281725, "grad_norm": 1.2584971189498901, "learning_rate": 0.0002545068928950159, "loss": 0.4384, "step": 2150 }, { "epoch": 0.7776599505125487, "grad_norm": 2.560710906982422, "learning_rate": 0.00025344644750795333, "loss": 0.4558, "step": 2200 }, { "epoch": 0.7953340402969247, "grad_norm": 2.2893359661102295, "learning_rate": 0.00025238600212089076, "loss": 0.4345, "step": 2250 }, { "epoch": 0.8130081300813008, "grad_norm": 1.5244982242584229, "learning_rate": 0.0002513255567338282, "loss": 0.4071, "step": 2300 }, { "epoch": 0.8306822198656769, "grad_norm": 1.384102463722229, "learning_rate": 0.00025026511134676563, "loss": 0.3612, "step": 2350 }, { "epoch": 0.848356309650053, "grad_norm": 1.3080965280532837, "learning_rate": 0.00024920466595970307, "loss": 0.3556, "step": 2400 }, { "epoch": 0.8660303994344292, "grad_norm": 1.3324400186538696, "learning_rate": 0.00024814422057264045, "loss": 0.3985, "step": 2450 }, { "epoch": 0.8837044892188052, "grad_norm": 1.7705445289611816, "learning_rate": 0.00024708377518557794, "loss": 0.3895, "step": 2500 }, { "epoch": 0.9013785790031813, "grad_norm": 1.352480173110962, "learning_rate": 0.0002460233297985153, "loss": 0.426, "step": 2550 }, { "epoch": 0.9190526687875574, "grad_norm": 1.479979157447815, "learning_rate": 0.0002449628844114528, "loss": 0.4057, "step": 2600 }, { "epoch": 0.9367267585719335, "grad_norm": 2.1380653381347656, "learning_rate": 0.00024390243902439022, "loss": 0.3689, "step": 2650 }, { "epoch": 0.9544008483563097, "grad_norm": 1.9099682569503784, "learning_rate": 0.00024284199363732768, "loss": 0.3991, "step": 2700 }, { "epoch": 0.9720749381406858, "grad_norm": 1.399566411972046, "learning_rate": 0.0002417815482502651, "loss": 0.3412, "step": 2750 }, { "epoch": 0.9897490279250618, "grad_norm": 2.508267879486084, "learning_rate": 0.00024072110286320252, "loss": 0.3828, "step": 2800 }, { "epoch": 1.0, "eval_bertscore_f1": 0.9667777874331811, "eval_bleu": 0.5973566262792636, "eval_loss": 0.27053505182266235, "eval_runtime": 1054.1237, "eval_samples_per_second": 6.132, "eval_steps_per_second": 0.767, "step": 2829 }, { "epoch": 1.007423117709438, "grad_norm": 1.6967344284057617, "learning_rate": 0.00023966065747613996, "loss": 0.3787, "step": 2850 }, { "epoch": 1.025097207493814, "grad_norm": 1.7119196653366089, "learning_rate": 0.0002386002120890774, "loss": 0.3507, "step": 2900 }, { "epoch": 1.0427712972781902, "grad_norm": 1.5456138849258423, "learning_rate": 0.00023753976670201483, "loss": 0.333, "step": 2950 }, { "epoch": 1.0604453870625663, "grad_norm": 1.3519443273544312, "learning_rate": 0.00023647932131495226, "loss": 0.3897, "step": 3000 }, { "epoch": 1.0781194768469424, "grad_norm": 1.4092153310775757, "learning_rate": 0.0002354188759278897, "loss": 0.3069, "step": 3050 }, { "epoch": 1.0957935666313185, "grad_norm": 1.67427659034729, "learning_rate": 0.00023435843054082713, "loss": 0.3876, "step": 3100 }, { "epoch": 1.1134676564156947, "grad_norm": 0.9288003444671631, "learning_rate": 0.00023329798515376457, "loss": 0.3052, "step": 3150 }, { "epoch": 1.1311417462000706, "grad_norm": 2.0493695735931396, "learning_rate": 0.000232237539766702, "loss": 0.3419, "step": 3200 }, { "epoch": 1.148815835984447, "grad_norm": 1.3473105430603027, "learning_rate": 0.0002311770943796394, "loss": 0.351, "step": 3250 }, { "epoch": 1.1664899257688228, "grad_norm": 2.2063777446746826, "learning_rate": 0.00023011664899257687, "loss": 0.3732, "step": 3300 }, { "epoch": 1.184164015553199, "grad_norm": 0.7194732427597046, "learning_rate": 0.00022905620360551428, "loss": 0.3098, "step": 3350 }, { "epoch": 1.201838105337575, "grad_norm": 1.8693958520889282, "learning_rate": 0.00022799575821845174, "loss": 0.3623, "step": 3400 }, { "epoch": 1.2195121951219512, "grad_norm": 1.7452648878097534, "learning_rate": 0.00022693531283138915, "loss": 0.2985, "step": 3450 }, { "epoch": 1.2371862849063273, "grad_norm": 2.7502336502075195, "learning_rate": 0.00022587486744432661, "loss": 0.2938, "step": 3500 }, { "epoch": 1.2548603746907034, "grad_norm": 1.0220433473587036, "learning_rate": 0.00022481442205726402, "loss": 0.3263, "step": 3550 }, { "epoch": 1.2725344644750796, "grad_norm": 1.1841455698013306, "learning_rate": 0.00022375397667020146, "loss": 0.3456, "step": 3600 }, { "epoch": 1.2902085542594557, "grad_norm": 1.1220083236694336, "learning_rate": 0.0002226935312831389, "loss": 0.3749, "step": 3650 }, { "epoch": 1.3078826440438318, "grad_norm": 2.557077646255493, "learning_rate": 0.00022163308589607633, "loss": 0.3479, "step": 3700 }, { "epoch": 1.3255567338282077, "grad_norm": 1.672131061553955, "learning_rate": 0.00022057264050901376, "loss": 0.3371, "step": 3750 }, { "epoch": 1.343230823612584, "grad_norm": 1.5530970096588135, "learning_rate": 0.0002195121951219512, "loss": 0.3062, "step": 3800 }, { "epoch": 1.36090491339696, "grad_norm": 0.8587738871574402, "learning_rate": 0.00021845174973488866, "loss": 0.3458, "step": 3850 }, { "epoch": 1.378579003181336, "grad_norm": 1.2779722213745117, "learning_rate": 0.00021739130434782607, "loss": 0.3582, "step": 3900 }, { "epoch": 1.3962530929657122, "grad_norm": 1.7616783380508423, "learning_rate": 0.00021633085896076348, "loss": 0.2999, "step": 3950 }, { "epoch": 1.4139271827500883, "grad_norm": 1.2923225164413452, "learning_rate": 0.00021527041357370094, "loss": 0.3079, "step": 4000 }, { "epoch": 1.4316012725344645, "grad_norm": 0.7930673360824585, "learning_rate": 0.00021420996818663835, "loss": 0.2973, "step": 4050 }, { "epoch": 1.4492753623188406, "grad_norm": 1.5622656345367432, "learning_rate": 0.0002131495227995758, "loss": 0.291, "step": 4100 }, { "epoch": 1.4669494521032167, "grad_norm": 0.8834390640258789, "learning_rate": 0.00021208907741251324, "loss": 0.2691, "step": 4150 }, { "epoch": 1.4846235418875928, "grad_norm": 1.2596232891082764, "learning_rate": 0.00021102863202545068, "loss": 0.247, "step": 4200 }, { "epoch": 1.502297631671969, "grad_norm": 0.7010456323623657, "learning_rate": 0.00020996818663838811, "loss": 0.3019, "step": 4250 }, { "epoch": 1.5199717214563448, "grad_norm": 1.071253776550293, "learning_rate": 0.00020890774125132552, "loss": 0.2447, "step": 4300 }, { "epoch": 1.5376458112407212, "grad_norm": 0.63275545835495, "learning_rate": 0.00020784729586426298, "loss": 0.246, "step": 4350 }, { "epoch": 1.555319901025097, "grad_norm": 1.0832668542861938, "learning_rate": 0.0002067868504772004, "loss": 0.249, "step": 4400 }, { "epoch": 1.5729939908094734, "grad_norm": 1.0748353004455566, "learning_rate": 0.00020572640509013785, "loss": 0.2585, "step": 4450 }, { "epoch": 1.5906680805938493, "grad_norm": 1.2410573959350586, "learning_rate": 0.00020466595970307526, "loss": 0.2821, "step": 4500 }, { "epoch": 1.6083421703782255, "grad_norm": 1.8322285413742065, "learning_rate": 0.00020360551431601272, "loss": 0.2642, "step": 4550 }, { "epoch": 1.6260162601626016, "grad_norm": 1.5231540203094482, "learning_rate": 0.00020254506892895013, "loss": 0.329, "step": 4600 }, { "epoch": 1.6436903499469777, "grad_norm": 0.8996387124061584, "learning_rate": 0.0002014846235418876, "loss": 0.2822, "step": 4650 }, { "epoch": 1.6613644397313538, "grad_norm": 1.107340693473816, "learning_rate": 0.000200424178154825, "loss": 0.2647, "step": 4700 }, { "epoch": 1.67903852951573, "grad_norm": 1.44370698928833, "learning_rate": 0.00019936373276776244, "loss": 0.3281, "step": 4750 }, { "epoch": 1.696712619300106, "grad_norm": 1.433866024017334, "learning_rate": 0.00019830328738069987, "loss": 0.2867, "step": 4800 }, { "epoch": 1.714386709084482, "grad_norm": 0.7778879404067993, "learning_rate": 0.0001972428419936373, "loss": 0.2363, "step": 4850 }, { "epoch": 1.7320607988688583, "grad_norm": 1.0693784952163696, "learning_rate": 0.00019618239660657474, "loss": 0.2989, "step": 4900 }, { "epoch": 1.7497348886532342, "grad_norm": 0.9680020213127136, "learning_rate": 0.00019512195121951218, "loss": 0.2512, "step": 4950 }, { "epoch": 1.7674089784376106, "grad_norm": 0.9300338625907898, "learning_rate": 0.00019406150583244961, "loss": 0.2814, "step": 5000 }, { "epoch": 1.7850830682219865, "grad_norm": 1.6086584329605103, "learning_rate": 0.00019300106044538705, "loss": 0.2895, "step": 5050 }, { "epoch": 1.8027571580063628, "grad_norm": 1.522153615951538, "learning_rate": 0.00019194061505832446, "loss": 0.2804, "step": 5100 }, { "epoch": 1.8204312477907387, "grad_norm": 1.3292605876922607, "learning_rate": 0.00019088016967126192, "loss": 0.2676, "step": 5150 }, { "epoch": 1.8381053375751149, "grad_norm": 1.0950225591659546, "learning_rate": 0.00018981972428419933, "loss": 0.2991, "step": 5200 }, { "epoch": 1.855779427359491, "grad_norm": 1.8333910703659058, "learning_rate": 0.0001887592788971368, "loss": 0.2742, "step": 5250 }, { "epoch": 1.873453517143867, "grad_norm": 1.447016716003418, "learning_rate": 0.0001876988335100742, "loss": 0.2125, "step": 5300 }, { "epoch": 1.8911276069282432, "grad_norm": 1.0409213304519653, "learning_rate": 0.00018663838812301166, "loss": 0.2372, "step": 5350 }, { "epoch": 1.9088016967126193, "grad_norm": 0.5701714158058167, "learning_rate": 0.00018557794273594907, "loss": 0.2332, "step": 5400 }, { "epoch": 1.9264757864969955, "grad_norm": 1.0092428922653198, "learning_rate": 0.0001845174973488865, "loss": 0.2897, "step": 5450 }, { "epoch": 1.9441498762813714, "grad_norm": 1.031217098236084, "learning_rate": 0.00018345705196182397, "loss": 0.2722, "step": 5500 }, { "epoch": 1.9618239660657477, "grad_norm": 1.2638362646102905, "learning_rate": 0.00018239660657476137, "loss": 0.2505, "step": 5550 }, { "epoch": 1.9794980558501236, "grad_norm": 1.3998290300369263, "learning_rate": 0.00018133616118769884, "loss": 0.2772, "step": 5600 }, { "epoch": 1.9971721456345, "grad_norm": 1.4681320190429688, "learning_rate": 0.00018027571580063624, "loss": 0.25, "step": 5650 }, { "epoch": 2.0, "eval_bertscore_f1": 0.970109825833968, "eval_bleu": 0.6229920961802436, "eval_loss": 0.1904931217432022, "eval_runtime": 1018.8038, "eval_samples_per_second": 6.345, "eval_steps_per_second": 0.793, "step": 5658 }, { "epoch": 2.014846235418876, "grad_norm": 1.3499983549118042, "learning_rate": 0.0001792152704135737, "loss": 0.2412, "step": 5700 }, { "epoch": 2.032520325203252, "grad_norm": 1.2155545949935913, "learning_rate": 0.00017815482502651111, "loss": 0.2919, "step": 5750 }, { "epoch": 2.050194414987628, "grad_norm": 0.9294681549072266, "learning_rate": 0.00017709437963944858, "loss": 0.2191, "step": 5800 }, { "epoch": 2.0678685047720045, "grad_norm": 0.8069599270820618, "learning_rate": 0.00017603393425238598, "loss": 0.228, "step": 5850 }, { "epoch": 2.0855425945563804, "grad_norm": 1.1825474500656128, "learning_rate": 0.00017497348886532342, "loss": 0.2422, "step": 5900 }, { "epoch": 2.1032166843407563, "grad_norm": 1.2947015762329102, "learning_rate": 0.00017391304347826085, "loss": 0.2333, "step": 5950 }, { "epoch": 2.1208907741251326, "grad_norm": 1.0622906684875488, "learning_rate": 0.0001728525980911983, "loss": 0.2029, "step": 6000 }, { "epoch": 2.1385648639095085, "grad_norm": 0.8785162568092346, "learning_rate": 0.00017179215270413572, "loss": 0.2039, "step": 6050 }, { "epoch": 2.156238953693885, "grad_norm": 0.3702610433101654, "learning_rate": 0.00017073170731707316, "loss": 0.2631, "step": 6100 }, { "epoch": 2.1739130434782608, "grad_norm": 1.0092154741287231, "learning_rate": 0.0001696712619300106, "loss": 0.2325, "step": 6150 }, { "epoch": 2.191587133262637, "grad_norm": 1.648000955581665, "learning_rate": 0.00016861081654294803, "loss": 0.2501, "step": 6200 }, { "epoch": 2.209261223047013, "grad_norm": 0.979069173336029, "learning_rate": 0.00016755037115588544, "loss": 0.256, "step": 6250 }, { "epoch": 2.2269353128313893, "grad_norm": 1.459558129310608, "learning_rate": 0.0001664899257688229, "loss": 0.2603, "step": 6300 }, { "epoch": 2.2446094026157652, "grad_norm": 1.5793472528457642, "learning_rate": 0.0001654294803817603, "loss": 0.2564, "step": 6350 }, { "epoch": 2.262283492400141, "grad_norm": 1.1787140369415283, "learning_rate": 0.00016436903499469777, "loss": 0.2782, "step": 6400 }, { "epoch": 2.2799575821845175, "grad_norm": 1.041374683380127, "learning_rate": 0.00016330858960763518, "loss": 0.2331, "step": 6450 }, { "epoch": 2.297631671968894, "grad_norm": 0.7799555063247681, "learning_rate": 0.00016224814422057264, "loss": 0.2338, "step": 6500 }, { "epoch": 2.3153057617532697, "grad_norm": 1.4405689239501953, "learning_rate": 0.00016118769883351005, "loss": 0.2737, "step": 6550 }, { "epoch": 2.3329798515376456, "grad_norm": 0.979608416557312, "learning_rate": 0.00016012725344644748, "loss": 0.2495, "step": 6600 }, { "epoch": 2.350653941322022, "grad_norm": 0.9300618171691895, "learning_rate": 0.00015906680805938492, "loss": 0.2157, "step": 6650 }, { "epoch": 2.368328031106398, "grad_norm": 0.8745370507240295, "learning_rate": 0.00015800636267232235, "loss": 0.2837, "step": 6700 }, { "epoch": 2.3860021208907742, "grad_norm": 0.9898168444633484, "learning_rate": 0.00015694591728525982, "loss": 0.221, "step": 6750 }, { "epoch": 2.40367621067515, "grad_norm": 0.8933513760566711, "learning_rate": 0.00015588547189819722, "loss": 0.1994, "step": 6800 }, { "epoch": 2.4213503004595265, "grad_norm": 1.7144904136657715, "learning_rate": 0.0001548250265111347, "loss": 0.2429, "step": 6850 }, { "epoch": 2.4390243902439024, "grad_norm": 1.5800135135650635, "learning_rate": 0.0001537645811240721, "loss": 0.2284, "step": 6900 }, { "epoch": 2.4566984800282787, "grad_norm": 1.0567731857299805, "learning_rate": 0.0001527041357370095, "loss": 0.2028, "step": 6950 }, { "epoch": 2.4743725698126546, "grad_norm": 0.59196537733078, "learning_rate": 0.00015164369034994697, "loss": 0.228, "step": 7000 }, { "epoch": 2.4920466595970305, "grad_norm": 1.0257049798965454, "learning_rate": 0.00015058324496288437, "loss": 0.2196, "step": 7050 }, { "epoch": 2.509720749381407, "grad_norm": 1.500623345375061, "learning_rate": 0.00014952279957582184, "loss": 0.2351, "step": 7100 }, { "epoch": 2.5273948391657832, "grad_norm": 1.1046085357666016, "learning_rate": 0.00014846235418875927, "loss": 0.2595, "step": 7150 }, { "epoch": 2.545068928950159, "grad_norm": 1.2226991653442383, "learning_rate": 0.0001474019088016967, "loss": 0.1914, "step": 7200 }, { "epoch": 2.562743018734535, "grad_norm": 0.6742298007011414, "learning_rate": 0.00014634146341463414, "loss": 0.2096, "step": 7250 }, { "epoch": 2.5804171085189114, "grad_norm": 1.5504461526870728, "learning_rate": 0.00014528101802757158, "loss": 0.2051, "step": 7300 }, { "epoch": 2.5980911983032873, "grad_norm": 0.9681800603866577, "learning_rate": 0.000144220572640509, "loss": 0.2327, "step": 7350 }, { "epoch": 2.6157652880876636, "grad_norm": 0.9383839964866638, "learning_rate": 0.00014316012725344645, "loss": 0.2344, "step": 7400 }, { "epoch": 2.6334393778720395, "grad_norm": 0.6154807209968567, "learning_rate": 0.00014209968186638388, "loss": 0.2383, "step": 7450 }, { "epoch": 2.6511134676564154, "grad_norm": 1.2676986455917358, "learning_rate": 0.00014103923647932132, "loss": 0.2257, "step": 7500 }, { "epoch": 2.6687875574407918, "grad_norm": 1.183440089225769, "learning_rate": 0.00013997879109225872, "loss": 0.2102, "step": 7550 }, { "epoch": 2.686461647225168, "grad_norm": 0.7244306802749634, "learning_rate": 0.00013891834570519616, "loss": 0.2146, "step": 7600 }, { "epoch": 2.704135737009544, "grad_norm": 1.187232494354248, "learning_rate": 0.0001378579003181336, "loss": 0.2119, "step": 7650 }, { "epoch": 2.72180982679392, "grad_norm": 1.4510794878005981, "learning_rate": 0.00013679745493107103, "loss": 0.1916, "step": 7700 }, { "epoch": 2.7394839165782963, "grad_norm": 1.383832335472107, "learning_rate": 0.00013573700954400847, "loss": 0.2179, "step": 7750 }, { "epoch": 2.757158006362672, "grad_norm": 0.9274504780769348, "learning_rate": 0.0001346765641569459, "loss": 0.199, "step": 7800 }, { "epoch": 2.7748320961470485, "grad_norm": 2.6429216861724854, "learning_rate": 0.00013361611876988334, "loss": 0.2407, "step": 7850 }, { "epoch": 2.7925061859314244, "grad_norm": 1.3947652578353882, "learning_rate": 0.00013255567338282077, "loss": 0.2019, "step": 7900 }, { "epoch": 2.8101802757158008, "grad_norm": 1.148478627204895, "learning_rate": 0.0001314952279957582, "loss": 0.205, "step": 7950 }, { "epoch": 2.8278543655001767, "grad_norm": 1.1087610721588135, "learning_rate": 0.00013043478260869564, "loss": 0.2527, "step": 8000 }, { "epoch": 2.845528455284553, "grad_norm": 1.4348084926605225, "learning_rate": 0.00012937433722163308, "loss": 0.2465, "step": 8050 }, { "epoch": 2.863202545068929, "grad_norm": 1.2600926160812378, "learning_rate": 0.0001283138918345705, "loss": 0.1699, "step": 8100 }, { "epoch": 2.880876634853305, "grad_norm": 0.8724793195724487, "learning_rate": 0.00012725344644750795, "loss": 0.2257, "step": 8150 }, { "epoch": 2.898550724637681, "grad_norm": 1.5324125289916992, "learning_rate": 0.00012619300106044538, "loss": 0.2002, "step": 8200 }, { "epoch": 2.9162248144220575, "grad_norm": 1.0066156387329102, "learning_rate": 0.00012513255567338282, "loss": 0.192, "step": 8250 }, { "epoch": 2.9338989042064334, "grad_norm": 0.4273667633533478, "learning_rate": 0.00012407211028632022, "loss": 0.1758, "step": 8300 }, { "epoch": 2.9515729939908093, "grad_norm": 0.6536590456962585, "learning_rate": 0.00012301166489925766, "loss": 0.1905, "step": 8350 }, { "epoch": 2.9692470837751856, "grad_norm": 0.6973742246627808, "learning_rate": 0.00012195121951219511, "loss": 0.2116, "step": 8400 }, { "epoch": 2.9869211735595615, "grad_norm": 0.9764792919158936, "learning_rate": 0.00012089077412513254, "loss": 0.2098, "step": 8450 }, { "epoch": 3.0, "eval_bertscore_f1": 0.9710737692450385, "eval_bleu": 0.6330045835354601, "eval_loss": 0.15814107656478882, "eval_runtime": 1020.076, "eval_samples_per_second": 6.337, "eval_steps_per_second": 0.792, "step": 8487 } ], "logging_steps": 50, "max_steps": 14145, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0380291237347328e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }