{ "best_global_step": 11852, "best_metric": 0.3635343246206635, "best_model_checkpoint": "runs/t5_base/checkpoint-11852", "epoch": 2.0, "eval_steps": 500, "global_step": 11852, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00843810648890389, "grad_norm": 1.1823029518127441, "learning_rate": 2.999503881201485e-05, "loss": 1.7915, "step": 50 }, { "epoch": 0.01687621297780778, "grad_norm": 1.172362208366394, "learning_rate": 2.9989976375295308e-05, "loss": 1.8189, "step": 100 }, { "epoch": 0.02531431946671167, "grad_norm": 1.6581450700759888, "learning_rate": 2.998491393857577e-05, "loss": 1.8142, "step": 150 }, { "epoch": 0.03375242595561556, "grad_norm": 1.1387466192245483, "learning_rate": 2.9979851501856227e-05, "loss": 1.8261, "step": 200 }, { "epoch": 0.04219053244451945, "grad_norm": 1.1661549806594849, "learning_rate": 2.9974789065136688e-05, "loss": 1.8097, "step": 250 }, { "epoch": 0.05062863893342334, "grad_norm": 1.2071425914764404, "learning_rate": 2.9969726628417145e-05, "loss": 1.791, "step": 300 }, { "epoch": 0.05906674542232723, "grad_norm": 1.1316887140274048, "learning_rate": 2.9964664191697606e-05, "loss": 1.8034, "step": 350 }, { "epoch": 0.06750485191123112, "grad_norm": 1.1885789632797241, "learning_rate": 2.9959601754978064e-05, "loss": 1.7741, "step": 400 }, { "epoch": 0.07594295840013501, "grad_norm": 1.2099308967590332, "learning_rate": 2.995453931825852e-05, "loss": 1.8011, "step": 450 }, { "epoch": 0.0843810648890389, "grad_norm": 1.2386366128921509, "learning_rate": 2.9949476881538982e-05, "loss": 1.8063, "step": 500 }, { "epoch": 0.0928191713779428, "grad_norm": 1.2590100765228271, "learning_rate": 2.994441444481944e-05, "loss": 1.7954, "step": 550 }, { "epoch": 0.10125727786684668, "grad_norm": 1.7030580043792725, "learning_rate": 2.99393520080999e-05, "loss": 1.7486, "step": 600 }, { "epoch": 0.10969538435575057, "grad_norm": 1.1653929948806763, "learning_rate": 2.9934289571380358e-05, "loss": 1.8254, "step": 650 }, { "epoch": 0.11813349084465447, "grad_norm": 1.2317183017730713, "learning_rate": 2.992922713466082e-05, "loss": 1.7634, "step": 700 }, { "epoch": 0.12657159733355836, "grad_norm": 1.2524069547653198, "learning_rate": 2.9924164697941276e-05, "loss": 1.758, "step": 750 }, { "epoch": 0.13500970382246225, "grad_norm": 1.220339298248291, "learning_rate": 2.9919102261221737e-05, "loss": 1.7594, "step": 800 }, { "epoch": 0.14344781031136614, "grad_norm": 1.3024489879608154, "learning_rate": 2.991403982450219e-05, "loss": 1.7682, "step": 850 }, { "epoch": 0.15188591680027003, "grad_norm": 1.2505040168762207, "learning_rate": 2.9908977387782652e-05, "loss": 1.7908, "step": 900 }, { "epoch": 0.16032402328917392, "grad_norm": 1.3853548765182495, "learning_rate": 2.9903914951063113e-05, "loss": 1.7203, "step": 950 }, { "epoch": 0.1687621297780778, "grad_norm": 1.197028636932373, "learning_rate": 2.989885251434357e-05, "loss": 1.826, "step": 1000 }, { "epoch": 0.1772002362669817, "grad_norm": 1.2575501203536987, "learning_rate": 2.989379007762403e-05, "loss": 1.7429, "step": 1050 }, { "epoch": 0.1856383427558856, "grad_norm": 1.6052565574645996, "learning_rate": 2.988872764090449e-05, "loss": 1.7866, "step": 1100 }, { "epoch": 0.19407644924478948, "grad_norm": 1.1177057027816772, "learning_rate": 2.988366520418495e-05, "loss": 1.7606, "step": 1150 }, { "epoch": 0.20251455573369337, "grad_norm": 1.1401762962341309, "learning_rate": 2.9878602767465407e-05, "loss": 1.8021, "step": 1200 }, { "epoch": 0.21095266222259726, "grad_norm": 1.583141565322876, "learning_rate": 2.987354033074587e-05, "loss": 1.775, "step": 1250 }, { "epoch": 0.21939076871150115, "grad_norm": 1.1614867448806763, "learning_rate": 2.9868477894026326e-05, "loss": 1.7617, "step": 1300 }, { "epoch": 0.22782887520040504, "grad_norm": 1.340226411819458, "learning_rate": 2.9863415457306783e-05, "loss": 1.7679, "step": 1350 }, { "epoch": 0.23626698168930893, "grad_norm": 1.1907130479812622, "learning_rate": 2.985835302058724e-05, "loss": 1.7642, "step": 1400 }, { "epoch": 0.24470508817821282, "grad_norm": 1.1323164701461792, "learning_rate": 2.9853290583867702e-05, "loss": 1.7563, "step": 1450 }, { "epoch": 0.2531431946671167, "grad_norm": 1.2015341520309448, "learning_rate": 2.9848228147148163e-05, "loss": 1.7832, "step": 1500 }, { "epoch": 0.2615813011560206, "grad_norm": 1.1307079792022705, "learning_rate": 2.984316571042862e-05, "loss": 1.7815, "step": 1550 }, { "epoch": 0.2700194076449245, "grad_norm": 1.2381619215011597, "learning_rate": 2.983810327370908e-05, "loss": 1.7602, "step": 1600 }, { "epoch": 0.27845751413382835, "grad_norm": 1.1170424222946167, "learning_rate": 2.983304083698954e-05, "loss": 1.7683, "step": 1650 }, { "epoch": 0.2868956206227323, "grad_norm": 1.3253265619277954, "learning_rate": 2.982797840027e-05, "loss": 1.7387, "step": 1700 }, { "epoch": 0.29533372711163614, "grad_norm": 1.391984224319458, "learning_rate": 2.9822915963550457e-05, "loss": 1.7655, "step": 1750 }, { "epoch": 0.30377183360054005, "grad_norm": 1.2357732057571411, "learning_rate": 2.9817853526830915e-05, "loss": 1.7711, "step": 1800 }, { "epoch": 0.3122099400894439, "grad_norm": 1.8593379259109497, "learning_rate": 2.9812791090111372e-05, "loss": 1.7707, "step": 1850 }, { "epoch": 0.32064804657834783, "grad_norm": 1.1849147081375122, "learning_rate": 2.9807728653391833e-05, "loss": 1.7511, "step": 1900 }, { "epoch": 0.3290861530672517, "grad_norm": 1.3520323038101196, "learning_rate": 2.980266621667229e-05, "loss": 1.7641, "step": 1950 }, { "epoch": 0.3375242595561556, "grad_norm": 1.1148391962051392, "learning_rate": 2.979760377995275e-05, "loss": 1.7849, "step": 2000 }, { "epoch": 0.3459623660450595, "grad_norm": 1.2033883333206177, "learning_rate": 2.979254134323321e-05, "loss": 1.7861, "step": 2050 }, { "epoch": 0.3544004725339634, "grad_norm": 1.2050273418426514, "learning_rate": 2.978747890651367e-05, "loss": 1.7531, "step": 2100 }, { "epoch": 0.36283857902286726, "grad_norm": 1.1736046075820923, "learning_rate": 2.978241646979413e-05, "loss": 1.7748, "step": 2150 }, { "epoch": 0.3712766855117712, "grad_norm": 1.2393854856491089, "learning_rate": 2.9777354033074588e-05, "loss": 1.7972, "step": 2200 }, { "epoch": 0.37971479200067504, "grad_norm": 1.3532406091690063, "learning_rate": 2.977229159635505e-05, "loss": 1.7565, "step": 2250 }, { "epoch": 0.38815289848957896, "grad_norm": 1.2599409818649292, "learning_rate": 2.9767229159635503e-05, "loss": 1.7591, "step": 2300 }, { "epoch": 0.3965910049784828, "grad_norm": 1.1961002349853516, "learning_rate": 2.9762166722915964e-05, "loss": 1.7725, "step": 2350 }, { "epoch": 0.40502911146738674, "grad_norm": 1.3700023889541626, "learning_rate": 2.975710428619642e-05, "loss": 1.7616, "step": 2400 }, { "epoch": 0.4134672179562906, "grad_norm": 2.6831870079040527, "learning_rate": 2.9752041849476883e-05, "loss": 1.7372, "step": 2450 }, { "epoch": 0.4219053244451945, "grad_norm": 1.1522475481033325, "learning_rate": 2.974697941275734e-05, "loss": 1.7758, "step": 2500 }, { "epoch": 0.4303434309340984, "grad_norm": 1.4393203258514404, "learning_rate": 2.97419169760378e-05, "loss": 1.7663, "step": 2550 }, { "epoch": 0.4387815374230023, "grad_norm": 1.3723366260528564, "learning_rate": 2.973685453931826e-05, "loss": 1.7618, "step": 2600 }, { "epoch": 0.44721964391190616, "grad_norm": 1.1089024543762207, "learning_rate": 2.973179210259872e-05, "loss": 1.7842, "step": 2650 }, { "epoch": 0.4556577504008101, "grad_norm": 1.2044650316238403, "learning_rate": 2.972672966587918e-05, "loss": 1.7793, "step": 2700 }, { "epoch": 0.46409585688971394, "grad_norm": 1.2730334997177124, "learning_rate": 2.9721667229159634e-05, "loss": 1.757, "step": 2750 }, { "epoch": 0.47253396337861786, "grad_norm": 1.1678314208984375, "learning_rate": 2.9716604792440095e-05, "loss": 1.7724, "step": 2800 }, { "epoch": 0.4809720698675217, "grad_norm": 1.220035195350647, "learning_rate": 2.9711542355720553e-05, "loss": 1.7381, "step": 2850 }, { "epoch": 0.48941017635642564, "grad_norm": 1.1194424629211426, "learning_rate": 2.9706479919001014e-05, "loss": 1.783, "step": 2900 }, { "epoch": 0.4978482828453295, "grad_norm": 1.2477567195892334, "learning_rate": 2.970141748228147e-05, "loss": 1.7778, "step": 2950 }, { "epoch": 0.5062863893342334, "grad_norm": 1.2932846546173096, "learning_rate": 2.9696355045561932e-05, "loss": 1.7495, "step": 3000 }, { "epoch": 0.5147244958231373, "grad_norm": 1.265342354774475, "learning_rate": 2.969129260884239e-05, "loss": 1.7731, "step": 3050 }, { "epoch": 0.5231626023120411, "grad_norm": 1.1403651237487793, "learning_rate": 2.968623017212285e-05, "loss": 1.775, "step": 3100 }, { "epoch": 0.5316007088009451, "grad_norm": 1.2105836868286133, "learning_rate": 2.9681167735403308e-05, "loss": 1.7812, "step": 3150 }, { "epoch": 0.540038815289849, "grad_norm": 1.1452655792236328, "learning_rate": 2.9676105298683766e-05, "loss": 1.7555, "step": 3200 }, { "epoch": 0.5484769217787528, "grad_norm": 1.0883064270019531, "learning_rate": 2.9671042861964226e-05, "loss": 1.7783, "step": 3250 }, { "epoch": 0.5569150282676567, "grad_norm": 1.279936671257019, "learning_rate": 2.9665980425244684e-05, "loss": 1.7481, "step": 3300 }, { "epoch": 0.5653531347565607, "grad_norm": 1.2121195793151855, "learning_rate": 2.9660917988525145e-05, "loss": 1.774, "step": 3350 }, { "epoch": 0.5737912412454645, "grad_norm": 1.2857924699783325, "learning_rate": 2.9655855551805602e-05, "loss": 1.7754, "step": 3400 }, { "epoch": 0.5822293477343684, "grad_norm": 1.172126293182373, "learning_rate": 2.9650793115086063e-05, "loss": 1.757, "step": 3450 }, { "epoch": 0.5906674542232723, "grad_norm": 1.261569857597351, "learning_rate": 2.964573067836652e-05, "loss": 1.7542, "step": 3500 }, { "epoch": 0.5991055607121761, "grad_norm": 1.223888635635376, "learning_rate": 2.964066824164698e-05, "loss": 1.7315, "step": 3550 }, { "epoch": 0.6075436672010801, "grad_norm": 1.1595600843429565, "learning_rate": 2.963560580492744e-05, "loss": 1.7447, "step": 3600 }, { "epoch": 0.615981773689984, "grad_norm": 1.255895733833313, "learning_rate": 2.96305433682079e-05, "loss": 1.798, "step": 3650 }, { "epoch": 0.6244198801788878, "grad_norm": 1.321952223777771, "learning_rate": 2.9625480931488354e-05, "loss": 1.7578, "step": 3700 }, { "epoch": 0.6328579866677917, "grad_norm": 1.2312285900115967, "learning_rate": 2.9620418494768815e-05, "loss": 1.7962, "step": 3750 }, { "epoch": 0.6412960931566957, "grad_norm": 1.4945430755615234, "learning_rate": 2.9615356058049276e-05, "loss": 1.7392, "step": 3800 }, { "epoch": 0.6497341996455995, "grad_norm": 1.5319666862487793, "learning_rate": 2.9610293621329734e-05, "loss": 1.7249, "step": 3850 }, { "epoch": 0.6581723061345034, "grad_norm": 1.3336591720581055, "learning_rate": 2.9605231184610194e-05, "loss": 1.7569, "step": 3900 }, { "epoch": 0.6666104126234073, "grad_norm": 1.366384744644165, "learning_rate": 2.9600168747890652e-05, "loss": 1.7298, "step": 3950 }, { "epoch": 0.6750485191123112, "grad_norm": 1.5491745471954346, "learning_rate": 2.9595106311171113e-05, "loss": 1.7618, "step": 4000 }, { "epoch": 0.6834866256012151, "grad_norm": 1.3339734077453613, "learning_rate": 2.959004387445157e-05, "loss": 1.7208, "step": 4050 }, { "epoch": 0.691924732090119, "grad_norm": 1.2187597751617432, "learning_rate": 2.958498143773203e-05, "loss": 1.7822, "step": 4100 }, { "epoch": 0.7003628385790228, "grad_norm": 1.428877353668213, "learning_rate": 2.9579919001012485e-05, "loss": 1.7534, "step": 4150 }, { "epoch": 0.7088009450679268, "grad_norm": 1.2236136198043823, "learning_rate": 2.9574856564292946e-05, "loss": 1.7512, "step": 4200 }, { "epoch": 0.7172390515568307, "grad_norm": 1.5085322856903076, "learning_rate": 2.9569794127573404e-05, "loss": 1.7556, "step": 4250 }, { "epoch": 0.7256771580457345, "grad_norm": 1.3578091859817505, "learning_rate": 2.9564731690853865e-05, "loss": 1.7768, "step": 4300 }, { "epoch": 0.7341152645346384, "grad_norm": 1.2682510614395142, "learning_rate": 2.9559669254134322e-05, "loss": 1.7726, "step": 4350 }, { "epoch": 0.7425533710235424, "grad_norm": 1.1269199848175049, "learning_rate": 2.9554606817414783e-05, "loss": 1.7732, "step": 4400 }, { "epoch": 0.7509914775124462, "grad_norm": 1.368332862854004, "learning_rate": 2.9549544380695244e-05, "loss": 1.7475, "step": 4450 }, { "epoch": 0.7594295840013501, "grad_norm": 1.813037395477295, "learning_rate": 2.95444819439757e-05, "loss": 1.7674, "step": 4500 }, { "epoch": 0.7678676904902539, "grad_norm": 1.1015700101852417, "learning_rate": 2.9539419507256162e-05, "loss": 1.7419, "step": 4550 }, { "epoch": 0.7763057969791579, "grad_norm": 1.2412892580032349, "learning_rate": 2.953435707053662e-05, "loss": 1.7903, "step": 4600 }, { "epoch": 0.7847439034680618, "grad_norm": 1.2447668313980103, "learning_rate": 2.9529294633817077e-05, "loss": 1.7427, "step": 4650 }, { "epoch": 0.7931820099569656, "grad_norm": 1.1194871664047241, "learning_rate": 2.9524232197097535e-05, "loss": 1.7353, "step": 4700 }, { "epoch": 0.8016201164458695, "grad_norm": 1.322404146194458, "learning_rate": 2.9519169760377996e-05, "loss": 1.7522, "step": 4750 }, { "epoch": 0.8100582229347735, "grad_norm": 1.2130954265594482, "learning_rate": 2.9514107323658453e-05, "loss": 1.7373, "step": 4800 }, { "epoch": 0.8184963294236773, "grad_norm": 1.2840181589126587, "learning_rate": 2.9509044886938914e-05, "loss": 1.7588, "step": 4850 }, { "epoch": 0.8269344359125812, "grad_norm": 1.1954180002212524, "learning_rate": 2.9503982450219372e-05, "loss": 1.7523, "step": 4900 }, { "epoch": 0.8353725424014851, "grad_norm": 1.2900581359863281, "learning_rate": 2.9498920013499833e-05, "loss": 1.7747, "step": 4950 }, { "epoch": 0.843810648890389, "grad_norm": 1.2370872497558594, "learning_rate": 2.9493857576780294e-05, "loss": 1.744, "step": 5000 }, { "epoch": 0.8522487553792929, "grad_norm": 2.0214121341705322, "learning_rate": 2.948879514006075e-05, "loss": 1.7506, "step": 5050 }, { "epoch": 0.8606868618681968, "grad_norm": 1.257506012916565, "learning_rate": 2.948373270334121e-05, "loss": 1.692, "step": 5100 }, { "epoch": 0.8691249683571006, "grad_norm": 1.2164514064788818, "learning_rate": 2.9478670266621666e-05, "loss": 1.7464, "step": 5150 }, { "epoch": 0.8775630748460046, "grad_norm": 1.2734804153442383, "learning_rate": 2.9473607829902127e-05, "loss": 1.7383, "step": 5200 }, { "epoch": 0.8860011813349085, "grad_norm": 1.2429083585739136, "learning_rate": 2.9468545393182585e-05, "loss": 1.7666, "step": 5250 }, { "epoch": 0.8944392878238123, "grad_norm": 1.2660380601882935, "learning_rate": 2.9463482956463045e-05, "loss": 1.7715, "step": 5300 }, { "epoch": 0.9028773943127162, "grad_norm": 1.426188588142395, "learning_rate": 2.9458420519743503e-05, "loss": 1.7812, "step": 5350 }, { "epoch": 0.9113155008016202, "grad_norm": 1.23947274684906, "learning_rate": 2.9453358083023964e-05, "loss": 1.7604, "step": 5400 }, { "epoch": 0.919753607290524, "grad_norm": 1.183780312538147, "learning_rate": 2.944829564630442e-05, "loss": 1.7618, "step": 5450 }, { "epoch": 0.9281917137794279, "grad_norm": 1.3170071840286255, "learning_rate": 2.9443233209584882e-05, "loss": 1.7323, "step": 5500 }, { "epoch": 0.9366298202683317, "grad_norm": 1.2505546808242798, "learning_rate": 2.9438170772865343e-05, "loss": 1.7306, "step": 5550 }, { "epoch": 0.9450679267572357, "grad_norm": 1.2599437236785889, "learning_rate": 2.9433108336145797e-05, "loss": 1.7135, "step": 5600 }, { "epoch": 0.9535060332461396, "grad_norm": 1.3927714824676514, "learning_rate": 2.9428045899426258e-05, "loss": 1.7458, "step": 5650 }, { "epoch": 0.9619441397350434, "grad_norm": 1.5024762153625488, "learning_rate": 2.9422983462706716e-05, "loss": 1.751, "step": 5700 }, { "epoch": 0.9703822462239473, "grad_norm": 1.2158094644546509, "learning_rate": 2.9417921025987177e-05, "loss": 1.7217, "step": 5750 }, { "epoch": 0.9788203527128513, "grad_norm": 1.177870750427246, "learning_rate": 2.9412858589267634e-05, "loss": 1.7481, "step": 5800 }, { "epoch": 0.9872584592017551, "grad_norm": 1.2789208889007568, "learning_rate": 2.9407796152548095e-05, "loss": 1.7697, "step": 5850 }, { "epoch": 0.995696565690659, "grad_norm": 1.2305048704147339, "learning_rate": 2.9402733715828553e-05, "loss": 1.7322, "step": 5900 }, { "epoch": 1.0, "eval_loss": 1.6421738862991333, "eval_rouge1": 0.4250170405747811, "eval_rouge2": 0.21400043951132275, "eval_rougeL": 0.35855678329236096, "eval_runtime": 1163.9596, "eval_samples_per_second": 20.363, "eval_steps_per_second": 1.273, "step": 5926 }, { "epoch": 1.0040502911146738, "grad_norm": 1.1294411420822144, "learning_rate": 2.9397671279109013e-05, "loss": 1.7502, "step": 5950 }, { "epoch": 1.0124883976035777, "grad_norm": 1.2580525875091553, "learning_rate": 2.939260884238947e-05, "loss": 1.6828, "step": 6000 }, { "epoch": 1.0209265040924818, "grad_norm": 1.9175118207931519, "learning_rate": 2.938754640566993e-05, "loss": 1.7069, "step": 6050 }, { "epoch": 1.0293646105813856, "grad_norm": 1.4026094675064087, "learning_rate": 2.938248396895039e-05, "loss": 1.7445, "step": 6100 }, { "epoch": 1.0378027170702895, "grad_norm": 1.1968200206756592, "learning_rate": 2.9377421532230847e-05, "loss": 1.6942, "step": 6150 }, { "epoch": 1.0462408235591933, "grad_norm": 1.1554464101791382, "learning_rate": 2.9372359095511308e-05, "loss": 1.7288, "step": 6200 }, { "epoch": 1.0546789300480972, "grad_norm": 1.3406728506088257, "learning_rate": 2.9367296658791765e-05, "loss": 1.7135, "step": 6250 }, { "epoch": 1.063117036537001, "grad_norm": 1.1203522682189941, "learning_rate": 2.9362234222072226e-05, "loss": 1.7165, "step": 6300 }, { "epoch": 1.071555143025905, "grad_norm": 1.1261606216430664, "learning_rate": 2.9357171785352684e-05, "loss": 1.7016, "step": 6350 }, { "epoch": 1.0799932495148088, "grad_norm": 2.1716997623443604, "learning_rate": 2.9352109348633145e-05, "loss": 1.6976, "step": 6400 }, { "epoch": 1.0884313560037127, "grad_norm": 1.335428237915039, "learning_rate": 2.9347046911913602e-05, "loss": 1.7018, "step": 6450 }, { "epoch": 1.0968694624926167, "grad_norm": 1.216572642326355, "learning_rate": 2.9341984475194063e-05, "loss": 1.7092, "step": 6500 }, { "epoch": 1.1053075689815206, "grad_norm": 1.151798129081726, "learning_rate": 2.9336922038474517e-05, "loss": 1.7169, "step": 6550 }, { "epoch": 1.1137456754704245, "grad_norm": 1.283575415611267, "learning_rate": 2.9331859601754978e-05, "loss": 1.7384, "step": 6600 }, { "epoch": 1.1221837819593283, "grad_norm": 1.1927815675735474, "learning_rate": 2.9326797165035436e-05, "loss": 1.7135, "step": 6650 }, { "epoch": 1.1306218884482322, "grad_norm": 1.230566382408142, "learning_rate": 2.9321734728315896e-05, "loss": 1.7062, "step": 6700 }, { "epoch": 1.139059994937136, "grad_norm": 1.1863012313842773, "learning_rate": 2.9316672291596357e-05, "loss": 1.7038, "step": 6750 }, { "epoch": 1.14749810142604, "grad_norm": 1.2071080207824707, "learning_rate": 2.9311609854876815e-05, "loss": 1.7232, "step": 6800 }, { "epoch": 1.155936207914944, "grad_norm": 1.308000922203064, "learning_rate": 2.9306547418157276e-05, "loss": 1.7493, "step": 6850 }, { "epoch": 1.1643743144038479, "grad_norm": 1.7674635648727417, "learning_rate": 2.9301484981437733e-05, "loss": 1.6925, "step": 6900 }, { "epoch": 1.1728124208927517, "grad_norm": 1.2214921712875366, "learning_rate": 2.9296422544718194e-05, "loss": 1.7767, "step": 6950 }, { "epoch": 1.1812505273816556, "grad_norm": 1.303446650505066, "learning_rate": 2.929136010799865e-05, "loss": 1.7432, "step": 7000 }, { "epoch": 1.1896886338705595, "grad_norm": 1.3376314640045166, "learning_rate": 2.928629767127911e-05, "loss": 1.7055, "step": 7050 }, { "epoch": 1.1981267403594633, "grad_norm": 1.422675609588623, "learning_rate": 2.9281235234559567e-05, "loss": 1.7143, "step": 7100 }, { "epoch": 1.2065648468483672, "grad_norm": 1.2204426527023315, "learning_rate": 2.9276172797840028e-05, "loss": 1.6867, "step": 7150 }, { "epoch": 1.215002953337271, "grad_norm": 1.1497398614883423, "learning_rate": 2.9271110361120485e-05, "loss": 1.7242, "step": 7200 }, { "epoch": 1.223441059826175, "grad_norm": 1.2231690883636475, "learning_rate": 2.9266047924400946e-05, "loss": 1.7091, "step": 7250 }, { "epoch": 1.231879166315079, "grad_norm": 1.1508055925369263, "learning_rate": 2.9260985487681407e-05, "loss": 1.7229, "step": 7300 }, { "epoch": 1.2403172728039829, "grad_norm": 1.1513855457305908, "learning_rate": 2.9255923050961864e-05, "loss": 1.7057, "step": 7350 }, { "epoch": 1.2487553792928867, "grad_norm": 1.2278095483779907, "learning_rate": 2.9250860614242325e-05, "loss": 1.7257, "step": 7400 }, { "epoch": 1.2571934857817906, "grad_norm": 1.4047170877456665, "learning_rate": 2.924579817752278e-05, "loss": 1.7463, "step": 7450 }, { "epoch": 1.2656315922706944, "grad_norm": 1.2649122476577759, "learning_rate": 2.924073574080324e-05, "loss": 1.7019, "step": 7500 }, { "epoch": 1.2740696987595983, "grad_norm": 1.2084012031555176, "learning_rate": 2.9235673304083698e-05, "loss": 1.7373, "step": 7550 }, { "epoch": 1.2825078052485022, "grad_norm": 1.3185824155807495, "learning_rate": 2.923061086736416e-05, "loss": 1.7063, "step": 7600 }, { "epoch": 1.2909459117374062, "grad_norm": 1.1844264268875122, "learning_rate": 2.9225548430644616e-05, "loss": 1.7037, "step": 7650 }, { "epoch": 1.2993840182263101, "grad_norm": 1.2209100723266602, "learning_rate": 2.9220485993925077e-05, "loss": 1.7318, "step": 7700 }, { "epoch": 1.307822124715214, "grad_norm": 1.3436676263809204, "learning_rate": 2.9215423557205535e-05, "loss": 1.7082, "step": 7750 }, { "epoch": 1.3162602312041178, "grad_norm": 1.3716071844100952, "learning_rate": 2.9210361120485996e-05, "loss": 1.7387, "step": 7800 }, { "epoch": 1.3246983376930217, "grad_norm": 1.18326735496521, "learning_rate": 2.9205298683766457e-05, "loss": 1.7277, "step": 7850 }, { "epoch": 1.3331364441819256, "grad_norm": 1.1182228326797485, "learning_rate": 2.9200236247046914e-05, "loss": 1.7437, "step": 7900 }, { "epoch": 1.3415745506708294, "grad_norm": 1.2569571733474731, "learning_rate": 2.919517381032737e-05, "loss": 1.7196, "step": 7950 }, { "epoch": 1.3500126571597333, "grad_norm": 1.2209763526916504, "learning_rate": 2.919011137360783e-05, "loss": 1.6957, "step": 8000 }, { "epoch": 1.3584507636486371, "grad_norm": 1.291174054145813, "learning_rate": 2.918504893688829e-05, "loss": 1.7115, "step": 8050 }, { "epoch": 1.366888870137541, "grad_norm": 1.2739378213882446, "learning_rate": 2.9179986500168747e-05, "loss": 1.7291, "step": 8100 }, { "epoch": 1.375326976626445, "grad_norm": 1.325810194015503, "learning_rate": 2.917492406344921e-05, "loss": 1.7318, "step": 8150 }, { "epoch": 1.383765083115349, "grad_norm": 1.3240125179290771, "learning_rate": 2.9169861626729666e-05, "loss": 1.7334, "step": 8200 }, { "epoch": 1.3922031896042528, "grad_norm": 1.1416797637939453, "learning_rate": 2.9164799190010127e-05, "loss": 1.7169, "step": 8250 }, { "epoch": 1.4006412960931567, "grad_norm": 1.7354291677474976, "learning_rate": 2.9159736753290584e-05, "loss": 1.7374, "step": 8300 }, { "epoch": 1.4090794025820605, "grad_norm": 1.2505598068237305, "learning_rate": 2.9154674316571045e-05, "loss": 1.7079, "step": 8350 }, { "epoch": 1.4175175090709644, "grad_norm": 1.2037113904953003, "learning_rate": 2.91496118798515e-05, "loss": 1.7151, "step": 8400 }, { "epoch": 1.4259556155598685, "grad_norm": 1.33303701877594, "learning_rate": 2.914454944313196e-05, "loss": 1.7099, "step": 8450 }, { "epoch": 1.4343937220487724, "grad_norm": 1.2704479694366455, "learning_rate": 2.913948700641242e-05, "loss": 1.6913, "step": 8500 }, { "epoch": 1.4428318285376762, "grad_norm": 1.3634933233261108, "learning_rate": 2.913442456969288e-05, "loss": 1.7029, "step": 8550 }, { "epoch": 1.45126993502658, "grad_norm": 1.1784166097640991, "learning_rate": 2.912936213297334e-05, "loss": 1.6844, "step": 8600 }, { "epoch": 1.459708041515484, "grad_norm": 1.2901785373687744, "learning_rate": 2.9124299696253797e-05, "loss": 1.7226, "step": 8650 }, { "epoch": 1.4681461480043878, "grad_norm": 1.2651816606521606, "learning_rate": 2.9119237259534258e-05, "loss": 1.727, "step": 8700 }, { "epoch": 1.4765842544932917, "grad_norm": 1.3113429546356201, "learning_rate": 2.9114174822814715e-05, "loss": 1.7239, "step": 8750 }, { "epoch": 1.4850223609821955, "grad_norm": 1.322194218635559, "learning_rate": 2.9109112386095176e-05, "loss": 1.6862, "step": 8800 }, { "epoch": 1.4934604674710994, "grad_norm": 1.1685904264450073, "learning_rate": 2.9104049949375634e-05, "loss": 1.7337, "step": 8850 }, { "epoch": 1.5018985739600033, "grad_norm": 1.3465616703033447, "learning_rate": 2.909898751265609e-05, "loss": 1.7007, "step": 8900 }, { "epoch": 1.5103366804489071, "grad_norm": 1.2812334299087524, "learning_rate": 2.909392507593655e-05, "loss": 1.692, "step": 8950 }, { "epoch": 1.5187747869378112, "grad_norm": 1.2780709266662598, "learning_rate": 2.908886263921701e-05, "loss": 1.7178, "step": 9000 }, { "epoch": 1.527212893426715, "grad_norm": 2.6635632514953613, "learning_rate": 2.908380020249747e-05, "loss": 1.7218, "step": 9050 }, { "epoch": 1.535650999915619, "grad_norm": 1.440506935119629, "learning_rate": 2.9078737765777928e-05, "loss": 1.7053, "step": 9100 }, { "epoch": 1.5440891064045228, "grad_norm": 1.1698048114776611, "learning_rate": 2.907367532905839e-05, "loss": 1.6817, "step": 9150 }, { "epoch": 1.5525272128934269, "grad_norm": 2.538287401199341, "learning_rate": 2.9068612892338847e-05, "loss": 1.7249, "step": 9200 }, { "epoch": 1.5609653193823307, "grad_norm": 1.577597737312317, "learning_rate": 2.9063550455619308e-05, "loss": 1.7239, "step": 9250 }, { "epoch": 1.5694034258712346, "grad_norm": 1.2279037237167358, "learning_rate": 2.9058488018899765e-05, "loss": 1.7013, "step": 9300 }, { "epoch": 1.5778415323601385, "grad_norm": 1.2282285690307617, "learning_rate": 2.9053425582180223e-05, "loss": 1.7367, "step": 9350 }, { "epoch": 1.5862796388490423, "grad_norm": 1.1559499502182007, "learning_rate": 2.904836314546068e-05, "loss": 1.6994, "step": 9400 }, { "epoch": 1.5947177453379462, "grad_norm": 1.1341310739517212, "learning_rate": 2.904330070874114e-05, "loss": 1.6864, "step": 9450 }, { "epoch": 1.60315585182685, "grad_norm": 1.2908226251602173, "learning_rate": 2.90382382720216e-05, "loss": 1.6823, "step": 9500 }, { "epoch": 1.611593958315754, "grad_norm": 1.2425497770309448, "learning_rate": 2.903317583530206e-05, "loss": 1.7256, "step": 9550 }, { "epoch": 1.6200320648046578, "grad_norm": 1.4069569110870361, "learning_rate": 2.902811339858252e-05, "loss": 1.6999, "step": 9600 }, { "epoch": 1.6284701712935616, "grad_norm": 1.2929143905639648, "learning_rate": 2.9023050961862978e-05, "loss": 1.7009, "step": 9650 }, { "epoch": 1.6369082777824655, "grad_norm": 1.2135717868804932, "learning_rate": 2.901798852514344e-05, "loss": 1.7176, "step": 9700 }, { "epoch": 1.6453463842713694, "grad_norm": 1.3558820486068726, "learning_rate": 2.9012926088423896e-05, "loss": 1.6833, "step": 9750 }, { "epoch": 1.6537844907602732, "grad_norm": 1.3430869579315186, "learning_rate": 2.9007863651704357e-05, "loss": 1.7055, "step": 9800 }, { "epoch": 1.6622225972491773, "grad_norm": 1.2483643293380737, "learning_rate": 2.900280121498481e-05, "loss": 1.6712, "step": 9850 }, { "epoch": 1.6706607037380812, "grad_norm": 1.2428581714630127, "learning_rate": 2.8997738778265272e-05, "loss": 1.7177, "step": 9900 }, { "epoch": 1.679098810226985, "grad_norm": 1.265365481376648, "learning_rate": 2.899267634154573e-05, "loss": 1.6931, "step": 9950 }, { "epoch": 1.687536916715889, "grad_norm": 1.2224442958831787, "learning_rate": 2.898761390482619e-05, "loss": 1.7572, "step": 10000 }, { "epoch": 1.695975023204793, "grad_norm": 1.0398801565170288, "learning_rate": 2.8982551468106648e-05, "loss": 1.7232, "step": 10050 }, { "epoch": 1.7044131296936968, "grad_norm": 1.4071274995803833, "learning_rate": 2.897748903138711e-05, "loss": 1.7086, "step": 10100 }, { "epoch": 1.7128512361826007, "grad_norm": 1.2596487998962402, "learning_rate": 2.8972426594667566e-05, "loss": 1.7067, "step": 10150 }, { "epoch": 1.7212893426715046, "grad_norm": 1.2502026557922363, "learning_rate": 2.8967364157948027e-05, "loss": 1.7091, "step": 10200 }, { "epoch": 1.7297274491604084, "grad_norm": 1.3957078456878662, "learning_rate": 2.8962301721228488e-05, "loss": 1.6856, "step": 10250 }, { "epoch": 1.7381655556493123, "grad_norm": 1.3089699745178223, "learning_rate": 2.8957239284508942e-05, "loss": 1.7031, "step": 10300 }, { "epoch": 1.7466036621382162, "grad_norm": 1.8227264881134033, "learning_rate": 2.8952176847789403e-05, "loss": 1.7182, "step": 10350 }, { "epoch": 1.75504176862712, "grad_norm": 1.3877559900283813, "learning_rate": 2.894711441106986e-05, "loss": 1.7356, "step": 10400 }, { "epoch": 1.7634798751160239, "grad_norm": 1.3717120885849, "learning_rate": 2.8942051974350322e-05, "loss": 1.724, "step": 10450 }, { "epoch": 1.7719179816049277, "grad_norm": 1.173403024673462, "learning_rate": 2.893698953763078e-05, "loss": 1.7179, "step": 10500 }, { "epoch": 1.7803560880938316, "grad_norm": 1.3358752727508545, "learning_rate": 2.893192710091124e-05, "loss": 1.7217, "step": 10550 }, { "epoch": 1.7887941945827355, "grad_norm": 1.1707042455673218, "learning_rate": 2.8926864664191698e-05, "loss": 1.6956, "step": 10600 }, { "epoch": 1.7972323010716396, "grad_norm": 1.0948277711868286, "learning_rate": 2.892180222747216e-05, "loss": 1.7153, "step": 10650 }, { "epoch": 1.8056704075605434, "grad_norm": 1.251162052154541, "learning_rate": 2.8916739790752616e-05, "loss": 1.7083, "step": 10700 }, { "epoch": 1.8141085140494473, "grad_norm": 1.2486658096313477, "learning_rate": 2.8911677354033074e-05, "loss": 1.6887, "step": 10750 }, { "epoch": 1.8225466205383511, "grad_norm": 1.1763782501220703, "learning_rate": 2.8906614917313534e-05, "loss": 1.7169, "step": 10800 }, { "epoch": 1.8309847270272552, "grad_norm": 1.2847461700439453, "learning_rate": 2.8901552480593992e-05, "loss": 1.7095, "step": 10850 }, { "epoch": 1.839422833516159, "grad_norm": 1.3423278331756592, "learning_rate": 2.8896490043874453e-05, "loss": 1.7019, "step": 10900 }, { "epoch": 1.847860940005063, "grad_norm": 1.1392067670822144, "learning_rate": 2.889142760715491e-05, "loss": 1.7134, "step": 10950 }, { "epoch": 1.8562990464939668, "grad_norm": 1.2327152490615845, "learning_rate": 2.888636517043537e-05, "loss": 1.7111, "step": 11000 }, { "epoch": 1.8647371529828707, "grad_norm": 1.2953259944915771, "learning_rate": 2.888130273371583e-05, "loss": 1.6891, "step": 11050 }, { "epoch": 1.8731752594717745, "grad_norm": 1.1197056770324707, "learning_rate": 2.887624029699629e-05, "loss": 1.7304, "step": 11100 }, { "epoch": 1.8816133659606784, "grad_norm": 1.2429778575897217, "learning_rate": 2.8871177860276747e-05, "loss": 1.7034, "step": 11150 }, { "epoch": 1.8900514724495823, "grad_norm": 1.2462618350982666, "learning_rate": 2.8866115423557208e-05, "loss": 1.7144, "step": 11200 }, { "epoch": 1.8984895789384861, "grad_norm": 1.3959741592407227, "learning_rate": 2.8861052986837662e-05, "loss": 1.7403, "step": 11250 }, { "epoch": 1.90692768542739, "grad_norm": 1.173663854598999, "learning_rate": 2.8855990550118123e-05, "loss": 1.688, "step": 11300 }, { "epoch": 1.9153657919162939, "grad_norm": 1.3880196809768677, "learning_rate": 2.8850928113398584e-05, "loss": 1.712, "step": 11350 }, { "epoch": 1.9238038984051977, "grad_norm": 1.1173053979873657, "learning_rate": 2.884586567667904e-05, "loss": 1.6835, "step": 11400 }, { "epoch": 1.9322420048941018, "grad_norm": 1.3038270473480225, "learning_rate": 2.8840803239959502e-05, "loss": 1.7065, "step": 11450 }, { "epoch": 1.9406801113830057, "grad_norm": 1.2091000080108643, "learning_rate": 2.883574080323996e-05, "loss": 1.7301, "step": 11500 }, { "epoch": 1.9491182178719095, "grad_norm": 1.5804047584533691, "learning_rate": 2.883067836652042e-05, "loss": 1.7425, "step": 11550 }, { "epoch": 1.9575563243608134, "grad_norm": 1.3017399311065674, "learning_rate": 2.882561592980088e-05, "loss": 1.6849, "step": 11600 }, { "epoch": 1.9659944308497175, "grad_norm": 1.2739709615707397, "learning_rate": 2.882055349308134e-05, "loss": 1.7163, "step": 11650 }, { "epoch": 1.9744325373386213, "grad_norm": 1.9534889459609985, "learning_rate": 2.8815491056361793e-05, "loss": 1.6686, "step": 11700 }, { "epoch": 1.9828706438275252, "grad_norm": 1.0748012065887451, "learning_rate": 2.8810428619642254e-05, "loss": 1.6797, "step": 11750 }, { "epoch": 1.991308750316429, "grad_norm": 1.3093863725662231, "learning_rate": 2.8805366182922712e-05, "loss": 1.6907, "step": 11800 }, { "epoch": 1.999746856805333, "grad_norm": 1.2243276834487915, "learning_rate": 2.8800303746203173e-05, "loss": 1.699, "step": 11850 }, { "epoch": 2.0, "eval_loss": 1.623704195022583, "eval_rouge1": 0.42930090258128883, "eval_rouge2": 0.2188434687019127, "eval_rougeL": 0.3635343246206635, "eval_runtime": 1180.6849, "eval_samples_per_second": 20.075, "eval_steps_per_second": 1.255, "step": 11852 } ], "logging_steps": 50, "max_steps": 296300, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.459019927198925e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }