| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9999517413335477, |
| "eval_steps": 500, |
| "global_step": 62164, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.016086222150727903, |
| "grad_norm": 0.5078127384185791, |
| "learning_rate": 4.959864230101023e-05, |
| "loss": 2.1432, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.032172444301455806, |
| "grad_norm": 0.4508506655693054, |
| "learning_rate": 4.9196480277974395e-05, |
| "loss": 1.9093, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.048258666452183706, |
| "grad_norm": 0.4430558979511261, |
| "learning_rate": 4.879431825493855e-05, |
| "loss": 1.8418, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.06434488860291161, |
| "grad_norm": 0.4775325059890747, |
| "learning_rate": 4.8392156231902713e-05, |
| "loss": 1.7771, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.08043111075363951, |
| "grad_norm": 0.49685001373291016, |
| "learning_rate": 4.7989994208866876e-05, |
| "loss": 1.7226, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.09651733290436741, |
| "grad_norm": 0.5552434325218201, |
| "learning_rate": 4.7587832185831025e-05, |
| "loss": 1.6767, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.11260355505509531, |
| "grad_norm": 0.6779139637947083, |
| "learning_rate": 4.718567016279519e-05, |
| "loss": 1.6588, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.12868977720582322, |
| "grad_norm": 0.5552022457122803, |
| "learning_rate": 4.6783508139759344e-05, |
| "loss": 1.603, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.1447759993565511, |
| "grad_norm": 0.5302042365074158, |
| "learning_rate": 4.638134611672351e-05, |
| "loss": 1.5776, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.16086222150727902, |
| "grad_norm": 0.5810815691947937, |
| "learning_rate": 4.597918409368766e-05, |
| "loss": 1.5333, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.1769484436580069, |
| "grad_norm": 0.5819700956344604, |
| "learning_rate": 4.5577022070651826e-05, |
| "loss": 1.5168, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.19303466580873482, |
| "grad_norm": 0.6134072542190552, |
| "learning_rate": 4.517486004761599e-05, |
| "loss": 1.4748, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.2091208879594627, |
| "grad_norm": 0.5746152400970459, |
| "learning_rate": 4.4772698024580144e-05, |
| "loss": 1.4622, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.22520711011019062, |
| "grad_norm": 0.7663710713386536, |
| "learning_rate": 4.437053600154431e-05, |
| "loss": 1.4767, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.24129333226091854, |
| "grad_norm": 0.7993176579475403, |
| "learning_rate": 4.396837397850846e-05, |
| "loss": 1.4527, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.25737955441164645, |
| "grad_norm": 0.6892676949501038, |
| "learning_rate": 4.3566211955472626e-05, |
| "loss": 1.4325, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.2734657765623743, |
| "grad_norm": 0.6928556561470032, |
| "learning_rate": 4.316404993243678e-05, |
| "loss": 1.4038, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.2895519987131022, |
| "grad_norm": 0.7578593492507935, |
| "learning_rate": 4.2761887909400944e-05, |
| "loss": 1.3945, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.30563822086383013, |
| "grad_norm": 0.7504703402519226, |
| "learning_rate": 4.23597258863651e-05, |
| "loss": 1.3644, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.32172444301455805, |
| "grad_norm": 0.8370710611343384, |
| "learning_rate": 4.1957563863329256e-05, |
| "loss": 1.3619, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.3378106651652859, |
| "grad_norm": 0.8501142263412476, |
| "learning_rate": 4.155540184029342e-05, |
| "loss": 1.3448, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.3538968873160138, |
| "grad_norm": 0.9001900553703308, |
| "learning_rate": 4.1153239817257575e-05, |
| "loss": 1.3004, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.36998310946674173, |
| "grad_norm": 1.0658681392669678, |
| "learning_rate": 4.075107779422174e-05, |
| "loss": 1.2789, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.38606933161746965, |
| "grad_norm": 1.1038371324539185, |
| "learning_rate": 4.0348915771185894e-05, |
| "loss": 1.2651, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.40215555376819756, |
| "grad_norm": 1.2004213333129883, |
| "learning_rate": 3.994755807219613e-05, |
| "loss": 1.2216, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.4182417759189254, |
| "grad_norm": 1.235543966293335, |
| "learning_rate": 3.9545396049160286e-05, |
| "loss": 1.1955, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.43432799806965333, |
| "grad_norm": 1.5088828802108765, |
| "learning_rate": 3.914323402612445e-05, |
| "loss": 1.1836, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.45041422022038125, |
| "grad_norm": 1.264153242111206, |
| "learning_rate": 3.8741072003088605e-05, |
| "loss": 1.1658, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.46650044237110916, |
| "grad_norm": 1.3023343086242676, |
| "learning_rate": 3.833971430409884e-05, |
| "loss": 1.1481, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.48258666452183707, |
| "grad_norm": 1.3824670314788818, |
| "learning_rate": 3.7938356605109064e-05, |
| "loss": 1.1221, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.49867288667256493, |
| "grad_norm": 1.4364969730377197, |
| "learning_rate": 3.75369989061193e-05, |
| "loss": 1.1057, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.5147591088232929, |
| "grad_norm": 2.051701545715332, |
| "learning_rate": 3.7134836883083456e-05, |
| "loss": 1.0873, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.5308453309740208, |
| "grad_norm": 1.4329720735549927, |
| "learning_rate": 3.673267486004762e-05, |
| "loss": 1.0607, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.5469315531247486, |
| "grad_norm": 1.4981014728546143, |
| "learning_rate": 3.6330512837011775e-05, |
| "loss": 1.0516, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.5630177752754766, |
| "grad_norm": 1.3012079000473022, |
| "learning_rate": 3.592835081397594e-05, |
| "loss": 1.0317, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.5791039974262044, |
| "grad_norm": 1.401825189590454, |
| "learning_rate": 3.552699311498617e-05, |
| "loss": 1.0183, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.5951902195769324, |
| "grad_norm": 2.0783369541168213, |
| "learning_rate": 3.512483109195033e-05, |
| "loss": 0.9985, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.6112764417276603, |
| "grad_norm": 2.3940794467926025, |
| "learning_rate": 3.4722669068914486e-05, |
| "loss": 0.9698, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.6273626638783881, |
| "grad_norm": 1.4747998714447021, |
| "learning_rate": 3.432050704587865e-05, |
| "loss": 0.9657, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.6434488860291161, |
| "grad_norm": 3.0782012939453125, |
| "learning_rate": 3.391914934688888e-05, |
| "loss": 0.9379, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.659535108179844, |
| "grad_norm": 2.4914307594299316, |
| "learning_rate": 3.3516987323853034e-05, |
| "loss": 0.915, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.6756213303305718, |
| "grad_norm": 2.772120237350464, |
| "learning_rate": 3.3115629624863264e-05, |
| "loss": 0.9047, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.6917075524812998, |
| "grad_norm": 2.519575595855713, |
| "learning_rate": 3.271346760182743e-05, |
| "loss": 0.8688, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.7077937746320276, |
| "grad_norm": 4.085098743438721, |
| "learning_rate": 3.231130557879158e-05, |
| "loss": 0.8581, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.7238799967827556, |
| "grad_norm": 1.4670002460479736, |
| "learning_rate": 3.1909143555755745e-05, |
| "loss": 0.8354, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.7399662189334835, |
| "grad_norm": 2.4749488830566406, |
| "learning_rate": 3.1507785856765975e-05, |
| "loss": 0.8108, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.7560524410842113, |
| "grad_norm": 1.8635029792785645, |
| "learning_rate": 3.110562383373014e-05, |
| "loss": 0.7773, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.7721386632349393, |
| "grad_norm": 3.5713748931884766, |
| "learning_rate": 3.0703461810694294e-05, |
| "loss": 0.756, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.7882248853856672, |
| "grad_norm": 1.8903526067733765, |
| "learning_rate": 3.0301299787658456e-05, |
| "loss": 0.7326, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.8043111075363951, |
| "grad_norm": 8.286703109741211, |
| "learning_rate": 2.9899942088668686e-05, |
| "loss": 0.6948, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.820397329687123, |
| "grad_norm": 2.2209272384643555, |
| "learning_rate": 2.9497780065632845e-05, |
| "loss": 0.6914, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.8364835518378508, |
| "grad_norm": 2.2284536361694336, |
| "learning_rate": 2.9095618042597e-05, |
| "loss": 0.6585, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.8525697739885788, |
| "grad_norm": 3.4615938663482666, |
| "learning_rate": 2.869345601956116e-05, |
| "loss": 0.633, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.8686559961393067, |
| "grad_norm": 3.1158838272094727, |
| "learning_rate": 2.829209832057139e-05, |
| "loss": 0.6181, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.8847422182900346, |
| "grad_norm": 2.3320417404174805, |
| "learning_rate": 2.7889936297535553e-05, |
| "loss": 0.5993, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.9008284404407625, |
| "grad_norm": 1.8331427574157715, |
| "learning_rate": 2.7487774274499712e-05, |
| "loss": 0.5839, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.9169146625914903, |
| "grad_norm": 3.2398369312286377, |
| "learning_rate": 2.708561225146387e-05, |
| "loss": 0.562, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.9330008847422183, |
| "grad_norm": 1.6575061082839966, |
| "learning_rate": 2.66842545524741e-05, |
| "loss": 0.5313, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.9490871068929462, |
| "grad_norm": 2.1604230403900146, |
| "learning_rate": 2.6282092529438264e-05, |
| "loss": 0.5203, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.9651733290436741, |
| "grad_norm": 3.3743808269500732, |
| "learning_rate": 2.5879930506402423e-05, |
| "loss": 0.4938, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.981259551194402, |
| "grad_norm": 3.766514301300049, |
| "learning_rate": 2.5477768483366583e-05, |
| "loss": 0.4724, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.9973457733451299, |
| "grad_norm": 2.26712703704834, |
| "learning_rate": 2.5075606460330742e-05, |
| "loss": 0.4656, |
| "step": 31000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.26554691791534424, |
| "eval_runtime": 1917.4803, |
| "eval_samples_per_second": 345.81, |
| "eval_steps_per_second": 43.227, |
| "step": 31083 |
| }, |
| { |
| "epoch": 1.0134159092737072, |
| "grad_norm": 2.1041958332061768, |
| "learning_rate": 2.467424876134097e-05, |
| "loss": 0.4381, |
| "step": 31500 |
| }, |
| { |
| "epoch": 1.029502131424435, |
| "grad_norm": 1.7629106044769287, |
| "learning_rate": 2.427208673830513e-05, |
| "loss": 0.4298, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.0455883535751629, |
| "grad_norm": 2.5032904148101807, |
| "learning_rate": 2.386992471526929e-05, |
| "loss": 0.4188, |
| "step": 32500 |
| }, |
| { |
| "epoch": 1.0616745757258907, |
| "grad_norm": 1.6467881202697754, |
| "learning_rate": 2.3467762692233446e-05, |
| "loss": 0.3986, |
| "step": 33000 |
| }, |
| { |
| "epoch": 1.0777607978766186, |
| "grad_norm": 1.957220435142517, |
| "learning_rate": 2.3065600669197606e-05, |
| "loss": 0.382, |
| "step": 33500 |
| }, |
| { |
| "epoch": 1.0938470200273467, |
| "grad_norm": 1.6566946506500244, |
| "learning_rate": 2.2663438646161765e-05, |
| "loss": 0.3689, |
| "step": 34000 |
| }, |
| { |
| "epoch": 1.1099332421780745, |
| "grad_norm": 2.081613540649414, |
| "learning_rate": 2.2261276623125928e-05, |
| "loss": 0.3603, |
| "step": 34500 |
| }, |
| { |
| "epoch": 1.1260194643288024, |
| "grad_norm": 2.155226945877075, |
| "learning_rate": 2.1859918924136157e-05, |
| "loss": 0.3478, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1.1421056864795303, |
| "grad_norm": 1.9459590911865234, |
| "learning_rate": 2.1457756901100317e-05, |
| "loss": 0.3315, |
| "step": 35500 |
| }, |
| { |
| "epoch": 1.1581919086302581, |
| "grad_norm": 2.3381567001342773, |
| "learning_rate": 2.1055594878064476e-05, |
| "loss": 0.3259, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.1742781307809862, |
| "grad_norm": 1.4302254915237427, |
| "learning_rate": 2.0653432855028635e-05, |
| "loss": 0.3168, |
| "step": 36500 |
| }, |
| { |
| "epoch": 1.190364352931714, |
| "grad_norm": 1.1770597696304321, |
| "learning_rate": 2.0251270831992795e-05, |
| "loss": 0.3082, |
| "step": 37000 |
| }, |
| { |
| "epoch": 1.206450575082442, |
| "grad_norm": 1.7475298643112183, |
| "learning_rate": 1.9849913133003024e-05, |
| "loss": 0.3014, |
| "step": 37500 |
| }, |
| { |
| "epoch": 1.2225367972331698, |
| "grad_norm": 1.2397468090057373, |
| "learning_rate": 1.9447751109967187e-05, |
| "loss": 0.288, |
| "step": 38000 |
| }, |
| { |
| "epoch": 1.2386230193838976, |
| "grad_norm": 1.6603740453720093, |
| "learning_rate": 1.9045589086931343e-05, |
| "loss": 0.2797, |
| "step": 38500 |
| }, |
| { |
| "epoch": 1.2547092415346257, |
| "grad_norm": 1.7009538412094116, |
| "learning_rate": 1.8643427063895502e-05, |
| "loss": 0.275, |
| "step": 39000 |
| }, |
| { |
| "epoch": 1.2707954636853536, |
| "grad_norm": 1.4941717386245728, |
| "learning_rate": 1.8241265040859662e-05, |
| "loss": 0.2623, |
| "step": 39500 |
| }, |
| { |
| "epoch": 1.2868816858360814, |
| "grad_norm": 1.941115140914917, |
| "learning_rate": 1.7839907341869895e-05, |
| "loss": 0.2572, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.3029679079868093, |
| "grad_norm": 1.487726092338562, |
| "learning_rate": 1.7437745318834054e-05, |
| "loss": 0.2502, |
| "step": 40500 |
| }, |
| { |
| "epoch": 1.3190541301375371, |
| "grad_norm": 1.4628674983978271, |
| "learning_rate": 1.7035583295798213e-05, |
| "loss": 0.2437, |
| "step": 41000 |
| }, |
| { |
| "epoch": 1.3351403522882652, |
| "grad_norm": 1.401607632637024, |
| "learning_rate": 1.663342127276237e-05, |
| "loss": 0.2421, |
| "step": 41500 |
| }, |
| { |
| "epoch": 1.351226574438993, |
| "grad_norm": 1.1497563123703003, |
| "learning_rate": 1.623125924972653e-05, |
| "loss": 0.231, |
| "step": 42000 |
| }, |
| { |
| "epoch": 1.367312796589721, |
| "grad_norm": 1.322836995124817, |
| "learning_rate": 1.5829097226690688e-05, |
| "loss": 0.2261, |
| "step": 42500 |
| }, |
| { |
| "epoch": 1.3833990187404488, |
| "grad_norm": 1.5328525304794312, |
| "learning_rate": 1.542773952770092e-05, |
| "loss": 0.2177, |
| "step": 43000 |
| }, |
| { |
| "epoch": 1.3994852408911767, |
| "grad_norm": 1.7748241424560547, |
| "learning_rate": 1.502557750466508e-05, |
| "loss": 0.2186, |
| "step": 43500 |
| }, |
| { |
| "epoch": 1.4155714630419047, |
| "grad_norm": 1.6542141437530518, |
| "learning_rate": 1.4623415481629241e-05, |
| "loss": 0.2138, |
| "step": 44000 |
| }, |
| { |
| "epoch": 1.4316576851926326, |
| "grad_norm": 1.3098843097686768, |
| "learning_rate": 1.4221253458593397e-05, |
| "loss": 0.211, |
| "step": 44500 |
| }, |
| { |
| "epoch": 1.4477439073433604, |
| "grad_norm": 1.345651626586914, |
| "learning_rate": 1.3819091435557557e-05, |
| "loss": 0.2027, |
| "step": 45000 |
| }, |
| { |
| "epoch": 1.4638301294940883, |
| "grad_norm": 1.4520297050476074, |
| "learning_rate": 1.3416929412521718e-05, |
| "loss": 0.2039, |
| "step": 45500 |
| }, |
| { |
| "epoch": 1.4799163516448162, |
| "grad_norm": 1.5913499593734741, |
| "learning_rate": 1.3014767389485877e-05, |
| "loss": 0.1939, |
| "step": 46000 |
| }, |
| { |
| "epoch": 1.4960025737955442, |
| "grad_norm": 1.1803226470947266, |
| "learning_rate": 1.2612605366450037e-05, |
| "loss": 0.1887, |
| "step": 46500 |
| }, |
| { |
| "epoch": 1.5120887959462719, |
| "grad_norm": 1.1462236642837524, |
| "learning_rate": 1.2210443343414194e-05, |
| "loss": 0.1883, |
| "step": 47000 |
| }, |
| { |
| "epoch": 1.528175018097, |
| "grad_norm": 0.8483968377113342, |
| "learning_rate": 1.1808281320378355e-05, |
| "loss": 0.1809, |
| "step": 47500 |
| }, |
| { |
| "epoch": 1.5442612402477278, |
| "grad_norm": 1.1205823421478271, |
| "learning_rate": 1.1406119297342515e-05, |
| "loss": 0.1813, |
| "step": 48000 |
| }, |
| { |
| "epoch": 1.5603474623984557, |
| "grad_norm": 1.417622447013855, |
| "learning_rate": 1.1003957274306672e-05, |
| "loss": 0.1788, |
| "step": 48500 |
| }, |
| { |
| "epoch": 1.5764336845491838, |
| "grad_norm": 1.179103970527649, |
| "learning_rate": 1.0602599575316904e-05, |
| "loss": 0.1809, |
| "step": 49000 |
| }, |
| { |
| "epoch": 1.5925199066999114, |
| "grad_norm": 1.1092889308929443, |
| "learning_rate": 1.0200437552281065e-05, |
| "loss": 0.1734, |
| "step": 49500 |
| }, |
| { |
| "epoch": 1.6086061288506395, |
| "grad_norm": 1.0196574926376343, |
| "learning_rate": 9.798275529245222e-06, |
| "loss": 0.1688, |
| "step": 50000 |
| }, |
| { |
| "epoch": 1.6246923510013673, |
| "grad_norm": 1.1376862525939941, |
| "learning_rate": 9.396113506209382e-06, |
| "loss": 0.1703, |
| "step": 50500 |
| }, |
| { |
| "epoch": 1.6407785731520952, |
| "grad_norm": 0.8885149955749512, |
| "learning_rate": 8.995560131265685e-06, |
| "loss": 0.1691, |
| "step": 51000 |
| }, |
| { |
| "epoch": 1.6568647953028233, |
| "grad_norm": 1.2574944496154785, |
| "learning_rate": 8.593398108229844e-06, |
| "loss": 0.1615, |
| "step": 51500 |
| }, |
| { |
| "epoch": 1.672951017453551, |
| "grad_norm": 1.2620723247528076, |
| "learning_rate": 8.191236085194004e-06, |
| "loss": 0.1593, |
| "step": 52000 |
| }, |
| { |
| "epoch": 1.689037239604279, |
| "grad_norm": 1.551480770111084, |
| "learning_rate": 7.789074062158163e-06, |
| "loss": 0.1639, |
| "step": 52500 |
| }, |
| { |
| "epoch": 1.7051234617550068, |
| "grad_norm": 1.5938962697982788, |
| "learning_rate": 7.386912039122322e-06, |
| "loss": 0.1587, |
| "step": 53000 |
| }, |
| { |
| "epoch": 1.7212096839057347, |
| "grad_norm": 1.0503953695297241, |
| "learning_rate": 6.984750016086482e-06, |
| "loss": 0.1599, |
| "step": 53500 |
| }, |
| { |
| "epoch": 1.7372959060564628, |
| "grad_norm": 1.1205036640167236, |
| "learning_rate": 6.583392317096712e-06, |
| "loss": 0.1541, |
| "step": 54000 |
| }, |
| { |
| "epoch": 1.7533821282071904, |
| "grad_norm": 0.7524433732032776, |
| "learning_rate": 6.181230294060872e-06, |
| "loss": 0.1521, |
| "step": 54500 |
| }, |
| { |
| "epoch": 1.7694683503579185, |
| "grad_norm": 0.9619775414466858, |
| "learning_rate": 5.779068271025031e-06, |
| "loss": 0.1521, |
| "step": 55000 |
| }, |
| { |
| "epoch": 1.7855545725086464, |
| "grad_norm": 0.9406844973564148, |
| "learning_rate": 5.37690624798919e-06, |
| "loss": 0.1509, |
| "step": 55500 |
| }, |
| { |
| "epoch": 1.8016407946593742, |
| "grad_norm": 0.9363726377487183, |
| "learning_rate": 4.975548548999421e-06, |
| "loss": 0.1513, |
| "step": 56000 |
| }, |
| { |
| "epoch": 1.8177270168101023, |
| "grad_norm": 0.9941402673721313, |
| "learning_rate": 4.573386525963581e-06, |
| "loss": 0.1484, |
| "step": 56500 |
| }, |
| { |
| "epoch": 1.83381323896083, |
| "grad_norm": 1.3756345510482788, |
| "learning_rate": 4.17122450292774e-06, |
| "loss": 0.1509, |
| "step": 57000 |
| }, |
| { |
| "epoch": 1.849899461111558, |
| "grad_norm": 1.0644595623016357, |
| "learning_rate": 3.7690624798918986e-06, |
| "loss": 0.1486, |
| "step": 57500 |
| }, |
| { |
| "epoch": 1.8659856832622859, |
| "grad_norm": 1.070890188217163, |
| "learning_rate": 3.3669004568560584e-06, |
| "loss": 0.1462, |
| "step": 58000 |
| }, |
| { |
| "epoch": 1.8820719054130137, |
| "grad_norm": 1.3034768104553223, |
| "learning_rate": 2.9647384338202173e-06, |
| "loss": 0.1481, |
| "step": 58500 |
| }, |
| { |
| "epoch": 1.8981581275637418, |
| "grad_norm": 1.127517580986023, |
| "learning_rate": 2.5625764107843767e-06, |
| "loss": 0.1451, |
| "step": 59000 |
| }, |
| { |
| "epoch": 1.9142443497144694, |
| "grad_norm": 0.9431403279304504, |
| "learning_rate": 2.1604143877485364e-06, |
| "loss": 0.1458, |
| "step": 59500 |
| }, |
| { |
| "epoch": 1.9303305718651975, |
| "grad_norm": 1.271483302116394, |
| "learning_rate": 1.7590566887587673e-06, |
| "loss": 0.1463, |
| "step": 60000 |
| }, |
| { |
| "epoch": 1.9464167940159254, |
| "grad_norm": 0.7327952980995178, |
| "learning_rate": 1.3568946657229264e-06, |
| "loss": 0.1434, |
| "step": 60500 |
| }, |
| { |
| "epoch": 1.9625030161666532, |
| "grad_norm": 1.0670543909072876, |
| "learning_rate": 9.547326426870858e-07, |
| "loss": 0.1424, |
| "step": 61000 |
| }, |
| { |
| "epoch": 1.9785892383173813, |
| "grad_norm": 1.2705425024032593, |
| "learning_rate": 5.525706196512451e-07, |
| "loss": 0.1431, |
| "step": 61500 |
| }, |
| { |
| "epoch": 1.994675460468109, |
| "grad_norm": 0.9267213344573975, |
| "learning_rate": 1.5040859661540443e-07, |
| "loss": 0.1418, |
| "step": 62000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 62164, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.3461207231391334e+17, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|