{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9697529438928654,
  "eval_steps": 500,
  "global_step": 525,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0018471484645578389,
      "grad_norm": 0.2646295130252838,
      "learning_rate": 3.636363636363636e-06,
      "loss": 0.9117,
      "step": 1
    },
    {
      "epoch": 0.009235742322789195,
      "grad_norm": 0.29179853200912476,
      "learning_rate": 1.8181818181818182e-05,
      "loss": 0.9579,
      "step": 5
    },
    {
      "epoch": 0.01847148464557839,
      "grad_norm": 0.30512726306915283,
      "learning_rate": 3.6363636363636364e-05,
      "loss": 0.9489,
      "step": 10
    },
    {
      "epoch": 0.027707226968367582,
      "grad_norm": 0.08514489233493805,
      "learning_rate": 5.4545454545454546e-05,
      "loss": 0.9274,
      "step": 15
    },
    {
      "epoch": 0.03694296929115678,
      "grad_norm": 0.07719692587852478,
      "learning_rate": 7.272727272727273e-05,
      "loss": 0.9257,
      "step": 20
    },
    {
      "epoch": 0.04617871161394597,
      "grad_norm": 0.08602738380432129,
      "learning_rate": 9.090909090909092e-05,
      "loss": 0.9255,
      "step": 25
    },
    {
      "epoch": 0.055414453936735164,
      "grad_norm": 0.08664494752883911,
      "learning_rate": 0.00010909090909090909,
      "loss": 0.8889,
      "step": 30
    },
    {
      "epoch": 0.06465019625952435,
      "grad_norm": 0.07322084158658981,
      "learning_rate": 0.00012727272727272728,
      "loss": 0.8809,
      "step": 35
    },
    {
      "epoch": 0.07388593858231356,
      "grad_norm": 0.08361256867647171,
      "learning_rate": 0.00014545454545454546,
      "loss": 0.8901,
      "step": 40
    },
    {
      "epoch": 0.08312168090510275,
      "grad_norm": 0.0712098553776741,
      "learning_rate": 0.00016363636363636366,
      "loss": 0.8712,
      "step": 45
    },
    {
      "epoch": 0.09235742322789194,
      "grad_norm": 0.07038867473602295,
      "learning_rate": 0.00018181818181818183,
      "loss": 0.8779,
      "step": 50
    },
    {
      "epoch": 0.10159316555068114,
      "grad_norm": 0.08133558183908463,
      "learning_rate": 0.0002,
      "loss": 0.8549,
      "step": 55
    },
    {
      "epoch": 0.11082890787347033,
      "grad_norm": 0.07199383527040482,
      "learning_rate": 0.00019994777247895855,
      "loss": 0.8655,
      "step": 60
    },
    {
      "epoch": 0.12006465019625952,
      "grad_norm": 0.08243429660797119,
      "learning_rate": 0.00019979114447011323,
      "loss": 0.891,
      "step": 65
    },
    {
      "epoch": 0.1293003925190487,
      "grad_norm": 0.0855724886059761,
      "learning_rate": 0.00019953027957931658,
      "loss": 0.8675,
      "step": 70
    },
    {
      "epoch": 0.1385361348418379,
      "grad_norm": 0.0847950428724289,
      "learning_rate": 0.00019916545029310012,
      "loss": 0.8579,
      "step": 75
    },
    {
      "epoch": 0.1477718771646271,
      "grad_norm": 0.07739102840423584,
      "learning_rate": 0.00019869703769404828,
      "loss": 0.8643,
      "step": 80
    },
    {
      "epoch": 0.1570076194874163,
      "grad_norm": 0.08714427053928375,
      "learning_rate": 0.00019812553106273847,
      "loss": 0.8766,
      "step": 85
    },
    {
      "epoch": 0.1662433618102055,
      "grad_norm": 0.06926970183849335,
      "learning_rate": 0.00019745152736666302,
      "loss": 0.8539,
      "step": 90
    },
    {
      "epoch": 0.1754791041329947,
      "grad_norm": 0.08098109066486359,
      "learning_rate": 0.0001966757306366662,
      "loss": 0.8823,
      "step": 95
    },
    {
      "epoch": 0.18471484645578387,
      "grad_norm": 0.09138432890176773,
      "learning_rate": 0.0001957989512315489,
      "loss": 0.8601,
      "step": 100
    },
    {
      "epoch": 0.19395058877857307,
      "grad_norm": 0.09067590534687042,
      "learning_rate": 0.00019482210499160765,
      "loss": 0.8551,
      "step": 105
    },
    {
      "epoch": 0.20318633110136228,
      "grad_norm": 0.08557430654764175,
      "learning_rate": 0.0001937462122819935,
      "loss": 0.8582,
      "step": 110
    },
    {
      "epoch": 0.21242207342415145,
      "grad_norm": 0.07583803683519363,
      "learning_rate": 0.00019257239692688907,
      "loss": 0.8468,
      "step": 115
    },
    {
      "epoch": 0.22165781574694066,
      "grad_norm": 0.10376883298158646,
      "learning_rate": 0.00019130188503561741,
      "loss": 0.8722,
      "step": 120
    },
    {
      "epoch": 0.23089355806972986,
      "grad_norm": 0.07388196140527725,
      "learning_rate": 0.00018993600372190932,
      "loss": 0.8715,
      "step": 125
    },
    {
      "epoch": 0.24012930039251904,
      "grad_norm": 0.07768449187278748,
      "learning_rate": 0.00018847617971766577,
      "loss": 0.8632,
      "step": 130
    },
    {
      "epoch": 0.24936504271530824,
      "grad_norm": 0.07902154326438904,
      "learning_rate": 0.00018692393788266479,
      "loss": 0.8746,
      "step": 135
    },
    {
      "epoch": 0.2586007850380974,
      "grad_norm": 0.07420278340578079,
      "learning_rate": 0.0001852808996117683,
      "loss": 0.8676,
      "step": 140
    },
    {
      "epoch": 0.26783652736088664,
      "grad_norm": 0.07562129944562912,
      "learning_rate": 0.00018354878114129367,
      "loss": 0.8829,
      "step": 145
    },
    {
      "epoch": 0.2770722696836758,
      "grad_norm": 0.07764951884746552,
      "learning_rate": 0.00018172939175631808,
      "loss": 0.8766,
      "step": 150
    },
    {
      "epoch": 0.286308012006465,
      "grad_norm": 0.07714657485485077,
      "learning_rate": 0.0001798246319007893,
      "loss": 0.8802,
      "step": 155
    },
    {
      "epoch": 0.2955437543292542,
      "grad_norm": 0.08166080713272095,
      "learning_rate": 0.00017783649119241602,
      "loss": 0.8504,
      "step": 160
    },
    {
      "epoch": 0.3047794966520434,
      "grad_norm": 0.07295581698417664,
      "learning_rate": 0.0001757670463444118,
      "loss": 0.8578,
      "step": 165
    },
    {
      "epoch": 0.3140152389748326,
      "grad_norm": 0.08463213592767715,
      "learning_rate": 0.00017361845899626355,
      "loss": 0.8465,
      "step": 170
    },
    {
      "epoch": 0.3232509812976218,
      "grad_norm": 0.07825674116611481,
      "learning_rate": 0.00017139297345578994,
      "loss": 0.8581,
      "step": 175
    },
    {
      "epoch": 0.332486723620411,
      "grad_norm": 0.08157385140657425,
      "learning_rate": 0.0001690929143548488,
      "loss": 0.8598,
      "step": 180
    },
    {
      "epoch": 0.34172246594320016,
      "grad_norm": 0.08528893440961838,
      "learning_rate": 0.00016672068422114196,
      "loss": 0.85,
      "step": 185
    },
    {
      "epoch": 0.3509582082659894,
      "grad_norm": 0.07343052327632904,
      "learning_rate": 0.00016427876096865394,
      "loss": 0.8515,
      "step": 190
    },
    {
      "epoch": 0.36019395058877857,
      "grad_norm": 0.07675015926361084,
      "learning_rate": 0.00016176969530934572,
      "loss": 0.8628,
      "step": 195
    },
    {
      "epoch": 0.36942969291156774,
      "grad_norm": 0.07422856241464615,
      "learning_rate": 0.0001591961080888076,
      "loss": 0.866,
      "step": 200
    },
    {
      "epoch": 0.378665435234357,
      "grad_norm": 0.0853537917137146,
      "learning_rate": 0.00015656068754865387,
      "loss": 0.8652,
      "step": 205
    },
    {
      "epoch": 0.38790117755714615,
      "grad_norm": 0.07879694551229477,
      "learning_rate": 0.0001538661865185188,
      "loss": 0.8613,
      "step": 210
    },
    {
      "epoch": 0.3971369198799353,
      "grad_norm": 0.0811944380402565,
      "learning_rate": 0.00015111541954058734,
      "loss": 0.8723,
      "step": 215
    },
    {
      "epoch": 0.40637266220272455,
      "grad_norm": 0.08038283884525299,
      "learning_rate": 0.00014831125992966385,
      "loss": 0.8709,
      "step": 220
    },
    {
      "epoch": 0.41560840452551373,
      "grad_norm": 0.07797664403915405,
      "learning_rate": 0.00014545663677185006,
      "loss": 0.8583,
      "step": 225
    },
    {
      "epoch": 0.4248441468483029,
      "grad_norm": 0.08307761698961258,
      "learning_rate": 0.00014255453186496673,
      "loss": 0.8467,
      "step": 230
    },
    {
      "epoch": 0.43407988917109214,
      "grad_norm": 0.07691530138254166,
      "learning_rate": 0.0001396079766039157,
      "loss": 0.8435,
      "step": 235
    },
    {
      "epoch": 0.4433156314938813,
      "grad_norm": 0.09281527251005173,
      "learning_rate": 0.0001366200488142348,
      "loss": 0.8605,
      "step": 240
    },
    {
      "epoch": 0.4525513738166705,
      "grad_norm": 0.07799265533685684,
      "learning_rate": 0.00013359386953715421,
      "loss": 0.85,
      "step": 245
    },
    {
      "epoch": 0.4617871161394597,
      "grad_norm": 0.07675463706254959,
      "learning_rate": 0.00013053259976951133,
      "loss": 0.8434,
      "step": 250
    },
    {
      "epoch": 0.4710228584622489,
      "grad_norm": 0.09029978513717651,
      "learning_rate": 0.00012743943716193016,
      "loss": 0.8587,
      "step": 255
    },
    {
      "epoch": 0.48025860078503807,
      "grad_norm": 0.07997170835733414,
      "learning_rate": 0.00012431761267871417,
      "loss": 0.8436,
      "step": 260
    },
    {
      "epoch": 0.4894943431078273,
      "grad_norm": 0.08595962822437286,
      "learning_rate": 0.0001211703872229411,
      "loss": 0.8682,
      "step": 265
    },
    {
      "epoch": 0.4987300854306165,
      "grad_norm": 0.08949116617441177,
      "learning_rate": 0.00011800104823028515,
      "loss": 0.8663,
      "step": 270
    },
    {
      "epoch": 0.5079658277534057,
      "grad_norm": 0.08509814739227295,
      "learning_rate": 0.0001148129062351249,
      "loss": 0.8359,
      "step": 275
    },
    {
      "epoch": 0.5172015700761948,
      "grad_norm": 0.08007588982582092,
      "learning_rate": 0.00011160929141252303,
      "loss": 0.8494,
      "step": 280
    },
    {
      "epoch": 0.5264373123989841,
      "grad_norm": 0.07660423964262009,
      "learning_rate": 0.00010839355009969068,
      "loss": 0.8403,
      "step": 285
    },
    {
      "epoch": 0.5356730547217733,
      "grad_norm": 0.07779201865196228,
      "learning_rate": 0.00010516904130056946,
      "loss": 0.863,
      "step": 290
    },
    {
      "epoch": 0.5449087970445624,
      "grad_norm": 0.07425861805677414,
      "learning_rate": 0.00010193913317718244,
      "loss": 0.8732,
      "step": 295
    },
    {
      "epoch": 0.5541445393673516,
      "grad_norm": 0.08780544251203537,
      "learning_rate": 9.870719953141917e-05,
      "loss": 0.856,
      "step": 300
    },
    {
      "epoch": 0.5633802816901409,
      "grad_norm": 0.07546891272068024,
      "learning_rate": 9.547661628092937e-05,
      "loss": 0.8418,
      "step": 305
    },
    {
      "epoch": 0.57261602401293,
      "grad_norm": 0.08621185272932053,
      "learning_rate": 9.225075793280692e-05,
      "loss": 0.8463,
      "step": 310
    },
    {
      "epoch": 0.5818517663357192,
      "grad_norm": 0.07618840038776398,
      "learning_rate": 8.903299405874684e-05,
      "loss": 0.8257,
      "step": 315
    },
    {
      "epoch": 0.5910875086585085,
      "grad_norm": 0.07749903202056885,
      "learning_rate": 8.582668577535797e-05,
      "loss": 0.8442,
      "step": 320
    },
    {
      "epoch": 0.6003232509812976,
      "grad_norm": 0.08626076579093933,
      "learning_rate": 8.263518223330697e-05,
      "loss": 0.8739,
      "step": 325
    },
    {
      "epoch": 0.6095589933040868,
      "grad_norm": 0.07912192493677139,
      "learning_rate": 7.94618171189618e-05,
      "loss": 0.816,
      "step": 330
    },
    {
      "epoch": 0.618794735626876,
      "grad_norm": 0.08324088156223297,
      "learning_rate": 7.630990517218808e-05,
      "loss": 0.853,
      "step": 335
    },
    {
      "epoch": 0.6280304779496652,
      "grad_norm": 0.08155480027198792,
      "learning_rate": 7.318273872393625e-05,
      "loss": 0.86,
      "step": 340
    },
    {
      "epoch": 0.6372662202724544,
      "grad_norm": 0.10541233420372009,
      "learning_rate": 7.008358425723585e-05,
      "loss": 0.8674,
      "step": 345
    },
    {
      "epoch": 0.6465019625952436,
      "grad_norm": 0.08856544643640518,
      "learning_rate": 6.701567899518924e-05,
      "loss": 0.8542,
      "step": 350
    },
    {
      "epoch": 0.6557377049180327,
      "grad_norm": 0.0975981131196022,
      "learning_rate": 6.398222751952899e-05,
      "loss": 0.8441,
      "step": 355
    },
    {
      "epoch": 0.664973447240822,
      "grad_norm": 0.0872284546494484,
      "learning_rate": 6.098639842327052e-05,
      "loss": 0.8661,
      "step": 360
    },
    {
      "epoch": 0.6742091895636112,
      "grad_norm": 0.08079314976930618,
      "learning_rate": 5.80313210009571e-05,
      "loss": 0.8465,
      "step": 365
    },
    {
      "epoch": 0.6834449318864003,
      "grad_norm": 0.08326224982738495,
      "learning_rate": 5.5120081979953785e-05,
      "loss": 0.8551,
      "step": 370
    },
    {
      "epoch": 0.6926806742091896,
      "grad_norm": 0.07942940294742584,
      "learning_rate": 5.22557222962051e-05,
      "loss": 0.8673,
      "step": 375
    },
    {
      "epoch": 0.7019164165319788,
      "grad_norm": 0.08273901045322418,
      "learning_rate": 4.9441233917824106e-05,
      "loss": 0.8424,
      "step": 380
    },
    {
      "epoch": 0.7111521588547679,
      "grad_norm": 0.07538026571273804,
      "learning_rate": 4.66795567198309e-05,
      "loss": 0.8448,
      "step": 385
    },
    {
      "epoch": 0.7203879011775571,
      "grad_norm": 0.07920888811349869,
      "learning_rate": 4.397357541330476e-05,
      "loss": 0.874,
      "step": 390
    },
    {
      "epoch": 0.7296236435003464,
      "grad_norm": 0.09400610625743866,
      "learning_rate": 4.132611653215822e-05,
      "loss": 0.8487,
      "step": 395
    },
    {
      "epoch": 0.7388593858231355,
      "grad_norm": 0.08254476636648178,
      "learning_rate": 3.873994548067972e-05,
      "loss": 0.836,
      "step": 400
    },
    {
      "epoch": 0.7480951281459247,
      "grad_norm": 0.08779824525117874,
      "learning_rate": 3.621776364492939e-05,
      "loss": 0.8621,
      "step": 405
    },
    {
      "epoch": 0.757330870468714,
      "grad_norm": 0.08715569227933884,
      "learning_rate": 3.376220557100523e-05,
      "loss": 0.8413,
      "step": 410
    },
    {
      "epoch": 0.7665666127915031,
      "grad_norm": 0.08005767315626144,
      "learning_rate": 3.137583621312665e-05,
      "loss": 0.8563,
      "step": 415
    },
    {
      "epoch": 0.7758023551142923,
      "grad_norm": 0.08362836390733719,
      "learning_rate": 2.906114825441072e-05,
      "loss": 0.8431,
      "step": 420
    },
    {
      "epoch": 0.7850380974370815,
      "grad_norm": 0.08525697141885757,
      "learning_rate": 2.6820559503138797e-05,
      "loss": 0.8619,
      "step": 425
    },
    {
      "epoch": 0.7942738397598706,
      "grad_norm": 0.08668297529220581,
      "learning_rate": 2.465641036723393e-05,
      "loss": 0.8525,
      "step": 430
    },
    {
      "epoch": 0.8035095820826599,
      "grad_norm": 0.08923713862895966,
      "learning_rate": 2.2570961409586754e-05,
      "loss": 0.854,
      "step": 435
    },
    {
      "epoch": 0.8127453244054491,
      "grad_norm": 0.08294524252414703,
      "learning_rate": 2.0566390986783646e-05,
      "loss": 0.867,
      "step": 440
    },
    {
      "epoch": 0.8219810667282382,
      "grad_norm": 0.08329101651906967,
      "learning_rate": 1.864479297370325e-05,
      "loss": 0.8454,
      "step": 445
    },
    {
      "epoch": 0.8312168090510275,
      "grad_norm": 0.08947557955980301,
      "learning_rate": 1.6808174576358848e-05,
      "loss": 0.8663,
      "step": 450
    },
    {
      "epoch": 0.8404525513738167,
      "grad_norm": 0.08388309925794601,
      "learning_rate": 1.505845423527027e-05,
      "loss": 0.8663,
      "step": 455
    },
    {
      "epoch": 0.8496882936966058,
      "grad_norm": 0.07886148244142532,
      "learning_rate": 1.339745962155613e-05,
      "loss": 0.8484,
      "step": 460
    },
    {
      "epoch": 0.858924036019395,
      "grad_norm": 0.08021736145019531,
      "learning_rate": 1.18269257278392e-05,
      "loss": 0.8613,
      "step": 465
    },
    {
      "epoch": 0.8681597783421843,
      "grad_norm": 0.0815872773528099,
      "learning_rate": 1.0348493055959062e-05,
      "loss": 0.8361,
      "step": 470
    },
    {
      "epoch": 0.8773955206649734,
      "grad_norm": 0.08512595295906067,
      "learning_rate": 8.963705903385345e-06,
      "loss": 0.8236,
      "step": 475
    },
    {
      "epoch": 0.8866312629877626,
      "grad_norm": 0.07890893518924713,
      "learning_rate": 7.674010750120964e-06,
      "loss": 0.8398,
      "step": 480
    },
    {
      "epoch": 0.8958670053105519,
      "grad_norm": 0.08390273153781891,
      "learning_rate": 6.480754747781037e-06,
      "loss": 0.8392,
      "step": 485
    },
    {
      "epoch": 0.905102747633341,
      "grad_norm": 0.07941864430904388,
      "learning_rate": 5.385184312424974e-06,
      "loss": 0.8344,
      "step": 490
    },
    {
      "epoch": 0.9143384899561302,
      "grad_norm": 0.0822635293006897,
      "learning_rate": 4.3884438226120424e-06,
      "loss": 0.8492,
      "step": 495
    },
    {
      "epoch": 0.9235742322789194,
      "grad_norm": 0.07574615627527237,
      "learning_rate": 3.4915744240403558e-06,
      "loss": 0.8499,
      "step": 500
    },
    {
      "epoch": 0.9328099746017086,
      "grad_norm": 0.08466410636901855,
      "learning_rate": 2.6955129420176196e-06,
      "loss": 0.848,
      "step": 505
    },
    {
      "epoch": 0.9420457169244978,
      "grad_norm": 0.0831914022564888,
      "learning_rate": 2.0010909028998827e-06,
      "loss": 0.8442,
      "step": 510
    },
    {
      "epoch": 0.951281459247287,
      "grad_norm": 0.08431433886289597,
      "learning_rate": 1.409033665520354e-06,
      "loss": 0.8491,
      "step": 515
    },
    {
      "epoch": 0.9605172015700761,
      "grad_norm": 0.08757560700178146,
      "learning_rate": 9.199596635154683e-07,
      "loss": 0.8526,
      "step": 520
    },
    {
      "epoch": 0.9697529438928654,
      "grad_norm": 0.0839475765824318,
      "learning_rate": 5.343797593398536e-07,
      "loss": 0.8663,
      "step": 525
    },
    {
      "epoch": 0.9697529438928654,
      "step": 525,
      "total_flos": 2.6262151886631076e+18,
      "train_loss": 0.0,
      "train_runtime": 0.0108,
      "train_samples_per_second": 9612825.336,
      "train_steps_per_second": 37551.544
    }
  ],
  "logging_steps": 5,
  "max_steps": 406,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.6262151886631076e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}