Qwen3-4B-Instruct-LNS-Science-ES / trainer_state.json
toroe's picture
Upload folder using huggingface_hub
a41af47 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.7220420251022422,
"eval_steps": 500,
"global_step": 1280,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0028204766605556338,
"grad_norm": 12.768223762512207,
"learning_rate": 1.8691588785046728e-06,
"loss": 1.538864517211914,
"mean_token_accuracy": 0.6813269466161728,
"num_tokens": 5217337.0,
"step": 5
},
{
"epoch": 0.0056409533211112676,
"grad_norm": 3.0813796520233154,
"learning_rate": 4.205607476635514e-06,
"loss": 1.3301036834716797,
"mean_token_accuracy": 0.6994958072900772,
"num_tokens": 10428364.0,
"step": 10
},
{
"epoch": 0.008461429981666902,
"grad_norm": 3.0398967266082764,
"learning_rate": 6.542056074766355e-06,
"loss": 1.0892114639282227,
"mean_token_accuracy": 0.7210752069950104,
"num_tokens": 15653051.0,
"step": 15
},
{
"epoch": 0.011281906642222535,
"grad_norm": 1.0566803216934204,
"learning_rate": 8.878504672897196e-06,
"loss": 1.0293330192565917,
"mean_token_accuracy": 0.726907679438591,
"num_tokens": 20879685.0,
"step": 20
},
{
"epoch": 0.01410238330277817,
"grad_norm": 0.6501312255859375,
"learning_rate": 1.1214953271028037e-05,
"loss": 0.980504322052002,
"mean_token_accuracy": 0.7339713454246521,
"num_tokens": 26070702.0,
"step": 25
},
{
"epoch": 0.016922859963333804,
"grad_norm": 0.5271162390708923,
"learning_rate": 1.3551401869158877e-05,
"loss": 0.9426581382751464,
"mean_token_accuracy": 0.7401848375797272,
"num_tokens": 31272328.0,
"step": 30
},
{
"epoch": 0.019743336623889437,
"grad_norm": 0.46021828055381775,
"learning_rate": 1.588785046728972e-05,
"loss": 0.9113862991333008,
"mean_token_accuracy": 0.7477431446313858,
"num_tokens": 36488067.0,
"step": 35
},
{
"epoch": 0.02256381328444507,
"grad_norm": 0.3989814817905426,
"learning_rate": 1.822429906542056e-05,
"loss": 0.9023752212524414,
"mean_token_accuracy": 0.748314619064331,
"num_tokens": 41717937.0,
"step": 40
},
{
"epoch": 0.025384289945000704,
"grad_norm": 0.3570704460144043,
"learning_rate": 2.05607476635514e-05,
"loss": 0.8852872848510742,
"mean_token_accuracy": 0.7515153616666794,
"num_tokens": 46942356.0,
"step": 45
},
{
"epoch": 0.02820476660555634,
"grad_norm": 0.3649837076663971,
"learning_rate": 2.2897196261682244e-05,
"loss": 0.8809089660644531,
"mean_token_accuracy": 0.752334040403366,
"num_tokens": 52171635.0,
"step": 50
},
{
"epoch": 0.031025243266111974,
"grad_norm": 0.3402465879917145,
"learning_rate": 2.5233644859813084e-05,
"loss": 0.8583005905151367,
"mean_token_accuracy": 0.7575921297073365,
"num_tokens": 57376988.0,
"step": 55
},
{
"epoch": 0.03384571992666761,
"grad_norm": 0.3836044669151306,
"learning_rate": 2.7570093457943924e-05,
"loss": 0.8582182884216308,
"mean_token_accuracy": 0.7565034329891205,
"num_tokens": 62588530.0,
"step": 60
},
{
"epoch": 0.03666619658722324,
"grad_norm": 0.5010499954223633,
"learning_rate": 2.9906542056074764e-05,
"loss": 0.8590673446655274,
"mean_token_accuracy": 0.7561922013759613,
"num_tokens": 67790875.0,
"step": 65
},
{
"epoch": 0.039486673247778874,
"grad_norm": 0.46556833386421204,
"learning_rate": 3.224299065420561e-05,
"loss": 0.8575173377990722,
"mean_token_accuracy": 0.756476667523384,
"num_tokens": 72993985.0,
"step": 70
},
{
"epoch": 0.04230714990833451,
"grad_norm": 0.5273818373680115,
"learning_rate": 3.457943925233645e-05,
"loss": 0.8381841659545899,
"mean_token_accuracy": 0.7609980225563049,
"num_tokens": 78201847.0,
"step": 75
},
{
"epoch": 0.04512762656889014,
"grad_norm": 0.3794383704662323,
"learning_rate": 3.691588785046729e-05,
"loss": 0.8465839385986328,
"mean_token_accuracy": 0.7580922782421112,
"num_tokens": 83386756.0,
"step": 80
},
{
"epoch": 0.047948103229445774,
"grad_norm": 0.436827689409256,
"learning_rate": 3.925233644859813e-05,
"loss": 0.8467378616333008,
"mean_token_accuracy": 0.7579659283161163,
"num_tokens": 88612512.0,
"step": 85
},
{
"epoch": 0.05076857989000141,
"grad_norm": 0.5298004746437073,
"learning_rate": 4.1588785046728974e-05,
"loss": 0.8391071319580078,
"mean_token_accuracy": 0.7598621785640717,
"num_tokens": 93826207.0,
"step": 90
},
{
"epoch": 0.05358905655055705,
"grad_norm": 0.7562506198883057,
"learning_rate": 4.392523364485982e-05,
"loss": 0.8302077293395996,
"mean_token_accuracy": 0.7620529294013977,
"num_tokens": 99052664.0,
"step": 95
},
{
"epoch": 0.05640953321111268,
"grad_norm": 0.7646096348762512,
"learning_rate": 4.6261682242990654e-05,
"loss": 0.8623744964599609,
"mean_token_accuracy": 0.7532553136348724,
"num_tokens": 104260632.0,
"step": 100
},
{
"epoch": 0.059230009871668314,
"grad_norm": 0.758683979511261,
"learning_rate": 4.85981308411215e-05,
"loss": 0.8187331199645996,
"mean_token_accuracy": 0.764651182293892,
"num_tokens": 109476727.0,
"step": 105
},
{
"epoch": 0.06205048653222395,
"grad_norm": 0.7841841578483582,
"learning_rate": 4.9999822205123904e-05,
"loss": 0.8433048248291015,
"mean_token_accuracy": 0.7578113496303558,
"num_tokens": 114695617.0,
"step": 110
},
{
"epoch": 0.06487096319277957,
"grad_norm": 0.41629040241241455,
"learning_rate": 4.999782204181027e-05,
"loss": 0.8195880889892578,
"mean_token_accuracy": 0.7641658455133438,
"num_tokens": 119922657.0,
"step": 115
},
{
"epoch": 0.06769143985333521,
"grad_norm": 0.5381537079811096,
"learning_rate": 4.999359964998888e-05,
"loss": 0.8174989700317383,
"mean_token_accuracy": 0.7642888396978378,
"num_tokens": 125118655.0,
"step": 120
},
{
"epoch": 0.07051191651389085,
"grad_norm": 0.5138171315193176,
"learning_rate": 4.99871554050172e-05,
"loss": 0.8351571083068847,
"mean_token_accuracy": 0.7588716298341751,
"num_tokens": 130323342.0,
"step": 125
},
{
"epoch": 0.07333239317444648,
"grad_norm": 0.41433724761009216,
"learning_rate": 4.997848987976854e-05,
"loss": 0.8338793754577637,
"mean_token_accuracy": 0.7594460546970367,
"num_tokens": 135520292.0,
"step": 130
},
{
"epoch": 0.07615286983500212,
"grad_norm": 0.45325490832328796,
"learning_rate": 4.99676038445811e-05,
"loss": 0.8269057273864746,
"mean_token_accuracy": 0.7616348803043366,
"num_tokens": 140739854.0,
"step": 135
},
{
"epoch": 0.07897334649555775,
"grad_norm": 0.5383926033973694,
"learning_rate": 4.995449826718951e-05,
"loss": 0.8304360389709473,
"mean_token_accuracy": 0.7600398540496827,
"num_tokens": 145944767.0,
"step": 140
},
{
"epoch": 0.08179382315611339,
"grad_norm": 0.37939369678497314,
"learning_rate": 4.993917431263875e-05,
"loss": 0.8136066436767578,
"mean_token_accuracy": 0.764724999666214,
"num_tokens": 151174397.0,
"step": 145
},
{
"epoch": 0.08461429981666901,
"grad_norm": 0.48885253071784973,
"learning_rate": 4.9921633343180654e-05,
"loss": 0.8315029144287109,
"mean_token_accuracy": 0.7593274593353272,
"num_tokens": 156395789.0,
"step": 150
},
{
"epoch": 0.08743477647722465,
"grad_norm": 0.552020788192749,
"learning_rate": 4.9901876918152766e-05,
"loss": 0.8077513694763183,
"mean_token_accuracy": 0.7656220257282257,
"num_tokens": 161596039.0,
"step": 155
},
{
"epoch": 0.09025525313778028,
"grad_norm": 0.4785487949848175,
"learning_rate": 4.9879906793839725e-05,
"loss": 0.8288763046264649,
"mean_token_accuracy": 0.7598470091819763,
"num_tokens": 166823527.0,
"step": 160
},
{
"epoch": 0.09307572979833592,
"grad_norm": 0.5288258194923401,
"learning_rate": 4.985572492331715e-05,
"loss": 0.8152419090270996,
"mean_token_accuracy": 0.7636227786540986,
"num_tokens": 172045905.0,
"step": 165
},
{
"epoch": 0.09589620645889155,
"grad_norm": 0.39211970567703247,
"learning_rate": 4.9829333456277985e-05,
"loss": 0.8113995552062988,
"mean_token_accuracy": 0.7643435508012771,
"num_tokens": 177218911.0,
"step": 170
},
{
"epoch": 0.09871668311944719,
"grad_norm": 0.39555636048316956,
"learning_rate": 4.980073473884145e-05,
"loss": 0.8115266799926758,
"mean_token_accuracy": 0.7643254607915878,
"num_tokens": 182443958.0,
"step": 175
},
{
"epoch": 0.10153715978000281,
"grad_norm": 0.4641428589820862,
"learning_rate": 4.976993131334443e-05,
"loss": 0.8158926010131836,
"mean_token_accuracy": 0.7630140006542205,
"num_tokens": 187669516.0,
"step": 180
},
{
"epoch": 0.10435763644055845,
"grad_norm": 0.4861908555030823,
"learning_rate": 4.973692591811549e-05,
"loss": 0.8149589538574219,
"mean_token_accuracy": 0.7634222060441971,
"num_tokens": 192876653.0,
"step": 185
},
{
"epoch": 0.1071781131011141,
"grad_norm": 0.4445413649082184,
"learning_rate": 4.970172148723146e-05,
"loss": 0.813404655456543,
"mean_token_accuracy": 0.762807947397232,
"num_tokens": 198051477.0,
"step": 190
},
{
"epoch": 0.10999858976166972,
"grad_norm": 0.34663382172584534,
"learning_rate": 4.966432115025658e-05,
"loss": 0.8136863708496094,
"mean_token_accuracy": 0.7636684775352478,
"num_tokens": 203269094.0,
"step": 195
},
{
"epoch": 0.11281906642222536,
"grad_norm": 0.45725521445274353,
"learning_rate": 4.9624728231964285e-05,
"loss": 0.798128318786621,
"mean_token_accuracy": 0.7675120651721954,
"num_tokens": 208457917.0,
"step": 200
},
{
"epoch": 0.11563954308278099,
"grad_norm": 0.38955095410346985,
"learning_rate": 4.958294625204168e-05,
"loss": 0.7989690780639649,
"mean_token_accuracy": 0.767362242937088,
"num_tokens": 213637288.0,
"step": 205
},
{
"epoch": 0.11846001974333663,
"grad_norm": 0.3952544629573822,
"learning_rate": 4.9538978924776634e-05,
"loss": 0.8036691665649414,
"mean_token_accuracy": 0.7659778416156768,
"num_tokens": 218849412.0,
"step": 210
},
{
"epoch": 0.12128049640389225,
"grad_norm": 0.4218426048755646,
"learning_rate": 4.949283015872757e-05,
"loss": 0.7949204921722413,
"mean_token_accuracy": 0.7678728461265564,
"num_tokens": 224068354.0,
"step": 215
},
{
"epoch": 0.1241009730644479,
"grad_norm": 0.36846643686294556,
"learning_rate": 4.944450405637602e-05,
"loss": 0.8046231269836426,
"mean_token_accuracy": 0.7653253704309464,
"num_tokens": 229284008.0,
"step": 220
},
{
"epoch": 0.12692144972500352,
"grad_norm": 0.43659818172454834,
"learning_rate": 4.939400491376195e-05,
"loss": 0.7970304965972901,
"mean_token_accuracy": 0.7672561138868332,
"num_tokens": 234509307.0,
"step": 225
},
{
"epoch": 0.12974192638555915,
"grad_norm": 0.5250388979911804,
"learning_rate": 4.934133722010183e-05,
"loss": 0.806855583190918,
"mean_token_accuracy": 0.7648405969142914,
"num_tokens": 239715541.0,
"step": 230
},
{
"epoch": 0.1325624030461148,
"grad_norm": 0.36886003613471985,
"learning_rate": 4.928650565738955e-05,
"loss": 0.7958408832550049,
"mean_token_accuracy": 0.7676242113113403,
"num_tokens": 244915777.0,
"step": 235
},
{
"epoch": 0.13538287970667043,
"grad_norm": 0.3311154544353485,
"learning_rate": 4.922951509998023e-05,
"loss": 0.8041030883789062,
"mean_token_accuracy": 0.7651675373315812,
"num_tokens": 250143741.0,
"step": 240
},
{
"epoch": 0.13820335636722605,
"grad_norm": 0.35112565755844116,
"learning_rate": 4.9170370614156896e-05,
"loss": 0.7944831848144531,
"mean_token_accuracy": 0.7675338089466095,
"num_tokens": 255364058.0,
"step": 245
},
{
"epoch": 0.1410238330277817,
"grad_norm": 0.35654959082603455,
"learning_rate": 4.910907745768009e-05,
"loss": 0.7977266311645508,
"mean_token_accuracy": 0.7664741456508637,
"num_tokens": 260587004.0,
"step": 250
},
{
"epoch": 0.14384430968833734,
"grad_norm": 0.3724896311759949,
"learning_rate": 4.9045641079320484e-05,
"loss": 0.7852715492248535,
"mean_token_accuracy": 0.7702670186758042,
"num_tokens": 265753862.0,
"step": 255
},
{
"epoch": 0.14666478634889296,
"grad_norm": 0.43565183877944946,
"learning_rate": 4.898006711837449e-05,
"loss": 0.800434684753418,
"mean_token_accuracy": 0.7659019708633423,
"num_tokens": 270958142.0,
"step": 260
},
{
"epoch": 0.1494852630094486,
"grad_norm": 0.43222641944885254,
"learning_rate": 4.8912361404162987e-05,
"loss": 0.7894124984741211,
"mean_token_accuracy": 0.7686454892158509,
"num_tokens": 276173659.0,
"step": 265
},
{
"epoch": 0.15230573967000424,
"grad_norm": 0.37449970841407776,
"learning_rate": 4.884252995551305e-05,
"loss": 0.777103042602539,
"mean_token_accuracy": 0.7725787729024887,
"num_tokens": 281389672.0,
"step": 270
},
{
"epoch": 0.15512621633055987,
"grad_norm": 0.38641855120658875,
"learning_rate": 4.877057898022291e-05,
"loss": 0.7899458408355713,
"mean_token_accuracy": 0.7684644788503647,
"num_tokens": 286579538.0,
"step": 275
},
{
"epoch": 0.1579466929911155,
"grad_norm": 0.42743316292762756,
"learning_rate": 4.8696514874510156e-05,
"loss": 0.7985178470611572,
"mean_token_accuracy": 0.7661768645048141,
"num_tokens": 291773512.0,
"step": 280
},
{
"epoch": 0.16076716965167112,
"grad_norm": 0.4252716302871704,
"learning_rate": 4.862034422244305e-05,
"loss": 0.7951089859008789,
"mean_token_accuracy": 0.7667285829782486,
"num_tokens": 296940105.0,
"step": 285
},
{
"epoch": 0.16358764631222678,
"grad_norm": 0.35900887846946716,
"learning_rate": 4.8542073795355294e-05,
"loss": 0.7855204582214356,
"mean_token_accuracy": 0.7693159490823746,
"num_tokens": 302158270.0,
"step": 290
},
{
"epoch": 0.1664081229727824,
"grad_norm": 0.691571056842804,
"learning_rate": 4.846171055124401e-05,
"loss": 0.8179656982421875,
"mean_token_accuracy": 0.7643016219139099,
"num_tokens": 307356395.0,
"step": 295
},
{
"epoch": 0.16922859963333803,
"grad_norm": 0.37687426805496216,
"learning_rate": 4.837926163415128e-05,
"loss": 0.795097827911377,
"mean_token_accuracy": 0.7669901877641678,
"num_tokens": 312563111.0,
"step": 300
},
{
"epoch": 0.17204907629389365,
"grad_norm": 0.3831689655780792,
"learning_rate": 4.8294734373528983e-05,
"loss": 0.7758209228515625,
"mean_token_accuracy": 0.7721955150365829,
"num_tokens": 317750361.0,
"step": 305
},
{
"epoch": 0.1748695529544493,
"grad_norm": 0.4321906864643097,
"learning_rate": 4.820813628358727e-05,
"loss": 0.7786943435668945,
"mean_token_accuracy": 0.771857762336731,
"num_tokens": 322952581.0,
"step": 310
},
{
"epoch": 0.17769002961500494,
"grad_norm": 0.3875954747200012,
"learning_rate": 4.811947506262657e-05,
"loss": 0.7797961235046387,
"mean_token_accuracy": 0.7704864501953125,
"num_tokens": 328156016.0,
"step": 315
},
{
"epoch": 0.18051050627556056,
"grad_norm": 0.5029323697090149,
"learning_rate": 4.802875859235325e-05,
"loss": 0.7656207084655762,
"mean_token_accuracy": 0.7749809384346008,
"num_tokens": 333374308.0,
"step": 320
},
{
"epoch": 0.18333098293611622,
"grad_norm": 0.42617279291152954,
"learning_rate": 4.793599493717891e-05,
"loss": 0.7736545085906983,
"mean_token_accuracy": 0.7725418835878373,
"num_tokens": 338564854.0,
"step": 325
},
{
"epoch": 0.18615145959667184,
"grad_norm": 0.3626423180103302,
"learning_rate": 4.784119234350353e-05,
"loss": 0.7666655540466308,
"mean_token_accuracy": 0.7740924268960953,
"num_tokens": 343758549.0,
"step": 330
},
{
"epoch": 0.18897193625722747,
"grad_norm": 0.35442817211151123,
"learning_rate": 4.774435923898235e-05,
"loss": 0.8000862121582031,
"mean_token_accuracy": 0.7652783513069152,
"num_tokens": 348980481.0,
"step": 335
},
{
"epoch": 0.1917924129177831,
"grad_norm": 0.32683518528938293,
"learning_rate": 4.764550423177673e-05,
"loss": 0.7713173389434814,
"mean_token_accuracy": 0.773258313536644,
"num_tokens": 354190860.0,
"step": 340
},
{
"epoch": 0.19461288957833875,
"grad_norm": 0.31471505761146545,
"learning_rate": 4.754463610978886e-05,
"loss": 0.7849064826965332,
"mean_token_accuracy": 0.7690862119197845,
"num_tokens": 359399443.0,
"step": 345
},
{
"epoch": 0.19743336623889438,
"grad_norm": 0.3361181914806366,
"learning_rate": 4.744176383988055e-05,
"loss": 0.7590707302093506,
"mean_token_accuracy": 0.7761936277151108,
"num_tokens": 364594796.0,
"step": 350
},
{
"epoch": 0.20025384289945,
"grad_norm": 0.3417888581752777,
"learning_rate": 4.733689656707615e-05,
"loss": 0.7630936622619628,
"mean_token_accuracy": 0.7746737778186799,
"num_tokens": 369800137.0,
"step": 355
},
{
"epoch": 0.20307431956000563,
"grad_norm": 0.3212883174419403,
"learning_rate": 4.723004361374953e-05,
"loss": 0.7731470108032227,
"mean_token_accuracy": 0.7723031550645828,
"num_tokens": 375005470.0,
"step": 360
},
{
"epoch": 0.20589479622056128,
"grad_norm": 0.3806549310684204,
"learning_rate": 4.7121214478795386e-05,
"loss": 0.7695651054382324,
"mean_token_accuracy": 0.7732091188430786,
"num_tokens": 380204022.0,
"step": 365
},
{
"epoch": 0.2087152728811169,
"grad_norm": 0.3326440751552582,
"learning_rate": 4.7010418836784786e-05,
"loss": 0.7776393890380859,
"mean_token_accuracy": 0.7707630872726441,
"num_tokens": 385430442.0,
"step": 370
},
{
"epoch": 0.21153574954167254,
"grad_norm": 0.33390548825263977,
"learning_rate": 4.689766653710517e-05,
"loss": 0.7794651985168457,
"mean_token_accuracy": 0.7707834959030151,
"num_tokens": 390660313.0,
"step": 375
},
{
"epoch": 0.2143562262022282,
"grad_norm": 0.4141367971897125,
"learning_rate": 4.678296760308474e-05,
"loss": 0.7713262557983398,
"mean_token_accuracy": 0.773048859834671,
"num_tokens": 395866254.0,
"step": 380
},
{
"epoch": 0.21717670286278382,
"grad_norm": 0.4335712492465973,
"learning_rate": 4.666633223110142e-05,
"loss": 0.7711901664733887,
"mean_token_accuracy": 0.7728887468576431,
"num_tokens": 401075996.0,
"step": 385
},
{
"epoch": 0.21999717952333944,
"grad_norm": 0.3701898455619812,
"learning_rate": 4.6547770789676436e-05,
"loss": 0.7711897850036621,
"mean_token_accuracy": 0.7721995264291763,
"num_tokens": 406281030.0,
"step": 390
},
{
"epoch": 0.22281765618389507,
"grad_norm": 0.38962966203689575,
"learning_rate": 4.642729381855262e-05,
"loss": 0.7722814083099365,
"mean_token_accuracy": 0.7722497761249543,
"num_tokens": 411505168.0,
"step": 395
},
{
"epoch": 0.22563813284445072,
"grad_norm": 0.334955632686615,
"learning_rate": 4.630491202775739e-05,
"loss": 0.7845487594604492,
"mean_token_accuracy": 0.768746617436409,
"num_tokens": 416686758.0,
"step": 400
},
{
"epoch": 0.22845860950500635,
"grad_norm": 0.28480038046836853,
"learning_rate": 4.618063629665069e-05,
"loss": 0.7675346374511719,
"mean_token_accuracy": 0.7733120858669281,
"num_tokens": 421911969.0,
"step": 405
},
{
"epoch": 0.23127908616556198,
"grad_norm": 0.32578566670417786,
"learning_rate": 4.605447767295791e-05,
"loss": 0.7733859062194824,
"mean_token_accuracy": 0.7722859561443329,
"num_tokens": 427132233.0,
"step": 410
},
{
"epoch": 0.2340995628261176,
"grad_norm": 0.37755486369132996,
"learning_rate": 4.592644737178769e-05,
"loss": 0.7817283153533936,
"mean_token_accuracy": 0.7701420873403549,
"num_tokens": 432359875.0,
"step": 415
},
{
"epoch": 0.23692003948667326,
"grad_norm": 0.38393500447273254,
"learning_rate": 4.5796556774634955e-05,
"loss": 0.7568459987640381,
"mean_token_accuracy": 0.776387557387352,
"num_tokens": 437582569.0,
"step": 420
},
{
"epoch": 0.23974051614722888,
"grad_norm": 0.3589465320110321,
"learning_rate": 4.5664817428369176e-05,
"loss": 0.7752207756042481,
"mean_token_accuracy": 0.7715168982744217,
"num_tokens": 442808705.0,
"step": 425
},
{
"epoch": 0.2425609928077845,
"grad_norm": 0.34019097685813904,
"learning_rate": 4.553124104420784e-05,
"loss": 0.7655211448669433,
"mean_token_accuracy": 0.774545556306839,
"num_tokens": 448007126.0,
"step": 430
},
{
"epoch": 0.24538146946834016,
"grad_norm": 0.38820672035217285,
"learning_rate": 4.5395839496675404e-05,
"loss": 0.7734439849853516,
"mean_token_accuracy": 0.771916925907135,
"num_tokens": 453196680.0,
"step": 435
},
{
"epoch": 0.2482019461288958,
"grad_norm": 0.38175490498542786,
"learning_rate": 4.525862482254764e-05,
"loss": 0.7811372756958008,
"mean_token_accuracy": 0.7695781767368317,
"num_tokens": 458395676.0,
"step": 440
},
{
"epoch": 0.2510224227894514,
"grad_norm": 0.366558700799942,
"learning_rate": 4.511960921978163e-05,
"loss": 0.7817493438720703,
"mean_token_accuracy": 0.7694737076759338,
"num_tokens": 463597764.0,
"step": 445
},
{
"epoch": 0.25384289945000704,
"grad_norm": 0.32293614745140076,
"learning_rate": 4.4978805046431416e-05,
"loss": 0.7695838928222656,
"mean_token_accuracy": 0.7725834578275681,
"num_tokens": 468808142.0,
"step": 450
},
{
"epoch": 0.25666337611056267,
"grad_norm": 0.35432401299476624,
"learning_rate": 4.483622481954938e-05,
"loss": 0.7757863521575927,
"mean_token_accuracy": 0.7709449827671051,
"num_tokens": 474013365.0,
"step": 455
},
{
"epoch": 0.2594838527711183,
"grad_norm": 0.3135406970977783,
"learning_rate": 4.469188121407353e-05,
"loss": 0.780633544921875,
"mean_token_accuracy": 0.7696676045656204,
"num_tokens": 479237358.0,
"step": 460
},
{
"epoch": 0.262304329431674,
"grad_norm": 0.3338964283466339,
"learning_rate": 4.454578706170075e-05,
"loss": 0.7923246383666992,
"mean_token_accuracy": 0.7662366658449173,
"num_tokens": 484429691.0,
"step": 465
},
{
"epoch": 0.2651248060922296,
"grad_norm": 0.3694622814655304,
"learning_rate": 4.439795534974607e-05,
"loss": 0.770482349395752,
"mean_token_accuracy": 0.7723766535520553,
"num_tokens": 489651036.0,
"step": 470
},
{
"epoch": 0.26794528275278523,
"grad_norm": 0.3651314973831177,
"learning_rate": 4.424839921998819e-05,
"loss": 0.7813654899597168,
"mean_token_accuracy": 0.7697213411331176,
"num_tokens": 494857597.0,
"step": 475
},
{
"epoch": 0.27076575941334086,
"grad_norm": 0.38329413533210754,
"learning_rate": 4.4097131967501124e-05,
"loss": 0.7724425315856933,
"mean_token_accuracy": 0.7712427735328674,
"num_tokens": 500063588.0,
"step": 480
},
{
"epoch": 0.2735862360738965,
"grad_norm": 0.3533983528614044,
"learning_rate": 4.394416703947243e-05,
"loss": 0.760558795928955,
"mean_token_accuracy": 0.7749832183122635,
"num_tokens": 505284578.0,
"step": 485
},
{
"epoch": 0.2764067127344521,
"grad_norm": 0.2940215468406677,
"learning_rate": 4.378951803400768e-05,
"loss": 0.7766805648803711,
"mean_token_accuracy": 0.770464363694191,
"num_tokens": 510489061.0,
"step": 490
},
{
"epoch": 0.27922718939500774,
"grad_norm": 0.3216915726661682,
"learning_rate": 4.3633198698921724e-05,
"loss": 0.7665189743041992,
"mean_token_accuracy": 0.773473608493805,
"num_tokens": 515682890.0,
"step": 495
},
{
"epoch": 0.2820476660555634,
"grad_norm": 0.3060475289821625,
"learning_rate": 4.347522293051648e-05,
"loss": 0.7670203685760498,
"mean_token_accuracy": 0.7731666147708893,
"num_tokens": 520901566.0,
"step": 500
},
{
"epoch": 0.28486814271611904,
"grad_norm": 0.3380279242992401,
"learning_rate": 4.331560477234565e-05,
"loss": 0.7661257266998291,
"mean_token_accuracy": 0.7734242618083954,
"num_tokens": 526113564.0,
"step": 505
},
{
"epoch": 0.28768861937667467,
"grad_norm": 0.3771665692329407,
"learning_rate": 4.315435841396626e-05,
"loss": 0.7718358516693116,
"mean_token_accuracy": 0.7719818025827407,
"num_tokens": 531308013.0,
"step": 510
},
{
"epoch": 0.2905090960372303,
"grad_norm": 0.3177052438259125,
"learning_rate": 4.299149818967726e-05,
"loss": 0.7608656406402587,
"mean_token_accuracy": 0.7754600733518601,
"num_tokens": 536536429.0,
"step": 515
},
{
"epoch": 0.2933295726977859,
"grad_norm": 0.3333839774131775,
"learning_rate": 4.282703857724527e-05,
"loss": 0.7573532104492188,
"mean_token_accuracy": 0.7759276896715164,
"num_tokens": 541757187.0,
"step": 520
},
{
"epoch": 0.29615004935834155,
"grad_norm": 0.3042449355125427,
"learning_rate": 4.2660994196617496e-05,
"loss": 0.7559514999389648,
"mean_token_accuracy": 0.7758087396621705,
"num_tokens": 546980793.0,
"step": 525
},
{
"epoch": 0.2989705260188972,
"grad_norm": 0.3035680651664734,
"learning_rate": 4.249337980862215e-05,
"loss": 0.7580223083496094,
"mean_token_accuracy": 0.7753052890300751,
"num_tokens": 552180186.0,
"step": 530
},
{
"epoch": 0.3017910026794528,
"grad_norm": 0.2809864580631256,
"learning_rate": 4.2324210313656176e-05,
"loss": 0.769060754776001,
"mean_token_accuracy": 0.7719310760498047,
"num_tokens": 557388834.0,
"step": 535
},
{
"epoch": 0.3046114793400085,
"grad_norm": 0.307574987411499,
"learning_rate": 4.215350075036067e-05,
"loss": 0.7691577434539795,
"mean_token_accuracy": 0.7722433179616928,
"num_tokens": 562585314.0,
"step": 540
},
{
"epoch": 0.3074319560005641,
"grad_norm": 0.31300097703933716,
"learning_rate": 4.198126629428406e-05,
"loss": 0.7613607406616211,
"mean_token_accuracy": 0.7739471793174744,
"num_tokens": 567787568.0,
"step": 545
},
{
"epoch": 0.31025243266111974,
"grad_norm": 0.37344154715538025,
"learning_rate": 4.180752225653292e-05,
"loss": 0.783112621307373,
"mean_token_accuracy": 0.768501365184784,
"num_tokens": 572993740.0,
"step": 550
},
{
"epoch": 0.31307290932167536,
"grad_norm": 0.401102751493454,
"learning_rate": 4.1632284082410994e-05,
"loss": 0.7505324363708497,
"mean_token_accuracy": 0.7774714142084121,
"num_tokens": 578210343.0,
"step": 555
},
{
"epoch": 0.315893385982231,
"grad_norm": 0.33030402660369873,
"learning_rate": 4.145556735004606e-05,
"loss": 0.7742326736450196,
"mean_token_accuracy": 0.7704325795173645,
"num_tokens": 583420811.0,
"step": 560
},
{
"epoch": 0.3187138626427866,
"grad_norm": 0.31906676292419434,
"learning_rate": 4.127738776900513e-05,
"loss": 0.7598705291748047,
"mean_token_accuracy": 0.7744769185781479,
"num_tokens": 588629077.0,
"step": 565
},
{
"epoch": 0.32153433930334224,
"grad_norm": 0.3493998646736145,
"learning_rate": 4.109776117889789e-05,
"loss": 0.7727458000183105,
"mean_token_accuracy": 0.7708871066570282,
"num_tokens": 593833594.0,
"step": 570
},
{
"epoch": 0.3243548159638979,
"grad_norm": 0.43525683879852295,
"learning_rate": 4.091670354796866e-05,
"loss": 0.7558047294616699,
"mean_token_accuracy": 0.7755980044603348,
"num_tokens": 599042413.0,
"step": 575
},
{
"epoch": 0.32717529262445355,
"grad_norm": 0.4609135687351227,
"learning_rate": 4.073423097167681e-05,
"loss": 0.7683779239654541,
"mean_token_accuracy": 0.7725673496723175,
"num_tokens": 604263432.0,
"step": 580
},
{
"epoch": 0.3299957692850092,
"grad_norm": 0.4085586965084076,
"learning_rate": 4.055035967126592e-05,
"loss": 0.7682935237884522,
"mean_token_accuracy": 0.7724095970392227,
"num_tokens": 609481905.0,
"step": 585
},
{
"epoch": 0.3328162459455648,
"grad_norm": 0.4387555420398712,
"learning_rate": 4.036510599232183e-05,
"loss": 0.7553305625915527,
"mean_token_accuracy": 0.7757373869419097,
"num_tokens": 614695205.0,
"step": 590
},
{
"epoch": 0.33563672260612043,
"grad_norm": 0.42048364877700806,
"learning_rate": 4.01784864033195e-05,
"loss": 0.7646809577941894,
"mean_token_accuracy": 0.7734672874212265,
"num_tokens": 619906924.0,
"step": 595
},
{
"epoch": 0.33845719926667606,
"grad_norm": 0.3200714886188507,
"learning_rate": 3.999051749415905e-05,
"loss": 0.7549895286560059,
"mean_token_accuracy": 0.7760183870792389,
"num_tokens": 625118843.0,
"step": 600
},
{
"epoch": 0.3412776759272317,
"grad_norm": 0.2978014051914215,
"learning_rate": 3.980121597469096e-05,
"loss": 0.7442358016967774,
"mean_token_accuracy": 0.7792284607887268,
"num_tokens": 630325802.0,
"step": 605
},
{
"epoch": 0.3440981525877873,
"grad_norm": 0.3026927709579468,
"learning_rate": 3.96105986732306e-05,
"loss": 0.7637527465820313,
"mean_token_accuracy": 0.7731468260288239,
"num_tokens": 635526166.0,
"step": 610
},
{
"epoch": 0.346918629248343,
"grad_norm": 0.2822380065917969,
"learning_rate": 3.941868253506227e-05,
"loss": 0.7543724536895752,
"mean_token_accuracy": 0.7762434631586075,
"num_tokens": 640729993.0,
"step": 615
},
{
"epoch": 0.3497391059088986,
"grad_norm": 0.28617018461227417,
"learning_rate": 3.9225484620932805e-05,
"loss": 0.7670584678649902,
"mean_token_accuracy": 0.7725878387689591,
"num_tokens": 645951079.0,
"step": 620
},
{
"epoch": 0.35255958256945424,
"grad_norm": 0.3128873109817505,
"learning_rate": 3.9031022105534945e-05,
"loss": 0.7662755489349365,
"mean_token_accuracy": 0.7721714347600936,
"num_tokens": 651176154.0,
"step": 625
},
{
"epoch": 0.35538005923000987,
"grad_norm": 0.27455100417137146,
"learning_rate": 3.8835312275980516e-05,
"loss": 0.748596477508545,
"mean_token_accuracy": 0.7781710177659988,
"num_tokens": 656395834.0,
"step": 630
},
{
"epoch": 0.3582005358905655,
"grad_norm": 0.33018219470977783,
"learning_rate": 3.8638372530263715e-05,
"loss": 0.7739686489105224,
"mean_token_accuracy": 0.7703105211257935,
"num_tokens": 661600206.0,
"step": 635
},
{
"epoch": 0.3610210125511211,
"grad_norm": 0.3159162402153015,
"learning_rate": 3.844022037571443e-05,
"loss": 0.7523816108703614,
"mean_token_accuracy": 0.7764188557863235,
"num_tokens": 666811188.0,
"step": 640
},
{
"epoch": 0.36384148921167675,
"grad_norm": 0.3154295086860657,
"learning_rate": 3.824087342744195e-05,
"loss": 0.754487133026123,
"mean_token_accuracy": 0.7756473571062088,
"num_tokens": 672029336.0,
"step": 645
},
{
"epoch": 0.36666196587223243,
"grad_norm": 0.2579188644886017,
"learning_rate": 3.804034940676894e-05,
"loss": 0.7692998886108399,
"mean_token_accuracy": 0.7718347430229187,
"num_tokens": 677251262.0,
"step": 650
},
{
"epoch": 0.36948244253278806,
"grad_norm": 0.32802215218544006,
"learning_rate": 3.783866613965622e-05,
"loss": 0.7555614471435547,
"mean_token_accuracy": 0.7755881071090698,
"num_tokens": 682467688.0,
"step": 655
},
{
"epoch": 0.3723029191933437,
"grad_norm": 0.31287023425102234,
"learning_rate": 3.763584155511794e-05,
"loss": 0.7655069351196289,
"mean_token_accuracy": 0.7732323706150055,
"num_tokens": 687666025.0,
"step": 660
},
{
"epoch": 0.3751233958538993,
"grad_norm": 0.294041246175766,
"learning_rate": 3.743189368362784e-05,
"loss": 0.7489017486572266,
"mean_token_accuracy": 0.7772963434457779,
"num_tokens": 692873846.0,
"step": 665
},
{
"epoch": 0.37794387251445494,
"grad_norm": 0.2788609564304352,
"learning_rate": 3.722684065551638e-05,
"loss": 0.750185203552246,
"mean_token_accuracy": 0.776657447218895,
"num_tokens": 698100240.0,
"step": 670
},
{
"epoch": 0.38076434917501056,
"grad_norm": 0.2861092984676361,
"learning_rate": 3.702070069935898e-05,
"loss": 0.745637035369873,
"mean_token_accuracy": 0.7782793074846268,
"num_tokens": 703289295.0,
"step": 675
},
{
"epoch": 0.3835848258355662,
"grad_norm": 0.31763726472854614,
"learning_rate": 3.6813492140355596e-05,
"loss": 0.7542277336120605,
"mean_token_accuracy": 0.775793018937111,
"num_tokens": 708493173.0,
"step": 680
},
{
"epoch": 0.38640530249612187,
"grad_norm": 0.2676416337490082,
"learning_rate": 3.660523339870164e-05,
"loss": 0.7405709266662598,
"mean_token_accuracy": 0.7790880471467971,
"num_tokens": 713701339.0,
"step": 685
},
{
"epoch": 0.3892257791566775,
"grad_norm": 0.2737492322921753,
"learning_rate": 3.639594298795048e-05,
"loss": 0.7666029930114746,
"mean_token_accuracy": 0.7718399643898011,
"num_tokens": 718920064.0,
"step": 690
},
{
"epoch": 0.3920462558172331,
"grad_norm": 0.26979735493659973,
"learning_rate": 3.6185639513367656e-05,
"loss": 0.7548455238342285,
"mean_token_accuracy": 0.7755049705505371,
"num_tokens": 724132875.0,
"step": 695
},
{
"epoch": 0.39486673247778875,
"grad_norm": 0.30082252621650696,
"learning_rate": 3.597434167027695e-05,
"loss": 0.7691206455230712,
"mean_token_accuracy": 0.7716991513967514,
"num_tokens": 729335465.0,
"step": 700
},
{
"epoch": 0.3976872091383444,
"grad_norm": 0.2921012341976166,
"learning_rate": 3.5762068242398393e-05,
"loss": 0.751345443725586,
"mean_token_accuracy": 0.7767131477594376,
"num_tokens": 734543658.0,
"step": 705
},
{
"epoch": 0.4005076857989,
"grad_norm": 0.31780368089675903,
"learning_rate": 3.554883810017844e-05,
"loss": 0.7625522613525391,
"mean_token_accuracy": 0.7733297258615494,
"num_tokens": 739758180.0,
"step": 710
},
{
"epoch": 0.40332816245945563,
"grad_norm": 0.34423163533210754,
"learning_rate": 3.533467019911252e-05,
"loss": 0.7443047046661377,
"mean_token_accuracy": 0.7784169435501098,
"num_tokens": 744978491.0,
"step": 715
},
{
"epoch": 0.40614863912001126,
"grad_norm": 0.3304055333137512,
"learning_rate": 3.5119583578059846e-05,
"loss": 0.7599642753601075,
"mean_token_accuracy": 0.7740559220314026,
"num_tokens": 750173836.0,
"step": 720
},
{
"epoch": 0.40896911578056694,
"grad_norm": 0.30678245425224304,
"learning_rate": 3.490359735755102e-05,
"loss": 0.764622974395752,
"mean_token_accuracy": 0.7724904954433441,
"num_tokens": 755357042.0,
"step": 725
},
{
"epoch": 0.41178959244112256,
"grad_norm": 0.2555997669696808,
"learning_rate": 3.468673073808822e-05,
"loss": 0.7338571548461914,
"mean_token_accuracy": 0.780937722325325,
"num_tokens": 760569986.0,
"step": 730
},
{
"epoch": 0.4146100691016782,
"grad_norm": 0.2800235450267792,
"learning_rate": 3.4469002998438335e-05,
"loss": 0.7382781028747558,
"mean_token_accuracy": 0.7798346370458603,
"num_tokens": 765783266.0,
"step": 735
},
{
"epoch": 0.4174305457622338,
"grad_norm": 0.29841479659080505,
"learning_rate": 3.425043349391918e-05,
"loss": 0.7705670356750488,
"mean_token_accuracy": 0.7709584295749664,
"num_tokens": 771011379.0,
"step": 740
},
{
"epoch": 0.42025102242278944,
"grad_norm": 0.25931602716445923,
"learning_rate": 3.403104165467883e-05,
"loss": 0.748842716217041,
"mean_token_accuracy": 0.7773065626621246,
"num_tokens": 776228741.0,
"step": 745
},
{
"epoch": 0.42307149908334507,
"grad_norm": 0.27186912298202515,
"learning_rate": 3.381084698396835e-05,
"loss": 0.7525691032409668,
"mean_token_accuracy": 0.7757814288139343,
"num_tokens": 781410976.0,
"step": 750
},
{
"epoch": 0.4258919757439007,
"grad_norm": 0.26616016030311584,
"learning_rate": 3.358986905640802e-05,
"loss": 0.7436333656311035,
"mean_token_accuracy": 0.7787700086832047,
"num_tokens": 786617338.0,
"step": 755
},
{
"epoch": 0.4287124524044564,
"grad_norm": 0.2676887810230255,
"learning_rate": 3.336812751624723e-05,
"loss": 0.7410964965820312,
"mean_token_accuracy": 0.7793578028678894,
"num_tokens": 791825315.0,
"step": 760
},
{
"epoch": 0.431532929065012,
"grad_norm": 0.2672156095504761,
"learning_rate": 3.314564207561816e-05,
"loss": 0.7534364700317383,
"mean_token_accuracy": 0.7755870819091797,
"num_tokens": 797048164.0,
"step": 765
},
{
"epoch": 0.43435340572556763,
"grad_norm": 0.2551250457763672,
"learning_rate": 3.2922432512783395e-05,
"loss": 0.7435198783874511,
"mean_token_accuracy": 0.7779635220766068,
"num_tokens": 802233191.0,
"step": 770
},
{
"epoch": 0.43717388238612326,
"grad_norm": 0.3182034492492676,
"learning_rate": 3.269851867037774e-05,
"loss": 0.7505601406097412,
"mean_token_accuracy": 0.7762654155492783,
"num_tokens": 807443805.0,
"step": 775
},
{
"epoch": 0.4399943590466789,
"grad_norm": 0.29518380761146545,
"learning_rate": 3.247392045364426e-05,
"loss": 0.7480457782745361,
"mean_token_accuracy": 0.7769211769104004,
"num_tokens": 812656912.0,
"step": 780
},
{
"epoch": 0.4428148357072345,
"grad_norm": 0.2449672371149063,
"learning_rate": 3.224865782866478e-05,
"loss": 0.7489072799682617,
"mean_token_accuracy": 0.7767190963029862,
"num_tokens": 817867809.0,
"step": 785
},
{
"epoch": 0.44563531236779014,
"grad_norm": 0.28610652685165405,
"learning_rate": 3.202275082058492e-05,
"loss": 0.7520014762878418,
"mean_token_accuracy": 0.7751724421977997,
"num_tokens": 823079833.0,
"step": 790
},
{
"epoch": 0.44845578902834576,
"grad_norm": 0.27258017659187317,
"learning_rate": 3.179621951183397e-05,
"loss": 0.7486692428588867,
"mean_token_accuracy": 0.7766971349716186,
"num_tokens": 828303959.0,
"step": 795
},
{
"epoch": 0.45127626568890145,
"grad_norm": 0.26820626854896545,
"learning_rate": 3.156908404033961e-05,
"loss": 0.7468665599822998,
"mean_token_accuracy": 0.7771286904811859,
"num_tokens": 833510973.0,
"step": 800
},
{
"epoch": 0.45409674234945707,
"grad_norm": 0.28003305196762085,
"learning_rate": 3.1341364597737686e-05,
"loss": 0.7600772380828857,
"mean_token_accuracy": 0.7737066566944122,
"num_tokens": 838698396.0,
"step": 805
},
{
"epoch": 0.4569172190100127,
"grad_norm": 0.29965445399284363,
"learning_rate": 3.111308142757728e-05,
"loss": 0.7347710132598877,
"mean_token_accuracy": 0.7805228114128113,
"num_tokens": 843923030.0,
"step": 810
},
{
"epoch": 0.4597376956705683,
"grad_norm": 0.2517057955265045,
"learning_rate": 3.088425482352107e-05,
"loss": 0.7391749382019043,
"mean_token_accuracy": 0.7790901213884354,
"num_tokens": 849142004.0,
"step": 815
},
{
"epoch": 0.46255817233112395,
"grad_norm": 0.3115026652812958,
"learning_rate": 3.0654905127541326e-05,
"loss": 0.7473933219909668,
"mean_token_accuracy": 0.7775959491729736,
"num_tokens": 854357908.0,
"step": 820
},
{
"epoch": 0.4653786489916796,
"grad_norm": 0.2688276767730713,
"learning_rate": 3.0425052728111585e-05,
"loss": 0.7278037071228027,
"mean_token_accuracy": 0.7819948852062225,
"num_tokens": 859531077.0,
"step": 825
},
{
"epoch": 0.4681991256522352,
"grad_norm": 0.32312244176864624,
"learning_rate": 3.0194718058394123e-05,
"loss": 0.7427204132080079,
"mean_token_accuracy": 0.777895525097847,
"num_tokens": 864750610.0,
"step": 830
},
{
"epoch": 0.4710196023127909,
"grad_norm": 0.3414735496044159,
"learning_rate": 2.996392159442355e-05,
"loss": 0.7428229331970215,
"mean_token_accuracy": 0.7787304818630219,
"num_tokens": 869931763.0,
"step": 835
},
{
"epoch": 0.4738400789733465,
"grad_norm": 0.29070428013801575,
"learning_rate": 2.973268385328655e-05,
"loss": 0.7371402740478515,
"mean_token_accuracy": 0.7795629620552063,
"num_tokens": 875134610.0,
"step": 840
},
{
"epoch": 0.47666055563390214,
"grad_norm": 0.2716343104839325,
"learning_rate": 2.9501025391297976e-05,
"loss": 0.7384316444396972,
"mean_token_accuracy": 0.7791845291852951,
"num_tokens": 880349943.0,
"step": 845
},
{
"epoch": 0.47948103229445777,
"grad_norm": 0.3179316222667694,
"learning_rate": 2.9268966802173436e-05,
"loss": 0.7503187179565429,
"mean_token_accuracy": 0.7757825434207917,
"num_tokens": 885547239.0,
"step": 850
},
{
"epoch": 0.4823015089550134,
"grad_norm": 0.2789818048477173,
"learning_rate": 2.903652871519863e-05,
"loss": 0.7283576011657715,
"mean_token_accuracy": 0.7821285218000412,
"num_tokens": 890745008.0,
"step": 855
},
{
"epoch": 0.485121985615569,
"grad_norm": 0.2598032057285309,
"learning_rate": 2.88037317933954e-05,
"loss": 0.731390380859375,
"mean_token_accuracy": 0.7811041116714478,
"num_tokens": 895969819.0,
"step": 860
},
{
"epoch": 0.48794246227612464,
"grad_norm": 0.24372799694538116,
"learning_rate": 2.8570596731684895e-05,
"loss": 0.742131233215332,
"mean_token_accuracy": 0.778620821237564,
"num_tokens": 901171390.0,
"step": 865
},
{
"epoch": 0.4907629389366803,
"grad_norm": 0.2416885793209076,
"learning_rate": 2.833714425504786e-05,
"loss": 0.7391448974609375,
"mean_token_accuracy": 0.779407599568367,
"num_tokens": 906394256.0,
"step": 870
},
{
"epoch": 0.49358341559723595,
"grad_norm": 0.2647145092487335,
"learning_rate": 2.810339511668223e-05,
"loss": 0.7384161472320556,
"mean_token_accuracy": 0.7792135775089264,
"num_tokens": 911613856.0,
"step": 875
},
{
"epoch": 0.4964038922577916,
"grad_norm": 0.26926189661026,
"learning_rate": 2.786937009615824e-05,
"loss": 0.7412851333618165,
"mean_token_accuracy": 0.7784785836935043,
"num_tokens": 916834932.0,
"step": 880
},
{
"epoch": 0.4992243689183472,
"grad_norm": 0.25262531638145447,
"learning_rate": 2.7635089997571196e-05,
"loss": 0.7453501701354981,
"mean_token_accuracy": 0.777086952328682,
"num_tokens": 922044065.0,
"step": 885
},
{
"epoch": 0.5020448455789028,
"grad_norm": 0.27210497856140137,
"learning_rate": 2.7400575647692046e-05,
"loss": 0.7517458438873291,
"mean_token_accuracy": 0.7750386208295822,
"num_tokens": 927241056.0,
"step": 890
},
{
"epoch": 0.5048653222394585,
"grad_norm": 0.25430935621261597,
"learning_rate": 2.7165847894115953e-05,
"loss": 0.7491694450378418,
"mean_token_accuracy": 0.7764867752790451,
"num_tokens": 932456368.0,
"step": 895
},
{
"epoch": 0.5076857989000141,
"grad_norm": 0.2582172155380249,
"learning_rate": 2.693092760340899e-05,
"loss": 0.7363146305084228,
"mean_token_accuracy": 0.7792346268892288,
"num_tokens": 937679075.0,
"step": 900
},
{
"epoch": 0.5105062755605697,
"grad_norm": 0.23149679601192474,
"learning_rate": 2.66958356592532e-05,
"loss": 0.7403749942779541,
"mean_token_accuracy": 0.7784234285354614,
"num_tokens": 942885883.0,
"step": 905
},
{
"epoch": 0.5133267522211253,
"grad_norm": 0.2690688669681549,
"learning_rate": 2.6460592960590064e-05,
"loss": 0.7586381912231446,
"mean_token_accuracy": 0.7731125712394714,
"num_tokens": 948067461.0,
"step": 910
},
{
"epoch": 0.516147228881681,
"grad_norm": 0.24865229427814484,
"learning_rate": 2.622522041976269e-05,
"loss": 0.7361614227294921,
"mean_token_accuracy": 0.7801000714302063,
"num_tokens": 953267500.0,
"step": 915
},
{
"epoch": 0.5189677055422366,
"grad_norm": 0.3116484582424164,
"learning_rate": 2.598973896065674e-05,
"loss": 0.7414368629455567,
"mean_token_accuracy": 0.7783037513494492,
"num_tokens": 958474033.0,
"step": 920
},
{
"epoch": 0.5217881822027922,
"grad_norm": 0.23773521184921265,
"learning_rate": 2.5754169516840355e-05,
"loss": 0.7330810546875,
"mean_token_accuracy": 0.7805785417556763,
"num_tokens": 963684645.0,
"step": 925
},
{
"epoch": 0.524608658863348,
"grad_norm": 0.2488972693681717,
"learning_rate": 2.5518533029703274e-05,
"loss": 0.752569580078125,
"mean_token_accuracy": 0.7753267168998719,
"num_tokens": 968887491.0,
"step": 930
},
{
"epoch": 0.5274291355239036,
"grad_norm": 0.22092726826667786,
"learning_rate": 2.5282850446595158e-05,
"loss": 0.7525276184082031,
"mean_token_accuracy": 0.7744133800268174,
"num_tokens": 974082901.0,
"step": 935
},
{
"epoch": 0.5302496121844592,
"grad_norm": 0.2564944326877594,
"learning_rate": 2.504714271896345e-05,
"loss": 0.7492488861083985,
"mean_token_accuracy": 0.7756923973560333,
"num_tokens": 979313348.0,
"step": 940
},
{
"epoch": 0.5330700888450148,
"grad_norm": 0.24430619180202484,
"learning_rate": 2.4811430800490885e-05,
"loss": 0.7475570678710938,
"mean_token_accuracy": 0.7765256404876709,
"num_tokens": 984536065.0,
"step": 945
},
{
"epoch": 0.5358905655055705,
"grad_norm": 0.2511347830295563,
"learning_rate": 2.4575735645232743e-05,
"loss": 0.7428129196166993,
"mean_token_accuracy": 0.7775215625762939,
"num_tokens": 989758496.0,
"step": 950
},
{
"epoch": 0.5387110421661261,
"grad_norm": 0.2539248764514923,
"learning_rate": 2.43400782057541e-05,
"loss": 0.7449743747711182,
"mean_token_accuracy": 0.7767373085021972,
"num_tokens": 994981945.0,
"step": 955
},
{
"epoch": 0.5415315188266817,
"grad_norm": 0.2781333029270172,
"learning_rate": 2.4104479431267196e-05,
"loss": 0.7502236366271973,
"mean_token_accuracy": 0.7762497693300248,
"num_tokens": 1000202855.0,
"step": 960
},
{
"epoch": 0.5443519954872373,
"grad_norm": 0.251446932554245,
"learning_rate": 2.38689602657692e-05,
"loss": 0.7517457962036133,
"mean_token_accuracy": 0.7751468151807785,
"num_tokens": 1005415062.0,
"step": 965
},
{
"epoch": 0.547172472147793,
"grad_norm": 0.2488507777452469,
"learning_rate": 2.363354164618022e-05,
"loss": 0.729612922668457,
"mean_token_accuracy": 0.7817542374134063,
"num_tokens": 1010615295.0,
"step": 970
},
{
"epoch": 0.5499929488083486,
"grad_norm": 0.2578504681587219,
"learning_rate": 2.339824450048218e-05,
"loss": 0.725861930847168,
"mean_token_accuracy": 0.782097339630127,
"num_tokens": 1015841988.0,
"step": 975
},
{
"epoch": 0.5528134254689042,
"grad_norm": 0.23905125260353088,
"learning_rate": 2.3163089745858357e-05,
"loss": 0.7432829856872558,
"mean_token_accuracy": 0.7776479661464691,
"num_tokens": 1021052219.0,
"step": 980
},
{
"epoch": 0.5556339021294598,
"grad_norm": 0.30154407024383545,
"learning_rate": 2.292809828683388e-05,
"loss": 0.7325653553009033,
"mean_token_accuracy": 0.7808756172657013,
"num_tokens": 1026266561.0,
"step": 985
},
{
"epoch": 0.5584543787900155,
"grad_norm": 0.2418544590473175,
"learning_rate": 2.2693291013417453e-05,
"loss": 0.772521686553955,
"mean_token_accuracy": 0.7760232150554657,
"num_tokens": 1031469779.0,
"step": 990
},
{
"epoch": 0.5612748554505711,
"grad_norm": 0.24888356029987335,
"learning_rate": 2.2458688799244205e-05,
"loss": 0.7518490314483642,
"mean_token_accuracy": 0.7753332704305649,
"num_tokens": 1036679049.0,
"step": 995
},
{
"epoch": 0.5640953321111268,
"grad_norm": 0.24516652524471283,
"learning_rate": 2.222431249972015e-05,
"loss": 0.7229015350341796,
"mean_token_accuracy": 0.7828704863786697,
"num_tokens": 1041896384.0,
"step": 1000
},
{
"epoch": 0.5669158087716825,
"grad_norm": 0.2515292763710022,
"learning_rate": 2.199018295016822e-05,
"loss": 0.7194217205047607,
"mean_token_accuracy": 0.7844615399837493,
"num_tokens": 1047071005.0,
"step": 1005
},
{
"epoch": 0.5697362854322381,
"grad_norm": 0.2749954164028168,
"learning_rate": 2.1756320963976012e-05,
"loss": 0.7305520057678223,
"mean_token_accuracy": 0.7813587754964828,
"num_tokens": 1052273340.0,
"step": 1010
},
{
"epoch": 0.5725567620927937,
"grad_norm": 0.24928732216358185,
"learning_rate": 2.152274733074558e-05,
"loss": 0.7407473564147949,
"mean_token_accuracy": 0.7781451612710952,
"num_tokens": 1057495565.0,
"step": 1015
},
{
"epoch": 0.5753772387533493,
"grad_norm": 0.2335939258337021,
"learning_rate": 2.128948281444532e-05,
"loss": 0.7340809822082519,
"mean_token_accuracy": 0.7799234807491302,
"num_tokens": 1062704187.0,
"step": 1020
},
{
"epoch": 0.578197715413905,
"grad_norm": 0.21696053445339203,
"learning_rate": 2.1056548151564063e-05,
"loss": 0.7354939460754395,
"mean_token_accuracy": 0.7797737270593643,
"num_tokens": 1067931721.0,
"step": 1025
},
{
"epoch": 0.5810181920744606,
"grad_norm": 0.2403183877468109,
"learning_rate": 2.0823964049267723e-05,
"loss": 0.7496252059936523,
"mean_token_accuracy": 0.7757533907890319,
"num_tokens": 1073116435.0,
"step": 1030
},
{
"epoch": 0.5838386687350162,
"grad_norm": 0.26338285207748413,
"learning_rate": 2.0591751183558468e-05,
"loss": 0.7384109497070312,
"mean_token_accuracy": 0.7789325386285781,
"num_tokens": 1078344450.0,
"step": 1035
},
{
"epoch": 0.5866591453955718,
"grad_norm": 0.26449069380760193,
"learning_rate": 2.035993019743666e-05,
"loss": 0.7206357955932617,
"mean_token_accuracy": 0.7838350623846054,
"num_tokens": 1083570977.0,
"step": 1040
},
{
"epoch": 0.5894796220561275,
"grad_norm": 0.26153990626335144,
"learning_rate": 2.012852169906584e-05,
"loss": 0.7402269840240479,
"mean_token_accuracy": 0.7779274940490722,
"num_tokens": 1088779432.0,
"step": 1045
},
{
"epoch": 0.5923000987166831,
"grad_norm": 0.2370881587266922,
"learning_rate": 1.9897546259940618e-05,
"loss": 0.7502132415771484,
"mean_token_accuracy": 0.7757242441177368,
"num_tokens": 1093978609.0,
"step": 1050
},
{
"epoch": 0.5951205753772387,
"grad_norm": 0.23634928464889526,
"learning_rate": 1.9667024413058028e-05,
"loss": 0.7406221389770508,
"mean_token_accuracy": 0.7780844628810882,
"num_tokens": 1099183883.0,
"step": 1055
},
{
"epoch": 0.5979410520377944,
"grad_norm": 0.21724943816661835,
"learning_rate": 1.9436976651092144e-05,
"loss": 0.7378547668457032,
"mean_token_accuracy": 0.7793454140424728,
"num_tokens": 1104381541.0,
"step": 1060
},
{
"epoch": 0.60076152869835,
"grad_norm": 0.25824031233787537,
"learning_rate": 1.9207423424572366e-05,
"loss": 0.7385224342346192,
"mean_token_accuracy": 0.7781225651502609,
"num_tokens": 1109596846.0,
"step": 1065
},
{
"epoch": 0.6035820053589056,
"grad_norm": 0.2292436957359314,
"learning_rate": 1.8978385140065453e-05,
"loss": 0.7486650943756104,
"mean_token_accuracy": 0.7761229813098908,
"num_tokens": 1114802727.0,
"step": 1070
},
{
"epoch": 0.6064024820194613,
"grad_norm": 0.24457287788391113,
"learning_rate": 1.874988215836141e-05,
"loss": 0.7344676971435546,
"mean_token_accuracy": 0.779693141579628,
"num_tokens": 1120030959.0,
"step": 1075
},
{
"epoch": 0.609222958680017,
"grad_norm": 0.2467031627893448,
"learning_rate": 1.8521934792663477e-05,
"loss": 0.7370716571807862,
"mean_token_accuracy": 0.7789741307497025,
"num_tokens": 1125247092.0,
"step": 1080
},
{
"epoch": 0.6120434353405726,
"grad_norm": 0.27388912439346313,
"learning_rate": 1.8294563306782396e-05,
"loss": 0.7286103248596192,
"mean_token_accuracy": 0.7814449548721314,
"num_tokens": 1130468945.0,
"step": 1085
},
{
"epoch": 0.6148639120011282,
"grad_norm": 0.2156301885843277,
"learning_rate": 1.8067787913334944e-05,
"loss": 0.745603609085083,
"mean_token_accuracy": 0.7774240404367447,
"num_tokens": 1135680292.0,
"step": 1090
},
{
"epoch": 0.6176843886616838,
"grad_norm": 0.2245538830757141,
"learning_rate": 1.784162877194719e-05,
"loss": 0.7460683345794678,
"mean_token_accuracy": 0.7766919553279876,
"num_tokens": 1140892783.0,
"step": 1095
},
{
"epoch": 0.6205048653222395,
"grad_norm": 0.23326575756072998,
"learning_rate": 1.761610598746226e-05,
"loss": 0.7598372459411621,
"mean_token_accuracy": 0.772556483745575,
"num_tokens": 1146107545.0,
"step": 1100
},
{
"epoch": 0.6233253419827951,
"grad_norm": 0.2319810688495636,
"learning_rate": 1.7391239608153163e-05,
"loss": 0.7281291007995605,
"mean_token_accuracy": 0.78159399330616,
"num_tokens": 1151321289.0,
"step": 1105
},
{
"epoch": 0.6261458186433507,
"grad_norm": 0.21929802000522614,
"learning_rate": 1.7167049623940557e-05,
"loss": 0.7409855365753174,
"mean_token_accuracy": 0.7778546661138535,
"num_tokens": 1156512899.0,
"step": 1110
},
{
"epoch": 0.6289662953039064,
"grad_norm": 0.23836582899093628,
"learning_rate": 1.694355596461562e-05,
"loss": 0.7418097972869873,
"mean_token_accuracy": 0.7773305416107178,
"num_tokens": 1161722642.0,
"step": 1115
},
{
"epoch": 0.631786771964462,
"grad_norm": 0.21553729474544525,
"learning_rate": 1.6720778498068465e-05,
"loss": 0.7374235153198242,
"mean_token_accuracy": 0.7792312622070312,
"num_tokens": 1166914936.0,
"step": 1120
},
{
"epoch": 0.6346072486250176,
"grad_norm": 0.21120575070381165,
"learning_rate": 1.649873702852189e-05,
"loss": 0.728809118270874,
"mean_token_accuracy": 0.781609109044075,
"num_tokens": 1172103530.0,
"step": 1125
},
{
"epoch": 0.6374277252855732,
"grad_norm": 0.22732919454574585,
"learning_rate": 1.6277451294770834e-05,
"loss": 0.7273163795471191,
"mean_token_accuracy": 0.7819408357143403,
"num_tokens": 1177324382.0,
"step": 1130
},
{
"epoch": 0.6402482019461289,
"grad_norm": 0.2159194052219391,
"learning_rate": 1.60569409684277e-05,
"loss": 0.7394785404205322,
"mean_token_accuracy": 0.7781968146562577,
"num_tokens": 1182546421.0,
"step": 1135
},
{
"epoch": 0.6430686786066845,
"grad_norm": 0.21402783691883087,
"learning_rate": 1.5837225652173587e-05,
"loss": 0.752212381362915,
"mean_token_accuracy": 0.7745260059833526,
"num_tokens": 1187746895.0,
"step": 1140
},
{
"epoch": 0.6458891552672401,
"grad_norm": 0.21710145473480225,
"learning_rate": 1.561832487801565e-05,
"loss": 0.7255624294281006,
"mean_token_accuracy": 0.7824052214622498,
"num_tokens": 1192961255.0,
"step": 1145
},
{
"epoch": 0.6487096319277958,
"grad_norm": 0.21105757355690002,
"learning_rate": 1.5400258105550813e-05,
"loss": 0.7374918460845947,
"mean_token_accuracy": 0.7783012241125107,
"num_tokens": 1198188396.0,
"step": 1150
},
{
"epoch": 0.6515301085883515,
"grad_norm": 0.21594274044036865,
"learning_rate": 1.5183044720235834e-05,
"loss": 0.7187402248382568,
"mean_token_accuracy": 0.7842638313770294,
"num_tokens": 1203372436.0,
"step": 1155
},
{
"epoch": 0.6543505852489071,
"grad_norm": 0.23616893589496613,
"learning_rate": 1.4966704031664026e-05,
"loss": 0.7164999008178711,
"mean_token_accuracy": 0.7841055691242218,
"num_tokens": 1208563379.0,
"step": 1160
},
{
"epoch": 0.6571710619094627,
"grad_norm": 0.22016112506389618,
"learning_rate": 1.4751255271848662e-05,
"loss": 0.7453357696533203,
"mean_token_accuracy": 0.7763175398111344,
"num_tokens": 1213791014.0,
"step": 1165
},
{
"epoch": 0.6599915385700184,
"grad_norm": 0.21811263263225555,
"learning_rate": 1.453671759351334e-05,
"loss": 0.7320215225219726,
"mean_token_accuracy": 0.7802564471960067,
"num_tokens": 1219001273.0,
"step": 1170
},
{
"epoch": 0.662812015230574,
"grad_norm": 0.21295255422592163,
"learning_rate": 1.4323110068389358e-05,
"loss": 0.71702880859375,
"mean_token_accuracy": 0.7841863363981247,
"num_tokens": 1224225308.0,
"step": 1175
},
{
"epoch": 0.6656324918911296,
"grad_norm": 0.2202758491039276,
"learning_rate": 1.4110451685520265e-05,
"loss": 0.7342299938201904,
"mean_token_accuracy": 0.7799709439277649,
"num_tokens": 1229436976.0,
"step": 1180
},
{
"epoch": 0.6684529685516852,
"grad_norm": 0.22536863386631012,
"learning_rate": 1.3898761349573841e-05,
"loss": 0.7358542919158936,
"mean_token_accuracy": 0.7793794482946396,
"num_tokens": 1234660399.0,
"step": 1185
},
{
"epoch": 0.6712734452122409,
"grad_norm": 0.21837250888347626,
"learning_rate": 1.368805787916152e-05,
"loss": 0.729494047164917,
"mean_token_accuracy": 0.7810665190219879,
"num_tokens": 1239877232.0,
"step": 1190
},
{
"epoch": 0.6740939218727965,
"grad_norm": 0.21181219816207886,
"learning_rate": 1.3478360005165432e-05,
"loss": 0.7322878837585449,
"mean_token_accuracy": 0.7803588449954987,
"num_tokens": 1245083794.0,
"step": 1195
},
{
"epoch": 0.6769143985333521,
"grad_norm": 0.24090248346328735,
"learning_rate": 1.3269686369073347e-05,
"loss": 0.7103838920593262,
"mean_token_accuracy": 0.7863076359033585,
"num_tokens": 1250308093.0,
"step": 1200
},
{
"epoch": 0.6797348751939077,
"grad_norm": 0.19208142161369324,
"learning_rate": 1.306205552132147e-05,
"loss": 0.723546314239502,
"mean_token_accuracy": 0.7827136069536209,
"num_tokens": 1255522176.0,
"step": 1205
},
{
"epoch": 0.6825553518544634,
"grad_norm": 0.19714190065860748,
"learning_rate": 1.2855485919645355e-05,
"loss": 0.7245129585266114,
"mean_token_accuracy": 0.7823726564645768,
"num_tokens": 1260734730.0,
"step": 1210
},
{
"epoch": 0.685375828515019,
"grad_norm": 0.2126917988061905,
"learning_rate": 1.26499959274391e-05,
"loss": 0.7271872520446777,
"mean_token_accuracy": 0.781196317076683,
"num_tokens": 1265950168.0,
"step": 1215
},
{
"epoch": 0.6881963051755746,
"grad_norm": 0.21914449334144592,
"learning_rate": 1.2445603812122886e-05,
"loss": 0.7242794513702393,
"mean_token_accuracy": 0.7824690848588943,
"num_tokens": 1271176340.0,
"step": 1220
},
{
"epoch": 0.6910167818361304,
"grad_norm": 0.20978210866451263,
"learning_rate": 1.224232774351906e-05,
"loss": 0.7303569793701172,
"mean_token_accuracy": 0.7805366754531861,
"num_tokens": 1276388870.0,
"step": 1225
},
{
"epoch": 0.693837258496686,
"grad_norm": 0.20040655136108398,
"learning_rate": 1.2040185792236874e-05,
"loss": 0.7304568290710449,
"mean_token_accuracy": 0.7806206464767456,
"num_tokens": 1281614500.0,
"step": 1230
},
{
"epoch": 0.6966577351572416,
"grad_norm": 0.22813160717487335,
"learning_rate": 1.1839195928066102e-05,
"loss": 0.7257880687713623,
"mean_token_accuracy": 0.7822084277868271,
"num_tokens": 1286829234.0,
"step": 1235
},
{
"epoch": 0.6994782118177972,
"grad_norm": 0.19202908873558044,
"learning_rate": 1.1639376018379566e-05,
"loss": 0.7248349189758301,
"mean_token_accuracy": 0.7827732414007187,
"num_tokens": 1292054878.0,
"step": 1240
},
{
"epoch": 0.7022986884783529,
"grad_norm": 0.19687888026237488,
"learning_rate": 1.1440743826544753e-05,
"loss": 0.7293760776519775,
"mean_token_accuracy": 0.7805086255073548,
"num_tokens": 1297267890.0,
"step": 1245
},
{
"epoch": 0.7051191651389085,
"grad_norm": 0.22136810421943665,
"learning_rate": 1.1243317010344759e-05,
"loss": 0.7223714828491211,
"mean_token_accuracy": 0.7826942443847656,
"num_tokens": 1302488964.0,
"step": 1250
},
{
"epoch": 0.7079396417994641,
"grad_norm": 0.21676434576511383,
"learning_rate": 1.1047113120408537e-05,
"loss": 0.7311611652374268,
"mean_token_accuracy": 0.7800871402025222,
"num_tokens": 1307701202.0,
"step": 1255
},
{
"epoch": 0.7107601184600197,
"grad_norm": 0.2193623036146164,
"learning_rate": 1.0852149598650684e-05,
"loss": 0.7414857387542725,
"mean_token_accuracy": 0.7775449156761169,
"num_tokens": 1312918390.0,
"step": 1260
},
{
"epoch": 0.7135805951205754,
"grad_norm": 0.2889624238014221,
"learning_rate": 1.0658443776720956e-05,
"loss": 0.7338351249694824,
"mean_token_accuracy": 0.7796378195285797,
"num_tokens": 1318143275.0,
"step": 1265
},
{
"epoch": 0.716401071781131,
"grad_norm": 0.19788454473018646,
"learning_rate": 1.0466012874463507e-05,
"loss": 0.7364720821380615,
"mean_token_accuracy": 0.7789324551820755,
"num_tokens": 1323318690.0,
"step": 1270
},
{
"epoch": 0.7192215484416866,
"grad_norm": 0.21078291535377502,
"learning_rate": 1.0274873998386083e-05,
"loss": 0.7365177154541016,
"mean_token_accuracy": 0.7786802440881729,
"num_tokens": 1328546048.0,
"step": 1275
},
{
"epoch": 0.7220420251022422,
"grad_norm": 0.2020423710346222,
"learning_rate": 1.0085044140139353e-05,
"loss": 0.7265225410461426,
"mean_token_accuracy": 0.7812680572271347,
"num_tokens": 1333747221.0,
"step": 1280
}
],
"logging_steps": 5,
"max_steps": 1773,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 320,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 7.269279683030548e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}