Qwen2.5-1.5B-Open-R1-Distill / trainer_state.json
TTTXXX01's picture
Model save
5dc50e7 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9990749306197965,
"eval_steps": 100,
"global_step": 675,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0074005550416281225,
"grad_norm": 3.17462153124654,
"learning_rate": 1.4705882352941177e-06,
"loss": 1.3164,
"mean_token_accuracy": 0.6723419636487961,
"step": 5
},
{
"epoch": 0.014801110083256245,
"grad_norm": 2.607771459790906,
"learning_rate": 2.9411764705882355e-06,
"loss": 1.2992,
"mean_token_accuracy": 0.6760224714875221,
"step": 10
},
{
"epoch": 0.022201665124884366,
"grad_norm": 1.805377852730071,
"learning_rate": 4.411764705882353e-06,
"loss": 1.2498,
"mean_token_accuracy": 0.6808868274092674,
"step": 15
},
{
"epoch": 0.02960222016651249,
"grad_norm": 1.3568970571502441,
"learning_rate": 5.882352941176471e-06,
"loss": 1.1657,
"mean_token_accuracy": 0.694776351749897,
"step": 20
},
{
"epoch": 0.03700277520814061,
"grad_norm": 1.1737781790953241,
"learning_rate": 7.352941176470589e-06,
"loss": 1.1017,
"mean_token_accuracy": 0.7045704483985901,
"step": 25
},
{
"epoch": 0.04440333024976873,
"grad_norm": 1.0614590550138614,
"learning_rate": 8.823529411764707e-06,
"loss": 1.0567,
"mean_token_accuracy": 0.7118889898061752,
"step": 30
},
{
"epoch": 0.05180388529139685,
"grad_norm": 0.8685597793171974,
"learning_rate": 1.0294117647058823e-05,
"loss": 1.0364,
"mean_token_accuracy": 0.714593480527401,
"step": 35
},
{
"epoch": 0.05920444033302498,
"grad_norm": 0.7431059435910768,
"learning_rate": 1.1764705882352942e-05,
"loss": 1.0062,
"mean_token_accuracy": 0.7208917796611786,
"step": 40
},
{
"epoch": 0.0666049953746531,
"grad_norm": 0.6773660202418459,
"learning_rate": 1.323529411764706e-05,
"loss": 0.9913,
"mean_token_accuracy": 0.7229786649346351,
"step": 45
},
{
"epoch": 0.07400555041628122,
"grad_norm": 0.7261227405919924,
"learning_rate": 1.4705882352941179e-05,
"loss": 0.9877,
"mean_token_accuracy": 0.723342613875866,
"step": 50
},
{
"epoch": 0.08140610545790934,
"grad_norm": 0.651112876340244,
"learning_rate": 1.6176470588235296e-05,
"loss": 0.9887,
"mean_token_accuracy": 0.7222026586532593,
"step": 55
},
{
"epoch": 0.08880666049953746,
"grad_norm": 0.6737886852071816,
"learning_rate": 1.7647058823529414e-05,
"loss": 0.9622,
"mean_token_accuracy": 0.7282778188586235,
"step": 60
},
{
"epoch": 0.09620721554116558,
"grad_norm": 0.7462556046211627,
"learning_rate": 1.911764705882353e-05,
"loss": 0.9388,
"mean_token_accuracy": 0.7344567880034447,
"step": 65
},
{
"epoch": 0.1036077705827937,
"grad_norm": 0.7193931069370665,
"learning_rate": 1.9999464266898485e-05,
"loss": 0.9364,
"mean_token_accuracy": 0.7341951102018356,
"step": 70
},
{
"epoch": 0.11100832562442182,
"grad_norm": 0.6733925352901556,
"learning_rate": 1.9993437928712977e-05,
"loss": 0.9287,
"mean_token_accuracy": 0.7368804678320885,
"step": 75
},
{
"epoch": 0.11840888066604996,
"grad_norm": 0.6308853770887113,
"learning_rate": 1.998071963486563e-05,
"loss": 0.9155,
"mean_token_accuracy": 0.7392918571829796,
"step": 80
},
{
"epoch": 0.12580943570767808,
"grad_norm": 0.6720227519538627,
"learning_rate": 1.9961317901970953e-05,
"loss": 0.9525,
"mean_token_accuracy": 0.729509249329567,
"step": 85
},
{
"epoch": 0.1332099907493062,
"grad_norm": 0.7516617344807497,
"learning_rate": 1.993524572210807e-05,
"loss": 0.9202,
"mean_token_accuracy": 0.7370600894093513,
"step": 90
},
{
"epoch": 0.14061054579093432,
"grad_norm": 0.7148942526620776,
"learning_rate": 1.990252055412077e-05,
"loss": 0.9193,
"mean_token_accuracy": 0.7373004630208015,
"step": 95
},
{
"epoch": 0.14801110083256244,
"grad_norm": 0.7509835780416588,
"learning_rate": 1.9863164311926433e-05,
"loss": 0.9361,
"mean_token_accuracy": 0.7319045767188073,
"step": 100
},
{
"epoch": 0.14801110083256244,
"eval_loss": 0.948406994342804,
"eval_mean_token_accuracy": 0.734520063680761,
"eval_runtime": 2.3424,
"eval_samples_per_second": 219.002,
"eval_steps_per_second": 7.257,
"step": 100
},
{
"epoch": 0.15541165587419056,
"grad_norm": 0.7567520572125319,
"learning_rate": 1.981720334984174e-05,
"loss": 0.9131,
"mean_token_accuracy": 0.7381318211555481,
"step": 105
},
{
"epoch": 0.16281221091581868,
"grad_norm": 0.6727736194852126,
"learning_rate": 1.9764668444934853e-05,
"loss": 0.9285,
"mean_token_accuracy": 0.7351326107978821,
"step": 110
},
{
"epoch": 0.1702127659574468,
"grad_norm": 0.7015220015202094,
"learning_rate": 1.970559477641606e-05,
"loss": 0.9374,
"mean_token_accuracy": 0.731748317182064,
"step": 115
},
{
"epoch": 0.17761332099907493,
"grad_norm": 0.6896500407469356,
"learning_rate": 1.9640021902080523e-05,
"loss": 0.9204,
"mean_token_accuracy": 0.735644418001175,
"step": 120
},
{
"epoch": 0.18501387604070305,
"grad_norm": 0.8048766507637898,
"learning_rate": 1.9567993731818988e-05,
"loss": 0.9294,
"mean_token_accuracy": 0.7338634684681893,
"step": 125
},
{
"epoch": 0.19241443108233117,
"grad_norm": 0.6987899075864978,
"learning_rate": 1.9489558498214197e-05,
"loss": 0.901,
"mean_token_accuracy": 0.7416511297225952,
"step": 130
},
{
"epoch": 0.1998149861239593,
"grad_norm": 0.7280824320570579,
"learning_rate": 1.9404768724242667e-05,
"loss": 0.9173,
"mean_token_accuracy": 0.7361382976174354,
"step": 135
},
{
"epoch": 0.2072155411655874,
"grad_norm": 0.698722122451753,
"learning_rate": 1.931368118810346e-05,
"loss": 0.9246,
"mean_token_accuracy": 0.7331647261977196,
"step": 140
},
{
"epoch": 0.21461609620721553,
"grad_norm": 0.7446888121310924,
"learning_rate": 1.92163568851975e-05,
"loss": 0.9078,
"mean_token_accuracy": 0.7388896599411965,
"step": 145
},
{
"epoch": 0.22201665124884365,
"grad_norm": 0.732513964641267,
"learning_rate": 1.911286098728296e-05,
"loss": 0.915,
"mean_token_accuracy": 0.7361646965146065,
"step": 150
},
{
"epoch": 0.2294172062904718,
"grad_norm": 0.7333980757279478,
"learning_rate": 1.900326279883392e-05,
"loss": 0.8929,
"mean_token_accuracy": 0.7425167575478554,
"step": 155
},
{
"epoch": 0.23681776133209992,
"grad_norm": 0.7166974974730754,
"learning_rate": 1.8887635710631716e-05,
"loss": 0.9029,
"mean_token_accuracy": 0.7399504974484443,
"step": 160
},
{
"epoch": 0.24421831637372804,
"grad_norm": 0.7178965900179631,
"learning_rate": 1.8766057150619865e-05,
"loss": 0.899,
"mean_token_accuracy": 0.7420736849308014,
"step": 165
},
{
"epoch": 0.25161887141535616,
"grad_norm": 0.7300235767147032,
"learning_rate": 1.8638608532055635e-05,
"loss": 0.8907,
"mean_token_accuracy": 0.7422624364495277,
"step": 170
},
{
"epoch": 0.2590194264569843,
"grad_norm": 0.7012568297068099,
"learning_rate": 1.8505375198992856e-05,
"loss": 0.8942,
"mean_token_accuracy": 0.74230248183012,
"step": 175
},
{
"epoch": 0.2664199814986124,
"grad_norm": 0.6887200673521662,
"learning_rate": 1.836644636913258e-05,
"loss": 0.9017,
"mean_token_accuracy": 0.7398161098361016,
"step": 180
},
{
"epoch": 0.2738205365402405,
"grad_norm": 0.7758595614622964,
"learning_rate": 1.8221915074079764e-05,
"loss": 0.9009,
"mean_token_accuracy": 0.7405245855450631,
"step": 185
},
{
"epoch": 0.28122109158186864,
"grad_norm": 0.735639251670638,
"learning_rate": 1.8071878097046064e-05,
"loss": 0.8957,
"mean_token_accuracy": 0.741812764108181,
"step": 190
},
{
"epoch": 0.28862164662349676,
"grad_norm": 0.8143223515561344,
"learning_rate": 1.7916435908040413e-05,
"loss": 0.8949,
"mean_token_accuracy": 0.7419108718633651,
"step": 195
},
{
"epoch": 0.2960222016651249,
"grad_norm": 0.6952036666245345,
"learning_rate": 1.7755692596590778e-05,
"loss": 0.8783,
"mean_token_accuracy": 0.7458099812269211,
"step": 200
},
{
"epoch": 0.2960222016651249,
"eval_loss": 0.9183758497238159,
"eval_mean_token_accuracy": 0.7399611963945276,
"eval_runtime": 2.4073,
"eval_samples_per_second": 213.105,
"eval_steps_per_second": 7.062,
"step": 200
},
{
"epoch": 0.303422756706753,
"grad_norm": 0.7459993666146241,
"learning_rate": 1.7589755802042188e-05,
"loss": 0.8844,
"mean_token_accuracy": 0.744191363453865,
"step": 205
},
{
"epoch": 0.3108233117483811,
"grad_norm": 0.7610305295318044,
"learning_rate": 1.7418736641477636e-05,
"loss": 0.8919,
"mean_token_accuracy": 0.7430422529578209,
"step": 210
},
{
"epoch": 0.31822386679000925,
"grad_norm": 0.6961595378741922,
"learning_rate": 1.7242749635310222e-05,
"loss": 0.8952,
"mean_token_accuracy": 0.7421674817800522,
"step": 215
},
{
"epoch": 0.32562442183163737,
"grad_norm": 0.694624932228998,
"learning_rate": 1.7061912630596252e-05,
"loss": 0.8994,
"mean_token_accuracy": 0.7396624863147736,
"step": 220
},
{
"epoch": 0.3330249768732655,
"grad_norm": 0.713820340095853,
"learning_rate": 1.6876346722120747e-05,
"loss": 0.8923,
"mean_token_accuracy": 0.7422993093729019,
"step": 225
},
{
"epoch": 0.3404255319148936,
"grad_norm": 0.6761846353256752,
"learning_rate": 1.6686176171308125e-05,
"loss": 0.8929,
"mean_token_accuracy": 0.7426498860120774,
"step": 230
},
{
"epoch": 0.34782608695652173,
"grad_norm": 0.7432368783900164,
"learning_rate": 1.6491528323012412e-05,
"loss": 0.8662,
"mean_token_accuracy": 0.7481524258852005,
"step": 235
},
{
"epoch": 0.35522664199814985,
"grad_norm": 0.7265498148413707,
"learning_rate": 1.6292533520242663e-05,
"loss": 0.8962,
"mean_token_accuracy": 0.740491370856762,
"step": 240
},
{
"epoch": 0.36262719703977797,
"grad_norm": 0.7293297036893336,
"learning_rate": 1.6089325016880737e-05,
"loss": 0.8875,
"mean_token_accuracy": 0.7421652853488923,
"step": 245
},
{
"epoch": 0.3700277520814061,
"grad_norm": 0.7553587971266308,
"learning_rate": 1.588203888844982e-05,
"loss": 0.8875,
"mean_token_accuracy": 0.7426256895065307,
"step": 250
},
{
"epoch": 0.3774283071230342,
"grad_norm": 0.6664473979322119,
"learning_rate": 1.5670813940993504e-05,
"loss": 0.8787,
"mean_token_accuracy": 0.7458443433046341,
"step": 255
},
{
"epoch": 0.38482886216466233,
"grad_norm": 0.7401808534835373,
"learning_rate": 1.5455791618126407e-05,
"loss": 0.8739,
"mean_token_accuracy": 0.7472684413194657,
"step": 260
},
{
"epoch": 0.39222941720629045,
"grad_norm": 0.6651789007704498,
"learning_rate": 1.5237115906318565e-05,
"loss": 0.8597,
"mean_token_accuracy": 0.7507400318980217,
"step": 265
},
{
"epoch": 0.3996299722479186,
"grad_norm": 0.6853734364253129,
"learning_rate": 1.5014933238477069e-05,
"loss": 0.8802,
"mean_token_accuracy": 0.7448777571320534,
"step": 270
},
{
"epoch": 0.4070305272895467,
"grad_norm": 0.6844289806764663,
"learning_rate": 1.4789392395889468e-05,
"loss": 0.8893,
"mean_token_accuracy": 0.7420724034309387,
"step": 275
},
{
"epoch": 0.4144310823311748,
"grad_norm": 0.6739075993807693,
"learning_rate": 1.4560644408594602e-05,
"loss": 0.8841,
"mean_token_accuracy": 0.7437198102474213,
"step": 280
},
{
"epoch": 0.42183163737280294,
"grad_norm": 0.6708853575512934,
"learning_rate": 1.432884245424761e-05,
"loss": 0.8592,
"mean_token_accuracy": 0.7501097247004509,
"step": 285
},
{
"epoch": 0.42923219241443106,
"grad_norm": 0.6273885872818475,
"learning_rate": 1.4094141755546816e-05,
"loss": 0.8864,
"mean_token_accuracy": 0.7425328984856605,
"step": 290
},
{
"epoch": 0.4366327474560592,
"grad_norm": 0.651848238006163,
"learning_rate": 1.3856699476291176e-05,
"loss": 0.8813,
"mean_token_accuracy": 0.7444320157170295,
"step": 295
},
{
"epoch": 0.4440333024976873,
"grad_norm": 0.6471291451190254,
"learning_rate": 1.3616674616137902e-05,
"loss": 0.8854,
"mean_token_accuracy": 0.7433394238352775,
"step": 300
},
{
"epoch": 0.4440333024976873,
"eval_loss": 0.9005910754203796,
"eval_mean_token_accuracy": 0.7433299317079431,
"eval_runtime": 2.2967,
"eval_samples_per_second": 223.361,
"eval_steps_per_second": 7.402,
"step": 300
},
{
"epoch": 0.4514338575393155,
"grad_norm": 0.6917570796949137,
"learning_rate": 1.3374227904130724e-05,
"loss": 0.8767,
"mean_token_accuracy": 0.7451273664832115,
"step": 305
},
{
"epoch": 0.4588344125809436,
"grad_norm": 0.6611133087282433,
"learning_rate": 1.3129521691070108e-05,
"loss": 0.8772,
"mean_token_accuracy": 0.7451275780797004,
"step": 310
},
{
"epoch": 0.4662349676225717,
"grad_norm": 0.6568754127751882,
"learning_rate": 1.2882719840797473e-05,
"loss": 0.8646,
"mean_token_accuracy": 0.7485488772392273,
"step": 315
},
{
"epoch": 0.47363552266419984,
"grad_norm": 0.7103546248209126,
"learning_rate": 1.2633987620466229e-05,
"loss": 0.8776,
"mean_token_accuracy": 0.744923198223114,
"step": 320
},
{
"epoch": 0.48103607770582796,
"grad_norm": 0.6763998847474214,
"learning_rate": 1.2383491589873122e-05,
"loss": 0.8518,
"mean_token_accuracy": 0.7524308055639267,
"step": 325
},
{
"epoch": 0.4884366327474561,
"grad_norm": 0.705580130291658,
"learning_rate": 1.213139948992394e-05,
"loss": 0.8843,
"mean_token_accuracy": 0.7439973786473274,
"step": 330
},
{
"epoch": 0.4958371877890842,
"grad_norm": 0.6831917351257646,
"learning_rate": 1.187788013030837e-05,
"loss": 0.8847,
"mean_token_accuracy": 0.7424498632550239,
"step": 335
},
{
"epoch": 0.5032377428307123,
"grad_norm": 0.6939279365402973,
"learning_rate": 1.1623103276459086e-05,
"loss": 0.8527,
"mean_token_accuracy": 0.7521541297435761,
"step": 340
},
{
"epoch": 0.5106382978723404,
"grad_norm": 0.6344189293455071,
"learning_rate": 1.1367239535870913e-05,
"loss": 0.8645,
"mean_token_accuracy": 0.7477951139211655,
"step": 345
},
{
"epoch": 0.5180388529139686,
"grad_norm": 0.6656605713302665,
"learning_rate": 1.1110460243856051e-05,
"loss": 0.8636,
"mean_token_accuracy": 0.7493920430541039,
"step": 350
},
{
"epoch": 0.5254394079555966,
"grad_norm": 0.632938823180403,
"learning_rate": 1.085293734881197e-05,
"loss": 0.8794,
"mean_token_accuracy": 0.7437050268054008,
"step": 355
},
{
"epoch": 0.5328399629972248,
"grad_norm": 0.6646039886914304,
"learning_rate": 1.0594843297078736e-05,
"loss": 0.8864,
"mean_token_accuracy": 0.7420791804790496,
"step": 360
},
{
"epoch": 0.5402405180388529,
"grad_norm": 0.598762830732268,
"learning_rate": 1.0336350917462925e-05,
"loss": 0.8712,
"mean_token_accuracy": 0.7456322193145752,
"step": 365
},
{
"epoch": 0.547641073080481,
"grad_norm": 0.632902389755479,
"learning_rate": 1.0077633305505402e-05,
"loss": 0.8621,
"mean_token_accuracy": 0.7483125552535057,
"step": 370
},
{
"epoch": 0.5550416281221091,
"grad_norm": 0.586405525156002,
"learning_rate": 9.818863707570476e-06,
"loss": 0.8666,
"mean_token_accuracy": 0.7478629052639008,
"step": 375
},
{
"epoch": 0.5624421831637373,
"grad_norm": 0.5997161121850767,
"learning_rate": 9.560215404834094e-06,
"loss": 0.8538,
"mean_token_accuracy": 0.751722939312458,
"step": 380
},
{
"epoch": 0.5698427382053654,
"grad_norm": 0.6621826013411131,
"learning_rate": 9.30186159724869e-06,
"loss": 0.8867,
"mean_token_accuracy": 0.7422450318932533,
"step": 385
},
{
"epoch": 0.5772432932469935,
"grad_norm": 0.6589675723328322,
"learning_rate": 9.043975287562443e-06,
"loss": 0.8731,
"mean_token_accuracy": 0.7454224765300751,
"step": 390
},
{
"epoch": 0.5846438482886216,
"grad_norm": 0.5948286880758474,
"learning_rate": 8.786729165470584e-06,
"loss": 0.8544,
"mean_token_accuracy": 0.7502254620194435,
"step": 395
},
{
"epoch": 0.5920444033302498,
"grad_norm": 0.5945310179240408,
"learning_rate": 8.530295491976338e-06,
"loss": 0.8771,
"mean_token_accuracy": 0.7434320345520973,
"step": 400
},
{
"epoch": 0.5920444033302498,
"eval_loss": 0.8894661068916321,
"eval_mean_token_accuracy": 0.746196382185992,
"eval_runtime": 2.2026,
"eval_samples_per_second": 232.908,
"eval_steps_per_second": 7.718,
"step": 400
},
{
"epoch": 0.599444958371878,
"grad_norm": 0.6628181091437666,
"learning_rate": 8.274845984038916e-06,
"loss": 0.8799,
"mean_token_accuracy": 0.7434773370623589,
"step": 405
},
{
"epoch": 0.606845513413506,
"grad_norm": 0.6519527429640076,
"learning_rate": 8.020551699585843e-06,
"loss": 0.8585,
"mean_token_accuracy": 0.7491447255015373,
"step": 410
},
{
"epoch": 0.6142460684551342,
"grad_norm": 0.6507708055223145,
"learning_rate": 7.76758292296659e-06,
"loss": 0.8571,
"mean_token_accuracy": 0.7494570732116699,
"step": 415
},
{
"epoch": 0.6216466234967623,
"grad_norm": 0.6292315920334376,
"learning_rate": 7.5161090509242005e-06,
"loss": 0.8536,
"mean_token_accuracy": 0.7508259728550911,
"step": 420
},
{
"epoch": 0.6290471785383904,
"grad_norm": 0.5880993475488482,
"learning_rate": 7.2662984791613186e-06,
"loss": 0.8856,
"mean_token_accuracy": 0.7416871294379235,
"step": 425
},
{
"epoch": 0.6364477335800185,
"grad_norm": 0.6308076590780972,
"learning_rate": 7.01831848957653e-06,
"loss": 0.8426,
"mean_token_accuracy": 0.7526247292757035,
"step": 430
},
{
"epoch": 0.6438482886216467,
"grad_norm": 0.6540736305699284,
"learning_rate": 6.772335138246548e-06,
"loss": 0.8636,
"mean_token_accuracy": 0.7478620380163192,
"step": 435
},
{
"epoch": 0.6512488436632747,
"grad_norm": 0.6239612099143872,
"learning_rate": 6.528513144229256e-06,
"loss": 0.8771,
"mean_token_accuracy": 0.7446955293416977,
"step": 440
},
{
"epoch": 0.6586493987049029,
"grad_norm": 0.6364132702245976,
"learning_rate": 6.287015779262064e-06,
"loss": 0.8601,
"mean_token_accuracy": 0.7497312590479851,
"step": 445
},
{
"epoch": 0.666049953746531,
"grad_norm": 0.6309690162153909,
"learning_rate": 6.048004758429451e-06,
"loss": 0.8731,
"mean_token_accuracy": 0.7458893701434135,
"step": 450
},
{
"epoch": 0.6734505087881592,
"grad_norm": 0.6453502197538109,
"learning_rate": 5.811640131872867e-06,
"loss": 0.8718,
"mean_token_accuracy": 0.7456356301903725,
"step": 455
},
{
"epoch": 0.6808510638297872,
"grad_norm": 0.6493054194653789,
"learning_rate": 5.578080177615575e-06,
"loss": 0.8697,
"mean_token_accuracy": 0.7460294872522354,
"step": 460
},
{
"epoch": 0.6882516188714154,
"grad_norm": 0.638506111356214,
"learning_rate": 5.347481295574141e-06,
"loss": 0.8699,
"mean_token_accuracy": 0.7468886405229569,
"step": 465
},
{
"epoch": 0.6956521739130435,
"grad_norm": 0.6700606081583159,
"learning_rate": 5.119997902827584e-06,
"loss": 0.8697,
"mean_token_accuracy": 0.7466486528515816,
"step": 470
},
{
"epoch": 0.7030527289546716,
"grad_norm": 0.6690763391812816,
"learning_rate": 4.8957823302142916e-06,
"loss": 0.854,
"mean_token_accuracy": 0.7497672855854034,
"step": 475
},
{
"epoch": 0.7104532839962997,
"grad_norm": 0.628582793856588,
"learning_rate": 4.674984720325961e-06,
"loss": 0.8584,
"mean_token_accuracy": 0.7492196127772331,
"step": 480
},
{
"epoch": 0.7178538390379279,
"grad_norm": 0.6092116860036849,
"learning_rate": 4.457752926966888e-06,
"loss": 0.8629,
"mean_token_accuracy": 0.7479139417409897,
"step": 485
},
{
"epoch": 0.7252543940795559,
"grad_norm": 0.6223989287313424,
"learning_rate": 4.244232416145839e-06,
"loss": 0.8552,
"mean_token_accuracy": 0.7511809840798378,
"step": 490
},
{
"epoch": 0.7326549491211841,
"grad_norm": 0.6155093095279435,
"learning_rate": 4.0345661686669745e-06,
"loss": 0.8418,
"mean_token_accuracy": 0.7539543464779854,
"step": 495
},
{
"epoch": 0.7400555041628122,
"grad_norm": 0.6100594402401441,
"learning_rate": 3.828894584384867e-06,
"loss": 0.8521,
"mean_token_accuracy": 0.750593489408493,
"step": 500
},
{
"epoch": 0.7400555041628122,
"eval_loss": 0.8816942572593689,
"eval_mean_token_accuracy": 0.7474992415484261,
"eval_runtime": 2.1819,
"eval_samples_per_second": 235.119,
"eval_steps_per_second": 7.791,
"step": 500
},
{
"epoch": 0.7474560592044404,
"grad_norm": 0.5929296663062318,
"learning_rate": 3.62735538818787e-06,
"loss": 0.8433,
"mean_token_accuracy": 0.754003980755806,
"step": 505
},
{
"epoch": 0.7548566142460684,
"grad_norm": 0.6412623171950302,
"learning_rate": 3.4300835377726904e-06,
"loss": 0.859,
"mean_token_accuracy": 0.7497094452381134,
"step": 510
},
{
"epoch": 0.7622571692876966,
"grad_norm": 0.6298988100525497,
"learning_rate": 3.2372111332720045e-06,
"loss": 0.8599,
"mean_token_accuracy": 0.7487298905849457,
"step": 515
},
{
"epoch": 0.7696577243293247,
"grad_norm": 0.5581295883021331,
"learning_rate": 3.048867328795588e-06,
"loss": 0.8459,
"mean_token_accuracy": 0.7519090935587883,
"step": 520
},
{
"epoch": 0.7770582793709528,
"grad_norm": 0.5787863942828044,
"learning_rate": 2.865178245944218e-06,
"loss": 0.8434,
"mean_token_accuracy": 0.7538654118776321,
"step": 525
},
{
"epoch": 0.7844588344125809,
"grad_norm": 0.5950526550908405,
"learning_rate": 2.686266889354211e-06,
"loss": 0.8551,
"mean_token_accuracy": 0.7497801765799522,
"step": 530
},
{
"epoch": 0.7918593894542091,
"grad_norm": 0.6588817106131167,
"learning_rate": 2.5122530643292274e-06,
"loss": 0.8397,
"mean_token_accuracy": 0.7542166978120803,
"step": 535
},
{
"epoch": 0.7992599444958371,
"grad_norm": 0.5690965403775134,
"learning_rate": 2.3432532966144526e-06,
"loss": 0.8587,
"mean_token_accuracy": 0.7487219601869584,
"step": 540
},
{
"epoch": 0.8066604995374653,
"grad_norm": 0.6125531183231568,
"learning_rate": 2.1793807543668857e-06,
"loss": 0.8588,
"mean_token_accuracy": 0.7497648850083352,
"step": 545
},
{
"epoch": 0.8140610545790934,
"grad_norm": 0.5801084214979221,
"learning_rate": 2.0207451723739633e-06,
"loss": 0.8481,
"mean_token_accuracy": 0.7510292425751686,
"step": 550
},
{
"epoch": 0.8214616096207216,
"grad_norm": 0.6020886754691482,
"learning_rate": 1.8674527785713247e-06,
"loss": 0.8545,
"mean_token_accuracy": 0.7498438000679016,
"step": 555
},
{
"epoch": 0.8288621646623496,
"grad_norm": 0.5881539619398165,
"learning_rate": 1.7196062229088606e-06,
"loss": 0.824,
"mean_token_accuracy": 0.7588429227471352,
"step": 560
},
{
"epoch": 0.8362627197039778,
"grad_norm": 0.6540097902487986,
"learning_rate": 1.577304508612717e-06,
"loss": 0.8599,
"mean_token_accuracy": 0.7493684038519859,
"step": 565
},
{
"epoch": 0.8436632747456059,
"grad_norm": 0.5515188037964074,
"learning_rate": 1.4406429258892762e-06,
"loss": 0.8695,
"mean_token_accuracy": 0.7460050553083419,
"step": 570
},
{
"epoch": 0.851063829787234,
"grad_norm": 0.5702259787030879,
"learning_rate": 1.3097129881154936e-06,
"loss": 0.8474,
"mean_token_accuracy": 0.7515697330236435,
"step": 575
},
{
"epoch": 0.8584643848288621,
"grad_norm": 0.569912856831243,
"learning_rate": 1.1846023705583442e-06,
"loss": 0.8397,
"mean_token_accuracy": 0.7538618639111518,
"step": 580
},
{
"epoch": 0.8658649398704903,
"grad_norm": 0.5814194670333124,
"learning_rate": 1.065394851664394e-06,
"loss": 0.8538,
"mean_token_accuracy": 0.7506972655653954,
"step": 585
},
{
"epoch": 0.8732654949121184,
"grad_norm": 0.5710401441163011,
"learning_rate": 9.521702569588199e-07,
"loss": 0.8503,
"mean_token_accuracy": 0.7507877260446548,
"step": 590
},
{
"epoch": 0.8806660499537465,
"grad_norm": 0.6050272603387684,
"learning_rate": 8.450044055914497e-07,
"loss": 0.8355,
"mean_token_accuracy": 0.7554965004324913,
"step": 595
},
{
"epoch": 0.8880666049953746,
"grad_norm": 0.6116095120841144,
"learning_rate": 7.439690595656013e-07,
"loss": 0.8577,
"mean_token_accuracy": 0.7487078487873078,
"step": 600
},
{
"epoch": 0.8880666049953746,
"eval_loss": 0.878446638584137,
"eval_mean_token_accuracy": 0.7483736311688143,
"eval_runtime": 2.2174,
"eval_samples_per_second": 231.351,
"eval_steps_per_second": 7.667,
"step": 600
},
{
"epoch": 0.8954671600370028,
"grad_norm": 0.5844045327647062,
"learning_rate": 6.491318756837417e-07,
"loss": 0.8608,
"mean_token_accuracy": 0.7476343438029289,
"step": 605
},
{
"epoch": 0.902867715078631,
"grad_norm": 0.6561651500395943,
"learning_rate": 5.605563602421149e-07,
"loss": 0.8443,
"mean_token_accuracy": 0.753425945341587,
"step": 610
},
{
"epoch": 0.910268270120259,
"grad_norm": 0.5889315475343176,
"learning_rate": 4.783018265047179e-07,
"loss": 0.8434,
"mean_token_accuracy": 0.7531542599201202,
"step": 615
},
{
"epoch": 0.9176688251618872,
"grad_norm": 0.5879349516306603,
"learning_rate": 4.024233549850509e-07,
"loss": 0.86,
"mean_token_accuracy": 0.747537162899971,
"step": 620
},
{
"epoch": 0.9250693802035153,
"grad_norm": 0.6096516503588968,
"learning_rate": 3.329717565622825e-07,
"loss": 0.8626,
"mean_token_accuracy": 0.7473519369959831,
"step": 625
},
{
"epoch": 0.9324699352451434,
"grad_norm": 0.5631218782489277,
"learning_rate": 2.6999353845651113e-07,
"loss": 0.8498,
"mean_token_accuracy": 0.7512675732374191,
"step": 630
},
{
"epoch": 0.9398704902867715,
"grad_norm": 0.5950538026541023,
"learning_rate": 2.1353087308590314e-07,
"loss": 0.8604,
"mean_token_accuracy": 0.7484870612621307,
"step": 635
},
{
"epoch": 0.9472710453283997,
"grad_norm": 0.6049402888795882,
"learning_rate": 1.6362156982656085e-07,
"loss": 0.8516,
"mean_token_accuracy": 0.7498179882764816,
"step": 640
},
{
"epoch": 0.9546716003700277,
"grad_norm": 0.5911879169034453,
"learning_rate": 1.2029904969404482e-07,
"loss": 0.8578,
"mean_token_accuracy": 0.7486365556716919,
"step": 645
},
{
"epoch": 0.9620721554116559,
"grad_norm": 0.6056466926084687,
"learning_rate": 8.359232296349163e-08,
"loss": 0.8688,
"mean_token_accuracy": 0.7459369316697121,
"step": 650
},
{
"epoch": 0.969472710453284,
"grad_norm": 0.5685047357347602,
"learning_rate": 5.3525969743324356e-08,
"loss": 0.8613,
"mean_token_accuracy": 0.7480048418045044,
"step": 655
},
{
"epoch": 0.9768732654949122,
"grad_norm": 0.6629187068294987,
"learning_rate": 3.012012351554017e-08,
"loss": 0.8536,
"mean_token_accuracy": 0.7507365360856056,
"step": 660
},
{
"epoch": 0.9842738205365402,
"grad_norm": 0.6206452932409162,
"learning_rate": 1.3390457653639221e-08,
"loss": 0.8292,
"mean_token_accuracy": 0.7570610553026199,
"step": 665
},
{
"epoch": 0.9916743755781684,
"grad_norm": 0.5840308030369242,
"learning_rate": 3.3481749271768726e-09,
"loss": 0.8507,
"mean_token_accuracy": 0.7509025186300278,
"step": 670
},
{
"epoch": 0.9990749306197965,
"grad_norm": 0.5806702041577617,
"learning_rate": 0.0,
"loss": 0.8311,
"mean_token_accuracy": 0.7562627077102662,
"step": 675
},
{
"epoch": 0.9990749306197965,
"step": 675,
"total_flos": 76902580617216.0,
"train_loss": 0.8954176528365524,
"train_runtime": 3065.705,
"train_samples_per_second": 28.204,
"train_steps_per_second": 0.22
}
],
"logging_steps": 5,
"max_steps": 675,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 76902580617216.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}