Qwen2.5-7B-R1-SFT / trainer_state.json
Mingsmilet's picture
Model save
4564215 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9985761746559089,
"eval_steps": 100,
"global_step": 526,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.009492168960607499,
"grad_norm": 6.382084533927167,
"learning_rate": 2.830188679245283e-06,
"loss": 1.5552,
"mean_token_accuracy": 0.6334456590390899,
"step": 5
},
{
"epoch": 0.018984337921214997,
"grad_norm": 2.4038523283330995,
"learning_rate": 5.660377358490566e-06,
"loss": 1.3914,
"mean_token_accuracy": 0.6519559993952877,
"step": 10
},
{
"epoch": 0.028476506881822496,
"grad_norm": 1.3354890710706353,
"learning_rate": 8.49056603773585e-06,
"loss": 1.3131,
"mean_token_accuracy": 0.6610869096637859,
"step": 15
},
{
"epoch": 0.037968675842429994,
"grad_norm": 1.1017917039570355,
"learning_rate": 1.1320754716981132e-05,
"loss": 1.2063,
"mean_token_accuracy": 0.6809012376554828,
"step": 20
},
{
"epoch": 0.04746084480303749,
"grad_norm": 0.9879503320797501,
"learning_rate": 1.4150943396226415e-05,
"loss": 1.1486,
"mean_token_accuracy": 0.6918192724443217,
"step": 25
},
{
"epoch": 0.05695301376364499,
"grad_norm": 0.8159335394559786,
"learning_rate": 1.69811320754717e-05,
"loss": 1.1604,
"mean_token_accuracy": 0.6889519967175546,
"step": 30
},
{
"epoch": 0.0664451827242525,
"grad_norm": 0.7351159553665958,
"learning_rate": 1.981132075471698e-05,
"loss": 1.1136,
"mean_token_accuracy": 0.6980297149741597,
"step": 35
},
{
"epoch": 0.07593735168485999,
"grad_norm": 0.8029098841687364,
"learning_rate": 2.2641509433962265e-05,
"loss": 1.1006,
"mean_token_accuracy": 0.7000043453319321,
"step": 40
},
{
"epoch": 0.0854295206454675,
"grad_norm": 0.7707051213318692,
"learning_rate": 2.547169811320755e-05,
"loss": 1.0935,
"mean_token_accuracy": 0.7024640797960592,
"step": 45
},
{
"epoch": 0.09492168960607499,
"grad_norm": 0.7829819546300452,
"learning_rate": 2.830188679245283e-05,
"loss": 1.0914,
"mean_token_accuracy": 0.701772119443971,
"step": 50
},
{
"epoch": 0.10441385856668249,
"grad_norm": 0.8037742734156652,
"learning_rate": 2.999867659633677e-05,
"loss": 1.0821,
"mean_token_accuracy": 0.7028966547668545,
"step": 55
},
{
"epoch": 0.11390602752728998,
"grad_norm": 0.719419938154323,
"learning_rate": 2.998379098682235e-05,
"loss": 1.0639,
"mean_token_accuracy": 0.7082780105929181,
"step": 60
},
{
"epoch": 0.12339819648789749,
"grad_norm": 0.7279019741711135,
"learning_rate": 2.9952381983070697e-05,
"loss": 1.061,
"mean_token_accuracy": 0.7086445803281343,
"step": 65
},
{
"epoch": 0.132890365448505,
"grad_norm": 0.9168076324115938,
"learning_rate": 2.990448422139995e-05,
"loss": 1.0706,
"mean_token_accuracy": 0.706039712175089,
"step": 70
},
{
"epoch": 0.14238253440911247,
"grad_norm": 0.6318736197635204,
"learning_rate": 2.9840150521126656e-05,
"loss": 1.0556,
"mean_token_accuracy": 0.7090440166084926,
"step": 75
},
{
"epoch": 0.15187470336971998,
"grad_norm": 0.7517171577246983,
"learning_rate": 2.9759451826319188e-05,
"loss": 1.0518,
"mean_token_accuracy": 0.709636223789211,
"step": 80
},
{
"epoch": 0.16136687233032748,
"grad_norm": 0.8194879778362485,
"learning_rate": 2.9662477127564114e-05,
"loss": 1.061,
"mean_token_accuracy": 0.7081536390053055,
"step": 85
},
{
"epoch": 0.170859041290935,
"grad_norm": 0.7099500456353331,
"learning_rate": 2.954933336383167e-05,
"loss": 1.0504,
"mean_token_accuracy": 0.7099738392414334,
"step": 90
},
{
"epoch": 0.18035121025154247,
"grad_norm": 0.885116945350152,
"learning_rate": 2.9420145304548727e-05,
"loss": 1.0618,
"mean_token_accuracy": 0.7073557843749686,
"step": 95
},
{
"epoch": 0.18984337921214997,
"grad_norm": 0.7052704259862534,
"learning_rate": 2.927505541200911e-05,
"loss": 1.0447,
"mean_token_accuracy": 0.7107585697424312,
"step": 100
},
{
"epoch": 0.18984337921214997,
"eval_loss": 1.048028588294983,
"eval_mean_token_accuracy": 0.7104562116108591,
"eval_runtime": 188.174,
"eval_samples_per_second": 44.958,
"eval_steps_per_second": 2.811,
"step": 100
},
{
"epoch": 0.19933554817275748,
"grad_norm": 0.6662794449059302,
"learning_rate": 2.9114223684273157e-05,
"loss": 1.031,
"mean_token_accuracy": 0.714224491813318,
"step": 105
},
{
"epoch": 0.20882771713336498,
"grad_norm": 0.723871912393848,
"learning_rate": 2.893782747872962e-05,
"loss": 1.0641,
"mean_token_accuracy": 0.7066465015390666,
"step": 110
},
{
"epoch": 0.21831988609397246,
"grad_norm": 0.6837404594766777,
"learning_rate": 2.8746061316514636e-05,
"loss": 1.0325,
"mean_token_accuracy": 0.7134234564487059,
"step": 115
},
{
"epoch": 0.22781205505457996,
"grad_norm": 0.7302473146293046,
"learning_rate": 2.853913666800324e-05,
"loss": 1.0413,
"mean_token_accuracy": 0.711697892158014,
"step": 120
},
{
"epoch": 0.23730422401518747,
"grad_norm": 0.6282864756210261,
"learning_rate": 2.8317281719610195e-05,
"loss": 1.0232,
"mean_token_accuracy": 0.7161906910477807,
"step": 125
},
{
"epoch": 0.24679639297579498,
"grad_norm": 0.7186936614849424,
"learning_rate": 2.8080741122157116e-05,
"loss": 1.0395,
"mean_token_accuracy": 0.7129813734835235,
"step": 130
},
{
"epoch": 0.2562885619364025,
"grad_norm": 0.6699630796093422,
"learning_rate": 2.7829775721083462e-05,
"loss": 1.0334,
"mean_token_accuracy": 0.713805177916302,
"step": 135
},
{
"epoch": 0.26578073089701,
"grad_norm": 0.6671508294114585,
"learning_rate": 2.7564662268798923e-05,
"loss": 1.0398,
"mean_token_accuracy": 0.711473895741226,
"step": 140
},
{
"epoch": 0.2752728998576175,
"grad_norm": 0.7056353558790489,
"learning_rate": 2.7285693119494342e-05,
"loss": 1.0388,
"mean_token_accuracy": 0.7125332932510399,
"step": 145
},
{
"epoch": 0.28476506881822494,
"grad_norm": 0.713539935014783,
"learning_rate": 2.6993175906747766e-05,
"loss": 1.0425,
"mean_token_accuracy": 0.7113191565844058,
"step": 150
},
{
"epoch": 0.29425723777883245,
"grad_norm": 0.6506095739220058,
"learning_rate": 2.6687433204281133e-05,
"loss": 1.0431,
"mean_token_accuracy": 0.7111925093298841,
"step": 155
},
{
"epoch": 0.30374940673943995,
"grad_norm": 0.6401875745839918,
"learning_rate": 2.63688021702417e-05,
"loss": 1.0282,
"mean_token_accuracy": 0.7144017808774497,
"step": 160
},
{
"epoch": 0.31324157570004746,
"grad_norm": 0.7140579977602919,
"learning_rate": 2.603763417540048e-05,
"loss": 1.0459,
"mean_token_accuracy": 0.7098399140003829,
"step": 165
},
{
"epoch": 0.32273374466065496,
"grad_norm": 0.6253709705815874,
"learning_rate": 2.569429441567766e-05,
"loss": 1.0231,
"mean_token_accuracy": 0.7159014738186652,
"step": 170
},
{
"epoch": 0.33222591362126247,
"grad_norm": 0.6324214284509637,
"learning_rate": 2.5339161509422392e-05,
"loss": 1.0197,
"mean_token_accuracy": 0.7164093742051707,
"step": 175
},
{
"epoch": 0.34171808258187,
"grad_norm": 0.6678987876289522,
"learning_rate": 2.4972627079890876e-05,
"loss": 0.9976,
"mean_token_accuracy": 0.7215757004921317,
"step": 180
},
{
"epoch": 0.3512102515424775,
"grad_norm": 0.7567846587011824,
"learning_rate": 2.459509532338337e-05,
"loss": 1.0362,
"mean_token_accuracy": 0.7126484349370313,
"step": 185
},
{
"epoch": 0.36070242050308493,
"grad_norm": 0.6319910789700267,
"learning_rate": 2.4206982563516198e-05,
"loss": 1.0085,
"mean_token_accuracy": 0.7193452892998586,
"step": 190
},
{
"epoch": 0.37019458946369244,
"grad_norm": 0.592121827760772,
"learning_rate": 2.3808716792120365e-05,
"loss": 0.9995,
"mean_token_accuracy": 0.7203226780525329,
"step": 195
},
{
"epoch": 0.37968675842429994,
"grad_norm": 0.6491619559468075,
"learning_rate": 2.3400737197273017e-05,
"loss": 1.0068,
"mean_token_accuracy": 0.7198690144033525,
"step": 200
},
{
"epoch": 0.37968675842429994,
"eval_loss": 1.016072154045105,
"eval_mean_token_accuracy": 0.7169056956031188,
"eval_runtime": 188.2109,
"eval_samples_per_second": 44.95,
"eval_steps_per_second": 2.811,
"step": 200
},
{
"epoch": 0.38917892738490745,
"grad_norm": 0.5982062591050502,
"learning_rate": 2.2983493678982263e-05,
"loss": 1.0178,
"mean_token_accuracy": 0.7163814283777625,
"step": 205
},
{
"epoch": 0.39867109634551495,
"grad_norm": 0.6500756539810862,
"learning_rate": 2.2557446353059363e-05,
"loss": 1.0122,
"mean_token_accuracy": 0.7169991323301304,
"step": 210
},
{
"epoch": 0.40816326530612246,
"grad_norm": 0.67216281461341,
"learning_rate": 2.2123065043725443e-05,
"loss": 1.0114,
"mean_token_accuracy": 0.7178278442059701,
"step": 215
},
{
"epoch": 0.41765543426672996,
"grad_norm": 0.6091548648271161,
"learning_rate": 2.1680828765512254e-05,
"loss": 0.9911,
"mean_token_accuracy": 0.7221596642604371,
"step": 220
},
{
"epoch": 0.42714760322733747,
"grad_norm": 0.6300256337486431,
"learning_rate": 2.12312251950283e-05,
"loss": 1.0116,
"mean_token_accuracy": 0.7184098011056725,
"step": 225
},
{
"epoch": 0.4366397721879449,
"grad_norm": 0.6047772843320056,
"learning_rate": 2.077475013317283e-05,
"loss": 1.009,
"mean_token_accuracy": 0.7181628696364624,
"step": 230
},
{
"epoch": 0.4461319411485524,
"grad_norm": 0.633329142422912,
"learning_rate": 2.0311906958390815e-05,
"loss": 1.014,
"mean_token_accuracy": 0.7181358407727384,
"step": 235
},
{
"epoch": 0.45562411010915993,
"grad_norm": 0.5816601957080884,
"learning_rate": 1.9843206071571692e-05,
"loss": 1.0059,
"mean_token_accuracy": 0.7193718957578646,
"step": 240
},
{
"epoch": 0.46511627906976744,
"grad_norm": 0.5698758427376447,
"learning_rate": 1.936916433320418e-05,
"loss": 0.9902,
"mean_token_accuracy": 0.7222839979970095,
"step": 245
},
{
"epoch": 0.47460844803037494,
"grad_norm": 0.6194684314147714,
"learning_rate": 1.8890304493407705e-05,
"loss": 0.9956,
"mean_token_accuracy": 0.7217816370116409,
"step": 250
},
{
"epoch": 0.48410061699098245,
"grad_norm": 0.5900456478425009,
"learning_rate": 1.840715461546909e-05,
"loss": 0.9852,
"mean_token_accuracy": 0.7235825698249634,
"step": 255
},
{
"epoch": 0.49359278595158995,
"grad_norm": 0.5996405920369893,
"learning_rate": 1.792024749352005e-05,
"loss": 0.9802,
"mean_token_accuracy": 0.7251773628246897,
"step": 260
},
{
"epoch": 0.5030849549121974,
"grad_norm": 0.627099426962684,
"learning_rate": 1.7430120064997848e-05,
"loss": 0.9867,
"mean_token_accuracy": 0.7237486444295606,
"step": 265
},
{
"epoch": 0.512577123872805,
"grad_norm": 0.595354280461353,
"learning_rate": 1.6937312818536852e-05,
"loss": 0.9878,
"mean_token_accuracy": 0.7229575029055708,
"step": 270
},
{
"epoch": 0.5220692928334124,
"grad_norm": 0.7034272599240323,
"learning_rate": 1.644236919794398e-05,
"loss": 1.0075,
"mean_token_accuracy": 0.7184976576983118,
"step": 275
},
{
"epoch": 0.53156146179402,
"grad_norm": 0.6023946372615683,
"learning_rate": 1.5945835002915428e-05,
"loss": 0.988,
"mean_token_accuracy": 0.723351409706495,
"step": 280
},
{
"epoch": 0.5410536307546274,
"grad_norm": 0.5879635470205725,
"learning_rate": 1.5448257787155316e-05,
"loss": 1.0071,
"mean_token_accuracy": 0.7196090507353585,
"step": 285
},
{
"epoch": 0.550545799715235,
"grad_norm": 0.5510790369891281,
"learning_rate": 1.4950186254560195e-05,
"loss": 0.9504,
"mean_token_accuracy": 0.7309219942793238,
"step": 290
},
{
"epoch": 0.5600379686758424,
"grad_norm": 0.5479437822508071,
"learning_rate": 1.4452169654135116e-05,
"loss": 0.991,
"mean_token_accuracy": 0.7227064638424688,
"step": 295
},
{
"epoch": 0.5695301376364499,
"grad_norm": 0.538661848645154,
"learning_rate": 1.3954757174308611e-05,
"loss": 0.9982,
"mean_token_accuracy": 0.7204644706262047,
"step": 300
},
{
"epoch": 0.5695301376364499,
"eval_loss": 0.9927210211753845,
"eval_mean_token_accuracy": 0.7218378547164851,
"eval_runtime": 188.261,
"eval_samples_per_second": 44.938,
"eval_steps_per_second": 2.81,
"step": 300
},
{
"epoch": 0.5790223065970574,
"grad_norm": 0.5351158398440641,
"learning_rate": 1.3458497337314464e-05,
"loss": 0.9872,
"mean_token_accuracy": 0.7229412089112974,
"step": 305
},
{
"epoch": 0.5885144755576649,
"grad_norm": 0.551234414499353,
"learning_rate": 1.2963937394308139e-05,
"loss": 0.9844,
"mean_token_accuracy": 0.7240711314491671,
"step": 310
},
{
"epoch": 0.5980066445182725,
"grad_norm": 0.5663879687131853,
"learning_rate": 1.247162272188488e-05,
"loss": 1.0137,
"mean_token_accuracy": 0.7177027016634137,
"step": 315
},
{
"epoch": 0.6074988134788799,
"grad_norm": 0.5353212240764764,
"learning_rate": 1.1982096220665e-05,
"loss": 1.004,
"mean_token_accuracy": 0.7197887845300913,
"step": 320
},
{
"epoch": 0.6169909824394875,
"grad_norm": 0.583630600668319,
"learning_rate": 1.1495897716609504e-05,
"loss": 0.9873,
"mean_token_accuracy": 0.722715737983074,
"step": 325
},
{
"epoch": 0.6264831514000949,
"grad_norm": 0.5627383649758397,
"learning_rate": 1.101356336572639e-05,
"loss": 0.9758,
"mean_token_accuracy": 0.7256538668748826,
"step": 330
},
{
"epoch": 0.6359753203607024,
"grad_norm": 0.5910628365424991,
"learning_rate": 1.0535625062823913e-05,
"loss": 0.9814,
"mean_token_accuracy": 0.7250735323254608,
"step": 335
},
{
"epoch": 0.6454674893213099,
"grad_norm": 0.5676509472619453,
"learning_rate": 1.006260985496291e-05,
"loss": 0.9679,
"mean_token_accuracy": 0.7274865177290415,
"step": 340
},
{
"epoch": 0.6549596582819174,
"grad_norm": 0.5417197121930121,
"learning_rate": 9.595039360255027e-06,
"loss": 0.9891,
"mean_token_accuracy": 0.721159575635722,
"step": 345
},
{
"epoch": 0.6644518272425249,
"grad_norm": 0.5418465706657506,
"learning_rate": 9.133429192647661e-06,
"loss": 0.9877,
"mean_token_accuracy": 0.723318498917802,
"step": 350
},
{
"epoch": 0.6739439962031324,
"grad_norm": 0.5358459281845342,
"learning_rate": 8.678288393330054e-06,
"loss": 0.9929,
"mean_token_accuracy": 0.7210306617807193,
"step": 355
},
{
"epoch": 0.68343616516374,
"grad_norm": 0.5227319454091675,
"learning_rate": 8.230118869387477e-06,
"loss": 0.9795,
"mean_token_accuracy": 0.7254460407787091,
"step": 360
},
{
"epoch": 0.6929283341243474,
"grad_norm": 0.5322267392288975,
"learning_rate": 7.789414840322585e-06,
"loss": 0.9847,
"mean_token_accuracy": 0.7235376583740499,
"step": 365
},
{
"epoch": 0.702420503084955,
"grad_norm": 0.5223012851619547,
"learning_rate": 7.356662293054208e-06,
"loss": 0.966,
"mean_token_accuracy": 0.7278847554342741,
"step": 370
},
{
"epoch": 0.7119126720455624,
"grad_norm": 0.5155576369666807,
"learning_rate": 6.932338445994688e-06,
"loss": 0.9692,
"mean_token_accuracy": 0.726704847484917,
"step": 375
},
{
"epoch": 0.7214048410061699,
"grad_norm": 0.5036185424846276,
"learning_rate": 6.5169112227966985e-06,
"loss": 0.9865,
"mean_token_accuracy": 0.7224871125460137,
"step": 380
},
{
"epoch": 0.7308970099667774,
"grad_norm": 0.5257978275446427,
"learning_rate": 6.1108387363497966e-06,
"loss": 0.9796,
"mean_token_accuracy": 0.723099248470522,
"step": 385
},
{
"epoch": 0.7403891789273849,
"grad_norm": 0.5418564336337571,
"learning_rate": 5.7145687835959294e-06,
"loss": 0.9684,
"mean_token_accuracy": 0.7269886124626503,
"step": 390
},
{
"epoch": 0.7498813478879924,
"grad_norm": 0.5231604557981293,
"learning_rate": 5.328538351720738e-06,
"loss": 0.9889,
"mean_token_accuracy": 0.7233975946116854,
"step": 395
},
{
"epoch": 0.7593735168485999,
"grad_norm": 0.5208898465104785,
"learning_rate": 4.95317313626544e-06,
"loss": 0.9832,
"mean_token_accuracy": 0.7237090601111318,
"step": 400
},
{
"epoch": 0.7593735168485999,
"eval_loss": 0.9769607186317444,
"eval_mean_token_accuracy": 0.725058498177693,
"eval_runtime": 188.3893,
"eval_samples_per_second": 44.907,
"eval_steps_per_second": 2.808,
"step": 400
},
{
"epoch": 0.7688656858092074,
"grad_norm": 0.5040660800094122,
"learning_rate": 4.588887071690491e-06,
"loss": 0.974,
"mean_token_accuracy": 0.7257032089059805,
"step": 405
},
{
"epoch": 0.7783578547698149,
"grad_norm": 0.5161820626985851,
"learning_rate": 4.236081874908894e-06,
"loss": 0.9721,
"mean_token_accuracy": 0.7266561050966363,
"step": 410
},
{
"epoch": 0.7878500237304225,
"grad_norm": 0.5063329607764222,
"learning_rate": 3.895146602292322e-06,
"loss": 0.9915,
"mean_token_accuracy": 0.721969042013088,
"step": 415
},
{
"epoch": 0.7973421926910299,
"grad_norm": 0.5281346072920926,
"learning_rate": 3.5664572206387453e-06,
"loss": 0.9705,
"mean_token_accuracy": 0.7265926288123876,
"step": 420
},
{
"epoch": 0.8068343616516374,
"grad_norm": 0.5250034855647077,
"learning_rate": 3.250376192574585e-06,
"loss": 0.9578,
"mean_token_accuracy": 0.7290626104519637,
"step": 425
},
{
"epoch": 0.8163265306122449,
"grad_norm": 0.49831409290381057,
"learning_rate": 2.94725207684856e-06,
"loss": 0.975,
"mean_token_accuracy": 0.7260714077768973,
"step": 430
},
{
"epoch": 0.8258186995728524,
"grad_norm": 0.5008570434545507,
"learning_rate": 2.657419143958137e-06,
"loss": 0.9785,
"mean_token_accuracy": 0.7245489166600709,
"step": 435
},
{
"epoch": 0.8353108685334599,
"grad_norm": 0.499241785256133,
"learning_rate": 2.38119700753228e-06,
"loss": 0.9578,
"mean_token_accuracy": 0.7287562205862572,
"step": 440
},
{
"epoch": 0.8448030374940674,
"grad_norm": 0.4969273882911778,
"learning_rate": 2.1188902718771654e-06,
"loss": 0.9723,
"mean_token_accuracy": 0.7251241120989145,
"step": 445
},
{
"epoch": 0.8542952064546749,
"grad_norm": 0.5124547544001624,
"learning_rate": 1.870788196073348e-06,
"loss": 0.9686,
"mean_token_accuracy": 0.7279498396740951,
"step": 450
},
{
"epoch": 0.8637873754152824,
"grad_norm": 0.4902608341477827,
"learning_rate": 1.6371643749950034e-06,
"loss": 0.9526,
"mean_token_accuracy": 0.7303064836501066,
"step": 455
},
{
"epoch": 0.8732795443758898,
"grad_norm": 0.4890903954488522,
"learning_rate": 1.4182764376028007e-06,
"loss": 0.9655,
"mean_token_accuracy": 0.7276476069135862,
"step": 460
},
{
"epoch": 0.8827717133364974,
"grad_norm": 0.49995400150577757,
"learning_rate": 1.2143657628432675e-06,
"loss": 0.9553,
"mean_token_accuracy": 0.7299626887589745,
"step": 465
},
{
"epoch": 0.8922638822971048,
"grad_norm": 0.5001903393918901,
"learning_rate": 1.025657213467836e-06,
"loss": 0.9706,
"mean_token_accuracy": 0.7268998952970227,
"step": 470
},
{
"epoch": 0.9017560512577124,
"grad_norm": 0.5023213213383173,
"learning_rate": 8.523588880651739e-07,
"loss": 0.9762,
"mean_token_accuracy": 0.7243987187278682,
"step": 475
},
{
"epoch": 0.9112482202183199,
"grad_norm": 0.48664338068261653,
"learning_rate": 6.946618915802083e-07,
"loss": 0.9648,
"mean_token_accuracy": 0.7273781984213099,
"step": 480
},
{
"epoch": 0.9207403891789274,
"grad_norm": 0.4724859910475711,
"learning_rate": 5.527401245728963e-07,
"loss": 0.9495,
"mean_token_accuracy": 0.7314832008821588,
"step": 485
},
{
"epoch": 0.9302325581395349,
"grad_norm": 0.489734009625606,
"learning_rate": 4.267500914491812e-07,
"loss": 0.9767,
"mean_token_accuracy": 0.7242647393634309,
"step": 490
},
{
"epoch": 0.9397247271001424,
"grad_norm": 0.4841609096900709,
"learning_rate": 3.1683072787554614e-07,
"loss": 0.9772,
"mean_token_accuracy": 0.7250692813747994,
"step": 495
},
{
"epoch": 0.9492168960607499,
"grad_norm": 0.47079883155833574,
"learning_rate": 2.2310324756755096e-07,
"loss": 0.9505,
"mean_token_accuracy": 0.7317554978214195,
"step": 500
},
{
"epoch": 0.9492168960607499,
"eval_loss": 0.9712523818016052,
"eval_mean_token_accuracy": 0.7263223549450385,
"eval_runtime": 188.5521,
"eval_samples_per_second": 44.868,
"eval_steps_per_second": 2.806,
"step": 500
},
{
"epoch": 0.9587090650213573,
"grad_norm": 0.4862636180575128,
"learning_rate": 1.4567100862124261e-07,
"loss": 0.9552,
"mean_token_accuracy": 0.7289664229991631,
"step": 505
},
{
"epoch": 0.9682012339819649,
"grad_norm": 0.49399931350845777,
"learning_rate": 8.461939953489484e-08,
"loss": 0.9881,
"mean_token_accuracy": 0.7226353458405045,
"step": 510
},
{
"epoch": 0.9776934029425723,
"grad_norm": 0.510950230610024,
"learning_rate": 4.0015745046725336e-08,
"loss": 0.967,
"mean_token_accuracy": 0.7275112007297125,
"step": 515
},
{
"epoch": 0.9871855719031799,
"grad_norm": 0.48706184867233865,
"learning_rate": 1.1909231892468508e-08,
"loss": 0.9641,
"mean_token_accuracy": 0.7276599577681944,
"step": 520
},
{
"epoch": 0.9966777408637874,
"grad_norm": 0.48013249759710047,
"learning_rate": 3.3085456463188836e-10,
"loss": 0.9715,
"mean_token_accuracy": 0.7266395059769233,
"step": 525
},
{
"epoch": 0.9985761746559089,
"mean_token_accuracy": 0.7196164559476252,
"step": 526,
"total_flos": 275626774757376.0,
"train_loss": 1.0209425016954372,
"train_runtime": 3648.2915,
"train_samples_per_second": 9.24,
"train_steps_per_second": 0.144
}
],
"logging_steps": 5,
"max_steps": 526,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 275626774757376.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}