{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 660,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.15151515151515152,
"grad_norm": 0.796875,
"learning_rate": 3.6363636363636366e-06,
"loss": 0.1258,
"step": 5
},
{
"epoch": 0.30303030303030304,
"grad_norm": 0.69140625,
"learning_rate": 8.181818181818181e-06,
"loss": 0.1212,
"step": 10
},
{
"epoch": 0.45454545454545453,
"grad_norm": 0.421875,
"learning_rate": 1.2727272727272728e-05,
"loss": 0.1055,
"step": 15
},
{
"epoch": 0.6060606060606061,
"grad_norm": 0.2236328125,
"learning_rate": 1.7272727272727274e-05,
"loss": 0.0963,
"step": 20
},
{
"epoch": 0.7575757575757576,
"grad_norm": 0.2392578125,
"learning_rate": 2.1818181818181818e-05,
"loss": 0.0931,
"step": 25
},
{
"epoch": 0.9090909090909091,
"grad_norm": 0.1796875,
"learning_rate": 2.6363636363636365e-05,
"loss": 0.0903,
"step": 30
},
{
"epoch": 1.0606060606060606,
"grad_norm": 0.1552734375,
"learning_rate": 2.9999830539872836e-05,
"loss": 0.0843,
"step": 35
},
{
"epoch": 1.2121212121212122,
"grad_norm": 0.150390625,
"learning_rate": 2.9993899882114902e-05,
"loss": 0.0853,
"step": 40
},
{
"epoch": 1.3636363636363638,
"grad_norm": 0.138671875,
"learning_rate": 2.997950047184977e-05,
"loss": 0.0804,
"step": 45
},
{
"epoch": 1.5151515151515151,
"grad_norm": 0.146484375,
"learning_rate": 2.9956641346126986e-05,
"loss": 0.0809,
"step": 50
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.1396484375,
"learning_rate": 2.9925336851301575e-05,
"loss": 0.0795,
"step": 55
},
{
"epoch": 1.8181818181818183,
"grad_norm": 0.1376953125,
"learning_rate": 2.9885606634030267e-05,
"loss": 0.0789,
"step": 60
},
{
"epoch": 1.9696969696969697,
"grad_norm": 0.134765625,
"learning_rate": 2.98374756289413e-05,
"loss": 0.0778,
"step": 65
},
{
"epoch": 2.121212121212121,
"grad_norm": 0.1494140625,
"learning_rate": 2.9780974042985506e-05,
"loss": 0.0761,
"step": 70
},
{
"epoch": 2.2727272727272725,
"grad_norm": 0.2177734375,
"learning_rate": 2.971613733647841e-05,
"loss": 0.0751,
"step": 75
},
{
"epoch": 2.4242424242424243,
"grad_norm": 0.1435546875,
"learning_rate": 2.9643006200845458e-05,
"loss": 0.0756,
"step": 80
},
{
"epoch": 2.5757575757575757,
"grad_norm": 0.1376953125,
"learning_rate": 2.9561626533084068e-05,
"loss": 0.0765,
"step": 85
},
{
"epoch": 2.7272727272727275,
"grad_norm": 0.1318359375,
"learning_rate": 2.9472049406958788e-05,
"loss": 0.0746,
"step": 90
},
{
"epoch": 2.878787878787879,
"grad_norm": 0.1455078125,
"learning_rate": 2.937433104094746e-05,
"loss": 0.0757,
"step": 95
},
{
"epoch": 3.0303030303030303,
"grad_norm": 0.1328125,
"learning_rate": 2.9268532762958568e-05,
"loss": 0.0725,
"step": 100
},
{
"epoch": 3.1818181818181817,
"grad_norm": 0.1376953125,
"learning_rate": 2.915472097184196e-05,
"loss": 0.0742,
"step": 105
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.1318359375,
"learning_rate": 2.903296709571698e-05,
"loss": 0.0707,
"step": 110
},
{
"epoch": 3.484848484848485,
"grad_norm": 0.1337890625,
"learning_rate": 2.8903347547144327e-05,
"loss": 0.0734,
"step": 115
},
{
"epoch": 3.6363636363636362,
"grad_norm": 0.142578125,
"learning_rate": 2.876594367516961e-05,
"loss": 0.0724,
"step": 120
},
{
"epoch": 3.787878787878788,
"grad_norm": 0.1318359375,
"learning_rate": 2.8620841714268804e-05,
"loss": 0.0725,
"step": 125
},
{
"epoch": 3.9393939393939394,
"grad_norm": 0.1484375,
"learning_rate": 2.846813273022764e-05,
"loss": 0.0714,
"step": 130
},
{
"epoch": 4.090909090909091,
"grad_norm": 0.134765625,
"learning_rate": 2.83079125629888e-05,
"loss": 0.0727,
"step": 135
},
{
"epoch": 4.242424242424242,
"grad_norm": 0.146484375,
"learning_rate": 2.8140281766502957e-05,
"loss": 0.0716,
"step": 140
},
{
"epoch": 4.393939393939394,
"grad_norm": 0.140625,
"learning_rate": 2.7965345545621217e-05,
"loss": 0.072,
"step": 145
},
{
"epoch": 4.545454545454545,
"grad_norm": 0.1455078125,
"learning_rate": 2.7783213690068737e-05,
"loss": 0.0701,
"step": 150
},
{
"epoch": 4.696969696969697,
"grad_norm": 0.1435546875,
"learning_rate": 2.7594000505540807e-05,
"loss": 0.0741,
"step": 155
},
{
"epoch": 4.848484848484849,
"grad_norm": 0.142578125,
"learning_rate": 2.7397824741964805e-05,
"loss": 0.0665,
"step": 160
},
{
"epoch": 5.0,
"grad_norm": 0.189453125,
"learning_rate": 2.7194809518972856e-05,
"loss": 0.0705,
"step": 165
},
{
"epoch": 5.151515151515151,
"grad_norm": 0.1494140625,
"learning_rate": 2.6985082248632174e-05,
"loss": 0.0679,
"step": 170
},
{
"epoch": 5.303030303030303,
"grad_norm": 0.14453125,
"learning_rate": 2.676877455548141e-05,
"loss": 0.0693,
"step": 175
},
{
"epoch": 5.454545454545454,
"grad_norm": 0.1376953125,
"learning_rate": 2.6546022193923274e-05,
"loss": 0.0696,
"step": 180
},
{
"epoch": 5.606060606060606,
"grad_norm": 0.1435546875,
"learning_rate": 2.631696496302526e-05,
"loss": 0.0709,
"step": 185
},
{
"epoch": 5.757575757575758,
"grad_norm": 0.142578125,
"learning_rate": 2.6081746618781953e-05,
"loss": 0.0694,
"step": 190
},
{
"epoch": 5.909090909090909,
"grad_norm": 0.1357421875,
"learning_rate": 2.584051478389399e-05,
"loss": 0.0682,
"step": 195
},
{
"epoch": 6.0606060606060606,
"grad_norm": 0.154296875,
"learning_rate": 2.559342085512022e-05,
"loss": 0.0686,
"step": 200
},
{
"epoch": 6.212121212121212,
"grad_norm": 0.154296875,
"learning_rate": 2.5340619908261352e-05,
"loss": 0.0703,
"step": 205
},
{
"epoch": 6.363636363636363,
"grad_norm": 0.140625,
"learning_rate": 2.508227060083457e-05,
"loss": 0.0647,
"step": 210
},
{
"epoch": 6.515151515151516,
"grad_norm": 0.1376953125,
"learning_rate": 2.4818535072500327e-05,
"loss": 0.064,
"step": 215
},
{
"epoch": 6.666666666666667,
"grad_norm": 0.142578125,
"learning_rate": 2.4549578843303708e-05,
"loss": 0.0676,
"step": 220
},
{
"epoch": 6.818181818181818,
"grad_norm": 0.14453125,
"learning_rate": 2.427557070979427e-05,
"loss": 0.0669,
"step": 225
},
{
"epoch": 6.96969696969697,
"grad_norm": 0.1376953125,
"learning_rate": 2.399668263908961e-05,
"loss": 0.0679,
"step": 230
},
{
"epoch": 7.121212121212121,
"grad_norm": 0.1357421875,
"learning_rate": 2.3713089660948985e-05,
"loss": 0.0666,
"step": 235
},
{
"epoch": 7.2727272727272725,
"grad_norm": 0.1455078125,
"learning_rate": 2.342496975792494e-05,
"loss": 0.066,
"step": 240
},
{
"epoch": 7.424242424242424,
"grad_norm": 0.123046875,
"learning_rate": 2.313250375366167e-05,
"loss": 0.0637,
"step": 245
},
{
"epoch": 7.575757575757576,
"grad_norm": 0.1298828125,
"learning_rate": 2.283587519941036e-05,
"loss": 0.0683,
"step": 250
},
{
"epoch": 7.7272727272727275,
"grad_norm": 0.1435546875,
"learning_rate": 2.253527025883271e-05,
"loss": 0.0642,
"step": 255
},
{
"epoch": 7.878787878787879,
"grad_norm": 0.1533203125,
"learning_rate": 2.2230877591164858e-05,
"loss": 0.0682,
"step": 260
},
{
"epoch": 8.030303030303031,
"grad_norm": 0.1328125,
"learning_rate": 2.192288823281509e-05,
"loss": 0.0628,
"step": 265
},
{
"epoch": 8.181818181818182,
"grad_norm": 0.158203125,
"learning_rate": 2.1611495477469712e-05,
"loss": 0.0635,
"step": 270
},
{
"epoch": 8.333333333333334,
"grad_norm": 0.15234375,
"learning_rate": 2.1296894754782155e-05,
"loss": 0.0679,
"step": 275
},
{
"epoch": 8.484848484848484,
"grad_norm": 0.140625,
"learning_rate": 2.0979283507721653e-05,
"loss": 0.0631,
"step": 280
},
{
"epoch": 8.636363636363637,
"grad_norm": 0.12890625,
"learning_rate": 2.0658861068658254e-05,
"loss": 0.0634,
"step": 285
},
{
"epoch": 8.787878787878787,
"grad_norm": 0.14453125,
"learning_rate": 2.0335828534262148e-05,
"loss": 0.0652,
"step": 290
},
{
"epoch": 8.93939393939394,
"grad_norm": 0.1484375,
"learning_rate": 2.001038863929568e-05,
"loss": 0.067,
"step": 295
},
{
"epoch": 9.090909090909092,
"grad_norm": 0.1572265625,
"learning_rate": 1.9682745629377267e-05,
"loss": 0.0647,
"step": 300
},
{
"epoch": 9.242424242424242,
"grad_norm": 0.1669921875,
"learning_rate": 1.9353105132797175e-05,
"loss": 0.0628,
"step": 305
},
{
"epoch": 9.393939393939394,
"grad_norm": 0.140625,
"learning_rate": 1.902167403146548e-05,
"loss": 0.0625,
"step": 310
},
{
"epoch": 9.545454545454545,
"grad_norm": 0.1728515625,
"learning_rate": 1.8688660331073253e-05,
"loss": 0.0634,
"step": 315
},
{
"epoch": 9.696969696969697,
"grad_norm": 0.1357421875,
"learning_rate": 1.8354273030548512e-05,
"loss": 0.0618,
"step": 320
},
{
"epoch": 9.848484848484848,
"grad_norm": 0.14453125,
"learning_rate": 1.801872199088878e-05,
"loss": 0.0618,
"step": 325
},
{
"epoch": 10.0,
"grad_norm": 0.1630859375,
"learning_rate": 1.7682217803452616e-05,
"loss": 0.0633,
"step": 330
},
{
"epoch": 10.151515151515152,
"grad_norm": 0.1376953125,
"learning_rate": 1.7344971657792768e-05,
"loss": 0.0651,
"step": 335
},
{
"epoch": 10.303030303030303,
"grad_norm": 0.15625,
"learning_rate": 1.7007195209113934e-05,
"loss": 0.0623,
"step": 340
},
{
"epoch": 10.454545454545455,
"grad_norm": 0.1533203125,
"learning_rate": 1.666910044543822e-05,
"loss": 0.0647,
"step": 345
},
{
"epoch": 10.606060606060606,
"grad_norm": 0.12890625,
"learning_rate": 1.6330899554561785e-05,
"loss": 0.0635,
"step": 350
},
{
"epoch": 10.757575757575758,
"grad_norm": 0.1552734375,
"learning_rate": 1.5992804790886075e-05,
"loss": 0.0622,
"step": 355
},
{
"epoch": 10.909090909090908,
"grad_norm": 0.1396484375,
"learning_rate": 1.5655028342207235e-05,
"loss": 0.0646,
"step": 360
},
{
"epoch": 11.06060606060606,
"grad_norm": 0.1533203125,
"learning_rate": 1.5317782196547387e-05,
"loss": 0.0638,
"step": 365
},
{
"epoch": 11.212121212121213,
"grad_norm": 0.146484375,
"learning_rate": 1.4981278009111222e-05,
"loss": 0.0633,
"step": 370
},
{
"epoch": 11.363636363636363,
"grad_norm": 0.1376953125,
"learning_rate": 1.4645726969451489e-05,
"loss": 0.0602,
"step": 375
},
{
"epoch": 11.515151515151516,
"grad_norm": 0.1533203125,
"learning_rate": 1.4311339668926748e-05,
"loss": 0.061,
"step": 380
},
{
"epoch": 11.666666666666666,
"grad_norm": 0.1513671875,
"learning_rate": 1.397832596853452e-05,
"loss": 0.0636,
"step": 385
},
{
"epoch": 11.818181818181818,
"grad_norm": 0.1357421875,
"learning_rate": 1.3646894867202821e-05,
"loss": 0.0605,
"step": 390
},
{
"epoch": 11.969696969696969,
"grad_norm": 0.1435546875,
"learning_rate": 1.3317254370622732e-05,
"loss": 0.0642,
"step": 395
},
{
"epoch": 12.121212121212121,
"grad_norm": 0.1591796875,
"learning_rate": 1.298961136070432e-05,
"loss": 0.0633,
"step": 400
},
{
"epoch": 12.272727272727273,
"grad_norm": 0.1396484375,
"learning_rate": 1.266417146573785e-05,
"loss": 0.0605,
"step": 405
},
{
"epoch": 12.424242424242424,
"grad_norm": 0.146484375,
"learning_rate": 1.2341138931341752e-05,
"loss": 0.0627,
"step": 410
},
{
"epoch": 12.575757575757576,
"grad_norm": 0.16015625,
"learning_rate": 1.2020716492278353e-05,
"loss": 0.0628,
"step": 415
},
{
"epoch": 12.727272727272727,
"grad_norm": 0.1513671875,
"learning_rate": 1.1703105245217848e-05,
"loss": 0.0598,
"step": 420
},
{
"epoch": 12.878787878787879,
"grad_norm": 0.1416015625,
"learning_rate": 1.1388504522530296e-05,
"loss": 0.0611,
"step": 425
},
{
"epoch": 13.030303030303031,
"grad_norm": 0.1435546875,
"learning_rate": 1.1077111767184916e-05,
"loss": 0.0638,
"step": 430
},
{
"epoch": 13.181818181818182,
"grad_norm": 0.1376953125,
"learning_rate": 1.0769122408835148e-05,
"loss": 0.0585,
"step": 435
},
{
"epoch": 13.333333333333334,
"grad_norm": 0.1396484375,
"learning_rate": 1.0464729741167291e-05,
"loss": 0.0635,
"step": 440
},
{
"epoch": 13.484848484848484,
"grad_norm": 0.14453125,
"learning_rate": 1.016412480058964e-05,
"loss": 0.0621,
"step": 445
},
{
"epoch": 13.636363636363637,
"grad_norm": 0.1552734375,
"learning_rate": 9.86749624633833e-06,
"loss": 0.0635,
"step": 450
},
{
"epoch": 13.787878787878787,
"grad_norm": 0.1474609375,
"learning_rate": 9.575030242075062e-06,
"loss": 0.0597,
"step": 455
},
{
"epoch": 13.93939393939394,
"grad_norm": 0.1484375,
"learning_rate": 9.286910339051015e-06,
"loss": 0.0659,
"step": 460
},
{
"epoch": 14.090909090909092,
"grad_norm": 0.140625,
"learning_rate": 9.003317360910392e-06,
"loss": 0.0618,
"step": 465
},
{
"epoch": 14.242424242424242,
"grad_norm": 0.14453125,
"learning_rate": 8.724429290205732e-06,
"loss": 0.0612,
"step": 470
},
{
"epoch": 14.393939393939394,
"grad_norm": 0.142578125,
"learning_rate": 8.450421156696298e-06,
"loss": 0.0615,
"step": 475
},
{
"epoch": 14.545454545454545,
"grad_norm": 0.1357421875,
"learning_rate": 8.181464927499674e-06,
"loss": 0.0591,
"step": 480
},
{
"epoch": 14.696969696969697,
"grad_norm": 0.15234375,
"learning_rate": 7.917729399165435e-06,
"loss": 0.0606,
"step": 485
},
{
"epoch": 14.848484848484848,
"grad_norm": 0.1416015625,
"learning_rate": 7.659380091738652e-06,
"loss": 0.0592,
"step": 490
},
{
"epoch": 15.0,
"grad_norm": 0.1865234375,
"learning_rate": 7.406579144879779e-06,
"loss": 0.0601,
"step": 495
},
{
"epoch": 15.151515151515152,
"grad_norm": 0.1455078125,
"learning_rate": 7.159485216106013e-06,
"loss": 0.0616,
"step": 500
},
{
"epoch": 15.303030303030303,
"grad_norm": 0.146484375,
"learning_rate": 6.918253381218046e-06,
"loss": 0.0583,
"step": 505
},
{
"epoch": 15.454545454545455,
"grad_norm": 0.15234375,
"learning_rate": 6.683035036974742e-06,
"loss": 0.0613,
"step": 510
},
{
"epoch": 15.606060606060606,
"grad_norm": 0.1484375,
"learning_rate": 6.45397780607673e-06,
"loss": 0.0572,
"step": 515
},
{
"epoch": 15.757575757575758,
"grad_norm": 0.1884765625,
"learning_rate": 6.23122544451859e-06,
"loss": 0.0616,
"step": 520
},
{
"epoch": 15.909090909090908,
"grad_norm": 0.15234375,
"learning_rate": 6.014917751367825e-06,
"loss": 0.0601,
"step": 525
},
{
"epoch": 16.060606060606062,
"grad_norm": 0.154296875,
"learning_rate": 5.80519048102715e-06,
"loss": 0.0597,
"step": 530
},
{
"epoch": 16.21212121212121,
"grad_norm": 0.140625,
"learning_rate": 5.602175258035204e-06,
"loss": 0.0581,
"step": 535
},
{
"epoch": 16.363636363636363,
"grad_norm": 0.1611328125,
"learning_rate": 5.4059994944591914e-06,
"loss": 0.0617,
"step": 540
},
{
"epoch": 16.515151515151516,
"grad_norm": 0.1416015625,
"learning_rate": 5.2167863099312636e-06,
"loss": 0.0587,
"step": 545
},
{
"epoch": 16.666666666666668,
"grad_norm": 0.14453125,
"learning_rate": 5.034654454378783e-06,
"loss": 0.0599,
"step": 550
},
{
"epoch": 16.818181818181817,
"grad_norm": 0.1533203125,
"learning_rate": 4.859718233497048e-06,
"loss": 0.0624,
"step": 555
},
{
"epoch": 16.96969696969697,
"grad_norm": 0.1533203125,
"learning_rate": 4.692087437011203e-06,
"loss": 0.0589,
"step": 560
},
{
"epoch": 17.12121212121212,
"grad_norm": 0.1533203125,
"learning_rate": 4.5318672697723665e-06,
"loss": 0.0624,
"step": 565
},
{
"epoch": 17.272727272727273,
"grad_norm": 0.1494140625,
"learning_rate": 4.3791582857311975e-06,
"loss": 0.0603,
"step": 570
},
{
"epoch": 17.424242424242426,
"grad_norm": 0.142578125,
"learning_rate": 4.2340563248303915e-06,
"loss": 0.0621,
"step": 575
},
{
"epoch": 17.575757575757574,
"grad_norm": 0.1494140625,
"learning_rate": 4.096652452855675e-06,
"loss": 0.0608,
"step": 580
},
{
"epoch": 17.727272727272727,
"grad_norm": 0.1435546875,
"learning_rate": 3.967032904283021e-06,
"loss": 0.06,
"step": 585
},
{
"epoch": 17.87878787878788,
"grad_norm": 0.1484375,
"learning_rate": 3.8452790281580445e-06,
"loss": 0.0605,
"step": 590
},
{
"epoch": 18.03030303030303,
"grad_norm": 0.154296875,
"learning_rate": 3.731467237041433e-06,
"loss": 0.0601,
"step": 595
},
{
"epoch": 18.181818181818183,
"grad_norm": 0.1513671875,
"learning_rate": 3.6256689590525444e-06,
"loss": 0.0628,
"step": 600
},
{
"epoch": 18.333333333333332,
"grad_norm": 0.1396484375,
"learning_rate": 3.5279505930412164e-06,
"loss": 0.062,
"step": 605
},
{
"epoch": 18.484848484848484,
"grad_norm": 0.1591796875,
"learning_rate": 3.4383734669159366e-06,
"loss": 0.0618,
"step": 610
},
{
"epoch": 18.636363636363637,
"grad_norm": 0.1591796875,
"learning_rate": 3.356993799154545e-06,
"loss": 0.059,
"step": 615
},
{
"epoch": 18.78787878787879,
"grad_norm": 0.146484375,
"learning_rate": 3.2838626635215874e-06,
"loss": 0.0595,
"step": 620
},
{
"epoch": 18.939393939393938,
"grad_norm": 0.1611328125,
"learning_rate": 3.2190259570144957e-06,
"loss": 0.0629,
"step": 625
},
{
"epoch": 19.09090909090909,
"grad_norm": 0.150390625,
"learning_rate": 3.162524371058697e-06,
"loss": 0.0612,
"step": 630
},
{
"epoch": 19.242424242424242,
"grad_norm": 0.1337890625,
"learning_rate": 3.1143933659697377e-06,
"loss": 0.0583,
"step": 635
},
{
"epoch": 19.393939393939394,
"grad_norm": 0.1474609375,
"learning_rate": 3.0746631486984266e-06,
"loss": 0.0626,
"step": 640
},
{
"epoch": 19.545454545454547,
"grad_norm": 0.15234375,
"learning_rate": 3.043358653873013e-06,
"loss": 0.0589,
"step": 645
},
{
"epoch": 19.696969696969695,
"grad_norm": 0.1572265625,
"learning_rate": 3.020499528150232e-06,
"loss": 0.0586,
"step": 650
},
{
"epoch": 19.848484848484848,
"grad_norm": 0.15625,
"learning_rate": 3.006100117885101e-06,
"loss": 0.0591,
"step": 655
},
{
"epoch": 20.0,
"grad_norm": 0.2119140625,
"learning_rate": 3.000169460127164e-06,
"loss": 0.0613,
"step": 660
},
{
"epoch": 20.0,
"step": 660,
"total_flos": 3.880913653947433e+18,
"train_loss": 0.06725140679063218,
"train_runtime": 3002.848,
"train_samples_per_second": 27.794,
"train_steps_per_second": 0.22
}
],
"logging_steps": 5,
"max_steps": 660,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.880913653947433e+18,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}