Qwen-code-7B-SFT-cot / trainer_state.json
zhuangxialie
Model save
3907390 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.9665071770334928,
"eval_steps": 500,
"global_step": 416,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04784688995215311,
"grad_norm": 1.6178438569586449,
"learning_rate": 5.9523809523809525e-06,
"loss": 0.5431,
"mean_token_accuracy": 0.8649710834026336,
"step": 5
},
{
"epoch": 0.09569377990430622,
"grad_norm": 0.8399210434256071,
"learning_rate": 1.1904761904761905e-05,
"loss": 0.4584,
"mean_token_accuracy": 0.8745080888271332,
"step": 10
},
{
"epoch": 0.14354066985645933,
"grad_norm": 0.4157952966846664,
"learning_rate": 1.785714285714286e-05,
"loss": 0.4063,
"mean_token_accuracy": 0.8852763116359711,
"step": 15
},
{
"epoch": 0.19138755980861244,
"grad_norm": 0.2766662800285874,
"learning_rate": 2.380952380952381e-05,
"loss": 0.3877,
"mean_token_accuracy": 0.8882795989513397,
"step": 20
},
{
"epoch": 0.23923444976076555,
"grad_norm": 0.2679535519393889,
"learning_rate": 2.9761904761904762e-05,
"loss": 0.374,
"mean_token_accuracy": 0.8910047888755799,
"step": 25
},
{
"epoch": 0.28708133971291866,
"grad_norm": 0.27461170501370225,
"learning_rate": 3.571428571428572e-05,
"loss": 0.3623,
"mean_token_accuracy": 0.8936996936798096,
"step": 30
},
{
"epoch": 0.3349282296650718,
"grad_norm": 0.2621068877552152,
"learning_rate": 4.166666666666667e-05,
"loss": 0.3545,
"mean_token_accuracy": 0.8954194068908692,
"step": 35
},
{
"epoch": 0.3827751196172249,
"grad_norm": 0.2232430857126827,
"learning_rate": 4.761904761904762e-05,
"loss": 0.351,
"mean_token_accuracy": 0.8956089854240418,
"step": 40
},
{
"epoch": 0.430622009569378,
"grad_norm": 0.2469440843623975,
"learning_rate": 4.99928562114719e-05,
"loss": 0.3458,
"mean_token_accuracy": 0.897181648015976,
"step": 45
},
{
"epoch": 0.4784688995215311,
"grad_norm": 0.23865360558587098,
"learning_rate": 4.9949216152802965e-05,
"loss": 0.3449,
"mean_token_accuracy": 0.8968794465065002,
"step": 50
},
{
"epoch": 0.5263157894736842,
"grad_norm": 0.22245226377013297,
"learning_rate": 4.9865981682910456e-05,
"loss": 0.3402,
"mean_token_accuracy": 0.8982591807842255,
"step": 55
},
{
"epoch": 0.5741626794258373,
"grad_norm": 0.2161337900395713,
"learning_rate": 4.97432996051307e-05,
"loss": 0.3388,
"mean_token_accuracy": 0.8982585489749908,
"step": 60
},
{
"epoch": 0.6220095693779905,
"grad_norm": 0.20816980844934332,
"learning_rate": 4.958138629782647e-05,
"loss": 0.3343,
"mean_token_accuracy": 0.8995780885219574,
"step": 65
},
{
"epoch": 0.6698564593301436,
"grad_norm": 0.20952600298862226,
"learning_rate": 4.938052733275354e-05,
"loss": 0.3297,
"mean_token_accuracy": 0.9005344986915589,
"step": 70
},
{
"epoch": 0.7177033492822966,
"grad_norm": 0.22463823169406383,
"learning_rate": 4.914107697138843e-05,
"loss": 0.3254,
"mean_token_accuracy": 0.901619678735733,
"step": 75
},
{
"epoch": 0.7655502392344498,
"grad_norm": 0.20387680313285525,
"learning_rate": 4.886345754010597e-05,
"loss": 0.3228,
"mean_token_accuracy": 0.9023577153682709,
"step": 80
},
{
"epoch": 0.8133971291866029,
"grad_norm": 0.1967095271533209,
"learning_rate": 4.854815868530855e-05,
"loss": 0.3189,
"mean_token_accuracy": 0.9035642802715301,
"step": 85
},
{
"epoch": 0.861244019138756,
"grad_norm": 0.20177701754772745,
"learning_rate": 4.819573650982088e-05,
"loss": 0.3155,
"mean_token_accuracy": 0.9040892541408538,
"step": 90
},
{
"epoch": 0.9090909090909091,
"grad_norm": 0.21489007606420435,
"learning_rate": 4.780681259207339e-05,
"loss": 0.3148,
"mean_token_accuracy": 0.9040708005428314,
"step": 95
},
{
"epoch": 0.9569377990430622,
"grad_norm": 0.21379980317514838,
"learning_rate": 4.738207288980417e-05,
"loss": 0.3156,
"mean_token_accuracy": 0.903698742389679,
"step": 100
},
{
"epoch": 1.0,
"grad_norm": 0.2158756062319493,
"learning_rate": 4.692226653021304e-05,
"loss": 0.3082,
"mean_token_accuracy": 0.9040403498543633,
"step": 105
},
{
"epoch": 1.0478468899521531,
"grad_norm": 0.21457223097848338,
"learning_rate": 4.6428204488701576e-05,
"loss": 0.2509,
"mean_token_accuracy": 0.9200618088245391,
"step": 110
},
{
"epoch": 1.0956937799043063,
"grad_norm": 0.20058528076642096,
"learning_rate": 4.5900758158529505e-05,
"loss": 0.2506,
"mean_token_accuracy": 0.9204949855804443,
"step": 115
},
{
"epoch": 1.1435406698564594,
"grad_norm": 0.20876791389798524,
"learning_rate": 4.534085781391011e-05,
"loss": 0.2496,
"mean_token_accuracy": 0.9207346796989441,
"step": 120
},
{
"epoch": 1.1913875598086126,
"grad_norm": 0.20289299393352286,
"learning_rate": 4.474949096925538e-05,
"loss": 0.2506,
"mean_token_accuracy": 0.9200530827045441,
"step": 125
},
{
"epoch": 1.2392344497607655,
"grad_norm": 0.21305573701153363,
"learning_rate": 4.4127700637464834e-05,
"loss": 0.2507,
"mean_token_accuracy": 0.9202218174934387,
"step": 130
},
{
"epoch": 1.2870813397129186,
"grad_norm": 0.19168843820402073,
"learning_rate": 4.347658349032977e-05,
"loss": 0.2475,
"mean_token_accuracy": 0.9211139142513275,
"step": 135
},
{
"epoch": 1.3349282296650717,
"grad_norm": 0.23386818676449111,
"learning_rate": 4.279728792429768e-05,
"loss": 0.2493,
"mean_token_accuracy": 0.9205289006233215,
"step": 140
},
{
"epoch": 1.3827751196172249,
"grad_norm": 0.19829038322936948,
"learning_rate": 4.209101203500809e-05,
"loss": 0.2469,
"mean_token_accuracy": 0.9209350109100342,
"step": 145
},
{
"epoch": 1.430622009569378,
"grad_norm": 0.22373994346715842,
"learning_rate": 4.135900150417243e-05,
"loss": 0.2416,
"mean_token_accuracy": 0.9225721061229706,
"step": 150
},
{
"epoch": 1.4784688995215312,
"grad_norm": 0.20748037358682295,
"learning_rate": 4.0602547402524813e-05,
"loss": 0.2396,
"mean_token_accuracy": 0.9232222616672516,
"step": 155
},
{
"epoch": 1.526315789473684,
"grad_norm": 0.18363411909081742,
"learning_rate": 3.982298391271858e-05,
"loss": 0.2474,
"mean_token_accuracy": 0.9210693001747131,
"step": 160
},
{
"epoch": 1.5741626794258372,
"grad_norm": 0.18354584761777637,
"learning_rate": 3.902168597618509e-05,
"loss": 0.2426,
"mean_token_accuracy": 0.9223567545413971,
"step": 165
},
{
"epoch": 1.6220095693779903,
"grad_norm": 0.19344690588457217,
"learning_rate": 3.82000668681049e-05,
"loss": 0.2419,
"mean_token_accuracy": 0.9226630091667175,
"step": 170
},
{
"epoch": 1.6698564593301435,
"grad_norm": 0.19835339977982622,
"learning_rate": 3.735957570476844e-05,
"loss": 0.2364,
"mean_token_accuracy": 0.9246738970279693,
"step": 175
},
{
"epoch": 1.7177033492822966,
"grad_norm": 0.1893700609423051,
"learning_rate": 3.65016948877226e-05,
"loss": 0.237,
"mean_token_accuracy": 0.9241552948951721,
"step": 180
},
{
"epoch": 1.7655502392344498,
"grad_norm": 0.20092617650862296,
"learning_rate": 3.562793748921095e-05,
"loss": 0.2302,
"mean_token_accuracy": 0.9259460866451263,
"step": 185
},
{
"epoch": 1.813397129186603,
"grad_norm": 0.19482063162086233,
"learning_rate": 3.473984458351913e-05,
"loss": 0.2294,
"mean_token_accuracy": 0.9257330477237702,
"step": 190
},
{
"epoch": 1.861244019138756,
"grad_norm": 0.23594862888300963,
"learning_rate": 3.383898252893217e-05,
"loss": 0.2252,
"mean_token_accuracy": 0.9272994875907898,
"step": 195
},
{
"epoch": 1.9090909090909092,
"grad_norm": 0.2264429074763011,
"learning_rate": 3.292694020509744e-05,
"loss": 0.2285,
"mean_token_accuracy": 0.9267280995845795,
"step": 200
},
{
"epoch": 1.9569377990430623,
"grad_norm": 0.20243011319089546,
"learning_rate": 3.200532621066612e-05,
"loss": 0.2317,
"mean_token_accuracy": 0.925784581899643,
"step": 205
},
{
"epoch": 2.0,
"grad_norm": 0.2761279417111308,
"learning_rate": 3.10757660261555e-05,
"loss": 0.2187,
"mean_token_accuracy": 0.9277391168806288,
"step": 210
},
{
"epoch": 2.047846889952153,
"grad_norm": 0.21472765995198592,
"learning_rate": 3.013989914703625e-05,
"loss": 0.1588,
"mean_token_accuracy": 0.9470981001853943,
"step": 215
},
{
"epoch": 2.0956937799043063,
"grad_norm": 0.19241730654495223,
"learning_rate": 2.919937619210103e-05,
"loss": 0.1597,
"mean_token_accuracy": 0.9465132236480713,
"step": 220
},
{
"epoch": 2.1435406698564594,
"grad_norm": 0.2010987341848127,
"learning_rate": 2.825585599221456e-05,
"loss": 0.1517,
"mean_token_accuracy": 0.9487400650978088,
"step": 225
},
{
"epoch": 2.1913875598086126,
"grad_norm": 0.18857044810118412,
"learning_rate": 2.7311002664579755e-05,
"loss": 0.1522,
"mean_token_accuracy": 0.9488288640975953,
"step": 230
},
{
"epoch": 2.2392344497607657,
"grad_norm": 0.1973021475908052,
"learning_rate": 2.6366482677680226e-05,
"loss": 0.1539,
"mean_token_accuracy": 0.9482394576072692,
"step": 235
},
{
"epoch": 2.287081339712919,
"grad_norm": 0.1916451391340417,
"learning_rate": 2.5423961912075712e-05,
"loss": 0.1519,
"mean_token_accuracy": 0.9490710437297821,
"step": 240
},
{
"epoch": 2.334928229665072,
"grad_norm": 0.18310116347345545,
"learning_rate": 2.448510272223445e-05,
"loss": 0.1593,
"mean_token_accuracy": 0.9469183087348938,
"step": 245
},
{
"epoch": 2.382775119617225,
"grad_norm": 0.19992899486646826,
"learning_rate": 2.3551561004584644e-05,
"loss": 0.1527,
"mean_token_accuracy": 0.9489526867866516,
"step": 250
},
{
"epoch": 2.430622009569378,
"grad_norm": 0.20001274217428525,
"learning_rate": 2.2624983276956214e-05,
"loss": 0.1545,
"mean_token_accuracy": 0.9480966806411744,
"step": 255
},
{
"epoch": 2.478468899521531,
"grad_norm": 0.19971823143333786,
"learning_rate": 2.17070037745638e-05,
"loss": 0.1516,
"mean_token_accuracy": 0.9491800308227539,
"step": 260
},
{
"epoch": 2.526315789473684,
"grad_norm": 0.17534350087544737,
"learning_rate": 2.079924156765312e-05,
"loss": 0.15,
"mean_token_accuracy": 0.9497306644916534,
"step": 265
},
{
"epoch": 2.574162679425837,
"grad_norm": 0.17941495258495743,
"learning_rate": 1.9903297705894207e-05,
"loss": 0.1552,
"mean_token_accuracy": 0.9483123421669006,
"step": 270
},
{
"epoch": 2.6220095693779903,
"grad_norm": 0.1886616293140272,
"learning_rate": 1.9020752394558096e-05,
"loss": 0.1509,
"mean_token_accuracy": 0.9495630502700806,
"step": 275
},
{
"epoch": 2.6698564593301435,
"grad_norm": 0.19242543918662838,
"learning_rate": 1.815316220745756e-05,
"loss": 0.1546,
"mean_token_accuracy": 0.9482314586639404,
"step": 280
},
{
"epoch": 2.7177033492822966,
"grad_norm": 0.18201614867985003,
"learning_rate": 1.73020573415673e-05,
"loss": 0.1496,
"mean_token_accuracy": 0.9498195767402648,
"step": 285
},
{
"epoch": 2.7655502392344498,
"grad_norm": 0.20205387945258876,
"learning_rate": 1.646893891816591e-05,
"loss": 0.1495,
"mean_token_accuracy": 0.9499354422092438,
"step": 290
},
{
"epoch": 2.813397129186603,
"grad_norm": 0.17887628669217684,
"learning_rate": 1.5655276335259493e-05,
"loss": 0.1495,
"mean_token_accuracy": 0.950058388710022,
"step": 295
},
{
"epoch": 2.861244019138756,
"grad_norm": 0.18210621293658882,
"learning_rate": 1.4862504675956803e-05,
"loss": 0.1477,
"mean_token_accuracy": 0.9507902562618256,
"step": 300
},
{
"epoch": 2.909090909090909,
"grad_norm": 0.16610798667043808,
"learning_rate": 1.4092022177366448e-05,
"loss": 0.1472,
"mean_token_accuracy": 0.9505416512489319,
"step": 305
},
{
"epoch": 2.9569377990430623,
"grad_norm": 0.19093889547243018,
"learning_rate": 1.334518776448086e-05,
"loss": 0.1449,
"mean_token_accuracy": 0.9513530313968659,
"step": 310
},
{
"epoch": 3.0,
"grad_norm": 0.17832946925769,
"learning_rate": 1.2623318653396027e-05,
"loss": 0.1407,
"mean_token_accuracy": 0.9511178533236185,
"step": 315
},
{
"epoch": 3.047846889952153,
"grad_norm": 0.25680975157440067,
"learning_rate": 1.192768802809487e-05,
"loss": 0.101,
"mean_token_accuracy": 0.9665960729122162,
"step": 320
},
{
"epoch": 3.0956937799043063,
"grad_norm": 0.18777563649802764,
"learning_rate": 1.1259522794891156e-05,
"loss": 0.0961,
"mean_token_accuracy": 0.9677205324172974,
"step": 325
},
{
"epoch": 3.1435406698564594,
"grad_norm": 0.17478369339727004,
"learning_rate": 1.0620001418495165e-05,
"loss": 0.0985,
"mean_token_accuracy": 0.966835618019104,
"step": 330
},
{
"epoch": 3.1913875598086126,
"grad_norm": 0.17275966764155462,
"learning_rate": 1.0010251843517089e-05,
"loss": 0.0972,
"mean_token_accuracy": 0.9672303974628449,
"step": 335
},
{
"epoch": 3.2392344497607657,
"grad_norm": 0.16993093028595546,
"learning_rate": 9.431349505074635e-06,
"loss": 0.0956,
"mean_token_accuracy": 0.9676419258117676,
"step": 340
},
{
"epoch": 3.287081339712919,
"grad_norm": 0.16631908155701744,
"learning_rate": 8.884315432013085e-06,
"loss": 0.0947,
"mean_token_accuracy": 0.9682831168174744,
"step": 345
},
{
"epoch": 3.334928229665072,
"grad_norm": 0.16712721419639617,
"learning_rate": 8.370114446083686e-06,
"loss": 0.0963,
"mean_token_accuracy": 0.9672574043273926,
"step": 350
},
{
"epoch": 3.382775119617225,
"grad_norm": 0.16752495961320182,
"learning_rate": 7.88965346025611e-06,
"loss": 0.0932,
"mean_token_accuracy": 0.9686044454574585,
"step": 355
},
{
"epoch": 3.430622009569378,
"grad_norm": 0.16411359819333338,
"learning_rate": 7.443779879166704e-06,
"loss": 0.0936,
"mean_token_accuracy": 0.968251782655716,
"step": 360
},
{
"epoch": 3.478468899521531,
"grad_norm": 0.17214899232280093,
"learning_rate": 7.033280104523337e-06,
"loss": 0.0947,
"mean_token_accuracy": 0.9681008815765381,
"step": 365
},
{
"epoch": 3.526315789473684,
"grad_norm": 0.16393252030290434,
"learning_rate": 6.658878148103265e-06,
"loss": 0.0954,
"mean_token_accuracy": 0.9676827132701874,
"step": 370
},
{
"epoch": 3.574162679425837,
"grad_norm": 0.15820317829789238,
"learning_rate": 6.3212343547899925e-06,
"loss": 0.0941,
"mean_token_accuracy": 0.9682557284832001,
"step": 375
},
{
"epoch": 3.6220095693779903,
"grad_norm": 0.1691401819724291,
"learning_rate": 6.020944237901609e-06,
"loss": 0.0957,
"mean_token_accuracy": 0.9677995264530181,
"step": 380
},
{
"epoch": 3.6698564593301435,
"grad_norm": 0.15841220320611066,
"learning_rate": 5.7585374288645935e-06,
"loss": 0.095,
"mean_token_accuracy": 0.9678770661354065,
"step": 385
},
{
"epoch": 3.7177033492822966,
"grad_norm": 0.15989013852853978,
"learning_rate": 5.534476743085694e-06,
"loss": 0.0933,
"mean_token_accuracy": 0.9685147047042847,
"step": 390
},
{
"epoch": 3.7655502392344498,
"grad_norm": 0.1602782553069919,
"learning_rate": 5.349157363669362e-06,
"loss": 0.0926,
"mean_token_accuracy": 0.9689649045467377,
"step": 395
},
{
"epoch": 3.813397129186603,
"grad_norm": 0.16816859348589558,
"learning_rate": 5.202906144420483e-06,
"loss": 0.0924,
"mean_token_accuracy": 0.9688647747039795,
"step": 400
},
{
"epoch": 3.861244019138756,
"grad_norm": 0.16695686882956462,
"learning_rate": 5.095981033361725e-06,
"loss": 0.0936,
"mean_token_accuracy": 0.9684687733650208,
"step": 405
},
{
"epoch": 3.909090909090909,
"grad_norm": 0.17408482158108302,
"learning_rate": 5.028570617782212e-06,
"loss": 0.0924,
"mean_token_accuracy": 0.9686804115772247,
"step": 410
},
{
"epoch": 3.9569377990430623,
"grad_norm": 0.16266285247102608,
"learning_rate": 5.000793791620026e-06,
"loss": 0.0928,
"mean_token_accuracy": 0.9688069880008697,
"step": 415
},
{
"epoch": 3.9665071770334928,
"mean_token_accuracy": 0.9688756763935089,
"step": 416,
"total_flos": 213866215112704.0,
"train_loss": 0.21202428278943095,
"train_runtime": 2397.3093,
"train_samples_per_second": 2.778,
"train_steps_per_second": 0.174
}
],
"logging_steps": 5,
"max_steps": 416,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 213866215112704.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}