Qwen2.5-Math-7B-S2R-PRL / trainer_state.json
S2R-data's picture
Upload folder using huggingface_hub
8704099 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.03203640500568828,
"eval_steps": 500,
"global_step": 44,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0029124004550625713,
"importance_ratio": 1.0,
"kl_div_avg": 6.202674558153376e-05,
"learning_rate": 0.0,
"loss_func": "stage2",
"step": 1,
"total_loss": -0.30124378204345703
},
{
"epoch": 0.0029124004550625713,
"importance_ratio": 0.9999282956123352,
"kl_div_avg": 0.0013916800962761045,
"learning_rate": 2.153382790366965e-07,
"loss_func": "stage2",
"step": 2,
"total_loss": -0.36082643270492554
},
{
"epoch": 0.0029124004550625713,
"importance_ratio": 0.9998849630355835,
"kl_div_avg": 2.4411560843873303e-06,
"learning_rate": 3.4130309724299266e-07,
"loss_func": "stage2",
"step": 3,
"total_loss": -1.0000114440917969
},
{
"epoch": 0.0029124004550625713,
"importance_ratio": 1.0000627040863037,
"kl_div_avg": 5.5745240388205275e-05,
"learning_rate": 4.30676558073393e-07,
"loss_func": "stage2",
"step": 4,
"total_loss": 0.44314149022102356
},
{
"epoch": 0.0058248009101251426,
"importance_ratio": 1.0,
"kl_div_avg": 0.0006268564611673355,
"learning_rate": 5e-07,
"loss_func": "stage2",
"step": 5,
"total_loss": 0.060124993324279785
},
{
"epoch": 0.0058248009101251426,
"importance_ratio": 1.0000600814819336,
"kl_div_avg": 0.0005689397221431136,
"learning_rate": 5e-07,
"loss_func": "stage2",
"step": 6,
"total_loss": 0.058914512395858765
},
{
"epoch": 0.0058248009101251426,
"importance_ratio": 0.9999627470970154,
"kl_div_avg": 0.0004537358181551099,
"learning_rate": 4.999271455631648e-07,
"loss_func": "stage2",
"step": 7,
"total_loss": 0.19876566529273987
},
{
"epoch": 0.0058248009101251426,
"importance_ratio": 1.0000057220458984,
"kl_div_avg": 0.000579233281314373,
"learning_rate": 4.998542911263296e-07,
"loss_func": "stage2",
"step": 8,
"total_loss": 0.05980253219604492
},
{
"epoch": 0.008737201365187713,
"importance_ratio": 1.0,
"kl_div_avg": 0.0005276197334751487,
"learning_rate": 4.997814366894943e-07,
"loss_func": "stage2",
"step": 9,
"total_loss": 0.3613969683647156
},
{
"epoch": 0.008737201365187713,
"importance_ratio": 1.000020146369934,
"kl_div_avg": 0.000614392978604883,
"learning_rate": 4.997085822526592e-07,
"loss_func": "stage2",
"step": 10,
"total_loss": -0.7252892255783081
},
{
"epoch": 0.008737201365187713,
"importance_ratio": 0.9998898506164551,
"kl_div_avg": 0.0005164016038179398,
"learning_rate": 4.99635727815824e-07,
"loss_func": "stage2",
"step": 11,
"total_loss": 0.32872042059898376
},
{
"epoch": 0.008737201365187713,
"importance_ratio": 1.0001676082611084,
"kl_div_avg": 0.000510960235260427,
"learning_rate": 4.995628733789887e-07,
"loss_func": "stage2",
"step": 12,
"total_loss": -0.8001887798309326
},
{
"epoch": 0.011649601820250285,
"importance_ratio": 0.999980092048645,
"kl_div_avg": 0.0013118372298777103,
"learning_rate": 4.994900189421535e-07,
"loss_func": "stage2",
"step": 13,
"total_loss": 0.4664098024368286
},
{
"epoch": 0.011649601820250285,
"importance_ratio": 1.0000075101852417,
"kl_div_avg": 0.0005553055088967085,
"learning_rate": 4.994171645053183e-07,
"loss_func": "stage2",
"step": 14,
"total_loss": -0.3387294113636017
},
{
"epoch": 0.011649601820250285,
"importance_ratio": 0.9999791979789734,
"kl_div_avg": 0.0006344152498058975,
"learning_rate": 4.993443100684832e-07,
"loss_func": "stage2",
"step": 15,
"total_loss": -0.7034344673156738
},
{
"epoch": 0.011649601820250285,
"importance_ratio": 0.9996992349624634,
"kl_div_avg": 0.0013188815210014582,
"learning_rate": 4.99271455631648e-07,
"loss_func": "stage2",
"step": 16,
"total_loss": 0.6023236513137817
},
{
"epoch": 0.014562002275312855,
"importance_ratio": 1.0,
"kl_div_avg": 0.0015903799794614315,
"learning_rate": 4.991986011948127e-07,
"loss_func": "stage2",
"step": 17,
"total_loss": 0.9130043983459473
},
{
"epoch": 0.014562002275312855,
"importance_ratio": 1.0000156164169312,
"kl_div_avg": 0.0008485906291753054,
"learning_rate": 4.991257467579775e-07,
"loss_func": "stage2",
"step": 18,
"total_loss": 0.05914050340652466
},
{
"epoch": 0.014562002275312855,
"importance_ratio": 1.0000320672988892,
"kl_div_avg": 0.0009148549288511276,
"learning_rate": 4.990528923211423e-07,
"loss_func": "stage2",
"step": 19,
"total_loss": -0.6230961084365845
},
{
"epoch": 0.014562002275312855,
"importance_ratio": 0.9999111890792847,
"kl_div_avg": 0.0016965724062174559,
"learning_rate": 4.989800378843072e-07,
"loss_func": "stage2",
"step": 20,
"total_loss": 0.5814720988273621
},
{
"epoch": 0.017474402730375427,
"importance_ratio": 1.0,
"kl_div_avg": 0.0012479191645979881,
"learning_rate": 4.98907183447472e-07,
"loss_func": "stage2",
"step": 21,
"total_loss": 0.291412353515625
},
{
"epoch": 0.017474402730375427,
"importance_ratio": 0.9998515248298645,
"kl_div_avg": 0.0008866681600920856,
"learning_rate": 4.988343290106367e-07,
"loss_func": "stage2",
"step": 22,
"total_loss": 0.069973886013031
},
{
"epoch": 0.017474402730375427,
"importance_ratio": 0.9997187852859497,
"kl_div_avg": 0.0029276064597070217,
"learning_rate": 4.987614745738015e-07,
"loss_func": "stage2",
"step": 23,
"total_loss": 0.14661093056201935
},
{
"epoch": 0.017474402730375427,
"importance_ratio": 0.9996069669723511,
"kl_div_avg": 0.0028409322258085012,
"learning_rate": 4.986886201369663e-07,
"loss_func": "stage2",
"step": 24,
"total_loss": 0.23758070170879364
},
{
"epoch": 0.020386803185437997,
"importance_ratio": 1.0000464916229248,
"kl_div_avg": 0.003729865886271,
"learning_rate": 4.986157657001312e-07,
"loss_func": "stage2",
"step": 25,
"total_loss": 0.3280088007450104
},
{
"epoch": 0.020386803185437997,
"importance_ratio": 0.9997869729995728,
"kl_div_avg": 0.0012685225810855627,
"learning_rate": 4.985429112632959e-07,
"loss_func": "stage2",
"step": 26,
"total_loss": 0.5303494930267334
},
{
"epoch": 0.020386803185437997,
"importance_ratio": 0.9990458488464355,
"kl_div_avg": 0.0042372471652925014,
"learning_rate": 4.984700568264607e-07,
"loss_func": "stage2",
"step": 27,
"total_loss": 0.6485586166381836
},
{
"epoch": 0.020386803185437997,
"importance_ratio": 0.9997897148132324,
"kl_div_avg": 0.000950633198954165,
"learning_rate": 4.983972023896255e-07,
"loss_func": "stage2",
"step": 28,
"total_loss": 0.9602090120315552
},
{
"epoch": 0.02329920364050057,
"importance_ratio": 1.0,
"kl_div_avg": 0.00110217509791255,
"learning_rate": 4.983243479527903e-07,
"loss_func": "stage2",
"step": 29,
"total_loss": 0.056760966777801514
},
{
"epoch": 0.02329920364050057,
"importance_ratio": 0.9995359778404236,
"kl_div_avg": 0.002028511371463537,
"learning_rate": 4.98251493515955e-07,
"loss_func": "stage2",
"step": 30,
"total_loss": -0.34580060839653015
},
{
"epoch": 0.02329920364050057,
"importance_ratio": 0.999755859375,
"kl_div_avg": 0.0011557539692148566,
"learning_rate": 4.981786390791199e-07,
"loss_func": "stage2",
"step": 31,
"total_loss": 0.05999104678630829
},
{
"epoch": 0.02329920364050057,
"importance_ratio": 0.999870240688324,
"kl_div_avg": 0.0008296141168102622,
"learning_rate": 4.981057846422847e-07,
"loss_func": "stage2",
"step": 32,
"total_loss": -0.2040196657180786
},
{
"epoch": 0.02621160409556314,
"importance_ratio": 1.0,
"kl_div_avg": 0.0052886055782437325,
"learning_rate": 4.980329302054495e-07,
"loss_func": "stage2",
"step": 33,
"total_loss": 0.3385615050792694
},
{
"epoch": 0.02621160409556314,
"importance_ratio": 0.9999160766601562,
"kl_div_avg": 0.00105857546441257,
"learning_rate": 4.979600757686143e-07,
"loss_func": "stage2",
"step": 34,
"total_loss": 0.8041456937789917
},
{
"epoch": 0.02621160409556314,
"importance_ratio": 1.0000337362289429,
"kl_div_avg": 0.001299469848163426,
"learning_rate": 4.97887221331779e-07,
"loss_func": "stage2",
"step": 35,
"total_loss": 0.059677302837371826
},
{
"epoch": 0.02621160409556314,
"importance_ratio": 1.0000333786010742,
"kl_div_avg": 0.0009728098521009088,
"learning_rate": 4.978143668949439e-07,
"loss_func": "stage2",
"step": 36,
"total_loss": 0.9722646474838257
},
{
"epoch": 0.02912400455062571,
"importance_ratio": 0.9999969005584717,
"kl_div_avg": 0.004802822135388851,
"learning_rate": 4.977415124581087e-07,
"loss_func": "stage2",
"step": 37,
"total_loss": -0.5127509236335754
},
{
"epoch": 0.02912400455062571,
"importance_ratio": 0.9998223781585693,
"kl_div_avg": 0.001794470939785242,
"learning_rate": 4.976686580212735e-07,
"loss_func": "stage2",
"step": 38,
"total_loss": 0.29614874720573425
},
{
"epoch": 0.02912400455062571,
"importance_ratio": 0.9999631643295288,
"kl_div_avg": 0.0012322800466790795,
"learning_rate": 4.975958035844383e-07,
"loss_func": "stage2",
"step": 39,
"total_loss": -0.6838780045509338
},
{
"epoch": 0.02912400455062571,
"importance_ratio": 1.0000174045562744,
"kl_div_avg": 0.0013172460021451116,
"learning_rate": 4.97522949147603e-07,
"loss_func": "stage2",
"step": 40,
"total_loss": 0.31415513157844543
},
{
"epoch": 0.03203640500568828,
"importance_ratio": 1.0,
"kl_div_avg": 0.0018465688917785883,
"learning_rate": 4.974500947107679e-07,
"loss_func": "stage2",
"step": 41,
"total_loss": -0.6754190921783447
},
{
"epoch": 0.03203640500568828,
"importance_ratio": 0.9999680519104004,
"kl_div_avg": 0.001666294177994132,
"learning_rate": 4.973772402739327e-07,
"loss_func": "stage2",
"step": 42,
"total_loss": -0.6812421083450317
},
{
"epoch": 0.03203640500568828,
"importance_ratio": 1.000089168548584,
"kl_div_avg": 0.0016971167642623186,
"learning_rate": 4.973043858370975e-07,
"loss_func": "stage2",
"step": 43,
"total_loss": -0.764133095741272
},
{
"epoch": 0.03203640500568828,
"importance_ratio": 0.9997596740722656,
"kl_div_avg": 0.001568423816934228,
"learning_rate": 4.972315314002623e-07,
"loss_func": "stage2",
"step": 44,
"total_loss": -0.6741525530815125
}
],
"logging_steps": 1.0,
"max_steps": 6868,
"num_input_tokens_seen": 0,
"num_train_epochs": 20.0,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}