Qwen2.5-Math-7B-S2R-PRL / trainer_state.json

Upload folder using huggingface_hub

8704099 verified 8 months ago

12.5 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.03203640500568828,
	"eval_steps": 500,
	"global_step": 44,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.0029124004550625713,
	"importance_ratio": 1.0,
	"kl_div_avg": 6.202674558153376e-05,
	"learning_rate": 0.0,
	"loss_func": "stage2",
	"step": 1,
	"total_loss": -0.30124378204345703
	},
	{
	"epoch": 0.0029124004550625713,
	"importance_ratio": 0.9999282956123352,
	"kl_div_avg": 0.0013916800962761045,
	"learning_rate": 2.153382790366965e-07,
	"loss_func": "stage2",
	"step": 2,
	"total_loss": -0.36082643270492554
	},
	{
	"epoch": 0.0029124004550625713,
	"importance_ratio": 0.9998849630355835,
	"kl_div_avg": 2.4411560843873303e-06,
	"learning_rate": 3.4130309724299266e-07,
	"loss_func": "stage2",
	"step": 3,
	"total_loss": -1.0000114440917969
	},
	{
	"epoch": 0.0029124004550625713,
	"importance_ratio": 1.0000627040863037,
	"kl_div_avg": 5.5745240388205275e-05,
	"learning_rate": 4.30676558073393e-07,
	"loss_func": "stage2",
	"step": 4,
	"total_loss": 0.44314149022102356
	},
	{
	"epoch": 0.0058248009101251426,
	"importance_ratio": 1.0,
	"kl_div_avg": 0.0006268564611673355,
	"learning_rate": 5e-07,
	"loss_func": "stage2",
	"step": 5,
	"total_loss": 0.060124993324279785
	},
	{
	"epoch": 0.0058248009101251426,
	"importance_ratio": 1.0000600814819336,
	"kl_div_avg": 0.0005689397221431136,
	"learning_rate": 5e-07,
	"loss_func": "stage2",
	"step": 6,
	"total_loss": 0.058914512395858765
	},
	{
	"epoch": 0.0058248009101251426,
	"importance_ratio": 0.9999627470970154,
	"kl_div_avg": 0.0004537358181551099,
	"learning_rate": 4.999271455631648e-07,
	"loss_func": "stage2",
	"step": 7,
	"total_loss": 0.19876566529273987
	},
	{
	"epoch": 0.0058248009101251426,
	"importance_ratio": 1.0000057220458984,
	"kl_div_avg": 0.000579233281314373,
	"learning_rate": 4.998542911263296e-07,
	"loss_func": "stage2",
	"step": 8,
	"total_loss": 0.05980253219604492
	},
	{
	"epoch": 0.008737201365187713,
	"importance_ratio": 1.0,
	"kl_div_avg": 0.0005276197334751487,
	"learning_rate": 4.997814366894943e-07,
	"loss_func": "stage2",
	"step": 9,
	"total_loss": 0.3613969683647156
	},
	{
	"epoch": 0.008737201365187713,
	"importance_ratio": 1.000020146369934,
	"kl_div_avg": 0.000614392978604883,
	"learning_rate": 4.997085822526592e-07,
	"loss_func": "stage2",
	"step": 10,
	"total_loss": -0.7252892255783081
	},
	{
	"epoch": 0.008737201365187713,
	"importance_ratio": 0.9998898506164551,
	"kl_div_avg": 0.0005164016038179398,
	"learning_rate": 4.99635727815824e-07,
	"loss_func": "stage2",
	"step": 11,
	"total_loss": 0.32872042059898376
	},
	{
	"epoch": 0.008737201365187713,
	"importance_ratio": 1.0001676082611084,
	"kl_div_avg": 0.000510960235260427,
	"learning_rate": 4.995628733789887e-07,
	"loss_func": "stage2",
	"step": 12,
	"total_loss": -0.8001887798309326
	},
	{
	"epoch": 0.011649601820250285,
	"importance_ratio": 0.999980092048645,
	"kl_div_avg": 0.0013118372298777103,
	"learning_rate": 4.994900189421535e-07,
	"loss_func": "stage2",
	"step": 13,
	"total_loss": 0.4664098024368286
	},
	{
	"epoch": 0.011649601820250285,
	"importance_ratio": 1.0000075101852417,
	"kl_div_avg": 0.0005553055088967085,
	"learning_rate": 4.994171645053183e-07,
	"loss_func": "stage2",
	"step": 14,
	"total_loss": -0.3387294113636017
	},
	{
	"epoch": 0.011649601820250285,
	"importance_ratio": 0.9999791979789734,
	"kl_div_avg": 0.0006344152498058975,
	"learning_rate": 4.993443100684832e-07,
	"loss_func": "stage2",
	"step": 15,
	"total_loss": -0.7034344673156738
	},
	{
	"epoch": 0.011649601820250285,
	"importance_ratio": 0.9996992349624634,
	"kl_div_avg": 0.0013188815210014582,
	"learning_rate": 4.99271455631648e-07,
	"loss_func": "stage2",
	"step": 16,
	"total_loss": 0.6023236513137817
	},
	{
	"epoch": 0.014562002275312855,
	"importance_ratio": 1.0,
	"kl_div_avg": 0.0015903799794614315,
	"learning_rate": 4.991986011948127e-07,
	"loss_func": "stage2",
	"step": 17,
	"total_loss": 0.9130043983459473
	},
	{
	"epoch": 0.014562002275312855,
	"importance_ratio": 1.0000156164169312,
	"kl_div_avg": 0.0008485906291753054,
	"learning_rate": 4.991257467579775e-07,
	"loss_func": "stage2",
	"step": 18,
	"total_loss": 0.05914050340652466
	},
	{
	"epoch": 0.014562002275312855,
	"importance_ratio": 1.0000320672988892,
	"kl_div_avg": 0.0009148549288511276,
	"learning_rate": 4.990528923211423e-07,
	"loss_func": "stage2",
	"step": 19,
	"total_loss": -0.6230961084365845
	},
	{
	"epoch": 0.014562002275312855,
	"importance_ratio": 0.9999111890792847,
	"kl_div_avg": 0.0016965724062174559,
	"learning_rate": 4.989800378843072e-07,
	"loss_func": "stage2",
	"step": 20,
	"total_loss": 0.5814720988273621
	},
	{
	"epoch": 0.017474402730375427,
	"importance_ratio": 1.0,
	"kl_div_avg": 0.0012479191645979881,
	"learning_rate": 4.98907183447472e-07,
	"loss_func": "stage2",
	"step": 21,
	"total_loss": 0.291412353515625
	},
	{
	"epoch": 0.017474402730375427,
	"importance_ratio": 0.9998515248298645,
	"kl_div_avg": 0.0008866681600920856,
	"learning_rate": 4.988343290106367e-07,
	"loss_func": "stage2",
	"step": 22,
	"total_loss": 0.069973886013031
	},
	{
	"epoch": 0.017474402730375427,
	"importance_ratio": 0.9997187852859497,
	"kl_div_avg": 0.0029276064597070217,
	"learning_rate": 4.987614745738015e-07,
	"loss_func": "stage2",
	"step": 23,
	"total_loss": 0.14661093056201935
	},
	{
	"epoch": 0.017474402730375427,
	"importance_ratio": 0.9996069669723511,
	"kl_div_avg": 0.0028409322258085012,
	"learning_rate": 4.986886201369663e-07,
	"loss_func": "stage2",
	"step": 24,
	"total_loss": 0.23758070170879364
	},
	{
	"epoch": 0.020386803185437997,
	"importance_ratio": 1.0000464916229248,
	"kl_div_avg": 0.003729865886271,
	"learning_rate": 4.986157657001312e-07,
	"loss_func": "stage2",
	"step": 25,
	"total_loss": 0.3280088007450104
	},
	{
	"epoch": 0.020386803185437997,
	"importance_ratio": 0.9997869729995728,
	"kl_div_avg": 0.0012685225810855627,
	"learning_rate": 4.985429112632959e-07,
	"loss_func": "stage2",
	"step": 26,
	"total_loss": 0.5303494930267334
	},
	{
	"epoch": 0.020386803185437997,
	"importance_ratio": 0.9990458488464355,
	"kl_div_avg": 0.0042372471652925014,
	"learning_rate": 4.984700568264607e-07,
	"loss_func": "stage2",
	"step": 27,
	"total_loss": 0.6485586166381836
	},
	{
	"epoch": 0.020386803185437997,
	"importance_ratio": 0.9997897148132324,
	"kl_div_avg": 0.000950633198954165,
	"learning_rate": 4.983972023896255e-07,
	"loss_func": "stage2",
	"step": 28,
	"total_loss": 0.9602090120315552
	},
	{
	"epoch": 0.02329920364050057,
	"importance_ratio": 1.0,
	"kl_div_avg": 0.00110217509791255,
	"learning_rate": 4.983243479527903e-07,
	"loss_func": "stage2",
	"step": 29,
	"total_loss": 0.056760966777801514
	},
	{
	"epoch": 0.02329920364050057,
	"importance_ratio": 0.9995359778404236,
	"kl_div_avg": 0.002028511371463537,
	"learning_rate": 4.98251493515955e-07,
	"loss_func": "stage2",
	"step": 30,
	"total_loss": -0.34580060839653015
	},
	{
	"epoch": 0.02329920364050057,
	"importance_ratio": 0.999755859375,
	"kl_div_avg": 0.0011557539692148566,
	"learning_rate": 4.981786390791199e-07,
	"loss_func": "stage2",
	"step": 31,
	"total_loss": 0.05999104678630829
	},
	{
	"epoch": 0.02329920364050057,
	"importance_ratio": 0.999870240688324,
	"kl_div_avg": 0.0008296141168102622,
	"learning_rate": 4.981057846422847e-07,
	"loss_func": "stage2",
	"step": 32,
	"total_loss": -0.2040196657180786
	},
	{
	"epoch": 0.02621160409556314,
	"importance_ratio": 1.0,
	"kl_div_avg": 0.0052886055782437325,
	"learning_rate": 4.980329302054495e-07,
	"loss_func": "stage2",
	"step": 33,
	"total_loss": 0.3385615050792694
	},
	{
	"epoch": 0.02621160409556314,
	"importance_ratio": 0.9999160766601562,
	"kl_div_avg": 0.00105857546441257,
	"learning_rate": 4.979600757686143e-07,
	"loss_func": "stage2",
	"step": 34,
	"total_loss": 0.8041456937789917
	},
	{
	"epoch": 0.02621160409556314,
	"importance_ratio": 1.0000337362289429,
	"kl_div_avg": 0.001299469848163426,
	"learning_rate": 4.97887221331779e-07,
	"loss_func": "stage2",
	"step": 35,
	"total_loss": 0.059677302837371826
	},
	{
	"epoch": 0.02621160409556314,
	"importance_ratio": 1.0000333786010742,
	"kl_div_avg": 0.0009728098521009088,
	"learning_rate": 4.978143668949439e-07,
	"loss_func": "stage2",
	"step": 36,
	"total_loss": 0.9722646474838257
	},
	{
	"epoch": 0.02912400455062571,
	"importance_ratio": 0.9999969005584717,
	"kl_div_avg": 0.004802822135388851,
	"learning_rate": 4.977415124581087e-07,
	"loss_func": "stage2",
	"step": 37,
	"total_loss": -0.5127509236335754
	},
	{
	"epoch": 0.02912400455062571,
	"importance_ratio": 0.9998223781585693,
	"kl_div_avg": 0.001794470939785242,
	"learning_rate": 4.976686580212735e-07,
	"loss_func": "stage2",
	"step": 38,
	"total_loss": 0.29614874720573425
	},
	{
	"epoch": 0.02912400455062571,
	"importance_ratio": 0.9999631643295288,
	"kl_div_avg": 0.0012322800466790795,
	"learning_rate": 4.975958035844383e-07,
	"loss_func": "stage2",
	"step": 39,
	"total_loss": -0.6838780045509338
	},
	{
	"epoch": 0.02912400455062571,
	"importance_ratio": 1.0000174045562744,
	"kl_div_avg": 0.0013172460021451116,
	"learning_rate": 4.97522949147603e-07,
	"loss_func": "stage2",
	"step": 40,
	"total_loss": 0.31415513157844543
	},
	{
	"epoch": 0.03203640500568828,
	"importance_ratio": 1.0,
	"kl_div_avg": 0.0018465688917785883,
	"learning_rate": 4.974500947107679e-07,
	"loss_func": "stage2",
	"step": 41,
	"total_loss": -0.6754190921783447
	},
	{
	"epoch": 0.03203640500568828,
	"importance_ratio": 0.9999680519104004,
	"kl_div_avg": 0.001666294177994132,
	"learning_rate": 4.973772402739327e-07,
	"loss_func": "stage2",
	"step": 42,
	"total_loss": -0.6812421083450317
	},
	{
	"epoch": 0.03203640500568828,
	"importance_ratio": 1.000089168548584,
	"kl_div_avg": 0.0016971167642623186,
	"learning_rate": 4.973043858370975e-07,
	"loss_func": "stage2",
	"step": 43,
	"total_loss": -0.764133095741272
	},
	{
	"epoch": 0.03203640500568828,
	"importance_ratio": 0.9997596740722656,
	"kl_div_avg": 0.001568423816934228,
	"learning_rate": 4.972315314002623e-07,
	"loss_func": "stage2",
	"step": 44,
	"total_loss": -0.6741525530815125
	}
	],
	"logging_steps": 1.0,
	"max_steps": 6868,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 20.0,
	"save_steps": 50,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": false,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 0,
	"train_batch_size": 1,
	"trial_name": null,
	"trial_params": null
	}