{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 500,
"global_step": 312,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016,
"grad_norm": 2.1790294647216797,
"learning_rate": 1.5625e-05,
"loss": 0.8982,
"step": 5
},
{
"epoch": 0.032,
"grad_norm": 1.2921503782272339,
"learning_rate": 3.125e-05,
"loss": 0.7771,
"step": 10
},
{
"epoch": 0.048,
"grad_norm": 1.0662598609924316,
"learning_rate": 4.6875e-05,
"loss": 0.7065,
"step": 15
},
{
"epoch": 0.064,
"grad_norm": 0.8291209936141968,
"learning_rate": 4.9979726739605334e-05,
"loss": 0.6569,
"step": 20
},
{
"epoch": 0.08,
"grad_norm": 0.8745065927505493,
"learning_rate": 4.989742922931149e-05,
"loss": 0.6506,
"step": 25
},
{
"epoch": 0.096,
"grad_norm": 0.6823293566703796,
"learning_rate": 4.975207191995552e-05,
"loss": 0.6579,
"step": 30
},
{
"epoch": 0.112,
"grad_norm": 0.7768641114234924,
"learning_rate": 4.95440640639845e-05,
"loss": 0.6842,
"step": 35
},
{
"epoch": 0.128,
"grad_norm": 0.6644716262817383,
"learning_rate": 4.927399130600373e-05,
"loss": 0.6327,
"step": 40
},
{
"epoch": 0.144,
"grad_norm": 0.6596384644508362,
"learning_rate": 4.894261403389862e-05,
"loss": 0.6664,
"step": 45
},
{
"epoch": 0.16,
"grad_norm": 0.6109484434127808,
"learning_rate": 4.855086523796815e-05,
"loss": 0.6369,
"step": 50
},
{
"epoch": 0.176,
"grad_norm": 0.6302951574325562,
"learning_rate": 4.8099847884097434e-05,
"loss": 0.6217,
"step": 55
},
{
"epoch": 0.192,
"grad_norm": 0.6894915699958801,
"learning_rate": 4.7590831808365293e-05,
"loss": 0.6364,
"step": 60
},
{
"epoch": 0.208,
"grad_norm": 0.6988873481750488,
"learning_rate": 4.702525014183007e-05,
"loss": 0.6244,
"step": 65
},
{
"epoch": 0.224,
"grad_norm": 0.6387168765068054,
"learning_rate": 4.6404695275559475e-05,
"loss": 0.614,
"step": 70
},
{
"epoch": 0.24,
"grad_norm": 0.6547486186027527,
"learning_rate": 4.57309143772652e-05,
"loss": 0.6034,
"step": 75
},
{
"epoch": 0.256,
"grad_norm": 0.6504953503608704,
"learning_rate": 4.500580447216489e-05,
"loss": 0.6265,
"step": 80
},
{
"epoch": 0.272,
"grad_norm": 0.5533855557441711,
"learning_rate": 4.423140710192144e-05,
"loss": 0.6178,
"step": 85
},
{
"epoch": 0.288,
"grad_norm": 0.5795829892158508,
"learning_rate": 4.340990257669732e-05,
"loss": 0.6243,
"step": 90
},
{
"epoch": 0.304,
"grad_norm": 0.5757337808609009,
"learning_rate": 4.254360383650716e-05,
"loss": 0.605,
"step": 95
},
{
"epoch": 0.32,
"grad_norm": 0.6556103825569153,
"learning_rate": 4.163494993915196e-05,
"loss": 0.6046,
"step": 100
},
{
"epoch": 0.336,
"grad_norm": 0.6166912913322449,
"learning_rate": 4.0686499193069595e-05,
"loss": 0.6243,
"step": 105
},
{
"epoch": 0.352,
"grad_norm": 0.6043514609336853,
"learning_rate": 3.970092195443604e-05,
"loss": 0.6182,
"step": 110
},
{
"epoch": 0.368,
"grad_norm": 0.5503015518188477,
"learning_rate": 3.8680993108796956e-05,
"loss": 0.619,
"step": 115
},
{
"epoch": 0.384,
"grad_norm": 12.36988353729248,
"learning_rate": 3.7629584258397646e-05,
"loss": 0.5871,
"step": 120
},
{
"epoch": 0.4,
"grad_norm": 0.5975585579872131,
"learning_rate": 3.65496556372078e-05,
"loss": 0.6111,
"step": 125
},
{
"epoch": 0.416,
"grad_norm": 0.5625191330909729,
"learning_rate": 3.5444247776404274e-05,
"loss": 0.5999,
"step": 130
},
{
"epoch": 0.432,
"grad_norm": 0.5412049889564514,
"learning_rate": 3.4316472943777736e-05,
"loss": 0.5846,
"step": 135
},
{
"epoch": 0.448,
"grad_norm": 0.5721265077590942,
"learning_rate": 3.316950638116532e-05,
"loss": 0.5975,
"step": 140
},
{
"epoch": 0.464,
"grad_norm": 0.5761215090751648,
"learning_rate": 3.2006577364580284e-05,
"loss": 0.6136,
"step": 145
},
{
"epoch": 0.48,
"grad_norm": 0.5800392627716064,
"learning_rate": 3.083096011220896e-05,
"loss": 0.5969,
"step": 150
},
{
"epoch": 0.496,
"grad_norm": 0.5748460292816162,
"learning_rate": 2.9645964565873207e-05,
"loss": 0.6043,
"step": 155
},
{
"epoch": 0.512,
"grad_norm": 0.5393357872962952,
"learning_rate": 2.845492707191334e-05,
"loss": 0.5814,
"step": 160
},
{
"epoch": 0.528,
"grad_norm": 0.5506784319877625,
"learning_rate": 2.7261200987729242e-05,
"loss": 0.5676,
"step": 165
},
{
"epoch": 0.544,
"grad_norm": 0.5413019061088562,
"learning_rate": 2.606814724042701e-05,
"loss": 0.6135,
"step": 170
},
{
"epoch": 0.56,
"grad_norm": 0.5511438846588135,
"learning_rate": 2.4879124864153163e-05,
"loss": 0.5744,
"step": 175
},
{
"epoch": 0.576,
"grad_norm": 0.5734318494796753,
"learning_rate": 2.36974815427584e-05,
"loss": 0.5972,
"step": 180
},
{
"epoch": 0.592,
"grad_norm": 0.5379422307014465,
"learning_rate": 2.252654418441808e-05,
"loss": 0.5717,
"step": 185
},
{
"epoch": 0.608,
"grad_norm": 0.5357218980789185,
"learning_rate": 2.136960955474649e-05,
"loss": 0.6015,
"step": 190
},
{
"epoch": 0.624,
"grad_norm": 0.5256953835487366,
"learning_rate": 2.0229934994777195e-05,
"loss": 0.5913,
"step": 195
},
{
"epoch": 0.64,
"grad_norm": 0.5051993131637573,
"learning_rate": 1.911072924994306e-05,
"loss": 0.579,
"step": 200
},
{
"epoch": 0.656,
"grad_norm": 0.5518410205841064,
"learning_rate": 1.801514343587688e-05,
"loss": 0.5805,
"step": 205
},
{
"epoch": 0.672,
"grad_norm": 0.50865238904953,
"learning_rate": 1.6946262166468175e-05,
"loss": 0.5962,
"step": 210
},
{
"epoch": 0.688,
"grad_norm": 0.5044121146202087,
"learning_rate": 1.590709486915524e-05,
"loss": 0.5689,
"step": 215
},
{
"epoch": 0.704,
"grad_norm": 0.4880557060241699,
"learning_rate": 1.4900567311904107e-05,
"loss": 0.5671,
"step": 220
},
{
"epoch": 0.72,
"grad_norm": 0.5397293567657471,
"learning_rate": 1.392951336573011e-05,
"loss": 0.5693,
"step": 225
},
{
"epoch": 0.736,
"grad_norm": 0.5184421539306641,
"learning_rate": 1.2996667025954618e-05,
"loss": 0.5693,
"step": 230
},
{
"epoch": 0.752,
"grad_norm": 0.5067721009254456,
"learning_rate": 1.2104654714661188e-05,
"loss": 0.5693,
"step": 235
},
{
"epoch": 0.768,
"grad_norm": 0.4883371591567993,
"learning_rate": 1.1255987886023202e-05,
"loss": 0.5749,
"step": 240
},
{
"epoch": 0.784,
"grad_norm": 0.4902109205722809,
"learning_rate": 1.0453055955322938e-05,
"loss": 0.5791,
"step": 245
},
{
"epoch": 0.8,
"grad_norm": 0.4747011065483093,
"learning_rate": 9.698119571570258e-06,
"loss": 0.5795,
"step": 250
},
{
"epoch": 0.816,
"grad_norm": 0.4571741223335266,
"learning_rate": 8.993304252661744e-06,
"loss": 0.5657,
"step": 255
},
{
"epoch": 0.832,
"grad_norm": 0.48521485924720764,
"learning_rate": 8.340594401000496e-06,
"loss": 0.5773,
"step": 260
},
{
"epoch": 0.848,
"grad_norm": 0.4582931399345398,
"learning_rate": 7.741827716425654e-06,
"loss": 0.5822,
"step": 265
},
{
"epoch": 0.864,
"grad_norm": 0.4858649671077728,
"learning_rate": 7.198690022181837e-06,
"loss": 0.5826,
"step": 270
},
{
"epoch": 0.88,
"grad_norm": 0.49482160806655884,
"learning_rate": 6.712710518496049e-06,
"loss": 0.5588,
"step": 275
},
{
"epoch": 0.896,
"grad_norm": 0.4916088581085205,
"learning_rate": 6.285257477125605e-06,
"loss": 0.5716,
"step": 280
},
{
"epoch": 0.912,
"grad_norm": 0.4659317433834076,
"learning_rate": 5.9175343889989275e-06,
"loss": 0.5621,
"step": 285
},
{
"epoch": 0.928,
"grad_norm": 0.5164335370063782,
"learning_rate": 5.610576575795573e-06,
"loss": 0.55,
"step": 290
},
{
"epoch": 0.944,
"grad_norm": 0.5051558017730713,
"learning_rate": 5.36524827500562e-06,
"loss": 0.5709,
"step": 295
},
{
"epoch": 0.96,
"grad_norm": 0.4648708403110504,
"learning_rate": 5.182240206675272e-06,
"loss": 0.5661,
"step": 300
},
{
"epoch": 0.976,
"grad_norm": 0.47396937012672424,
"learning_rate": 5.06206762868959e-06,
"loss": 0.5582,
"step": 305
},
{
"epoch": 0.992,
"grad_norm": 0.46491411328315735,
"learning_rate": 5.005068886067688e-06,
"loss": 0.5557,
"step": 310
},
{
"epoch": 0.9984,
"step": 312,
"total_flos": 34173306634240.0,
"train_loss": 0.6066549909420502,
"train_runtime": 5973.3628,
"train_samples_per_second": 1.674,
"train_steps_per_second": 0.052
}
],
"logging_steps": 5,
"max_steps": 312,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 34173306634240.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}