Qwen2.5-1.5B-Open-R1-Distill / trainer_state.json
JackMuX3Y's picture
Model save
7cf75e5 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9988901220865705,
"eval_steps": 100,
"global_step": 225,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.022197558268590455,
"grad_norm": 2.2235984897572694,
"learning_rate": 4.347826086956522e-06,
"loss": 1.0965,
"mean_token_accuracy": 0.7105070888996124,
"step": 5
},
{
"epoch": 0.04439511653718091,
"grad_norm": 1.0371581479889551,
"learning_rate": 8.695652173913044e-06,
"loss": 1.0434,
"mean_token_accuracy": 0.7201980158686638,
"step": 10
},
{
"epoch": 0.06659267480577137,
"grad_norm": 1.3071296340196783,
"learning_rate": 1.3043478260869566e-05,
"loss": 0.9843,
"mean_token_accuracy": 0.7260666146874428,
"step": 15
},
{
"epoch": 0.08879023307436182,
"grad_norm": 0.7154957235445495,
"learning_rate": 1.739130434782609e-05,
"loss": 0.918,
"mean_token_accuracy": 0.7386834695935249,
"step": 20
},
{
"epoch": 0.11098779134295228,
"grad_norm": 0.6557057916957052,
"learning_rate": 1.999516282291988e-05,
"loss": 0.8655,
"mean_token_accuracy": 0.7502432510256767,
"step": 25
},
{
"epoch": 0.13318534961154274,
"grad_norm": 0.5942751563907254,
"learning_rate": 1.9940798309400527e-05,
"loss": 0.8476,
"mean_token_accuracy": 0.7531737372279167,
"step": 30
},
{
"epoch": 0.15538290788013318,
"grad_norm": 0.4957314057726332,
"learning_rate": 1.982635248222264e-05,
"loss": 0.8302,
"mean_token_accuracy": 0.7568761467933655,
"step": 35
},
{
"epoch": 0.17758046614872364,
"grad_norm": 0.5120800804849761,
"learning_rate": 1.9652517041934357e-05,
"loss": 0.8213,
"mean_token_accuracy": 0.7587681159377098,
"step": 40
},
{
"epoch": 0.1997780244173141,
"grad_norm": 0.3881900403766342,
"learning_rate": 1.9420342634699893e-05,
"loss": 0.8043,
"mean_token_accuracy": 0.7615880772471428,
"step": 45
},
{
"epoch": 0.22197558268590456,
"grad_norm": 0.4192634272720123,
"learning_rate": 1.913123250228619e-05,
"loss": 0.7988,
"mean_token_accuracy": 0.7634198889136314,
"step": 50
},
{
"epoch": 0.244173140954495,
"grad_norm": 0.3732616490096885,
"learning_rate": 1.878693400099269e-05,
"loss": 0.7936,
"mean_token_accuracy": 0.7645810097455978,
"step": 55
},
{
"epoch": 0.2663706992230855,
"grad_norm": 0.4016469613549646,
"learning_rate": 1.8389528040783014e-05,
"loss": 0.7885,
"mean_token_accuracy": 0.7654082521796226,
"step": 60
},
{
"epoch": 0.2885682574916759,
"grad_norm": 0.3914922958543853,
"learning_rate": 1.7941416508447537e-05,
"loss": 0.7832,
"mean_token_accuracy": 0.7660864099860192,
"step": 65
},
{
"epoch": 0.31076581576026635,
"grad_norm": 0.41242564338129056,
"learning_rate": 1.7445307750810153e-05,
"loss": 0.7843,
"mean_token_accuracy": 0.7660787045955658,
"step": 70
},
{
"epoch": 0.33296337402885684,
"grad_norm": 0.40771645930304695,
"learning_rate": 1.690420020571747e-05,
"loss": 0.7852,
"mean_token_accuracy": 0.765475058555603,
"step": 75
},
{
"epoch": 0.3551609322974473,
"grad_norm": 0.4015081855730916,
"learning_rate": 1.6321364279743267e-05,
"loss": 0.7897,
"mean_token_accuracy": 0.7640682518482208,
"step": 80
},
{
"epoch": 0.37735849056603776,
"grad_norm": 0.44299751358195016,
"learning_rate": 1.570032258213783e-05,
"loss": 0.7825,
"mean_token_accuracy": 0.7656505450606346,
"step": 85
},
{
"epoch": 0.3995560488346282,
"grad_norm": 0.4192853533608115,
"learning_rate": 1.50448286344864e-05,
"loss": 0.7807,
"mean_token_accuracy": 0.765812449157238,
"step": 90
},
{
"epoch": 0.42175360710321863,
"grad_norm": 0.3631740147185924,
"learning_rate": 1.4358844184753713e-05,
"loss": 0.7587,
"mean_token_accuracy": 0.7719828367233277,
"step": 95
},
{
"epoch": 0.4439511653718091,
"grad_norm": 0.402770649577875,
"learning_rate": 1.3646515262826551e-05,
"loss": 0.7717,
"mean_token_accuracy": 0.7688061684370041,
"step": 100
},
{
"epoch": 0.4439511653718091,
"eval_loss": 0.7915011048316956,
"eval_mean_token_accuracy": 0.7592891256014506,
"eval_runtime": 3.409,
"eval_samples_per_second": 37.841,
"eval_steps_per_second": 1.76,
"step": 100
},
{
"epoch": 0.46614872364039955,
"grad_norm": 0.3807177377985598,
"learning_rate": 1.2912147122272523e-05,
"loss": 0.7718,
"mean_token_accuracy": 0.7677725195884705,
"step": 105
},
{
"epoch": 0.48834628190899,
"grad_norm": 0.39305302441426365,
"learning_rate": 1.2160178219764838e-05,
"loss": 0.7673,
"mean_token_accuracy": 0.7697135150432587,
"step": 110
},
{
"epoch": 0.5105438401775805,
"grad_norm": 0.3538843727039592,
"learning_rate": 1.1395153389439232e-05,
"loss": 0.7774,
"mean_token_accuracy": 0.7661349773406982,
"step": 115
},
{
"epoch": 0.532741398446171,
"grad_norm": 0.39909651538501784,
"learning_rate": 1.0621696374314807e-05,
"loss": 0.7612,
"mean_token_accuracy": 0.7710714146494866,
"step": 120
},
{
"epoch": 0.5549389567147613,
"grad_norm": 0.3924767091876128,
"learning_rate": 9.844481880796492e-06,
"loss": 0.7647,
"mean_token_accuracy": 0.7698081240057946,
"step": 125
},
{
"epoch": 0.5771365149833518,
"grad_norm": 0.37938597040417427,
"learning_rate": 9.068207325159285e-06,
"loss": 0.7446,
"mean_token_accuracy": 0.7747560039162635,
"step": 130
},
{
"epoch": 0.5993340732519423,
"grad_norm": 0.3854698311289992,
"learning_rate": 8.297564442776014e-06,
"loss": 0.7555,
"mean_token_accuracy": 0.7720320910215378,
"step": 135
},
{
"epoch": 0.6215316315205327,
"grad_norm": 0.3547942293439342,
"learning_rate": 7.537210931679988e-06,
"loss": 0.7531,
"mean_token_accuracy": 0.7726601183414459,
"step": 140
},
{
"epoch": 0.6437291897891232,
"grad_norm": 0.34912810948700074,
"learning_rate": 6.791742301846325e-06,
"loss": 0.774,
"mean_token_accuracy": 0.7670607671141625,
"step": 145
},
{
"epoch": 0.6659267480577137,
"grad_norm": 0.3681331061431648,
"learning_rate": 6.065664100332478e-06,
"loss": 0.7651,
"mean_token_accuracy": 0.7690967857837677,
"step": 150
},
{
"epoch": 0.6881243063263041,
"grad_norm": 0.368954099366331,
"learning_rate": 5.3633646801467255e-06,
"loss": 0.7524,
"mean_token_accuracy": 0.7736147075891495,
"step": 155
},
{
"epoch": 0.7103218645948945,
"grad_norm": 0.35992878085709806,
"learning_rate": 4.689088677427249e-06,
"loss": 0.7373,
"mean_token_accuracy": 0.7774115353822708,
"step": 160
},
{
"epoch": 0.732519422863485,
"grad_norm": 0.3614111664312195,
"learning_rate": 4.046911357233343e-06,
"loss": 0.7524,
"mean_token_accuracy": 0.773401352763176,
"step": 165
},
{
"epoch": 0.7547169811320755,
"grad_norm": 0.36349455300535294,
"learning_rate": 3.440713983000601e-06,
"loss": 0.7558,
"mean_token_accuracy": 0.772033941745758,
"step": 170
},
{
"epoch": 0.7769145394006659,
"grad_norm": 0.3397807098524205,
"learning_rate": 2.8741603585249312e-06,
"loss": 0.7597,
"mean_token_accuracy": 0.7703836098313331,
"step": 175
},
{
"epoch": 0.7991120976692564,
"grad_norm": 0.34012440219961076,
"learning_rate": 2.3506746842535244e-06,
"loss": 0.7574,
"mean_token_accuracy": 0.7716947227716446,
"step": 180
},
{
"epoch": 0.8213096559378469,
"grad_norm": 0.33440840345953365,
"learning_rate": 1.8734208617174986e-06,
"loss": 0.7558,
"mean_token_accuracy": 0.772092518210411,
"step": 185
},
{
"epoch": 0.8435072142064373,
"grad_norm": 0.34135151245134615,
"learning_rate": 1.4452833711883629e-06,
"loss": 0.7507,
"mean_token_accuracy": 0.7737182468175888,
"step": 190
},
{
"epoch": 0.8657047724750278,
"grad_norm": 0.3426195190218489,
"learning_rate": 1.0688498381320855e-06,
"loss": 0.7506,
"mean_token_accuracy": 0.7732807129621506,
"step": 195
},
{
"epoch": 0.8879023307436182,
"grad_norm": 0.33276209433043014,
"learning_rate": 7.463953938275859e-07,
"loss": 0.7412,
"mean_token_accuracy": 0.7763293862342835,
"step": 200
},
{
"epoch": 0.8879023307436182,
"eval_loss": 0.7738624811172485,
"eval_mean_token_accuracy": 0.7629910707473755,
"eval_runtime": 3.3184,
"eval_samples_per_second": 38.875,
"eval_steps_per_second": 1.808,
"step": 200
},
{
"epoch": 0.9100998890122086,
"grad_norm": 0.32120176766623043,
"learning_rate": 4.798689246727006e-07,
"loss": 0.7435,
"mean_token_accuracy": 0.7754677474498749,
"step": 205
},
{
"epoch": 0.9322974472807991,
"grad_norm": 0.33184265142796177,
"learning_rate": 2.708812932856253e-07,
"loss": 0.7341,
"mean_token_accuracy": 0.7786281272768975,
"step": 210
},
{
"epoch": 0.9544950055493896,
"grad_norm": 0.3272741862810846,
"learning_rate": 1.206956025924333e-07,
"loss": 0.7431,
"mean_token_accuracy": 0.7758487805724144,
"step": 215
},
{
"epoch": 0.97669256381798,
"grad_norm": 0.32591262892422984,
"learning_rate": 3.0219561743707326e-08,
"loss": 0.7404,
"mean_token_accuracy": 0.7761501207947731,
"step": 220
},
{
"epoch": 0.9988901220865705,
"grad_norm": 0.3313176690461435,
"learning_rate": 0.0,
"loss": 0.754,
"mean_token_accuracy": 0.7721990346908569,
"step": 225
},
{
"epoch": 0.9988901220865705,
"step": 225,
"total_flos": 76874092904448.0,
"train_loss": 0.7931315400865343,
"train_runtime": 3405.4017,
"train_samples_per_second": 6.35,
"train_steps_per_second": 0.066
}
],
"logging_steps": 5,
"max_steps": 225,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 76874092904448.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}