{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.753623188405797,
"eval_steps": 500,
"global_step": 85,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.057971014492753624,
"grad_norm": 5.159448254527082,
"learning_rate": 0.0,
"loss": 0.7831,
"step": 1
},
{
"epoch": 0.11594202898550725,
"grad_norm": 5.074454618154074,
"learning_rate": 1.111111111111111e-06,
"loss": 0.7702,
"step": 2
},
{
"epoch": 0.17391304347826086,
"grad_norm": 5.033467750869474,
"learning_rate": 2.222222222222222e-06,
"loss": 0.7884,
"step": 3
},
{
"epoch": 0.2318840579710145,
"grad_norm": 4.632881551856663,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.7716,
"step": 4
},
{
"epoch": 0.2898550724637681,
"grad_norm": 3.315176232308323,
"learning_rate": 4.444444444444444e-06,
"loss": 0.723,
"step": 5
},
{
"epoch": 0.34782608695652173,
"grad_norm": 2.394181259514717,
"learning_rate": 5.555555555555557e-06,
"loss": 0.6995,
"step": 6
},
{
"epoch": 0.4057971014492754,
"grad_norm": 3.7004540250844045,
"learning_rate": 6.666666666666667e-06,
"loss": 0.6504,
"step": 7
},
{
"epoch": 0.463768115942029,
"grad_norm": 3.9537411155858577,
"learning_rate": 7.77777777777778e-06,
"loss": 0.6537,
"step": 8
},
{
"epoch": 0.5217391304347826,
"grad_norm": 3.365368057453053,
"learning_rate": 8.888888888888888e-06,
"loss": 0.5975,
"step": 9
},
{
"epoch": 0.5797101449275363,
"grad_norm": 3.1403485995161238,
"learning_rate": 1e-05,
"loss": 0.6283,
"step": 10
},
{
"epoch": 0.6376811594202898,
"grad_norm": 2.4878800967156245,
"learning_rate": 9.995728791936505e-06,
"loss": 0.5365,
"step": 11
},
{
"epoch": 0.6956521739130435,
"grad_norm": 1.571307648439155,
"learning_rate": 9.98292246503335e-06,
"loss": 0.4818,
"step": 12
},
{
"epoch": 0.7536231884057971,
"grad_norm": 1.3929267598847173,
"learning_rate": 9.961602898685225e-06,
"loss": 0.5166,
"step": 13
},
{
"epoch": 0.8115942028985508,
"grad_norm": 1.6289303587249693,
"learning_rate": 9.931806517013612e-06,
"loss": 0.5384,
"step": 14
},
{
"epoch": 0.8695652173913043,
"grad_norm": 1.412181464381996,
"learning_rate": 9.893584226636773e-06,
"loss": 0.4933,
"step": 15
},
{
"epoch": 0.927536231884058,
"grad_norm": 1.444951103218855,
"learning_rate": 9.847001329696653e-06,
"loss": 0.5236,
"step": 16
},
{
"epoch": 0.9855072463768116,
"grad_norm": 1.3616316085764364,
"learning_rate": 9.792137412291265e-06,
"loss": 0.5006,
"step": 17
},
{
"epoch": 1.0,
"grad_norm": 1.3616316085764364,
"learning_rate": 9.729086208503174e-06,
"loss": 0.4813,
"step": 18
},
{
"epoch": 1.0579710144927537,
"grad_norm": 2.1567352869229954,
"learning_rate": 9.657955440256396e-06,
"loss": 0.4481,
"step": 19
},
{
"epoch": 1.1159420289855073,
"grad_norm": 1.2546916585964516,
"learning_rate": 9.578866633275289e-06,
"loss": 0.4735,
"step": 20
},
{
"epoch": 1.1739130434782608,
"grad_norm": 1.1045035898812048,
"learning_rate": 9.491954909459895e-06,
"loss": 0.4139,
"step": 21
},
{
"epoch": 1.2318840579710144,
"grad_norm": 1.0382597534226017,
"learning_rate": 9.397368756032445e-06,
"loss": 0.445,
"step": 22
},
{
"epoch": 1.289855072463768,
"grad_norm": 1.269533585653712,
"learning_rate": 9.295269771849426e-06,
"loss": 0.4651,
"step": 23
},
{
"epoch": 1.3478260869565217,
"grad_norm": 1.0434296813168673,
"learning_rate": 9.185832391312644e-06,
"loss": 0.458,
"step": 24
},
{
"epoch": 1.4057971014492754,
"grad_norm": 1.1217463005684392,
"learning_rate": 9.069243586350976e-06,
"loss": 0.4376,
"step": 25
},
{
"epoch": 1.463768115942029,
"grad_norm": 1.1702630509813354,
"learning_rate": 8.94570254698197e-06,
"loss": 0.4637,
"step": 26
},
{
"epoch": 1.5217391304347827,
"grad_norm": 0.9905456980422715,
"learning_rate": 8.815420340999034e-06,
"loss": 0.4186,
"step": 27
},
{
"epoch": 1.5797101449275361,
"grad_norm": 1.140388928402416,
"learning_rate": 8.67861955336566e-06,
"loss": 0.4851,
"step": 28
},
{
"epoch": 1.6376811594202898,
"grad_norm": 1.1003593567236911,
"learning_rate": 8.535533905932739e-06,
"loss": 0.4158,
"step": 29
},
{
"epoch": 1.6956521739130435,
"grad_norm": 1.0323183154161821,
"learning_rate": 8.386407858128707e-06,
"loss": 0.4571,
"step": 30
},
{
"epoch": 1.7536231884057971,
"grad_norm": 0.9652171295110353,
"learning_rate": 8.231496189304704e-06,
"loss": 0.4343,
"step": 31
},
{
"epoch": 1.8115942028985508,
"grad_norm": 1.0411121822811822,
"learning_rate": 8.071063563448341e-06,
"loss": 0.4371,
"step": 32
},
{
"epoch": 1.8695652173913042,
"grad_norm": 0.9087117048299469,
"learning_rate": 7.905384077009693e-06,
"loss": 0.3843,
"step": 33
},
{
"epoch": 1.927536231884058,
"grad_norm": 0.8945258612409986,
"learning_rate": 7.734740790612137e-06,
"loss": 0.4238,
"step": 34
},
{
"epoch": 1.9855072463768115,
"grad_norm": 0.9072287658607223,
"learning_rate": 7.559425245448006e-06,
"loss": 0.4239,
"step": 35
},
{
"epoch": 2.0,
"grad_norm": 0.9072287658607223,
"learning_rate": 7.379736965185369e-06,
"loss": 0.3879,
"step": 36
},
{
"epoch": 2.0579710144927534,
"grad_norm": 1.8075291280487968,
"learning_rate": 7.195982944236853e-06,
"loss": 0.4129,
"step": 37
},
{
"epoch": 2.1159420289855073,
"grad_norm": 1.0080140091271943,
"learning_rate": 7.008477123264849e-06,
"loss": 0.3956,
"step": 38
},
{
"epoch": 2.1739130434782608,
"grad_norm": 0.897311897179687,
"learning_rate": 6.817539852819149e-06,
"loss": 0.3859,
"step": 39
},
{
"epoch": 2.2318840579710146,
"grad_norm": 0.8980290567272365,
"learning_rate": 6.6234973460234184e-06,
"loss": 0.3878,
"step": 40
},
{
"epoch": 2.289855072463768,
"grad_norm": 0.9297885347982958,
"learning_rate": 6.426681121245527e-06,
"loss": 0.3511,
"step": 41
},
{
"epoch": 2.3478260869565215,
"grad_norm": 0.8239294623215564,
"learning_rate": 6.227427435703997e-06,
"loss": 0.4005,
"step": 42
},
{
"epoch": 2.4057971014492754,
"grad_norm": 7.099470282692103,
"learning_rate": 6.026076710978172e-06,
"loss": 0.3572,
"step": 43
},
{
"epoch": 2.463768115942029,
"grad_norm": 0.9621872386993758,
"learning_rate": 5.82297295140367e-06,
"loss": 0.4079,
"step": 44
},
{
"epoch": 2.5217391304347827,
"grad_norm": 0.8853307030646145,
"learning_rate": 5.61846315634674e-06,
"loss": 0.3506,
"step": 45
},
{
"epoch": 2.579710144927536,
"grad_norm": 0.8876519887284963,
"learning_rate": 5.412896727361663e-06,
"loss": 0.3726,
"step": 46
},
{
"epoch": 2.63768115942029,
"grad_norm": 0.9042713927633073,
"learning_rate": 5.206624871244066e-06,
"loss": 0.369,
"step": 47
},
{
"epoch": 2.6956521739130435,
"grad_norm": 0.8360870067501421,
"learning_rate": 5e-06,
"loss": 0.3272,
"step": 48
},
{
"epoch": 2.753623188405797,
"grad_norm": 0.7654639929708382,
"learning_rate": 4.793375128755934e-06,
"loss": 0.3464,
"step": 49
},
{
"epoch": 2.8115942028985508,
"grad_norm": 0.9559519589621139,
"learning_rate": 4.587103272638339e-06,
"loss": 0.3792,
"step": 50
},
{
"epoch": 2.869565217391304,
"grad_norm": 0.912171268081883,
"learning_rate": 4.381536843653262e-06,
"loss": 0.3639,
"step": 51
},
{
"epoch": 2.927536231884058,
"grad_norm": 0.8016958636807102,
"learning_rate": 4.17702704859633e-06,
"loss": 0.3497,
"step": 52
},
{
"epoch": 2.9855072463768115,
"grad_norm": 0.9037924330632935,
"learning_rate": 3.973923289021829e-06,
"loss": 0.3819,
"step": 53
},
{
"epoch": 3.0,
"grad_norm": 0.9037924330632935,
"learning_rate": 3.7725725642960047e-06,
"loss": 0.3725,
"step": 54
},
{
"epoch": 3.0579710144927534,
"grad_norm": 1.7435467163889906,
"learning_rate": 3.573318878754475e-06,
"loss": 0.3339,
"step": 55
},
{
"epoch": 3.1159420289855073,
"grad_norm": 0.8639273215194021,
"learning_rate": 3.3765026539765832e-06,
"loss": 0.3282,
"step": 56
},
{
"epoch": 3.1739130434782608,
"grad_norm": 0.8620194795220659,
"learning_rate": 3.1824601471808504e-06,
"loss": 0.3771,
"step": 57
},
{
"epoch": 3.2318840579710146,
"grad_norm": 0.818398812959851,
"learning_rate": 2.991522876735154e-06,
"loss": 0.32,
"step": 58
},
{
"epoch": 3.289855072463768,
"grad_norm": 0.7961553960544394,
"learning_rate": 2.804017055763149e-06,
"loss": 0.3354,
"step": 59
},
{
"epoch": 3.3478260869565215,
"grad_norm": 0.8313300023912036,
"learning_rate": 2.6202630348146323e-06,
"loss": 0.3086,
"step": 60
},
{
"epoch": 3.4057971014492754,
"grad_norm": 0.7966466657831824,
"learning_rate": 2.4405747545519966e-06,
"loss": 0.3461,
"step": 61
},
{
"epoch": 3.463768115942029,
"grad_norm": 0.8990785358269929,
"learning_rate": 2.265259209387867e-06,
"loss": 0.3419,
"step": 62
},
{
"epoch": 3.5217391304347827,
"grad_norm": 0.8214296109986554,
"learning_rate": 2.094615922990309e-06,
"loss": 0.3228,
"step": 63
},
{
"epoch": 3.579710144927536,
"grad_norm": 0.7687607558338363,
"learning_rate": 1.928936436551661e-06,
"loss": 0.3341,
"step": 64
},
{
"epoch": 3.63768115942029,
"grad_norm": 0.8107501089319402,
"learning_rate": 1.7685038106952952e-06,
"loss": 0.3141,
"step": 65
},
{
"epoch": 3.6956521739130435,
"grad_norm": 0.8005672821108396,
"learning_rate": 1.6135921418712959e-06,
"loss": 0.3064,
"step": 66
},
{
"epoch": 3.753623188405797,
"grad_norm": 0.7178076901315711,
"learning_rate": 1.4644660940672628e-06,
"loss": 0.2989,
"step": 67
},
{
"epoch": 3.8115942028985508,
"grad_norm": 0.8662218799990845,
"learning_rate": 1.321380446634342e-06,
"loss": 0.3212,
"step": 68
},
{
"epoch": 3.869565217391304,
"grad_norm": 0.7898861729963894,
"learning_rate": 1.1845796590009684e-06,
"loss": 0.3296,
"step": 69
},
{
"epoch": 3.927536231884058,
"grad_norm": 0.7743651242946709,
"learning_rate": 1.0542974530180327e-06,
"loss": 0.3341,
"step": 70
},
{
"epoch": 3.9855072463768115,
"grad_norm": 0.7837882634849173,
"learning_rate": 9.307564136490255e-07,
"loss": 0.3316,
"step": 71
},
{
"epoch": 4.0,
"grad_norm": 1.648891461040096,
"learning_rate": 8.141676086873574e-07,
"loss": 0.333,
"step": 72
},
{
"epoch": 4.057971014492754,
"grad_norm": 0.7683801753309785,
"learning_rate": 7.047302281505735e-07,
"loss": 0.3209,
"step": 73
},
{
"epoch": 4.115942028985507,
"grad_norm": 0.7457543082443584,
"learning_rate": 6.026312439675553e-07,
"loss": 0.31,
"step": 74
},
{
"epoch": 4.173913043478261,
"grad_norm": 0.6915197851906805,
"learning_rate": 5.080450905401057e-07,
"loss": 0.2763,
"step": 75
},
{
"epoch": 4.231884057971015,
"grad_norm": 0.7772721453876562,
"learning_rate": 4.211333667247125e-07,
"loss": 0.3156,
"step": 76
},
{
"epoch": 4.2898550724637685,
"grad_norm": 0.6330066051722274,
"learning_rate": 3.420445597436056e-07,
"loss": 0.2816,
"step": 77
},
{
"epoch": 4.3478260869565215,
"grad_norm": 0.7025563271833313,
"learning_rate": 2.7091379149682683e-07,
"loss": 0.2933,
"step": 78
},
{
"epoch": 4.405797101449275,
"grad_norm": 0.7797377278716924,
"learning_rate": 2.0786258770873647e-07,
"loss": 0.3372,
"step": 79
},
{
"epoch": 4.463768115942029,
"grad_norm": 0.6949232577970824,
"learning_rate": 1.5299867030334815e-07,
"loss": 0.2892,
"step": 80
},
{
"epoch": 4.521739130434782,
"grad_norm": 0.712480327775005,
"learning_rate": 1.0641577336322761e-07,
"loss": 0.3173,
"step": 81
},
{
"epoch": 4.579710144927536,
"grad_norm": 0.7367737093031481,
"learning_rate": 6.819348298638839e-08,
"loss": 0.3027,
"step": 82
},
{
"epoch": 4.63768115942029,
"grad_norm": 0.68317505546867,
"learning_rate": 3.839710131477492e-08,
"loss": 0.295,
"step": 83
},
{
"epoch": 4.695652173913043,
"grad_norm": 0.7210852567246594,
"learning_rate": 1.7077534966650767e-08,
"loss": 0.3287,
"step": 84
},
{
"epoch": 4.753623188405797,
"grad_norm": 0.7796588531860184,
"learning_rate": 4.2712080634949024e-09,
"loss": 0.3023,
"step": 85
}
],
"logging_steps": 1,
"max_steps": 85,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 17,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 12373272117248.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}