{
"best_global_step": 48,
"best_metric": 2.5830821990966797,
"best_model_checkpoint": "output_dir/sft_reasoning-activation_7task-E2E_Qwen3-1.7B_Games/checkpoint-48",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 96,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.020840117225659396,
"grad_norm": 6.09375,
"learning_rate": 0.0,
"loss": 2.3945,
"step": 1
},
{
"epoch": 0.04168023445131879,
"grad_norm": 6.09375,
"learning_rate": 1.0000000000000002e-06,
"loss": 2.3936,
"step": 2
},
{
"epoch": 0.06252035167697818,
"grad_norm": 6.21875,
"learning_rate": 2.0000000000000003e-06,
"loss": 2.4141,
"step": 3
},
{
"epoch": 0.08336046890263758,
"grad_norm": 6.0625,
"learning_rate": 3e-06,
"loss": 2.3931,
"step": 4
},
{
"epoch": 0.10420058612829697,
"grad_norm": 6.125,
"learning_rate": 4.000000000000001e-06,
"loss": 2.3831,
"step": 5
},
{
"epoch": 0.12504070335395637,
"grad_norm": 6.03125,
"learning_rate": 5e-06,
"loss": 2.3776,
"step": 6
},
{
"epoch": 0.14588082057961577,
"grad_norm": 5.875,
"learning_rate": 6e-06,
"loss": 2.3656,
"step": 7
},
{
"epoch": 0.16672093780527517,
"grad_norm": 5.78125,
"learning_rate": 7e-06,
"loss": 2.3665,
"step": 8
},
{
"epoch": 0.18756105503093454,
"grad_norm": 5.6875,
"learning_rate": 8.000000000000001e-06,
"loss": 2.348,
"step": 9
},
{
"epoch": 0.20840117225659394,
"grad_norm": 5.53125,
"learning_rate": 9e-06,
"loss": 2.3125,
"step": 10
},
{
"epoch": 0.22924128948225334,
"grad_norm": 5.3125,
"learning_rate": 1e-05,
"loss": 2.2892,
"step": 11
},
{
"epoch": 0.25008140670791273,
"grad_norm": 5.09375,
"learning_rate": 9.956521739130436e-06,
"loss": 2.2553,
"step": 12
},
{
"epoch": 0.2709215239335721,
"grad_norm": 4.9375,
"learning_rate": 9.913043478260871e-06,
"loss": 2.238,
"step": 13
},
{
"epoch": 0.29176164115923153,
"grad_norm": 4.875,
"learning_rate": 9.869565217391304e-06,
"loss": 2.2174,
"step": 14
},
{
"epoch": 0.3126017583848909,
"grad_norm": 4.75,
"learning_rate": 9.82608695652174e-06,
"loss": 2.188,
"step": 15
},
{
"epoch": 0.33344187561055033,
"grad_norm": 4.53125,
"learning_rate": 9.782608695652175e-06,
"loss": 2.1638,
"step": 16
},
{
"epoch": 0.3542819928362097,
"grad_norm": 4.375,
"learning_rate": 9.73913043478261e-06,
"loss": 2.1385,
"step": 17
},
{
"epoch": 0.3751221100618691,
"grad_norm": 4.21875,
"learning_rate": 9.695652173913043e-06,
"loss": 2.1308,
"step": 18
},
{
"epoch": 0.3959622272875285,
"grad_norm": 4.09375,
"learning_rate": 9.652173913043478e-06,
"loss": 2.1104,
"step": 19
},
{
"epoch": 0.4168023445131879,
"grad_norm": 3.953125,
"learning_rate": 9.608695652173914e-06,
"loss": 2.0834,
"step": 20
},
{
"epoch": 0.4376424617388473,
"grad_norm": 3.765625,
"learning_rate": 9.565217391304349e-06,
"loss": 2.0657,
"step": 21
},
{
"epoch": 0.45848257896450667,
"grad_norm": 3.609375,
"learning_rate": 9.521739130434784e-06,
"loss": 2.0554,
"step": 22
},
{
"epoch": 0.47932269619016604,
"grad_norm": 3.421875,
"learning_rate": 9.478260869565217e-06,
"loss": 2.0381,
"step": 23
},
{
"epoch": 0.5001628134158255,
"grad_norm": 3.265625,
"learning_rate": 9.434782608695652e-06,
"loss": 2.006,
"step": 24
},
{
"epoch": 0.5210029306414848,
"grad_norm": 3.15625,
"learning_rate": 9.391304347826087e-06,
"loss": 2.0065,
"step": 25
},
{
"epoch": 0.5418430478671442,
"grad_norm": 3.0625,
"learning_rate": 9.347826086956523e-06,
"loss": 1.9979,
"step": 26
},
{
"epoch": 0.5626831650928037,
"grad_norm": 2.921875,
"learning_rate": 9.304347826086956e-06,
"loss": 1.9669,
"step": 27
},
{
"epoch": 0.5835232823184631,
"grad_norm": 2.78125,
"learning_rate": 9.260869565217391e-06,
"loss": 1.9712,
"step": 28
},
{
"epoch": 0.6043633995441224,
"grad_norm": 2.640625,
"learning_rate": 9.217391304347826e-06,
"loss": 1.9429,
"step": 29
},
{
"epoch": 0.6252035167697818,
"grad_norm": 2.5,
"learning_rate": 9.173913043478261e-06,
"loss": 1.9281,
"step": 30
},
{
"epoch": 0.6460436339954412,
"grad_norm": 2.4375,
"learning_rate": 9.130434782608697e-06,
"loss": 1.9199,
"step": 31
},
{
"epoch": 0.6668837512211007,
"grad_norm": 2.359375,
"learning_rate": 9.086956521739132e-06,
"loss": 1.9112,
"step": 32
},
{
"epoch": 0.68772386844676,
"grad_norm": 2.328125,
"learning_rate": 9.043478260869565e-06,
"loss": 1.8879,
"step": 33
},
{
"epoch": 0.7085639856724194,
"grad_norm": 2.296875,
"learning_rate": 9e-06,
"loss": 1.8841,
"step": 34
},
{
"epoch": 0.7294041028980788,
"grad_norm": 2.25,
"learning_rate": 8.956521739130435e-06,
"loss": 1.8732,
"step": 35
},
{
"epoch": 0.7502442201237381,
"grad_norm": 2.21875,
"learning_rate": 8.91304347826087e-06,
"loss": 1.8634,
"step": 36
},
{
"epoch": 0.7710843373493976,
"grad_norm": 2.171875,
"learning_rate": 8.869565217391306e-06,
"loss": 1.8623,
"step": 37
},
{
"epoch": 0.791924454575057,
"grad_norm": 2.125,
"learning_rate": 8.82608695652174e-06,
"loss": 1.8433,
"step": 38
},
{
"epoch": 0.8127645718007164,
"grad_norm": 2.09375,
"learning_rate": 8.782608695652174e-06,
"loss": 1.8346,
"step": 39
},
{
"epoch": 0.8336046890263757,
"grad_norm": 2.0625,
"learning_rate": 8.73913043478261e-06,
"loss": 1.8248,
"step": 40
},
{
"epoch": 0.8544448062520351,
"grad_norm": 2.0,
"learning_rate": 8.695652173913044e-06,
"loss": 1.8086,
"step": 41
},
{
"epoch": 0.8752849234776946,
"grad_norm": 1.9453125,
"learning_rate": 8.65217391304348e-06,
"loss": 1.8035,
"step": 42
},
{
"epoch": 0.896125040703354,
"grad_norm": 1.8984375,
"learning_rate": 8.608695652173915e-06,
"loss": 1.7954,
"step": 43
},
{
"epoch": 0.9169651579290133,
"grad_norm": 1.8203125,
"learning_rate": 8.56521739130435e-06,
"loss": 1.7912,
"step": 44
},
{
"epoch": 0.9378052751546727,
"grad_norm": 1.734375,
"learning_rate": 8.521739130434783e-06,
"loss": 1.7763,
"step": 45
},
{
"epoch": 0.9586453923803321,
"grad_norm": 1.71875,
"learning_rate": 8.478260869565218e-06,
"loss": 1.7602,
"step": 46
},
{
"epoch": 0.9794855096059916,
"grad_norm": 1.640625,
"learning_rate": 8.434782608695653e-06,
"loss": 1.7543,
"step": 47
},
{
"epoch": 1.0,
"grad_norm": 1.609375,
"learning_rate": 8.391304347826089e-06,
"loss": 1.7499,
"step": 48
},
{
"epoch": 1.0,
"eval_loss": 2.5830821990966797,
"eval_runtime": 12.501,
"eval_samples_per_second": 491.321,
"eval_steps_per_second": 30.718,
"step": 48
},
{
"epoch": 1.0,
"eval_title2sid_loss": 1.0781381130218506,
"eval_title2sid_runtime": 6.3056,
"eval_title2sid_samples_per_second": 603.595,
"eval_title2sid_steps_per_second": 37.744,
"step": 48
},
{
"epoch": 1.0,
"eval_title2sid_loss": 1.0781381130218506,
"eval_title2sid_runtime": 6.3056,
"eval_title2sid_samples_per_second": 603.595,
"eval_title2sid_steps_per_second": 37.744,
"step": 48
},
{
"epoch": 1.0,
"eval_sid2title_loss": 0.33906614780426025,
"eval_sid2title_runtime": 6.3626,
"eval_sid2title_samples_per_second": 601.486,
"eval_sid2title_steps_per_second": 37.721,
"step": 48
},
{
"epoch": 1.0,
"eval_sid2title_loss": 0.33906614780426025,
"eval_sid2title_runtime": 6.3626,
"eval_sid2title_samples_per_second": 601.486,
"eval_sid2title_steps_per_second": 37.721,
"step": 48
},
{
"epoch": 1.0208401172256594,
"grad_norm": 1.5703125,
"learning_rate": 8.347826086956522e-06,
"loss": 1.7506,
"step": 49
},
{
"epoch": 1.0416802344513187,
"grad_norm": 1.5859375,
"learning_rate": 8.304347826086957e-06,
"loss": 1.7408,
"step": 50
},
{
"epoch": 1.0625203516769781,
"grad_norm": 1.640625,
"learning_rate": 8.260869565217392e-06,
"loss": 1.7221,
"step": 51
},
{
"epoch": 1.0833604689026375,
"grad_norm": 1.953125,
"learning_rate": 8.217391304347827e-06,
"loss": 1.7097,
"step": 52
},
{
"epoch": 1.104200586128297,
"grad_norm": 2.078125,
"learning_rate": 8.173913043478263e-06,
"loss": 1.7002,
"step": 53
},
{
"epoch": 1.1250407033539565,
"grad_norm": 4.53125,
"learning_rate": 8.130434782608696e-06,
"loss": 1.6878,
"step": 54
},
{
"epoch": 1.1458808205796158,
"grad_norm": 4.84375,
"learning_rate": 8.086956521739131e-06,
"loss": 1.6962,
"step": 55
},
{
"epoch": 1.1667209378052752,
"grad_norm": 3.78125,
"learning_rate": 8.043478260869566e-06,
"loss": 1.6896,
"step": 56
},
{
"epoch": 1.1875610550309346,
"grad_norm": 1.8828125,
"learning_rate": 8.000000000000001e-06,
"loss": 1.6817,
"step": 57
},
{
"epoch": 1.208401172256594,
"grad_norm": 1.8671875,
"learning_rate": 7.956521739130435e-06,
"loss": 1.6763,
"step": 58
},
{
"epoch": 1.2292412894822533,
"grad_norm": 2.0625,
"learning_rate": 7.91304347826087e-06,
"loss": 1.6671,
"step": 59
},
{
"epoch": 1.2500814067079127,
"grad_norm": 1.984375,
"learning_rate": 7.869565217391305e-06,
"loss": 1.6616,
"step": 60
},
{
"epoch": 1.270921523933572,
"grad_norm": 1.6875,
"learning_rate": 7.82608695652174e-06,
"loss": 1.6532,
"step": 61
},
{
"epoch": 1.2917616411592316,
"grad_norm": 1.4765625,
"learning_rate": 7.782608695652174e-06,
"loss": 1.6553,
"step": 62
},
{
"epoch": 1.3126017583848908,
"grad_norm": 1.4609375,
"learning_rate": 7.739130434782609e-06,
"loss": 1.6433,
"step": 63
},
{
"epoch": 1.3334418756105504,
"grad_norm": 1.7421875,
"learning_rate": 7.695652173913044e-06,
"loss": 1.6379,
"step": 64
},
{
"epoch": 1.3542819928362098,
"grad_norm": 1.890625,
"learning_rate": 7.652173913043479e-06,
"loss": 1.6323,
"step": 65
},
{
"epoch": 1.3751221100618691,
"grad_norm": 1.625,
"learning_rate": 7.608695652173914e-06,
"loss": 1.6372,
"step": 66
},
{
"epoch": 1.3959622272875285,
"grad_norm": 1.453125,
"learning_rate": 7.565217391304348e-06,
"loss": 1.6324,
"step": 67
},
{
"epoch": 1.4168023445131879,
"grad_norm": 1.4296875,
"learning_rate": 7.5217391304347835e-06,
"loss": 1.6236,
"step": 68
},
{
"epoch": 1.4376424617388472,
"grad_norm": 1.453125,
"learning_rate": 7.478260869565218e-06,
"loss": 1.6177,
"step": 69
},
{
"epoch": 1.4584825789645066,
"grad_norm": 1.4765625,
"learning_rate": 7.434782608695653e-06,
"loss": 1.6162,
"step": 70
},
{
"epoch": 1.479322696190166,
"grad_norm": 1.34375,
"learning_rate": 7.391304347826087e-06,
"loss": 1.609,
"step": 71
},
{
"epoch": 1.5001628134158254,
"grad_norm": 1.2265625,
"learning_rate": 7.347826086956522e-06,
"loss": 1.6035,
"step": 72
},
{
"epoch": 1.521002930641485,
"grad_norm": 1.1953125,
"learning_rate": 7.304347826086957e-06,
"loss": 1.5981,
"step": 73
},
{
"epoch": 1.541843047867144,
"grad_norm": 1.1796875,
"learning_rate": 7.2608695652173925e-06,
"loss": 1.5962,
"step": 74
},
{
"epoch": 1.5626831650928037,
"grad_norm": 1.1640625,
"learning_rate": 7.217391304347827e-06,
"loss": 1.5896,
"step": 75
},
{
"epoch": 1.583523282318463,
"grad_norm": 1.109375,
"learning_rate": 7.173913043478261e-06,
"loss": 1.5733,
"step": 76
},
{
"epoch": 1.6043633995441224,
"grad_norm": 1.0703125,
"learning_rate": 7.130434782608696e-06,
"loss": 1.5767,
"step": 77
},
{
"epoch": 1.6252035167697818,
"grad_norm": 1.0625,
"learning_rate": 7.086956521739131e-06,
"loss": 1.5679,
"step": 78
},
{
"epoch": 1.6460436339954412,
"grad_norm": 1.0390625,
"learning_rate": 7.0434782608695665e-06,
"loss": 1.5756,
"step": 79
},
{
"epoch": 1.6668837512211008,
"grad_norm": 1.0234375,
"learning_rate": 7e-06,
"loss": 1.5833,
"step": 80
},
{
"epoch": 1.68772386844676,
"grad_norm": 1.0078125,
"learning_rate": 6.956521739130435e-06,
"loss": 1.5711,
"step": 81
},
{
"epoch": 1.7085639856724195,
"grad_norm": 1.0078125,
"learning_rate": 6.91304347826087e-06,
"loss": 1.5709,
"step": 82
},
{
"epoch": 1.7294041028980787,
"grad_norm": 1.0,
"learning_rate": 6.869565217391305e-06,
"loss": 1.5639,
"step": 83
},
{
"epoch": 1.7502442201237383,
"grad_norm": 0.96875,
"learning_rate": 6.8260869565217395e-06,
"loss": 1.5644,
"step": 84
},
{
"epoch": 1.7710843373493976,
"grad_norm": 0.94140625,
"learning_rate": 6.782608695652174e-06,
"loss": 1.5628,
"step": 85
},
{
"epoch": 1.791924454575057,
"grad_norm": 0.95703125,
"learning_rate": 6.739130434782609e-06,
"loss": 1.5604,
"step": 86
},
{
"epoch": 1.8127645718007164,
"grad_norm": 0.91796875,
"learning_rate": 6.695652173913044e-06,
"loss": 1.5658,
"step": 87
},
{
"epoch": 1.8336046890263757,
"grad_norm": 0.87890625,
"learning_rate": 6.652173913043479e-06,
"loss": 1.5471,
"step": 88
},
{
"epoch": 1.8544448062520351,
"grad_norm": 0.8359375,
"learning_rate": 6.6086956521739135e-06,
"loss": 1.5424,
"step": 89
},
{
"epoch": 1.8752849234776945,
"grad_norm": 0.796875,
"learning_rate": 6.565217391304349e-06,
"loss": 1.5423,
"step": 90
},
{
"epoch": 1.896125040703354,
"grad_norm": 0.7734375,
"learning_rate": 6.521739130434783e-06,
"loss": 1.5493,
"step": 91
},
{
"epoch": 1.9169651579290132,
"grad_norm": 0.765625,
"learning_rate": 6.478260869565218e-06,
"loss": 1.5388,
"step": 92
},
{
"epoch": 1.9378052751546728,
"grad_norm": 0.7109375,
"learning_rate": 6.434782608695652e-06,
"loss": 1.5494,
"step": 93
},
{
"epoch": 1.958645392380332,
"grad_norm": 0.6875,
"learning_rate": 6.391304347826087e-06,
"loss": 1.5367,
"step": 94
},
{
"epoch": 1.9794855096059916,
"grad_norm": 0.65234375,
"learning_rate": 6.3478260869565225e-06,
"loss": 1.5398,
"step": 95
},
{
"epoch": 2.0,
"grad_norm": 0.6328125,
"learning_rate": 6.304347826086958e-06,
"loss": 1.5429,
"step": 96
},
{
"epoch": 2.0,
"eval_loss": 2.606393814086914,
"eval_runtime": 12.5226,
"eval_samples_per_second": 490.473,
"eval_steps_per_second": 30.665,
"step": 96
},
{
"epoch": 2.0,
"eval_title2sid_loss": 1.0881061553955078,
"eval_title2sid_runtime": 6.3694,
"eval_title2sid_samples_per_second": 597.541,
"eval_title2sid_steps_per_second": 37.366,
"step": 96
},
{
"epoch": 2.0,
"eval_title2sid_loss": 1.0881061553955078,
"eval_title2sid_runtime": 6.3694,
"eval_title2sid_samples_per_second": 597.541,
"eval_title2sid_steps_per_second": 37.366,
"step": 96
},
{
"epoch": 2.0,
"eval_sid2title_loss": 0.3378770351409912,
"eval_sid2title_runtime": 8.2901,
"eval_sid2title_samples_per_second": 461.637,
"eval_sid2title_steps_per_second": 28.95,
"step": 96
},
{
"epoch": 2.0,
"eval_sid2title_loss": 0.3378770351409912,
"eval_sid2title_runtime": 8.2901,
"eval_sid2title_samples_per_second": 461.637,
"eval_sid2title_steps_per_second": 28.95,
"step": 96
}
],
"logging_steps": 1,
"max_steps": 240,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 1,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 1
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.8799889325214925e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}