{
  "best_global_step": 48,
  "best_metric": 2.5830821990966797,
  "best_model_checkpoint": "output_dir/sft_reasoning-activation_7task-E2E_Qwen3-1.7B_Games/checkpoint-48",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 96,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.020840117225659396,
      "grad_norm": 6.09375,
      "learning_rate": 0.0,
      "loss": 2.3945,
      "step": 1
    },
    {
      "epoch": 0.04168023445131879,
      "grad_norm": 6.09375,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 2.3936,
      "step": 2
    },
    {
      "epoch": 0.06252035167697818,
      "grad_norm": 6.21875,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 2.4141,
      "step": 3
    },
    {
      "epoch": 0.08336046890263758,
      "grad_norm": 6.0625,
      "learning_rate": 3e-06,
      "loss": 2.3931,
      "step": 4
    },
    {
      "epoch": 0.10420058612829697,
      "grad_norm": 6.125,
      "learning_rate": 4.000000000000001e-06,
      "loss": 2.3831,
      "step": 5
    },
    {
      "epoch": 0.12504070335395637,
      "grad_norm": 6.03125,
      "learning_rate": 5e-06,
      "loss": 2.3776,
      "step": 6
    },
    {
      "epoch": 0.14588082057961577,
      "grad_norm": 5.875,
      "learning_rate": 6e-06,
      "loss": 2.3656,
      "step": 7
    },
    {
      "epoch": 0.16672093780527517,
      "grad_norm": 5.78125,
      "learning_rate": 7e-06,
      "loss": 2.3665,
      "step": 8
    },
    {
      "epoch": 0.18756105503093454,
      "grad_norm": 5.6875,
      "learning_rate": 8.000000000000001e-06,
      "loss": 2.348,
      "step": 9
    },
    {
      "epoch": 0.20840117225659394,
      "grad_norm": 5.53125,
      "learning_rate": 9e-06,
      "loss": 2.3125,
      "step": 10
    },
    {
      "epoch": 0.22924128948225334,
      "grad_norm": 5.3125,
      "learning_rate": 1e-05,
      "loss": 2.2892,
      "step": 11
    },
    {
      "epoch": 0.25008140670791273,
      "grad_norm": 5.09375,
      "learning_rate": 9.956521739130436e-06,
      "loss": 2.2553,
      "step": 12
    },
    {
      "epoch": 0.2709215239335721,
      "grad_norm": 4.9375,
      "learning_rate": 9.913043478260871e-06,
      "loss": 2.238,
      "step": 13
    },
    {
      "epoch": 0.29176164115923153,
      "grad_norm": 4.875,
      "learning_rate": 9.869565217391304e-06,
      "loss": 2.2174,
      "step": 14
    },
    {
      "epoch": 0.3126017583848909,
      "grad_norm": 4.75,
      "learning_rate": 9.82608695652174e-06,
      "loss": 2.188,
      "step": 15
    },
    {
      "epoch": 0.33344187561055033,
      "grad_norm": 4.53125,
      "learning_rate": 9.782608695652175e-06,
      "loss": 2.1638,
      "step": 16
    },
    {
      "epoch": 0.3542819928362097,
      "grad_norm": 4.375,
      "learning_rate": 9.73913043478261e-06,
      "loss": 2.1385,
      "step": 17
    },
    {
      "epoch": 0.3751221100618691,
      "grad_norm": 4.21875,
      "learning_rate": 9.695652173913043e-06,
      "loss": 2.1308,
      "step": 18
    },
    {
      "epoch": 0.3959622272875285,
      "grad_norm": 4.09375,
      "learning_rate": 9.652173913043478e-06,
      "loss": 2.1104,
      "step": 19
    },
    {
      "epoch": 0.4168023445131879,
      "grad_norm": 3.953125,
      "learning_rate": 9.608695652173914e-06,
      "loss": 2.0834,
      "step": 20
    },
    {
      "epoch": 0.4376424617388473,
      "grad_norm": 3.765625,
      "learning_rate": 9.565217391304349e-06,
      "loss": 2.0657,
      "step": 21
    },
    {
      "epoch": 0.45848257896450667,
      "grad_norm": 3.609375,
      "learning_rate": 9.521739130434784e-06,
      "loss": 2.0554,
      "step": 22
    },
    {
      "epoch": 0.47932269619016604,
      "grad_norm": 3.421875,
      "learning_rate": 9.478260869565217e-06,
      "loss": 2.0381,
      "step": 23
    },
    {
      "epoch": 0.5001628134158255,
      "grad_norm": 3.265625,
      "learning_rate": 9.434782608695652e-06,
      "loss": 2.006,
      "step": 24
    },
    {
      "epoch": 0.5210029306414848,
      "grad_norm": 3.15625,
      "learning_rate": 9.391304347826087e-06,
      "loss": 2.0065,
      "step": 25
    },
    {
      "epoch": 0.5418430478671442,
      "grad_norm": 3.0625,
      "learning_rate": 9.347826086956523e-06,
      "loss": 1.9979,
      "step": 26
    },
    {
      "epoch": 0.5626831650928037,
      "grad_norm": 2.921875,
      "learning_rate": 9.304347826086956e-06,
      "loss": 1.9669,
      "step": 27
    },
    {
      "epoch": 0.5835232823184631,
      "grad_norm": 2.78125,
      "learning_rate": 9.260869565217391e-06,
      "loss": 1.9712,
      "step": 28
    },
    {
      "epoch": 0.6043633995441224,
      "grad_norm": 2.640625,
      "learning_rate": 9.217391304347826e-06,
      "loss": 1.9429,
      "step": 29
    },
    {
      "epoch": 0.6252035167697818,
      "grad_norm": 2.5,
      "learning_rate": 9.173913043478261e-06,
      "loss": 1.9281,
      "step": 30
    },
    {
      "epoch": 0.6460436339954412,
      "grad_norm": 2.4375,
      "learning_rate": 9.130434782608697e-06,
      "loss": 1.9199,
      "step": 31
    },
    {
      "epoch": 0.6668837512211007,
      "grad_norm": 2.359375,
      "learning_rate": 9.086956521739132e-06,
      "loss": 1.9112,
      "step": 32
    },
    {
      "epoch": 0.68772386844676,
      "grad_norm": 2.328125,
      "learning_rate": 9.043478260869565e-06,
      "loss": 1.8879,
      "step": 33
    },
    {
      "epoch": 0.7085639856724194,
      "grad_norm": 2.296875,
      "learning_rate": 9e-06,
      "loss": 1.8841,
      "step": 34
    },
    {
      "epoch": 0.7294041028980788,
      "grad_norm": 2.25,
      "learning_rate": 8.956521739130435e-06,
      "loss": 1.8732,
      "step": 35
    },
    {
      "epoch": 0.7502442201237381,
      "grad_norm": 2.21875,
      "learning_rate": 8.91304347826087e-06,
      "loss": 1.8634,
      "step": 36
    },
    {
      "epoch": 0.7710843373493976,
      "grad_norm": 2.171875,
      "learning_rate": 8.869565217391306e-06,
      "loss": 1.8623,
      "step": 37
    },
    {
      "epoch": 0.791924454575057,
      "grad_norm": 2.125,
      "learning_rate": 8.82608695652174e-06,
      "loss": 1.8433,
      "step": 38
    },
    {
      "epoch": 0.8127645718007164,
      "grad_norm": 2.09375,
      "learning_rate": 8.782608695652174e-06,
      "loss": 1.8346,
      "step": 39
    },
    {
      "epoch": 0.8336046890263757,
      "grad_norm": 2.0625,
      "learning_rate": 8.73913043478261e-06,
      "loss": 1.8248,
      "step": 40
    },
    {
      "epoch": 0.8544448062520351,
      "grad_norm": 2.0,
      "learning_rate": 8.695652173913044e-06,
      "loss": 1.8086,
      "step": 41
    },
    {
      "epoch": 0.8752849234776946,
      "grad_norm": 1.9453125,
      "learning_rate": 8.65217391304348e-06,
      "loss": 1.8035,
      "step": 42
    },
    {
      "epoch": 0.896125040703354,
      "grad_norm": 1.8984375,
      "learning_rate": 8.608695652173915e-06,
      "loss": 1.7954,
      "step": 43
    },
    {
      "epoch": 0.9169651579290133,
      "grad_norm": 1.8203125,
      "learning_rate": 8.56521739130435e-06,
      "loss": 1.7912,
      "step": 44
    },
    {
      "epoch": 0.9378052751546727,
      "grad_norm": 1.734375,
      "learning_rate": 8.521739130434783e-06,
      "loss": 1.7763,
      "step": 45
    },
    {
      "epoch": 0.9586453923803321,
      "grad_norm": 1.71875,
      "learning_rate": 8.478260869565218e-06,
      "loss": 1.7602,
      "step": 46
    },
    {
      "epoch": 0.9794855096059916,
      "grad_norm": 1.640625,
      "learning_rate": 8.434782608695653e-06,
      "loss": 1.7543,
      "step": 47
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.609375,
      "learning_rate": 8.391304347826089e-06,
      "loss": 1.7499,
      "step": 48
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.5830821990966797,
      "eval_runtime": 12.501,
      "eval_samples_per_second": 491.321,
      "eval_steps_per_second": 30.718,
      "step": 48
    },
    {
      "epoch": 1.0,
      "eval_title2sid_loss": 1.0781381130218506,
      "eval_title2sid_runtime": 6.3056,
      "eval_title2sid_samples_per_second": 603.595,
      "eval_title2sid_steps_per_second": 37.744,
      "step": 48
    },
    {
      "epoch": 1.0,
      "eval_title2sid_loss": 1.0781381130218506,
      "eval_title2sid_runtime": 6.3056,
      "eval_title2sid_samples_per_second": 603.595,
      "eval_title2sid_steps_per_second": 37.744,
      "step": 48
    },
    {
      "epoch": 1.0,
      "eval_sid2title_loss": 0.33906614780426025,
      "eval_sid2title_runtime": 6.3626,
      "eval_sid2title_samples_per_second": 601.486,
      "eval_sid2title_steps_per_second": 37.721,
      "step": 48
    },
    {
      "epoch": 1.0,
      "eval_sid2title_loss": 0.33906614780426025,
      "eval_sid2title_runtime": 6.3626,
      "eval_sid2title_samples_per_second": 601.486,
      "eval_sid2title_steps_per_second": 37.721,
      "step": 48
    },
    {
      "epoch": 1.0208401172256594,
      "grad_norm": 1.5703125,
      "learning_rate": 8.347826086956522e-06,
      "loss": 1.7506,
      "step": 49
    },
    {
      "epoch": 1.0416802344513187,
      "grad_norm": 1.5859375,
      "learning_rate": 8.304347826086957e-06,
      "loss": 1.7408,
      "step": 50
    },
    {
      "epoch": 1.0625203516769781,
      "grad_norm": 1.640625,
      "learning_rate": 8.260869565217392e-06,
      "loss": 1.7221,
      "step": 51
    },
    {
      "epoch": 1.0833604689026375,
      "grad_norm": 1.953125,
      "learning_rate": 8.217391304347827e-06,
      "loss": 1.7097,
      "step": 52
    },
    {
      "epoch": 1.104200586128297,
      "grad_norm": 2.078125,
      "learning_rate": 8.173913043478263e-06,
      "loss": 1.7002,
      "step": 53
    },
    {
      "epoch": 1.1250407033539565,
      "grad_norm": 4.53125,
      "learning_rate": 8.130434782608696e-06,
      "loss": 1.6878,
      "step": 54
    },
    {
      "epoch": 1.1458808205796158,
      "grad_norm": 4.84375,
      "learning_rate": 8.086956521739131e-06,
      "loss": 1.6962,
      "step": 55
    },
    {
      "epoch": 1.1667209378052752,
      "grad_norm": 3.78125,
      "learning_rate": 8.043478260869566e-06,
      "loss": 1.6896,
      "step": 56
    },
    {
      "epoch": 1.1875610550309346,
      "grad_norm": 1.8828125,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.6817,
      "step": 57
    },
    {
      "epoch": 1.208401172256594,
      "grad_norm": 1.8671875,
      "learning_rate": 7.956521739130435e-06,
      "loss": 1.6763,
      "step": 58
    },
    {
      "epoch": 1.2292412894822533,
      "grad_norm": 2.0625,
      "learning_rate": 7.91304347826087e-06,
      "loss": 1.6671,
      "step": 59
    },
    {
      "epoch": 1.2500814067079127,
      "grad_norm": 1.984375,
      "learning_rate": 7.869565217391305e-06,
      "loss": 1.6616,
      "step": 60
    },
    {
      "epoch": 1.270921523933572,
      "grad_norm": 1.6875,
      "learning_rate": 7.82608695652174e-06,
      "loss": 1.6532,
      "step": 61
    },
    {
      "epoch": 1.2917616411592316,
      "grad_norm": 1.4765625,
      "learning_rate": 7.782608695652174e-06,
      "loss": 1.6553,
      "step": 62
    },
    {
      "epoch": 1.3126017583848908,
      "grad_norm": 1.4609375,
      "learning_rate": 7.739130434782609e-06,
      "loss": 1.6433,
      "step": 63
    },
    {
      "epoch": 1.3334418756105504,
      "grad_norm": 1.7421875,
      "learning_rate": 7.695652173913044e-06,
      "loss": 1.6379,
      "step": 64
    },
    {
      "epoch": 1.3542819928362098,
      "grad_norm": 1.890625,
      "learning_rate": 7.652173913043479e-06,
      "loss": 1.6323,
      "step": 65
    },
    {
      "epoch": 1.3751221100618691,
      "grad_norm": 1.625,
      "learning_rate": 7.608695652173914e-06,
      "loss": 1.6372,
      "step": 66
    },
    {
      "epoch": 1.3959622272875285,
      "grad_norm": 1.453125,
      "learning_rate": 7.565217391304348e-06,
      "loss": 1.6324,
      "step": 67
    },
    {
      "epoch": 1.4168023445131879,
      "grad_norm": 1.4296875,
      "learning_rate": 7.5217391304347835e-06,
      "loss": 1.6236,
      "step": 68
    },
    {
      "epoch": 1.4376424617388472,
      "grad_norm": 1.453125,
      "learning_rate": 7.478260869565218e-06,
      "loss": 1.6177,
      "step": 69
    },
    {
      "epoch": 1.4584825789645066,
      "grad_norm": 1.4765625,
      "learning_rate": 7.434782608695653e-06,
      "loss": 1.6162,
      "step": 70
    },
    {
      "epoch": 1.479322696190166,
      "grad_norm": 1.34375,
      "learning_rate": 7.391304347826087e-06,
      "loss": 1.609,
      "step": 71
    },
    {
      "epoch": 1.5001628134158254,
      "grad_norm": 1.2265625,
      "learning_rate": 7.347826086956522e-06,
      "loss": 1.6035,
      "step": 72
    },
    {
      "epoch": 1.521002930641485,
      "grad_norm": 1.1953125,
      "learning_rate": 7.304347826086957e-06,
      "loss": 1.5981,
      "step": 73
    },
    {
      "epoch": 1.541843047867144,
      "grad_norm": 1.1796875,
      "learning_rate": 7.2608695652173925e-06,
      "loss": 1.5962,
      "step": 74
    },
    {
      "epoch": 1.5626831650928037,
      "grad_norm": 1.1640625,
      "learning_rate": 7.217391304347827e-06,
      "loss": 1.5896,
      "step": 75
    },
    {
      "epoch": 1.583523282318463,
      "grad_norm": 1.109375,
      "learning_rate": 7.173913043478261e-06,
      "loss": 1.5733,
      "step": 76
    },
    {
      "epoch": 1.6043633995441224,
      "grad_norm": 1.0703125,
      "learning_rate": 7.130434782608696e-06,
      "loss": 1.5767,
      "step": 77
    },
    {
      "epoch": 1.6252035167697818,
      "grad_norm": 1.0625,
      "learning_rate": 7.086956521739131e-06,
      "loss": 1.5679,
      "step": 78
    },
    {
      "epoch": 1.6460436339954412,
      "grad_norm": 1.0390625,
      "learning_rate": 7.0434782608695665e-06,
      "loss": 1.5756,
      "step": 79
    },
    {
      "epoch": 1.6668837512211008,
      "grad_norm": 1.0234375,
      "learning_rate": 7e-06,
      "loss": 1.5833,
      "step": 80
    },
    {
      "epoch": 1.68772386844676,
      "grad_norm": 1.0078125,
      "learning_rate": 6.956521739130435e-06,
      "loss": 1.5711,
      "step": 81
    },
    {
      "epoch": 1.7085639856724195,
      "grad_norm": 1.0078125,
      "learning_rate": 6.91304347826087e-06,
      "loss": 1.5709,
      "step": 82
    },
    {
      "epoch": 1.7294041028980787,
      "grad_norm": 1.0,
      "learning_rate": 6.869565217391305e-06,
      "loss": 1.5639,
      "step": 83
    },
    {
      "epoch": 1.7502442201237383,
      "grad_norm": 0.96875,
      "learning_rate": 6.8260869565217395e-06,
      "loss": 1.5644,
      "step": 84
    },
    {
      "epoch": 1.7710843373493976,
      "grad_norm": 0.94140625,
      "learning_rate": 6.782608695652174e-06,
      "loss": 1.5628,
      "step": 85
    },
    {
      "epoch": 1.791924454575057,
      "grad_norm": 0.95703125,
      "learning_rate": 6.739130434782609e-06,
      "loss": 1.5604,
      "step": 86
    },
    {
      "epoch": 1.8127645718007164,
      "grad_norm": 0.91796875,
      "learning_rate": 6.695652173913044e-06,
      "loss": 1.5658,
      "step": 87
    },
    {
      "epoch": 1.8336046890263757,
      "grad_norm": 0.87890625,
      "learning_rate": 6.652173913043479e-06,
      "loss": 1.5471,
      "step": 88
    },
    {
      "epoch": 1.8544448062520351,
      "grad_norm": 0.8359375,
      "learning_rate": 6.6086956521739135e-06,
      "loss": 1.5424,
      "step": 89
    },
    {
      "epoch": 1.8752849234776945,
      "grad_norm": 0.796875,
      "learning_rate": 6.565217391304349e-06,
      "loss": 1.5423,
      "step": 90
    },
    {
      "epoch": 1.896125040703354,
      "grad_norm": 0.7734375,
      "learning_rate": 6.521739130434783e-06,
      "loss": 1.5493,
      "step": 91
    },
    {
      "epoch": 1.9169651579290132,
      "grad_norm": 0.765625,
      "learning_rate": 6.478260869565218e-06,
      "loss": 1.5388,
      "step": 92
    },
    {
      "epoch": 1.9378052751546728,
      "grad_norm": 0.7109375,
      "learning_rate": 6.434782608695652e-06,
      "loss": 1.5494,
      "step": 93
    },
    {
      "epoch": 1.958645392380332,
      "grad_norm": 0.6875,
      "learning_rate": 6.391304347826087e-06,
      "loss": 1.5367,
      "step": 94
    },
    {
      "epoch": 1.9794855096059916,
      "grad_norm": 0.65234375,
      "learning_rate": 6.3478260869565225e-06,
      "loss": 1.5398,
      "step": 95
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.6328125,
      "learning_rate": 6.304347826086958e-06,
      "loss": 1.5429,
      "step": 96
    },
    {
      "epoch": 2.0,
      "eval_loss": 2.606393814086914,
      "eval_runtime": 12.5226,
      "eval_samples_per_second": 490.473,
      "eval_steps_per_second": 30.665,
      "step": 96
    },
    {
      "epoch": 2.0,
      "eval_title2sid_loss": 1.0881061553955078,
      "eval_title2sid_runtime": 6.3694,
      "eval_title2sid_samples_per_second": 597.541,
      "eval_title2sid_steps_per_second": 37.366,
      "step": 96
    },
    {
      "epoch": 2.0,
      "eval_title2sid_loss": 1.0881061553955078,
      "eval_title2sid_runtime": 6.3694,
      "eval_title2sid_samples_per_second": 597.541,
      "eval_title2sid_steps_per_second": 37.366,
      "step": 96
    },
    {
      "epoch": 2.0,
      "eval_sid2title_loss": 0.3378770351409912,
      "eval_sid2title_runtime": 8.2901,
      "eval_sid2title_samples_per_second": 461.637,
      "eval_sid2title_steps_per_second": 28.95,
      "step": 96
    },
    {
      "epoch": 2.0,
      "eval_sid2title_loss": 0.3378770351409912,
      "eval_sid2title_runtime": 8.2901,
      "eval_sid2title_samples_per_second": 461.637,
      "eval_sid2title_steps_per_second": 28.95,
      "step": 96
    }
  ],
  "logging_steps": 1,
  "max_steps": 240,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 1
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.8799889325214925e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}