{ "best_global_step": 48, "best_metric": 2.5830821990966797, "best_model_checkpoint": "output_dir/sft_reasoning-activation_7task-E2E_Qwen3-1.7B_Games/checkpoint-48", "epoch": 2.0, "eval_steps": 500, "global_step": 96, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.020840117225659396, "grad_norm": 6.09375, "learning_rate": 0.0, "loss": 2.3945, "step": 1 }, { "epoch": 0.04168023445131879, "grad_norm": 6.09375, "learning_rate": 1.0000000000000002e-06, "loss": 2.3936, "step": 2 }, { "epoch": 0.06252035167697818, "grad_norm": 6.21875, "learning_rate": 2.0000000000000003e-06, "loss": 2.4141, "step": 3 }, { "epoch": 0.08336046890263758, "grad_norm": 6.0625, "learning_rate": 3e-06, "loss": 2.3931, "step": 4 }, { "epoch": 0.10420058612829697, "grad_norm": 6.125, "learning_rate": 4.000000000000001e-06, "loss": 2.3831, "step": 5 }, { "epoch": 0.12504070335395637, "grad_norm": 6.03125, "learning_rate": 5e-06, "loss": 2.3776, "step": 6 }, { "epoch": 0.14588082057961577, "grad_norm": 5.875, "learning_rate": 6e-06, "loss": 2.3656, "step": 7 }, { "epoch": 0.16672093780527517, "grad_norm": 5.78125, "learning_rate": 7e-06, "loss": 2.3665, "step": 8 }, { "epoch": 0.18756105503093454, "grad_norm": 5.6875, "learning_rate": 8.000000000000001e-06, "loss": 2.348, "step": 9 }, { "epoch": 0.20840117225659394, "grad_norm": 5.53125, "learning_rate": 9e-06, "loss": 2.3125, "step": 10 }, { "epoch": 0.22924128948225334, "grad_norm": 5.3125, "learning_rate": 1e-05, "loss": 2.2892, "step": 11 }, { "epoch": 0.25008140670791273, "grad_norm": 5.09375, "learning_rate": 9.956521739130436e-06, "loss": 2.2553, "step": 12 }, { "epoch": 0.2709215239335721, "grad_norm": 4.9375, "learning_rate": 9.913043478260871e-06, "loss": 2.238, "step": 13 }, { "epoch": 0.29176164115923153, "grad_norm": 4.875, "learning_rate": 9.869565217391304e-06, "loss": 2.2174, "step": 14 }, { "epoch": 0.3126017583848909, "grad_norm": 4.75, "learning_rate": 9.82608695652174e-06, "loss": 2.188, "step": 15 }, { "epoch": 0.33344187561055033, "grad_norm": 4.53125, "learning_rate": 9.782608695652175e-06, "loss": 2.1638, "step": 16 }, { "epoch": 0.3542819928362097, "grad_norm": 4.375, "learning_rate": 9.73913043478261e-06, "loss": 2.1385, "step": 17 }, { "epoch": 0.3751221100618691, "grad_norm": 4.21875, "learning_rate": 9.695652173913043e-06, "loss": 2.1308, "step": 18 }, { "epoch": 0.3959622272875285, "grad_norm": 4.09375, "learning_rate": 9.652173913043478e-06, "loss": 2.1104, "step": 19 }, { "epoch": 0.4168023445131879, "grad_norm": 3.953125, "learning_rate": 9.608695652173914e-06, "loss": 2.0834, "step": 20 }, { "epoch": 0.4376424617388473, "grad_norm": 3.765625, "learning_rate": 9.565217391304349e-06, "loss": 2.0657, "step": 21 }, { "epoch": 0.45848257896450667, "grad_norm": 3.609375, "learning_rate": 9.521739130434784e-06, "loss": 2.0554, "step": 22 }, { "epoch": 0.47932269619016604, "grad_norm": 3.421875, "learning_rate": 9.478260869565217e-06, "loss": 2.0381, "step": 23 }, { "epoch": 0.5001628134158255, "grad_norm": 3.265625, "learning_rate": 9.434782608695652e-06, "loss": 2.006, "step": 24 }, { "epoch": 0.5210029306414848, "grad_norm": 3.15625, "learning_rate": 9.391304347826087e-06, "loss": 2.0065, "step": 25 }, { "epoch": 0.5418430478671442, "grad_norm": 3.0625, "learning_rate": 9.347826086956523e-06, "loss": 1.9979, "step": 26 }, { "epoch": 0.5626831650928037, "grad_norm": 2.921875, "learning_rate": 9.304347826086956e-06, "loss": 1.9669, "step": 27 }, { "epoch": 0.5835232823184631, "grad_norm": 2.78125, "learning_rate": 9.260869565217391e-06, "loss": 1.9712, "step": 28 }, { "epoch": 0.6043633995441224, "grad_norm": 2.640625, "learning_rate": 9.217391304347826e-06, "loss": 1.9429, "step": 29 }, { "epoch": 0.6252035167697818, "grad_norm": 2.5, "learning_rate": 9.173913043478261e-06, "loss": 1.9281, "step": 30 }, { "epoch": 0.6460436339954412, "grad_norm": 2.4375, "learning_rate": 9.130434782608697e-06, "loss": 1.9199, "step": 31 }, { "epoch": 0.6668837512211007, "grad_norm": 2.359375, "learning_rate": 9.086956521739132e-06, "loss": 1.9112, "step": 32 }, { "epoch": 0.68772386844676, "grad_norm": 2.328125, "learning_rate": 9.043478260869565e-06, "loss": 1.8879, "step": 33 }, { "epoch": 0.7085639856724194, "grad_norm": 2.296875, "learning_rate": 9e-06, "loss": 1.8841, "step": 34 }, { "epoch": 0.7294041028980788, "grad_norm": 2.25, "learning_rate": 8.956521739130435e-06, "loss": 1.8732, "step": 35 }, { "epoch": 0.7502442201237381, "grad_norm": 2.21875, "learning_rate": 8.91304347826087e-06, "loss": 1.8634, "step": 36 }, { "epoch": 0.7710843373493976, "grad_norm": 2.171875, "learning_rate": 8.869565217391306e-06, "loss": 1.8623, "step": 37 }, { "epoch": 0.791924454575057, "grad_norm": 2.125, "learning_rate": 8.82608695652174e-06, "loss": 1.8433, "step": 38 }, { "epoch": 0.8127645718007164, "grad_norm": 2.09375, "learning_rate": 8.782608695652174e-06, "loss": 1.8346, "step": 39 }, { "epoch": 0.8336046890263757, "grad_norm": 2.0625, "learning_rate": 8.73913043478261e-06, "loss": 1.8248, "step": 40 }, { "epoch": 0.8544448062520351, "grad_norm": 2.0, "learning_rate": 8.695652173913044e-06, "loss": 1.8086, "step": 41 }, { "epoch": 0.8752849234776946, "grad_norm": 1.9453125, "learning_rate": 8.65217391304348e-06, "loss": 1.8035, "step": 42 }, { "epoch": 0.896125040703354, "grad_norm": 1.8984375, "learning_rate": 8.608695652173915e-06, "loss": 1.7954, "step": 43 }, { "epoch": 0.9169651579290133, "grad_norm": 1.8203125, "learning_rate": 8.56521739130435e-06, "loss": 1.7912, "step": 44 }, { "epoch": 0.9378052751546727, "grad_norm": 1.734375, "learning_rate": 8.521739130434783e-06, "loss": 1.7763, "step": 45 }, { "epoch": 0.9586453923803321, "grad_norm": 1.71875, "learning_rate": 8.478260869565218e-06, "loss": 1.7602, "step": 46 }, { "epoch": 0.9794855096059916, "grad_norm": 1.640625, "learning_rate": 8.434782608695653e-06, "loss": 1.7543, "step": 47 }, { "epoch": 1.0, "grad_norm": 1.609375, "learning_rate": 8.391304347826089e-06, "loss": 1.7499, "step": 48 }, { "epoch": 1.0, "eval_loss": 2.5830821990966797, "eval_runtime": 12.501, "eval_samples_per_second": 491.321, "eval_steps_per_second": 30.718, "step": 48 }, { "epoch": 1.0, "eval_title2sid_loss": 1.0781381130218506, "eval_title2sid_runtime": 6.3056, "eval_title2sid_samples_per_second": 603.595, "eval_title2sid_steps_per_second": 37.744, "step": 48 }, { "epoch": 1.0, "eval_title2sid_loss": 1.0781381130218506, "eval_title2sid_runtime": 6.3056, "eval_title2sid_samples_per_second": 603.595, "eval_title2sid_steps_per_second": 37.744, "step": 48 }, { "epoch": 1.0, "eval_sid2title_loss": 0.33906614780426025, "eval_sid2title_runtime": 6.3626, "eval_sid2title_samples_per_second": 601.486, "eval_sid2title_steps_per_second": 37.721, "step": 48 }, { "epoch": 1.0, "eval_sid2title_loss": 0.33906614780426025, "eval_sid2title_runtime": 6.3626, "eval_sid2title_samples_per_second": 601.486, "eval_sid2title_steps_per_second": 37.721, "step": 48 }, { "epoch": 1.0208401172256594, "grad_norm": 1.5703125, "learning_rate": 8.347826086956522e-06, "loss": 1.7506, "step": 49 }, { "epoch": 1.0416802344513187, "grad_norm": 1.5859375, "learning_rate": 8.304347826086957e-06, "loss": 1.7408, "step": 50 }, { "epoch": 1.0625203516769781, "grad_norm": 1.640625, "learning_rate": 8.260869565217392e-06, "loss": 1.7221, "step": 51 }, { "epoch": 1.0833604689026375, "grad_norm": 1.953125, "learning_rate": 8.217391304347827e-06, "loss": 1.7097, "step": 52 }, { "epoch": 1.104200586128297, "grad_norm": 2.078125, "learning_rate": 8.173913043478263e-06, "loss": 1.7002, "step": 53 }, { "epoch": 1.1250407033539565, "grad_norm": 4.53125, "learning_rate": 8.130434782608696e-06, "loss": 1.6878, "step": 54 }, { "epoch": 1.1458808205796158, "grad_norm": 4.84375, "learning_rate": 8.086956521739131e-06, "loss": 1.6962, "step": 55 }, { "epoch": 1.1667209378052752, "grad_norm": 3.78125, "learning_rate": 8.043478260869566e-06, "loss": 1.6896, "step": 56 }, { "epoch": 1.1875610550309346, "grad_norm": 1.8828125, "learning_rate": 8.000000000000001e-06, "loss": 1.6817, "step": 57 }, { "epoch": 1.208401172256594, "grad_norm": 1.8671875, "learning_rate": 7.956521739130435e-06, "loss": 1.6763, "step": 58 }, { "epoch": 1.2292412894822533, "grad_norm": 2.0625, "learning_rate": 7.91304347826087e-06, "loss": 1.6671, "step": 59 }, { "epoch": 1.2500814067079127, "grad_norm": 1.984375, "learning_rate": 7.869565217391305e-06, "loss": 1.6616, "step": 60 }, { "epoch": 1.270921523933572, "grad_norm": 1.6875, "learning_rate": 7.82608695652174e-06, "loss": 1.6532, "step": 61 }, { "epoch": 1.2917616411592316, "grad_norm": 1.4765625, "learning_rate": 7.782608695652174e-06, "loss": 1.6553, "step": 62 }, { "epoch": 1.3126017583848908, "grad_norm": 1.4609375, "learning_rate": 7.739130434782609e-06, "loss": 1.6433, "step": 63 }, { "epoch": 1.3334418756105504, "grad_norm": 1.7421875, "learning_rate": 7.695652173913044e-06, "loss": 1.6379, "step": 64 }, { "epoch": 1.3542819928362098, "grad_norm": 1.890625, "learning_rate": 7.652173913043479e-06, "loss": 1.6323, "step": 65 }, { "epoch": 1.3751221100618691, "grad_norm": 1.625, "learning_rate": 7.608695652173914e-06, "loss": 1.6372, "step": 66 }, { "epoch": 1.3959622272875285, "grad_norm": 1.453125, "learning_rate": 7.565217391304348e-06, "loss": 1.6324, "step": 67 }, { "epoch": 1.4168023445131879, "grad_norm": 1.4296875, "learning_rate": 7.5217391304347835e-06, "loss": 1.6236, "step": 68 }, { "epoch": 1.4376424617388472, "grad_norm": 1.453125, "learning_rate": 7.478260869565218e-06, "loss": 1.6177, "step": 69 }, { "epoch": 1.4584825789645066, "grad_norm": 1.4765625, "learning_rate": 7.434782608695653e-06, "loss": 1.6162, "step": 70 }, { "epoch": 1.479322696190166, "grad_norm": 1.34375, "learning_rate": 7.391304347826087e-06, "loss": 1.609, "step": 71 }, { "epoch": 1.5001628134158254, "grad_norm": 1.2265625, "learning_rate": 7.347826086956522e-06, "loss": 1.6035, "step": 72 }, { "epoch": 1.521002930641485, "grad_norm": 1.1953125, "learning_rate": 7.304347826086957e-06, "loss": 1.5981, "step": 73 }, { "epoch": 1.541843047867144, "grad_norm": 1.1796875, "learning_rate": 7.2608695652173925e-06, "loss": 1.5962, "step": 74 }, { "epoch": 1.5626831650928037, "grad_norm": 1.1640625, "learning_rate": 7.217391304347827e-06, "loss": 1.5896, "step": 75 }, { "epoch": 1.583523282318463, "grad_norm": 1.109375, "learning_rate": 7.173913043478261e-06, "loss": 1.5733, "step": 76 }, { "epoch": 1.6043633995441224, "grad_norm": 1.0703125, "learning_rate": 7.130434782608696e-06, "loss": 1.5767, "step": 77 }, { "epoch": 1.6252035167697818, "grad_norm": 1.0625, "learning_rate": 7.086956521739131e-06, "loss": 1.5679, "step": 78 }, { "epoch": 1.6460436339954412, "grad_norm": 1.0390625, "learning_rate": 7.0434782608695665e-06, "loss": 1.5756, "step": 79 }, { "epoch": 1.6668837512211008, "grad_norm": 1.0234375, "learning_rate": 7e-06, "loss": 1.5833, "step": 80 }, { "epoch": 1.68772386844676, "grad_norm": 1.0078125, "learning_rate": 6.956521739130435e-06, "loss": 1.5711, "step": 81 }, { "epoch": 1.7085639856724195, "grad_norm": 1.0078125, "learning_rate": 6.91304347826087e-06, "loss": 1.5709, "step": 82 }, { "epoch": 1.7294041028980787, "grad_norm": 1.0, "learning_rate": 6.869565217391305e-06, "loss": 1.5639, "step": 83 }, { "epoch": 1.7502442201237383, "grad_norm": 0.96875, "learning_rate": 6.8260869565217395e-06, "loss": 1.5644, "step": 84 }, { "epoch": 1.7710843373493976, "grad_norm": 0.94140625, "learning_rate": 6.782608695652174e-06, "loss": 1.5628, "step": 85 }, { "epoch": 1.791924454575057, "grad_norm": 0.95703125, "learning_rate": 6.739130434782609e-06, "loss": 1.5604, "step": 86 }, { "epoch": 1.8127645718007164, "grad_norm": 0.91796875, "learning_rate": 6.695652173913044e-06, "loss": 1.5658, "step": 87 }, { "epoch": 1.8336046890263757, "grad_norm": 0.87890625, "learning_rate": 6.652173913043479e-06, "loss": 1.5471, "step": 88 }, { "epoch": 1.8544448062520351, "grad_norm": 0.8359375, "learning_rate": 6.6086956521739135e-06, "loss": 1.5424, "step": 89 }, { "epoch": 1.8752849234776945, "grad_norm": 0.796875, "learning_rate": 6.565217391304349e-06, "loss": 1.5423, "step": 90 }, { "epoch": 1.896125040703354, "grad_norm": 0.7734375, "learning_rate": 6.521739130434783e-06, "loss": 1.5493, "step": 91 }, { "epoch": 1.9169651579290132, "grad_norm": 0.765625, "learning_rate": 6.478260869565218e-06, "loss": 1.5388, "step": 92 }, { "epoch": 1.9378052751546728, "grad_norm": 0.7109375, "learning_rate": 6.434782608695652e-06, "loss": 1.5494, "step": 93 }, { "epoch": 1.958645392380332, "grad_norm": 0.6875, "learning_rate": 6.391304347826087e-06, "loss": 1.5367, "step": 94 }, { "epoch": 1.9794855096059916, "grad_norm": 0.65234375, "learning_rate": 6.3478260869565225e-06, "loss": 1.5398, "step": 95 }, { "epoch": 2.0, "grad_norm": 0.6328125, "learning_rate": 6.304347826086958e-06, "loss": 1.5429, "step": 96 }, { "epoch": 2.0, "eval_loss": 2.606393814086914, "eval_runtime": 12.5226, "eval_samples_per_second": 490.473, "eval_steps_per_second": 30.665, "step": 96 }, { "epoch": 2.0, "eval_title2sid_loss": 1.0881061553955078, "eval_title2sid_runtime": 6.3694, "eval_title2sid_samples_per_second": 597.541, "eval_title2sid_steps_per_second": 37.366, "step": 96 }, { "epoch": 2.0, "eval_title2sid_loss": 1.0881061553955078, "eval_title2sid_runtime": 6.3694, "eval_title2sid_samples_per_second": 597.541, "eval_title2sid_steps_per_second": 37.366, "step": 96 }, { "epoch": 2.0, "eval_sid2title_loss": 0.3378770351409912, "eval_sid2title_runtime": 8.2901, "eval_sid2title_samples_per_second": 461.637, "eval_sid2title_steps_per_second": 28.95, "step": 96 }, { "epoch": 2.0, "eval_sid2title_loss": 0.3378770351409912, "eval_sid2title_runtime": 8.2901, "eval_sid2title_samples_per_second": 461.637, "eval_sid2title_steps_per_second": 28.95, "step": 96 } ], "logging_steps": 1, "max_steps": 240, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.8799889325214925e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }