{
  "best_global_step": 48,
  "best_metric": 2.5830821990966797,
  "best_model_checkpoint": "output_dir/sft_reasoning-activation_7task-E2E_Qwen3-1.7B_Games/checkpoint-48",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 96,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.020840117225659396,
      "grad_norm": 6.09375,
      "learning_rate": 0.0,
      "loss": 2.3945,
      "step": 1
    },
    {
      "epoch": 0.04168023445131879,
      "grad_norm": 6.09375,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 2.3936,
      "step": 2
    },
    {
      "epoch": 0.06252035167697818,
      "grad_norm": 6.21875,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 2.4141,
      "step": 3
    },
    {
      "epoch": 0.08336046890263758,
      "grad_norm": 6.0625,
      "learning_rate": 3e-06,
      "loss": 2.3931,
      "step": 4
    },
    {
      "epoch": 0.10420058612829697,
      "grad_norm": 6.125,
      "learning_rate": 4.000000000000001e-06,
      "loss": 2.3831,
      "step": 5
    },
    {
      "epoch": 0.12504070335395637,
      "grad_norm": 6.03125,
      "learning_rate": 5e-06,
      "loss": 2.3776,
      "step": 6
    },
    {
      "epoch": 0.14588082057961577,
      "grad_norm": 5.875,
      "learning_rate": 6e-06,
      "loss": 2.3656,
      "step": 7
    },
    {
      "epoch": 0.16672093780527517,
      "grad_norm": 5.78125,
      "learning_rate": 7e-06,
      "loss": 2.3665,
      "step": 8
    },
    {
      "epoch": 0.18756105503093454,
      "grad_norm": 5.6875,
      "learning_rate": 8.000000000000001e-06,
      "loss": 2.348,
      "step": 9
    },
    {
      "epoch": 0.20840117225659394,
      "grad_norm": 5.53125,
      "learning_rate": 9e-06,
      "loss": 2.3125,
      "step": 10
    },
    {
      "epoch": 0.22924128948225334,
      "grad_norm": 5.3125,
      "learning_rate": 1e-05,
      "loss": 2.2892,
      "step": 11
    },
    {
      "epoch": 0.25008140670791273,
      "grad_norm": 5.09375,
      "learning_rate": 9.956521739130436e-06,
      "loss": 2.2553,
      "step": 12
    },
    {
      "epoch": 0.2709215239335721,
      "grad_norm": 4.9375,
      "learning_rate": 9.913043478260871e-06,
      "loss": 2.238,
      "step": 13
    },
    {
      "epoch": 0.29176164115923153,
      "grad_norm": 4.875,
      "learning_rate": 9.869565217391304e-06,
      "loss": 2.2174,
      "step": 14
    },
    {
      "epoch": 0.3126017583848909,
      "grad_norm": 4.75,
      "learning_rate": 9.82608695652174e-06,
      "loss": 2.188,
      "step": 15
    },
    {
      "epoch": 0.33344187561055033,
      "grad_norm": 4.53125,
      "learning_rate": 9.782608695652175e-06,
      "loss": 2.1638,
      "step": 16
    },
    {
      "epoch": 0.3542819928362097,
      "grad_norm": 4.375,
      "learning_rate": 9.73913043478261e-06,
      "loss": 2.1385,
      "step": 17
    },
    {
      "epoch": 0.3751221100618691,
      "grad_norm": 4.21875,
      "learning_rate": 9.695652173913043e-06,
      "loss": 2.1308,
      "step": 18
    },
    {
      "epoch": 0.3959622272875285,
      "grad_norm": 4.09375,
      "learning_rate": 9.652173913043478e-06,
      "loss": 2.1104,
      "step": 19
    },
    {
      "epoch": 0.4168023445131879,
      "grad_norm": 3.953125,
      "learning_rate": 9.608695652173914e-06,
      "loss": 2.0834,
      "step": 20
    },
    {
      "epoch": 0.4376424617388473,
      "grad_norm": 3.765625,
      "learning_rate": 9.565217391304349e-06,
      "loss": 2.0657,
      "step": 21
    },
    {
      "epoch": 0.45848257896450667,
      "grad_norm": 3.609375,
      "learning_rate": 9.521739130434784e-06,
      "loss": 2.0554,
      "step": 22
    },
    {
      "epoch": 0.47932269619016604,
      "grad_norm": 3.421875,
      "learning_rate": 9.478260869565217e-06,
      "loss": 2.0381,
      "step": 23
    },
    {
      "epoch": 0.5001628134158255,
      "grad_norm": 3.265625,
      "learning_rate": 9.434782608695652e-06,
      "loss": 2.006,
      "step": 24
    },
    {
      "epoch": 0.5210029306414848,
      "grad_norm": 3.15625,
      "learning_rate": 9.391304347826087e-06,
      "loss": 2.0065,
      "step": 25
    },
    {
      "epoch": 0.5418430478671442,
      "grad_norm": 3.0625,
      "learning_rate": 9.347826086956523e-06,
      "loss": 1.9979,
      "step": 26
    },
    {
      "epoch": 0.5626831650928037,
      "grad_norm": 2.921875,
      "learning_rate": 9.304347826086956e-06,
      "loss": 1.9669,
      "step": 27
    },
    {
      "epoch": 0.5835232823184631,
      "grad_norm": 2.78125,
      "learning_rate": 9.260869565217391e-06,
      "loss": 1.9712,
      "step": 28
    },
    {
      "epoch": 0.6043633995441224,
      "grad_norm": 2.640625,
      "learning_rate": 9.217391304347826e-06,
      "loss": 1.9429,
      "step": 29
    },
    {
      "epoch": 0.6252035167697818,
      "grad_norm": 2.5,
      "learning_rate": 9.173913043478261e-06,
      "loss": 1.9281,
      "step": 30
    },
    {
      "epoch": 0.6460436339954412,
      "grad_norm": 2.4375,
      "learning_rate": 9.130434782608697e-06,
      "loss": 1.9199,
      "step": 31
    },
    {
      "epoch": 0.6668837512211007,
      "grad_norm": 2.359375,
      "learning_rate": 9.086956521739132e-06,
      "loss": 1.9112,
      "step": 32
    },
    {
      "epoch": 0.68772386844676,
      "grad_norm": 2.328125,
      "learning_rate": 9.043478260869565e-06,
      "loss": 1.8879,
      "step": 33
    },
    {
      "epoch": 0.7085639856724194,
      "grad_norm": 2.296875,
      "learning_rate": 9e-06,
      "loss": 1.8841,
      "step": 34
    },
    {
      "epoch": 0.7294041028980788,
      "grad_norm": 2.25,
      "learning_rate": 8.956521739130435e-06,
      "loss": 1.8732,
      "step": 35
    },
    {
      "epoch": 0.7502442201237381,
      "grad_norm": 2.21875,
      "learning_rate": 8.91304347826087e-06,
      "loss": 1.8634,
      "step": 36
    },
    {
      "epoch": 0.7710843373493976,
      "grad_norm": 2.171875,
      "learning_rate": 8.869565217391306e-06,
      "loss": 1.8623,
      "step": 37
    },
    {
      "epoch": 0.791924454575057,
      "grad_norm": 2.125,
      "learning_rate": 8.82608695652174e-06,
      "loss": 1.8433,
      "step": 38
    },
    {
      "epoch": 0.8127645718007164,
      "grad_norm": 2.09375,
      "learning_rate": 8.782608695652174e-06,
      "loss": 1.8346,
      "step": 39
    },
    {
      "epoch": 0.8336046890263757,
      "grad_norm": 2.0625,
      "learning_rate": 8.73913043478261e-06,
      "loss": 1.8248,
      "step": 40
    },
    {
      "epoch": 0.8544448062520351,
      "grad_norm": 2.0,
      "learning_rate": 8.695652173913044e-06,
      "loss": 1.8086,
      "step": 41
    },
    {
      "epoch": 0.8752849234776946,
      "grad_norm": 1.9453125,
      "learning_rate": 8.65217391304348e-06,
      "loss": 1.8035,
      "step": 42
    },
    {
      "epoch": 0.896125040703354,
      "grad_norm": 1.8984375,
      "learning_rate": 8.608695652173915e-06,
      "loss": 1.7954,
      "step": 43
    },
    {
      "epoch": 0.9169651579290133,
      "grad_norm": 1.8203125,
      "learning_rate": 8.56521739130435e-06,
      "loss": 1.7912,
      "step": 44
    },
    {
      "epoch": 0.9378052751546727,
      "grad_norm": 1.734375,
      "learning_rate": 8.521739130434783e-06,
      "loss": 1.7763,
      "step": 45
    },
    {
      "epoch": 0.9586453923803321,
      "grad_norm": 1.71875,
      "learning_rate": 8.478260869565218e-06,
      "loss": 1.7602,
      "step": 46
    },
    {
      "epoch": 0.9794855096059916,
      "grad_norm": 1.640625,
      "learning_rate": 8.434782608695653e-06,
      "loss": 1.7543,
      "step": 47
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.609375,
      "learning_rate": 8.391304347826089e-06,
      "loss": 1.7499,
      "step": 48
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.5830821990966797,
      "eval_runtime": 12.501,
      "eval_samples_per_second": 491.321,
      "eval_steps_per_second": 30.718,
      "step": 48
    },
    {
      "epoch": 1.0,
      "eval_title2sid_loss": 1.0781381130218506,
      "eval_title2sid_runtime": 6.3056,
      "eval_title2sid_samples_per_second": 603.595,
      "eval_title2sid_steps_per_second": 37.744,
      "step": 48
    },
    {
      "epoch": 1.0,
      "eval_title2sid_loss": 1.0781381130218506,
      "eval_title2sid_runtime": 6.3056,
      "eval_title2sid_samples_per_second": 603.595,
      "eval_title2sid_steps_per_second": 37.744,
      "step": 48
    },
    {
      "epoch": 1.0,
      "eval_sid2title_loss": 0.33906614780426025,
      "eval_sid2title_runtime": 6.3626,
      "eval_sid2title_samples_per_second": 601.486,
      "eval_sid2title_steps_per_second": 37.721,
      "step": 48
    },
    {
      "epoch": 1.0,
      "eval_sid2title_loss": 0.33906614780426025,
      "eval_sid2title_runtime": 6.3626,
      "eval_sid2title_samples_per_second": 601.486,
      "eval_sid2title_steps_per_second": 37.721,
      "step": 48
    },
    {
      "epoch": 1.0208401172256594,
      "grad_norm": 1.5703125,
      "learning_rate": 8.347826086956522e-06,
      "loss": 1.7506,
      "step": 49
    },
    {
      "epoch": 1.0416802344513187,
      "grad_norm": 1.5859375,
      "learning_rate": 8.304347826086957e-06,
      "loss": 1.7408,
      "step": 50
    },
    {
      "epoch": 1.0625203516769781,
      "grad_norm": 1.640625,
      "learning_rate": 8.260869565217392e-06,
      "loss": 1.7221,
      "step": 51
    },
    {
      "epoch": 1.0833604689026375,
      "grad_norm": 1.953125,
      "learning_rate": 8.217391304347827e-06,
      "loss": 1.7097,
      "step": 52
    },
    {
      "epoch": 1.104200586128297,
      "grad_norm": 2.078125,
      "learning_rate": 8.173913043478263e-06,
      "loss": 1.7002,
      "step": 53
    },
    {
      "epoch": 1.1250407033539565,
      "grad_norm": 4.53125,
      "learning_rate": 8.130434782608696e-06,
      "loss": 1.6878,
      "step": 54
    },
    {
      "epoch": 1.1458808205796158,
      "grad_norm": 4.84375,
      "learning_rate": 8.086956521739131e-06,
      "loss": 1.6962,
      "step": 55
    },
    {
      "epoch": 1.1667209378052752,
      "grad_norm": 3.78125,
      "learning_rate": 8.043478260869566e-06,
      "loss": 1.6896,
      "step": 56
    },
    {
      "epoch": 1.1875610550309346,
      "grad_norm": 1.8828125,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.6817,
      "step": 57
    },
    {
      "epoch": 1.208401172256594,
      "grad_norm": 1.8671875,
      "learning_rate": 7.956521739130435e-06,
      "loss": 1.6763,
      "step": 58
    },
    {
      "epoch": 1.2292412894822533,
      "grad_norm": 2.0625,
      "learning_rate": 7.91304347826087e-06,
      "loss": 1.6671,
      "step": 59
    },
    {
      "epoch": 1.2500814067079127,
      "grad_norm": 1.984375,
      "learning_rate": 7.869565217391305e-06,
      "loss": 1.6616,
      "step": 60
    },
    {
      "epoch": 1.270921523933572,
      "grad_norm": 1.6875,
      "learning_rate": 7.82608695652174e-06,
      "loss": 1.6532,
      "step": 61
    },
    {
      "epoch": 1.2917616411592316,
      "grad_norm": 1.4765625,
      "learning_rate": 7.782608695652174e-06,
      "loss": 1.6553,
      "step": 62
    },
    {
      "epoch": 1.3126017583848908,
      "grad_norm": 1.4609375,
      "learning_rate": 7.739130434782609e-06,
      "loss": 1.6433,
      "step": 63
    },
    {
      "epoch": 1.3334418756105504,
      "grad_norm": 1.7421875,
      "learning_rate": 7.695652173913044e-06,
      "loss": 1.6379,
      "step": 64
    },
    {
      "epoch": 1.3542819928362098,
      "grad_norm": 1.890625,
      "learning_rate": 7.652173913043479e-06,
      "loss": 1.6323,
      "step": 65
    },
    {
      "epoch": 1.3751221100618691,
      "grad_norm": 1.625,
      "learning_rate": 7.608695652173914e-06,
      "loss": 1.6372,
      "step": 66
    },
    {
      "epoch": 1.3959622272875285,
      "grad_norm": 1.453125,
      "learning_rate": 7.565217391304348e-06,
      "loss": 1.6324,
      "step": 67
    },
    {
      "epoch": 1.4168023445131879,
      "grad_norm": 1.4296875,
      "learning_rate": 7.5217391304347835e-06,
      "loss": 1.6236,
      "step": 68
    },
    {
      "epoch": 1.4376424617388472,
      "grad_norm": 1.453125,
      "learning_rate": 7.478260869565218e-06,
      "loss": 1.6177,
      "step": 69
    },
    {
      "epoch": 1.4584825789645066,
      "grad_norm": 1.4765625,
      "learning_rate": 7.434782608695653e-06,
      "loss": 1.6162,
      "step": 70
    },
    {
      "epoch": 1.479322696190166,
      "grad_norm": 1.34375,
      "learning_rate": 7.391304347826087e-06,
      "loss": 1.609,
      "step": 71
    },
    {
      "epoch": 1.5001628134158254,
      "grad_norm": 1.2265625,
      "learning_rate": 7.347826086956522e-06,
      "loss": 1.6035,
      "step": 72
    },
    {
      "epoch": 1.521002930641485,
      "grad_norm": 1.1953125,
      "learning_rate": 7.304347826086957e-06,
      "loss": 1.5981,
      "step": 73
    },
    {
      "epoch": 1.541843047867144,
      "grad_norm": 1.1796875,
      "learning_rate": 7.2608695652173925e-06,
      "loss": 1.5962,
      "step": 74
    },
    {
      "epoch": 1.5626831650928037,
      "grad_norm": 1.1640625,
      "learning_rate": 7.217391304347827e-06,
      "loss": 1.5896,
      "step": 75
    },
    {
      "epoch": 1.583523282318463,
      "grad_norm": 1.109375,
      "learning_rate": 7.173913043478261e-06,
      "loss": 1.5733,
      "step": 76
    },
    {
      "epoch": 1.6043633995441224,
      "grad_norm": 1.0703125,
      "learning_rate": 7.130434782608696e-06,
      "loss": 1.5767,
      "step": 77
    },
    {
      "epoch": 1.6252035167697818,
      "grad_norm": 1.0625,
      "learning_rate": 7.086956521739131e-06,
      "loss": 1.5679,
      "step": 78
    },
    {
      "epoch": 1.6460436339954412,
      "grad_norm": 1.0390625,
      "learning_rate": 7.0434782608695665e-06,
      "loss": 1.5756,
      "step": 79
    },
    {
      "epoch": 1.6668837512211008,
      "grad_norm": 1.0234375,
      "learning_rate": 7e-06,
      "loss": 1.5833,
      "step": 80
    },
    {
      "epoch": 1.68772386844676,
      "grad_norm": 1.0078125,
      "learning_rate": 6.956521739130435e-06,
      "loss": 1.5711,
      "step": 81
    },
    {
      "epoch": 1.7085639856724195,
      "grad_norm": 1.0078125,
      "learning_rate": 6.91304347826087e-06,
      "loss": 1.5709,
      "step": 82
    },
    {
      "epoch": 1.7294041028980787,
      "grad_norm": 1.0,
      "learning_rate": 6.869565217391305e-06,
      "loss": 1.5639,
      "step": 83
    },
    {
      "epoch": 1.7502442201237383,
      "grad_norm": 0.96875,
      "learning_rate": 6.8260869565217395e-06,
      "loss": 1.5644,
      "step": 84
    },
    {
      "epoch": 1.7710843373493976,
      "grad_norm": 0.94140625,
      "learning_rate": 6.782608695652174e-06,
      "loss": 1.5628,
      "step": 85
    },
    {
      "epoch": 1.791924454575057,
      "grad_norm": 0.95703125,
      "learning_rate": 6.739130434782609e-06,
      "loss": 1.5604,
      "step": 86
    },
    {
      "epoch": 1.8127645718007164,
      "grad_norm": 0.91796875,
      "learning_rate": 6.695652173913044e-06,
      "loss": 1.5658,
      "step": 87
    },
    {
      "epoch": 1.8336046890263757,
      "grad_norm": 0.87890625,
      "learning_rate": 6.652173913043479e-06,
      "loss": 1.5471,
      "step": 88
    },
    {
      "epoch": 1.8544448062520351,
      "grad_norm": 0.8359375,
      "learning_rate": 6.6086956521739135e-06,
      "loss": 1.5424,
      "step": 89
    },
    {
      "epoch": 1.8752849234776945,
      "grad_norm": 0.796875,
      "learning_rate": 6.565217391304349e-06,
      "loss": 1.5423,
      "step": 90
    },
    {
      "epoch": 1.896125040703354,
      "grad_norm": 0.7734375,
      "learning_rate": 6.521739130434783e-06,
      "loss": 1.5493,
      "step": 91
    },
    {
      "epoch": 1.9169651579290132,
      "grad_norm": 0.765625,
      "learning_rate": 6.478260869565218e-06,
      "loss": 1.5388,
      "step": 92
    },
    {
      "epoch": 1.9378052751546728,
      "grad_norm": 0.7109375,
      "learning_rate": 6.434782608695652e-06,
      "loss": 1.5494,
      "step": 93
    },
    {
      "epoch": 1.958645392380332,
      "grad_norm": 0.6875,
      "learning_rate": 6.391304347826087e-06,
      "loss": 1.5367,
      "step": 94
    },
    {
      "epoch": 1.9794855096059916,
      "grad_norm": 0.65234375,
      "learning_rate": 6.3478260869565225e-06,
      "loss": 1.5398,
      "step": 95
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.6328125,
      "learning_rate": 6.304347826086958e-06,
      "loss": 1.5429,
      "step": 96
    },
    {
      "epoch": 2.0,
      "eval_loss": 2.606393814086914,
      "eval_runtime": 12.5226,
      "eval_samples_per_second": 490.473,
      "eval_steps_per_second": 30.665,
      "step": 96
    },
    {
      "epoch": 2.0,
      "eval_title2sid_loss": 1.0881061553955078,
      "eval_title2sid_runtime": 6.3694,
      "eval_title2sid_samples_per_second": 597.541,
      "eval_title2sid_steps_per_second": 37.366,
      "step": 96
    },
    {
      "epoch": 2.0,
      "eval_title2sid_loss": 1.0881061553955078,
      "eval_title2sid_runtime": 6.3694,
      "eval_title2sid_samples_per_second": 597.541,
      "eval_title2sid_steps_per_second": 37.366,
      "step": 96
    },
    {
      "epoch": 2.0,
      "eval_sid2title_loss": 0.3378770351409912,
      "eval_sid2title_runtime": 8.2901,
      "eval_sid2title_samples_per_second": 461.637,
      "eval_sid2title_steps_per_second": 28.95,
      "step": 96
    },
    {
      "epoch": 2.0,
      "eval_sid2title_loss": 0.3378770351409912,
      "eval_sid2title_runtime": 8.2901,
      "eval_sid2title_samples_per_second": 461.637,
      "eval_sid2title_steps_per_second": 28.95,
      "step": 96
    }
  ],
  "logging_steps": 1,
  "max_steps": 240,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 1
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.8799889325214925e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}