{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.87012987012987,
  "eval_steps": 500,
  "global_step": 570,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.17316017316017315,
      "grad_norm": 0.73779296875,
      "learning_rate": 0.00019984815164333163,
      "loss": 1.5442,
      "step": 10
    },
    {
      "epoch": 0.3463203463203463,
      "grad_norm": 0.67529296875,
      "learning_rate": 0.00019939306773179497,
      "loss": 1.4059,
      "step": 20
    },
    {
      "epoch": 0.5194805194805194,
      "grad_norm": 0.67724609375,
      "learning_rate": 0.00019863613034027224,
      "loss": 1.3684,
      "step": 30
    },
    {
      "epoch": 0.6926406926406926,
      "grad_norm": 0.62744140625,
      "learning_rate": 0.00019757963826274357,
      "loss": 1.3543,
      "step": 40
    },
    {
      "epoch": 0.8658008658008658,
      "grad_norm": 0.5947265625,
      "learning_rate": 0.00019622680003092503,
      "loss": 1.3547,
      "step": 50
    },
    {
      "epoch": 1.0389610389610389,
      "grad_norm": 0.5986328125,
      "learning_rate": 0.00019458172417006347,
      "loss": 1.3248,
      "step": 60
    },
    {
      "epoch": 1.2121212121212122,
      "grad_norm": 0.638671875,
      "learning_rate": 0.00019264940672148018,
      "loss": 1.2603,
      "step": 70
    },
    {
      "epoch": 1.3852813852813852,
      "grad_norm": 0.78857421875,
      "learning_rate": 0.00019043571606975777,
      "loss": 1.2683,
      "step": 80
    },
    {
      "epoch": 1.5584415584415585,
      "grad_norm": 0.7412109375,
      "learning_rate": 0.0001879473751206489,
      "loss": 1.2356,
      "step": 90
    },
    {
      "epoch": 1.7316017316017316,
      "grad_norm": 0.70458984375,
      "learning_rate": 0.00018519194088383273,
      "loss": 1.2849,
      "step": 100
    },
    {
      "epoch": 1.9047619047619047,
      "grad_norm": 0.748046875,
      "learning_rate": 0.0001821777815225245,
      "loss": 1.2618,
      "step": 110
    },
    {
      "epoch": 2.0779220779220777,
      "grad_norm": 0.68359375,
      "learning_rate": 0.00017891405093963938,
      "loss": 1.1707,
      "step": 120
    },
    {
      "epoch": 2.2510822510822512,
      "grad_norm": 0.83740234375,
      "learning_rate": 0.00017541066097768963,
      "loss": 1.1533,
      "step": 130
    },
    {
      "epoch": 2.4242424242424243,
      "grad_norm": 0.81005859375,
      "learning_rate": 0.00017167825131684513,
      "loss": 1.1604,
      "step": 140
    },
    {
      "epoch": 2.5974025974025974,
      "grad_norm": 0.99462890625,
      "learning_rate": 0.00016772815716257412,
      "loss": 1.1347,
      "step": 150
    },
    {
      "epoch": 2.7705627705627704,
      "grad_norm": 0.9736328125,
      "learning_rate": 0.00016357237482099684,
      "loss": 1.1071,
      "step": 160
    },
    {
      "epoch": 2.9437229437229435,
      "grad_norm": 0.89111328125,
      "learning_rate": 0.00015922352526649803,
      "loss": 1.1747,
      "step": 170
    },
    {
      "epoch": 3.116883116883117,
      "grad_norm": 1.0849609375,
      "learning_rate": 0.00015469481581224272,
      "loss": 1.0882,
      "step": 180
    },
    {
      "epoch": 3.29004329004329,
      "grad_norm": 1.0166015625,
      "learning_rate": 0.00015000000000000001,
      "loss": 1.0027,
      "step": 190
    },
    {
      "epoch": 3.463203463203463,
      "grad_norm": 1.0068359375,
      "learning_rate": 0.00014515333583108896,
      "loss": 1.059,
      "step": 200
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 0.9970703125,
      "learning_rate": 0.00014016954246529696,
      "loss": 1.0374,
      "step": 210
    },
    {
      "epoch": 3.8095238095238093,
      "grad_norm": 1.0283203125,
      "learning_rate": 0.00013506375551927547,
      "loss": 0.9911,
      "step": 220
    },
    {
      "epoch": 3.982683982683983,
      "grad_norm": 1.0732421875,
      "learning_rate": 0.00012985148110016947,
      "loss": 1.089,
      "step": 230
    },
    {
      "epoch": 4.1558441558441555,
      "grad_norm": 2.0078125,
      "learning_rate": 0.00012454854871407994,
      "loss": 0.9512,
      "step": 240
    },
    {
      "epoch": 4.329004329004329,
      "grad_norm": 1.2607421875,
      "learning_rate": 0.00011917106319237386,
      "loss": 0.9437,
      "step": 250
    },
    {
      "epoch": 4.5021645021645025,
      "grad_norm": 1.21875,
      "learning_rate": 0.00011373535578184082,
      "loss": 0.9019,
      "step": 260
    },
    {
      "epoch": 4.675324675324675,
      "grad_norm": 1.3486328125,
      "learning_rate": 0.00010825793454723325,
      "loss": 0.9206,
      "step": 270
    },
    {
      "epoch": 4.848484848484849,
      "grad_norm": 1.1474609375,
      "learning_rate": 0.00010275543423681621,
      "loss": 0.9218,
      "step": 280
    },
    {
      "epoch": 5.021645021645021,
      "grad_norm": 1.119140625,
      "learning_rate": 9.724456576318381e-05,
      "loss": 0.8835,
      "step": 290
    },
    {
      "epoch": 5.194805194805195,
      "grad_norm": 1.25390625,
      "learning_rate": 9.174206545276677e-05,
      "loss": 0.8657,
      "step": 300
    },
    {
      "epoch": 5.367965367965368,
      "grad_norm": 1.2841796875,
      "learning_rate": 8.626464421815919e-05,
      "loss": 0.8135,
      "step": 310
    },
    {
      "epoch": 5.541125541125541,
      "grad_norm": 1.302734375,
      "learning_rate": 8.082893680762619e-05,
      "loss": 0.828,
      "step": 320
    },
    {
      "epoch": 5.714285714285714,
      "grad_norm": 1.255859375,
      "learning_rate": 7.54514512859201e-05,
      "loss": 0.8363,
      "step": 330
    },
    {
      "epoch": 5.887445887445887,
      "grad_norm": 1.2783203125,
      "learning_rate": 7.014851889983057e-05,
      "loss": 0.8205,
      "step": 340
    },
    {
      "epoch": 6.0606060606060606,
      "grad_norm": 1.15234375,
      "learning_rate": 6.493624448072457e-05,
      "loss": 0.8129,
      "step": 350
    },
    {
      "epoch": 6.233766233766234,
      "grad_norm": 1.314453125,
      "learning_rate": 5.983045753470308e-05,
      "loss": 0.7766,
      "step": 360
    },
    {
      "epoch": 6.406926406926407,
      "grad_norm": 1.556640625,
      "learning_rate": 5.484666416891109e-05,
      "loss": 0.7152,
      "step": 370
    },
    {
      "epoch": 6.58008658008658,
      "grad_norm": 1.3330078125,
      "learning_rate": 5.000000000000002e-05,
      "loss": 0.7112,
      "step": 380
    },
    {
      "epoch": 6.753246753246753,
      "grad_norm": 1.3564453125,
      "learning_rate": 4.530518418775733e-05,
      "loss": 0.7955,
      "step": 390
    },
    {
      "epoch": 6.926406926406926,
      "grad_norm": 1.31640625,
      "learning_rate": 4.077647473350201e-05,
      "loss": 0.8241,
      "step": 400
    },
    {
      "epoch": 7.0995670995671,
      "grad_norm": 1.4013671875,
      "learning_rate": 3.642762517900322e-05,
      "loss": 0.7759,
      "step": 410
    },
    {
      "epoch": 7.2727272727272725,
      "grad_norm": 1.4345703125,
      "learning_rate": 3.227184283742591e-05,
      "loss": 0.747,
      "step": 420
    },
    {
      "epoch": 7.445887445887446,
      "grad_norm": 1.4423828125,
      "learning_rate": 2.8321748683154893e-05,
      "loss": 0.7206,
      "step": 430
    },
    {
      "epoch": 7.619047619047619,
      "grad_norm": 1.599609375,
      "learning_rate": 2.4589339022310386e-05,
      "loss": 0.6764,
      "step": 440
    },
    {
      "epoch": 7.792207792207792,
      "grad_norm": 1.3486328125,
      "learning_rate": 2.1085949060360654e-05,
      "loss": 0.7336,
      "step": 450
    },
    {
      "epoch": 7.965367965367966,
      "grad_norm": 1.576171875,
      "learning_rate": 1.7822218477475494e-05,
      "loss": 0.6992,
      "step": 460
    },
    {
      "epoch": 8.13852813852814,
      "grad_norm": 1.560546875,
      "learning_rate": 1.4808059116167305e-05,
      "loss": 0.6818,
      "step": 470
    },
    {
      "epoch": 8.311688311688311,
      "grad_norm": 1.6240234375,
      "learning_rate": 1.2052624879351104e-05,
      "loss": 0.7221,
      "step": 480
    },
    {
      "epoch": 8.484848484848484,
      "grad_norm": 1.431640625,
      "learning_rate": 9.564283930242257e-06,
      "loss": 0.6785,
      "step": 490
    },
    {
      "epoch": 8.658008658008658,
      "grad_norm": 1.4521484375,
      "learning_rate": 7.350593278519824e-06,
      "loss": 0.6701,
      "step": 500
    },
    {
      "epoch": 8.831168831168831,
      "grad_norm": 1.46484375,
      "learning_rate": 5.418275829936537e-06,
      "loss": 0.695,
      "step": 510
    },
    {
      "epoch": 9.004329004329005,
      "grad_norm": 1.3701171875,
      "learning_rate": 3.7731999690749585e-06,
      "loss": 0.6976,
      "step": 520
    },
    {
      "epoch": 9.177489177489177,
      "grad_norm": 1.3154296875,
      "learning_rate": 2.420361737256438e-06,
      "loss": 0.6386,
      "step": 530
    },
    {
      "epoch": 9.35064935064935,
      "grad_norm": 1.5068359375,
      "learning_rate": 1.3638696597277679e-06,
      "loss": 0.706,
      "step": 540
    },
    {
      "epoch": 9.523809523809524,
      "grad_norm": 1.5166015625,
      "learning_rate": 6.069322682050516e-07,
      "loss": 0.6972,
      "step": 550
    },
    {
      "epoch": 9.696969696969697,
      "grad_norm": 1.2392578125,
      "learning_rate": 1.518483566683826e-07,
      "loss": 0.6888,
      "step": 560
    },
    {
      "epoch": 9.87012987012987,
      "grad_norm": 1.4599609375,
      "learning_rate": 0.0,
      "loss": 0.6703,
      "step": 570
    },
    {
      "epoch": 9.87012987012987,
      "step": 570,
      "total_flos": 1.455489640562688e+16,
      "train_loss": 0.9482885511297928,
      "train_runtime": 548.5236,
      "train_samples_per_second": 4.211,
      "train_steps_per_second": 1.039
    }
  ],
  "logging_steps": 10,
  "max_steps": 570,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 1.455489640562688e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}