{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9899159663865547,
  "eval_steps": 500,
  "global_step": 222,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06722689075630252,
      "grad_norm": 0.38315412402153015,
      "learning_rate": 4.99374449148625e-05,
      "loss": 1.369,
      "num_input_tokens_seen": 299568,
      "step": 5
    },
    {
      "epoch": 0.13445378151260504,
      "grad_norm": 0.22612476348876953,
      "learning_rate": 4.975009271054409e-05,
      "loss": 1.3378,
      "num_input_tokens_seen": 599264,
      "step": 10
    },
    {
      "epoch": 0.20168067226890757,
      "grad_norm": 0.2075585424900055,
      "learning_rate": 4.943888097369216e-05,
      "loss": 1.3046,
      "num_input_tokens_seen": 898016,
      "step": 15
    },
    {
      "epoch": 0.2689075630252101,
      "grad_norm": 0.1732514202594757,
      "learning_rate": 4.9005367134442235e-05,
      "loss": 1.2645,
      "num_input_tokens_seen": 1194592,
      "step": 20
    },
    {
      "epoch": 0.33613445378151263,
      "grad_norm": 0.1338847130537033,
      "learning_rate": 4.845172067240415e-05,
      "loss": 1.2228,
      "num_input_tokens_seen": 1497584,
      "step": 25
    },
    {
      "epoch": 0.40336134453781514,
      "grad_norm": 0.13163860142230988,
      "learning_rate": 4.77807122597034e-05,
      "loss": 1.1848,
      "num_input_tokens_seen": 1798048,
      "step": 30
    },
    {
      "epoch": 0.47058823529411764,
      "grad_norm": 0.1262914389371872,
      "learning_rate": 4.699569989541074e-05,
      "loss": 1.2055,
      "num_input_tokens_seen": 2096432,
      "step": 35
    },
    {
      "epoch": 0.5378151260504201,
      "grad_norm": 0.12664474546909332,
      "learning_rate": 4.6100612100748765e-05,
      "loss": 1.1743,
      "num_input_tokens_seen": 2397040,
      "step": 40
    },
    {
      "epoch": 0.6050420168067226,
      "grad_norm": 0.11807628720998764,
      "learning_rate": 4.5099928259173516e-05,
      "loss": 1.1572,
      "num_input_tokens_seen": 2698704,
      "step": 45
    },
    {
      "epoch": 0.6722689075630253,
      "grad_norm": 0.11550261825323105,
      "learning_rate": 4.3998656199717435e-05,
      "loss": 1.1726,
      "num_input_tokens_seen": 2995808,
      "step": 50
    },
    {
      "epoch": 0.7394957983193278,
      "grad_norm": 0.11989202350378036,
      "learning_rate": 4.280230713577564e-05,
      "loss": 1.1402,
      "num_input_tokens_seen": 3292272,
      "step": 55
    },
    {
      "epoch": 0.8067226890756303,
      "grad_norm": 0.12094570696353912,
      "learning_rate": 4.151686808475204e-05,
      "loss": 1.1252,
      "num_input_tokens_seen": 3591104,
      "step": 60
    },
    {
      "epoch": 0.8739495798319328,
      "grad_norm": 0.12436824291944504,
      "learning_rate": 4.0148771906588706e-05,
      "loss": 1.1096,
      "num_input_tokens_seen": 3886624,
      "step": 65
    },
    {
      "epoch": 0.9411764705882353,
      "grad_norm": 0.12026601284742355,
      "learning_rate": 3.8704865111117746e-05,
      "loss": 1.1523,
      "num_input_tokens_seen": 4181408,
      "step": 70
    },
    {
      "epoch": 1.0084033613445378,
      "grad_norm": 0.127424955368042,
      "learning_rate": 3.719237359534087e-05,
      "loss": 1.1077,
      "num_input_tokens_seen": 4480512,
      "step": 75
    },
    {
      "epoch": 1.0756302521008403,
      "grad_norm": 0.12994937598705292,
      "learning_rate": 3.56188664821012e-05,
      "loss": 1.0889,
      "num_input_tokens_seen": 4779072,
      "step": 80
    },
    {
      "epoch": 1.1428571428571428,
      "grad_norm": 0.13282892107963562,
      "learning_rate": 3.39922182411134e-05,
      "loss": 1.0952,
      "num_input_tokens_seen": 5078288,
      "step": 85
    },
    {
      "epoch": 1.2100840336134453,
      "grad_norm": 0.12781143188476562,
      "learning_rate": 3.232056928191376e-05,
      "loss": 1.0983,
      "num_input_tokens_seen": 5377136,
      "step": 90
    },
    {
      "epoch": 1.2773109243697478,
      "grad_norm": 0.149738147854805,
      "learning_rate": 3.061228521593931e-05,
      "loss": 1.0956,
      "num_input_tokens_seen": 5672784,
      "step": 95
    },
    {
      "epoch": 1.3445378151260505,
      "grad_norm": 0.14856387674808502,
      "learning_rate": 2.8875914991604948e-05,
      "loss": 1.08,
      "num_input_tokens_seen": 5974880,
      "step": 100
    },
    {
      "epoch": 1.4168067226890757,
      "grad_norm": 0.14656391739845276,
      "learning_rate": 2.7120148111887732e-05,
      "loss": 1.0489,
      "num_input_tokens_seen": 6273616,
      "step": 105
    },
    {
      "epoch": 1.4840336134453782,
      "grad_norm": 0.1688816398382187,
      "learning_rate": 2.5353771148519057e-05,
      "loss": 1.0607,
      "num_input_tokens_seen": 6574224,
      "step": 110
    },
    {
      "epoch": 1.5512605042016807,
      "grad_norm": 0.1456299126148224,
      "learning_rate": 2.358562377040519e-05,
      "loss": 1.0722,
      "num_input_tokens_seen": 6869008,
      "step": 115
    },
    {
      "epoch": 1.6184873949579832,
      "grad_norm": 0.16043561697006226,
      "learning_rate": 2.182455450632803e-05,
      "loss": 1.0662,
      "num_input_tokens_seen": 7167120,
      "step": 120
    },
    {
      "epoch": 1.6857142857142857,
      "grad_norm": 0.1590069830417633,
      "learning_rate": 2.0079376463307368e-05,
      "loss": 1.0548,
      "num_input_tokens_seen": 7465408,
      "step": 125
    },
    {
      "epoch": 1.7529411764705882,
      "grad_norm": 0.15698856115341187,
      "learning_rate": 1.8358823222228097e-05,
      "loss": 1.0645,
      "num_input_tokens_seen": 7764608,
      "step": 130
    },
    {
      "epoch": 1.8201680672268907,
      "grad_norm": 0.1606791466474533,
      "learning_rate": 1.667150513144856e-05,
      "loss": 1.0671,
      "num_input_tokens_seen": 8062192,
      "step": 135
    },
    {
      "epoch": 1.8873949579831932,
      "grad_norm": 0.16018208861351013,
      "learning_rate": 1.5025866217114592e-05,
      "loss": 1.0775,
      "num_input_tokens_seen": 8359072,
      "step": 140
    },
    {
      "epoch": 1.954621848739496,
      "grad_norm": 0.16284871101379395,
      "learning_rate": 1.3430141925817532e-05,
      "loss": 1.0781,
      "num_input_tokens_seen": 8655680,
      "step": 145
    },
    {
      "epoch": 2.0218487394957982,
      "grad_norm": 0.15517888963222504,
      "learning_rate": 1.1892317911069212e-05,
      "loss": 1.0598,
      "num_input_tokens_seen": 8952864,
      "step": 150
    },
    {
      "epoch": 2.089075630252101,
      "grad_norm": 0.18014486134052277,
      "learning_rate": 1.0420090069843167e-05,
      "loss": 1.0507,
      "num_input_tokens_seen": 9250368,
      "step": 155
    },
    {
      "epoch": 2.1563025210084033,
      "grad_norm": 0.1607990264892578,
      "learning_rate": 9.020826029175384e-06,
      "loss": 1.0532,
      "num_input_tokens_seen": 9552944,
      "step": 160
    },
    {
      "epoch": 2.223529411764706,
      "grad_norm": 0.16465440392494202,
      "learning_rate": 7.701528275561348e-06,
      "loss": 1.0589,
      "num_input_tokens_seen": 9849888,
      "step": 165
    },
    {
      "epoch": 2.2907563025210083,
      "grad_norm": 0.17045491933822632,
      "learning_rate": 6.468799111665003e-06,
      "loss": 1.034,
      "num_input_tokens_seen": 10149488,
      "step": 170
    },
    {
      "epoch": 2.357983193277311,
      "grad_norm": 0.16000358760356903,
      "learning_rate": 5.328807615710246e-06,
      "loss": 1.0442,
      "num_input_tokens_seen": 10448208,
      "step": 175
    },
    {
      "epoch": 2.4252100840336133,
      "grad_norm": 0.16633416712284088,
      "learning_rate": 4.2872587689039484e-06,
      "loss": 1.0579,
      "num_input_tokens_seen": 10744608,
      "step": 180
    },
    {
      "epoch": 2.492436974789916,
      "grad_norm": 0.15942485630512238,
      "learning_rate": 3.3493649053890326e-06,
      "loss": 1.0397,
      "num_input_tokens_seen": 11044688,
      "step": 185
    },
    {
      "epoch": 2.5596638655462183,
      "grad_norm": 0.1655404269695282,
      "learning_rate": 2.5198196276040782e-06,
      "loss": 1.0249,
      "num_input_tokens_seen": 11343152,
      "step": 190
    },
    {
      "epoch": 2.626890756302521,
      "grad_norm": 0.16440080106258392,
      "learning_rate": 1.8027743175872664e-06,
      "loss": 1.0343,
      "num_input_tokens_seen": 11645792,
      "step": 195
    },
    {
      "epoch": 2.6941176470588237,
      "grad_norm": 0.17174942791461945,
      "learning_rate": 1.201817361771837e-06,
      "loss": 1.045,
      "num_input_tokens_seen": 11942320,
      "step": 200
    },
    {
      "epoch": 2.761344537815126,
      "grad_norm": 0.16318759322166443,
      "learning_rate": 7.199561932405952e-07,
      "loss": 1.0425,
      "num_input_tokens_seen": 12239184,
      "step": 205
    },
    {
      "epoch": 2.8285714285714287,
      "grad_norm": 0.17799663543701172,
      "learning_rate": 3.5960224130728857e-07,
      "loss": 1.0649,
      "num_input_tokens_seen": 12534704,
      "step": 210
    },
    {
      "epoch": 2.895798319327731,
      "grad_norm": 0.1608886420726776,
      "learning_rate": 1.2255886374334946e-07,
      "loss": 1.0439,
      "num_input_tokens_seen": 12838560,
      "step": 215
    },
    {
      "epoch": 2.9630252100840337,
      "grad_norm": 0.16979870200157166,
      "learning_rate": 1.0012322041960676e-08,
      "loss": 1.0382,
      "num_input_tokens_seen": 13133488,
      "step": 220
    },
    {
      "epoch": 2.9899159663865547,
      "num_input_tokens_seen": 13254560,
      "step": 222,
      "total_flos": 2.251343464014807e+17,
      "train_loss": 0.5790731208818453,
      "train_runtime": 983.1045,
      "train_samples_per_second": 29.011,
      "train_steps_per_second": 0.226
    }
  ],
  "logging_steps": 5,
  "max_steps": 222,
  "num_input_tokens_seen": 13254560,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.251343464014807e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}