| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 34.210215142059454, |
| "global_step": 19500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.917429577464789e-05, |
| "loss": 1.7834, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.6923606991767883, |
| "eval_runtime": 1521.6883, |
| "eval_samples_per_second": 921.873, |
| "eval_steps_per_second": 14.404, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 4.817077464788733e-05, |
| "loss": 0.6994, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.6767147183418274, |
| "eval_runtime": 1526.5049, |
| "eval_samples_per_second": 918.964, |
| "eval_steps_per_second": 14.359, |
| "step": 1140 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 4.7167253521126765e-05, |
| "loss": 0.654, |
| "step": 1710 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.6709172129631042, |
| "eval_runtime": 1516.8574, |
| "eval_samples_per_second": 924.809, |
| "eval_steps_per_second": 14.45, |
| "step": 1710 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 4.61637323943662e-05, |
| "loss": 0.6194, |
| "step": 2280 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 0.6694589257240295, |
| "eval_runtime": 1517.553, |
| "eval_samples_per_second": 924.385, |
| "eval_steps_per_second": 14.444, |
| "step": 2280 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 4.516021126760563e-05, |
| "loss": 0.5904, |
| "step": 2850 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_loss": 0.6706073880195618, |
| "eval_runtime": 1514.7995, |
| "eval_samples_per_second": 926.065, |
| "eval_steps_per_second": 14.47, |
| "step": 2850 |
| }, |
| { |
| "epoch": 6.0, |
| "learning_rate": 4.4156690140845075e-05, |
| "loss": 0.5653, |
| "step": 3420 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_loss": 0.6708899736404419, |
| "eval_runtime": 1517.3829, |
| "eval_samples_per_second": 924.488, |
| "eval_steps_per_second": 14.445, |
| "step": 3420 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 4.315316901408451e-05, |
| "loss": 0.5431, |
| "step": 3990 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_loss": 0.6717029809951782, |
| "eval_runtime": 1515.5706, |
| "eval_samples_per_second": 925.594, |
| "eval_steps_per_second": 14.463, |
| "step": 3990 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 4.214964788732394e-05, |
| "loss": 0.5237, |
| "step": 4560 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_loss": 0.6746511459350586, |
| "eval_runtime": 1515.027, |
| "eval_samples_per_second": 925.926, |
| "eval_steps_per_second": 14.468, |
| "step": 4560 |
| }, |
| { |
| "epoch": 9.0, |
| "learning_rate": 4.114612676056338e-05, |
| "loss": 0.5061, |
| "step": 5130 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_loss": 0.6748108267784119, |
| "eval_runtime": 1514.6887, |
| "eval_samples_per_second": 926.133, |
| "eval_steps_per_second": 14.471, |
| "step": 5130 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 4.014260563380282e-05, |
| "loss": 0.4901, |
| "step": 5700 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_loss": 0.6780610680580139, |
| "eval_runtime": 1515.4454, |
| "eval_samples_per_second": 925.67, |
| "eval_steps_per_second": 14.464, |
| "step": 5700 |
| }, |
| { |
| "epoch": 11.0, |
| "learning_rate": 3.913908450704226e-05, |
| "loss": 0.4755, |
| "step": 6270 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_loss": 0.6779971718788147, |
| "eval_runtime": 1517.3277, |
| "eval_samples_per_second": 924.522, |
| "eval_steps_per_second": 14.446, |
| "step": 6270 |
| }, |
| { |
| "epoch": 12.0, |
| "learning_rate": 3.813556338028169e-05, |
| "loss": 0.4624, |
| "step": 6840 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_loss": 0.6860271096229553, |
| "eval_runtime": 1515.1169, |
| "eval_samples_per_second": 925.871, |
| "eval_steps_per_second": 14.467, |
| "step": 6840 |
| }, |
| { |
| "epoch": 13.0, |
| "learning_rate": 3.7132042253521126e-05, |
| "loss": 0.4501, |
| "step": 7410 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_loss": 0.6835764646530151, |
| "eval_runtime": 1513.5665, |
| "eval_samples_per_second": 926.82, |
| "eval_steps_per_second": 14.482, |
| "step": 7410 |
| }, |
| { |
| "epoch": 14.0, |
| "learning_rate": 3.612852112676057e-05, |
| "loss": 0.4389, |
| "step": 7980 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_loss": 0.6895220279693604, |
| "eval_runtime": 1514.0338, |
| "eval_samples_per_second": 926.533, |
| "eval_steps_per_second": 14.477, |
| "step": 7980 |
| }, |
| { |
| "epoch": 15.0, |
| "learning_rate": 3.5125e-05, |
| "loss": 0.4286, |
| "step": 8550 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_loss": 0.6874631643295288, |
| "eval_runtime": 1516.075, |
| "eval_samples_per_second": 925.286, |
| "eval_steps_per_second": 14.458, |
| "step": 8550 |
| }, |
| { |
| "epoch": 16.0, |
| "learning_rate": 3.412147887323944e-05, |
| "loss": 0.4189, |
| "step": 9120 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_loss": 0.6922647953033447, |
| "eval_runtime": 1516.6302, |
| "eval_samples_per_second": 924.947, |
| "eval_steps_per_second": 14.452, |
| "step": 9120 |
| }, |
| { |
| "epoch": 17.0, |
| "learning_rate": 3.3117957746478874e-05, |
| "loss": 0.41, |
| "step": 9690 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_loss": 0.6977216005325317, |
| "eval_runtime": 1513.9466, |
| "eval_samples_per_second": 926.587, |
| "eval_steps_per_second": 14.478, |
| "step": 9690 |
| }, |
| { |
| "epoch": 18.0, |
| "learning_rate": 3.211443661971831e-05, |
| "loss": 0.4017, |
| "step": 10260 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_loss": 0.705147922039032, |
| "eval_runtime": 1513.405, |
| "eval_samples_per_second": 926.918, |
| "eval_steps_per_second": 14.483, |
| "step": 10260 |
| }, |
| { |
| "epoch": 19.0, |
| "learning_rate": 3.111091549295775e-05, |
| "loss": 0.394, |
| "step": 10830 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_loss": 0.6972260475158691, |
| "eval_runtime": 1514.848, |
| "eval_samples_per_second": 926.035, |
| "eval_steps_per_second": 14.469, |
| "step": 10830 |
| }, |
| { |
| "epoch": 20.0, |
| "learning_rate": 3.0109154929577467e-05, |
| "loss": 0.3868, |
| "step": 11400 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_loss": 0.7031562328338623, |
| "eval_runtime": 1513.8461, |
| "eval_samples_per_second": 926.648, |
| "eval_steps_per_second": 14.479, |
| "step": 11400 |
| }, |
| { |
| "epoch": 21.0, |
| "learning_rate": 2.91056338028169e-05, |
| "loss": 0.38, |
| "step": 11970 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_loss": 0.7028641104698181, |
| "eval_runtime": 1513.4864, |
| "eval_samples_per_second": 926.869, |
| "eval_steps_per_second": 14.482, |
| "step": 11970 |
| }, |
| { |
| "epoch": 22.0, |
| "learning_rate": 2.810211267605634e-05, |
| "loss": 0.3738, |
| "step": 12540 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_loss": 0.7074136734008789, |
| "eval_runtime": 1513.6381, |
| "eval_samples_per_second": 926.776, |
| "eval_steps_per_second": 14.481, |
| "step": 12540 |
| }, |
| { |
| "epoch": 23.0, |
| "learning_rate": 2.7098591549295778e-05, |
| "loss": 0.3679, |
| "step": 13110 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_loss": 0.7076618075370789, |
| "eval_runtime": 1513.3948, |
| "eval_samples_per_second": 926.925, |
| "eval_steps_per_second": 14.483, |
| "step": 13110 |
| }, |
| { |
| "epoch": 24.0, |
| "learning_rate": 2.609507042253521e-05, |
| "loss": 0.3623, |
| "step": 13680 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_loss": 0.7148919701576233, |
| "eval_runtime": 1514.4065, |
| "eval_samples_per_second": 926.305, |
| "eval_steps_per_second": 14.474, |
| "step": 13680 |
| }, |
| { |
| "epoch": 25.0, |
| "learning_rate": 2.509507042253521e-05, |
| "loss": 0.3572, |
| "step": 14250 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_loss": 0.7150377631187439, |
| "eval_runtime": 1514.2022, |
| "eval_samples_per_second": 926.43, |
| "eval_steps_per_second": 14.476, |
| "step": 14250 |
| }, |
| { |
| "epoch": 26.0, |
| "learning_rate": 2.409330985915493e-05, |
| "loss": 0.3523, |
| "step": 14820 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_loss": 0.7058804035186768, |
| "eval_runtime": 1514.5913, |
| "eval_samples_per_second": 926.192, |
| "eval_steps_per_second": 14.472, |
| "step": 14820 |
| }, |
| { |
| "epoch": 27.0, |
| "learning_rate": 2.3089788732394367e-05, |
| "loss": 0.3478, |
| "step": 15390 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_loss": 0.7150311470031738, |
| "eval_runtime": 1515.4959, |
| "eval_samples_per_second": 925.64, |
| "eval_steps_per_second": 14.463, |
| "step": 15390 |
| }, |
| { |
| "epoch": 28.0, |
| "learning_rate": 2.2086267605633804e-05, |
| "loss": 0.3436, |
| "step": 15960 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_loss": 0.715233564376831, |
| "eval_runtime": 1515.1776, |
| "eval_samples_per_second": 925.834, |
| "eval_steps_per_second": 14.466, |
| "step": 15960 |
| }, |
| { |
| "epoch": 29.0, |
| "learning_rate": 2.108274647887324e-05, |
| "loss": 0.3396, |
| "step": 16530 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_loss": 0.7200678586959839, |
| "eval_runtime": 1514.8194, |
| "eval_samples_per_second": 926.053, |
| "eval_steps_per_second": 14.47, |
| "step": 16530 |
| }, |
| { |
| "epoch": 30.0, |
| "learning_rate": 2.0079225352112678e-05, |
| "loss": 0.3358, |
| "step": 17100 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_loss": 0.713365375995636, |
| "eval_runtime": 1513.5909, |
| "eval_samples_per_second": 926.805, |
| "eval_steps_per_second": 14.481, |
| "step": 17100 |
| }, |
| { |
| "epoch": 31.0, |
| "learning_rate": 1.9075704225352115e-05, |
| "loss": 0.3323, |
| "step": 17670 |
| }, |
| { |
| "epoch": 31.0, |
| "eval_loss": 0.7232212424278259, |
| "eval_runtime": 1515.152, |
| "eval_samples_per_second": 925.85, |
| "eval_steps_per_second": 14.467, |
| "step": 17670 |
| }, |
| { |
| "epoch": 32.0, |
| "learning_rate": 1.8072183098591548e-05, |
| "loss": 0.3291, |
| "step": 18240 |
| }, |
| { |
| "epoch": 32.0, |
| "eval_loss": 0.7199446558952332, |
| "eval_runtime": 1514.7283, |
| "eval_samples_per_second": 926.109, |
| "eval_steps_per_second": 14.471, |
| "step": 18240 |
| }, |
| { |
| "epoch": 33.0, |
| "learning_rate": 1.706866197183099e-05, |
| "loss": 0.3259, |
| "step": 18810 |
| }, |
| { |
| "epoch": 33.0, |
| "eval_loss": 0.7213279604911804, |
| "eval_runtime": 1513.6184, |
| "eval_samples_per_second": 926.788, |
| "eval_steps_per_second": 14.481, |
| "step": 18810 |
| }, |
| { |
| "epoch": 34.0, |
| "learning_rate": 1.6065140845070422e-05, |
| "loss": 0.3231, |
| "step": 19380 |
| }, |
| { |
| "epoch": 34.0, |
| "eval_loss": 0.7236403226852417, |
| "eval_runtime": 1514.5169, |
| "eval_samples_per_second": 926.238, |
| "eval_steps_per_second": 14.473, |
| "step": 19380 |
| } |
| ], |
| "max_steps": 28500, |
| "num_train_epochs": 50, |
| "total_flos": 3.0481579178459136e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|