| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 10, | |
| "global_step": 389, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.4501470088205292e-05, | |
| "loss": 1.0383, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 0.9290542602539062, | |
| "eval_runtime": 77.6515, | |
| "eval_samples_per_second": 0.245, | |
| "eval_steps_per_second": 0.039, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.4001440086405184e-05, | |
| "loss": 0.9448, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 0.9216130971908569, | |
| "eval_runtime": 75.2278, | |
| "eval_samples_per_second": 0.253, | |
| "eval_steps_per_second": 0.04, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 2.350141008460508e-05, | |
| "loss": 0.942, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 0.9170966148376465, | |
| "eval_runtime": 75.2167, | |
| "eval_samples_per_second": 0.253, | |
| "eval_steps_per_second": 0.04, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 2.300138008280497e-05, | |
| "loss": 0.9305, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_loss": 0.9177520871162415, | |
| "eval_runtime": 75.2861, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.250135008100486e-05, | |
| "loss": 0.9301, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_loss": 0.9236036539077759, | |
| "eval_runtime": 75.3569, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.200132007920475e-05, | |
| "loss": 0.9412, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_loss": 0.9281936287879944, | |
| "eval_runtime": 75.2776, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.1501290077404645e-05, | |
| "loss": 0.9534, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_loss": 0.931694507598877, | |
| "eval_runtime": 75.4284, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 2.1001260075604537e-05, | |
| "loss": 1.0193, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_loss": 0.9383592009544373, | |
| "eval_runtime": 75.2446, | |
| "eval_samples_per_second": 0.253, | |
| "eval_steps_per_second": 0.04, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 2.050123007380443e-05, | |
| "loss": 0.979, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_loss": 0.9449018239974976, | |
| "eval_runtime": 75.2111, | |
| "eval_samples_per_second": 0.253, | |
| "eval_steps_per_second": 0.04, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 2.000120007200432e-05, | |
| "loss": 1.0321, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_loss": 0.9472957253456116, | |
| "eval_runtime": 75.1836, | |
| "eval_samples_per_second": 0.253, | |
| "eval_steps_per_second": 0.04, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.9501170070204212e-05, | |
| "loss": 0.997, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_loss": 0.9475318789482117, | |
| "eval_runtime": 75.3079, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.9001140068404107e-05, | |
| "loss": 0.9737, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_loss": 0.9536099433898926, | |
| "eval_runtime": 75.3453, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.8501110066603995e-05, | |
| "loss": 1.0492, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_loss": 0.9576209187507629, | |
| "eval_runtime": 75.2609, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.8001080064803887e-05, | |
| "loss": 0.9903, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_loss": 0.9594760537147522, | |
| "eval_runtime": 75.3588, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.750105006300378e-05, | |
| "loss": 1.0525, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_loss": 0.9673194885253906, | |
| "eval_runtime": 75.2799, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.700102006120367e-05, | |
| "loss": 0.9806, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_loss": 0.9813961386680603, | |
| "eval_runtime": 75.2782, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.6500990059403565e-05, | |
| "loss": 1.0374, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_loss": 0.9768068194389343, | |
| "eval_runtime": 75.2881, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.6000960057603457e-05, | |
| "loss": 0.9967, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_loss": 0.991325855255127, | |
| "eval_runtime": 75.311, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.550093005580335e-05, | |
| "loss": 1.047, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_loss": 1.0070654153823853, | |
| "eval_runtime": 75.2469, | |
| "eval_samples_per_second": 0.253, | |
| "eval_steps_per_second": 0.04, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.5000900054003238e-05, | |
| "loss": 1.0366, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_loss": 1.0217609405517578, | |
| "eval_runtime": 75.3156, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 1.4500870052203133e-05, | |
| "loss": 0.9782, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_loss": 1.0349949598312378, | |
| "eval_runtime": 75.3478, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 1.4000840050403025e-05, | |
| "loss": 1.1381, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_loss": 1.05660080909729, | |
| "eval_runtime": 75.403, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 1.3500810048602917e-05, | |
| "loss": 1.1266, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_loss": 1.0543776750564575, | |
| "eval_runtime": 75.3395, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.3000780046802807e-05, | |
| "loss": 1.1131, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_loss": 1.0707024335861206, | |
| "eval_runtime": 75.2946, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.2500750045002698e-05, | |
| "loss": 1.0625, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_loss": 1.0837984085083008, | |
| "eval_runtime": 75.3303, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.2000720043202592e-05, | |
| "loss": 1.0762, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_loss": 1.0885578393936157, | |
| "eval_runtime": 75.2812, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.1500690041402485e-05, | |
| "loss": 1.1995, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_loss": 1.1089729070663452, | |
| "eval_runtime": 75.3209, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.1000660039602375e-05, | |
| "loss": 1.115, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_loss": 1.1146286725997925, | |
| "eval_runtime": 75.3791, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.0500630037802268e-05, | |
| "loss": 0.9881, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_loss": 1.1138123273849487, | |
| "eval_runtime": 75.3832, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.000060003600216e-05, | |
| "loss": 1.2121, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_loss": 1.1398226022720337, | |
| "eval_runtime": 75.307, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 9.500570034202053e-06, | |
| "loss": 1.1354, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_loss": 1.1480103731155396, | |
| "eval_runtime": 75.3962, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 9.000540032401943e-06, | |
| "loss": 1.1815, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_loss": 1.1434487104415894, | |
| "eval_runtime": 75.3053, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 8.500510030601835e-06, | |
| "loss": 1.3204, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_loss": 1.142481803894043, | |
| "eval_runtime": 75.2776, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 8.000480028801728e-06, | |
| "loss": 1.2111, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_loss": 1.1574183702468872, | |
| "eval_runtime": 75.3414, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 7.500450027001619e-06, | |
| "loss": 1.1378, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_loss": 1.1606627702713013, | |
| "eval_runtime": 75.3801, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 7.0004200252015126e-06, | |
| "loss": 1.2455, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_loss": 1.1685103178024292, | |
| "eval_runtime": 75.2747, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 6.500390023401403e-06, | |
| "loss": 1.2436, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_loss": 1.179547667503357, | |
| "eval_runtime": 75.3915, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 6.000360021601296e-06, | |
| "loss": 1.2619, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_loss": 1.1883246898651123, | |
| "eval_runtime": 75.3375, | |
| "eval_samples_per_second": 0.252, | |
| "eval_steps_per_second": 0.04, | |
| "step": 380 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "total_flos": 3.9179178565632e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |