| { | |
| "best_metric": 1.0748603343963623, | |
| "best_model_checkpoint": "/home/htr/Works/LLM_FInetuneTrying/ModelSave_NiuTrans__Classical-Modern_Chinese_Alpaca_Plus_13B_huggingface/experiments9/checkpoint-2280", | |
| "epoch": 0.05839651830947604, | |
| "global_step": 2290, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.1373226642608643, | |
| "eval_runtime": 98.6309, | |
| "eval_samples_per_second": 10.139, | |
| "eval_steps_per_second": 1.267, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.1380295753479004, | |
| "eval_runtime": 98.364, | |
| "eval_samples_per_second": 10.166, | |
| "eval_steps_per_second": 1.271, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2.54957507082153e-07, | |
| "loss": 2.0494, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.1390674114227295, | |
| "eval_runtime": 98.4516, | |
| "eval_samples_per_second": 10.157, | |
| "eval_steps_per_second": 1.27, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.1379237174987793, | |
| "eval_runtime": 98.4053, | |
| "eval_samples_per_second": 10.162, | |
| "eval_steps_per_second": 1.27, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.1322598457336426, | |
| "eval_runtime": 98.3731, | |
| "eval_samples_per_second": 10.165, | |
| "eval_steps_per_second": 1.271, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5.09915014164306e-07, | |
| "loss": 2.1992, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.132437229156494, | |
| "eval_runtime": 98.3807, | |
| "eval_samples_per_second": 10.165, | |
| "eval_steps_per_second": 1.271, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.1324551105499268, | |
| "eval_runtime": 98.3666, | |
| "eval_samples_per_second": 10.166, | |
| "eval_steps_per_second": 1.271, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.1256096363067627, | |
| "eval_runtime": 98.4457, | |
| "eval_samples_per_second": 10.158, | |
| "eval_steps_per_second": 1.27, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 7.648725212464589e-07, | |
| "loss": 2.0523, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.1243367195129395, | |
| "eval_runtime": 98.3341, | |
| "eval_samples_per_second": 10.169, | |
| "eval_steps_per_second": 1.271, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.12176513671875, | |
| "eval_runtime": 98.302, | |
| "eval_samples_per_second": 10.173, | |
| "eval_steps_per_second": 1.272, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.1149940490722656, | |
| "eval_runtime": 98.4139, | |
| "eval_samples_per_second": 10.161, | |
| "eval_steps_per_second": 1.27, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.019830028328612e-06, | |
| "loss": 2.1769, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.108457088470459, | |
| "eval_runtime": 98.3955, | |
| "eval_samples_per_second": 10.163, | |
| "eval_steps_per_second": 1.27, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.104037046432495, | |
| "eval_runtime": 98.3573, | |
| "eval_samples_per_second": 10.167, | |
| "eval_steps_per_second": 1.271, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.09036922454834, | |
| "eval_runtime": 98.362, | |
| "eval_samples_per_second": 10.167, | |
| "eval_steps_per_second": 1.271, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.2747875354107649e-06, | |
| "loss": 2.1363, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.0742862224578857, | |
| "eval_runtime": 98.3549, | |
| "eval_samples_per_second": 10.167, | |
| "eval_steps_per_second": 1.271, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.064608335494995, | |
| "eval_runtime": 98.342, | |
| "eval_samples_per_second": 10.169, | |
| "eval_steps_per_second": 1.271, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.05263614654541, | |
| "eval_runtime": 98.3419, | |
| "eval_samples_per_second": 10.169, | |
| "eval_steps_per_second": 1.271, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.5297450424929178e-06, | |
| "loss": 1.9341, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.039722442626953, | |
| "eval_runtime": 98.3247, | |
| "eval_samples_per_second": 10.17, | |
| "eval_steps_per_second": 1.271, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.0163352489471436, | |
| "eval_runtime": 98.3135, | |
| "eval_samples_per_second": 10.172, | |
| "eval_steps_per_second": 1.271, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.9980436563491821, | |
| "eval_runtime": 98.3315, | |
| "eval_samples_per_second": 10.17, | |
| "eval_steps_per_second": 1.271, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.7847025495750709e-06, | |
| "loss": 2.0446, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.9802043437957764, | |
| "eval_runtime": 98.3457, | |
| "eval_samples_per_second": 10.168, | |
| "eval_steps_per_second": 1.271, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.9563994407653809, | |
| "eval_runtime": 98.3093, | |
| "eval_samples_per_second": 10.172, | |
| "eval_steps_per_second": 1.271, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.9318877458572388, | |
| "eval_runtime": 98.3329, | |
| "eval_samples_per_second": 10.17, | |
| "eval_steps_per_second": 1.271, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.039660056657224e-06, | |
| "loss": 1.776, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.905337929725647, | |
| "eval_runtime": 98.338, | |
| "eval_samples_per_second": 10.169, | |
| "eval_steps_per_second": 1.271, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.8747742176055908, | |
| "eval_runtime": 98.345, | |
| "eval_samples_per_second": 10.168, | |
| "eval_steps_per_second": 1.271, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.857684850692749, | |
| "eval_runtime": 98.3687, | |
| "eval_samples_per_second": 10.166, | |
| "eval_steps_per_second": 1.271, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.294617563739377e-06, | |
| "loss": 1.8543, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.8255928754806519, | |
| "eval_runtime": 98.4748, | |
| "eval_samples_per_second": 10.155, | |
| "eval_steps_per_second": 1.269, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.7987793684005737, | |
| "eval_runtime": 98.4525, | |
| "eval_samples_per_second": 10.157, | |
| "eval_steps_per_second": 1.27, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.763482689857483, | |
| "eval_runtime": 98.4527, | |
| "eval_samples_per_second": 10.157, | |
| "eval_steps_per_second": 1.27, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.5495750708215297e-06, | |
| "loss": 1.5595, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.7248739004135132, | |
| "eval_runtime": 98.447, | |
| "eval_samples_per_second": 10.158, | |
| "eval_steps_per_second": 1.27, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.7009263038635254, | |
| "eval_runtime": 98.4453, | |
| "eval_samples_per_second": 10.158, | |
| "eval_steps_per_second": 1.27, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.6721488237380981, | |
| "eval_runtime": 98.4709, | |
| "eval_samples_per_second": 10.155, | |
| "eval_steps_per_second": 1.269, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.804532577903683e-06, | |
| "loss": 1.6821, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.6415181159973145, | |
| "eval_runtime": 98.4439, | |
| "eval_samples_per_second": 10.158, | |
| "eval_steps_per_second": 1.27, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.6105990409851074, | |
| "eval_runtime": 98.5058, | |
| "eval_samples_per_second": 10.152, | |
| "eval_steps_per_second": 1.269, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.5836091041564941, | |
| "eval_runtime": 98.4419, | |
| "eval_samples_per_second": 10.158, | |
| "eval_steps_per_second": 1.27, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.0594900849858355e-06, | |
| "loss": 1.3598, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.5605424642562866, | |
| "eval_runtime": 98.5143, | |
| "eval_samples_per_second": 10.151, | |
| "eval_steps_per_second": 1.269, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.541277289390564, | |
| "eval_runtime": 98.4881, | |
| "eval_samples_per_second": 10.154, | |
| "eval_steps_per_second": 1.269, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.525178074836731, | |
| "eval_runtime": 98.5121, | |
| "eval_samples_per_second": 10.151, | |
| "eval_steps_per_second": 1.269, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.3144475920679886e-06, | |
| "loss": 1.2458, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.511926293373108, | |
| "eval_runtime": 98.5003, | |
| "eval_samples_per_second": 10.152, | |
| "eval_steps_per_second": 1.269, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4966439008712769, | |
| "eval_runtime": 98.4955, | |
| "eval_samples_per_second": 10.153, | |
| "eval_steps_per_second": 1.269, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.48922598361969, | |
| "eval_runtime": 98.522, | |
| "eval_samples_per_second": 10.15, | |
| "eval_steps_per_second": 1.269, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.5694050991501417e-06, | |
| "loss": 1.3413, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.478908896446228, | |
| "eval_runtime": 98.5403, | |
| "eval_samples_per_second": 10.148, | |
| "eval_steps_per_second": 1.269, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4668803215026855, | |
| "eval_runtime": 98.5077, | |
| "eval_samples_per_second": 10.151, | |
| "eval_steps_per_second": 1.269, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4557193517684937, | |
| "eval_runtime": 98.4997, | |
| "eval_samples_per_second": 10.152, | |
| "eval_steps_per_second": 1.269, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.824362606232295e-06, | |
| "loss": 0.9985, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4475480318069458, | |
| "eval_runtime": 98.498, | |
| "eval_samples_per_second": 10.152, | |
| "eval_steps_per_second": 1.269, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4383153915405273, | |
| "eval_runtime": 98.6487, | |
| "eval_samples_per_second": 10.137, | |
| "eval_steps_per_second": 1.267, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4303867816925049, | |
| "eval_runtime": 98.5564, | |
| "eval_samples_per_second": 10.146, | |
| "eval_steps_per_second": 1.268, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.079320113314448e-06, | |
| "loss": 1.4106, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4213155508041382, | |
| "eval_runtime": 98.5298, | |
| "eval_samples_per_second": 10.149, | |
| "eval_steps_per_second": 1.269, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4142318964004517, | |
| "eval_runtime": 98.66, | |
| "eval_samples_per_second": 10.136, | |
| "eval_steps_per_second": 1.267, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4069148302078247, | |
| "eval_runtime": 98.6431, | |
| "eval_samples_per_second": 10.138, | |
| "eval_steps_per_second": 1.267, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.3342776203966e-06, | |
| "loss": 1.1759, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4016706943511963, | |
| "eval_runtime": 98.5817, | |
| "eval_samples_per_second": 10.144, | |
| "eval_steps_per_second": 1.268, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.3924814462661743, | |
| "eval_runtime": 98.5114, | |
| "eval_samples_per_second": 10.151, | |
| "eval_steps_per_second": 1.269, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.3864272832870483, | |
| "eval_runtime": 98.5314, | |
| "eval_samples_per_second": 10.149, | |
| "eval_steps_per_second": 1.269, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.589235127478754e-06, | |
| "loss": 1.1242, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.3822429180145264, | |
| "eval_runtime": 98.5567, | |
| "eval_samples_per_second": 10.146, | |
| "eval_steps_per_second": 1.268, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.3771448135375977, | |
| "eval_runtime": 98.4784, | |
| "eval_samples_per_second": 10.155, | |
| "eval_steps_per_second": 1.269, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.370827317237854, | |
| "eval_runtime": 98.6665, | |
| "eval_samples_per_second": 10.135, | |
| "eval_steps_per_second": 1.267, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.844192634560906e-06, | |
| "loss": 1.2331, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.364367961883545, | |
| "eval_runtime": 98.5549, | |
| "eval_samples_per_second": 10.147, | |
| "eval_steps_per_second": 1.268, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.3595472574234009, | |
| "eval_runtime": 98.559, | |
| "eval_samples_per_second": 10.146, | |
| "eval_steps_per_second": 1.268, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.3528627157211304, | |
| "eval_runtime": 98.5582, | |
| "eval_samples_per_second": 10.146, | |
| "eval_steps_per_second": 1.268, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 5.0991501416430595e-06, | |
| "loss": 0.9176, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.3484375476837158, | |
| "eval_runtime": 98.5428, | |
| "eval_samples_per_second": 10.148, | |
| "eval_steps_per_second": 1.268, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.3425002098083496, | |
| "eval_runtime": 98.5663, | |
| "eval_samples_per_second": 10.145, | |
| "eval_steps_per_second": 1.268, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.336937665939331, | |
| "eval_runtime": 98.4821, | |
| "eval_samples_per_second": 10.154, | |
| "eval_steps_per_second": 1.269, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 5.354107648725213e-06, | |
| "loss": 1.3099, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.3323618173599243, | |
| "eval_runtime": 98.4621, | |
| "eval_samples_per_second": 10.156, | |
| "eval_steps_per_second": 1.27, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.325688123703003, | |
| "eval_runtime": 98.4324, | |
| "eval_samples_per_second": 10.159, | |
| "eval_steps_per_second": 1.27, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.323840618133545, | |
| "eval_runtime": 98.414, | |
| "eval_samples_per_second": 10.161, | |
| "eval_steps_per_second": 1.27, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 5.609065155807366e-06, | |
| "loss": 1.0311, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.3195195198059082, | |
| "eval_runtime": 98.3495, | |
| "eval_samples_per_second": 10.168, | |
| "eval_steps_per_second": 1.271, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.3115589618682861, | |
| "eval_runtime": 98.3811, | |
| "eval_samples_per_second": 10.165, | |
| "eval_steps_per_second": 1.271, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.3064874410629272, | |
| "eval_runtime": 98.3079, | |
| "eval_samples_per_second": 10.172, | |
| "eval_steps_per_second": 1.272, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 5.864022662889519e-06, | |
| "loss": 1.025, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.300554633140564, | |
| "eval_runtime": 98.4079, | |
| "eval_samples_per_second": 10.162, | |
| "eval_steps_per_second": 1.27, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2987103462219238, | |
| "eval_runtime": 98.326, | |
| "eval_samples_per_second": 10.17, | |
| "eval_steps_per_second": 1.271, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2936962842941284, | |
| "eval_runtime": 98.4071, | |
| "eval_samples_per_second": 10.162, | |
| "eval_steps_per_second": 1.27, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 6.118980169971671e-06, | |
| "loss": 1.1209, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.28831946849823, | |
| "eval_runtime": 98.3906, | |
| "eval_samples_per_second": 10.164, | |
| "eval_steps_per_second": 1.27, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2841123342514038, | |
| "eval_runtime": 98.4338, | |
| "eval_samples_per_second": 10.159, | |
| "eval_steps_per_second": 1.27, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2792314291000366, | |
| "eval_runtime": 98.4023, | |
| "eval_samples_per_second": 10.162, | |
| "eval_steps_per_second": 1.27, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 6.373937677053825e-06, | |
| "loss": 0.8206, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2769616842269897, | |
| "eval_runtime": 98.4211, | |
| "eval_samples_per_second": 10.16, | |
| "eval_steps_per_second": 1.27, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.271959662437439, | |
| "eval_runtime": 98.3642, | |
| "eval_samples_per_second": 10.166, | |
| "eval_steps_per_second": 1.271, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2643251419067383, | |
| "eval_runtime": 98.3824, | |
| "eval_samples_per_second": 10.164, | |
| "eval_steps_per_second": 1.271, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 6.628895184135977e-06, | |
| "loss": 1.2928, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2600923776626587, | |
| "eval_runtime": 98.3338, | |
| "eval_samples_per_second": 10.169, | |
| "eval_steps_per_second": 1.271, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2568110227584839, | |
| "eval_runtime": 98.3121, | |
| "eval_samples_per_second": 10.172, | |
| "eval_steps_per_second": 1.271, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.254643201828003, | |
| "eval_runtime": 98.3589, | |
| "eval_samples_per_second": 10.167, | |
| "eval_steps_per_second": 1.271, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 6.88385269121813e-06, | |
| "loss": 0.9938, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2510321140289307, | |
| "eval_runtime": 98.3386, | |
| "eval_samples_per_second": 10.169, | |
| "eval_steps_per_second": 1.271, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2455601692199707, | |
| "eval_runtime": 98.3748, | |
| "eval_samples_per_second": 10.165, | |
| "eval_steps_per_second": 1.271, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2411696910858154, | |
| "eval_runtime": 98.352, | |
| "eval_samples_per_second": 10.168, | |
| "eval_steps_per_second": 1.271, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.1388101983002834e-06, | |
| "loss": 0.9805, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2375924587249756, | |
| "eval_runtime": 98.2714, | |
| "eval_samples_per_second": 10.176, | |
| "eval_steps_per_second": 1.272, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2358232736587524, | |
| "eval_runtime": 98.4108, | |
| "eval_samples_per_second": 10.161, | |
| "eval_steps_per_second": 1.27, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2307369709014893, | |
| "eval_runtime": 98.4489, | |
| "eval_samples_per_second": 10.158, | |
| "eval_steps_per_second": 1.27, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.3937677053824365e-06, | |
| "loss": 1.077, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2273682355880737, | |
| "eval_runtime": 98.361, | |
| "eval_samples_per_second": 10.167, | |
| "eval_steps_per_second": 1.271, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.223397970199585, | |
| "eval_runtime": 98.3541, | |
| "eval_samples_per_second": 10.167, | |
| "eval_steps_per_second": 1.271, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2197948694229126, | |
| "eval_runtime": 98.3807, | |
| "eval_samples_per_second": 10.165, | |
| "eval_steps_per_second": 1.271, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.64872521246459e-06, | |
| "loss": 0.7785, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2188224792480469, | |
| "eval_runtime": 98.3148, | |
| "eval_samples_per_second": 10.171, | |
| "eval_steps_per_second": 1.271, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2149394750595093, | |
| "eval_runtime": 98.4504, | |
| "eval_samples_per_second": 10.157, | |
| "eval_steps_per_second": 1.27, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2091023921966553, | |
| "eval_runtime": 98.37, | |
| "eval_samples_per_second": 10.166, | |
| "eval_steps_per_second": 1.271, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.903682719546743e-06, | |
| "loss": 1.1816, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2046704292297363, | |
| "eval_runtime": 98.3537, | |
| "eval_samples_per_second": 10.167, | |
| "eval_steps_per_second": 1.271, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2031785249710083, | |
| "eval_runtime": 98.3469, | |
| "eval_samples_per_second": 10.168, | |
| "eval_steps_per_second": 1.271, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.2025039196014404, | |
| "eval_runtime": 98.2852, | |
| "eval_samples_per_second": 10.174, | |
| "eval_steps_per_second": 1.272, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 8.158640226628896e-06, | |
| "loss": 0.9553, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.1980048418045044, | |
| "eval_runtime": 98.4314, | |
| "eval_samples_per_second": 10.159, | |
| "eval_steps_per_second": 1.27, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.1943976879119873, | |
| "eval_runtime": 98.3703, | |
| "eval_samples_per_second": 10.166, | |
| "eval_steps_per_second": 1.271, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.1904170513153076, | |
| "eval_runtime": 98.4077, | |
| "eval_samples_per_second": 10.162, | |
| "eval_steps_per_second": 1.27, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 8.413597733711049e-06, | |
| "loss": 0.9928, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1887998580932617, | |
| "eval_runtime": 98.3789, | |
| "eval_samples_per_second": 10.165, | |
| "eval_steps_per_second": 1.271, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1872926950454712, | |
| "eval_runtime": 98.3078, | |
| "eval_samples_per_second": 10.172, | |
| "eval_steps_per_second": 1.272, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1832990646362305, | |
| "eval_runtime": 98.3915, | |
| "eval_samples_per_second": 10.163, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 8.6685552407932e-06, | |
| "loss": 1.0312, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.180888295173645, | |
| "eval_runtime": 98.33, | |
| "eval_samples_per_second": 10.17, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.178697109222412, | |
| "eval_runtime": 98.3446, | |
| "eval_samples_per_second": 10.168, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.176483154296875, | |
| "eval_runtime": 98.3605, | |
| "eval_samples_per_second": 10.167, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 8.923512747875353e-06, | |
| "loss": 0.7569, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1760860681533813, | |
| "eval_runtime": 98.3804, | |
| "eval_samples_per_second": 10.165, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.173068881034851, | |
| "eval_runtime": 98.4165, | |
| "eval_samples_per_second": 10.161, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1711184978485107, | |
| "eval_runtime": 98.4416, | |
| "eval_samples_per_second": 10.158, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9.178470254957508e-06, | |
| "loss": 1.1469, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1693283319473267, | |
| "eval_runtime": 98.5177, | |
| "eval_samples_per_second": 10.15, | |
| "eval_steps_per_second": 1.269, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1686639785766602, | |
| "eval_runtime": 98.4, | |
| "eval_samples_per_second": 10.163, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1684391498565674, | |
| "eval_runtime": 98.4275, | |
| "eval_samples_per_second": 10.16, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9.43342776203966e-06, | |
| "loss": 0.897, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1654223203659058, | |
| "eval_runtime": 98.3945, | |
| "eval_samples_per_second": 10.163, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1617776155471802, | |
| "eval_runtime": 98.3254, | |
| "eval_samples_per_second": 10.17, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1603034734725952, | |
| "eval_runtime": 98.3774, | |
| "eval_samples_per_second": 10.165, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9.688385269121813e-06, | |
| "loss": 0.8758, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1597446203231812, | |
| "eval_runtime": 98.3684, | |
| "eval_samples_per_second": 10.166, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.159226417541504, | |
| "eval_runtime": 98.3648, | |
| "eval_samples_per_second": 10.166, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1565824747085571, | |
| "eval_runtime": 98.4129, | |
| "eval_samples_per_second": 10.161, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9.943342776203968e-06, | |
| "loss": 0.999, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1539520025253296, | |
| "eval_runtime": 98.3654, | |
| "eval_samples_per_second": 10.166, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.152630090713501, | |
| "eval_runtime": 98.3163, | |
| "eval_samples_per_second": 10.171, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1519286632537842, | |
| "eval_runtime": 98.4417, | |
| "eval_samples_per_second": 10.158, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.0198300283286119e-05, | |
| "loss": 0.6816, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1526896953582764, | |
| "eval_runtime": 98.4988, | |
| "eval_samples_per_second": 10.152, | |
| "eval_steps_per_second": 1.269, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1494954824447632, | |
| "eval_runtime": 98.4536, | |
| "eval_samples_per_second": 10.157, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1475229263305664, | |
| "eval_runtime": 98.3874, | |
| "eval_samples_per_second": 10.164, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.0453257790368272e-05, | |
| "loss": 1.1027, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1461777687072754, | |
| "eval_runtime": 98.336, | |
| "eval_samples_per_second": 10.169, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.146038293838501, | |
| "eval_runtime": 98.3987, | |
| "eval_samples_per_second": 10.163, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1465715169906616, | |
| "eval_runtime": 98.3038, | |
| "eval_samples_per_second": 10.173, | |
| "eval_steps_per_second": 1.272, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.0708215297450425e-05, | |
| "loss": 0.8607, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1437655687332153, | |
| "eval_runtime": 98.3345, | |
| "eval_samples_per_second": 10.169, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1424213647842407, | |
| "eval_runtime": 98.4474, | |
| "eval_samples_per_second": 10.158, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.140133261680603, | |
| "eval_runtime": 98.4388, | |
| "eval_samples_per_second": 10.159, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.0963172804532578e-05, | |
| "loss": 0.8388, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.140416145324707, | |
| "eval_runtime": 98.4219, | |
| "eval_samples_per_second": 10.16, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1412097215652466, | |
| "eval_runtime": 98.3244, | |
| "eval_samples_per_second": 10.17, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1374417543411255, | |
| "eval_runtime": 98.4302, | |
| "eval_samples_per_second": 10.159, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.1218130311614731e-05, | |
| "loss": 0.9756, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1353052854537964, | |
| "eval_runtime": 98.3779, | |
| "eval_samples_per_second": 10.165, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1347367763519287, | |
| "eval_runtime": 98.406, | |
| "eval_samples_per_second": 10.162, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1346714496612549, | |
| "eval_runtime": 98.414, | |
| "eval_samples_per_second": 10.161, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.1473087818696883e-05, | |
| "loss": 0.6835, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1355887651443481, | |
| "eval_runtime": 98.409, | |
| "eval_samples_per_second": 10.162, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1324636936187744, | |
| "eval_runtime": 98.3679, | |
| "eval_samples_per_second": 10.166, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.1316462755203247, | |
| "eval_runtime": 98.3659, | |
| "eval_samples_per_second": 10.166, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.1728045325779038e-05, | |
| "loss": 1.1232, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1303904056549072, | |
| "eval_runtime": 98.366, | |
| "eval_samples_per_second": 10.166, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1291667222976685, | |
| "eval_runtime": 98.347, | |
| "eval_samples_per_second": 10.168, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1297775506973267, | |
| "eval_runtime": 98.3753, | |
| "eval_samples_per_second": 10.165, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.198300283286119e-05, | |
| "loss": 0.8979, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1276240348815918, | |
| "eval_runtime": 98.3851, | |
| "eval_samples_per_second": 10.164, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1269475221633911, | |
| "eval_runtime": 98.4324, | |
| "eval_samples_per_second": 10.159, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1256887912750244, | |
| "eval_runtime": 98.4143, | |
| "eval_samples_per_second": 10.161, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.2237960339943342e-05, | |
| "loss": 0.84, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1253855228424072, | |
| "eval_runtime": 98.3652, | |
| "eval_samples_per_second": 10.166, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1247769594192505, | |
| "eval_runtime": 98.3272, | |
| "eval_samples_per_second": 10.17, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1240159273147583, | |
| "eval_runtime": 98.4053, | |
| "eval_samples_per_second": 10.162, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.2492917847025497e-05, | |
| "loss": 0.9798, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1204980611801147, | |
| "eval_runtime": 98.3445, | |
| "eval_samples_per_second": 10.168, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.119938611984253, | |
| "eval_runtime": 98.3607, | |
| "eval_samples_per_second": 10.167, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1203584671020508, | |
| "eval_runtime": 98.302, | |
| "eval_samples_per_second": 10.173, | |
| "eval_steps_per_second": 1.272, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.274787535410765e-05, | |
| "loss": 0.672, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1203203201293945, | |
| "eval_runtime": 98.319, | |
| "eval_samples_per_second": 10.171, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1188935041427612, | |
| "eval_runtime": 98.3355, | |
| "eval_samples_per_second": 10.169, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1174798011779785, | |
| "eval_runtime": 98.2986, | |
| "eval_samples_per_second": 10.173, | |
| "eval_steps_per_second": 1.272, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.3002832861189801e-05, | |
| "loss": 1.0258, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1174086332321167, | |
| "eval_runtime": 98.4015, | |
| "eval_samples_per_second": 10.162, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1172817945480347, | |
| "eval_runtime": 98.4238, | |
| "eval_samples_per_second": 10.16, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.115920066833496, | |
| "eval_runtime": 98.4495, | |
| "eval_samples_per_second": 10.157, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.3257790368271954e-05, | |
| "loss": 0.8557, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1152660846710205, | |
| "eval_runtime": 98.4694, | |
| "eval_samples_per_second": 10.155, | |
| "eval_steps_per_second": 1.269, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1120198965072632, | |
| "eval_runtime": 98.3157, | |
| "eval_samples_per_second": 10.171, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1117701530456543, | |
| "eval_runtime": 98.3009, | |
| "eval_samples_per_second": 10.173, | |
| "eval_steps_per_second": 1.272, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.3512747875354108e-05, | |
| "loss": 0.8459, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.112415075302124, | |
| "eval_runtime": 98.3439, | |
| "eval_samples_per_second": 10.168, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1125112771987915, | |
| "eval_runtime": 98.3572, | |
| "eval_samples_per_second": 10.167, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1110690832138062, | |
| "eval_runtime": 98.419, | |
| "eval_samples_per_second": 10.161, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.376770538243626e-05, | |
| "loss": 0.9507, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.110371708869934, | |
| "eval_runtime": 98.3408, | |
| "eval_samples_per_second": 10.169, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1083916425704956, | |
| "eval_runtime": 98.4225, | |
| "eval_samples_per_second": 10.16, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1089563369750977, | |
| "eval_runtime": 98.3054, | |
| "eval_samples_per_second": 10.172, | |
| "eval_steps_per_second": 1.272, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.4022662889518414e-05, | |
| "loss": 0.6854, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.110315203666687, | |
| "eval_runtime": 98.304, | |
| "eval_samples_per_second": 10.173, | |
| "eval_steps_per_second": 1.272, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1081123352050781, | |
| "eval_runtime": 98.4452, | |
| "eval_samples_per_second": 10.158, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1062242984771729, | |
| "eval_runtime": 98.3714, | |
| "eval_samples_per_second": 10.166, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.4277620396600567e-05, | |
| "loss": 1.0904, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1061948537826538, | |
| "eval_runtime": 98.3346, | |
| "eval_samples_per_second": 10.169, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1069517135620117, | |
| "eval_runtime": 98.3114, | |
| "eval_samples_per_second": 10.172, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1066083908081055, | |
| "eval_runtime": 98.3387, | |
| "eval_samples_per_second": 10.169, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.453257790368272e-05, | |
| "loss": 0.849, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1053024530410767, | |
| "eval_runtime": 98.4738, | |
| "eval_samples_per_second": 10.155, | |
| "eval_steps_per_second": 1.269, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1040204763412476, | |
| "eval_runtime": 98.4347, | |
| "eval_samples_per_second": 10.159, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.102669596672058, | |
| "eval_runtime": 98.3742, | |
| "eval_samples_per_second": 10.165, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.4787535410764873e-05, | |
| "loss": 0.8601, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1038398742675781, | |
| "eval_runtime": 98.4219, | |
| "eval_samples_per_second": 10.16, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1060833930969238, | |
| "eval_runtime": 98.3163, | |
| "eval_samples_per_second": 10.171, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.1028786897659302, | |
| "eval_runtime": 98.4354, | |
| "eval_samples_per_second": 10.159, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.5042492917847024e-05, | |
| "loss": 0.9747, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.101389765739441, | |
| "eval_runtime": 98.3036, | |
| "eval_samples_per_second": 10.173, | |
| "eval_steps_per_second": 1.272, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.1008771657943726, | |
| "eval_runtime": 98.2846, | |
| "eval_samples_per_second": 10.175, | |
| "eval_steps_per_second": 1.272, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0998055934906006, | |
| "eval_runtime": 98.366, | |
| "eval_samples_per_second": 10.166, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.529745042492918e-05, | |
| "loss": 0.656, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.1010502576828003, | |
| "eval_runtime": 98.2933, | |
| "eval_samples_per_second": 10.174, | |
| "eval_steps_per_second": 1.272, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0997422933578491, | |
| "eval_runtime": 98.3167, | |
| "eval_samples_per_second": 10.171, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.097307562828064, | |
| "eval_runtime": 98.3299, | |
| "eval_samples_per_second": 10.17, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.5552407932011334e-05, | |
| "loss": 1.0969, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0984749794006348, | |
| "eval_runtime": 98.4212, | |
| "eval_samples_per_second": 10.16, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0964536666870117, | |
| "eval_runtime": 98.3498, | |
| "eval_samples_per_second": 10.168, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0976883172988892, | |
| "eval_runtime": 98.3395, | |
| "eval_samples_per_second": 10.169, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.5807365439093485e-05, | |
| "loss": 0.8523, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0951915979385376, | |
| "eval_runtime": 98.3767, | |
| "eval_samples_per_second": 10.165, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0934215784072876, | |
| "eval_runtime": 98.3799, | |
| "eval_samples_per_second": 10.165, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.094098687171936, | |
| "eval_runtime": 98.3294, | |
| "eval_samples_per_second": 10.17, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.6062322946175637e-05, | |
| "loss": 0.8655, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.093522310256958, | |
| "eval_runtime": 98.3345, | |
| "eval_samples_per_second": 10.169, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0934967994689941, | |
| "eval_runtime": 98.4151, | |
| "eval_samples_per_second": 10.161, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0937992334365845, | |
| "eval_runtime": 98.3474, | |
| "eval_samples_per_second": 10.168, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.631728045325779e-05, | |
| "loss": 0.9431, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0917603969573975, | |
| "eval_runtime": 98.3228, | |
| "eval_samples_per_second": 10.171, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0905735492706299, | |
| "eval_runtime": 98.3889, | |
| "eval_samples_per_second": 10.164, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.09126877784729, | |
| "eval_runtime": 98.4202, | |
| "eval_samples_per_second": 10.161, | |
| "eval_steps_per_second": 1.27, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.6572237960339943e-05, | |
| "loss": 0.6818, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0937858819961548, | |
| "eval_runtime": 98.3409, | |
| "eval_samples_per_second": 10.169, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.090471625328064, | |
| "eval_runtime": 98.327, | |
| "eval_samples_per_second": 10.17, | |
| "eval_steps_per_second": 1.271, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0879604816436768, | |
| "eval_runtime": 98.2991, | |
| "eval_samples_per_second": 10.173, | |
| "eval_steps_per_second": 1.272, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.6827195467422098e-05, | |
| "loss": 1.0641, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.089332938194275, | |
| "eval_runtime": 98.2713, | |
| "eval_samples_per_second": 10.176, | |
| "eval_steps_per_second": 1.272, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0885804891586304, | |
| "eval_runtime": 98.2603, | |
| "eval_samples_per_second": 10.177, | |
| "eval_steps_per_second": 1.272, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0916638374328613, | |
| "eval_runtime": 98.3446, | |
| "eval_samples_per_second": 10.168, | |
| "eval_steps_per_second": 1.271, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.708215297450425e-05, | |
| "loss": 0.8248, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0912878513336182, | |
| "eval_runtime": 98.3008, | |
| "eval_samples_per_second": 10.173, | |
| "eval_steps_per_second": 1.272, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0867935419082642, | |
| "eval_runtime": 98.397, | |
| "eval_samples_per_second": 10.163, | |
| "eval_steps_per_second": 1.27, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0863043069839478, | |
| "eval_runtime": 98.4432, | |
| "eval_samples_per_second": 10.158, | |
| "eval_steps_per_second": 1.27, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.73371104815864e-05, | |
| "loss": 0.8212, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0863004922866821, | |
| "eval_runtime": 98.4867, | |
| "eval_samples_per_second": 10.154, | |
| "eval_steps_per_second": 1.269, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0847465991973877, | |
| "eval_runtime": 98.4283, | |
| "eval_samples_per_second": 10.16, | |
| "eval_steps_per_second": 1.27, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0844731330871582, | |
| "eval_runtime": 98.5473, | |
| "eval_samples_per_second": 10.147, | |
| "eval_steps_per_second": 1.268, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.7592067988668555e-05, | |
| "loss": 0.8866, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0838637351989746, | |
| "eval_runtime": 98.5368, | |
| "eval_samples_per_second": 10.148, | |
| "eval_steps_per_second": 1.269, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0842301845550537, | |
| "eval_runtime": 98.5199, | |
| "eval_samples_per_second": 10.15, | |
| "eval_steps_per_second": 1.269, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.08109712600708, | |
| "eval_runtime": 98.4904, | |
| "eval_samples_per_second": 10.153, | |
| "eval_steps_per_second": 1.269, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.7847025495750707e-05, | |
| "loss": 0.6441, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0835895538330078, | |
| "eval_runtime": 98.5001, | |
| "eval_samples_per_second": 10.152, | |
| "eval_steps_per_second": 1.269, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.083143949508667, | |
| "eval_runtime": 98.5565, | |
| "eval_samples_per_second": 10.146, | |
| "eval_steps_per_second": 1.268, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0816445350646973, | |
| "eval_runtime": 98.4426, | |
| "eval_samples_per_second": 10.158, | |
| "eval_steps_per_second": 1.27, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.8101983002832862e-05, | |
| "loss": 0.9959, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.081859827041626, | |
| "eval_runtime": 98.513, | |
| "eval_samples_per_second": 10.151, | |
| "eval_steps_per_second": 1.269, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0832078456878662, | |
| "eval_runtime": 98.4757, | |
| "eval_samples_per_second": 10.155, | |
| "eval_steps_per_second": 1.269, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.0841658115386963, | |
| "eval_runtime": 98.4511, | |
| "eval_samples_per_second": 10.157, | |
| "eval_steps_per_second": 1.27, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.8356940509915016e-05, | |
| "loss": 0.8355, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.0814462900161743, | |
| "eval_runtime": 98.5886, | |
| "eval_samples_per_second": 10.143, | |
| "eval_steps_per_second": 1.268, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.0804240703582764, | |
| "eval_runtime": 98.5837, | |
| "eval_samples_per_second": 10.144, | |
| "eval_steps_per_second": 1.268, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.0809556245803833, | |
| "eval_runtime": 98.5132, | |
| "eval_samples_per_second": 10.151, | |
| "eval_steps_per_second": 1.269, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.8611898016997168e-05, | |
| "loss": 0.8471, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.0789848566055298, | |
| "eval_runtime": 98.4416, | |
| "eval_samples_per_second": 10.158, | |
| "eval_steps_per_second": 1.27, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.0795090198516846, | |
| "eval_runtime": 98.3873, | |
| "eval_samples_per_second": 10.164, | |
| "eval_steps_per_second": 1.27, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.0784635543823242, | |
| "eval_runtime": 98.4762, | |
| "eval_samples_per_second": 10.155, | |
| "eval_steps_per_second": 1.269, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.886685552407932e-05, | |
| "loss": 0.9504, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.0783464908599854, | |
| "eval_runtime": 98.4343, | |
| "eval_samples_per_second": 10.159, | |
| "eval_steps_per_second": 1.27, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.0772334337234497, | |
| "eval_runtime": 98.4304, | |
| "eval_samples_per_second": 10.159, | |
| "eval_steps_per_second": 1.27, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.0791562795639038, | |
| "eval_runtime": 98.426, | |
| "eval_samples_per_second": 10.16, | |
| "eval_steps_per_second": 1.27, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9121813031161474e-05, | |
| "loss": 0.6722, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.080847144126892, | |
| "eval_runtime": 98.4125, | |
| "eval_samples_per_second": 10.161, | |
| "eval_steps_per_second": 1.27, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.0787074565887451, | |
| "eval_runtime": 98.438, | |
| "eval_samples_per_second": 10.159, | |
| "eval_steps_per_second": 1.27, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.076585054397583, | |
| "eval_runtime": 98.3926, | |
| "eval_samples_per_second": 10.163, | |
| "eval_steps_per_second": 1.27, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9376770538243626e-05, | |
| "loss": 1.0543, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.0748603343963623, | |
| "eval_runtime": 98.4047, | |
| "eval_samples_per_second": 10.162, | |
| "eval_steps_per_second": 1.27, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.0754591226577759, | |
| "eval_runtime": 98.3994, | |
| "eval_samples_per_second": 10.163, | |
| "eval_steps_per_second": 1.27, | |
| "step": 2290 | |
| } | |
| ], | |
| "max_steps": 117642, | |
| "num_train_epochs": 3, | |
| "total_flos": 1.1765881845301248e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |