{ "best_metric": 1.0748603343963623, "best_model_checkpoint": "/home/htr/Works/LLM_FInetuneTrying/ModelSave_NiuTrans__Classical-Modern_Chinese_Alpaca_Plus_13B_huggingface/experiments9/checkpoint-2280", "epoch": 0.05839651830947604, "global_step": 2290, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "eval_loss": 2.1373226642608643, "eval_runtime": 98.6309, "eval_samples_per_second": 10.139, "eval_steps_per_second": 1.267, "step": 10 }, { "epoch": 0.0, "eval_loss": 2.1380295753479004, "eval_runtime": 98.364, "eval_samples_per_second": 10.166, "eval_steps_per_second": 1.271, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.54957507082153e-07, "loss": 2.0494, "step": 30 }, { "epoch": 0.0, "eval_loss": 2.1390674114227295, "eval_runtime": 98.4516, "eval_samples_per_second": 10.157, "eval_steps_per_second": 1.27, "step": 30 }, { "epoch": 0.0, "eval_loss": 2.1379237174987793, "eval_runtime": 98.4053, "eval_samples_per_second": 10.162, "eval_steps_per_second": 1.27, "step": 40 }, { "epoch": 0.0, "eval_loss": 2.1322598457336426, "eval_runtime": 98.3731, "eval_samples_per_second": 10.165, "eval_steps_per_second": 1.271, "step": 50 }, { "epoch": 0.0, "learning_rate": 5.09915014164306e-07, "loss": 2.1992, "step": 60 }, { "epoch": 0.0, "eval_loss": 2.132437229156494, "eval_runtime": 98.3807, "eval_samples_per_second": 10.165, "eval_steps_per_second": 1.271, "step": 60 }, { "epoch": 0.0, "eval_loss": 2.1324551105499268, "eval_runtime": 98.3666, "eval_samples_per_second": 10.166, "eval_steps_per_second": 1.271, "step": 70 }, { "epoch": 0.0, "eval_loss": 2.1256096363067627, "eval_runtime": 98.4457, "eval_samples_per_second": 10.158, "eval_steps_per_second": 1.27, "step": 80 }, { "epoch": 0.0, "learning_rate": 7.648725212464589e-07, "loss": 2.0523, "step": 90 }, { "epoch": 0.0, "eval_loss": 2.1243367195129395, "eval_runtime": 98.3341, "eval_samples_per_second": 10.169, "eval_steps_per_second": 1.271, "step": 90 }, { "epoch": 0.0, "eval_loss": 2.12176513671875, "eval_runtime": 98.302, "eval_samples_per_second": 10.173, "eval_steps_per_second": 1.272, "step": 100 }, { "epoch": 0.0, "eval_loss": 2.1149940490722656, "eval_runtime": 98.4139, "eval_samples_per_second": 10.161, "eval_steps_per_second": 1.27, "step": 110 }, { "epoch": 0.0, "learning_rate": 1.019830028328612e-06, "loss": 2.1769, "step": 120 }, { "epoch": 0.0, "eval_loss": 2.108457088470459, "eval_runtime": 98.3955, "eval_samples_per_second": 10.163, "eval_steps_per_second": 1.27, "step": 120 }, { "epoch": 0.0, "eval_loss": 2.104037046432495, "eval_runtime": 98.3573, "eval_samples_per_second": 10.167, "eval_steps_per_second": 1.271, "step": 130 }, { "epoch": 0.0, "eval_loss": 2.09036922454834, "eval_runtime": 98.362, "eval_samples_per_second": 10.167, "eval_steps_per_second": 1.271, "step": 140 }, { "epoch": 0.0, "learning_rate": 1.2747875354107649e-06, "loss": 2.1363, "step": 150 }, { "epoch": 0.0, "eval_loss": 2.0742862224578857, "eval_runtime": 98.3549, "eval_samples_per_second": 10.167, "eval_steps_per_second": 1.271, "step": 150 }, { "epoch": 0.0, "eval_loss": 2.064608335494995, "eval_runtime": 98.342, "eval_samples_per_second": 10.169, "eval_steps_per_second": 1.271, "step": 160 }, { "epoch": 0.0, "eval_loss": 2.05263614654541, "eval_runtime": 98.3419, "eval_samples_per_second": 10.169, "eval_steps_per_second": 1.271, "step": 170 }, { "epoch": 0.0, "learning_rate": 1.5297450424929178e-06, "loss": 1.9341, "step": 180 }, { "epoch": 0.0, "eval_loss": 2.039722442626953, "eval_runtime": 98.3247, "eval_samples_per_second": 10.17, "eval_steps_per_second": 1.271, "step": 180 }, { "epoch": 0.0, "eval_loss": 2.0163352489471436, "eval_runtime": 98.3135, "eval_samples_per_second": 10.172, "eval_steps_per_second": 1.271, "step": 190 }, { "epoch": 0.01, "eval_loss": 1.9980436563491821, "eval_runtime": 98.3315, "eval_samples_per_second": 10.17, "eval_steps_per_second": 1.271, "step": 200 }, { "epoch": 0.01, "learning_rate": 1.7847025495750709e-06, "loss": 2.0446, "step": 210 }, { "epoch": 0.01, "eval_loss": 1.9802043437957764, "eval_runtime": 98.3457, "eval_samples_per_second": 10.168, "eval_steps_per_second": 1.271, "step": 210 }, { "epoch": 0.01, "eval_loss": 1.9563994407653809, "eval_runtime": 98.3093, "eval_samples_per_second": 10.172, "eval_steps_per_second": 1.271, "step": 220 }, { "epoch": 0.01, "eval_loss": 1.9318877458572388, "eval_runtime": 98.3329, "eval_samples_per_second": 10.17, "eval_steps_per_second": 1.271, "step": 230 }, { "epoch": 0.01, "learning_rate": 2.039660056657224e-06, "loss": 1.776, "step": 240 }, { "epoch": 0.01, "eval_loss": 1.905337929725647, "eval_runtime": 98.338, "eval_samples_per_second": 10.169, "eval_steps_per_second": 1.271, "step": 240 }, { "epoch": 0.01, "eval_loss": 1.8747742176055908, "eval_runtime": 98.345, "eval_samples_per_second": 10.168, "eval_steps_per_second": 1.271, "step": 250 }, { "epoch": 0.01, "eval_loss": 1.857684850692749, "eval_runtime": 98.3687, "eval_samples_per_second": 10.166, "eval_steps_per_second": 1.271, "step": 260 }, { "epoch": 0.01, "learning_rate": 2.294617563739377e-06, "loss": 1.8543, "step": 270 }, { "epoch": 0.01, "eval_loss": 1.8255928754806519, "eval_runtime": 98.4748, "eval_samples_per_second": 10.155, "eval_steps_per_second": 1.269, "step": 270 }, { "epoch": 0.01, "eval_loss": 1.7987793684005737, "eval_runtime": 98.4525, "eval_samples_per_second": 10.157, "eval_steps_per_second": 1.27, "step": 280 }, { "epoch": 0.01, "eval_loss": 1.763482689857483, "eval_runtime": 98.4527, "eval_samples_per_second": 10.157, "eval_steps_per_second": 1.27, "step": 290 }, { "epoch": 0.01, "learning_rate": 2.5495750708215297e-06, "loss": 1.5595, "step": 300 }, { "epoch": 0.01, "eval_loss": 1.7248739004135132, "eval_runtime": 98.447, "eval_samples_per_second": 10.158, "eval_steps_per_second": 1.27, "step": 300 }, { "epoch": 0.01, "eval_loss": 1.7009263038635254, "eval_runtime": 98.4453, "eval_samples_per_second": 10.158, "eval_steps_per_second": 1.27, "step": 310 }, { "epoch": 0.01, "eval_loss": 1.6721488237380981, "eval_runtime": 98.4709, "eval_samples_per_second": 10.155, "eval_steps_per_second": 1.269, "step": 320 }, { "epoch": 0.01, "learning_rate": 2.804532577903683e-06, "loss": 1.6821, "step": 330 }, { "epoch": 0.01, "eval_loss": 1.6415181159973145, "eval_runtime": 98.4439, "eval_samples_per_second": 10.158, "eval_steps_per_second": 1.27, "step": 330 }, { "epoch": 0.01, "eval_loss": 1.6105990409851074, "eval_runtime": 98.5058, "eval_samples_per_second": 10.152, "eval_steps_per_second": 1.269, "step": 340 }, { "epoch": 0.01, "eval_loss": 1.5836091041564941, "eval_runtime": 98.4419, "eval_samples_per_second": 10.158, "eval_steps_per_second": 1.27, "step": 350 }, { "epoch": 0.01, "learning_rate": 3.0594900849858355e-06, "loss": 1.3598, "step": 360 }, { "epoch": 0.01, "eval_loss": 1.5605424642562866, "eval_runtime": 98.5143, "eval_samples_per_second": 10.151, "eval_steps_per_second": 1.269, "step": 360 }, { "epoch": 0.01, "eval_loss": 1.541277289390564, "eval_runtime": 98.4881, "eval_samples_per_second": 10.154, "eval_steps_per_second": 1.269, "step": 370 }, { "epoch": 0.01, "eval_loss": 1.525178074836731, "eval_runtime": 98.5121, "eval_samples_per_second": 10.151, "eval_steps_per_second": 1.269, "step": 380 }, { "epoch": 0.01, "learning_rate": 3.3144475920679886e-06, "loss": 1.2458, "step": 390 }, { "epoch": 0.01, "eval_loss": 1.511926293373108, "eval_runtime": 98.5003, "eval_samples_per_second": 10.152, "eval_steps_per_second": 1.269, "step": 390 }, { "epoch": 0.01, "eval_loss": 1.4966439008712769, "eval_runtime": 98.4955, "eval_samples_per_second": 10.153, "eval_steps_per_second": 1.269, "step": 400 }, { "epoch": 0.01, "eval_loss": 1.48922598361969, "eval_runtime": 98.522, "eval_samples_per_second": 10.15, "eval_steps_per_second": 1.269, "step": 410 }, { "epoch": 0.01, "learning_rate": 3.5694050991501417e-06, "loss": 1.3413, "step": 420 }, { "epoch": 0.01, "eval_loss": 1.478908896446228, "eval_runtime": 98.5403, "eval_samples_per_second": 10.148, "eval_steps_per_second": 1.269, "step": 420 }, { "epoch": 0.01, "eval_loss": 1.4668803215026855, "eval_runtime": 98.5077, "eval_samples_per_second": 10.151, "eval_steps_per_second": 1.269, "step": 430 }, { "epoch": 0.01, "eval_loss": 1.4557193517684937, "eval_runtime": 98.4997, "eval_samples_per_second": 10.152, "eval_steps_per_second": 1.269, "step": 440 }, { "epoch": 0.01, "learning_rate": 3.824362606232295e-06, "loss": 0.9985, "step": 450 }, { "epoch": 0.01, "eval_loss": 1.4475480318069458, "eval_runtime": 98.498, "eval_samples_per_second": 10.152, "eval_steps_per_second": 1.269, "step": 450 }, { "epoch": 0.01, "eval_loss": 1.4383153915405273, "eval_runtime": 98.6487, "eval_samples_per_second": 10.137, "eval_steps_per_second": 1.267, "step": 460 }, { "epoch": 0.01, "eval_loss": 1.4303867816925049, "eval_runtime": 98.5564, "eval_samples_per_second": 10.146, "eval_steps_per_second": 1.268, "step": 470 }, { "epoch": 0.01, "learning_rate": 4.079320113314448e-06, "loss": 1.4106, "step": 480 }, { "epoch": 0.01, "eval_loss": 1.4213155508041382, "eval_runtime": 98.5298, "eval_samples_per_second": 10.149, "eval_steps_per_second": 1.269, "step": 480 }, { "epoch": 0.01, "eval_loss": 1.4142318964004517, "eval_runtime": 98.66, "eval_samples_per_second": 10.136, "eval_steps_per_second": 1.267, "step": 490 }, { "epoch": 0.01, "eval_loss": 1.4069148302078247, "eval_runtime": 98.6431, "eval_samples_per_second": 10.138, "eval_steps_per_second": 1.267, "step": 500 }, { "epoch": 0.01, "learning_rate": 4.3342776203966e-06, "loss": 1.1759, "step": 510 }, { "epoch": 0.01, "eval_loss": 1.4016706943511963, "eval_runtime": 98.5817, "eval_samples_per_second": 10.144, "eval_steps_per_second": 1.268, "step": 510 }, { "epoch": 0.01, "eval_loss": 1.3924814462661743, "eval_runtime": 98.5114, "eval_samples_per_second": 10.151, "eval_steps_per_second": 1.269, "step": 520 }, { "epoch": 0.01, "eval_loss": 1.3864272832870483, "eval_runtime": 98.5314, "eval_samples_per_second": 10.149, "eval_steps_per_second": 1.269, "step": 530 }, { "epoch": 0.01, "learning_rate": 4.589235127478754e-06, "loss": 1.1242, "step": 540 }, { "epoch": 0.01, "eval_loss": 1.3822429180145264, "eval_runtime": 98.5567, "eval_samples_per_second": 10.146, "eval_steps_per_second": 1.268, "step": 540 }, { "epoch": 0.01, "eval_loss": 1.3771448135375977, "eval_runtime": 98.4784, "eval_samples_per_second": 10.155, "eval_steps_per_second": 1.269, "step": 550 }, { "epoch": 0.01, "eval_loss": 1.370827317237854, "eval_runtime": 98.6665, "eval_samples_per_second": 10.135, "eval_steps_per_second": 1.267, "step": 560 }, { "epoch": 0.01, "learning_rate": 4.844192634560906e-06, "loss": 1.2331, "step": 570 }, { "epoch": 0.01, "eval_loss": 1.364367961883545, "eval_runtime": 98.5549, "eval_samples_per_second": 10.147, "eval_steps_per_second": 1.268, "step": 570 }, { "epoch": 0.01, "eval_loss": 1.3595472574234009, "eval_runtime": 98.559, "eval_samples_per_second": 10.146, "eval_steps_per_second": 1.268, "step": 580 }, { "epoch": 0.02, "eval_loss": 1.3528627157211304, "eval_runtime": 98.5582, "eval_samples_per_second": 10.146, "eval_steps_per_second": 1.268, "step": 590 }, { "epoch": 0.02, "learning_rate": 5.0991501416430595e-06, "loss": 0.9176, "step": 600 }, { "epoch": 0.02, "eval_loss": 1.3484375476837158, "eval_runtime": 98.5428, "eval_samples_per_second": 10.148, "eval_steps_per_second": 1.268, "step": 600 }, { "epoch": 0.02, "eval_loss": 1.3425002098083496, "eval_runtime": 98.5663, "eval_samples_per_second": 10.145, "eval_steps_per_second": 1.268, "step": 610 }, { "epoch": 0.02, "eval_loss": 1.336937665939331, "eval_runtime": 98.4821, "eval_samples_per_second": 10.154, "eval_steps_per_second": 1.269, "step": 620 }, { "epoch": 0.02, "learning_rate": 5.354107648725213e-06, "loss": 1.3099, "step": 630 }, { "epoch": 0.02, "eval_loss": 1.3323618173599243, "eval_runtime": 98.4621, "eval_samples_per_second": 10.156, "eval_steps_per_second": 1.27, "step": 630 }, { "epoch": 0.02, "eval_loss": 1.325688123703003, "eval_runtime": 98.4324, "eval_samples_per_second": 10.159, "eval_steps_per_second": 1.27, "step": 640 }, { "epoch": 0.02, "eval_loss": 1.323840618133545, "eval_runtime": 98.414, "eval_samples_per_second": 10.161, "eval_steps_per_second": 1.27, "step": 650 }, { "epoch": 0.02, "learning_rate": 5.609065155807366e-06, "loss": 1.0311, "step": 660 }, { "epoch": 0.02, "eval_loss": 1.3195195198059082, "eval_runtime": 98.3495, "eval_samples_per_second": 10.168, "eval_steps_per_second": 1.271, "step": 660 }, { "epoch": 0.02, "eval_loss": 1.3115589618682861, "eval_runtime": 98.3811, "eval_samples_per_second": 10.165, "eval_steps_per_second": 1.271, "step": 670 }, { "epoch": 0.02, "eval_loss": 1.3064874410629272, "eval_runtime": 98.3079, "eval_samples_per_second": 10.172, "eval_steps_per_second": 1.272, "step": 680 }, { "epoch": 0.02, "learning_rate": 5.864022662889519e-06, "loss": 1.025, "step": 690 }, { "epoch": 0.02, "eval_loss": 1.300554633140564, "eval_runtime": 98.4079, "eval_samples_per_second": 10.162, "eval_steps_per_second": 1.27, "step": 690 }, { "epoch": 0.02, "eval_loss": 1.2987103462219238, "eval_runtime": 98.326, "eval_samples_per_second": 10.17, "eval_steps_per_second": 1.271, "step": 700 }, { "epoch": 0.02, "eval_loss": 1.2936962842941284, "eval_runtime": 98.4071, "eval_samples_per_second": 10.162, "eval_steps_per_second": 1.27, "step": 710 }, { "epoch": 0.02, "learning_rate": 6.118980169971671e-06, "loss": 1.1209, "step": 720 }, { "epoch": 0.02, "eval_loss": 1.28831946849823, "eval_runtime": 98.3906, "eval_samples_per_second": 10.164, "eval_steps_per_second": 1.27, "step": 720 }, { "epoch": 0.02, "eval_loss": 1.2841123342514038, "eval_runtime": 98.4338, "eval_samples_per_second": 10.159, "eval_steps_per_second": 1.27, "step": 730 }, { "epoch": 0.02, "eval_loss": 1.2792314291000366, "eval_runtime": 98.4023, "eval_samples_per_second": 10.162, "eval_steps_per_second": 1.27, "step": 740 }, { "epoch": 0.02, "learning_rate": 6.373937677053825e-06, "loss": 0.8206, "step": 750 }, { "epoch": 0.02, "eval_loss": 1.2769616842269897, "eval_runtime": 98.4211, "eval_samples_per_second": 10.16, "eval_steps_per_second": 1.27, "step": 750 }, { "epoch": 0.02, "eval_loss": 1.271959662437439, "eval_runtime": 98.3642, "eval_samples_per_second": 10.166, "eval_steps_per_second": 1.271, "step": 760 }, { "epoch": 0.02, "eval_loss": 1.2643251419067383, "eval_runtime": 98.3824, "eval_samples_per_second": 10.164, "eval_steps_per_second": 1.271, "step": 770 }, { "epoch": 0.02, "learning_rate": 6.628895184135977e-06, "loss": 1.2928, "step": 780 }, { "epoch": 0.02, "eval_loss": 1.2600923776626587, "eval_runtime": 98.3338, "eval_samples_per_second": 10.169, "eval_steps_per_second": 1.271, "step": 780 }, { "epoch": 0.02, "eval_loss": 1.2568110227584839, "eval_runtime": 98.3121, "eval_samples_per_second": 10.172, "eval_steps_per_second": 1.271, "step": 790 }, { "epoch": 0.02, "eval_loss": 1.254643201828003, "eval_runtime": 98.3589, "eval_samples_per_second": 10.167, "eval_steps_per_second": 1.271, "step": 800 }, { "epoch": 0.02, "learning_rate": 6.88385269121813e-06, "loss": 0.9938, "step": 810 }, { "epoch": 0.02, "eval_loss": 1.2510321140289307, "eval_runtime": 98.3386, "eval_samples_per_second": 10.169, "eval_steps_per_second": 1.271, "step": 810 }, { "epoch": 0.02, "eval_loss": 1.2455601692199707, "eval_runtime": 98.3748, "eval_samples_per_second": 10.165, "eval_steps_per_second": 1.271, "step": 820 }, { "epoch": 0.02, "eval_loss": 1.2411696910858154, "eval_runtime": 98.352, "eval_samples_per_second": 10.168, "eval_steps_per_second": 1.271, "step": 830 }, { "epoch": 0.02, "learning_rate": 7.1388101983002834e-06, "loss": 0.9805, "step": 840 }, { "epoch": 0.02, "eval_loss": 1.2375924587249756, "eval_runtime": 98.2714, "eval_samples_per_second": 10.176, "eval_steps_per_second": 1.272, "step": 840 }, { "epoch": 0.02, "eval_loss": 1.2358232736587524, "eval_runtime": 98.4108, "eval_samples_per_second": 10.161, "eval_steps_per_second": 1.27, "step": 850 }, { "epoch": 0.02, "eval_loss": 1.2307369709014893, "eval_runtime": 98.4489, "eval_samples_per_second": 10.158, "eval_steps_per_second": 1.27, "step": 860 }, { "epoch": 0.02, "learning_rate": 7.3937677053824365e-06, "loss": 1.077, "step": 870 }, { "epoch": 0.02, "eval_loss": 1.2273682355880737, "eval_runtime": 98.361, "eval_samples_per_second": 10.167, "eval_steps_per_second": 1.271, "step": 870 }, { "epoch": 0.02, "eval_loss": 1.223397970199585, "eval_runtime": 98.3541, "eval_samples_per_second": 10.167, "eval_steps_per_second": 1.271, "step": 880 }, { "epoch": 0.02, "eval_loss": 1.2197948694229126, "eval_runtime": 98.3807, "eval_samples_per_second": 10.165, "eval_steps_per_second": 1.271, "step": 890 }, { "epoch": 0.02, "learning_rate": 7.64872521246459e-06, "loss": 0.7785, "step": 900 }, { "epoch": 0.02, "eval_loss": 1.2188224792480469, "eval_runtime": 98.3148, "eval_samples_per_second": 10.171, "eval_steps_per_second": 1.271, "step": 900 }, { "epoch": 0.02, "eval_loss": 1.2149394750595093, "eval_runtime": 98.4504, "eval_samples_per_second": 10.157, "eval_steps_per_second": 1.27, "step": 910 }, { "epoch": 0.02, "eval_loss": 1.2091023921966553, "eval_runtime": 98.37, "eval_samples_per_second": 10.166, "eval_steps_per_second": 1.271, "step": 920 }, { "epoch": 0.02, "learning_rate": 7.903682719546743e-06, "loss": 1.1816, "step": 930 }, { "epoch": 0.02, "eval_loss": 1.2046704292297363, "eval_runtime": 98.3537, "eval_samples_per_second": 10.167, "eval_steps_per_second": 1.271, "step": 930 }, { "epoch": 0.02, "eval_loss": 1.2031785249710083, "eval_runtime": 98.3469, "eval_samples_per_second": 10.168, "eval_steps_per_second": 1.271, "step": 940 }, { "epoch": 0.02, "eval_loss": 1.2025039196014404, "eval_runtime": 98.2852, "eval_samples_per_second": 10.174, "eval_steps_per_second": 1.272, "step": 950 }, { "epoch": 0.02, "learning_rate": 8.158640226628896e-06, "loss": 0.9553, "step": 960 }, { "epoch": 0.02, "eval_loss": 1.1980048418045044, "eval_runtime": 98.4314, "eval_samples_per_second": 10.159, "eval_steps_per_second": 1.27, "step": 960 }, { "epoch": 0.02, "eval_loss": 1.1943976879119873, "eval_runtime": 98.3703, "eval_samples_per_second": 10.166, "eval_steps_per_second": 1.271, "step": 970 }, { "epoch": 0.02, "eval_loss": 1.1904170513153076, "eval_runtime": 98.4077, "eval_samples_per_second": 10.162, "eval_steps_per_second": 1.27, "step": 980 }, { "epoch": 0.03, "learning_rate": 8.413597733711049e-06, "loss": 0.9928, "step": 990 }, { "epoch": 0.03, "eval_loss": 1.1887998580932617, "eval_runtime": 98.3789, "eval_samples_per_second": 10.165, "eval_steps_per_second": 1.271, "step": 990 }, { "epoch": 0.03, "eval_loss": 1.1872926950454712, "eval_runtime": 98.3078, "eval_samples_per_second": 10.172, "eval_steps_per_second": 1.272, "step": 1000 }, { "epoch": 0.03, "eval_loss": 1.1832990646362305, "eval_runtime": 98.3915, "eval_samples_per_second": 10.163, "eval_steps_per_second": 1.27, "step": 1010 }, { "epoch": 0.03, "learning_rate": 8.6685552407932e-06, "loss": 1.0312, "step": 1020 }, { "epoch": 0.03, "eval_loss": 1.180888295173645, "eval_runtime": 98.33, "eval_samples_per_second": 10.17, "eval_steps_per_second": 1.271, "step": 1020 }, { "epoch": 0.03, "eval_loss": 1.178697109222412, "eval_runtime": 98.3446, "eval_samples_per_second": 10.168, "eval_steps_per_second": 1.271, "step": 1030 }, { "epoch": 0.03, "eval_loss": 1.176483154296875, "eval_runtime": 98.3605, "eval_samples_per_second": 10.167, "eval_steps_per_second": 1.271, "step": 1040 }, { "epoch": 0.03, "learning_rate": 8.923512747875353e-06, "loss": 0.7569, "step": 1050 }, { "epoch": 0.03, "eval_loss": 1.1760860681533813, "eval_runtime": 98.3804, "eval_samples_per_second": 10.165, "eval_steps_per_second": 1.271, "step": 1050 }, { "epoch": 0.03, "eval_loss": 1.173068881034851, "eval_runtime": 98.4165, "eval_samples_per_second": 10.161, "eval_steps_per_second": 1.27, "step": 1060 }, { "epoch": 0.03, "eval_loss": 1.1711184978485107, "eval_runtime": 98.4416, "eval_samples_per_second": 10.158, "eval_steps_per_second": 1.27, "step": 1070 }, { "epoch": 0.03, "learning_rate": 9.178470254957508e-06, "loss": 1.1469, "step": 1080 }, { "epoch": 0.03, "eval_loss": 1.1693283319473267, "eval_runtime": 98.5177, "eval_samples_per_second": 10.15, "eval_steps_per_second": 1.269, "step": 1080 }, { "epoch": 0.03, "eval_loss": 1.1686639785766602, "eval_runtime": 98.4, "eval_samples_per_second": 10.163, "eval_steps_per_second": 1.27, "step": 1090 }, { "epoch": 0.03, "eval_loss": 1.1684391498565674, "eval_runtime": 98.4275, "eval_samples_per_second": 10.16, "eval_steps_per_second": 1.27, "step": 1100 }, { "epoch": 0.03, "learning_rate": 9.43342776203966e-06, "loss": 0.897, "step": 1110 }, { "epoch": 0.03, "eval_loss": 1.1654223203659058, "eval_runtime": 98.3945, "eval_samples_per_second": 10.163, "eval_steps_per_second": 1.27, "step": 1110 }, { "epoch": 0.03, "eval_loss": 1.1617776155471802, "eval_runtime": 98.3254, "eval_samples_per_second": 10.17, "eval_steps_per_second": 1.271, "step": 1120 }, { "epoch": 0.03, "eval_loss": 1.1603034734725952, "eval_runtime": 98.3774, "eval_samples_per_second": 10.165, "eval_steps_per_second": 1.271, "step": 1130 }, { "epoch": 0.03, "learning_rate": 9.688385269121813e-06, "loss": 0.8758, "step": 1140 }, { "epoch": 0.03, "eval_loss": 1.1597446203231812, "eval_runtime": 98.3684, "eval_samples_per_second": 10.166, "eval_steps_per_second": 1.271, "step": 1140 }, { "epoch": 0.03, "eval_loss": 1.159226417541504, "eval_runtime": 98.3648, "eval_samples_per_second": 10.166, "eval_steps_per_second": 1.271, "step": 1150 }, { "epoch": 0.03, "eval_loss": 1.1565824747085571, "eval_runtime": 98.4129, "eval_samples_per_second": 10.161, "eval_steps_per_second": 1.27, "step": 1160 }, { "epoch": 0.03, "learning_rate": 9.943342776203968e-06, "loss": 0.999, "step": 1170 }, { "epoch": 0.03, "eval_loss": 1.1539520025253296, "eval_runtime": 98.3654, "eval_samples_per_second": 10.166, "eval_steps_per_second": 1.271, "step": 1170 }, { "epoch": 0.03, "eval_loss": 1.152630090713501, "eval_runtime": 98.3163, "eval_samples_per_second": 10.171, "eval_steps_per_second": 1.271, "step": 1180 }, { "epoch": 0.03, "eval_loss": 1.1519286632537842, "eval_runtime": 98.4417, "eval_samples_per_second": 10.158, "eval_steps_per_second": 1.27, "step": 1190 }, { "epoch": 0.03, "learning_rate": 1.0198300283286119e-05, "loss": 0.6816, "step": 1200 }, { "epoch": 0.03, "eval_loss": 1.1526896953582764, "eval_runtime": 98.4988, "eval_samples_per_second": 10.152, "eval_steps_per_second": 1.269, "step": 1200 }, { "epoch": 0.03, "eval_loss": 1.1494954824447632, "eval_runtime": 98.4536, "eval_samples_per_second": 10.157, "eval_steps_per_second": 1.27, "step": 1210 }, { "epoch": 0.03, "eval_loss": 1.1475229263305664, "eval_runtime": 98.3874, "eval_samples_per_second": 10.164, "eval_steps_per_second": 1.27, "step": 1220 }, { "epoch": 0.03, "learning_rate": 1.0453257790368272e-05, "loss": 1.1027, "step": 1230 }, { "epoch": 0.03, "eval_loss": 1.1461777687072754, "eval_runtime": 98.336, "eval_samples_per_second": 10.169, "eval_steps_per_second": 1.271, "step": 1230 }, { "epoch": 0.03, "eval_loss": 1.146038293838501, "eval_runtime": 98.3987, "eval_samples_per_second": 10.163, "eval_steps_per_second": 1.27, "step": 1240 }, { "epoch": 0.03, "eval_loss": 1.1465715169906616, "eval_runtime": 98.3038, "eval_samples_per_second": 10.173, "eval_steps_per_second": 1.272, "step": 1250 }, { "epoch": 0.03, "learning_rate": 1.0708215297450425e-05, "loss": 0.8607, "step": 1260 }, { "epoch": 0.03, "eval_loss": 1.1437655687332153, "eval_runtime": 98.3345, "eval_samples_per_second": 10.169, "eval_steps_per_second": 1.271, "step": 1260 }, { "epoch": 0.03, "eval_loss": 1.1424213647842407, "eval_runtime": 98.4474, "eval_samples_per_second": 10.158, "eval_steps_per_second": 1.27, "step": 1270 }, { "epoch": 0.03, "eval_loss": 1.140133261680603, "eval_runtime": 98.4388, "eval_samples_per_second": 10.159, "eval_steps_per_second": 1.27, "step": 1280 }, { "epoch": 0.03, "learning_rate": 1.0963172804532578e-05, "loss": 0.8388, "step": 1290 }, { "epoch": 0.03, "eval_loss": 1.140416145324707, "eval_runtime": 98.4219, "eval_samples_per_second": 10.16, "eval_steps_per_second": 1.27, "step": 1290 }, { "epoch": 0.03, "eval_loss": 1.1412097215652466, "eval_runtime": 98.3244, "eval_samples_per_second": 10.17, "eval_steps_per_second": 1.271, "step": 1300 }, { "epoch": 0.03, "eval_loss": 1.1374417543411255, "eval_runtime": 98.4302, "eval_samples_per_second": 10.159, "eval_steps_per_second": 1.27, "step": 1310 }, { "epoch": 0.03, "learning_rate": 1.1218130311614731e-05, "loss": 0.9756, "step": 1320 }, { "epoch": 0.03, "eval_loss": 1.1353052854537964, "eval_runtime": 98.3779, "eval_samples_per_second": 10.165, "eval_steps_per_second": 1.271, "step": 1320 }, { "epoch": 0.03, "eval_loss": 1.1347367763519287, "eval_runtime": 98.406, "eval_samples_per_second": 10.162, "eval_steps_per_second": 1.27, "step": 1330 }, { "epoch": 0.03, "eval_loss": 1.1346714496612549, "eval_runtime": 98.414, "eval_samples_per_second": 10.161, "eval_steps_per_second": 1.27, "step": 1340 }, { "epoch": 0.03, "learning_rate": 1.1473087818696883e-05, "loss": 0.6835, "step": 1350 }, { "epoch": 0.03, "eval_loss": 1.1355887651443481, "eval_runtime": 98.409, "eval_samples_per_second": 10.162, "eval_steps_per_second": 1.27, "step": 1350 }, { "epoch": 0.03, "eval_loss": 1.1324636936187744, "eval_runtime": 98.3679, "eval_samples_per_second": 10.166, "eval_steps_per_second": 1.271, "step": 1360 }, { "epoch": 0.03, "eval_loss": 1.1316462755203247, "eval_runtime": 98.3659, "eval_samples_per_second": 10.166, "eval_steps_per_second": 1.271, "step": 1370 }, { "epoch": 0.04, "learning_rate": 1.1728045325779038e-05, "loss": 1.1232, "step": 1380 }, { "epoch": 0.04, "eval_loss": 1.1303904056549072, "eval_runtime": 98.366, "eval_samples_per_second": 10.166, "eval_steps_per_second": 1.271, "step": 1380 }, { "epoch": 0.04, "eval_loss": 1.1291667222976685, "eval_runtime": 98.347, "eval_samples_per_second": 10.168, "eval_steps_per_second": 1.271, "step": 1390 }, { "epoch": 0.04, "eval_loss": 1.1297775506973267, "eval_runtime": 98.3753, "eval_samples_per_second": 10.165, "eval_steps_per_second": 1.271, "step": 1400 }, { "epoch": 0.04, "learning_rate": 1.198300283286119e-05, "loss": 0.8979, "step": 1410 }, { "epoch": 0.04, "eval_loss": 1.1276240348815918, "eval_runtime": 98.3851, "eval_samples_per_second": 10.164, "eval_steps_per_second": 1.271, "step": 1410 }, { "epoch": 0.04, "eval_loss": 1.1269475221633911, "eval_runtime": 98.4324, "eval_samples_per_second": 10.159, "eval_steps_per_second": 1.27, "step": 1420 }, { "epoch": 0.04, "eval_loss": 1.1256887912750244, "eval_runtime": 98.4143, "eval_samples_per_second": 10.161, "eval_steps_per_second": 1.27, "step": 1430 }, { "epoch": 0.04, "learning_rate": 1.2237960339943342e-05, "loss": 0.84, "step": 1440 }, { "epoch": 0.04, "eval_loss": 1.1253855228424072, "eval_runtime": 98.3652, "eval_samples_per_second": 10.166, "eval_steps_per_second": 1.271, "step": 1440 }, { "epoch": 0.04, "eval_loss": 1.1247769594192505, "eval_runtime": 98.3272, "eval_samples_per_second": 10.17, "eval_steps_per_second": 1.271, "step": 1450 }, { "epoch": 0.04, "eval_loss": 1.1240159273147583, "eval_runtime": 98.4053, "eval_samples_per_second": 10.162, "eval_steps_per_second": 1.27, "step": 1460 }, { "epoch": 0.04, "learning_rate": 1.2492917847025497e-05, "loss": 0.9798, "step": 1470 }, { "epoch": 0.04, "eval_loss": 1.1204980611801147, "eval_runtime": 98.3445, "eval_samples_per_second": 10.168, "eval_steps_per_second": 1.271, "step": 1470 }, { "epoch": 0.04, "eval_loss": 1.119938611984253, "eval_runtime": 98.3607, "eval_samples_per_second": 10.167, "eval_steps_per_second": 1.271, "step": 1480 }, { "epoch": 0.04, "eval_loss": 1.1203584671020508, "eval_runtime": 98.302, "eval_samples_per_second": 10.173, "eval_steps_per_second": 1.272, "step": 1490 }, { "epoch": 0.04, "learning_rate": 1.274787535410765e-05, "loss": 0.672, "step": 1500 }, { "epoch": 0.04, "eval_loss": 1.1203203201293945, "eval_runtime": 98.319, "eval_samples_per_second": 10.171, "eval_steps_per_second": 1.271, "step": 1500 }, { "epoch": 0.04, "eval_loss": 1.1188935041427612, "eval_runtime": 98.3355, "eval_samples_per_second": 10.169, "eval_steps_per_second": 1.271, "step": 1510 }, { "epoch": 0.04, "eval_loss": 1.1174798011779785, "eval_runtime": 98.2986, "eval_samples_per_second": 10.173, "eval_steps_per_second": 1.272, "step": 1520 }, { "epoch": 0.04, "learning_rate": 1.3002832861189801e-05, "loss": 1.0258, "step": 1530 }, { "epoch": 0.04, "eval_loss": 1.1174086332321167, "eval_runtime": 98.4015, "eval_samples_per_second": 10.162, "eval_steps_per_second": 1.27, "step": 1530 }, { "epoch": 0.04, "eval_loss": 1.1172817945480347, "eval_runtime": 98.4238, "eval_samples_per_second": 10.16, "eval_steps_per_second": 1.27, "step": 1540 }, { "epoch": 0.04, "eval_loss": 1.115920066833496, "eval_runtime": 98.4495, "eval_samples_per_second": 10.157, "eval_steps_per_second": 1.27, "step": 1550 }, { "epoch": 0.04, "learning_rate": 1.3257790368271954e-05, "loss": 0.8557, "step": 1560 }, { "epoch": 0.04, "eval_loss": 1.1152660846710205, "eval_runtime": 98.4694, "eval_samples_per_second": 10.155, "eval_steps_per_second": 1.269, "step": 1560 }, { "epoch": 0.04, "eval_loss": 1.1120198965072632, "eval_runtime": 98.3157, "eval_samples_per_second": 10.171, "eval_steps_per_second": 1.271, "step": 1570 }, { "epoch": 0.04, "eval_loss": 1.1117701530456543, "eval_runtime": 98.3009, "eval_samples_per_second": 10.173, "eval_steps_per_second": 1.272, "step": 1580 }, { "epoch": 0.04, "learning_rate": 1.3512747875354108e-05, "loss": 0.8459, "step": 1590 }, { "epoch": 0.04, "eval_loss": 1.112415075302124, "eval_runtime": 98.3439, "eval_samples_per_second": 10.168, "eval_steps_per_second": 1.271, "step": 1590 }, { "epoch": 0.04, "eval_loss": 1.1125112771987915, "eval_runtime": 98.3572, "eval_samples_per_second": 10.167, "eval_steps_per_second": 1.271, "step": 1600 }, { "epoch": 0.04, "eval_loss": 1.1110690832138062, "eval_runtime": 98.419, "eval_samples_per_second": 10.161, "eval_steps_per_second": 1.27, "step": 1610 }, { "epoch": 0.04, "learning_rate": 1.376770538243626e-05, "loss": 0.9507, "step": 1620 }, { "epoch": 0.04, "eval_loss": 1.110371708869934, "eval_runtime": 98.3408, "eval_samples_per_second": 10.169, "eval_steps_per_second": 1.271, "step": 1620 }, { "epoch": 0.04, "eval_loss": 1.1083916425704956, "eval_runtime": 98.4225, "eval_samples_per_second": 10.16, "eval_steps_per_second": 1.27, "step": 1630 }, { "epoch": 0.04, "eval_loss": 1.1089563369750977, "eval_runtime": 98.3054, "eval_samples_per_second": 10.172, "eval_steps_per_second": 1.272, "step": 1640 }, { "epoch": 0.04, "learning_rate": 1.4022662889518414e-05, "loss": 0.6854, "step": 1650 }, { "epoch": 0.04, "eval_loss": 1.110315203666687, "eval_runtime": 98.304, "eval_samples_per_second": 10.173, "eval_steps_per_second": 1.272, "step": 1650 }, { "epoch": 0.04, "eval_loss": 1.1081123352050781, "eval_runtime": 98.4452, "eval_samples_per_second": 10.158, "eval_steps_per_second": 1.27, "step": 1660 }, { "epoch": 0.04, "eval_loss": 1.1062242984771729, "eval_runtime": 98.3714, "eval_samples_per_second": 10.166, "eval_steps_per_second": 1.271, "step": 1670 }, { "epoch": 0.04, "learning_rate": 1.4277620396600567e-05, "loss": 1.0904, "step": 1680 }, { "epoch": 0.04, "eval_loss": 1.1061948537826538, "eval_runtime": 98.3346, "eval_samples_per_second": 10.169, "eval_steps_per_second": 1.271, "step": 1680 }, { "epoch": 0.04, "eval_loss": 1.1069517135620117, "eval_runtime": 98.3114, "eval_samples_per_second": 10.172, "eval_steps_per_second": 1.271, "step": 1690 }, { "epoch": 0.04, "eval_loss": 1.1066083908081055, "eval_runtime": 98.3387, "eval_samples_per_second": 10.169, "eval_steps_per_second": 1.271, "step": 1700 }, { "epoch": 0.04, "learning_rate": 1.453257790368272e-05, "loss": 0.849, "step": 1710 }, { "epoch": 0.04, "eval_loss": 1.1053024530410767, "eval_runtime": 98.4738, "eval_samples_per_second": 10.155, "eval_steps_per_second": 1.269, "step": 1710 }, { "epoch": 0.04, "eval_loss": 1.1040204763412476, "eval_runtime": 98.4347, "eval_samples_per_second": 10.159, "eval_steps_per_second": 1.27, "step": 1720 }, { "epoch": 0.04, "eval_loss": 1.102669596672058, "eval_runtime": 98.3742, "eval_samples_per_second": 10.165, "eval_steps_per_second": 1.271, "step": 1730 }, { "epoch": 0.04, "learning_rate": 1.4787535410764873e-05, "loss": 0.8601, "step": 1740 }, { "epoch": 0.04, "eval_loss": 1.1038398742675781, "eval_runtime": 98.4219, "eval_samples_per_second": 10.16, "eval_steps_per_second": 1.27, "step": 1740 }, { "epoch": 0.04, "eval_loss": 1.1060833930969238, "eval_runtime": 98.3163, "eval_samples_per_second": 10.171, "eval_steps_per_second": 1.271, "step": 1750 }, { "epoch": 0.04, "eval_loss": 1.1028786897659302, "eval_runtime": 98.4354, "eval_samples_per_second": 10.159, "eval_steps_per_second": 1.27, "step": 1760 }, { "epoch": 0.05, "learning_rate": 1.5042492917847024e-05, "loss": 0.9747, "step": 1770 }, { "epoch": 0.05, "eval_loss": 1.101389765739441, "eval_runtime": 98.3036, "eval_samples_per_second": 10.173, "eval_steps_per_second": 1.272, "step": 1770 }, { "epoch": 0.05, "eval_loss": 1.1008771657943726, "eval_runtime": 98.2846, "eval_samples_per_second": 10.175, "eval_steps_per_second": 1.272, "step": 1780 }, { "epoch": 0.05, "eval_loss": 1.0998055934906006, "eval_runtime": 98.366, "eval_samples_per_second": 10.166, "eval_steps_per_second": 1.271, "step": 1790 }, { "epoch": 0.05, "learning_rate": 1.529745042492918e-05, "loss": 0.656, "step": 1800 }, { "epoch": 0.05, "eval_loss": 1.1010502576828003, "eval_runtime": 98.2933, "eval_samples_per_second": 10.174, "eval_steps_per_second": 1.272, "step": 1800 }, { "epoch": 0.05, "eval_loss": 1.0997422933578491, "eval_runtime": 98.3167, "eval_samples_per_second": 10.171, "eval_steps_per_second": 1.271, "step": 1810 }, { "epoch": 0.05, "eval_loss": 1.097307562828064, "eval_runtime": 98.3299, "eval_samples_per_second": 10.17, "eval_steps_per_second": 1.271, "step": 1820 }, { "epoch": 0.05, "learning_rate": 1.5552407932011334e-05, "loss": 1.0969, "step": 1830 }, { "epoch": 0.05, "eval_loss": 1.0984749794006348, "eval_runtime": 98.4212, "eval_samples_per_second": 10.16, "eval_steps_per_second": 1.27, "step": 1830 }, { "epoch": 0.05, "eval_loss": 1.0964536666870117, "eval_runtime": 98.3498, "eval_samples_per_second": 10.168, "eval_steps_per_second": 1.271, "step": 1840 }, { "epoch": 0.05, "eval_loss": 1.0976883172988892, "eval_runtime": 98.3395, "eval_samples_per_second": 10.169, "eval_steps_per_second": 1.271, "step": 1850 }, { "epoch": 0.05, "learning_rate": 1.5807365439093485e-05, "loss": 0.8523, "step": 1860 }, { "epoch": 0.05, "eval_loss": 1.0951915979385376, "eval_runtime": 98.3767, "eval_samples_per_second": 10.165, "eval_steps_per_second": 1.271, "step": 1860 }, { "epoch": 0.05, "eval_loss": 1.0934215784072876, "eval_runtime": 98.3799, "eval_samples_per_second": 10.165, "eval_steps_per_second": 1.271, "step": 1870 }, { "epoch": 0.05, "eval_loss": 1.094098687171936, "eval_runtime": 98.3294, "eval_samples_per_second": 10.17, "eval_steps_per_second": 1.271, "step": 1880 }, { "epoch": 0.05, "learning_rate": 1.6062322946175637e-05, "loss": 0.8655, "step": 1890 }, { "epoch": 0.05, "eval_loss": 1.093522310256958, "eval_runtime": 98.3345, "eval_samples_per_second": 10.169, "eval_steps_per_second": 1.271, "step": 1890 }, { "epoch": 0.05, "eval_loss": 1.0934967994689941, "eval_runtime": 98.4151, "eval_samples_per_second": 10.161, "eval_steps_per_second": 1.27, "step": 1900 }, { "epoch": 0.05, "eval_loss": 1.0937992334365845, "eval_runtime": 98.3474, "eval_samples_per_second": 10.168, "eval_steps_per_second": 1.271, "step": 1910 }, { "epoch": 0.05, "learning_rate": 1.631728045325779e-05, "loss": 0.9431, "step": 1920 }, { "epoch": 0.05, "eval_loss": 1.0917603969573975, "eval_runtime": 98.3228, "eval_samples_per_second": 10.171, "eval_steps_per_second": 1.271, "step": 1920 }, { "epoch": 0.05, "eval_loss": 1.0905735492706299, "eval_runtime": 98.3889, "eval_samples_per_second": 10.164, "eval_steps_per_second": 1.27, "step": 1930 }, { "epoch": 0.05, "eval_loss": 1.09126877784729, "eval_runtime": 98.4202, "eval_samples_per_second": 10.161, "eval_steps_per_second": 1.27, "step": 1940 }, { "epoch": 0.05, "learning_rate": 1.6572237960339943e-05, "loss": 0.6818, "step": 1950 }, { "epoch": 0.05, "eval_loss": 1.0937858819961548, "eval_runtime": 98.3409, "eval_samples_per_second": 10.169, "eval_steps_per_second": 1.271, "step": 1950 }, { "epoch": 0.05, "eval_loss": 1.090471625328064, "eval_runtime": 98.327, "eval_samples_per_second": 10.17, "eval_steps_per_second": 1.271, "step": 1960 }, { "epoch": 0.05, "eval_loss": 1.0879604816436768, "eval_runtime": 98.2991, "eval_samples_per_second": 10.173, "eval_steps_per_second": 1.272, "step": 1970 }, { "epoch": 0.05, "learning_rate": 1.6827195467422098e-05, "loss": 1.0641, "step": 1980 }, { "epoch": 0.05, "eval_loss": 1.089332938194275, "eval_runtime": 98.2713, "eval_samples_per_second": 10.176, "eval_steps_per_second": 1.272, "step": 1980 }, { "epoch": 0.05, "eval_loss": 1.0885804891586304, "eval_runtime": 98.2603, "eval_samples_per_second": 10.177, "eval_steps_per_second": 1.272, "step": 1990 }, { "epoch": 0.05, "eval_loss": 1.0916638374328613, "eval_runtime": 98.3446, "eval_samples_per_second": 10.168, "eval_steps_per_second": 1.271, "step": 2000 }, { "epoch": 0.05, "learning_rate": 1.708215297450425e-05, "loss": 0.8248, "step": 2010 }, { "epoch": 0.05, "eval_loss": 1.0912878513336182, "eval_runtime": 98.3008, "eval_samples_per_second": 10.173, "eval_steps_per_second": 1.272, "step": 2010 }, { "epoch": 0.05, "eval_loss": 1.0867935419082642, "eval_runtime": 98.397, "eval_samples_per_second": 10.163, "eval_steps_per_second": 1.27, "step": 2020 }, { "epoch": 0.05, "eval_loss": 1.0863043069839478, "eval_runtime": 98.4432, "eval_samples_per_second": 10.158, "eval_steps_per_second": 1.27, "step": 2030 }, { "epoch": 0.05, "learning_rate": 1.73371104815864e-05, "loss": 0.8212, "step": 2040 }, { "epoch": 0.05, "eval_loss": 1.0863004922866821, "eval_runtime": 98.4867, "eval_samples_per_second": 10.154, "eval_steps_per_second": 1.269, "step": 2040 }, { "epoch": 0.05, "eval_loss": 1.0847465991973877, "eval_runtime": 98.4283, "eval_samples_per_second": 10.16, "eval_steps_per_second": 1.27, "step": 2050 }, { "epoch": 0.05, "eval_loss": 1.0844731330871582, "eval_runtime": 98.5473, "eval_samples_per_second": 10.147, "eval_steps_per_second": 1.268, "step": 2060 }, { "epoch": 0.05, "learning_rate": 1.7592067988668555e-05, "loss": 0.8866, "step": 2070 }, { "epoch": 0.05, "eval_loss": 1.0838637351989746, "eval_runtime": 98.5368, "eval_samples_per_second": 10.148, "eval_steps_per_second": 1.269, "step": 2070 }, { "epoch": 0.05, "eval_loss": 1.0842301845550537, "eval_runtime": 98.5199, "eval_samples_per_second": 10.15, "eval_steps_per_second": 1.269, "step": 2080 }, { "epoch": 0.05, "eval_loss": 1.08109712600708, "eval_runtime": 98.4904, "eval_samples_per_second": 10.153, "eval_steps_per_second": 1.269, "step": 2090 }, { "epoch": 0.05, "learning_rate": 1.7847025495750707e-05, "loss": 0.6441, "step": 2100 }, { "epoch": 0.05, "eval_loss": 1.0835895538330078, "eval_runtime": 98.5001, "eval_samples_per_second": 10.152, "eval_steps_per_second": 1.269, "step": 2100 }, { "epoch": 0.05, "eval_loss": 1.083143949508667, "eval_runtime": 98.5565, "eval_samples_per_second": 10.146, "eval_steps_per_second": 1.268, "step": 2110 }, { "epoch": 0.05, "eval_loss": 1.0816445350646973, "eval_runtime": 98.4426, "eval_samples_per_second": 10.158, "eval_steps_per_second": 1.27, "step": 2120 }, { "epoch": 0.05, "learning_rate": 1.8101983002832862e-05, "loss": 0.9959, "step": 2130 }, { "epoch": 0.05, "eval_loss": 1.081859827041626, "eval_runtime": 98.513, "eval_samples_per_second": 10.151, "eval_steps_per_second": 1.269, "step": 2130 }, { "epoch": 0.05, "eval_loss": 1.0832078456878662, "eval_runtime": 98.4757, "eval_samples_per_second": 10.155, "eval_steps_per_second": 1.269, "step": 2140 }, { "epoch": 0.05, "eval_loss": 1.0841658115386963, "eval_runtime": 98.4511, "eval_samples_per_second": 10.157, "eval_steps_per_second": 1.27, "step": 2150 }, { "epoch": 0.06, "learning_rate": 1.8356940509915016e-05, "loss": 0.8355, "step": 2160 }, { "epoch": 0.06, "eval_loss": 1.0814462900161743, "eval_runtime": 98.5886, "eval_samples_per_second": 10.143, "eval_steps_per_second": 1.268, "step": 2160 }, { "epoch": 0.06, "eval_loss": 1.0804240703582764, "eval_runtime": 98.5837, "eval_samples_per_second": 10.144, "eval_steps_per_second": 1.268, "step": 2170 }, { "epoch": 0.06, "eval_loss": 1.0809556245803833, "eval_runtime": 98.5132, "eval_samples_per_second": 10.151, "eval_steps_per_second": 1.269, "step": 2180 }, { "epoch": 0.06, "learning_rate": 1.8611898016997168e-05, "loss": 0.8471, "step": 2190 }, { "epoch": 0.06, "eval_loss": 1.0789848566055298, "eval_runtime": 98.4416, "eval_samples_per_second": 10.158, "eval_steps_per_second": 1.27, "step": 2190 }, { "epoch": 0.06, "eval_loss": 1.0795090198516846, "eval_runtime": 98.3873, "eval_samples_per_second": 10.164, "eval_steps_per_second": 1.27, "step": 2200 }, { "epoch": 0.06, "eval_loss": 1.0784635543823242, "eval_runtime": 98.4762, "eval_samples_per_second": 10.155, "eval_steps_per_second": 1.269, "step": 2210 }, { "epoch": 0.06, "learning_rate": 1.886685552407932e-05, "loss": 0.9504, "step": 2220 }, { "epoch": 0.06, "eval_loss": 1.0783464908599854, "eval_runtime": 98.4343, "eval_samples_per_second": 10.159, "eval_steps_per_second": 1.27, "step": 2220 }, { "epoch": 0.06, "eval_loss": 1.0772334337234497, "eval_runtime": 98.4304, "eval_samples_per_second": 10.159, "eval_steps_per_second": 1.27, "step": 2230 }, { "epoch": 0.06, "eval_loss": 1.0791562795639038, "eval_runtime": 98.426, "eval_samples_per_second": 10.16, "eval_steps_per_second": 1.27, "step": 2240 }, { "epoch": 0.06, "learning_rate": 1.9121813031161474e-05, "loss": 0.6722, "step": 2250 }, { "epoch": 0.06, "eval_loss": 1.080847144126892, "eval_runtime": 98.4125, "eval_samples_per_second": 10.161, "eval_steps_per_second": 1.27, "step": 2250 }, { "epoch": 0.06, "eval_loss": 1.0787074565887451, "eval_runtime": 98.438, "eval_samples_per_second": 10.159, "eval_steps_per_second": 1.27, "step": 2260 }, { "epoch": 0.06, "eval_loss": 1.076585054397583, "eval_runtime": 98.3926, "eval_samples_per_second": 10.163, "eval_steps_per_second": 1.27, "step": 2270 }, { "epoch": 0.06, "learning_rate": 1.9376770538243626e-05, "loss": 1.0543, "step": 2280 }, { "epoch": 0.06, "eval_loss": 1.0748603343963623, "eval_runtime": 98.4047, "eval_samples_per_second": 10.162, "eval_steps_per_second": 1.27, "step": 2280 }, { "epoch": 0.06, "eval_loss": 1.0754591226577759, "eval_runtime": 98.3994, "eval_samples_per_second": 10.163, "eval_steps_per_second": 1.27, "step": 2290 } ], "max_steps": 117642, "num_train_epochs": 3, "total_flos": 1.1765881845301248e+18, "trial_name": null, "trial_params": null }