GuwenLLAMA / trainer_state.json
ElShaddollRaven's picture
model upload
4fce53b
{
"best_metric": 1.0748603343963623,
"best_model_checkpoint": "/home/htr/Works/LLM_FInetuneTrying/ModelSave_NiuTrans__Classical-Modern_Chinese_Alpaca_Plus_13B_huggingface/experiments9/checkpoint-2280",
"epoch": 0.05839651830947604,
"global_step": 2290,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"eval_loss": 2.1373226642608643,
"eval_runtime": 98.6309,
"eval_samples_per_second": 10.139,
"eval_steps_per_second": 1.267,
"step": 10
},
{
"epoch": 0.0,
"eval_loss": 2.1380295753479004,
"eval_runtime": 98.364,
"eval_samples_per_second": 10.166,
"eval_steps_per_second": 1.271,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 2.54957507082153e-07,
"loss": 2.0494,
"step": 30
},
{
"epoch": 0.0,
"eval_loss": 2.1390674114227295,
"eval_runtime": 98.4516,
"eval_samples_per_second": 10.157,
"eval_steps_per_second": 1.27,
"step": 30
},
{
"epoch": 0.0,
"eval_loss": 2.1379237174987793,
"eval_runtime": 98.4053,
"eval_samples_per_second": 10.162,
"eval_steps_per_second": 1.27,
"step": 40
},
{
"epoch": 0.0,
"eval_loss": 2.1322598457336426,
"eval_runtime": 98.3731,
"eval_samples_per_second": 10.165,
"eval_steps_per_second": 1.271,
"step": 50
},
{
"epoch": 0.0,
"learning_rate": 5.09915014164306e-07,
"loss": 2.1992,
"step": 60
},
{
"epoch": 0.0,
"eval_loss": 2.132437229156494,
"eval_runtime": 98.3807,
"eval_samples_per_second": 10.165,
"eval_steps_per_second": 1.271,
"step": 60
},
{
"epoch": 0.0,
"eval_loss": 2.1324551105499268,
"eval_runtime": 98.3666,
"eval_samples_per_second": 10.166,
"eval_steps_per_second": 1.271,
"step": 70
},
{
"epoch": 0.0,
"eval_loss": 2.1256096363067627,
"eval_runtime": 98.4457,
"eval_samples_per_second": 10.158,
"eval_steps_per_second": 1.27,
"step": 80
},
{
"epoch": 0.0,
"learning_rate": 7.648725212464589e-07,
"loss": 2.0523,
"step": 90
},
{
"epoch": 0.0,
"eval_loss": 2.1243367195129395,
"eval_runtime": 98.3341,
"eval_samples_per_second": 10.169,
"eval_steps_per_second": 1.271,
"step": 90
},
{
"epoch": 0.0,
"eval_loss": 2.12176513671875,
"eval_runtime": 98.302,
"eval_samples_per_second": 10.173,
"eval_steps_per_second": 1.272,
"step": 100
},
{
"epoch": 0.0,
"eval_loss": 2.1149940490722656,
"eval_runtime": 98.4139,
"eval_samples_per_second": 10.161,
"eval_steps_per_second": 1.27,
"step": 110
},
{
"epoch": 0.0,
"learning_rate": 1.019830028328612e-06,
"loss": 2.1769,
"step": 120
},
{
"epoch": 0.0,
"eval_loss": 2.108457088470459,
"eval_runtime": 98.3955,
"eval_samples_per_second": 10.163,
"eval_steps_per_second": 1.27,
"step": 120
},
{
"epoch": 0.0,
"eval_loss": 2.104037046432495,
"eval_runtime": 98.3573,
"eval_samples_per_second": 10.167,
"eval_steps_per_second": 1.271,
"step": 130
},
{
"epoch": 0.0,
"eval_loss": 2.09036922454834,
"eval_runtime": 98.362,
"eval_samples_per_second": 10.167,
"eval_steps_per_second": 1.271,
"step": 140
},
{
"epoch": 0.0,
"learning_rate": 1.2747875354107649e-06,
"loss": 2.1363,
"step": 150
},
{
"epoch": 0.0,
"eval_loss": 2.0742862224578857,
"eval_runtime": 98.3549,
"eval_samples_per_second": 10.167,
"eval_steps_per_second": 1.271,
"step": 150
},
{
"epoch": 0.0,
"eval_loss": 2.064608335494995,
"eval_runtime": 98.342,
"eval_samples_per_second": 10.169,
"eval_steps_per_second": 1.271,
"step": 160
},
{
"epoch": 0.0,
"eval_loss": 2.05263614654541,
"eval_runtime": 98.3419,
"eval_samples_per_second": 10.169,
"eval_steps_per_second": 1.271,
"step": 170
},
{
"epoch": 0.0,
"learning_rate": 1.5297450424929178e-06,
"loss": 1.9341,
"step": 180
},
{
"epoch": 0.0,
"eval_loss": 2.039722442626953,
"eval_runtime": 98.3247,
"eval_samples_per_second": 10.17,
"eval_steps_per_second": 1.271,
"step": 180
},
{
"epoch": 0.0,
"eval_loss": 2.0163352489471436,
"eval_runtime": 98.3135,
"eval_samples_per_second": 10.172,
"eval_steps_per_second": 1.271,
"step": 190
},
{
"epoch": 0.01,
"eval_loss": 1.9980436563491821,
"eval_runtime": 98.3315,
"eval_samples_per_second": 10.17,
"eval_steps_per_second": 1.271,
"step": 200
},
{
"epoch": 0.01,
"learning_rate": 1.7847025495750709e-06,
"loss": 2.0446,
"step": 210
},
{
"epoch": 0.01,
"eval_loss": 1.9802043437957764,
"eval_runtime": 98.3457,
"eval_samples_per_second": 10.168,
"eval_steps_per_second": 1.271,
"step": 210
},
{
"epoch": 0.01,
"eval_loss": 1.9563994407653809,
"eval_runtime": 98.3093,
"eval_samples_per_second": 10.172,
"eval_steps_per_second": 1.271,
"step": 220
},
{
"epoch": 0.01,
"eval_loss": 1.9318877458572388,
"eval_runtime": 98.3329,
"eval_samples_per_second": 10.17,
"eval_steps_per_second": 1.271,
"step": 230
},
{
"epoch": 0.01,
"learning_rate": 2.039660056657224e-06,
"loss": 1.776,
"step": 240
},
{
"epoch": 0.01,
"eval_loss": 1.905337929725647,
"eval_runtime": 98.338,
"eval_samples_per_second": 10.169,
"eval_steps_per_second": 1.271,
"step": 240
},
{
"epoch": 0.01,
"eval_loss": 1.8747742176055908,
"eval_runtime": 98.345,
"eval_samples_per_second": 10.168,
"eval_steps_per_second": 1.271,
"step": 250
},
{
"epoch": 0.01,
"eval_loss": 1.857684850692749,
"eval_runtime": 98.3687,
"eval_samples_per_second": 10.166,
"eval_steps_per_second": 1.271,
"step": 260
},
{
"epoch": 0.01,
"learning_rate": 2.294617563739377e-06,
"loss": 1.8543,
"step": 270
},
{
"epoch": 0.01,
"eval_loss": 1.8255928754806519,
"eval_runtime": 98.4748,
"eval_samples_per_second": 10.155,
"eval_steps_per_second": 1.269,
"step": 270
},
{
"epoch": 0.01,
"eval_loss": 1.7987793684005737,
"eval_runtime": 98.4525,
"eval_samples_per_second": 10.157,
"eval_steps_per_second": 1.27,
"step": 280
},
{
"epoch": 0.01,
"eval_loss": 1.763482689857483,
"eval_runtime": 98.4527,
"eval_samples_per_second": 10.157,
"eval_steps_per_second": 1.27,
"step": 290
},
{
"epoch": 0.01,
"learning_rate": 2.5495750708215297e-06,
"loss": 1.5595,
"step": 300
},
{
"epoch": 0.01,
"eval_loss": 1.7248739004135132,
"eval_runtime": 98.447,
"eval_samples_per_second": 10.158,
"eval_steps_per_second": 1.27,
"step": 300
},
{
"epoch": 0.01,
"eval_loss": 1.7009263038635254,
"eval_runtime": 98.4453,
"eval_samples_per_second": 10.158,
"eval_steps_per_second": 1.27,
"step": 310
},
{
"epoch": 0.01,
"eval_loss": 1.6721488237380981,
"eval_runtime": 98.4709,
"eval_samples_per_second": 10.155,
"eval_steps_per_second": 1.269,
"step": 320
},
{
"epoch": 0.01,
"learning_rate": 2.804532577903683e-06,
"loss": 1.6821,
"step": 330
},
{
"epoch": 0.01,
"eval_loss": 1.6415181159973145,
"eval_runtime": 98.4439,
"eval_samples_per_second": 10.158,
"eval_steps_per_second": 1.27,
"step": 330
},
{
"epoch": 0.01,
"eval_loss": 1.6105990409851074,
"eval_runtime": 98.5058,
"eval_samples_per_second": 10.152,
"eval_steps_per_second": 1.269,
"step": 340
},
{
"epoch": 0.01,
"eval_loss": 1.5836091041564941,
"eval_runtime": 98.4419,
"eval_samples_per_second": 10.158,
"eval_steps_per_second": 1.27,
"step": 350
},
{
"epoch": 0.01,
"learning_rate": 3.0594900849858355e-06,
"loss": 1.3598,
"step": 360
},
{
"epoch": 0.01,
"eval_loss": 1.5605424642562866,
"eval_runtime": 98.5143,
"eval_samples_per_second": 10.151,
"eval_steps_per_second": 1.269,
"step": 360
},
{
"epoch": 0.01,
"eval_loss": 1.541277289390564,
"eval_runtime": 98.4881,
"eval_samples_per_second": 10.154,
"eval_steps_per_second": 1.269,
"step": 370
},
{
"epoch": 0.01,
"eval_loss": 1.525178074836731,
"eval_runtime": 98.5121,
"eval_samples_per_second": 10.151,
"eval_steps_per_second": 1.269,
"step": 380
},
{
"epoch": 0.01,
"learning_rate": 3.3144475920679886e-06,
"loss": 1.2458,
"step": 390
},
{
"epoch": 0.01,
"eval_loss": 1.511926293373108,
"eval_runtime": 98.5003,
"eval_samples_per_second": 10.152,
"eval_steps_per_second": 1.269,
"step": 390
},
{
"epoch": 0.01,
"eval_loss": 1.4966439008712769,
"eval_runtime": 98.4955,
"eval_samples_per_second": 10.153,
"eval_steps_per_second": 1.269,
"step": 400
},
{
"epoch": 0.01,
"eval_loss": 1.48922598361969,
"eval_runtime": 98.522,
"eval_samples_per_second": 10.15,
"eval_steps_per_second": 1.269,
"step": 410
},
{
"epoch": 0.01,
"learning_rate": 3.5694050991501417e-06,
"loss": 1.3413,
"step": 420
},
{
"epoch": 0.01,
"eval_loss": 1.478908896446228,
"eval_runtime": 98.5403,
"eval_samples_per_second": 10.148,
"eval_steps_per_second": 1.269,
"step": 420
},
{
"epoch": 0.01,
"eval_loss": 1.4668803215026855,
"eval_runtime": 98.5077,
"eval_samples_per_second": 10.151,
"eval_steps_per_second": 1.269,
"step": 430
},
{
"epoch": 0.01,
"eval_loss": 1.4557193517684937,
"eval_runtime": 98.4997,
"eval_samples_per_second": 10.152,
"eval_steps_per_second": 1.269,
"step": 440
},
{
"epoch": 0.01,
"learning_rate": 3.824362606232295e-06,
"loss": 0.9985,
"step": 450
},
{
"epoch": 0.01,
"eval_loss": 1.4475480318069458,
"eval_runtime": 98.498,
"eval_samples_per_second": 10.152,
"eval_steps_per_second": 1.269,
"step": 450
},
{
"epoch": 0.01,
"eval_loss": 1.4383153915405273,
"eval_runtime": 98.6487,
"eval_samples_per_second": 10.137,
"eval_steps_per_second": 1.267,
"step": 460
},
{
"epoch": 0.01,
"eval_loss": 1.4303867816925049,
"eval_runtime": 98.5564,
"eval_samples_per_second": 10.146,
"eval_steps_per_second": 1.268,
"step": 470
},
{
"epoch": 0.01,
"learning_rate": 4.079320113314448e-06,
"loss": 1.4106,
"step": 480
},
{
"epoch": 0.01,
"eval_loss": 1.4213155508041382,
"eval_runtime": 98.5298,
"eval_samples_per_second": 10.149,
"eval_steps_per_second": 1.269,
"step": 480
},
{
"epoch": 0.01,
"eval_loss": 1.4142318964004517,
"eval_runtime": 98.66,
"eval_samples_per_second": 10.136,
"eval_steps_per_second": 1.267,
"step": 490
},
{
"epoch": 0.01,
"eval_loss": 1.4069148302078247,
"eval_runtime": 98.6431,
"eval_samples_per_second": 10.138,
"eval_steps_per_second": 1.267,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 4.3342776203966e-06,
"loss": 1.1759,
"step": 510
},
{
"epoch": 0.01,
"eval_loss": 1.4016706943511963,
"eval_runtime": 98.5817,
"eval_samples_per_second": 10.144,
"eval_steps_per_second": 1.268,
"step": 510
},
{
"epoch": 0.01,
"eval_loss": 1.3924814462661743,
"eval_runtime": 98.5114,
"eval_samples_per_second": 10.151,
"eval_steps_per_second": 1.269,
"step": 520
},
{
"epoch": 0.01,
"eval_loss": 1.3864272832870483,
"eval_runtime": 98.5314,
"eval_samples_per_second": 10.149,
"eval_steps_per_second": 1.269,
"step": 530
},
{
"epoch": 0.01,
"learning_rate": 4.589235127478754e-06,
"loss": 1.1242,
"step": 540
},
{
"epoch": 0.01,
"eval_loss": 1.3822429180145264,
"eval_runtime": 98.5567,
"eval_samples_per_second": 10.146,
"eval_steps_per_second": 1.268,
"step": 540
},
{
"epoch": 0.01,
"eval_loss": 1.3771448135375977,
"eval_runtime": 98.4784,
"eval_samples_per_second": 10.155,
"eval_steps_per_second": 1.269,
"step": 550
},
{
"epoch": 0.01,
"eval_loss": 1.370827317237854,
"eval_runtime": 98.6665,
"eval_samples_per_second": 10.135,
"eval_steps_per_second": 1.267,
"step": 560
},
{
"epoch": 0.01,
"learning_rate": 4.844192634560906e-06,
"loss": 1.2331,
"step": 570
},
{
"epoch": 0.01,
"eval_loss": 1.364367961883545,
"eval_runtime": 98.5549,
"eval_samples_per_second": 10.147,
"eval_steps_per_second": 1.268,
"step": 570
},
{
"epoch": 0.01,
"eval_loss": 1.3595472574234009,
"eval_runtime": 98.559,
"eval_samples_per_second": 10.146,
"eval_steps_per_second": 1.268,
"step": 580
},
{
"epoch": 0.02,
"eval_loss": 1.3528627157211304,
"eval_runtime": 98.5582,
"eval_samples_per_second": 10.146,
"eval_steps_per_second": 1.268,
"step": 590
},
{
"epoch": 0.02,
"learning_rate": 5.0991501416430595e-06,
"loss": 0.9176,
"step": 600
},
{
"epoch": 0.02,
"eval_loss": 1.3484375476837158,
"eval_runtime": 98.5428,
"eval_samples_per_second": 10.148,
"eval_steps_per_second": 1.268,
"step": 600
},
{
"epoch": 0.02,
"eval_loss": 1.3425002098083496,
"eval_runtime": 98.5663,
"eval_samples_per_second": 10.145,
"eval_steps_per_second": 1.268,
"step": 610
},
{
"epoch": 0.02,
"eval_loss": 1.336937665939331,
"eval_runtime": 98.4821,
"eval_samples_per_second": 10.154,
"eval_steps_per_second": 1.269,
"step": 620
},
{
"epoch": 0.02,
"learning_rate": 5.354107648725213e-06,
"loss": 1.3099,
"step": 630
},
{
"epoch": 0.02,
"eval_loss": 1.3323618173599243,
"eval_runtime": 98.4621,
"eval_samples_per_second": 10.156,
"eval_steps_per_second": 1.27,
"step": 630
},
{
"epoch": 0.02,
"eval_loss": 1.325688123703003,
"eval_runtime": 98.4324,
"eval_samples_per_second": 10.159,
"eval_steps_per_second": 1.27,
"step": 640
},
{
"epoch": 0.02,
"eval_loss": 1.323840618133545,
"eval_runtime": 98.414,
"eval_samples_per_second": 10.161,
"eval_steps_per_second": 1.27,
"step": 650
},
{
"epoch": 0.02,
"learning_rate": 5.609065155807366e-06,
"loss": 1.0311,
"step": 660
},
{
"epoch": 0.02,
"eval_loss": 1.3195195198059082,
"eval_runtime": 98.3495,
"eval_samples_per_second": 10.168,
"eval_steps_per_second": 1.271,
"step": 660
},
{
"epoch": 0.02,
"eval_loss": 1.3115589618682861,
"eval_runtime": 98.3811,
"eval_samples_per_second": 10.165,
"eval_steps_per_second": 1.271,
"step": 670
},
{
"epoch": 0.02,
"eval_loss": 1.3064874410629272,
"eval_runtime": 98.3079,
"eval_samples_per_second": 10.172,
"eval_steps_per_second": 1.272,
"step": 680
},
{
"epoch": 0.02,
"learning_rate": 5.864022662889519e-06,
"loss": 1.025,
"step": 690
},
{
"epoch": 0.02,
"eval_loss": 1.300554633140564,
"eval_runtime": 98.4079,
"eval_samples_per_second": 10.162,
"eval_steps_per_second": 1.27,
"step": 690
},
{
"epoch": 0.02,
"eval_loss": 1.2987103462219238,
"eval_runtime": 98.326,
"eval_samples_per_second": 10.17,
"eval_steps_per_second": 1.271,
"step": 700
},
{
"epoch": 0.02,
"eval_loss": 1.2936962842941284,
"eval_runtime": 98.4071,
"eval_samples_per_second": 10.162,
"eval_steps_per_second": 1.27,
"step": 710
},
{
"epoch": 0.02,
"learning_rate": 6.118980169971671e-06,
"loss": 1.1209,
"step": 720
},
{
"epoch": 0.02,
"eval_loss": 1.28831946849823,
"eval_runtime": 98.3906,
"eval_samples_per_second": 10.164,
"eval_steps_per_second": 1.27,
"step": 720
},
{
"epoch": 0.02,
"eval_loss": 1.2841123342514038,
"eval_runtime": 98.4338,
"eval_samples_per_second": 10.159,
"eval_steps_per_second": 1.27,
"step": 730
},
{
"epoch": 0.02,
"eval_loss": 1.2792314291000366,
"eval_runtime": 98.4023,
"eval_samples_per_second": 10.162,
"eval_steps_per_second": 1.27,
"step": 740
},
{
"epoch": 0.02,
"learning_rate": 6.373937677053825e-06,
"loss": 0.8206,
"step": 750
},
{
"epoch": 0.02,
"eval_loss": 1.2769616842269897,
"eval_runtime": 98.4211,
"eval_samples_per_second": 10.16,
"eval_steps_per_second": 1.27,
"step": 750
},
{
"epoch": 0.02,
"eval_loss": 1.271959662437439,
"eval_runtime": 98.3642,
"eval_samples_per_second": 10.166,
"eval_steps_per_second": 1.271,
"step": 760
},
{
"epoch": 0.02,
"eval_loss": 1.2643251419067383,
"eval_runtime": 98.3824,
"eval_samples_per_second": 10.164,
"eval_steps_per_second": 1.271,
"step": 770
},
{
"epoch": 0.02,
"learning_rate": 6.628895184135977e-06,
"loss": 1.2928,
"step": 780
},
{
"epoch": 0.02,
"eval_loss": 1.2600923776626587,
"eval_runtime": 98.3338,
"eval_samples_per_second": 10.169,
"eval_steps_per_second": 1.271,
"step": 780
},
{
"epoch": 0.02,
"eval_loss": 1.2568110227584839,
"eval_runtime": 98.3121,
"eval_samples_per_second": 10.172,
"eval_steps_per_second": 1.271,
"step": 790
},
{
"epoch": 0.02,
"eval_loss": 1.254643201828003,
"eval_runtime": 98.3589,
"eval_samples_per_second": 10.167,
"eval_steps_per_second": 1.271,
"step": 800
},
{
"epoch": 0.02,
"learning_rate": 6.88385269121813e-06,
"loss": 0.9938,
"step": 810
},
{
"epoch": 0.02,
"eval_loss": 1.2510321140289307,
"eval_runtime": 98.3386,
"eval_samples_per_second": 10.169,
"eval_steps_per_second": 1.271,
"step": 810
},
{
"epoch": 0.02,
"eval_loss": 1.2455601692199707,
"eval_runtime": 98.3748,
"eval_samples_per_second": 10.165,
"eval_steps_per_second": 1.271,
"step": 820
},
{
"epoch": 0.02,
"eval_loss": 1.2411696910858154,
"eval_runtime": 98.352,
"eval_samples_per_second": 10.168,
"eval_steps_per_second": 1.271,
"step": 830
},
{
"epoch": 0.02,
"learning_rate": 7.1388101983002834e-06,
"loss": 0.9805,
"step": 840
},
{
"epoch": 0.02,
"eval_loss": 1.2375924587249756,
"eval_runtime": 98.2714,
"eval_samples_per_second": 10.176,
"eval_steps_per_second": 1.272,
"step": 840
},
{
"epoch": 0.02,
"eval_loss": 1.2358232736587524,
"eval_runtime": 98.4108,
"eval_samples_per_second": 10.161,
"eval_steps_per_second": 1.27,
"step": 850
},
{
"epoch": 0.02,
"eval_loss": 1.2307369709014893,
"eval_runtime": 98.4489,
"eval_samples_per_second": 10.158,
"eval_steps_per_second": 1.27,
"step": 860
},
{
"epoch": 0.02,
"learning_rate": 7.3937677053824365e-06,
"loss": 1.077,
"step": 870
},
{
"epoch": 0.02,
"eval_loss": 1.2273682355880737,
"eval_runtime": 98.361,
"eval_samples_per_second": 10.167,
"eval_steps_per_second": 1.271,
"step": 870
},
{
"epoch": 0.02,
"eval_loss": 1.223397970199585,
"eval_runtime": 98.3541,
"eval_samples_per_second": 10.167,
"eval_steps_per_second": 1.271,
"step": 880
},
{
"epoch": 0.02,
"eval_loss": 1.2197948694229126,
"eval_runtime": 98.3807,
"eval_samples_per_second": 10.165,
"eval_steps_per_second": 1.271,
"step": 890
},
{
"epoch": 0.02,
"learning_rate": 7.64872521246459e-06,
"loss": 0.7785,
"step": 900
},
{
"epoch": 0.02,
"eval_loss": 1.2188224792480469,
"eval_runtime": 98.3148,
"eval_samples_per_second": 10.171,
"eval_steps_per_second": 1.271,
"step": 900
},
{
"epoch": 0.02,
"eval_loss": 1.2149394750595093,
"eval_runtime": 98.4504,
"eval_samples_per_second": 10.157,
"eval_steps_per_second": 1.27,
"step": 910
},
{
"epoch": 0.02,
"eval_loss": 1.2091023921966553,
"eval_runtime": 98.37,
"eval_samples_per_second": 10.166,
"eval_steps_per_second": 1.271,
"step": 920
},
{
"epoch": 0.02,
"learning_rate": 7.903682719546743e-06,
"loss": 1.1816,
"step": 930
},
{
"epoch": 0.02,
"eval_loss": 1.2046704292297363,
"eval_runtime": 98.3537,
"eval_samples_per_second": 10.167,
"eval_steps_per_second": 1.271,
"step": 930
},
{
"epoch": 0.02,
"eval_loss": 1.2031785249710083,
"eval_runtime": 98.3469,
"eval_samples_per_second": 10.168,
"eval_steps_per_second": 1.271,
"step": 940
},
{
"epoch": 0.02,
"eval_loss": 1.2025039196014404,
"eval_runtime": 98.2852,
"eval_samples_per_second": 10.174,
"eval_steps_per_second": 1.272,
"step": 950
},
{
"epoch": 0.02,
"learning_rate": 8.158640226628896e-06,
"loss": 0.9553,
"step": 960
},
{
"epoch": 0.02,
"eval_loss": 1.1980048418045044,
"eval_runtime": 98.4314,
"eval_samples_per_second": 10.159,
"eval_steps_per_second": 1.27,
"step": 960
},
{
"epoch": 0.02,
"eval_loss": 1.1943976879119873,
"eval_runtime": 98.3703,
"eval_samples_per_second": 10.166,
"eval_steps_per_second": 1.271,
"step": 970
},
{
"epoch": 0.02,
"eval_loss": 1.1904170513153076,
"eval_runtime": 98.4077,
"eval_samples_per_second": 10.162,
"eval_steps_per_second": 1.27,
"step": 980
},
{
"epoch": 0.03,
"learning_rate": 8.413597733711049e-06,
"loss": 0.9928,
"step": 990
},
{
"epoch": 0.03,
"eval_loss": 1.1887998580932617,
"eval_runtime": 98.3789,
"eval_samples_per_second": 10.165,
"eval_steps_per_second": 1.271,
"step": 990
},
{
"epoch": 0.03,
"eval_loss": 1.1872926950454712,
"eval_runtime": 98.3078,
"eval_samples_per_second": 10.172,
"eval_steps_per_second": 1.272,
"step": 1000
},
{
"epoch": 0.03,
"eval_loss": 1.1832990646362305,
"eval_runtime": 98.3915,
"eval_samples_per_second": 10.163,
"eval_steps_per_second": 1.27,
"step": 1010
},
{
"epoch": 0.03,
"learning_rate": 8.6685552407932e-06,
"loss": 1.0312,
"step": 1020
},
{
"epoch": 0.03,
"eval_loss": 1.180888295173645,
"eval_runtime": 98.33,
"eval_samples_per_second": 10.17,
"eval_steps_per_second": 1.271,
"step": 1020
},
{
"epoch": 0.03,
"eval_loss": 1.178697109222412,
"eval_runtime": 98.3446,
"eval_samples_per_second": 10.168,
"eval_steps_per_second": 1.271,
"step": 1030
},
{
"epoch": 0.03,
"eval_loss": 1.176483154296875,
"eval_runtime": 98.3605,
"eval_samples_per_second": 10.167,
"eval_steps_per_second": 1.271,
"step": 1040
},
{
"epoch": 0.03,
"learning_rate": 8.923512747875353e-06,
"loss": 0.7569,
"step": 1050
},
{
"epoch": 0.03,
"eval_loss": 1.1760860681533813,
"eval_runtime": 98.3804,
"eval_samples_per_second": 10.165,
"eval_steps_per_second": 1.271,
"step": 1050
},
{
"epoch": 0.03,
"eval_loss": 1.173068881034851,
"eval_runtime": 98.4165,
"eval_samples_per_second": 10.161,
"eval_steps_per_second": 1.27,
"step": 1060
},
{
"epoch": 0.03,
"eval_loss": 1.1711184978485107,
"eval_runtime": 98.4416,
"eval_samples_per_second": 10.158,
"eval_steps_per_second": 1.27,
"step": 1070
},
{
"epoch": 0.03,
"learning_rate": 9.178470254957508e-06,
"loss": 1.1469,
"step": 1080
},
{
"epoch": 0.03,
"eval_loss": 1.1693283319473267,
"eval_runtime": 98.5177,
"eval_samples_per_second": 10.15,
"eval_steps_per_second": 1.269,
"step": 1080
},
{
"epoch": 0.03,
"eval_loss": 1.1686639785766602,
"eval_runtime": 98.4,
"eval_samples_per_second": 10.163,
"eval_steps_per_second": 1.27,
"step": 1090
},
{
"epoch": 0.03,
"eval_loss": 1.1684391498565674,
"eval_runtime": 98.4275,
"eval_samples_per_second": 10.16,
"eval_steps_per_second": 1.27,
"step": 1100
},
{
"epoch": 0.03,
"learning_rate": 9.43342776203966e-06,
"loss": 0.897,
"step": 1110
},
{
"epoch": 0.03,
"eval_loss": 1.1654223203659058,
"eval_runtime": 98.3945,
"eval_samples_per_second": 10.163,
"eval_steps_per_second": 1.27,
"step": 1110
},
{
"epoch": 0.03,
"eval_loss": 1.1617776155471802,
"eval_runtime": 98.3254,
"eval_samples_per_second": 10.17,
"eval_steps_per_second": 1.271,
"step": 1120
},
{
"epoch": 0.03,
"eval_loss": 1.1603034734725952,
"eval_runtime": 98.3774,
"eval_samples_per_second": 10.165,
"eval_steps_per_second": 1.271,
"step": 1130
},
{
"epoch": 0.03,
"learning_rate": 9.688385269121813e-06,
"loss": 0.8758,
"step": 1140
},
{
"epoch": 0.03,
"eval_loss": 1.1597446203231812,
"eval_runtime": 98.3684,
"eval_samples_per_second": 10.166,
"eval_steps_per_second": 1.271,
"step": 1140
},
{
"epoch": 0.03,
"eval_loss": 1.159226417541504,
"eval_runtime": 98.3648,
"eval_samples_per_second": 10.166,
"eval_steps_per_second": 1.271,
"step": 1150
},
{
"epoch": 0.03,
"eval_loss": 1.1565824747085571,
"eval_runtime": 98.4129,
"eval_samples_per_second": 10.161,
"eval_steps_per_second": 1.27,
"step": 1160
},
{
"epoch": 0.03,
"learning_rate": 9.943342776203968e-06,
"loss": 0.999,
"step": 1170
},
{
"epoch": 0.03,
"eval_loss": 1.1539520025253296,
"eval_runtime": 98.3654,
"eval_samples_per_second": 10.166,
"eval_steps_per_second": 1.271,
"step": 1170
},
{
"epoch": 0.03,
"eval_loss": 1.152630090713501,
"eval_runtime": 98.3163,
"eval_samples_per_second": 10.171,
"eval_steps_per_second": 1.271,
"step": 1180
},
{
"epoch": 0.03,
"eval_loss": 1.1519286632537842,
"eval_runtime": 98.4417,
"eval_samples_per_second": 10.158,
"eval_steps_per_second": 1.27,
"step": 1190
},
{
"epoch": 0.03,
"learning_rate": 1.0198300283286119e-05,
"loss": 0.6816,
"step": 1200
},
{
"epoch": 0.03,
"eval_loss": 1.1526896953582764,
"eval_runtime": 98.4988,
"eval_samples_per_second": 10.152,
"eval_steps_per_second": 1.269,
"step": 1200
},
{
"epoch": 0.03,
"eval_loss": 1.1494954824447632,
"eval_runtime": 98.4536,
"eval_samples_per_second": 10.157,
"eval_steps_per_second": 1.27,
"step": 1210
},
{
"epoch": 0.03,
"eval_loss": 1.1475229263305664,
"eval_runtime": 98.3874,
"eval_samples_per_second": 10.164,
"eval_steps_per_second": 1.27,
"step": 1220
},
{
"epoch": 0.03,
"learning_rate": 1.0453257790368272e-05,
"loss": 1.1027,
"step": 1230
},
{
"epoch": 0.03,
"eval_loss": 1.1461777687072754,
"eval_runtime": 98.336,
"eval_samples_per_second": 10.169,
"eval_steps_per_second": 1.271,
"step": 1230
},
{
"epoch": 0.03,
"eval_loss": 1.146038293838501,
"eval_runtime": 98.3987,
"eval_samples_per_second": 10.163,
"eval_steps_per_second": 1.27,
"step": 1240
},
{
"epoch": 0.03,
"eval_loss": 1.1465715169906616,
"eval_runtime": 98.3038,
"eval_samples_per_second": 10.173,
"eval_steps_per_second": 1.272,
"step": 1250
},
{
"epoch": 0.03,
"learning_rate": 1.0708215297450425e-05,
"loss": 0.8607,
"step": 1260
},
{
"epoch": 0.03,
"eval_loss": 1.1437655687332153,
"eval_runtime": 98.3345,
"eval_samples_per_second": 10.169,
"eval_steps_per_second": 1.271,
"step": 1260
},
{
"epoch": 0.03,
"eval_loss": 1.1424213647842407,
"eval_runtime": 98.4474,
"eval_samples_per_second": 10.158,
"eval_steps_per_second": 1.27,
"step": 1270
},
{
"epoch": 0.03,
"eval_loss": 1.140133261680603,
"eval_runtime": 98.4388,
"eval_samples_per_second": 10.159,
"eval_steps_per_second": 1.27,
"step": 1280
},
{
"epoch": 0.03,
"learning_rate": 1.0963172804532578e-05,
"loss": 0.8388,
"step": 1290
},
{
"epoch": 0.03,
"eval_loss": 1.140416145324707,
"eval_runtime": 98.4219,
"eval_samples_per_second": 10.16,
"eval_steps_per_second": 1.27,
"step": 1290
},
{
"epoch": 0.03,
"eval_loss": 1.1412097215652466,
"eval_runtime": 98.3244,
"eval_samples_per_second": 10.17,
"eval_steps_per_second": 1.271,
"step": 1300
},
{
"epoch": 0.03,
"eval_loss": 1.1374417543411255,
"eval_runtime": 98.4302,
"eval_samples_per_second": 10.159,
"eval_steps_per_second": 1.27,
"step": 1310
},
{
"epoch": 0.03,
"learning_rate": 1.1218130311614731e-05,
"loss": 0.9756,
"step": 1320
},
{
"epoch": 0.03,
"eval_loss": 1.1353052854537964,
"eval_runtime": 98.3779,
"eval_samples_per_second": 10.165,
"eval_steps_per_second": 1.271,
"step": 1320
},
{
"epoch": 0.03,
"eval_loss": 1.1347367763519287,
"eval_runtime": 98.406,
"eval_samples_per_second": 10.162,
"eval_steps_per_second": 1.27,
"step": 1330
},
{
"epoch": 0.03,
"eval_loss": 1.1346714496612549,
"eval_runtime": 98.414,
"eval_samples_per_second": 10.161,
"eval_steps_per_second": 1.27,
"step": 1340
},
{
"epoch": 0.03,
"learning_rate": 1.1473087818696883e-05,
"loss": 0.6835,
"step": 1350
},
{
"epoch": 0.03,
"eval_loss": 1.1355887651443481,
"eval_runtime": 98.409,
"eval_samples_per_second": 10.162,
"eval_steps_per_second": 1.27,
"step": 1350
},
{
"epoch": 0.03,
"eval_loss": 1.1324636936187744,
"eval_runtime": 98.3679,
"eval_samples_per_second": 10.166,
"eval_steps_per_second": 1.271,
"step": 1360
},
{
"epoch": 0.03,
"eval_loss": 1.1316462755203247,
"eval_runtime": 98.3659,
"eval_samples_per_second": 10.166,
"eval_steps_per_second": 1.271,
"step": 1370
},
{
"epoch": 0.04,
"learning_rate": 1.1728045325779038e-05,
"loss": 1.1232,
"step": 1380
},
{
"epoch": 0.04,
"eval_loss": 1.1303904056549072,
"eval_runtime": 98.366,
"eval_samples_per_second": 10.166,
"eval_steps_per_second": 1.271,
"step": 1380
},
{
"epoch": 0.04,
"eval_loss": 1.1291667222976685,
"eval_runtime": 98.347,
"eval_samples_per_second": 10.168,
"eval_steps_per_second": 1.271,
"step": 1390
},
{
"epoch": 0.04,
"eval_loss": 1.1297775506973267,
"eval_runtime": 98.3753,
"eval_samples_per_second": 10.165,
"eval_steps_per_second": 1.271,
"step": 1400
},
{
"epoch": 0.04,
"learning_rate": 1.198300283286119e-05,
"loss": 0.8979,
"step": 1410
},
{
"epoch": 0.04,
"eval_loss": 1.1276240348815918,
"eval_runtime": 98.3851,
"eval_samples_per_second": 10.164,
"eval_steps_per_second": 1.271,
"step": 1410
},
{
"epoch": 0.04,
"eval_loss": 1.1269475221633911,
"eval_runtime": 98.4324,
"eval_samples_per_second": 10.159,
"eval_steps_per_second": 1.27,
"step": 1420
},
{
"epoch": 0.04,
"eval_loss": 1.1256887912750244,
"eval_runtime": 98.4143,
"eval_samples_per_second": 10.161,
"eval_steps_per_second": 1.27,
"step": 1430
},
{
"epoch": 0.04,
"learning_rate": 1.2237960339943342e-05,
"loss": 0.84,
"step": 1440
},
{
"epoch": 0.04,
"eval_loss": 1.1253855228424072,
"eval_runtime": 98.3652,
"eval_samples_per_second": 10.166,
"eval_steps_per_second": 1.271,
"step": 1440
},
{
"epoch": 0.04,
"eval_loss": 1.1247769594192505,
"eval_runtime": 98.3272,
"eval_samples_per_second": 10.17,
"eval_steps_per_second": 1.271,
"step": 1450
},
{
"epoch": 0.04,
"eval_loss": 1.1240159273147583,
"eval_runtime": 98.4053,
"eval_samples_per_second": 10.162,
"eval_steps_per_second": 1.27,
"step": 1460
},
{
"epoch": 0.04,
"learning_rate": 1.2492917847025497e-05,
"loss": 0.9798,
"step": 1470
},
{
"epoch": 0.04,
"eval_loss": 1.1204980611801147,
"eval_runtime": 98.3445,
"eval_samples_per_second": 10.168,
"eval_steps_per_second": 1.271,
"step": 1470
},
{
"epoch": 0.04,
"eval_loss": 1.119938611984253,
"eval_runtime": 98.3607,
"eval_samples_per_second": 10.167,
"eval_steps_per_second": 1.271,
"step": 1480
},
{
"epoch": 0.04,
"eval_loss": 1.1203584671020508,
"eval_runtime": 98.302,
"eval_samples_per_second": 10.173,
"eval_steps_per_second": 1.272,
"step": 1490
},
{
"epoch": 0.04,
"learning_rate": 1.274787535410765e-05,
"loss": 0.672,
"step": 1500
},
{
"epoch": 0.04,
"eval_loss": 1.1203203201293945,
"eval_runtime": 98.319,
"eval_samples_per_second": 10.171,
"eval_steps_per_second": 1.271,
"step": 1500
},
{
"epoch": 0.04,
"eval_loss": 1.1188935041427612,
"eval_runtime": 98.3355,
"eval_samples_per_second": 10.169,
"eval_steps_per_second": 1.271,
"step": 1510
},
{
"epoch": 0.04,
"eval_loss": 1.1174798011779785,
"eval_runtime": 98.2986,
"eval_samples_per_second": 10.173,
"eval_steps_per_second": 1.272,
"step": 1520
},
{
"epoch": 0.04,
"learning_rate": 1.3002832861189801e-05,
"loss": 1.0258,
"step": 1530
},
{
"epoch": 0.04,
"eval_loss": 1.1174086332321167,
"eval_runtime": 98.4015,
"eval_samples_per_second": 10.162,
"eval_steps_per_second": 1.27,
"step": 1530
},
{
"epoch": 0.04,
"eval_loss": 1.1172817945480347,
"eval_runtime": 98.4238,
"eval_samples_per_second": 10.16,
"eval_steps_per_second": 1.27,
"step": 1540
},
{
"epoch": 0.04,
"eval_loss": 1.115920066833496,
"eval_runtime": 98.4495,
"eval_samples_per_second": 10.157,
"eval_steps_per_second": 1.27,
"step": 1550
},
{
"epoch": 0.04,
"learning_rate": 1.3257790368271954e-05,
"loss": 0.8557,
"step": 1560
},
{
"epoch": 0.04,
"eval_loss": 1.1152660846710205,
"eval_runtime": 98.4694,
"eval_samples_per_second": 10.155,
"eval_steps_per_second": 1.269,
"step": 1560
},
{
"epoch": 0.04,
"eval_loss": 1.1120198965072632,
"eval_runtime": 98.3157,
"eval_samples_per_second": 10.171,
"eval_steps_per_second": 1.271,
"step": 1570
},
{
"epoch": 0.04,
"eval_loss": 1.1117701530456543,
"eval_runtime": 98.3009,
"eval_samples_per_second": 10.173,
"eval_steps_per_second": 1.272,
"step": 1580
},
{
"epoch": 0.04,
"learning_rate": 1.3512747875354108e-05,
"loss": 0.8459,
"step": 1590
},
{
"epoch": 0.04,
"eval_loss": 1.112415075302124,
"eval_runtime": 98.3439,
"eval_samples_per_second": 10.168,
"eval_steps_per_second": 1.271,
"step": 1590
},
{
"epoch": 0.04,
"eval_loss": 1.1125112771987915,
"eval_runtime": 98.3572,
"eval_samples_per_second": 10.167,
"eval_steps_per_second": 1.271,
"step": 1600
},
{
"epoch": 0.04,
"eval_loss": 1.1110690832138062,
"eval_runtime": 98.419,
"eval_samples_per_second": 10.161,
"eval_steps_per_second": 1.27,
"step": 1610
},
{
"epoch": 0.04,
"learning_rate": 1.376770538243626e-05,
"loss": 0.9507,
"step": 1620
},
{
"epoch": 0.04,
"eval_loss": 1.110371708869934,
"eval_runtime": 98.3408,
"eval_samples_per_second": 10.169,
"eval_steps_per_second": 1.271,
"step": 1620
},
{
"epoch": 0.04,
"eval_loss": 1.1083916425704956,
"eval_runtime": 98.4225,
"eval_samples_per_second": 10.16,
"eval_steps_per_second": 1.27,
"step": 1630
},
{
"epoch": 0.04,
"eval_loss": 1.1089563369750977,
"eval_runtime": 98.3054,
"eval_samples_per_second": 10.172,
"eval_steps_per_second": 1.272,
"step": 1640
},
{
"epoch": 0.04,
"learning_rate": 1.4022662889518414e-05,
"loss": 0.6854,
"step": 1650
},
{
"epoch": 0.04,
"eval_loss": 1.110315203666687,
"eval_runtime": 98.304,
"eval_samples_per_second": 10.173,
"eval_steps_per_second": 1.272,
"step": 1650
},
{
"epoch": 0.04,
"eval_loss": 1.1081123352050781,
"eval_runtime": 98.4452,
"eval_samples_per_second": 10.158,
"eval_steps_per_second": 1.27,
"step": 1660
},
{
"epoch": 0.04,
"eval_loss": 1.1062242984771729,
"eval_runtime": 98.3714,
"eval_samples_per_second": 10.166,
"eval_steps_per_second": 1.271,
"step": 1670
},
{
"epoch": 0.04,
"learning_rate": 1.4277620396600567e-05,
"loss": 1.0904,
"step": 1680
},
{
"epoch": 0.04,
"eval_loss": 1.1061948537826538,
"eval_runtime": 98.3346,
"eval_samples_per_second": 10.169,
"eval_steps_per_second": 1.271,
"step": 1680
},
{
"epoch": 0.04,
"eval_loss": 1.1069517135620117,
"eval_runtime": 98.3114,
"eval_samples_per_second": 10.172,
"eval_steps_per_second": 1.271,
"step": 1690
},
{
"epoch": 0.04,
"eval_loss": 1.1066083908081055,
"eval_runtime": 98.3387,
"eval_samples_per_second": 10.169,
"eval_steps_per_second": 1.271,
"step": 1700
},
{
"epoch": 0.04,
"learning_rate": 1.453257790368272e-05,
"loss": 0.849,
"step": 1710
},
{
"epoch": 0.04,
"eval_loss": 1.1053024530410767,
"eval_runtime": 98.4738,
"eval_samples_per_second": 10.155,
"eval_steps_per_second": 1.269,
"step": 1710
},
{
"epoch": 0.04,
"eval_loss": 1.1040204763412476,
"eval_runtime": 98.4347,
"eval_samples_per_second": 10.159,
"eval_steps_per_second": 1.27,
"step": 1720
},
{
"epoch": 0.04,
"eval_loss": 1.102669596672058,
"eval_runtime": 98.3742,
"eval_samples_per_second": 10.165,
"eval_steps_per_second": 1.271,
"step": 1730
},
{
"epoch": 0.04,
"learning_rate": 1.4787535410764873e-05,
"loss": 0.8601,
"step": 1740
},
{
"epoch": 0.04,
"eval_loss": 1.1038398742675781,
"eval_runtime": 98.4219,
"eval_samples_per_second": 10.16,
"eval_steps_per_second": 1.27,
"step": 1740
},
{
"epoch": 0.04,
"eval_loss": 1.1060833930969238,
"eval_runtime": 98.3163,
"eval_samples_per_second": 10.171,
"eval_steps_per_second": 1.271,
"step": 1750
},
{
"epoch": 0.04,
"eval_loss": 1.1028786897659302,
"eval_runtime": 98.4354,
"eval_samples_per_second": 10.159,
"eval_steps_per_second": 1.27,
"step": 1760
},
{
"epoch": 0.05,
"learning_rate": 1.5042492917847024e-05,
"loss": 0.9747,
"step": 1770
},
{
"epoch": 0.05,
"eval_loss": 1.101389765739441,
"eval_runtime": 98.3036,
"eval_samples_per_second": 10.173,
"eval_steps_per_second": 1.272,
"step": 1770
},
{
"epoch": 0.05,
"eval_loss": 1.1008771657943726,
"eval_runtime": 98.2846,
"eval_samples_per_second": 10.175,
"eval_steps_per_second": 1.272,
"step": 1780
},
{
"epoch": 0.05,
"eval_loss": 1.0998055934906006,
"eval_runtime": 98.366,
"eval_samples_per_second": 10.166,
"eval_steps_per_second": 1.271,
"step": 1790
},
{
"epoch": 0.05,
"learning_rate": 1.529745042492918e-05,
"loss": 0.656,
"step": 1800
},
{
"epoch": 0.05,
"eval_loss": 1.1010502576828003,
"eval_runtime": 98.2933,
"eval_samples_per_second": 10.174,
"eval_steps_per_second": 1.272,
"step": 1800
},
{
"epoch": 0.05,
"eval_loss": 1.0997422933578491,
"eval_runtime": 98.3167,
"eval_samples_per_second": 10.171,
"eval_steps_per_second": 1.271,
"step": 1810
},
{
"epoch": 0.05,
"eval_loss": 1.097307562828064,
"eval_runtime": 98.3299,
"eval_samples_per_second": 10.17,
"eval_steps_per_second": 1.271,
"step": 1820
},
{
"epoch": 0.05,
"learning_rate": 1.5552407932011334e-05,
"loss": 1.0969,
"step": 1830
},
{
"epoch": 0.05,
"eval_loss": 1.0984749794006348,
"eval_runtime": 98.4212,
"eval_samples_per_second": 10.16,
"eval_steps_per_second": 1.27,
"step": 1830
},
{
"epoch": 0.05,
"eval_loss": 1.0964536666870117,
"eval_runtime": 98.3498,
"eval_samples_per_second": 10.168,
"eval_steps_per_second": 1.271,
"step": 1840
},
{
"epoch": 0.05,
"eval_loss": 1.0976883172988892,
"eval_runtime": 98.3395,
"eval_samples_per_second": 10.169,
"eval_steps_per_second": 1.271,
"step": 1850
},
{
"epoch": 0.05,
"learning_rate": 1.5807365439093485e-05,
"loss": 0.8523,
"step": 1860
},
{
"epoch": 0.05,
"eval_loss": 1.0951915979385376,
"eval_runtime": 98.3767,
"eval_samples_per_second": 10.165,
"eval_steps_per_second": 1.271,
"step": 1860
},
{
"epoch": 0.05,
"eval_loss": 1.0934215784072876,
"eval_runtime": 98.3799,
"eval_samples_per_second": 10.165,
"eval_steps_per_second": 1.271,
"step": 1870
},
{
"epoch": 0.05,
"eval_loss": 1.094098687171936,
"eval_runtime": 98.3294,
"eval_samples_per_second": 10.17,
"eval_steps_per_second": 1.271,
"step": 1880
},
{
"epoch": 0.05,
"learning_rate": 1.6062322946175637e-05,
"loss": 0.8655,
"step": 1890
},
{
"epoch": 0.05,
"eval_loss": 1.093522310256958,
"eval_runtime": 98.3345,
"eval_samples_per_second": 10.169,
"eval_steps_per_second": 1.271,
"step": 1890
},
{
"epoch": 0.05,
"eval_loss": 1.0934967994689941,
"eval_runtime": 98.4151,
"eval_samples_per_second": 10.161,
"eval_steps_per_second": 1.27,
"step": 1900
},
{
"epoch": 0.05,
"eval_loss": 1.0937992334365845,
"eval_runtime": 98.3474,
"eval_samples_per_second": 10.168,
"eval_steps_per_second": 1.271,
"step": 1910
},
{
"epoch": 0.05,
"learning_rate": 1.631728045325779e-05,
"loss": 0.9431,
"step": 1920
},
{
"epoch": 0.05,
"eval_loss": 1.0917603969573975,
"eval_runtime": 98.3228,
"eval_samples_per_second": 10.171,
"eval_steps_per_second": 1.271,
"step": 1920
},
{
"epoch": 0.05,
"eval_loss": 1.0905735492706299,
"eval_runtime": 98.3889,
"eval_samples_per_second": 10.164,
"eval_steps_per_second": 1.27,
"step": 1930
},
{
"epoch": 0.05,
"eval_loss": 1.09126877784729,
"eval_runtime": 98.4202,
"eval_samples_per_second": 10.161,
"eval_steps_per_second": 1.27,
"step": 1940
},
{
"epoch": 0.05,
"learning_rate": 1.6572237960339943e-05,
"loss": 0.6818,
"step": 1950
},
{
"epoch": 0.05,
"eval_loss": 1.0937858819961548,
"eval_runtime": 98.3409,
"eval_samples_per_second": 10.169,
"eval_steps_per_second": 1.271,
"step": 1950
},
{
"epoch": 0.05,
"eval_loss": 1.090471625328064,
"eval_runtime": 98.327,
"eval_samples_per_second": 10.17,
"eval_steps_per_second": 1.271,
"step": 1960
},
{
"epoch": 0.05,
"eval_loss": 1.0879604816436768,
"eval_runtime": 98.2991,
"eval_samples_per_second": 10.173,
"eval_steps_per_second": 1.272,
"step": 1970
},
{
"epoch": 0.05,
"learning_rate": 1.6827195467422098e-05,
"loss": 1.0641,
"step": 1980
},
{
"epoch": 0.05,
"eval_loss": 1.089332938194275,
"eval_runtime": 98.2713,
"eval_samples_per_second": 10.176,
"eval_steps_per_second": 1.272,
"step": 1980
},
{
"epoch": 0.05,
"eval_loss": 1.0885804891586304,
"eval_runtime": 98.2603,
"eval_samples_per_second": 10.177,
"eval_steps_per_second": 1.272,
"step": 1990
},
{
"epoch": 0.05,
"eval_loss": 1.0916638374328613,
"eval_runtime": 98.3446,
"eval_samples_per_second": 10.168,
"eval_steps_per_second": 1.271,
"step": 2000
},
{
"epoch": 0.05,
"learning_rate": 1.708215297450425e-05,
"loss": 0.8248,
"step": 2010
},
{
"epoch": 0.05,
"eval_loss": 1.0912878513336182,
"eval_runtime": 98.3008,
"eval_samples_per_second": 10.173,
"eval_steps_per_second": 1.272,
"step": 2010
},
{
"epoch": 0.05,
"eval_loss": 1.0867935419082642,
"eval_runtime": 98.397,
"eval_samples_per_second": 10.163,
"eval_steps_per_second": 1.27,
"step": 2020
},
{
"epoch": 0.05,
"eval_loss": 1.0863043069839478,
"eval_runtime": 98.4432,
"eval_samples_per_second": 10.158,
"eval_steps_per_second": 1.27,
"step": 2030
},
{
"epoch": 0.05,
"learning_rate": 1.73371104815864e-05,
"loss": 0.8212,
"step": 2040
},
{
"epoch": 0.05,
"eval_loss": 1.0863004922866821,
"eval_runtime": 98.4867,
"eval_samples_per_second": 10.154,
"eval_steps_per_second": 1.269,
"step": 2040
},
{
"epoch": 0.05,
"eval_loss": 1.0847465991973877,
"eval_runtime": 98.4283,
"eval_samples_per_second": 10.16,
"eval_steps_per_second": 1.27,
"step": 2050
},
{
"epoch": 0.05,
"eval_loss": 1.0844731330871582,
"eval_runtime": 98.5473,
"eval_samples_per_second": 10.147,
"eval_steps_per_second": 1.268,
"step": 2060
},
{
"epoch": 0.05,
"learning_rate": 1.7592067988668555e-05,
"loss": 0.8866,
"step": 2070
},
{
"epoch": 0.05,
"eval_loss": 1.0838637351989746,
"eval_runtime": 98.5368,
"eval_samples_per_second": 10.148,
"eval_steps_per_second": 1.269,
"step": 2070
},
{
"epoch": 0.05,
"eval_loss": 1.0842301845550537,
"eval_runtime": 98.5199,
"eval_samples_per_second": 10.15,
"eval_steps_per_second": 1.269,
"step": 2080
},
{
"epoch": 0.05,
"eval_loss": 1.08109712600708,
"eval_runtime": 98.4904,
"eval_samples_per_second": 10.153,
"eval_steps_per_second": 1.269,
"step": 2090
},
{
"epoch": 0.05,
"learning_rate": 1.7847025495750707e-05,
"loss": 0.6441,
"step": 2100
},
{
"epoch": 0.05,
"eval_loss": 1.0835895538330078,
"eval_runtime": 98.5001,
"eval_samples_per_second": 10.152,
"eval_steps_per_second": 1.269,
"step": 2100
},
{
"epoch": 0.05,
"eval_loss": 1.083143949508667,
"eval_runtime": 98.5565,
"eval_samples_per_second": 10.146,
"eval_steps_per_second": 1.268,
"step": 2110
},
{
"epoch": 0.05,
"eval_loss": 1.0816445350646973,
"eval_runtime": 98.4426,
"eval_samples_per_second": 10.158,
"eval_steps_per_second": 1.27,
"step": 2120
},
{
"epoch": 0.05,
"learning_rate": 1.8101983002832862e-05,
"loss": 0.9959,
"step": 2130
},
{
"epoch": 0.05,
"eval_loss": 1.081859827041626,
"eval_runtime": 98.513,
"eval_samples_per_second": 10.151,
"eval_steps_per_second": 1.269,
"step": 2130
},
{
"epoch": 0.05,
"eval_loss": 1.0832078456878662,
"eval_runtime": 98.4757,
"eval_samples_per_second": 10.155,
"eval_steps_per_second": 1.269,
"step": 2140
},
{
"epoch": 0.05,
"eval_loss": 1.0841658115386963,
"eval_runtime": 98.4511,
"eval_samples_per_second": 10.157,
"eval_steps_per_second": 1.27,
"step": 2150
},
{
"epoch": 0.06,
"learning_rate": 1.8356940509915016e-05,
"loss": 0.8355,
"step": 2160
},
{
"epoch": 0.06,
"eval_loss": 1.0814462900161743,
"eval_runtime": 98.5886,
"eval_samples_per_second": 10.143,
"eval_steps_per_second": 1.268,
"step": 2160
},
{
"epoch": 0.06,
"eval_loss": 1.0804240703582764,
"eval_runtime": 98.5837,
"eval_samples_per_second": 10.144,
"eval_steps_per_second": 1.268,
"step": 2170
},
{
"epoch": 0.06,
"eval_loss": 1.0809556245803833,
"eval_runtime": 98.5132,
"eval_samples_per_second": 10.151,
"eval_steps_per_second": 1.269,
"step": 2180
},
{
"epoch": 0.06,
"learning_rate": 1.8611898016997168e-05,
"loss": 0.8471,
"step": 2190
},
{
"epoch": 0.06,
"eval_loss": 1.0789848566055298,
"eval_runtime": 98.4416,
"eval_samples_per_second": 10.158,
"eval_steps_per_second": 1.27,
"step": 2190
},
{
"epoch": 0.06,
"eval_loss": 1.0795090198516846,
"eval_runtime": 98.3873,
"eval_samples_per_second": 10.164,
"eval_steps_per_second": 1.27,
"step": 2200
},
{
"epoch": 0.06,
"eval_loss": 1.0784635543823242,
"eval_runtime": 98.4762,
"eval_samples_per_second": 10.155,
"eval_steps_per_second": 1.269,
"step": 2210
},
{
"epoch": 0.06,
"learning_rate": 1.886685552407932e-05,
"loss": 0.9504,
"step": 2220
},
{
"epoch": 0.06,
"eval_loss": 1.0783464908599854,
"eval_runtime": 98.4343,
"eval_samples_per_second": 10.159,
"eval_steps_per_second": 1.27,
"step": 2220
},
{
"epoch": 0.06,
"eval_loss": 1.0772334337234497,
"eval_runtime": 98.4304,
"eval_samples_per_second": 10.159,
"eval_steps_per_second": 1.27,
"step": 2230
},
{
"epoch": 0.06,
"eval_loss": 1.0791562795639038,
"eval_runtime": 98.426,
"eval_samples_per_second": 10.16,
"eval_steps_per_second": 1.27,
"step": 2240
},
{
"epoch": 0.06,
"learning_rate": 1.9121813031161474e-05,
"loss": 0.6722,
"step": 2250
},
{
"epoch": 0.06,
"eval_loss": 1.080847144126892,
"eval_runtime": 98.4125,
"eval_samples_per_second": 10.161,
"eval_steps_per_second": 1.27,
"step": 2250
},
{
"epoch": 0.06,
"eval_loss": 1.0787074565887451,
"eval_runtime": 98.438,
"eval_samples_per_second": 10.159,
"eval_steps_per_second": 1.27,
"step": 2260
},
{
"epoch": 0.06,
"eval_loss": 1.076585054397583,
"eval_runtime": 98.3926,
"eval_samples_per_second": 10.163,
"eval_steps_per_second": 1.27,
"step": 2270
},
{
"epoch": 0.06,
"learning_rate": 1.9376770538243626e-05,
"loss": 1.0543,
"step": 2280
},
{
"epoch": 0.06,
"eval_loss": 1.0748603343963623,
"eval_runtime": 98.4047,
"eval_samples_per_second": 10.162,
"eval_steps_per_second": 1.27,
"step": 2280
},
{
"epoch": 0.06,
"eval_loss": 1.0754591226577759,
"eval_runtime": 98.3994,
"eval_samples_per_second": 10.163,
"eval_steps_per_second": 1.27,
"step": 2290
}
],
"max_steps": 117642,
"num_train_epochs": 3,
"total_flos": 1.1765881845301248e+18,
"trial_name": null,
"trial_params": null
}