| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9752701960065946, |
| "eval_steps": 100, |
| "global_step": 5391, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.0001999998159397344, |
| "loss": 1.4113, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999926375961516, |
| "loss": 0.8357, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999834346167496, |
| "loss": 0.7122, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.0001999970550493016, |
| "loss": 0.6653, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.000199995398527238, |
| "loss": 0.6152, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019999337390158218, |
| "loss": 0.5963, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019999098117978715, |
| "loss": 0.5923, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019998822037066105, |
| "loss": 0.5783, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019998509148436697, |
| "loss": 0.5529, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.000199981594532423, |
| "loss": 0.5382, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_loss": 0.5621132850646973, |
| "eval_runtime": 398.8595, |
| "eval_samples_per_second": 11.992, |
| "eval_steps_per_second": 2.999, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019997772952770216, |
| "loss": 0.5489, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019997349648443225, |
| "loss": 0.5268, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00019996889541819602, |
| "loss": 0.525, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00019996392634593092, |
| "loss": 0.4926, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00019995858928592916, |
| "loss": 0.5181, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00019995288425783754, |
| "loss": 0.5225, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00019994681128265743, |
| "loss": 0.4993, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00019994037038274467, |
| "loss": 0.5387, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.0001999335615818096, |
| "loss": 0.465, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00019992638490491676, |
| "loss": 0.4788, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07, |
| "eval_loss": 0.5201168060302734, |
| "eval_runtime": 398.3332, |
| "eval_samples_per_second": 12.008, |
| "eval_steps_per_second": 3.003, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00019991884037848497, |
| "loss": 0.451, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00019991092803028725, |
| "loss": 0.4586, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00019990264788945052, |
| "loss": 0.4894, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00019989399998645568, |
| "loss": 0.4666, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00019988498435313744, |
| "loss": 0.4763, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019987560102268422, |
| "loss": 0.4539, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019986585002963793, |
| "loss": 0.5133, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019985573140989405, |
| "loss": 0.4704, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00019984524520070125, |
| "loss": 0.4548, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00019983439144066143, |
| "loss": 0.4682, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.11, |
| "eval_loss": 0.497054785490036, |
| "eval_runtime": 399.4057, |
| "eval_samples_per_second": 11.975, |
| "eval_steps_per_second": 2.994, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0001998231701697295, |
| "loss": 0.4867, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0001998115814292133, |
| "loss": 0.4486, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0001997996252617733, |
| "loss": 0.4472, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00019978730171142268, |
| "loss": 0.4821, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0001997746108235269, |
| "loss": 0.4475, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.00019976155264480377, |
| "loss": 0.4485, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00019974812722332308, |
| "loss": 0.4547, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.0001997343346085066, |
| "loss": 0.4279, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00019972017485112774, |
| "loss": 0.4748, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.0001997056480033115, |
| "loss": 0.4778, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.15, |
| "eval_loss": 0.4730743169784546, |
| "eval_runtime": 403.2392, |
| "eval_samples_per_second": 11.861, |
| "eval_steps_per_second": 2.966, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.0001996907541185342, |
| "loss": 0.4393, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.00019967549325162324, |
| "loss": 0.4562, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.00019965986545875708, |
| "loss": 0.4389, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.0001996438707974648, |
| "loss": 0.4282, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.000199627509326626, |
| "loss": 0.4254, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0001996107811064706, |
| "loss": 0.419, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.00019959368619857872, |
| "loss": 0.4261, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.0001995762246658801, |
| "loss": 0.4581, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.00019955839657265432, |
| "loss": 0.4333, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.00019954020198453018, |
| "loss": 0.4541, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.18, |
| "eval_loss": 0.46874529123306274, |
| "eval_runtime": 403.7251, |
| "eval_samples_per_second": 11.847, |
| "eval_steps_per_second": 2.962, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00019952164096848578, |
| "loss": 0.4482, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00019950271359284795, |
| "loss": 0.4475, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00019948341992729227, |
| "loss": 0.4339, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.00019946376004284272, |
| "loss": 0.4527, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.0001994437340118713, |
| "loss": 0.4009, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.000199423341908098, |
| "loss": 0.4496, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0001994025838065903, |
| "loss": 0.4314, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0001993814597837631, |
| "loss": 0.4454, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.00019935996991737818, |
| "loss": 0.4076, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.0001993381142865442, |
| "loss": 0.4786, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.22, |
| "eval_loss": 0.4500006139278412, |
| "eval_runtime": 399.7496, |
| "eval_samples_per_second": 11.965, |
| "eval_steps_per_second": 2.992, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.00019931589297171628, |
| "loss": 0.4386, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0001992933060546955, |
| "loss": 0.4805, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.00019927035361862904, |
| "loss": 0.4135, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0001992470357480095, |
| "loss": 0.421, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00019922335252867476, |
| "loss": 0.4224, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00019919930404780766, |
| "loss": 0.4324, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.0001991748903939355, |
| "loss": 0.4062, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.00019915011165692997, |
| "loss": 0.4107, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.00019912496792800677, |
| "loss": 0.3953, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.00019909945929972502, |
| "loss": 0.3974, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.26, |
| "eval_loss": 0.4456511437892914, |
| "eval_runtime": 403.5923, |
| "eval_samples_per_second": 11.851, |
| "eval_steps_per_second": 2.963, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0001990735858659873, |
| "loss": 0.4006, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.000199047347722039, |
| "loss": 0.4258, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.00019902074496446815, |
| "loss": 0.4083, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.00019899377769120487, |
| "loss": 0.4097, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.00019896644600152135, |
| "loss": 0.4262, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00019893874999603103, |
| "loss": 0.4204, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0001989106897766887, |
| "loss": 0.4233, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.0001988822654467897, |
| "loss": 0.4107, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.00019885347711096993, |
| "loss": 0.4013, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.00019882432487520506, |
| "loss": 0.4142, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.29, |
| "eval_loss": 0.4460389018058777, |
| "eval_runtime": 400.2256, |
| "eval_samples_per_second": 11.951, |
| "eval_steps_per_second": 2.988, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.0001987948088468105, |
| "loss": 0.4324, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.0001987649291344408, |
| "loss": 0.3975, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00019873468584808934, |
| "loss": 0.4109, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00019870407909908786, |
| "loss": 0.3987, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00019867310900010605, |
| "loss": 0.3885, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00019864177566515122, |
| "loss": 0.4122, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00019861007920956786, |
| "loss": 0.405, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00019857801975003704, |
| "loss": 0.404, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.0001985455974045763, |
| "loss": 0.382, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.0001985128122925389, |
| "loss": 0.4374, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.33, |
| "eval_loss": 0.44203105568885803, |
| "eval_runtime": 401.2967, |
| "eval_samples_per_second": 11.919, |
| "eval_steps_per_second": 2.98, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.00019847966453461358, |
| "loss": 0.3859, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00019844615425282405, |
| "loss": 0.4187, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00019841228157052853, |
| "loss": 0.4078, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.0001983780466124193, |
| "loss": 0.4218, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.0001983434495045223, |
| "loss": 0.4109, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.00019830849037419656, |
| "loss": 0.4249, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00019827316935013388, |
| "loss": 0.4073, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.0001982374865623581, |
| "loss": 0.4336, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00019820144214222497, |
| "loss": 0.4056, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00019816503622242137, |
| "loss": 0.4008, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.37, |
| "eval_loss": 0.4418930411338806, |
| "eval_runtime": 404.9805, |
| "eval_samples_per_second": 11.81, |
| "eval_steps_per_second": 2.953, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00019812826893696495, |
| "loss": 0.365, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00019809114042120367, |
| "loss": 0.4006, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.0001980536508118152, |
| "loss": 0.3714, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00019801580024680652, |
| "loss": 0.3945, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00019797758886551324, |
| "loss": 0.4316, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.0001979390168085994, |
| "loss": 0.3958, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.00019790008421805664, |
| "loss": 0.419, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00019786079123720377, |
| "loss": 0.4048, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.0001978211380106864, |
| "loss": 0.4002, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.0001977811246844761, |
| "loss": 0.3979, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.4, |
| "eval_loss": 0.43328720331192017, |
| "eval_runtime": 398.734, |
| "eval_samples_per_second": 11.995, |
| "eval_steps_per_second": 2.999, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00019774075140587024, |
| "loss": 0.4047, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00019770001832349106, |
| "loss": 0.389, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00019765892558728542, |
| "loss": 0.3628, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.0001976174733485242, |
| "loss": 0.3914, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00019757566175980146, |
| "loss": 0.3885, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00019753349097503437, |
| "loss": 0.4023, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.0001974909611494622, |
| "loss": 0.4358, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00019744807243964597, |
| "loss": 0.3965, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00019740482500346779, |
| "loss": 0.3762, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.0001973612190001304, |
| "loss": 0.4108, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.44, |
| "eval_loss": 0.4304308295249939, |
| "eval_runtime": 397.6793, |
| "eval_samples_per_second": 12.027, |
| "eval_steps_per_second": 3.007, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00019731725459015643, |
| "loss": 0.3838, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00019727293193538793, |
| "loss": 0.3782, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00019722825119898566, |
| "loss": 0.4034, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00019718321254542858, |
| "loss": 0.391, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.0001971378161405132, |
| "loss": 0.4082, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.0001970920621513531, |
| "loss": 0.3772, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00019704595074637805, |
| "loss": 0.3894, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00019699948209533355, |
| "loss": 0.3882, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00019695265636928032, |
| "loss": 0.3782, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.0001969054737405934, |
| "loss": 0.3578, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.48, |
| "eval_loss": 0.42550337314605713, |
| "eval_runtime": 402.9262, |
| "eval_samples_per_second": 11.871, |
| "eval_steps_per_second": 2.968, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00019685793438296183, |
| "loss": 0.3644, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00019681003847138765, |
| "loss": 0.3702, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.00019676178618218565, |
| "loss": 0.4213, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.0001967131776929823, |
| "loss": 0.4126, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.00019666421318271547, |
| "loss": 0.3643, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00019661489283163362, |
| "loss": 0.3854, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00019656521682129502, |
| "loss": 0.3933, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00019651518533456733, |
| "loss": 0.3841, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00019646479855562666, |
| "loss": 0.3677, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00019641405666995715, |
| "loss": 0.3895, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.51, |
| "eval_loss": 0.41961902379989624, |
| "eval_runtime": 402.2386, |
| "eval_samples_per_second": 11.891, |
| "eval_steps_per_second": 2.973, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00019636295986435003, |
| "loss": 0.3578, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00019631150832690318, |
| "loss": 0.3878, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00019625970224702025, |
| "loss": 0.3784, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.00019620754181541008, |
| "loss": 0.4042, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.0001961550272240859, |
| "loss": 0.361, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.00019610215866636477, |
| "loss": 0.3877, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.00019604893633686662, |
| "loss": 0.3822, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.00019599536043151384, |
| "loss": 0.3726, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00019594143114753026, |
| "loss": 0.3552, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00019588714868344073, |
| "loss": 0.3725, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.55, |
| "eval_loss": 0.4203263521194458, |
| "eval_runtime": 396.7561, |
| "eval_samples_per_second": 12.055, |
| "eval_steps_per_second": 3.014, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00019583251323907006, |
| "loss": 0.3993, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.0001957775250155426, |
| "loss": 0.3557, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.0001957221842152813, |
| "loss": 0.4027, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00019566649104200696, |
| "loss": 0.3547, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.00019561044570073763, |
| "loss": 0.3978, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.00019555404839778767, |
| "loss": 0.3919, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00019549729934076717, |
| "loss": 0.3713, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00019544019873858102, |
| "loss": 0.408, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00019538274680142834, |
| "loss": 0.3792, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00019532494374080144, |
| "loss": 0.3836, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.59, |
| "eval_loss": 0.42036038637161255, |
| "eval_runtime": 402.4927, |
| "eval_samples_per_second": 11.883, |
| "eval_steps_per_second": 2.971, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00019526678976948525, |
| "loss": 0.3411, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.0001952082851015565, |
| "loss": 0.3863, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00019514942995238287, |
| "loss": 0.375, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00019509022453862226, |
| "loss": 0.3747, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00019503066907822198, |
| "loss": 0.36, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.00019497076379041786, |
| "loss": 0.3919, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.00019491050889573357, |
| "loss": 0.3699, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.00019484990461597978, |
| "loss": 0.3829, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.00019478895117425323, |
| "loss": 0.3589, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.00019472764879493616, |
| "loss": 0.3784, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.62, |
| "eval_loss": 0.4183010458946228, |
| "eval_runtime": 404.1371, |
| "eval_samples_per_second": 11.835, |
| "eval_steps_per_second": 2.959, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.00019466599770369509, |
| "loss": 0.3912, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.00019460399812748041, |
| "loss": 0.374, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.0001945416502945253, |
| "loss": 0.384, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00019447895443434486, |
| "loss": 0.3519, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00019441591077773554, |
| "loss": 0.3598, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.0001943525195567739, |
| "loss": 0.3145, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.00019428878100481606, |
| "loss": 0.3842, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.0001942246953564967, |
| "loss": 0.3674, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.00019416026284772825, |
| "loss": 0.3603, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.00019409548371570007, |
| "loss": 0.369, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.66, |
| "eval_loss": 0.4111176133155823, |
| "eval_runtime": 402.9829, |
| "eval_samples_per_second": 11.869, |
| "eval_steps_per_second": 2.968, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.00019403035819887734, |
| "loss": 0.3713, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.00019396488653700055, |
| "loss": 0.3539, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.00019389906897108428, |
| "loss": 0.3932, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.0001938329057434165, |
| "loss": 0.3371, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00019376639709755766, |
| "loss": 0.3832, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00019369954327833972, |
| "loss": 0.3658, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00019363234453186534, |
| "loss": 0.3452, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00019356480110550687, |
| "loss": 0.3721, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00019349691324790555, |
| "loss": 0.3637, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00019342868120897054, |
| "loss": 0.3409, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.7, |
| "eval_loss": 0.41198766231536865, |
| "eval_runtime": 402.9688, |
| "eval_samples_per_second": 11.869, |
| "eval_steps_per_second": 2.968, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00019336010523987796, |
| "loss": 0.3424, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00019329118559307, |
| "loss": 0.3532, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.0001932219225222541, |
| "loss": 0.3591, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.00019315231628240178, |
| "loss": 0.3794, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.00019308236712974795, |
| "loss": 0.3825, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.0001930120753217898, |
| "loss": 0.3482, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00019294144111728584, |
| "loss": 0.3643, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00019287046477625515, |
| "loss": 0.3522, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00019279914655997619, |
| "loss": 0.3723, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00019272748673098596, |
| "loss": 0.388, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.73, |
| "eval_loss": 0.41505494713783264, |
| "eval_runtime": 404.7184, |
| "eval_samples_per_second": 11.818, |
| "eval_steps_per_second": 2.955, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.000192655485553079, |
| "loss": 0.3739, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.00019258314329130641, |
| "loss": 0.3625, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.00019251046021197496, |
| "loss": 0.3174, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00019243743658264593, |
| "loss": 0.347, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00019236407267213433, |
| "loss": 0.3837, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00019229036875050777, |
| "loss": 0.3854, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.0001922163250890855, |
| "loss": 0.3449, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.00019214194196043741, |
| "loss": 0.3507, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00019206721963838317, |
| "loss": 0.3786, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00019199215839799092, |
| "loss": 0.3608, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.77, |
| "eval_loss": 0.4092780351638794, |
| "eval_runtime": 403.6583, |
| "eval_samples_per_second": 11.849, |
| "eval_steps_per_second": 2.963, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.0001919167585155765, |
| "loss": 0.3233, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00019184102026870235, |
| "loss": 0.3679, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.0001917649439361765, |
| "loss": 0.3426, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00019168852979805162, |
| "loss": 0.3272, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.00019161177813562379, |
| "loss": 0.3572, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.0001915346892314316, |
| "loss": 0.3429, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.0001914572633692552, |
| "loss": 0.3325, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.00019137950083411505, |
| "loss": 0.3444, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.000191301401912271, |
| "loss": 0.351, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.00019122296689122123, |
| "loss": 0.3171, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.81, |
| "eval_loss": 0.4078885316848755, |
| "eval_runtime": 403.041, |
| "eval_samples_per_second": 11.867, |
| "eval_steps_per_second": 2.967, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.0001911441960597012, |
| "loss": 0.3626, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.0001910650897076824, |
| "loss": 0.3529, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.00019098564812637165, |
| "loss": 0.3727, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.0001909058716082097, |
| "loss": 0.3111, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.0001908257604468703, |
| "loss": 0.3846, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00019074531493725906, |
| "loss": 0.3612, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.0001906645353755124, |
| "loss": 0.3573, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00019058342205899656, |
| "loss": 0.3902, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.0001905019752863062, |
| "loss": 0.345, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.0001904201953572637, |
| "loss": 0.3581, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.84, |
| "eval_loss": 0.4118511974811554, |
| "eval_runtime": 405.9796, |
| "eval_samples_per_second": 11.781, |
| "eval_steps_per_second": 2.946, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00019033808257291768, |
| "loss": 0.3303, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00019025563723554223, |
| "loss": 0.3432, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00019017285964863554, |
| "loss": 0.3436, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.0001900897501169189, |
| "loss": 0.338, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.0001900063089463356, |
| "loss": 0.3611, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.00018992253644404967, |
| "loss": 0.3311, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.00018983843291844492, |
| "loss": 0.3337, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.00018975399867912364, |
| "loss": 0.358, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.0001896692340369057, |
| "loss": 0.3802, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.00018958413930382704, |
| "loss": 0.3389, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.88, |
| "eval_loss": 0.41193756461143494, |
| "eval_runtime": 402.9836, |
| "eval_samples_per_second": 11.869, |
| "eval_steps_per_second": 2.968, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.0001894987147931389, |
| "loss": 0.3398, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.00018941296081930646, |
| "loss": 0.3459, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.00018932687769800767, |
| "loss": 0.3546, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.00018924046574613222, |
| "loss": 0.3158, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.0001891537252817802, |
| "loss": 0.3484, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.00018906665662426104, |
| "loss": 0.3437, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.0001889792600940924, |
| "loss": 0.3637, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.00018889153601299888, |
| "loss": 0.3658, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.00018880348470391077, |
| "loss": 0.3452, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.000188715106490963, |
| "loss": 0.3302, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.92, |
| "eval_loss": 0.4021553695201874, |
| "eval_runtime": 399.2274, |
| "eval_samples_per_second": 11.981, |
| "eval_steps_per_second": 2.996, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.000188626401699494, |
| "loss": 0.3511, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.00018853737065604426, |
| "loss": 0.3232, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.00018844801368835532, |
| "loss": 0.366, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.00018835833112536857, |
| "loss": 0.3151, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.0001882683232972239, |
| "loss": 0.3534, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.00018817799053525862, |
| "loss": 0.3779, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.0001880873331720062, |
| "loss": 0.3338, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.00018799635154119495, |
| "loss": 0.3243, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.00018790504597774698, |
| "loss": 0.3729, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.0001878134168177768, |
| "loss": 0.3553, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.95, |
| "eval_loss": 0.4054717421531677, |
| "eval_runtime": 400.3737, |
| "eval_samples_per_second": 11.946, |
| "eval_steps_per_second": 2.987, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.00018772146439859015, |
| "loss": 0.3268, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.00018762918905868277, |
| "loss": 0.3304, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.00018753659113773913, |
| "loss": 0.3606, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.0001874436709766312, |
| "loss": 0.3421, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.00018735042891741718, |
| "loss": 0.3629, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.0001872568653033402, |
| "loss": 0.3636, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.00018716298047882714, |
| "loss": 0.3485, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.00018706877478948735, |
| "loss": 0.3169, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.00018697424858211126, |
| "loss": 0.306, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.0001868794022046693, |
| "loss": 0.3586, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_loss": 0.4048784375190735, |
| "eval_runtime": 400.9367, |
| "eval_samples_per_second": 11.93, |
| "eval_steps_per_second": 2.983, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.00018678423600631042, |
| "loss": 0.3311, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.00018668875033736094, |
| "loss": 0.3066, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.00018659294554932324, |
| "loss": 0.338, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.00018649682199487437, |
| "loss": 0.3205, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.0001864003800278649, |
| "loss": 0.3176, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.00018630362000331753, |
| "loss": 0.2986, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.00018620654227742572, |
| "loss": 0.3175, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.0001861091472075526, |
| "loss": 0.3174, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.0001860114351522293, |
| "loss": 0.2752, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.00018591340647115402, |
| "loss": 0.3014, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.03, |
| "eval_loss": 0.41257622838020325, |
| "eval_runtime": 223.7363, |
| "eval_samples_per_second": 21.378, |
| "eval_steps_per_second": 5.346, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.0001858150615251905, |
| "loss": 0.3013, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.00018571640067636662, |
| "loss": 0.3364, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.00018561742428787324, |
| "loss": 0.3159, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.00018552807605292504, |
| "loss": 0.3037, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.00018542850114380946, |
| "loss": 0.3087, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.00018532861175484162, |
| "loss": 0.2858, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.00018522840825373492, |
| "loss": 0.3208, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00018512789100935906, |
| "loss": 0.3117, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00018502706039173856, |
| "loss": 0.2921, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.0001849259167720517, |
| "loss": 0.3354, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.06, |
| "eval_loss": 0.4081941545009613, |
| "eval_runtime": 223.8258, |
| "eval_samples_per_second": 21.369, |
| "eval_steps_per_second": 5.343, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.0001848244605226289, |
| "loss": 0.2866, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.0001847226920169514, |
| "loss": 0.308, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.00018462061162965, |
| "loss": 0.3142, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.0001845182197365036, |
| "loss": 0.3125, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.00018441551671443768, |
| "loss": 0.307, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.00018431250294152323, |
| "loss": 0.2804, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.00018420917879697507, |
| "loss": 0.304, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.0001841055446611506, |
| "loss": 0.3152, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.00018400160091554835, |
| "loss": 0.3095, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.0001838973479428066, |
| "loss": 0.2954, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.1, |
| "eval_loss": 0.4158288836479187, |
| "eval_runtime": 223.8063, |
| "eval_samples_per_second": 21.371, |
| "eval_steps_per_second": 5.344, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.00018379278612670193, |
| "loss": 0.3167, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.00018368791585214784, |
| "loss": 0.2872, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.00018358273750519337, |
| "loss": 0.2999, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.00018347725147302158, |
| "loss": 0.3229, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.00018337145814394825, |
| "loss": 0.3332, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.0001832653579074203, |
| "loss": 0.2818, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.00018315895115401457, |
| "loss": 0.3198, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.00018305223827543604, |
| "loss": 0.3504, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.0001829452196645168, |
| "loss": 0.3392, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.00018283789571521436, |
| "loss": 0.3023, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.14, |
| "eval_loss": 0.40502193570137024, |
| "eval_runtime": 223.8877, |
| "eval_samples_per_second": 21.363, |
| "eval_steps_per_second": 5.342, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.00018273026682261013, |
| "loss": 0.3164, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.0001826223333829082, |
| "loss": 0.2985, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.00018251409579343375, |
| "loss": 0.2992, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.00018240555445263153, |
| "loss": 0.3174, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.00018229670976006453, |
| "loss": 0.3025, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.00018218756211641236, |
| "loss": 0.3297, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.00018207811192346996, |
| "loss": 0.3139, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.00018196835958414598, |
| "loss": 0.2956, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.00018185830550246124, |
| "loss": 0.3189, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.00018174795008354743, |
| "loss": 0.2896, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.17, |
| "eval_loss": 0.40529006719589233, |
| "eval_runtime": 223.8738, |
| "eval_samples_per_second": 21.365, |
| "eval_steps_per_second": 5.342, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00018163729373364554, |
| "loss": 0.3085, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.0001815263368601043, |
| "loss": 0.316, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00018141507987137873, |
| "loss": 0.326, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.00018130352317702865, |
| "loss": 0.293, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.00018119166718771716, |
| "loss": 0.2887, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.00018107951231520911, |
| "loss": 0.3266, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.00018096705897236966, |
| "loss": 0.3327, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.00018085430757316256, |
| "loss": 0.2991, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00018074125853264898, |
| "loss": 0.3076, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00018062791226698558, |
| "loss": 0.3339, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.21, |
| "eval_loss": 0.4053775668144226, |
| "eval_runtime": 223.9954, |
| "eval_samples_per_second": 21.353, |
| "eval_steps_per_second": 5.339, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00018051426919342317, |
| "loss": 0.2935, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.00018040032973030536, |
| "loss": 0.3497, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.00018028609429706664, |
| "loss": 0.2897, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.00018017156331423114, |
| "loss": 0.312, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00018005673720341086, |
| "loss": 0.303, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00017994161638730432, |
| "loss": 0.3253, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00017982620128969488, |
| "loss": 0.3322, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.0001797104923354492, |
| "loss": 0.2988, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.00017959448995051575, |
| "loss": 0.3094, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.00017947819456192306, |
| "loss": 0.3118, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.25, |
| "eval_loss": 0.3964312672615051, |
| "eval_runtime": 223.8688, |
| "eval_samples_per_second": 21.365, |
| "eval_steps_per_second": 5.342, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.00017936160659777833, |
| "loss": 0.2958, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.00017924472648726583, |
| "loss": 0.3119, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00017912755466064525, |
| "loss": 0.3096, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00017901009154925007, |
| "loss": 0.2778, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00017889233758548625, |
| "loss": 0.2953, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.00017877429320283016, |
| "loss": 0.3063, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.0001786559588358275, |
| "loss": 0.2945, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.00017853733492009135, |
| "loss": 0.3027, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.0001784184218923007, |
| "loss": 0.2654, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.0001782992201901988, |
| "loss": 0.3289, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.28, |
| "eval_loss": 0.39914268255233765, |
| "eval_runtime": 223.8716, |
| "eval_samples_per_second": 21.365, |
| "eval_steps_per_second": 5.342, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.0001781797302525916, |
| "loss": 0.2894, |
| "step": 3510 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.00017805995251934614, |
| "loss": 0.2776, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.00017793988743138877, |
| "loss": 0.288, |
| "step": 3530 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.00017781953543070372, |
| "loss": 0.3229, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.00017769889696033154, |
| "loss": 0.285, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.0001775779724643671, |
| "loss": 0.321, |
| "step": 3560 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.0001774567623879583, |
| "loss": 0.3222, |
| "step": 3570 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00017733526717730435, |
| "loss": 0.3087, |
| "step": 3580 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.00017721348727965408, |
| "loss": 0.2904, |
| "step": 3590 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.00017709142314330424, |
| "loss": 0.2984, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.32, |
| "eval_loss": 0.40588298439979553, |
| "eval_runtime": 223.9372, |
| "eval_samples_per_second": 21.359, |
| "eval_steps_per_second": 5.341, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.00017696907521759804, |
| "loss": 0.3216, |
| "step": 3610 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00017684644395292326, |
| "loss": 0.3019, |
| "step": 3620 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00017672352980071078, |
| "loss": 0.3272, |
| "step": 3630 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00017660033321343285, |
| "loss": 0.2892, |
| "step": 3640 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.0001764768546446014, |
| "loss": 0.2921, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.00017635309454876636, |
| "loss": 0.3105, |
| "step": 3660 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.00017622905338151408, |
| "loss": 0.3205, |
| "step": 3670 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.00017610473159946556, |
| "loss": 0.2838, |
| "step": 3680 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.00017598012966027482, |
| "loss": 0.2762, |
| "step": 3690 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.0001758552480226271, |
| "loss": 0.3277, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.36, |
| "eval_loss": 0.3979549705982208, |
| "eval_runtime": 223.8768, |
| "eval_samples_per_second": 21.364, |
| "eval_steps_per_second": 5.342, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.00017573008714623746, |
| "loss": 0.2709, |
| "step": 3710 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.00017560464749184876, |
| "loss": 0.2918, |
| "step": 3720 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00017547892952123005, |
| "loss": 0.3098, |
| "step": 3730 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00017535293369717506, |
| "loss": 0.3233, |
| "step": 3740 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00017522666048350023, |
| "loss": 0.31, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.00017510011034504324, |
| "loss": 0.3337, |
| "step": 3760 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.00017497328374766112, |
| "loss": 0.2845, |
| "step": 3770 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.00017484618115822857, |
| "loss": 0.3073, |
| "step": 3780 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.00017471880304463638, |
| "loss": 0.2977, |
| "step": 3790 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.0001745911498757895, |
| "loss": 0.3011, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.39, |
| "eval_loss": 0.4045478105545044, |
| "eval_runtime": 223.7983, |
| "eval_samples_per_second": 21.372, |
| "eval_steps_per_second": 5.344, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.00017446322212160545, |
| "loss": 0.3029, |
| "step": 3810 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.0001743350202530126, |
| "loss": 0.3017, |
| "step": 3820 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.00017420654474194832, |
| "loss": 0.3179, |
| "step": 3830 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.00017407779606135732, |
| "loss": 0.2728, |
| "step": 3840 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.00017394877468518996, |
| "loss": 0.286, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.00017381948108840042, |
| "loss": 0.3009, |
| "step": 3860 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00017368991574694495, |
| "loss": 0.2946, |
| "step": 3870 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.0001735600791377802, |
| "loss": 0.3229, |
| "step": 3880 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.00017342997173886134, |
| "loss": 0.3372, |
| "step": 3890 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.00017329959402914046, |
| "loss": 0.3194, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.43, |
| "eval_loss": 0.40152063965797424, |
| "eval_runtime": 223.8706, |
| "eval_samples_per_second": 21.365, |
| "eval_steps_per_second": 5.342, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.0001731689464885647, |
| "loss": 0.2674, |
| "step": 3910 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.00017303802959807443, |
| "loss": 0.2558, |
| "step": 3920 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.0001729068438396016, |
| "loss": 0.3186, |
| "step": 3930 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.00017277538969606793, |
| "loss": 0.3298, |
| "step": 3940 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 0.00017264366765138317, |
| "loss": 0.2991, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 0.00017251167819044315, |
| "loss": 0.2652, |
| "step": 3960 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 0.0001723794217991282, |
| "loss": 0.3177, |
| "step": 3970 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 0.00017224689896430117, |
| "loss": 0.3045, |
| "step": 3980 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 0.00017211411017380594, |
| "loss": 0.3146, |
| "step": 3990 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 0.00017198105591646528, |
| "loss": 0.2921, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.47, |
| "eval_loss": 0.400907427072525, |
| "eval_runtime": 224.0601, |
| "eval_samples_per_second": 21.347, |
| "eval_steps_per_second": 5.338, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 0.00017184773668207917, |
| "loss": 0.3441, |
| "step": 4010 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 0.00017171415296142315, |
| "loss": 0.2876, |
| "step": 4020 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 0.00017158030524624626, |
| "loss": 0.2755, |
| "step": 4030 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 0.0001714461940292695, |
| "loss": 0.3169, |
| "step": 4040 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 0.00017131181980418374, |
| "loss": 0.2844, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 0.00017117718306564812, |
| "loss": 0.2821, |
| "step": 4060 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 0.00017104228430928805, |
| "loss": 0.3002, |
| "step": 4070 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 0.00017090712403169364, |
| "loss": 0.3232, |
| "step": 4080 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 0.00017077170273041757, |
| "loss": 0.3135, |
| "step": 4090 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 0.00017063602090397346, |
| "loss": 0.2917, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.5, |
| "eval_loss": 0.4019235670566559, |
| "eval_runtime": 224.3869, |
| "eval_samples_per_second": 21.316, |
| "eval_steps_per_second": 5.33, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 0.00017050007905183398, |
| "loss": 0.3089, |
| "step": 4110 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 0.000170363877674429, |
| "loss": 0.3063, |
| "step": 4120 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 0.00017022741727314373, |
| "loss": 0.3056, |
| "step": 4130 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 0.00017009069835031694, |
| "loss": 0.3168, |
| "step": 4140 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 0.00016995372140923907, |
| "loss": 0.3062, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 0.00016981648695415033, |
| "loss": 0.2744, |
| "step": 4160 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 0.00016967899549023895, |
| "loss": 0.2877, |
| "step": 4170 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 0.00016954124752363922, |
| "loss": 0.3173, |
| "step": 4180 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 0.00016940324356142972, |
| "loss": 0.2954, |
| "step": 4190 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 0.00016926498411163135, |
| "loss": 0.2792, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.54, |
| "eval_loss": 0.4046282172203064, |
| "eval_runtime": 224.0328, |
| "eval_samples_per_second": 21.35, |
| "eval_steps_per_second": 5.339, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 0.00016912646968320552, |
| "loss": 0.3168, |
| "step": 4210 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 0.00016898770078605226, |
| "loss": 0.2854, |
| "step": 4220 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 0.00016884867793100843, |
| "loss": 0.2979, |
| "step": 4230 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 0.00016870940162984566, |
| "loss": 0.2656, |
| "step": 4240 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 0.00016856987239526863, |
| "loss": 0.3169, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 0.00016843009074091306, |
| "loss": 0.2872, |
| "step": 4260 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 0.00016829005718134397, |
| "loss": 0.2954, |
| "step": 4270 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 0.00016814977223205362, |
| "loss": 0.2885, |
| "step": 4280 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 0.00016800923640945974, |
| "loss": 0.2927, |
| "step": 4290 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 0.0001678684502309035, |
| "loss": 0.2886, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.58, |
| "eval_loss": 0.4055146872997284, |
| "eval_runtime": 224.1142, |
| "eval_samples_per_second": 21.342, |
| "eval_steps_per_second": 5.337, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 0.00016772741421464772, |
| "loss": 0.3042, |
| "step": 4310 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 0.00016758612887987498, |
| "loss": 0.2858, |
| "step": 4320 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 0.00016744459474668557, |
| "loss": 0.3072, |
| "step": 4330 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 0.00016730281233609572, |
| "loss": 0.3126, |
| "step": 4340 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 0.00016716078217003557, |
| "loss": 0.2748, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 0.00016701850477134734, |
| "loss": 0.2873, |
| "step": 4360 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 0.00016687598066378336, |
| "loss": 0.3051, |
| "step": 4370 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 0.00016673321037200407, |
| "loss": 0.3077, |
| "step": 4380 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 0.0001665901944215763, |
| "loss": 0.2708, |
| "step": 4390 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 0.00016644693333897108, |
| "loss": 0.2947, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.61, |
| "eval_loss": 0.40514013171195984, |
| "eval_runtime": 224.1094, |
| "eval_samples_per_second": 21.342, |
| "eval_steps_per_second": 5.337, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 0.0001663034276515619, |
| "loss": 0.306, |
| "step": 4410 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 0.00016615967788762261, |
| "loss": 0.3076, |
| "step": 4420 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 0.00016601568457632566, |
| "loss": 0.2895, |
| "step": 4430 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 0.00016587144824773992, |
| "loss": 0.2885, |
| "step": 4440 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 0.00016572696943282903, |
| "loss": 0.3046, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 0.00016558224866344907, |
| "loss": 0.3037, |
| "step": 4460 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 0.000165437286472347, |
| "loss": 0.28, |
| "step": 4470 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 0.00016529208339315833, |
| "loss": 0.2903, |
| "step": 4480 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 0.00016514663996040544, |
| "loss": 0.2849, |
| "step": 4490 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 0.00016500095670949548, |
| "loss": 0.2975, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.65, |
| "eval_loss": 0.4067119359970093, |
| "eval_runtime": 224.4914, |
| "eval_samples_per_second": 21.306, |
| "eval_steps_per_second": 5.328, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 0.00016485503417671836, |
| "loss": 0.3115, |
| "step": 4510 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 0.00016470887289924492, |
| "loss": 0.3409, |
| "step": 4520 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 0.00016456247341512485, |
| "loss": 0.2604, |
| "step": 4530 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 0.00016441583626328467, |
| "loss": 0.3226, |
| "step": 4540 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 0.00016426896198352587, |
| "loss": 0.2816, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 0.00016412185111652278, |
| "loss": 0.319, |
| "step": 4560 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 0.00016397450420382076, |
| "loss": 0.2831, |
| "step": 4570 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 0.00016382692178783402, |
| "loss": 0.2974, |
| "step": 4580 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 0.00016367910441184374, |
| "loss": 0.2772, |
| "step": 4590 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 0.00016353105261999605, |
| "loss": 0.3091, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.69, |
| "eval_loss": 0.3946963846683502, |
| "eval_runtime": 224.3647, |
| "eval_samples_per_second": 21.318, |
| "eval_steps_per_second": 5.331, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 0.00016338276695729994, |
| "loss": 0.286, |
| "step": 4610 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 0.00016323424796962544, |
| "loss": 0.2721, |
| "step": 4620 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 0.0001630854962037014, |
| "loss": 0.2983, |
| "step": 4630 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 0.00016293651220711364, |
| "loss": 0.2875, |
| "step": 4640 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 0.0001627872965283028, |
| "loss": 0.2859, |
| "step": 4650 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 0.00016263784971656247, |
| "loss": 0.2603, |
| "step": 4660 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 0.00016248817232203698, |
| "loss": 0.3231, |
| "step": 4670 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 0.00016233826489571963, |
| "loss": 0.2927, |
| "step": 4680 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 0.00016218812798945038, |
| "loss": 0.297, |
| "step": 4690 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 0.00016203776215591403, |
| "loss": 0.2908, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.72, |
| "eval_loss": 0.403292179107666, |
| "eval_runtime": 224.3888, |
| "eval_samples_per_second": 21.316, |
| "eval_steps_per_second": 5.33, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 0.0001618871679486381, |
| "loss": 0.3094, |
| "step": 4710 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 0.00016173634592199076, |
| "loss": 0.29, |
| "step": 4720 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 0.00016158529663117888, |
| "loss": 0.3115, |
| "step": 4730 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 0.00016143402063224584, |
| "loss": 0.3139, |
| "step": 4740 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 0.00016128251848206976, |
| "loss": 0.302, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 0.00016113079073836107, |
| "loss": 0.2777, |
| "step": 4760 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 0.0001609788379596608, |
| "loss": 0.2906, |
| "step": 4770 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 0.00016082666070533832, |
| "loss": 0.3028, |
| "step": 4780 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 0.0001606742595355893, |
| "loss": 0.3159, |
| "step": 4790 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 0.0001605216350114338, |
| "loss": 0.2864, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.76, |
| "eval_loss": 0.40955105423927307, |
| "eval_runtime": 224.3793, |
| "eval_samples_per_second": 21.317, |
| "eval_steps_per_second": 5.33, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 0.00016036878769471401, |
| "loss": 0.3017, |
| "step": 4810 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 0.00016021571814809227, |
| "loss": 0.2986, |
| "step": 4820 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 0.000160062426935049, |
| "loss": 0.3077, |
| "step": 4830 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 0.00015990891461988065, |
| "loss": 0.2758, |
| "step": 4840 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 0.00015975518176769755, |
| "loss": 0.2592, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 0.0001596012289444219, |
| "loss": 0.3021, |
| "step": 4860 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 0.00015944705671678565, |
| "loss": 0.3047, |
| "step": 4870 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 0.00015929266565232846, |
| "loss": 0.2785, |
| "step": 4880 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 0.00015913805631939546, |
| "loss": 0.2839, |
| "step": 4890 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 0.00015898322928713544, |
| "loss": 0.2788, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.8, |
| "eval_loss": 0.40693148970603943, |
| "eval_runtime": 224.6917, |
| "eval_samples_per_second": 21.287, |
| "eval_steps_per_second": 5.323, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 0.00015882818512549836, |
| "loss": 0.2875, |
| "step": 4910 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 0.00015867292440523378, |
| "loss": 0.2907, |
| "step": 4920 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 0.00015851744769788818, |
| "loss": 0.2776, |
| "step": 4930 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 0.00015836175557580324, |
| "loss": 0.2608, |
| "step": 4940 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 0.00015820584861211368, |
| "loss": 0.2945, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 0.000158049727380745, |
| "loss": 0.2771, |
| "step": 4960 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 0.0001578933924564115, |
| "loss": 0.2802, |
| "step": 4970 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 0.0001577368444146142, |
| "loss": 0.2849, |
| "step": 4980 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 0.00015758008383163854, |
| "loss": 0.3013, |
| "step": 4990 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 0.0001574231112845524, |
| "loss": 0.2942, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.83, |
| "eval_loss": 0.399748831987381, |
| "eval_runtime": 224.6973, |
| "eval_samples_per_second": 21.286, |
| "eval_steps_per_second": 5.323, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 0.00015726592735120393, |
| "loss": 0.2883, |
| "step": 5010 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 0.0001571085326102195, |
| "loss": 0.2639, |
| "step": 5020 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 0.0001569509276410015, |
| "loss": 0.2624, |
| "step": 5030 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 0.00015679311302372614, |
| "loss": 0.2823, |
| "step": 5040 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 0.0001566350893393414, |
| "loss": 0.3026, |
| "step": 5050 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 0.00015647685716956494, |
| "loss": 0.2803, |
| "step": 5060 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 0.00015631841709688184, |
| "loss": 0.2991, |
| "step": 5070 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 0.00015615976970454257, |
| "loss": 0.2734, |
| "step": 5080 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 0.00015600091557656072, |
| "loss": 0.2674, |
| "step": 5090 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 0.000155841855297711, |
| "loss": 0.2736, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.87, |
| "eval_loss": 0.40985968708992004, |
| "eval_runtime": 224.6419, |
| "eval_samples_per_second": 21.292, |
| "eval_steps_per_second": 5.324, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 0.0001556825894535269, |
| "loss": 0.2828, |
| "step": 5110 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 0.00015552311863029875, |
| "loss": 0.2872, |
| "step": 5120 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 0.00015536344341507129, |
| "loss": 0.2669, |
| "step": 5130 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 0.0001552035643956419, |
| "loss": 0.2708, |
| "step": 5140 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 0.00015504348216055798, |
| "loss": 0.3048, |
| "step": 5150 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 0.00015488319729911512, |
| "loss": 0.2964, |
| "step": 5160 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 0.00015472271040135483, |
| "loss": 0.266, |
| "step": 5170 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 0.00015456202205806234, |
| "loss": 0.2885, |
| "step": 5180 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 0.0001544011328607644, |
| "loss": 0.3317, |
| "step": 5190 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 0.00015424004340172719, |
| "loss": 0.2905, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.91, |
| "eval_loss": 0.39825478196144104, |
| "eval_runtime": 224.8976, |
| "eval_samples_per_second": 21.267, |
| "eval_steps_per_second": 5.318, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 0.0001540787542739541, |
| "loss": 0.2663, |
| "step": 5210 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 0.00015391726607118345, |
| "loss": 0.2838, |
| "step": 5220 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 0.00015375557938788657, |
| "loss": 0.2962, |
| "step": 5230 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 0.0001535936948192653, |
| "loss": 0.2884, |
| "step": 5240 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 0.00015343161296124994, |
| "loss": 0.3026, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 0.00015326933441049714, |
| "loss": 0.3234, |
| "step": 5260 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 0.00015310685976438753, |
| "loss": 0.2547, |
| "step": 5270 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 0.00015294418962102363, |
| "loss": 0.2896, |
| "step": 5280 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 0.00015278132457922764, |
| "loss": 0.3173, |
| "step": 5290 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 0.00015261826523853926, |
| "loss": 0.2848, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.94, |
| "eval_loss": 0.4000326693058014, |
| "eval_runtime": 224.8241, |
| "eval_samples_per_second": 21.274, |
| "eval_steps_per_second": 5.32, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 0.00015245501219921336, |
| "loss": 0.2819, |
| "step": 5310 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 0.00015229156606221792, |
| "loss": 0.2578, |
| "step": 5320 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 0.0001521279274292317, |
| "loss": 0.2893, |
| "step": 5330 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 0.00015196409690264212, |
| "loss": 0.2897, |
| "step": 5340 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 0.000151800075085543, |
| "loss": 0.2718, |
| "step": 5350 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 0.0001516358625817323, |
| "loss": 0.2908, |
| "step": 5360 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 0.00015147145999570998, |
| "loss": 0.2694, |
| "step": 5370 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 0.0001513068679326757, |
| "loss": 0.3028, |
| "step": 5380 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 0.00015114208699852663, |
| "loss": 0.2707, |
| "step": 5390 |
| }, |
| { |
| "epoch": 1.98, |
| "step": 5391, |
| "total_flos": 1.7572375333880463e+18, |
| "train_loss": 4.6051067095080594e-05, |
| "train_runtime": 5.408, |
| "train_samples_per_second": 15680.552, |
| "train_steps_per_second": 980.035 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 5300, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 10, |
| "total_flos": 1.7572375333880463e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|