diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,11773 @@ +{ + "best_metric": 0.7183188796043396, + "best_model_checkpoint": "./test_ast\\checkpoint-1260", + "epoch": 15.0, + "eval_steps": 5, + "global_step": 3915, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "learning_rate": 0.00019984674329501915, + "loss": 3.2249, + "step": 5 + }, + { + "epoch": 0.02, + "eval_accuracy": 0.09770114942528736, + "eval_loss": 3.1166629791259766, + "eval_runtime": 430.1734, + "eval_samples_per_second": 0.404, + "eval_steps_per_second": 0.051, + "step": 5 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019964240102171137, + "loss": 3.3654, + "step": 10 + }, + { + "epoch": 0.04, + "eval_accuracy": 0.16666666666666666, + "eval_loss": 3.1070964336395264, + "eval_runtime": 220.5651, + "eval_samples_per_second": 0.789, + "eval_steps_per_second": 0.1, + "step": 10 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019938697318007664, + "loss": 2.952, + "step": 15 + }, + { + "epoch": 0.06, + "eval_accuracy": 0.28160919540229884, + "eval_loss": 2.3668224811553955, + "eval_runtime": 235.0668, + "eval_samples_per_second": 0.74, + "eval_steps_per_second": 0.094, + "step": 15 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001991315453384419, + "loss": 2.6551, + "step": 20 + }, + { + "epoch": 0.08, + "eval_accuracy": 0.22988505747126436, + "eval_loss": 3.0940752029418945, + "eval_runtime": 263.2242, + "eval_samples_per_second": 0.661, + "eval_steps_per_second": 0.084, + "step": 20 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019887611749680716, + "loss": 3.2285, + "step": 25 + }, + { + "epoch": 0.1, + "eval_accuracy": 0.13793103448275862, + "eval_loss": 2.4251976013183594, + "eval_runtime": 240.2214, + "eval_samples_per_second": 0.724, + "eval_steps_per_second": 0.092, + "step": 25 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019862068965517243, + "loss": 2.4251, + "step": 30 + }, + { + "epoch": 0.11, + "eval_accuracy": 0.15517241379310345, + "eval_loss": 2.3162126541137695, + "eval_runtime": 271.4124, + "eval_samples_per_second": 0.641, + "eval_steps_per_second": 0.081, + "step": 30 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001983652618135377, + "loss": 2.2216, + "step": 35 + }, + { + "epoch": 0.13, + "eval_accuracy": 0.2413793103448276, + "eval_loss": 2.2742679119110107, + "eval_runtime": 292.5369, + "eval_samples_per_second": 0.595, + "eval_steps_per_second": 0.075, + "step": 35 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019810983397190295, + "loss": 1.982, + "step": 40 + }, + { + "epoch": 0.15, + "eval_accuracy": 0.25862068965517243, + "eval_loss": 2.558276653289795, + "eval_runtime": 282.7639, + "eval_samples_per_second": 0.615, + "eval_steps_per_second": 0.078, + "step": 40 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001978544061302682, + "loss": 2.2904, + "step": 45 + }, + { + "epoch": 0.17, + "eval_accuracy": 0.1896551724137931, + "eval_loss": 2.2774674892425537, + "eval_runtime": 272.0899, + "eval_samples_per_second": 0.639, + "eval_steps_per_second": 0.081, + "step": 45 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019759897828863348, + "loss": 2.2136, + "step": 50 + }, + { + "epoch": 0.19, + "eval_accuracy": 0.1724137931034483, + "eval_loss": 2.264404535293579, + "eval_runtime": 274.6774, + "eval_samples_per_second": 0.633, + "eval_steps_per_second": 0.08, + "step": 50 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019734355044699872, + "loss": 1.9795, + "step": 55 + }, + { + "epoch": 0.21, + "eval_accuracy": 0.27586206896551724, + "eval_loss": 2.131664752960205, + "eval_runtime": 289.0641, + "eval_samples_per_second": 0.602, + "eval_steps_per_second": 0.076, + "step": 55 + }, + { + "epoch": 0.23, + "learning_rate": 0.000197088122605364, + "loss": 2.1172, + "step": 60 + }, + { + "epoch": 0.23, + "eval_accuracy": 0.2988505747126437, + "eval_loss": 2.00719952583313, + "eval_runtime": 237.5063, + "eval_samples_per_second": 0.733, + "eval_steps_per_second": 0.093, + "step": 60 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019683269476372924, + "loss": 2.1899, + "step": 65 + }, + { + "epoch": 0.25, + "eval_accuracy": 0.19540229885057472, + "eval_loss": 1.9535428285598755, + "eval_runtime": 127.0996, + "eval_samples_per_second": 1.369, + "eval_steps_per_second": 0.173, + "step": 65 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019657726692209453, + "loss": 2.1116, + "step": 70 + }, + { + "epoch": 0.27, + "eval_accuracy": 0.22988505747126436, + "eval_loss": 1.9114965200424194, + "eval_runtime": 123.0583, + "eval_samples_per_second": 1.414, + "eval_steps_per_second": 0.179, + "step": 70 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019632183908045977, + "loss": 2.0904, + "step": 75 + }, + { + "epoch": 0.29, + "eval_accuracy": 0.3160919540229885, + "eval_loss": 1.9307175874710083, + "eval_runtime": 110.3002, + "eval_samples_per_second": 1.578, + "eval_steps_per_second": 0.199, + "step": 75 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019606641123882503, + "loss": 1.9124, + "step": 80 + }, + { + "epoch": 0.31, + "eval_accuracy": 0.3390804597701149, + "eval_loss": 1.8102548122406006, + "eval_runtime": 111.5475, + "eval_samples_per_second": 1.56, + "eval_steps_per_second": 0.197, + "step": 80 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001958109833971903, + "loss": 1.6039, + "step": 85 + }, + { + "epoch": 0.33, + "eval_accuracy": 0.21839080459770116, + "eval_loss": 1.9069994688034058, + "eval_runtime": 109.692, + "eval_samples_per_second": 1.586, + "eval_steps_per_second": 0.201, + "step": 85 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019555555555555556, + "loss": 1.5797, + "step": 90 + }, + { + "epoch": 0.34, + "eval_accuracy": 0.3620689655172414, + "eval_loss": 1.955863118171692, + "eval_runtime": 110.8668, + "eval_samples_per_second": 1.569, + "eval_steps_per_second": 0.198, + "step": 90 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019530012771392082, + "loss": 1.9217, + "step": 95 + }, + { + "epoch": 0.36, + "eval_accuracy": 0.28735632183908044, + "eval_loss": 1.7302300930023193, + "eval_runtime": 109.9193, + "eval_samples_per_second": 1.583, + "eval_steps_per_second": 0.2, + "step": 95 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019504469987228609, + "loss": 2.1192, + "step": 100 + }, + { + "epoch": 0.38, + "eval_accuracy": 0.28735632183908044, + "eval_loss": 2.121290445327759, + "eval_runtime": 114.0902, + "eval_samples_per_second": 1.525, + "eval_steps_per_second": 0.193, + "step": 100 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019478927203065135, + "loss": 1.991, + "step": 105 + }, + { + "epoch": 0.4, + "eval_accuracy": 0.25287356321839083, + "eval_loss": 1.8870444297790527, + "eval_runtime": 111.5449, + "eval_samples_per_second": 1.56, + "eval_steps_per_second": 0.197, + "step": 105 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001945338441890166, + "loss": 1.9855, + "step": 110 + }, + { + "epoch": 0.42, + "eval_accuracy": 0.3448275862068966, + "eval_loss": 1.853091835975647, + "eval_runtime": 108.3618, + "eval_samples_per_second": 1.606, + "eval_steps_per_second": 0.203, + "step": 110 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019427841634738188, + "loss": 1.6668, + "step": 115 + }, + { + "epoch": 0.44, + "eval_accuracy": 0.3218390804597701, + "eval_loss": 2.0078558921813965, + "eval_runtime": 90.0246, + "eval_samples_per_second": 1.933, + "eval_steps_per_second": 0.244, + "step": 115 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019402298850574714, + "loss": 1.4628, + "step": 120 + }, + { + "epoch": 0.46, + "eval_accuracy": 0.3793103448275862, + "eval_loss": 1.891345500946045, + "eval_runtime": 90.081, + "eval_samples_per_second": 1.932, + "eval_steps_per_second": 0.244, + "step": 120 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001937675606641124, + "loss": 1.8827, + "step": 125 + }, + { + "epoch": 0.48, + "eval_accuracy": 0.2988505747126437, + "eval_loss": 1.7698206901550293, + "eval_runtime": 86.9617, + "eval_samples_per_second": 2.001, + "eval_steps_per_second": 0.253, + "step": 125 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019351213282247767, + "loss": 1.9941, + "step": 130 + }, + { + "epoch": 0.5, + "eval_accuracy": 0.29310344827586204, + "eval_loss": 1.7076359987258911, + "eval_runtime": 91.6164, + "eval_samples_per_second": 1.899, + "eval_steps_per_second": 0.24, + "step": 130 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019325670498084293, + "loss": 1.844, + "step": 135 + }, + { + "epoch": 0.52, + "eval_accuracy": 0.29310344827586204, + "eval_loss": 1.722959280014038, + "eval_runtime": 87.2052, + "eval_samples_per_second": 1.995, + "eval_steps_per_second": 0.252, + "step": 135 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001930012771392082, + "loss": 1.5423, + "step": 140 + }, + { + "epoch": 0.54, + "eval_accuracy": 0.3793103448275862, + "eval_loss": 1.6389808654785156, + "eval_runtime": 87.1239, + "eval_samples_per_second": 1.997, + "eval_steps_per_second": 0.253, + "step": 140 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019274584929757346, + "loss": 1.9086, + "step": 145 + }, + { + "epoch": 0.56, + "eval_accuracy": 0.3620689655172414, + "eval_loss": 1.720744013786316, + "eval_runtime": 85.5891, + "eval_samples_per_second": 2.033, + "eval_steps_per_second": 0.257, + "step": 145 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001924904214559387, + "loss": 1.572, + "step": 150 + }, + { + "epoch": 0.57, + "eval_accuracy": 0.3160919540229885, + "eval_loss": 1.8218252658843994, + "eval_runtime": 88.7779, + "eval_samples_per_second": 1.96, + "eval_steps_per_second": 0.248, + "step": 150 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019223499361430398, + "loss": 1.8335, + "step": 155 + }, + { + "epoch": 0.59, + "eval_accuracy": 0.39080459770114945, + "eval_loss": 1.6242988109588623, + "eval_runtime": 87.9658, + "eval_samples_per_second": 1.978, + "eval_steps_per_second": 0.25, + "step": 155 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019197956577266922, + "loss": 1.5903, + "step": 160 + }, + { + "epoch": 0.61, + "eval_accuracy": 0.3850574712643678, + "eval_loss": 1.634774088859558, + "eval_runtime": 87.315, + "eval_samples_per_second": 1.993, + "eval_steps_per_second": 0.252, + "step": 160 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001917241379310345, + "loss": 1.7064, + "step": 165 + }, + { + "epoch": 0.63, + "eval_accuracy": 0.42528735632183906, + "eval_loss": 1.5469759702682495, + "eval_runtime": 87.4736, + "eval_samples_per_second": 1.989, + "eval_steps_per_second": 0.252, + "step": 165 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019146871008939975, + "loss": 1.3325, + "step": 170 + }, + { + "epoch": 0.65, + "eval_accuracy": 0.43103448275862066, + "eval_loss": 1.5237237215042114, + "eval_runtime": 88.9581, + "eval_samples_per_second": 1.956, + "eval_steps_per_second": 0.247, + "step": 170 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019121328224776504, + "loss": 1.5154, + "step": 175 + }, + { + "epoch": 0.67, + "eval_accuracy": 0.42528735632183906, + "eval_loss": 1.571236491203308, + "eval_runtime": 84.6969, + "eval_samples_per_second": 2.054, + "eval_steps_per_second": 0.26, + "step": 175 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019095785440613027, + "loss": 1.7564, + "step": 180 + }, + { + "epoch": 0.69, + "eval_accuracy": 0.2988505747126437, + "eval_loss": 1.5861365795135498, + "eval_runtime": 89.2791, + "eval_samples_per_second": 1.949, + "eval_steps_per_second": 0.246, + "step": 180 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019070242656449554, + "loss": 1.3903, + "step": 185 + }, + { + "epoch": 0.71, + "eval_accuracy": 0.25862068965517243, + "eval_loss": 1.8145408630371094, + "eval_runtime": 85.8558, + "eval_samples_per_second": 2.027, + "eval_steps_per_second": 0.256, + "step": 185 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001904469987228608, + "loss": 2.0853, + "step": 190 + }, + { + "epoch": 0.73, + "eval_accuracy": 0.367816091954023, + "eval_loss": 1.5079203844070435, + "eval_runtime": 89.4633, + "eval_samples_per_second": 1.945, + "eval_steps_per_second": 0.246, + "step": 190 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019024265644955303, + "loss": 1.5312, + "step": 195 + }, + { + "epoch": 0.75, + "eval_accuracy": 0.4482758620689655, + "eval_loss": 1.4408893585205078, + "eval_runtime": 87.6476, + "eval_samples_per_second": 1.985, + "eval_steps_per_second": 0.251, + "step": 195 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018998722860791826, + "loss": 1.2828, + "step": 200 + }, + { + "epoch": 0.77, + "eval_accuracy": 0.39655172413793105, + "eval_loss": 1.600140929222107, + "eval_runtime": 86.6087, + "eval_samples_per_second": 2.009, + "eval_steps_per_second": 0.254, + "step": 200 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018973180076628355, + "loss": 1.9389, + "step": 205 + }, + { + "epoch": 0.79, + "eval_accuracy": 0.367816091954023, + "eval_loss": 1.7927314043045044, + "eval_runtime": 90.3234, + "eval_samples_per_second": 1.926, + "eval_steps_per_second": 0.244, + "step": 205 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001894763729246488, + "loss": 1.5486, + "step": 210 + }, + { + "epoch": 0.8, + "eval_accuracy": 0.39080459770114945, + "eval_loss": 1.5749437808990479, + "eval_runtime": 86.7654, + "eval_samples_per_second": 2.005, + "eval_steps_per_second": 0.254, + "step": 210 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018922094508301408, + "loss": 1.4306, + "step": 215 + }, + { + "epoch": 0.82, + "eval_accuracy": 0.28160919540229884, + "eval_loss": 1.7231699228286743, + "eval_runtime": 87.5623, + "eval_samples_per_second": 1.987, + "eval_steps_per_second": 0.251, + "step": 215 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018896551724137932, + "loss": 1.814, + "step": 220 + }, + { + "epoch": 0.84, + "eval_accuracy": 0.25862068965517243, + "eval_loss": 1.7349094152450562, + "eval_runtime": 87.3298, + "eval_samples_per_second": 1.992, + "eval_steps_per_second": 0.252, + "step": 220 + }, + { + "epoch": 0.86, + "learning_rate": 0.00018871008939974458, + "loss": 1.7483, + "step": 225 + }, + { + "epoch": 0.86, + "eval_accuracy": 0.4482758620689655, + "eval_loss": 1.606767177581787, + "eval_runtime": 88.6573, + "eval_samples_per_second": 1.963, + "eval_steps_per_second": 0.248, + "step": 225 + }, + { + "epoch": 0.88, + "learning_rate": 0.00018845466155810984, + "loss": 1.836, + "step": 230 + }, + { + "epoch": 0.88, + "eval_accuracy": 0.43103448275862066, + "eval_loss": 1.6200461387634277, + "eval_runtime": 86.663, + "eval_samples_per_second": 2.008, + "eval_steps_per_second": 0.254, + "step": 230 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001881992337164751, + "loss": 1.8752, + "step": 235 + }, + { + "epoch": 0.9, + "eval_accuracy": 0.41954022988505746, + "eval_loss": 1.4891613721847534, + "eval_runtime": 87.2585, + "eval_samples_per_second": 1.994, + "eval_steps_per_second": 0.252, + "step": 235 + }, + { + "epoch": 0.92, + "learning_rate": 0.00018794380587484037, + "loss": 1.3274, + "step": 240 + }, + { + "epoch": 0.92, + "eval_accuracy": 0.4482758620689655, + "eval_loss": 1.5101828575134277, + "eval_runtime": 89.5236, + "eval_samples_per_second": 1.944, + "eval_steps_per_second": 0.246, + "step": 240 + }, + { + "epoch": 0.94, + "learning_rate": 0.00018768837803320563, + "loss": 1.1109, + "step": 245 + }, + { + "epoch": 0.94, + "eval_accuracy": 0.45977011494252873, + "eval_loss": 1.6440746784210205, + "eval_runtime": 86.2143, + "eval_samples_per_second": 2.018, + "eval_steps_per_second": 0.255, + "step": 245 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001874329501915709, + "loss": 1.137, + "step": 250 + }, + { + "epoch": 0.96, + "eval_accuracy": 0.4885057471264368, + "eval_loss": 1.6819708347320557, + "eval_runtime": 86.701, + "eval_samples_per_second": 2.007, + "eval_steps_per_second": 0.254, + "step": 250 + }, + { + "epoch": 0.98, + "learning_rate": 0.00018717752234993616, + "loss": 2.029, + "step": 255 + }, + { + "epoch": 0.98, + "eval_accuracy": 0.4425287356321839, + "eval_loss": 1.5568251609802246, + "eval_runtime": 89.066, + "eval_samples_per_second": 1.954, + "eval_steps_per_second": 0.247, + "step": 255 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001869220945083014, + "loss": 1.3499, + "step": 260 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.47126436781609193, + "eval_loss": 1.5453182458877563, + "eval_runtime": 88.7892, + "eval_samples_per_second": 1.96, + "eval_steps_per_second": 0.248, + "step": 260 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001866666666666667, + "loss": 1.6062, + "step": 265 + }, + { + "epoch": 1.02, + "eval_accuracy": 0.3850574712643678, + "eval_loss": 1.7614227533340454, + "eval_runtime": 88.9814, + "eval_samples_per_second": 1.955, + "eval_steps_per_second": 0.247, + "step": 265 + }, + { + "epoch": 1.03, + "learning_rate": 0.00018641123882503192, + "loss": 1.2653, + "step": 270 + }, + { + "epoch": 1.03, + "eval_accuracy": 0.40804597701149425, + "eval_loss": 1.680598497390747, + "eval_runtime": 89.0981, + "eval_samples_per_second": 1.953, + "eval_steps_per_second": 0.247, + "step": 270 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001861558109833972, + "loss": 1.5162, + "step": 275 + }, + { + "epoch": 1.05, + "eval_accuracy": 0.3160919540229885, + "eval_loss": 2.1192500591278076, + "eval_runtime": 87.4979, + "eval_samples_per_second": 1.989, + "eval_steps_per_second": 0.251, + "step": 275 + }, + { + "epoch": 1.07, + "learning_rate": 0.00018590038314176245, + "loss": 1.8098, + "step": 280 + }, + { + "epoch": 1.07, + "eval_accuracy": 0.4540229885057471, + "eval_loss": 1.5041106939315796, + "eval_runtime": 86.6499, + "eval_samples_per_second": 2.008, + "eval_steps_per_second": 0.254, + "step": 280 + }, + { + "epoch": 1.09, + "learning_rate": 0.00018564495530012774, + "loss": 1.5888, + "step": 285 + }, + { + "epoch": 1.09, + "eval_accuracy": 0.3735632183908046, + "eval_loss": 1.6979694366455078, + "eval_runtime": 84.5366, + "eval_samples_per_second": 2.058, + "eval_steps_per_second": 0.26, + "step": 285 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018538952745849298, + "loss": 1.57, + "step": 290 + }, + { + "epoch": 1.11, + "eval_accuracy": 0.3850574712643678, + "eval_loss": 1.5114161968231201, + "eval_runtime": 84.8072, + "eval_samples_per_second": 2.052, + "eval_steps_per_second": 0.259, + "step": 290 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018513409961685824, + "loss": 1.3931, + "step": 295 + }, + { + "epoch": 1.13, + "eval_accuracy": 0.40804597701149425, + "eval_loss": 1.5041882991790771, + "eval_runtime": 84.3882, + "eval_samples_per_second": 2.062, + "eval_steps_per_second": 0.261, + "step": 295 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001848786717752235, + "loss": 1.494, + "step": 300 + }, + { + "epoch": 1.15, + "eval_accuracy": 0.4425287356321839, + "eval_loss": 1.4944647550582886, + "eval_runtime": 87.1429, + "eval_samples_per_second": 1.997, + "eval_steps_per_second": 0.252, + "step": 300 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018462324393358877, + "loss": 1.2355, + "step": 305 + }, + { + "epoch": 1.17, + "eval_accuracy": 0.47701149425287354, + "eval_loss": 1.4152653217315674, + "eval_runtime": 84.3818, + "eval_samples_per_second": 2.062, + "eval_steps_per_second": 0.261, + "step": 305 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018436781609195403, + "loss": 1.9234, + "step": 310 + }, + { + "epoch": 1.19, + "eval_accuracy": 0.46551724137931033, + "eval_loss": 1.3996832370758057, + "eval_runtime": 87.1832, + "eval_samples_per_second": 1.996, + "eval_steps_per_second": 0.252, + "step": 310 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001841123882503193, + "loss": 1.2396, + "step": 315 + }, + { + "epoch": 1.21, + "eval_accuracy": 0.46551724137931033, + "eval_loss": 1.3299652338027954, + "eval_runtime": 84.5376, + "eval_samples_per_second": 2.058, + "eval_steps_per_second": 0.26, + "step": 315 + }, + { + "epoch": 1.23, + "learning_rate": 0.00018385696040868456, + "loss": 1.8784, + "step": 320 + }, + { + "epoch": 1.23, + "eval_accuracy": 0.4425287356321839, + "eval_loss": 1.5061637163162231, + "eval_runtime": 84.8318, + "eval_samples_per_second": 2.051, + "eval_steps_per_second": 0.259, + "step": 320 + }, + { + "epoch": 1.25, + "learning_rate": 0.00018360153256704982, + "loss": 1.2335, + "step": 325 + }, + { + "epoch": 1.25, + "eval_accuracy": 0.46551724137931033, + "eval_loss": 1.3658419847488403, + "eval_runtime": 86.8661, + "eval_samples_per_second": 2.003, + "eval_steps_per_second": 0.253, + "step": 325 + }, + { + "epoch": 1.26, + "learning_rate": 0.00018334610472541506, + "loss": 1.2988, + "step": 330 + }, + { + "epoch": 1.26, + "eval_accuracy": 0.5114942528735632, + "eval_loss": 1.4404170513153076, + "eval_runtime": 84.7349, + "eval_samples_per_second": 2.053, + "eval_steps_per_second": 0.26, + "step": 330 + }, + { + "epoch": 1.28, + "learning_rate": 0.00018309067688378035, + "loss": 1.6458, + "step": 335 + }, + { + "epoch": 1.28, + "eval_accuracy": 0.47701149425287354, + "eval_loss": 1.389655590057373, + "eval_runtime": 86.6616, + "eval_samples_per_second": 2.008, + "eval_steps_per_second": 0.254, + "step": 335 + }, + { + "epoch": 1.3, + "learning_rate": 0.00018283524904214558, + "loss": 1.4325, + "step": 340 + }, + { + "epoch": 1.3, + "eval_accuracy": 0.3390804597701149, + "eval_loss": 1.943527102470398, + "eval_runtime": 84.9859, + "eval_samples_per_second": 2.047, + "eval_steps_per_second": 0.259, + "step": 340 + }, + { + "epoch": 1.32, + "learning_rate": 0.00018257982120051087, + "loss": 1.8258, + "step": 345 + }, + { + "epoch": 1.32, + "eval_accuracy": 0.41379310344827586, + "eval_loss": 1.674710988998413, + "eval_runtime": 85.9104, + "eval_samples_per_second": 2.025, + "eval_steps_per_second": 0.256, + "step": 345 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001823243933588761, + "loss": 1.6398, + "step": 350 + }, + { + "epoch": 1.34, + "eval_accuracy": 0.43103448275862066, + "eval_loss": 1.537279486656189, + "eval_runtime": 87.0005, + "eval_samples_per_second": 2.0, + "eval_steps_per_second": 0.253, + "step": 350 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001820689655172414, + "loss": 1.3836, + "step": 355 + }, + { + "epoch": 1.36, + "eval_accuracy": 0.4540229885057471, + "eval_loss": 1.530836820602417, + "eval_runtime": 85.6209, + "eval_samples_per_second": 2.032, + "eval_steps_per_second": 0.257, + "step": 355 + }, + { + "epoch": 1.38, + "learning_rate": 0.00018181353767560664, + "loss": 1.1067, + "step": 360 + }, + { + "epoch": 1.38, + "eval_accuracy": 0.45977011494252873, + "eval_loss": 1.5031547546386719, + "eval_runtime": 86.5596, + "eval_samples_per_second": 2.01, + "eval_steps_per_second": 0.254, + "step": 360 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001815581098339719, + "loss": 1.4948, + "step": 365 + }, + { + "epoch": 1.4, + "eval_accuracy": 0.47701149425287354, + "eval_loss": 1.4820023775100708, + "eval_runtime": 87.8688, + "eval_samples_per_second": 1.98, + "eval_steps_per_second": 0.25, + "step": 365 + }, + { + "epoch": 1.42, + "learning_rate": 0.00018130268199233716, + "loss": 1.3582, + "step": 370 + }, + { + "epoch": 1.42, + "eval_accuracy": 0.41954022988505746, + "eval_loss": 1.455491304397583, + "eval_runtime": 85.3532, + "eval_samples_per_second": 2.039, + "eval_steps_per_second": 0.258, + "step": 370 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018104725415070243, + "loss": 1.2616, + "step": 375 + }, + { + "epoch": 1.44, + "eval_accuracy": 0.46551724137931033, + "eval_loss": 1.3622076511383057, + "eval_runtime": 85.6174, + "eval_samples_per_second": 2.032, + "eval_steps_per_second": 0.257, + "step": 375 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001807918263090677, + "loss": 1.4582, + "step": 380 + }, + { + "epoch": 1.46, + "eval_accuracy": 0.4942528735632184, + "eval_loss": 1.2709373235702515, + "eval_runtime": 87.9037, + "eval_samples_per_second": 1.979, + "eval_steps_per_second": 0.25, + "step": 380 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018053639846743295, + "loss": 1.7958, + "step": 385 + }, + { + "epoch": 1.48, + "eval_accuracy": 0.3620689655172414, + "eval_loss": 1.5655514001846313, + "eval_runtime": 89.7184, + "eval_samples_per_second": 1.939, + "eval_steps_per_second": 0.245, + "step": 385 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018028097062579822, + "loss": 1.4743, + "step": 390 + }, + { + "epoch": 1.49, + "eval_accuracy": 0.4367816091954023, + "eval_loss": 1.3905311822891235, + "eval_runtime": 87.4724, + "eval_samples_per_second": 1.989, + "eval_steps_per_second": 0.252, + "step": 390 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018002554278416348, + "loss": 1.3111, + "step": 395 + }, + { + "epoch": 1.51, + "eval_accuracy": 0.5287356321839081, + "eval_loss": 1.3618022203445435, + "eval_runtime": 85.8256, + "eval_samples_per_second": 2.027, + "eval_steps_per_second": 0.256, + "step": 395 + }, + { + "epoch": 1.53, + "learning_rate": 0.00017977011494252874, + "loss": 1.1186, + "step": 400 + }, + { + "epoch": 1.53, + "eval_accuracy": 0.5, + "eval_loss": 1.4678940773010254, + "eval_runtime": 87.4839, + "eval_samples_per_second": 1.989, + "eval_steps_per_second": 0.251, + "step": 400 + }, + { + "epoch": 1.55, + "learning_rate": 0.000179514687100894, + "loss": 1.3566, + "step": 405 + }, + { + "epoch": 1.55, + "eval_accuracy": 0.47701149425287354, + "eval_loss": 1.6265980005264282, + "eval_runtime": 85.8882, + "eval_samples_per_second": 2.026, + "eval_steps_per_second": 0.256, + "step": 405 + }, + { + "epoch": 1.57, + "learning_rate": 0.00017925925925925927, + "loss": 1.4949, + "step": 410 + }, + { + "epoch": 1.57, + "eval_accuracy": 0.5057471264367817, + "eval_loss": 1.4489529132843018, + "eval_runtime": 84.8544, + "eval_samples_per_second": 2.051, + "eval_steps_per_second": 0.259, + "step": 410 + }, + { + "epoch": 1.59, + "learning_rate": 0.00017900383141762453, + "loss": 1.2182, + "step": 415 + }, + { + "epoch": 1.59, + "eval_accuracy": 0.5459770114942529, + "eval_loss": 1.152848720550537, + "eval_runtime": 86.7262, + "eval_samples_per_second": 2.006, + "eval_steps_per_second": 0.254, + "step": 415 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001787484035759898, + "loss": 1.1455, + "step": 420 + }, + { + "epoch": 1.61, + "eval_accuracy": 0.4540229885057471, + "eval_loss": 1.2889221906661987, + "eval_runtime": 89.054, + "eval_samples_per_second": 1.954, + "eval_steps_per_second": 0.247, + "step": 420 + }, + { + "epoch": 1.63, + "learning_rate": 0.00017849297573435506, + "loss": 1.0795, + "step": 425 + }, + { + "epoch": 1.63, + "eval_accuracy": 0.4827586206896552, + "eval_loss": 1.4589430093765259, + "eval_runtime": 85.7656, + "eval_samples_per_second": 2.029, + "eval_steps_per_second": 0.257, + "step": 425 + }, + { + "epoch": 1.65, + "learning_rate": 0.00017823754789272032, + "loss": 1.2771, + "step": 430 + }, + { + "epoch": 1.65, + "eval_accuracy": 0.5114942528735632, + "eval_loss": 1.5259735584259033, + "eval_runtime": 85.8919, + "eval_samples_per_second": 2.026, + "eval_steps_per_second": 0.256, + "step": 430 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001779821200510856, + "loss": 1.2858, + "step": 435 + }, + { + "epoch": 1.67, + "eval_accuracy": 0.4885057471264368, + "eval_loss": 1.4051034450531006, + "eval_runtime": 86.1444, + "eval_samples_per_second": 2.02, + "eval_steps_per_second": 0.255, + "step": 435 + }, + { + "epoch": 1.69, + "learning_rate": 0.00017772669220945085, + "loss": 1.0829, + "step": 440 + }, + { + "epoch": 1.69, + "eval_accuracy": 0.5344827586206896, + "eval_loss": 1.3036625385284424, + "eval_runtime": 85.2368, + "eval_samples_per_second": 2.041, + "eval_steps_per_second": 0.258, + "step": 440 + }, + { + "epoch": 1.7, + "learning_rate": 0.00017747126436781609, + "loss": 1.5866, + "step": 445 + }, + { + "epoch": 1.7, + "eval_accuracy": 0.5229885057471264, + "eval_loss": 1.328389286994934, + "eval_runtime": 85.533, + "eval_samples_per_second": 2.034, + "eval_steps_per_second": 0.257, + "step": 445 + }, + { + "epoch": 1.72, + "learning_rate": 0.00017721583652618138, + "loss": 1.2964, + "step": 450 + }, + { + "epoch": 1.72, + "eval_accuracy": 0.5114942528735632, + "eval_loss": 1.2732243537902832, + "eval_runtime": 89.1901, + "eval_samples_per_second": 1.951, + "eval_steps_per_second": 0.247, + "step": 450 + }, + { + "epoch": 1.74, + "learning_rate": 0.0001769604086845466, + "loss": 1.1894, + "step": 455 + }, + { + "epoch": 1.74, + "eval_accuracy": 0.42528735632183906, + "eval_loss": 1.4987748861312866, + "eval_runtime": 85.0991, + "eval_samples_per_second": 2.045, + "eval_steps_per_second": 0.259, + "step": 455 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001767049808429119, + "loss": 1.5003, + "step": 460 + }, + { + "epoch": 1.76, + "eval_accuracy": 0.46551724137931033, + "eval_loss": 1.329574704170227, + "eval_runtime": 86.0734, + "eval_samples_per_second": 2.022, + "eval_steps_per_second": 0.256, + "step": 460 + }, + { + "epoch": 1.78, + "learning_rate": 0.00017644955300127714, + "loss": 1.1569, + "step": 465 + }, + { + "epoch": 1.78, + "eval_accuracy": 0.5, + "eval_loss": 1.428106665611267, + "eval_runtime": 83.952, + "eval_samples_per_second": 2.073, + "eval_steps_per_second": 0.262, + "step": 465 + }, + { + "epoch": 1.8, + "learning_rate": 0.00017619412515964243, + "loss": 1.0751, + "step": 470 + }, + { + "epoch": 1.8, + "eval_accuracy": 0.47126436781609193, + "eval_loss": 1.5787663459777832, + "eval_runtime": 85.6942, + "eval_samples_per_second": 2.03, + "eval_steps_per_second": 0.257, + "step": 470 + }, + { + "epoch": 1.82, + "learning_rate": 0.00017593869731800767, + "loss": 1.592, + "step": 475 + }, + { + "epoch": 1.82, + "eval_accuracy": 0.5, + "eval_loss": 1.2109367847442627, + "eval_runtime": 84.8219, + "eval_samples_per_second": 2.051, + "eval_steps_per_second": 0.259, + "step": 475 + }, + { + "epoch": 1.84, + "learning_rate": 0.00017568326947637293, + "loss": 1.4279, + "step": 480 + }, + { + "epoch": 1.84, + "eval_accuracy": 0.5287356321839081, + "eval_loss": 1.207238793373108, + "eval_runtime": 86.384, + "eval_samples_per_second": 2.014, + "eval_steps_per_second": 0.255, + "step": 480 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001754278416347382, + "loss": 1.19, + "step": 485 + }, + { + "epoch": 1.86, + "eval_accuracy": 0.5402298850574713, + "eval_loss": 1.276541829109192, + "eval_runtime": 85.9845, + "eval_samples_per_second": 2.024, + "eval_steps_per_second": 0.256, + "step": 485 + }, + { + "epoch": 1.88, + "learning_rate": 0.00017517241379310346, + "loss": 1.3807, + "step": 490 + }, + { + "epoch": 1.88, + "eval_accuracy": 0.5229885057471264, + "eval_loss": 1.3951900005340576, + "eval_runtime": 86.4759, + "eval_samples_per_second": 2.012, + "eval_steps_per_second": 0.254, + "step": 490 + }, + { + "epoch": 1.9, + "learning_rate": 0.00017491698595146872, + "loss": 1.2857, + "step": 495 + }, + { + "epoch": 1.9, + "eval_accuracy": 0.5057471264367817, + "eval_loss": 1.2476894855499268, + "eval_runtime": 85.1521, + "eval_samples_per_second": 2.043, + "eval_steps_per_second": 0.258, + "step": 495 + }, + { + "epoch": 1.92, + "learning_rate": 0.00017466155810983398, + "loss": 1.229, + "step": 500 + }, + { + "epoch": 1.92, + "eval_accuracy": 0.5229885057471264, + "eval_loss": 1.2090439796447754, + "eval_runtime": 86.6818, + "eval_samples_per_second": 2.007, + "eval_steps_per_second": 0.254, + "step": 500 + }, + { + "epoch": 1.93, + "learning_rate": 0.00017440613026819925, + "loss": 0.9704, + "step": 505 + }, + { + "epoch": 1.93, + "eval_accuracy": 0.5862068965517241, + "eval_loss": 1.0011608600616455, + "eval_runtime": 87.3092, + "eval_samples_per_second": 1.993, + "eval_steps_per_second": 0.252, + "step": 505 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001741507024265645, + "loss": 1.2077, + "step": 510 + }, + { + "epoch": 1.95, + "eval_accuracy": 0.6091954022988506, + "eval_loss": 1.0754402875900269, + "eval_runtime": 85.1447, + "eval_samples_per_second": 2.044, + "eval_steps_per_second": 0.258, + "step": 510 + }, + { + "epoch": 1.97, + "learning_rate": 0.00017389527458492975, + "loss": 1.31, + "step": 515 + }, + { + "epoch": 1.97, + "eval_accuracy": 0.5747126436781609, + "eval_loss": 1.1699658632278442, + "eval_runtime": 85.8978, + "eval_samples_per_second": 2.026, + "eval_steps_per_second": 0.256, + "step": 515 + }, + { + "epoch": 1.99, + "learning_rate": 0.00017363984674329504, + "loss": 0.9374, + "step": 520 + }, + { + "epoch": 1.99, + "eval_accuracy": 0.5402298850574713, + "eval_loss": 1.1662834882736206, + "eval_runtime": 84.761, + "eval_samples_per_second": 2.053, + "eval_steps_per_second": 0.26, + "step": 520 + }, + { + "epoch": 2.01, + "learning_rate": 0.00017338441890166027, + "loss": 1.3027, + "step": 525 + }, + { + "epoch": 2.01, + "eval_accuracy": 0.5057471264367817, + "eval_loss": 1.2674177885055542, + "eval_runtime": 87.1265, + "eval_samples_per_second": 1.997, + "eval_steps_per_second": 0.253, + "step": 525 + }, + { + "epoch": 2.03, + "learning_rate": 0.00017312899106002556, + "loss": 1.2744, + "step": 530 + }, + { + "epoch": 2.03, + "eval_accuracy": 0.5229885057471264, + "eval_loss": 1.1397805213928223, + "eval_runtime": 87.5192, + "eval_samples_per_second": 1.988, + "eval_steps_per_second": 0.251, + "step": 530 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001728735632183908, + "loss": 0.6539, + "step": 535 + }, + { + "epoch": 2.05, + "eval_accuracy": 0.5114942528735632, + "eval_loss": 1.3558400869369507, + "eval_runtime": 87.7373, + "eval_samples_per_second": 1.983, + "eval_steps_per_second": 0.251, + "step": 535 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001726181353767561, + "loss": 1.3282, + "step": 540 + }, + { + "epoch": 2.07, + "eval_accuracy": 0.4885057471264368, + "eval_loss": 1.671147346496582, + "eval_runtime": 85.9997, + "eval_samples_per_second": 2.023, + "eval_steps_per_second": 0.256, + "step": 540 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017236270753512133, + "loss": 1.7389, + "step": 545 + }, + { + "epoch": 2.09, + "eval_accuracy": 0.4885057471264368, + "eval_loss": 1.4171918630599976, + "eval_runtime": 86.1498, + "eval_samples_per_second": 2.02, + "eval_steps_per_second": 0.255, + "step": 545 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001721072796934866, + "loss": 0.8713, + "step": 550 + }, + { + "epoch": 2.11, + "eval_accuracy": 0.45977011494252873, + "eval_loss": 1.4530651569366455, + "eval_runtime": 84.7588, + "eval_samples_per_second": 2.053, + "eval_steps_per_second": 0.26, + "step": 550 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017185185185185185, + "loss": 1.3864, + "step": 555 + }, + { + "epoch": 2.13, + "eval_accuracy": 0.41954022988505746, + "eval_loss": 1.4029399156570435, + "eval_runtime": 86.0806, + "eval_samples_per_second": 2.021, + "eval_steps_per_second": 0.256, + "step": 555 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017159642401021712, + "loss": 1.3272, + "step": 560 + }, + { + "epoch": 2.15, + "eval_accuracy": 0.4942528735632184, + "eval_loss": 1.2591314315795898, + "eval_runtime": 85.0541, + "eval_samples_per_second": 2.046, + "eval_steps_per_second": 0.259, + "step": 560 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017134099616858238, + "loss": 1.4224, + "step": 565 + }, + { + "epoch": 2.16, + "eval_accuracy": 0.47701149425287354, + "eval_loss": 1.4082285165786743, + "eval_runtime": 86.6165, + "eval_samples_per_second": 2.009, + "eval_steps_per_second": 0.254, + "step": 565 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017108556832694764, + "loss": 1.2348, + "step": 570 + }, + { + "epoch": 2.18, + "eval_accuracy": 0.4942528735632184, + "eval_loss": 1.2709393501281738, + "eval_runtime": 86.2781, + "eval_samples_per_second": 2.017, + "eval_steps_per_second": 0.255, + "step": 570 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001708301404853129, + "loss": 1.4141, + "step": 575 + }, + { + "epoch": 2.2, + "eval_accuracy": 0.5057471264367817, + "eval_loss": 1.338424801826477, + "eval_runtime": 83.8318, + "eval_samples_per_second": 2.076, + "eval_steps_per_second": 0.262, + "step": 575 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017057471264367817, + "loss": 1.3264, + "step": 580 + }, + { + "epoch": 2.22, + "eval_accuracy": 0.5287356321839081, + "eval_loss": 1.2666399478912354, + "eval_runtime": 85.5732, + "eval_samples_per_second": 2.033, + "eval_steps_per_second": 0.257, + "step": 580 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017031928480204343, + "loss": 1.1512, + "step": 585 + }, + { + "epoch": 2.24, + "eval_accuracy": 0.5172413793103449, + "eval_loss": 1.2081302404403687, + "eval_runtime": 86.2749, + "eval_samples_per_second": 2.017, + "eval_steps_per_second": 0.255, + "step": 585 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001700638569604087, + "loss": 1.0147, + "step": 590 + }, + { + "epoch": 2.26, + "eval_accuracy": 0.5114942528735632, + "eval_loss": 1.1952056884765625, + "eval_runtime": 84.237, + "eval_samples_per_second": 2.066, + "eval_steps_per_second": 0.261, + "step": 590 + }, + { + "epoch": 2.28, + "learning_rate": 0.00016980842911877396, + "loss": 1.1854, + "step": 595 + }, + { + "epoch": 2.28, + "eval_accuracy": 0.5459770114942529, + "eval_loss": 1.1515438556671143, + "eval_runtime": 89.8852, + "eval_samples_per_second": 1.936, + "eval_steps_per_second": 0.245, + "step": 595 + }, + { + "epoch": 2.3, + "learning_rate": 0.00016955300127713922, + "loss": 1.1736, + "step": 600 + }, + { + "epoch": 2.3, + "eval_accuracy": 0.5517241379310345, + "eval_loss": 1.132529377937317, + "eval_runtime": 86.1072, + "eval_samples_per_second": 2.021, + "eval_steps_per_second": 0.255, + "step": 600 + }, + { + "epoch": 2.32, + "learning_rate": 0.0001692975734355045, + "loss": 0.8421, + "step": 605 + }, + { + "epoch": 2.32, + "eval_accuracy": 0.47126436781609193, + "eval_loss": 1.3057594299316406, + "eval_runtime": 86.3601, + "eval_samples_per_second": 2.015, + "eval_steps_per_second": 0.255, + "step": 605 + }, + { + "epoch": 2.34, + "learning_rate": 0.00016904214559386975, + "loss": 1.0093, + "step": 610 + }, + { + "epoch": 2.34, + "eval_accuracy": 0.5344827586206896, + "eval_loss": 1.1371407508850098, + "eval_runtime": 86.415, + "eval_samples_per_second": 2.014, + "eval_steps_per_second": 0.255, + "step": 610 + }, + { + "epoch": 2.36, + "learning_rate": 0.000168786717752235, + "loss": 1.096, + "step": 615 + }, + { + "epoch": 2.36, + "eval_accuracy": 0.5402298850574713, + "eval_loss": 1.1986898183822632, + "eval_runtime": 86.4602, + "eval_samples_per_second": 2.012, + "eval_steps_per_second": 0.254, + "step": 615 + }, + { + "epoch": 2.38, + "learning_rate": 0.00016853128991060025, + "loss": 1.1738, + "step": 620 + }, + { + "epoch": 2.38, + "eval_accuracy": 0.5229885057471264, + "eval_loss": 1.163020372390747, + "eval_runtime": 85.7622, + "eval_samples_per_second": 2.029, + "eval_steps_per_second": 0.257, + "step": 620 + }, + { + "epoch": 2.39, + "learning_rate": 0.00016827586206896554, + "loss": 0.7222, + "step": 625 + }, + { + "epoch": 2.39, + "eval_accuracy": 0.45977011494252873, + "eval_loss": 1.3792271614074707, + "eval_runtime": 85.7187, + "eval_samples_per_second": 2.03, + "eval_steps_per_second": 0.257, + "step": 625 + }, + { + "epoch": 2.41, + "learning_rate": 0.00016802043422733078, + "loss": 1.259, + "step": 630 + }, + { + "epoch": 2.41, + "eval_accuracy": 0.5057471264367817, + "eval_loss": 1.4273347854614258, + "eval_runtime": 85.4147, + "eval_samples_per_second": 2.037, + "eval_steps_per_second": 0.258, + "step": 630 + }, + { + "epoch": 2.43, + "learning_rate": 0.00016776500638569607, + "loss": 0.8788, + "step": 635 + }, + { + "epoch": 2.43, + "eval_accuracy": 0.5114942528735632, + "eval_loss": 1.4665697813034058, + "eval_runtime": 86.7626, + "eval_samples_per_second": 2.005, + "eval_steps_per_second": 0.254, + "step": 635 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001675095785440613, + "loss": 1.527, + "step": 640 + }, + { + "epoch": 2.45, + "eval_accuracy": 0.47701149425287354, + "eval_loss": 1.4998698234558105, + "eval_runtime": 85.5223, + "eval_samples_per_second": 2.035, + "eval_steps_per_second": 0.257, + "step": 640 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001672541507024266, + "loss": 1.1176, + "step": 645 + }, + { + "epoch": 2.47, + "eval_accuracy": 0.45977011494252873, + "eval_loss": 1.571102261543274, + "eval_runtime": 84.3165, + "eval_samples_per_second": 2.064, + "eval_steps_per_second": 0.261, + "step": 645 + }, + { + "epoch": 2.49, + "learning_rate": 0.00016699872286079183, + "loss": 0.9834, + "step": 650 + }, + { + "epoch": 2.49, + "eval_accuracy": 0.5, + "eval_loss": 1.5396226644515991, + "eval_runtime": 83.6612, + "eval_samples_per_second": 2.08, + "eval_steps_per_second": 0.263, + "step": 650 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001667432950191571, + "loss": 1.046, + "step": 655 + }, + { + "epoch": 2.51, + "eval_accuracy": 0.5344827586206896, + "eval_loss": 1.2918277978897095, + "eval_runtime": 84.0945, + "eval_samples_per_second": 2.069, + "eval_steps_per_second": 0.262, + "step": 655 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016648786717752236, + "loss": 1.2347, + "step": 660 + }, + { + "epoch": 2.53, + "eval_accuracy": 0.5459770114942529, + "eval_loss": 1.3252575397491455, + "eval_runtime": 86.0389, + "eval_samples_per_second": 2.022, + "eval_steps_per_second": 0.256, + "step": 660 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016623243933588762, + "loss": 0.7441, + "step": 665 + }, + { + "epoch": 2.55, + "eval_accuracy": 0.5344827586206896, + "eval_loss": 1.4183677434921265, + "eval_runtime": 84.6125, + "eval_samples_per_second": 2.056, + "eval_steps_per_second": 0.26, + "step": 665 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016597701149425288, + "loss": 1.3217, + "step": 670 + }, + { + "epoch": 2.57, + "eval_accuracy": 0.5689655172413793, + "eval_loss": 1.2241300344467163, + "eval_runtime": 84.8503, + "eval_samples_per_second": 2.051, + "eval_steps_per_second": 0.259, + "step": 670 + }, + { + "epoch": 2.59, + "learning_rate": 0.00016572158365261815, + "loss": 1.517, + "step": 675 + }, + { + "epoch": 2.59, + "eval_accuracy": 0.4482758620689655, + "eval_loss": 1.3935520648956299, + "eval_runtime": 85.2745, + "eval_samples_per_second": 2.04, + "eval_steps_per_second": 0.258, + "step": 675 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001654661558109834, + "loss": 1.5603, + "step": 680 + }, + { + "epoch": 2.61, + "eval_accuracy": 0.5114942528735632, + "eval_loss": 1.2586129903793335, + "eval_runtime": 87.4344, + "eval_samples_per_second": 1.99, + "eval_steps_per_second": 0.252, + "step": 680 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016521072796934867, + "loss": 1.0121, + "step": 685 + }, + { + "epoch": 2.62, + "eval_accuracy": 0.5689655172413793, + "eval_loss": 1.1527811288833618, + "eval_runtime": 83.8274, + "eval_samples_per_second": 2.076, + "eval_steps_per_second": 0.262, + "step": 685 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001649553001277139, + "loss": 0.9157, + "step": 690 + }, + { + "epoch": 2.64, + "eval_accuracy": 0.5862068965517241, + "eval_loss": 1.1511900424957275, + "eval_runtime": 85.1952, + "eval_samples_per_second": 2.042, + "eval_steps_per_second": 0.258, + "step": 690 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001646998722860792, + "loss": 1.0072, + "step": 695 + }, + { + "epoch": 2.66, + "eval_accuracy": 0.5574712643678161, + "eval_loss": 1.2195591926574707, + "eval_runtime": 84.6368, + "eval_samples_per_second": 2.056, + "eval_steps_per_second": 0.26, + "step": 695 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016444444444444444, + "loss": 1.1345, + "step": 700 + }, + { + "epoch": 2.68, + "eval_accuracy": 0.5632183908045977, + "eval_loss": 1.088049054145813, + "eval_runtime": 84.7373, + "eval_samples_per_second": 2.053, + "eval_steps_per_second": 0.26, + "step": 700 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016418901660280973, + "loss": 1.1517, + "step": 705 + }, + { + "epoch": 2.7, + "eval_accuracy": 0.5287356321839081, + "eval_loss": 1.1540721654891968, + "eval_runtime": 86.2918, + "eval_samples_per_second": 2.016, + "eval_steps_per_second": 0.255, + "step": 705 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016393358876117496, + "loss": 1.2588, + "step": 710 + }, + { + "epoch": 2.72, + "eval_accuracy": 0.6264367816091954, + "eval_loss": 1.1188956499099731, + "eval_runtime": 83.958, + "eval_samples_per_second": 2.072, + "eval_steps_per_second": 0.262, + "step": 710 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016367816091954025, + "loss": 0.9318, + "step": 715 + }, + { + "epoch": 2.74, + "eval_accuracy": 0.5574712643678161, + "eval_loss": 1.1550383567810059, + "eval_runtime": 83.2348, + "eval_samples_per_second": 2.09, + "eval_steps_per_second": 0.264, + "step": 715 + }, + { + "epoch": 2.76, + "learning_rate": 0.0001634227330779055, + "loss": 0.7644, + "step": 720 + }, + { + "epoch": 2.76, + "eval_accuracy": 0.5747126436781609, + "eval_loss": 1.1556813716888428, + "eval_runtime": 83.996, + "eval_samples_per_second": 2.072, + "eval_steps_per_second": 0.262, + "step": 720 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016316730523627075, + "loss": 1.2642, + "step": 725 + }, + { + "epoch": 2.78, + "eval_accuracy": 0.6206896551724138, + "eval_loss": 1.0784178972244263, + "eval_runtime": 86.6539, + "eval_samples_per_second": 2.008, + "eval_steps_per_second": 0.254, + "step": 725 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016291187739463602, + "loss": 1.3219, + "step": 730 + }, + { + "epoch": 2.8, + "eval_accuracy": 0.5862068965517241, + "eval_loss": 1.0925211906433105, + "eval_runtime": 73.4182, + "eval_samples_per_second": 2.37, + "eval_steps_per_second": 0.3, + "step": 730 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016265644955300128, + "loss": 1.2288, + "step": 735 + }, + { + "epoch": 2.82, + "eval_accuracy": 0.6091954022988506, + "eval_loss": 1.0299782752990723, + "eval_runtime": 72.0457, + "eval_samples_per_second": 2.415, + "eval_steps_per_second": 0.305, + "step": 735 + }, + { + "epoch": 2.84, + "learning_rate": 0.00016240102171136654, + "loss": 0.8471, + "step": 740 + }, + { + "epoch": 2.84, + "eval_accuracy": 0.6091954022988506, + "eval_loss": 1.0471221208572388, + "eval_runtime": 70.8776, + "eval_samples_per_second": 2.455, + "eval_steps_per_second": 0.31, + "step": 740 + }, + { + "epoch": 2.85, + "learning_rate": 0.0001621455938697318, + "loss": 0.9432, + "step": 745 + }, + { + "epoch": 2.85, + "eval_accuracy": 0.5689655172413793, + "eval_loss": 1.133671760559082, + "eval_runtime": 73.7879, + "eval_samples_per_second": 2.358, + "eval_steps_per_second": 0.298, + "step": 745 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016189016602809707, + "loss": 0.8942, + "step": 750 + }, + { + "epoch": 2.87, + "eval_accuracy": 0.5977011494252874, + "eval_loss": 1.034655213356018, + "eval_runtime": 73.4056, + "eval_samples_per_second": 2.37, + "eval_steps_per_second": 0.3, + "step": 750 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016163473818646233, + "loss": 0.8582, + "step": 755 + }, + { + "epoch": 2.89, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 0.9947394132614136, + "eval_runtime": 73.3937, + "eval_samples_per_second": 2.371, + "eval_steps_per_second": 0.3, + "step": 755 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001613793103448276, + "loss": 0.7802, + "step": 760 + }, + { + "epoch": 2.91, + "eval_accuracy": 0.5862068965517241, + "eval_loss": 1.0483365058898926, + "eval_runtime": 73.5783, + "eval_samples_per_second": 2.365, + "eval_steps_per_second": 0.299, + "step": 760 + }, + { + "epoch": 2.93, + "learning_rate": 0.00016112388250319286, + "loss": 0.9362, + "step": 765 + }, + { + "epoch": 2.93, + "eval_accuracy": 0.603448275862069, + "eval_loss": 1.1573561429977417, + "eval_runtime": 74.7479, + "eval_samples_per_second": 2.328, + "eval_steps_per_second": 0.294, + "step": 765 + }, + { + "epoch": 2.95, + "learning_rate": 0.00016086845466155812, + "loss": 0.7052, + "step": 770 + }, + { + "epoch": 2.95, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 0.9469316005706787, + "eval_runtime": 71.6241, + "eval_samples_per_second": 2.429, + "eval_steps_per_second": 0.307, + "step": 770 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001606130268199234, + "loss": 0.9615, + "step": 775 + }, + { + "epoch": 2.97, + "eval_accuracy": 0.5689655172413793, + "eval_loss": 1.171260952949524, + "eval_runtime": 74.1608, + "eval_samples_per_second": 2.346, + "eval_steps_per_second": 0.297, + "step": 775 + }, + { + "epoch": 2.99, + "learning_rate": 0.00016035759897828865, + "loss": 1.3158, + "step": 780 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.603448275862069, + "eval_loss": 1.0248513221740723, + "eval_runtime": 71.7492, + "eval_samples_per_second": 2.425, + "eval_steps_per_second": 0.307, + "step": 780 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001601021711366539, + "loss": 1.4599, + "step": 785 + }, + { + "epoch": 3.01, + "eval_accuracy": 0.5574712643678161, + "eval_loss": 1.2699826955795288, + "eval_runtime": 72.4519, + "eval_samples_per_second": 2.402, + "eval_steps_per_second": 0.304, + "step": 785 + }, + { + "epoch": 3.03, + "learning_rate": 0.00015984674329501918, + "loss": 1.3217, + "step": 790 + }, + { + "epoch": 3.03, + "eval_accuracy": 0.5632183908045977, + "eval_loss": 1.1198772192001343, + "eval_runtime": 71.6, + "eval_samples_per_second": 2.43, + "eval_steps_per_second": 0.307, + "step": 790 + }, + { + "epoch": 3.05, + "learning_rate": 0.0001595913154533844, + "loss": 0.8367, + "step": 795 + }, + { + "epoch": 3.05, + "eval_accuracy": 0.5689655172413793, + "eval_loss": 1.045596957206726, + "eval_runtime": 72.1733, + "eval_samples_per_second": 2.411, + "eval_steps_per_second": 0.305, + "step": 795 + }, + { + "epoch": 3.07, + "learning_rate": 0.0001593358876117497, + "loss": 0.7845, + "step": 800 + }, + { + "epoch": 3.07, + "eval_accuracy": 0.5632183908045977, + "eval_loss": 1.0649093389511108, + "eval_runtime": 73.3982, + "eval_samples_per_second": 2.371, + "eval_steps_per_second": 0.3, + "step": 800 + }, + { + "epoch": 3.08, + "learning_rate": 0.00015908045977011494, + "loss": 0.8503, + "step": 805 + }, + { + "epoch": 3.08, + "eval_accuracy": 0.6206896551724138, + "eval_loss": 0.9966627359390259, + "eval_runtime": 72.2401, + "eval_samples_per_second": 2.409, + "eval_steps_per_second": 0.305, + "step": 805 + }, + { + "epoch": 3.1, + "learning_rate": 0.00015882503192848023, + "loss": 0.6945, + "step": 810 + }, + { + "epoch": 3.1, + "eval_accuracy": 0.6436781609195402, + "eval_loss": 0.9507883191108704, + "eval_runtime": 71.8074, + "eval_samples_per_second": 2.423, + "eval_steps_per_second": 0.306, + "step": 810 + }, + { + "epoch": 3.12, + "learning_rate": 0.00015856960408684547, + "loss": 0.916, + "step": 815 + }, + { + "epoch": 3.12, + "eval_accuracy": 0.6436781609195402, + "eval_loss": 1.0732645988464355, + "eval_runtime": 72.2273, + "eval_samples_per_second": 2.409, + "eval_steps_per_second": 0.305, + "step": 815 + }, + { + "epoch": 3.14, + "learning_rate": 0.00015831417624521076, + "loss": 0.9392, + "step": 820 + }, + { + "epoch": 3.14, + "eval_accuracy": 0.5862068965517241, + "eval_loss": 1.0749492645263672, + "eval_runtime": 71.7733, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.307, + "step": 820 + }, + { + "epoch": 3.16, + "learning_rate": 0.000158058748403576, + "loss": 1.0007, + "step": 825 + }, + { + "epoch": 3.16, + "eval_accuracy": 0.5862068965517241, + "eval_loss": 1.1707144975662231, + "eval_runtime": 72.8924, + "eval_samples_per_second": 2.387, + "eval_steps_per_second": 0.302, + "step": 825 + }, + { + "epoch": 3.18, + "learning_rate": 0.00015780332056194128, + "loss": 1.2342, + "step": 830 + }, + { + "epoch": 3.18, + "eval_accuracy": 0.5862068965517241, + "eval_loss": 1.2817833423614502, + "eval_runtime": 74.9744, + "eval_samples_per_second": 2.321, + "eval_steps_per_second": 0.293, + "step": 830 + }, + { + "epoch": 3.2, + "learning_rate": 0.00015754789272030652, + "loss": 0.9419, + "step": 835 + }, + { + "epoch": 3.2, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 0.9361924529075623, + "eval_runtime": 75.7884, + "eval_samples_per_second": 2.296, + "eval_steps_per_second": 0.29, + "step": 835 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015729246487867178, + "loss": 0.7473, + "step": 840 + }, + { + "epoch": 3.22, + "eval_accuracy": 0.5517241379310345, + "eval_loss": 1.2352019548416138, + "eval_runtime": 73.6409, + "eval_samples_per_second": 2.363, + "eval_steps_per_second": 0.299, + "step": 840 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015703703703703705, + "loss": 1.3524, + "step": 845 + }, + { + "epoch": 3.24, + "eval_accuracy": 0.5574712643678161, + "eval_loss": 1.2916122674942017, + "eval_runtime": 73.9743, + "eval_samples_per_second": 2.352, + "eval_steps_per_second": 0.297, + "step": 845 + }, + { + "epoch": 3.26, + "learning_rate": 0.0001567816091954023, + "loss": 0.7914, + "step": 850 + }, + { + "epoch": 3.26, + "eval_accuracy": 0.603448275862069, + "eval_loss": 1.0420141220092773, + "eval_runtime": 73.374, + "eval_samples_per_second": 2.371, + "eval_steps_per_second": 0.3, + "step": 850 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015652618135376757, + "loss": 0.9782, + "step": 855 + }, + { + "epoch": 3.28, + "eval_accuracy": 0.6206896551724138, + "eval_loss": 1.0024324655532837, + "eval_runtime": 73.5739, + "eval_samples_per_second": 2.365, + "eval_steps_per_second": 0.299, + "step": 855 + }, + { + "epoch": 3.3, + "learning_rate": 0.00015627075351213284, + "loss": 0.6729, + "step": 860 + }, + { + "epoch": 3.3, + "eval_accuracy": 0.632183908045977, + "eval_loss": 1.0617214441299438, + "eval_runtime": 73.2116, + "eval_samples_per_second": 2.377, + "eval_steps_per_second": 0.3, + "step": 860 + }, + { + "epoch": 3.31, + "learning_rate": 0.0001560153256704981, + "loss": 1.1071, + "step": 865 + }, + { + "epoch": 3.31, + "eval_accuracy": 0.5, + "eval_loss": 1.5525238513946533, + "eval_runtime": 74.0895, + "eval_samples_per_second": 2.349, + "eval_steps_per_second": 0.297, + "step": 865 + }, + { + "epoch": 3.33, + "learning_rate": 0.00015575989782886336, + "loss": 1.6989, + "step": 870 + }, + { + "epoch": 3.33, + "eval_accuracy": 0.6206896551724138, + "eval_loss": 1.0040068626403809, + "eval_runtime": 75.3064, + "eval_samples_per_second": 2.311, + "eval_steps_per_second": 0.292, + "step": 870 + }, + { + "epoch": 3.35, + "learning_rate": 0.0001555044699872286, + "loss": 0.7271, + "step": 875 + }, + { + "epoch": 3.35, + "eval_accuracy": 0.5632183908045977, + "eval_loss": 1.2051146030426025, + "eval_runtime": 73.7475, + "eval_samples_per_second": 2.359, + "eval_steps_per_second": 0.298, + "step": 875 + }, + { + "epoch": 3.37, + "learning_rate": 0.0001552490421455939, + "loss": 0.8168, + "step": 880 + }, + { + "epoch": 3.37, + "eval_accuracy": 0.6091954022988506, + "eval_loss": 1.0161758661270142, + "eval_runtime": 74.8589, + "eval_samples_per_second": 2.324, + "eval_steps_per_second": 0.294, + "step": 880 + }, + { + "epoch": 3.39, + "learning_rate": 0.00015499361430395913, + "loss": 1.0251, + "step": 885 + }, + { + "epoch": 3.39, + "eval_accuracy": 0.5402298850574713, + "eval_loss": 1.2411690950393677, + "eval_runtime": 74.1808, + "eval_samples_per_second": 2.346, + "eval_steps_per_second": 0.297, + "step": 885 + }, + { + "epoch": 3.41, + "learning_rate": 0.00015473818646232442, + "loss": 1.262, + "step": 890 + }, + { + "epoch": 3.41, + "eval_accuracy": 0.632183908045977, + "eval_loss": 1.0844931602478027, + "eval_runtime": 74.7088, + "eval_samples_per_second": 2.329, + "eval_steps_per_second": 0.294, + "step": 890 + }, + { + "epoch": 3.43, + "learning_rate": 0.00015448275862068965, + "loss": 0.752, + "step": 895 + }, + { + "epoch": 3.43, + "eval_accuracy": 0.632183908045977, + "eval_loss": 1.1151267290115356, + "eval_runtime": 73.9476, + "eval_samples_per_second": 2.353, + "eval_steps_per_second": 0.298, + "step": 895 + }, + { + "epoch": 3.45, + "learning_rate": 0.00015422733077905494, + "loss": 0.781, + "step": 900 + }, + { + "epoch": 3.45, + "eval_accuracy": 0.6436781609195402, + "eval_loss": 1.1569331884384155, + "eval_runtime": 73.2377, + "eval_samples_per_second": 2.376, + "eval_steps_per_second": 0.3, + "step": 900 + }, + { + "epoch": 3.47, + "learning_rate": 0.00015397190293742018, + "loss": 0.689, + "step": 905 + }, + { + "epoch": 3.47, + "eval_accuracy": 0.5804597701149425, + "eval_loss": 1.1399520635604858, + "eval_runtime": 74.5155, + "eval_samples_per_second": 2.335, + "eval_steps_per_second": 0.295, + "step": 905 + }, + { + "epoch": 3.49, + "learning_rate": 0.00015371647509578544, + "loss": 1.1359, + "step": 910 + }, + { + "epoch": 3.49, + "eval_accuracy": 0.5747126436781609, + "eval_loss": 1.2362200021743774, + "eval_runtime": 73.5703, + "eval_samples_per_second": 2.365, + "eval_steps_per_second": 0.299, + "step": 910 + }, + { + "epoch": 3.51, + "learning_rate": 0.0001534610472541507, + "loss": 1.0256, + "step": 915 + }, + { + "epoch": 3.51, + "eval_accuracy": 0.5, + "eval_loss": 1.3777178525924683, + "eval_runtime": 73.8937, + "eval_samples_per_second": 2.355, + "eval_steps_per_second": 0.298, + "step": 915 + }, + { + "epoch": 3.52, + "learning_rate": 0.00015320561941251597, + "loss": 1.0887, + "step": 920 + }, + { + "epoch": 3.52, + "eval_accuracy": 0.6206896551724138, + "eval_loss": 1.1072226762771606, + "eval_runtime": 73.4404, + "eval_samples_per_second": 2.369, + "eval_steps_per_second": 0.3, + "step": 920 + }, + { + "epoch": 3.54, + "learning_rate": 0.00015295019157088123, + "loss": 0.6106, + "step": 925 + }, + { + "epoch": 3.54, + "eval_accuracy": 0.5459770114942529, + "eval_loss": 1.234115481376648, + "eval_runtime": 74.0108, + "eval_samples_per_second": 2.351, + "eval_steps_per_second": 0.297, + "step": 925 + }, + { + "epoch": 3.56, + "learning_rate": 0.0001526947637292465, + "loss": 1.3364, + "step": 930 + }, + { + "epoch": 3.56, + "eval_accuracy": 0.5747126436781609, + "eval_loss": 1.1364753246307373, + "eval_runtime": 73.2306, + "eval_samples_per_second": 2.376, + "eval_steps_per_second": 0.3, + "step": 930 + }, + { + "epoch": 3.58, + "learning_rate": 0.00015243933588761176, + "loss": 0.7705, + "step": 935 + }, + { + "epoch": 3.58, + "eval_accuracy": 0.5919540229885057, + "eval_loss": 1.230558156967163, + "eval_runtime": 74.0579, + "eval_samples_per_second": 2.35, + "eval_steps_per_second": 0.297, + "step": 935 + }, + { + "epoch": 3.6, + "learning_rate": 0.00015218390804597702, + "loss": 0.8368, + "step": 940 + }, + { + "epoch": 3.6, + "eval_accuracy": 0.632183908045977, + "eval_loss": 1.0860326290130615, + "eval_runtime": 72.9069, + "eval_samples_per_second": 2.387, + "eval_steps_per_second": 0.302, + "step": 940 + }, + { + "epoch": 3.62, + "learning_rate": 0.00015192848020434226, + "loss": 0.8946, + "step": 945 + }, + { + "epoch": 3.62, + "eval_accuracy": 0.5804597701149425, + "eval_loss": 1.2349048852920532, + "eval_runtime": 76.0014, + "eval_samples_per_second": 2.289, + "eval_steps_per_second": 0.289, + "step": 945 + }, + { + "epoch": 3.64, + "learning_rate": 0.00015167305236270755, + "loss": 1.226, + "step": 950 + }, + { + "epoch": 3.64, + "eval_accuracy": 0.5919540229885057, + "eval_loss": 1.2052266597747803, + "eval_runtime": 74.5817, + "eval_samples_per_second": 2.333, + "eval_steps_per_second": 0.295, + "step": 950 + }, + { + "epoch": 3.66, + "learning_rate": 0.00015141762452107279, + "loss": 1.1585, + "step": 955 + }, + { + "epoch": 3.66, + "eval_accuracy": 0.6436781609195402, + "eval_loss": 0.9784772396087646, + "eval_runtime": 72.3958, + "eval_samples_per_second": 2.403, + "eval_steps_per_second": 0.304, + "step": 955 + }, + { + "epoch": 3.68, + "learning_rate": 0.00015116219667943808, + "loss": 0.7824, + "step": 960 + }, + { + "epoch": 3.68, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 0.8754329681396484, + "eval_runtime": 71.8422, + "eval_samples_per_second": 2.422, + "eval_steps_per_second": 0.306, + "step": 960 + }, + { + "epoch": 3.7, + "learning_rate": 0.0001509067688378033, + "loss": 0.6437, + "step": 965 + }, + { + "epoch": 3.7, + "eval_accuracy": 0.6264367816091954, + "eval_loss": 0.9256632328033447, + "eval_runtime": 72.4538, + "eval_samples_per_second": 2.402, + "eval_steps_per_second": 0.304, + "step": 965 + }, + { + "epoch": 3.72, + "learning_rate": 0.0001506513409961686, + "loss": 0.8286, + "step": 970 + }, + { + "epoch": 3.72, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 0.9190903306007385, + "eval_runtime": 71.6686, + "eval_samples_per_second": 2.428, + "eval_steps_per_second": 0.307, + "step": 970 + }, + { + "epoch": 3.74, + "learning_rate": 0.00015039591315453384, + "loss": 0.8671, + "step": 975 + }, + { + "epoch": 3.74, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 0.8495645523071289, + "eval_runtime": 73.9876, + "eval_samples_per_second": 2.352, + "eval_steps_per_second": 0.297, + "step": 975 + }, + { + "epoch": 3.75, + "learning_rate": 0.0001501404853128991, + "loss": 0.5814, + "step": 980 + }, + { + "epoch": 3.75, + "eval_accuracy": 0.6494252873563219, + "eval_loss": 0.9788767695426941, + "eval_runtime": 71.5848, + "eval_samples_per_second": 2.431, + "eval_steps_per_second": 0.307, + "step": 980 + }, + { + "epoch": 3.77, + "learning_rate": 0.00014988505747126437, + "loss": 0.7895, + "step": 985 + }, + { + "epoch": 3.77, + "eval_accuracy": 0.6379310344827587, + "eval_loss": 1.032570719718933, + "eval_runtime": 72.0801, + "eval_samples_per_second": 2.414, + "eval_steps_per_second": 0.305, + "step": 985 + }, + { + "epoch": 3.79, + "learning_rate": 0.00014962962962962963, + "loss": 0.8104, + "step": 990 + }, + { + "epoch": 3.79, + "eval_accuracy": 0.632183908045977, + "eval_loss": 0.9865307211875916, + "eval_runtime": 71.6806, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 990 + }, + { + "epoch": 3.81, + "learning_rate": 0.0001493742017879949, + "loss": 0.8277, + "step": 995 + }, + { + "epoch": 3.81, + "eval_accuracy": 0.6091954022988506, + "eval_loss": 1.0854288339614868, + "eval_runtime": 72.3687, + "eval_samples_per_second": 2.404, + "eval_steps_per_second": 0.304, + "step": 995 + }, + { + "epoch": 3.83, + "learning_rate": 0.00014911877394636016, + "loss": 0.5801, + "step": 1000 + }, + { + "epoch": 3.83, + "eval_accuracy": 0.6264367816091954, + "eval_loss": 1.0070602893829346, + "eval_runtime": 72.8007, + "eval_samples_per_second": 2.39, + "eval_steps_per_second": 0.302, + "step": 1000 + }, + { + "epoch": 3.85, + "learning_rate": 0.00014886334610472542, + "loss": 0.6394, + "step": 1005 + }, + { + "epoch": 3.85, + "eval_accuracy": 0.6264367816091954, + "eval_loss": 0.9902334809303284, + "eval_runtime": 72.164, + "eval_samples_per_second": 2.411, + "eval_steps_per_second": 0.305, + "step": 1005 + }, + { + "epoch": 3.87, + "learning_rate": 0.00014860791826309068, + "loss": 0.7135, + "step": 1010 + }, + { + "epoch": 3.87, + "eval_accuracy": 0.5919540229885057, + "eval_loss": 1.1338768005371094, + "eval_runtime": 73.5721, + "eval_samples_per_second": 2.365, + "eval_steps_per_second": 0.299, + "step": 1010 + }, + { + "epoch": 3.89, + "learning_rate": 0.00014835249042145595, + "loss": 1.3432, + "step": 1015 + }, + { + "epoch": 3.89, + "eval_accuracy": 0.603448275862069, + "eval_loss": 1.016777515411377, + "eval_runtime": 72.2656, + "eval_samples_per_second": 2.408, + "eval_steps_per_second": 0.304, + "step": 1015 + }, + { + "epoch": 3.91, + "learning_rate": 0.0001480970625798212, + "loss": 0.8375, + "step": 1020 + }, + { + "epoch": 3.91, + "eval_accuracy": 0.6436781609195402, + "eval_loss": 0.8976129293441772, + "eval_runtime": 71.6885, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 1020 + }, + { + "epoch": 3.93, + "learning_rate": 0.00014784163473818647, + "loss": 1.2419, + "step": 1025 + }, + { + "epoch": 3.93, + "eval_accuracy": 0.6149425287356322, + "eval_loss": 1.0345048904418945, + "eval_runtime": 72.2899, + "eval_samples_per_second": 2.407, + "eval_steps_per_second": 0.304, + "step": 1025 + }, + { + "epoch": 3.95, + "learning_rate": 0.00014758620689655174, + "loss": 1.2465, + "step": 1030 + }, + { + "epoch": 3.95, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 0.9065479636192322, + "eval_runtime": 71.7202, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 1030 + }, + { + "epoch": 3.97, + "learning_rate": 0.000147330779054917, + "loss": 0.8987, + "step": 1035 + }, + { + "epoch": 3.97, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 0.8305312395095825, + "eval_runtime": 74.2714, + "eval_samples_per_second": 2.343, + "eval_steps_per_second": 0.296, + "step": 1035 + }, + { + "epoch": 3.98, + "learning_rate": 0.00014707535121328226, + "loss": 0.6279, + "step": 1040 + }, + { + "epoch": 3.98, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 0.8689811825752258, + "eval_runtime": 73.642, + "eval_samples_per_second": 2.363, + "eval_steps_per_second": 0.299, + "step": 1040 + }, + { + "epoch": 4.0, + "learning_rate": 0.00014681992337164753, + "loss": 1.1026, + "step": 1045 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 0.8575055599212646, + "eval_runtime": 72.2634, + "eval_samples_per_second": 2.408, + "eval_steps_per_second": 0.304, + "step": 1045 + }, + { + "epoch": 4.02, + "learning_rate": 0.00014656449553001276, + "loss": 0.5617, + "step": 1050 + }, + { + "epoch": 4.02, + "eval_accuracy": 0.6436781609195402, + "eval_loss": 0.8988448977470398, + "eval_runtime": 73.0503, + "eval_samples_per_second": 2.382, + "eval_steps_per_second": 0.301, + "step": 1050 + }, + { + "epoch": 4.04, + "learning_rate": 0.00014630906768837805, + "loss": 0.5318, + "step": 1055 + }, + { + "epoch": 4.04, + "eval_accuracy": 0.6264367816091954, + "eval_loss": 1.0455691814422607, + "eval_runtime": 72.3237, + "eval_samples_per_second": 2.406, + "eval_steps_per_second": 0.304, + "step": 1055 + }, + { + "epoch": 4.06, + "learning_rate": 0.0001460536398467433, + "loss": 0.7041, + "step": 1060 + }, + { + "epoch": 4.06, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 0.9058274626731873, + "eval_runtime": 71.8667, + "eval_samples_per_second": 2.421, + "eval_steps_per_second": 0.306, + "step": 1060 + }, + { + "epoch": 4.08, + "learning_rate": 0.00014579821200510858, + "loss": 0.688, + "step": 1065 + }, + { + "epoch": 4.08, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 1.0057711601257324, + "eval_runtime": 72.4073, + "eval_samples_per_second": 2.403, + "eval_steps_per_second": 0.304, + "step": 1065 + }, + { + "epoch": 4.1, + "learning_rate": 0.00014554278416347382, + "loss": 0.8286, + "step": 1070 + }, + { + "epoch": 4.1, + "eval_accuracy": 0.6379310344827587, + "eval_loss": 1.0452172756195068, + "eval_runtime": 71.8108, + "eval_samples_per_second": 2.423, + "eval_steps_per_second": 0.306, + "step": 1070 + }, + { + "epoch": 4.12, + "learning_rate": 0.0001452873563218391, + "loss": 1.2596, + "step": 1075 + }, + { + "epoch": 4.12, + "eval_accuracy": 0.5862068965517241, + "eval_loss": 0.9942687749862671, + "eval_runtime": 73.3993, + "eval_samples_per_second": 2.371, + "eval_steps_per_second": 0.3, + "step": 1075 + }, + { + "epoch": 4.14, + "learning_rate": 0.00014503192848020434, + "loss": 0.8448, + "step": 1080 + }, + { + "epoch": 4.14, + "eval_accuracy": 0.6264367816091954, + "eval_loss": 1.044135570526123, + "eval_runtime": 71.6723, + "eval_samples_per_second": 2.428, + "eval_steps_per_second": 0.307, + "step": 1080 + }, + { + "epoch": 4.16, + "learning_rate": 0.0001447765006385696, + "loss": 0.9666, + "step": 1085 + }, + { + "epoch": 4.16, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 1.0021615028381348, + "eval_runtime": 72.2989, + "eval_samples_per_second": 2.407, + "eval_steps_per_second": 0.304, + "step": 1085 + }, + { + "epoch": 4.18, + "learning_rate": 0.00014452107279693487, + "loss": 1.0548, + "step": 1090 + }, + { + "epoch": 4.18, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 0.8905701041221619, + "eval_runtime": 71.7468, + "eval_samples_per_second": 2.425, + "eval_steps_per_second": 0.307, + "step": 1090 + }, + { + "epoch": 4.2, + "learning_rate": 0.00014426564495530013, + "loss": 0.823, + "step": 1095 + }, + { + "epoch": 4.2, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 0.835205614566803, + "eval_runtime": 72.2334, + "eval_samples_per_second": 2.409, + "eval_steps_per_second": 0.305, + "step": 1095 + }, + { + "epoch": 4.21, + "learning_rate": 0.0001440102171136654, + "loss": 0.7588, + "step": 1100 + }, + { + "epoch": 4.21, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 0.8358584046363831, + "eval_runtime": 71.7162, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 1100 + }, + { + "epoch": 4.23, + "learning_rate": 0.00014375478927203066, + "loss": 0.5306, + "step": 1105 + }, + { + "epoch": 4.23, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 0.8987478613853455, + "eval_runtime": 72.2456, + "eval_samples_per_second": 2.408, + "eval_steps_per_second": 0.305, + "step": 1105 + }, + { + "epoch": 4.25, + "learning_rate": 0.00014349936143039592, + "loss": 1.059, + "step": 1110 + }, + { + "epoch": 4.25, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 0.8400871753692627, + "eval_runtime": 71.6522, + "eval_samples_per_second": 2.428, + "eval_steps_per_second": 0.307, + "step": 1110 + }, + { + "epoch": 4.27, + "learning_rate": 0.00014324393358876119, + "loss": 0.6116, + "step": 1115 + }, + { + "epoch": 4.27, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 0.9104363322257996, + "eval_runtime": 72.2465, + "eval_samples_per_second": 2.408, + "eval_steps_per_second": 0.305, + "step": 1115 + }, + { + "epoch": 4.29, + "learning_rate": 0.00014298850574712642, + "loss": 0.7483, + "step": 1120 + }, + { + "epoch": 4.29, + "eval_accuracy": 0.6494252873563219, + "eval_loss": 1.0067737102508545, + "eval_runtime": 71.6939, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 1120 + }, + { + "epoch": 4.31, + "learning_rate": 0.0001427330779054917, + "loss": 0.5231, + "step": 1125 + }, + { + "epoch": 4.31, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 0.9476281404495239, + "eval_runtime": 72.2862, + "eval_samples_per_second": 2.407, + "eval_steps_per_second": 0.304, + "step": 1125 + }, + { + "epoch": 4.33, + "learning_rate": 0.00014247765006385695, + "loss": 0.5667, + "step": 1130 + }, + { + "epoch": 4.33, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 0.9047439694404602, + "eval_runtime": 71.7674, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.307, + "step": 1130 + }, + { + "epoch": 4.35, + "learning_rate": 0.00014222222222222224, + "loss": 0.7785, + "step": 1135 + }, + { + "epoch": 4.35, + "eval_accuracy": 0.6149425287356322, + "eval_loss": 1.0280640125274658, + "eval_runtime": 72.438, + "eval_samples_per_second": 2.402, + "eval_steps_per_second": 0.304, + "step": 1135 + }, + { + "epoch": 4.37, + "learning_rate": 0.00014196679438058748, + "loss": 1.0404, + "step": 1140 + }, + { + "epoch": 4.37, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 0.9103832840919495, + "eval_runtime": 73.4365, + "eval_samples_per_second": 2.369, + "eval_steps_per_second": 0.3, + "step": 1140 + }, + { + "epoch": 4.39, + "learning_rate": 0.00014171136653895277, + "loss": 0.5523, + "step": 1145 + }, + { + "epoch": 4.39, + "eval_accuracy": 0.6436781609195402, + "eval_loss": 1.0259004831314087, + "eval_runtime": 72.2901, + "eval_samples_per_second": 2.407, + "eval_steps_per_second": 0.304, + "step": 1145 + }, + { + "epoch": 4.41, + "learning_rate": 0.000141455938697318, + "loss": 0.6387, + "step": 1150 + }, + { + "epoch": 4.41, + "eval_accuracy": 0.5747126436781609, + "eval_loss": 1.1877542734146118, + "eval_runtime": 71.8711, + "eval_samples_per_second": 2.421, + "eval_steps_per_second": 0.306, + "step": 1150 + }, + { + "epoch": 4.43, + "learning_rate": 0.00014120051085568327, + "loss": 1.038, + "step": 1155 + }, + { + "epoch": 4.43, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 1.065651774406433, + "eval_runtime": 72.3814, + "eval_samples_per_second": 2.404, + "eval_steps_per_second": 0.304, + "step": 1155 + }, + { + "epoch": 4.44, + "learning_rate": 0.00014094508301404853, + "loss": 0.7566, + "step": 1160 + }, + { + "epoch": 4.44, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 0.8948299288749695, + "eval_runtime": 71.6321, + "eval_samples_per_second": 2.429, + "eval_steps_per_second": 0.307, + "step": 1160 + }, + { + "epoch": 4.46, + "learning_rate": 0.0001406896551724138, + "loss": 0.5505, + "step": 1165 + }, + { + "epoch": 4.46, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 0.9844233393669128, + "eval_runtime": 74.1566, + "eval_samples_per_second": 2.346, + "eval_steps_per_second": 0.297, + "step": 1165 + }, + { + "epoch": 4.48, + "learning_rate": 0.00014043422733077906, + "loss": 1.0917, + "step": 1170 + }, + { + "epoch": 4.48, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 0.9192268252372742, + "eval_runtime": 71.6227, + "eval_samples_per_second": 2.429, + "eval_steps_per_second": 0.307, + "step": 1170 + }, + { + "epoch": 4.5, + "learning_rate": 0.00014017879948914432, + "loss": 0.8048, + "step": 1175 + }, + { + "epoch": 4.5, + "eval_accuracy": 0.6091954022988506, + "eval_loss": 1.1002541780471802, + "eval_runtime": 73.6156, + "eval_samples_per_second": 2.364, + "eval_steps_per_second": 0.299, + "step": 1175 + }, + { + "epoch": 4.52, + "learning_rate": 0.00013992337164750958, + "loss": 0.5951, + "step": 1180 + }, + { + "epoch": 4.52, + "eval_accuracy": 0.6494252873563219, + "eval_loss": 0.9471919536590576, + "eval_runtime": 71.689, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 1180 + }, + { + "epoch": 4.54, + "learning_rate": 0.00013966794380587485, + "loss": 0.603, + "step": 1185 + }, + { + "epoch": 4.54, + "eval_accuracy": 0.5632183908045977, + "eval_loss": 1.3395264148712158, + "eval_runtime": 72.507, + "eval_samples_per_second": 2.4, + "eval_steps_per_second": 0.303, + "step": 1185 + }, + { + "epoch": 4.56, + "learning_rate": 0.0001394125159642401, + "loss": 1.0802, + "step": 1190 + }, + { + "epoch": 4.56, + "eval_accuracy": 0.6149425287356322, + "eval_loss": 1.052255392074585, + "eval_runtime": 71.8669, + "eval_samples_per_second": 2.421, + "eval_steps_per_second": 0.306, + "step": 1190 + }, + { + "epoch": 4.58, + "learning_rate": 0.00013915708812260537, + "loss": 0.3689, + "step": 1195 + }, + { + "epoch": 4.58, + "eval_accuracy": 0.6264367816091954, + "eval_loss": 1.0053684711456299, + "eval_runtime": 72.2348, + "eval_samples_per_second": 2.409, + "eval_steps_per_second": 0.305, + "step": 1195 + }, + { + "epoch": 4.6, + "learning_rate": 0.00013890166028097064, + "loss": 0.5565, + "step": 1200 + }, + { + "epoch": 4.6, + "eval_accuracy": 0.5862068965517241, + "eval_loss": 1.400984525680542, + "eval_runtime": 71.9603, + "eval_samples_per_second": 2.418, + "eval_steps_per_second": 0.306, + "step": 1200 + }, + { + "epoch": 4.62, + "learning_rate": 0.0001386462324393359, + "loss": 0.7934, + "step": 1205 + }, + { + "epoch": 4.62, + "eval_accuracy": 0.5919540229885057, + "eval_loss": 1.4771628379821777, + "eval_runtime": 72.4137, + "eval_samples_per_second": 2.403, + "eval_steps_per_second": 0.304, + "step": 1205 + }, + { + "epoch": 4.64, + "learning_rate": 0.00013839080459770116, + "loss": 0.8364, + "step": 1210 + }, + { + "epoch": 4.64, + "eval_accuracy": 0.632183908045977, + "eval_loss": 1.2308330535888672, + "eval_runtime": 71.7192, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 1210 + }, + { + "epoch": 4.66, + "learning_rate": 0.00013813537675606643, + "loss": 0.8967, + "step": 1215 + }, + { + "epoch": 4.66, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 0.8904043436050415, + "eval_runtime": 72.373, + "eval_samples_per_second": 2.404, + "eval_steps_per_second": 0.304, + "step": 1215 + }, + { + "epoch": 4.67, + "learning_rate": 0.0001378799489144317, + "loss": 0.5255, + "step": 1220 + }, + { + "epoch": 4.67, + "eval_accuracy": 0.6379310344827587, + "eval_loss": 1.006020188331604, + "eval_runtime": 73.6542, + "eval_samples_per_second": 2.362, + "eval_steps_per_second": 0.299, + "step": 1220 + }, + { + "epoch": 4.69, + "learning_rate": 0.00013762452107279695, + "loss": 0.693, + "step": 1225 + }, + { + "epoch": 4.69, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 0.9818925261497498, + "eval_runtime": 72.3307, + "eval_samples_per_second": 2.406, + "eval_steps_per_second": 0.304, + "step": 1225 + }, + { + "epoch": 4.71, + "learning_rate": 0.00013736909323116222, + "loss": 1.1102, + "step": 1230 + }, + { + "epoch": 4.71, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 0.8632426857948303, + "eval_runtime": 71.648, + "eval_samples_per_second": 2.429, + "eval_steps_per_second": 0.307, + "step": 1230 + }, + { + "epoch": 4.73, + "learning_rate": 0.00013711366538952745, + "loss": 0.7586, + "step": 1235 + }, + { + "epoch": 4.73, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 0.8827661871910095, + "eval_runtime": 72.3427, + "eval_samples_per_second": 2.405, + "eval_steps_per_second": 0.304, + "step": 1235 + }, + { + "epoch": 4.75, + "learning_rate": 0.00013685823754789274, + "loss": 0.5332, + "step": 1240 + }, + { + "epoch": 4.75, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 0.8106628656387329, + "eval_runtime": 73.4839, + "eval_samples_per_second": 2.368, + "eval_steps_per_second": 0.299, + "step": 1240 + }, + { + "epoch": 4.77, + "learning_rate": 0.00013660280970625798, + "loss": 0.7737, + "step": 1245 + }, + { + "epoch": 4.77, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 0.7830407023429871, + "eval_runtime": 72.337, + "eval_samples_per_second": 2.405, + "eval_steps_per_second": 0.304, + "step": 1245 + }, + { + "epoch": 4.79, + "learning_rate": 0.00013634738186462327, + "loss": 0.8634, + "step": 1250 + }, + { + "epoch": 4.79, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 0.799767792224884, + "eval_runtime": 71.8401, + "eval_samples_per_second": 2.422, + "eval_steps_per_second": 0.306, + "step": 1250 + }, + { + "epoch": 4.81, + "learning_rate": 0.0001360919540229885, + "loss": 0.8592, + "step": 1255 + }, + { + "epoch": 4.81, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 0.8830769658088684, + "eval_runtime": 73.1202, + "eval_samples_per_second": 2.38, + "eval_steps_per_second": 0.301, + "step": 1255 + }, + { + "epoch": 4.83, + "learning_rate": 0.0001358365261813538, + "loss": 0.3591, + "step": 1260 + }, + { + "epoch": 4.83, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 0.7183188796043396, + "eval_runtime": 71.7735, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.307, + "step": 1260 + }, + { + "epoch": 4.85, + "learning_rate": 0.00013558109833971903, + "loss": 0.7214, + "step": 1265 + }, + { + "epoch": 4.85, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 0.7452751398086548, + "eval_runtime": 72.2803, + "eval_samples_per_second": 2.407, + "eval_steps_per_second": 0.304, + "step": 1265 + }, + { + "epoch": 4.87, + "learning_rate": 0.0001353256704980843, + "loss": 0.6555, + "step": 1270 + }, + { + "epoch": 4.87, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 0.8549041748046875, + "eval_runtime": 71.6872, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 1270 + }, + { + "epoch": 4.89, + "learning_rate": 0.00013507024265644956, + "loss": 0.3736, + "step": 1275 + }, + { + "epoch": 4.89, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 0.8064850568771362, + "eval_runtime": 72.2268, + "eval_samples_per_second": 2.409, + "eval_steps_per_second": 0.305, + "step": 1275 + }, + { + "epoch": 4.9, + "learning_rate": 0.00013481481481481482, + "loss": 0.6139, + "step": 1280 + }, + { + "epoch": 4.9, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 0.7727733850479126, + "eval_runtime": 71.6667, + "eval_samples_per_second": 2.428, + "eval_steps_per_second": 0.307, + "step": 1280 + }, + { + "epoch": 4.92, + "learning_rate": 0.00013455938697318009, + "loss": 1.4349, + "step": 1285 + }, + { + "epoch": 4.92, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 0.7865081429481506, + "eval_runtime": 72.3204, + "eval_samples_per_second": 2.406, + "eval_steps_per_second": 0.304, + "step": 1285 + }, + { + "epoch": 4.94, + "learning_rate": 0.00013430395913154535, + "loss": 0.6336, + "step": 1290 + }, + { + "epoch": 4.94, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 0.7983749508857727, + "eval_runtime": 71.6401, + "eval_samples_per_second": 2.429, + "eval_steps_per_second": 0.307, + "step": 1290 + }, + { + "epoch": 4.96, + "learning_rate": 0.0001340485312899106, + "loss": 0.667, + "step": 1295 + }, + { + "epoch": 4.96, + "eval_accuracy": 0.6494252873563219, + "eval_loss": 1.0624412298202515, + "eval_runtime": 72.1869, + "eval_samples_per_second": 2.41, + "eval_steps_per_second": 0.305, + "step": 1295 + }, + { + "epoch": 4.98, + "learning_rate": 0.00013379310344827588, + "loss": 0.4108, + "step": 1300 + }, + { + "epoch": 4.98, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 0.9411900043487549, + "eval_runtime": 72.8779, + "eval_samples_per_second": 2.388, + "eval_steps_per_second": 0.302, + "step": 1300 + }, + { + "epoch": 5.0, + "learning_rate": 0.0001335376756066411, + "loss": 1.0572, + "step": 1305 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 0.8950245380401611, + "eval_runtime": 72.2902, + "eval_samples_per_second": 2.407, + "eval_steps_per_second": 0.304, + "step": 1305 + }, + { + "epoch": 5.02, + "learning_rate": 0.0001332822477650064, + "loss": 0.3411, + "step": 1310 + }, + { + "epoch": 5.02, + "eval_accuracy": 0.735632183908046, + "eval_loss": 0.8750669360160828, + "eval_runtime": 73.4412, + "eval_samples_per_second": 2.369, + "eval_steps_per_second": 0.3, + "step": 1310 + }, + { + "epoch": 5.04, + "learning_rate": 0.00013302681992337164, + "loss": 0.3644, + "step": 1315 + }, + { + "epoch": 5.04, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 0.8155695796012878, + "eval_runtime": 72.387, + "eval_samples_per_second": 2.404, + "eval_steps_per_second": 0.304, + "step": 1315 + }, + { + "epoch": 5.06, + "learning_rate": 0.00013277139208173693, + "loss": 0.2643, + "step": 1320 + }, + { + "epoch": 5.06, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 0.7830978631973267, + "eval_runtime": 71.6267, + "eval_samples_per_second": 2.429, + "eval_steps_per_second": 0.307, + "step": 1320 + }, + { + "epoch": 5.08, + "learning_rate": 0.00013251596424010217, + "loss": 0.3629, + "step": 1325 + }, + { + "epoch": 5.08, + "eval_accuracy": 0.6264367816091954, + "eval_loss": 1.644027829170227, + "eval_runtime": 72.3202, + "eval_samples_per_second": 2.406, + "eval_steps_per_second": 0.304, + "step": 1325 + }, + { + "epoch": 5.1, + "learning_rate": 0.00013226053639846746, + "loss": 1.5008, + "step": 1330 + }, + { + "epoch": 5.1, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 1.36289381980896, + "eval_runtime": 73.5741, + "eval_samples_per_second": 2.365, + "eval_steps_per_second": 0.299, + "step": 1330 + }, + { + "epoch": 5.11, + "learning_rate": 0.0001320051085568327, + "loss": 1.1648, + "step": 1335 + }, + { + "epoch": 5.11, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 0.9745023250579834, + "eval_runtime": 73.9076, + "eval_samples_per_second": 2.354, + "eval_steps_per_second": 0.298, + "step": 1335 + }, + { + "epoch": 5.13, + "learning_rate": 0.00013174968071519796, + "loss": 0.842, + "step": 1340 + }, + { + "epoch": 5.13, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 0.9471400380134583, + "eval_runtime": 73.3831, + "eval_samples_per_second": 2.371, + "eval_steps_per_second": 0.3, + "step": 1340 + }, + { + "epoch": 5.15, + "learning_rate": 0.00013149425287356322, + "loss": 0.4963, + "step": 1345 + }, + { + "epoch": 5.15, + "eval_accuracy": 0.6379310344827587, + "eval_loss": 1.1134085655212402, + "eval_runtime": 74.2914, + "eval_samples_per_second": 2.342, + "eval_steps_per_second": 0.296, + "step": 1345 + }, + { + "epoch": 5.17, + "learning_rate": 0.00013123882503192848, + "loss": 0.6819, + "step": 1350 + }, + { + "epoch": 5.17, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.0473263263702393, + "eval_runtime": 73.934, + "eval_samples_per_second": 2.353, + "eval_steps_per_second": 0.298, + "step": 1350 + }, + { + "epoch": 5.19, + "learning_rate": 0.00013098339719029375, + "loss": 0.2688, + "step": 1355 + }, + { + "epoch": 5.19, + "eval_accuracy": 0.6091954022988506, + "eval_loss": 1.323175311088562, + "eval_runtime": 73.8927, + "eval_samples_per_second": 2.355, + "eval_steps_per_second": 0.298, + "step": 1355 + }, + { + "epoch": 5.21, + "learning_rate": 0.000130727969348659, + "loss": 0.7999, + "step": 1360 + }, + { + "epoch": 5.21, + "eval_accuracy": 0.6206896551724138, + "eval_loss": 1.2077127695083618, + "eval_runtime": 72.9338, + "eval_samples_per_second": 2.386, + "eval_steps_per_second": 0.302, + "step": 1360 + }, + { + "epoch": 5.23, + "learning_rate": 0.00013047254150702427, + "loss": 0.9447, + "step": 1365 + }, + { + "epoch": 5.23, + "eval_accuracy": 0.632183908045977, + "eval_loss": 1.119039535522461, + "eval_runtime": 75.1877, + "eval_samples_per_second": 2.314, + "eval_steps_per_second": 0.293, + "step": 1365 + }, + { + "epoch": 5.25, + "learning_rate": 0.00013021711366538954, + "loss": 0.4866, + "step": 1370 + }, + { + "epoch": 5.25, + "eval_accuracy": 0.5977011494252874, + "eval_loss": 1.1412699222564697, + "eval_runtime": 74.8277, + "eval_samples_per_second": 2.325, + "eval_steps_per_second": 0.294, + "step": 1370 + }, + { + "epoch": 5.27, + "learning_rate": 0.0001299616858237548, + "loss": 0.6268, + "step": 1375 + }, + { + "epoch": 5.27, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 1.1648024320602417, + "eval_runtime": 74.1075, + "eval_samples_per_second": 2.348, + "eval_steps_per_second": 0.297, + "step": 1375 + }, + { + "epoch": 5.29, + "learning_rate": 0.00012970625798212006, + "loss": 1.0332, + "step": 1380 + }, + { + "epoch": 5.29, + "eval_accuracy": 0.6264367816091954, + "eval_loss": 1.0704519748687744, + "eval_runtime": 73.409, + "eval_samples_per_second": 2.37, + "eval_steps_per_second": 0.3, + "step": 1380 + }, + { + "epoch": 5.31, + "learning_rate": 0.00012945083014048533, + "loss": 0.508, + "step": 1385 + }, + { + "epoch": 5.31, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 0.9115325212478638, + "eval_runtime": 75.2816, + "eval_samples_per_second": 2.311, + "eval_steps_per_second": 0.292, + "step": 1385 + }, + { + "epoch": 5.33, + "learning_rate": 0.0001291954022988506, + "loss": 0.4844, + "step": 1390 + }, + { + "epoch": 5.33, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 0.7564892768859863, + "eval_runtime": 73.4276, + "eval_samples_per_second": 2.37, + "eval_steps_per_second": 0.3, + "step": 1390 + }, + { + "epoch": 5.34, + "learning_rate": 0.00012893997445721583, + "loss": 0.6529, + "step": 1395 + }, + { + "epoch": 5.34, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 0.7532500624656677, + "eval_runtime": 74.281, + "eval_samples_per_second": 2.342, + "eval_steps_per_second": 0.296, + "step": 1395 + }, + { + "epoch": 5.36, + "learning_rate": 0.00012868454661558112, + "loss": 0.674, + "step": 1400 + }, + { + "epoch": 5.36, + "eval_accuracy": 0.6436781609195402, + "eval_loss": 0.9024896621704102, + "eval_runtime": 74.2674, + "eval_samples_per_second": 2.343, + "eval_steps_per_second": 0.296, + "step": 1400 + }, + { + "epoch": 5.38, + "learning_rate": 0.00012842911877394635, + "loss": 0.9236, + "step": 1405 + }, + { + "epoch": 5.38, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 0.7951949834823608, + "eval_runtime": 73.902, + "eval_samples_per_second": 2.354, + "eval_steps_per_second": 0.298, + "step": 1405 + }, + { + "epoch": 5.4, + "learning_rate": 0.00012817369093231162, + "loss": 0.523, + "step": 1410 + }, + { + "epoch": 5.4, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 0.7487069368362427, + "eval_runtime": 74.6134, + "eval_samples_per_second": 2.332, + "eval_steps_per_second": 0.295, + "step": 1410 + }, + { + "epoch": 5.42, + "learning_rate": 0.00012791826309067688, + "loss": 0.3512, + "step": 1415 + }, + { + "epoch": 5.42, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 0.9300501942634583, + "eval_runtime": 74.1082, + "eval_samples_per_second": 2.348, + "eval_steps_per_second": 0.297, + "step": 1415 + }, + { + "epoch": 5.44, + "learning_rate": 0.00012766283524904214, + "loss": 0.5621, + "step": 1420 + }, + { + "epoch": 5.44, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 0.9454444050788879, + "eval_runtime": 75.121, + "eval_samples_per_second": 2.316, + "eval_steps_per_second": 0.293, + "step": 1420 + }, + { + "epoch": 5.46, + "learning_rate": 0.0001274074074074074, + "loss": 0.3093, + "step": 1425 + }, + { + "epoch": 5.46, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 0.8725366592407227, + "eval_runtime": 74.2816, + "eval_samples_per_second": 2.342, + "eval_steps_per_second": 0.296, + "step": 1425 + }, + { + "epoch": 5.48, + "learning_rate": 0.00012715197956577267, + "loss": 0.5955, + "step": 1430 + }, + { + "epoch": 5.48, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 0.8809516429901123, + "eval_runtime": 73.6012, + "eval_samples_per_second": 2.364, + "eval_steps_per_second": 0.299, + "step": 1430 + }, + { + "epoch": 5.5, + "learning_rate": 0.00012689655172413793, + "loss": 0.2704, + "step": 1435 + }, + { + "epoch": 5.5, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 0.9333508610725403, + "eval_runtime": 74.0008, + "eval_samples_per_second": 2.351, + "eval_steps_per_second": 0.297, + "step": 1435 + }, + { + "epoch": 5.52, + "learning_rate": 0.0001266411238825032, + "loss": 0.3438, + "step": 1440 + }, + { + "epoch": 5.52, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 0.9162984490394592, + "eval_runtime": 73.7475, + "eval_samples_per_second": 2.359, + "eval_steps_per_second": 0.298, + "step": 1440 + }, + { + "epoch": 5.54, + "learning_rate": 0.00012638569604086846, + "loss": 0.2719, + "step": 1445 + }, + { + "epoch": 5.54, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 0.9165803790092468, + "eval_runtime": 73.1738, + "eval_samples_per_second": 2.378, + "eval_steps_per_second": 0.301, + "step": 1445 + }, + { + "epoch": 5.56, + "learning_rate": 0.00012613026819923372, + "loss": 0.4038, + "step": 1450 + }, + { + "epoch": 5.56, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 0.951847493648529, + "eval_runtime": 73.7477, + "eval_samples_per_second": 2.359, + "eval_steps_per_second": 0.298, + "step": 1450 + }, + { + "epoch": 5.57, + "learning_rate": 0.00012587484035759899, + "loss": 0.8747, + "step": 1455 + }, + { + "epoch": 5.57, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.006085753440857, + "eval_runtime": 73.6005, + "eval_samples_per_second": 2.364, + "eval_steps_per_second": 0.299, + "step": 1455 + }, + { + "epoch": 5.59, + "learning_rate": 0.00012561941251596425, + "loss": 0.5741, + "step": 1460 + }, + { + "epoch": 5.59, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.0438010692596436, + "eval_runtime": 71.8134, + "eval_samples_per_second": 2.423, + "eval_steps_per_second": 0.306, + "step": 1460 + }, + { + "epoch": 5.61, + "learning_rate": 0.0001253639846743295, + "loss": 0.732, + "step": 1465 + }, + { + "epoch": 5.61, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 1.0875169038772583, + "eval_runtime": 72.3827, + "eval_samples_per_second": 2.404, + "eval_steps_per_second": 0.304, + "step": 1465 + }, + { + "epoch": 5.63, + "learning_rate": 0.00012510855683269478, + "loss": 0.5971, + "step": 1470 + }, + { + "epoch": 5.63, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.062251091003418, + "eval_runtime": 71.8803, + "eval_samples_per_second": 2.421, + "eval_steps_per_second": 0.306, + "step": 1470 + }, + { + "epoch": 5.65, + "learning_rate": 0.00012485312899106004, + "loss": 0.6264, + "step": 1475 + }, + { + "epoch": 5.65, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 0.9514502882957458, + "eval_runtime": 72.7337, + "eval_samples_per_second": 2.392, + "eval_steps_per_second": 0.302, + "step": 1475 + }, + { + "epoch": 5.67, + "learning_rate": 0.00012459770114942528, + "loss": 0.681, + "step": 1480 + }, + { + "epoch": 5.67, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 1.0149379968643188, + "eval_runtime": 71.7065, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 1480 + }, + { + "epoch": 5.69, + "learning_rate": 0.00012434227330779057, + "loss": 0.8418, + "step": 1485 + }, + { + "epoch": 5.69, + "eval_accuracy": 0.6206896551724138, + "eval_loss": 1.1708879470825195, + "eval_runtime": 74.2125, + "eval_samples_per_second": 2.345, + "eval_steps_per_second": 0.296, + "step": 1485 + }, + { + "epoch": 5.71, + "learning_rate": 0.0001240868454661558, + "loss": 0.5313, + "step": 1490 + }, + { + "epoch": 5.71, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 1.0242153406143188, + "eval_runtime": 71.6182, + "eval_samples_per_second": 2.43, + "eval_steps_per_second": 0.307, + "step": 1490 + }, + { + "epoch": 5.73, + "learning_rate": 0.0001238314176245211, + "loss": 0.5052, + "step": 1495 + }, + { + "epoch": 5.73, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 1.254513144493103, + "eval_runtime": 72.1503, + "eval_samples_per_second": 2.412, + "eval_steps_per_second": 0.305, + "step": 1495 + }, + { + "epoch": 5.75, + "learning_rate": 0.00012357598978288633, + "loss": 1.002, + "step": 1500 + }, + { + "epoch": 5.75, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.1145654916763306, + "eval_runtime": 71.7734, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.307, + "step": 1500 + }, + { + "epoch": 5.77, + "learning_rate": 0.00012332056194125162, + "loss": 0.4721, + "step": 1505 + }, + { + "epoch": 5.77, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.091664433479309, + "eval_runtime": 74.1208, + "eval_samples_per_second": 2.348, + "eval_steps_per_second": 0.297, + "step": 1505 + }, + { + "epoch": 5.79, + "learning_rate": 0.00012306513409961686, + "loss": 0.3988, + "step": 1510 + }, + { + "epoch": 5.79, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 1.0722365379333496, + "eval_runtime": 73.3159, + "eval_samples_per_second": 2.373, + "eval_steps_per_second": 0.3, + "step": 1510 + }, + { + "epoch": 5.8, + "learning_rate": 0.00012280970625798212, + "loss": 0.8568, + "step": 1515 + }, + { + "epoch": 5.8, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 0.8946850895881653, + "eval_runtime": 72.2801, + "eval_samples_per_second": 2.407, + "eval_steps_per_second": 0.304, + "step": 1515 + }, + { + "epoch": 5.82, + "learning_rate": 0.00012255427841634738, + "loss": 0.4908, + "step": 1520 + }, + { + "epoch": 5.82, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 0.8863809704780579, + "eval_runtime": 71.7599, + "eval_samples_per_second": 2.425, + "eval_steps_per_second": 0.307, + "step": 1520 + }, + { + "epoch": 5.84, + "learning_rate": 0.00012229885057471265, + "loss": 0.5216, + "step": 1525 + }, + { + "epoch": 5.84, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 0.9632396697998047, + "eval_runtime": 72.267, + "eval_samples_per_second": 2.408, + "eval_steps_per_second": 0.304, + "step": 1525 + }, + { + "epoch": 5.86, + "learning_rate": 0.00012204342273307792, + "loss": 0.7425, + "step": 1530 + }, + { + "epoch": 5.86, + "eval_accuracy": 0.735632183908046, + "eval_loss": 0.828774631023407, + "eval_runtime": 73.4716, + "eval_samples_per_second": 2.368, + "eval_steps_per_second": 0.299, + "step": 1530 + }, + { + "epoch": 5.88, + "learning_rate": 0.00012178799489144317, + "loss": 0.5288, + "step": 1535 + }, + { + "epoch": 5.88, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 1.1222902536392212, + "eval_runtime": 72.267, + "eval_samples_per_second": 2.408, + "eval_steps_per_second": 0.304, + "step": 1535 + }, + { + "epoch": 5.9, + "learning_rate": 0.00012153256704980845, + "loss": 0.7604, + "step": 1540 + }, + { + "epoch": 5.9, + "eval_accuracy": 0.6436781609195402, + "eval_loss": 1.065313458442688, + "eval_runtime": 71.7733, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.307, + "step": 1540 + }, + { + "epoch": 5.92, + "learning_rate": 0.0001212771392081737, + "loss": 0.4761, + "step": 1545 + }, + { + "epoch": 5.92, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 0.9595795273780823, + "eval_runtime": 72.1868, + "eval_samples_per_second": 2.41, + "eval_steps_per_second": 0.305, + "step": 1545 + }, + { + "epoch": 5.94, + "learning_rate": 0.00012102171136653895, + "loss": 0.7556, + "step": 1550 + }, + { + "epoch": 5.94, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 0.9294580817222595, + "eval_runtime": 71.6133, + "eval_samples_per_second": 2.43, + "eval_steps_per_second": 0.307, + "step": 1550 + }, + { + "epoch": 5.96, + "learning_rate": 0.00012076628352490423, + "loss": 0.7834, + "step": 1555 + }, + { + "epoch": 5.96, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 0.9481978416442871, + "eval_runtime": 72.222, + "eval_samples_per_second": 2.409, + "eval_steps_per_second": 0.305, + "step": 1555 + }, + { + "epoch": 5.98, + "learning_rate": 0.00012051085568326948, + "loss": 0.8625, + "step": 1560 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 0.9149760007858276, + "eval_runtime": 71.6268, + "eval_samples_per_second": 2.429, + "eval_steps_per_second": 0.307, + "step": 1560 + }, + { + "epoch": 6.0, + "learning_rate": 0.00012025542784163475, + "loss": 0.4823, + "step": 1565 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 0.8527363538742065, + "eval_runtime": 72.4135, + "eval_samples_per_second": 2.403, + "eval_steps_per_second": 0.304, + "step": 1565 + }, + { + "epoch": 6.02, + "learning_rate": 0.00012, + "loss": 0.6611, + "step": 1570 + }, + { + "epoch": 6.02, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 1.1094207763671875, + "eval_runtime": 71.6394, + "eval_samples_per_second": 2.429, + "eval_steps_per_second": 0.307, + "step": 1570 + }, + { + "epoch": 6.03, + "learning_rate": 0.00011974457215836528, + "loss": 0.4618, + "step": 1575 + }, + { + "epoch": 6.03, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 0.9192668199539185, + "eval_runtime": 74.1876, + "eval_samples_per_second": 2.345, + "eval_steps_per_second": 0.297, + "step": 1575 + }, + { + "epoch": 6.05, + "learning_rate": 0.00011948914431673053, + "loss": 0.3332, + "step": 1580 + }, + { + "epoch": 6.05, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 0.8720999360084534, + "eval_runtime": 73.8274, + "eval_samples_per_second": 2.357, + "eval_steps_per_second": 0.298, + "step": 1580 + }, + { + "epoch": 6.07, + "learning_rate": 0.00011923371647509578, + "loss": 0.4447, + "step": 1585 + }, + { + "epoch": 6.07, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 0.8002211451530457, + "eval_runtime": 74.081, + "eval_samples_per_second": 2.349, + "eval_steps_per_second": 0.297, + "step": 1585 + }, + { + "epoch": 6.09, + "learning_rate": 0.00011897828863346106, + "loss": 0.4332, + "step": 1590 + }, + { + "epoch": 6.09, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 0.8471765518188477, + "eval_runtime": 71.7734, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.307, + "step": 1590 + }, + { + "epoch": 6.11, + "learning_rate": 0.0001187228607918263, + "loss": 0.3504, + "step": 1595 + }, + { + "epoch": 6.11, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 0.8792366981506348, + "eval_runtime": 72.3203, + "eval_samples_per_second": 2.406, + "eval_steps_per_second": 0.304, + "step": 1595 + }, + { + "epoch": 6.13, + "learning_rate": 0.00011846743295019158, + "loss": 0.1991, + "step": 1600 + }, + { + "epoch": 6.13, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 0.9491644501686096, + "eval_runtime": 71.7555, + "eval_samples_per_second": 2.425, + "eval_steps_per_second": 0.307, + "step": 1600 + }, + { + "epoch": 6.15, + "learning_rate": 0.00011821200510855683, + "loss": 0.2901, + "step": 1605 + }, + { + "epoch": 6.15, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.0009914636611938, + "eval_runtime": 72.2758, + "eval_samples_per_second": 2.407, + "eval_steps_per_second": 0.304, + "step": 1605 + }, + { + "epoch": 6.17, + "learning_rate": 0.00011795657726692211, + "loss": 0.6851, + "step": 1610 + }, + { + "epoch": 6.17, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 1.0788379907608032, + "eval_runtime": 71.7844, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.306, + "step": 1610 + }, + { + "epoch": 6.19, + "learning_rate": 0.00011770114942528736, + "loss": 0.5596, + "step": 1615 + }, + { + "epoch": 6.19, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.04649019241333, + "eval_runtime": 72.2936, + "eval_samples_per_second": 2.407, + "eval_steps_per_second": 0.304, + "step": 1615 + }, + { + "epoch": 6.21, + "learning_rate": 0.00011744572158365264, + "loss": 0.4059, + "step": 1620 + }, + { + "epoch": 6.21, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 0.9852347373962402, + "eval_runtime": 71.7201, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 1620 + }, + { + "epoch": 6.23, + "learning_rate": 0.00011719029374201789, + "loss": 0.4061, + "step": 1625 + }, + { + "epoch": 6.23, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.0554276704788208, + "eval_runtime": 73.1205, + "eval_samples_per_second": 2.38, + "eval_steps_per_second": 0.301, + "step": 1625 + }, + { + "epoch": 6.25, + "learning_rate": 0.00011693486590038314, + "loss": 0.2499, + "step": 1630 + }, + { + "epoch": 6.25, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.0310966968536377, + "eval_runtime": 72.0535, + "eval_samples_per_second": 2.415, + "eval_steps_per_second": 0.305, + "step": 1630 + }, + { + "epoch": 6.26, + "learning_rate": 0.00011667943805874841, + "loss": 0.3739, + "step": 1635 + }, + { + "epoch": 6.26, + "eval_accuracy": 0.7701149425287356, + "eval_loss": 0.8703017234802246, + "eval_runtime": 74.24, + "eval_samples_per_second": 2.344, + "eval_steps_per_second": 0.296, + "step": 1635 + }, + { + "epoch": 6.28, + "learning_rate": 0.00011642401021711366, + "loss": 0.4323, + "step": 1640 + }, + { + "epoch": 6.28, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.0342254638671875, + "eval_runtime": 71.7608, + "eval_samples_per_second": 2.425, + "eval_steps_per_second": 0.307, + "step": 1640 + }, + { + "epoch": 6.3, + "learning_rate": 0.00011616858237547894, + "loss": 0.417, + "step": 1645 + }, + { + "epoch": 6.3, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.004945993423462, + "eval_runtime": 72.2243, + "eval_samples_per_second": 2.409, + "eval_steps_per_second": 0.305, + "step": 1645 + }, + { + "epoch": 6.32, + "learning_rate": 0.00011591315453384419, + "loss": 0.4087, + "step": 1650 + }, + { + "epoch": 6.32, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 1.0639106035232544, + "eval_runtime": 71.646, + "eval_samples_per_second": 2.429, + "eval_steps_per_second": 0.307, + "step": 1650 + }, + { + "epoch": 6.34, + "learning_rate": 0.00011565772669220947, + "loss": 0.3422, + "step": 1655 + }, + { + "epoch": 6.34, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.1308996677398682, + "eval_runtime": 73.9785, + "eval_samples_per_second": 2.352, + "eval_steps_per_second": 0.297, + "step": 1655 + }, + { + "epoch": 6.36, + "learning_rate": 0.00011540229885057472, + "loss": 0.4619, + "step": 1660 + }, + { + "epoch": 6.36, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.0824979543685913, + "eval_runtime": 71.7053, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 1660 + }, + { + "epoch": 6.38, + "learning_rate": 0.00011514687100893997, + "loss": 0.4767, + "step": 1665 + }, + { + "epoch": 6.38, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.046342134475708, + "eval_runtime": 72.3509, + "eval_samples_per_second": 2.405, + "eval_steps_per_second": 0.304, + "step": 1665 + }, + { + "epoch": 6.4, + "learning_rate": 0.00011489144316730524, + "loss": 0.4737, + "step": 1670 + }, + { + "epoch": 6.4, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 0.9138516783714294, + "eval_runtime": 71.5867, + "eval_samples_per_second": 2.431, + "eval_steps_per_second": 0.307, + "step": 1670 + }, + { + "epoch": 6.42, + "learning_rate": 0.00011463601532567049, + "loss": 0.2711, + "step": 1675 + }, + { + "epoch": 6.42, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 0.9746137857437134, + "eval_runtime": 72.2002, + "eval_samples_per_second": 2.41, + "eval_steps_per_second": 0.305, + "step": 1675 + }, + { + "epoch": 6.44, + "learning_rate": 0.00011438058748403577, + "loss": 0.3956, + "step": 1680 + }, + { + "epoch": 6.44, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 1.1271107196807861, + "eval_runtime": 71.5339, + "eval_samples_per_second": 2.432, + "eval_steps_per_second": 0.308, + "step": 1680 + }, + { + "epoch": 6.46, + "learning_rate": 0.00011412515964240102, + "loss": 0.5138, + "step": 1685 + }, + { + "epoch": 6.46, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 1.204206109046936, + "eval_runtime": 73.8943, + "eval_samples_per_second": 2.355, + "eval_steps_per_second": 0.298, + "step": 1685 + }, + { + "epoch": 6.48, + "learning_rate": 0.0001138697318007663, + "loss": 0.3022, + "step": 1690 + }, + { + "epoch": 6.48, + "eval_accuracy": 0.6436781609195402, + "eval_loss": 1.4267605543136597, + "eval_runtime": 73.7212, + "eval_samples_per_second": 2.36, + "eval_steps_per_second": 0.298, + "step": 1690 + }, + { + "epoch": 6.49, + "learning_rate": 0.00011361430395913155, + "loss": 0.4592, + "step": 1695 + }, + { + "epoch": 6.49, + "eval_accuracy": 0.6206896551724138, + "eval_loss": 1.6285024881362915, + "eval_runtime": 72.3063, + "eval_samples_per_second": 2.406, + "eval_steps_per_second": 0.304, + "step": 1695 + }, + { + "epoch": 6.51, + "learning_rate": 0.00011335887611749681, + "loss": 0.7306, + "step": 1700 + }, + { + "epoch": 6.51, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 1.4132676124572754, + "eval_runtime": 73.4541, + "eval_samples_per_second": 2.369, + "eval_steps_per_second": 0.3, + "step": 1700 + }, + { + "epoch": 6.53, + "learning_rate": 0.00011310344827586207, + "loss": 0.2819, + "step": 1705 + }, + { + "epoch": 6.53, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 1.0855623483657837, + "eval_runtime": 72.8007, + "eval_samples_per_second": 2.39, + "eval_steps_per_second": 0.302, + "step": 1705 + }, + { + "epoch": 6.55, + "learning_rate": 0.00011284802043422734, + "loss": 0.3341, + "step": 1710 + }, + { + "epoch": 6.55, + "eval_accuracy": 0.6436781609195402, + "eval_loss": 1.190242052078247, + "eval_runtime": 73.4674, + "eval_samples_per_second": 2.368, + "eval_steps_per_second": 0.299, + "step": 1710 + }, + { + "epoch": 6.57, + "learning_rate": 0.0001125925925925926, + "loss": 0.2632, + "step": 1715 + }, + { + "epoch": 6.57, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 1.2406810522079468, + "eval_runtime": 72.2134, + "eval_samples_per_second": 2.41, + "eval_steps_per_second": 0.305, + "step": 1715 + }, + { + "epoch": 6.59, + "learning_rate": 0.00011233716475095786, + "loss": 0.3776, + "step": 1720 + }, + { + "epoch": 6.59, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.3052959442138672, + "eval_runtime": 71.6934, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 1720 + }, + { + "epoch": 6.61, + "learning_rate": 0.00011208173690932313, + "loss": 0.4002, + "step": 1725 + }, + { + "epoch": 6.61, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.335128903388977, + "eval_runtime": 72.1244, + "eval_samples_per_second": 2.412, + "eval_steps_per_second": 0.305, + "step": 1725 + }, + { + "epoch": 6.63, + "learning_rate": 0.00011182630906768839, + "loss": 0.4399, + "step": 1730 + }, + { + "epoch": 6.63, + "eval_accuracy": 0.6494252873563219, + "eval_loss": 1.5893123149871826, + "eval_runtime": 71.5155, + "eval_samples_per_second": 2.433, + "eval_steps_per_second": 0.308, + "step": 1730 + }, + { + "epoch": 6.65, + "learning_rate": 0.00011157088122605364, + "loss": 0.7733, + "step": 1735 + }, + { + "epoch": 6.65, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 1.5668152570724487, + "eval_runtime": 72.1994, + "eval_samples_per_second": 2.41, + "eval_steps_per_second": 0.305, + "step": 1735 + }, + { + "epoch": 6.67, + "learning_rate": 0.00011131545338441892, + "loss": 0.5065, + "step": 1740 + }, + { + "epoch": 6.67, + "eval_accuracy": 0.6494252873563219, + "eval_loss": 1.554033637046814, + "eval_runtime": 72.0935, + "eval_samples_per_second": 2.414, + "eval_steps_per_second": 0.305, + "step": 1740 + }, + { + "epoch": 6.69, + "learning_rate": 0.00011106002554278417, + "loss": 0.8585, + "step": 1745 + }, + { + "epoch": 6.69, + "eval_accuracy": 0.6206896551724138, + "eval_loss": 1.5956915616989136, + "eval_runtime": 72.2252, + "eval_samples_per_second": 2.409, + "eval_steps_per_second": 0.305, + "step": 1745 + }, + { + "epoch": 6.7, + "learning_rate": 0.00011080459770114944, + "loss": 0.798, + "step": 1750 + }, + { + "epoch": 6.7, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 1.2651970386505127, + "eval_runtime": 71.7377, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 1750 + }, + { + "epoch": 6.72, + "learning_rate": 0.00011054916985951469, + "loss": 0.6362, + "step": 1755 + }, + { + "epoch": 6.72, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.0616276264190674, + "eval_runtime": 72.227, + "eval_samples_per_second": 2.409, + "eval_steps_per_second": 0.305, + "step": 1755 + }, + { + "epoch": 6.74, + "learning_rate": 0.00011029374201787997, + "loss": 0.6891, + "step": 1760 + }, + { + "epoch": 6.74, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 1.1532150506973267, + "eval_runtime": 73.3739, + "eval_samples_per_second": 2.371, + "eval_steps_per_second": 0.3, + "step": 1760 + }, + { + "epoch": 6.76, + "learning_rate": 0.00011003831417624522, + "loss": 0.4885, + "step": 1765 + }, + { + "epoch": 6.76, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 0.964917778968811, + "eval_runtime": 72.3603, + "eval_samples_per_second": 2.405, + "eval_steps_per_second": 0.304, + "step": 1765 + }, + { + "epoch": 6.78, + "learning_rate": 0.00010978288633461047, + "loss": 0.2212, + "step": 1770 + }, + { + "epoch": 6.78, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 0.8999590873718262, + "eval_runtime": 71.6665, + "eval_samples_per_second": 2.428, + "eval_steps_per_second": 0.307, + "step": 1770 + }, + { + "epoch": 6.8, + "learning_rate": 0.00010952745849297575, + "loss": 0.3838, + "step": 1775 + }, + { + "epoch": 6.8, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 0.9244369268417358, + "eval_runtime": 72.3203, + "eval_samples_per_second": 2.406, + "eval_steps_per_second": 0.304, + "step": 1775 + }, + { + "epoch": 6.82, + "learning_rate": 0.000109272030651341, + "loss": 0.4909, + "step": 1780 + }, + { + "epoch": 6.82, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 0.8321231603622437, + "eval_runtime": 73.7474, + "eval_samples_per_second": 2.359, + "eval_steps_per_second": 0.298, + "step": 1780 + }, + { + "epoch": 6.84, + "learning_rate": 0.00010901660280970627, + "loss": 0.3924, + "step": 1785 + }, + { + "epoch": 6.84, + "eval_accuracy": 0.764367816091954, + "eval_loss": 0.7822464108467102, + "eval_runtime": 72.3997, + "eval_samples_per_second": 2.403, + "eval_steps_per_second": 0.304, + "step": 1785 + }, + { + "epoch": 6.86, + "learning_rate": 0.00010876117496807152, + "loss": 0.5337, + "step": 1790 + }, + { + "epoch": 6.86, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 0.8443244099617004, + "eval_runtime": 71.6807, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 1790 + }, + { + "epoch": 6.88, + "learning_rate": 0.0001085057471264368, + "loss": 0.2258, + "step": 1795 + }, + { + "epoch": 6.88, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 0.8795732259750366, + "eval_runtime": 72.2803, + "eval_samples_per_second": 2.407, + "eval_steps_per_second": 0.304, + "step": 1795 + }, + { + "epoch": 6.9, + "learning_rate": 0.00010825031928480205, + "loss": 0.496, + "step": 1800 + }, + { + "epoch": 6.9, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 0.9166645407676697, + "eval_runtime": 73.3739, + "eval_samples_per_second": 2.371, + "eval_steps_per_second": 0.3, + "step": 1800 + }, + { + "epoch": 6.92, + "learning_rate": 0.0001079948914431673, + "loss": 0.6166, + "step": 1805 + }, + { + "epoch": 6.92, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 0.969086766242981, + "eval_runtime": 72.2001, + "eval_samples_per_second": 2.41, + "eval_steps_per_second": 0.305, + "step": 1805 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010773946360153258, + "loss": 0.1913, + "step": 1810 + }, + { + "epoch": 6.93, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 0.9656856060028076, + "eval_runtime": 73.6808, + "eval_samples_per_second": 2.362, + "eval_steps_per_second": 0.299, + "step": 1810 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010748403575989783, + "loss": 0.5242, + "step": 1815 + }, + { + "epoch": 6.95, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 1.2142490148544312, + "eval_runtime": 72.4203, + "eval_samples_per_second": 2.403, + "eval_steps_per_second": 0.304, + "step": 1815 + }, + { + "epoch": 6.97, + "learning_rate": 0.0001072286079182631, + "loss": 0.8631, + "step": 1820 + }, + { + "epoch": 6.97, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 1.0861307382583618, + "eval_runtime": 71.8136, + "eval_samples_per_second": 2.423, + "eval_steps_per_second": 0.306, + "step": 1820 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010697318007662835, + "loss": 0.1719, + "step": 1825 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.0733994245529175, + "eval_runtime": 72.1759, + "eval_samples_per_second": 2.411, + "eval_steps_per_second": 0.305, + "step": 1825 + }, + { + "epoch": 7.01, + "learning_rate": 0.00010671775223499363, + "loss": 0.3511, + "step": 1830 + }, + { + "epoch": 7.01, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 0.9313093423843384, + "eval_runtime": 71.8799, + "eval_samples_per_second": 2.421, + "eval_steps_per_second": 0.306, + "step": 1830 + }, + { + "epoch": 7.03, + "learning_rate": 0.00010646232439335888, + "loss": 0.314, + "step": 1835 + }, + { + "epoch": 7.03, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 0.9551235437393188, + "eval_runtime": 75.0945, + "eval_samples_per_second": 2.317, + "eval_steps_per_second": 0.293, + "step": 1835 + }, + { + "epoch": 7.05, + "learning_rate": 0.00010620689655172413, + "loss": 0.1106, + "step": 1840 + }, + { + "epoch": 7.05, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 0.9258528351783752, + "eval_runtime": 74.9492, + "eval_samples_per_second": 2.322, + "eval_steps_per_second": 0.294, + "step": 1840 + }, + { + "epoch": 7.07, + "learning_rate": 0.0001059514687100894, + "loss": 0.0608, + "step": 1845 + }, + { + "epoch": 7.07, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 0.960231363773346, + "eval_runtime": 74.015, + "eval_samples_per_second": 2.351, + "eval_steps_per_second": 0.297, + "step": 1845 + }, + { + "epoch": 7.09, + "learning_rate": 0.00010569604086845466, + "loss": 0.1119, + "step": 1850 + }, + { + "epoch": 7.09, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.0473183393478394, + "eval_runtime": 75.0678, + "eval_samples_per_second": 2.318, + "eval_steps_per_second": 0.293, + "step": 1850 + }, + { + "epoch": 7.11, + "learning_rate": 0.00010544061302681993, + "loss": 0.153, + "step": 1855 + }, + { + "epoch": 7.11, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.066941499710083, + "eval_runtime": 73.2783, + "eval_samples_per_second": 2.375, + "eval_steps_per_second": 0.3, + "step": 1855 + }, + { + "epoch": 7.13, + "learning_rate": 0.00010518518518518518, + "loss": 0.0751, + "step": 1860 + }, + { + "epoch": 7.13, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.1262329816818237, + "eval_runtime": 73.4273, + "eval_samples_per_second": 2.37, + "eval_steps_per_second": 0.3, + "step": 1860 + }, + { + "epoch": 7.15, + "learning_rate": 0.00010492975734355046, + "loss": 0.0407, + "step": 1865 + }, + { + "epoch": 7.15, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.2633436918258667, + "eval_runtime": 75.5481, + "eval_samples_per_second": 2.303, + "eval_steps_per_second": 0.291, + "step": 1865 + }, + { + "epoch": 7.16, + "learning_rate": 0.00010467432950191571, + "loss": 0.0824, + "step": 1870 + }, + { + "epoch": 7.16, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.3817542791366577, + "eval_runtime": 75.1612, + "eval_samples_per_second": 2.315, + "eval_steps_per_second": 0.293, + "step": 1870 + }, + { + "epoch": 7.18, + "learning_rate": 0.00010441890166028096, + "loss": 0.6322, + "step": 1875 + }, + { + "epoch": 7.18, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.281891942024231, + "eval_runtime": 76.8685, + "eval_samples_per_second": 2.264, + "eval_steps_per_second": 0.286, + "step": 1875 + }, + { + "epoch": 7.2, + "learning_rate": 0.00010416347381864624, + "loss": 0.1277, + "step": 1880 + }, + { + "epoch": 7.2, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.1478445529937744, + "eval_runtime": 72.8672, + "eval_samples_per_second": 2.388, + "eval_steps_per_second": 0.302, + "step": 1880 + }, + { + "epoch": 7.22, + "learning_rate": 0.00010390804597701149, + "loss": 0.1752, + "step": 1885 + }, + { + "epoch": 7.22, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 1.2275943756103516, + "eval_runtime": 74.0318, + "eval_samples_per_second": 2.35, + "eval_steps_per_second": 0.297, + "step": 1885 + }, + { + "epoch": 7.24, + "learning_rate": 0.00010365261813537676, + "loss": 0.4279, + "step": 1890 + }, + { + "epoch": 7.24, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.32535719871521, + "eval_runtime": 73.7203, + "eval_samples_per_second": 2.36, + "eval_steps_per_second": 0.298, + "step": 1890 + }, + { + "epoch": 7.26, + "learning_rate": 0.00010339719029374201, + "loss": 0.1829, + "step": 1895 + }, + { + "epoch": 7.26, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.1822688579559326, + "eval_runtime": 73.8624, + "eval_samples_per_second": 2.356, + "eval_steps_per_second": 0.298, + "step": 1895 + }, + { + "epoch": 7.28, + "learning_rate": 0.00010314176245210729, + "loss": 0.7235, + "step": 1900 + }, + { + "epoch": 7.28, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.2413686513900757, + "eval_runtime": 73.5875, + "eval_samples_per_second": 2.365, + "eval_steps_per_second": 0.299, + "step": 1900 + }, + { + "epoch": 7.3, + "learning_rate": 0.00010288633461047254, + "loss": 0.1977, + "step": 1905 + }, + { + "epoch": 7.3, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.193420648574829, + "eval_runtime": 75.4148, + "eval_samples_per_second": 2.307, + "eval_steps_per_second": 0.292, + "step": 1905 + }, + { + "epoch": 7.32, + "learning_rate": 0.0001026309067688378, + "loss": 0.2008, + "step": 1910 + }, + { + "epoch": 7.32, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.1319164037704468, + "eval_runtime": 73.4679, + "eval_samples_per_second": 2.368, + "eval_steps_per_second": 0.299, + "step": 1910 + }, + { + "epoch": 7.34, + "learning_rate": 0.00010237547892720307, + "loss": 0.3229, + "step": 1915 + }, + { + "epoch": 7.34, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.2466601133346558, + "eval_runtime": 74.0142, + "eval_samples_per_second": 2.351, + "eval_steps_per_second": 0.297, + "step": 1915 + }, + { + "epoch": 7.36, + "learning_rate": 0.00010212005108556833, + "loss": 0.2794, + "step": 1920 + }, + { + "epoch": 7.36, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.3975579738616943, + "eval_runtime": 73.5307, + "eval_samples_per_second": 2.366, + "eval_steps_per_second": 0.299, + "step": 1920 + }, + { + "epoch": 7.38, + "learning_rate": 0.00010186462324393359, + "loss": 0.6104, + "step": 1925 + }, + { + "epoch": 7.38, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.4495618343353271, + "eval_runtime": 75.3871, + "eval_samples_per_second": 2.308, + "eval_steps_per_second": 0.292, + "step": 1925 + }, + { + "epoch": 7.39, + "learning_rate": 0.00010160919540229886, + "loss": 0.3981, + "step": 1930 + }, + { + "epoch": 7.39, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 1.4250156879425049, + "eval_runtime": 75.0034, + "eval_samples_per_second": 2.32, + "eval_steps_per_second": 0.293, + "step": 1930 + }, + { + "epoch": 7.41, + "learning_rate": 0.00010135376756066412, + "loss": 0.4133, + "step": 1935 + }, + { + "epoch": 7.41, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.3902997970581055, + "eval_runtime": 74.0941, + "eval_samples_per_second": 2.348, + "eval_steps_per_second": 0.297, + "step": 1935 + }, + { + "epoch": 7.43, + "learning_rate": 0.00010109833971902938, + "loss": 0.2211, + "step": 1940 + }, + { + "epoch": 7.43, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.2912284135818481, + "eval_runtime": 75.5221, + "eval_samples_per_second": 2.304, + "eval_steps_per_second": 0.291, + "step": 1940 + }, + { + "epoch": 7.45, + "learning_rate": 0.00010084291187739463, + "loss": 0.5127, + "step": 1945 + }, + { + "epoch": 7.45, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 1.1819477081298828, + "eval_runtime": 76.0017, + "eval_samples_per_second": 2.289, + "eval_steps_per_second": 0.289, + "step": 1945 + }, + { + "epoch": 7.47, + "learning_rate": 0.00010058748403575991, + "loss": 0.4466, + "step": 1950 + }, + { + "epoch": 7.47, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.1182750463485718, + "eval_runtime": 73.33, + "eval_samples_per_second": 2.373, + "eval_steps_per_second": 0.3, + "step": 1950 + }, + { + "epoch": 7.49, + "learning_rate": 0.00010033205619412516, + "loss": 0.3296, + "step": 1955 + }, + { + "epoch": 7.49, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.1901663541793823, + "eval_runtime": 74.5747, + "eval_samples_per_second": 2.333, + "eval_steps_per_second": 0.295, + "step": 1955 + }, + { + "epoch": 7.51, + "learning_rate": 0.00010007662835249044, + "loss": 0.2157, + "step": 1960 + }, + { + "epoch": 7.51, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.127490520477295, + "eval_runtime": 71.6666, + "eval_samples_per_second": 2.428, + "eval_steps_per_second": 0.307, + "step": 1960 + }, + { + "epoch": 7.53, + "learning_rate": 9.982120051085569e-05, + "loss": 0.1349, + "step": 1965 + }, + { + "epoch": 7.53, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.1302645206451416, + "eval_runtime": 72.3204, + "eval_samples_per_second": 2.406, + "eval_steps_per_second": 0.304, + "step": 1965 + }, + { + "epoch": 7.55, + "learning_rate": 9.956577266922095e-05, + "loss": 0.2552, + "step": 1970 + }, + { + "epoch": 7.55, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.1957188844680786, + "eval_runtime": 71.7737, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.307, + "step": 1970 + }, + { + "epoch": 7.57, + "learning_rate": 9.931034482758621e-05, + "loss": 0.0794, + "step": 1975 + }, + { + "epoch": 7.57, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.1891249418258667, + "eval_runtime": 72.3104, + "eval_samples_per_second": 2.406, + "eval_steps_per_second": 0.304, + "step": 1975 + }, + { + "epoch": 7.59, + "learning_rate": 9.905491698595148e-05, + "loss": 0.3412, + "step": 1980 + }, + { + "epoch": 7.59, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.1822692155838013, + "eval_runtime": 71.8274, + "eval_samples_per_second": 2.422, + "eval_steps_per_second": 0.306, + "step": 1980 + }, + { + "epoch": 7.61, + "learning_rate": 9.879948914431674e-05, + "loss": 0.066, + "step": 1985 + }, + { + "epoch": 7.61, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.2583097219467163, + "eval_runtime": 72.2537, + "eval_samples_per_second": 2.408, + "eval_steps_per_second": 0.304, + "step": 1985 + }, + { + "epoch": 7.62, + "learning_rate": 9.8544061302682e-05, + "loss": 0.1778, + "step": 1990 + }, + { + "epoch": 7.62, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.46968412399292, + "eval_runtime": 71.9868, + "eval_samples_per_second": 2.417, + "eval_steps_per_second": 0.306, + "step": 1990 + }, + { + "epoch": 7.64, + "learning_rate": 9.828863346104727e-05, + "loss": 0.2946, + "step": 1995 + }, + { + "epoch": 7.64, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.4956047534942627, + "eval_runtime": 72.3605, + "eval_samples_per_second": 2.405, + "eval_steps_per_second": 0.304, + "step": 1995 + }, + { + "epoch": 7.66, + "learning_rate": 9.803320561941252e-05, + "loss": 0.5025, + "step": 2000 + }, + { + "epoch": 7.66, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.2947438955307007, + "eval_runtime": 73.1739, + "eval_samples_per_second": 2.378, + "eval_steps_per_second": 0.301, + "step": 2000 + }, + { + "epoch": 7.68, + "learning_rate": 9.777777777777778e-05, + "loss": 0.1465, + "step": 2005 + }, + { + "epoch": 7.68, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.4249370098114014, + "eval_runtime": 72.9688, + "eval_samples_per_second": 2.385, + "eval_steps_per_second": 0.301, + "step": 2005 + }, + { + "epoch": 7.7, + "learning_rate": 9.752234993614304e-05, + "loss": 0.4608, + "step": 2010 + }, + { + "epoch": 7.7, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.4163832664489746, + "eval_runtime": 71.6967, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 2010 + }, + { + "epoch": 7.72, + "learning_rate": 9.72669220945083e-05, + "loss": 0.0132, + "step": 2015 + }, + { + "epoch": 7.72, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.4231857061386108, + "eval_runtime": 72.3309, + "eval_samples_per_second": 2.406, + "eval_steps_per_second": 0.304, + "step": 2015 + }, + { + "epoch": 7.74, + "learning_rate": 9.701149425287357e-05, + "loss": 0.3514, + "step": 2020 + }, + { + "epoch": 7.74, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.3790318965911865, + "eval_runtime": 71.6757, + "eval_samples_per_second": 2.428, + "eval_steps_per_second": 0.307, + "step": 2020 + }, + { + "epoch": 7.76, + "learning_rate": 9.675606641123883e-05, + "loss": 0.1211, + "step": 2025 + }, + { + "epoch": 7.76, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.3964256048202515, + "eval_runtime": 72.213, + "eval_samples_per_second": 2.41, + "eval_steps_per_second": 0.305, + "step": 2025 + }, + { + "epoch": 7.78, + "learning_rate": 9.65006385696041e-05, + "loss": 0.2947, + "step": 2030 + }, + { + "epoch": 7.78, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.3286200761795044, + "eval_runtime": 71.7334, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 2030 + }, + { + "epoch": 7.8, + "learning_rate": 9.624521072796935e-05, + "loss": 0.49, + "step": 2035 + }, + { + "epoch": 7.8, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.0810390710830688, + "eval_runtime": 72.3336, + "eval_samples_per_second": 2.406, + "eval_steps_per_second": 0.304, + "step": 2035 + }, + { + "epoch": 7.82, + "learning_rate": 9.598978288633461e-05, + "loss": 0.165, + "step": 2040 + }, + { + "epoch": 7.82, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.024723768234253, + "eval_runtime": 71.7199, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 2040 + }, + { + "epoch": 7.84, + "learning_rate": 9.573435504469987e-05, + "loss": 0.138, + "step": 2045 + }, + { + "epoch": 7.84, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.224605679512024, + "eval_runtime": 72.3204, + "eval_samples_per_second": 2.406, + "eval_steps_per_second": 0.304, + "step": 2045 + }, + { + "epoch": 7.85, + "learning_rate": 9.547892720306514e-05, + "loss": 0.2485, + "step": 2050 + }, + { + "epoch": 7.85, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.214393138885498, + "eval_runtime": 71.5732, + "eval_samples_per_second": 2.431, + "eval_steps_per_second": 0.307, + "step": 2050 + }, + { + "epoch": 7.87, + "learning_rate": 9.52234993614304e-05, + "loss": 0.2188, + "step": 2055 + }, + { + "epoch": 7.87, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.2269409894943237, + "eval_runtime": 73.9874, + "eval_samples_per_second": 2.352, + "eval_steps_per_second": 0.297, + "step": 2055 + }, + { + "epoch": 7.89, + "learning_rate": 9.496807151979566e-05, + "loss": 0.0995, + "step": 2060 + }, + { + "epoch": 7.89, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.3358938694000244, + "eval_runtime": 73.7608, + "eval_samples_per_second": 2.359, + "eval_steps_per_second": 0.298, + "step": 2060 + }, + { + "epoch": 7.91, + "learning_rate": 9.471264367816093e-05, + "loss": 0.4366, + "step": 2065 + }, + { + "epoch": 7.91, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.3700920343399048, + "eval_runtime": 72.2954, + "eval_samples_per_second": 2.407, + "eval_steps_per_second": 0.304, + "step": 2065 + }, + { + "epoch": 7.93, + "learning_rate": 9.445721583652618e-05, + "loss": 0.2468, + "step": 2070 + }, + { + "epoch": 7.93, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 1.5623064041137695, + "eval_runtime": 71.8518, + "eval_samples_per_second": 2.422, + "eval_steps_per_second": 0.306, + "step": 2070 + }, + { + "epoch": 7.95, + "learning_rate": 9.420178799489144e-05, + "loss": 0.5595, + "step": 2075 + }, + { + "epoch": 7.95, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.3248261213302612, + "eval_runtime": 72.2402, + "eval_samples_per_second": 2.409, + "eval_steps_per_second": 0.305, + "step": 2075 + }, + { + "epoch": 7.97, + "learning_rate": 9.39463601532567e-05, + "loss": 0.1288, + "step": 2080 + }, + { + "epoch": 7.97, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.1915298700332642, + "eval_runtime": 73.574, + "eval_samples_per_second": 2.365, + "eval_steps_per_second": 0.299, + "step": 2080 + }, + { + "epoch": 7.99, + "learning_rate": 9.369093231162197e-05, + "loss": 0.6715, + "step": 2085 + }, + { + "epoch": 7.99, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.2690212726593018, + "eval_runtime": 72.3729, + "eval_samples_per_second": 2.404, + "eval_steps_per_second": 0.304, + "step": 2085 + }, + { + "epoch": 8.01, + "learning_rate": 9.343550446998723e-05, + "loss": 0.2428, + "step": 2090 + }, + { + "epoch": 8.01, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.2000457048416138, + "eval_runtime": 73.1206, + "eval_samples_per_second": 2.38, + "eval_steps_per_second": 0.301, + "step": 2090 + }, + { + "epoch": 8.03, + "learning_rate": 9.318007662835249e-05, + "loss": 0.1369, + "step": 2095 + }, + { + "epoch": 8.03, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 1.337141752243042, + "eval_runtime": 72.28, + "eval_samples_per_second": 2.407, + "eval_steps_per_second": 0.304, + "step": 2095 + }, + { + "epoch": 8.05, + "learning_rate": 9.292464878671776e-05, + "loss": 0.0289, + "step": 2100 + }, + { + "epoch": 8.05, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 1.6684503555297852, + "eval_runtime": 71.8209, + "eval_samples_per_second": 2.423, + "eval_steps_per_second": 0.306, + "step": 2100 + }, + { + "epoch": 8.07, + "learning_rate": 9.266922094508302e-05, + "loss": 0.2577, + "step": 2105 + }, + { + "epoch": 8.07, + "eval_accuracy": 0.6494252873563219, + "eval_loss": 1.607564091682434, + "eval_runtime": 74.3746, + "eval_samples_per_second": 2.34, + "eval_steps_per_second": 0.296, + "step": 2105 + }, + { + "epoch": 8.08, + "learning_rate": 9.241379310344827e-05, + "loss": 0.1756, + "step": 2110 + }, + { + "epoch": 8.08, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.4137848615646362, + "eval_runtime": 71.6266, + "eval_samples_per_second": 2.429, + "eval_steps_per_second": 0.307, + "step": 2110 + }, + { + "epoch": 8.1, + "learning_rate": 9.215836526181353e-05, + "loss": 0.1628, + "step": 2115 + }, + { + "epoch": 8.1, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.380110502243042, + "eval_runtime": 74.2276, + "eval_samples_per_second": 2.344, + "eval_steps_per_second": 0.296, + "step": 2115 + }, + { + "epoch": 8.12, + "learning_rate": 9.19029374201788e-05, + "loss": 0.0913, + "step": 2120 + }, + { + "epoch": 8.12, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.3470419645309448, + "eval_runtime": 73.3072, + "eval_samples_per_second": 2.374, + "eval_steps_per_second": 0.3, + "step": 2120 + }, + { + "epoch": 8.14, + "learning_rate": 9.164750957854406e-05, + "loss": 0.0892, + "step": 2125 + }, + { + "epoch": 8.14, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.4106884002685547, + "eval_runtime": 73.9475, + "eval_samples_per_second": 2.353, + "eval_steps_per_second": 0.298, + "step": 2125 + }, + { + "epoch": 8.16, + "learning_rate": 9.139208173690932e-05, + "loss": 0.3874, + "step": 2130 + }, + { + "epoch": 8.16, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.3398877382278442, + "eval_runtime": 71.8799, + "eval_samples_per_second": 2.421, + "eval_steps_per_second": 0.306, + "step": 2130 + }, + { + "epoch": 8.18, + "learning_rate": 9.113665389527459e-05, + "loss": 0.1405, + "step": 2135 + }, + { + "epoch": 8.18, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.3665359020233154, + "eval_runtime": 74.1921, + "eval_samples_per_second": 2.345, + "eval_steps_per_second": 0.297, + "step": 2135 + }, + { + "epoch": 8.2, + "learning_rate": 9.088122605363985e-05, + "loss": 0.0921, + "step": 2140 + }, + { + "epoch": 8.2, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.4718743562698364, + "eval_runtime": 71.6672, + "eval_samples_per_second": 2.428, + "eval_steps_per_second": 0.307, + "step": 2140 + }, + { + "epoch": 8.22, + "learning_rate": 9.062579821200511e-05, + "loss": 0.1429, + "step": 2145 + }, + { + "epoch": 8.22, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.484999179840088, + "eval_runtime": 72.347, + "eval_samples_per_second": 2.405, + "eval_steps_per_second": 0.304, + "step": 2145 + }, + { + "epoch": 8.24, + "learning_rate": 9.037037037037038e-05, + "loss": 0.1498, + "step": 2150 + }, + { + "epoch": 8.24, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.41448974609375, + "eval_runtime": 73.4407, + "eval_samples_per_second": 2.369, + "eval_steps_per_second": 0.3, + "step": 2150 + }, + { + "epoch": 8.26, + "learning_rate": 9.011494252873564e-05, + "loss": 0.004, + "step": 2155 + }, + { + "epoch": 8.26, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.3708915710449219, + "eval_runtime": 72.3246, + "eval_samples_per_second": 2.406, + "eval_steps_per_second": 0.304, + "step": 2155 + }, + { + "epoch": 8.28, + "learning_rate": 8.98595146871009e-05, + "loss": 0.0364, + "step": 2160 + }, + { + "epoch": 8.28, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.3031058311462402, + "eval_runtime": 71.8044, + "eval_samples_per_second": 2.423, + "eval_steps_per_second": 0.306, + "step": 2160 + }, + { + "epoch": 8.3, + "learning_rate": 8.960408684546617e-05, + "loss": 0.0544, + "step": 2165 + }, + { + "epoch": 8.3, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.41126549243927, + "eval_runtime": 74.1212, + "eval_samples_per_second": 2.348, + "eval_steps_per_second": 0.297, + "step": 2165 + }, + { + "epoch": 8.31, + "learning_rate": 8.934865900383143e-05, + "loss": 0.0952, + "step": 2170 + }, + { + "epoch": 8.31, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.532741904258728, + "eval_runtime": 71.6535, + "eval_samples_per_second": 2.428, + "eval_steps_per_second": 0.307, + "step": 2170 + }, + { + "epoch": 8.33, + "learning_rate": 8.90932311621967e-05, + "loss": 0.0006, + "step": 2175 + }, + { + "epoch": 8.33, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 1.6938968896865845, + "eval_runtime": 72.2802, + "eval_samples_per_second": 2.407, + "eval_steps_per_second": 0.304, + "step": 2175 + }, + { + "epoch": 8.35, + "learning_rate": 8.883780332056194e-05, + "loss": 0.2597, + "step": 2180 + }, + { + "epoch": 8.35, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.5596327781677246, + "eval_runtime": 71.7186, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 2180 + }, + { + "epoch": 8.37, + "learning_rate": 8.85823754789272e-05, + "loss": 0.2526, + "step": 2185 + }, + { + "epoch": 8.37, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.5019148588180542, + "eval_runtime": 73.0896, + "eval_samples_per_second": 2.381, + "eval_steps_per_second": 0.301, + "step": 2185 + }, + { + "epoch": 8.39, + "learning_rate": 8.832694763729247e-05, + "loss": 0.102, + "step": 2190 + }, + { + "epoch": 8.39, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.612549901008606, + "eval_runtime": 71.7757, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.307, + "step": 2190 + }, + { + "epoch": 8.41, + "learning_rate": 8.807151979565773e-05, + "loss": 0.02, + "step": 2195 + }, + { + "epoch": 8.41, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 1.7707464694976807, + "eval_runtime": 72.3245, + "eval_samples_per_second": 2.406, + "eval_steps_per_second": 0.304, + "step": 2195 + }, + { + "epoch": 8.43, + "learning_rate": 8.7816091954023e-05, + "loss": 0.0062, + "step": 2200 + }, + { + "epoch": 8.43, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.5734792947769165, + "eval_runtime": 73.3522, + "eval_samples_per_second": 2.372, + "eval_steps_per_second": 0.3, + "step": 2200 + }, + { + "epoch": 8.45, + "learning_rate": 8.756066411238826e-05, + "loss": 0.0429, + "step": 2205 + }, + { + "epoch": 8.45, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.4174237251281738, + "eval_runtime": 72.276, + "eval_samples_per_second": 2.407, + "eval_steps_per_second": 0.304, + "step": 2205 + }, + { + "epoch": 8.47, + "learning_rate": 8.730523627075352e-05, + "loss": 0.272, + "step": 2210 + }, + { + "epoch": 8.47, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.4774861335754395, + "eval_runtime": 72.0492, + "eval_samples_per_second": 2.415, + "eval_steps_per_second": 0.305, + "step": 2210 + }, + { + "epoch": 8.49, + "learning_rate": 8.704980842911877e-05, + "loss": 0.1014, + "step": 2215 + }, + { + "epoch": 8.49, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.556422472000122, + "eval_runtime": 73.9875, + "eval_samples_per_second": 2.352, + "eval_steps_per_second": 0.297, + "step": 2215 + }, + { + "epoch": 8.51, + "learning_rate": 8.679438058748404e-05, + "loss": 0.1191, + "step": 2220 + }, + { + "epoch": 8.51, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.6515696048736572, + "eval_runtime": 71.7867, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.306, + "step": 2220 + }, + { + "epoch": 8.52, + "learning_rate": 8.65389527458493e-05, + "loss": 0.1466, + "step": 2225 + }, + { + "epoch": 8.52, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.5438907146453857, + "eval_runtime": 72.413, + "eval_samples_per_second": 2.403, + "eval_steps_per_second": 0.304, + "step": 2225 + }, + { + "epoch": 8.54, + "learning_rate": 8.628352490421456e-05, + "loss": 0.033, + "step": 2230 + }, + { + "epoch": 8.54, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.6586909294128418, + "eval_runtime": 71.5867, + "eval_samples_per_second": 2.431, + "eval_steps_per_second": 0.307, + "step": 2230 + }, + { + "epoch": 8.56, + "learning_rate": 8.602809706257983e-05, + "loss": 0.4028, + "step": 2235 + }, + { + "epoch": 8.56, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.5925533771514893, + "eval_runtime": 74.2608, + "eval_samples_per_second": 2.343, + "eval_steps_per_second": 0.296, + "step": 2235 + }, + { + "epoch": 8.58, + "learning_rate": 8.577266922094509e-05, + "loss": 0.29, + "step": 2240 + }, + { + "epoch": 8.58, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.4162274599075317, + "eval_runtime": 71.6928, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 2240 + }, + { + "epoch": 8.6, + "learning_rate": 8.551724137931035e-05, + "loss": 0.082, + "step": 2245 + }, + { + "epoch": 8.6, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.4092352390289307, + "eval_runtime": 72.1601, + "eval_samples_per_second": 2.411, + "eval_steps_per_second": 0.305, + "step": 2245 + }, + { + "epoch": 8.62, + "learning_rate": 8.52618135376756e-05, + "loss": 0.0273, + "step": 2250 + }, + { + "epoch": 8.62, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.4052162170410156, + "eval_runtime": 71.6429, + "eval_samples_per_second": 2.429, + "eval_steps_per_second": 0.307, + "step": 2250 + }, + { + "epoch": 8.64, + "learning_rate": 8.500638569604087e-05, + "loss": 0.2974, + "step": 2255 + }, + { + "epoch": 8.64, + "eval_accuracy": 0.7701149425287356, + "eval_loss": 1.4225624799728394, + "eval_runtime": 72.3814, + "eval_samples_per_second": 2.404, + "eval_steps_per_second": 0.304, + "step": 2255 + }, + { + "epoch": 8.66, + "learning_rate": 8.475095785440613e-05, + "loss": 0.7249, + "step": 2260 + }, + { + "epoch": 8.66, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.3933783769607544, + "eval_runtime": 71.7734, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.307, + "step": 2260 + }, + { + "epoch": 8.68, + "learning_rate": 8.449553001277139e-05, + "loss": 0.1874, + "step": 2265 + }, + { + "epoch": 8.68, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.3755295276641846, + "eval_runtime": 73.9422, + "eval_samples_per_second": 2.353, + "eval_steps_per_second": 0.298, + "step": 2265 + }, + { + "epoch": 8.7, + "learning_rate": 8.424010217113666e-05, + "loss": 0.0365, + "step": 2270 + }, + { + "epoch": 8.7, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.3688724040985107, + "eval_runtime": 71.7961, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.306, + "step": 2270 + }, + { + "epoch": 8.72, + "learning_rate": 8.398467432950192e-05, + "loss": 0.1775, + "step": 2275 + }, + { + "epoch": 8.72, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.3662774562835693, + "eval_runtime": 72.2725, + "eval_samples_per_second": 2.408, + "eval_steps_per_second": 0.304, + "step": 2275 + }, + { + "epoch": 8.74, + "learning_rate": 8.372924648786718e-05, + "loss": 0.0974, + "step": 2280 + }, + { + "epoch": 8.74, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.3852037191390991, + "eval_runtime": 71.6891, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 2280 + }, + { + "epoch": 8.75, + "learning_rate": 8.347381864623243e-05, + "loss": 0.1109, + "step": 2285 + }, + { + "epoch": 8.75, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 1.4648981094360352, + "eval_runtime": 72.2936, + "eval_samples_per_second": 2.407, + "eval_steps_per_second": 0.304, + "step": 2285 + }, + { + "epoch": 8.77, + "learning_rate": 8.32183908045977e-05, + "loss": 0.2778, + "step": 2290 + }, + { + "epoch": 8.77, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 1.435415267944336, + "eval_runtime": 71.7606, + "eval_samples_per_second": 2.425, + "eval_steps_per_second": 0.307, + "step": 2290 + }, + { + "epoch": 8.79, + "learning_rate": 8.296296296296296e-05, + "loss": 0.0384, + "step": 2295 + }, + { + "epoch": 8.79, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.3910033702850342, + "eval_runtime": 72.3796, + "eval_samples_per_second": 2.404, + "eval_steps_per_second": 0.304, + "step": 2295 + }, + { + "epoch": 8.81, + "learning_rate": 8.270753512132822e-05, + "loss": 0.1499, + "step": 2300 + }, + { + "epoch": 8.81, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.4266196489334106, + "eval_runtime": 71.6688, + "eval_samples_per_second": 2.428, + "eval_steps_per_second": 0.307, + "step": 2300 + }, + { + "epoch": 8.83, + "learning_rate": 8.245210727969349e-05, + "loss": 0.0703, + "step": 2305 + }, + { + "epoch": 8.83, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.3794021606445312, + "eval_runtime": 73.9299, + "eval_samples_per_second": 2.354, + "eval_steps_per_second": 0.298, + "step": 2305 + }, + { + "epoch": 8.85, + "learning_rate": 8.219667943805875e-05, + "loss": 0.2238, + "step": 2310 + }, + { + "epoch": 8.85, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.4704707860946655, + "eval_runtime": 71.631, + "eval_samples_per_second": 2.429, + "eval_steps_per_second": 0.307, + "step": 2310 + }, + { + "epoch": 8.87, + "learning_rate": 8.194125159642401e-05, + "loss": 0.2418, + "step": 2315 + }, + { + "epoch": 8.87, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.4477012157440186, + "eval_runtime": 73.9608, + "eval_samples_per_second": 2.353, + "eval_steps_per_second": 0.297, + "step": 2315 + }, + { + "epoch": 8.89, + "learning_rate": 8.168582375478928e-05, + "loss": 0.0854, + "step": 2320 + }, + { + "epoch": 8.89, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.453401803970337, + "eval_runtime": 71.72, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 2320 + }, + { + "epoch": 8.91, + "learning_rate": 8.143039591315454e-05, + "loss": 0.1613, + "step": 2325 + }, + { + "epoch": 8.91, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.5824450254440308, + "eval_runtime": 72.4535, + "eval_samples_per_second": 2.402, + "eval_steps_per_second": 0.304, + "step": 2325 + }, + { + "epoch": 8.93, + "learning_rate": 8.11749680715198e-05, + "loss": 0.0599, + "step": 2330 + }, + { + "epoch": 8.93, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.5845459699630737, + "eval_runtime": 73.534, + "eval_samples_per_second": 2.366, + "eval_steps_per_second": 0.299, + "step": 2330 + }, + { + "epoch": 8.95, + "learning_rate": 8.091954022988507e-05, + "loss": 0.2216, + "step": 2335 + }, + { + "epoch": 8.95, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.5560506582260132, + "eval_runtime": 72.8271, + "eval_samples_per_second": 2.389, + "eval_steps_per_second": 0.302, + "step": 2335 + }, + { + "epoch": 8.97, + "learning_rate": 8.066411238825033e-05, + "loss": 0.023, + "step": 2340 + }, + { + "epoch": 8.97, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.5869792699813843, + "eval_runtime": 71.7467, + "eval_samples_per_second": 2.425, + "eval_steps_per_second": 0.307, + "step": 2340 + }, + { + "epoch": 8.98, + "learning_rate": 8.04086845466156e-05, + "loss": 0.0166, + "step": 2345 + }, + { + "epoch": 8.98, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.6614141464233398, + "eval_runtime": 72.2937, + "eval_samples_per_second": 2.407, + "eval_steps_per_second": 0.304, + "step": 2345 + }, + { + "epoch": 9.0, + "learning_rate": 8.015325670498086e-05, + "loss": 0.1114, + "step": 2350 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.5962401628494263, + "eval_runtime": 71.8444, + "eval_samples_per_second": 2.422, + "eval_steps_per_second": 0.306, + "step": 2350 + }, + { + "epoch": 9.02, + "learning_rate": 7.989782886334612e-05, + "loss": 0.0254, + "step": 2355 + }, + { + "epoch": 9.02, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.578687310218811, + "eval_runtime": 73.975, + "eval_samples_per_second": 2.352, + "eval_steps_per_second": 0.297, + "step": 2355 + }, + { + "epoch": 9.04, + "learning_rate": 7.964240102171137e-05, + "loss": 0.0807, + "step": 2360 + }, + { + "epoch": 9.04, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.6112695932388306, + "eval_runtime": 73.1873, + "eval_samples_per_second": 2.377, + "eval_steps_per_second": 0.301, + "step": 2360 + }, + { + "epoch": 9.06, + "learning_rate": 7.938697318007663e-05, + "loss": 0.0151, + "step": 2365 + }, + { + "epoch": 9.06, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.6175737380981445, + "eval_runtime": 72.2137, + "eval_samples_per_second": 2.41, + "eval_steps_per_second": 0.305, + "step": 2365 + }, + { + "epoch": 9.08, + "learning_rate": 7.91315453384419e-05, + "loss": 0.0234, + "step": 2370 + }, + { + "epoch": 9.08, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.6412409543991089, + "eval_runtime": 73.4407, + "eval_samples_per_second": 2.369, + "eval_steps_per_second": 0.3, + "step": 2370 + }, + { + "epoch": 9.1, + "learning_rate": 7.887611749680716e-05, + "loss": 0.0901, + "step": 2375 + }, + { + "epoch": 9.1, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.6331058740615845, + "eval_runtime": 72.4003, + "eval_samples_per_second": 2.403, + "eval_steps_per_second": 0.304, + "step": 2375 + }, + { + "epoch": 9.12, + "learning_rate": 7.862068965517242e-05, + "loss": 0.0032, + "step": 2380 + }, + { + "epoch": 9.12, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.7730814218521118, + "eval_runtime": 71.8133, + "eval_samples_per_second": 2.423, + "eval_steps_per_second": 0.306, + "step": 2380 + }, + { + "epoch": 9.14, + "learning_rate": 7.836526181353769e-05, + "loss": 0.0657, + "step": 2385 + }, + { + "epoch": 9.14, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.7673834562301636, + "eval_runtime": 72.4937, + "eval_samples_per_second": 2.4, + "eval_steps_per_second": 0.303, + "step": 2385 + }, + { + "epoch": 9.16, + "learning_rate": 7.810983397190295e-05, + "loss": 0.0035, + "step": 2390 + }, + { + "epoch": 9.16, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.7309309244155884, + "eval_runtime": 71.574, + "eval_samples_per_second": 2.431, + "eval_steps_per_second": 0.307, + "step": 2390 + }, + { + "epoch": 9.18, + "learning_rate": 7.78544061302682e-05, + "loss": 0.019, + "step": 2395 + }, + { + "epoch": 9.18, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.6792755126953125, + "eval_runtime": 74.3875, + "eval_samples_per_second": 2.339, + "eval_steps_per_second": 0.296, + "step": 2395 + }, + { + "epoch": 9.2, + "learning_rate": 7.759897828863346e-05, + "loss": 0.0038, + "step": 2400 + }, + { + "epoch": 9.2, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.7092453241348267, + "eval_runtime": 71.7278, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 2400 + }, + { + "epoch": 9.21, + "learning_rate": 7.734355044699873e-05, + "loss": 0.0061, + "step": 2405 + }, + { + "epoch": 9.21, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.726812720298767, + "eval_runtime": 72.2916, + "eval_samples_per_second": 2.407, + "eval_steps_per_second": 0.304, + "step": 2405 + }, + { + "epoch": 9.23, + "learning_rate": 7.708812260536399e-05, + "loss": 0.0077, + "step": 2410 + }, + { + "epoch": 9.23, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.6786487102508545, + "eval_runtime": 71.8439, + "eval_samples_per_second": 2.422, + "eval_steps_per_second": 0.306, + "step": 2410 + }, + { + "epoch": 9.25, + "learning_rate": 7.683269476372925e-05, + "loss": 0.1325, + "step": 2415 + }, + { + "epoch": 9.25, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7274757623672485, + "eval_runtime": 72.374, + "eval_samples_per_second": 2.404, + "eval_steps_per_second": 0.304, + "step": 2415 + }, + { + "epoch": 9.27, + "learning_rate": 7.657726692209452e-05, + "loss": 0.013, + "step": 2420 + }, + { + "epoch": 9.27, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.8993436098098755, + "eval_runtime": 73.5113, + "eval_samples_per_second": 2.367, + "eval_steps_per_second": 0.299, + "step": 2420 + }, + { + "epoch": 9.29, + "learning_rate": 7.632183908045978e-05, + "loss": 0.2623, + "step": 2425 + }, + { + "epoch": 9.29, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.9386109113693237, + "eval_runtime": 74.2008, + "eval_samples_per_second": 2.345, + "eval_steps_per_second": 0.296, + "step": 2425 + }, + { + "epoch": 9.31, + "learning_rate": 7.606641123882503e-05, + "loss": 0.1043, + "step": 2430 + }, + { + "epoch": 9.31, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7973520755767822, + "eval_runtime": 71.6127, + "eval_samples_per_second": 2.43, + "eval_steps_per_second": 0.307, + "step": 2430 + }, + { + "epoch": 9.33, + "learning_rate": 7.581098339719029e-05, + "loss": 0.2192, + "step": 2435 + }, + { + "epoch": 9.33, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.622019648551941, + "eval_runtime": 73.9624, + "eval_samples_per_second": 2.353, + "eval_steps_per_second": 0.297, + "step": 2435 + }, + { + "epoch": 9.35, + "learning_rate": 7.555555555555556e-05, + "loss": 0.0019, + "step": 2440 + }, + { + "epoch": 9.35, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.6389633417129517, + "eval_runtime": 71.6133, + "eval_samples_per_second": 2.43, + "eval_steps_per_second": 0.307, + "step": 2440 + }, + { + "epoch": 9.37, + "learning_rate": 7.530012771392082e-05, + "loss": 0.0072, + "step": 2445 + }, + { + "epoch": 9.37, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.765933632850647, + "eval_runtime": 74.1608, + "eval_samples_per_second": 2.346, + "eval_steps_per_second": 0.297, + "step": 2445 + }, + { + "epoch": 9.39, + "learning_rate": 7.504469987228608e-05, + "loss": 0.1955, + "step": 2450 + }, + { + "epoch": 9.39, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 1.865466594696045, + "eval_runtime": 71.8, + "eval_samples_per_second": 2.423, + "eval_steps_per_second": 0.306, + "step": 2450 + }, + { + "epoch": 9.41, + "learning_rate": 7.478927203065135e-05, + "loss": 0.0232, + "step": 2455 + }, + { + "epoch": 9.41, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.654495120048523, + "eval_runtime": 74.2804, + "eval_samples_per_second": 2.342, + "eval_steps_per_second": 0.296, + "step": 2455 + }, + { + "epoch": 9.43, + "learning_rate": 7.453384418901661e-05, + "loss": 0.0479, + "step": 2460 + }, + { + "epoch": 9.43, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.589800238609314, + "eval_runtime": 73.7881, + "eval_samples_per_second": 2.358, + "eval_steps_per_second": 0.298, + "step": 2460 + }, + { + "epoch": 9.44, + "learning_rate": 7.427841634738186e-05, + "loss": 0.0284, + "step": 2465 + }, + { + "epoch": 9.44, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.6431066989898682, + "eval_runtime": 74.4192, + "eval_samples_per_second": 2.338, + "eval_steps_per_second": 0.296, + "step": 2465 + }, + { + "epoch": 9.46, + "learning_rate": 7.402298850574712e-05, + "loss": 0.2414, + "step": 2470 + }, + { + "epoch": 9.46, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.6797583103179932, + "eval_runtime": 73.2814, + "eval_samples_per_second": 2.374, + "eval_steps_per_second": 0.3, + "step": 2470 + }, + { + "epoch": 9.48, + "learning_rate": 7.376756066411239e-05, + "loss": 0.037, + "step": 2475 + }, + { + "epoch": 9.48, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.7022534608840942, + "eval_runtime": 76.0683, + "eval_samples_per_second": 2.287, + "eval_steps_per_second": 0.289, + "step": 2475 + }, + { + "epoch": 9.5, + "learning_rate": 7.351213282247765e-05, + "loss": 0.2024, + "step": 2480 + }, + { + "epoch": 9.5, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.7409052848815918, + "eval_runtime": 75.5479, + "eval_samples_per_second": 2.303, + "eval_steps_per_second": 0.291, + "step": 2480 + }, + { + "epoch": 9.52, + "learning_rate": 7.325670498084291e-05, + "loss": 0.0136, + "step": 2485 + }, + { + "epoch": 9.52, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.5980173349380493, + "eval_runtime": 74.9877, + "eval_samples_per_second": 2.32, + "eval_steps_per_second": 0.293, + "step": 2485 + }, + { + "epoch": 9.54, + "learning_rate": 7.300127713920818e-05, + "loss": 0.0064, + "step": 2490 + }, + { + "epoch": 9.54, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.572400450706482, + "eval_runtime": 74.8005, + "eval_samples_per_second": 2.326, + "eval_steps_per_second": 0.294, + "step": 2490 + }, + { + "epoch": 9.56, + "learning_rate": 7.274584929757344e-05, + "loss": 0.0092, + "step": 2495 + }, + { + "epoch": 9.56, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.5245319604873657, + "eval_runtime": 73.814, + "eval_samples_per_second": 2.357, + "eval_steps_per_second": 0.298, + "step": 2495 + }, + { + "epoch": 9.58, + "learning_rate": 7.24904214559387e-05, + "loss": 0.0231, + "step": 2500 + }, + { + "epoch": 9.58, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.4510209560394287, + "eval_runtime": 73.161, + "eval_samples_per_second": 2.378, + "eval_steps_per_second": 0.301, + "step": 2500 + }, + { + "epoch": 9.6, + "learning_rate": 7.223499361430395e-05, + "loss": 0.0152, + "step": 2505 + }, + { + "epoch": 9.6, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.5502389669418335, + "eval_runtime": 76.6679, + "eval_samples_per_second": 2.27, + "eval_steps_per_second": 0.287, + "step": 2505 + }, + { + "epoch": 9.62, + "learning_rate": 7.197956577266922e-05, + "loss": 0.1021, + "step": 2510 + }, + { + "epoch": 9.62, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.6658586263656616, + "eval_runtime": 75.2815, + "eval_samples_per_second": 2.311, + "eval_steps_per_second": 0.292, + "step": 2510 + }, + { + "epoch": 9.64, + "learning_rate": 7.172413793103448e-05, + "loss": 0.0043, + "step": 2515 + }, + { + "epoch": 9.64, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.735673189163208, + "eval_runtime": 73.8942, + "eval_samples_per_second": 2.355, + "eval_steps_per_second": 0.298, + "step": 2515 + }, + { + "epoch": 9.66, + "learning_rate": 7.146871008939974e-05, + "loss": 0.0011, + "step": 2520 + }, + { + "epoch": 9.66, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 1.8052144050598145, + "eval_runtime": 75.1879, + "eval_samples_per_second": 2.314, + "eval_steps_per_second": 0.293, + "step": 2520 + }, + { + "epoch": 9.67, + "learning_rate": 7.1213282247765e-05, + "loss": 0.1036, + "step": 2525 + }, + { + "epoch": 9.67, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.8808355331420898, + "eval_runtime": 74.8079, + "eval_samples_per_second": 2.326, + "eval_steps_per_second": 0.294, + "step": 2525 + }, + { + "epoch": 9.69, + "learning_rate": 7.095785440613027e-05, + "loss": 0.0641, + "step": 2530 + }, + { + "epoch": 9.69, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 1.7997287511825562, + "eval_runtime": 73.4276, + "eval_samples_per_second": 2.37, + "eval_steps_per_second": 0.3, + "step": 2530 + }, + { + "epoch": 9.71, + "learning_rate": 7.070242656449553e-05, + "loss": 0.0263, + "step": 2535 + }, + { + "epoch": 9.71, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.8069945573806763, + "eval_runtime": 74.1898, + "eval_samples_per_second": 2.345, + "eval_steps_per_second": 0.297, + "step": 2535 + }, + { + "epoch": 9.73, + "learning_rate": 7.04469987228608e-05, + "loss": 0.1702, + "step": 2540 + }, + { + "epoch": 9.73, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.8327751159667969, + "eval_runtime": 73.9343, + "eval_samples_per_second": 2.353, + "eval_steps_per_second": 0.298, + "step": 2540 + }, + { + "epoch": 9.75, + "learning_rate": 7.019157088122606e-05, + "loss": 0.134, + "step": 2545 + }, + { + "epoch": 9.75, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.8617277145385742, + "eval_runtime": 75.8555, + "eval_samples_per_second": 2.294, + "eval_steps_per_second": 0.29, + "step": 2545 + }, + { + "epoch": 9.77, + "learning_rate": 6.993614303959132e-05, + "loss": 0.1743, + "step": 2550 + }, + { + "epoch": 9.77, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.8030837774276733, + "eval_runtime": 75.2054, + "eval_samples_per_second": 2.314, + "eval_steps_per_second": 0.293, + "step": 2550 + }, + { + "epoch": 9.79, + "learning_rate": 6.968071519795659e-05, + "loss": 0.0431, + "step": 2555 + }, + { + "epoch": 9.79, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.720353364944458, + "eval_runtime": 73.8007, + "eval_samples_per_second": 2.358, + "eval_steps_per_second": 0.298, + "step": 2555 + }, + { + "epoch": 9.81, + "learning_rate": 6.942528735632185e-05, + "loss": 0.1325, + "step": 2560 + }, + { + "epoch": 9.81, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.6215603351593018, + "eval_runtime": 74.9611, + "eval_samples_per_second": 2.321, + "eval_steps_per_second": 0.293, + "step": 2560 + }, + { + "epoch": 9.83, + "learning_rate": 6.916985951468711e-05, + "loss": 0.0021, + "step": 2565 + }, + { + "epoch": 9.83, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.6373803615570068, + "eval_runtime": 74.0676, + "eval_samples_per_second": 2.349, + "eval_steps_per_second": 0.297, + "step": 2565 + }, + { + "epoch": 9.85, + "learning_rate": 6.891443167305238e-05, + "loss": 0.0177, + "step": 2570 + }, + { + "epoch": 9.85, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.7743432521820068, + "eval_runtime": 73.9209, + "eval_samples_per_second": 2.354, + "eval_steps_per_second": 0.298, + "step": 2570 + }, + { + "epoch": 9.87, + "learning_rate": 6.865900383141763e-05, + "loss": 0.0059, + "step": 2575 + }, + { + "epoch": 9.87, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.7778594493865967, + "eval_runtime": 73.9917, + "eval_samples_per_second": 2.352, + "eval_steps_per_second": 0.297, + "step": 2575 + }, + { + "epoch": 9.89, + "learning_rate": 6.840357598978289e-05, + "loss": 0.0019, + "step": 2580 + }, + { + "epoch": 9.89, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.670163869857788, + "eval_runtime": 75.1748, + "eval_samples_per_second": 2.315, + "eval_steps_per_second": 0.293, + "step": 2580 + }, + { + "epoch": 9.9, + "learning_rate": 6.814814814814815e-05, + "loss": 0.3578, + "step": 2585 + }, + { + "epoch": 9.9, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.6000075340270996, + "eval_runtime": 74.6944, + "eval_samples_per_second": 2.329, + "eval_steps_per_second": 0.295, + "step": 2585 + }, + { + "epoch": 9.92, + "learning_rate": 6.789272030651342e-05, + "loss": 0.0012, + "step": 2590 + }, + { + "epoch": 9.92, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.5551468133926392, + "eval_runtime": 73.4262, + "eval_samples_per_second": 2.37, + "eval_steps_per_second": 0.3, + "step": 2590 + }, + { + "epoch": 9.94, + "learning_rate": 6.763729246487868e-05, + "loss": 0.0325, + "step": 2595 + }, + { + "epoch": 9.94, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.5792527198791504, + "eval_runtime": 72.3934, + "eval_samples_per_second": 2.404, + "eval_steps_per_second": 0.304, + "step": 2595 + }, + { + "epoch": 9.96, + "learning_rate": 6.738186462324394e-05, + "loss": 0.0029, + "step": 2600 + }, + { + "epoch": 9.96, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.6270909309387207, + "eval_runtime": 71.7327, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 2600 + }, + { + "epoch": 9.98, + "learning_rate": 6.71264367816092e-05, + "loss": 0.0894, + "step": 2605 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.6158068180084229, + "eval_runtime": 72.1335, + "eval_samples_per_second": 2.412, + "eval_steps_per_second": 0.305, + "step": 2605 + }, + { + "epoch": 10.0, + "learning_rate": 6.687100893997446e-05, + "loss": 0.0841, + "step": 2610 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.6193947792053223, + "eval_runtime": 71.5867, + "eval_samples_per_second": 2.431, + "eval_steps_per_second": 0.307, + "step": 2610 + }, + { + "epoch": 10.02, + "learning_rate": 6.661558109833972e-05, + "loss": 0.0199, + "step": 2615 + }, + { + "epoch": 10.02, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.6935107707977295, + "eval_runtime": 72.1289, + "eval_samples_per_second": 2.412, + "eval_steps_per_second": 0.305, + "step": 2615 + }, + { + "epoch": 10.04, + "learning_rate": 6.636015325670498e-05, + "loss": 0.0164, + "step": 2620 + }, + { + "epoch": 10.04, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.7199974060058594, + "eval_runtime": 71.6036, + "eval_samples_per_second": 2.43, + "eval_steps_per_second": 0.307, + "step": 2620 + }, + { + "epoch": 10.06, + "learning_rate": 6.610472541507025e-05, + "loss": 0.017, + "step": 2625 + }, + { + "epoch": 10.06, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7973920106887817, + "eval_runtime": 72.3533, + "eval_samples_per_second": 2.405, + "eval_steps_per_second": 0.304, + "step": 2625 + }, + { + "epoch": 10.08, + "learning_rate": 6.584929757343551e-05, + "loss": 0.0005, + "step": 2630 + }, + { + "epoch": 10.08, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.9108890295028687, + "eval_runtime": 73.2282, + "eval_samples_per_second": 2.376, + "eval_steps_per_second": 0.3, + "step": 2630 + }, + { + "epoch": 10.1, + "learning_rate": 6.559386973180077e-05, + "loss": 0.0272, + "step": 2635 + }, + { + "epoch": 10.1, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.9884196519851685, + "eval_runtime": 73.6979, + "eval_samples_per_second": 2.361, + "eval_steps_per_second": 0.299, + "step": 2635 + }, + { + "epoch": 10.11, + "learning_rate": 6.533844189016604e-05, + "loss": 0.0006, + "step": 2640 + }, + { + "epoch": 10.11, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 2.0442349910736084, + "eval_runtime": 71.6401, + "eval_samples_per_second": 2.429, + "eval_steps_per_second": 0.307, + "step": 2640 + }, + { + "epoch": 10.13, + "learning_rate": 6.508301404853129e-05, + "loss": 0.0003, + "step": 2645 + }, + { + "epoch": 10.13, + "eval_accuracy": 0.735632183908046, + "eval_loss": 2.0899100303649902, + "eval_runtime": 74.2411, + "eval_samples_per_second": 2.344, + "eval_steps_per_second": 0.296, + "step": 2645 + }, + { + "epoch": 10.15, + "learning_rate": 6.482758620689655e-05, + "loss": 0.0064, + "step": 2650 + }, + { + "epoch": 10.15, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 2.0910024642944336, + "eval_runtime": 71.604, + "eval_samples_per_second": 2.43, + "eval_steps_per_second": 0.307, + "step": 2650 + }, + { + "epoch": 10.17, + "learning_rate": 6.457215836526181e-05, + "loss": 0.3947, + "step": 2655 + }, + { + "epoch": 10.17, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 2.110867500305176, + "eval_runtime": 72.2402, + "eval_samples_per_second": 2.409, + "eval_steps_per_second": 0.305, + "step": 2655 + }, + { + "epoch": 10.19, + "learning_rate": 6.431673052362708e-05, + "loss": 0.0026, + "step": 2660 + }, + { + "epoch": 10.19, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.997544765472412, + "eval_runtime": 73.4539, + "eval_samples_per_second": 2.369, + "eval_steps_per_second": 0.3, + "step": 2660 + }, + { + "epoch": 10.21, + "learning_rate": 6.406130268199234e-05, + "loss": 0.0017, + "step": 2665 + }, + { + "epoch": 10.21, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.9001383781433105, + "eval_runtime": 72.3167, + "eval_samples_per_second": 2.406, + "eval_steps_per_second": 0.304, + "step": 2665 + }, + { + "epoch": 10.23, + "learning_rate": 6.38058748403576e-05, + "loss": 0.0457, + "step": 2670 + }, + { + "epoch": 10.23, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7431246042251587, + "eval_runtime": 71.7066, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 2670 + }, + { + "epoch": 10.25, + "learning_rate": 6.355044699872287e-05, + "loss": 0.1929, + "step": 2675 + }, + { + "epoch": 10.25, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.6250685453414917, + "eval_runtime": 74.0216, + "eval_samples_per_second": 2.351, + "eval_steps_per_second": 0.297, + "step": 2675 + }, + { + "epoch": 10.27, + "learning_rate": 6.329501915708812e-05, + "loss": 0.0095, + "step": 2680 + }, + { + "epoch": 10.27, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.595952033996582, + "eval_runtime": 71.6176, + "eval_samples_per_second": 2.43, + "eval_steps_per_second": 0.307, + "step": 2680 + }, + { + "epoch": 10.29, + "learning_rate": 6.303959131545338e-05, + "loss": 0.0002, + "step": 2685 + }, + { + "epoch": 10.29, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.6410387754440308, + "eval_runtime": 72.2047, + "eval_samples_per_second": 2.41, + "eval_steps_per_second": 0.305, + "step": 2685 + }, + { + "epoch": 10.31, + "learning_rate": 6.278416347381864e-05, + "loss": 0.0325, + "step": 2690 + }, + { + "epoch": 10.31, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.673262357711792, + "eval_runtime": 71.6131, + "eval_samples_per_second": 2.43, + "eval_steps_per_second": 0.307, + "step": 2690 + }, + { + "epoch": 10.33, + "learning_rate": 6.25287356321839e-05, + "loss": 0.0004, + "step": 2695 + }, + { + "epoch": 10.33, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.684037446975708, + "eval_runtime": 74.3936, + "eval_samples_per_second": 2.339, + "eval_steps_per_second": 0.296, + "step": 2695 + }, + { + "epoch": 10.34, + "learning_rate": 6.227330779054917e-05, + "loss": 0.1141, + "step": 2700 + }, + { + "epoch": 10.34, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.6978554725646973, + "eval_runtime": 73.7209, + "eval_samples_per_second": 2.36, + "eval_steps_per_second": 0.298, + "step": 2700 + }, + { + "epoch": 10.36, + "learning_rate": 6.201787994891443e-05, + "loss": 0.0059, + "step": 2705 + }, + { + "epoch": 10.36, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7011663913726807, + "eval_runtime": 72.3468, + "eval_samples_per_second": 2.405, + "eval_steps_per_second": 0.304, + "step": 2705 + }, + { + "epoch": 10.38, + "learning_rate": 6.17624521072797e-05, + "loss": 0.0245, + "step": 2710 + }, + { + "epoch": 10.38, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.77028226852417, + "eval_runtime": 72.2673, + "eval_samples_per_second": 2.408, + "eval_steps_per_second": 0.304, + "step": 2710 + }, + { + "epoch": 10.4, + "learning_rate": 6.150702426564496e-05, + "loss": 0.0076, + "step": 2715 + }, + { + "epoch": 10.4, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.8610811233520508, + "eval_runtime": 72.307, + "eval_samples_per_second": 2.406, + "eval_steps_per_second": 0.304, + "step": 2715 + }, + { + "epoch": 10.42, + "learning_rate": 6.125159642401021e-05, + "loss": 0.0021, + "step": 2720 + }, + { + "epoch": 10.42, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.8926019668579102, + "eval_runtime": 71.6919, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 2720 + }, + { + "epoch": 10.44, + "learning_rate": 6.099616858237548e-05, + "loss": 0.0065, + "step": 2725 + }, + { + "epoch": 10.44, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.880444049835205, + "eval_runtime": 72.3737, + "eval_samples_per_second": 2.404, + "eval_steps_per_second": 0.304, + "step": 2725 + }, + { + "epoch": 10.46, + "learning_rate": 6.074074074074074e-05, + "loss": 0.0056, + "step": 2730 + }, + { + "epoch": 10.46, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.802688479423523, + "eval_runtime": 71.68, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 2730 + }, + { + "epoch": 10.48, + "learning_rate": 6.0485312899106007e-05, + "loss": 0.0115, + "step": 2735 + }, + { + "epoch": 10.48, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.7529903650283813, + "eval_runtime": 72.2535, + "eval_samples_per_second": 2.408, + "eval_steps_per_second": 0.304, + "step": 2735 + }, + { + "epoch": 10.5, + "learning_rate": 6.022988505747127e-05, + "loss": 0.007, + "step": 2740 + }, + { + "epoch": 10.5, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7274096012115479, + "eval_runtime": 71.6133, + "eval_samples_per_second": 2.43, + "eval_steps_per_second": 0.307, + "step": 2740 + }, + { + "epoch": 10.52, + "learning_rate": 5.997445721583653e-05, + "loss": 0.0006, + "step": 2745 + }, + { + "epoch": 10.52, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7023934125900269, + "eval_runtime": 72.1912, + "eval_samples_per_second": 2.41, + "eval_steps_per_second": 0.305, + "step": 2745 + }, + { + "epoch": 10.54, + "learning_rate": 5.97190293742018e-05, + "loss": 0.0039, + "step": 2750 + }, + { + "epoch": 10.54, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7069329023361206, + "eval_runtime": 71.7141, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 2750 + }, + { + "epoch": 10.56, + "learning_rate": 5.9463601532567046e-05, + "loss": 0.0006, + "step": 2755 + }, + { + "epoch": 10.56, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7325639724731445, + "eval_runtime": 72.2136, + "eval_samples_per_second": 2.41, + "eval_steps_per_second": 0.305, + "step": 2755 + }, + { + "epoch": 10.57, + "learning_rate": 5.920817369093231e-05, + "loss": 0.0012, + "step": 2760 + }, + { + "epoch": 10.57, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7432584762573242, + "eval_runtime": 71.6908, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 2760 + }, + { + "epoch": 10.59, + "learning_rate": 5.895274584929757e-05, + "loss": 0.002, + "step": 2765 + }, + { + "epoch": 10.59, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.6767596006393433, + "eval_runtime": 74.0851, + "eval_samples_per_second": 2.349, + "eval_steps_per_second": 0.297, + "step": 2765 + }, + { + "epoch": 10.61, + "learning_rate": 5.8697318007662837e-05, + "loss": 0.0051, + "step": 2770 + }, + { + "epoch": 10.61, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.6323719024658203, + "eval_runtime": 71.7909, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.306, + "step": 2770 + }, + { + "epoch": 10.63, + "learning_rate": 5.84418901660281e-05, + "loss": 0.1266, + "step": 2775 + }, + { + "epoch": 10.63, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.59928560256958, + "eval_runtime": 73.9918, + "eval_samples_per_second": 2.352, + "eval_steps_per_second": 0.297, + "step": 2775 + }, + { + "epoch": 10.65, + "learning_rate": 5.818646232439336e-05, + "loss": 0.0003, + "step": 2780 + }, + { + "epoch": 10.65, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.6210390329360962, + "eval_runtime": 71.7199, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 2780 + }, + { + "epoch": 10.67, + "learning_rate": 5.7931034482758627e-05, + "loss": 0.0011, + "step": 2785 + }, + { + "epoch": 10.67, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.6556010246276855, + "eval_runtime": 72.3514, + "eval_samples_per_second": 2.405, + "eval_steps_per_second": 0.304, + "step": 2785 + }, + { + "epoch": 10.69, + "learning_rate": 5.767560664112388e-05, + "loss": 0.1543, + "step": 2790 + }, + { + "epoch": 10.69, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.6259641647338867, + "eval_runtime": 71.8043, + "eval_samples_per_second": 2.423, + "eval_steps_per_second": 0.306, + "step": 2790 + }, + { + "epoch": 10.71, + "learning_rate": 5.7420178799489147e-05, + "loss": 0.0174, + "step": 2795 + }, + { + "epoch": 10.71, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.6242051124572754, + "eval_runtime": 76.4373, + "eval_samples_per_second": 2.276, + "eval_steps_per_second": 0.288, + "step": 2795 + }, + { + "epoch": 10.73, + "learning_rate": 5.716475095785441e-05, + "loss": 0.0012, + "step": 2800 + }, + { + "epoch": 10.73, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.6227067708969116, + "eval_runtime": 73.2688, + "eval_samples_per_second": 2.375, + "eval_steps_per_second": 0.3, + "step": 2800 + }, + { + "epoch": 10.75, + "learning_rate": 5.690932311621967e-05, + "loss": 0.0016, + "step": 2805 + }, + { + "epoch": 10.75, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.6738653182983398, + "eval_runtime": 71.7359, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 2805 + }, + { + "epoch": 10.77, + "learning_rate": 5.665389527458494e-05, + "loss": 0.2556, + "step": 2810 + }, + { + "epoch": 10.77, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.6926209926605225, + "eval_runtime": 71.1551, + "eval_samples_per_second": 2.445, + "eval_steps_per_second": 0.309, + "step": 2810 + }, + { + "epoch": 10.79, + "learning_rate": 5.63984674329502e-05, + "loss": 0.013, + "step": 2815 + }, + { + "epoch": 10.79, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7026501893997192, + "eval_runtime": 71.7376, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 2815 + }, + { + "epoch": 10.8, + "learning_rate": 5.614303959131546e-05, + "loss": 0.0002, + "step": 2820 + }, + { + "epoch": 10.8, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.7139840126037598, + "eval_runtime": 73.0784, + "eval_samples_per_second": 2.381, + "eval_steps_per_second": 0.301, + "step": 2820 + }, + { + "epoch": 10.82, + "learning_rate": 5.588761174968071e-05, + "loss": 0.0023, + "step": 2825 + }, + { + "epoch": 10.82, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.7233027219772339, + "eval_runtime": 71.8919, + "eval_samples_per_second": 2.42, + "eval_steps_per_second": 0.306, + "step": 2825 + }, + { + "epoch": 10.84, + "learning_rate": 5.5632183908045976e-05, + "loss": 0.0027, + "step": 2830 + }, + { + "epoch": 10.84, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.7237999439239502, + "eval_runtime": 71.2043, + "eval_samples_per_second": 2.444, + "eval_steps_per_second": 0.309, + "step": 2830 + }, + { + "epoch": 10.86, + "learning_rate": 5.537675606641124e-05, + "loss": 0.0051, + "step": 2835 + }, + { + "epoch": 10.86, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.6586277484893799, + "eval_runtime": 71.7622, + "eval_samples_per_second": 2.425, + "eval_steps_per_second": 0.307, + "step": 2835 + }, + { + "epoch": 10.88, + "learning_rate": 5.51213282247765e-05, + "loss": 0.0003, + "step": 2840 + }, + { + "epoch": 10.88, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.6285842657089233, + "eval_runtime": 73.1223, + "eval_samples_per_second": 2.38, + "eval_steps_per_second": 0.301, + "step": 2840 + }, + { + "epoch": 10.9, + "learning_rate": 5.4865900383141767e-05, + "loss": 0.0193, + "step": 2845 + }, + { + "epoch": 10.9, + "eval_accuracy": 0.7701149425287356, + "eval_loss": 1.5850633382797241, + "eval_runtime": 71.8192, + "eval_samples_per_second": 2.423, + "eval_steps_per_second": 0.306, + "step": 2845 + }, + { + "epoch": 10.92, + "learning_rate": 5.461047254150703e-05, + "loss": 0.0003, + "step": 2850 + }, + { + "epoch": 10.92, + "eval_accuracy": 0.7758620689655172, + "eval_loss": 1.5855857133865356, + "eval_runtime": 71.1652, + "eval_samples_per_second": 2.445, + "eval_steps_per_second": 0.309, + "step": 2850 + }, + { + "epoch": 10.94, + "learning_rate": 5.435504469987229e-05, + "loss": 0.0087, + "step": 2855 + }, + { + "epoch": 10.94, + "eval_accuracy": 0.7816091954022989, + "eval_loss": 1.614903211593628, + "eval_runtime": 71.7827, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.306, + "step": 2855 + }, + { + "epoch": 10.96, + "learning_rate": 5.409961685823754e-05, + "loss": 0.0048, + "step": 2860 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.7701149425287356, + "eval_loss": 1.6289048194885254, + "eval_runtime": 71.1167, + "eval_samples_per_second": 2.447, + "eval_steps_per_second": 0.309, + "step": 2860 + }, + { + "epoch": 10.98, + "learning_rate": 5.3844189016602806e-05, + "loss": 0.0005, + "step": 2865 + }, + { + "epoch": 10.98, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.6815837621688843, + "eval_runtime": 73.4806, + "eval_samples_per_second": 2.368, + "eval_steps_per_second": 0.299, + "step": 2865 + }, + { + "epoch": 11.0, + "learning_rate": 5.358876117496807e-05, + "loss": 0.0028, + "step": 2870 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7306228876113892, + "eval_runtime": 71.2539, + "eval_samples_per_second": 2.442, + "eval_steps_per_second": 0.309, + "step": 2870 + }, + { + "epoch": 11.02, + "learning_rate": 5.333333333333333e-05, + "loss": 0.2624, + "step": 2875 + }, + { + "epoch": 11.02, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7713919878005981, + "eval_runtime": 73.4631, + "eval_samples_per_second": 2.369, + "eval_steps_per_second": 0.299, + "step": 2875 + }, + { + "epoch": 11.03, + "learning_rate": 5.3077905491698597e-05, + "loss": 0.1484, + "step": 2880 + }, + { + "epoch": 11.03, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.8120393753051758, + "eval_runtime": 72.9705, + "eval_samples_per_second": 2.385, + "eval_steps_per_second": 0.301, + "step": 2880 + }, + { + "epoch": 11.05, + "learning_rate": 5.282247765006386e-05, + "loss": 0.0006, + "step": 2885 + }, + { + "epoch": 11.05, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.8391227722167969, + "eval_runtime": 71.7975, + "eval_samples_per_second": 2.423, + "eval_steps_per_second": 0.306, + "step": 2885 + }, + { + "epoch": 11.07, + "learning_rate": 5.256704980842912e-05, + "loss": 0.0001, + "step": 2890 + }, + { + "epoch": 11.07, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.8372502326965332, + "eval_runtime": 73.0853, + "eval_samples_per_second": 2.381, + "eval_steps_per_second": 0.301, + "step": 2890 + }, + { + "epoch": 11.09, + "learning_rate": 5.231162196679439e-05, + "loss": 0.2108, + "step": 2895 + }, + { + "epoch": 11.09, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7650718688964844, + "eval_runtime": 71.8157, + "eval_samples_per_second": 2.423, + "eval_steps_per_second": 0.306, + "step": 2895 + }, + { + "epoch": 11.11, + "learning_rate": 5.205619412515964e-05, + "loss": 0.0008, + "step": 2900 + }, + { + "epoch": 11.11, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.761852741241455, + "eval_runtime": 71.2906, + "eval_samples_per_second": 2.441, + "eval_steps_per_second": 0.309, + "step": 2900 + }, + { + "epoch": 11.13, + "learning_rate": 5.1800766283524907e-05, + "loss": 0.0007, + "step": 2905 + }, + { + "epoch": 11.13, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.7633298635482788, + "eval_runtime": 71.7947, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.306, + "step": 2905 + }, + { + "epoch": 11.15, + "learning_rate": 5.154533844189017e-05, + "loss": 0.0001, + "step": 2910 + }, + { + "epoch": 11.15, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.7667099237442017, + "eval_runtime": 71.8588, + "eval_samples_per_second": 2.421, + "eval_steps_per_second": 0.306, + "step": 2910 + }, + { + "epoch": 11.17, + "learning_rate": 5.128991060025543e-05, + "loss": 0.0001, + "step": 2915 + }, + { + "epoch": 11.17, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.7692232131958008, + "eval_runtime": 71.8252, + "eval_samples_per_second": 2.423, + "eval_steps_per_second": 0.306, + "step": 2915 + }, + { + "epoch": 11.19, + "learning_rate": 5.10344827586207e-05, + "loss": 0.0041, + "step": 2920 + }, + { + "epoch": 11.19, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.7618753910064697, + "eval_runtime": 71.1483, + "eval_samples_per_second": 2.446, + "eval_steps_per_second": 0.309, + "step": 2920 + }, + { + "epoch": 11.21, + "learning_rate": 5.077905491698596e-05, + "loss": 0.0004, + "step": 2925 + }, + { + "epoch": 11.21, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.744259238243103, + "eval_runtime": 71.828, + "eval_samples_per_second": 2.422, + "eval_steps_per_second": 0.306, + "step": 2925 + }, + { + "epoch": 11.23, + "learning_rate": 5.052362707535122e-05, + "loss": 0.0016, + "step": 2930 + }, + { + "epoch": 11.23, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.7307969331741333, + "eval_runtime": 71.1919, + "eval_samples_per_second": 2.444, + "eval_steps_per_second": 0.309, + "step": 2930 + }, + { + "epoch": 11.25, + "learning_rate": 5.026819923371647e-05, + "loss": 0.0004, + "step": 2935 + }, + { + "epoch": 11.25, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.706060767173767, + "eval_runtime": 71.6927, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 2935 + }, + { + "epoch": 11.26, + "learning_rate": 5.0012771392081737e-05, + "loss": 0.0046, + "step": 2940 + }, + { + "epoch": 11.26, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.7156652212142944, + "eval_runtime": 71.1594, + "eval_samples_per_second": 2.445, + "eval_steps_per_second": 0.309, + "step": 2940 + }, + { + "epoch": 11.28, + "learning_rate": 4.9757343550447e-05, + "loss": 0.0002, + "step": 2945 + }, + { + "epoch": 11.28, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.7299869060516357, + "eval_runtime": 71.7347, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 2945 + }, + { + "epoch": 11.3, + "learning_rate": 4.950191570881226e-05, + "loss": 0.0005, + "step": 2950 + }, + { + "epoch": 11.3, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.739342212677002, + "eval_runtime": 71.1889, + "eval_samples_per_second": 2.444, + "eval_steps_per_second": 0.309, + "step": 2950 + }, + { + "epoch": 11.32, + "learning_rate": 4.9246487867177527e-05, + "loss": 0.2592, + "step": 2955 + }, + { + "epoch": 11.32, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.7353452444076538, + "eval_runtime": 71.7474, + "eval_samples_per_second": 2.425, + "eval_steps_per_second": 0.307, + "step": 2955 + }, + { + "epoch": 11.34, + "learning_rate": 4.899106002554278e-05, + "loss": 0.0018, + "step": 2960 + }, + { + "epoch": 11.34, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.753889799118042, + "eval_runtime": 71.1986, + "eval_samples_per_second": 2.444, + "eval_steps_per_second": 0.309, + "step": 2960 + }, + { + "epoch": 11.36, + "learning_rate": 4.8735632183908047e-05, + "loss": 0.0002, + "step": 2965 + }, + { + "epoch": 11.36, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7838666439056396, + "eval_runtime": 73.3097, + "eval_samples_per_second": 2.373, + "eval_steps_per_second": 0.3, + "step": 2965 + }, + { + "epoch": 11.38, + "learning_rate": 4.848020434227331e-05, + "loss": 0.0002, + "step": 2970 + }, + { + "epoch": 11.38, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.8079078197479248, + "eval_runtime": 71.2087, + "eval_samples_per_second": 2.444, + "eval_steps_per_second": 0.309, + "step": 2970 + }, + { + "epoch": 11.4, + "learning_rate": 4.822477650063857e-05, + "loss": 0.3184, + "step": 2975 + }, + { + "epoch": 11.4, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.7978729009628296, + "eval_runtime": 71.6881, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 2975 + }, + { + "epoch": 11.42, + "learning_rate": 4.796934865900383e-05, + "loss": 0.0002, + "step": 2980 + }, + { + "epoch": 11.42, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.802263617515564, + "eval_runtime": 71.289, + "eval_samples_per_second": 2.441, + "eval_steps_per_second": 0.309, + "step": 2980 + }, + { + "epoch": 11.44, + "learning_rate": 4.771392081736909e-05, + "loss": 0.0002, + "step": 2985 + }, + { + "epoch": 11.44, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.8103998899459839, + "eval_runtime": 71.6925, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 2985 + }, + { + "epoch": 11.46, + "learning_rate": 4.7458492975734357e-05, + "loss": 0.0037, + "step": 2990 + }, + { + "epoch": 11.46, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.7962597608566284, + "eval_runtime": 71.0509, + "eval_samples_per_second": 2.449, + "eval_steps_per_second": 0.31, + "step": 2990 + }, + { + "epoch": 11.48, + "learning_rate": 4.720306513409962e-05, + "loss": 0.0582, + "step": 2995 + }, + { + "epoch": 11.48, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7625627517700195, + "eval_runtime": 71.6864, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 2995 + }, + { + "epoch": 11.49, + "learning_rate": 4.694763729246488e-05, + "loss": 0.0001, + "step": 3000 + }, + { + "epoch": 11.49, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7620983123779297, + "eval_runtime": 71.1842, + "eval_samples_per_second": 2.444, + "eval_steps_per_second": 0.309, + "step": 3000 + }, + { + "epoch": 11.51, + "learning_rate": 4.669220945083015e-05, + "loss": 0.1902, + "step": 3005 + }, + { + "epoch": 11.51, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7105143070220947, + "eval_runtime": 71.7636, + "eval_samples_per_second": 2.425, + "eval_steps_per_second": 0.307, + "step": 3005 + }, + { + "epoch": 11.53, + "learning_rate": 4.643678160919541e-05, + "loss": 0.1473, + "step": 3010 + }, + { + "epoch": 11.53, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.674761176109314, + "eval_runtime": 71.1642, + "eval_samples_per_second": 2.445, + "eval_steps_per_second": 0.309, + "step": 3010 + }, + { + "epoch": 11.55, + "learning_rate": 4.6181353767560667e-05, + "loss": 0.0052, + "step": 3015 + }, + { + "epoch": 11.55, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.6427284479141235, + "eval_runtime": 71.7334, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 3015 + }, + { + "epoch": 11.57, + "learning_rate": 4.592592592592593e-05, + "loss": 0.0005, + "step": 3020 + }, + { + "epoch": 11.57, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.633091926574707, + "eval_runtime": 71.2235, + "eval_samples_per_second": 2.443, + "eval_steps_per_second": 0.309, + "step": 3020 + }, + { + "epoch": 11.59, + "learning_rate": 4.567049808429119e-05, + "loss": 0.0001, + "step": 3025 + }, + { + "epoch": 11.59, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.6355048418045044, + "eval_runtime": 71.7881, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.306, + "step": 3025 + }, + { + "epoch": 11.61, + "learning_rate": 4.541507024265646e-05, + "loss": 0.0004, + "step": 3030 + }, + { + "epoch": 11.61, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.6394970417022705, + "eval_runtime": 73.0643, + "eval_samples_per_second": 2.381, + "eval_steps_per_second": 0.301, + "step": 3030 + }, + { + "epoch": 11.63, + "learning_rate": 4.515964240102171e-05, + "loss": 0.0001, + "step": 3035 + }, + { + "epoch": 11.63, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.6441478729248047, + "eval_runtime": 71.7452, + "eval_samples_per_second": 2.425, + "eval_steps_per_second": 0.307, + "step": 3035 + }, + { + "epoch": 11.65, + "learning_rate": 4.4904214559386977e-05, + "loss": 0.0001, + "step": 3040 + }, + { + "epoch": 11.65, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.6461490392684937, + "eval_runtime": 71.208, + "eval_samples_per_second": 2.444, + "eval_steps_per_second": 0.309, + "step": 3040 + }, + { + "epoch": 11.67, + "learning_rate": 4.464878671775224e-05, + "loss": 0.0001, + "step": 3045 + }, + { + "epoch": 11.67, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.649631142616272, + "eval_runtime": 71.8178, + "eval_samples_per_second": 2.423, + "eval_steps_per_second": 0.306, + "step": 3045 + }, + { + "epoch": 11.69, + "learning_rate": 4.4393358876117497e-05, + "loss": 0.0254, + "step": 3050 + }, + { + "epoch": 11.69, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.6433813571929932, + "eval_runtime": 71.1821, + "eval_samples_per_second": 2.444, + "eval_steps_per_second": 0.309, + "step": 3050 + }, + { + "epoch": 11.7, + "learning_rate": 4.413793103448276e-05, + "loss": 0.0007, + "step": 3055 + }, + { + "epoch": 11.7, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.642929196357727, + "eval_runtime": 71.7678, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.307, + "step": 3055 + }, + { + "epoch": 11.72, + "learning_rate": 4.388250319284802e-05, + "loss": 0.0004, + "step": 3060 + }, + { + "epoch": 11.72, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.6454113721847534, + "eval_runtime": 72.9367, + "eval_samples_per_second": 2.386, + "eval_steps_per_second": 0.302, + "step": 3060 + }, + { + "epoch": 11.74, + "learning_rate": 4.362707535121329e-05, + "loss": 0.0001, + "step": 3065 + }, + { + "epoch": 11.74, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.6498183012008667, + "eval_runtime": 71.6868, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 3065 + }, + { + "epoch": 11.76, + "learning_rate": 4.337164750957854e-05, + "loss": 0.0004, + "step": 3070 + }, + { + "epoch": 11.76, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.6580952405929565, + "eval_runtime": 71.2302, + "eval_samples_per_second": 2.443, + "eval_steps_per_second": 0.309, + "step": 3070 + }, + { + "epoch": 11.78, + "learning_rate": 4.3116219667943807e-05, + "loss": 0.0003, + "step": 3075 + }, + { + "epoch": 11.78, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.6635980606079102, + "eval_runtime": 71.8047, + "eval_samples_per_second": 2.423, + "eval_steps_per_second": 0.306, + "step": 3075 + }, + { + "epoch": 11.8, + "learning_rate": 4.286079182630907e-05, + "loss": 0.0002, + "step": 3080 + }, + { + "epoch": 11.8, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.6681902408599854, + "eval_runtime": 71.207, + "eval_samples_per_second": 2.444, + "eval_steps_per_second": 0.309, + "step": 3080 + }, + { + "epoch": 11.82, + "learning_rate": 4.2605363984674326e-05, + "loss": 0.0007, + "step": 3085 + }, + { + "epoch": 11.82, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.6737765073776245, + "eval_runtime": 71.7861, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.306, + "step": 3085 + }, + { + "epoch": 11.84, + "learning_rate": 4.234993614303959e-05, + "loss": 0.0007, + "step": 3090 + }, + { + "epoch": 11.84, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.6775649785995483, + "eval_runtime": 71.4067, + "eval_samples_per_second": 2.437, + "eval_steps_per_second": 0.308, + "step": 3090 + }, + { + "epoch": 11.86, + "learning_rate": 4.209450830140485e-05, + "loss": 0.002, + "step": 3095 + }, + { + "epoch": 11.86, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.6792525053024292, + "eval_runtime": 75.5033, + "eval_samples_per_second": 2.305, + "eval_steps_per_second": 0.291, + "step": 3095 + }, + { + "epoch": 11.88, + "learning_rate": 4.1839080459770117e-05, + "loss": 0.0001, + "step": 3100 + }, + { + "epoch": 11.88, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.6821579933166504, + "eval_runtime": 72.572, + "eval_samples_per_second": 2.398, + "eval_steps_per_second": 0.303, + "step": 3100 + }, + { + "epoch": 11.9, + "learning_rate": 4.158365261813538e-05, + "loss": 0.0015, + "step": 3105 + }, + { + "epoch": 11.9, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.6829290390014648, + "eval_runtime": 74.9292, + "eval_samples_per_second": 2.322, + "eval_steps_per_second": 0.294, + "step": 3105 + }, + { + "epoch": 11.92, + "learning_rate": 4.132822477650064e-05, + "loss": 0.0001, + "step": 3110 + }, + { + "epoch": 11.92, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7074753046035767, + "eval_runtime": 72.5058, + "eval_samples_per_second": 2.4, + "eval_steps_per_second": 0.303, + "step": 3110 + }, + { + "epoch": 11.93, + "learning_rate": 4.107279693486591e-05, + "loss": 0.0002, + "step": 3115 + }, + { + "epoch": 11.93, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7354847192764282, + "eval_runtime": 72.5819, + "eval_samples_per_second": 2.397, + "eval_steps_per_second": 0.303, + "step": 3115 + }, + { + "epoch": 11.95, + "learning_rate": 4.081736909323116e-05, + "loss": 0.0008, + "step": 3120 + }, + { + "epoch": 11.95, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7531324625015259, + "eval_runtime": 74.5508, + "eval_samples_per_second": 2.334, + "eval_steps_per_second": 0.295, + "step": 3120 + }, + { + "epoch": 11.97, + "learning_rate": 4.0561941251596427e-05, + "loss": 0.0001, + "step": 3125 + }, + { + "epoch": 11.97, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7682172060012817, + "eval_runtime": 72.5782, + "eval_samples_per_second": 2.397, + "eval_steps_per_second": 0.303, + "step": 3125 + }, + { + "epoch": 11.99, + "learning_rate": 4.030651340996169e-05, + "loss": 0.0004, + "step": 3130 + }, + { + "epoch": 11.99, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.774706244468689, + "eval_runtime": 73.9012, + "eval_samples_per_second": 2.354, + "eval_steps_per_second": 0.298, + "step": 3130 + }, + { + "epoch": 12.01, + "learning_rate": 4.005108556832695e-05, + "loss": 0.0001, + "step": 3135 + }, + { + "epoch": 12.01, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.77650785446167, + "eval_runtime": 75.9663, + "eval_samples_per_second": 2.29, + "eval_steps_per_second": 0.29, + "step": 3135 + }, + { + "epoch": 12.03, + "learning_rate": 3.979565772669221e-05, + "loss": 0.0001, + "step": 3140 + }, + { + "epoch": 12.03, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.778681993484497, + "eval_runtime": 72.8484, + "eval_samples_per_second": 2.389, + "eval_steps_per_second": 0.302, + "step": 3140 + }, + { + "epoch": 12.05, + "learning_rate": 3.954022988505747e-05, + "loss": 0.0004, + "step": 3145 + }, + { + "epoch": 12.05, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.778651475906372, + "eval_runtime": 74.5051, + "eval_samples_per_second": 2.335, + "eval_steps_per_second": 0.295, + "step": 3145 + }, + { + "epoch": 12.07, + "learning_rate": 3.9284802043422737e-05, + "loss": 0.1525, + "step": 3150 + }, + { + "epoch": 12.07, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.7690765857696533, + "eval_runtime": 73.949, + "eval_samples_per_second": 2.353, + "eval_steps_per_second": 0.298, + "step": 3150 + }, + { + "epoch": 12.09, + "learning_rate": 3.9029374201788e-05, + "loss": 0.0001, + "step": 3155 + }, + { + "epoch": 12.09, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7667592763900757, + "eval_runtime": 73.0801, + "eval_samples_per_second": 2.381, + "eval_steps_per_second": 0.301, + "step": 3155 + }, + { + "epoch": 12.11, + "learning_rate": 3.8773946360153257e-05, + "loss": 0.0001, + "step": 3160 + }, + { + "epoch": 12.11, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7664886713027954, + "eval_runtime": 74.7535, + "eval_samples_per_second": 2.328, + "eval_steps_per_second": 0.294, + "step": 3160 + }, + { + "epoch": 12.13, + "learning_rate": 3.851851851851852e-05, + "loss": 0.0023, + "step": 3165 + }, + { + "epoch": 12.13, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.764859914779663, + "eval_runtime": 73.1922, + "eval_samples_per_second": 2.377, + "eval_steps_per_second": 0.301, + "step": 3165 + }, + { + "epoch": 12.15, + "learning_rate": 3.826309067688378e-05, + "loss": 0.0004, + "step": 3170 + }, + { + "epoch": 12.15, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7692474126815796, + "eval_runtime": 72.5207, + "eval_samples_per_second": 2.399, + "eval_steps_per_second": 0.303, + "step": 3170 + }, + { + "epoch": 12.16, + "learning_rate": 3.800766283524904e-05, + "loss": 0.0005, + "step": 3175 + }, + { + "epoch": 12.16, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7775356769561768, + "eval_runtime": 73.2672, + "eval_samples_per_second": 2.375, + "eval_steps_per_second": 0.3, + "step": 3175 + }, + { + "epoch": 12.18, + "learning_rate": 3.77522349936143e-05, + "loss": 0.0001, + "step": 3180 + }, + { + "epoch": 12.18, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.784372091293335, + "eval_runtime": 72.3493, + "eval_samples_per_second": 2.405, + "eval_steps_per_second": 0.304, + "step": 3180 + }, + { + "epoch": 12.2, + "learning_rate": 3.7496807151979567e-05, + "loss": 0.0001, + "step": 3185 + }, + { + "epoch": 12.2, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.7875841856002808, + "eval_runtime": 72.8117, + "eval_samples_per_second": 2.39, + "eval_steps_per_second": 0.302, + "step": 3185 + }, + { + "epoch": 12.22, + "learning_rate": 3.724137931034483e-05, + "loss": 0.0, + "step": 3190 + }, + { + "epoch": 12.22, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.7894097566604614, + "eval_runtime": 73.3527, + "eval_samples_per_second": 2.372, + "eval_steps_per_second": 0.3, + "step": 3190 + }, + { + "epoch": 12.24, + "learning_rate": 3.6985951468710087e-05, + "loss": 0.0002, + "step": 3195 + }, + { + "epoch": 12.24, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.7878044843673706, + "eval_runtime": 75.4362, + "eval_samples_per_second": 2.307, + "eval_steps_per_second": 0.292, + "step": 3195 + }, + { + "epoch": 12.26, + "learning_rate": 3.673052362707535e-05, + "loss": 0.0001, + "step": 3200 + }, + { + "epoch": 12.26, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.787352442741394, + "eval_runtime": 72.6018, + "eval_samples_per_second": 2.397, + "eval_steps_per_second": 0.303, + "step": 3200 + }, + { + "epoch": 12.28, + "learning_rate": 3.647509578544061e-05, + "loss": 0.0789, + "step": 3205 + }, + { + "epoch": 12.28, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.7876189947128296, + "eval_runtime": 73.2483, + "eval_samples_per_second": 2.375, + "eval_steps_per_second": 0.3, + "step": 3205 + }, + { + "epoch": 12.3, + "learning_rate": 3.6219667943805877e-05, + "loss": 0.0001, + "step": 3210 + }, + { + "epoch": 12.3, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7912352085113525, + "eval_runtime": 72.2227, + "eval_samples_per_second": 2.409, + "eval_steps_per_second": 0.305, + "step": 3210 + }, + { + "epoch": 12.32, + "learning_rate": 3.596424010217114e-05, + "loss": 0.0001, + "step": 3215 + }, + { + "epoch": 12.32, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7966572046279907, + "eval_runtime": 74.8197, + "eval_samples_per_second": 2.326, + "eval_steps_per_second": 0.294, + "step": 3215 + }, + { + "epoch": 12.34, + "learning_rate": 3.57088122605364e-05, + "loss": 0.0003, + "step": 3220 + }, + { + "epoch": 12.34, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7997833490371704, + "eval_runtime": 73.9379, + "eval_samples_per_second": 2.353, + "eval_steps_per_second": 0.298, + "step": 3220 + }, + { + "epoch": 12.36, + "learning_rate": 3.545338441890167e-05, + "loss": 0.0005, + "step": 3225 + }, + { + "epoch": 12.36, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.8003336191177368, + "eval_runtime": 73.7895, + "eval_samples_per_second": 2.358, + "eval_steps_per_second": 0.298, + "step": 3225 + }, + { + "epoch": 12.38, + "learning_rate": 3.519795657726692e-05, + "loss": 0.0001, + "step": 3230 + }, + { + "epoch": 12.38, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.801089882850647, + "eval_runtime": 73.3293, + "eval_samples_per_second": 2.373, + "eval_steps_per_second": 0.3, + "step": 3230 + }, + { + "epoch": 12.39, + "learning_rate": 3.4942528735632187e-05, + "loss": 0.0051, + "step": 3235 + }, + { + "epoch": 12.39, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.8076083660125732, + "eval_runtime": 73.9488, + "eval_samples_per_second": 2.353, + "eval_steps_per_second": 0.298, + "step": 3235 + }, + { + "epoch": 12.41, + "learning_rate": 3.468710089399745e-05, + "loss": 0.1098, + "step": 3240 + }, + { + "epoch": 12.41, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.8316843509674072, + "eval_runtime": 72.738, + "eval_samples_per_second": 2.392, + "eval_steps_per_second": 0.302, + "step": 3240 + }, + { + "epoch": 12.43, + "learning_rate": 3.443167305236271e-05, + "loss": 0.0009, + "step": 3245 + }, + { + "epoch": 12.43, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.8573225736618042, + "eval_runtime": 71.7532, + "eval_samples_per_second": 2.425, + "eval_steps_per_second": 0.307, + "step": 3245 + }, + { + "epoch": 12.45, + "learning_rate": 3.417624521072797e-05, + "loss": 0.0001, + "step": 3250 + }, + { + "epoch": 12.45, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.8779215812683105, + "eval_runtime": 71.1624, + "eval_samples_per_second": 2.445, + "eval_steps_per_second": 0.309, + "step": 3250 + }, + { + "epoch": 12.47, + "learning_rate": 3.392081736909323e-05, + "loss": 0.1514, + "step": 3255 + }, + { + "epoch": 12.47, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.8445746898651123, + "eval_runtime": 73.7727, + "eval_samples_per_second": 2.359, + "eval_steps_per_second": 0.298, + "step": 3255 + }, + { + "epoch": 12.49, + "learning_rate": 3.36653895274585e-05, + "loss": 0.0002, + "step": 3260 + }, + { + "epoch": 12.49, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.8186622858047485, + "eval_runtime": 72.9059, + "eval_samples_per_second": 2.387, + "eval_steps_per_second": 0.302, + "step": 3260 + }, + { + "epoch": 12.51, + "learning_rate": 3.340996168582375e-05, + "loss": 0.0024, + "step": 3265 + }, + { + "epoch": 12.51, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.791884183883667, + "eval_runtime": 71.8399, + "eval_samples_per_second": 2.422, + "eval_steps_per_second": 0.306, + "step": 3265 + }, + { + "epoch": 12.53, + "learning_rate": 3.3154533844189017e-05, + "loss": 0.0003, + "step": 3270 + }, + { + "epoch": 12.53, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7768088579177856, + "eval_runtime": 71.2715, + "eval_samples_per_second": 2.441, + "eval_steps_per_second": 0.309, + "step": 3270 + }, + { + "epoch": 12.55, + "learning_rate": 3.289910600255428e-05, + "loss": 0.0004, + "step": 3275 + }, + { + "epoch": 12.55, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.7849540710449219, + "eval_runtime": 72.4582, + "eval_samples_per_second": 2.401, + "eval_steps_per_second": 0.304, + "step": 3275 + }, + { + "epoch": 12.57, + "learning_rate": 3.264367816091954e-05, + "loss": 0.2673, + "step": 3280 + }, + { + "epoch": 12.57, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.788151502609253, + "eval_runtime": 71.2667, + "eval_samples_per_second": 2.442, + "eval_steps_per_second": 0.309, + "step": 3280 + }, + { + "epoch": 12.59, + "learning_rate": 3.23882503192848e-05, + "loss": 0.0002, + "step": 3285 + }, + { + "epoch": 12.59, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.791655421257019, + "eval_runtime": 71.75, + "eval_samples_per_second": 2.425, + "eval_steps_per_second": 0.307, + "step": 3285 + }, + { + "epoch": 12.61, + "learning_rate": 3.213282247765006e-05, + "loss": 0.0001, + "step": 3290 + }, + { + "epoch": 12.61, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7958180904388428, + "eval_runtime": 71.2081, + "eval_samples_per_second": 2.444, + "eval_steps_per_second": 0.309, + "step": 3290 + }, + { + "epoch": 12.62, + "learning_rate": 3.1877394636015327e-05, + "loss": 0.0094, + "step": 3295 + }, + { + "epoch": 12.62, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.811642050743103, + "eval_runtime": 73.4826, + "eval_samples_per_second": 2.368, + "eval_steps_per_second": 0.299, + "step": 3295 + }, + { + "epoch": 12.64, + "learning_rate": 3.162196679438058e-05, + "loss": 0.0299, + "step": 3300 + }, + { + "epoch": 12.64, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.8312135934829712, + "eval_runtime": 72.3831, + "eval_samples_per_second": 2.404, + "eval_steps_per_second": 0.304, + "step": 3300 + }, + { + "epoch": 12.66, + "learning_rate": 3.1366538952745847e-05, + "loss": 0.0002, + "step": 3305 + }, + { + "epoch": 12.66, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.8625733852386475, + "eval_runtime": 71.8476, + "eval_samples_per_second": 2.422, + "eval_steps_per_second": 0.306, + "step": 3305 + }, + { + "epoch": 12.68, + "learning_rate": 3.111111111111111e-05, + "loss": 0.0007, + "step": 3310 + }, + { + "epoch": 12.68, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.8807812929153442, + "eval_runtime": 71.1083, + "eval_samples_per_second": 2.447, + "eval_steps_per_second": 0.309, + "step": 3310 + }, + { + "epoch": 12.7, + "learning_rate": 3.085568326947637e-05, + "loss": 0.0002, + "step": 3315 + }, + { + "epoch": 12.7, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.8896175622940063, + "eval_runtime": 71.6802, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 3315 + }, + { + "epoch": 12.72, + "learning_rate": 3.0600255427841637e-05, + "loss": 0.0003, + "step": 3320 + }, + { + "epoch": 12.72, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.8892227411270142, + "eval_runtime": 74.5342, + "eval_samples_per_second": 2.334, + "eval_steps_per_second": 0.295, + "step": 3320 + }, + { + "epoch": 12.74, + "learning_rate": 3.0344827586206897e-05, + "loss": 0.0001, + "step": 3325 + }, + { + "epoch": 12.74, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.8880239725112915, + "eval_runtime": 73.6161, + "eval_samples_per_second": 2.364, + "eval_steps_per_second": 0.299, + "step": 3325 + }, + { + "epoch": 12.76, + "learning_rate": 3.008939974457216e-05, + "loss": 0.0013, + "step": 3330 + }, + { + "epoch": 12.76, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.874098539352417, + "eval_runtime": 73.2537, + "eval_samples_per_second": 2.375, + "eval_steps_per_second": 0.3, + "step": 3330 + }, + { + "epoch": 12.78, + "learning_rate": 2.9833971902937423e-05, + "loss": 0.0029, + "step": 3335 + }, + { + "epoch": 12.78, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.8415035009384155, + "eval_runtime": 71.8592, + "eval_samples_per_second": 2.421, + "eval_steps_per_second": 0.306, + "step": 3335 + }, + { + "epoch": 12.8, + "learning_rate": 2.9578544061302683e-05, + "loss": 0.0, + "step": 3340 + }, + { + "epoch": 12.8, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.8206106424331665, + "eval_runtime": 71.14, + "eval_samples_per_second": 2.446, + "eval_steps_per_second": 0.309, + "step": 3340 + }, + { + "epoch": 12.82, + "learning_rate": 2.9323116219667947e-05, + "loss": 0.0001, + "step": 3345 + }, + { + "epoch": 12.82, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.80865478515625, + "eval_runtime": 73.9426, + "eval_samples_per_second": 2.353, + "eval_steps_per_second": 0.298, + "step": 3345 + }, + { + "epoch": 12.84, + "learning_rate": 2.906768837803321e-05, + "loss": 0.0003, + "step": 3350 + }, + { + "epoch": 12.84, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.8006689548492432, + "eval_runtime": 71.1637, + "eval_samples_per_second": 2.445, + "eval_steps_per_second": 0.309, + "step": 3350 + }, + { + "epoch": 12.85, + "learning_rate": 2.8812260536398467e-05, + "loss": 0.0001, + "step": 3355 + }, + { + "epoch": 12.85, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7933427095413208, + "eval_runtime": 71.6538, + "eval_samples_per_second": 2.428, + "eval_steps_per_second": 0.307, + "step": 3355 + }, + { + "epoch": 12.87, + "learning_rate": 2.855683269476373e-05, + "loss": 0.0001, + "step": 3360 + }, + { + "epoch": 12.87, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7883424758911133, + "eval_runtime": 71.2115, + "eval_samples_per_second": 2.443, + "eval_steps_per_second": 0.309, + "step": 3360 + }, + { + "epoch": 12.89, + "learning_rate": 2.8301404853128993e-05, + "loss": 0.0001, + "step": 3365 + }, + { + "epoch": 12.89, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7855582237243652, + "eval_runtime": 71.6601, + "eval_samples_per_second": 2.428, + "eval_steps_per_second": 0.307, + "step": 3365 + }, + { + "epoch": 12.91, + "learning_rate": 2.8045977011494257e-05, + "loss": 0.0002, + "step": 3370 + }, + { + "epoch": 12.91, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7831445932388306, + "eval_runtime": 72.466, + "eval_samples_per_second": 2.401, + "eval_steps_per_second": 0.304, + "step": 3370 + }, + { + "epoch": 12.93, + "learning_rate": 2.7790549169859513e-05, + "loss": 0.0001, + "step": 3375 + }, + { + "epoch": 12.93, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7811237573623657, + "eval_runtime": 73.4621, + "eval_samples_per_second": 2.369, + "eval_steps_per_second": 0.299, + "step": 3375 + }, + { + "epoch": 12.95, + "learning_rate": 2.7535121328224777e-05, + "loss": 0.0001, + "step": 3380 + }, + { + "epoch": 12.95, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7792061567306519, + "eval_runtime": 71.133, + "eval_samples_per_second": 2.446, + "eval_steps_per_second": 0.309, + "step": 3380 + }, + { + "epoch": 12.97, + "learning_rate": 2.727969348659004e-05, + "loss": 0.0003, + "step": 3385 + }, + { + "epoch": 12.97, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7786797285079956, + "eval_runtime": 71.7413, + "eval_samples_per_second": 2.425, + "eval_steps_per_second": 0.307, + "step": 3385 + }, + { + "epoch": 12.99, + "learning_rate": 2.70242656449553e-05, + "loss": 0.0001, + "step": 3390 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7803369760513306, + "eval_runtime": 71.2098, + "eval_samples_per_second": 2.443, + "eval_steps_per_second": 0.309, + "step": 3390 + }, + { + "epoch": 13.01, + "learning_rate": 2.6768837803320563e-05, + "loss": 0.0, + "step": 3395 + }, + { + "epoch": 13.01, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7803747653961182, + "eval_runtime": 73.577, + "eval_samples_per_second": 2.365, + "eval_steps_per_second": 0.299, + "step": 3395 + }, + { + "epoch": 13.03, + "learning_rate": 2.6513409961685827e-05, + "loss": 0.0001, + "step": 3400 + }, + { + "epoch": 13.03, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7811692953109741, + "eval_runtime": 71.1922, + "eval_samples_per_second": 2.444, + "eval_steps_per_second": 0.309, + "step": 3400 + }, + { + "epoch": 13.05, + "learning_rate": 2.625798212005109e-05, + "loss": 0.0002, + "step": 3405 + }, + { + "epoch": 13.05, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7811998128890991, + "eval_runtime": 71.7905, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.306, + "step": 3405 + }, + { + "epoch": 13.07, + "learning_rate": 2.6002554278416347e-05, + "loss": 0.0001, + "step": 3410 + }, + { + "epoch": 13.07, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7812321186065674, + "eval_runtime": 71.2005, + "eval_samples_per_second": 2.444, + "eval_steps_per_second": 0.309, + "step": 3410 + }, + { + "epoch": 13.08, + "learning_rate": 2.574712643678161e-05, + "loss": 0.0002, + "step": 3415 + }, + { + "epoch": 13.08, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7813913822174072, + "eval_runtime": 73.6067, + "eval_samples_per_second": 2.364, + "eval_steps_per_second": 0.299, + "step": 3415 + }, + { + "epoch": 13.1, + "learning_rate": 2.5491698595146873e-05, + "loss": 0.0001, + "step": 3420 + }, + { + "epoch": 13.1, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7811163663864136, + "eval_runtime": 71.1956, + "eval_samples_per_second": 2.444, + "eval_steps_per_second": 0.309, + "step": 3420 + }, + { + "epoch": 13.12, + "learning_rate": 2.5236270753512137e-05, + "loss": 0.0001, + "step": 3425 + }, + { + "epoch": 13.12, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7811745405197144, + "eval_runtime": 73.6252, + "eval_samples_per_second": 2.363, + "eval_steps_per_second": 0.299, + "step": 3425 + }, + { + "epoch": 13.14, + "learning_rate": 2.4980842911877393e-05, + "loss": 0.0001, + "step": 3430 + }, + { + "epoch": 13.14, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7812987565994263, + "eval_runtime": 73.0837, + "eval_samples_per_second": 2.381, + "eval_steps_per_second": 0.301, + "step": 3430 + }, + { + "epoch": 13.16, + "learning_rate": 2.4725415070242657e-05, + "loss": 0.0001, + "step": 3435 + }, + { + "epoch": 13.16, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7799280881881714, + "eval_runtime": 71.791, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.306, + "step": 3435 + }, + { + "epoch": 13.18, + "learning_rate": 2.446998722860792e-05, + "loss": 0.0001, + "step": 3440 + }, + { + "epoch": 13.18, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7800962924957275, + "eval_runtime": 71.1707, + "eval_samples_per_second": 2.445, + "eval_steps_per_second": 0.309, + "step": 3440 + }, + { + "epoch": 13.2, + "learning_rate": 2.4214559386973183e-05, + "loss": 0.1536, + "step": 3445 + }, + { + "epoch": 13.2, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7748968601226807, + "eval_runtime": 71.7615, + "eval_samples_per_second": 2.425, + "eval_steps_per_second": 0.307, + "step": 3445 + }, + { + "epoch": 13.22, + "learning_rate": 2.3959131545338443e-05, + "loss": 0.0001, + "step": 3450 + }, + { + "epoch": 13.22, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7677268981933594, + "eval_runtime": 73.6553, + "eval_samples_per_second": 2.362, + "eval_steps_per_second": 0.299, + "step": 3450 + }, + { + "epoch": 13.24, + "learning_rate": 2.3703703703703707e-05, + "loss": 0.0001, + "step": 3455 + }, + { + "epoch": 13.24, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7640659809112549, + "eval_runtime": 71.7772, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.307, + "step": 3455 + }, + { + "epoch": 13.26, + "learning_rate": 2.3448275862068967e-05, + "loss": 0.0001, + "step": 3460 + }, + { + "epoch": 13.26, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7622041702270508, + "eval_runtime": 71.3282, + "eval_samples_per_second": 2.439, + "eval_steps_per_second": 0.308, + "step": 3460 + }, + { + "epoch": 13.28, + "learning_rate": 2.319284802043423e-05, + "loss": 0.0001, + "step": 3465 + }, + { + "epoch": 13.28, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.760646104812622, + "eval_runtime": 71.7718, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.307, + "step": 3465 + }, + { + "epoch": 13.3, + "learning_rate": 2.293742017879949e-05, + "loss": 0.0001, + "step": 3470 + }, + { + "epoch": 13.3, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7595189809799194, + "eval_runtime": 71.2335, + "eval_samples_per_second": 2.443, + "eval_steps_per_second": 0.309, + "step": 3470 + }, + { + "epoch": 13.31, + "learning_rate": 2.268199233716475e-05, + "loss": 0.0, + "step": 3475 + }, + { + "epoch": 13.31, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7589017152786255, + "eval_runtime": 73.1315, + "eval_samples_per_second": 2.379, + "eval_steps_per_second": 0.301, + "step": 3475 + }, + { + "epoch": 13.33, + "learning_rate": 2.2426564495530013e-05, + "loss": 0.0108, + "step": 3480 + }, + { + "epoch": 13.33, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7552155256271362, + "eval_runtime": 74.2293, + "eval_samples_per_second": 2.344, + "eval_steps_per_second": 0.296, + "step": 3480 + }, + { + "epoch": 13.35, + "learning_rate": 2.2171136653895273e-05, + "loss": 0.0088, + "step": 3485 + }, + { + "epoch": 13.35, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7531734704971313, + "eval_runtime": 73.069, + "eval_samples_per_second": 2.381, + "eval_steps_per_second": 0.301, + "step": 3485 + }, + { + "epoch": 13.37, + "learning_rate": 2.1915708812260537e-05, + "loss": 0.003, + "step": 3490 + }, + { + "epoch": 13.37, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7560912370681763, + "eval_runtime": 72.5457, + "eval_samples_per_second": 2.398, + "eval_steps_per_second": 0.303, + "step": 3490 + }, + { + "epoch": 13.39, + "learning_rate": 2.16602809706258e-05, + "loss": 0.0001, + "step": 3495 + }, + { + "epoch": 13.39, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.760205864906311, + "eval_runtime": 73.1685, + "eval_samples_per_second": 2.378, + "eval_steps_per_second": 0.301, + "step": 3495 + }, + { + "epoch": 13.41, + "learning_rate": 2.1404853128991063e-05, + "loss": 0.0001, + "step": 3500 + }, + { + "epoch": 13.41, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.763502836227417, + "eval_runtime": 74.1089, + "eval_samples_per_second": 2.348, + "eval_steps_per_second": 0.297, + "step": 3500 + }, + { + "epoch": 13.43, + "learning_rate": 2.1149425287356323e-05, + "loss": 0.0002, + "step": 3505 + }, + { + "epoch": 13.43, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.765455961227417, + "eval_runtime": 72.6454, + "eval_samples_per_second": 2.395, + "eval_steps_per_second": 0.303, + "step": 3505 + }, + { + "epoch": 13.45, + "learning_rate": 2.0893997445721587e-05, + "loss": 0.1119, + "step": 3510 + }, + { + "epoch": 13.45, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7576346397399902, + "eval_runtime": 72.2316, + "eval_samples_per_second": 2.409, + "eval_steps_per_second": 0.305, + "step": 3510 + }, + { + "epoch": 13.47, + "learning_rate": 2.0638569604086847e-05, + "loss": 0.0025, + "step": 3515 + }, + { + "epoch": 13.47, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7575277090072632, + "eval_runtime": 73.2772, + "eval_samples_per_second": 2.375, + "eval_steps_per_second": 0.3, + "step": 3515 + }, + { + "epoch": 13.49, + "learning_rate": 2.0383141762452107e-05, + "loss": 0.0001, + "step": 3520 + }, + { + "epoch": 13.49, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7568941116333008, + "eval_runtime": 74.9459, + "eval_samples_per_second": 2.322, + "eval_steps_per_second": 0.294, + "step": 3520 + }, + { + "epoch": 13.51, + "learning_rate": 2.012771392081737e-05, + "loss": 0.0, + "step": 3525 + }, + { + "epoch": 13.51, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7557793855667114, + "eval_runtime": 72.5811, + "eval_samples_per_second": 2.397, + "eval_steps_per_second": 0.303, + "step": 3525 + }, + { + "epoch": 13.52, + "learning_rate": 1.987228607918263e-05, + "loss": 0.0001, + "step": 3530 + }, + { + "epoch": 13.52, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7556322813034058, + "eval_runtime": 73.9541, + "eval_samples_per_second": 2.353, + "eval_steps_per_second": 0.297, + "step": 3530 + }, + { + "epoch": 13.54, + "learning_rate": 1.9616858237547893e-05, + "loss": 0.0001, + "step": 3535 + }, + { + "epoch": 13.54, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7552297115325928, + "eval_runtime": 73.7388, + "eval_samples_per_second": 2.36, + "eval_steps_per_second": 0.298, + "step": 3535 + }, + { + "epoch": 13.56, + "learning_rate": 1.9361430395913153e-05, + "loss": 0.0, + "step": 3540 + }, + { + "epoch": 13.56, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.755204439163208, + "eval_runtime": 74.1877, + "eval_samples_per_second": 2.345, + "eval_steps_per_second": 0.297, + "step": 3540 + }, + { + "epoch": 13.58, + "learning_rate": 1.9106002554278417e-05, + "loss": 0.0002, + "step": 3545 + }, + { + "epoch": 13.58, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7553986310958862, + "eval_runtime": 73.9835, + "eval_samples_per_second": 2.352, + "eval_steps_per_second": 0.297, + "step": 3545 + }, + { + "epoch": 13.6, + "learning_rate": 1.885057471264368e-05, + "loss": 0.0001, + "step": 3550 + }, + { + "epoch": 13.6, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.755505084991455, + "eval_runtime": 73.6282, + "eval_samples_per_second": 2.363, + "eval_steps_per_second": 0.299, + "step": 3550 + }, + { + "epoch": 13.62, + "learning_rate": 1.8595146871008943e-05, + "loss": 0.0001, + "step": 3555 + }, + { + "epoch": 13.62, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7556419372558594, + "eval_runtime": 73.3262, + "eval_samples_per_second": 2.373, + "eval_steps_per_second": 0.3, + "step": 3555 + }, + { + "epoch": 13.64, + "learning_rate": 1.8339719029374203e-05, + "loss": 0.0001, + "step": 3560 + }, + { + "epoch": 13.64, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7553131580352783, + "eval_runtime": 72.3509, + "eval_samples_per_second": 2.405, + "eval_steps_per_second": 0.304, + "step": 3560 + }, + { + "epoch": 13.66, + "learning_rate": 1.8084291187739463e-05, + "loss": 0.0002, + "step": 3565 + }, + { + "epoch": 13.66, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7552363872528076, + "eval_runtime": 74.575, + "eval_samples_per_second": 2.333, + "eval_steps_per_second": 0.295, + "step": 3565 + }, + { + "epoch": 13.68, + "learning_rate": 1.7828863346104727e-05, + "loss": 0.0001, + "step": 3570 + }, + { + "epoch": 13.68, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7547740936279297, + "eval_runtime": 74.1586, + "eval_samples_per_second": 2.346, + "eval_steps_per_second": 0.297, + "step": 3570 + }, + { + "epoch": 13.7, + "learning_rate": 1.7573435504469987e-05, + "loss": 0.0001, + "step": 3575 + }, + { + "epoch": 13.7, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7549880743026733, + "eval_runtime": 73.4905, + "eval_samples_per_second": 2.368, + "eval_steps_per_second": 0.299, + "step": 3575 + }, + { + "epoch": 13.72, + "learning_rate": 1.731800766283525e-05, + "loss": 0.0, + "step": 3580 + }, + { + "epoch": 13.72, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7544682025909424, + "eval_runtime": 72.5422, + "eval_samples_per_second": 2.399, + "eval_steps_per_second": 0.303, + "step": 3580 + }, + { + "epoch": 13.74, + "learning_rate": 1.706257982120051e-05, + "loss": 0.0001, + "step": 3585 + }, + { + "epoch": 13.74, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.755007028579712, + "eval_runtime": 73.8715, + "eval_samples_per_second": 2.355, + "eval_steps_per_second": 0.298, + "step": 3585 + }, + { + "epoch": 13.75, + "learning_rate": 1.6807151979565773e-05, + "loss": 0.0568, + "step": 3590 + }, + { + "epoch": 13.75, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7550690174102783, + "eval_runtime": 72.5531, + "eval_samples_per_second": 2.398, + "eval_steps_per_second": 0.303, + "step": 3590 + }, + { + "epoch": 13.77, + "learning_rate": 1.6551724137931037e-05, + "loss": 0.0001, + "step": 3595 + }, + { + "epoch": 13.77, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7550101280212402, + "eval_runtime": 75.6777, + "eval_samples_per_second": 2.299, + "eval_steps_per_second": 0.291, + "step": 3595 + }, + { + "epoch": 13.79, + "learning_rate": 1.62962962962963e-05, + "loss": 0.0001, + "step": 3600 + }, + { + "epoch": 13.79, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.754870891571045, + "eval_runtime": 72.8303, + "eval_samples_per_second": 2.389, + "eval_steps_per_second": 0.302, + "step": 3600 + }, + { + "epoch": 13.81, + "learning_rate": 1.604086845466156e-05, + "loss": 0.0001, + "step": 3605 + }, + { + "epoch": 13.81, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7544294595718384, + "eval_runtime": 73.3275, + "eval_samples_per_second": 2.373, + "eval_steps_per_second": 0.3, + "step": 3605 + }, + { + "epoch": 13.83, + "learning_rate": 1.578544061302682e-05, + "loss": 0.0528, + "step": 3610 + }, + { + "epoch": 13.83, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7550995349884033, + "eval_runtime": 71.3272, + "eval_samples_per_second": 2.439, + "eval_steps_per_second": 0.308, + "step": 3610 + }, + { + "epoch": 13.85, + "learning_rate": 1.5530012771392083e-05, + "loss": 0.0001, + "step": 3615 + }, + { + "epoch": 13.85, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7551816701889038, + "eval_runtime": 71.892, + "eval_samples_per_second": 2.42, + "eval_steps_per_second": 0.306, + "step": 3615 + }, + { + "epoch": 13.87, + "learning_rate": 1.5274584929757343e-05, + "loss": 0.0001, + "step": 3620 + }, + { + "epoch": 13.87, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7544273138046265, + "eval_runtime": 71.4145, + "eval_samples_per_second": 2.436, + "eval_steps_per_second": 0.308, + "step": 3620 + }, + { + "epoch": 13.89, + "learning_rate": 1.5019157088122607e-05, + "loss": 0.0001, + "step": 3625 + }, + { + "epoch": 13.89, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7543997764587402, + "eval_runtime": 71.7733, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.307, + "step": 3625 + }, + { + "epoch": 13.91, + "learning_rate": 1.4763729246487867e-05, + "loss": 0.0001, + "step": 3630 + }, + { + "epoch": 13.91, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.753382682800293, + "eval_runtime": 71.2431, + "eval_samples_per_second": 2.442, + "eval_steps_per_second": 0.309, + "step": 3630 + }, + { + "epoch": 13.93, + "learning_rate": 1.450830140485313e-05, + "loss": 0.0001, + "step": 3635 + }, + { + "epoch": 13.93, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7532877922058105, + "eval_runtime": 71.7023, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 3635 + }, + { + "epoch": 13.95, + "learning_rate": 1.4252873563218392e-05, + "loss": 0.0001, + "step": 3640 + }, + { + "epoch": 13.95, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7533704042434692, + "eval_runtime": 71.9533, + "eval_samples_per_second": 2.418, + "eval_steps_per_second": 0.306, + "step": 3640 + }, + { + "epoch": 13.97, + "learning_rate": 1.3997445721583655e-05, + "loss": 0.0, + "step": 3645 + }, + { + "epoch": 13.97, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7539461851119995, + "eval_runtime": 72.1079, + "eval_samples_per_second": 2.413, + "eval_steps_per_second": 0.305, + "step": 3645 + }, + { + "epoch": 13.98, + "learning_rate": 1.3742017879948915e-05, + "loss": 0.1209, + "step": 3650 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7514145374298096, + "eval_runtime": 73.1219, + "eval_samples_per_second": 2.38, + "eval_steps_per_second": 0.301, + "step": 3650 + }, + { + "epoch": 14.0, + "learning_rate": 1.3486590038314175e-05, + "loss": 0.1395, + "step": 3655 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7531005144119263, + "eval_runtime": 71.7851, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.306, + "step": 3655 + }, + { + "epoch": 14.02, + "learning_rate": 1.3231162196679438e-05, + "loss": 0.0037, + "step": 3660 + }, + { + "epoch": 14.02, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7534183263778687, + "eval_runtime": 71.1617, + "eval_samples_per_second": 2.445, + "eval_steps_per_second": 0.309, + "step": 3660 + }, + { + "epoch": 14.04, + "learning_rate": 1.29757343550447e-05, + "loss": 0.0001, + "step": 3665 + }, + { + "epoch": 14.04, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7549954652786255, + "eval_runtime": 71.701, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 3665 + }, + { + "epoch": 14.06, + "learning_rate": 1.2720306513409963e-05, + "loss": 0.0001, + "step": 3670 + }, + { + "epoch": 14.06, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.754940390586853, + "eval_runtime": 71.172, + "eval_samples_per_second": 2.445, + "eval_steps_per_second": 0.309, + "step": 3670 + }, + { + "epoch": 14.08, + "learning_rate": 1.2464878671775223e-05, + "loss": 0.0, + "step": 3675 + }, + { + "epoch": 14.08, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7556744813919067, + "eval_runtime": 71.7906, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.306, + "step": 3675 + }, + { + "epoch": 14.1, + "learning_rate": 1.2209450830140485e-05, + "loss": 0.0001, + "step": 3680 + }, + { + "epoch": 14.1, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.755606770515442, + "eval_runtime": 71.2401, + "eval_samples_per_second": 2.442, + "eval_steps_per_second": 0.309, + "step": 3680 + }, + { + "epoch": 14.12, + "learning_rate": 1.1954022988505748e-05, + "loss": 0.0001, + "step": 3685 + }, + { + "epoch": 14.12, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7560800313949585, + "eval_runtime": 73.8699, + "eval_samples_per_second": 2.355, + "eval_steps_per_second": 0.298, + "step": 3685 + }, + { + "epoch": 14.14, + "learning_rate": 1.169859514687101e-05, + "loss": 0.0956, + "step": 3690 + }, + { + "epoch": 14.14, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.749796748161316, + "eval_runtime": 72.7543, + "eval_samples_per_second": 2.392, + "eval_steps_per_second": 0.302, + "step": 3690 + }, + { + "epoch": 14.16, + "learning_rate": 1.1443167305236272e-05, + "loss": 0.0, + "step": 3695 + }, + { + "epoch": 14.16, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.745739459991455, + "eval_runtime": 71.7312, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 3695 + }, + { + "epoch": 14.18, + "learning_rate": 1.1187739463601533e-05, + "loss": 0.0001, + "step": 3700 + }, + { + "epoch": 14.18, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7437233924865723, + "eval_runtime": 71.1505, + "eval_samples_per_second": 2.446, + "eval_steps_per_second": 0.309, + "step": 3700 + }, + { + "epoch": 14.2, + "learning_rate": 1.0932311621966795e-05, + "loss": 0.0001, + "step": 3705 + }, + { + "epoch": 14.2, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.742749571800232, + "eval_runtime": 71.695, + "eval_samples_per_second": 2.427, + "eval_steps_per_second": 0.307, + "step": 3705 + }, + { + "epoch": 14.21, + "learning_rate": 1.0676883780332057e-05, + "loss": 0.0, + "step": 3710 + }, + { + "epoch": 14.21, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7426731586456299, + "eval_runtime": 71.2028, + "eval_samples_per_second": 2.444, + "eval_steps_per_second": 0.309, + "step": 3710 + }, + { + "epoch": 14.23, + "learning_rate": 1.0421455938697318e-05, + "loss": 0.0, + "step": 3715 + }, + { + "epoch": 14.23, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7424765825271606, + "eval_runtime": 71.7529, + "eval_samples_per_second": 2.425, + "eval_steps_per_second": 0.307, + "step": 3715 + }, + { + "epoch": 14.25, + "learning_rate": 1.016602809706258e-05, + "loss": 0.0558, + "step": 3720 + }, + { + "epoch": 14.25, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7421131134033203, + "eval_runtime": 71.1886, + "eval_samples_per_second": 2.444, + "eval_steps_per_second": 0.309, + "step": 3720 + }, + { + "epoch": 14.27, + "learning_rate": 9.910600255427842e-06, + "loss": 0.0003, + "step": 3725 + }, + { + "epoch": 14.27, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7416549921035767, + "eval_runtime": 71.8684, + "eval_samples_per_second": 2.421, + "eval_steps_per_second": 0.306, + "step": 3725 + }, + { + "epoch": 14.29, + "learning_rate": 9.655172413793103e-06, + "loss": 0.0001, + "step": 3730 + }, + { + "epoch": 14.29, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7416775226593018, + "eval_runtime": 71.2129, + "eval_samples_per_second": 2.443, + "eval_steps_per_second": 0.309, + "step": 3730 + }, + { + "epoch": 14.31, + "learning_rate": 9.399744572158365e-06, + "loss": 0.0, + "step": 3735 + }, + { + "epoch": 14.31, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7417203187942505, + "eval_runtime": 71.6762, + "eval_samples_per_second": 2.428, + "eval_steps_per_second": 0.307, + "step": 3735 + }, + { + "epoch": 14.33, + "learning_rate": 9.144316730523628e-06, + "loss": 0.0002, + "step": 3740 + }, + { + "epoch": 14.33, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7419242858886719, + "eval_runtime": 71.2197, + "eval_samples_per_second": 2.443, + "eval_steps_per_second": 0.309, + "step": 3740 + }, + { + "epoch": 14.35, + "learning_rate": 8.88888888888889e-06, + "loss": 0.0078, + "step": 3745 + }, + { + "epoch": 14.35, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7439537048339844, + "eval_runtime": 73.4872, + "eval_samples_per_second": 2.368, + "eval_steps_per_second": 0.299, + "step": 3745 + }, + { + "epoch": 14.37, + "learning_rate": 8.633461047254152e-06, + "loss": 0.0001, + "step": 3750 + }, + { + "epoch": 14.37, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7466919422149658, + "eval_runtime": 72.9233, + "eval_samples_per_second": 2.386, + "eval_steps_per_second": 0.302, + "step": 3750 + }, + { + "epoch": 14.39, + "learning_rate": 8.378033205619413e-06, + "loss": 0.0009, + "step": 3755 + }, + { + "epoch": 14.39, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7497990131378174, + "eval_runtime": 71.7246, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 3755 + }, + { + "epoch": 14.41, + "learning_rate": 8.122605363984675e-06, + "loss": 0.0992, + "step": 3760 + }, + { + "epoch": 14.41, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7496720552444458, + "eval_runtime": 71.1652, + "eval_samples_per_second": 2.445, + "eval_steps_per_second": 0.309, + "step": 3760 + }, + { + "epoch": 14.43, + "learning_rate": 7.867177522349937e-06, + "loss": 0.0004, + "step": 3765 + }, + { + "epoch": 14.43, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7489051818847656, + "eval_runtime": 71.7293, + "eval_samples_per_second": 2.426, + "eval_steps_per_second": 0.307, + "step": 3765 + }, + { + "epoch": 14.44, + "learning_rate": 7.611749680715198e-06, + "loss": 0.0001, + "step": 3770 + }, + { + "epoch": 14.44, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7488287687301636, + "eval_runtime": 71.1909, + "eval_samples_per_second": 2.444, + "eval_steps_per_second": 0.309, + "step": 3770 + }, + { + "epoch": 14.46, + "learning_rate": 7.35632183908046e-06, + "loss": 0.0001, + "step": 3775 + }, + { + "epoch": 14.46, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7490508556365967, + "eval_runtime": 71.7426, + "eval_samples_per_second": 2.425, + "eval_steps_per_second": 0.307, + "step": 3775 + }, + { + "epoch": 14.48, + "learning_rate": 7.100893997445722e-06, + "loss": 0.0001, + "step": 3780 + }, + { + "epoch": 14.48, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7489107847213745, + "eval_runtime": 71.2131, + "eval_samples_per_second": 2.443, + "eval_steps_per_second": 0.309, + "step": 3780 + }, + { + "epoch": 14.5, + "learning_rate": 6.845466155810984e-06, + "loss": 0.0001, + "step": 3785 + }, + { + "epoch": 14.5, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7490137815475464, + "eval_runtime": 71.8519, + "eval_samples_per_second": 2.422, + "eval_steps_per_second": 0.306, + "step": 3785 + }, + { + "epoch": 14.52, + "learning_rate": 6.590038314176246e-06, + "loss": 0.1097, + "step": 3790 + }, + { + "epoch": 14.52, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.750510573387146, + "eval_runtime": 71.3059, + "eval_samples_per_second": 2.44, + "eval_steps_per_second": 0.309, + "step": 3790 + }, + { + "epoch": 14.54, + "learning_rate": 6.3346104725415075e-06, + "loss": 0.0001, + "step": 3795 + }, + { + "epoch": 14.54, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7523186206817627, + "eval_runtime": 71.7857, + "eval_samples_per_second": 2.424, + "eval_steps_per_second": 0.306, + "step": 3795 + }, + { + "epoch": 14.56, + "learning_rate": 6.079182630906769e-06, + "loss": 0.0001, + "step": 3800 + }, + { + "epoch": 14.56, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7536077499389648, + "eval_runtime": 73.2363, + "eval_samples_per_second": 2.376, + "eval_steps_per_second": 0.3, + "step": 3800 + }, + { + "epoch": 14.58, + "learning_rate": 5.823754789272031e-06, + "loss": 0.0001, + "step": 3805 + }, + { + "epoch": 14.58, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7539523839950562, + "eval_runtime": 73.6071, + "eval_samples_per_second": 2.364, + "eval_steps_per_second": 0.299, + "step": 3805 + }, + { + "epoch": 14.6, + "learning_rate": 5.568326947637293e-06, + "loss": 0.0002, + "step": 3810 + }, + { + "epoch": 14.6, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7543648481369019, + "eval_runtime": 72.9966, + "eval_samples_per_second": 2.384, + "eval_steps_per_second": 0.301, + "step": 3810 + }, + { + "epoch": 14.62, + "learning_rate": 5.312899106002554e-06, + "loss": 0.0001, + "step": 3815 + }, + { + "epoch": 14.62, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7542980909347534, + "eval_runtime": 71.752, + "eval_samples_per_second": 2.425, + "eval_steps_per_second": 0.307, + "step": 3815 + }, + { + "epoch": 14.64, + "learning_rate": 5.057471264367817e-06, + "loss": 0.0001, + "step": 3820 + }, + { + "epoch": 14.64, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7545336484909058, + "eval_runtime": 71.8754, + "eval_samples_per_second": 2.421, + "eval_steps_per_second": 0.306, + "step": 3820 + }, + { + "epoch": 14.66, + "learning_rate": 4.802043422733078e-06, + "loss": 0.0002, + "step": 3825 + }, + { + "epoch": 14.66, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.754623293876648, + "eval_runtime": 71.767, + "eval_samples_per_second": 2.425, + "eval_steps_per_second": 0.307, + "step": 3825 + }, + { + "epoch": 14.67, + "learning_rate": 4.54661558109834e-06, + "loss": 0.0001, + "step": 3830 + }, + { + "epoch": 14.67, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7546714544296265, + "eval_runtime": 73.0008, + "eval_samples_per_second": 2.384, + "eval_steps_per_second": 0.301, + "step": 3830 + }, + { + "epoch": 14.69, + "learning_rate": 4.291187739463602e-06, + "loss": 0.0001, + "step": 3835 + }, + { + "epoch": 14.69, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7549469470977783, + "eval_runtime": 71.8595, + "eval_samples_per_second": 2.421, + "eval_steps_per_second": 0.306, + "step": 3835 + }, + { + "epoch": 14.71, + "learning_rate": 4.035759897828863e-06, + "loss": 0.0001, + "step": 3840 + }, + { + "epoch": 14.71, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7555478811264038, + "eval_runtime": 71.1999, + "eval_samples_per_second": 2.444, + "eval_steps_per_second": 0.309, + "step": 3840 + }, + { + "epoch": 14.73, + "learning_rate": 3.7803320561941254e-06, + "loss": 0.0284, + "step": 3845 + }, + { + "epoch": 14.73, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7555177211761475, + "eval_runtime": 73.5037, + "eval_samples_per_second": 2.367, + "eval_steps_per_second": 0.299, + "step": 3845 + }, + { + "epoch": 14.75, + "learning_rate": 3.5249042145593875e-06, + "loss": 0.0, + "step": 3850 + }, + { + "epoch": 14.75, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7556744813919067, + "eval_runtime": 71.2208, + "eval_samples_per_second": 2.443, + "eval_steps_per_second": 0.309, + "step": 3850 + }, + { + "epoch": 14.77, + "learning_rate": 3.269476372924649e-06, + "loss": 0.0001, + "step": 3855 + }, + { + "epoch": 14.77, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7554370164871216, + "eval_runtime": 75.3145, + "eval_samples_per_second": 2.31, + "eval_steps_per_second": 0.292, + "step": 3855 + }, + { + "epoch": 14.79, + "learning_rate": 3.014048531289911e-06, + "loss": 0.0, + "step": 3860 + }, + { + "epoch": 14.79, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7560198307037354, + "eval_runtime": 74.6084, + "eval_samples_per_second": 2.332, + "eval_steps_per_second": 0.295, + "step": 3860 + }, + { + "epoch": 14.81, + "learning_rate": 2.7586206896551725e-06, + "loss": 0.0001, + "step": 3865 + }, + { + "epoch": 14.81, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7560211420059204, + "eval_runtime": 73.5059, + "eval_samples_per_second": 2.367, + "eval_steps_per_second": 0.299, + "step": 3865 + }, + { + "epoch": 14.83, + "learning_rate": 2.5031928480204346e-06, + "loss": 0.0001, + "step": 3870 + }, + { + "epoch": 14.83, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7558296918869019, + "eval_runtime": 72.5044, + "eval_samples_per_second": 2.4, + "eval_steps_per_second": 0.303, + "step": 3870 + }, + { + "epoch": 14.85, + "learning_rate": 2.2477650063856962e-06, + "loss": 0.0001, + "step": 3875 + }, + { + "epoch": 14.85, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7559691667556763, + "eval_runtime": 74.4471, + "eval_samples_per_second": 2.337, + "eval_steps_per_second": 0.296, + "step": 3875 + }, + { + "epoch": 14.87, + "learning_rate": 1.992337164750958e-06, + "loss": 0.0002, + "step": 3880 + }, + { + "epoch": 14.87, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7558660507202148, + "eval_runtime": 72.5388, + "eval_samples_per_second": 2.399, + "eval_steps_per_second": 0.303, + "step": 3880 + }, + { + "epoch": 14.89, + "learning_rate": 1.7369093231162196e-06, + "loss": 0.0, + "step": 3885 + }, + { + "epoch": 14.89, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7562229633331299, + "eval_runtime": 72.5804, + "eval_samples_per_second": 2.397, + "eval_steps_per_second": 0.303, + "step": 3885 + }, + { + "epoch": 14.9, + "learning_rate": 1.4814814814814817e-06, + "loss": 0.0001, + "step": 3890 + }, + { + "epoch": 14.9, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7563174962997437, + "eval_runtime": 74.0885, + "eval_samples_per_second": 2.349, + "eval_steps_per_second": 0.297, + "step": 3890 + }, + { + "epoch": 14.92, + "learning_rate": 1.2260536398467433e-06, + "loss": 0.0, + "step": 3895 + }, + { + "epoch": 14.92, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.756478190422058, + "eval_runtime": 76.613, + "eval_samples_per_second": 2.271, + "eval_steps_per_second": 0.287, + "step": 3895 + }, + { + "epoch": 14.94, + "learning_rate": 9.706257982120052e-07, + "loss": 0.0, + "step": 3900 + }, + { + "epoch": 14.94, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.756520390510559, + "eval_runtime": 72.5435, + "eval_samples_per_second": 2.399, + "eval_steps_per_second": 0.303, + "step": 3900 + }, + { + "epoch": 14.96, + "learning_rate": 7.15197956577267e-07, + "loss": 0.0001, + "step": 3905 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7564539909362793, + "eval_runtime": 74.574, + "eval_samples_per_second": 2.333, + "eval_steps_per_second": 0.295, + "step": 3905 + }, + { + "epoch": 14.98, + "learning_rate": 4.5977011494252875e-07, + "loss": 0.1303, + "step": 3910 + }, + { + "epoch": 14.98, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.756199598312378, + "eval_runtime": 73.9195, + "eval_samples_per_second": 2.354, + "eval_steps_per_second": 0.298, + "step": 3910 + }, + { + "epoch": 15.0, + "learning_rate": 2.0434227330779057e-07, + "loss": 0.0001, + "step": 3915 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.755522608757019, + "eval_runtime": 73.61, + "eval_samples_per_second": 2.364, + "eval_steps_per_second": 0.299, + "step": 3915 + }, + { + "epoch": 15.0, + "step": 3915, + "total_flos": 1.592360968692695e+18, + "train_loss": 0.5163871185302369, + "train_runtime": 118001.491, + "train_samples_per_second": 0.199, + "train_steps_per_second": 0.033 + } + ], + "logging_steps": 5, + "max_steps": 3915, + "num_train_epochs": 15, + "save_steps": 5, + "total_flos": 1.592360968692695e+18, + "trial_name": null, + "trial_params": null +}