| { | |
| "best_metric": 91.36131386861314, | |
| "best_model_checkpoint": "outputs/bitfit/t5-base/superglue-record/checkpoint-1200", | |
| "epoch": 3.0, | |
| "global_step": 25779, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "eval_average_metrics": 89.01338199513381, | |
| "eval_em": 88.75912408759125, | |
| "eval_f1": 89.2676399026764, | |
| "eval_loss": 0.2185831516981125, | |
| "eval_runtime": 34.599, | |
| "eval_samples_per_second": 39.597, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_average_metrics": 89.71289537712894, | |
| "eval_em": 89.48905109489051, | |
| "eval_f1": 89.93673965936739, | |
| "eval_loss": 0.22008372843265533, | |
| "eval_runtime": 36.1565, | |
| "eval_samples_per_second": 37.891, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002941813103689049, | |
| "loss": 0.3381, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_average_metrics": 89.17518248175182, | |
| "eval_em": 88.9051094890511, | |
| "eval_f1": 89.44525547445255, | |
| "eval_loss": 0.22487574815750122, | |
| "eval_runtime": 38.7912, | |
| "eval_samples_per_second": 35.317, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_average_metrics": 90.32725060827251, | |
| "eval_em": 89.92700729927007, | |
| "eval_f1": 90.72749391727494, | |
| "eval_loss": 0.22092510759830475, | |
| "eval_runtime": 38.8963, | |
| "eval_samples_per_second": 35.222, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002883626207378098, | |
| "loss": 0.3288, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_average_metrics": 90.82603406326035, | |
| "eval_em": 90.58394160583941, | |
| "eval_f1": 91.06812652068128, | |
| "eval_loss": 0.2161024510860443, | |
| "eval_runtime": 38.0375, | |
| "eval_samples_per_second": 36.017, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_average_metrics": 91.36131386861314, | |
| "eval_em": 91.0948905109489, | |
| "eval_f1": 91.62773722627739, | |
| "eval_loss": 0.21504777669906616, | |
| "eval_runtime": 36.1024, | |
| "eval_samples_per_second": 37.948, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_average_metrics": 89.69829683698296, | |
| "eval_em": 89.41605839416059, | |
| "eval_f1": 89.98053527980535, | |
| "eval_loss": 0.21785993874073029, | |
| "eval_runtime": 38.4041, | |
| "eval_samples_per_second": 35.673, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00028254393110671474, | |
| "loss": 0.333, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_average_metrics": 89.78345498783455, | |
| "eval_em": 89.34306569343066, | |
| "eval_f1": 90.22384428223845, | |
| "eval_loss": 0.21883782744407654, | |
| "eval_runtime": 35.4761, | |
| "eval_samples_per_second": 38.618, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_average_metrics": 89.54866180048661, | |
| "eval_em": 89.41605839416059, | |
| "eval_f1": 89.68126520681264, | |
| "eval_loss": 0.22056403756141663, | |
| "eval_runtime": 35.8766, | |
| "eval_samples_per_second": 38.186, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0002767252414756197, | |
| "loss": 0.3283, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_average_metrics": 88.84306569343067, | |
| "eval_em": 88.39416058394161, | |
| "eval_f1": 89.29197080291972, | |
| "eval_loss": 0.22133098542690277, | |
| "eval_runtime": 36.0963, | |
| "eval_samples_per_second": 37.954, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_average_metrics": 89.49391727493916, | |
| "eval_em": 89.19708029197079, | |
| "eval_f1": 89.79075425790754, | |
| "eval_loss": 0.22017844021320343, | |
| "eval_runtime": 36.292, | |
| "eval_samples_per_second": 37.749, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_average_metrics": 90.77737226277371, | |
| "eval_em": 90.43795620437956, | |
| "eval_f1": 91.11678832116786, | |
| "eval_loss": 0.21572071313858032, | |
| "eval_runtime": 38.375, | |
| "eval_samples_per_second": 35.7, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0002709065518445246, | |
| "loss": 0.3258, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_average_metrics": 90.58880778588806, | |
| "eval_em": 90.2919708029197, | |
| "eval_f1": 90.88564476885644, | |
| "eval_loss": 0.21875151991844177, | |
| "eval_runtime": 36.4713, | |
| "eval_samples_per_second": 37.564, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_average_metrics": 89.81630170316302, | |
| "eval_em": 89.48905109489051, | |
| "eval_f1": 90.14355231143553, | |
| "eval_loss": 0.22119711339473724, | |
| "eval_runtime": 36.7112, | |
| "eval_samples_per_second": 37.318, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0002650878622134295, | |
| "loss": 0.3308, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_average_metrics": 89.86253041362531, | |
| "eval_em": 89.41605839416059, | |
| "eval_f1": 90.30900243309003, | |
| "eval_loss": 0.21812815964221954, | |
| "eval_runtime": 35.9089, | |
| "eval_samples_per_second": 38.152, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_average_metrics": 90.68004866180048, | |
| "eval_em": 90.2919708029197, | |
| "eval_f1": 91.06812652068126, | |
| "eval_loss": 0.21945150196552277, | |
| "eval_runtime": 36.7647, | |
| "eval_samples_per_second": 37.264, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_average_metrics": 88.52676399026764, | |
| "eval_em": 88.24817518248175, | |
| "eval_f1": 88.80535279805353, | |
| "eval_loss": 0.22326913475990295, | |
| "eval_runtime": 38.4494, | |
| "eval_samples_per_second": 35.631, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00025926917258233445, | |
| "loss": 0.3268, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_average_metrics": 89.9014598540146, | |
| "eval_em": 89.56204379562044, | |
| "eval_f1": 90.24087591240875, | |
| "eval_loss": 0.2204274833202362, | |
| "eval_runtime": 39.4301, | |
| "eval_samples_per_second": 34.745, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_average_metrics": 88.8491484184915, | |
| "eval_em": 88.54014598540147, | |
| "eval_f1": 89.15815085158151, | |
| "eval_loss": 0.22101299464702606, | |
| "eval_runtime": 36.7124, | |
| "eval_samples_per_second": 37.317, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0002534504829512394, | |
| "loss": 0.3266, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_average_metrics": 90.43673965936739, | |
| "eval_em": 90.07299270072993, | |
| "eval_f1": 90.80048661800487, | |
| "eval_loss": 0.2189851850271225, | |
| "eval_runtime": 38.8738, | |
| "eval_samples_per_second": 35.242, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_average_metrics": 88.7676399026764, | |
| "eval_em": 88.39416058394161, | |
| "eval_f1": 89.14111922141119, | |
| "eval_loss": 0.22029492259025574, | |
| "eval_runtime": 37.8194, | |
| "eval_samples_per_second": 36.225, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_average_metrics": 89.73722627737226, | |
| "eval_em": 89.41605839416059, | |
| "eval_f1": 90.05839416058394, | |
| "eval_loss": 0.21841323375701904, | |
| "eval_runtime": 39.6246, | |
| "eval_samples_per_second": 34.575, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0002476317933201443, | |
| "loss": 0.3309, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_average_metrics": 89.49391727493918, | |
| "eval_em": 89.27007299270073, | |
| "eval_f1": 89.71776155717761, | |
| "eval_loss": 0.2204442322254181, | |
| "eval_runtime": 39.9817, | |
| "eval_samples_per_second": 34.266, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_average_metrics": 89.69464720194647, | |
| "eval_em": 89.34306569343066, | |
| "eval_f1": 90.04622871046227, | |
| "eval_loss": 0.2244672179222107, | |
| "eval_runtime": 38.4253, | |
| "eval_samples_per_second": 35.654, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.0002418131036890492, | |
| "loss": 0.3285, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_average_metrics": 89.48783454987836, | |
| "eval_em": 89.27007299270073, | |
| "eval_f1": 89.70559610705597, | |
| "eval_loss": 0.21830883622169495, | |
| "eval_runtime": 38.7825, | |
| "eval_samples_per_second": 35.325, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_average_metrics": 88.56326034063261, | |
| "eval_em": 88.32116788321169, | |
| "eval_f1": 88.80535279805352, | |
| "eval_loss": 0.22253794968128204, | |
| "eval_runtime": 38.0561, | |
| "eval_samples_per_second": 36.0, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_average_metrics": 89.82846715328466, | |
| "eval_em": 89.56204379562044, | |
| "eval_f1": 90.09489051094889, | |
| "eval_loss": 0.22128665447235107, | |
| "eval_runtime": 39.7387, | |
| "eval_samples_per_second": 34.475, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.0002359944140579541, | |
| "loss": 0.3296, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_average_metrics": 89.02919708029196, | |
| "eval_em": 88.6861313868613, | |
| "eval_f1": 89.37226277372264, | |
| "eval_loss": 0.2234881967306137, | |
| "eval_runtime": 38.4221, | |
| "eval_samples_per_second": 35.657, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_average_metrics": 89.64355231143553, | |
| "eval_em": 89.34306569343066, | |
| "eval_f1": 89.9440389294404, | |
| "eval_loss": 0.2219015508890152, | |
| "eval_runtime": 39.7412, | |
| "eval_samples_per_second": 34.473, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00023017572442685905, | |
| "loss": 0.3377, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_average_metrics": 89.13503649635037, | |
| "eval_em": 88.9051094890511, | |
| "eval_f1": 89.36496350364963, | |
| "eval_loss": 0.22565637528896332, | |
| "eval_runtime": 38.9351, | |
| "eval_samples_per_second": 35.187, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_average_metrics": 89.66788321167883, | |
| "eval_em": 89.34306569343066, | |
| "eval_f1": 89.99270072992701, | |
| "eval_loss": 0.2238757461309433, | |
| "eval_runtime": 36.8334, | |
| "eval_samples_per_second": 37.195, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_average_metrics": 90.63138686131386, | |
| "eval_em": 90.2919708029197, | |
| "eval_f1": 90.970802919708, | |
| "eval_loss": 0.2218635529279709, | |
| "eval_runtime": 39.5931, | |
| "eval_samples_per_second": 34.602, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00022435703479576396, | |
| "loss": 0.3271, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_average_metrics": 90.15450121654501, | |
| "eval_em": 89.85401459854015, | |
| "eval_f1": 90.45498783454988, | |
| "eval_loss": 0.2218872755765915, | |
| "eval_runtime": 36.9426, | |
| "eval_samples_per_second": 37.085, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_average_metrics": 90.49513381995133, | |
| "eval_em": 90.14598540145985, | |
| "eval_f1": 90.84428223844282, | |
| "eval_loss": 0.2184651792049408, | |
| "eval_runtime": 38.3744, | |
| "eval_samples_per_second": 35.701, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.0002185383451646689, | |
| "loss": 0.3221, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_average_metrics": 91.12773722627736, | |
| "eval_em": 90.8029197080292, | |
| "eval_f1": 91.45255474452554, | |
| "eval_loss": 0.2153433859348297, | |
| "eval_runtime": 38.0801, | |
| "eval_samples_per_second": 35.977, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_average_metrics": 90.32481751824818, | |
| "eval_em": 90.0, | |
| "eval_f1": 90.64963503649636, | |
| "eval_loss": 0.21808864176273346, | |
| "eval_runtime": 39.0051, | |
| "eval_samples_per_second": 35.124, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_average_metrics": 89.08394160583941, | |
| "eval_em": 88.75912408759125, | |
| "eval_f1": 89.4087591240876, | |
| "eval_loss": 0.22276869416236877, | |
| "eval_runtime": 36.749, | |
| "eval_samples_per_second": 37.28, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00021271965553357382, | |
| "loss": 0.3226, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_average_metrics": 90.485401459854, | |
| "eval_em": 90.21897810218978, | |
| "eval_f1": 90.75182481751823, | |
| "eval_loss": 0.21675343811511993, | |
| "eval_runtime": 37.5554, | |
| "eval_samples_per_second": 36.479, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_average_metrics": 90.02311435523114, | |
| "eval_em": 89.78102189781022, | |
| "eval_f1": 90.26520681265205, | |
| "eval_loss": 0.21614064276218414, | |
| "eval_runtime": 33.4296, | |
| "eval_samples_per_second": 40.982, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00020690096590247876, | |
| "loss": 0.3262, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_average_metrics": 90.10583941605839, | |
| "eval_em": 89.78102189781022, | |
| "eval_f1": 90.43065693430657, | |
| "eval_loss": 0.2208666056394577, | |
| "eval_runtime": 35.7275, | |
| "eval_samples_per_second": 38.346, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_average_metrics": 88.86253041362531, | |
| "eval_em": 88.46715328467153, | |
| "eval_f1": 89.25790754257908, | |
| "eval_loss": 0.22296273708343506, | |
| "eval_runtime": 37.4484, | |
| "eval_samples_per_second": 36.584, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_average_metrics": 88.69708029197079, | |
| "eval_em": 88.39416058394161, | |
| "eval_f1": 88.99999999999999, | |
| "eval_loss": 0.2256641685962677, | |
| "eval_runtime": 36.1104, | |
| "eval_samples_per_second": 37.939, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00020108227627138367, | |
| "loss": 0.3246, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_average_metrics": 90.43673965936739, | |
| "eval_em": 90.14598540145985, | |
| "eval_f1": 90.72749391727494, | |
| "eval_loss": 0.2168925255537033, | |
| "eval_runtime": 36.8057, | |
| "eval_samples_per_second": 37.223, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_average_metrics": 90.17883211678833, | |
| "eval_em": 89.85401459854015, | |
| "eval_f1": 90.5036496350365, | |
| "eval_loss": 0.21890240907669067, | |
| "eval_runtime": 36.2138, | |
| "eval_samples_per_second": 37.831, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00019526358664028858, | |
| "loss": 0.3085, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "eval_average_metrics": 90.25182481751824, | |
| "eval_em": 90.0, | |
| "eval_f1": 90.5036496350365, | |
| "eval_loss": 0.21791431307792664, | |
| "eval_runtime": 36.94, | |
| "eval_samples_per_second": 37.087, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "eval_average_metrics": 90.31508515815085, | |
| "eval_em": 90.07299270072993, | |
| "eval_f1": 90.55717761557177, | |
| "eval_loss": 0.2160734087228775, | |
| "eval_runtime": 35.6639, | |
| "eval_samples_per_second": 38.414, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_average_metrics": 89.48783454987836, | |
| "eval_em": 89.27007299270073, | |
| "eval_f1": 89.70559610705597, | |
| "eval_loss": 0.22037485241889954, | |
| "eval_runtime": 37.8808, | |
| "eval_samples_per_second": 36.166, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.00018944489700919352, | |
| "loss": 0.3185, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_average_metrics": 88.35401459854015, | |
| "eval_em": 88.02919708029196, | |
| "eval_f1": 88.67883211678833, | |
| "eval_loss": 0.22213077545166016, | |
| "eval_runtime": 36.2657, | |
| "eval_samples_per_second": 37.777, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "eval_average_metrics": 87.77007299270073, | |
| "eval_em": 87.44525547445255, | |
| "eval_f1": 88.09489051094891, | |
| "eval_loss": 0.22431711852550507, | |
| "eval_runtime": 34.0786, | |
| "eval_samples_per_second": 40.201, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00018362620737809844, | |
| "loss": 0.3149, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "eval_average_metrics": 88.28953771289537, | |
| "eval_em": 88.02919708029196, | |
| "eval_f1": 88.54987834549878, | |
| "eval_loss": 0.2239065319299698, | |
| "eval_runtime": 37.3216, | |
| "eval_samples_per_second": 36.708, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "eval_average_metrics": 90.12043795620437, | |
| "eval_em": 89.85401459854015, | |
| "eval_f1": 90.3868613138686, | |
| "eval_loss": 0.21789653599262238, | |
| "eval_runtime": 35.8306, | |
| "eval_samples_per_second": 38.235, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "eval_average_metrics": 89.71654501216544, | |
| "eval_em": 89.41605839416059, | |
| "eval_f1": 90.0170316301703, | |
| "eval_loss": 0.2222936451435089, | |
| "eval_runtime": 35.4154, | |
| "eval_samples_per_second": 38.684, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.00017780751774700338, | |
| "loss": 0.3167, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "eval_average_metrics": 89.19586374695864, | |
| "eval_em": 88.83211678832117, | |
| "eval_f1": 89.5596107055961, | |
| "eval_loss": 0.22271069884300232, | |
| "eval_runtime": 35.7044, | |
| "eval_samples_per_second": 38.371, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "eval_average_metrics": 89.97445255474452, | |
| "eval_em": 89.7080291970803, | |
| "eval_f1": 90.24087591240875, | |
| "eval_loss": 0.22066636383533478, | |
| "eval_runtime": 34.2308, | |
| "eval_samples_per_second": 40.022, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.0001719888281159083, | |
| "loss": 0.3179, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "eval_average_metrics": 90.05717761557177, | |
| "eval_em": 89.78102189781022, | |
| "eval_f1": 90.33333333333333, | |
| "eval_loss": 0.2182897925376892, | |
| "eval_runtime": 36.8191, | |
| "eval_samples_per_second": 37.209, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "eval_average_metrics": 88.34428223844282, | |
| "eval_em": 88.02919708029196, | |
| "eval_f1": 88.65936739659367, | |
| "eval_loss": 0.22293207049369812, | |
| "eval_runtime": 38.8183, | |
| "eval_samples_per_second": 35.293, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "eval_average_metrics": 90.60705596107056, | |
| "eval_em": 90.36496350364963, | |
| "eval_f1": 90.84914841849148, | |
| "eval_loss": 0.2161208987236023, | |
| "eval_runtime": 38.6684, | |
| "eval_samples_per_second": 35.429, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.00016617013848481323, | |
| "loss": 0.3153, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "eval_average_metrics": 90.38807785888078, | |
| "eval_em": 90.14598540145985, | |
| "eval_f1": 90.6301703163017, | |
| "eval_loss": 0.2135632187128067, | |
| "eval_runtime": 38.983, | |
| "eval_samples_per_second": 35.144, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "eval_average_metrics": 89.14720194647202, | |
| "eval_em": 88.9051094890511, | |
| "eval_f1": 89.38929440389293, | |
| "eval_loss": 0.22012485563755035, | |
| "eval_runtime": 39.657, | |
| "eval_samples_per_second": 34.546, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.00016035144885371815, | |
| "loss": 0.3216, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_average_metrics": 89.6581508515815, | |
| "eval_em": 89.41605839416059, | |
| "eval_f1": 89.90024330900242, | |
| "eval_loss": 0.21762309968471527, | |
| "eval_runtime": 40.4674, | |
| "eval_samples_per_second": 33.854, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "eval_average_metrics": 89.30535279805352, | |
| "eval_em": 89.05109489051095, | |
| "eval_f1": 89.5596107055961, | |
| "eval_loss": 0.21773971617221832, | |
| "eval_runtime": 38.4174, | |
| "eval_samples_per_second": 35.661, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "eval_average_metrics": 90.169099756691, | |
| "eval_em": 89.92700729927007, | |
| "eval_f1": 90.41119221411194, | |
| "eval_loss": 0.21440057456493378, | |
| "eval_runtime": 37.9225, | |
| "eval_samples_per_second": 36.126, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 0.00015453275922262303, | |
| "loss": 0.3151, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "eval_average_metrics": 89.58515815085158, | |
| "eval_em": 89.34306569343066, | |
| "eval_f1": 89.82725060827251, | |
| "eval_loss": 0.21768692135810852, | |
| "eval_runtime": 36.5526, | |
| "eval_samples_per_second": 37.48, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "eval_average_metrics": 89.99878345498783, | |
| "eval_em": 89.78102189781022, | |
| "eval_f1": 90.21654501216545, | |
| "eval_loss": 0.2161240428686142, | |
| "eval_runtime": 39.5978, | |
| "eval_samples_per_second": 34.598, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.00014871406959152797, | |
| "loss": 0.3255, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "eval_average_metrics": 89.70681265206812, | |
| "eval_em": 89.48905109489051, | |
| "eval_f1": 89.92457420924575, | |
| "eval_loss": 0.21710088849067688, | |
| "eval_runtime": 38.5743, | |
| "eval_samples_per_second": 35.516, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "eval_average_metrics": 89.3515815085158, | |
| "eval_em": 89.05109489051095, | |
| "eval_f1": 89.65206812652067, | |
| "eval_loss": 0.21740640699863434, | |
| "eval_runtime": 38.7743, | |
| "eval_samples_per_second": 35.333, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "eval_average_metrics": 90.29075425790754, | |
| "eval_em": 90.07299270072993, | |
| "eval_f1": 90.50851581508516, | |
| "eval_loss": 0.21489615738391876, | |
| "eval_runtime": 41.332, | |
| "eval_samples_per_second": 33.146, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 0.00014289537996043292, | |
| "loss": 0.3129, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "eval_average_metrics": 89.81386861313868, | |
| "eval_em": 89.48905109489051, | |
| "eval_f1": 90.13868613138686, | |
| "eval_loss": 0.21436673402786255, | |
| "eval_runtime": 39.3785, | |
| "eval_samples_per_second": 34.791, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "eval_average_metrics": 89.77980535279806, | |
| "eval_em": 89.48905109489051, | |
| "eval_f1": 90.0705596107056, | |
| "eval_loss": 0.21632547676563263, | |
| "eval_runtime": 36.2514, | |
| "eval_samples_per_second": 37.792, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 0.00013707669032933783, | |
| "loss": 0.317, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "eval_average_metrics": 90.14476885644768, | |
| "eval_em": 89.85401459854015, | |
| "eval_f1": 90.43552311435523, | |
| "eval_loss": 0.21579378843307495, | |
| "eval_runtime": 38.9315, | |
| "eval_samples_per_second": 35.19, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "eval_average_metrics": 89.69221411192214, | |
| "eval_em": 89.34306569343066, | |
| "eval_f1": 90.04136253041362, | |
| "eval_loss": 0.21815675497055054, | |
| "eval_runtime": 38.141, | |
| "eval_samples_per_second": 35.919, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "eval_average_metrics": 90.26642335766422, | |
| "eval_em": 90.0, | |
| "eval_f1": 90.53284671532845, | |
| "eval_loss": 0.21684660017490387, | |
| "eval_runtime": 36.4617, | |
| "eval_samples_per_second": 37.574, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.00013125800069824274, | |
| "loss": 0.3204, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_average_metrics": 90.54379562043796, | |
| "eval_em": 90.21897810218978, | |
| "eval_f1": 90.86861313868614, | |
| "eval_loss": 0.21547624468803406, | |
| "eval_runtime": 39.6007, | |
| "eval_samples_per_second": 34.595, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "eval_average_metrics": 90.76277372262774, | |
| "eval_em": 90.43795620437956, | |
| "eval_f1": 91.08759124087592, | |
| "eval_loss": 0.2154303342103958, | |
| "eval_runtime": 37.9795, | |
| "eval_samples_per_second": 36.072, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.00012543931106714766, | |
| "loss": 0.3153, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "eval_average_metrics": 90.34914841849148, | |
| "eval_em": 90.0, | |
| "eval_f1": 90.69829683698296, | |
| "eval_loss": 0.21602213382720947, | |
| "eval_runtime": 40.2922, | |
| "eval_samples_per_second": 34.002, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "eval_average_metrics": 89.31751824817519, | |
| "eval_em": 89.05109489051095, | |
| "eval_f1": 89.58394160583941, | |
| "eval_loss": 0.2179516851902008, | |
| "eval_runtime": 36.6124, | |
| "eval_samples_per_second": 37.419, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "eval_average_metrics": 89.24452554744525, | |
| "eval_em": 88.97810218978101, | |
| "eval_f1": 89.51094890510949, | |
| "eval_loss": 0.2189921736717224, | |
| "eval_runtime": 37.6709, | |
| "eval_samples_per_second": 36.368, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.00011962062143605258, | |
| "loss": 0.3104, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "eval_average_metrics": 89.02554744525548, | |
| "eval_em": 88.75912408759125, | |
| "eval_f1": 89.29197080291972, | |
| "eval_loss": 0.21728622913360596, | |
| "eval_runtime": 37.6931, | |
| "eval_samples_per_second": 36.346, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "eval_average_metrics": 90.48540145985402, | |
| "eval_em": 90.21897810218978, | |
| "eval_f1": 90.75182481751824, | |
| "eval_loss": 0.21391963958740234, | |
| "eval_runtime": 37.4604, | |
| "eval_samples_per_second": 36.572, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 0.00011380193180495751, | |
| "loss": 0.3221, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "eval_average_metrics": 90.92335766423358, | |
| "eval_em": 90.65693430656935, | |
| "eval_f1": 91.18978102189779, | |
| "eval_loss": 0.21324890851974487, | |
| "eval_runtime": 36.0923, | |
| "eval_samples_per_second": 37.958, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "eval_average_metrics": 90.36374695863748, | |
| "eval_em": 90.07299270072993, | |
| "eval_f1": 90.65450121654501, | |
| "eval_loss": 0.21571427583694458, | |
| "eval_runtime": 36.3418, | |
| "eval_samples_per_second": 37.698, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "eval_average_metrics": 90.58272506082724, | |
| "eval_em": 90.2919708029197, | |
| "eval_f1": 90.8734793187348, | |
| "eval_loss": 0.21374623477458954, | |
| "eval_runtime": 35.332, | |
| "eval_samples_per_second": 38.775, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 0.00010798324217386244, | |
| "loss": 0.3142, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "eval_average_metrics": 90.41240875912409, | |
| "eval_em": 90.14598540145985, | |
| "eval_f1": 90.67883211678833, | |
| "eval_loss": 0.21370555460453033, | |
| "eval_runtime": 37.2326, | |
| "eval_samples_per_second": 36.796, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "eval_average_metrics": 89.75547445255475, | |
| "eval_em": 89.48905109489051, | |
| "eval_f1": 90.02189781021897, | |
| "eval_loss": 0.21746164560317993, | |
| "eval_runtime": 35.4995, | |
| "eval_samples_per_second": 38.592, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 0.00010216455254276737, | |
| "loss": 0.3206, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "eval_average_metrics": 89.82846715328466, | |
| "eval_em": 89.56204379562044, | |
| "eval_f1": 90.0948905109489, | |
| "eval_loss": 0.2167847603559494, | |
| "eval_runtime": 37.0794, | |
| "eval_samples_per_second": 36.948, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_average_metrics": 90.26642335766422, | |
| "eval_em": 90.0, | |
| "eval_f1": 90.53284671532846, | |
| "eval_loss": 0.21559016406536102, | |
| "eval_runtime": 37.9031, | |
| "eval_samples_per_second": 36.145, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "eval_average_metrics": 89.97445255474452, | |
| "eval_em": 89.7080291970803, | |
| "eval_f1": 90.24087591240874, | |
| "eval_loss": 0.21499326825141907, | |
| "eval_runtime": 33.4521, | |
| "eval_samples_per_second": 40.954, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 9.634586291167229e-05, | |
| "loss": 0.3109, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "eval_average_metrics": 90.14476885644768, | |
| "eval_em": 89.85401459854015, | |
| "eval_f1": 90.43552311435523, | |
| "eval_loss": 0.21627335250377655, | |
| "eval_runtime": 35.646, | |
| "eval_samples_per_second": 38.434, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "eval_average_metrics": 90.48540145985402, | |
| "eval_em": 90.14598540145985, | |
| "eval_f1": 90.8248175182482, | |
| "eval_loss": 0.21596278250217438, | |
| "eval_runtime": 36.6834, | |
| "eval_samples_per_second": 37.347, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 9.052717328057722e-05, | |
| "loss": 0.3035, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "eval_average_metrics": 89.97445255474453, | |
| "eval_em": 89.7080291970803, | |
| "eval_f1": 90.24087591240877, | |
| "eval_loss": 0.21806372702121735, | |
| "eval_runtime": 34.0399, | |
| "eval_samples_per_second": 40.247, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "eval_average_metrics": 89.58515815085158, | |
| "eval_em": 89.34306569343066, | |
| "eval_f1": 89.82725060827251, | |
| "eval_loss": 0.2175503671169281, | |
| "eval_runtime": 35.3257, | |
| "eval_samples_per_second": 38.782, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "eval_average_metrics": 90.29075425790754, | |
| "eval_em": 90.0, | |
| "eval_f1": 90.58150851581509, | |
| "eval_loss": 0.21528899669647217, | |
| "eval_runtime": 37.4995, | |
| "eval_samples_per_second": 36.534, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 8.470848364948212e-05, | |
| "loss": 0.3102, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "eval_average_metrics": 89.9501216545012, | |
| "eval_em": 89.7080291970803, | |
| "eval_f1": 90.19221411192213, | |
| "eval_loss": 0.21668794751167297, | |
| "eval_runtime": 36.8393, | |
| "eval_samples_per_second": 37.189, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "eval_average_metrics": 90.04744525547446, | |
| "eval_em": 89.78102189781022, | |
| "eval_f1": 90.31386861313868, | |
| "eval_loss": 0.2147464007139206, | |
| "eval_runtime": 36.4341, | |
| "eval_samples_per_second": 37.602, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 7.888979401838705e-05, | |
| "loss": 0.2992, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "eval_average_metrics": 89.56082725060827, | |
| "eval_em": 89.34306569343066, | |
| "eval_f1": 89.77858880778588, | |
| "eval_loss": 0.21566100418567657, | |
| "eval_runtime": 37.3837, | |
| "eval_samples_per_second": 36.647, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "eval_average_metrics": 89.26885644768856, | |
| "eval_em": 89.05109489051095, | |
| "eval_f1": 89.48661800486619, | |
| "eval_loss": 0.21698147058486938, | |
| "eval_runtime": 34.5662, | |
| "eval_samples_per_second": 39.634, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "eval_average_metrics": 89.9501216545012, | |
| "eval_em": 89.7080291970803, | |
| "eval_f1": 90.19221411192213, | |
| "eval_loss": 0.21671971678733826, | |
| "eval_runtime": 39.0865, | |
| "eval_samples_per_second": 35.05, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 7.307110438729197e-05, | |
| "loss": 0.3104, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "eval_average_metrics": 88.92822384428223, | |
| "eval_em": 88.6861313868613, | |
| "eval_f1": 89.17031630170315, | |
| "eval_loss": 0.21831820905208588, | |
| "eval_runtime": 38.0065, | |
| "eval_samples_per_second": 36.046, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "eval_average_metrics": 89.82846715328466, | |
| "eval_em": 89.56204379562044, | |
| "eval_f1": 90.09489051094889, | |
| "eval_loss": 0.2175111472606659, | |
| "eval_runtime": 38.7133, | |
| "eval_samples_per_second": 35.388, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 6.72524147561969e-05, | |
| "loss": 0.299, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "eval_average_metrics": 90.04744525547446, | |
| "eval_em": 89.78102189781022, | |
| "eval_f1": 90.31386861313868, | |
| "eval_loss": 0.21775414049625397, | |
| "eval_runtime": 37.1614, | |
| "eval_samples_per_second": 36.866, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "eval_average_metrics": 90.33941605839416, | |
| "eval_em": 90.07299270072993, | |
| "eval_f1": 90.60583941605839, | |
| "eval_loss": 0.21791471540927887, | |
| "eval_runtime": 38.7185, | |
| "eval_samples_per_second": 35.384, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "eval_average_metrics": 90.85036496350364, | |
| "eval_em": 90.58394160583941, | |
| "eval_f1": 91.11678832116786, | |
| "eval_loss": 0.21669700741767883, | |
| "eval_runtime": 36.9688, | |
| "eval_samples_per_second": 37.058, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 6.143372512510183e-05, | |
| "loss": 0.3106, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_average_metrics": 90.99635036496349, | |
| "eval_em": 90.72992700729927, | |
| "eval_f1": 91.26277372262773, | |
| "eval_loss": 0.21628382802009583, | |
| "eval_runtime": 36.1926, | |
| "eval_samples_per_second": 37.853, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "eval_average_metrics": 90.485401459854, | |
| "eval_em": 90.21897810218978, | |
| "eval_f1": 90.75182481751823, | |
| "eval_loss": 0.2179671972990036, | |
| "eval_runtime": 35.7701, | |
| "eval_samples_per_second": 38.3, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 5.561503549400675e-05, | |
| "loss": 0.3131, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "eval_average_metrics": 89.90145985401459, | |
| "eval_em": 89.63503649635037, | |
| "eval_f1": 90.16788321167881, | |
| "eval_loss": 0.21815823018550873, | |
| "eval_runtime": 38.6202, | |
| "eval_samples_per_second": 35.474, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "eval_average_metrics": 89.81386861313868, | |
| "eval_em": 89.48905109489051, | |
| "eval_f1": 90.13868613138686, | |
| "eval_loss": 0.21742023527622223, | |
| "eval_runtime": 38.6698, | |
| "eval_samples_per_second": 35.428, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "eval_average_metrics": 90.54379562043796, | |
| "eval_em": 90.21897810218978, | |
| "eval_f1": 90.86861313868614, | |
| "eval_loss": 0.21656812727451324, | |
| "eval_runtime": 37.7889, | |
| "eval_samples_per_second": 36.254, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 4.9796345862911663e-05, | |
| "loss": 0.3109, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "eval_average_metrics": 90.47080291970804, | |
| "eval_em": 90.14598540145985, | |
| "eval_f1": 90.79562043795622, | |
| "eval_loss": 0.21554183959960938, | |
| "eval_runtime": 40.3847, | |
| "eval_samples_per_second": 33.924, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "eval_average_metrics": 90.55839416058393, | |
| "eval_em": 90.2919708029197, | |
| "eval_f1": 90.82481751824815, | |
| "eval_loss": 0.21551626920700073, | |
| "eval_runtime": 40.9505, | |
| "eval_samples_per_second": 33.455, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 4.397765623181659e-05, | |
| "loss": 0.3139, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "eval_average_metrics": 90.53406326034063, | |
| "eval_em": 90.21897810218978, | |
| "eval_f1": 90.84914841849148, | |
| "eval_loss": 0.21652992069721222, | |
| "eval_runtime": 39.2455, | |
| "eval_samples_per_second": 34.908, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "eval_average_metrics": 90.38807785888076, | |
| "eval_em": 90.07299270072993, | |
| "eval_f1": 90.70316301703161, | |
| "eval_loss": 0.2176700383424759, | |
| "eval_runtime": 39.6231, | |
| "eval_samples_per_second": 34.576, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "eval_average_metrics": 90.82603406326034, | |
| "eval_em": 90.51094890510949, | |
| "eval_f1": 91.14111922141119, | |
| "eval_loss": 0.21595974266529083, | |
| "eval_runtime": 38.5872, | |
| "eval_samples_per_second": 35.504, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 3.815896660072152e-05, | |
| "loss": 0.3012, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "eval_average_metrics": 90.60705596107056, | |
| "eval_em": 90.2919708029197, | |
| "eval_f1": 90.9221411192214, | |
| "eval_loss": 0.2160317748785019, | |
| "eval_runtime": 38.122, | |
| "eval_samples_per_second": 35.937, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "eval_average_metrics": 90.65571776155718, | |
| "eval_em": 90.36496350364963, | |
| "eval_f1": 90.94647201946472, | |
| "eval_loss": 0.21644768118858337, | |
| "eval_runtime": 34.8398, | |
| "eval_samples_per_second": 39.323, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 3.234027696962644e-05, | |
| "loss": 0.3084, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "eval_average_metrics": 90.169099756691, | |
| "eval_em": 89.85401459854015, | |
| "eval_f1": 90.48418491484183, | |
| "eval_loss": 0.21626520156860352, | |
| "eval_runtime": 34.9713, | |
| "eval_samples_per_second": 39.175, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "eval_average_metrics": 89.58515815085158, | |
| "eval_em": 89.27007299270073, | |
| "eval_f1": 89.90024330900242, | |
| "eval_loss": 0.2173507809638977, | |
| "eval_runtime": 34.5287, | |
| "eval_samples_per_second": 39.677, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "eval_average_metrics": 89.58515815085158, | |
| "eval_em": 89.27007299270073, | |
| "eval_f1": 89.90024330900242, | |
| "eval_loss": 0.21798554062843323, | |
| "eval_runtime": 32.6335, | |
| "eval_samples_per_second": 41.981, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 2.6521587338531362e-05, | |
| "loss": 0.3034, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "eval_average_metrics": 89.6581508515815, | |
| "eval_em": 89.34306569343066, | |
| "eval_f1": 89.97323600973235, | |
| "eval_loss": 0.2169240415096283, | |
| "eval_runtime": 31.2981, | |
| "eval_samples_per_second": 43.773, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "eval_average_metrics": 90.02311435523114, | |
| "eval_em": 89.7080291970803, | |
| "eval_f1": 90.338199513382, | |
| "eval_loss": 0.2169690728187561, | |
| "eval_runtime": 31.3076, | |
| "eval_samples_per_second": 43.759, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 2.0702897707436283e-05, | |
| "loss": 0.3087, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "eval_average_metrics": 90.53406326034063, | |
| "eval_em": 90.21897810218978, | |
| "eval_f1": 90.84914841849148, | |
| "eval_loss": 0.2168567031621933, | |
| "eval_runtime": 32.1016, | |
| "eval_samples_per_second": 42.677, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_average_metrics": 90.169099756691, | |
| "eval_em": 89.85401459854015, | |
| "eval_f1": 90.48418491484183, | |
| "eval_loss": 0.21707168221473694, | |
| "eval_runtime": 34.5668, | |
| "eval_samples_per_second": 39.633, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "eval_average_metrics": 90.169099756691, | |
| "eval_em": 89.85401459854015, | |
| "eval_f1": 90.48418491484183, | |
| "eval_loss": 0.2168397605419159, | |
| "eval_runtime": 35.1948, | |
| "eval_samples_per_second": 38.926, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 1.4884208076341207e-05, | |
| "loss": 0.3038, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "eval_average_metrics": 89.14720194647202, | |
| "eval_em": 88.83211678832117, | |
| "eval_f1": 89.46228710462286, | |
| "eval_loss": 0.21729852259159088, | |
| "eval_runtime": 35.4024, | |
| "eval_samples_per_second": 38.698, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "eval_average_metrics": 89.99878345498783, | |
| "eval_em": 89.7080291970803, | |
| "eval_f1": 90.28953771289538, | |
| "eval_loss": 0.21667222678661346, | |
| "eval_runtime": 34.8507, | |
| "eval_samples_per_second": 39.311, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 9.065518445246129e-06, | |
| "loss": 0.3083, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "eval_average_metrics": 90.21776155717762, | |
| "eval_em": 89.92700729927007, | |
| "eval_f1": 90.50851581508516, | |
| "eval_loss": 0.216518834233284, | |
| "eval_runtime": 34.7968, | |
| "eval_samples_per_second": 39.371, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "eval_average_metrics": 90.07177615571777, | |
| "eval_em": 89.78102189781022, | |
| "eval_f1": 90.36253041362531, | |
| "eval_loss": 0.21596592664718628, | |
| "eval_runtime": 34.8982, | |
| "eval_samples_per_second": 39.257, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "eval_average_metrics": 90.07177615571777, | |
| "eval_em": 89.78102189781022, | |
| "eval_f1": 90.36253041362531, | |
| "eval_loss": 0.21597927808761597, | |
| "eval_runtime": 35.1856, | |
| "eval_samples_per_second": 38.936, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 3.2468288141510528e-06, | |
| "loss": 0.3048, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "eval_average_metrics": 90.07177615571777, | |
| "eval_em": 89.78102189781022, | |
| "eval_f1": 90.36253041362531, | |
| "eval_loss": 0.21600975096225739, | |
| "eval_runtime": 33.008, | |
| "eval_samples_per_second": 41.505, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 25779, | |
| "total_flos": 2.1177253912543027e+17, | |
| "train_loss": 0.3175360437943145, | |
| "train_runtime": 17672.1841, | |
| "train_samples_per_second": 23.339, | |
| "train_steps_per_second": 1.459 | |
| } | |
| ], | |
| "max_steps": 25779, | |
| "num_train_epochs": 3, | |
| "total_flos": 2.1177253912543027e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |