{ "best_metric": 0.6763644218444824, "best_model_checkpoint": "/d/hpc/projects/FRI/bb6846/run11//mt5-base/checkpoint-25861", "epoch": 28.0, "global_step": 51723, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.27, "learning_rate": 0.0004954881790290561, "loss": 2.2444, "step": 500 }, { "epoch": 0.54, "learning_rate": 0.0004909763580581122, "loss": 1.6704, "step": 1000 }, { "epoch": 0.81, "learning_rate": 0.00048646453708716834, "loss": 1.5382, "step": 1500 }, { "epoch": 1.0, "eval_LaBSE similarity": 0.6048825370314684, "eval_bleu": 0.18207720933969782, "eval_loss": 1.2285538911819458, "eval_runtime": 727.9323, "eval_samples_per_second": 19.2, "eval_steps_per_second": 1.201, "step": 1847 }, { "epoch": 1.08, "learning_rate": 0.00048195271611622454, "loss": 1.4208, "step": 2000 }, { "epoch": 1.35, "learning_rate": 0.00047744089514528064, "loss": 1.307, "step": 2500 }, { "epoch": 1.62, "learning_rate": 0.0004729290741743368, "loss": 1.2675, "step": 3000 }, { "epoch": 1.89, "learning_rate": 0.0004684172532033929, "loss": 1.2288, "step": 3500 }, { "epoch": 2.0, "eval_LaBSE similarity": 0.6103176045423836, "eval_bleu": 0.18286165047145764, "eval_loss": 1.0573153495788574, "eval_runtime": 727.6037, "eval_samples_per_second": 19.208, "eval_steps_per_second": 1.201, "step": 3694 }, { "epoch": 2.17, "learning_rate": 0.000463905432232449, "loss": 1.1399, "step": 4000 }, { "epoch": 2.44, "learning_rate": 0.0004593936112615051, "loss": 1.0766, "step": 4500 }, { "epoch": 2.71, "learning_rate": 0.00045488179029056126, "loss": 1.0643, "step": 5000 }, { "epoch": 2.98, "learning_rate": 0.0004503699693196174, "loss": 1.0478, "step": 5500 }, { "epoch": 3.0, "eval_LaBSE similarity": 0.6151767824817785, "eval_bleu": 0.18683028946919106, "eval_loss": 0.9635915160179138, "eval_runtime": 727.6118, "eval_samples_per_second": 19.208, "eval_steps_per_second": 1.201, "step": 5541 }, { "epoch": 3.25, "learning_rate": 0.00044585814834867355, "loss": 0.9375, "step": 6000 }, { "epoch": 3.52, "learning_rate": 0.00044134632737772965, "loss": 0.918, "step": 6500 }, { "epoch": 3.79, "learning_rate": 0.0004368345064067858, "loss": 0.9207, "step": 7000 }, { "epoch": 4.0, "eval_LaBSE similarity": 0.614242324825887, "eval_bleu": 0.18815387525896146, "eval_loss": 0.8977922797203064, "eval_runtime": 728.2433, "eval_samples_per_second": 19.191, "eval_steps_per_second": 1.2, "step": 7389 }, { "epoch": 4.06, "learning_rate": 0.0004323226854358419, "loss": 0.8848, "step": 7500 }, { "epoch": 4.33, "learning_rate": 0.00042781086446489803, "loss": 0.7985, "step": 8000 }, { "epoch": 4.6, "learning_rate": 0.0004232990434939541, "loss": 0.8032, "step": 8500 }, { "epoch": 4.87, "learning_rate": 0.00041878722252301027, "loss": 0.8037, "step": 9000 }, { "epoch": 5.0, "eval_LaBSE similarity": 0.622497909277748, "eval_bleu": 0.19241623278663542, "eval_loss": 0.8346570134162903, "eval_runtime": 727.6058, "eval_samples_per_second": 19.208, "eval_steps_per_second": 1.201, "step": 9236 }, { "epoch": 5.14, "learning_rate": 0.0004142754015520664, "loss": 0.7435, "step": 9500 }, { "epoch": 5.41, "learning_rate": 0.00040976358058112257, "loss": 0.7011, "step": 10000 }, { "epoch": 5.68, "learning_rate": 0.00040525175961017866, "loss": 0.7054, "step": 10500 }, { "epoch": 5.95, "learning_rate": 0.0004007399386392348, "loss": 0.705, "step": 11000 }, { "epoch": 6.0, "eval_LaBSE similarity": 0.6255796529372922, "eval_bleu": 0.19577697504607858, "eval_loss": 0.790106475353241, "eval_runtime": 729.5559, "eval_samples_per_second": 19.157, "eval_steps_per_second": 1.198, "step": 11083 }, { "epoch": 6.23, "learning_rate": 0.0003962281176682909, "loss": 0.6238, "step": 11500 }, { "epoch": 6.5, "learning_rate": 0.00039171629669734704, "loss": 0.6194, "step": 12000 }, { "epoch": 6.77, "learning_rate": 0.00038720447572640314, "loss": 0.6263, "step": 12500 }, { "epoch": 7.0, "eval_LaBSE similarity": 0.6281634647360271, "eval_bleu": 0.1984314625604209, "eval_loss": 0.7558700442314148, "eval_runtime": 728.5022, "eval_samples_per_second": 19.185, "eval_steps_per_second": 1.2, "step": 12930 }, { "epoch": 7.04, "learning_rate": 0.00038269265475545934, "loss": 0.611, "step": 13000 }, { "epoch": 7.31, "learning_rate": 0.00037818083378451543, "loss": 0.5407, "step": 13500 }, { "epoch": 7.58, "learning_rate": 0.0003736690128135716, "loss": 0.5506, "step": 14000 }, { "epoch": 7.85, "learning_rate": 0.00036915719184262767, "loss": 0.5596, "step": 14500 }, { "epoch": 8.0, "eval_LaBSE similarity": 0.6297791211180278, "eval_bleu": 0.2016469323333738, "eval_loss": 0.7360725998878479, "eval_runtime": 730.4745, "eval_samples_per_second": 19.133, "eval_steps_per_second": 1.196, "step": 14778 }, { "epoch": 8.12, "learning_rate": 0.0003646453708716838, "loss": 0.5205, "step": 15000 }, { "epoch": 8.39, "learning_rate": 0.0003601335499007399, "loss": 0.4852, "step": 15500 }, { "epoch": 8.66, "learning_rate": 0.00035562172892979606, "loss": 0.4916, "step": 16000 }, { "epoch": 8.93, "learning_rate": 0.0003511099079588522, "loss": 0.4974, "step": 16500 }, { "epoch": 9.0, "eval_LaBSE similarity": 0.6329554534294927, "eval_bleu": 0.20453006594808004, "eval_loss": 0.7115353345870972, "eval_runtime": 731.2775, "eval_samples_per_second": 19.112, "eval_steps_per_second": 1.195, "step": 16625 }, { "epoch": 9.2, "learning_rate": 0.00034659808698790835, "loss": 0.4433, "step": 17000 }, { "epoch": 9.47, "learning_rate": 0.00034208626601696444, "loss": 0.4341, "step": 17500 }, { "epoch": 9.74, "learning_rate": 0.0003375744450460206, "loss": 0.447, "step": 18000 }, { "epoch": 10.0, "eval_LaBSE similarity": 0.635916732698647, "eval_bleu": 0.20824504916149092, "eval_loss": 0.6980001926422119, "eval_runtime": 730.5297, "eval_samples_per_second": 19.131, "eval_steps_per_second": 1.196, "step": 18472 }, { "epoch": 10.01, "learning_rate": 0.0003330626240750767, "loss": 0.4488, "step": 18500 }, { "epoch": 10.29, "learning_rate": 0.00032855080310413283, "loss": 0.3861, "step": 19000 }, { "epoch": 10.56, "learning_rate": 0.0003240389821331889, "loss": 0.398, "step": 19500 }, { "epoch": 10.83, "learning_rate": 0.00031952716116224507, "loss": 0.4006, "step": 20000 }, { "epoch": 11.0, "eval_LaBSE similarity": 0.6365891740497193, "eval_bleu": 0.20953963135229853, "eval_loss": 0.690319836139679, "eval_runtime": 730.2358, "eval_samples_per_second": 19.139, "eval_steps_per_second": 1.197, "step": 20319 }, { "epoch": 11.1, "learning_rate": 0.0003150153401913012, "loss": 0.3803, "step": 20500 }, { "epoch": 11.37, "learning_rate": 0.00031050351922035736, "loss": 0.3551, "step": 21000 }, { "epoch": 11.64, "learning_rate": 0.00030599169824941345, "loss": 0.361, "step": 21500 }, { "epoch": 11.91, "learning_rate": 0.0003014798772784696, "loss": 0.3662, "step": 22000 }, { "epoch": 12.0, "eval_LaBSE similarity": 0.6368156212159983, "eval_bleu": 0.21027391744413493, "eval_loss": 0.689200222492218, "eval_runtime": 729.7202, "eval_samples_per_second": 19.153, "eval_steps_per_second": 1.198, "step": 22167 }, { "epoch": 12.18, "learning_rate": 0.0002969680563075257, "loss": 0.3313, "step": 22500 }, { "epoch": 12.45, "learning_rate": 0.00029245623533658184, "loss": 0.3244, "step": 23000 }, { "epoch": 12.72, "learning_rate": 0.00028794441436563793, "loss": 0.3321, "step": 23500 }, { "epoch": 12.99, "learning_rate": 0.00028343259339469413, "loss": 0.3365, "step": 24000 }, { "epoch": 13.0, "eval_LaBSE similarity": 0.64150025325299, "eval_bleu": 0.21448433499231642, "eval_loss": 0.6796755790710449, "eval_runtime": 731.0965, "eval_samples_per_second": 19.116, "eval_steps_per_second": 1.195, "step": 24014 }, { "epoch": 13.26, "learning_rate": 0.0002789207724237502, "loss": 0.2878, "step": 24500 }, { "epoch": 13.53, "learning_rate": 0.00027440895145280637, "loss": 0.2969, "step": 25000 }, { "epoch": 13.8, "learning_rate": 0.00026989713048186246, "loss": 0.3052, "step": 25500 }, { "epoch": 14.0, "eval_LaBSE similarity": 0.6435598843860529, "eval_bleu": 0.217333486806597, "eval_loss": 0.6763644218444824, "eval_runtime": 731.6928, "eval_samples_per_second": 19.101, "eval_steps_per_second": 1.194, "step": 25861 }, { "epoch": 14.07, "learning_rate": 0.0002653853095109186, "loss": 0.2962, "step": 26000 }, { "epoch": 14.35, "learning_rate": 0.0002608734885399747, "loss": 0.2648, "step": 26500 }, { "epoch": 14.62, "learning_rate": 0.00025636166756903085, "loss": 0.2746, "step": 27000 }, { "epoch": 14.89, "learning_rate": 0.000251849846598087, "loss": 0.2811, "step": 27500 }, { "epoch": 15.0, "eval_LaBSE similarity": 0.6415143596865642, "eval_bleu": 0.2176019486610223, "eval_loss": 0.6784259676933289, "eval_runtime": 731.0412, "eval_samples_per_second": 19.118, "eval_steps_per_second": 1.196, "step": 27708 }, { "epoch": 15.16, "learning_rate": 0.0002473380256271431, "loss": 0.2585, "step": 28000 }, { "epoch": 15.43, "learning_rate": 0.00024282620465619926, "loss": 0.2478, "step": 28500 }, { "epoch": 15.7, "learning_rate": 0.00023831438368525538, "loss": 0.2554, "step": 29000 }, { "epoch": 15.97, "learning_rate": 0.0002338025627143115, "loss": 0.2601, "step": 29500 }, { "epoch": 16.0, "eval_LaBSE similarity": 0.6428080388896847, "eval_bleu": 0.2193918672969541, "eval_loss": 0.681696355342865, "eval_runtime": 731.5003, "eval_samples_per_second": 19.106, "eval_steps_per_second": 1.195, "step": 29556 }, { "epoch": 16.24, "learning_rate": 0.00022929074174336762, "loss": 0.227, "step": 30000 }, { "epoch": 16.51, "learning_rate": 0.00022477892077242377, "loss": 0.2329, "step": 30500 }, { "epoch": 16.78, "learning_rate": 0.0002202670998014799, "loss": 0.2379, "step": 31000 }, { "epoch": 17.0, "eval_LaBSE similarity": 0.643077788578904, "eval_bleu": 0.21980244649693723, "eval_loss": 0.6875574588775635, "eval_runtime": 729.8897, "eval_samples_per_second": 19.148, "eval_steps_per_second": 1.197, "step": 31403 }, { "epoch": 17.05, "learning_rate": 0.000215755278830536, "loss": 0.2353, "step": 31500 }, { "epoch": 17.32, "learning_rate": 0.00021124345785959216, "loss": 0.2097, "step": 32000 }, { "epoch": 17.59, "learning_rate": 0.00020673163688864828, "loss": 0.2181, "step": 32500 }, { "epoch": 17.86, "learning_rate": 0.0002022198159177044, "loss": 0.2219, "step": 33000 }, { "epoch": 18.0, "eval_LaBSE similarity": 0.6413028645145259, "eval_bleu": 0.2195570154576433, "eval_loss": 0.6933163404464722, "eval_runtime": 730.845, "eval_samples_per_second": 19.123, "eval_steps_per_second": 1.196, "step": 33250 }, { "epoch": 18.14, "learning_rate": 0.00019770799494676051, "loss": 0.2073, "step": 33500 }, { "epoch": 18.41, "learning_rate": 0.00019319617397581666, "loss": 0.1972, "step": 34000 }, { "epoch": 18.68, "learning_rate": 0.00018868435300487278, "loss": 0.2055, "step": 34500 }, { "epoch": 18.95, "learning_rate": 0.0001841725320339289, "loss": 0.2078, "step": 35000 }, { "epoch": 19.0, "eval_LaBSE similarity": 0.6431215279734293, "eval_bleu": 0.22102576165957888, "eval_loss": 0.6970334053039551, "eval_runtime": 730.8583, "eval_samples_per_second": 19.123, "eval_steps_per_second": 1.196, "step": 35097 }, { "epoch": 19.22, "learning_rate": 0.00017966071106298505, "loss": 0.1855, "step": 35500 }, { "epoch": 19.49, "learning_rate": 0.00017514889009204117, "loss": 0.1862, "step": 36000 }, { "epoch": 19.76, "learning_rate": 0.0001706370691210973, "loss": 0.1921, "step": 36500 }, { "epoch": 20.0, "eval_LaBSE similarity": 0.644736666482104, "eval_bleu": 0.22277214090837807, "eval_loss": 0.7024596333503723, "eval_runtime": 731.5546, "eval_samples_per_second": 19.105, "eval_steps_per_second": 1.195, "step": 36945 }, { "epoch": 20.03, "learning_rate": 0.0001661252481501534, "loss": 0.1931, "step": 37000 }, { "epoch": 20.3, "learning_rate": 0.00016161342717920955, "loss": 0.1702, "step": 37500 }, { "epoch": 20.57, "learning_rate": 0.00015710160620826567, "loss": 0.1786, "step": 38000 }, { "epoch": 20.84, "learning_rate": 0.0001525897852373218, "loss": 0.1808, "step": 38500 }, { "epoch": 21.0, "eval_LaBSE similarity": 0.6434357442408501, "eval_bleu": 0.22258539444788225, "eval_loss": 0.7120693325996399, "eval_runtime": 732.5597, "eval_samples_per_second": 19.078, "eval_steps_per_second": 1.193, "step": 38792 }, { "epoch": 21.11, "learning_rate": 0.0001480779642663779, "loss": 0.1727, "step": 39000 }, { "epoch": 21.38, "learning_rate": 0.00014356614329543406, "loss": 0.1606, "step": 39500 }, { "epoch": 21.65, "learning_rate": 0.00013905432232449018, "loss": 0.168, "step": 40000 }, { "epoch": 21.92, "learning_rate": 0.0001345425013535463, "loss": 0.171, "step": 40500 }, { "epoch": 22.0, "eval_LaBSE similarity": 0.6431315611951554, "eval_bleu": 0.22194535539014804, "eval_loss": 0.7173079252243042, "eval_runtime": 730.9028, "eval_samples_per_second": 19.122, "eval_steps_per_second": 1.196, "step": 40639 }, { "epoch": 22.2, "learning_rate": 0.00013003068038260244, "loss": 0.1557, "step": 41000 }, { "epoch": 22.47, "learning_rate": 0.00012551885941165856, "loss": 0.155, "step": 41500 }, { "epoch": 22.74, "learning_rate": 0.00012100703844071467, "loss": 0.1586, "step": 42000 }, { "epoch": 23.0, "eval_LaBSE similarity": 0.6457557632484162, "eval_bleu": 0.22429042016550227, "eval_loss": 0.7178816795349121, "eval_runtime": 733.5242, "eval_samples_per_second": 19.053, "eval_steps_per_second": 1.192, "step": 42486 }, { "epoch": 23.01, "learning_rate": 0.0001164952174697708, "loss": 0.1604, "step": 42500 }, { "epoch": 23.28, "learning_rate": 0.00011198339649882692, "loss": 0.142, "step": 43000 }, { "epoch": 23.55, "learning_rate": 0.00010747157552788306, "loss": 0.147, "step": 43500 }, { "epoch": 23.82, "learning_rate": 0.00010295975455693918, "loss": 0.1516, "step": 44000 }, { "epoch": 24.0, "eval_LaBSE similarity": 0.6450597929800783, "eval_bleu": 0.223409056978912, "eval_loss": 0.7363256216049194, "eval_runtime": 731.1036, "eval_samples_per_second": 19.116, "eval_steps_per_second": 1.195, "step": 44334 }, { "epoch": 24.09, "learning_rate": 9.844793358599531e-05, "loss": 0.1471, "step": 44500 }, { "epoch": 24.36, "learning_rate": 9.393611261505143e-05, "loss": 0.1376, "step": 45000 }, { "epoch": 24.63, "learning_rate": 8.942429164410756e-05, "loss": 0.14, "step": 45500 }, { "epoch": 24.9, "learning_rate": 8.49124706731637e-05, "loss": 0.1425, "step": 46000 }, { "epoch": 25.0, "eval_LaBSE similarity": 0.6440151894096452, "eval_bleu": 0.22351487448534282, "eval_loss": 0.7389833331108093, "eval_runtime": 731.0483, "eval_samples_per_second": 19.118, "eval_steps_per_second": 1.196, "step": 46181 }, { "epoch": 25.17, "learning_rate": 8.040064970221982e-05, "loss": 0.1346, "step": 46500 }, { "epoch": 25.44, "learning_rate": 7.588882873127595e-05, "loss": 0.1318, "step": 47000 }, { "epoch": 25.71, "learning_rate": 7.137700776033207e-05, "loss": 0.1342, "step": 47500 }, { "epoch": 25.98, "learning_rate": 6.68651867893882e-05, "loss": 0.1357, "step": 48000 }, { "epoch": 26.0, "eval_LaBSE similarity": 0.6438781539249833, "eval_bleu": 0.22384959032900478, "eval_loss": 0.7478045225143433, "eval_runtime": 730.26, "eval_samples_per_second": 19.138, "eval_steps_per_second": 1.197, "step": 48028 }, { "epoch": 26.26, "learning_rate": 6.235336581844433e-05, "loss": 0.1246, "step": 48500 }, { "epoch": 26.53, "learning_rate": 5.7841544847500454e-05, "loss": 0.1262, "step": 49000 }, { "epoch": 26.8, "learning_rate": 5.332972387655658e-05, "loss": 0.1285, "step": 49500 }, { "epoch": 27.0, "eval_LaBSE similarity": 0.6438721785361547, "eval_bleu": 0.22299911848016674, "eval_loss": 0.7585892081260681, "eval_runtime": 730.7966, "eval_samples_per_second": 19.124, "eval_steps_per_second": 1.196, "step": 49875 }, { "epoch": 27.07, "learning_rate": 4.881790290561271e-05, "loss": 0.1258, "step": 50000 }, { "epoch": 27.34, "learning_rate": 4.430608193466883e-05, "loss": 0.1196, "step": 50500 }, { "epoch": 27.61, "learning_rate": 3.979426096372496e-05, "loss": 0.1208, "step": 51000 }, { "epoch": 27.88, "learning_rate": 3.5282439992781086e-05, "loss": 0.1234, "step": 51500 }, { "epoch": 28.0, "eval_LaBSE similarity": 0.6445591971687343, "eval_bleu": 0.22332463264359148, "eval_loss": 0.7620986700057983, "eval_runtime": 729.8836, "eval_samples_per_second": 19.148, "eval_steps_per_second": 1.197, "step": 51723 } ], "max_steps": 55410, "num_train_epochs": 30, "total_flos": 6.48299892475902e+17, "trial_name": null, "trial_params": null }