{ "best_metric": 0.8703868389129639, "best_model_checkpoint": "/d/hpc/projects/FRI/bb6846/run11//mbart-large-50/checkpoint-11083", "epoch": 22.999593991067805, "global_step": 42486, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.27, "learning_rate": 0.0004954881790290561, "loss": 5.4927, "step": 500 }, { "epoch": 0.54, "learning_rate": 0.0004909763580581122, "loss": 3.5335, "step": 1000 }, { "epoch": 0.81, "learning_rate": 0.00048646453708716834, "loss": 2.9081, "step": 1500 }, { "epoch": 1.0, "eval_LaBSE similarity": 0.5398781112980036, "eval_bleu": 0.09115205757865759, "eval_loss": 2.2135918140411377, "eval_runtime": 1365.0262, "eval_samples_per_second": 10.239, "eval_steps_per_second": 0.64, "step": 1847 }, { "epoch": 1.08, "learning_rate": 0.00048195271611622454, "loss": 2.2837, "step": 2000 }, { "epoch": 1.35, "learning_rate": 0.00047744089514528064, "loss": 1.801, "step": 2500 }, { "epoch": 1.62, "learning_rate": 0.0004729290741743368, "loss": 1.5852, "step": 3000 }, { "epoch": 1.89, "learning_rate": 0.0004684172532033929, "loss": 1.4323, "step": 3500 }, { "epoch": 2.0, "eval_LaBSE similarity": 0.6886588053033609, "eval_bleu": 0.25496466080419466, "eval_loss": 1.3433868885040283, "eval_runtime": 1227.9131, "eval_samples_per_second": 11.382, "eval_steps_per_second": 0.712, "step": 3694 }, { "epoch": 2.17, "learning_rate": 0.000463905432232449, "loss": 1.1374, "step": 4000 }, { "epoch": 2.44, "learning_rate": 0.0004593936112615051, "loss": 1.0137, "step": 4500 }, { "epoch": 2.71, "learning_rate": 0.00045488179029056126, "loss": 0.9937, "step": 5000 }, { "epoch": 2.98, "learning_rate": 0.0004503699693196174, "loss": 0.9558, "step": 5500 }, { "epoch": 3.0, "eval_LaBSE similarity": 0.7105592780650029, "eval_bleu": 0.2856338691080233, "eval_loss": 1.0794386863708496, "eval_runtime": 1168.6662, "eval_samples_per_second": 11.959, "eval_steps_per_second": 0.748, "step": 5541 }, { "epoch": 3.25, "learning_rate": 0.00044585814834867355, "loss": 0.6749, "step": 6000 }, { "epoch": 3.52, "learning_rate": 0.00044134632737772965, "loss": 0.6931, "step": 6500 }, { "epoch": 3.79, "learning_rate": 0.0004368345064067858, "loss": 0.6971, "step": 7000 }, { "epoch": 4.0, "eval_LaBSE similarity": 0.7229989019695549, "eval_bleu": 0.3177643404347968, "eval_loss": 0.9699832201004028, "eval_runtime": 1195.5257, "eval_samples_per_second": 11.69, "eval_steps_per_second": 0.731, "step": 7389 }, { "epoch": 4.06, "learning_rate": 0.0004323226854358419, "loss": 0.6386, "step": 7500 }, { "epoch": 4.33, "learning_rate": 0.00042781086446489803, "loss": 0.4793, "step": 8000 }, { "epoch": 4.6, "learning_rate": 0.0004232990434939541, "loss": 0.5143, "step": 8500 }, { "epoch": 4.87, "learning_rate": 0.00041878722252301027, "loss": 0.5263, "step": 9000 }, { "epoch": 5.0, "eval_LaBSE similarity": 0.7233434942747937, "eval_bleu": 0.3231544767102279, "eval_loss": 0.9114313125610352, "eval_runtime": 1133.9982, "eval_samples_per_second": 12.325, "eval_steps_per_second": 0.771, "step": 9236 }, { "epoch": 5.14, "learning_rate": 0.0004142754015520664, "loss": 0.4303, "step": 9500 }, { "epoch": 5.41, "learning_rate": 0.00040976358058112257, "loss": 0.3777, "step": 10000 }, { "epoch": 5.68, "learning_rate": 0.00040525175961017866, "loss": 0.4072, "step": 10500 }, { "epoch": 5.95, "learning_rate": 0.0004007399386392348, "loss": 0.419, "step": 11000 }, { "epoch": 6.0, "eval_LaBSE similarity": 0.7420425884927818, "eval_bleu": 0.3542640467481141, "eval_loss": 0.8703868389129639, "eval_runtime": 1176.62, "eval_samples_per_second": 11.878, "eval_steps_per_second": 0.743, "step": 11083 }, { "epoch": 6.23, "learning_rate": 0.0003962281176682909, "loss": 0.3006, "step": 11500 }, { "epoch": 6.5, "learning_rate": 0.00039171629669734704, "loss": 0.3122, "step": 12000 }, { "epoch": 6.77, "learning_rate": 0.00038720447572640314, "loss": 0.3367, "step": 12500 }, { "epoch": 7.0, "eval_LaBSE similarity": 0.7435614244402539, "eval_bleu": 0.3642121844301522, "eval_loss": 0.8795487880706787, "eval_runtime": 1222.0463, "eval_samples_per_second": 11.437, "eval_steps_per_second": 0.715, "step": 12930 }, { "epoch": 7.04, "learning_rate": 0.00038269265475545934, "loss": 0.3307, "step": 13000 }, { "epoch": 7.31, "learning_rate": 0.00037818083378451543, "loss": 0.2398, "step": 13500 }, { "epoch": 7.58, "learning_rate": 0.0003736690128135716, "loss": 0.2691, "step": 14000 }, { "epoch": 7.85, "learning_rate": 0.00036915719184262767, "loss": 0.286, "step": 14500 }, { "epoch": 8.0, "eval_LaBSE similarity": 0.7364368761335748, "eval_bleu": 0.34924239776356997, "eval_loss": 0.8712966442108154, "eval_runtime": 1189.2045, "eval_samples_per_second": 11.752, "eval_steps_per_second": 0.735, "step": 14778 }, { "epoch": 8.12, "learning_rate": 0.0003646453708716838, "loss": 0.2493, "step": 15000 }, { "epoch": 8.39, "learning_rate": 0.0003601335499007399, "loss": 0.2157, "step": 15500 }, { "epoch": 8.66, "learning_rate": 0.00035562172892979606, "loss": 0.2358, "step": 16000 }, { "epoch": 8.93, "learning_rate": 0.0003511099079588522, "loss": 0.2518, "step": 16500 }, { "epoch": 9.0, "eval_LaBSE similarity": 0.7470671729007166, "eval_bleu": 0.37070924925201876, "eval_loss": 0.8941549062728882, "eval_runtime": 1235.4354, "eval_samples_per_second": 11.313, "eval_steps_per_second": 0.707, "step": 16625 }, { "epoch": 9.2, "learning_rate": 0.00034659808698790835, "loss": 0.1955, "step": 17000 }, { "epoch": 9.47, "learning_rate": 0.00034208626601696444, "loss": 0.1953, "step": 17500 }, { "epoch": 9.74, "learning_rate": 0.0003375744450460206, "loss": 0.2129, "step": 18000 }, { "epoch": 10.0, "eval_LaBSE similarity": 0.7456615649679629, "eval_bleu": 0.3745055214526943, "eval_loss": 0.9131841659545898, "eval_runtime": 1223.5826, "eval_samples_per_second": 11.422, "eval_steps_per_second": 0.714, "step": 18472 }, { "epoch": 10.01, "learning_rate": 0.0003330626240750767, "loss": 0.219, "step": 18500 }, { "epoch": 10.29, "learning_rate": 0.00032855080310413283, "loss": 0.1602, "step": 19000 }, { "epoch": 10.56, "learning_rate": 0.0003240389821331889, "loss": 0.1807, "step": 19500 }, { "epoch": 10.83, "learning_rate": 0.00031952716116224507, "loss": 0.1925, "step": 20000 }, { "epoch": 11.0, "eval_LaBSE similarity": 0.7471288781191697, "eval_bleu": 0.37290758010326946, "eval_loss": 0.9259693622589111, "eval_runtime": 1200.2766, "eval_samples_per_second": 11.644, "eval_steps_per_second": 0.728, "step": 20319 }, { "epoch": 11.1, "learning_rate": 0.0003150153401913012, "loss": 0.1765, "step": 20500 }, { "epoch": 11.37, "learning_rate": 0.00031050351922035736, "loss": 0.1498, "step": 21000 }, { "epoch": 11.64, "learning_rate": 0.00030599169824941345, "loss": 0.1655, "step": 21500 }, { "epoch": 11.91, "learning_rate": 0.0003014798772784696, "loss": 0.1752, "step": 22000 }, { "epoch": 12.0, "eval_LaBSE similarity": 0.7468445703041946, "eval_bleu": 0.37465455155641914, "eval_loss": 0.9446151256561279, "eval_runtime": 1244.3591, "eval_samples_per_second": 11.231, "eval_steps_per_second": 0.702, "step": 22167 }, { "epoch": 12.18, "learning_rate": 0.0002969680563075257, "loss": 0.1418, "step": 22500 }, { "epoch": 12.45, "learning_rate": 0.00029245623533658184, "loss": 0.1396, "step": 23000 }, { "epoch": 12.72, "learning_rate": 0.00028794441436563793, "loss": 0.1526, "step": 23500 }, { "epoch": 12.99, "learning_rate": 0.00028343259339469413, "loss": 0.1601, "step": 24000 }, { "epoch": 13.0, "eval_LaBSE similarity": 0.7469340100011889, "eval_bleu": 0.37582936090048125, "eval_loss": 0.9434267282485962, "eval_runtime": 1202.1543, "eval_samples_per_second": 11.626, "eval_steps_per_second": 0.727, "step": 24014 }, { "epoch": 13.26, "learning_rate": 0.0002789207724237502, "loss": 0.1195, "step": 24500 }, { "epoch": 13.53, "learning_rate": 0.00027440895145280637, "loss": 0.1312, "step": 25000 }, { "epoch": 13.8, "learning_rate": 0.00026989713048186246, "loss": 0.141, "step": 25500 }, { "epoch": 14.0, "eval_LaBSE similarity": 0.7477662809688649, "eval_bleu": 0.3734200651221202, "eval_loss": 0.9459152221679688, "eval_runtime": 1210.207, "eval_samples_per_second": 11.548, "eval_steps_per_second": 0.722, "step": 25861 }, { "epoch": 14.07, "learning_rate": 0.0002653853095109186, "loss": 0.1336, "step": 26000 }, { "epoch": 14.35, "learning_rate": 0.0002608734885399747, "loss": 0.1123, "step": 26500 }, { "epoch": 14.62, "learning_rate": 0.00025636166756903085, "loss": 0.1229, "step": 27000 }, { "epoch": 14.89, "learning_rate": 0.000251849846598087, "loss": 0.1303, "step": 27500 }, { "epoch": 15.0, "eval_LaBSE similarity": 0.7455726656700262, "eval_bleu": 0.3728343171898791, "eval_loss": 0.9986817240715027, "eval_runtime": 1216.2061, "eval_samples_per_second": 11.491, "eval_steps_per_second": 0.719, "step": 27708 }, { "epoch": 15.16, "learning_rate": 0.0002473380256271431, "loss": 0.1126, "step": 28000 }, { "epoch": 15.43, "learning_rate": 0.00024282620465619926, "loss": 0.1065, "step": 28500 }, { "epoch": 15.7, "learning_rate": 0.00023831438368525538, "loss": 0.1155, "step": 29000 }, { "epoch": 15.97, "learning_rate": 0.0002338025627143115, "loss": 0.1216, "step": 29500 }, { "epoch": 16.0, "eval_LaBSE similarity": 0.7427742036105345, "eval_bleu": 0.36683694992363525, "eval_loss": 1.0019079446792603, "eval_runtime": 1175.8161, "eval_samples_per_second": 11.886, "eval_steps_per_second": 0.743, "step": 29556 }, { "epoch": 16.24, "learning_rate": 0.00022929074174336762, "loss": 0.0952, "step": 30000 }, { "epoch": 16.51, "learning_rate": 0.00022477892077242377, "loss": 0.1014, "step": 30500 }, { "epoch": 16.78, "learning_rate": 0.0002202670998014799, "loss": 0.1071, "step": 31000 }, { "epoch": 17.0, "eval_LaBSE similarity": 0.745781449845613, "eval_bleu": 0.372475196057264, "eval_loss": 1.0133806467056274, "eval_runtime": 1177.7522, "eval_samples_per_second": 11.867, "eval_steps_per_second": 0.742, "step": 31403 }, { "epoch": 17.05, "learning_rate": 0.000215755278830536, "loss": 0.1058, "step": 31500 }, { "epoch": 17.32, "learning_rate": 0.00021124345785959216, "loss": 0.0861, "step": 32000 }, { "epoch": 17.59, "learning_rate": 0.00020673163688864828, "loss": 0.0962, "step": 32500 }, { "epoch": 17.86, "learning_rate": 0.0002022198159177044, "loss": 0.1001, "step": 33000 }, { "epoch": 18.0, "eval_LaBSE similarity": 0.7447677442699225, "eval_bleu": 0.3671262927472257, "eval_loss": 1.0423822402954102, "eval_runtime": 1175.2578, "eval_samples_per_second": 11.892, "eval_steps_per_second": 0.744, "step": 33250 }, { "epoch": 18.14, "learning_rate": 0.00019770799494676051, "loss": 0.0892, "step": 33500 }, { "epoch": 18.41, "learning_rate": 0.00019319617397581666, "loss": 0.0818, "step": 34000 }, { "epoch": 18.68, "learning_rate": 0.00018868435300487278, "loss": 0.0898, "step": 34500 }, { "epoch": 18.95, "learning_rate": 0.0001841725320339289, "loss": 0.0945, "step": 35000 }, { "epoch": 19.0, "eval_LaBSE similarity": 0.7462503632276549, "eval_bleu": 0.3708357814718823, "eval_loss": 1.0612070560455322, "eval_runtime": 1198.6678, "eval_samples_per_second": 11.66, "eval_steps_per_second": 0.729, "step": 35097 }, { "epoch": 19.22, "learning_rate": 0.00017966071106298505, "loss": 0.0762, "step": 35500 }, { "epoch": 19.49, "learning_rate": 0.00017514889009204117, "loss": 0.0796, "step": 36000 }, { "epoch": 19.76, "learning_rate": 0.0001706370691210973, "loss": 0.0844, "step": 36500 }, { "epoch": 20.0, "eval_LaBSE similarity": 0.7422917604312603, "eval_bleu": 0.36480748501421006, "eval_loss": 1.0829150676727295, "eval_runtime": 1172.318, "eval_samples_per_second": 11.922, "eval_steps_per_second": 0.746, "step": 36945 }, { "epoch": 20.03, "learning_rate": 0.0001661252481501534, "loss": 0.0855, "step": 37000 }, { "epoch": 20.3, "learning_rate": 0.00016161342717920955, "loss": 0.067, "step": 37500 }, { "epoch": 20.57, "learning_rate": 0.00015710160620826567, "loss": 0.0753, "step": 38000 }, { "epoch": 20.84, "learning_rate": 0.0001525897852373218, "loss": 0.0793, "step": 38500 }, { "epoch": 21.0, "eval_LaBSE similarity": 0.7398600145383046, "eval_bleu": 0.3595059153487151, "eval_loss": 1.1107969284057617, "eval_runtime": 1170.616, "eval_samples_per_second": 11.939, "eval_steps_per_second": 0.747, "step": 38792 }, { "epoch": 21.11, "learning_rate": 0.0001480779642663779, "loss": 0.0731, "step": 39000 }, { "epoch": 21.38, "learning_rate": 0.00014356614329543406, "loss": 0.0651, "step": 39500 }, { "epoch": 21.65, "learning_rate": 0.00013905432232449018, "loss": 0.0716, "step": 40000 }, { "epoch": 21.92, "learning_rate": 0.0001345425013535463, "loss": 0.0748, "step": 40500 }, { "epoch": 22.0, "eval_LaBSE similarity": 0.7413012238766339, "eval_bleu": 0.36304917318364166, "eval_loss": 1.1223746538162231, "eval_runtime": 1177.2462, "eval_samples_per_second": 11.872, "eval_steps_per_second": 0.742, "step": 40639 }, { "epoch": 22.2, "learning_rate": 0.00013003068038260244, "loss": 0.0618, "step": 41000 }, { "epoch": 22.47, "learning_rate": 0.00012551885941165856, "loss": 0.0619, "step": 41500 }, { "epoch": 22.74, "learning_rate": 0.00012100703844071467, "loss": 0.0676, "step": 42000 }, { "epoch": 23.0, "eval_LaBSE similarity": 0.7416807065988956, "eval_bleu": 0.3601707131632923, "eval_loss": 1.1566632986068726, "eval_runtime": 1175.1131, "eval_samples_per_second": 11.893, "eval_steps_per_second": 0.744, "step": 42486 } ], "max_steps": 55410, "num_train_epochs": 30, "total_flos": 4.22917524411777e+17, "trial_name": null, "trial_params": null }