{ "best_metric": 0.8388484120368958, "best_model_checkpoint": "/d/hpc/projects/FRI/bb6846/run11//t5-sl-large/checkpoint-5541", "epoch": 17.999729327378535, "global_step": 33250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.27, "learning_rate": 0.0004966161342717921, "loss": 1.5149, "step": 500 }, { "epoch": 0.54, "learning_rate": 0.0004932322685435842, "loss": 1.2852, "step": 1000 }, { "epoch": 0.81, "learning_rate": 0.0004898484028153763, "loss": 1.1548, "step": 1500 }, { "epoch": 1.0, "eval_LaBSE similarity": 0.6812127827302594, "eval_bleu": 0.26085685311843865, "eval_loss": 1.030735969543457, "eval_runtime": 1202.6717, "eval_samples_per_second": 11.621, "eval_steps_per_second": 0.727, "step": 1847 }, { "epoch": 1.08, "learning_rate": 0.00048646453708716834, "loss": 0.9389, "step": 2000 }, { "epoch": 1.35, "learning_rate": 0.0004830806713589605, "loss": 0.6595, "step": 2500 }, { "epoch": 1.62, "learning_rate": 0.0004796968056307526, "loss": 0.6495, "step": 3000 }, { "epoch": 1.89, "learning_rate": 0.00047631293990254463, "loss": 0.6266, "step": 3500 }, { "epoch": 2.0, "eval_LaBSE similarity": 0.6908316853313329, "eval_bleu": 0.2815949228649396, "eval_loss": 0.8447950482368469, "eval_runtime": 1213.9058, "eval_samples_per_second": 11.513, "eval_steps_per_second": 0.72, "step": 3694 }, { "epoch": 2.17, "learning_rate": 0.0004729290741743368, "loss": 0.4401, "step": 4000 }, { "epoch": 2.44, "learning_rate": 0.0004695452084461289, "loss": 0.3565, "step": 4500 }, { "epoch": 2.71, "learning_rate": 0.000466161342717921, "loss": 0.3761, "step": 5000 }, { "epoch": 2.98, "learning_rate": 0.00046277747698971307, "loss": 0.383, "step": 5500 }, { "epoch": 3.0, "eval_LaBSE similarity": 0.6995571957101206, "eval_bleu": 0.29809786923507614, "eval_loss": 0.8388484120368958, "eval_runtime": 1199.9823, "eval_samples_per_second": 11.647, "eval_steps_per_second": 0.728, "step": 5541 }, { "epoch": 3.25, "learning_rate": 0.0004593936112615051, "loss": 0.2344, "step": 6000 }, { "epoch": 3.52, "learning_rate": 0.00045600974553329727, "loss": 0.2454, "step": 6500 }, { "epoch": 3.79, "learning_rate": 0.00045262587980508936, "loss": 0.2646, "step": 7000 }, { "epoch": 4.0, "eval_LaBSE similarity": 0.6979512369452641, "eval_bleu": 0.2997067254948018, "eval_loss": 0.8804282546043396, "eval_runtime": 1204.2265, "eval_samples_per_second": 11.606, "eval_steps_per_second": 0.726, "step": 7389 }, { "epoch": 4.06, "learning_rate": 0.0004492420140768814, "loss": 0.2489, "step": 7500 }, { "epoch": 4.33, "learning_rate": 0.00044585814834867355, "loss": 0.18, "step": 8000 }, { "epoch": 4.6, "learning_rate": 0.00044247428262046565, "loss": 0.1993, "step": 8500 }, { "epoch": 4.87, "learning_rate": 0.0004390904168922577, "loss": 0.2131, "step": 9000 }, { "epoch": 5.0, "eval_LaBSE similarity": 0.7007296268536876, "eval_bleu": 0.3013868679121285, "eval_loss": 0.9110961556434631, "eval_runtime": 1215.4225, "eval_samples_per_second": 11.499, "eval_steps_per_second": 0.719, "step": 9236 }, { "epoch": 5.14, "learning_rate": 0.00043570655116404984, "loss": 0.178, "step": 9500 }, { "epoch": 5.41, "learning_rate": 0.0004323226854358419, "loss": 0.1556, "step": 10000 }, { "epoch": 5.68, "learning_rate": 0.000428938819707634, "loss": 0.1717, "step": 10500 }, { "epoch": 5.95, "learning_rate": 0.00042555495397942613, "loss": 0.1837, "step": 11000 }, { "epoch": 6.0, "eval_LaBSE similarity": 0.7018732900689151, "eval_bleu": 0.3018978141259116, "eval_loss": 0.9366760849952698, "eval_runtime": 1203.8978, "eval_samples_per_second": 11.609, "eval_steps_per_second": 0.726, "step": 11083 }, { "epoch": 6.23, "learning_rate": 0.0004221710882512182, "loss": 0.1369, "step": 11500 }, { "epoch": 6.5, "learning_rate": 0.00041878722252301027, "loss": 0.1428, "step": 12000 }, { "epoch": 6.77, "learning_rate": 0.0004154033567948024, "loss": 0.1566, "step": 12500 }, { "epoch": 7.0, "eval_LaBSE similarity": 0.702976470663088, "eval_bleu": 0.30368208309669026, "eval_loss": 1.0034101009368896, "eval_runtime": 1207.5273, "eval_samples_per_second": 11.574, "eval_steps_per_second": 0.724, "step": 12930 }, { "epoch": 7.04, "learning_rate": 0.00041201949106659447, "loss": 0.1566, "step": 13000 }, { "epoch": 7.31, "learning_rate": 0.0004086356253383866, "loss": 0.1192, "step": 13500 }, { "epoch": 7.58, "learning_rate": 0.00040525175961017866, "loss": 0.1323, "step": 14000 }, { "epoch": 7.85, "learning_rate": 0.00040186789388197076, "loss": 0.1431, "step": 14500 }, { "epoch": 8.0, "eval_LaBSE similarity": 0.7013175535222604, "eval_bleu": 0.3049308476119818, "eval_loss": 1.0045561790466309, "eval_runtime": 1204.442, "eval_samples_per_second": 11.604, "eval_steps_per_second": 0.726, "step": 14778 }, { "epoch": 8.12, "learning_rate": 0.0003984840281537629, "loss": 0.1289, "step": 15000 }, { "epoch": 8.39, "learning_rate": 0.00039510016242555495, "loss": 0.1114, "step": 15500 }, { "epoch": 8.66, "learning_rate": 0.00039171629669734704, "loss": 0.1252, "step": 16000 }, { "epoch": 8.93, "learning_rate": 0.0003883324309691392, "loss": 0.1341, "step": 16500 }, { "epoch": 9.0, "eval_LaBSE similarity": 0.7028801773059591, "eval_bleu": 0.3060926268506286, "eval_loss": 0.9563193917274475, "eval_runtime": 1204.9532, "eval_samples_per_second": 11.599, "eval_steps_per_second": 0.725, "step": 16625 }, { "epoch": 9.2, "learning_rate": 0.00038494856524093124, "loss": 0.1083, "step": 17000 }, { "epoch": 9.47, "learning_rate": 0.00038156469951272333, "loss": 0.1099, "step": 17500 }, { "epoch": 9.74, "learning_rate": 0.00037818083378451543, "loss": 0.1206, "step": 18000 }, { "epoch": 10.0, "eval_LaBSE similarity": 0.7016379604564951, "eval_bleu": 0.3033423466794905, "eval_loss": 1.041945219039917, "eval_runtime": 1201.5959, "eval_samples_per_second": 11.631, "eval_steps_per_second": 0.727, "step": 18472 }, { "epoch": 10.01, "learning_rate": 0.0003747969680563075, "loss": 0.125, "step": 18500 }, { "epoch": 10.29, "learning_rate": 0.0003714131023280996, "loss": 0.0938, "step": 19000 }, { "epoch": 10.56, "learning_rate": 0.0003680292365998917, "loss": 0.1063, "step": 19500 }, { "epoch": 10.83, "learning_rate": 0.0003646453708716838, "loss": 0.1157, "step": 20000 }, { "epoch": 11.0, "eval_LaBSE similarity": 0.7031656672306994, "eval_bleu": 0.304420237465134, "eval_loss": 1.0878944396972656, "eval_runtime": 1198.7547, "eval_samples_per_second": 11.659, "eval_steps_per_second": 0.729, "step": 20319 }, { "epoch": 11.1, "learning_rate": 0.0003612615051434759, "loss": 0.1069, "step": 20500 }, { "epoch": 11.37, "learning_rate": 0.000357877639415268, "loss": 0.0938, "step": 21000 }, { "epoch": 11.64, "learning_rate": 0.0003544937736870601, "loss": 0.1029, "step": 21500 }, { "epoch": 11.91, "learning_rate": 0.0003511099079588522, "loss": 0.1095, "step": 22000 }, { "epoch": 12.0, "eval_LaBSE similarity": 0.7013727569331282, "eval_bleu": 0.3008776228703018, "eval_loss": 1.091700553894043, "eval_runtime": 1199.6915, "eval_samples_per_second": 11.65, "eval_steps_per_second": 0.729, "step": 22167 }, { "epoch": 12.18, "learning_rate": 0.0003477260422306443, "loss": 0.0931, "step": 22500 }, { "epoch": 12.45, "learning_rate": 0.0003443421765024364, "loss": 0.0913, "step": 23000 }, { "epoch": 12.72, "learning_rate": 0.0003409583107742285, "loss": 0.1011, "step": 23500 }, { "epoch": 12.99, "learning_rate": 0.0003375744450460206, "loss": 0.106, "step": 24000 }, { "epoch": 13.0, "eval_LaBSE similarity": 0.7020342419015667, "eval_bleu": 0.30263173440284996, "eval_loss": 1.0771766901016235, "eval_runtime": 1201.3264, "eval_samples_per_second": 11.634, "eval_steps_per_second": 0.728, "step": 24014 }, { "epoch": 13.26, "learning_rate": 0.0003341905793178127, "loss": 0.0804, "step": 24500 }, { "epoch": 13.53, "learning_rate": 0.0003308067135896048, "loss": 0.0895, "step": 25000 }, { "epoch": 13.8, "learning_rate": 0.0003274228478613969, "loss": 0.0969, "step": 25500 }, { "epoch": 14.0, "eval_LaBSE similarity": 0.7029242980227026, "eval_bleu": 0.3052818250366099, "eval_loss": 1.1355116367340088, "eval_runtime": 1215.6829, "eval_samples_per_second": 11.496, "eval_steps_per_second": 0.719, "step": 25861 }, { "epoch": 14.07, "learning_rate": 0.0003240389821331889, "loss": 0.0941, "step": 26000 }, { "epoch": 14.35, "learning_rate": 0.00032065511640498107, "loss": 0.0791, "step": 26500 }, { "epoch": 14.62, "learning_rate": 0.00031727125067677317, "loss": 0.0884, "step": 27000 }, { "epoch": 14.89, "learning_rate": 0.0003138873849485652, "loss": 0.0937, "step": 27500 }, { "epoch": 15.0, "eval_LaBSE similarity": 0.7020669575193156, "eval_bleu": 0.30208227926668096, "eval_loss": 1.1526124477386475, "eval_runtime": 1203.9587, "eval_samples_per_second": 11.608, "eval_steps_per_second": 0.726, "step": 27708 }, { "epoch": 15.16, "learning_rate": 0.00031050351922035736, "loss": 0.0813, "step": 28000 }, { "epoch": 15.43, "learning_rate": 0.00030711965349214946, "loss": 0.0787, "step": 28500 }, { "epoch": 15.7, "learning_rate": 0.0003037357877639415, "loss": 0.0869, "step": 29000 }, { "epoch": 15.97, "learning_rate": 0.00030035192203573365, "loss": 0.0919, "step": 29500 }, { "epoch": 16.0, "eval_LaBSE similarity": 0.7013649646287148, "eval_bleu": 0.30275833235224037, "eval_loss": 1.1413129568099976, "eval_runtime": 1202.6189, "eval_samples_per_second": 11.621, "eval_steps_per_second": 0.727, "step": 29556 }, { "epoch": 16.24, "learning_rate": 0.0002969680563075257, "loss": 0.0712, "step": 30000 }, { "epoch": 16.51, "learning_rate": 0.00029358419057931784, "loss": 0.0794, "step": 30500 }, { "epoch": 16.78, "learning_rate": 0.00029020032485110994, "loss": 0.0854, "step": 31000 }, { "epoch": 17.0, "eval_LaBSE similarity": 0.7010715400030315, "eval_bleu": 0.3027145728229586, "eval_loss": 1.162865161895752, "eval_runtime": 1202.091, "eval_samples_per_second": 11.626, "eval_steps_per_second": 0.727, "step": 31403 }, { "epoch": 17.05, "learning_rate": 0.000286816459122902, "loss": 0.0833, "step": 31500 }, { "epoch": 17.32, "learning_rate": 0.00028343259339469413, "loss": 0.0686, "step": 32000 }, { "epoch": 17.59, "learning_rate": 0.00028004872766648623, "loss": 0.0775, "step": 32500 }, { "epoch": 17.86, "learning_rate": 0.00027666486193827827, "loss": 0.0828, "step": 33000 }, { "epoch": 18.0, "eval_LaBSE similarity": 0.7000625387362445, "eval_bleu": 0.30075415431038816, "eval_loss": 1.192597508430481, "eval_runtime": 1208.0939, "eval_samples_per_second": 11.569, "eval_steps_per_second": 0.723, "step": 33250 } ], "max_steps": 73880, "num_train_epochs": 40, "total_flos": 5.982983397653299e+17, "trial_name": null, "trial_params": null }