| { | |
| "best_metric": 0.8388484120368958, | |
| "best_model_checkpoint": "/d/hpc/projects/FRI/bb6846/run11//t5-sl-large/checkpoint-5541", | |
| "epoch": 17.999729327378535, | |
| "global_step": 33250, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0004966161342717921, | |
| "loss": 1.5149, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.0004932322685435842, | |
| "loss": 1.2852, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.0004898484028153763, | |
| "loss": 1.1548, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_LaBSE similarity": 0.6812127827302594, | |
| "eval_bleu": 0.26085685311843865, | |
| "eval_loss": 1.030735969543457, | |
| "eval_runtime": 1202.6717, | |
| "eval_samples_per_second": 11.621, | |
| "eval_steps_per_second": 0.727, | |
| "step": 1847 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00048646453708716834, | |
| "loss": 0.9389, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.0004830806713589605, | |
| "loss": 0.6595, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.0004796968056307526, | |
| "loss": 0.6495, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 0.00047631293990254463, | |
| "loss": 0.6266, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_LaBSE similarity": 0.6908316853313329, | |
| "eval_bleu": 0.2815949228649396, | |
| "eval_loss": 0.8447950482368469, | |
| "eval_runtime": 1213.9058, | |
| "eval_samples_per_second": 11.513, | |
| "eval_steps_per_second": 0.72, | |
| "step": 3694 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 0.0004729290741743368, | |
| "loss": 0.4401, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 0.0004695452084461289, | |
| "loss": 0.3565, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 0.000466161342717921, | |
| "loss": 0.3761, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 0.00046277747698971307, | |
| "loss": 0.383, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_LaBSE similarity": 0.6995571957101206, | |
| "eval_bleu": 0.29809786923507614, | |
| "eval_loss": 0.8388484120368958, | |
| "eval_runtime": 1199.9823, | |
| "eval_samples_per_second": 11.647, | |
| "eval_steps_per_second": 0.728, | |
| "step": 5541 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 0.0004593936112615051, | |
| "loss": 0.2344, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 0.00045600974553329727, | |
| "loss": 0.2454, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 0.00045262587980508936, | |
| "loss": 0.2646, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_LaBSE similarity": 0.6979512369452641, | |
| "eval_bleu": 0.2997067254948018, | |
| "eval_loss": 0.8804282546043396, | |
| "eval_runtime": 1204.2265, | |
| "eval_samples_per_second": 11.606, | |
| "eval_steps_per_second": 0.726, | |
| "step": 7389 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 0.0004492420140768814, | |
| "loss": 0.2489, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 0.00044585814834867355, | |
| "loss": 0.18, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 0.00044247428262046565, | |
| "loss": 0.1993, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 0.0004390904168922577, | |
| "loss": 0.2131, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_LaBSE similarity": 0.7007296268536876, | |
| "eval_bleu": 0.3013868679121285, | |
| "eval_loss": 0.9110961556434631, | |
| "eval_runtime": 1215.4225, | |
| "eval_samples_per_second": 11.499, | |
| "eval_steps_per_second": 0.719, | |
| "step": 9236 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 0.00043570655116404984, | |
| "loss": 0.178, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "learning_rate": 0.0004323226854358419, | |
| "loss": 0.1556, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "learning_rate": 0.000428938819707634, | |
| "loss": 0.1717, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 0.00042555495397942613, | |
| "loss": 0.1837, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_LaBSE similarity": 0.7018732900689151, | |
| "eval_bleu": 0.3018978141259116, | |
| "eval_loss": 0.9366760849952698, | |
| "eval_runtime": 1203.8978, | |
| "eval_samples_per_second": 11.609, | |
| "eval_steps_per_second": 0.726, | |
| "step": 11083 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "learning_rate": 0.0004221710882512182, | |
| "loss": 0.1369, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 0.00041878722252301027, | |
| "loss": 0.1428, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "learning_rate": 0.0004154033567948024, | |
| "loss": 0.1566, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_LaBSE similarity": 0.702976470663088, | |
| "eval_bleu": 0.30368208309669026, | |
| "eval_loss": 1.0034101009368896, | |
| "eval_runtime": 1207.5273, | |
| "eval_samples_per_second": 11.574, | |
| "eval_steps_per_second": 0.724, | |
| "step": 12930 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 0.00041201949106659447, | |
| "loss": 0.1566, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 0.0004086356253383866, | |
| "loss": 0.1192, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "learning_rate": 0.00040525175961017866, | |
| "loss": 0.1323, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "learning_rate": 0.00040186789388197076, | |
| "loss": 0.1431, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_LaBSE similarity": 0.7013175535222604, | |
| "eval_bleu": 0.3049308476119818, | |
| "eval_loss": 1.0045561790466309, | |
| "eval_runtime": 1204.442, | |
| "eval_samples_per_second": 11.604, | |
| "eval_steps_per_second": 0.726, | |
| "step": 14778 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "learning_rate": 0.0003984840281537629, | |
| "loss": 0.1289, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "learning_rate": 0.00039510016242555495, | |
| "loss": 0.1114, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 8.66, | |
| "learning_rate": 0.00039171629669734704, | |
| "loss": 0.1252, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 8.93, | |
| "learning_rate": 0.0003883324309691392, | |
| "loss": 0.1341, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_LaBSE similarity": 0.7028801773059591, | |
| "eval_bleu": 0.3060926268506286, | |
| "eval_loss": 0.9563193917274475, | |
| "eval_runtime": 1204.9532, | |
| "eval_samples_per_second": 11.599, | |
| "eval_steps_per_second": 0.725, | |
| "step": 16625 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "learning_rate": 0.00038494856524093124, | |
| "loss": 0.1083, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 9.47, | |
| "learning_rate": 0.00038156469951272333, | |
| "loss": 0.1099, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 9.74, | |
| "learning_rate": 0.00037818083378451543, | |
| "loss": 0.1206, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_LaBSE similarity": 0.7016379604564951, | |
| "eval_bleu": 0.3033423466794905, | |
| "eval_loss": 1.041945219039917, | |
| "eval_runtime": 1201.5959, | |
| "eval_samples_per_second": 11.631, | |
| "eval_steps_per_second": 0.727, | |
| "step": 18472 | |
| }, | |
| { | |
| "epoch": 10.01, | |
| "learning_rate": 0.0003747969680563075, | |
| "loss": 0.125, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 10.29, | |
| "learning_rate": 0.0003714131023280996, | |
| "loss": 0.0938, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 10.56, | |
| "learning_rate": 0.0003680292365998917, | |
| "loss": 0.1063, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 10.83, | |
| "learning_rate": 0.0003646453708716838, | |
| "loss": 0.1157, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_LaBSE similarity": 0.7031656672306994, | |
| "eval_bleu": 0.304420237465134, | |
| "eval_loss": 1.0878944396972656, | |
| "eval_runtime": 1198.7547, | |
| "eval_samples_per_second": 11.659, | |
| "eval_steps_per_second": 0.729, | |
| "step": 20319 | |
| }, | |
| { | |
| "epoch": 11.1, | |
| "learning_rate": 0.0003612615051434759, | |
| "loss": 0.1069, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 11.37, | |
| "learning_rate": 0.000357877639415268, | |
| "loss": 0.0938, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 11.64, | |
| "learning_rate": 0.0003544937736870601, | |
| "loss": 0.1029, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 11.91, | |
| "learning_rate": 0.0003511099079588522, | |
| "loss": 0.1095, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_LaBSE similarity": 0.7013727569331282, | |
| "eval_bleu": 0.3008776228703018, | |
| "eval_loss": 1.091700553894043, | |
| "eval_runtime": 1199.6915, | |
| "eval_samples_per_second": 11.65, | |
| "eval_steps_per_second": 0.729, | |
| "step": 22167 | |
| }, | |
| { | |
| "epoch": 12.18, | |
| "learning_rate": 0.0003477260422306443, | |
| "loss": 0.0931, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 12.45, | |
| "learning_rate": 0.0003443421765024364, | |
| "loss": 0.0913, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 12.72, | |
| "learning_rate": 0.0003409583107742285, | |
| "loss": 0.1011, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 12.99, | |
| "learning_rate": 0.0003375744450460206, | |
| "loss": 0.106, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_LaBSE similarity": 0.7020342419015667, | |
| "eval_bleu": 0.30263173440284996, | |
| "eval_loss": 1.0771766901016235, | |
| "eval_runtime": 1201.3264, | |
| "eval_samples_per_second": 11.634, | |
| "eval_steps_per_second": 0.728, | |
| "step": 24014 | |
| }, | |
| { | |
| "epoch": 13.26, | |
| "learning_rate": 0.0003341905793178127, | |
| "loss": 0.0804, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 13.53, | |
| "learning_rate": 0.0003308067135896048, | |
| "loss": 0.0895, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 13.8, | |
| "learning_rate": 0.0003274228478613969, | |
| "loss": 0.0969, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_LaBSE similarity": 0.7029242980227026, | |
| "eval_bleu": 0.3052818250366099, | |
| "eval_loss": 1.1355116367340088, | |
| "eval_runtime": 1215.6829, | |
| "eval_samples_per_second": 11.496, | |
| "eval_steps_per_second": 0.719, | |
| "step": 25861 | |
| }, | |
| { | |
| "epoch": 14.07, | |
| "learning_rate": 0.0003240389821331889, | |
| "loss": 0.0941, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 14.35, | |
| "learning_rate": 0.00032065511640498107, | |
| "loss": 0.0791, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 14.62, | |
| "learning_rate": 0.00031727125067677317, | |
| "loss": 0.0884, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 14.89, | |
| "learning_rate": 0.0003138873849485652, | |
| "loss": 0.0937, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_LaBSE similarity": 0.7020669575193156, | |
| "eval_bleu": 0.30208227926668096, | |
| "eval_loss": 1.1526124477386475, | |
| "eval_runtime": 1203.9587, | |
| "eval_samples_per_second": 11.608, | |
| "eval_steps_per_second": 0.726, | |
| "step": 27708 | |
| }, | |
| { | |
| "epoch": 15.16, | |
| "learning_rate": 0.00031050351922035736, | |
| "loss": 0.0813, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 15.43, | |
| "learning_rate": 0.00030711965349214946, | |
| "loss": 0.0787, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 15.7, | |
| "learning_rate": 0.0003037357877639415, | |
| "loss": 0.0869, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 15.97, | |
| "learning_rate": 0.00030035192203573365, | |
| "loss": 0.0919, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_LaBSE similarity": 0.7013649646287148, | |
| "eval_bleu": 0.30275833235224037, | |
| "eval_loss": 1.1413129568099976, | |
| "eval_runtime": 1202.6189, | |
| "eval_samples_per_second": 11.621, | |
| "eval_steps_per_second": 0.727, | |
| "step": 29556 | |
| }, | |
| { | |
| "epoch": 16.24, | |
| "learning_rate": 0.0002969680563075257, | |
| "loss": 0.0712, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 16.51, | |
| "learning_rate": 0.00029358419057931784, | |
| "loss": 0.0794, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 16.78, | |
| "learning_rate": 0.00029020032485110994, | |
| "loss": 0.0854, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_LaBSE similarity": 0.7010715400030315, | |
| "eval_bleu": 0.3027145728229586, | |
| "eval_loss": 1.162865161895752, | |
| "eval_runtime": 1202.091, | |
| "eval_samples_per_second": 11.626, | |
| "eval_steps_per_second": 0.727, | |
| "step": 31403 | |
| }, | |
| { | |
| "epoch": 17.05, | |
| "learning_rate": 0.000286816459122902, | |
| "loss": 0.0833, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 17.32, | |
| "learning_rate": 0.00028343259339469413, | |
| "loss": 0.0686, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 17.59, | |
| "learning_rate": 0.00028004872766648623, | |
| "loss": 0.0775, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 17.86, | |
| "learning_rate": 0.00027666486193827827, | |
| "loss": 0.0828, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_LaBSE similarity": 0.7000625387362445, | |
| "eval_bleu": 0.30075415431038816, | |
| "eval_loss": 1.192597508430481, | |
| "eval_runtime": 1208.0939, | |
| "eval_samples_per_second": 11.569, | |
| "eval_steps_per_second": 0.723, | |
| "step": 33250 | |
| } | |
| ], | |
| "max_steps": 73880, | |
| "num_train_epochs": 40, | |
| "total_flos": 5.982983397653299e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |