mbart-TS-sentence / trainer_state.json
boneb's picture
Upload folder using huggingface_hub
89b2fc3
{
"best_metric": 0.8703868389129639,
"best_model_checkpoint": "/d/hpc/projects/FRI/bb6846/run11//mbart-large-50/checkpoint-11083",
"epoch": 22.999593991067805,
"global_step": 42486,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.27,
"learning_rate": 0.0004954881790290561,
"loss": 5.4927,
"step": 500
},
{
"epoch": 0.54,
"learning_rate": 0.0004909763580581122,
"loss": 3.5335,
"step": 1000
},
{
"epoch": 0.81,
"learning_rate": 0.00048646453708716834,
"loss": 2.9081,
"step": 1500
},
{
"epoch": 1.0,
"eval_LaBSE similarity": 0.5398781112980036,
"eval_bleu": 0.09115205757865759,
"eval_loss": 2.2135918140411377,
"eval_runtime": 1365.0262,
"eval_samples_per_second": 10.239,
"eval_steps_per_second": 0.64,
"step": 1847
},
{
"epoch": 1.08,
"learning_rate": 0.00048195271611622454,
"loss": 2.2837,
"step": 2000
},
{
"epoch": 1.35,
"learning_rate": 0.00047744089514528064,
"loss": 1.801,
"step": 2500
},
{
"epoch": 1.62,
"learning_rate": 0.0004729290741743368,
"loss": 1.5852,
"step": 3000
},
{
"epoch": 1.89,
"learning_rate": 0.0004684172532033929,
"loss": 1.4323,
"step": 3500
},
{
"epoch": 2.0,
"eval_LaBSE similarity": 0.6886588053033609,
"eval_bleu": 0.25496466080419466,
"eval_loss": 1.3433868885040283,
"eval_runtime": 1227.9131,
"eval_samples_per_second": 11.382,
"eval_steps_per_second": 0.712,
"step": 3694
},
{
"epoch": 2.17,
"learning_rate": 0.000463905432232449,
"loss": 1.1374,
"step": 4000
},
{
"epoch": 2.44,
"learning_rate": 0.0004593936112615051,
"loss": 1.0137,
"step": 4500
},
{
"epoch": 2.71,
"learning_rate": 0.00045488179029056126,
"loss": 0.9937,
"step": 5000
},
{
"epoch": 2.98,
"learning_rate": 0.0004503699693196174,
"loss": 0.9558,
"step": 5500
},
{
"epoch": 3.0,
"eval_LaBSE similarity": 0.7105592780650029,
"eval_bleu": 0.2856338691080233,
"eval_loss": 1.0794386863708496,
"eval_runtime": 1168.6662,
"eval_samples_per_second": 11.959,
"eval_steps_per_second": 0.748,
"step": 5541
},
{
"epoch": 3.25,
"learning_rate": 0.00044585814834867355,
"loss": 0.6749,
"step": 6000
},
{
"epoch": 3.52,
"learning_rate": 0.00044134632737772965,
"loss": 0.6931,
"step": 6500
},
{
"epoch": 3.79,
"learning_rate": 0.0004368345064067858,
"loss": 0.6971,
"step": 7000
},
{
"epoch": 4.0,
"eval_LaBSE similarity": 0.7229989019695549,
"eval_bleu": 0.3177643404347968,
"eval_loss": 0.9699832201004028,
"eval_runtime": 1195.5257,
"eval_samples_per_second": 11.69,
"eval_steps_per_second": 0.731,
"step": 7389
},
{
"epoch": 4.06,
"learning_rate": 0.0004323226854358419,
"loss": 0.6386,
"step": 7500
},
{
"epoch": 4.33,
"learning_rate": 0.00042781086446489803,
"loss": 0.4793,
"step": 8000
},
{
"epoch": 4.6,
"learning_rate": 0.0004232990434939541,
"loss": 0.5143,
"step": 8500
},
{
"epoch": 4.87,
"learning_rate": 0.00041878722252301027,
"loss": 0.5263,
"step": 9000
},
{
"epoch": 5.0,
"eval_LaBSE similarity": 0.7233434942747937,
"eval_bleu": 0.3231544767102279,
"eval_loss": 0.9114313125610352,
"eval_runtime": 1133.9982,
"eval_samples_per_second": 12.325,
"eval_steps_per_second": 0.771,
"step": 9236
},
{
"epoch": 5.14,
"learning_rate": 0.0004142754015520664,
"loss": 0.4303,
"step": 9500
},
{
"epoch": 5.41,
"learning_rate": 0.00040976358058112257,
"loss": 0.3777,
"step": 10000
},
{
"epoch": 5.68,
"learning_rate": 0.00040525175961017866,
"loss": 0.4072,
"step": 10500
},
{
"epoch": 5.95,
"learning_rate": 0.0004007399386392348,
"loss": 0.419,
"step": 11000
},
{
"epoch": 6.0,
"eval_LaBSE similarity": 0.7420425884927818,
"eval_bleu": 0.3542640467481141,
"eval_loss": 0.8703868389129639,
"eval_runtime": 1176.62,
"eval_samples_per_second": 11.878,
"eval_steps_per_second": 0.743,
"step": 11083
},
{
"epoch": 6.23,
"learning_rate": 0.0003962281176682909,
"loss": 0.3006,
"step": 11500
},
{
"epoch": 6.5,
"learning_rate": 0.00039171629669734704,
"loss": 0.3122,
"step": 12000
},
{
"epoch": 6.77,
"learning_rate": 0.00038720447572640314,
"loss": 0.3367,
"step": 12500
},
{
"epoch": 7.0,
"eval_LaBSE similarity": 0.7435614244402539,
"eval_bleu": 0.3642121844301522,
"eval_loss": 0.8795487880706787,
"eval_runtime": 1222.0463,
"eval_samples_per_second": 11.437,
"eval_steps_per_second": 0.715,
"step": 12930
},
{
"epoch": 7.04,
"learning_rate": 0.00038269265475545934,
"loss": 0.3307,
"step": 13000
},
{
"epoch": 7.31,
"learning_rate": 0.00037818083378451543,
"loss": 0.2398,
"step": 13500
},
{
"epoch": 7.58,
"learning_rate": 0.0003736690128135716,
"loss": 0.2691,
"step": 14000
},
{
"epoch": 7.85,
"learning_rate": 0.00036915719184262767,
"loss": 0.286,
"step": 14500
},
{
"epoch": 8.0,
"eval_LaBSE similarity": 0.7364368761335748,
"eval_bleu": 0.34924239776356997,
"eval_loss": 0.8712966442108154,
"eval_runtime": 1189.2045,
"eval_samples_per_second": 11.752,
"eval_steps_per_second": 0.735,
"step": 14778
},
{
"epoch": 8.12,
"learning_rate": 0.0003646453708716838,
"loss": 0.2493,
"step": 15000
},
{
"epoch": 8.39,
"learning_rate": 0.0003601335499007399,
"loss": 0.2157,
"step": 15500
},
{
"epoch": 8.66,
"learning_rate": 0.00035562172892979606,
"loss": 0.2358,
"step": 16000
},
{
"epoch": 8.93,
"learning_rate": 0.0003511099079588522,
"loss": 0.2518,
"step": 16500
},
{
"epoch": 9.0,
"eval_LaBSE similarity": 0.7470671729007166,
"eval_bleu": 0.37070924925201876,
"eval_loss": 0.8941549062728882,
"eval_runtime": 1235.4354,
"eval_samples_per_second": 11.313,
"eval_steps_per_second": 0.707,
"step": 16625
},
{
"epoch": 9.2,
"learning_rate": 0.00034659808698790835,
"loss": 0.1955,
"step": 17000
},
{
"epoch": 9.47,
"learning_rate": 0.00034208626601696444,
"loss": 0.1953,
"step": 17500
},
{
"epoch": 9.74,
"learning_rate": 0.0003375744450460206,
"loss": 0.2129,
"step": 18000
},
{
"epoch": 10.0,
"eval_LaBSE similarity": 0.7456615649679629,
"eval_bleu": 0.3745055214526943,
"eval_loss": 0.9131841659545898,
"eval_runtime": 1223.5826,
"eval_samples_per_second": 11.422,
"eval_steps_per_second": 0.714,
"step": 18472
},
{
"epoch": 10.01,
"learning_rate": 0.0003330626240750767,
"loss": 0.219,
"step": 18500
},
{
"epoch": 10.29,
"learning_rate": 0.00032855080310413283,
"loss": 0.1602,
"step": 19000
},
{
"epoch": 10.56,
"learning_rate": 0.0003240389821331889,
"loss": 0.1807,
"step": 19500
},
{
"epoch": 10.83,
"learning_rate": 0.00031952716116224507,
"loss": 0.1925,
"step": 20000
},
{
"epoch": 11.0,
"eval_LaBSE similarity": 0.7471288781191697,
"eval_bleu": 0.37290758010326946,
"eval_loss": 0.9259693622589111,
"eval_runtime": 1200.2766,
"eval_samples_per_second": 11.644,
"eval_steps_per_second": 0.728,
"step": 20319
},
{
"epoch": 11.1,
"learning_rate": 0.0003150153401913012,
"loss": 0.1765,
"step": 20500
},
{
"epoch": 11.37,
"learning_rate": 0.00031050351922035736,
"loss": 0.1498,
"step": 21000
},
{
"epoch": 11.64,
"learning_rate": 0.00030599169824941345,
"loss": 0.1655,
"step": 21500
},
{
"epoch": 11.91,
"learning_rate": 0.0003014798772784696,
"loss": 0.1752,
"step": 22000
},
{
"epoch": 12.0,
"eval_LaBSE similarity": 0.7468445703041946,
"eval_bleu": 0.37465455155641914,
"eval_loss": 0.9446151256561279,
"eval_runtime": 1244.3591,
"eval_samples_per_second": 11.231,
"eval_steps_per_second": 0.702,
"step": 22167
},
{
"epoch": 12.18,
"learning_rate": 0.0002969680563075257,
"loss": 0.1418,
"step": 22500
},
{
"epoch": 12.45,
"learning_rate": 0.00029245623533658184,
"loss": 0.1396,
"step": 23000
},
{
"epoch": 12.72,
"learning_rate": 0.00028794441436563793,
"loss": 0.1526,
"step": 23500
},
{
"epoch": 12.99,
"learning_rate": 0.00028343259339469413,
"loss": 0.1601,
"step": 24000
},
{
"epoch": 13.0,
"eval_LaBSE similarity": 0.7469340100011889,
"eval_bleu": 0.37582936090048125,
"eval_loss": 0.9434267282485962,
"eval_runtime": 1202.1543,
"eval_samples_per_second": 11.626,
"eval_steps_per_second": 0.727,
"step": 24014
},
{
"epoch": 13.26,
"learning_rate": 0.0002789207724237502,
"loss": 0.1195,
"step": 24500
},
{
"epoch": 13.53,
"learning_rate": 0.00027440895145280637,
"loss": 0.1312,
"step": 25000
},
{
"epoch": 13.8,
"learning_rate": 0.00026989713048186246,
"loss": 0.141,
"step": 25500
},
{
"epoch": 14.0,
"eval_LaBSE similarity": 0.7477662809688649,
"eval_bleu": 0.3734200651221202,
"eval_loss": 0.9459152221679688,
"eval_runtime": 1210.207,
"eval_samples_per_second": 11.548,
"eval_steps_per_second": 0.722,
"step": 25861
},
{
"epoch": 14.07,
"learning_rate": 0.0002653853095109186,
"loss": 0.1336,
"step": 26000
},
{
"epoch": 14.35,
"learning_rate": 0.0002608734885399747,
"loss": 0.1123,
"step": 26500
},
{
"epoch": 14.62,
"learning_rate": 0.00025636166756903085,
"loss": 0.1229,
"step": 27000
},
{
"epoch": 14.89,
"learning_rate": 0.000251849846598087,
"loss": 0.1303,
"step": 27500
},
{
"epoch": 15.0,
"eval_LaBSE similarity": 0.7455726656700262,
"eval_bleu": 0.3728343171898791,
"eval_loss": 0.9986817240715027,
"eval_runtime": 1216.2061,
"eval_samples_per_second": 11.491,
"eval_steps_per_second": 0.719,
"step": 27708
},
{
"epoch": 15.16,
"learning_rate": 0.0002473380256271431,
"loss": 0.1126,
"step": 28000
},
{
"epoch": 15.43,
"learning_rate": 0.00024282620465619926,
"loss": 0.1065,
"step": 28500
},
{
"epoch": 15.7,
"learning_rate": 0.00023831438368525538,
"loss": 0.1155,
"step": 29000
},
{
"epoch": 15.97,
"learning_rate": 0.0002338025627143115,
"loss": 0.1216,
"step": 29500
},
{
"epoch": 16.0,
"eval_LaBSE similarity": 0.7427742036105345,
"eval_bleu": 0.36683694992363525,
"eval_loss": 1.0019079446792603,
"eval_runtime": 1175.8161,
"eval_samples_per_second": 11.886,
"eval_steps_per_second": 0.743,
"step": 29556
},
{
"epoch": 16.24,
"learning_rate": 0.00022929074174336762,
"loss": 0.0952,
"step": 30000
},
{
"epoch": 16.51,
"learning_rate": 0.00022477892077242377,
"loss": 0.1014,
"step": 30500
},
{
"epoch": 16.78,
"learning_rate": 0.0002202670998014799,
"loss": 0.1071,
"step": 31000
},
{
"epoch": 17.0,
"eval_LaBSE similarity": 0.745781449845613,
"eval_bleu": 0.372475196057264,
"eval_loss": 1.0133806467056274,
"eval_runtime": 1177.7522,
"eval_samples_per_second": 11.867,
"eval_steps_per_second": 0.742,
"step": 31403
},
{
"epoch": 17.05,
"learning_rate": 0.000215755278830536,
"loss": 0.1058,
"step": 31500
},
{
"epoch": 17.32,
"learning_rate": 0.00021124345785959216,
"loss": 0.0861,
"step": 32000
},
{
"epoch": 17.59,
"learning_rate": 0.00020673163688864828,
"loss": 0.0962,
"step": 32500
},
{
"epoch": 17.86,
"learning_rate": 0.0002022198159177044,
"loss": 0.1001,
"step": 33000
},
{
"epoch": 18.0,
"eval_LaBSE similarity": 0.7447677442699225,
"eval_bleu": 0.3671262927472257,
"eval_loss": 1.0423822402954102,
"eval_runtime": 1175.2578,
"eval_samples_per_second": 11.892,
"eval_steps_per_second": 0.744,
"step": 33250
},
{
"epoch": 18.14,
"learning_rate": 0.00019770799494676051,
"loss": 0.0892,
"step": 33500
},
{
"epoch": 18.41,
"learning_rate": 0.00019319617397581666,
"loss": 0.0818,
"step": 34000
},
{
"epoch": 18.68,
"learning_rate": 0.00018868435300487278,
"loss": 0.0898,
"step": 34500
},
{
"epoch": 18.95,
"learning_rate": 0.0001841725320339289,
"loss": 0.0945,
"step": 35000
},
{
"epoch": 19.0,
"eval_LaBSE similarity": 0.7462503632276549,
"eval_bleu": 0.3708357814718823,
"eval_loss": 1.0612070560455322,
"eval_runtime": 1198.6678,
"eval_samples_per_second": 11.66,
"eval_steps_per_second": 0.729,
"step": 35097
},
{
"epoch": 19.22,
"learning_rate": 0.00017966071106298505,
"loss": 0.0762,
"step": 35500
},
{
"epoch": 19.49,
"learning_rate": 0.00017514889009204117,
"loss": 0.0796,
"step": 36000
},
{
"epoch": 19.76,
"learning_rate": 0.0001706370691210973,
"loss": 0.0844,
"step": 36500
},
{
"epoch": 20.0,
"eval_LaBSE similarity": 0.7422917604312603,
"eval_bleu": 0.36480748501421006,
"eval_loss": 1.0829150676727295,
"eval_runtime": 1172.318,
"eval_samples_per_second": 11.922,
"eval_steps_per_second": 0.746,
"step": 36945
},
{
"epoch": 20.03,
"learning_rate": 0.0001661252481501534,
"loss": 0.0855,
"step": 37000
},
{
"epoch": 20.3,
"learning_rate": 0.00016161342717920955,
"loss": 0.067,
"step": 37500
},
{
"epoch": 20.57,
"learning_rate": 0.00015710160620826567,
"loss": 0.0753,
"step": 38000
},
{
"epoch": 20.84,
"learning_rate": 0.0001525897852373218,
"loss": 0.0793,
"step": 38500
},
{
"epoch": 21.0,
"eval_LaBSE similarity": 0.7398600145383046,
"eval_bleu": 0.3595059153487151,
"eval_loss": 1.1107969284057617,
"eval_runtime": 1170.616,
"eval_samples_per_second": 11.939,
"eval_steps_per_second": 0.747,
"step": 38792
},
{
"epoch": 21.11,
"learning_rate": 0.0001480779642663779,
"loss": 0.0731,
"step": 39000
},
{
"epoch": 21.38,
"learning_rate": 0.00014356614329543406,
"loss": 0.0651,
"step": 39500
},
{
"epoch": 21.65,
"learning_rate": 0.00013905432232449018,
"loss": 0.0716,
"step": 40000
},
{
"epoch": 21.92,
"learning_rate": 0.0001345425013535463,
"loss": 0.0748,
"step": 40500
},
{
"epoch": 22.0,
"eval_LaBSE similarity": 0.7413012238766339,
"eval_bleu": 0.36304917318364166,
"eval_loss": 1.1223746538162231,
"eval_runtime": 1177.2462,
"eval_samples_per_second": 11.872,
"eval_steps_per_second": 0.742,
"step": 40639
},
{
"epoch": 22.2,
"learning_rate": 0.00013003068038260244,
"loss": 0.0618,
"step": 41000
},
{
"epoch": 22.47,
"learning_rate": 0.00012551885941165856,
"loss": 0.0619,
"step": 41500
},
{
"epoch": 22.74,
"learning_rate": 0.00012100703844071467,
"loss": 0.0676,
"step": 42000
},
{
"epoch": 23.0,
"eval_LaBSE similarity": 0.7416807065988956,
"eval_bleu": 0.3601707131632923,
"eval_loss": 1.1566632986068726,
"eval_runtime": 1175.1131,
"eval_samples_per_second": 11.893,
"eval_steps_per_second": 0.744,
"step": 42486
}
],
"max_steps": 55410,
"num_train_epochs": 30,
"total_flos": 4.22917524411777e+17,
"trial_name": null,
"trial_params": null
}