NLLB3.3_finetuned / trainer_state.json
Kleber's picture
Upload folder using huggingface_hub
cc52e77 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.4561562514161417,
"eval_steps": 500,
"global_step": 54200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.022658268001993928,
"grad_norm": 0.8800877332687378,
"learning_rate": 4.962236219996677e-05,
"loss": 0.2417,
"step": 500
},
{
"epoch": 0.022658268001993928,
"eval_bleu": 30.5488,
"eval_chrf++": 57.639,
"eval_gen_len": 28.2956,
"eval_loss": 1.1721652746200562,
"eval_runtime": 777.8277,
"eval_samples_per_second": 3.214,
"eval_spbleu": 41.6724,
"eval_steps_per_second": 0.643,
"eval_ter": 56.0615,
"step": 500
},
{
"epoch": 0.045316536003987856,
"grad_norm": 0.8184657096862793,
"learning_rate": 4.924472439993354e-05,
"loss": 1.0084,
"step": 1000
},
{
"epoch": 0.045316536003987856,
"eval_bleu": 31.6027,
"eval_chrf++": 58.8947,
"eval_gen_len": 27.9,
"eval_loss": 1.0108660459518433,
"eval_runtime": 745.4422,
"eval_samples_per_second": 3.354,
"eval_spbleu": 44.064,
"eval_steps_per_second": 0.671,
"eval_ter": 52.7499,
"step": 1000
},
{
"epoch": 0.06797480400598178,
"grad_norm": 0.9998241662979126,
"learning_rate": 4.886708659990031e-05,
"loss": 1.0649,
"step": 1500
},
{
"epoch": 0.06797480400598178,
"eval_bleu": 35.962,
"eval_chrf++": 60.0663,
"eval_gen_len": 27.614,
"eval_loss": 0.9778443574905396,
"eval_runtime": 743.9627,
"eval_samples_per_second": 3.36,
"eval_spbleu": 45.4876,
"eval_steps_per_second": 0.672,
"eval_ter": 52.1752,
"step": 1500
},
{
"epoch": 0.09063307200797571,
"grad_norm": 0.9190363883972168,
"learning_rate": 4.848944879986708e-05,
"loss": 1.0414,
"step": 2000
},
{
"epoch": 0.09063307200797571,
"eval_bleu": 33.9141,
"eval_chrf++": 60.0862,
"eval_gen_len": 27.7216,
"eval_loss": 0.9621853232383728,
"eval_runtime": 739.7943,
"eval_samples_per_second": 3.379,
"eval_spbleu": 45.9477,
"eval_steps_per_second": 0.676,
"eval_ter": 51.1627,
"step": 2000
},
{
"epoch": 0.11329134000996964,
"grad_norm": 0.8132910132408142,
"learning_rate": 4.811181099983384e-05,
"loss": 1.0051,
"step": 2500
},
{
"epoch": 0.11329134000996964,
"eval_bleu": 33.3827,
"eval_chrf++": 60.7457,
"eval_gen_len": 28.04,
"eval_loss": 0.9484396576881409,
"eval_runtime": 751.6447,
"eval_samples_per_second": 3.326,
"eval_spbleu": 46.5548,
"eval_steps_per_second": 0.665,
"eval_ter": 50.9017,
"step": 2500
},
{
"epoch": 0.13594960801196357,
"grad_norm": 0.8168209195137024,
"learning_rate": 4.7734173199800606e-05,
"loss": 1.0033,
"step": 3000
},
{
"epoch": 0.13594960801196357,
"eval_bleu": 33.8725,
"eval_chrf++": 60.8653,
"eval_gen_len": 28.0696,
"eval_loss": 0.9424599409103394,
"eval_runtime": 759.5734,
"eval_samples_per_second": 3.291,
"eval_spbleu": 46.9775,
"eval_steps_per_second": 0.658,
"eval_ter": 50.6539,
"step": 3000
},
{
"epoch": 0.15860787601395748,
"grad_norm": 0.9191615581512451,
"learning_rate": 4.735653539976738e-05,
"loss": 0.994,
"step": 3500
},
{
"epoch": 0.15860787601395748,
"eval_bleu": 34.0874,
"eval_chrf++": 61.217,
"eval_gen_len": 27.996,
"eval_loss": 0.9314232468605042,
"eval_runtime": 754.8484,
"eval_samples_per_second": 3.312,
"eval_spbleu": 47.405,
"eval_steps_per_second": 0.662,
"eval_ter": 50.0527,
"step": 3500
},
{
"epoch": 0.18126614401595142,
"grad_norm": 0.7960318326950073,
"learning_rate": 4.697889759973415e-05,
"loss": 0.9801,
"step": 4000
},
{
"epoch": 0.18126614401595142,
"eval_bleu": 37.2884,
"eval_chrf++": 61.5163,
"eval_gen_len": 27.9868,
"eval_loss": 0.9198995232582092,
"eval_runtime": 750.5441,
"eval_samples_per_second": 3.331,
"eval_spbleu": 47.7096,
"eval_steps_per_second": 0.666,
"eval_ter": 50.0316,
"step": 4000
},
{
"epoch": 0.20392441201794534,
"grad_norm": 0.9299506545066833,
"learning_rate": 4.6601259799700914e-05,
"loss": 0.9679,
"step": 4500
},
{
"epoch": 0.20392441201794534,
"eval_bleu": 36.221,
"eval_chrf++": 61.6489,
"eval_gen_len": 27.7652,
"eval_loss": 0.9132654070854187,
"eval_runtime": 742.4239,
"eval_samples_per_second": 3.367,
"eval_spbleu": 48.0051,
"eval_steps_per_second": 0.673,
"eval_ter": 49.4911,
"step": 4500
},
{
"epoch": 0.22658268001993928,
"grad_norm": 0.8396582007408142,
"learning_rate": 4.622362199966768e-05,
"loss": 0.9567,
"step": 5000
},
{
"epoch": 0.22658268001993928,
"eval_bleu": 35.8613,
"eval_chrf++": 62.1554,
"eval_gen_len": 27.9184,
"eval_loss": 0.9109494090080261,
"eval_runtime": 749.1424,
"eval_samples_per_second": 3.337,
"eval_spbleu": 48.6507,
"eval_steps_per_second": 0.667,
"eval_ter": 49.9394,
"step": 5000
},
{
"epoch": 0.2492409480219332,
"grad_norm": 0.6273393034934998,
"learning_rate": 4.584598419963445e-05,
"loss": 0.9625,
"step": 5500
},
{
"epoch": 0.2492409480219332,
"eval_bleu": 34.9284,
"eval_chrf++": 61.8771,
"eval_gen_len": 27.8456,
"eval_loss": 0.9041927456855774,
"eval_runtime": 762.3368,
"eval_samples_per_second": 3.279,
"eval_spbleu": 48.461,
"eval_steps_per_second": 0.656,
"eval_ter": 49.4832,
"step": 5500
},
{
"epoch": 0.27189921602392714,
"grad_norm": 0.8030633330345154,
"learning_rate": 4.546834639960122e-05,
"loss": 0.9465,
"step": 6000
},
{
"epoch": 0.27189921602392714,
"eval_bleu": 35.4623,
"eval_chrf++": 62.3793,
"eval_gen_len": 27.7244,
"eval_loss": 0.8957546949386597,
"eval_runtime": 760.5666,
"eval_samples_per_second": 3.287,
"eval_spbleu": 49.0727,
"eval_steps_per_second": 0.657,
"eval_ter": 48.5367,
"step": 6000
},
{
"epoch": 0.2945574840259211,
"grad_norm": 0.8178768754005432,
"learning_rate": 4.509070859956798e-05,
"loss": 0.9275,
"step": 6500
},
{
"epoch": 0.2945574840259211,
"eval_bleu": 35.2431,
"eval_chrf++": 62.0683,
"eval_gen_len": 27.8264,
"eval_loss": 0.8866144418716431,
"eval_runtime": 751.0343,
"eval_samples_per_second": 3.329,
"eval_spbleu": 48.8382,
"eval_steps_per_second": 0.666,
"eval_ter": 48.9164,
"step": 6500
},
{
"epoch": 0.31721575202791497,
"grad_norm": 0.6623912453651428,
"learning_rate": 4.471307079953475e-05,
"loss": 0.925,
"step": 7000
},
{
"epoch": 0.31721575202791497,
"eval_bleu": 35.6474,
"eval_chrf++": 62.5261,
"eval_gen_len": 27.9388,
"eval_loss": 0.884535551071167,
"eval_runtime": 758.4604,
"eval_samples_per_second": 3.296,
"eval_spbleu": 49.2377,
"eval_steps_per_second": 0.659,
"eval_ter": 48.4945,
"step": 7000
},
{
"epoch": 0.3398740200299089,
"grad_norm": 0.8043058514595032,
"learning_rate": 4.433543299950152e-05,
"loss": 0.928,
"step": 7500
},
{
"epoch": 0.3398740200299089,
"eval_bleu": 35.1147,
"eval_chrf++": 62.2742,
"eval_gen_len": 28.0044,
"eval_loss": 0.8792969584465027,
"eval_runtime": 755.7098,
"eval_samples_per_second": 3.308,
"eval_spbleu": 48.8554,
"eval_steps_per_second": 0.662,
"eval_ter": 49.3198,
"step": 7500
},
{
"epoch": 0.36253228803190285,
"grad_norm": 0.747755765914917,
"learning_rate": 4.395779519946829e-05,
"loss": 0.9096,
"step": 8000
},
{
"epoch": 0.36253228803190285,
"eval_bleu": 35.7901,
"eval_chrf++": 62.8302,
"eval_gen_len": 27.9312,
"eval_loss": 0.8780434727668762,
"eval_runtime": 770.928,
"eval_samples_per_second": 3.243,
"eval_spbleu": 49.5562,
"eval_steps_per_second": 0.649,
"eval_ter": 48.5578,
"step": 8000
},
{
"epoch": 0.3851905560338968,
"grad_norm": 0.6910504102706909,
"learning_rate": 4.358015739943506e-05,
"loss": 0.9014,
"step": 8500
},
{
"epoch": 0.3851905560338968,
"eval_bleu": 36.2166,
"eval_chrf++": 63.0932,
"eval_gen_len": 27.8432,
"eval_loss": 0.8704683780670166,
"eval_runtime": 750.7247,
"eval_samples_per_second": 3.33,
"eval_spbleu": 50.2483,
"eval_steps_per_second": 0.666,
"eval_ter": 48.3258,
"step": 8500
},
{
"epoch": 0.4078488240358907,
"grad_norm": 0.7716640830039978,
"learning_rate": 4.3202519599401825e-05,
"loss": 0.9059,
"step": 9000
},
{
"epoch": 0.4078488240358907,
"eval_bleu": 41.2195,
"eval_chrf++": 62.902,
"eval_gen_len": 27.5208,
"eval_loss": 0.8670679926872253,
"eval_runtime": 748.217,
"eval_samples_per_second": 3.341,
"eval_spbleu": 49.6619,
"eval_steps_per_second": 0.668,
"eval_ter": 48.3732,
"step": 9000
},
{
"epoch": 0.4305070920378846,
"grad_norm": 0.801590085029602,
"learning_rate": 4.282488179936859e-05,
"loss": 0.8976,
"step": 9500
},
{
"epoch": 0.4305070920378846,
"eval_bleu": 35.9681,
"eval_chrf++": 63.0627,
"eval_gen_len": 28.004,
"eval_loss": 0.8662621378898621,
"eval_runtime": 762.3803,
"eval_samples_per_second": 3.279,
"eval_spbleu": 50.006,
"eval_steps_per_second": 0.656,
"eval_ter": 48.4945,
"step": 9500
},
{
"epoch": 0.45316536003987856,
"grad_norm": 0.7821282148361206,
"learning_rate": 4.244724399933536e-05,
"loss": 0.889,
"step": 10000
},
{
"epoch": 0.45316536003987856,
"eval_bleu": 36.3811,
"eval_chrf++": 63.2137,
"eval_gen_len": 27.9044,
"eval_loss": 0.8606961965560913,
"eval_runtime": 766.3054,
"eval_samples_per_second": 3.262,
"eval_spbleu": 50.2147,
"eval_steps_per_second": 0.652,
"eval_ter": 47.6614,
"step": 10000
},
{
"epoch": 0.4758236280418725,
"grad_norm": 0.7221835851669312,
"learning_rate": 4.206960619930213e-05,
"loss": 0.8979,
"step": 10500
},
{
"epoch": 0.4758236280418725,
"eval_bleu": 36.4697,
"eval_chrf++": 63.3942,
"eval_gen_len": 27.9276,
"eval_loss": 0.8558794856071472,
"eval_runtime": 758.2694,
"eval_samples_per_second": 3.297,
"eval_spbleu": 50.6637,
"eval_steps_per_second": 0.659,
"eval_ter": 48.0226,
"step": 10500
},
{
"epoch": 0.4984818960438664,
"grad_norm": 0.5793339014053345,
"learning_rate": 4.1691968399268894e-05,
"loss": 0.8817,
"step": 11000
},
{
"epoch": 0.4984818960438664,
"eval_bleu": 36.4549,
"eval_chrf++": 63.2444,
"eval_gen_len": 27.7924,
"eval_loss": 0.8521751761436462,
"eval_runtime": 746.8271,
"eval_samples_per_second": 3.347,
"eval_spbleu": 50.4999,
"eval_steps_per_second": 0.669,
"eval_ter": 47.9487,
"step": 11000
},
{
"epoch": 0.5211401640458604,
"grad_norm": 0.8384801149368286,
"learning_rate": 4.131433059923566e-05,
"loss": 0.8696,
"step": 11500
},
{
"epoch": 0.5211401640458604,
"eval_bleu": 36.1199,
"eval_chrf++": 63.343,
"eval_gen_len": 28.076,
"eval_loss": 0.8511990308761597,
"eval_runtime": 758.0827,
"eval_samples_per_second": 3.298,
"eval_spbleu": 50.1722,
"eval_steps_per_second": 0.66,
"eval_ter": 48.6316,
"step": 11500
},
{
"epoch": 0.5437984320478543,
"grad_norm": 0.6142855882644653,
"learning_rate": 4.093669279920243e-05,
"loss": 0.8758,
"step": 12000
},
{
"epoch": 0.5437984320478543,
"eval_bleu": 36.7344,
"eval_chrf++": 63.7589,
"eval_gen_len": 27.9936,
"eval_loss": 0.8446237444877625,
"eval_runtime": 749.403,
"eval_samples_per_second": 3.336,
"eval_spbleu": 50.8742,
"eval_steps_per_second": 0.667,
"eval_ter": 47.1841,
"step": 12000
},
{
"epoch": 0.5664567000498482,
"grad_norm": 0.5856760144233704,
"learning_rate": 4.05590549991692e-05,
"loss": 0.8793,
"step": 12500
},
{
"epoch": 0.5664567000498482,
"eval_bleu": 41.021,
"eval_chrf++": 63.6181,
"eval_gen_len": 27.5668,
"eval_loss": 0.8422514796257019,
"eval_runtime": 733.2302,
"eval_samples_per_second": 3.41,
"eval_spbleu": 50.6419,
"eval_steps_per_second": 0.682,
"eval_ter": 47.1499,
"step": 12500
},
{
"epoch": 0.5891149680518422,
"grad_norm": 0.6606504321098328,
"learning_rate": 4.018141719913597e-05,
"loss": 0.8794,
"step": 13000
},
{
"epoch": 0.5891149680518422,
"eval_bleu": 37.286,
"eval_chrf++": 63.8579,
"eval_gen_len": 27.8624,
"eval_loss": 0.8412001729011536,
"eval_runtime": 745.0766,
"eval_samples_per_second": 3.355,
"eval_spbleu": 51.2084,
"eval_steps_per_second": 0.671,
"eval_ter": 46.4538,
"step": 13000
},
{
"epoch": 0.611773236053836,
"grad_norm": 0.7332282066345215,
"learning_rate": 3.980377939910274e-05,
"loss": 0.8543,
"step": 13500
},
{
"epoch": 0.611773236053836,
"eval_bleu": 36.8567,
"eval_chrf++": 63.6306,
"eval_gen_len": 27.7904,
"eval_loss": 0.8333261013031006,
"eval_runtime": 744.0262,
"eval_samples_per_second": 3.36,
"eval_spbleu": 50.873,
"eval_steps_per_second": 0.672,
"eval_ter": 47.0998,
"step": 13500
},
{
"epoch": 0.6344315040558299,
"grad_norm": 0.8010419607162476,
"learning_rate": 3.9426141599069504e-05,
"loss": 0.8661,
"step": 14000
},
{
"epoch": 0.6344315040558299,
"eval_bleu": 36.9197,
"eval_chrf++": 63.5882,
"eval_gen_len": 27.8996,
"eval_loss": 0.8315057754516602,
"eval_runtime": 748.0384,
"eval_samples_per_second": 3.342,
"eval_spbleu": 50.798,
"eval_steps_per_second": 0.668,
"eval_ter": 46.8968,
"step": 14000
},
{
"epoch": 0.6570897720578239,
"grad_norm": 0.5245931148529053,
"learning_rate": 3.9048503799036265e-05,
"loss": 0.8556,
"step": 14500
},
{
"epoch": 0.6570897720578239,
"eval_bleu": 37.9259,
"eval_chrf++": 63.8176,
"eval_gen_len": 27.84,
"eval_loss": 0.8274693489074707,
"eval_runtime": 748.0441,
"eval_samples_per_second": 3.342,
"eval_spbleu": 51.0265,
"eval_steps_per_second": 0.668,
"eval_ter": 46.9205,
"step": 14500
},
{
"epoch": 0.6797480400598178,
"grad_norm": 0.7052202820777893,
"learning_rate": 3.867086599900304e-05,
"loss": 0.8635,
"step": 15000
},
{
"epoch": 0.6797480400598178,
"eval_bleu": 38.0929,
"eval_chrf++": 63.8837,
"eval_gen_len": 27.8216,
"eval_loss": 0.8276916146278381,
"eval_runtime": 744.6549,
"eval_samples_per_second": 3.357,
"eval_spbleu": 51.13,
"eval_steps_per_second": 0.671,
"eval_ter": 46.9099,
"step": 15000
},
{
"epoch": 0.7024063080618117,
"grad_norm": 0.7750408053398132,
"learning_rate": 3.8293228198969806e-05,
"loss": 0.8412,
"step": 15500
},
{
"epoch": 0.7024063080618117,
"eval_bleu": 38.2488,
"eval_chrf++": 63.9152,
"eval_gen_len": 27.7136,
"eval_loss": 0.8237889409065247,
"eval_runtime": 739.0003,
"eval_samples_per_second": 3.383,
"eval_spbleu": 51.3155,
"eval_steps_per_second": 0.677,
"eval_ter": 46.5224,
"step": 15500
},
{
"epoch": 0.7250645760638057,
"grad_norm": 0.7442999482154846,
"learning_rate": 3.791559039893657e-05,
"loss": 0.8476,
"step": 16000
},
{
"epoch": 0.7250645760638057,
"eval_bleu": 40.1641,
"eval_chrf++": 64.262,
"eval_gen_len": 27.616,
"eval_loss": 0.8218015432357788,
"eval_runtime": 736.1573,
"eval_samples_per_second": 3.396,
"eval_spbleu": 51.5374,
"eval_steps_per_second": 0.679,
"eval_ter": 46.2982,
"step": 16000
},
{
"epoch": 0.7477228440657996,
"grad_norm": 0.6437325477600098,
"learning_rate": 3.753795259890334e-05,
"loss": 0.854,
"step": 16500
},
{
"epoch": 0.7477228440657996,
"eval_bleu": 37.6219,
"eval_chrf++": 64.4048,
"eval_gen_len": 27.9068,
"eval_loss": 0.8224019408226013,
"eval_runtime": 747.0111,
"eval_samples_per_second": 3.347,
"eval_spbleu": 51.8679,
"eval_steps_per_second": 0.669,
"eval_ter": 46.3009,
"step": 16500
},
{
"epoch": 0.7703811120677936,
"grad_norm": 0.7045587301254272,
"learning_rate": 3.716031479887011e-05,
"loss": 0.8433,
"step": 17000
},
{
"epoch": 0.7703811120677936,
"eval_bleu": 38.7383,
"eval_chrf++": 64.511,
"eval_gen_len": 27.94,
"eval_loss": 0.8197815418243408,
"eval_runtime": 750.586,
"eval_samples_per_second": 3.331,
"eval_spbleu": 51.9857,
"eval_steps_per_second": 0.666,
"eval_ter": 46.3378,
"step": 17000
},
{
"epoch": 0.7930393800697875,
"grad_norm": 0.6813265681266785,
"learning_rate": 3.678267699883688e-05,
"loss": 0.8372,
"step": 17500
},
{
"epoch": 0.7930393800697875,
"eval_bleu": 39.4841,
"eval_chrf++": 64.344,
"eval_gen_len": 27.5488,
"eval_loss": 0.8175507187843323,
"eval_runtime": 732.9073,
"eval_samples_per_second": 3.411,
"eval_spbleu": 51.5575,
"eval_steps_per_second": 0.682,
"eval_ter": 46.1163,
"step": 17500
},
{
"epoch": 0.8156976480717814,
"grad_norm": 0.5977945327758789,
"learning_rate": 3.640503919880365e-05,
"loss": 0.8317,
"step": 18000
},
{
"epoch": 0.8156976480717814,
"eval_bleu": 37.7506,
"eval_chrf++": 64.0896,
"eval_gen_len": 27.8936,
"eval_loss": 0.8134418725967407,
"eval_runtime": 746.7872,
"eval_samples_per_second": 3.348,
"eval_spbleu": 51.6484,
"eval_steps_per_second": 0.67,
"eval_ter": 46.6937,
"step": 18000
},
{
"epoch": 0.8383559160737754,
"grad_norm": 0.6788719892501831,
"learning_rate": 3.602740139877041e-05,
"loss": 0.8331,
"step": 18500
},
{
"epoch": 0.8383559160737754,
"eval_bleu": 37.9443,
"eval_chrf++": 64.6177,
"eval_gen_len": 28.022,
"eval_loss": 0.8157890439033508,
"eval_runtime": 748.194,
"eval_samples_per_second": 3.341,
"eval_spbleu": 52.1179,
"eval_steps_per_second": 0.668,
"eval_ter": 46.2323,
"step": 18500
},
{
"epoch": 0.8610141840757692,
"grad_norm": 0.6696301102638245,
"learning_rate": 3.5649763598737176e-05,
"loss": 0.8342,
"step": 19000
},
{
"epoch": 0.8610141840757692,
"eval_bleu": 38.7784,
"eval_chrf++": 64.6136,
"eval_gen_len": 27.9508,
"eval_loss": 0.8111441731452942,
"eval_runtime": 748.3754,
"eval_samples_per_second": 3.341,
"eval_spbleu": 52.0913,
"eval_steps_per_second": 0.668,
"eval_ter": 46.206,
"step": 19000
},
{
"epoch": 0.8836724520777631,
"grad_norm": 0.7288480401039124,
"learning_rate": 3.527212579870395e-05,
"loss": 0.8282,
"step": 19500
},
{
"epoch": 0.8836724520777631,
"eval_bleu": 38.1426,
"eval_chrf++": 64.6892,
"eval_gen_len": 27.8628,
"eval_loss": 0.8068262934684753,
"eval_runtime": 743.9866,
"eval_samples_per_second": 3.36,
"eval_spbleu": 52.3226,
"eval_steps_per_second": 0.672,
"eval_ter": 45.7841,
"step": 19500
},
{
"epoch": 0.9063307200797571,
"grad_norm": 0.6522256731987,
"learning_rate": 3.489448799867072e-05,
"loss": 0.8345,
"step": 20000
},
{
"epoch": 0.9063307200797571,
"eval_bleu": 37.3235,
"eval_chrf++": 64.2609,
"eval_gen_len": 27.9892,
"eval_loss": 0.8071653246879578,
"eval_runtime": 750.865,
"eval_samples_per_second": 3.329,
"eval_spbleu": 51.689,
"eval_steps_per_second": 0.666,
"eval_ter": 47.0022,
"step": 20000
},
{
"epoch": 0.928988988081751,
"grad_norm": 0.7145525217056274,
"learning_rate": 3.4516850198637484e-05,
"loss": 0.8224,
"step": 20500
},
{
"epoch": 0.928988988081751,
"eval_bleu": 37.5387,
"eval_chrf++": 64.2559,
"eval_gen_len": 27.82,
"eval_loss": 0.8012556433677673,
"eval_runtime": 768.5173,
"eval_samples_per_second": 3.253,
"eval_spbleu": 51.7469,
"eval_steps_per_second": 0.651,
"eval_ter": 46.3668,
"step": 20500
},
{
"epoch": 0.951647256083745,
"grad_norm": 0.7082468271255493,
"learning_rate": 3.413921239860425e-05,
"loss": 0.8315,
"step": 21000
},
{
"epoch": 0.951647256083745,
"eval_bleu": 37.9748,
"eval_chrf++": 64.7286,
"eval_gen_len": 27.8264,
"eval_loss": 0.7990391254425049,
"eval_runtime": 772.2382,
"eval_samples_per_second": 3.237,
"eval_spbleu": 52.3829,
"eval_steps_per_second": 0.647,
"eval_ter": 45.8711,
"step": 21000
},
{
"epoch": 0.9743055240857389,
"grad_norm": 0.6599904298782349,
"learning_rate": 3.376157459857102e-05,
"loss": 0.8269,
"step": 21500
},
{
"epoch": 0.9743055240857389,
"eval_bleu": 39.3032,
"eval_chrf++": 64.9609,
"eval_gen_len": 27.8436,
"eval_loss": 0.7974932789802551,
"eval_runtime": 771.5992,
"eval_samples_per_second": 3.24,
"eval_spbleu": 52.5588,
"eval_steps_per_second": 0.648,
"eval_ter": 45.8975,
"step": 21500
},
{
"epoch": 0.9969637920877328,
"grad_norm": 0.6297094821929932,
"learning_rate": 3.338393679853779e-05,
"loss": 0.8159,
"step": 22000
},
{
"epoch": 0.9969637920877328,
"eval_bleu": 38.4628,
"eval_chrf++": 64.9748,
"eval_gen_len": 27.7568,
"eval_loss": 0.7950631380081177,
"eval_runtime": 757.8415,
"eval_samples_per_second": 3.299,
"eval_spbleu": 52.6788,
"eval_steps_per_second": 0.66,
"eval_ter": 45.3491,
"step": 22000
},
{
"epoch": 1.0196220600897268,
"grad_norm": 0.6592913866043091,
"learning_rate": 3.300629899850455e-05,
"loss": 0.6601,
"step": 22500
},
{
"epoch": 1.0196220600897268,
"eval_bleu": 39.9752,
"eval_chrf++": 65.1542,
"eval_gen_len": 27.834,
"eval_loss": 0.810078501701355,
"eval_runtime": 742.5172,
"eval_samples_per_second": 3.367,
"eval_spbleu": 52.8176,
"eval_steps_per_second": 0.673,
"eval_ter": 45.4176,
"step": 22500
},
{
"epoch": 1.0422803280917208,
"grad_norm": 0.5827597379684448,
"learning_rate": 3.262866119847132e-05,
"loss": 0.6316,
"step": 23000
},
{
"epoch": 1.0422803280917208,
"eval_bleu": 43.5482,
"eval_chrf++": 65.346,
"eval_gen_len": 27.59,
"eval_loss": 0.814832329750061,
"eval_runtime": 736.0286,
"eval_samples_per_second": 3.397,
"eval_spbleu": 52.9228,
"eval_steps_per_second": 0.679,
"eval_ter": 45.2832,
"step": 23000
},
{
"epoch": 1.0649385960937146,
"grad_norm": 0.736733078956604,
"learning_rate": 3.225102339843809e-05,
"loss": 0.6365,
"step": 23500
},
{
"epoch": 1.0649385960937146,
"eval_bleu": 40.0573,
"eval_chrf++": 65.2248,
"eval_gen_len": 27.7468,
"eval_loss": 0.8096536993980408,
"eval_runtime": 762.5207,
"eval_samples_per_second": 3.279,
"eval_spbleu": 52.9015,
"eval_steps_per_second": 0.656,
"eval_ter": 45.1302,
"step": 23500
},
{
"epoch": 1.0875968640957085,
"grad_norm": 0.6646838188171387,
"learning_rate": 3.187338559840486e-05,
"loss": 0.6462,
"step": 24000
},
{
"epoch": 1.0875968640957085,
"eval_bleu": 41.7287,
"eval_chrf++": 65.476,
"eval_gen_len": 27.8052,
"eval_loss": 0.8067141771316528,
"eval_runtime": 764.719,
"eval_samples_per_second": 3.269,
"eval_spbleu": 53.0853,
"eval_steps_per_second": 0.654,
"eval_ter": 45.2621,
"step": 24000
},
{
"epoch": 1.1102551320977025,
"grad_norm": 0.7113286852836609,
"learning_rate": 3.149574779837163e-05,
"loss": 0.6383,
"step": 24500
},
{
"epoch": 1.1102551320977025,
"eval_bleu": 44.484,
"eval_chrf++": 65.5094,
"eval_gen_len": 27.502,
"eval_loss": 0.8042193055152893,
"eval_runtime": 761.5932,
"eval_samples_per_second": 3.283,
"eval_spbleu": 53.234,
"eval_steps_per_second": 0.657,
"eval_ter": 45.0591,
"step": 24500
},
{
"epoch": 1.1329134000996963,
"grad_norm": 0.6746016144752502,
"learning_rate": 3.1118109998338396e-05,
"loss": 0.6464,
"step": 25000
},
{
"epoch": 1.1329134000996963,
"eval_bleu": 44.4968,
"eval_chrf++": 65.4383,
"eval_gen_len": 27.4832,
"eval_loss": 0.8051723837852478,
"eval_runtime": 762.6344,
"eval_samples_per_second": 3.278,
"eval_spbleu": 53.209,
"eval_steps_per_second": 0.656,
"eval_ter": 45.1672,
"step": 25000
},
{
"epoch": 1.1555716681016903,
"grad_norm": 0.7094623446464539,
"learning_rate": 3.074047219830516e-05,
"loss": 0.6353,
"step": 25500
},
{
"epoch": 1.1555716681016903,
"eval_bleu": 44.7381,
"eval_chrf++": 65.7012,
"eval_gen_len": 27.5904,
"eval_loss": 0.8053088784217834,
"eval_runtime": 766.039,
"eval_samples_per_second": 3.264,
"eval_spbleu": 53.2617,
"eval_steps_per_second": 0.653,
"eval_ter": 44.9852,
"step": 25500
},
{
"epoch": 1.1782299361036843,
"grad_norm": 0.5892546772956848,
"learning_rate": 3.0362834398271934e-05,
"loss": 0.6483,
"step": 26000
},
{
"epoch": 1.1782299361036843,
"eval_bleu": 44.1957,
"eval_chrf++": 65.3258,
"eval_gen_len": 27.6048,
"eval_loss": 0.8034100532531738,
"eval_runtime": 761.0595,
"eval_samples_per_second": 3.285,
"eval_spbleu": 52.8918,
"eval_steps_per_second": 0.657,
"eval_ter": 45.4572,
"step": 26000
},
{
"epoch": 1.200888204105678,
"grad_norm": 0.744484543800354,
"learning_rate": 2.9985196598238697e-05,
"loss": 0.6382,
"step": 26500
},
{
"epoch": 1.200888204105678,
"eval_bleu": 44.663,
"eval_chrf++": 65.2386,
"eval_gen_len": 27.4888,
"eval_loss": 0.8034644722938538,
"eval_runtime": 742.6066,
"eval_samples_per_second": 3.367,
"eval_spbleu": 52.8205,
"eval_steps_per_second": 0.673,
"eval_ter": 45.1144,
"step": 26500
},
{
"epoch": 1.223546472107672,
"grad_norm": 0.6990212798118591,
"learning_rate": 2.9607558798205465e-05,
"loss": 0.6425,
"step": 27000
},
{
"epoch": 1.223546472107672,
"eval_bleu": 40.3169,
"eval_chrf++": 65.5055,
"eval_gen_len": 27.7456,
"eval_loss": 0.7975181341171265,
"eval_runtime": 753.8226,
"eval_samples_per_second": 3.316,
"eval_spbleu": 53.4546,
"eval_steps_per_second": 0.663,
"eval_ter": 44.7295,
"step": 27000
},
{
"epoch": 1.246204740109666,
"grad_norm": 0.7708460092544556,
"learning_rate": 2.9229920998172232e-05,
"loss": 0.6364,
"step": 27500
},
{
"epoch": 1.246204740109666,
"eval_bleu": 39.0833,
"eval_chrf++": 65.4463,
"eval_gen_len": 27.9864,
"eval_loss": 0.799001157283783,
"eval_runtime": 765.257,
"eval_samples_per_second": 3.267,
"eval_spbleu": 53.3214,
"eval_steps_per_second": 0.653,
"eval_ter": 45.1513,
"step": 27500
},
{
"epoch": 1.2688630081116599,
"grad_norm": 0.7766411900520325,
"learning_rate": 2.8852283198139002e-05,
"loss": 0.6311,
"step": 28000
},
{
"epoch": 1.2688630081116599,
"eval_bleu": 44.6719,
"eval_chrf++": 65.7788,
"eval_gen_len": 27.5984,
"eval_loss": 0.8024120330810547,
"eval_runtime": 739.5078,
"eval_samples_per_second": 3.381,
"eval_spbleu": 53.4111,
"eval_steps_per_second": 0.676,
"eval_ter": 44.8429,
"step": 28000
},
{
"epoch": 1.2915212761136539,
"grad_norm": 0.6488195657730103,
"learning_rate": 2.847464539810577e-05,
"loss": 0.6315,
"step": 28500
},
{
"epoch": 1.2915212761136539,
"eval_bleu": 38.9676,
"eval_chrf++": 65.2009,
"eval_gen_len": 27.7544,
"eval_loss": 0.799105167388916,
"eval_runtime": 751.4345,
"eval_samples_per_second": 3.327,
"eval_spbleu": 53.0925,
"eval_steps_per_second": 0.665,
"eval_ter": 45.0802,
"step": 28500
},
{
"epoch": 1.3141795441156479,
"grad_norm": 0.6332802176475525,
"learning_rate": 2.809700759807254e-05,
"loss": 0.6339,
"step": 29000
},
{
"epoch": 1.3141795441156479,
"eval_bleu": 39.0276,
"eval_chrf++": 65.2617,
"eval_gen_len": 27.824,
"eval_loss": 0.7974073886871338,
"eval_runtime": 754.6502,
"eval_samples_per_second": 3.313,
"eval_spbleu": 53.046,
"eval_steps_per_second": 0.663,
"eval_ter": 45.2331,
"step": 29000
},
{
"epoch": 1.3368378121176416,
"grad_norm": 0.5959407687187195,
"learning_rate": 2.7719369798039307e-05,
"loss": 0.6412,
"step": 29500
},
{
"epoch": 1.3368378121176416,
"eval_bleu": 40.1118,
"eval_chrf++": 65.4814,
"eval_gen_len": 27.8892,
"eval_loss": 0.7944240570068359,
"eval_runtime": 752.0202,
"eval_samples_per_second": 3.324,
"eval_spbleu": 53.4305,
"eval_steps_per_second": 0.665,
"eval_ter": 45.0327,
"step": 29500
},
{
"epoch": 1.3594960801196356,
"grad_norm": 0.6927244067192078,
"learning_rate": 2.7341731998006075e-05,
"loss": 0.6354,
"step": 30000
},
{
"epoch": 1.3594960801196356,
"eval_bleu": 41.7725,
"eval_chrf++": 65.7724,
"eval_gen_len": 27.7264,
"eval_loss": 0.7973920702934265,
"eval_runtime": 742.892,
"eval_samples_per_second": 3.365,
"eval_spbleu": 53.752,
"eval_steps_per_second": 0.673,
"eval_ter": 44.4975,
"step": 30000
},
{
"epoch": 1.3821543481216296,
"grad_norm": 0.6661298871040344,
"learning_rate": 2.696409419797284e-05,
"loss": 0.6294,
"step": 30500
},
{
"epoch": 1.3821543481216296,
"eval_bleu": 39.0417,
"eval_chrf++": 65.4211,
"eval_gen_len": 27.8648,
"eval_loss": 0.795570969581604,
"eval_runtime": 749.9034,
"eval_samples_per_second": 3.334,
"eval_spbleu": 53.378,
"eval_steps_per_second": 0.667,
"eval_ter": 45.0802,
"step": 30500
},
{
"epoch": 1.4048126161236234,
"grad_norm": 0.6107171773910522,
"learning_rate": 2.658645639793961e-05,
"loss": 0.636,
"step": 31000
},
{
"epoch": 1.4048126161236234,
"eval_bleu": 39.9268,
"eval_chrf++": 65.47,
"eval_gen_len": 27.682,
"eval_loss": 0.7938565611839294,
"eval_runtime": 743.0863,
"eval_samples_per_second": 3.364,
"eval_spbleu": 53.3727,
"eval_steps_per_second": 0.673,
"eval_ter": 44.5871,
"step": 31000
},
{
"epoch": 1.4274708841256174,
"grad_norm": 0.6050147414207458,
"learning_rate": 2.6208818597906376e-05,
"loss": 0.6385,
"step": 31500
},
{
"epoch": 1.4274708841256174,
"eval_bleu": 40.4175,
"eval_chrf++": 65.7235,
"eval_gen_len": 27.8052,
"eval_loss": 0.7914307713508606,
"eval_runtime": 748.359,
"eval_samples_per_second": 3.341,
"eval_spbleu": 53.722,
"eval_steps_per_second": 0.668,
"eval_ter": 44.6003,
"step": 31500
},
{
"epoch": 1.4501291521276114,
"grad_norm": 0.8934792280197144,
"learning_rate": 2.5831180797873143e-05,
"loss": 0.6392,
"step": 32000
},
{
"epoch": 1.4501291521276114,
"eval_bleu": 40.6796,
"eval_chrf++": 65.8163,
"eval_gen_len": 27.7424,
"eval_loss": 0.7927303910255432,
"eval_runtime": 748.6849,
"eval_samples_per_second": 3.339,
"eval_spbleu": 53.7139,
"eval_steps_per_second": 0.668,
"eval_ter": 44.4184,
"step": 32000
},
{
"epoch": 1.4727874201296052,
"grad_norm": 0.6742972731590271,
"learning_rate": 2.5453542997839914e-05,
"loss": 0.6364,
"step": 32500
},
{
"epoch": 1.4727874201296052,
"eval_bleu": 40.2137,
"eval_chrf++": 65.6498,
"eval_gen_len": 27.7408,
"eval_loss": 0.7901710867881775,
"eval_runtime": 749.8815,
"eval_samples_per_second": 3.334,
"eval_spbleu": 53.6947,
"eval_steps_per_second": 0.667,
"eval_ter": 44.7031,
"step": 32500
},
{
"epoch": 1.4954456881315992,
"grad_norm": 0.6159557104110718,
"learning_rate": 2.507590519780668e-05,
"loss": 0.6352,
"step": 33000
},
{
"epoch": 1.4954456881315992,
"eval_bleu": 41.264,
"eval_chrf++": 65.7523,
"eval_gen_len": 27.8552,
"eval_loss": 0.7894487380981445,
"eval_runtime": 748.2109,
"eval_samples_per_second": 3.341,
"eval_spbleu": 53.6724,
"eval_steps_per_second": 0.668,
"eval_ter": 44.8244,
"step": 33000
},
{
"epoch": 1.5181039561335932,
"grad_norm": 0.8539830446243286,
"learning_rate": 2.469826739777345e-05,
"loss": 0.6234,
"step": 33500
},
{
"epoch": 1.5181039561335932,
"eval_bleu": 40.1811,
"eval_chrf++": 65.8305,
"eval_gen_len": 27.8484,
"eval_loss": 0.7886767983436584,
"eval_runtime": 745.5767,
"eval_samples_per_second": 3.353,
"eval_spbleu": 53.7788,
"eval_steps_per_second": 0.671,
"eval_ter": 44.5739,
"step": 33500
},
{
"epoch": 1.540762224135587,
"grad_norm": 0.7526208758354187,
"learning_rate": 2.4320629597740216e-05,
"loss": 0.6411,
"step": 34000
},
{
"epoch": 1.540762224135587,
"eval_bleu": 42.6857,
"eval_chrf++": 65.9528,
"eval_gen_len": 27.6804,
"eval_loss": 0.7847135663032532,
"eval_runtime": 741.0515,
"eval_samples_per_second": 3.374,
"eval_spbleu": 53.8997,
"eval_steps_per_second": 0.675,
"eval_ter": 44.3024,
"step": 34000
},
{
"epoch": 1.5634204921375812,
"grad_norm": 0.8738523125648499,
"learning_rate": 2.3942991797706986e-05,
"loss": 0.63,
"step": 34500
},
{
"epoch": 1.5634204921375812,
"eval_bleu": 45.4639,
"eval_chrf++": 66.1251,
"eval_gen_len": 27.5308,
"eval_loss": 0.781486451625824,
"eval_runtime": 736.5596,
"eval_samples_per_second": 3.394,
"eval_spbleu": 53.9656,
"eval_steps_per_second": 0.679,
"eval_ter": 44.334,
"step": 34500
},
{
"epoch": 1.586078760139575,
"grad_norm": 0.7074981927871704,
"learning_rate": 2.3565353997673753e-05,
"loss": 0.6265,
"step": 35000
},
{
"epoch": 1.586078760139575,
"eval_bleu": 44.9751,
"eval_chrf++": 65.9036,
"eval_gen_len": 27.6124,
"eval_loss": 0.7836451530456543,
"eval_runtime": 746.2524,
"eval_samples_per_second": 3.35,
"eval_spbleu": 53.5997,
"eval_steps_per_second": 0.67,
"eval_ter": 44.8059,
"step": 35000
},
{
"epoch": 1.6087370281415687,
"grad_norm": 0.7853338718414307,
"learning_rate": 2.318771619764052e-05,
"loss": 0.6202,
"step": 35500
},
{
"epoch": 1.6087370281415687,
"eval_bleu": 45.0486,
"eval_chrf++": 66.1202,
"eval_gen_len": 27.6572,
"eval_loss": 0.7826634049415588,
"eval_runtime": 750.4591,
"eval_samples_per_second": 3.331,
"eval_spbleu": 53.9114,
"eval_steps_per_second": 0.666,
"eval_ter": 44.7242,
"step": 35500
},
{
"epoch": 1.631395296143563,
"grad_norm": 0.7461378574371338,
"learning_rate": 2.2810078397607288e-05,
"loss": 0.6385,
"step": 36000
},
{
"epoch": 1.631395296143563,
"eval_bleu": 41.8569,
"eval_chrf++": 65.9455,
"eval_gen_len": 27.8612,
"eval_loss": 0.7859405279159546,
"eval_runtime": 753.5356,
"eval_samples_per_second": 3.318,
"eval_spbleu": 53.9218,
"eval_steps_per_second": 0.664,
"eval_ter": 44.5897,
"step": 36000
},
{
"epoch": 1.6540535641455567,
"grad_norm": 0.5514925122261047,
"learning_rate": 2.2432440597574055e-05,
"loss": 0.6269,
"step": 36500
},
{
"epoch": 1.6540535641455567,
"eval_bleu": 43.6861,
"eval_chrf++": 65.9551,
"eval_gen_len": 27.5088,
"eval_loss": 0.7851018905639648,
"eval_runtime": 744.0917,
"eval_samples_per_second": 3.36,
"eval_spbleu": 53.792,
"eval_steps_per_second": 0.672,
"eval_ter": 44.3419,
"step": 36500
},
{
"epoch": 1.6767118321475505,
"grad_norm": 0.6642000675201416,
"learning_rate": 2.2054802797540825e-05,
"loss": 0.6301,
"step": 37000
},
{
"epoch": 1.6767118321475505,
"eval_bleu": 46.0896,
"eval_chrf++": 66.164,
"eval_gen_len": 27.426,
"eval_loss": 0.7796212434768677,
"eval_runtime": 740.9337,
"eval_samples_per_second": 3.374,
"eval_spbleu": 54.0105,
"eval_steps_per_second": 0.675,
"eval_ter": 44.1494,
"step": 37000
},
{
"epoch": 1.6993701001495447,
"grad_norm": 0.8100460171699524,
"learning_rate": 2.167716499750759e-05,
"loss": 0.6213,
"step": 37500
},
{
"epoch": 1.6993701001495447,
"eval_bleu": 45.5601,
"eval_chrf++": 66.0823,
"eval_gen_len": 27.5128,
"eval_loss": 0.7815007567405701,
"eval_runtime": 733.2938,
"eval_samples_per_second": 3.409,
"eval_spbleu": 53.9922,
"eval_steps_per_second": 0.682,
"eval_ter": 44.5133,
"step": 37500
},
{
"epoch": 1.7220283681515385,
"grad_norm": 0.6142133474349976,
"learning_rate": 2.129952719747436e-05,
"loss": 0.623,
"step": 38000
},
{
"epoch": 1.7220283681515385,
"eval_bleu": 45.0364,
"eval_chrf++": 66.1218,
"eval_gen_len": 27.5352,
"eval_loss": 0.7782283425331116,
"eval_runtime": 736.7203,
"eval_samples_per_second": 3.393,
"eval_spbleu": 54.0624,
"eval_steps_per_second": 0.679,
"eval_ter": 44.3314,
"step": 38000
},
{
"epoch": 1.7446866361535325,
"grad_norm": 0.8021434545516968,
"learning_rate": 2.0921889397441127e-05,
"loss": 0.6269,
"step": 38500
},
{
"epoch": 1.7446866361535325,
"eval_bleu": 41.6796,
"eval_chrf++": 66.0402,
"eval_gen_len": 27.7448,
"eval_loss": 0.7799319624900818,
"eval_runtime": 738.6103,
"eval_samples_per_second": 3.385,
"eval_spbleu": 54.1452,
"eval_steps_per_second": 0.677,
"eval_ter": 44.1653,
"step": 38500
},
{
"epoch": 1.7673449041555265,
"grad_norm": 0.6603755354881287,
"learning_rate": 2.0544251597407894e-05,
"loss": 0.6339,
"step": 39000
},
{
"epoch": 1.7673449041555265,
"eval_bleu": 46.523,
"eval_chrf++": 66.3925,
"eval_gen_len": 27.5112,
"eval_loss": 0.7813342809677124,
"eval_runtime": 728.2882,
"eval_samples_per_second": 3.433,
"eval_spbleu": 54.2461,
"eval_steps_per_second": 0.687,
"eval_ter": 44.0071,
"step": 39000
},
{
"epoch": 1.7900031721575203,
"grad_norm": 0.6977267861366272,
"learning_rate": 2.016661379737466e-05,
"loss": 0.621,
"step": 39500
},
{
"epoch": 1.7900031721575203,
"eval_bleu": 43.8812,
"eval_chrf++": 66.186,
"eval_gen_len": 27.6576,
"eval_loss": 0.7753216028213501,
"eval_runtime": 736.3388,
"eval_samples_per_second": 3.395,
"eval_spbleu": 54.244,
"eval_steps_per_second": 0.679,
"eval_ter": 44.3445,
"step": 39500
},
{
"epoch": 1.8126614401595142,
"grad_norm": 0.7790645956993103,
"learning_rate": 1.978897599734143e-05,
"loss": 0.6278,
"step": 40000
},
{
"epoch": 1.8126614401595142,
"eval_bleu": 46.7458,
"eval_chrf++": 66.4123,
"eval_gen_len": 27.4892,
"eval_loss": 0.7777643799781799,
"eval_runtime": 730.5316,
"eval_samples_per_second": 3.422,
"eval_spbleu": 54.4016,
"eval_steps_per_second": 0.684,
"eval_ter": 43.9702,
"step": 40000
},
{
"epoch": 1.8353197081615082,
"grad_norm": 0.6901569366455078,
"learning_rate": 1.94113381973082e-05,
"loss": 0.6221,
"step": 40500
},
{
"epoch": 1.8353197081615082,
"eval_bleu": 45.0544,
"eval_chrf++": 66.3725,
"eval_gen_len": 27.598,
"eval_loss": 0.7787633538246155,
"eval_runtime": 734.485,
"eval_samples_per_second": 3.404,
"eval_spbleu": 54.419,
"eval_steps_per_second": 0.681,
"eval_ter": 43.9201,
"step": 40500
},
{
"epoch": 1.857977976163502,
"grad_norm": 0.7807871103286743,
"learning_rate": 1.9033700397274966e-05,
"loss": 0.6209,
"step": 41000
},
{
"epoch": 1.857977976163502,
"eval_bleu": 44.5498,
"eval_chrf++": 66.3741,
"eval_gen_len": 27.5916,
"eval_loss": 0.7768906354904175,
"eval_runtime": 737.3971,
"eval_samples_per_second": 3.39,
"eval_spbleu": 54.5028,
"eval_steps_per_second": 0.678,
"eval_ter": 43.9728,
"step": 41000
},
{
"epoch": 1.880636244165496,
"grad_norm": 0.8082613945007324,
"learning_rate": 1.8656062597241737e-05,
"loss": 0.6267,
"step": 41500
},
{
"epoch": 1.880636244165496,
"eval_bleu": 45.3502,
"eval_chrf++": 66.5334,
"eval_gen_len": 27.5344,
"eval_loss": 0.7741044759750366,
"eval_runtime": 731.6398,
"eval_samples_per_second": 3.417,
"eval_spbleu": 54.4958,
"eval_steps_per_second": 0.683,
"eval_ter": 43.8726,
"step": 41500
},
{
"epoch": 1.90329451216749,
"grad_norm": 0.5612310171127319,
"learning_rate": 1.82784247972085e-05,
"loss": 0.625,
"step": 42000
},
{
"epoch": 1.90329451216749,
"eval_bleu": 45.4662,
"eval_chrf++": 66.6858,
"eval_gen_len": 27.5552,
"eval_loss": 0.7751156687736511,
"eval_runtime": 734.022,
"eval_samples_per_second": 3.406,
"eval_spbleu": 54.7854,
"eval_steps_per_second": 0.681,
"eval_ter": 43.5404,
"step": 42000
},
{
"epoch": 1.9259527801694838,
"grad_norm": 0.7477275133132935,
"learning_rate": 1.790078699717527e-05,
"loss": 0.6268,
"step": 42500
},
{
"epoch": 1.9259527801694838,
"eval_bleu": 43.7231,
"eval_chrf++": 66.4796,
"eval_gen_len": 27.71,
"eval_loss": 0.7729161977767944,
"eval_runtime": 736.8783,
"eval_samples_per_second": 3.393,
"eval_spbleu": 54.5524,
"eval_steps_per_second": 0.679,
"eval_ter": 43.8383,
"step": 42500
},
{
"epoch": 1.9486110481714778,
"grad_norm": 0.7059822678565979,
"learning_rate": 1.752314919714204e-05,
"loss": 0.6263,
"step": 43000
},
{
"epoch": 1.9486110481714778,
"eval_bleu": 45.1162,
"eval_chrf++": 66.5293,
"eval_gen_len": 27.5624,
"eval_loss": 0.771515429019928,
"eval_runtime": 732.1583,
"eval_samples_per_second": 3.415,
"eval_spbleu": 54.5958,
"eval_steps_per_second": 0.683,
"eval_ter": 43.6564,
"step": 43000
},
{
"epoch": 1.9712693161734718,
"grad_norm": 0.7907470464706421,
"learning_rate": 1.7145511397108806e-05,
"loss": 0.6178,
"step": 43500
},
{
"epoch": 1.9712693161734718,
"eval_bleu": 44.3099,
"eval_chrf++": 66.5724,
"eval_gen_len": 27.632,
"eval_loss": 0.7728075385093689,
"eval_runtime": 733.6074,
"eval_samples_per_second": 3.408,
"eval_spbleu": 54.6752,
"eval_steps_per_second": 0.682,
"eval_ter": 43.725,
"step": 43500
},
{
"epoch": 1.9939275841754656,
"grad_norm": 0.7742732763290405,
"learning_rate": 1.6767873597075573e-05,
"loss": 0.609,
"step": 44000
},
{
"epoch": 1.9939275841754656,
"eval_bleu": 46.4711,
"eval_chrf++": 66.7721,
"eval_gen_len": 27.4864,
"eval_loss": 0.7715900540351868,
"eval_runtime": 726.1293,
"eval_samples_per_second": 3.443,
"eval_spbleu": 54.9332,
"eval_steps_per_second": 0.689,
"eval_ter": 43.3822,
"step": 44000
},
{
"epoch": 2.0165858521774593,
"grad_norm": 0.7575493454933167,
"learning_rate": 1.639023579704234e-05,
"loss": 0.5124,
"step": 44500
},
{
"epoch": 2.0165858521774593,
"eval_bleu": 45.9215,
"eval_chrf++": 66.6482,
"eval_gen_len": 27.622,
"eval_loss": 0.8128123879432678,
"eval_runtime": 732.4537,
"eval_samples_per_second": 3.413,
"eval_spbleu": 54.7846,
"eval_steps_per_second": 0.683,
"eval_ter": 43.7355,
"step": 44500
},
{
"epoch": 2.0392441201794536,
"grad_norm": 0.732072114944458,
"learning_rate": 1.601259799700911e-05,
"loss": 0.4683,
"step": 45000
},
{
"epoch": 2.0392441201794536,
"eval_bleu": 45.6558,
"eval_chrf++": 66.455,
"eval_gen_len": 27.5796,
"eval_loss": 0.8135092258453369,
"eval_runtime": 732.47,
"eval_samples_per_second": 3.413,
"eval_spbleu": 54.5066,
"eval_steps_per_second": 0.683,
"eval_ter": 44.0202,
"step": 45000
},
{
"epoch": 2.0619023881814473,
"grad_norm": 0.7785657644271851,
"learning_rate": 1.5634960196975878e-05,
"loss": 0.4632,
"step": 45500
},
{
"epoch": 2.0619023881814473,
"eval_bleu": 46.9546,
"eval_chrf++": 66.5237,
"eval_gen_len": 27.482,
"eval_loss": 0.81900554895401,
"eval_runtime": 734.696,
"eval_samples_per_second": 3.403,
"eval_spbleu": 54.4887,
"eval_steps_per_second": 0.681,
"eval_ter": 43.7988,
"step": 45500
},
{
"epoch": 2.0845606561834416,
"grad_norm": 1.1765786409378052,
"learning_rate": 1.5257322396942645e-05,
"loss": 0.4696,
"step": 46000
},
{
"epoch": 2.0845606561834416,
"eval_bleu": 47.083,
"eval_chrf++": 66.8275,
"eval_gen_len": 27.5328,
"eval_loss": 0.8155868053436279,
"eval_runtime": 736.5203,
"eval_samples_per_second": 3.394,
"eval_spbleu": 54.892,
"eval_steps_per_second": 0.679,
"eval_ter": 43.456,
"step": 46000
},
{
"epoch": 2.1072189241854353,
"grad_norm": 0.8067004084587097,
"learning_rate": 1.4879684596909412e-05,
"loss": 0.4635,
"step": 46500
},
{
"epoch": 2.1072189241854353,
"eval_bleu": 46.6541,
"eval_chrf++": 66.7448,
"eval_gen_len": 27.5476,
"eval_loss": 0.8161126971244812,
"eval_runtime": 740.7767,
"eval_samples_per_second": 3.375,
"eval_spbleu": 54.8934,
"eval_steps_per_second": 0.675,
"eval_ter": 43.3189,
"step": 46500
},
{
"epoch": 2.129877192187429,
"grad_norm": 0.9044099450111389,
"learning_rate": 1.4502046796876181e-05,
"loss": 0.4725,
"step": 47000
},
{
"epoch": 2.129877192187429,
"eval_bleu": 47.2452,
"eval_chrf++": 66.8326,
"eval_gen_len": 27.558,
"eval_loss": 0.8134703040122986,
"eval_runtime": 741.2625,
"eval_samples_per_second": 3.373,
"eval_spbleu": 54.9705,
"eval_steps_per_second": 0.675,
"eval_ter": 43.3216,
"step": 47000
},
{
"epoch": 2.1525354601894233,
"grad_norm": 0.8520795106887817,
"learning_rate": 1.412440899684295e-05,
"loss": 0.4727,
"step": 47500
},
{
"epoch": 2.1525354601894233,
"eval_bleu": 46.5313,
"eval_chrf++": 66.6736,
"eval_gen_len": 27.5296,
"eval_loss": 0.820831835269928,
"eval_runtime": 742.5016,
"eval_samples_per_second": 3.367,
"eval_spbleu": 54.5714,
"eval_steps_per_second": 0.673,
"eval_ter": 43.6722,
"step": 47500
},
{
"epoch": 2.175193728191417,
"grad_norm": 0.7529011964797974,
"learning_rate": 1.3746771196809716e-05,
"loss": 0.4736,
"step": 48000
},
{
"epoch": 2.175193728191417,
"eval_bleu": 46.3524,
"eval_chrf++": 66.7416,
"eval_gen_len": 27.5684,
"eval_loss": 0.8130167126655579,
"eval_runtime": 730.5257,
"eval_samples_per_second": 3.422,
"eval_spbleu": 54.8088,
"eval_steps_per_second": 0.684,
"eval_ter": 43.4718,
"step": 48000
},
{
"epoch": 2.197851996193411,
"grad_norm": 0.7033498883247375,
"learning_rate": 1.3369133396776484e-05,
"loss": 0.471,
"step": 48500
},
{
"epoch": 2.197851996193411,
"eval_bleu": 46.2188,
"eval_chrf++": 66.702,
"eval_gen_len": 27.6192,
"eval_loss": 0.8164393305778503,
"eval_runtime": 732.5336,
"eval_samples_per_second": 3.413,
"eval_spbleu": 54.6656,
"eval_steps_per_second": 0.683,
"eval_ter": 43.6511,
"step": 48500
},
{
"epoch": 2.220510264195405,
"grad_norm": 0.7331113815307617,
"learning_rate": 1.2991495596743253e-05,
"loss": 0.4712,
"step": 49000
},
{
"epoch": 2.220510264195405,
"eval_bleu": 47.0435,
"eval_chrf++": 66.6968,
"eval_gen_len": 27.4924,
"eval_loss": 0.81520676612854,
"eval_runtime": 729.4123,
"eval_samples_per_second": 3.427,
"eval_spbleu": 54.6842,
"eval_steps_per_second": 0.685,
"eval_ter": 43.4376,
"step": 49000
},
{
"epoch": 2.243168532197399,
"grad_norm": 0.6714054346084595,
"learning_rate": 1.261385779671002e-05,
"loss": 0.4741,
"step": 49500
},
{
"epoch": 2.243168532197399,
"eval_bleu": 47.2441,
"eval_chrf++": 66.8706,
"eval_gen_len": 27.4916,
"eval_loss": 0.8153889775276184,
"eval_runtime": 727.829,
"eval_samples_per_second": 3.435,
"eval_spbleu": 54.9194,
"eval_steps_per_second": 0.687,
"eval_ter": 43.3374,
"step": 49500
},
{
"epoch": 2.2658268001993926,
"grad_norm": 0.7230417132377625,
"learning_rate": 1.2236219996676788e-05,
"loss": 0.4723,
"step": 50000
},
{
"epoch": 2.2658268001993926,
"eval_bleu": 47.0146,
"eval_chrf++": 66.9999,
"eval_gen_len": 27.496,
"eval_loss": 0.8151711225509644,
"eval_runtime": 725.7357,
"eval_samples_per_second": 3.445,
"eval_spbleu": 55.1257,
"eval_steps_per_second": 0.689,
"eval_ter": 43.0605,
"step": 50000
},
{
"epoch": 2.288485068201387,
"grad_norm": 0.7548694014549255,
"learning_rate": 1.1858582196643555e-05,
"loss": 0.4736,
"step": 50500
},
{
"epoch": 2.288485068201387,
"eval_bleu": 47.3114,
"eval_chrf++": 67.059,
"eval_gen_len": 27.5484,
"eval_loss": 0.8111055493354797,
"eval_runtime": 730.4241,
"eval_samples_per_second": 3.423,
"eval_spbleu": 55.1401,
"eval_steps_per_second": 0.685,
"eval_ter": 43.2688,
"step": 50500
},
{
"epoch": 2.3111433362033806,
"grad_norm": 0.6914283037185669,
"learning_rate": 1.1480944396610324e-05,
"loss": 0.4673,
"step": 51000
},
{
"epoch": 2.3111433362033806,
"eval_bleu": 47.0659,
"eval_chrf++": 66.8804,
"eval_gen_len": 27.5096,
"eval_loss": 0.8131672739982605,
"eval_runtime": 728.8458,
"eval_samples_per_second": 3.43,
"eval_spbleu": 54.9036,
"eval_steps_per_second": 0.686,
"eval_ter": 43.3585,
"step": 51000
},
{
"epoch": 2.3338016042053744,
"grad_norm": 0.8246389031410217,
"learning_rate": 1.1103306596577091e-05,
"loss": 0.4598,
"step": 51500
},
{
"epoch": 2.3338016042053744,
"eval_bleu": 47.2042,
"eval_chrf++": 66.7914,
"eval_gen_len": 27.506,
"eval_loss": 0.8135460019111633,
"eval_runtime": 727.1036,
"eval_samples_per_second": 3.438,
"eval_spbleu": 54.8775,
"eval_steps_per_second": 0.688,
"eval_ter": 43.485,
"step": 51500
},
{
"epoch": 2.3564598722073686,
"grad_norm": 0.8400627970695496,
"learning_rate": 1.072566879654386e-05,
"loss": 0.4729,
"step": 52000
},
{
"epoch": 2.3564598722073686,
"eval_bleu": 45.7548,
"eval_chrf++": 66.7646,
"eval_gen_len": 27.63,
"eval_loss": 0.8120532035827637,
"eval_runtime": 730.4307,
"eval_samples_per_second": 3.423,
"eval_spbleu": 54.8446,
"eval_steps_per_second": 0.685,
"eval_ter": 43.4956,
"step": 52000
},
{
"epoch": 2.3791181402093624,
"grad_norm": 0.8089118599891663,
"learning_rate": 1.0348030996510627e-05,
"loss": 0.4683,
"step": 52500
},
{
"epoch": 2.3791181402093624,
"eval_bleu": 46.2723,
"eval_chrf++": 66.8032,
"eval_gen_len": 27.588,
"eval_loss": 0.8125736117362976,
"eval_runtime": 729.5548,
"eval_samples_per_second": 3.427,
"eval_spbleu": 54.9204,
"eval_steps_per_second": 0.685,
"eval_ter": 43.3954,
"step": 52500
},
{
"epoch": 2.401776408211356,
"grad_norm": 0.6534927487373352,
"learning_rate": 9.970393196477396e-06,
"loss": 0.4727,
"step": 53000
},
{
"epoch": 2.401776408211356,
"eval_bleu": 46.7508,
"eval_chrf++": 66.7655,
"eval_gen_len": 27.5272,
"eval_loss": 0.8069682717323303,
"eval_runtime": 732.8023,
"eval_samples_per_second": 3.412,
"eval_spbleu": 54.8765,
"eval_steps_per_second": 0.682,
"eval_ter": 43.3875,
"step": 53000
},
{
"epoch": 2.4244346762133504,
"grad_norm": 0.6930407881736755,
"learning_rate": 9.592755396444163e-06,
"loss": 0.4723,
"step": 53500
},
{
"epoch": 2.4244346762133504,
"eval_bleu": 47.2069,
"eval_chrf++": 66.9501,
"eval_gen_len": 27.5308,
"eval_loss": 0.8114036321640015,
"eval_runtime": 730.3451,
"eval_samples_per_second": 3.423,
"eval_spbleu": 55.1585,
"eval_steps_per_second": 0.685,
"eval_ter": 43.2451,
"step": 53500
},
{
"epoch": 2.447092944215344,
"grad_norm": 0.7665801644325256,
"learning_rate": 9.215117596410932e-06,
"loss": 0.4711,
"step": 54000
},
{
"epoch": 2.447092944215344,
"eval_bleu": 47.4976,
"eval_chrf++": 67.0709,
"eval_gen_len": 27.4768,
"eval_loss": 0.8121780753135681,
"eval_runtime": 727.4198,
"eval_samples_per_second": 3.437,
"eval_spbleu": 55.3017,
"eval_steps_per_second": 0.687,
"eval_ter": 43.0816,
"step": 54000
}
],
"logging_steps": 500,
"max_steps": 66201,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.756455083596841e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}