| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.4561562514161417, | |
| "eval_steps": 500, | |
| "global_step": 54200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.022658268001993928, | |
| "grad_norm": 0.8800877332687378, | |
| "learning_rate": 4.962236219996677e-05, | |
| "loss": 0.2417, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.022658268001993928, | |
| "eval_bleu": 30.5488, | |
| "eval_chrf++": 57.639, | |
| "eval_gen_len": 28.2956, | |
| "eval_loss": 1.1721652746200562, | |
| "eval_runtime": 777.8277, | |
| "eval_samples_per_second": 3.214, | |
| "eval_spbleu": 41.6724, | |
| "eval_steps_per_second": 0.643, | |
| "eval_ter": 56.0615, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.045316536003987856, | |
| "grad_norm": 0.8184657096862793, | |
| "learning_rate": 4.924472439993354e-05, | |
| "loss": 1.0084, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.045316536003987856, | |
| "eval_bleu": 31.6027, | |
| "eval_chrf++": 58.8947, | |
| "eval_gen_len": 27.9, | |
| "eval_loss": 1.0108660459518433, | |
| "eval_runtime": 745.4422, | |
| "eval_samples_per_second": 3.354, | |
| "eval_spbleu": 44.064, | |
| "eval_steps_per_second": 0.671, | |
| "eval_ter": 52.7499, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.06797480400598178, | |
| "grad_norm": 0.9998241662979126, | |
| "learning_rate": 4.886708659990031e-05, | |
| "loss": 1.0649, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.06797480400598178, | |
| "eval_bleu": 35.962, | |
| "eval_chrf++": 60.0663, | |
| "eval_gen_len": 27.614, | |
| "eval_loss": 0.9778443574905396, | |
| "eval_runtime": 743.9627, | |
| "eval_samples_per_second": 3.36, | |
| "eval_spbleu": 45.4876, | |
| "eval_steps_per_second": 0.672, | |
| "eval_ter": 52.1752, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.09063307200797571, | |
| "grad_norm": 0.9190363883972168, | |
| "learning_rate": 4.848944879986708e-05, | |
| "loss": 1.0414, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.09063307200797571, | |
| "eval_bleu": 33.9141, | |
| "eval_chrf++": 60.0862, | |
| "eval_gen_len": 27.7216, | |
| "eval_loss": 0.9621853232383728, | |
| "eval_runtime": 739.7943, | |
| "eval_samples_per_second": 3.379, | |
| "eval_spbleu": 45.9477, | |
| "eval_steps_per_second": 0.676, | |
| "eval_ter": 51.1627, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.11329134000996964, | |
| "grad_norm": 0.8132910132408142, | |
| "learning_rate": 4.811181099983384e-05, | |
| "loss": 1.0051, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.11329134000996964, | |
| "eval_bleu": 33.3827, | |
| "eval_chrf++": 60.7457, | |
| "eval_gen_len": 28.04, | |
| "eval_loss": 0.9484396576881409, | |
| "eval_runtime": 751.6447, | |
| "eval_samples_per_second": 3.326, | |
| "eval_spbleu": 46.5548, | |
| "eval_steps_per_second": 0.665, | |
| "eval_ter": 50.9017, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.13594960801196357, | |
| "grad_norm": 0.8168209195137024, | |
| "learning_rate": 4.7734173199800606e-05, | |
| "loss": 1.0033, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.13594960801196357, | |
| "eval_bleu": 33.8725, | |
| "eval_chrf++": 60.8653, | |
| "eval_gen_len": 28.0696, | |
| "eval_loss": 0.9424599409103394, | |
| "eval_runtime": 759.5734, | |
| "eval_samples_per_second": 3.291, | |
| "eval_spbleu": 46.9775, | |
| "eval_steps_per_second": 0.658, | |
| "eval_ter": 50.6539, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.15860787601395748, | |
| "grad_norm": 0.9191615581512451, | |
| "learning_rate": 4.735653539976738e-05, | |
| "loss": 0.994, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.15860787601395748, | |
| "eval_bleu": 34.0874, | |
| "eval_chrf++": 61.217, | |
| "eval_gen_len": 27.996, | |
| "eval_loss": 0.9314232468605042, | |
| "eval_runtime": 754.8484, | |
| "eval_samples_per_second": 3.312, | |
| "eval_spbleu": 47.405, | |
| "eval_steps_per_second": 0.662, | |
| "eval_ter": 50.0527, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.18126614401595142, | |
| "grad_norm": 0.7960318326950073, | |
| "learning_rate": 4.697889759973415e-05, | |
| "loss": 0.9801, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.18126614401595142, | |
| "eval_bleu": 37.2884, | |
| "eval_chrf++": 61.5163, | |
| "eval_gen_len": 27.9868, | |
| "eval_loss": 0.9198995232582092, | |
| "eval_runtime": 750.5441, | |
| "eval_samples_per_second": 3.331, | |
| "eval_spbleu": 47.7096, | |
| "eval_steps_per_second": 0.666, | |
| "eval_ter": 50.0316, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.20392441201794534, | |
| "grad_norm": 0.9299506545066833, | |
| "learning_rate": 4.6601259799700914e-05, | |
| "loss": 0.9679, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.20392441201794534, | |
| "eval_bleu": 36.221, | |
| "eval_chrf++": 61.6489, | |
| "eval_gen_len": 27.7652, | |
| "eval_loss": 0.9132654070854187, | |
| "eval_runtime": 742.4239, | |
| "eval_samples_per_second": 3.367, | |
| "eval_spbleu": 48.0051, | |
| "eval_steps_per_second": 0.673, | |
| "eval_ter": 49.4911, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.22658268001993928, | |
| "grad_norm": 0.8396582007408142, | |
| "learning_rate": 4.622362199966768e-05, | |
| "loss": 0.9567, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.22658268001993928, | |
| "eval_bleu": 35.8613, | |
| "eval_chrf++": 62.1554, | |
| "eval_gen_len": 27.9184, | |
| "eval_loss": 0.9109494090080261, | |
| "eval_runtime": 749.1424, | |
| "eval_samples_per_second": 3.337, | |
| "eval_spbleu": 48.6507, | |
| "eval_steps_per_second": 0.667, | |
| "eval_ter": 49.9394, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.2492409480219332, | |
| "grad_norm": 0.6273393034934998, | |
| "learning_rate": 4.584598419963445e-05, | |
| "loss": 0.9625, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.2492409480219332, | |
| "eval_bleu": 34.9284, | |
| "eval_chrf++": 61.8771, | |
| "eval_gen_len": 27.8456, | |
| "eval_loss": 0.9041927456855774, | |
| "eval_runtime": 762.3368, | |
| "eval_samples_per_second": 3.279, | |
| "eval_spbleu": 48.461, | |
| "eval_steps_per_second": 0.656, | |
| "eval_ter": 49.4832, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.27189921602392714, | |
| "grad_norm": 0.8030633330345154, | |
| "learning_rate": 4.546834639960122e-05, | |
| "loss": 0.9465, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.27189921602392714, | |
| "eval_bleu": 35.4623, | |
| "eval_chrf++": 62.3793, | |
| "eval_gen_len": 27.7244, | |
| "eval_loss": 0.8957546949386597, | |
| "eval_runtime": 760.5666, | |
| "eval_samples_per_second": 3.287, | |
| "eval_spbleu": 49.0727, | |
| "eval_steps_per_second": 0.657, | |
| "eval_ter": 48.5367, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.2945574840259211, | |
| "grad_norm": 0.8178768754005432, | |
| "learning_rate": 4.509070859956798e-05, | |
| "loss": 0.9275, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.2945574840259211, | |
| "eval_bleu": 35.2431, | |
| "eval_chrf++": 62.0683, | |
| "eval_gen_len": 27.8264, | |
| "eval_loss": 0.8866144418716431, | |
| "eval_runtime": 751.0343, | |
| "eval_samples_per_second": 3.329, | |
| "eval_spbleu": 48.8382, | |
| "eval_steps_per_second": 0.666, | |
| "eval_ter": 48.9164, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.31721575202791497, | |
| "grad_norm": 0.6623912453651428, | |
| "learning_rate": 4.471307079953475e-05, | |
| "loss": 0.925, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.31721575202791497, | |
| "eval_bleu": 35.6474, | |
| "eval_chrf++": 62.5261, | |
| "eval_gen_len": 27.9388, | |
| "eval_loss": 0.884535551071167, | |
| "eval_runtime": 758.4604, | |
| "eval_samples_per_second": 3.296, | |
| "eval_spbleu": 49.2377, | |
| "eval_steps_per_second": 0.659, | |
| "eval_ter": 48.4945, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.3398740200299089, | |
| "grad_norm": 0.8043058514595032, | |
| "learning_rate": 4.433543299950152e-05, | |
| "loss": 0.928, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.3398740200299089, | |
| "eval_bleu": 35.1147, | |
| "eval_chrf++": 62.2742, | |
| "eval_gen_len": 28.0044, | |
| "eval_loss": 0.8792969584465027, | |
| "eval_runtime": 755.7098, | |
| "eval_samples_per_second": 3.308, | |
| "eval_spbleu": 48.8554, | |
| "eval_steps_per_second": 0.662, | |
| "eval_ter": 49.3198, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.36253228803190285, | |
| "grad_norm": 0.747755765914917, | |
| "learning_rate": 4.395779519946829e-05, | |
| "loss": 0.9096, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.36253228803190285, | |
| "eval_bleu": 35.7901, | |
| "eval_chrf++": 62.8302, | |
| "eval_gen_len": 27.9312, | |
| "eval_loss": 0.8780434727668762, | |
| "eval_runtime": 770.928, | |
| "eval_samples_per_second": 3.243, | |
| "eval_spbleu": 49.5562, | |
| "eval_steps_per_second": 0.649, | |
| "eval_ter": 48.5578, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.3851905560338968, | |
| "grad_norm": 0.6910504102706909, | |
| "learning_rate": 4.358015739943506e-05, | |
| "loss": 0.9014, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.3851905560338968, | |
| "eval_bleu": 36.2166, | |
| "eval_chrf++": 63.0932, | |
| "eval_gen_len": 27.8432, | |
| "eval_loss": 0.8704683780670166, | |
| "eval_runtime": 750.7247, | |
| "eval_samples_per_second": 3.33, | |
| "eval_spbleu": 50.2483, | |
| "eval_steps_per_second": 0.666, | |
| "eval_ter": 48.3258, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.4078488240358907, | |
| "grad_norm": 0.7716640830039978, | |
| "learning_rate": 4.3202519599401825e-05, | |
| "loss": 0.9059, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.4078488240358907, | |
| "eval_bleu": 41.2195, | |
| "eval_chrf++": 62.902, | |
| "eval_gen_len": 27.5208, | |
| "eval_loss": 0.8670679926872253, | |
| "eval_runtime": 748.217, | |
| "eval_samples_per_second": 3.341, | |
| "eval_spbleu": 49.6619, | |
| "eval_steps_per_second": 0.668, | |
| "eval_ter": 48.3732, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.4305070920378846, | |
| "grad_norm": 0.801590085029602, | |
| "learning_rate": 4.282488179936859e-05, | |
| "loss": 0.8976, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.4305070920378846, | |
| "eval_bleu": 35.9681, | |
| "eval_chrf++": 63.0627, | |
| "eval_gen_len": 28.004, | |
| "eval_loss": 0.8662621378898621, | |
| "eval_runtime": 762.3803, | |
| "eval_samples_per_second": 3.279, | |
| "eval_spbleu": 50.006, | |
| "eval_steps_per_second": 0.656, | |
| "eval_ter": 48.4945, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.45316536003987856, | |
| "grad_norm": 0.7821282148361206, | |
| "learning_rate": 4.244724399933536e-05, | |
| "loss": 0.889, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.45316536003987856, | |
| "eval_bleu": 36.3811, | |
| "eval_chrf++": 63.2137, | |
| "eval_gen_len": 27.9044, | |
| "eval_loss": 0.8606961965560913, | |
| "eval_runtime": 766.3054, | |
| "eval_samples_per_second": 3.262, | |
| "eval_spbleu": 50.2147, | |
| "eval_steps_per_second": 0.652, | |
| "eval_ter": 47.6614, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.4758236280418725, | |
| "grad_norm": 0.7221835851669312, | |
| "learning_rate": 4.206960619930213e-05, | |
| "loss": 0.8979, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.4758236280418725, | |
| "eval_bleu": 36.4697, | |
| "eval_chrf++": 63.3942, | |
| "eval_gen_len": 27.9276, | |
| "eval_loss": 0.8558794856071472, | |
| "eval_runtime": 758.2694, | |
| "eval_samples_per_second": 3.297, | |
| "eval_spbleu": 50.6637, | |
| "eval_steps_per_second": 0.659, | |
| "eval_ter": 48.0226, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.4984818960438664, | |
| "grad_norm": 0.5793339014053345, | |
| "learning_rate": 4.1691968399268894e-05, | |
| "loss": 0.8817, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.4984818960438664, | |
| "eval_bleu": 36.4549, | |
| "eval_chrf++": 63.2444, | |
| "eval_gen_len": 27.7924, | |
| "eval_loss": 0.8521751761436462, | |
| "eval_runtime": 746.8271, | |
| "eval_samples_per_second": 3.347, | |
| "eval_spbleu": 50.4999, | |
| "eval_steps_per_second": 0.669, | |
| "eval_ter": 47.9487, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.5211401640458604, | |
| "grad_norm": 0.8384801149368286, | |
| "learning_rate": 4.131433059923566e-05, | |
| "loss": 0.8696, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.5211401640458604, | |
| "eval_bleu": 36.1199, | |
| "eval_chrf++": 63.343, | |
| "eval_gen_len": 28.076, | |
| "eval_loss": 0.8511990308761597, | |
| "eval_runtime": 758.0827, | |
| "eval_samples_per_second": 3.298, | |
| "eval_spbleu": 50.1722, | |
| "eval_steps_per_second": 0.66, | |
| "eval_ter": 48.6316, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.5437984320478543, | |
| "grad_norm": 0.6142855882644653, | |
| "learning_rate": 4.093669279920243e-05, | |
| "loss": 0.8758, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.5437984320478543, | |
| "eval_bleu": 36.7344, | |
| "eval_chrf++": 63.7589, | |
| "eval_gen_len": 27.9936, | |
| "eval_loss": 0.8446237444877625, | |
| "eval_runtime": 749.403, | |
| "eval_samples_per_second": 3.336, | |
| "eval_spbleu": 50.8742, | |
| "eval_steps_per_second": 0.667, | |
| "eval_ter": 47.1841, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.5664567000498482, | |
| "grad_norm": 0.5856760144233704, | |
| "learning_rate": 4.05590549991692e-05, | |
| "loss": 0.8793, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.5664567000498482, | |
| "eval_bleu": 41.021, | |
| "eval_chrf++": 63.6181, | |
| "eval_gen_len": 27.5668, | |
| "eval_loss": 0.8422514796257019, | |
| "eval_runtime": 733.2302, | |
| "eval_samples_per_second": 3.41, | |
| "eval_spbleu": 50.6419, | |
| "eval_steps_per_second": 0.682, | |
| "eval_ter": 47.1499, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.5891149680518422, | |
| "grad_norm": 0.6606504321098328, | |
| "learning_rate": 4.018141719913597e-05, | |
| "loss": 0.8794, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.5891149680518422, | |
| "eval_bleu": 37.286, | |
| "eval_chrf++": 63.8579, | |
| "eval_gen_len": 27.8624, | |
| "eval_loss": 0.8412001729011536, | |
| "eval_runtime": 745.0766, | |
| "eval_samples_per_second": 3.355, | |
| "eval_spbleu": 51.2084, | |
| "eval_steps_per_second": 0.671, | |
| "eval_ter": 46.4538, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.611773236053836, | |
| "grad_norm": 0.7332282066345215, | |
| "learning_rate": 3.980377939910274e-05, | |
| "loss": 0.8543, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.611773236053836, | |
| "eval_bleu": 36.8567, | |
| "eval_chrf++": 63.6306, | |
| "eval_gen_len": 27.7904, | |
| "eval_loss": 0.8333261013031006, | |
| "eval_runtime": 744.0262, | |
| "eval_samples_per_second": 3.36, | |
| "eval_spbleu": 50.873, | |
| "eval_steps_per_second": 0.672, | |
| "eval_ter": 47.0998, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.6344315040558299, | |
| "grad_norm": 0.8010419607162476, | |
| "learning_rate": 3.9426141599069504e-05, | |
| "loss": 0.8661, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.6344315040558299, | |
| "eval_bleu": 36.9197, | |
| "eval_chrf++": 63.5882, | |
| "eval_gen_len": 27.8996, | |
| "eval_loss": 0.8315057754516602, | |
| "eval_runtime": 748.0384, | |
| "eval_samples_per_second": 3.342, | |
| "eval_spbleu": 50.798, | |
| "eval_steps_per_second": 0.668, | |
| "eval_ter": 46.8968, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.6570897720578239, | |
| "grad_norm": 0.5245931148529053, | |
| "learning_rate": 3.9048503799036265e-05, | |
| "loss": 0.8556, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.6570897720578239, | |
| "eval_bleu": 37.9259, | |
| "eval_chrf++": 63.8176, | |
| "eval_gen_len": 27.84, | |
| "eval_loss": 0.8274693489074707, | |
| "eval_runtime": 748.0441, | |
| "eval_samples_per_second": 3.342, | |
| "eval_spbleu": 51.0265, | |
| "eval_steps_per_second": 0.668, | |
| "eval_ter": 46.9205, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.6797480400598178, | |
| "grad_norm": 0.7052202820777893, | |
| "learning_rate": 3.867086599900304e-05, | |
| "loss": 0.8635, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.6797480400598178, | |
| "eval_bleu": 38.0929, | |
| "eval_chrf++": 63.8837, | |
| "eval_gen_len": 27.8216, | |
| "eval_loss": 0.8276916146278381, | |
| "eval_runtime": 744.6549, | |
| "eval_samples_per_second": 3.357, | |
| "eval_spbleu": 51.13, | |
| "eval_steps_per_second": 0.671, | |
| "eval_ter": 46.9099, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.7024063080618117, | |
| "grad_norm": 0.7750408053398132, | |
| "learning_rate": 3.8293228198969806e-05, | |
| "loss": 0.8412, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.7024063080618117, | |
| "eval_bleu": 38.2488, | |
| "eval_chrf++": 63.9152, | |
| "eval_gen_len": 27.7136, | |
| "eval_loss": 0.8237889409065247, | |
| "eval_runtime": 739.0003, | |
| "eval_samples_per_second": 3.383, | |
| "eval_spbleu": 51.3155, | |
| "eval_steps_per_second": 0.677, | |
| "eval_ter": 46.5224, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.7250645760638057, | |
| "grad_norm": 0.7442999482154846, | |
| "learning_rate": 3.791559039893657e-05, | |
| "loss": 0.8476, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.7250645760638057, | |
| "eval_bleu": 40.1641, | |
| "eval_chrf++": 64.262, | |
| "eval_gen_len": 27.616, | |
| "eval_loss": 0.8218015432357788, | |
| "eval_runtime": 736.1573, | |
| "eval_samples_per_second": 3.396, | |
| "eval_spbleu": 51.5374, | |
| "eval_steps_per_second": 0.679, | |
| "eval_ter": 46.2982, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.7477228440657996, | |
| "grad_norm": 0.6437325477600098, | |
| "learning_rate": 3.753795259890334e-05, | |
| "loss": 0.854, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.7477228440657996, | |
| "eval_bleu": 37.6219, | |
| "eval_chrf++": 64.4048, | |
| "eval_gen_len": 27.9068, | |
| "eval_loss": 0.8224019408226013, | |
| "eval_runtime": 747.0111, | |
| "eval_samples_per_second": 3.347, | |
| "eval_spbleu": 51.8679, | |
| "eval_steps_per_second": 0.669, | |
| "eval_ter": 46.3009, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.7703811120677936, | |
| "grad_norm": 0.7045587301254272, | |
| "learning_rate": 3.716031479887011e-05, | |
| "loss": 0.8433, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.7703811120677936, | |
| "eval_bleu": 38.7383, | |
| "eval_chrf++": 64.511, | |
| "eval_gen_len": 27.94, | |
| "eval_loss": 0.8197815418243408, | |
| "eval_runtime": 750.586, | |
| "eval_samples_per_second": 3.331, | |
| "eval_spbleu": 51.9857, | |
| "eval_steps_per_second": 0.666, | |
| "eval_ter": 46.3378, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.7930393800697875, | |
| "grad_norm": 0.6813265681266785, | |
| "learning_rate": 3.678267699883688e-05, | |
| "loss": 0.8372, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.7930393800697875, | |
| "eval_bleu": 39.4841, | |
| "eval_chrf++": 64.344, | |
| "eval_gen_len": 27.5488, | |
| "eval_loss": 0.8175507187843323, | |
| "eval_runtime": 732.9073, | |
| "eval_samples_per_second": 3.411, | |
| "eval_spbleu": 51.5575, | |
| "eval_steps_per_second": 0.682, | |
| "eval_ter": 46.1163, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.8156976480717814, | |
| "grad_norm": 0.5977945327758789, | |
| "learning_rate": 3.640503919880365e-05, | |
| "loss": 0.8317, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.8156976480717814, | |
| "eval_bleu": 37.7506, | |
| "eval_chrf++": 64.0896, | |
| "eval_gen_len": 27.8936, | |
| "eval_loss": 0.8134418725967407, | |
| "eval_runtime": 746.7872, | |
| "eval_samples_per_second": 3.348, | |
| "eval_spbleu": 51.6484, | |
| "eval_steps_per_second": 0.67, | |
| "eval_ter": 46.6937, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.8383559160737754, | |
| "grad_norm": 0.6788719892501831, | |
| "learning_rate": 3.602740139877041e-05, | |
| "loss": 0.8331, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.8383559160737754, | |
| "eval_bleu": 37.9443, | |
| "eval_chrf++": 64.6177, | |
| "eval_gen_len": 28.022, | |
| "eval_loss": 0.8157890439033508, | |
| "eval_runtime": 748.194, | |
| "eval_samples_per_second": 3.341, | |
| "eval_spbleu": 52.1179, | |
| "eval_steps_per_second": 0.668, | |
| "eval_ter": 46.2323, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.8610141840757692, | |
| "grad_norm": 0.6696301102638245, | |
| "learning_rate": 3.5649763598737176e-05, | |
| "loss": 0.8342, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.8610141840757692, | |
| "eval_bleu": 38.7784, | |
| "eval_chrf++": 64.6136, | |
| "eval_gen_len": 27.9508, | |
| "eval_loss": 0.8111441731452942, | |
| "eval_runtime": 748.3754, | |
| "eval_samples_per_second": 3.341, | |
| "eval_spbleu": 52.0913, | |
| "eval_steps_per_second": 0.668, | |
| "eval_ter": 46.206, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.8836724520777631, | |
| "grad_norm": 0.7288480401039124, | |
| "learning_rate": 3.527212579870395e-05, | |
| "loss": 0.8282, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.8836724520777631, | |
| "eval_bleu": 38.1426, | |
| "eval_chrf++": 64.6892, | |
| "eval_gen_len": 27.8628, | |
| "eval_loss": 0.8068262934684753, | |
| "eval_runtime": 743.9866, | |
| "eval_samples_per_second": 3.36, | |
| "eval_spbleu": 52.3226, | |
| "eval_steps_per_second": 0.672, | |
| "eval_ter": 45.7841, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.9063307200797571, | |
| "grad_norm": 0.6522256731987, | |
| "learning_rate": 3.489448799867072e-05, | |
| "loss": 0.8345, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.9063307200797571, | |
| "eval_bleu": 37.3235, | |
| "eval_chrf++": 64.2609, | |
| "eval_gen_len": 27.9892, | |
| "eval_loss": 0.8071653246879578, | |
| "eval_runtime": 750.865, | |
| "eval_samples_per_second": 3.329, | |
| "eval_spbleu": 51.689, | |
| "eval_steps_per_second": 0.666, | |
| "eval_ter": 47.0022, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.928988988081751, | |
| "grad_norm": 0.7145525217056274, | |
| "learning_rate": 3.4516850198637484e-05, | |
| "loss": 0.8224, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.928988988081751, | |
| "eval_bleu": 37.5387, | |
| "eval_chrf++": 64.2559, | |
| "eval_gen_len": 27.82, | |
| "eval_loss": 0.8012556433677673, | |
| "eval_runtime": 768.5173, | |
| "eval_samples_per_second": 3.253, | |
| "eval_spbleu": 51.7469, | |
| "eval_steps_per_second": 0.651, | |
| "eval_ter": 46.3668, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.951647256083745, | |
| "grad_norm": 0.7082468271255493, | |
| "learning_rate": 3.413921239860425e-05, | |
| "loss": 0.8315, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.951647256083745, | |
| "eval_bleu": 37.9748, | |
| "eval_chrf++": 64.7286, | |
| "eval_gen_len": 27.8264, | |
| "eval_loss": 0.7990391254425049, | |
| "eval_runtime": 772.2382, | |
| "eval_samples_per_second": 3.237, | |
| "eval_spbleu": 52.3829, | |
| "eval_steps_per_second": 0.647, | |
| "eval_ter": 45.8711, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.9743055240857389, | |
| "grad_norm": 0.6599904298782349, | |
| "learning_rate": 3.376157459857102e-05, | |
| "loss": 0.8269, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.9743055240857389, | |
| "eval_bleu": 39.3032, | |
| "eval_chrf++": 64.9609, | |
| "eval_gen_len": 27.8436, | |
| "eval_loss": 0.7974932789802551, | |
| "eval_runtime": 771.5992, | |
| "eval_samples_per_second": 3.24, | |
| "eval_spbleu": 52.5588, | |
| "eval_steps_per_second": 0.648, | |
| "eval_ter": 45.8975, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.9969637920877328, | |
| "grad_norm": 0.6297094821929932, | |
| "learning_rate": 3.338393679853779e-05, | |
| "loss": 0.8159, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.9969637920877328, | |
| "eval_bleu": 38.4628, | |
| "eval_chrf++": 64.9748, | |
| "eval_gen_len": 27.7568, | |
| "eval_loss": 0.7950631380081177, | |
| "eval_runtime": 757.8415, | |
| "eval_samples_per_second": 3.299, | |
| "eval_spbleu": 52.6788, | |
| "eval_steps_per_second": 0.66, | |
| "eval_ter": 45.3491, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.0196220600897268, | |
| "grad_norm": 0.6592913866043091, | |
| "learning_rate": 3.300629899850455e-05, | |
| "loss": 0.6601, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.0196220600897268, | |
| "eval_bleu": 39.9752, | |
| "eval_chrf++": 65.1542, | |
| "eval_gen_len": 27.834, | |
| "eval_loss": 0.810078501701355, | |
| "eval_runtime": 742.5172, | |
| "eval_samples_per_second": 3.367, | |
| "eval_spbleu": 52.8176, | |
| "eval_steps_per_second": 0.673, | |
| "eval_ter": 45.4176, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.0422803280917208, | |
| "grad_norm": 0.5827597379684448, | |
| "learning_rate": 3.262866119847132e-05, | |
| "loss": 0.6316, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.0422803280917208, | |
| "eval_bleu": 43.5482, | |
| "eval_chrf++": 65.346, | |
| "eval_gen_len": 27.59, | |
| "eval_loss": 0.814832329750061, | |
| "eval_runtime": 736.0286, | |
| "eval_samples_per_second": 3.397, | |
| "eval_spbleu": 52.9228, | |
| "eval_steps_per_second": 0.679, | |
| "eval_ter": 45.2832, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.0649385960937146, | |
| "grad_norm": 0.736733078956604, | |
| "learning_rate": 3.225102339843809e-05, | |
| "loss": 0.6365, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.0649385960937146, | |
| "eval_bleu": 40.0573, | |
| "eval_chrf++": 65.2248, | |
| "eval_gen_len": 27.7468, | |
| "eval_loss": 0.8096536993980408, | |
| "eval_runtime": 762.5207, | |
| "eval_samples_per_second": 3.279, | |
| "eval_spbleu": 52.9015, | |
| "eval_steps_per_second": 0.656, | |
| "eval_ter": 45.1302, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.0875968640957085, | |
| "grad_norm": 0.6646838188171387, | |
| "learning_rate": 3.187338559840486e-05, | |
| "loss": 0.6462, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.0875968640957085, | |
| "eval_bleu": 41.7287, | |
| "eval_chrf++": 65.476, | |
| "eval_gen_len": 27.8052, | |
| "eval_loss": 0.8067141771316528, | |
| "eval_runtime": 764.719, | |
| "eval_samples_per_second": 3.269, | |
| "eval_spbleu": 53.0853, | |
| "eval_steps_per_second": 0.654, | |
| "eval_ter": 45.2621, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.1102551320977025, | |
| "grad_norm": 0.7113286852836609, | |
| "learning_rate": 3.149574779837163e-05, | |
| "loss": 0.6383, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 1.1102551320977025, | |
| "eval_bleu": 44.484, | |
| "eval_chrf++": 65.5094, | |
| "eval_gen_len": 27.502, | |
| "eval_loss": 0.8042193055152893, | |
| "eval_runtime": 761.5932, | |
| "eval_samples_per_second": 3.283, | |
| "eval_spbleu": 53.234, | |
| "eval_steps_per_second": 0.657, | |
| "eval_ter": 45.0591, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 1.1329134000996963, | |
| "grad_norm": 0.6746016144752502, | |
| "learning_rate": 3.1118109998338396e-05, | |
| "loss": 0.6464, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.1329134000996963, | |
| "eval_bleu": 44.4968, | |
| "eval_chrf++": 65.4383, | |
| "eval_gen_len": 27.4832, | |
| "eval_loss": 0.8051723837852478, | |
| "eval_runtime": 762.6344, | |
| "eval_samples_per_second": 3.278, | |
| "eval_spbleu": 53.209, | |
| "eval_steps_per_second": 0.656, | |
| "eval_ter": 45.1672, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.1555716681016903, | |
| "grad_norm": 0.7094623446464539, | |
| "learning_rate": 3.074047219830516e-05, | |
| "loss": 0.6353, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 1.1555716681016903, | |
| "eval_bleu": 44.7381, | |
| "eval_chrf++": 65.7012, | |
| "eval_gen_len": 27.5904, | |
| "eval_loss": 0.8053088784217834, | |
| "eval_runtime": 766.039, | |
| "eval_samples_per_second": 3.264, | |
| "eval_spbleu": 53.2617, | |
| "eval_steps_per_second": 0.653, | |
| "eval_ter": 44.9852, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 1.1782299361036843, | |
| "grad_norm": 0.5892546772956848, | |
| "learning_rate": 3.0362834398271934e-05, | |
| "loss": 0.6483, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.1782299361036843, | |
| "eval_bleu": 44.1957, | |
| "eval_chrf++": 65.3258, | |
| "eval_gen_len": 27.6048, | |
| "eval_loss": 0.8034100532531738, | |
| "eval_runtime": 761.0595, | |
| "eval_samples_per_second": 3.285, | |
| "eval_spbleu": 52.8918, | |
| "eval_steps_per_second": 0.657, | |
| "eval_ter": 45.4572, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.200888204105678, | |
| "grad_norm": 0.744484543800354, | |
| "learning_rate": 2.9985196598238697e-05, | |
| "loss": 0.6382, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 1.200888204105678, | |
| "eval_bleu": 44.663, | |
| "eval_chrf++": 65.2386, | |
| "eval_gen_len": 27.4888, | |
| "eval_loss": 0.8034644722938538, | |
| "eval_runtime": 742.6066, | |
| "eval_samples_per_second": 3.367, | |
| "eval_spbleu": 52.8205, | |
| "eval_steps_per_second": 0.673, | |
| "eval_ter": 45.1144, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 1.223546472107672, | |
| "grad_norm": 0.6990212798118591, | |
| "learning_rate": 2.9607558798205465e-05, | |
| "loss": 0.6425, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 1.223546472107672, | |
| "eval_bleu": 40.3169, | |
| "eval_chrf++": 65.5055, | |
| "eval_gen_len": 27.7456, | |
| "eval_loss": 0.7975181341171265, | |
| "eval_runtime": 753.8226, | |
| "eval_samples_per_second": 3.316, | |
| "eval_spbleu": 53.4546, | |
| "eval_steps_per_second": 0.663, | |
| "eval_ter": 44.7295, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 1.246204740109666, | |
| "grad_norm": 0.7708460092544556, | |
| "learning_rate": 2.9229920998172232e-05, | |
| "loss": 0.6364, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 1.246204740109666, | |
| "eval_bleu": 39.0833, | |
| "eval_chrf++": 65.4463, | |
| "eval_gen_len": 27.9864, | |
| "eval_loss": 0.799001157283783, | |
| "eval_runtime": 765.257, | |
| "eval_samples_per_second": 3.267, | |
| "eval_spbleu": 53.3214, | |
| "eval_steps_per_second": 0.653, | |
| "eval_ter": 45.1513, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 1.2688630081116599, | |
| "grad_norm": 0.7766411900520325, | |
| "learning_rate": 2.8852283198139002e-05, | |
| "loss": 0.6311, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.2688630081116599, | |
| "eval_bleu": 44.6719, | |
| "eval_chrf++": 65.7788, | |
| "eval_gen_len": 27.5984, | |
| "eval_loss": 0.8024120330810547, | |
| "eval_runtime": 739.5078, | |
| "eval_samples_per_second": 3.381, | |
| "eval_spbleu": 53.4111, | |
| "eval_steps_per_second": 0.676, | |
| "eval_ter": 44.8429, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.2915212761136539, | |
| "grad_norm": 0.6488195657730103, | |
| "learning_rate": 2.847464539810577e-05, | |
| "loss": 0.6315, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 1.2915212761136539, | |
| "eval_bleu": 38.9676, | |
| "eval_chrf++": 65.2009, | |
| "eval_gen_len": 27.7544, | |
| "eval_loss": 0.799105167388916, | |
| "eval_runtime": 751.4345, | |
| "eval_samples_per_second": 3.327, | |
| "eval_spbleu": 53.0925, | |
| "eval_steps_per_second": 0.665, | |
| "eval_ter": 45.0802, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 1.3141795441156479, | |
| "grad_norm": 0.6332802176475525, | |
| "learning_rate": 2.809700759807254e-05, | |
| "loss": 0.6339, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 1.3141795441156479, | |
| "eval_bleu": 39.0276, | |
| "eval_chrf++": 65.2617, | |
| "eval_gen_len": 27.824, | |
| "eval_loss": 0.7974073886871338, | |
| "eval_runtime": 754.6502, | |
| "eval_samples_per_second": 3.313, | |
| "eval_spbleu": 53.046, | |
| "eval_steps_per_second": 0.663, | |
| "eval_ter": 45.2331, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 1.3368378121176416, | |
| "grad_norm": 0.5959407687187195, | |
| "learning_rate": 2.7719369798039307e-05, | |
| "loss": 0.6412, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 1.3368378121176416, | |
| "eval_bleu": 40.1118, | |
| "eval_chrf++": 65.4814, | |
| "eval_gen_len": 27.8892, | |
| "eval_loss": 0.7944240570068359, | |
| "eval_runtime": 752.0202, | |
| "eval_samples_per_second": 3.324, | |
| "eval_spbleu": 53.4305, | |
| "eval_steps_per_second": 0.665, | |
| "eval_ter": 45.0327, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 1.3594960801196356, | |
| "grad_norm": 0.6927244067192078, | |
| "learning_rate": 2.7341731998006075e-05, | |
| "loss": 0.6354, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.3594960801196356, | |
| "eval_bleu": 41.7725, | |
| "eval_chrf++": 65.7724, | |
| "eval_gen_len": 27.7264, | |
| "eval_loss": 0.7973920702934265, | |
| "eval_runtime": 742.892, | |
| "eval_samples_per_second": 3.365, | |
| "eval_spbleu": 53.752, | |
| "eval_steps_per_second": 0.673, | |
| "eval_ter": 44.4975, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.3821543481216296, | |
| "grad_norm": 0.6661298871040344, | |
| "learning_rate": 2.696409419797284e-05, | |
| "loss": 0.6294, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 1.3821543481216296, | |
| "eval_bleu": 39.0417, | |
| "eval_chrf++": 65.4211, | |
| "eval_gen_len": 27.8648, | |
| "eval_loss": 0.795570969581604, | |
| "eval_runtime": 749.9034, | |
| "eval_samples_per_second": 3.334, | |
| "eval_spbleu": 53.378, | |
| "eval_steps_per_second": 0.667, | |
| "eval_ter": 45.0802, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 1.4048126161236234, | |
| "grad_norm": 0.6107171773910522, | |
| "learning_rate": 2.658645639793961e-05, | |
| "loss": 0.636, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 1.4048126161236234, | |
| "eval_bleu": 39.9268, | |
| "eval_chrf++": 65.47, | |
| "eval_gen_len": 27.682, | |
| "eval_loss": 0.7938565611839294, | |
| "eval_runtime": 743.0863, | |
| "eval_samples_per_second": 3.364, | |
| "eval_spbleu": 53.3727, | |
| "eval_steps_per_second": 0.673, | |
| "eval_ter": 44.5871, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 1.4274708841256174, | |
| "grad_norm": 0.6050147414207458, | |
| "learning_rate": 2.6208818597906376e-05, | |
| "loss": 0.6385, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 1.4274708841256174, | |
| "eval_bleu": 40.4175, | |
| "eval_chrf++": 65.7235, | |
| "eval_gen_len": 27.8052, | |
| "eval_loss": 0.7914307713508606, | |
| "eval_runtime": 748.359, | |
| "eval_samples_per_second": 3.341, | |
| "eval_spbleu": 53.722, | |
| "eval_steps_per_second": 0.668, | |
| "eval_ter": 44.6003, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 1.4501291521276114, | |
| "grad_norm": 0.8934792280197144, | |
| "learning_rate": 2.5831180797873143e-05, | |
| "loss": 0.6392, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.4501291521276114, | |
| "eval_bleu": 40.6796, | |
| "eval_chrf++": 65.8163, | |
| "eval_gen_len": 27.7424, | |
| "eval_loss": 0.7927303910255432, | |
| "eval_runtime": 748.6849, | |
| "eval_samples_per_second": 3.339, | |
| "eval_spbleu": 53.7139, | |
| "eval_steps_per_second": 0.668, | |
| "eval_ter": 44.4184, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.4727874201296052, | |
| "grad_norm": 0.6742972731590271, | |
| "learning_rate": 2.5453542997839914e-05, | |
| "loss": 0.6364, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 1.4727874201296052, | |
| "eval_bleu": 40.2137, | |
| "eval_chrf++": 65.6498, | |
| "eval_gen_len": 27.7408, | |
| "eval_loss": 0.7901710867881775, | |
| "eval_runtime": 749.8815, | |
| "eval_samples_per_second": 3.334, | |
| "eval_spbleu": 53.6947, | |
| "eval_steps_per_second": 0.667, | |
| "eval_ter": 44.7031, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 1.4954456881315992, | |
| "grad_norm": 0.6159557104110718, | |
| "learning_rate": 2.507590519780668e-05, | |
| "loss": 0.6352, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 1.4954456881315992, | |
| "eval_bleu": 41.264, | |
| "eval_chrf++": 65.7523, | |
| "eval_gen_len": 27.8552, | |
| "eval_loss": 0.7894487380981445, | |
| "eval_runtime": 748.2109, | |
| "eval_samples_per_second": 3.341, | |
| "eval_spbleu": 53.6724, | |
| "eval_steps_per_second": 0.668, | |
| "eval_ter": 44.8244, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 1.5181039561335932, | |
| "grad_norm": 0.8539830446243286, | |
| "learning_rate": 2.469826739777345e-05, | |
| "loss": 0.6234, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 1.5181039561335932, | |
| "eval_bleu": 40.1811, | |
| "eval_chrf++": 65.8305, | |
| "eval_gen_len": 27.8484, | |
| "eval_loss": 0.7886767983436584, | |
| "eval_runtime": 745.5767, | |
| "eval_samples_per_second": 3.353, | |
| "eval_spbleu": 53.7788, | |
| "eval_steps_per_second": 0.671, | |
| "eval_ter": 44.5739, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 1.540762224135587, | |
| "grad_norm": 0.7526208758354187, | |
| "learning_rate": 2.4320629597740216e-05, | |
| "loss": 0.6411, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.540762224135587, | |
| "eval_bleu": 42.6857, | |
| "eval_chrf++": 65.9528, | |
| "eval_gen_len": 27.6804, | |
| "eval_loss": 0.7847135663032532, | |
| "eval_runtime": 741.0515, | |
| "eval_samples_per_second": 3.374, | |
| "eval_spbleu": 53.8997, | |
| "eval_steps_per_second": 0.675, | |
| "eval_ter": 44.3024, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.5634204921375812, | |
| "grad_norm": 0.8738523125648499, | |
| "learning_rate": 2.3942991797706986e-05, | |
| "loss": 0.63, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 1.5634204921375812, | |
| "eval_bleu": 45.4639, | |
| "eval_chrf++": 66.1251, | |
| "eval_gen_len": 27.5308, | |
| "eval_loss": 0.781486451625824, | |
| "eval_runtime": 736.5596, | |
| "eval_samples_per_second": 3.394, | |
| "eval_spbleu": 53.9656, | |
| "eval_steps_per_second": 0.679, | |
| "eval_ter": 44.334, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 1.586078760139575, | |
| "grad_norm": 0.7074981927871704, | |
| "learning_rate": 2.3565353997673753e-05, | |
| "loss": 0.6265, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.586078760139575, | |
| "eval_bleu": 44.9751, | |
| "eval_chrf++": 65.9036, | |
| "eval_gen_len": 27.6124, | |
| "eval_loss": 0.7836451530456543, | |
| "eval_runtime": 746.2524, | |
| "eval_samples_per_second": 3.35, | |
| "eval_spbleu": 53.5997, | |
| "eval_steps_per_second": 0.67, | |
| "eval_ter": 44.8059, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.6087370281415687, | |
| "grad_norm": 0.7853338718414307, | |
| "learning_rate": 2.318771619764052e-05, | |
| "loss": 0.6202, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 1.6087370281415687, | |
| "eval_bleu": 45.0486, | |
| "eval_chrf++": 66.1202, | |
| "eval_gen_len": 27.6572, | |
| "eval_loss": 0.7826634049415588, | |
| "eval_runtime": 750.4591, | |
| "eval_samples_per_second": 3.331, | |
| "eval_spbleu": 53.9114, | |
| "eval_steps_per_second": 0.666, | |
| "eval_ter": 44.7242, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 1.631395296143563, | |
| "grad_norm": 0.7461378574371338, | |
| "learning_rate": 2.2810078397607288e-05, | |
| "loss": 0.6385, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.631395296143563, | |
| "eval_bleu": 41.8569, | |
| "eval_chrf++": 65.9455, | |
| "eval_gen_len": 27.8612, | |
| "eval_loss": 0.7859405279159546, | |
| "eval_runtime": 753.5356, | |
| "eval_samples_per_second": 3.318, | |
| "eval_spbleu": 53.9218, | |
| "eval_steps_per_second": 0.664, | |
| "eval_ter": 44.5897, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.6540535641455567, | |
| "grad_norm": 0.5514925122261047, | |
| "learning_rate": 2.2432440597574055e-05, | |
| "loss": 0.6269, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 1.6540535641455567, | |
| "eval_bleu": 43.6861, | |
| "eval_chrf++": 65.9551, | |
| "eval_gen_len": 27.5088, | |
| "eval_loss": 0.7851018905639648, | |
| "eval_runtime": 744.0917, | |
| "eval_samples_per_second": 3.36, | |
| "eval_spbleu": 53.792, | |
| "eval_steps_per_second": 0.672, | |
| "eval_ter": 44.3419, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 1.6767118321475505, | |
| "grad_norm": 0.6642000675201416, | |
| "learning_rate": 2.2054802797540825e-05, | |
| "loss": 0.6301, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.6767118321475505, | |
| "eval_bleu": 46.0896, | |
| "eval_chrf++": 66.164, | |
| "eval_gen_len": 27.426, | |
| "eval_loss": 0.7796212434768677, | |
| "eval_runtime": 740.9337, | |
| "eval_samples_per_second": 3.374, | |
| "eval_spbleu": 54.0105, | |
| "eval_steps_per_second": 0.675, | |
| "eval_ter": 44.1494, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.6993701001495447, | |
| "grad_norm": 0.8100460171699524, | |
| "learning_rate": 2.167716499750759e-05, | |
| "loss": 0.6213, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 1.6993701001495447, | |
| "eval_bleu": 45.5601, | |
| "eval_chrf++": 66.0823, | |
| "eval_gen_len": 27.5128, | |
| "eval_loss": 0.7815007567405701, | |
| "eval_runtime": 733.2938, | |
| "eval_samples_per_second": 3.409, | |
| "eval_spbleu": 53.9922, | |
| "eval_steps_per_second": 0.682, | |
| "eval_ter": 44.5133, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 1.7220283681515385, | |
| "grad_norm": 0.6142133474349976, | |
| "learning_rate": 2.129952719747436e-05, | |
| "loss": 0.623, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.7220283681515385, | |
| "eval_bleu": 45.0364, | |
| "eval_chrf++": 66.1218, | |
| "eval_gen_len": 27.5352, | |
| "eval_loss": 0.7782283425331116, | |
| "eval_runtime": 736.7203, | |
| "eval_samples_per_second": 3.393, | |
| "eval_spbleu": 54.0624, | |
| "eval_steps_per_second": 0.679, | |
| "eval_ter": 44.3314, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.7446866361535325, | |
| "grad_norm": 0.8021434545516968, | |
| "learning_rate": 2.0921889397441127e-05, | |
| "loss": 0.6269, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 1.7446866361535325, | |
| "eval_bleu": 41.6796, | |
| "eval_chrf++": 66.0402, | |
| "eval_gen_len": 27.7448, | |
| "eval_loss": 0.7799319624900818, | |
| "eval_runtime": 738.6103, | |
| "eval_samples_per_second": 3.385, | |
| "eval_spbleu": 54.1452, | |
| "eval_steps_per_second": 0.677, | |
| "eval_ter": 44.1653, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 1.7673449041555265, | |
| "grad_norm": 0.6603755354881287, | |
| "learning_rate": 2.0544251597407894e-05, | |
| "loss": 0.6339, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 1.7673449041555265, | |
| "eval_bleu": 46.523, | |
| "eval_chrf++": 66.3925, | |
| "eval_gen_len": 27.5112, | |
| "eval_loss": 0.7813342809677124, | |
| "eval_runtime": 728.2882, | |
| "eval_samples_per_second": 3.433, | |
| "eval_spbleu": 54.2461, | |
| "eval_steps_per_second": 0.687, | |
| "eval_ter": 44.0071, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 1.7900031721575203, | |
| "grad_norm": 0.6977267861366272, | |
| "learning_rate": 2.016661379737466e-05, | |
| "loss": 0.621, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 1.7900031721575203, | |
| "eval_bleu": 43.8812, | |
| "eval_chrf++": 66.186, | |
| "eval_gen_len": 27.6576, | |
| "eval_loss": 0.7753216028213501, | |
| "eval_runtime": 736.3388, | |
| "eval_samples_per_second": 3.395, | |
| "eval_spbleu": 54.244, | |
| "eval_steps_per_second": 0.679, | |
| "eval_ter": 44.3445, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 1.8126614401595142, | |
| "grad_norm": 0.7790645956993103, | |
| "learning_rate": 1.978897599734143e-05, | |
| "loss": 0.6278, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.8126614401595142, | |
| "eval_bleu": 46.7458, | |
| "eval_chrf++": 66.4123, | |
| "eval_gen_len": 27.4892, | |
| "eval_loss": 0.7777643799781799, | |
| "eval_runtime": 730.5316, | |
| "eval_samples_per_second": 3.422, | |
| "eval_spbleu": 54.4016, | |
| "eval_steps_per_second": 0.684, | |
| "eval_ter": 43.9702, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.8353197081615082, | |
| "grad_norm": 0.6901569366455078, | |
| "learning_rate": 1.94113381973082e-05, | |
| "loss": 0.6221, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 1.8353197081615082, | |
| "eval_bleu": 45.0544, | |
| "eval_chrf++": 66.3725, | |
| "eval_gen_len": 27.598, | |
| "eval_loss": 0.7787633538246155, | |
| "eval_runtime": 734.485, | |
| "eval_samples_per_second": 3.404, | |
| "eval_spbleu": 54.419, | |
| "eval_steps_per_second": 0.681, | |
| "eval_ter": 43.9201, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 1.857977976163502, | |
| "grad_norm": 0.7807871103286743, | |
| "learning_rate": 1.9033700397274966e-05, | |
| "loss": 0.6209, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 1.857977976163502, | |
| "eval_bleu": 44.5498, | |
| "eval_chrf++": 66.3741, | |
| "eval_gen_len": 27.5916, | |
| "eval_loss": 0.7768906354904175, | |
| "eval_runtime": 737.3971, | |
| "eval_samples_per_second": 3.39, | |
| "eval_spbleu": 54.5028, | |
| "eval_steps_per_second": 0.678, | |
| "eval_ter": 43.9728, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 1.880636244165496, | |
| "grad_norm": 0.8082613945007324, | |
| "learning_rate": 1.8656062597241737e-05, | |
| "loss": 0.6267, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 1.880636244165496, | |
| "eval_bleu": 45.3502, | |
| "eval_chrf++": 66.5334, | |
| "eval_gen_len": 27.5344, | |
| "eval_loss": 0.7741044759750366, | |
| "eval_runtime": 731.6398, | |
| "eval_samples_per_second": 3.417, | |
| "eval_spbleu": 54.4958, | |
| "eval_steps_per_second": 0.683, | |
| "eval_ter": 43.8726, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 1.90329451216749, | |
| "grad_norm": 0.5612310171127319, | |
| "learning_rate": 1.82784247972085e-05, | |
| "loss": 0.625, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.90329451216749, | |
| "eval_bleu": 45.4662, | |
| "eval_chrf++": 66.6858, | |
| "eval_gen_len": 27.5552, | |
| "eval_loss": 0.7751156687736511, | |
| "eval_runtime": 734.022, | |
| "eval_samples_per_second": 3.406, | |
| "eval_spbleu": 54.7854, | |
| "eval_steps_per_second": 0.681, | |
| "eval_ter": 43.5404, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.9259527801694838, | |
| "grad_norm": 0.7477275133132935, | |
| "learning_rate": 1.790078699717527e-05, | |
| "loss": 0.6268, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 1.9259527801694838, | |
| "eval_bleu": 43.7231, | |
| "eval_chrf++": 66.4796, | |
| "eval_gen_len": 27.71, | |
| "eval_loss": 0.7729161977767944, | |
| "eval_runtime": 736.8783, | |
| "eval_samples_per_second": 3.393, | |
| "eval_spbleu": 54.5524, | |
| "eval_steps_per_second": 0.679, | |
| "eval_ter": 43.8383, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 1.9486110481714778, | |
| "grad_norm": 0.7059822678565979, | |
| "learning_rate": 1.752314919714204e-05, | |
| "loss": 0.6263, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 1.9486110481714778, | |
| "eval_bleu": 45.1162, | |
| "eval_chrf++": 66.5293, | |
| "eval_gen_len": 27.5624, | |
| "eval_loss": 0.771515429019928, | |
| "eval_runtime": 732.1583, | |
| "eval_samples_per_second": 3.415, | |
| "eval_spbleu": 54.5958, | |
| "eval_steps_per_second": 0.683, | |
| "eval_ter": 43.6564, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 1.9712693161734718, | |
| "grad_norm": 0.7907470464706421, | |
| "learning_rate": 1.7145511397108806e-05, | |
| "loss": 0.6178, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 1.9712693161734718, | |
| "eval_bleu": 44.3099, | |
| "eval_chrf++": 66.5724, | |
| "eval_gen_len": 27.632, | |
| "eval_loss": 0.7728075385093689, | |
| "eval_runtime": 733.6074, | |
| "eval_samples_per_second": 3.408, | |
| "eval_spbleu": 54.6752, | |
| "eval_steps_per_second": 0.682, | |
| "eval_ter": 43.725, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 1.9939275841754656, | |
| "grad_norm": 0.7742732763290405, | |
| "learning_rate": 1.6767873597075573e-05, | |
| "loss": 0.609, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 1.9939275841754656, | |
| "eval_bleu": 46.4711, | |
| "eval_chrf++": 66.7721, | |
| "eval_gen_len": 27.4864, | |
| "eval_loss": 0.7715900540351868, | |
| "eval_runtime": 726.1293, | |
| "eval_samples_per_second": 3.443, | |
| "eval_spbleu": 54.9332, | |
| "eval_steps_per_second": 0.689, | |
| "eval_ter": 43.3822, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 2.0165858521774593, | |
| "grad_norm": 0.7575493454933167, | |
| "learning_rate": 1.639023579704234e-05, | |
| "loss": 0.5124, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 2.0165858521774593, | |
| "eval_bleu": 45.9215, | |
| "eval_chrf++": 66.6482, | |
| "eval_gen_len": 27.622, | |
| "eval_loss": 0.8128123879432678, | |
| "eval_runtime": 732.4537, | |
| "eval_samples_per_second": 3.413, | |
| "eval_spbleu": 54.7846, | |
| "eval_steps_per_second": 0.683, | |
| "eval_ter": 43.7355, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 2.0392441201794536, | |
| "grad_norm": 0.732072114944458, | |
| "learning_rate": 1.601259799700911e-05, | |
| "loss": 0.4683, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 2.0392441201794536, | |
| "eval_bleu": 45.6558, | |
| "eval_chrf++": 66.455, | |
| "eval_gen_len": 27.5796, | |
| "eval_loss": 0.8135092258453369, | |
| "eval_runtime": 732.47, | |
| "eval_samples_per_second": 3.413, | |
| "eval_spbleu": 54.5066, | |
| "eval_steps_per_second": 0.683, | |
| "eval_ter": 44.0202, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 2.0619023881814473, | |
| "grad_norm": 0.7785657644271851, | |
| "learning_rate": 1.5634960196975878e-05, | |
| "loss": 0.4632, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 2.0619023881814473, | |
| "eval_bleu": 46.9546, | |
| "eval_chrf++": 66.5237, | |
| "eval_gen_len": 27.482, | |
| "eval_loss": 0.81900554895401, | |
| "eval_runtime": 734.696, | |
| "eval_samples_per_second": 3.403, | |
| "eval_spbleu": 54.4887, | |
| "eval_steps_per_second": 0.681, | |
| "eval_ter": 43.7988, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 2.0845606561834416, | |
| "grad_norm": 1.1765786409378052, | |
| "learning_rate": 1.5257322396942645e-05, | |
| "loss": 0.4696, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 2.0845606561834416, | |
| "eval_bleu": 47.083, | |
| "eval_chrf++": 66.8275, | |
| "eval_gen_len": 27.5328, | |
| "eval_loss": 0.8155868053436279, | |
| "eval_runtime": 736.5203, | |
| "eval_samples_per_second": 3.394, | |
| "eval_spbleu": 54.892, | |
| "eval_steps_per_second": 0.679, | |
| "eval_ter": 43.456, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 2.1072189241854353, | |
| "grad_norm": 0.8067004084587097, | |
| "learning_rate": 1.4879684596909412e-05, | |
| "loss": 0.4635, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 2.1072189241854353, | |
| "eval_bleu": 46.6541, | |
| "eval_chrf++": 66.7448, | |
| "eval_gen_len": 27.5476, | |
| "eval_loss": 0.8161126971244812, | |
| "eval_runtime": 740.7767, | |
| "eval_samples_per_second": 3.375, | |
| "eval_spbleu": 54.8934, | |
| "eval_steps_per_second": 0.675, | |
| "eval_ter": 43.3189, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 2.129877192187429, | |
| "grad_norm": 0.9044099450111389, | |
| "learning_rate": 1.4502046796876181e-05, | |
| "loss": 0.4725, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 2.129877192187429, | |
| "eval_bleu": 47.2452, | |
| "eval_chrf++": 66.8326, | |
| "eval_gen_len": 27.558, | |
| "eval_loss": 0.8134703040122986, | |
| "eval_runtime": 741.2625, | |
| "eval_samples_per_second": 3.373, | |
| "eval_spbleu": 54.9705, | |
| "eval_steps_per_second": 0.675, | |
| "eval_ter": 43.3216, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 2.1525354601894233, | |
| "grad_norm": 0.8520795106887817, | |
| "learning_rate": 1.412440899684295e-05, | |
| "loss": 0.4727, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 2.1525354601894233, | |
| "eval_bleu": 46.5313, | |
| "eval_chrf++": 66.6736, | |
| "eval_gen_len": 27.5296, | |
| "eval_loss": 0.820831835269928, | |
| "eval_runtime": 742.5016, | |
| "eval_samples_per_second": 3.367, | |
| "eval_spbleu": 54.5714, | |
| "eval_steps_per_second": 0.673, | |
| "eval_ter": 43.6722, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 2.175193728191417, | |
| "grad_norm": 0.7529011964797974, | |
| "learning_rate": 1.3746771196809716e-05, | |
| "loss": 0.4736, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 2.175193728191417, | |
| "eval_bleu": 46.3524, | |
| "eval_chrf++": 66.7416, | |
| "eval_gen_len": 27.5684, | |
| "eval_loss": 0.8130167126655579, | |
| "eval_runtime": 730.5257, | |
| "eval_samples_per_second": 3.422, | |
| "eval_spbleu": 54.8088, | |
| "eval_steps_per_second": 0.684, | |
| "eval_ter": 43.4718, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 2.197851996193411, | |
| "grad_norm": 0.7033498883247375, | |
| "learning_rate": 1.3369133396776484e-05, | |
| "loss": 0.471, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 2.197851996193411, | |
| "eval_bleu": 46.2188, | |
| "eval_chrf++": 66.702, | |
| "eval_gen_len": 27.6192, | |
| "eval_loss": 0.8164393305778503, | |
| "eval_runtime": 732.5336, | |
| "eval_samples_per_second": 3.413, | |
| "eval_spbleu": 54.6656, | |
| "eval_steps_per_second": 0.683, | |
| "eval_ter": 43.6511, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 2.220510264195405, | |
| "grad_norm": 0.7331113815307617, | |
| "learning_rate": 1.2991495596743253e-05, | |
| "loss": 0.4712, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 2.220510264195405, | |
| "eval_bleu": 47.0435, | |
| "eval_chrf++": 66.6968, | |
| "eval_gen_len": 27.4924, | |
| "eval_loss": 0.81520676612854, | |
| "eval_runtime": 729.4123, | |
| "eval_samples_per_second": 3.427, | |
| "eval_spbleu": 54.6842, | |
| "eval_steps_per_second": 0.685, | |
| "eval_ter": 43.4376, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 2.243168532197399, | |
| "grad_norm": 0.6714054346084595, | |
| "learning_rate": 1.261385779671002e-05, | |
| "loss": 0.4741, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 2.243168532197399, | |
| "eval_bleu": 47.2441, | |
| "eval_chrf++": 66.8706, | |
| "eval_gen_len": 27.4916, | |
| "eval_loss": 0.8153889775276184, | |
| "eval_runtime": 727.829, | |
| "eval_samples_per_second": 3.435, | |
| "eval_spbleu": 54.9194, | |
| "eval_steps_per_second": 0.687, | |
| "eval_ter": 43.3374, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 2.2658268001993926, | |
| "grad_norm": 0.7230417132377625, | |
| "learning_rate": 1.2236219996676788e-05, | |
| "loss": 0.4723, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 2.2658268001993926, | |
| "eval_bleu": 47.0146, | |
| "eval_chrf++": 66.9999, | |
| "eval_gen_len": 27.496, | |
| "eval_loss": 0.8151711225509644, | |
| "eval_runtime": 725.7357, | |
| "eval_samples_per_second": 3.445, | |
| "eval_spbleu": 55.1257, | |
| "eval_steps_per_second": 0.689, | |
| "eval_ter": 43.0605, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 2.288485068201387, | |
| "grad_norm": 0.7548694014549255, | |
| "learning_rate": 1.1858582196643555e-05, | |
| "loss": 0.4736, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 2.288485068201387, | |
| "eval_bleu": 47.3114, | |
| "eval_chrf++": 67.059, | |
| "eval_gen_len": 27.5484, | |
| "eval_loss": 0.8111055493354797, | |
| "eval_runtime": 730.4241, | |
| "eval_samples_per_second": 3.423, | |
| "eval_spbleu": 55.1401, | |
| "eval_steps_per_second": 0.685, | |
| "eval_ter": 43.2688, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 2.3111433362033806, | |
| "grad_norm": 0.6914283037185669, | |
| "learning_rate": 1.1480944396610324e-05, | |
| "loss": 0.4673, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 2.3111433362033806, | |
| "eval_bleu": 47.0659, | |
| "eval_chrf++": 66.8804, | |
| "eval_gen_len": 27.5096, | |
| "eval_loss": 0.8131672739982605, | |
| "eval_runtime": 728.8458, | |
| "eval_samples_per_second": 3.43, | |
| "eval_spbleu": 54.9036, | |
| "eval_steps_per_second": 0.686, | |
| "eval_ter": 43.3585, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 2.3338016042053744, | |
| "grad_norm": 0.8246389031410217, | |
| "learning_rate": 1.1103306596577091e-05, | |
| "loss": 0.4598, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 2.3338016042053744, | |
| "eval_bleu": 47.2042, | |
| "eval_chrf++": 66.7914, | |
| "eval_gen_len": 27.506, | |
| "eval_loss": 0.8135460019111633, | |
| "eval_runtime": 727.1036, | |
| "eval_samples_per_second": 3.438, | |
| "eval_spbleu": 54.8775, | |
| "eval_steps_per_second": 0.688, | |
| "eval_ter": 43.485, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 2.3564598722073686, | |
| "grad_norm": 0.8400627970695496, | |
| "learning_rate": 1.072566879654386e-05, | |
| "loss": 0.4729, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 2.3564598722073686, | |
| "eval_bleu": 45.7548, | |
| "eval_chrf++": 66.7646, | |
| "eval_gen_len": 27.63, | |
| "eval_loss": 0.8120532035827637, | |
| "eval_runtime": 730.4307, | |
| "eval_samples_per_second": 3.423, | |
| "eval_spbleu": 54.8446, | |
| "eval_steps_per_second": 0.685, | |
| "eval_ter": 43.4956, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 2.3791181402093624, | |
| "grad_norm": 0.8089118599891663, | |
| "learning_rate": 1.0348030996510627e-05, | |
| "loss": 0.4683, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 2.3791181402093624, | |
| "eval_bleu": 46.2723, | |
| "eval_chrf++": 66.8032, | |
| "eval_gen_len": 27.588, | |
| "eval_loss": 0.8125736117362976, | |
| "eval_runtime": 729.5548, | |
| "eval_samples_per_second": 3.427, | |
| "eval_spbleu": 54.9204, | |
| "eval_steps_per_second": 0.685, | |
| "eval_ter": 43.3954, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 2.401776408211356, | |
| "grad_norm": 0.6534927487373352, | |
| "learning_rate": 9.970393196477396e-06, | |
| "loss": 0.4727, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 2.401776408211356, | |
| "eval_bleu": 46.7508, | |
| "eval_chrf++": 66.7655, | |
| "eval_gen_len": 27.5272, | |
| "eval_loss": 0.8069682717323303, | |
| "eval_runtime": 732.8023, | |
| "eval_samples_per_second": 3.412, | |
| "eval_spbleu": 54.8765, | |
| "eval_steps_per_second": 0.682, | |
| "eval_ter": 43.3875, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 2.4244346762133504, | |
| "grad_norm": 0.6930407881736755, | |
| "learning_rate": 9.592755396444163e-06, | |
| "loss": 0.4723, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 2.4244346762133504, | |
| "eval_bleu": 47.2069, | |
| "eval_chrf++": 66.9501, | |
| "eval_gen_len": 27.5308, | |
| "eval_loss": 0.8114036321640015, | |
| "eval_runtime": 730.3451, | |
| "eval_samples_per_second": 3.423, | |
| "eval_spbleu": 55.1585, | |
| "eval_steps_per_second": 0.685, | |
| "eval_ter": 43.2451, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 2.447092944215344, | |
| "grad_norm": 0.7665801644325256, | |
| "learning_rate": 9.215117596410932e-06, | |
| "loss": 0.4711, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 2.447092944215344, | |
| "eval_bleu": 47.4976, | |
| "eval_chrf++": 67.0709, | |
| "eval_gen_len": 27.4768, | |
| "eval_loss": 0.8121780753135681, | |
| "eval_runtime": 727.4198, | |
| "eval_samples_per_second": 3.437, | |
| "eval_spbleu": 55.3017, | |
| "eval_steps_per_second": 0.687, | |
| "eval_ter": 43.0816, | |
| "step": 54000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 66201, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.756455083596841e+18, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |