{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.4561562514161417, "eval_steps": 500, "global_step": 54200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.022658268001993928, "grad_norm": 0.8800877332687378, "learning_rate": 4.962236219996677e-05, "loss": 0.2417, "step": 500 }, { "epoch": 0.022658268001993928, "eval_bleu": 30.5488, "eval_chrf++": 57.639, "eval_gen_len": 28.2956, "eval_loss": 1.1721652746200562, "eval_runtime": 777.8277, "eval_samples_per_second": 3.214, "eval_spbleu": 41.6724, "eval_steps_per_second": 0.643, "eval_ter": 56.0615, "step": 500 }, { "epoch": 0.045316536003987856, "grad_norm": 0.8184657096862793, "learning_rate": 4.924472439993354e-05, "loss": 1.0084, "step": 1000 }, { "epoch": 0.045316536003987856, "eval_bleu": 31.6027, "eval_chrf++": 58.8947, "eval_gen_len": 27.9, "eval_loss": 1.0108660459518433, "eval_runtime": 745.4422, "eval_samples_per_second": 3.354, "eval_spbleu": 44.064, "eval_steps_per_second": 0.671, "eval_ter": 52.7499, "step": 1000 }, { "epoch": 0.06797480400598178, "grad_norm": 0.9998241662979126, "learning_rate": 4.886708659990031e-05, "loss": 1.0649, "step": 1500 }, { "epoch": 0.06797480400598178, "eval_bleu": 35.962, "eval_chrf++": 60.0663, "eval_gen_len": 27.614, "eval_loss": 0.9778443574905396, "eval_runtime": 743.9627, "eval_samples_per_second": 3.36, "eval_spbleu": 45.4876, "eval_steps_per_second": 0.672, "eval_ter": 52.1752, "step": 1500 }, { "epoch": 0.09063307200797571, "grad_norm": 0.9190363883972168, "learning_rate": 4.848944879986708e-05, "loss": 1.0414, "step": 2000 }, { "epoch": 0.09063307200797571, "eval_bleu": 33.9141, "eval_chrf++": 60.0862, "eval_gen_len": 27.7216, "eval_loss": 0.9621853232383728, "eval_runtime": 739.7943, "eval_samples_per_second": 3.379, "eval_spbleu": 45.9477, "eval_steps_per_second": 0.676, "eval_ter": 51.1627, "step": 2000 }, { "epoch": 0.11329134000996964, "grad_norm": 0.8132910132408142, "learning_rate": 4.811181099983384e-05, "loss": 1.0051, "step": 2500 }, { "epoch": 0.11329134000996964, "eval_bleu": 33.3827, "eval_chrf++": 60.7457, "eval_gen_len": 28.04, "eval_loss": 0.9484396576881409, "eval_runtime": 751.6447, "eval_samples_per_second": 3.326, "eval_spbleu": 46.5548, "eval_steps_per_second": 0.665, "eval_ter": 50.9017, "step": 2500 }, { "epoch": 0.13594960801196357, "grad_norm": 0.8168209195137024, "learning_rate": 4.7734173199800606e-05, "loss": 1.0033, "step": 3000 }, { "epoch": 0.13594960801196357, "eval_bleu": 33.8725, "eval_chrf++": 60.8653, "eval_gen_len": 28.0696, "eval_loss": 0.9424599409103394, "eval_runtime": 759.5734, "eval_samples_per_second": 3.291, "eval_spbleu": 46.9775, "eval_steps_per_second": 0.658, "eval_ter": 50.6539, "step": 3000 }, { "epoch": 0.15860787601395748, "grad_norm": 0.9191615581512451, "learning_rate": 4.735653539976738e-05, "loss": 0.994, "step": 3500 }, { "epoch": 0.15860787601395748, "eval_bleu": 34.0874, "eval_chrf++": 61.217, "eval_gen_len": 27.996, "eval_loss": 0.9314232468605042, "eval_runtime": 754.8484, "eval_samples_per_second": 3.312, "eval_spbleu": 47.405, "eval_steps_per_second": 0.662, "eval_ter": 50.0527, "step": 3500 }, { "epoch": 0.18126614401595142, "grad_norm": 0.7960318326950073, "learning_rate": 4.697889759973415e-05, "loss": 0.9801, "step": 4000 }, { "epoch": 0.18126614401595142, "eval_bleu": 37.2884, "eval_chrf++": 61.5163, "eval_gen_len": 27.9868, "eval_loss": 0.9198995232582092, "eval_runtime": 750.5441, "eval_samples_per_second": 3.331, "eval_spbleu": 47.7096, "eval_steps_per_second": 0.666, "eval_ter": 50.0316, "step": 4000 }, { "epoch": 0.20392441201794534, "grad_norm": 0.9299506545066833, "learning_rate": 4.6601259799700914e-05, "loss": 0.9679, "step": 4500 }, { "epoch": 0.20392441201794534, "eval_bleu": 36.221, "eval_chrf++": 61.6489, "eval_gen_len": 27.7652, "eval_loss": 0.9132654070854187, "eval_runtime": 742.4239, "eval_samples_per_second": 3.367, "eval_spbleu": 48.0051, "eval_steps_per_second": 0.673, "eval_ter": 49.4911, "step": 4500 }, { "epoch": 0.22658268001993928, "grad_norm": 0.8396582007408142, "learning_rate": 4.622362199966768e-05, "loss": 0.9567, "step": 5000 }, { "epoch": 0.22658268001993928, "eval_bleu": 35.8613, "eval_chrf++": 62.1554, "eval_gen_len": 27.9184, "eval_loss": 0.9109494090080261, "eval_runtime": 749.1424, "eval_samples_per_second": 3.337, "eval_spbleu": 48.6507, "eval_steps_per_second": 0.667, "eval_ter": 49.9394, "step": 5000 }, { "epoch": 0.2492409480219332, "grad_norm": 0.6273393034934998, "learning_rate": 4.584598419963445e-05, "loss": 0.9625, "step": 5500 }, { "epoch": 0.2492409480219332, "eval_bleu": 34.9284, "eval_chrf++": 61.8771, "eval_gen_len": 27.8456, "eval_loss": 0.9041927456855774, "eval_runtime": 762.3368, "eval_samples_per_second": 3.279, "eval_spbleu": 48.461, "eval_steps_per_second": 0.656, "eval_ter": 49.4832, "step": 5500 }, { "epoch": 0.27189921602392714, "grad_norm": 0.8030633330345154, "learning_rate": 4.546834639960122e-05, "loss": 0.9465, "step": 6000 }, { "epoch": 0.27189921602392714, "eval_bleu": 35.4623, "eval_chrf++": 62.3793, "eval_gen_len": 27.7244, "eval_loss": 0.8957546949386597, "eval_runtime": 760.5666, "eval_samples_per_second": 3.287, "eval_spbleu": 49.0727, "eval_steps_per_second": 0.657, "eval_ter": 48.5367, "step": 6000 }, { "epoch": 0.2945574840259211, "grad_norm": 0.8178768754005432, "learning_rate": 4.509070859956798e-05, "loss": 0.9275, "step": 6500 }, { "epoch": 0.2945574840259211, "eval_bleu": 35.2431, "eval_chrf++": 62.0683, "eval_gen_len": 27.8264, "eval_loss": 0.8866144418716431, "eval_runtime": 751.0343, "eval_samples_per_second": 3.329, "eval_spbleu": 48.8382, "eval_steps_per_second": 0.666, "eval_ter": 48.9164, "step": 6500 }, { "epoch": 0.31721575202791497, "grad_norm": 0.6623912453651428, "learning_rate": 4.471307079953475e-05, "loss": 0.925, "step": 7000 }, { "epoch": 0.31721575202791497, "eval_bleu": 35.6474, "eval_chrf++": 62.5261, "eval_gen_len": 27.9388, "eval_loss": 0.884535551071167, "eval_runtime": 758.4604, "eval_samples_per_second": 3.296, "eval_spbleu": 49.2377, "eval_steps_per_second": 0.659, "eval_ter": 48.4945, "step": 7000 }, { "epoch": 0.3398740200299089, "grad_norm": 0.8043058514595032, "learning_rate": 4.433543299950152e-05, "loss": 0.928, "step": 7500 }, { "epoch": 0.3398740200299089, "eval_bleu": 35.1147, "eval_chrf++": 62.2742, "eval_gen_len": 28.0044, "eval_loss": 0.8792969584465027, "eval_runtime": 755.7098, "eval_samples_per_second": 3.308, "eval_spbleu": 48.8554, "eval_steps_per_second": 0.662, "eval_ter": 49.3198, "step": 7500 }, { "epoch": 0.36253228803190285, "grad_norm": 0.747755765914917, "learning_rate": 4.395779519946829e-05, "loss": 0.9096, "step": 8000 }, { "epoch": 0.36253228803190285, "eval_bleu": 35.7901, "eval_chrf++": 62.8302, "eval_gen_len": 27.9312, "eval_loss": 0.8780434727668762, "eval_runtime": 770.928, "eval_samples_per_second": 3.243, "eval_spbleu": 49.5562, "eval_steps_per_second": 0.649, "eval_ter": 48.5578, "step": 8000 }, { "epoch": 0.3851905560338968, "grad_norm": 0.6910504102706909, "learning_rate": 4.358015739943506e-05, "loss": 0.9014, "step": 8500 }, { "epoch": 0.3851905560338968, "eval_bleu": 36.2166, "eval_chrf++": 63.0932, "eval_gen_len": 27.8432, "eval_loss": 0.8704683780670166, "eval_runtime": 750.7247, "eval_samples_per_second": 3.33, "eval_spbleu": 50.2483, "eval_steps_per_second": 0.666, "eval_ter": 48.3258, "step": 8500 }, { "epoch": 0.4078488240358907, "grad_norm": 0.7716640830039978, "learning_rate": 4.3202519599401825e-05, "loss": 0.9059, "step": 9000 }, { "epoch": 0.4078488240358907, "eval_bleu": 41.2195, "eval_chrf++": 62.902, "eval_gen_len": 27.5208, "eval_loss": 0.8670679926872253, "eval_runtime": 748.217, "eval_samples_per_second": 3.341, "eval_spbleu": 49.6619, "eval_steps_per_second": 0.668, "eval_ter": 48.3732, "step": 9000 }, { "epoch": 0.4305070920378846, "grad_norm": 0.801590085029602, "learning_rate": 4.282488179936859e-05, "loss": 0.8976, "step": 9500 }, { "epoch": 0.4305070920378846, "eval_bleu": 35.9681, "eval_chrf++": 63.0627, "eval_gen_len": 28.004, "eval_loss": 0.8662621378898621, "eval_runtime": 762.3803, "eval_samples_per_second": 3.279, "eval_spbleu": 50.006, "eval_steps_per_second": 0.656, "eval_ter": 48.4945, "step": 9500 }, { "epoch": 0.45316536003987856, "grad_norm": 0.7821282148361206, "learning_rate": 4.244724399933536e-05, "loss": 0.889, "step": 10000 }, { "epoch": 0.45316536003987856, "eval_bleu": 36.3811, "eval_chrf++": 63.2137, "eval_gen_len": 27.9044, "eval_loss": 0.8606961965560913, "eval_runtime": 766.3054, "eval_samples_per_second": 3.262, "eval_spbleu": 50.2147, "eval_steps_per_second": 0.652, "eval_ter": 47.6614, "step": 10000 }, { "epoch": 0.4758236280418725, "grad_norm": 0.7221835851669312, "learning_rate": 4.206960619930213e-05, "loss": 0.8979, "step": 10500 }, { "epoch": 0.4758236280418725, "eval_bleu": 36.4697, "eval_chrf++": 63.3942, "eval_gen_len": 27.9276, "eval_loss": 0.8558794856071472, "eval_runtime": 758.2694, "eval_samples_per_second": 3.297, "eval_spbleu": 50.6637, "eval_steps_per_second": 0.659, "eval_ter": 48.0226, "step": 10500 }, { "epoch": 0.4984818960438664, "grad_norm": 0.5793339014053345, "learning_rate": 4.1691968399268894e-05, "loss": 0.8817, "step": 11000 }, { "epoch": 0.4984818960438664, "eval_bleu": 36.4549, "eval_chrf++": 63.2444, "eval_gen_len": 27.7924, "eval_loss": 0.8521751761436462, "eval_runtime": 746.8271, "eval_samples_per_second": 3.347, "eval_spbleu": 50.4999, "eval_steps_per_second": 0.669, "eval_ter": 47.9487, "step": 11000 }, { "epoch": 0.5211401640458604, "grad_norm": 0.8384801149368286, "learning_rate": 4.131433059923566e-05, "loss": 0.8696, "step": 11500 }, { "epoch": 0.5211401640458604, "eval_bleu": 36.1199, "eval_chrf++": 63.343, "eval_gen_len": 28.076, "eval_loss": 0.8511990308761597, "eval_runtime": 758.0827, "eval_samples_per_second": 3.298, "eval_spbleu": 50.1722, "eval_steps_per_second": 0.66, "eval_ter": 48.6316, "step": 11500 }, { "epoch": 0.5437984320478543, "grad_norm": 0.6142855882644653, "learning_rate": 4.093669279920243e-05, "loss": 0.8758, "step": 12000 }, { "epoch": 0.5437984320478543, "eval_bleu": 36.7344, "eval_chrf++": 63.7589, "eval_gen_len": 27.9936, "eval_loss": 0.8446237444877625, "eval_runtime": 749.403, "eval_samples_per_second": 3.336, "eval_spbleu": 50.8742, "eval_steps_per_second": 0.667, "eval_ter": 47.1841, "step": 12000 }, { "epoch": 0.5664567000498482, "grad_norm": 0.5856760144233704, "learning_rate": 4.05590549991692e-05, "loss": 0.8793, "step": 12500 }, { "epoch": 0.5664567000498482, "eval_bleu": 41.021, "eval_chrf++": 63.6181, "eval_gen_len": 27.5668, "eval_loss": 0.8422514796257019, "eval_runtime": 733.2302, "eval_samples_per_second": 3.41, "eval_spbleu": 50.6419, "eval_steps_per_second": 0.682, "eval_ter": 47.1499, "step": 12500 }, { "epoch": 0.5891149680518422, "grad_norm": 0.6606504321098328, "learning_rate": 4.018141719913597e-05, "loss": 0.8794, "step": 13000 }, { "epoch": 0.5891149680518422, "eval_bleu": 37.286, "eval_chrf++": 63.8579, "eval_gen_len": 27.8624, "eval_loss": 0.8412001729011536, "eval_runtime": 745.0766, "eval_samples_per_second": 3.355, "eval_spbleu": 51.2084, "eval_steps_per_second": 0.671, "eval_ter": 46.4538, "step": 13000 }, { "epoch": 0.611773236053836, "grad_norm": 0.7332282066345215, "learning_rate": 3.980377939910274e-05, "loss": 0.8543, "step": 13500 }, { "epoch": 0.611773236053836, "eval_bleu": 36.8567, "eval_chrf++": 63.6306, "eval_gen_len": 27.7904, "eval_loss": 0.8333261013031006, "eval_runtime": 744.0262, "eval_samples_per_second": 3.36, "eval_spbleu": 50.873, "eval_steps_per_second": 0.672, "eval_ter": 47.0998, "step": 13500 }, { "epoch": 0.6344315040558299, "grad_norm": 0.8010419607162476, "learning_rate": 3.9426141599069504e-05, "loss": 0.8661, "step": 14000 }, { "epoch": 0.6344315040558299, "eval_bleu": 36.9197, "eval_chrf++": 63.5882, "eval_gen_len": 27.8996, "eval_loss": 0.8315057754516602, "eval_runtime": 748.0384, "eval_samples_per_second": 3.342, "eval_spbleu": 50.798, "eval_steps_per_second": 0.668, "eval_ter": 46.8968, "step": 14000 }, { "epoch": 0.6570897720578239, "grad_norm": 0.5245931148529053, "learning_rate": 3.9048503799036265e-05, "loss": 0.8556, "step": 14500 }, { "epoch": 0.6570897720578239, "eval_bleu": 37.9259, "eval_chrf++": 63.8176, "eval_gen_len": 27.84, "eval_loss": 0.8274693489074707, "eval_runtime": 748.0441, "eval_samples_per_second": 3.342, "eval_spbleu": 51.0265, "eval_steps_per_second": 0.668, "eval_ter": 46.9205, "step": 14500 }, { "epoch": 0.6797480400598178, "grad_norm": 0.7052202820777893, "learning_rate": 3.867086599900304e-05, "loss": 0.8635, "step": 15000 }, { "epoch": 0.6797480400598178, "eval_bleu": 38.0929, "eval_chrf++": 63.8837, "eval_gen_len": 27.8216, "eval_loss": 0.8276916146278381, "eval_runtime": 744.6549, "eval_samples_per_second": 3.357, "eval_spbleu": 51.13, "eval_steps_per_second": 0.671, "eval_ter": 46.9099, "step": 15000 }, { "epoch": 0.7024063080618117, "grad_norm": 0.7750408053398132, "learning_rate": 3.8293228198969806e-05, "loss": 0.8412, "step": 15500 }, { "epoch": 0.7024063080618117, "eval_bleu": 38.2488, "eval_chrf++": 63.9152, "eval_gen_len": 27.7136, "eval_loss": 0.8237889409065247, "eval_runtime": 739.0003, "eval_samples_per_second": 3.383, "eval_spbleu": 51.3155, "eval_steps_per_second": 0.677, "eval_ter": 46.5224, "step": 15500 }, { "epoch": 0.7250645760638057, "grad_norm": 0.7442999482154846, "learning_rate": 3.791559039893657e-05, "loss": 0.8476, "step": 16000 }, { "epoch": 0.7250645760638057, "eval_bleu": 40.1641, "eval_chrf++": 64.262, "eval_gen_len": 27.616, "eval_loss": 0.8218015432357788, "eval_runtime": 736.1573, "eval_samples_per_second": 3.396, "eval_spbleu": 51.5374, "eval_steps_per_second": 0.679, "eval_ter": 46.2982, "step": 16000 }, { "epoch": 0.7477228440657996, "grad_norm": 0.6437325477600098, "learning_rate": 3.753795259890334e-05, "loss": 0.854, "step": 16500 }, { "epoch": 0.7477228440657996, "eval_bleu": 37.6219, "eval_chrf++": 64.4048, "eval_gen_len": 27.9068, "eval_loss": 0.8224019408226013, "eval_runtime": 747.0111, "eval_samples_per_second": 3.347, "eval_spbleu": 51.8679, "eval_steps_per_second": 0.669, "eval_ter": 46.3009, "step": 16500 }, { "epoch": 0.7703811120677936, "grad_norm": 0.7045587301254272, "learning_rate": 3.716031479887011e-05, "loss": 0.8433, "step": 17000 }, { "epoch": 0.7703811120677936, "eval_bleu": 38.7383, "eval_chrf++": 64.511, "eval_gen_len": 27.94, "eval_loss": 0.8197815418243408, "eval_runtime": 750.586, "eval_samples_per_second": 3.331, "eval_spbleu": 51.9857, "eval_steps_per_second": 0.666, "eval_ter": 46.3378, "step": 17000 }, { "epoch": 0.7930393800697875, "grad_norm": 0.6813265681266785, "learning_rate": 3.678267699883688e-05, "loss": 0.8372, "step": 17500 }, { "epoch": 0.7930393800697875, "eval_bleu": 39.4841, "eval_chrf++": 64.344, "eval_gen_len": 27.5488, "eval_loss": 0.8175507187843323, "eval_runtime": 732.9073, "eval_samples_per_second": 3.411, "eval_spbleu": 51.5575, "eval_steps_per_second": 0.682, "eval_ter": 46.1163, "step": 17500 }, { "epoch": 0.8156976480717814, "grad_norm": 0.5977945327758789, "learning_rate": 3.640503919880365e-05, "loss": 0.8317, "step": 18000 }, { "epoch": 0.8156976480717814, "eval_bleu": 37.7506, "eval_chrf++": 64.0896, "eval_gen_len": 27.8936, "eval_loss": 0.8134418725967407, "eval_runtime": 746.7872, "eval_samples_per_second": 3.348, "eval_spbleu": 51.6484, "eval_steps_per_second": 0.67, "eval_ter": 46.6937, "step": 18000 }, { "epoch": 0.8383559160737754, "grad_norm": 0.6788719892501831, "learning_rate": 3.602740139877041e-05, "loss": 0.8331, "step": 18500 }, { "epoch": 0.8383559160737754, "eval_bleu": 37.9443, "eval_chrf++": 64.6177, "eval_gen_len": 28.022, "eval_loss": 0.8157890439033508, "eval_runtime": 748.194, "eval_samples_per_second": 3.341, "eval_spbleu": 52.1179, "eval_steps_per_second": 0.668, "eval_ter": 46.2323, "step": 18500 }, { "epoch": 0.8610141840757692, "grad_norm": 0.6696301102638245, "learning_rate": 3.5649763598737176e-05, "loss": 0.8342, "step": 19000 }, { "epoch": 0.8610141840757692, "eval_bleu": 38.7784, "eval_chrf++": 64.6136, "eval_gen_len": 27.9508, "eval_loss": 0.8111441731452942, "eval_runtime": 748.3754, "eval_samples_per_second": 3.341, "eval_spbleu": 52.0913, "eval_steps_per_second": 0.668, "eval_ter": 46.206, "step": 19000 }, { "epoch": 0.8836724520777631, "grad_norm": 0.7288480401039124, "learning_rate": 3.527212579870395e-05, "loss": 0.8282, "step": 19500 }, { "epoch": 0.8836724520777631, "eval_bleu": 38.1426, "eval_chrf++": 64.6892, "eval_gen_len": 27.8628, "eval_loss": 0.8068262934684753, "eval_runtime": 743.9866, "eval_samples_per_second": 3.36, "eval_spbleu": 52.3226, "eval_steps_per_second": 0.672, "eval_ter": 45.7841, "step": 19500 }, { "epoch": 0.9063307200797571, "grad_norm": 0.6522256731987, "learning_rate": 3.489448799867072e-05, "loss": 0.8345, "step": 20000 }, { "epoch": 0.9063307200797571, "eval_bleu": 37.3235, "eval_chrf++": 64.2609, "eval_gen_len": 27.9892, "eval_loss": 0.8071653246879578, "eval_runtime": 750.865, "eval_samples_per_second": 3.329, "eval_spbleu": 51.689, "eval_steps_per_second": 0.666, "eval_ter": 47.0022, "step": 20000 }, { "epoch": 0.928988988081751, "grad_norm": 0.7145525217056274, "learning_rate": 3.4516850198637484e-05, "loss": 0.8224, "step": 20500 }, { "epoch": 0.928988988081751, "eval_bleu": 37.5387, "eval_chrf++": 64.2559, "eval_gen_len": 27.82, "eval_loss": 0.8012556433677673, "eval_runtime": 768.5173, "eval_samples_per_second": 3.253, "eval_spbleu": 51.7469, "eval_steps_per_second": 0.651, "eval_ter": 46.3668, "step": 20500 }, { "epoch": 0.951647256083745, "grad_norm": 0.7082468271255493, "learning_rate": 3.413921239860425e-05, "loss": 0.8315, "step": 21000 }, { "epoch": 0.951647256083745, "eval_bleu": 37.9748, "eval_chrf++": 64.7286, "eval_gen_len": 27.8264, "eval_loss": 0.7990391254425049, "eval_runtime": 772.2382, "eval_samples_per_second": 3.237, "eval_spbleu": 52.3829, "eval_steps_per_second": 0.647, "eval_ter": 45.8711, "step": 21000 }, { "epoch": 0.9743055240857389, "grad_norm": 0.6599904298782349, "learning_rate": 3.376157459857102e-05, "loss": 0.8269, "step": 21500 }, { "epoch": 0.9743055240857389, "eval_bleu": 39.3032, "eval_chrf++": 64.9609, "eval_gen_len": 27.8436, "eval_loss": 0.7974932789802551, "eval_runtime": 771.5992, "eval_samples_per_second": 3.24, "eval_spbleu": 52.5588, "eval_steps_per_second": 0.648, "eval_ter": 45.8975, "step": 21500 }, { "epoch": 0.9969637920877328, "grad_norm": 0.6297094821929932, "learning_rate": 3.338393679853779e-05, "loss": 0.8159, "step": 22000 }, { "epoch": 0.9969637920877328, "eval_bleu": 38.4628, "eval_chrf++": 64.9748, "eval_gen_len": 27.7568, "eval_loss": 0.7950631380081177, "eval_runtime": 757.8415, "eval_samples_per_second": 3.299, "eval_spbleu": 52.6788, "eval_steps_per_second": 0.66, "eval_ter": 45.3491, "step": 22000 }, { "epoch": 1.0196220600897268, "grad_norm": 0.6592913866043091, "learning_rate": 3.300629899850455e-05, "loss": 0.6601, "step": 22500 }, { "epoch": 1.0196220600897268, "eval_bleu": 39.9752, "eval_chrf++": 65.1542, "eval_gen_len": 27.834, "eval_loss": 0.810078501701355, "eval_runtime": 742.5172, "eval_samples_per_second": 3.367, "eval_spbleu": 52.8176, "eval_steps_per_second": 0.673, "eval_ter": 45.4176, "step": 22500 }, { "epoch": 1.0422803280917208, "grad_norm": 0.5827597379684448, "learning_rate": 3.262866119847132e-05, "loss": 0.6316, "step": 23000 }, { "epoch": 1.0422803280917208, "eval_bleu": 43.5482, "eval_chrf++": 65.346, "eval_gen_len": 27.59, "eval_loss": 0.814832329750061, "eval_runtime": 736.0286, "eval_samples_per_second": 3.397, "eval_spbleu": 52.9228, "eval_steps_per_second": 0.679, "eval_ter": 45.2832, "step": 23000 }, { "epoch": 1.0649385960937146, "grad_norm": 0.736733078956604, "learning_rate": 3.225102339843809e-05, "loss": 0.6365, "step": 23500 }, { "epoch": 1.0649385960937146, "eval_bleu": 40.0573, "eval_chrf++": 65.2248, "eval_gen_len": 27.7468, "eval_loss": 0.8096536993980408, "eval_runtime": 762.5207, "eval_samples_per_second": 3.279, "eval_spbleu": 52.9015, "eval_steps_per_second": 0.656, "eval_ter": 45.1302, "step": 23500 }, { "epoch": 1.0875968640957085, "grad_norm": 0.6646838188171387, "learning_rate": 3.187338559840486e-05, "loss": 0.6462, "step": 24000 }, { "epoch": 1.0875968640957085, "eval_bleu": 41.7287, "eval_chrf++": 65.476, "eval_gen_len": 27.8052, "eval_loss": 0.8067141771316528, "eval_runtime": 764.719, "eval_samples_per_second": 3.269, "eval_spbleu": 53.0853, "eval_steps_per_second": 0.654, "eval_ter": 45.2621, "step": 24000 }, { "epoch": 1.1102551320977025, "grad_norm": 0.7113286852836609, "learning_rate": 3.149574779837163e-05, "loss": 0.6383, "step": 24500 }, { "epoch": 1.1102551320977025, "eval_bleu": 44.484, "eval_chrf++": 65.5094, "eval_gen_len": 27.502, "eval_loss": 0.8042193055152893, "eval_runtime": 761.5932, "eval_samples_per_second": 3.283, "eval_spbleu": 53.234, "eval_steps_per_second": 0.657, "eval_ter": 45.0591, "step": 24500 }, { "epoch": 1.1329134000996963, "grad_norm": 0.6746016144752502, "learning_rate": 3.1118109998338396e-05, "loss": 0.6464, "step": 25000 }, { "epoch": 1.1329134000996963, "eval_bleu": 44.4968, "eval_chrf++": 65.4383, "eval_gen_len": 27.4832, "eval_loss": 0.8051723837852478, "eval_runtime": 762.6344, "eval_samples_per_second": 3.278, "eval_spbleu": 53.209, "eval_steps_per_second": 0.656, "eval_ter": 45.1672, "step": 25000 }, { "epoch": 1.1555716681016903, "grad_norm": 0.7094623446464539, "learning_rate": 3.074047219830516e-05, "loss": 0.6353, "step": 25500 }, { "epoch": 1.1555716681016903, "eval_bleu": 44.7381, "eval_chrf++": 65.7012, "eval_gen_len": 27.5904, "eval_loss": 0.8053088784217834, "eval_runtime": 766.039, "eval_samples_per_second": 3.264, "eval_spbleu": 53.2617, "eval_steps_per_second": 0.653, "eval_ter": 44.9852, "step": 25500 }, { "epoch": 1.1782299361036843, "grad_norm": 0.5892546772956848, "learning_rate": 3.0362834398271934e-05, "loss": 0.6483, "step": 26000 }, { "epoch": 1.1782299361036843, "eval_bleu": 44.1957, "eval_chrf++": 65.3258, "eval_gen_len": 27.6048, "eval_loss": 0.8034100532531738, "eval_runtime": 761.0595, "eval_samples_per_second": 3.285, "eval_spbleu": 52.8918, "eval_steps_per_second": 0.657, "eval_ter": 45.4572, "step": 26000 }, { "epoch": 1.200888204105678, "grad_norm": 0.744484543800354, "learning_rate": 2.9985196598238697e-05, "loss": 0.6382, "step": 26500 }, { "epoch": 1.200888204105678, "eval_bleu": 44.663, "eval_chrf++": 65.2386, "eval_gen_len": 27.4888, "eval_loss": 0.8034644722938538, "eval_runtime": 742.6066, "eval_samples_per_second": 3.367, "eval_spbleu": 52.8205, "eval_steps_per_second": 0.673, "eval_ter": 45.1144, "step": 26500 }, { "epoch": 1.223546472107672, "grad_norm": 0.6990212798118591, "learning_rate": 2.9607558798205465e-05, "loss": 0.6425, "step": 27000 }, { "epoch": 1.223546472107672, "eval_bleu": 40.3169, "eval_chrf++": 65.5055, "eval_gen_len": 27.7456, "eval_loss": 0.7975181341171265, "eval_runtime": 753.8226, "eval_samples_per_second": 3.316, "eval_spbleu": 53.4546, "eval_steps_per_second": 0.663, "eval_ter": 44.7295, "step": 27000 }, { "epoch": 1.246204740109666, "grad_norm": 0.7708460092544556, "learning_rate": 2.9229920998172232e-05, "loss": 0.6364, "step": 27500 }, { "epoch": 1.246204740109666, "eval_bleu": 39.0833, "eval_chrf++": 65.4463, "eval_gen_len": 27.9864, "eval_loss": 0.799001157283783, "eval_runtime": 765.257, "eval_samples_per_second": 3.267, "eval_spbleu": 53.3214, "eval_steps_per_second": 0.653, "eval_ter": 45.1513, "step": 27500 }, { "epoch": 1.2688630081116599, "grad_norm": 0.7766411900520325, "learning_rate": 2.8852283198139002e-05, "loss": 0.6311, "step": 28000 }, { "epoch": 1.2688630081116599, "eval_bleu": 44.6719, "eval_chrf++": 65.7788, "eval_gen_len": 27.5984, "eval_loss": 0.8024120330810547, "eval_runtime": 739.5078, "eval_samples_per_second": 3.381, "eval_spbleu": 53.4111, "eval_steps_per_second": 0.676, "eval_ter": 44.8429, "step": 28000 }, { "epoch": 1.2915212761136539, "grad_norm": 0.6488195657730103, "learning_rate": 2.847464539810577e-05, "loss": 0.6315, "step": 28500 }, { "epoch": 1.2915212761136539, "eval_bleu": 38.9676, "eval_chrf++": 65.2009, "eval_gen_len": 27.7544, "eval_loss": 0.799105167388916, "eval_runtime": 751.4345, "eval_samples_per_second": 3.327, "eval_spbleu": 53.0925, "eval_steps_per_second": 0.665, "eval_ter": 45.0802, "step": 28500 }, { "epoch": 1.3141795441156479, "grad_norm": 0.6332802176475525, "learning_rate": 2.809700759807254e-05, "loss": 0.6339, "step": 29000 }, { "epoch": 1.3141795441156479, "eval_bleu": 39.0276, "eval_chrf++": 65.2617, "eval_gen_len": 27.824, "eval_loss": 0.7974073886871338, "eval_runtime": 754.6502, "eval_samples_per_second": 3.313, "eval_spbleu": 53.046, "eval_steps_per_second": 0.663, "eval_ter": 45.2331, "step": 29000 }, { "epoch": 1.3368378121176416, "grad_norm": 0.5959407687187195, "learning_rate": 2.7719369798039307e-05, "loss": 0.6412, "step": 29500 }, { "epoch": 1.3368378121176416, "eval_bleu": 40.1118, "eval_chrf++": 65.4814, "eval_gen_len": 27.8892, "eval_loss": 0.7944240570068359, "eval_runtime": 752.0202, "eval_samples_per_second": 3.324, "eval_spbleu": 53.4305, "eval_steps_per_second": 0.665, "eval_ter": 45.0327, "step": 29500 }, { "epoch": 1.3594960801196356, "grad_norm": 0.6927244067192078, "learning_rate": 2.7341731998006075e-05, "loss": 0.6354, "step": 30000 }, { "epoch": 1.3594960801196356, "eval_bleu": 41.7725, "eval_chrf++": 65.7724, "eval_gen_len": 27.7264, "eval_loss": 0.7973920702934265, "eval_runtime": 742.892, "eval_samples_per_second": 3.365, "eval_spbleu": 53.752, "eval_steps_per_second": 0.673, "eval_ter": 44.4975, "step": 30000 }, { "epoch": 1.3821543481216296, "grad_norm": 0.6661298871040344, "learning_rate": 2.696409419797284e-05, "loss": 0.6294, "step": 30500 }, { "epoch": 1.3821543481216296, "eval_bleu": 39.0417, "eval_chrf++": 65.4211, "eval_gen_len": 27.8648, "eval_loss": 0.795570969581604, "eval_runtime": 749.9034, "eval_samples_per_second": 3.334, "eval_spbleu": 53.378, "eval_steps_per_second": 0.667, "eval_ter": 45.0802, "step": 30500 }, { "epoch": 1.4048126161236234, "grad_norm": 0.6107171773910522, "learning_rate": 2.658645639793961e-05, "loss": 0.636, "step": 31000 }, { "epoch": 1.4048126161236234, "eval_bleu": 39.9268, "eval_chrf++": 65.47, "eval_gen_len": 27.682, "eval_loss": 0.7938565611839294, "eval_runtime": 743.0863, "eval_samples_per_second": 3.364, "eval_spbleu": 53.3727, "eval_steps_per_second": 0.673, "eval_ter": 44.5871, "step": 31000 }, { "epoch": 1.4274708841256174, "grad_norm": 0.6050147414207458, "learning_rate": 2.6208818597906376e-05, "loss": 0.6385, "step": 31500 }, { "epoch": 1.4274708841256174, "eval_bleu": 40.4175, "eval_chrf++": 65.7235, "eval_gen_len": 27.8052, "eval_loss": 0.7914307713508606, "eval_runtime": 748.359, "eval_samples_per_second": 3.341, "eval_spbleu": 53.722, "eval_steps_per_second": 0.668, "eval_ter": 44.6003, "step": 31500 }, { "epoch": 1.4501291521276114, "grad_norm": 0.8934792280197144, "learning_rate": 2.5831180797873143e-05, "loss": 0.6392, "step": 32000 }, { "epoch": 1.4501291521276114, "eval_bleu": 40.6796, "eval_chrf++": 65.8163, "eval_gen_len": 27.7424, "eval_loss": 0.7927303910255432, "eval_runtime": 748.6849, "eval_samples_per_second": 3.339, "eval_spbleu": 53.7139, "eval_steps_per_second": 0.668, "eval_ter": 44.4184, "step": 32000 }, { "epoch": 1.4727874201296052, "grad_norm": 0.6742972731590271, "learning_rate": 2.5453542997839914e-05, "loss": 0.6364, "step": 32500 }, { "epoch": 1.4727874201296052, "eval_bleu": 40.2137, "eval_chrf++": 65.6498, "eval_gen_len": 27.7408, "eval_loss": 0.7901710867881775, "eval_runtime": 749.8815, "eval_samples_per_second": 3.334, "eval_spbleu": 53.6947, "eval_steps_per_second": 0.667, "eval_ter": 44.7031, "step": 32500 }, { "epoch": 1.4954456881315992, "grad_norm": 0.6159557104110718, "learning_rate": 2.507590519780668e-05, "loss": 0.6352, "step": 33000 }, { "epoch": 1.4954456881315992, "eval_bleu": 41.264, "eval_chrf++": 65.7523, "eval_gen_len": 27.8552, "eval_loss": 0.7894487380981445, "eval_runtime": 748.2109, "eval_samples_per_second": 3.341, "eval_spbleu": 53.6724, "eval_steps_per_second": 0.668, "eval_ter": 44.8244, "step": 33000 }, { "epoch": 1.5181039561335932, "grad_norm": 0.8539830446243286, "learning_rate": 2.469826739777345e-05, "loss": 0.6234, "step": 33500 }, { "epoch": 1.5181039561335932, "eval_bleu": 40.1811, "eval_chrf++": 65.8305, "eval_gen_len": 27.8484, "eval_loss": 0.7886767983436584, "eval_runtime": 745.5767, "eval_samples_per_second": 3.353, "eval_spbleu": 53.7788, "eval_steps_per_second": 0.671, "eval_ter": 44.5739, "step": 33500 }, { "epoch": 1.540762224135587, "grad_norm": 0.7526208758354187, "learning_rate": 2.4320629597740216e-05, "loss": 0.6411, "step": 34000 }, { "epoch": 1.540762224135587, "eval_bleu": 42.6857, "eval_chrf++": 65.9528, "eval_gen_len": 27.6804, "eval_loss": 0.7847135663032532, "eval_runtime": 741.0515, "eval_samples_per_second": 3.374, "eval_spbleu": 53.8997, "eval_steps_per_second": 0.675, "eval_ter": 44.3024, "step": 34000 }, { "epoch": 1.5634204921375812, "grad_norm": 0.8738523125648499, "learning_rate": 2.3942991797706986e-05, "loss": 0.63, "step": 34500 }, { "epoch": 1.5634204921375812, "eval_bleu": 45.4639, "eval_chrf++": 66.1251, "eval_gen_len": 27.5308, "eval_loss": 0.781486451625824, "eval_runtime": 736.5596, "eval_samples_per_second": 3.394, "eval_spbleu": 53.9656, "eval_steps_per_second": 0.679, "eval_ter": 44.334, "step": 34500 }, { "epoch": 1.586078760139575, "grad_norm": 0.7074981927871704, "learning_rate": 2.3565353997673753e-05, "loss": 0.6265, "step": 35000 }, { "epoch": 1.586078760139575, "eval_bleu": 44.9751, "eval_chrf++": 65.9036, "eval_gen_len": 27.6124, "eval_loss": 0.7836451530456543, "eval_runtime": 746.2524, "eval_samples_per_second": 3.35, "eval_spbleu": 53.5997, "eval_steps_per_second": 0.67, "eval_ter": 44.8059, "step": 35000 }, { "epoch": 1.6087370281415687, "grad_norm": 0.7853338718414307, "learning_rate": 2.318771619764052e-05, "loss": 0.6202, "step": 35500 }, { "epoch": 1.6087370281415687, "eval_bleu": 45.0486, "eval_chrf++": 66.1202, "eval_gen_len": 27.6572, "eval_loss": 0.7826634049415588, "eval_runtime": 750.4591, "eval_samples_per_second": 3.331, "eval_spbleu": 53.9114, "eval_steps_per_second": 0.666, "eval_ter": 44.7242, "step": 35500 }, { "epoch": 1.631395296143563, "grad_norm": 0.7461378574371338, "learning_rate": 2.2810078397607288e-05, "loss": 0.6385, "step": 36000 }, { "epoch": 1.631395296143563, "eval_bleu": 41.8569, "eval_chrf++": 65.9455, "eval_gen_len": 27.8612, "eval_loss": 0.7859405279159546, "eval_runtime": 753.5356, "eval_samples_per_second": 3.318, "eval_spbleu": 53.9218, "eval_steps_per_second": 0.664, "eval_ter": 44.5897, "step": 36000 }, { "epoch": 1.6540535641455567, "grad_norm": 0.5514925122261047, "learning_rate": 2.2432440597574055e-05, "loss": 0.6269, "step": 36500 }, { "epoch": 1.6540535641455567, "eval_bleu": 43.6861, "eval_chrf++": 65.9551, "eval_gen_len": 27.5088, "eval_loss": 0.7851018905639648, "eval_runtime": 744.0917, "eval_samples_per_second": 3.36, "eval_spbleu": 53.792, "eval_steps_per_second": 0.672, "eval_ter": 44.3419, "step": 36500 }, { "epoch": 1.6767118321475505, "grad_norm": 0.6642000675201416, "learning_rate": 2.2054802797540825e-05, "loss": 0.6301, "step": 37000 }, { "epoch": 1.6767118321475505, "eval_bleu": 46.0896, "eval_chrf++": 66.164, "eval_gen_len": 27.426, "eval_loss": 0.7796212434768677, "eval_runtime": 740.9337, "eval_samples_per_second": 3.374, "eval_spbleu": 54.0105, "eval_steps_per_second": 0.675, "eval_ter": 44.1494, "step": 37000 }, { "epoch": 1.6993701001495447, "grad_norm": 0.8100460171699524, "learning_rate": 2.167716499750759e-05, "loss": 0.6213, "step": 37500 }, { "epoch": 1.6993701001495447, "eval_bleu": 45.5601, "eval_chrf++": 66.0823, "eval_gen_len": 27.5128, "eval_loss": 0.7815007567405701, "eval_runtime": 733.2938, "eval_samples_per_second": 3.409, "eval_spbleu": 53.9922, "eval_steps_per_second": 0.682, "eval_ter": 44.5133, "step": 37500 }, { "epoch": 1.7220283681515385, "grad_norm": 0.6142133474349976, "learning_rate": 2.129952719747436e-05, "loss": 0.623, "step": 38000 }, { "epoch": 1.7220283681515385, "eval_bleu": 45.0364, "eval_chrf++": 66.1218, "eval_gen_len": 27.5352, "eval_loss": 0.7782283425331116, "eval_runtime": 736.7203, "eval_samples_per_second": 3.393, "eval_spbleu": 54.0624, "eval_steps_per_second": 0.679, "eval_ter": 44.3314, "step": 38000 }, { "epoch": 1.7446866361535325, "grad_norm": 0.8021434545516968, "learning_rate": 2.0921889397441127e-05, "loss": 0.6269, "step": 38500 }, { "epoch": 1.7446866361535325, "eval_bleu": 41.6796, "eval_chrf++": 66.0402, "eval_gen_len": 27.7448, "eval_loss": 0.7799319624900818, "eval_runtime": 738.6103, "eval_samples_per_second": 3.385, "eval_spbleu": 54.1452, "eval_steps_per_second": 0.677, "eval_ter": 44.1653, "step": 38500 }, { "epoch": 1.7673449041555265, "grad_norm": 0.6603755354881287, "learning_rate": 2.0544251597407894e-05, "loss": 0.6339, "step": 39000 }, { "epoch": 1.7673449041555265, "eval_bleu": 46.523, "eval_chrf++": 66.3925, "eval_gen_len": 27.5112, "eval_loss": 0.7813342809677124, "eval_runtime": 728.2882, "eval_samples_per_second": 3.433, "eval_spbleu": 54.2461, "eval_steps_per_second": 0.687, "eval_ter": 44.0071, "step": 39000 }, { "epoch": 1.7900031721575203, "grad_norm": 0.6977267861366272, "learning_rate": 2.016661379737466e-05, "loss": 0.621, "step": 39500 }, { "epoch": 1.7900031721575203, "eval_bleu": 43.8812, "eval_chrf++": 66.186, "eval_gen_len": 27.6576, "eval_loss": 0.7753216028213501, "eval_runtime": 736.3388, "eval_samples_per_second": 3.395, "eval_spbleu": 54.244, "eval_steps_per_second": 0.679, "eval_ter": 44.3445, "step": 39500 }, { "epoch": 1.8126614401595142, "grad_norm": 0.7790645956993103, "learning_rate": 1.978897599734143e-05, "loss": 0.6278, "step": 40000 }, { "epoch": 1.8126614401595142, "eval_bleu": 46.7458, "eval_chrf++": 66.4123, "eval_gen_len": 27.4892, "eval_loss": 0.7777643799781799, "eval_runtime": 730.5316, "eval_samples_per_second": 3.422, "eval_spbleu": 54.4016, "eval_steps_per_second": 0.684, "eval_ter": 43.9702, "step": 40000 }, { "epoch": 1.8353197081615082, "grad_norm": 0.6901569366455078, "learning_rate": 1.94113381973082e-05, "loss": 0.6221, "step": 40500 }, { "epoch": 1.8353197081615082, "eval_bleu": 45.0544, "eval_chrf++": 66.3725, "eval_gen_len": 27.598, "eval_loss": 0.7787633538246155, "eval_runtime": 734.485, "eval_samples_per_second": 3.404, "eval_spbleu": 54.419, "eval_steps_per_second": 0.681, "eval_ter": 43.9201, "step": 40500 }, { "epoch": 1.857977976163502, "grad_norm": 0.7807871103286743, "learning_rate": 1.9033700397274966e-05, "loss": 0.6209, "step": 41000 }, { "epoch": 1.857977976163502, "eval_bleu": 44.5498, "eval_chrf++": 66.3741, "eval_gen_len": 27.5916, "eval_loss": 0.7768906354904175, "eval_runtime": 737.3971, "eval_samples_per_second": 3.39, "eval_spbleu": 54.5028, "eval_steps_per_second": 0.678, "eval_ter": 43.9728, "step": 41000 }, { "epoch": 1.880636244165496, "grad_norm": 0.8082613945007324, "learning_rate": 1.8656062597241737e-05, "loss": 0.6267, "step": 41500 }, { "epoch": 1.880636244165496, "eval_bleu": 45.3502, "eval_chrf++": 66.5334, "eval_gen_len": 27.5344, "eval_loss": 0.7741044759750366, "eval_runtime": 731.6398, "eval_samples_per_second": 3.417, "eval_spbleu": 54.4958, "eval_steps_per_second": 0.683, "eval_ter": 43.8726, "step": 41500 }, { "epoch": 1.90329451216749, "grad_norm": 0.5612310171127319, "learning_rate": 1.82784247972085e-05, "loss": 0.625, "step": 42000 }, { "epoch": 1.90329451216749, "eval_bleu": 45.4662, "eval_chrf++": 66.6858, "eval_gen_len": 27.5552, "eval_loss": 0.7751156687736511, "eval_runtime": 734.022, "eval_samples_per_second": 3.406, "eval_spbleu": 54.7854, "eval_steps_per_second": 0.681, "eval_ter": 43.5404, "step": 42000 }, { "epoch": 1.9259527801694838, "grad_norm": 0.7477275133132935, "learning_rate": 1.790078699717527e-05, "loss": 0.6268, "step": 42500 }, { "epoch": 1.9259527801694838, "eval_bleu": 43.7231, "eval_chrf++": 66.4796, "eval_gen_len": 27.71, "eval_loss": 0.7729161977767944, "eval_runtime": 736.8783, "eval_samples_per_second": 3.393, "eval_spbleu": 54.5524, "eval_steps_per_second": 0.679, "eval_ter": 43.8383, "step": 42500 }, { "epoch": 1.9486110481714778, "grad_norm": 0.7059822678565979, "learning_rate": 1.752314919714204e-05, "loss": 0.6263, "step": 43000 }, { "epoch": 1.9486110481714778, "eval_bleu": 45.1162, "eval_chrf++": 66.5293, "eval_gen_len": 27.5624, "eval_loss": 0.771515429019928, "eval_runtime": 732.1583, "eval_samples_per_second": 3.415, "eval_spbleu": 54.5958, "eval_steps_per_second": 0.683, "eval_ter": 43.6564, "step": 43000 }, { "epoch": 1.9712693161734718, "grad_norm": 0.7907470464706421, "learning_rate": 1.7145511397108806e-05, "loss": 0.6178, "step": 43500 }, { "epoch": 1.9712693161734718, "eval_bleu": 44.3099, "eval_chrf++": 66.5724, "eval_gen_len": 27.632, "eval_loss": 0.7728075385093689, "eval_runtime": 733.6074, "eval_samples_per_second": 3.408, "eval_spbleu": 54.6752, "eval_steps_per_second": 0.682, "eval_ter": 43.725, "step": 43500 }, { "epoch": 1.9939275841754656, "grad_norm": 0.7742732763290405, "learning_rate": 1.6767873597075573e-05, "loss": 0.609, "step": 44000 }, { "epoch": 1.9939275841754656, "eval_bleu": 46.4711, "eval_chrf++": 66.7721, "eval_gen_len": 27.4864, "eval_loss": 0.7715900540351868, "eval_runtime": 726.1293, "eval_samples_per_second": 3.443, "eval_spbleu": 54.9332, "eval_steps_per_second": 0.689, "eval_ter": 43.3822, "step": 44000 }, { "epoch": 2.0165858521774593, "grad_norm": 0.7575493454933167, "learning_rate": 1.639023579704234e-05, "loss": 0.5124, "step": 44500 }, { "epoch": 2.0165858521774593, "eval_bleu": 45.9215, "eval_chrf++": 66.6482, "eval_gen_len": 27.622, "eval_loss": 0.8128123879432678, "eval_runtime": 732.4537, "eval_samples_per_second": 3.413, "eval_spbleu": 54.7846, "eval_steps_per_second": 0.683, "eval_ter": 43.7355, "step": 44500 }, { "epoch": 2.0392441201794536, "grad_norm": 0.732072114944458, "learning_rate": 1.601259799700911e-05, "loss": 0.4683, "step": 45000 }, { "epoch": 2.0392441201794536, "eval_bleu": 45.6558, "eval_chrf++": 66.455, "eval_gen_len": 27.5796, "eval_loss": 0.8135092258453369, "eval_runtime": 732.47, "eval_samples_per_second": 3.413, "eval_spbleu": 54.5066, "eval_steps_per_second": 0.683, "eval_ter": 44.0202, "step": 45000 }, { "epoch": 2.0619023881814473, "grad_norm": 0.7785657644271851, "learning_rate": 1.5634960196975878e-05, "loss": 0.4632, "step": 45500 }, { "epoch": 2.0619023881814473, "eval_bleu": 46.9546, "eval_chrf++": 66.5237, "eval_gen_len": 27.482, "eval_loss": 0.81900554895401, "eval_runtime": 734.696, "eval_samples_per_second": 3.403, "eval_spbleu": 54.4887, "eval_steps_per_second": 0.681, "eval_ter": 43.7988, "step": 45500 }, { "epoch": 2.0845606561834416, "grad_norm": 1.1765786409378052, "learning_rate": 1.5257322396942645e-05, "loss": 0.4696, "step": 46000 }, { "epoch": 2.0845606561834416, "eval_bleu": 47.083, "eval_chrf++": 66.8275, "eval_gen_len": 27.5328, "eval_loss": 0.8155868053436279, "eval_runtime": 736.5203, "eval_samples_per_second": 3.394, "eval_spbleu": 54.892, "eval_steps_per_second": 0.679, "eval_ter": 43.456, "step": 46000 }, { "epoch": 2.1072189241854353, "grad_norm": 0.8067004084587097, "learning_rate": 1.4879684596909412e-05, "loss": 0.4635, "step": 46500 }, { "epoch": 2.1072189241854353, "eval_bleu": 46.6541, "eval_chrf++": 66.7448, "eval_gen_len": 27.5476, "eval_loss": 0.8161126971244812, "eval_runtime": 740.7767, "eval_samples_per_second": 3.375, "eval_spbleu": 54.8934, "eval_steps_per_second": 0.675, "eval_ter": 43.3189, "step": 46500 }, { "epoch": 2.129877192187429, "grad_norm": 0.9044099450111389, "learning_rate": 1.4502046796876181e-05, "loss": 0.4725, "step": 47000 }, { "epoch": 2.129877192187429, "eval_bleu": 47.2452, "eval_chrf++": 66.8326, "eval_gen_len": 27.558, "eval_loss": 0.8134703040122986, "eval_runtime": 741.2625, "eval_samples_per_second": 3.373, "eval_spbleu": 54.9705, "eval_steps_per_second": 0.675, "eval_ter": 43.3216, "step": 47000 }, { "epoch": 2.1525354601894233, "grad_norm": 0.8520795106887817, "learning_rate": 1.412440899684295e-05, "loss": 0.4727, "step": 47500 }, { "epoch": 2.1525354601894233, "eval_bleu": 46.5313, "eval_chrf++": 66.6736, "eval_gen_len": 27.5296, "eval_loss": 0.820831835269928, "eval_runtime": 742.5016, "eval_samples_per_second": 3.367, "eval_spbleu": 54.5714, "eval_steps_per_second": 0.673, "eval_ter": 43.6722, "step": 47500 }, { "epoch": 2.175193728191417, "grad_norm": 0.7529011964797974, "learning_rate": 1.3746771196809716e-05, "loss": 0.4736, "step": 48000 }, { "epoch": 2.175193728191417, "eval_bleu": 46.3524, "eval_chrf++": 66.7416, "eval_gen_len": 27.5684, "eval_loss": 0.8130167126655579, "eval_runtime": 730.5257, "eval_samples_per_second": 3.422, "eval_spbleu": 54.8088, "eval_steps_per_second": 0.684, "eval_ter": 43.4718, "step": 48000 }, { "epoch": 2.197851996193411, "grad_norm": 0.7033498883247375, "learning_rate": 1.3369133396776484e-05, "loss": 0.471, "step": 48500 }, { "epoch": 2.197851996193411, "eval_bleu": 46.2188, "eval_chrf++": 66.702, "eval_gen_len": 27.6192, "eval_loss": 0.8164393305778503, "eval_runtime": 732.5336, "eval_samples_per_second": 3.413, "eval_spbleu": 54.6656, "eval_steps_per_second": 0.683, "eval_ter": 43.6511, "step": 48500 }, { "epoch": 2.220510264195405, "grad_norm": 0.7331113815307617, "learning_rate": 1.2991495596743253e-05, "loss": 0.4712, "step": 49000 }, { "epoch": 2.220510264195405, "eval_bleu": 47.0435, "eval_chrf++": 66.6968, "eval_gen_len": 27.4924, "eval_loss": 0.81520676612854, "eval_runtime": 729.4123, "eval_samples_per_second": 3.427, "eval_spbleu": 54.6842, "eval_steps_per_second": 0.685, "eval_ter": 43.4376, "step": 49000 }, { "epoch": 2.243168532197399, "grad_norm": 0.6714054346084595, "learning_rate": 1.261385779671002e-05, "loss": 0.4741, "step": 49500 }, { "epoch": 2.243168532197399, "eval_bleu": 47.2441, "eval_chrf++": 66.8706, "eval_gen_len": 27.4916, "eval_loss": 0.8153889775276184, "eval_runtime": 727.829, "eval_samples_per_second": 3.435, "eval_spbleu": 54.9194, "eval_steps_per_second": 0.687, "eval_ter": 43.3374, "step": 49500 }, { "epoch": 2.2658268001993926, "grad_norm": 0.7230417132377625, "learning_rate": 1.2236219996676788e-05, "loss": 0.4723, "step": 50000 }, { "epoch": 2.2658268001993926, "eval_bleu": 47.0146, "eval_chrf++": 66.9999, "eval_gen_len": 27.496, "eval_loss": 0.8151711225509644, "eval_runtime": 725.7357, "eval_samples_per_second": 3.445, "eval_spbleu": 55.1257, "eval_steps_per_second": 0.689, "eval_ter": 43.0605, "step": 50000 }, { "epoch": 2.288485068201387, "grad_norm": 0.7548694014549255, "learning_rate": 1.1858582196643555e-05, "loss": 0.4736, "step": 50500 }, { "epoch": 2.288485068201387, "eval_bleu": 47.3114, "eval_chrf++": 67.059, "eval_gen_len": 27.5484, "eval_loss": 0.8111055493354797, "eval_runtime": 730.4241, "eval_samples_per_second": 3.423, "eval_spbleu": 55.1401, "eval_steps_per_second": 0.685, "eval_ter": 43.2688, "step": 50500 }, { "epoch": 2.3111433362033806, "grad_norm": 0.6914283037185669, "learning_rate": 1.1480944396610324e-05, "loss": 0.4673, "step": 51000 }, { "epoch": 2.3111433362033806, "eval_bleu": 47.0659, "eval_chrf++": 66.8804, "eval_gen_len": 27.5096, "eval_loss": 0.8131672739982605, "eval_runtime": 728.8458, "eval_samples_per_second": 3.43, "eval_spbleu": 54.9036, "eval_steps_per_second": 0.686, "eval_ter": 43.3585, "step": 51000 }, { "epoch": 2.3338016042053744, "grad_norm": 0.8246389031410217, "learning_rate": 1.1103306596577091e-05, "loss": 0.4598, "step": 51500 }, { "epoch": 2.3338016042053744, "eval_bleu": 47.2042, "eval_chrf++": 66.7914, "eval_gen_len": 27.506, "eval_loss": 0.8135460019111633, "eval_runtime": 727.1036, "eval_samples_per_second": 3.438, "eval_spbleu": 54.8775, "eval_steps_per_second": 0.688, "eval_ter": 43.485, "step": 51500 }, { "epoch": 2.3564598722073686, "grad_norm": 0.8400627970695496, "learning_rate": 1.072566879654386e-05, "loss": 0.4729, "step": 52000 }, { "epoch": 2.3564598722073686, "eval_bleu": 45.7548, "eval_chrf++": 66.7646, "eval_gen_len": 27.63, "eval_loss": 0.8120532035827637, "eval_runtime": 730.4307, "eval_samples_per_second": 3.423, "eval_spbleu": 54.8446, "eval_steps_per_second": 0.685, "eval_ter": 43.4956, "step": 52000 }, { "epoch": 2.3791181402093624, "grad_norm": 0.8089118599891663, "learning_rate": 1.0348030996510627e-05, "loss": 0.4683, "step": 52500 }, { "epoch": 2.3791181402093624, "eval_bleu": 46.2723, "eval_chrf++": 66.8032, "eval_gen_len": 27.588, "eval_loss": 0.8125736117362976, "eval_runtime": 729.5548, "eval_samples_per_second": 3.427, "eval_spbleu": 54.9204, "eval_steps_per_second": 0.685, "eval_ter": 43.3954, "step": 52500 }, { "epoch": 2.401776408211356, "grad_norm": 0.6534927487373352, "learning_rate": 9.970393196477396e-06, "loss": 0.4727, "step": 53000 }, { "epoch": 2.401776408211356, "eval_bleu": 46.7508, "eval_chrf++": 66.7655, "eval_gen_len": 27.5272, "eval_loss": 0.8069682717323303, "eval_runtime": 732.8023, "eval_samples_per_second": 3.412, "eval_spbleu": 54.8765, "eval_steps_per_second": 0.682, "eval_ter": 43.3875, "step": 53000 }, { "epoch": 2.4244346762133504, "grad_norm": 0.6930407881736755, "learning_rate": 9.592755396444163e-06, "loss": 0.4723, "step": 53500 }, { "epoch": 2.4244346762133504, "eval_bleu": 47.2069, "eval_chrf++": 66.9501, "eval_gen_len": 27.5308, "eval_loss": 0.8114036321640015, "eval_runtime": 730.3451, "eval_samples_per_second": 3.423, "eval_spbleu": 55.1585, "eval_steps_per_second": 0.685, "eval_ter": 43.2451, "step": 53500 }, { "epoch": 2.447092944215344, "grad_norm": 0.7665801644325256, "learning_rate": 9.215117596410932e-06, "loss": 0.4711, "step": 54000 }, { "epoch": 2.447092944215344, "eval_bleu": 47.4976, "eval_chrf++": 67.0709, "eval_gen_len": 27.4768, "eval_loss": 0.8121780753135681, "eval_runtime": 727.4198, "eval_samples_per_second": 3.437, "eval_spbleu": 55.3017, "eval_steps_per_second": 0.687, "eval_ter": 43.0816, "step": 54000 } ], "logging_steps": 500, "max_steps": 66201, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.756455083596841e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }