| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 5000, | |
| "global_step": 107877, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.634779384501298e-08, | |
| "loss": 14.0889, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.269558769002596e-08, | |
| "loss": 13.3753, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.3904338153503894e-07, | |
| "loss": 12.3675, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.8539117538005193e-07, | |
| "loss": 11.1815, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.317389692250649e-07, | |
| "loss": 9.3244, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.780867630700779e-07, | |
| "loss": 7.8289, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.2443455691509084e-07, | |
| "loss": 6.5179, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.7078235076010385e-07, | |
| "loss": 5.6367, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.171301446051168e-07, | |
| "loss": 5.0302, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.634779384501298e-07, | |
| "loss": 4.4714, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.098257322951427e-07, | |
| "loss": 4.0915, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.561735261401558e-07, | |
| "loss": 3.7279, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 6.025213199851688e-07, | |
| "loss": 3.4629, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 6.488691138301817e-07, | |
| "loss": 3.2831, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 6.952169076751947e-07, | |
| "loss": 3.0667, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 7.415647015202077e-07, | |
| "loss": 2.9512, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 7.879124953652206e-07, | |
| "loss": 2.7697, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 8.342602892102336e-07, | |
| "loss": 2.6436, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 8.806080830552465e-07, | |
| "loss": 2.5721, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.269558769002596e-07, | |
| "loss": 2.547, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.733036707452726e-07, | |
| "loss": 2.4468, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.0196514645902855e-06, | |
| "loss": 2.4144, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.0659992584352986e-06, | |
| "loss": 2.3397, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.1123470522803115e-06, | |
| "loss": 2.2857, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.1586948461253246e-06, | |
| "loss": 2.2327, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.2050426399703375e-06, | |
| "loss": 2.1722, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.2513904338153504e-06, | |
| "loss": 2.1463, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.2977382276603633e-06, | |
| "loss": 2.1144, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.3440860215053765e-06, | |
| "loss": 2.0433, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.3904338153503894e-06, | |
| "loss": 2.0107, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.4367816091954023e-06, | |
| "loss": 2.0057, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.4831294030404154e-06, | |
| "loss": 1.986, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.5294771968854283e-06, | |
| "loss": 1.9158, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.5758249907304412e-06, | |
| "loss": 1.9486, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.6221727845754543e-06, | |
| "loss": 1.85, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.6685205784204673e-06, | |
| "loss": 1.8479, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.7148683722654802e-06, | |
| "loss": 1.8052, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.761216166110493e-06, | |
| "loss": 1.7931, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.8075639599555062e-06, | |
| "loss": 1.7661, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.853911753800519e-06, | |
| "loss": 1.7307, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.900259547645532e-06, | |
| "loss": 1.6742, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.946607341490545e-06, | |
| "loss": 1.629, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.9929551353355583e-06, | |
| "loss": 1.6147, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.039302929180571e-06, | |
| "loss": 1.6556, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.085650723025584e-06, | |
| "loss": 1.5819, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.131998516870597e-06, | |
| "loss": 1.6211, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.17834631071561e-06, | |
| "loss": 1.5386, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.224694104560623e-06, | |
| "loss": 1.5553, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.271041898405636e-06, | |
| "loss": 1.5572, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.3173896922506492e-06, | |
| "loss": 1.5671, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_bleu": 6.5196, | |
| "eval_gen_len": 18.9699, | |
| "eval_loss": 1.1690529584884644, | |
| "eval_runtime": 968.3873, | |
| "eval_samples_per_second": 2.061, | |
| "eval_steps_per_second": 1.031, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.3637374860956624e-06, | |
| "loss": 1.4969, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.410085279940675e-06, | |
| "loss": 1.5547, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.456433073785688e-06, | |
| "loss": 1.5369, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.502780867630701e-06, | |
| "loss": 1.4743, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.549128661475714e-06, | |
| "loss": 1.5065, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.5954764553207267e-06, | |
| "loss": 1.4637, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.64182424916574e-06, | |
| "loss": 1.4976, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.688172043010753e-06, | |
| "loss": 1.482, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.7345198368557656e-06, | |
| "loss": 1.4703, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.7808676307007788e-06, | |
| "loss": 1.4279, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.827215424545792e-06, | |
| "loss": 1.4673, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.8735632183908046e-06, | |
| "loss": 1.3763, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.9199110122358177e-06, | |
| "loss": 1.4375, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.966258806080831e-06, | |
| "loss": 1.3887, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 3.0126065999258435e-06, | |
| "loss": 1.3819, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 3.0589543937708566e-06, | |
| "loss": 1.4471, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.1053021876158698e-06, | |
| "loss": 1.3955, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.1516499814608825e-06, | |
| "loss": 1.4317, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.1979977753058956e-06, | |
| "loss": 1.3863, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.2443455691509087e-06, | |
| "loss": 1.3759, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.2906933629959214e-06, | |
| "loss": 1.3599, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.3370411568409345e-06, | |
| "loss": 1.4061, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.383388950685947e-06, | |
| "loss": 1.339, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.4297367445309603e-06, | |
| "loss": 1.377, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.4760845383759734e-06, | |
| "loss": 1.3454, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.522432332220986e-06, | |
| "loss": 1.3565, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.5687801260659993e-06, | |
| "loss": 1.3381, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.6151279199110124e-06, | |
| "loss": 1.3326, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.661475713756025e-06, | |
| "loss": 1.3442, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.707823507601038e-06, | |
| "loss": 1.3053, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.7541713014460513e-06, | |
| "loss": 1.3071, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.800519095291064e-06, | |
| "loss": 1.3401, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.8468668891360776e-06, | |
| "loss": 1.3061, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.89321468298109e-06, | |
| "loss": 1.3304, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.939562476826103e-06, | |
| "loss": 1.3354, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.9859102706711165e-06, | |
| "loss": 1.3253, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.032258064516129e-06, | |
| "loss": 1.3006, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.078605858361142e-06, | |
| "loss": 1.2852, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.1249536522061554e-06, | |
| "loss": 1.2775, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.171301446051168e-06, | |
| "loss": 1.2842, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.217649239896181e-06, | |
| "loss": 1.2935, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.263997033741194e-06, | |
| "loss": 1.2656, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.310344827586207e-06, | |
| "loss": 1.27, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.35669262143122e-06, | |
| "loss": 1.2983, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.403040415276233e-06, | |
| "loss": 1.2317, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.449388209121246e-06, | |
| "loss": 1.2491, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.4957360029662596e-06, | |
| "loss": 1.2438, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.542083796811272e-06, | |
| "loss": 1.2608, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.588431590656286e-06, | |
| "loss": 1.2656, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.6347793845012985e-06, | |
| "loss": 1.2277, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_bleu": 7.082, | |
| "eval_gen_len": 18.9724, | |
| "eval_loss": 1.0592412948608398, | |
| "eval_runtime": 961.2611, | |
| "eval_samples_per_second": 2.076, | |
| "eval_steps_per_second": 1.038, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.681127178346311e-06, | |
| "loss": 1.3006, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.727474972191325e-06, | |
| "loss": 1.2365, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.7738227660363374e-06, | |
| "loss": 1.252, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.82017055988135e-06, | |
| "loss": 1.2351, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.866518353726364e-06, | |
| "loss": 1.2442, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.912866147571376e-06, | |
| "loss": 1.2158, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.959213941416389e-06, | |
| "loss": 1.2123, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.999999811534358e-06, | |
| "loss": 1.1988, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.999983582566248e-06, | |
| "loss": 1.2438, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.999941178008878e-06, | |
| "loss": 1.2145, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.999872598306237e-06, | |
| "loss": 1.1855, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.999777844176376e-06, | |
| "loss": 1.2148, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.999656916611398e-06, | |
| "loss": 1.2128, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.999509816877453e-06, | |
| "loss": 1.2222, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.999336546514719e-06, | |
| "loss": 1.1951, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.9991371073373895e-06, | |
| "loss": 1.1656, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.998911501433653e-06, | |
| "loss": 1.1783, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.9986597311656735e-06, | |
| "loss": 1.182, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.998381799169562e-06, | |
| "loss": 1.1869, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.99807770835535e-06, | |
| "loss": 1.2242, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.997747461906961e-06, | |
| "loss": 1.1884, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.997391063282177e-06, | |
| "loss": 1.2025, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.997008516212599e-06, | |
| "loss": 1.207, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.996599824703613e-06, | |
| "loss": 1.1898, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.996164993034341e-06, | |
| "loss": 1.1942, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.995704025757605e-06, | |
| "loss": 1.1647, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.995216927699872e-06, | |
| "loss": 1.1961, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.994703703961206e-06, | |
| "loss": 1.1728, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.994164359915219e-06, | |
| "loss": 1.1801, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.993598901209003e-06, | |
| "loss": 1.1607, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.993007333763086e-06, | |
| "loss": 1.1674, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.9923896637713575e-06, | |
| "loss": 1.1837, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.991745897701012e-06, | |
| "loss": 1.1709, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.991076042292475e-06, | |
| "loss": 1.1672, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.990380104559337e-06, | |
| "loss": 1.1649, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.989658091788277e-06, | |
| "loss": 1.1582, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.988910011538991e-06, | |
| "loss": 1.1485, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.988135871644105e-06, | |
| "loss": 1.1887, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.987335680209099e-06, | |
| "loss": 1.1656, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.986509445612223e-06, | |
| "loss": 1.1182, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.985657176504402e-06, | |
| "loss": 1.0969, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.984778881809156e-06, | |
| "loss": 1.1648, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.983874570722496e-06, | |
| "loss": 1.1689, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.982944252712834e-06, | |
| "loss": 1.1127, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.981987937520884e-06, | |
| "loss": 1.1529, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.981005635159558e-06, | |
| "loss": 1.1279, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.979997355913859e-06, | |
| "loss": 1.1099, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.978963110340778e-06, | |
| "loss": 1.1671, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.9779029092691825e-06, | |
| "loss": 1.167, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.976816763799698e-06, | |
| "loss": 1.1316, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_bleu": 7.3283, | |
| "eval_gen_len": 18.9825, | |
| "eval_loss": 1.0111815929412842, | |
| "eval_runtime": 963.287, | |
| "eval_samples_per_second": 2.072, | |
| "eval_steps_per_second": 1.036, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.975704685304601e-06, | |
| "loss": 1.1448, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.97456668542769e-06, | |
| "loss": 1.1221, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.97340277608417e-06, | |
| "loss": 1.1549, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.972212969460528e-06, | |
| "loss": 1.1493, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.9709972780144e-06, | |
| "loss": 1.1595, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.969755714474447e-06, | |
| "loss": 1.1123, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.968488291840215e-06, | |
| "loss": 1.123, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.9671950233820075e-06, | |
| "loss": 1.1096, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.965875922640738e-06, | |
| "loss": 1.1296, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.964531003427792e-06, | |
| "loss": 1.1528, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.9631602798248845e-06, | |
| "loss": 1.1315, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.961763766183908e-06, | |
| "loss": 1.0704, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.960341477126786e-06, | |
| "loss": 1.1107, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.9588934275453165e-06, | |
| "loss": 1.1351, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.957419632601022e-06, | |
| "loss": 1.1132, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.955920107724982e-06, | |
| "loss": 1.0942, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.95439486861768e-06, | |
| "loss": 1.1466, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.952843931248834e-06, | |
| "loss": 1.1164, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.951267311857229e-06, | |
| "loss": 1.1336, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.949665026950551e-06, | |
| "loss": 1.0946, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.948037093305211e-06, | |
| "loss": 1.0768, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.94638352796617e-06, | |
| "loss": 1.1048, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.944704348246759e-06, | |
| "loss": 1.0964, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.942999571728503e-06, | |
| "loss": 1.115, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.941269216260929e-06, | |
| "loss": 1.0912, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.9395132999613874e-06, | |
| "loss": 1.1039, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.937731841214856e-06, | |
| "loss": 1.1001, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.935924858673751e-06, | |
| "loss": 1.1137, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.934092371257727e-06, | |
| "loss": 1.1154, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.932234398153488e-06, | |
| "loss": 1.1103, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.930350958814578e-06, | |
| "loss": 1.0984, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.9284420729611785e-06, | |
| "loss": 1.1006, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.926507760579906e-06, | |
| "loss": 1.1028, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.9245480419236015e-06, | |
| "loss": 1.0915, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.922562937511115e-06, | |
| "loss": 1.0716, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.920552468127093e-06, | |
| "loss": 1.0817, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.918516654821765e-06, | |
| "loss": 1.113, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.916455518910713e-06, | |
| "loss": 1.1086, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.9143690819746595e-06, | |
| "loss": 1.0853, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.912257365859234e-06, | |
| "loss": 1.1003, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.9101203926747465e-06, | |
| "loss": 1.074, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.907958184795958e-06, | |
| "loss": 1.069, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.905770764861842e-06, | |
| "loss": 1.0779, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.903558155775352e-06, | |
| "loss": 1.0746, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.901320380703179e-06, | |
| "loss": 1.0855, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.8990574630755085e-06, | |
| "loss": 1.0499, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.896769426585778e-06, | |
| "loss": 1.0826, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.8944562951904256e-06, | |
| "loss": 1.0936, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.892118093108641e-06, | |
| "loss": 1.0893, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.889754844822113e-06, | |
| "loss": 1.0833, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_bleu": 7.4462, | |
| "eval_gen_len": 18.977, | |
| "eval_loss": 0.972794771194458, | |
| "eval_runtime": 963.5918, | |
| "eval_samples_per_second": 2.071, | |
| "eval_steps_per_second": 1.036, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.887366575074769e-06, | |
| "loss": 1.1106, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.884953308872522e-06, | |
| "loss": 1.0961, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.882515071483003e-06, | |
| "loss": 1.0668, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.8800518884353e-06, | |
| "loss": 1.0548, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.8775637855196885e-06, | |
| "loss": 1.1031, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.875050788787367e-06, | |
| "loss": 1.0564, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.872512924550172e-06, | |
| "loss": 1.0837, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.869950219380317e-06, | |
| "loss": 1.0816, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.867362700110105e-06, | |
| "loss": 1.0352, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.864750393831651e-06, | |
| "loss": 1.068, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.8621133278965956e-06, | |
| "loss": 1.0525, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.859451529915825e-06, | |
| "loss": 1.0867, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.856765027759171e-06, | |
| "loss": 1.0544, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.8540538495551314e-06, | |
| "loss": 1.0805, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.851318023690567e-06, | |
| "loss": 1.0598, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.848557578810407e-06, | |
| "loss": 1.0418, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.845772543817351e-06, | |
| "loss": 1.0486, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.842962947871561e-06, | |
| "loss": 1.0618, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.840128820390364e-06, | |
| "loss": 1.0583, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.837270191047937e-06, | |
| "loss": 1.0703, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.834387089774999e-06, | |
| "loss": 1.039, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.8314795467585e-06, | |
| "loss": 1.0505, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.828547592441298e-06, | |
| "loss": 1.0539, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.8255912575218485e-06, | |
| "loss": 1.0375, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.8226105729538786e-06, | |
| "loss": 1.0378, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.8196055699460636e-06, | |
| "loss": 1.0304, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.816576279961699e-06, | |
| "loss": 1.065, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.813522734718373e-06, | |
| "loss": 1.0445, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.810444966187635e-06, | |
| "loss": 1.0327, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.807343006594658e-06, | |
| "loss": 1.0751, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.804216888417904e-06, | |
| "loss": 1.0444, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.801066644388781e-06, | |
| "loss": 1.005, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.797892307491303e-06, | |
| "loss": 1.0401, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.794693910961745e-06, | |
| "loss": 1.0566, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.7914714882882924e-06, | |
| "loss": 1.0478, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.788225073210691e-06, | |
| "loss": 1.0656, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.784954699719895e-06, | |
| "loss": 1.0244, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.7816604020577105e-06, | |
| "loss": 1.0288, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.778342214716438e-06, | |
| "loss": 1.0515, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.775000172438508e-06, | |
| "loss": 1.0391, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.771634310216122e-06, | |
| "loss": 1.0481, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.768244663290881e-06, | |
| "loss": 1.0502, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.76483126715342e-06, | |
| "loss": 1.0855, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.761394157543038e-06, | |
| "loss": 1.0305, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.757933370447317e-06, | |
| "loss": 1.0609, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.754448942101753e-06, | |
| "loss": 1.0152, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.7509409089893695e-06, | |
| "loss": 1.0449, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.7474093078403436e-06, | |
| "loss": 1.0152, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.743854175631614e-06, | |
| "loss": 1.0, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.740275549586496e-06, | |
| "loss": 1.0339, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_bleu": 8.0126, | |
| "eval_gen_len": 18.982, | |
| "eval_loss": 0.9545806646347046, | |
| "eval_runtime": 963.2133, | |
| "eval_samples_per_second": 2.072, | |
| "eval_steps_per_second": 1.036, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.736673467174295e-06, | |
| "loss": 1.0262, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.733047966109911e-06, | |
| "loss": 1.054, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.729399084353444e-06, | |
| "loss": 0.9769, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.725726860109794e-06, | |
| "loss": 1.0007, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.7220313318282704e-06, | |
| "loss": 1.0438, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.718312538202179e-06, | |
| "loss": 1.0347, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.7145705181684195e-06, | |
| "loss": 1.04, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.710805310907083e-06, | |
| "loss": 1.0099, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.707016955841034e-06, | |
| "loss": 1.0447, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.703205492635504e-06, | |
| "loss": 0.9819, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.699370961197675e-06, | |
| "loss": 1.0066, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.695513401676256e-06, | |
| "loss": 1.007, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.691632854461071e-06, | |
| "loss": 1.037, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.68772936018263e-06, | |
| "loss": 1.0054, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.683802959711709e-06, | |
| "loss": 1.0078, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.6798536941589125e-06, | |
| "loss": 1.0187, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.675881604874257e-06, | |
| "loss": 1.0209, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.6718867334467245e-06, | |
| "loss": 0.9908, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.667869121703835e-06, | |
| "loss": 0.9868, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.6638288117112075e-06, | |
| "loss": 0.985, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.6597658457721175e-06, | |
| "loss": 1.0143, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.655680266427057e-06, | |
| "loss": 0.9698, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.651572116453281e-06, | |
| "loss": 1.0149, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.6474414388643755e-06, | |
| "loss": 1.0212, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.643288276909791e-06, | |
| "loss": 1.0126, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.639112674074396e-06, | |
| "loss": 1.0163, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.634914674078025e-06, | |
| "loss": 0.9814, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.630694320875016e-06, | |
| "loss": 1.039, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.626451658653752e-06, | |
| "loss": 1.0087, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.6221867318361975e-06, | |
| "loss": 0.9795, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.617899585077436e-06, | |
| "loss": 1.0499, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.613590263265198e-06, | |
| "loss": 1.0109, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.6092588115193945e-06, | |
| "loss": 1.0347, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.604905275191647e-06, | |
| "loss": 1.0123, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.600529699864803e-06, | |
| "loss": 1.0216, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.59613213135247e-06, | |
| "loss": 0.9955, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.59171261569853e-06, | |
| "loss": 0.9794, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.587271199176654e-06, | |
| "loss": 1.0098, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.58280792828983e-06, | |
| "loss": 1.0176, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.57832284976986e-06, | |
| "loss": 1.0104, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.5738160105768815e-06, | |
| "loss": 1.0023, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.569287457898874e-06, | |
| "loss": 1.0333, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.564737239151164e-06, | |
| "loss": 0.9876, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.560165401975925e-06, | |
| "loss": 0.9966, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.555571994241685e-06, | |
| "loss": 1.0267, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.550957064042821e-06, | |
| "loss": 1.0371, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.546320659699059e-06, | |
| "loss": 0.9998, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.541662829754963e-06, | |
| "loss": 1.0174, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.536983622979429e-06, | |
| "loss": 0.99, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.532283088365179e-06, | |
| "loss": 1.025, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_bleu": 7.7648, | |
| "eval_gen_len": 18.9805, | |
| "eval_loss": 0.9337242245674133, | |
| "eval_runtime": 963.8615, | |
| "eval_samples_per_second": 2.071, | |
| "eval_steps_per_second": 1.035, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.527561275128241e-06, | |
| "loss": 1.03, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.522818232707435e-06, | |
| "loss": 1.0015, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.5180540107638634e-06, | |
| "loss": 0.9622, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.513268659180377e-06, | |
| "loss": 1.0272, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.508462228061065e-06, | |
| "loss": 0.994, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.503634767730724e-06, | |
| "loss": 0.9826, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.498786328734336e-06, | |
| "loss": 0.9823, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.49391696183653e-06, | |
| "loss": 0.9975, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.489026718021061e-06, | |
| "loss": 0.9776, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.484115648490271e-06, | |
| "loss": 0.9798, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.4791838046645545e-06, | |
| "loss": 0.9634, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.474231238181817e-06, | |
| "loss": 1.0174, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.469258000896936e-06, | |
| "loss": 0.9762, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.464264144881221e-06, | |
| "loss": 1.0287, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.459249722421866e-06, | |
| "loss": 0.977, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.454214786021399e-06, | |
| "loss": 0.9885, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.449159388397138e-06, | |
| "loss": 1.0286, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.4440835824806364e-06, | |
| "loss": 0.9907, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.438987421417126e-06, | |
| "loss": 0.9827, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.433870958564965e-06, | |
| "loss": 0.9911, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.428734247495077e-06, | |
| "loss": 1.0034, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.423577341990392e-06, | |
| "loss": 0.9539, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.41840029604528e-06, | |
| "loss": 1.0161, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.413203163864988e-06, | |
| "loss": 1.0044, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.407985999865072e-06, | |
| "loss": 0.9767, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.4027488586708274e-06, | |
| "loss": 0.9824, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.397491795116719e-06, | |
| "loss": 0.9905, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.392214864245801e-06, | |
| "loss": 0.9632, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.386918121309147e-06, | |
| "loss": 0.9724, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.381601621765267e-06, | |
| "loss": 1.0322, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.376265421279532e-06, | |
| "loss": 1.0209, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.3709095757235835e-06, | |
| "loss": 0.9452, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.365534141174756e-06, | |
| "loss": 0.9999, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.360139173915486e-06, | |
| "loss": 0.9786, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.3547247304327234e-06, | |
| "loss": 1.0074, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.34929086741734e-06, | |
| "loss": 0.9756, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.343837641763535e-06, | |
| "loss": 0.9844, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.338365110568242e-06, | |
| "loss": 1.0181, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.332873331130531e-06, | |
| "loss": 0.9706, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.3273623609509996e-06, | |
| "loss": 0.9749, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.321832257731189e-06, | |
| "loss": 0.969, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.316283079372959e-06, | |
| "loss": 1.0083, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.3107148839779e-06, | |
| "loss": 0.9969, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.305127729846711e-06, | |
| "loss": 0.9863, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.299521675478598e-06, | |
| "loss": 0.9646, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.293896779570656e-06, | |
| "loss": 0.9704, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.288253101017259e-06, | |
| "loss": 0.949, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.282590698909439e-06, | |
| "loss": 0.9667, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.276909632534269e-06, | |
| "loss": 0.9565, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.271209961374246e-06, | |
| "loss": 0.9733, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_bleu": 7.9496, | |
| "eval_gen_len": 18.9815, | |
| "eval_loss": 0.9227670431137085, | |
| "eval_runtime": 961.6825, | |
| "eval_samples_per_second": 2.076, | |
| "eval_steps_per_second": 1.038, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.26549174510666e-06, | |
| "loss": 0.9857, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.259755043602978e-06, | |
| "loss": 0.9431, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.253999916928211e-06, | |
| "loss": 0.9731, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.248226425340288e-06, | |
| "loss": 0.9833, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.242434629289421e-06, | |
| "loss": 0.9582, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.236624589417482e-06, | |
| "loss": 1.002, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.230796366557354e-06, | |
| "loss": 0.9598, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.224950021732307e-06, | |
| "loss": 0.9576, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.21908561615535e-06, | |
| "loss": 1.0025, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.213203211228596e-06, | |
| "loss": 0.9746, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.2073028685426146e-06, | |
| "loss": 0.9419, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 4.20138464987579e-06, | |
| "loss": 0.9092, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 4.195448617193676e-06, | |
| "loss": 0.9345, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 4.189494832648339e-06, | |
| "loss": 0.9613, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.183523358577716e-06, | |
| "loss": 0.9596, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.177534257504961e-06, | |
| "loss": 0.9412, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.171527592137783e-06, | |
| "loss": 0.9418, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.1655034253678e-06, | |
| "loss": 0.9166, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.15946182026987e-06, | |
| "loss": 0.9483, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.153402840101438e-06, | |
| "loss": 0.9604, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.1473265483018735e-06, | |
| "loss": 0.928, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.141233008491797e-06, | |
| "loss": 0.953, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 4.1351222844724305e-06, | |
| "loss": 0.935, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 4.128994440224912e-06, | |
| "loss": 0.9269, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 4.122849539909637e-06, | |
| "loss": 0.9423, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 4.1166876478655835e-06, | |
| "loss": 0.9402, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 4.110508828609638e-06, | |
| "loss": 0.9472, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 4.1043131468359155e-06, | |
| "loss": 0.9457, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 4.098100667415095e-06, | |
| "loss": 0.8999, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 4.091871455393725e-06, | |
| "loss": 0.9388, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 4.0856255759935515e-06, | |
| "loss": 0.9416, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 4.079363094610836e-06, | |
| "loss": 0.98, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 4.0730840768156625e-06, | |
| "loss": 0.9424, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 4.066788588351261e-06, | |
| "loss": 0.9192, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 4.0604766951333105e-06, | |
| "loss": 0.9188, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 4.054148463249257e-06, | |
| "loss": 0.9636, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.0478039589576146e-06, | |
| "loss": 0.9716, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.041443248687273e-06, | |
| "loss": 0.955, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.035066399036807e-06, | |
| "loss": 0.9519, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.028673476773774e-06, | |
| "loss": 0.9069, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 4.022264548834016e-06, | |
| "loss": 0.9085, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 4.015839682320959e-06, | |
| "loss": 0.9313, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 4.0093989445049135e-06, | |
| "loss": 0.9648, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 4.002942402822364e-06, | |
| "loss": 0.9398, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.9964701248752665e-06, | |
| "loss": 0.9305, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.989982178430345e-06, | |
| "loss": 0.9475, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.983478631418372e-06, | |
| "loss": 0.9448, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.976959551933464e-06, | |
| "loss": 0.9754, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.970425008232369e-06, | |
| "loss": 0.9564, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.96387506873375e-06, | |
| "loss": 0.9035, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "eval_bleu": 7.689, | |
| "eval_gen_len": 18.9795, | |
| "eval_loss": 0.9161636233329773, | |
| "eval_runtime": 963.6243, | |
| "eval_samples_per_second": 2.071, | |
| "eval_steps_per_second": 1.036, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.957309802017466e-06, | |
| "loss": 0.9898, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.95072927682386e-06, | |
| "loss": 0.8963, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.944133562053033e-06, | |
| "loss": 0.9179, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.937522726764128e-06, | |
| "loss": 0.9368, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.930896840174603e-06, | |
| "loss": 0.9281, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.924255971659506e-06, | |
| "loss": 0.9706, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.9176001907507546e-06, | |
| "loss": 0.9401, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.910929567136401e-06, | |
| "loss": 0.9185, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.904244170659904e-06, | |
| "loss": 0.9406, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.8975440713194016e-06, | |
| "loss": 0.923, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.890829339266973e-06, | |
| "loss": 0.9427, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.884100044807907e-06, | |
| "loss": 0.9471, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.877356258399967e-06, | |
| "loss": 0.9367, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.870598050652648e-06, | |
| "loss": 0.9063, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.863825492326444e-06, | |
| "loss": 0.9604, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.857038654332102e-06, | |
| "loss": 0.8888, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.850237607729882e-06, | |
| "loss": 0.9494, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.8434224237288134e-06, | |
| "loss": 0.8925, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.836593173685946e-06, | |
| "loss": 0.9473, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.829749929105609e-06, | |
| "loss": 0.9236, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.822892761638656e-06, | |
| "loss": 0.9083, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.816021743081717e-06, | |
| "loss": 0.9135, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.8091369453764504e-06, | |
| "loss": 0.9526, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.8022384406087824e-06, | |
| "loss": 0.9345, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.79532630100816e-06, | |
| "loss": 0.9176, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.7884005989467866e-06, | |
| "loss": 0.9467, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.781461406938874e-06, | |
| "loss": 0.9271, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.774508797639874e-06, | |
| "loss": 0.9588, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.7675428438457234e-06, | |
| "loss": 0.9201, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.760563618492079e-06, | |
| "loss": 0.9285, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.7535711946535552e-06, | |
| "loss": 0.9484, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.746565645542958e-06, | |
| "loss": 0.9253, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.739547044510521e-06, | |
| "loss": 0.9179, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.7325154650431317e-06, | |
| "loss": 0.9143, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.725470980763569e-06, | |
| "loss": 0.9382, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.718413665429729e-06, | |
| "loss": 0.9533, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.711343592933851e-06, | |
| "loss": 0.9214, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.704260837301746e-06, | |
| "loss": 0.8943, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.6971654726920243e-06, | |
| "loss": 0.9063, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.690057573395311e-06, | |
| "loss": 0.8965, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.6829372138334763e-06, | |
| "loss": 0.9241, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.6758044685588547e-06, | |
| "loss": 0.9644, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.668659412253458e-06, | |
| "loss": 0.9391, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.661502119728203e-06, | |
| "loss": 0.9273, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.6543326659221213e-06, | |
| "loss": 0.9354, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.6471511259015764e-06, | |
| "loss": 0.9162, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.6399575748594796e-06, | |
| "loss": 0.9281, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.6327520881145002e-06, | |
| "loss": 0.8999, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.6255347411102777e-06, | |
| "loss": 0.909, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.6183056094146333e-06, | |
| "loss": 0.9386, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "eval_bleu": 7.6781, | |
| "eval_gen_len": 18.9825, | |
| "eval_loss": 0.9038894176483154, | |
| "eval_runtime": 963.1893, | |
| "eval_samples_per_second": 2.072, | |
| "eval_steps_per_second": 1.036, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.611064768718777e-06, | |
| "loss": 0.8965, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.603812294836515e-06, | |
| "loss": 0.9717, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.5965482637034567e-06, | |
| "loss": 0.8826, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.58927275137622e-06, | |
| "loss": 0.9187, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 3.581985834031635e-06, | |
| "loss": 0.888, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 3.5746875879659426e-06, | |
| "loss": 0.9339, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 3.5673780895940034e-06, | |
| "loss": 0.9291, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 3.56005741544849e-06, | |
| "loss": 0.9303, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 3.5527256421790902e-06, | |
| "loss": 0.9123, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 3.5453828465517e-06, | |
| "loss": 0.9037, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 3.538029105447628e-06, | |
| "loss": 0.9447, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 3.530664495862782e-06, | |
| "loss": 0.9011, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 3.523289094906865e-06, | |
| "loss": 0.8897, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 3.5159029798025717e-06, | |
| "loss": 0.9278, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 3.5085062278847765e-06, | |
| "loss": 0.9204, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 3.5010989165997227e-06, | |
| "loss": 0.9185, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 3.4936811235042158e-06, | |
| "loss": 0.9067, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 3.4862529262648076e-06, | |
| "loss": 0.9469, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 3.4788144026569846e-06, | |
| "loss": 0.942, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 3.4713656305643543e-06, | |
| "loss": 0.9493, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 3.4639066879778278e-06, | |
| "loss": 0.9285, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 3.4564376529948045e-06, | |
| "loss": 0.9242, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 3.4489586038183564e-06, | |
| "loss": 0.9045, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 3.4414696187564035e-06, | |
| "loss": 0.9233, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 3.4339707762209006e-06, | |
| "loss": 0.896, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 3.426462154727012e-06, | |
| "loss": 0.9072, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 3.418943832892291e-06, | |
| "loss": 0.9419, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 3.411415889435856e-06, | |
| "loss": 0.8977, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 3.403878403177567e-06, | |
| "loss": 0.8837, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 3.3963314530372e-06, | |
| "loss": 0.9148, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 3.388775118033621e-06, | |
| "loss": 0.9005, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 3.381209477283957e-06, | |
| "loss": 0.8907, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 3.3736346100027717e-06, | |
| "loss": 0.9064, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 3.3660505955012308e-06, | |
| "loss": 0.9026, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 3.3584575131862757e-06, | |
| "loss": 0.9501, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 3.3508554425597896e-06, | |
| "loss": 0.9016, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 3.3432444632177663e-06, | |
| "loss": 0.9159, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 3.335624654849477e-06, | |
| "loss": 0.8766, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 3.327996097236636e-06, | |
| "loss": 0.9125, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 3.320358870252563e-06, | |
| "loss": 0.8931, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 3.3127130538613506e-06, | |
| "loss": 0.9088, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 3.3050587281170245e-06, | |
| "loss": 0.9427, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 3.297395973162705e-06, | |
| "loss": 0.9042, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 3.2897248692297678e-06, | |
| "loss": 0.9094, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 3.2820454966370102e-06, | |
| "loss": 0.9384, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 3.2743579357897997e-06, | |
| "loss": 0.888, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 3.266662267179238e-06, | |
| "loss": 0.9208, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 3.25895857138132e-06, | |
| "loss": 0.9304, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 3.2512469290560848e-06, | |
| "loss": 0.9346, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 3.2435274209467765e-06, | |
| "loss": 0.9073, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "eval_bleu": 7.8607, | |
| "eval_gen_len": 18.9805, | |
| "eval_loss": 0.8985511064529419, | |
| "eval_runtime": 966.2977, | |
| "eval_samples_per_second": 2.066, | |
| "eval_steps_per_second": 1.033, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 3.235800127878995e-06, | |
| "loss": 0.8804, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 3.228065130759852e-06, | |
| "loss": 0.9301, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 3.220322510577121e-06, | |
| "loss": 0.9109, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 3.2125723483983935e-06, | |
| "loss": 0.9029, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 3.204814725370227e-06, | |
| "loss": 0.9089, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 3.1970497227172957e-06, | |
| "loss": 0.9184, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 3.1892774217415433e-06, | |
| "loss": 0.894, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 3.181497903821326e-06, | |
| "loss": 0.905, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 3.1737112504105655e-06, | |
| "loss": 0.889, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 3.165917543037894e-06, | |
| "loss": 0.8977, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 3.1581168633058002e-06, | |
| "loss": 0.9111, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 3.150309292889776e-06, | |
| "loss": 0.9073, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 3.14249491353746e-06, | |
| "loss": 0.9229, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 3.134673807067784e-06, | |
| "loss": 0.8961, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 3.1268460553701146e-06, | |
| "loss": 0.8976, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 3.1190117404033943e-06, | |
| "loss": 0.9186, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 3.111170944195286e-06, | |
| "loss": 0.8826, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 3.1033237488413144e-06, | |
| "loss": 0.9097, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 3.095470236504003e-06, | |
| "loss": 0.8805, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 3.0876104894120164e-06, | |
| "loss": 0.8718, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 3.0797445898593007e-06, | |
| "loss": 0.9029, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 3.07187262020422e-06, | |
| "loss": 0.9297, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 3.0639946628686913e-06, | |
| "loss": 0.8916, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 3.0561108003373275e-06, | |
| "loss": 0.9609, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 3.0482211151565693e-06, | |
| "loss": 0.8926, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 3.0403256899338236e-06, | |
| "loss": 0.8811, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 3.032424607336595e-06, | |
| "loss": 0.8748, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 3.0245179500916245e-06, | |
| "loss": 0.9036, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 3.016605800984021e-06, | |
| "loss": 0.9, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 3.0086882428563948e-06, | |
| "loss": 0.8919, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 3.0007653586079884e-06, | |
| "loss": 0.924, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.9928372311938134e-06, | |
| "loss": 0.9289, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.984903943623779e-06, | |
| "loss": 0.8922, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.9769655789618185e-06, | |
| "loss": 0.904, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.9690222203250286e-06, | |
| "loss": 0.9239, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.961073950882793e-06, | |
| "loss": 0.9248, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.9531208538559114e-06, | |
| "loss": 0.8867, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.945163012515732e-06, | |
| "loss": 0.8872, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.9372005101832767e-06, | |
| "loss": 0.9082, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.9292334302283683e-06, | |
| "loss": 0.8745, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.9212618560687604e-06, | |
| "loss": 0.8917, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.9132858711692607e-06, | |
| "loss": 0.8771, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.9053055590408603e-06, | |
| "loss": 0.8999, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.8973210032398567e-06, | |
| "loss": 0.8902, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.88933228736698e-06, | |
| "loss": 0.8912, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.881339495066518e-06, | |
| "loss": 0.8482, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.8733427100254383e-06, | |
| "loss": 0.9028, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.8653420159725166e-06, | |
| "loss": 0.8881, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.8573374966774546e-06, | |
| "loss": 0.8913, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.849329235950007e-06, | |
| "loss": 0.8928, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "eval_bleu": 8.0666, | |
| "eval_gen_len": 18.981, | |
| "eval_loss": 0.8941593170166016, | |
| "eval_runtime": 968.3576, | |
| "eval_samples_per_second": 2.061, | |
| "eval_steps_per_second": 1.031, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.8413173176391006e-06, | |
| "loss": 0.8992, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.8333018256319617e-06, | |
| "loss": 0.8777, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.82528284385323e-06, | |
| "loss": 0.8709, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.817260456264086e-06, | |
| "loss": 0.9326, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.809234746861372e-06, | |
| "loss": 0.9006, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.801205799676709e-06, | |
| "loss": 0.929, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.7931736987756165e-06, | |
| "loss": 0.9161, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.7851385282566372e-06, | |
| "loss": 0.908, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.7771003722504534e-06, | |
| "loss": 0.9008, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.769059314919006e-06, | |
| "loss": 0.9195, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.7610154404546136e-06, | |
| "loss": 0.8826, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.752968833079089e-06, | |
| "loss": 0.858, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.744919577042863e-06, | |
| "loss": 0.9305, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.7368677566240976e-06, | |
| "loss": 0.9289, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.7288134561278017e-06, | |
| "loss": 0.9168, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.720756759884956e-06, | |
| "loss": 0.8958, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.7126977522516223e-06, | |
| "loss": 0.8924, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.7046365176080635e-06, | |
| "loss": 0.9235, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.6965731403578614e-06, | |
| "loss": 0.9171, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.6885077049270316e-06, | |
| "loss": 0.9159, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.6804402957631364e-06, | |
| "loss": 0.8949, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.6723709973344088e-06, | |
| "loss": 0.8796, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.6642998941288573e-06, | |
| "loss": 0.8943, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.6562270706533917e-06, | |
| "loss": 0.8714, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.6481526114329313e-06, | |
| "loss": 0.8647, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.640076601009522e-06, | |
| "loss": 0.9151, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.631999123941452e-06, | |
| "loss": 0.8942, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.6239202648023666e-06, | |
| "loss": 0.8863, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.6158401081803784e-06, | |
| "loss": 0.9138, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.6077587386771896e-06, | |
| "loss": 0.9095, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.5996762409071978e-06, | |
| "loss": 0.9049, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.591592699496616e-06, | |
| "loss": 0.8793, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.583508199082585e-06, | |
| "loss": 0.864, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.575422824312284e-06, | |
| "loss": 0.9173, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.5673366598420487e-06, | |
| "loss": 0.8799, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.5592497903364834e-06, | |
| "loss": 0.8751, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.5511623004675743e-06, | |
| "loss": 0.8871, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.5430742749138015e-06, | |
| "loss": 0.9121, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.534985798359257e-06, | |
| "loss": 0.8947, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.5268969554927512e-06, | |
| "loss": 0.8911, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.5188078310069326e-06, | |
| "loss": 0.8626, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.5107185095973967e-06, | |
| "loss": 0.8943, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.5026290759618026e-06, | |
| "loss": 0.9163, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.494539614798982e-06, | |
| "loss": 0.8599, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.486450210808057e-06, | |
| "loss": 0.8907, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.4783609486875507e-06, | |
| "loss": 0.889, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.4702719131345003e-06, | |
| "loss": 0.8872, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.462183188843569e-06, | |
| "loss": 0.8836, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.4540948605061652e-06, | |
| "loss": 0.8837, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.446007012809548e-06, | |
| "loss": 0.884, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "eval_bleu": 8.1679, | |
| "eval_gen_len": 18.9785, | |
| "eval_loss": 0.8873680830001831, | |
| "eval_runtime": 968.5261, | |
| "eval_samples_per_second": 2.061, | |
| "eval_steps_per_second": 1.03, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.437919730435946e-06, | |
| "loss": 0.8902, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.4298330980616674e-06, | |
| "loss": 0.8499, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.4217472003562144e-06, | |
| "loss": 0.8927, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.413662121981399e-06, | |
| "loss": 0.9325, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.4055779475904536e-06, | |
| "loss": 0.8586, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.397494761827145e-06, | |
| "loss": 0.8788, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.3894126493248884e-06, | |
| "loss": 0.9161, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.3813316947058634e-06, | |
| "loss": 0.9058, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.373251982580124e-06, | |
| "loss": 0.8881, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.365173597544718e-06, | |
| "loss": 0.9027, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.3570966241827947e-06, | |
| "loss": 0.914, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.3490211470627254e-06, | |
| "loss": 0.8989, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.3409472507372134e-06, | |
| "loss": 0.8792, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.3328750197424115e-06, | |
| "loss": 0.8651, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.3248045385970357e-06, | |
| "loss": 0.9071, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.316735891801482e-06, | |
| "loss": 0.8676, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.308669163836938e-06, | |
| "loss": 0.9145, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.300604439164501e-06, | |
| "loss": 0.9072, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.2925418022242955e-06, | |
| "loss": 0.8916, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.2844813374345837e-06, | |
| "loss": 0.872, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.2764231291908847e-06, | |
| "loss": 0.8983, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.2683672618650945e-06, | |
| "loss": 0.8939, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.2603138198045966e-06, | |
| "loss": 0.9226, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.2522628873313806e-06, | |
| "loss": 0.8937, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.2442145487411605e-06, | |
| "loss": 0.8331, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.2361688883024912e-06, | |
| "loss": 0.8817, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.228125990255889e-06, | |
| "loss": 0.875, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.2200859388129447e-06, | |
| "loss": 0.9098, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.2120488181554433e-06, | |
| "loss": 0.8639, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.2040147124344864e-06, | |
| "loss": 0.89, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.195983705769607e-06, | |
| "loss": 0.8963, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.1879558822478883e-06, | |
| "loss": 0.9029, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.1799313259230894e-06, | |
| "loss": 0.8764, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.1719101208147557e-06, | |
| "loss": 0.8964, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.163892350907349e-06, | |
| "loss": 0.9076, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.1558781001493604e-06, | |
| "loss": 0.8936, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.147867452452435e-06, | |
| "loss": 0.8659, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.1398604916904923e-06, | |
| "loss": 0.897, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.131857301698852e-06, | |
| "loss": 0.8884, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.123857966273348e-06, | |
| "loss": 0.8893, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.115862569169458e-06, | |
| "loss": 0.9308, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.1078711941014242e-06, | |
| "loss": 0.8899, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.099883924741376e-06, | |
| "loss": 0.8964, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.0919008447184562e-06, | |
| "loss": 0.9132, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.083922037617943e-06, | |
| "loss": 0.9075, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.075947586980376e-06, | |
| "loss": 0.8968, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.067977576300682e-06, | |
| "loss": 0.8683, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.0600120890272976e-06, | |
| "loss": 0.8596, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.0520512085613e-06, | |
| "loss": 0.8863, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.0440950182555337e-06, | |
| "loss": 0.8786, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "eval_bleu": 7.8516, | |
| "eval_gen_len": 18.9805, | |
| "eval_loss": 0.8830544948577881, | |
| "eval_runtime": 968.5663, | |
| "eval_samples_per_second": 2.061, | |
| "eval_steps_per_second": 1.03, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.0361436014137315e-06, | |
| "loss": 0.8911, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.028197041289649e-06, | |
| "loss": 0.9172, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 2.0202554210861906e-06, | |
| "loss": 0.8468, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 2.0123188239545375e-06, | |
| "loss": 0.8445, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 2.0043873329932774e-06, | |
| "loss": 0.8917, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.996461031247536e-06, | |
| "loss": 0.8759, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.9885400017081075e-06, | |
| "loss": 0.9045, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.9806243273105807e-06, | |
| "loss": 0.8706, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.9727140909344767e-06, | |
| "loss": 0.8973, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.9648093754023784e-06, | |
| "loss": 0.8648, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.956910263479066e-06, | |
| "loss": 0.8774, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.9490168378706456e-06, | |
| "loss": 0.9013, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.9411291812236855e-06, | |
| "loss": 0.9092, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.9332473761243532e-06, | |
| "loss": 0.8628, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.925371505097548e-06, | |
| "loss": 0.8867, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.9175016506060357e-06, | |
| "loss": 0.8694, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.9096378950495915e-06, | |
| "loss": 0.8613, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.9017803207641282e-06, | |
| "loss": 0.8966, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.8939290100208425e-06, | |
| "loss": 0.8381, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.8860840450253467e-06, | |
| "loss": 0.866, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.8782455079168144e-06, | |
| "loss": 0.9077, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.8704134807671138e-06, | |
| "loss": 0.908, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.8625880455799562e-06, | |
| "loss": 0.8655, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.8547692842900283e-06, | |
| "loss": 0.8985, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.8469572787621426e-06, | |
| "loss": 0.8852, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.8391521107903747e-06, | |
| "loss": 0.8918, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.8313538620972094e-06, | |
| "loss": 0.8765, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.8235626143326865e-06, | |
| "loss": 0.8762, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.8157784490735404e-06, | |
| "loss": 0.9053, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.8080014478223523e-06, | |
| "loss": 0.8811, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.8002316920066932e-06, | |
| "loss": 0.8794, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.792469262978271e-06, | |
| "loss": 0.8885, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.7847142420120815e-06, | |
| "loss": 0.8312, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.7769667103055564e-06, | |
| "loss": 0.856, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.7692267489777104e-06, | |
| "loss": 0.8706, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.761494439068295e-06, | |
| "loss": 0.873, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.7537698615369504e-06, | |
| "loss": 0.8709, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.7460530972623537e-06, | |
| "loss": 0.8237, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.738344227041376e-06, | |
| "loss": 0.8571, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.7306433315882372e-06, | |
| "loss": 0.8635, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.7229504915336574e-06, | |
| "loss": 0.8814, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.715265787424013e-06, | |
| "loss": 0.8667, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.7075892997204958e-06, | |
| "loss": 0.8739, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.6999211087982686e-06, | |
| "loss": 0.8479, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.6922612949456274e-06, | |
| "loss": 0.8878, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.6846099383631537e-06, | |
| "loss": 0.8956, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.6769671191628807e-06, | |
| "loss": 0.8428, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.6693329173674521e-06, | |
| "loss": 0.8344, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.6617074129092857e-06, | |
| "loss": 0.8504, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.6540906856297336e-06, | |
| "loss": 0.8899, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "eval_bleu": 7.9392, | |
| "eval_gen_len": 18.9785, | |
| "eval_loss": 0.8788951635360718, | |
| "eval_runtime": 967.0825, | |
| "eval_samples_per_second": 2.064, | |
| "eval_steps_per_second": 1.032, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.6464828152782508e-06, | |
| "loss": 0.8444, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.638883881511556e-06, | |
| "loss": 0.8604, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.6312939638927994e-06, | |
| "loss": 0.8953, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.623713141890728e-06, | |
| "loss": 0.8298, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.6161414948788575e-06, | |
| "loss": 0.8403, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.6085791021346365e-06, | |
| "loss": 0.8284, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.6010260428386205e-06, | |
| "loss": 0.8546, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.5934823960736402e-06, | |
| "loss": 0.8742, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.5859482408239718e-06, | |
| "loss": 0.8856, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.5784236559745175e-06, | |
| "loss": 0.8231, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.5709087203099687e-06, | |
| "loss": 0.8579, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.5634035125139923e-06, | |
| "loss": 0.8614, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.5559081111683977e-06, | |
| "loss": 0.8646, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.5484225947523201e-06, | |
| "loss": 0.8503, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.5409470416413943e-06, | |
| "loss": 0.8683, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.5334815301069374e-06, | |
| "loss": 0.8306, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.526026138315128e-06, | |
| "loss": 0.8103, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.5185809443261897e-06, | |
| "loss": 0.8605, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.5111460260935695e-06, | |
| "loss": 0.833, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.5037214614631234e-06, | |
| "loss": 0.9071, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.496307328172306e-06, | |
| "loss": 0.854, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.4889037038493488e-06, | |
| "loss": 0.8733, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.4815106660124517e-06, | |
| "loss": 0.8441, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.4741282920689736e-06, | |
| "loss": 0.874, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.4667566593146167e-06, | |
| "loss": 0.8514, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.45939584493262e-06, | |
| "loss": 0.8502, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.4520459259929527e-06, | |
| "loss": 0.8563, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.4447069794515e-06, | |
| "loss": 0.8676, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.437379082149271e-06, | |
| "loss": 0.8202, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.4300623108115793e-06, | |
| "loss": 0.8378, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.4227567420472487e-06, | |
| "loss": 0.8805, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.4154624523478095e-06, | |
| "loss": 0.8568, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.408179518086694e-06, | |
| "loss": 0.8617, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.4009080155184407e-06, | |
| "loss": 0.8427, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.393648020777899e-06, | |
| "loss": 0.8675, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.3863996098794213e-06, | |
| "loss": 0.8523, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.3791628587160768e-06, | |
| "loss": 0.8456, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.3719378430588553e-06, | |
| "loss": 0.8492, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.3647246385558682e-06, | |
| "loss": 0.8459, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.3575233207315635e-06, | |
| "loss": 0.8667, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.3503339649859315e-06, | |
| "loss": 0.8342, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.3431566465937163e-06, | |
| "loss": 0.8684, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.3359914407036267e-06, | |
| "loss": 0.8315, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.3288384223375487e-06, | |
| "loss": 0.8773, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.3216976663897622e-06, | |
| "loss": 0.8355, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.3145692476261587e-06, | |
| "loss": 0.8641, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.3074532406834505e-06, | |
| "loss": 0.8655, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.300349720068398e-06, | |
| "loss": 0.8502, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.2932587601570245e-06, | |
| "loss": 0.8529, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.286180435193839e-06, | |
| "loss": 0.8638, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "eval_bleu": 8.1623, | |
| "eval_gen_len": 18.979, | |
| "eval_loss": 0.878086507320404, | |
| "eval_runtime": 964.1689, | |
| "eval_samples_per_second": 2.07, | |
| "eval_steps_per_second": 1.035, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.2791148192910586e-06, | |
| "loss": 0.8571, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.2720619864278338e-06, | |
| "loss": 0.8588, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.2650220104494714e-06, | |
| "loss": 0.8413, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.2579949650666648e-06, | |
| "loss": 0.8627, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.2509809238547165e-06, | |
| "loss": 0.8522, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.2439799602527741e-06, | |
| "loss": 0.8369, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.2369921475630586e-06, | |
| "loss": 0.8676, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.230017558950096e-06, | |
| "loss": 0.8744, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.2230562674399538e-06, | |
| "loss": 0.8681, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.2161083459194714e-06, | |
| "loss": 0.8504, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.2091738671355039e-06, | |
| "loss": 0.8568, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.2022529036941546e-06, | |
| "loss": 0.862, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.1953455280600188e-06, | |
| "loss": 0.8509, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.188451812555422e-06, | |
| "loss": 0.8718, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.1815718293596653e-06, | |
| "loss": 0.877, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.1747056505082658e-06, | |
| "loss": 0.8373, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.1678533478922075e-06, | |
| "loss": 0.8603, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.1610149932571847e-06, | |
| "loss": 0.8459, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.1541906582028526e-06, | |
| "loss": 0.8692, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.1473804141820783e-06, | |
| "loss": 0.8508, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.1405843325001878e-06, | |
| "loss": 0.8799, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.1338024843142265e-06, | |
| "loss": 0.8758, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.1270349406322109e-06, | |
| "loss": 0.8531, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.1202817723123807e-06, | |
| "loss": 0.8452, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.1135430500624675e-06, | |
| "loss": 0.9195, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.1068188444389444e-06, | |
| "loss": 0.8824, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.10010922584629e-06, | |
| "loss": 0.8379, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.0934142645362547e-06, | |
| "loss": 0.8654, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.0867340306071228e-06, | |
| "loss": 0.8671, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.0800685940029742e-06, | |
| "loss": 0.8641, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.073418024512964e-06, | |
| "loss": 0.8477, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.066782391770576e-06, | |
| "loss": 0.8477, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.0601617652529066e-06, | |
| "loss": 0.8356, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.0535562142799325e-06, | |
| "loss": 0.8803, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.0469658080137801e-06, | |
| "loss": 0.8187, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.0403906154580127e-06, | |
| "loss": 0.8548, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.0338307054568977e-06, | |
| "loss": 0.8409, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.027286146694689e-06, | |
| "loss": 0.8734, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.0207570076949099e-06, | |
| "loss": 0.858, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.0142433568196347e-06, | |
| "loss": 0.8373, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.007745262268768e-06, | |
| "loss": 0.8376, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.0012627920793424e-06, | |
| "loss": 0.8204, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 9.947960141247918e-07, | |
| "loss": 0.8247, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 9.883449961142504e-07, | |
| "loss": 0.8753, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 9.819098055918424e-07, | |
| "loss": 0.8703, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 9.7549050993597e-07, | |
| "loss": 0.823, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 9.690871763586137e-07, | |
| "loss": 0.8877, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 9.626998719046263e-07, | |
| "loss": 0.8174, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 9.563286634510293e-07, | |
| "loss": 0.8523, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 9.499736177063165e-07, | |
| "loss": 0.8293, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "eval_bleu": 8.0989, | |
| "eval_gen_len": 18.98, | |
| "eval_loss": 0.8752478361129761, | |
| "eval_runtime": 964.0098, | |
| "eval_samples_per_second": 2.071, | |
| "eval_steps_per_second": 1.035, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 9.436348012097496e-07, | |
| "loss": 0.8633, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 9.373122803306672e-07, | |
| "loss": 0.8656, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 9.31006121267791e-07, | |
| "loss": 0.8289, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 9.247163900485232e-07, | |
| "loss": 0.8325, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 9.184431525282659e-07, | |
| "loss": 0.8779, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 9.121864743897266e-07, | |
| "loss": 0.8456, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 9.059464211422286e-07, | |
| "loss": 0.8431, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 8.9972305812103e-07, | |
| "loss": 0.8287, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 8.935164504866367e-07, | |
| "loss": 0.842, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 8.873266632241201e-07, | |
| "loss": 0.8748, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 8.811537611424383e-07, | |
| "loss": 0.8652, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 8.749978088737541e-07, | |
| "loss": 0.8327, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 8.688588708727621e-07, | |
| "loss": 0.8433, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 8.627370114160133e-07, | |
| "loss": 0.861, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 8.566322946012389e-07, | |
| "loss": 0.852, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 8.505447843466836e-07, | |
| "loss": 0.8334, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 8.444745443904337e-07, | |
| "loss": 0.8509, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 8.384216382897476e-07, | |
| "loss": 0.8272, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 8.323861294203964e-07, | |
| "loss": 0.7921, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 8.263680809759955e-07, | |
| "loss": 0.8153, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 8.203675559673441e-07, | |
| "loss": 0.8168, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 8.143846172217671e-07, | |
| "loss": 0.872, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 8.084193273824531e-07, | |
| "loss": 0.8519, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 8.024717489078032e-07, | |
| "loss": 0.8594, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 7.965419440707756e-07, | |
| "loss": 0.8445, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 7.90629974958232e-07, | |
| "loss": 0.8429, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 7.84735903470289e-07, | |
| "loss": 0.819, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 7.788597913196702e-07, | |
| "loss": 0.829, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 7.730017000310575e-07, | |
| "loss": 0.8163, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 7.671616909404508e-07, | |
| "loss": 0.8699, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 7.613398251945239e-07, | |
| "loss": 0.8303, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 7.555361637499833e-07, | |
| "loss": 0.8012, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 7.49750767372932e-07, | |
| "loss": 0.8178, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 7.439836966382303e-07, | |
| "loss": 0.847, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 7.382350119288647e-07, | |
| "loss": 0.8623, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 7.325047734353155e-07, | |
| "loss": 0.8079, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 7.267930411549234e-07, | |
| "loss": 0.8337, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 7.210998748912657e-07, | |
| "loss": 0.8399, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 7.154253342535248e-07, | |
| "loss": 0.851, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 7.097694786558693e-07, | |
| "loss": 0.8304, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 7.041323673168307e-07, | |
| "loss": 0.846, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 6.985140592586781e-07, | |
| "loss": 0.8835, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 6.929146133068102e-07, | |
| "loss": 0.8588, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 6.873340880891308e-07, | |
| "loss": 0.8275, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 6.817725420354365e-07, | |
| "loss": 0.8529, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 6.762300333768082e-07, | |
| "loss": 0.8705, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 6.707066201450003e-07, | |
| "loss": 0.8591, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 6.652023601718282e-07, | |
| "loss": 0.8235, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 6.597173110885732e-07, | |
| "loss": 0.8575, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 6.542515303253666e-07, | |
| "loss": 0.8625, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "eval_bleu": 8.176, | |
| "eval_gen_len": 18.979, | |
| "eval_loss": 0.8743442296981812, | |
| "eval_runtime": 965.6565, | |
| "eval_samples_per_second": 2.067, | |
| "eval_steps_per_second": 1.033, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 6.488050751105979e-07, | |
| "loss": 0.8735, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 6.433780024703124e-07, | |
| "loss": 0.8721, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 6.379703692276104e-07, | |
| "loss": 0.8554, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 6.325822320020608e-07, | |
| "loss": 0.8569, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 6.272136472091008e-07, | |
| "loss": 0.8107, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 6.218646710594465e-07, | |
| "loss": 0.8671, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 6.165353595585069e-07, | |
| "loss": 0.8392, | |
| "step": 85700 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 6.112257685057973e-07, | |
| "loss": 0.8353, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 6.059359534943501e-07, | |
| "loss": 0.8244, | |
| "step": 85900 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 6.006659699101419e-07, | |
| "loss": 0.8328, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 5.954158729315032e-07, | |
| "loss": 0.8333, | |
| "step": 86100 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 5.901857175285488e-07, | |
| "loss": 0.8651, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 5.849755584625985e-07, | |
| "loss": 0.8671, | |
| "step": 86300 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 5.797854502856029e-07, | |
| "loss": 0.8453, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 5.746154473395752e-07, | |
| "loss": 0.8358, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 5.694656037560206e-07, | |
| "loss": 0.8291, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 5.643359734553693e-07, | |
| "loss": 0.8541, | |
| "step": 86700 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 5.592266101464122e-07, | |
| "loss": 0.8466, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 5.541375673257394e-07, | |
| "loss": 0.8221, | |
| "step": 86900 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 5.490688982771769e-07, | |
| "loss": 0.8347, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 5.440206560712352e-07, | |
| "loss": 0.8715, | |
| "step": 87100 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 5.389928935645452e-07, | |
| "loss": 0.854, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 5.339856633993124e-07, | |
| "loss": 0.8747, | |
| "step": 87300 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 5.28999018002761e-07, | |
| "loss": 0.8586, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 5.240330095865856e-07, | |
| "loss": 0.8247, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 5.190876901464067e-07, | |
| "loss": 0.8357, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 5.14163111461225e-07, | |
| "loss": 0.847, | |
| "step": 87700 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 5.092593250928782e-07, | |
| "loss": 0.8241, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 5.043763823855036e-07, | |
| "loss": 0.8706, | |
| "step": 87900 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 4.995143344649964e-07, | |
| "loss": 0.8625, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 4.946732322384795e-07, | |
| "loss": 0.8476, | |
| "step": 88100 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 4.89853126393767e-07, | |
| "loss": 0.8344, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 4.850540673988346e-07, | |
| "loss": 0.8456, | |
| "step": 88300 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 4.802761055012914e-07, | |
| "loss": 0.8175, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 4.755192907278536e-07, | |
| "loss": 0.848, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 4.7078367288381886e-07, | |
| "loss": 0.8592, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 4.660693015525486e-07, | |
| "loss": 0.82, | |
| "step": 88700 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 4.613762260949456e-07, | |
| "loss": 0.8745, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 4.567044956489394e-07, | |
| "loss": 0.8827, | |
| "step": 88900 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 4.520541591289701e-07, | |
| "loss": 0.862, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 4.4742526522547626e-07, | |
| "loss": 0.8684, | |
| "step": 89100 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 4.428178624043866e-07, | |
| "loss": 0.8537, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 4.382319989066117e-07, | |
| "loss": 0.8451, | |
| "step": 89300 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 4.336677227475383e-07, | |
| "loss": 0.8589, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 4.2912508171652765e-07, | |
| "loss": 0.8102, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 4.2460412337641504e-07, | |
| "loss": 0.8662, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 4.2010489506300933e-07, | |
| "loss": 0.8566, | |
| "step": 89700 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 4.156274438846017e-07, | |
| "loss": 0.8663, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 4.11171816721469e-07, | |
| "loss": 0.8125, | |
| "step": 89900 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 4.0673806022538425e-07, | |
| "loss": 0.8605, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "eval_bleu": 8.0117, | |
| "eval_gen_len": 18.9805, | |
| "eval_loss": 0.8721033334732056, | |
| "eval_runtime": 963.8746, | |
| "eval_samples_per_second": 2.071, | |
| "eval_steps_per_second": 1.035, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 4.023262208191284e-07, | |
| "loss": 0.8296, | |
| "step": 90100 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 3.9793634469600216e-07, | |
| "loss": 0.83, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 3.9356847781934575e-07, | |
| "loss": 0.8432, | |
| "step": 90300 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 3.892226659220552e-07, | |
| "loss": 0.8396, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 3.8489895450610407e-07, | |
| "loss": 0.8742, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 3.8059738884206775e-07, | |
| "loss": 0.8124, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 3.7631801396864757e-07, | |
| "loss": 0.8238, | |
| "step": 90700 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.7206087469220195e-07, | |
| "loss": 0.8443, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.6782601558627563e-07, | |
| "loss": 0.8567, | |
| "step": 90900 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.6361348099113123e-07, | |
| "loss": 0.8406, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.5942331501329003e-07, | |
| "loss": 0.8667, | |
| "step": 91100 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 3.552555615250658e-07, | |
| "loss": 0.8206, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 3.511102641641051e-07, | |
| "loss": 0.8545, | |
| "step": 91300 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 3.469874663329342e-07, | |
| "loss": 0.8578, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 3.428872111985021e-07, | |
| "loss": 0.8555, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 3.388095416917267e-07, | |
| "loss": 0.8161, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 3.3475450050705125e-07, | |
| "loss": 0.8234, | |
| "step": 91700 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 3.3072213010199053e-07, | |
| "loss": 0.8342, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.267124726966903e-07, | |
| "loss": 0.8458, | |
| "step": 91900 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.2272557027348524e-07, | |
| "loss": 0.8573, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.187614645764564e-07, | |
| "loss": 0.8544, | |
| "step": 92100 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.1482019711099735e-07, | |
| "loss": 0.8455, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 3.109018091433802e-07, | |
| "loss": 0.847, | |
| "step": 92300 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 3.07006341700318e-07, | |
| "loss": 0.8277, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 3.031338355685418e-07, | |
| "loss": 0.8297, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 2.992843312943702e-07, | |
| "loss": 0.8648, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 2.954578691832835e-07, | |
| "loss": 0.8469, | |
| "step": 92700 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 2.9165448929950685e-07, | |
| "loss": 0.8757, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 2.878742314655844e-07, | |
| "loss": 0.8526, | |
| "step": 92900 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 2.8411713526196677e-07, | |
| "loss": 0.8369, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 2.80383240026596e-07, | |
| "loss": 0.8428, | |
| "step": 93100 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 2.766725848544907e-07, | |
| "loss": 0.8428, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 2.7298520859734054e-07, | |
| "loss": 0.8695, | |
| "step": 93300 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 2.6932114986309874e-07, | |
| "loss": 0.8534, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 2.6568044701557494e-07, | |
| "loss": 0.8515, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 2.6206313817403627e-07, | |
| "loss": 0.8642, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 2.5846926121280843e-07, | |
| "loss": 0.8184, | |
| "step": 93700 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 2.5489885376087626e-07, | |
| "loss": 0.8173, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 2.5135195320149355e-07, | |
| "loss": 0.85, | |
| "step": 93900 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 2.478285966717889e-07, | |
| "loss": 0.858, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 2.4432882106237786e-07, | |
| "loss": 0.8104, | |
| "step": 94100 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 2.408526630169772e-07, | |
| "loss": 0.8469, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 2.3740015893201906e-07, | |
| "loss": 0.8599, | |
| "step": 94300 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 2.3397134495627278e-07, | |
| "loss": 0.874, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 2.305662569904646e-07, | |
| "loss": 0.8251, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 2.2718493068690229e-07, | |
| "loss": 0.86, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 2.2382740144910236e-07, | |
| "loss": 0.8766, | |
| "step": 94700 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 2.2049370443141704e-07, | |
| "loss": 0.8541, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 2.171838745386695e-07, | |
| "loss": 0.8276, | |
| "step": 94900 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 2.1389794642578649e-07, | |
| "loss": 0.8479, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "eval_bleu": 8.1008, | |
| "eval_gen_len": 18.978, | |
| "eval_loss": 0.8710653781890869, | |
| "eval_runtime": 966.0685, | |
| "eval_samples_per_second": 2.066, | |
| "eval_steps_per_second": 1.033, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 2.1063595449743595e-07, | |
| "loss": 0.8244, | |
| "step": 95100 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 2.073979329076664e-07, | |
| "loss": 0.8897, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 2.0418391555955042e-07, | |
| "loss": 0.8269, | |
| "step": 95300 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 2.0099393610482688e-07, | |
| "loss": 0.8353, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 1.9782802794355239e-07, | |
| "loss": 0.8267, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 1.9468622422374962e-07, | |
| "loss": 0.8484, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 1.9156855784106004e-07, | |
| "loss": 0.8665, | |
| "step": 95700 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 1.8847506143839983e-07, | |
| "loss": 0.8377, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 1.854057674056181e-07, | |
| "loss": 0.859, | |
| "step": 95900 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 1.8236070787915754e-07, | |
| "loss": 0.8135, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 1.793399147417188e-07, | |
| "loss": 0.8111, | |
| "step": 96100 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 1.7634341962192602e-07, | |
| "loss": 0.8239, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 1.7337125389399496e-07, | |
| "loss": 0.8728, | |
| "step": 96300 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 1.7042344867740645e-07, | |
| "loss": 0.8582, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 1.6750003483657739e-07, | |
| "loss": 0.8271, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 1.646010429805417e-07, | |
| "loss": 0.8376, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 1.6172650346262642e-07, | |
| "loss": 0.867, | |
| "step": 96700 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 1.5887644638013566e-07, | |
| "loss": 0.8924, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 1.5605090157403495e-07, | |
| "loss": 0.8466, | |
| "step": 96900 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 1.5324989862863792e-07, | |
| "loss": 0.8462, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 1.5047346687129898e-07, | |
| "loss": 0.8679, | |
| "step": 97100 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 1.4772163537210393e-07, | |
| "loss": 0.8395, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 1.4499443294356541e-07, | |
| "loss": 0.8514, | |
| "step": 97300 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 1.422918881403243e-07, | |
| "loss": 0.8532, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 1.3961402925884744e-07, | |
| "loss": 0.8172, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 1.36960884337132e-07, | |
| "loss": 0.7917, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 1.3433248115441362e-07, | |
| "loss": 0.8305, | |
| "step": 97700 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 1.3172884723087336e-07, | |
| "loss": 0.8496, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 1.2915000982735155e-07, | |
| "loss": 0.8685, | |
| "step": 97900 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 1.2659599594506106e-07, | |
| "loss": 0.8706, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 1.2406683232530414e-07, | |
| "loss": 0.8015, | |
| "step": 98100 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 1.2156254544919476e-07, | |
| "loss": 0.8639, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 1.1908316153737858e-07, | |
| "loss": 0.8624, | |
| "step": 98300 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 1.1662870654975955e-07, | |
| "loss": 0.8271, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 1.1419920618522984e-07, | |
| "loss": 0.8196, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 1.1179468588139702e-07, | |
| "loss": 0.8528, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 1.0941517081432096e-07, | |
| "loss": 0.8642, | |
| "step": 98700 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 1.0706068589824925e-07, | |
| "loss": 0.8515, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 1.0473125578535526e-07, | |
| "loss": 0.8398, | |
| "step": 98900 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 1.0242690486548134e-07, | |
| "loss": 0.8797, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 1.0014765726588437e-07, | |
| "loss": 0.8537, | |
| "step": 99100 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 9.789353685097953e-08, | |
| "loss": 0.812, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 9.566456722209432e-08, | |
| "loss": 0.8588, | |
| "step": 99300 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 9.34607717172195e-08, | |
| "loss": 0.8394, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 9.12821734107644e-08, | |
| "loss": 0.8493, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 8.91287951133174e-08, | |
| "loss": 0.8376, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 8.700065937140401e-08, | |
| "loss": 0.832, | |
| "step": 99700 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 8.489778846725417e-08, | |
| "loss": 0.8802, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 8.282020441856637e-08, | |
| "loss": 0.8084, | |
| "step": 99900 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 8.076792897827757e-08, | |
| "loss": 0.8391, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "eval_bleu": 8.2041, | |
| "eval_gen_len": 18.9795, | |
| "eval_loss": 0.8708174228668213, | |
| "eval_runtime": 963.8453, | |
| "eval_samples_per_second": 2.071, | |
| "eval_steps_per_second": 1.035, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 7.874098363433668e-08, | |
| "loss": 0.8315, | |
| "step": 100100 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 7.673938960947924e-08, | |
| "loss": 0.8792, | |
| "step": 100200 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 7.476316786100336e-08, | |
| "loss": 0.8167, | |
| "step": 100300 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 7.281233908055269e-08, | |
| "loss": 0.8536, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 7.088692369389888e-08, | |
| "loss": 0.8216, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 6.89869418607264e-08, | |
| "loss": 0.8033, | |
| "step": 100600 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 6.711241347442415e-08, | |
| "loss": 0.8184, | |
| "step": 100700 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 6.526335816187474e-08, | |
| "loss": 0.847, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 6.34397952832505e-08, | |
| "loss": 0.8446, | |
| "step": 100900 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 6.164174393181038e-08, | |
| "loss": 0.869, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 5.986922293369834e-08, | |
| "loss": 0.8355, | |
| "step": 101100 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 5.812225084774969e-08, | |
| "loss": 0.8527, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 5.640084596529399e-08, | |
| "loss": 0.8716, | |
| "step": 101300 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 5.4705026309964104e-08, | |
| "loss": 0.8185, | |
| "step": 101400 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 5.3034809637508846e-08, | |
| "loss": 0.8322, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 5.139021343560452e-08, | |
| "loss": 0.81, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 4.977125492367452e-08, | |
| "loss": 0.8391, | |
| "step": 101700 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 4.817795105270723e-08, | |
| "loss": 0.8142, | |
| "step": 101800 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 4.661031850507924e-08, | |
| "loss": 0.8039, | |
| "step": 101900 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 4.5068373694380775e-08, | |
| "loss": 0.828, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 4.355213276524356e-08, | |
| "loss": 0.8512, | |
| "step": 102100 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 4.206161159317129e-08, | |
| "loss": 0.839, | |
| "step": 102200 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 4.059682578437474e-08, | |
| "loss": 0.8535, | |
| "step": 102300 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 3.915779067560743e-08, | |
| "loss": 0.8548, | |
| "step": 102400 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 3.774452133400469e-08, | |
| "loss": 0.8437, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 3.635703255692735e-08, | |
| "loss": 0.8258, | |
| "step": 102600 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 3.4995338871804954e-08, | |
| "loss": 0.8327, | |
| "step": 102700 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 3.3659454535985015e-08, | |
| "loss": 0.8453, | |
| "step": 102800 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 3.234939353658345e-08, | |
| "loss": 0.8719, | |
| "step": 102900 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 3.1065169590337453e-08, | |
| "loss": 0.8516, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.98067961434631e-08, | |
| "loss": 0.8355, | |
| "step": 103100 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.857428637151327e-08, | |
| "loss": 0.8453, | |
| "step": 103200 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.7367653179240783e-08, | |
| "loss": 0.8211, | |
| "step": 103300 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.6186909200462128e-08, | |
| "loss": 0.8567, | |
| "step": 103400 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.503206679792647e-08, | |
| "loss": 0.8693, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.390313806318545e-08, | |
| "loss": 0.8263, | |
| "step": 103600 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.2800134816466647e-08, | |
| "loss": 0.8622, | |
| "step": 103700 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 2.172306860654977e-08, | |
| "loss": 0.8541, | |
| "step": 103800 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 2.0671950710645928e-08, | |
| "loss": 0.8408, | |
| "step": 103900 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.9646792134279667e-08, | |
| "loss": 0.8284, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.864760361117296e-08, | |
| "loss": 0.8354, | |
| "step": 104100 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.7674395603134442e-08, | |
| "loss": 0.8895, | |
| "step": 104200 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.6727178299948133e-08, | |
| "loss": 0.8532, | |
| "step": 104300 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.5805961619267396e-08, | |
| "loss": 0.8279, | |
| "step": 104400 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.491075520651142e-08, | |
| "loss": 0.8415, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.4041568434764175e-08, | |
| "loss": 0.8381, | |
| "step": 104600 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.3198410404675066e-08, | |
| "loss": 0.8357, | |
| "step": 104700 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.2381289944366492e-08, | |
| "loss": 0.8405, | |
| "step": 104800 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.1590215609337264e-08, | |
| "loss": 0.8565, | |
| "step": 104900 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.0825195682377387e-08, | |
| "loss": 0.8649, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "eval_bleu": 8.1488, | |
| "eval_gen_len": 18.9785, | |
| "eval_loss": 0.8710347414016724, | |
| "eval_runtime": 973.1045, | |
| "eval_samples_per_second": 2.051, | |
| "eval_steps_per_second": 1.026, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.0086238173478146e-08, | |
| "loss": 0.8551, | |
| "step": 105100 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 9.373350819749382e-09, | |
| "loss": 0.8313, | |
| "step": 105200 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 8.686541085339006e-09, | |
| "loss": 0.8401, | |
| "step": 105300 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 8.025816161353895e-09, | |
| "loss": 0.8806, | |
| "step": 105400 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 7.391182965785504e-09, | |
| "loss": 0.8271, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 6.782648143436321e-09, | |
| "loss": 0.8348, | |
| "step": 105600 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 6.200218065851304e-09, | |
| "loss": 0.8564, | |
| "step": 105700 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 5.6438988312504385e-09, | |
| "loss": 0.8111, | |
| "step": 105800 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 5.113696264466006e-09, | |
| "loss": 0.856, | |
| "step": 105900 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 4.6096159168798616e-09, | |
| "loss": 0.814, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 4.1316630663670864e-09, | |
| "loss": 0.887, | |
| "step": 106100 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 3.6798427172390904e-09, | |
| "loss": 0.8207, | |
| "step": 106200 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 3.254159600192819e-09, | |
| "loss": 0.8367, | |
| "step": 106300 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 2.8546181722599597e-09, | |
| "loss": 0.881, | |
| "step": 106400 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 2.48122261676087e-09, | |
| "loss": 0.8333, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 2.1339768432609988e-09, | |
| "loss": 0.8364, | |
| "step": 106600 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 1.8128844875289764e-09, | |
| "loss": 0.8306, | |
| "step": 106700 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 1.517948911499978e-09, | |
| "loss": 0.8722, | |
| "step": 106800 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 1.2491732032385295e-09, | |
| "loss": 0.8287, | |
| "step": 106900 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 1.0065601769088106e-09, | |
| "loss": 0.8401, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 7.901123727427351e-10, | |
| "loss": 0.8713, | |
| "step": 107100 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 5.998320570149708e-10, | |
| "loss": 0.8514, | |
| "step": 107200 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 4.357212220182372e-10, | |
| "loss": 0.8462, | |
| "step": 107300 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 2.9778158604276684e-10, | |
| "loss": 0.8381, | |
| "step": 107400 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.860145933585411e-10, | |
| "loss": 0.8647, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.0042141420030238e-10, | |
| "loss": 0.8551, | |
| "step": 107600 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 4.1002944754509055e-11, | |
| "loss": 0.8085, | |
| "step": 107700 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 7.759807150731214e-12, | |
| "loss": 0.8357, | |
| "step": 107800 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 107877, | |
| "total_flos": 3.211633985499169e+18, | |
| "train_loss": 0.3135226284782316, | |
| "train_runtime": 125809.459, | |
| "train_samples_per_second": 3.43, | |
| "train_steps_per_second": 0.857 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 107877, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 2000, | |
| "total_flos": 3.211633985499169e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |