{
  "best_metric": 0.7786535620689392,
  "best_model_checkpoint": "./enko_mbartLarge_100p_run1/checkpoint-128123",
  "epoch": 4.999992195000117,
  "eval_steps": 500,
  "global_step": 320307,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 9.940000000000001e-06,
      "loss": 1.6027,
      "step": 500
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.994e-05,
      "loss": 1.3159,
      "step": 1000
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.994e-05,
      "loss": 1.2703,
      "step": 1500
    },
    {
      "epoch": 0.03,
      "learning_rate": 3.994e-05,
      "loss": 1.2587,
      "step": 2000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9940000000000006e-05,
      "loss": 1.2526,
      "step": 2500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.997407177475311e-05,
      "loss": 1.2571,
      "step": 3000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.994803921057162e-05,
      "loss": 1.2336,
      "step": 3500
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.992195447692284e-05,
      "loss": 1.2062,
      "step": 4000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.9895869743274054e-05,
      "loss": 1.1793,
      "step": 4500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.986983717909256e-05,
      "loss": 1.1679,
      "step": 5000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.984375244544378e-05,
      "loss": 1.148,
      "step": 5500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9817667711794995e-05,
      "loss": 1.1374,
      "step": 6000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.979158297814621e-05,
      "loss": 1.1147,
      "step": 6500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.976549824449743e-05,
      "loss": 1.1143,
      "step": 7000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.9739465680315936e-05,
      "loss": 1.1026,
      "step": 7500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.971338094666715e-05,
      "loss": 1.1118,
      "step": 8000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.968729621301837e-05,
      "loss": 1.0842,
      "step": 8500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.9661211479369584e-05,
      "loss": 1.0774,
      "step": 9000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.96351267457208e-05,
      "loss": 1.0735,
      "step": 9500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.9609042012072016e-05,
      "loss": 1.0637,
      "step": 10000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.958300944789053e-05,
      "loss": 1.0474,
      "step": 10500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.955692471424174e-05,
      "loss": 1.0447,
      "step": 11000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.953089215006026e-05,
      "loss": 1.048,
      "step": 11500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.950480741641147e-05,
      "loss": 1.0178,
      "step": 12000
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.947872268276269e-05,
      "loss": 1.0335,
      "step": 12500
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.9452637949113905e-05,
      "loss": 1.031,
      "step": 13000
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.9426605384932414e-05,
      "loss": 1.0121,
      "step": 13500
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.9400624990218224e-05,
      "loss": 1.0188,
      "step": 14000
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.937454025656944e-05,
      "loss": 0.9988,
      "step": 14500
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.9348455522920656e-05,
      "loss": 1.0051,
      "step": 15000
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.932237078927187e-05,
      "loss": 0.9967,
      "step": 15500
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.929628605562309e-05,
      "loss": 0.9931,
      "step": 16000
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.9270201321974304e-05,
      "loss": 0.9833,
      "step": 16500
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.924411658832552e-05,
      "loss": 0.9801,
      "step": 17000
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.9218031854676735e-05,
      "loss": 0.9848,
      "step": 17500
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.919194712102795e-05,
      "loss": 0.9774,
      "step": 18000
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.916586238737916e-05,
      "loss": 0.9867,
      "step": 18500
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.9139777653730376e-05,
      "loss": 0.9738,
      "step": 19000
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.911369292008159e-05,
      "loss": 0.9593,
      "step": 19500
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.908760818643281e-05,
      "loss": 0.9804,
      "step": 20000
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.9061523452784024e-05,
      "loss": 0.9654,
      "step": 20500
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.903543871913524e-05,
      "loss": 0.9641,
      "step": 21000
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.9009353985486456e-05,
      "loss": 0.955,
      "step": 21500
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.8983321421304965e-05,
      "loss": 0.953,
      "step": 22000
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.895723668765618e-05,
      "loss": 0.9585,
      "step": 22500
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.89311519540074e-05,
      "loss": 0.9453,
      "step": 23000
    },
    {
      "epoch": 0.37,
      "learning_rate": 4.890506722035861e-05,
      "loss": 0.9519,
      "step": 23500
    },
    {
      "epoch": 0.37,
      "learning_rate": 4.887898248670983e-05,
      "loss": 0.9512,
      "step": 24000
    },
    {
      "epoch": 0.38,
      "learning_rate": 4.8852897753061045e-05,
      "loss": 0.9388,
      "step": 24500
    },
    {
      "epoch": 0.39,
      "learning_rate": 4.882686518887956e-05,
      "loss": 0.9257,
      "step": 25000
    },
    {
      "epoch": 0.4,
      "learning_rate": 4.880078045523077e-05,
      "loss": 0.9346,
      "step": 25500
    },
    {
      "epoch": 0.41,
      "learning_rate": 4.8774695721581986e-05,
      "loss": 0.9399,
      "step": 26000
    },
    {
      "epoch": 0.41,
      "learning_rate": 4.87486109879332e-05,
      "loss": 0.9269,
      "step": 26500
    },
    {
      "epoch": 0.42,
      "learning_rate": 4.872252625428442e-05,
      "loss": 0.9239,
      "step": 27000
    },
    {
      "epoch": 0.43,
      "learning_rate": 4.8696441520635634e-05,
      "loss": 0.9355,
      "step": 27500
    },
    {
      "epoch": 0.44,
      "learning_rate": 4.867035678698685e-05,
      "loss": 0.9256,
      "step": 28000
    },
    {
      "epoch": 0.44,
      "learning_rate": 4.864437639227266e-05,
      "loss": 0.9278,
      "step": 28500
    },
    {
      "epoch": 0.45,
      "learning_rate": 4.8618343828091176e-05,
      "loss": 0.9259,
      "step": 29000
    },
    {
      "epoch": 0.46,
      "learning_rate": 4.859225909444239e-05,
      "loss": 0.9181,
      "step": 29500
    },
    {
      "epoch": 0.47,
      "learning_rate": 4.85661743607936e-05,
      "loss": 0.9189,
      "step": 30000
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.854008962714482e-05,
      "loss": 0.9094,
      "step": 30500
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.851400489349603e-05,
      "loss": 0.9121,
      "step": 31000
    },
    {
      "epoch": 0.49,
      "learning_rate": 4.848792015984725e-05,
      "loss": 0.8966,
      "step": 31500
    },
    {
      "epoch": 0.5,
      "learning_rate": 4.8461835426198464e-05,
      "loss": 0.9144,
      "step": 32000
    },
    {
      "epoch": 0.51,
      "learning_rate": 4.843575069254968e-05,
      "loss": 0.9067,
      "step": 32500
    },
    {
      "epoch": 0.52,
      "learning_rate": 4.8409665958900896e-05,
      "loss": 0.9144,
      "step": 33000
    },
    {
      "epoch": 0.52,
      "learning_rate": 4.838358122525211e-05,
      "loss": 0.9011,
      "step": 33500
    },
    {
      "epoch": 0.53,
      "learning_rate": 4.835749649160333e-05,
      "loss": 0.8886,
      "step": 34000
    },
    {
      "epoch": 0.54,
      "learning_rate": 4.8331411757954544e-05,
      "loss": 0.9009,
      "step": 34500
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.830532702430576e-05,
      "loss": 0.8945,
      "step": 35000
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.8279242290656976e-05,
      "loss": 0.893,
      "step": 35500
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.825315755700819e-05,
      "loss": 0.9099,
      "step": 36000
    },
    {
      "epoch": 0.57,
      "learning_rate": 4.82270728233594e-05,
      "loss": 0.8861,
      "step": 36500
    },
    {
      "epoch": 0.58,
      "learning_rate": 4.820098808971062e-05,
      "loss": 0.8945,
      "step": 37000
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.817490335606183e-05,
      "loss": 0.8851,
      "step": 37500
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.814881862241305e-05,
      "loss": 0.8827,
      "step": 38000
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.8122733888764264e-05,
      "loss": 0.8887,
      "step": 38500
    },
    {
      "epoch": 0.61,
      "learning_rate": 4.809664915511548e-05,
      "loss": 0.8785,
      "step": 39000
    },
    {
      "epoch": 0.62,
      "learning_rate": 4.8070564421466696e-05,
      "loss": 0.8792,
      "step": 39500
    },
    {
      "epoch": 0.62,
      "learning_rate": 4.804447968781791e-05,
      "loss": 0.8724,
      "step": 40000
    },
    {
      "epoch": 0.63,
      "learning_rate": 4.801839495416913e-05,
      "loss": 0.8725,
      "step": 40500
    },
    {
      "epoch": 0.64,
      "learning_rate": 4.799236238998764e-05,
      "loss": 0.8745,
      "step": 41000
    },
    {
      "epoch": 0.65,
      "learning_rate": 4.7966277656338853e-05,
      "loss": 0.8696,
      "step": 41500
    },
    {
      "epoch": 0.66,
      "learning_rate": 4.794029726162466e-05,
      "loss": 0.8795,
      "step": 42000
    },
    {
      "epoch": 0.66,
      "learning_rate": 4.791421252797588e-05,
      "loss": 0.8744,
      "step": 42500
    },
    {
      "epoch": 0.67,
      "learning_rate": 4.7888127794327095e-05,
      "loss": 0.8728,
      "step": 43000
    },
    {
      "epoch": 0.68,
      "learning_rate": 4.786204306067831e-05,
      "loss": 0.8772,
      "step": 43500
    },
    {
      "epoch": 0.69,
      "learning_rate": 4.783595832702952e-05,
      "loss": 0.8685,
      "step": 44000
    },
    {
      "epoch": 0.69,
      "learning_rate": 4.7809873593380736e-05,
      "loss": 0.8698,
      "step": 44500
    },
    {
      "epoch": 0.7,
      "learning_rate": 4.778378885973195e-05,
      "loss": 0.8696,
      "step": 45000
    },
    {
      "epoch": 0.71,
      "learning_rate": 4.775770412608317e-05,
      "loss": 0.8573,
      "step": 45500
    },
    {
      "epoch": 0.72,
      "learning_rate": 4.7731619392434384e-05,
      "loss": 0.8623,
      "step": 46000
    },
    {
      "epoch": 0.73,
      "learning_rate": 4.77055346587856e-05,
      "loss": 0.8529,
      "step": 46500
    },
    {
      "epoch": 0.73,
      "learning_rate": 4.7679449925136815e-05,
      "loss": 0.8704,
      "step": 47000
    },
    {
      "epoch": 0.74,
      "learning_rate": 4.7653417360955325e-05,
      "loss": 0.8549,
      "step": 47500
    },
    {
      "epoch": 0.75,
      "learning_rate": 4.762733262730654e-05,
      "loss": 0.8648,
      "step": 48000
    },
    {
      "epoch": 0.76,
      "learning_rate": 4.760124789365776e-05,
      "loss": 0.8603,
      "step": 48500
    },
    {
      "epoch": 0.76,
      "learning_rate": 4.757516316000897e-05,
      "loss": 0.876,
      "step": 49000
    },
    {
      "epoch": 0.77,
      "learning_rate": 4.754913059582749e-05,
      "loss": 0.8657,
      "step": 49500
    },
    {
      "epoch": 0.78,
      "learning_rate": 4.7523045862178705e-05,
      "loss": 0.8569,
      "step": 50000
    },
    {
      "epoch": 0.79,
      "learning_rate": 4.749696112852992e-05,
      "loss": 0.8561,
      "step": 50500
    },
    {
      "epoch": 0.8,
      "learning_rate": 4.747092856434843e-05,
      "loss": 0.8585,
      "step": 51000
    },
    {
      "epoch": 0.8,
      "learning_rate": 4.7444843830699646e-05,
      "loss": 0.8592,
      "step": 51500
    },
    {
      "epoch": 0.81,
      "learning_rate": 4.741875909705086e-05,
      "loss": 0.859,
      "step": 52000
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.739267436340208e-05,
      "loss": 0.844,
      "step": 52500
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.7366589629753294e-05,
      "loss": 0.8492,
      "step": 53000
    },
    {
      "epoch": 0.84,
      "learning_rate": 4.734050489610451e-05,
      "loss": 0.8473,
      "step": 53500
    },
    {
      "epoch": 0.84,
      "learning_rate": 4.7314420162455726e-05,
      "loss": 0.8435,
      "step": 54000
    },
    {
      "epoch": 0.85,
      "learning_rate": 4.7288387598274235e-05,
      "loss": 0.8472,
      "step": 54500
    },
    {
      "epoch": 0.86,
      "learning_rate": 4.726230286462545e-05,
      "loss": 0.8356,
      "step": 55000
    },
    {
      "epoch": 0.87,
      "learning_rate": 4.723621813097667e-05,
      "loss": 0.8386,
      "step": 55500
    },
    {
      "epoch": 0.87,
      "learning_rate": 4.721013339732788e-05,
      "loss": 0.8414,
      "step": 56000
    },
    {
      "epoch": 0.88,
      "learning_rate": 4.71840486636791e-05,
      "loss": 0.8404,
      "step": 56500
    },
    {
      "epoch": 0.89,
      "learning_rate": 4.7157963930030314e-05,
      "loss": 0.8419,
      "step": 57000
    },
    {
      "epoch": 0.9,
      "learning_rate": 4.7131931365848824e-05,
      "loss": 0.8479,
      "step": 57500
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.710584663220004e-05,
      "loss": 0.8432,
      "step": 58000
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.7079761898551256e-05,
      "loss": 0.8391,
      "step": 58500
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.705367716490247e-05,
      "loss": 0.8262,
      "step": 59000
    },
    {
      "epoch": 0.93,
      "learning_rate": 4.702759243125369e-05,
      "loss": 0.8401,
      "step": 59500
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.7001507697604903e-05,
      "loss": 0.8386,
      "step": 60000
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.697542296395612e-05,
      "loss": 0.831,
      "step": 60500
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.694939039977463e-05,
      "loss": 0.8343,
      "step": 61000
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.6923305666125845e-05,
      "loss": 0.8212,
      "step": 61500
    },
    {
      "epoch": 0.97,
      "learning_rate": 4.689722093247706e-05,
      "loss": 0.826,
      "step": 62000
    },
    {
      "epoch": 0.98,
      "learning_rate": 4.6871136198828277e-05,
      "loss": 0.833,
      "step": 62500
    },
    {
      "epoch": 0.98,
      "learning_rate": 4.684505146517949e-05,
      "loss": 0.8231,
      "step": 63000
    },
    {
      "epoch": 0.99,
      "learning_rate": 4.681896673153071e-05,
      "loss": 0.8402,
      "step": 63500
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.679293416734922e-05,
      "loss": 0.8286,
      "step": 64000
    },
    {
      "epoch": 1.0,
      "eval_bleu": 50.2043,
      "eval_gen_len": 15.0703,
      "eval_loss": 0.8096853494644165,
      "eval_runtime": 9598.1646,
      "eval_samples_per_second": 13.325,
      "eval_steps_per_second": 1.666,
      "step": 64061
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.6766849433700434e-05,
      "loss": 0.8088,
      "step": 64500
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.674076470005165e-05,
      "loss": 0.8023,
      "step": 65000
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.6714679966402866e-05,
      "loss": 0.8022,
      "step": 65500
    },
    {
      "epoch": 1.03,
      "learning_rate": 4.668859523275408e-05,
      "loss": 0.7841,
      "step": 66000
    },
    {
      "epoch": 1.04,
      "learning_rate": 4.66625104991053e-05,
      "loss": 0.7722,
      "step": 66500
    },
    {
      "epoch": 1.05,
      "learning_rate": 4.6636477934923814e-05,
      "loss": 0.7627,
      "step": 67000
    },
    {
      "epoch": 1.05,
      "learning_rate": 4.661039320127503e-05,
      "loss": 0.7635,
      "step": 67500
    },
    {
      "epoch": 1.06,
      "learning_rate": 4.6584308467626245e-05,
      "loss": 0.7612,
      "step": 68000
    },
    {
      "epoch": 1.07,
      "learning_rate": 4.6558223733977454e-05,
      "loss": 0.7563,
      "step": 68500
    },
    {
      "epoch": 1.08,
      "learning_rate": 4.653213900032867e-05,
      "loss": 0.7468,
      "step": 69000
    },
    {
      "epoch": 1.08,
      "learning_rate": 4.650605426667988e-05,
      "loss": 0.7534,
      "step": 69500
    },
    {
      "epoch": 1.09,
      "learning_rate": 4.6479969533031095e-05,
      "loss": 0.7457,
      "step": 70000
    },
    {
      "epoch": 1.1,
      "learning_rate": 4.645388479938231e-05,
      "loss": 0.7376,
      "step": 70500
    },
    {
      "epoch": 1.11,
      "learning_rate": 4.642780006573353e-05,
      "loss": 0.7377,
      "step": 71000
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.640171533208474e-05,
      "loss": 0.7385,
      "step": 71500
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.637563059843596e-05,
      "loss": 0.7483,
      "step": 72000
    },
    {
      "epoch": 1.13,
      "learning_rate": 4.6349545864787175e-05,
      "loss": 0.7356,
      "step": 72500
    },
    {
      "epoch": 1.14,
      "learning_rate": 4.632346113113839e-05,
      "loss": 0.7303,
      "step": 73000
    },
    {
      "epoch": 1.15,
      "learning_rate": 4.629742856695691e-05,
      "loss": 0.7383,
      "step": 73500
    },
    {
      "epoch": 1.16,
      "learning_rate": 4.627134383330812e-05,
      "loss": 0.7295,
      "step": 74000
    },
    {
      "epoch": 1.16,
      "learning_rate": 4.624531126912663e-05,
      "loss": 0.7284,
      "step": 74500
    },
    {
      "epoch": 1.17,
      "learning_rate": 4.621922653547785e-05,
      "loss": 0.72,
      "step": 75000
    },
    {
      "epoch": 1.18,
      "learning_rate": 4.6193141801829064e-05,
      "loss": 0.7246,
      "step": 75500
    },
    {
      "epoch": 1.19,
      "learning_rate": 4.616705706818028e-05,
      "loss": 0.7091,
      "step": 76000
    },
    {
      "epoch": 1.19,
      "learning_rate": 4.6140972334531496e-05,
      "loss": 0.7227,
      "step": 76500
    },
    {
      "epoch": 1.2,
      "learning_rate": 4.611488760088271e-05,
      "loss": 0.722,
      "step": 77000
    },
    {
      "epoch": 1.21,
      "learning_rate": 4.608880286723393e-05,
      "loss": 0.7167,
      "step": 77500
    },
    {
      "epoch": 1.22,
      "learning_rate": 4.6062718133585144e-05,
      "loss": 0.7211,
      "step": 78000
    },
    {
      "epoch": 1.23,
      "learning_rate": 4.603673773887095e-05,
      "loss": 0.7076,
      "step": 78500
    },
    {
      "epoch": 1.23,
      "learning_rate": 4.601065300522216e-05,
      "loss": 0.7132,
      "step": 79000
    },
    {
      "epoch": 1.24,
      "learning_rate": 4.598456827157338e-05,
      "loss": 0.7142,
      "step": 79500
    },
    {
      "epoch": 1.25,
      "learning_rate": 4.5958483537924594e-05,
      "loss": 0.7061,
      "step": 80000
    },
    {
      "epoch": 1.26,
      "learning_rate": 4.593239880427581e-05,
      "loss": 0.704,
      "step": 80500
    },
    {
      "epoch": 1.26,
      "learning_rate": 4.5906314070627026e-05,
      "loss": 0.6912,
      "step": 81000
    },
    {
      "epoch": 1.27,
      "learning_rate": 4.588022933697824e-05,
      "loss": 0.7105,
      "step": 81500
    },
    {
      "epoch": 1.28,
      "learning_rate": 4.585414460332946e-05,
      "loss": 0.7057,
      "step": 82000
    },
    {
      "epoch": 1.29,
      "learning_rate": 4.5828059869680674e-05,
      "loss": 0.7093,
      "step": 82500
    },
    {
      "epoch": 1.3,
      "learning_rate": 4.580197513603189e-05,
      "loss": 0.7089,
      "step": 83000
    },
    {
      "epoch": 1.3,
      "learning_rate": 4.5775890402383106e-05,
      "loss": 0.6892,
      "step": 83500
    },
    {
      "epoch": 1.31,
      "learning_rate": 4.574980566873432e-05,
      "loss": 0.7142,
      "step": 84000
    },
    {
      "epoch": 1.32,
      "learning_rate": 4.572377310455283e-05,
      "loss": 0.7039,
      "step": 84500
    },
    {
      "epoch": 1.33,
      "learning_rate": 4.569768837090405e-05,
      "loss": 0.6999,
      "step": 85000
    },
    {
      "epoch": 1.33,
      "learning_rate": 4.5671655806722557e-05,
      "loss": 0.698,
      "step": 85500
    },
    {
      "epoch": 1.34,
      "learning_rate": 4.564557107307377e-05,
      "loss": 0.6956,
      "step": 86000
    },
    {
      "epoch": 1.35,
      "learning_rate": 4.561948633942499e-05,
      "loss": 0.7012,
      "step": 86500
    },
    {
      "epoch": 1.36,
      "learning_rate": 4.5593401605776204e-05,
      "loss": 0.6933,
      "step": 87000
    },
    {
      "epoch": 1.37,
      "learning_rate": 4.556736904159472e-05,
      "loss": 0.6986,
      "step": 87500
    },
    {
      "epoch": 1.37,
      "learning_rate": 4.5541284307945936e-05,
      "loss": 0.6973,
      "step": 88000
    },
    {
      "epoch": 1.38,
      "learning_rate": 4.5515251743764446e-05,
      "loss": 0.6912,
      "step": 88500
    },
    {
      "epoch": 1.39,
      "learning_rate": 4.548916701011566e-05,
      "loss": 0.6842,
      "step": 89000
    },
    {
      "epoch": 1.4,
      "learning_rate": 4.546313444593418e-05,
      "loss": 0.6836,
      "step": 89500
    },
    {
      "epoch": 1.4,
      "learning_rate": 4.543704971228539e-05,
      "loss": 0.6926,
      "step": 90000
    },
    {
      "epoch": 1.41,
      "learning_rate": 4.54109649786366e-05,
      "loss": 0.6785,
      "step": 90500
    },
    {
      "epoch": 1.42,
      "learning_rate": 4.538488024498782e-05,
      "loss": 0.6801,
      "step": 91000
    },
    {
      "epoch": 1.43,
      "learning_rate": 4.5358795511339035e-05,
      "loss": 0.6837,
      "step": 91500
    },
    {
      "epoch": 1.44,
      "learning_rate": 4.533271077769025e-05,
      "loss": 0.6895,
      "step": 92000
    },
    {
      "epoch": 1.44,
      "learning_rate": 4.5306626044041467e-05,
      "loss": 0.688,
      "step": 92500
    },
    {
      "epoch": 1.45,
      "learning_rate": 4.528054131039268e-05,
      "loss": 0.6849,
      "step": 93000
    },
    {
      "epoch": 1.46,
      "learning_rate": 4.52544565767439e-05,
      "loss": 0.6814,
      "step": 93500
    },
    {
      "epoch": 1.47,
      "learning_rate": 4.5228371843095114e-05,
      "loss": 0.6862,
      "step": 94000
    },
    {
      "epoch": 1.48,
      "learning_rate": 4.5202339278913624e-05,
      "loss": 0.6727,
      "step": 94500
    },
    {
      "epoch": 1.48,
      "learning_rate": 4.517625454526484e-05,
      "loss": 0.6807,
      "step": 95000
    },
    {
      "epoch": 1.49,
      "learning_rate": 4.5150169811616056e-05,
      "loss": 0.6669,
      "step": 95500
    },
    {
      "epoch": 1.5,
      "learning_rate": 4.512408507796727e-05,
      "loss": 0.6765,
      "step": 96000
    },
    {
      "epoch": 1.51,
      "learning_rate": 4.509800034431849e-05,
      "loss": 0.6792,
      "step": 96500
    },
    {
      "epoch": 1.51,
      "learning_rate": 4.50719156106697e-05,
      "loss": 0.6803,
      "step": 97000
    },
    {
      "epoch": 1.52,
      "learning_rate": 4.504583087702092e-05,
      "loss": 0.6692,
      "step": 97500
    },
    {
      "epoch": 1.53,
      "learning_rate": 4.5019746143372135e-05,
      "loss": 0.6671,
      "step": 98000
    },
    {
      "epoch": 1.54,
      "learning_rate": 4.499366140972335e-05,
      "loss": 0.6712,
      "step": 98500
    },
    {
      "epoch": 1.55,
      "learning_rate": 4.496757667607456e-05,
      "loss": 0.6729,
      "step": 99000
    },
    {
      "epoch": 1.55,
      "learning_rate": 4.4941544111893076e-05,
      "loss": 0.6715,
      "step": 99500
    },
    {
      "epoch": 1.56,
      "learning_rate": 4.491545937824429e-05,
      "loss": 0.6821,
      "step": 100000
    },
    {
      "epoch": 1.57,
      "learning_rate": 4.488937464459551e-05,
      "loss": 0.6641,
      "step": 100500
    },
    {
      "epoch": 1.58,
      "learning_rate": 4.4863289910946724e-05,
      "loss": 0.673,
      "step": 101000
    },
    {
      "epoch": 1.58,
      "learning_rate": 4.483720517729793e-05,
      "loss": 0.6686,
      "step": 101500
    },
    {
      "epoch": 1.59,
      "learning_rate": 4.481112044364915e-05,
      "loss": 0.6614,
      "step": 102000
    },
    {
      "epoch": 1.6,
      "learning_rate": 4.4785035710000365e-05,
      "loss": 0.6708,
      "step": 102500
    },
    {
      "epoch": 1.61,
      "learning_rate": 4.475895097635158e-05,
      "loss": 0.6621,
      "step": 103000
    },
    {
      "epoch": 1.62,
      "learning_rate": 4.47328662427028e-05,
      "loss": 0.6618,
      "step": 103500
    },
    {
      "epoch": 1.62,
      "learning_rate": 4.4706833678521306e-05,
      "loss": 0.6647,
      "step": 104000
    },
    {
      "epoch": 1.63,
      "learning_rate": 4.468074894487252e-05,
      "loss": 0.6587,
      "step": 104500
    },
    {
      "epoch": 1.64,
      "learning_rate": 4.465466421122374e-05,
      "loss": 0.6589,
      "step": 105000
    },
    {
      "epoch": 1.65,
      "learning_rate": 4.4628579477574954e-05,
      "loss": 0.654,
      "step": 105500
    },
    {
      "epoch": 1.65,
      "learning_rate": 4.460249474392617e-05,
      "loss": 0.6614,
      "step": 106000
    },
    {
      "epoch": 1.66,
      "learning_rate": 4.4576410010277386e-05,
      "loss": 0.6663,
      "step": 106500
    },
    {
      "epoch": 1.67,
      "learning_rate": 4.45503252766286e-05,
      "loss": 0.6612,
      "step": 107000
    },
    {
      "epoch": 1.68,
      "learning_rate": 4.452424054297982e-05,
      "loss": 0.6594,
      "step": 107500
    },
    {
      "epoch": 1.69,
      "learning_rate": 4.449820797879833e-05,
      "loss": 0.6584,
      "step": 108000
    },
    {
      "epoch": 1.69,
      "learning_rate": 4.447217541461684e-05,
      "loss": 0.6592,
      "step": 108500
    },
    {
      "epoch": 1.7,
      "learning_rate": 4.444609068096806e-05,
      "loss": 0.6595,
      "step": 109000
    },
    {
      "epoch": 1.71,
      "learning_rate": 4.4420005947319275e-05,
      "loss": 0.6545,
      "step": 109500
    },
    {
      "epoch": 1.72,
      "learning_rate": 4.439392121367049e-05,
      "loss": 0.6543,
      "step": 110000
    },
    {
      "epoch": 1.72,
      "learning_rate": 4.4367888649489e-05,
      "loss": 0.6509,
      "step": 110500
    },
    {
      "epoch": 1.73,
      "learning_rate": 4.4341803915840216e-05,
      "loss": 0.6641,
      "step": 111000
    },
    {
      "epoch": 1.74,
      "learning_rate": 4.431577135165873e-05,
      "loss": 0.6514,
      "step": 111500
    },
    {
      "epoch": 1.75,
      "learning_rate": 4.428968661800995e-05,
      "loss": 0.6588,
      "step": 112000
    },
    {
      "epoch": 1.76,
      "learning_rate": 4.4263601884361164e-05,
      "loss": 0.6565,
      "step": 112500
    },
    {
      "epoch": 1.76,
      "learning_rate": 4.4237517150712373e-05,
      "loss": 0.6654,
      "step": 113000
    },
    {
      "epoch": 1.77,
      "learning_rate": 4.421143241706359e-05,
      "loss": 0.6642,
      "step": 113500
    },
    {
      "epoch": 1.78,
      "learning_rate": 4.4185347683414805e-05,
      "loss": 0.6505,
      "step": 114000
    },
    {
      "epoch": 1.79,
      "learning_rate": 4.415926294976602e-05,
      "loss": 0.6561,
      "step": 114500
    },
    {
      "epoch": 1.8,
      "learning_rate": 4.413317821611724e-05,
      "loss": 0.6555,
      "step": 115000
    },
    {
      "epoch": 1.8,
      "learning_rate": 4.410709348246845e-05,
      "loss": 0.6597,
      "step": 115500
    },
    {
      "epoch": 1.81,
      "learning_rate": 4.408100874881967e-05,
      "loss": 0.6635,
      "step": 116000
    },
    {
      "epoch": 1.82,
      "learning_rate": 4.405497618463818e-05,
      "loss": 0.6469,
      "step": 116500
    },
    {
      "epoch": 1.83,
      "learning_rate": 4.4028891450989394e-05,
      "loss": 0.6466,
      "step": 117000
    },
    {
      "epoch": 1.83,
      "learning_rate": 4.400280671734061e-05,
      "loss": 0.6504,
      "step": 117500
    },
    {
      "epoch": 1.84,
      "learning_rate": 4.3976721983691826e-05,
      "loss": 0.6463,
      "step": 118000
    },
    {
      "epoch": 1.85,
      "learning_rate": 4.395063725004304e-05,
      "loss": 0.6505,
      "step": 118500
    },
    {
      "epoch": 1.86,
      "learning_rate": 4.392455251639426e-05,
      "loss": 0.6392,
      "step": 119000
    },
    {
      "epoch": 1.87,
      "learning_rate": 4.3898467782745474e-05,
      "loss": 0.6446,
      "step": 119500
    },
    {
      "epoch": 1.87,
      "learning_rate": 4.387238304909669e-05,
      "loss": 0.6431,
      "step": 120000
    },
    {
      "epoch": 1.88,
      "learning_rate": 4.38463504849152e-05,
      "loss": 0.6452,
      "step": 120500
    },
    {
      "epoch": 1.89,
      "learning_rate": 4.3820265751266415e-05,
      "loss": 0.6505,
      "step": 121000
    },
    {
      "epoch": 1.9,
      "learning_rate": 4.379418101761763e-05,
      "loss": 0.6487,
      "step": 121500
    },
    {
      "epoch": 1.9,
      "learning_rate": 4.376809628396885e-05,
      "loss": 0.6481,
      "step": 122000
    },
    {
      "epoch": 1.91,
      "learning_rate": 4.3742063719787356e-05,
      "loss": 0.648,
      "step": 122500
    },
    {
      "epoch": 1.92,
      "learning_rate": 4.371597898613857e-05,
      "loss": 0.6378,
      "step": 123000
    },
    {
      "epoch": 1.93,
      "learning_rate": 4.368989425248979e-05,
      "loss": 0.6457,
      "step": 123500
    },
    {
      "epoch": 1.94,
      "learning_rate": 4.3663809518841004e-05,
      "loss": 0.6468,
      "step": 124000
    },
    {
      "epoch": 1.94,
      "learning_rate": 4.363772478519222e-05,
      "loss": 0.6456,
      "step": 124500
    },
    {
      "epoch": 1.95,
      "learning_rate": 4.3611692221010736e-05,
      "loss": 0.6392,
      "step": 125000
    },
    {
      "epoch": 1.96,
      "learning_rate": 4.358560748736195e-05,
      "loss": 0.6388,
      "step": 125500
    },
    {
      "epoch": 1.97,
      "learning_rate": 4.355952275371316e-05,
      "loss": 0.6382,
      "step": 126000
    },
    {
      "epoch": 1.97,
      "learning_rate": 4.353343802006438e-05,
      "loss": 0.6397,
      "step": 126500
    },
    {
      "epoch": 1.98,
      "learning_rate": 4.350735328641559e-05,
      "loss": 0.6368,
      "step": 127000
    },
    {
      "epoch": 1.99,
      "learning_rate": 4.348126855276681e-05,
      "loss": 0.6516,
      "step": 127500
    },
    {
      "epoch": 2.0,
      "learning_rate": 4.3455235988585325e-05,
      "loss": 0.6429,
      "step": 128000
    },
    {
      "epoch": 2.0,
      "eval_bleu": 51.8273,
      "eval_gen_len": 15.0439,
      "eval_loss": 0.7786535620689392,
      "eval_runtime": 9521.3959,
      "eval_samples_per_second": 13.433,
      "eval_steps_per_second": 1.679,
      "step": 128123
    },
    {
      "epoch": 2.01,
      "learning_rate": 4.342915125493654e-05,
      "loss": 0.6315,
      "step": 128500
    },
    {
      "epoch": 2.01,
      "learning_rate": 4.340306652128776e-05,
      "loss": 0.6169,
      "step": 129000
    },
    {
      "epoch": 2.02,
      "learning_rate": 4.337698178763897e-05,
      "loss": 0.6218,
      "step": 129500
    },
    {
      "epoch": 2.03,
      "learning_rate": 4.335089705399019e-05,
      "loss": 0.6108,
      "step": 130000
    },
    {
      "epoch": 2.04,
      "learning_rate": 4.33248644898087e-05,
      "loss": 0.6101,
      "step": 130500
    },
    {
      "epoch": 2.04,
      "learning_rate": 4.3298779756159914e-05,
      "loss": 0.5922,
      "step": 131000
    },
    {
      "epoch": 2.05,
      "learning_rate": 4.327269502251113e-05,
      "loss": 0.6026,
      "step": 131500
    },
    {
      "epoch": 2.06,
      "learning_rate": 4.3246610288862346e-05,
      "loss": 0.591,
      "step": 132000
    },
    {
      "epoch": 2.07,
      "learning_rate": 4.322052555521356e-05,
      "loss": 0.5985,
      "step": 132500
    },
    {
      "epoch": 2.08,
      "learning_rate": 4.319444082156478e-05,
      "loss": 0.5816,
      "step": 133000
    },
    {
      "epoch": 2.08,
      "learning_rate": 4.316840825738329e-05,
      "loss": 0.5942,
      "step": 133500
    },
    {
      "epoch": 2.09,
      "learning_rate": 4.31423235237345e-05,
      "loss": 0.5905,
      "step": 134000
    },
    {
      "epoch": 2.1,
      "learning_rate": 4.311623879008572e-05,
      "loss": 0.5845,
      "step": 134500
    },
    {
      "epoch": 2.11,
      "learning_rate": 4.3090154056436935e-05,
      "loss": 0.5799,
      "step": 135000
    },
    {
      "epoch": 2.12,
      "learning_rate": 4.306406932278815e-05,
      "loss": 0.5847,
      "step": 135500
    },
    {
      "epoch": 2.12,
      "learning_rate": 4.303798458913936e-05,
      "loss": 0.5866,
      "step": 136000
    },
    {
      "epoch": 2.13,
      "learning_rate": 4.3011899855490576e-05,
      "loss": 0.5867,
      "step": 136500
    },
    {
      "epoch": 2.14,
      "learning_rate": 4.298581512184179e-05,
      "loss": 0.5739,
      "step": 137000
    },
    {
      "epoch": 2.15,
      "learning_rate": 4.295973038819301e-05,
      "loss": 0.5882,
      "step": 137500
    },
    {
      "epoch": 2.15,
      "learning_rate": 4.2933697824011524e-05,
      "loss": 0.576,
      "step": 138000
    },
    {
      "epoch": 2.16,
      "learning_rate": 4.290761309036273e-05,
      "loss": 0.5654,
      "step": 138500
    },
    {
      "epoch": 2.17,
      "learning_rate": 4.288152835671395e-05,
      "loss": 0.5708,
      "step": 139000
    },
    {
      "epoch": 2.18,
      "learning_rate": 4.2855443623065165e-05,
      "loss": 0.5725,
      "step": 139500
    },
    {
      "epoch": 2.19,
      "learning_rate": 4.282935888941638e-05,
      "loss": 0.5687,
      "step": 140000
    },
    {
      "epoch": 2.19,
      "learning_rate": 4.28033263252349e-05,
      "loss": 0.5627,
      "step": 140500
    },
    {
      "epoch": 2.2,
      "learning_rate": 4.2777293761053406e-05,
      "loss": 0.5745,
      "step": 141000
    },
    {
      "epoch": 2.21,
      "learning_rate": 4.275120902740462e-05,
      "loss": 0.5617,
      "step": 141500
    },
    {
      "epoch": 2.22,
      "learning_rate": 4.272512429375584e-05,
      "loss": 0.5687,
      "step": 142000
    },
    {
      "epoch": 2.22,
      "learning_rate": 4.2699039560107054e-05,
      "loss": 0.5682,
      "step": 142500
    },
    {
      "epoch": 2.23,
      "learning_rate": 4.267295482645827e-05,
      "loss": 0.5573,
      "step": 143000
    },
    {
      "epoch": 2.24,
      "learning_rate": 4.264687009280948e-05,
      "loss": 0.5729,
      "step": 143500
    },
    {
      "epoch": 2.25,
      "learning_rate": 4.2620785359160695e-05,
      "loss": 0.5602,
      "step": 144000
    },
    {
      "epoch": 2.26,
      "learning_rate": 4.259470062551191e-05,
      "loss": 0.553,
      "step": 144500
    },
    {
      "epoch": 2.26,
      "learning_rate": 4.256861589186313e-05,
      "loss": 0.5501,
      "step": 145000
    },
    {
      "epoch": 2.27,
      "learning_rate": 4.254253115821434e-05,
      "loss": 0.5646,
      "step": 145500
    },
    {
      "epoch": 2.28,
      "learning_rate": 4.251644642456556e-05,
      "loss": 0.5609,
      "step": 146000
    },
    {
      "epoch": 2.29,
      "learning_rate": 4.2490361690916775e-05,
      "loss": 0.5575,
      "step": 146500
    },
    {
      "epoch": 2.29,
      "learning_rate": 4.246432912673529e-05,
      "loss": 0.5687,
      "step": 147000
    },
    {
      "epoch": 2.3,
      "learning_rate": 4.24382443930865e-05,
      "loss": 0.5425,
      "step": 147500
    },
    {
      "epoch": 2.31,
      "learning_rate": 4.2412159659437716e-05,
      "loss": 0.5689,
      "step": 148000
    },
    {
      "epoch": 2.32,
      "learning_rate": 4.238607492578893e-05,
      "loss": 0.557,
      "step": 148500
    },
    {
      "epoch": 2.33,
      "learning_rate": 4.235999019214015e-05,
      "loss": 0.5557,
      "step": 149000
    },
    {
      "epoch": 2.33,
      "learning_rate": 4.2333905458491364e-05,
      "loss": 0.5629,
      "step": 149500
    },
    {
      "epoch": 2.34,
      "learning_rate": 4.230787289430988e-05,
      "loss": 0.556,
      "step": 150000
    },
    {
      "epoch": 2.35,
      "learning_rate": 4.228184033012839e-05,
      "loss": 0.5521,
      "step": 150500
    },
    {
      "epoch": 2.36,
      "learning_rate": 4.2255807765946905e-05,
      "loss": 0.5508,
      "step": 151000
    },
    {
      "epoch": 2.36,
      "learning_rate": 4.2229775201765415e-05,
      "loss": 0.5539,
      "step": 151500
    },
    {
      "epoch": 2.37,
      "learning_rate": 4.220369046811663e-05,
      "loss": 0.5601,
      "step": 152000
    },
    {
      "epoch": 2.38,
      "learning_rate": 4.217760573446785e-05,
      "loss": 0.5504,
      "step": 152500
    },
    {
      "epoch": 2.39,
      "learning_rate": 4.215152100081906e-05,
      "loss": 0.5475,
      "step": 153000
    },
    {
      "epoch": 2.4,
      "learning_rate": 4.212543626717028e-05,
      "loss": 0.5477,
      "step": 153500
    },
    {
      "epoch": 2.4,
      "learning_rate": 4.2099351533521494e-05,
      "loss": 0.5421,
      "step": 154000
    },
    {
      "epoch": 2.41,
      "learning_rate": 4.207326679987271e-05,
      "loss": 0.5374,
      "step": 154500
    },
    {
      "epoch": 2.42,
      "learning_rate": 4.204718206622392e-05,
      "loss": 0.5443,
      "step": 155000
    },
    {
      "epoch": 2.43,
      "learning_rate": 4.2021097332575135e-05,
      "loss": 0.547,
      "step": 155500
    },
    {
      "epoch": 2.44,
      "learning_rate": 4.199501259892635e-05,
      "loss": 0.5496,
      "step": 156000
    },
    {
      "epoch": 2.44,
      "learning_rate": 4.196892786527757e-05,
      "loss": 0.5472,
      "step": 156500
    },
    {
      "epoch": 2.45,
      "learning_rate": 4.194284313162878e-05,
      "loss": 0.5458,
      "step": 157000
    },
    {
      "epoch": 2.46,
      "learning_rate": 4.191675839798e-05,
      "loss": 0.5505,
      "step": 157500
    },
    {
      "epoch": 2.47,
      "learning_rate": 4.1890673664331215e-05,
      "loss": 0.5454,
      "step": 158000
    },
    {
      "epoch": 2.47,
      "learning_rate": 4.186458893068243e-05,
      "loss": 0.5341,
      "step": 158500
    },
    {
      "epoch": 2.48,
      "learning_rate": 4.183850419703365e-05,
      "loss": 0.5402,
      "step": 159000
    },
    {
      "epoch": 2.49,
      "learning_rate": 4.1812471632852156e-05,
      "loss": 0.5363,
      "step": 159500
    },
    {
      "epoch": 2.5,
      "learning_rate": 4.178643906867067e-05,
      "loss": 0.5358,
      "step": 160000
    },
    {
      "epoch": 2.51,
      "learning_rate": 4.176035433502189e-05,
      "loss": 0.5388,
      "step": 160500
    },
    {
      "epoch": 2.51,
      "learning_rate": 4.1734269601373104e-05,
      "loss": 0.5427,
      "step": 161000
    },
    {
      "epoch": 2.52,
      "learning_rate": 4.170818486772432e-05,
      "loss": 0.5325,
      "step": 161500
    },
    {
      "epoch": 2.53,
      "learning_rate": 4.1682100134075536e-05,
      "loss": 0.5375,
      "step": 162000
    },
    {
      "epoch": 2.54,
      "learning_rate": 4.1656015400426745e-05,
      "loss": 0.535,
      "step": 162500
    },
    {
      "epoch": 2.54,
      "learning_rate": 4.162993066677796e-05,
      "loss": 0.5287,
      "step": 163000
    },
    {
      "epoch": 2.55,
      "learning_rate": 4.160384593312918e-05,
      "loss": 0.5345,
      "step": 163500
    },
    {
      "epoch": 2.56,
      "learning_rate": 4.157776119948039e-05,
      "loss": 0.5411,
      "step": 164000
    },
    {
      "epoch": 2.57,
      "learning_rate": 4.155167646583161e-05,
      "loss": 0.5366,
      "step": 164500
    },
    {
      "epoch": 2.58,
      "learning_rate": 4.152569607111742e-05,
      "loss": 0.5378,
      "step": 165000
    },
    {
      "epoch": 2.58,
      "learning_rate": 4.1499611337468634e-05,
      "loss": 0.5349,
      "step": 165500
    },
    {
      "epoch": 2.59,
      "learning_rate": 4.147352660381985e-05,
      "loss": 0.5279,
      "step": 166000
    },
    {
      "epoch": 2.6,
      "learning_rate": 4.1447441870171066e-05,
      "loss": 0.5324,
      "step": 166500
    },
    {
      "epoch": 2.61,
      "learning_rate": 4.142135713652228e-05,
      "loss": 0.5326,
      "step": 167000
    },
    {
      "epoch": 2.61,
      "learning_rate": 4.139532457234079e-05,
      "loss": 0.5254,
      "step": 167500
    },
    {
      "epoch": 2.62,
      "learning_rate": 4.136923983869201e-05,
      "loss": 0.5317,
      "step": 168000
    },
    {
      "epoch": 2.63,
      "learning_rate": 4.134315510504322e-05,
      "loss": 0.5278,
      "step": 168500
    },
    {
      "epoch": 2.64,
      "learning_rate": 4.131707037139444e-05,
      "loss": 0.5223,
      "step": 169000
    },
    {
      "epoch": 2.65,
      "learning_rate": 4.129103780721295e-05,
      "loss": 0.5237,
      "step": 169500
    },
    {
      "epoch": 2.65,
      "learning_rate": 4.1265005243031465e-05,
      "loss": 0.5251,
      "step": 170000
    },
    {
      "epoch": 2.66,
      "learning_rate": 4.123892050938268e-05,
      "loss": 0.5345,
      "step": 170500
    },
    {
      "epoch": 2.67,
      "learning_rate": 4.12128357757339e-05,
      "loss": 0.5302,
      "step": 171000
    },
    {
      "epoch": 2.68,
      "learning_rate": 4.1186751042085106e-05,
      "loss": 0.5256,
      "step": 171500
    },
    {
      "epoch": 2.68,
      "learning_rate": 4.116066630843632e-05,
      "loss": 0.5231,
      "step": 172000
    },
    {
      "epoch": 2.69,
      "learning_rate": 4.113458157478754e-05,
      "loss": 0.5322,
      "step": 172500
    },
    {
      "epoch": 2.7,
      "learning_rate": 4.1108549010606054e-05,
      "loss": 0.5268,
      "step": 173000
    },
    {
      "epoch": 2.71,
      "learning_rate": 4.108246427695727e-05,
      "loss": 0.521,
      "step": 173500
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.1056379543308486e-05,
      "loss": 0.5179,
      "step": 174000
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.10302948096597e-05,
      "loss": 0.5249,
      "step": 174500
    },
    {
      "epoch": 2.73,
      "learning_rate": 4.100421007601092e-05,
      "loss": 0.5267,
      "step": 175000
    },
    {
      "epoch": 2.74,
      "learning_rate": 4.0978125342362133e-05,
      "loss": 0.5198,
      "step": 175500
    },
    {
      "epoch": 2.75,
      "learning_rate": 4.095204060871335e-05,
      "loss": 0.525,
      "step": 176000
    },
    {
      "epoch": 2.76,
      "learning_rate": 4.0925955875064565e-05,
      "loss": 0.5236,
      "step": 176500
    },
    {
      "epoch": 2.76,
      "learning_rate": 4.0899871141415774e-05,
      "loss": 0.5345,
      "step": 177000
    },
    {
      "epoch": 2.77,
      "learning_rate": 4.087378640776699e-05,
      "loss": 0.5307,
      "step": 177500
    },
    {
      "epoch": 2.78,
      "learning_rate": 4.0847701674118206e-05,
      "loss": 0.5227,
      "step": 178000
    },
    {
      "epoch": 2.79,
      "learning_rate": 4.082161694046942e-05,
      "loss": 0.523,
      "step": 178500
    },
    {
      "epoch": 2.79,
      "learning_rate": 4.079553220682064e-05,
      "loss": 0.5221,
      "step": 179000
    },
    {
      "epoch": 2.8,
      "learning_rate": 4.0769447473171854e-05,
      "loss": 0.5195,
      "step": 179500
    },
    {
      "epoch": 2.81,
      "learning_rate": 4.074336273952307e-05,
      "loss": 0.5366,
      "step": 180000
    },
    {
      "epoch": 2.82,
      "learning_rate": 4.071727800587428e-05,
      "loss": 0.5173,
      "step": 180500
    },
    {
      "epoch": 2.83,
      "learning_rate": 4.0691193272225495e-05,
      "loss": 0.5168,
      "step": 181000
    },
    {
      "epoch": 2.83,
      "learning_rate": 4.066516070804402e-05,
      "loss": 0.5227,
      "step": 181500
    },
    {
      "epoch": 2.84,
      "learning_rate": 4.063912814386253e-05,
      "loss": 0.5145,
      "step": 182000
    },
    {
      "epoch": 2.85,
      "learning_rate": 4.061304341021374e-05,
      "loss": 0.5162,
      "step": 182500
    },
    {
      "epoch": 2.86,
      "learning_rate": 4.058695867656496e-05,
      "loss": 0.5155,
      "step": 183000
    },
    {
      "epoch": 2.86,
      "learning_rate": 4.0560873942916175e-05,
      "loss": 0.5142,
      "step": 183500
    },
    {
      "epoch": 2.87,
      "learning_rate": 4.053478920926739e-05,
      "loss": 0.5168,
      "step": 184000
    },
    {
      "epoch": 2.88,
      "learning_rate": 4.05087044756186e-05,
      "loss": 0.5152,
      "step": 184500
    },
    {
      "epoch": 2.89,
      "learning_rate": 4.0482619741969816e-05,
      "loss": 0.5196,
      "step": 185000
    },
    {
      "epoch": 2.9,
      "learning_rate": 4.045653500832103e-05,
      "loss": 0.5173,
      "step": 185500
    },
    {
      "epoch": 2.9,
      "learning_rate": 4.043045027467225e-05,
      "loss": 0.5178,
      "step": 186000
    },
    {
      "epoch": 2.91,
      "learning_rate": 4.040446987995806e-05,
      "loss": 0.5186,
      "step": 186500
    },
    {
      "epoch": 2.92,
      "learning_rate": 4.037838514630927e-05,
      "loss": 0.5108,
      "step": 187000
    },
    {
      "epoch": 2.93,
      "learning_rate": 4.035230041266049e-05,
      "loss": 0.5198,
      "step": 187500
    },
    {
      "epoch": 2.93,
      "learning_rate": 4.0326215679011705e-05,
      "loss": 0.5184,
      "step": 188000
    },
    {
      "epoch": 2.94,
      "learning_rate": 4.030013094536292e-05,
      "loss": 0.5184,
      "step": 188500
    },
    {
      "epoch": 2.95,
      "learning_rate": 4.027409838118143e-05,
      "loss": 0.5123,
      "step": 189000
    },
    {
      "epoch": 2.96,
      "learning_rate": 4.0248013647532646e-05,
      "loss": 0.5031,
      "step": 189500
    },
    {
      "epoch": 2.97,
      "learning_rate": 4.022192891388386e-05,
      "loss": 0.5148,
      "step": 190000
    },
    {
      "epoch": 2.97,
      "learning_rate": 4.019584418023508e-05,
      "loss": 0.5079,
      "step": 190500
    },
    {
      "epoch": 2.98,
      "learning_rate": 4.0169759446586294e-05,
      "loss": 0.5117,
      "step": 191000
    },
    {
      "epoch": 2.99,
      "learning_rate": 4.014367471293751e-05,
      "loss": 0.523,
      "step": 191500
    },
    {
      "epoch": 3.0,
      "learning_rate": 4.011758997928872e-05,
      "loss": 0.517,
      "step": 192000
    },
    {
      "epoch": 3.0,
      "eval_bleu": 52.0502,
      "eval_gen_len": 14.9976,
      "eval_loss": 0.8042312264442444,
      "eval_runtime": 9500.4244,
      "eval_samples_per_second": 13.462,
      "eval_steps_per_second": 1.683,
      "step": 192184
    },
    {
      "epoch": 3.0,
      "learning_rate": 4.0091557415107235e-05,
      "loss": 0.5007,
      "step": 192500
    },
    {
      "epoch": 3.01,
      "learning_rate": 4.006547268145845e-05,
      "loss": 0.4984,
      "step": 193000
    },
    {
      "epoch": 3.02,
      "learning_rate": 4.003938794780967e-05,
      "loss": 0.4934,
      "step": 193500
    },
    {
      "epoch": 3.03,
      "learning_rate": 4.001330321416088e-05,
      "loss": 0.4892,
      "step": 194000
    },
    {
      "epoch": 3.04,
      "learning_rate": 3.998721848051209e-05,
      "loss": 0.4925,
      "step": 194500
    },
    {
      "epoch": 3.04,
      "learning_rate": 3.9961185916330615e-05,
      "loss": 0.4777,
      "step": 195000
    },
    {
      "epoch": 3.05,
      "learning_rate": 3.9935153352149125e-05,
      "loss": 0.4794,
      "step": 195500
    },
    {
      "epoch": 3.06,
      "learning_rate": 3.990906861850034e-05,
      "loss": 0.4773,
      "step": 196000
    },
    {
      "epoch": 3.07,
      "learning_rate": 3.9882983884851556e-05,
      "loss": 0.4829,
      "step": 196500
    },
    {
      "epoch": 3.08,
      "learning_rate": 3.9856951320670066e-05,
      "loss": 0.4692,
      "step": 197000
    },
    {
      "epoch": 3.08,
      "learning_rate": 3.983086658702128e-05,
      "loss": 0.4776,
      "step": 197500
    },
    {
      "epoch": 3.09,
      "learning_rate": 3.98047818533725e-05,
      "loss": 0.472,
      "step": 198000
    },
    {
      "epoch": 3.1,
      "learning_rate": 3.9778697119723714e-05,
      "loss": 0.4704,
      "step": 198500
    },
    {
      "epoch": 3.11,
      "learning_rate": 3.975261238607493e-05,
      "loss": 0.4599,
      "step": 199000
    },
    {
      "epoch": 3.11,
      "learning_rate": 3.9726527652426145e-05,
      "loss": 0.4698,
      "step": 199500
    },
    {
      "epoch": 3.12,
      "learning_rate": 3.970044291877736e-05,
      "loss": 0.4742,
      "step": 200000
    },
    {
      "epoch": 3.13,
      "learning_rate": 3.967435818512858e-05,
      "loss": 0.4715,
      "step": 200500
    },
    {
      "epoch": 3.14,
      "learning_rate": 3.9648273451479786e-05,
      "loss": 0.4599,
      "step": 201000
    },
    {
      "epoch": 3.15,
      "learning_rate": 3.9622188717831e-05,
      "loss": 0.4671,
      "step": 201500
    },
    {
      "epoch": 3.15,
      "learning_rate": 3.959610398418222e-05,
      "loss": 0.4655,
      "step": 202000
    },
    {
      "epoch": 3.16,
      "learning_rate": 3.9570019250533434e-05,
      "loss": 0.4529,
      "step": 202500
    },
    {
      "epoch": 3.17,
      "learning_rate": 3.954393451688465e-05,
      "loss": 0.4639,
      "step": 203000
    },
    {
      "epoch": 3.18,
      "learning_rate": 3.9517849783235866e-05,
      "loss": 0.451,
      "step": 203500
    },
    {
      "epoch": 3.18,
      "learning_rate": 3.949176504958708e-05,
      "loss": 0.4584,
      "step": 204000
    },
    {
      "epoch": 3.19,
      "learning_rate": 3.946573248540559e-05,
      "loss": 0.4503,
      "step": 204500
    },
    {
      "epoch": 3.2,
      "learning_rate": 3.943964775175681e-05,
      "loss": 0.4578,
      "step": 205000
    },
    {
      "epoch": 3.21,
      "learning_rate": 3.9413615187575323e-05,
      "loss": 0.4563,
      "step": 205500
    },
    {
      "epoch": 3.22,
      "learning_rate": 3.938753045392653e-05,
      "loss": 0.455,
      "step": 206000
    },
    {
      "epoch": 3.22,
      "learning_rate": 3.936144572027775e-05,
      "loss": 0.4595,
      "step": 206500
    },
    {
      "epoch": 3.23,
      "learning_rate": 3.9335360986628964e-05,
      "loss": 0.4473,
      "step": 207000
    },
    {
      "epoch": 3.24,
      "learning_rate": 3.930927625298018e-05,
      "loss": 0.4613,
      "step": 207500
    },
    {
      "epoch": 3.25,
      "learning_rate": 3.9283191519331396e-05,
      "loss": 0.4529,
      "step": 208000
    },
    {
      "epoch": 3.25,
      "learning_rate": 3.9257158955149906e-05,
      "loss": 0.4433,
      "step": 208500
    },
    {
      "epoch": 3.26,
      "learning_rate": 3.923107422150112e-05,
      "loss": 0.4402,
      "step": 209000
    },
    {
      "epoch": 3.27,
      "learning_rate": 3.920498948785234e-05,
      "loss": 0.4506,
      "step": 209500
    },
    {
      "epoch": 3.28,
      "learning_rate": 3.917890475420355e-05,
      "loss": 0.4544,
      "step": 210000
    },
    {
      "epoch": 3.29,
      "learning_rate": 3.915282002055477e-05,
      "loss": 0.4503,
      "step": 210500
    },
    {
      "epoch": 3.29,
      "learning_rate": 3.9126787456373285e-05,
      "loss": 0.4585,
      "step": 211000
    },
    {
      "epoch": 3.3,
      "learning_rate": 3.91007027227245e-05,
      "loss": 0.432,
      "step": 211500
    },
    {
      "epoch": 3.31,
      "learning_rate": 3.907461798907572e-05,
      "loss": 0.4579,
      "step": 212000
    },
    {
      "epoch": 3.32,
      "learning_rate": 3.904853325542693e-05,
      "loss": 0.4488,
      "step": 212500
    },
    {
      "epoch": 3.32,
      "learning_rate": 3.902244852177814e-05,
      "loss": 0.4514,
      "step": 213000
    },
    {
      "epoch": 3.33,
      "learning_rate": 3.899641595759666e-05,
      "loss": 0.4534,
      "step": 213500
    },
    {
      "epoch": 3.34,
      "learning_rate": 3.8970331223947874e-05,
      "loss": 0.4451,
      "step": 214000
    },
    {
      "epoch": 3.35,
      "learning_rate": 3.894424649029909e-05,
      "loss": 0.4453,
      "step": 214500
    },
    {
      "epoch": 3.36,
      "learning_rate": 3.8918161756650306e-05,
      "loss": 0.4422,
      "step": 215000
    },
    {
      "epoch": 3.36,
      "learning_rate": 3.8892129192468816e-05,
      "loss": 0.447,
      "step": 215500
    },
    {
      "epoch": 3.37,
      "learning_rate": 3.886604445882003e-05,
      "loss": 0.4494,
      "step": 216000
    },
    {
      "epoch": 3.38,
      "learning_rate": 3.883995972517125e-05,
      "loss": 0.4442,
      "step": 216500
    },
    {
      "epoch": 3.39,
      "learning_rate": 3.8813874991522463e-05,
      "loss": 0.4367,
      "step": 217000
    },
    {
      "epoch": 3.4,
      "learning_rate": 3.878779025787368e-05,
      "loss": 0.4418,
      "step": 217500
    },
    {
      "epoch": 3.4,
      "learning_rate": 3.8761705524224895e-05,
      "loss": 0.4374,
      "step": 218000
    },
    {
      "epoch": 3.41,
      "learning_rate": 3.873562079057611e-05,
      "loss": 0.4372,
      "step": 218500
    },
    {
      "epoch": 3.42,
      "learning_rate": 3.870953605692733e-05,
      "loss": 0.434,
      "step": 219000
    },
    {
      "epoch": 3.43,
      "learning_rate": 3.868345132327854e-05,
      "loss": 0.4363,
      "step": 219500
    },
    {
      "epoch": 3.43,
      "learning_rate": 3.865736658962976e-05,
      "loss": 0.4446,
      "step": 220000
    },
    {
      "epoch": 3.44,
      "learning_rate": 3.8631281855980975e-05,
      "loss": 0.4362,
      "step": 220500
    },
    {
      "epoch": 3.45,
      "learning_rate": 3.860519712233219e-05,
      "loss": 0.4402,
      "step": 221000
    },
    {
      "epoch": 3.46,
      "learning_rate": 3.8579216727617994e-05,
      "loss": 0.4426,
      "step": 221500
    },
    {
      "epoch": 3.47,
      "learning_rate": 3.855313199396921e-05,
      "loss": 0.4387,
      "step": 222000
    },
    {
      "epoch": 3.47,
      "learning_rate": 3.8527047260320425e-05,
      "loss": 0.4312,
      "step": 222500
    },
    {
      "epoch": 3.48,
      "learning_rate": 3.850096252667164e-05,
      "loss": 0.4336,
      "step": 223000
    },
    {
      "epoch": 3.49,
      "learning_rate": 3.847487779302286e-05,
      "loss": 0.4339,
      "step": 223500
    },
    {
      "epoch": 3.5,
      "learning_rate": 3.844879305937407e-05,
      "loss": 0.4244,
      "step": 224000
    },
    {
      "epoch": 3.5,
      "learning_rate": 3.842270832572529e-05,
      "loss": 0.4393,
      "step": 224500
    },
    {
      "epoch": 3.51,
      "learning_rate": 3.83966757615438e-05,
      "loss": 0.4311,
      "step": 225000
    },
    {
      "epoch": 3.52,
      "learning_rate": 3.8370591027895014e-05,
      "loss": 0.434,
      "step": 225500
    },
    {
      "epoch": 3.53,
      "learning_rate": 3.834450629424623e-05,
      "loss": 0.4313,
      "step": 226000
    },
    {
      "epoch": 3.54,
      "learning_rate": 3.8318421560597446e-05,
      "loss": 0.4307,
      "step": 226500
    },
    {
      "epoch": 3.54,
      "learning_rate": 3.829233682694866e-05,
      "loss": 0.4251,
      "step": 227000
    },
    {
      "epoch": 3.55,
      "learning_rate": 3.826625209329988e-05,
      "loss": 0.4315,
      "step": 227500
    },
    {
      "epoch": 3.56,
      "learning_rate": 3.8240167359651094e-05,
      "loss": 0.4372,
      "step": 228000
    },
    {
      "epoch": 3.57,
      "learning_rate": 3.821408262600231e-05,
      "loss": 0.4343,
      "step": 228500
    },
    {
      "epoch": 3.57,
      "learning_rate": 3.818799789235352e-05,
      "loss": 0.4331,
      "step": 229000
    },
    {
      "epoch": 3.58,
      "learning_rate": 3.8161913158704735e-05,
      "loss": 0.4299,
      "step": 229500
    },
    {
      "epoch": 3.59,
      "learning_rate": 3.813582842505595e-05,
      "loss": 0.4238,
      "step": 230000
    },
    {
      "epoch": 3.6,
      "learning_rate": 3.810974369140717e-05,
      "loss": 0.4228,
      "step": 230500
    },
    {
      "epoch": 3.61,
      "learning_rate": 3.808371112722568e-05,
      "loss": 0.425,
      "step": 231000
    },
    {
      "epoch": 3.61,
      "learning_rate": 3.805762639357689e-05,
      "loss": 0.4243,
      "step": 231500
    },
    {
      "epoch": 3.62,
      "learning_rate": 3.803154165992811e-05,
      "loss": 0.4273,
      "step": 232000
    },
    {
      "epoch": 3.63,
      "learning_rate": 3.8005509095746624e-05,
      "loss": 0.4274,
      "step": 232500
    },
    {
      "epoch": 3.64,
      "learning_rate": 3.797942436209784e-05,
      "loss": 0.4191,
      "step": 233000
    },
    {
      "epoch": 3.64,
      "learning_rate": 3.7953339628449056e-05,
      "loss": 0.4225,
      "step": 233500
    },
    {
      "epoch": 3.65,
      "learning_rate": 3.792725489480027e-05,
      "loss": 0.4218,
      "step": 234000
    },
    {
      "epoch": 3.66,
      "learning_rate": 3.790117016115148e-05,
      "loss": 0.4279,
| "step": 234500 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 3.78750854275027e-05, | |
| "loss": 0.4233, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "learning_rate": 3.784900069385391e-05, | |
| "loss": 0.4232, | |
| "step": 235500 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "learning_rate": 3.782291596020513e-05, | |
| "loss": 0.4216, | |
| "step": 236000 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 3.7796831226556345e-05, | |
| "loss": 0.4251, | |
| "step": 236500 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 3.777074649290756e-05, | |
| "loss": 0.4321, | |
| "step": 237000 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "learning_rate": 3.7744661759258777e-05, | |
| "loss": 0.4179, | |
| "step": 237500 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 3.771857702560999e-05, | |
| "loss": 0.4137, | |
| "step": 238000 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 3.769254446142851e-05, | |
| "loss": 0.4162, | |
| "step": 238500 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "learning_rate": 3.7666459727779725e-05, | |
| "loss": 0.431, | |
| "step": 239000 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 3.7640427163598234e-05, | |
| "loss": 0.4126, | |
| "step": 239500 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "learning_rate": 3.761439459941675e-05, | |
| "loss": 0.4238, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "learning_rate": 3.758836203523526e-05, | |
| "loss": 0.4239, | |
| "step": 240500 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 3.7562277301586476e-05, | |
| "loss": 0.4297, | |
| "step": 241000 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "learning_rate": 3.753619256793769e-05, | |
| "loss": 0.4225, | |
| "step": 241500 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 3.751010783428891e-05, | |
| "loss": 0.4201, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 3.748402310064012e-05, | |
| "loss": 0.4215, | |
| "step": 242500 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 3.745793836699133e-05, | |
| "loss": 0.4182, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 3.743185363334255e-05, | |
| "loss": 0.4184, | |
| "step": 243500 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "learning_rate": 3.7405768899693764e-05, | |
| "loss": 0.4304, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 3.737968416604498e-05, | |
| "loss": 0.4175, | |
| "step": 244500 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 3.7353599432396196e-05, | |
| "loss": 0.4144, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 3.732751469874741e-05, | |
| "loss": 0.4197, | |
| "step": 245500 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 3.730142996509863e-05, | |
| "loss": 0.4157, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "learning_rate": 3.7275345231449844e-05, | |
| "loss": 0.4127, | |
| "step": 246500 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 3.724926049780106e-05, | |
| "loss": 0.4139, | |
| "step": 247000 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 3.722322793361957e-05, | |
| "loss": 0.4154, | |
| "step": 247500 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 3.7197143199970785e-05, | |
| "loss": 0.4172, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 3.7171058466322e-05, | |
| "loss": 0.4129, | |
| "step": 248500 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 3.714497373267322e-05, | |
| "loss": 0.4162, | |
| "step": 249000 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 3.7118941168491726e-05, | |
| "loss": 0.42, | |
| "step": 249500 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 3.709285643484294e-05, | |
| "loss": 0.4146, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 3.706687604012876e-05, | |
| "loss": 0.4211, | |
| "step": 250500 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 3.7040791306479975e-05, | |
| "loss": 0.4134, | |
| "step": 251000 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 3.7014706572831184e-05, | |
| "loss": 0.4171, | |
| "step": 251500 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 3.69886218391824e-05, | |
| "loss": 0.42, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 3.6962537105533615e-05, | |
| "loss": 0.4074, | |
| "step": 252500 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 3.693650454135213e-05, | |
| "loss": 0.4119, | |
| "step": 253000 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 3.691047197717064e-05, | |
| "loss": 0.4126, | |
| "step": 253500 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 3.688438724352186e-05, | |
| "loss": 0.4086, | |
| "step": 254000 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 3.685830250987307e-05, | |
| "loss": 0.4074, | |
| "step": 254500 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 3.683221777622429e-05, | |
| "loss": 0.4154, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 3.6806133042575505e-05, | |
| "loss": 0.4223, | |
| "step": 255500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 3.678004830892672e-05, | |
| "loss": 0.415, | |
| "step": 256000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_bleu": 52.0616, | |
| "eval_gen_len": 15.0215, | |
| "eval_loss": 0.8553618788719177, | |
| "eval_runtime": 9536.6894, | |
| "eval_samples_per_second": 13.411, | |
| "eval_steps_per_second": 1.676, | |
| "step": 256246 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 3.6753963575277937e-05, | |
| "loss": 0.3994, | |
| "step": 256500 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "learning_rate": 3.6727878841629146e-05, | |
| "loss": 0.4028, | |
| "step": 257000 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 3.670184627744766e-05, | |
| "loss": 0.3918, | |
| "step": 257500 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 3.667576154379888e-05, | |
| "loss": 0.3958, | |
| "step": 258000 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 3.6649676810150094e-05, | |
| "loss": 0.396, | |
| "step": 258500 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 3.662359207650131e-05, | |
| "loss": 0.3874, | |
| "step": 259000 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "learning_rate": 3.659750734285252e-05, | |
| "loss": 0.3863, | |
| "step": 259500 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 3.657147477867104e-05, | |
| "loss": 0.3821, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 3.654544221448955e-05, | |
| "loss": 0.3895, | |
| "step": 260500 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 3.651935748084077e-05, | |
| "loss": 0.3819, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 3.649327274719198e-05, | |
| "loss": 0.3798, | |
| "step": 261500 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 3.646724018301049e-05, | |
| "loss": 0.3812, | |
| "step": 262000 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 3.644115544936171e-05, | |
| "loss": 0.3796, | |
| "step": 262500 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 3.6415070715712924e-05, | |
| "loss": 0.3743, | |
| "step": 263000 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 3.638898598206414e-05, | |
| "loss": 0.374, | |
| "step": 263500 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 3.6362901248415356e-05, | |
| "loss": 0.3849, | |
| "step": 264000 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 3.633681651476657e-05, | |
| "loss": 0.3815, | |
| "step": 264500 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 3.631073178111779e-05, | |
| "loss": 0.3717, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 3.6284647047469e-05, | |
| "loss": 0.3727, | |
| "step": 265500 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 3.625856231382021e-05, | |
| "loss": 0.3789, | |
| "step": 266000 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 3.623247758017143e-05, | |
| "loss": 0.3658, | |
| "step": 266500 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 3.6206392846522645e-05, | |
| "loss": 0.3717, | |
| "step": 267000 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 3.618030811287386e-05, | |
| "loss": 0.3661, | |
| "step": 267500 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 3.6154223379225077e-05, | |
| "loss": 0.3732, | |
| "step": 268000 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 3.612813864557629e-05, | |
| "loss": 0.361, | |
| "step": 268500 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 3.610205391192751e-05, | |
| "loss": 0.3657, | |
| "step": 269000 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 3.6075969178278724e-05, | |
| "loss": 0.37, | |
| "step": 269500 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 3.604988444462994e-05, | |
| "loss": 0.3647, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 3.6023799710981156e-05, | |
| "loss": 0.3665, | |
| "step": 270500 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 3.599781931626696e-05, | |
| "loss": 0.3597, | |
| "step": 271000 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 3.5971734582618175e-05, | |
| "loss": 0.3711, | |
| "step": 271500 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 3.594564984896939e-05, | |
| "loss": 0.3582, | |
| "step": 272000 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 3.591956511532061e-05, | |
| "loss": 0.358, | |
| "step": 272500 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 3.589348038167182e-05, | |
| "loss": 0.3548, | |
| "step": 273000 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 3.586744781749033e-05, | |
| "loss": 0.3592, | |
| "step": 273500 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 3.584136308384155e-05, | |
| "loss": 0.3683, | |
| "step": 274000 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 3.5815278350192764e-05, | |
| "loss": 0.3615, | |
| "step": 274500 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 3.578919361654398e-05, | |
| "loss": 0.3691, | |
| "step": 275000 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 3.5763161052362496e-05, | |
| "loss": 0.3503, | |
| "step": 275500 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 3.573707631871371e-05, | |
| "loss": 0.3686, | |
| "step": 276000 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 3.571099158506493e-05, | |
| "loss": 0.3617, | |
| "step": 276500 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 3.5684906851416144e-05, | |
| "loss": 0.3621, | |
| "step": 277000 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 3.565882211776736e-05, | |
| "loss": 0.3653, | |
| "step": 277500 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 3.5632737384118576e-05, | |
| "loss": 0.3594, | |
| "step": 278000 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 3.5606652650469785e-05, | |
| "loss": 0.358, | |
| "step": 278500 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 3.55806200862883e-05, | |
| "loss": 0.3565, | |
| "step": 279000 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 3.555453535263952e-05, | |
| "loss": 0.3576, | |
| "step": 279500 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 3.552845061899073e-05, | |
| "loss": 0.3646, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 3.550236588534195e-05, | |
| "loss": 0.3541, | |
| "step": 280500 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "learning_rate": 3.5476281151693165e-05, | |
| "loss": 0.3531, | |
| "step": 281000 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "learning_rate": 3.545019641804438e-05, | |
| "loss": 0.3547, | |
| "step": 281500 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 3.5424111684395596e-05, | |
| "loss": 0.3496, | |
| "step": 282000 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "learning_rate": 3.539802695074681e-05, | |
| "loss": 0.3525, | |
| "step": 282500 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 3.537199438656532e-05, | |
| "loss": 0.3485, | |
| "step": 283000 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 3.534590965291654e-05, | |
| "loss": 0.352, | |
| "step": 283500 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 3.5319824919267754e-05, | |
| "loss": 0.3585, | |
| "step": 284000 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 3.529374018561897e-05, | |
| "loss": 0.3549, | |
| "step": 284500 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "learning_rate": 3.5267655451970185e-05, | |
| "loss": 0.3503, | |
| "step": 285000 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 3.52415707183214e-05, | |
| "loss": 0.3567, | |
| "step": 285500 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 3.521548598467261e-05, | |
| "loss": 0.3526, | |
| "step": 286000 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 3.5189401251023826e-05, | |
| "loss": 0.3458, | |
| "step": 286500 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 3.5163420856309636e-05, | |
| "loss": 0.3486, | |
| "step": 287000 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 3.513733612266085e-05, | |
| "loss": 0.3476, | |
| "step": 287500 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 3.511125138901207e-05, | |
| "loss": 0.346, | |
| "step": 288000 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 3.508521882483058e-05, | |
| "loss": 0.3547, | |
| "step": 288500 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 3.505913409118179e-05, | |
| "loss": 0.3442, | |
| "step": 289000 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 3.503304935753301e-05, | |
| "loss": 0.3488, | |
| "step": 289500 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 3.5006964623884225e-05, | |
| "loss": 0.3464, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 3.498087989023544e-05, | |
| "loss": 0.3482, | |
| "step": 290500 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 3.495479515658666e-05, | |
| "loss": 0.342, | |
| "step": 291000 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 3.492871042293787e-05, | |
| "loss": 0.3462, | |
| "step": 291500 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 3.490262568928909e-05, | |
| "loss": 0.3514, | |
| "step": 292000 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 3.4876540955640305e-05, | |
| "loss": 0.352, | |
| "step": 292500 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 3.485045622199152e-05, | |
| "loss": 0.3479, | |
| "step": 293000 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 3.4824371488342736e-05, | |
| "loss": 0.3489, | |
| "step": 293500 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 3.4798286754693946e-05, | |
| "loss": 0.3403, | |
| "step": 294000 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 3.477220202104516e-05, | |
| "loss": 0.337, | |
| "step": 294500 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 3.474611728739638e-05, | |
| "loss": 0.346, | |
| "step": 295000 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 3.472003255374759e-05, | |
| "loss": 0.3395, | |
| "step": 295500 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 3.469394782009881e-05, | |
| "loss": 0.3379, | |
| "step": 296000 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 3.4667863086450025e-05, | |
| "loss": 0.3447, | |
| "step": 296500 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 3.464177835280124e-05, | |
| "loss": 0.3392, | |
| "step": 297000 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 3.461569361915246e-05, | |
| "loss": 0.3439, | |
| "step": 297500 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 3.458960888550367e-05, | |
| "loss": 0.3341, | |
| "step": 298000 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 3.456352415185489e-05, | |
| "loss": 0.3435, | |
| "step": 298500 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 3.4537439418206105e-05, | |
| "loss": 0.3459, | |
| "step": 299000 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 3.4511406854024614e-05, | |
| "loss": 0.3394, | |
| "step": 299500 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 3.448532212037583e-05, | |
| "loss": 0.3338, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "learning_rate": 3.4459237386727046e-05, | |
| "loss": 0.3435, | |
| "step": 300500 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 3.443315265307826e-05, | |
| "loss": 0.3512, | |
| "step": 301000 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 3.440706791942948e-05, | |
| "loss": 0.3327, | |
| "step": 301500 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 3.438113969418259e-05, | |
| "loss": 0.3351, | |
| "step": 302000 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 3.43550549605338e-05, | |
| "loss": 0.3341, | |
| "step": 302500 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 3.432897022688501e-05, | |
| "loss": 0.3392, | |
| "step": 303000 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 3.430288549323623e-05, | |
| "loss": 0.3324, | |
| "step": 303500 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 3.4276800759587445e-05, | |
| "loss": 0.3373, | |
| "step": 304000 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 3.425071602593866e-05, | |
| "loss": 0.3402, | |
| "step": 304500 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 3.4224631292289876e-05, | |
| "loss": 0.3451, | |
| "step": 305000 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 3.419854655864109e-05, | |
| "loss": 0.3417, | |
| "step": 305500 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 3.417246182499231e-05, | |
| "loss": 0.3386, | |
| "step": 306000 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 3.4146377091343524e-05, | |
| "loss": 0.3384, | |
| "step": 306500 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 3.412029235769474e-05, | |
| "loss": 0.3388, | |
| "step": 307000 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 3.4094207624045956e-05, | |
| "loss": 0.3357, | |
| "step": 307500 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 3.4068175059864465e-05, | |
| "loss": 0.3482, | |
| "step": 308000 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 3.404209032621568e-05, | |
| "loss": 0.334, | |
| "step": 308500 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 3.40160055925669e-05, | |
| "loss": 0.3334, | |
| "step": 309000 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "learning_rate": 3.398992085891811e-05, | |
| "loss": 0.3379, | |
| "step": 309500 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 3.396383612526933e-05, | |
| "loss": 0.3321, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 3.393780356108784e-05, | |
| "loss": 0.3291, | |
| "step": 310500 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 3.391177099690635e-05, | |
| "loss": 0.3314, | |
| "step": 311000 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 3.3885686263257564e-05, | |
| "loss": 0.3365, | |
| "step": 311500 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 3.385960152960878e-05, | |
| "loss": 0.3352, | |
| "step": 312000 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 3.3833516795959996e-05, | |
| "loss": 0.3276, | |
| "step": 312500 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 3.380743206231121e-05, | |
| "loss": 0.3353, | |
| "step": 313000 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 3.378134732866243e-05, | |
| "loss": 0.3404, | |
| "step": 313500 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 3.375526259501364e-05, | |
| "loss": 0.331, | |
| "step": 314000 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 3.372923003083215e-05, | |
| "loss": 0.3357, | |
| "step": 314500 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 3.370314529718337e-05, | |
| "loss": 0.3313, | |
| "step": 315000 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 3.3677060563534585e-05, | |
| "loss": 0.3392, | |
| "step": 315500 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 3.36509758298858e-05, | |
| "loss": 0.3325, | |
| "step": 316000 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 3.3624891096237016e-05, | |
| "loss": 0.3276, | |
| "step": 316500 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 3.359880636258823e-05, | |
| "loss": 0.3323, | |
| "step": 317000 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 3.357272162893945e-05, | |
| "loss": 0.3269, | |
| "step": 317500 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 3.3546636895290664e-05, | |
| "loss": 0.3339, | |
| "step": 318000 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 3.352055216164188e-05, | |
| "loss": 0.3211, | |
| "step": 318500 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "learning_rate": 3.3494467427993096e-05, | |
| "loss": 0.3357, | |
| "step": 319000 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 3.3468382694344305e-05, | |
| "loss": 0.3379, | |
| "step": 319500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 3.344229796069552e-05, | |
| "loss": 0.3348, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_bleu": 51.995, | |
| "eval_gen_len": 14.9859, | |
| "eval_loss": 0.9135012626647949, | |
| "eval_runtime": 9511.5501, | |
| "eval_samples_per_second": 13.446, | |
| "eval_steps_per_second": 1.681, | |
| "step": 320307 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 320307, | |
| "total_flos": 1.1106366691632742e+19, | |
| "train_loss": 0.5943015630864049, | |
| "train_runtime": 296139.8154, | |
| "train_samples_per_second": 51.917, | |
| "train_steps_per_second": 3.245 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 960915, | |
| "num_train_epochs": 15, | |
| "save_steps": 500, | |
| "total_flos": 1.1106366691632742e+19, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
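
The state above ends at epoch 5 of the configured 15 (`max_steps` 960915). The two eval entries in this stretch show `eval_loss` rising from 0.8554 at epoch 4 to 0.9135 at epoch 5 while `eval_bleu` stays essentially flat (52.06 to 52.00), so the later checkpoints were no longer improving. As a minimal sketch for inspecting a log like this one, assuming it is saved under the Trainer's standard name `trainer_state.json` and that `matplotlib` is available, the following Python plots training loss alongside eval BLEU:

```python
import json

import matplotlib.pyplot as plt

# Load the Trainer state (the filename is an assumption; the Hugging Face
# Trainer writes this file as trainer_state.json inside each checkpoint dir).
with open("trainer_state.json") as f:
    state = json.load(f)

# Training-loss entries carry a "loss" key; evaluation entries carry
# "eval_bleu". The final summary entry has neither, so both filters skip it.
train = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [(e["step"], e["eval_bleu"]) for e in state["log_history"] if "eval_bleu" in e]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(*zip(*train))
ax1.set(xlabel="step", ylabel="training loss")
ax2.plot(*zip(*evals), marker="o")
ax2.set(xlabel="step", ylabel="eval BLEU")
fig.tight_layout()
plt.show()
```

Read alongside the closing summary fields (`train_loss`, `train_runtime`, `total_flos`), a plot like this makes it easy to confirm that the run had stopped improving well before the final epoch logged here.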