| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9918182928318475, |
| "global_step": 171500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.985636946193791e-05, |
| "loss": 4.5112, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9710994423413524e-05, |
| "loss": 2.8906, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.956561938488914e-05, |
| "loss": 2.5728, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.9420244346364754e-05, |
| "loss": 2.3159, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.927516005791742e-05, |
| "loss": 2.0901, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9129785019393034e-05, |
| "loss": 1.9299, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.898440998086865e-05, |
| "loss": 1.6719, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.883903494234426e-05, |
| "loss": 1.5558, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.869365990381988e-05, |
| "loss": 1.4674, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.854828486529549e-05, |
| "loss": 1.3837, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.840290982677111e-05, |
| "loss": 1.2387, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.825753478824672e-05, |
| "loss": 1.2115, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.811215974972234e-05, |
| "loss": 1.1154, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.7966784711197945e-05, |
| "loss": 1.0496, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.782140967267357e-05, |
| "loss": 0.9716, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.767603463414918e-05, |
| "loss": 0.907, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.753095034570184e-05, |
| "loss": 0.9429, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.738557530717746e-05, |
| "loss": 0.9419, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.7240200268653076e-05, |
| "loss": 0.8054, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.7094825230128684e-05, |
| "loss": 0.7875, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.6949450191604306e-05, |
| "loss": 0.7689, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.680407515307992e-05, |
| "loss": 0.7609, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.665870011455553e-05, |
| "loss": 0.7068, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.651390657618524e-05, |
| "loss": 0.7209, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.636853153766086e-05, |
| "loss": 0.64, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.622315649913647e-05, |
| "loss": 0.6096, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.607778146061209e-05, |
| "loss": 0.6555, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.59324064220877e-05, |
| "loss": 0.6192, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.578703138356332e-05, |
| "loss": 0.6229, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.564165634503894e-05, |
| "loss": 0.5818, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.549628130651455e-05, |
| "loss": 0.5736, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 4.535090626799016e-05, |
| "loss": 0.5501, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 4.520553122946578e-05, |
| "loss": 0.5234, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 4.506044694101844e-05, |
| "loss": 0.5175, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.4915071902494056e-05, |
| "loss": 0.5241, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.476969686396967e-05, |
| "loss": 0.4967, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 4.4624321825445286e-05, |
| "loss": 0.4896, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 4.44789467869209e-05, |
| "loss": 0.5281, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 4.4333571748396515e-05, |
| "loss": 0.4756, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 4.418819670987213e-05, |
| "loss": 0.4733, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 4.4042821671347745e-05, |
| "loss": 0.4575, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.389744663282336e-05, |
| "loss": 0.4333, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.3752071594298975e-05, |
| "loss": 0.4247, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.360669655577459e-05, |
| "loss": 0.4739, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.3461321517250204e-05, |
| "loss": 0.4469, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.331623722880287e-05, |
| "loss": 0.3953, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.3170862190278484e-05, |
| "loss": 0.4534, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.30254871517541e-05, |
| "loss": 0.4207, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 4.288040286330676e-05, |
| "loss": 0.3691, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.273502782478238e-05, |
| "loss": 0.3827, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.2589652786257986e-05, |
| "loss": 0.4101, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 4.244427774773361e-05, |
| "loss": 0.379, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.229890270920922e-05, |
| "loss": 0.3616, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.215381842076188e-05, |
| "loss": 0.4308, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.20084433822375e-05, |
| "loss": 0.354, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.186306834371311e-05, |
| "loss": 0.3633, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.1717693305188725e-05, |
| "loss": 0.4011, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 4.157231826666435e-05, |
| "loss": 0.3489, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 4.142694322813996e-05, |
| "loss": 0.3721, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.128185893969262e-05, |
| "loss": 0.3636, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.1137065401322334e-05, |
| "loss": 0.3521, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.099169036279795e-05, |
| "loss": 0.3484, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.0846315324273564e-05, |
| "loss": 0.3303, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.070094028574918e-05, |
| "loss": 0.3119, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.055556524722479e-05, |
| "loss": 0.3495, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.041019020870041e-05, |
| "loss": 0.3297, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.026481517017602e-05, |
| "loss": 0.318, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 4.011944013165164e-05, |
| "loss": 0.3036, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.997406509312725e-05, |
| "loss": 0.3131, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.982869005460287e-05, |
| "loss": 0.2966, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.968331501607848e-05, |
| "loss": 0.3322, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.953823072763114e-05, |
| "loss": 0.3059, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 3.939285568910676e-05, |
| "loss": 0.3008, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 3.9247480650582376e-05, |
| "loss": 0.2686, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 3.9102105612057984e-05, |
| "loss": 0.3025, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 3.8956730573533606e-05, |
| "loss": 0.295, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 3.881135553500922e-05, |
| "loss": 0.3083, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 3.8665980496484836e-05, |
| "loss": 0.3016, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 3.8520605457960444e-05, |
| "loss": 0.3099, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 3.8375230419436065e-05, |
| "loss": 0.2942, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 3.822985538091168e-05, |
| "loss": 0.2642, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.808448034238729e-05, |
| "loss": 0.268, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.793910530386291e-05, |
| "loss": 0.2829, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.779431176549262e-05, |
| "loss": 0.2487, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.764893672696824e-05, |
| "loss": 0.2845, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.750356168844385e-05, |
| "loss": 0.2894, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 3.735818664991946e-05, |
| "loss": 0.2812, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 3.721281161139508e-05, |
| "loss": 0.2448, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 3.70674365728707e-05, |
| "loss": 0.246, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 3.6922061534346307e-05, |
| "loss": 0.2405, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 3.677668649582192e-05, |
| "loss": 0.259, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 3.663131145729754e-05, |
| "loss": 0.2679, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 3.648593641877315e-05, |
| "loss": 0.2557, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 3.6340561380248766e-05, |
| "loss": 0.2326, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 3.619576784187848e-05, |
| "loss": 0.2394, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 3.6050392803354095e-05, |
| "loss": 0.2612, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 3.590501776482971e-05, |
| "loss": 0.2153, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 3.5759642726305325e-05, |
| "loss": 0.2298, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 3.561426768778094e-05, |
| "loss": 0.2299, |
| "step": 49500 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 3.5468892649256555e-05, |
| "loss": 0.2372, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.532351761073217e-05, |
| "loss": 0.2233, |
| "step": 50500 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 3.5178142572207784e-05, |
| "loss": 0.2256, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 3.50327675336834e-05, |
| "loss": 0.2409, |
| "step": 51500 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 3.488739249515901e-05, |
| "loss": 0.2315, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.474230820671168e-05, |
| "loss": 0.2129, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.4596933168187287e-05, |
| "loss": 0.2215, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 3.445155812966291e-05, |
| "loss": 0.22, |
| "step": 53500 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 3.430618309113852e-05, |
| "loss": 0.2431, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 3.416080805261414e-05, |
| "loss": 0.2271, |
| "step": 54500 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.4015433014089746e-05, |
| "loss": 0.2015, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 3.387034872564242e-05, |
| "loss": 0.2285, |
| "step": 55500 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 3.3724973687118025e-05, |
| "loss": 0.227, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 3.357959864859364e-05, |
| "loss": 0.2139, |
| "step": 56500 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 3.343422361006926e-05, |
| "loss": 0.2038, |
| "step": 57000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_bleu": 0.0, |
| "eval_gen_len": 1.0, |
| "eval_loss": 0.13874725997447968, |
| "eval_runtime": 2091.8758, |
| "eval_samples_per_second": 12.179, |
| "eval_steps_per_second": 3.045, |
| "step": 57323 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 3.328913932162192e-05, |
| "loss": 0.2153, |
| "step": 57500 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 3.314376428309754e-05, |
| "loss": 0.1445, |
| "step": 58000 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 3.299838924457315e-05, |
| "loss": 0.1382, |
| "step": 58500 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 3.2853014206048764e-05, |
| "loss": 0.1251, |
| "step": 59000 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 3.270792991760143e-05, |
| "loss": 0.1249, |
| "step": 59500 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 3.2562554879077044e-05, |
| "loss": 0.135, |
| "step": 60000 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 3.241747059062971e-05, |
| "loss": 0.1367, |
| "step": 60500 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 3.227209555210532e-05, |
| "loss": 0.1398, |
| "step": 61000 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 3.212672051358094e-05, |
| "loss": 0.1445, |
| "step": 61500 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 3.198134547505655e-05, |
| "loss": 0.1367, |
| "step": 62000 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 3.183597043653217e-05, |
| "loss": 0.1416, |
| "step": 62500 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 3.169059539800778e-05, |
| "loss": 0.1378, |
| "step": 63000 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 3.15452203594834e-05, |
| "loss": 0.1439, |
| "step": 63500 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 3.139984532095901e-05, |
| "loss": 0.1413, |
| "step": 64000 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 3.125447028243463e-05, |
| "loss": 0.1345, |
| "step": 64500 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 3.110938599398729e-05, |
| "loss": 0.1261, |
| "step": 65000 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 3.09640109554629e-05, |
| "loss": 0.1403, |
| "step": 65500 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 3.081863591693852e-05, |
| "loss": 0.1351, |
| "step": 66000 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 3.0673260878414136e-05, |
| "loss": 0.1414, |
| "step": 66500 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 3.0527885839889744e-05, |
| "loss": 0.1428, |
| "step": 67000 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 3.0382510801365366e-05, |
| "loss": 0.1404, |
| "step": 67500 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 3.023713576284098e-05, |
| "loss": 0.1611, |
| "step": 68000 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 3.0091760724316592e-05, |
| "loss": 0.1342, |
| "step": 68500 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 2.994638568579221e-05, |
| "loss": 0.1428, |
| "step": 69000 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 2.9801010647267825e-05, |
| "loss": 0.1441, |
| "step": 69500 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 2.9655635608743436e-05, |
| "loss": 0.1204, |
| "step": 70000 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 2.9510551320296105e-05, |
| "loss": 0.132, |
| "step": 70500 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 2.9365176281771716e-05, |
| "loss": 0.1363, |
| "step": 71000 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 2.921980124324733e-05, |
| "loss": 0.1293, |
| "step": 71500 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 2.9074426204722942e-05, |
| "loss": 0.1276, |
| "step": 72000 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 2.892934191627561e-05, |
| "loss": 0.1413, |
| "step": 72500 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 2.8783966877751222e-05, |
| "loss": 0.1259, |
| "step": 73000 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 2.863859183922684e-05, |
| "loss": 0.1265, |
| "step": 73500 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 2.8493216800702455e-05, |
| "loss": 0.1382, |
| "step": 74000 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 2.8348132512255116e-05, |
| "loss": 0.1313, |
| "step": 74500 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 2.8202757473730734e-05, |
| "loss": 0.1366, |
| "step": 75000 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 2.805738243520635e-05, |
| "loss": 0.1283, |
| "step": 75500 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 2.7912298146759007e-05, |
| "loss": 0.128, |
| "step": 76000 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 2.776692310823463e-05, |
| "loss": 0.124, |
| "step": 76500 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 2.762154806971024e-05, |
| "loss": 0.1176, |
| "step": 77000 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 2.7476173031185855e-05, |
| "loss": 0.1314, |
| "step": 77500 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 2.7330797992661473e-05, |
| "loss": 0.1146, |
| "step": 78000 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 2.7185422954137084e-05, |
| "loss": 0.1198, |
| "step": 78500 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 2.70400479156127e-05, |
| "loss": 0.1187, |
| "step": 79000 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 2.689467287708831e-05, |
| "loss": 0.1201, |
| "step": 79500 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 2.674929783856393e-05, |
| "loss": 0.1169, |
| "step": 80000 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 2.6603922800039544e-05, |
| "loss": 0.1097, |
| "step": 80500 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 2.6458547761515155e-05, |
| "loss": 0.1263, |
| "step": 81000 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 2.6313172722990777e-05, |
| "loss": 0.1141, |
| "step": 81500 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 2.6168088434543435e-05, |
| "loss": 0.1406, |
| "step": 82000 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 2.602271339601905e-05, |
| "loss": 0.1233, |
| "step": 82500 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 2.5877338357494668e-05, |
| "loss": 0.108, |
| "step": 83000 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 2.5731963318970283e-05, |
| "loss": 0.1129, |
| "step": 83500 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 2.5586588280445894e-05, |
| "loss": 0.1181, |
| "step": 84000 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 2.544121324192151e-05, |
| "loss": 0.1305, |
| "step": 84500 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 2.5295838203397127e-05, |
| "loss": 0.1111, |
| "step": 85000 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 2.515075391494979e-05, |
| "loss": 0.1064, |
| "step": 85500 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 2.5005378876425407e-05, |
| "loss": 0.103, |
| "step": 86000 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 2.4860003837901018e-05, |
| "loss": 0.1156, |
| "step": 86500 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 2.4714628799376633e-05, |
| "loss": 0.1359, |
| "step": 87000 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 2.4569253760852248e-05, |
| "loss": 0.1015, |
| "step": 87500 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 2.4423878722327862e-05, |
| "loss": 0.105, |
| "step": 88000 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 2.4278794433880524e-05, |
| "loss": 0.1157, |
| "step": 88500 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 2.4133419395356142e-05, |
| "loss": 0.0975, |
| "step": 89000 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 2.3988044356831757e-05, |
| "loss": 0.1053, |
| "step": 89500 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 2.384296006838442e-05, |
| "loss": 0.0981, |
| "step": 90000 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 2.3697585029860033e-05, |
| "loss": 0.1031, |
| "step": 90500 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 2.355220999133565e-05, |
| "loss": 0.1124, |
| "step": 91000 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 2.3406834952811263e-05, |
| "loss": 0.108, |
| "step": 91500 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 2.3261459914286877e-05, |
| "loss": 0.0937, |
| "step": 92000 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 2.3116084875762492e-05, |
| "loss": 0.1043, |
| "step": 92500 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 2.2970709837238107e-05, |
| "loss": 0.0982, |
| "step": 93000 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 2.2825625548790772e-05, |
| "loss": 0.1112, |
| "step": 93500 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 2.2680250510266387e-05, |
| "loss": 0.1197, |
| "step": 94000 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 2.2534875471742e-05, |
| "loss": 0.1042, |
| "step": 94500 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 2.2389500433217616e-05, |
| "loss": 0.0888, |
| "step": 95000 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 2.224412539469323e-05, |
| "loss": 0.1044, |
| "step": 95500 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 2.2098750356168846e-05, |
| "loss": 0.0927, |
| "step": 96000 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 2.195337531764446e-05, |
| "loss": 0.0971, |
| "step": 96500 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 2.1808291029197122e-05, |
| "loss": 0.1112, |
| "step": 97000 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 2.166291599067274e-05, |
| "loss": 0.1028, |
| "step": 97500 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2.1517540952148355e-05, |
| "loss": 0.0985, |
| "step": 98000 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.1372165913623966e-05, |
| "loss": 0.1159, |
| "step": 98500 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2.1226790875099585e-05, |
| "loss": 0.0989, |
| "step": 99000 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 2.1081415836575196e-05, |
| "loss": 0.1067, |
| "step": 99500 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 2.0936040798050814e-05, |
| "loss": 0.1184, |
| "step": 100000 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.0790665759526426e-05, |
| "loss": 0.081, |
| "step": 100500 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 2.064529072100204e-05, |
| "loss": 0.0942, |
| "step": 101000 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 2.049991568247766e-05, |
| "loss": 0.1033, |
| "step": 101500 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 2.035454064395327e-05, |
| "loss": 0.0883, |
| "step": 102000 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 2.020916560542889e-05, |
| "loss": 0.1017, |
| "step": 102500 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 2.00643720670586e-05, |
| "loss": 0.0915, |
| "step": 103000 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.9918997028534214e-05, |
| "loss": 0.1009, |
| "step": 103500 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.9773621990009826e-05, |
| "loss": 0.0905, |
| "step": 104000 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.962853770156249e-05, |
| "loss": 0.0989, |
| "step": 104500 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 1.9483162663038105e-05, |
| "loss": 0.1035, |
| "step": 105000 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 1.933778762451372e-05, |
| "loss": 0.0879, |
| "step": 105500 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 1.9192412585989335e-05, |
| "loss": 0.1027, |
| "step": 106000 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 1.9047037547464953e-05, |
| "loss": 0.0813, |
| "step": 106500 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 1.8901662508940565e-05, |
| "loss": 0.0839, |
| "step": 107000 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 1.8756287470416183e-05, |
| "loss": 0.0878, |
| "step": 107500 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 1.8610912431891794e-05, |
| "loss": 0.0826, |
| "step": 108000 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 1.846553739336741e-05, |
| "loss": 0.0934, |
| "step": 108500 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 1.8320162354843027e-05, |
| "loss": 0.0807, |
| "step": 109000 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 1.817478731631864e-05, |
| "loss": 0.0856, |
| "step": 109500 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 1.8029412277794257e-05, |
| "loss": 0.0819, |
| "step": 110000 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.788403723926987e-05, |
| "loss": 0.0979, |
| "step": 110500 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.7738662200745483e-05, |
| "loss": 0.1042, |
| "step": 111000 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 1.7593287162221098e-05, |
| "loss": 0.0744, |
| "step": 111500 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 1.7447912123696713e-05, |
| "loss": 0.0879, |
| "step": 112000 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 1.730253708517233e-05, |
| "loss": 0.0849, |
| "step": 112500 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.7157452796724992e-05, |
| "loss": 0.0767, |
| "step": 113000 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 1.7012077758200607e-05, |
| "loss": 0.0859, |
| "step": 113500 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 1.6866702719676222e-05, |
| "loss": 0.0818, |
| "step": 114000 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 1.6721618431228887e-05, |
| "loss": 0.0827, |
| "step": 114500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_bleu": 0.0, |
| "eval_gen_len": 1.0, |
| "eval_loss": 0.04327930510044098, |
| "eval_runtime": 2076.3927, |
| "eval_samples_per_second": 12.27, |
| "eval_steps_per_second": 3.068, |
| "step": 114646 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 1.6576243392704498e-05, |
| "loss": 0.0638, |
| "step": 115000 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 1.6430868354180116e-05, |
| "loss": 0.0321, |
| "step": 115500 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 1.6285493315655728e-05, |
| "loss": 0.0423, |
| "step": 116000 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 1.6140118277131343e-05, |
| "loss": 0.0364, |
| "step": 116500 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.5995033988684007e-05, |
| "loss": 0.0399, |
| "step": 117000 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 1.5849658950159622e-05, |
| "loss": 0.0411, |
| "step": 117500 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 1.5704283911635237e-05, |
| "loss": 0.034, |
| "step": 118000 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 1.5558908873110852e-05, |
| "loss": 0.0467, |
| "step": 118500 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 1.5413533834586467e-05, |
| "loss": 0.0395, |
| "step": 119000 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 1.526815879606208e-05, |
| "loss": 0.0421, |
| "step": 119500 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 1.5122783757537695e-05, |
| "loss": 0.0425, |
| "step": 120000 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 1.4977408719013311e-05, |
| "loss": 0.0426, |
| "step": 120500 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 1.4832324430565974e-05, |
| "loss": 0.0506, |
| "step": 121000 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 1.468694939204159e-05, |
| "loss": 0.0341, |
| "step": 121500 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 1.4541574353517204e-05, |
| "loss": 0.0429, |
| "step": 122000 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 1.439619931499282e-05, |
| "loss": 0.039, |
| "step": 122500 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 1.4250824276468433e-05, |
| "loss": 0.0445, |
| "step": 123000 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 1.4105449237944048e-05, |
| "loss": 0.0367, |
| "step": 123500 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 1.3960074199419665e-05, |
| "loss": 0.0408, |
| "step": 124000 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 1.3814699160895278e-05, |
| "loss": 0.0432, |
| "step": 124500 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 1.3669614872447942e-05, |
| "loss": 0.0319, |
| "step": 125000 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 1.3524239833923557e-05, |
| "loss": 0.0547, |
| "step": 125500 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 1.337886479539917e-05, |
| "loss": 0.0347, |
| "step": 126000 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 1.3233489756874787e-05, |
| "loss": 0.0329, |
| "step": 126500 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 1.30881147183504e-05, |
| "loss": 0.0381, |
| "step": 127000 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 1.2942739679826017e-05, |
| "loss": 0.0407, |
| "step": 127500 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 1.279765539137868e-05, |
| "loss": 0.0441, |
| "step": 128000 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 1.2652280352854296e-05, |
| "loss": 0.0412, |
| "step": 128500 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 1.250690531432991e-05, |
| "loss": 0.0422, |
| "step": 129000 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 1.2361530275805522e-05, |
| "loss": 0.0308, |
| "step": 129500 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 1.2216155237281139e-05, |
| "loss": 0.0404, |
| "step": 130000 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 1.2071070948833802e-05, |
| "loss": 0.041, |
| "step": 130500 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 1.1925695910309417e-05, |
| "loss": 0.0331, |
| "step": 131000 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 1.1780320871785032e-05, |
| "loss": 0.041, |
| "step": 131500 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 1.1634945833260646e-05, |
| "loss": 0.0393, |
| "step": 132000 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 1.1489570794736261e-05, |
| "loss": 0.036, |
| "step": 132500 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 1.1344486506288924e-05, |
| "loss": 0.036, |
| "step": 133000 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 1.1199111467764539e-05, |
| "loss": 0.0406, |
| "step": 133500 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 1.1053736429240154e-05, |
| "loss": 0.0421, |
| "step": 134000 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 1.090836139071577e-05, |
| "loss": 0.0345, |
| "step": 134500 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 1.0762986352191383e-05, |
| "loss": 0.0489, |
| "step": 135000 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 1.0617611313666998e-05, |
| "loss": 0.0298, |
| "step": 135500 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 1.0472236275142613e-05, |
| "loss": 0.0339, |
| "step": 136000 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 1.0327151986695278e-05, |
| "loss": 0.0362, |
| "step": 136500 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 1.0181776948170891e-05, |
| "loss": 0.0298, |
| "step": 137000 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 1.0036401909646506e-05, |
| "loss": 0.0433, |
| "step": 137500 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 9.89102687112212e-06, |
| "loss": 0.0362, |
| "step": 138000 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 9.745651832597737e-06, |
| "loss": 0.0365, |
| "step": 138500 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 9.600276794073352e-06, |
| "loss": 0.0404, |
| "step": 139000 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 9.455192505626015e-06, |
| "loss": 0.039, |
| "step": 139500 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 9.30981746710163e-06, |
| "loss": 0.0361, |
| "step": 140000 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 9.164442428577245e-06, |
| "loss": 0.0297, |
| "step": 140500 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 9.01906739005286e-06, |
| "loss": 0.0405, |
| "step": 141000 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 8.873692351528472e-06, |
| "loss": 0.0355, |
| "step": 141500 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 8.728608063081137e-06, |
| "loss": 0.0361, |
| "step": 142000 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 8.583233024556752e-06, |
| "loss": 0.0327, |
| "step": 142500 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 8.437857986032367e-06, |
| "loss": 0.0285, |
| "step": 143000 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 8.292482947507982e-06, |
| "loss": 0.0315, |
| "step": 143500 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 8.147398659060646e-06, |
| "loss": 0.0401, |
| "step": 144000 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 8.00202362053626e-06, |
| "loss": 0.0356, |
| "step": 144500 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 7.856648582011874e-06, |
| "loss": 0.0257, |
| "step": 145000 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 7.711273543487489e-06, |
| "loss": 0.0346, |
| "step": 145500 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 7.566189255040153e-06, |
| "loss": 0.0326, |
| "step": 146000 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 7.420814216515768e-06, |
| "loss": 0.0334, |
| "step": 146500 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 7.27602067814548e-06, |
| "loss": 0.0309, |
| "step": 147000 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 7.1306456396210946e-06, |
| "loss": 0.0439, |
| "step": 147500 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 6.985270601096709e-06, |
| "loss": 0.036, |
| "step": 148000 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 6.839895562572323e-06, |
| "loss": 0.0454, |
| "step": 148500 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 6.69452052404794e-06, |
| "loss": 0.0307, |
| "step": 149000 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 6.549145485523554e-06, |
| "loss": 0.031, |
| "step": 149500 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 6.403770446999169e-06, |
| "loss": 0.0357, |
| "step": 150000 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 6.2583954084747835e-06, |
| "loss": 0.0262, |
| "step": 150500 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 6.113020369950398e-06, |
| "loss": 0.0296, |
| "step": 151000 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 5.967645331426013e-06, |
| "loss": 0.0312, |
| "step": 151500 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 5.822270292901628e-06, |
| "loss": 0.0313, |
| "step": 152000 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 5.676895254377243e-06, |
| "loss": 0.0237, |
| "step": 152500 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 5.531520215852858e-06, |
| "loss": 0.0208, |
| "step": 153000 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 5.3861451773284725e-06, |
| "loss": 0.0312, |
| "step": 153500 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 5.240770138804087e-06, |
| "loss": 0.0354, |
| "step": 154000 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 5.095395100279702e-06, |
| "loss": 0.0294, |
| "step": 154500 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 4.950020061755316e-06, |
| "loss": 0.0222, |
| "step": 155000 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 4.804645023230931e-06, |
| "loss": 0.0279, |
| "step": 155500 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 4.6592699847065466e-06, |
| "loss": 0.0437, |
| "step": 156000 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 4.5141856962592096e-06, |
| "loss": 0.0317, |
| "step": 156500 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 4.368810657734824e-06, |
| "loss": 0.0286, |
| "step": 157000 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 4.223435619210439e-06, |
| "loss": 0.0334, |
| "step": 157500 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 4.078060580686054e-06, |
| "loss": 0.0379, |
| "step": 158000 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 3.932976292238718e-06, |
| "loss": 0.0279, |
| "step": 158500 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 3.7876012537143327e-06, |
| "loss": 0.0385, |
| "step": 159000 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 3.642226215189947e-06, |
| "loss": 0.0287, |
| "step": 159500 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 3.4968511766655615e-06, |
| "loss": 0.0275, |
| "step": 160000 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 3.3514761381411767e-06, |
| "loss": 0.0321, |
| "step": 160500 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 3.2061010996167916e-06, |
| "loss": 0.0274, |
| "step": 161000 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 3.0607260610924064e-06, |
| "loss": 0.0357, |
| "step": 161500 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 2.9153510225680212e-06, |
| "loss": 0.0304, |
| "step": 162000 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 2.7699759840436356e-06, |
| "loss": 0.0209, |
| "step": 162500 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 2.624600945519251e-06, |
| "loss": 0.0216, |
| "step": 163000 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 2.4792259069948657e-06, |
| "loss": 0.0288, |
| "step": 163500 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 2.33385086847048e-06, |
| "loss": 0.0258, |
| "step": 164000 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 2.1890573301001925e-06, |
| "loss": 0.0294, |
| "step": 164500 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 2.0436822915758073e-06, |
| "loss": 0.0287, |
| "step": 165000 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 1.898307253051422e-06, |
| "loss": 0.0262, |
| "step": 165500 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 1.7532229646040858e-06, |
| "loss": 0.0359, |
| "step": 166000 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 1.6078479260797006e-06, |
| "loss": 0.0255, |
| "step": 166500 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 1.4624728875553154e-06, |
| "loss": 0.0241, |
| "step": 167000 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 1.3173885991079788e-06, |
| "loss": 0.025, |
| "step": 167500 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 1.1720135605835934e-06, |
| "loss": 0.0262, |
| "step": 168000 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 1.0266385220592085e-06, |
| "loss": 0.0251, |
| "step": 168500 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 8.812634835348232e-07, |
| "loss": 0.0264, |
| "step": 169000 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 7.35888445010438e-07, |
| "loss": 0.0284, |
| "step": 169500 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 5.905134064860527e-07, |
| "loss": 0.0246, |
| "step": 170000 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 4.4513836796166756e-07, |
| "loss": 0.0244, |
| "step": 170500 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 2.9976332943728233e-07, |
| "loss": 0.0306, |
| "step": 171000 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 1.543882909128971e-07, |
| "loss": 0.0237, |
| "step": 171500 |
| } |
| ], |
| "max_steps": 171969, |
| "num_train_epochs": 3, |
| "total_flos": 3.801898817534362e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|