{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9999981467376096,
  "global_step": 269794,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 1.446997776130467e-06,
      "loss": 4.2339,
      "step": 500
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.9295774647887324e-06,
      "loss": 3.743,
      "step": 1000
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.4121571534469985e-06,
      "loss": 3.5488,
      "step": 1500
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.8947368421052634e-06,
      "loss": 3.441,
      "step": 2000
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.377316530763528e-06,
      "loss": 3.3861,
      "step": 2500
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.859896219421795e-06,
      "loss": 3.3338,
      "step": 3000
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.034247590808006e-05,
      "loss": 3.2739,
      "step": 3500
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1825055596738326e-05,
      "loss": 3.2384,
      "step": 4000
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.3307635285396591e-05,
      "loss": 3.1955,
      "step": 4500
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4790214974054856e-05,
      "loss": 3.1733,
      "step": 5000
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6272794662713124e-05,
      "loss": 3.1648,
      "step": 5500
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.7755374351371386e-05,
      "loss": 3.1262,
      "step": 6000
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.923795404002965e-05,
      "loss": 3.112,
      "step": 6500
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.072053372868792e-05,
      "loss": 3.0858,
      "step": 7000
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2203113417346184e-05,
      "loss": 3.0709,
      "step": 7500
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.368569310600445e-05,
      "loss": 3.0446,
      "step": 8000
    },
    {
      "epoch": 0.03,
      "learning_rate": 2.516827279466271e-05,
      "loss": 3.0337,
      "step": 8500
    },
    {
      "epoch": 0.03,
      "learning_rate": 2.6650852483320982e-05,
      "loss": 3.0176,
      "step": 9000
    },
    {
      "epoch": 0.03,
      "learning_rate": 2.8133432171979247e-05,
      "loss": 2.9892,
      "step": 9500
    },
    {
      "epoch": 0.03,
      "learning_rate": 2.9613046701260195e-05,
      "loss": 2.9966,
      "step": 10000
    },
    {
      "epoch": 0.03,
      "learning_rate": 3.1095626389918456e-05,
      "loss": 2.9685,
      "step": 10500
    },
    {
      "epoch": 0.03,
      "learning_rate": 3.257820607857672e-05,
      "loss": 2.9461,
      "step": 11000
    },
    {
      "epoch": 0.03,
      "learning_rate": 3.406078576723499e-05,
      "loss": 2.9472,
      "step": 11500
    },
    {
      "epoch": 0.04,
      "learning_rate": 3.554336545589326e-05,
      "loss": 2.9346,
      "step": 12000
    },
    {
      "epoch": 0.04,
      "learning_rate": 3.7022979985174206e-05,
      "loss": 2.9418,
      "step": 12500
    },
    {
      "epoch": 0.04,
      "learning_rate": 3.850555967383247e-05,
      "loss": 2.9088,
      "step": 13000
    },
    {
      "epoch": 0.04,
      "learning_rate": 3.9988139362490736e-05,
      "loss": 2.8908,
      "step": 13500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.147071905114901e-05,
      "loss": 2.8886,
      "step": 14000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.2953298739807265e-05,
      "loss": 2.8681,
      "step": 14500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.4432913269088213e-05,
      "loss": 2.8626,
      "step": 15000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.591549295774648e-05,
      "loss": 2.8519,
      "step": 15500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.739807264640474e-05,
      "loss": 2.8402,
      "step": 16000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.888065233506301e-05,
      "loss": 2.8259,
      "step": 16500
    },
    {
      "epoch": 0.05,
      "learning_rate": 5.036323202372127e-05,
      "loss": 2.836,
      "step": 17000
    },
    {
      "epoch": 0.05,
      "learning_rate": 5.1845811712379545e-05,
      "loss": 2.8166,
      "step": 17500
    },
    {
      "epoch": 0.05,
      "learning_rate": 5.3328391401037816e-05,
      "loss": 2.796,
      "step": 18000
    },
    {
      "epoch": 0.05,
      "learning_rate": 5.4810971089696074e-05,
      "loss": 2.7931,
      "step": 18500
    },
    {
      "epoch": 0.06,
      "learning_rate": 5.629058561897702e-05,
      "loss": 2.7947,
      "step": 19000
    },
    {
      "epoch": 0.06,
      "learning_rate": 5.777020014825797e-05,
      "loss": 2.7701,
      "step": 19500
    },
    {
      "epoch": 0.06,
      "learning_rate": 5.925277983691624e-05,
      "loss": 2.7674,
      "step": 20000
    },
    {
      "epoch": 0.06,
      "learning_rate": 6.07353595255745e-05,
      "loss": 2.7625,
      "step": 20500
    },
    {
      "epoch": 0.06,
      "learning_rate": 6.221793921423277e-05,
      "loss": 2.754,
      "step": 21000
    },
    {
      "epoch": 0.06,
      "learning_rate": 6.370051890289104e-05,
      "loss": 2.7621,
      "step": 21500
    },
    {
      "epoch": 0.07,
      "learning_rate": 6.51830985915493e-05,
      "loss": 2.7329,
      "step": 22000
    },
    {
      "epoch": 0.07,
      "learning_rate": 6.666567828020757e-05,
      "loss": 2.7392,
      "step": 22500
    },
    {
      "epoch": 0.07,
      "learning_rate": 6.814825796886583e-05,
      "loss": 2.7446,
      "step": 23000
    },
    {
      "epoch": 0.07,
      "learning_rate": 6.96308376575241e-05,
      "loss": 2.7346,
      "step": 23500
    },
    {
      "epoch": 0.07,
      "learning_rate": 7.111341734618236e-05,
      "loss": 2.7246,
      "step": 24000
    },
    {
      "epoch": 0.07,
      "learning_rate": 7.259599703484063e-05,
      "loss": 2.7393,
      "step": 24500
    },
    {
      "epoch": 0.07,
      "learning_rate": 7.407857672349889e-05,
      "loss": 2.7271,
      "step": 25000
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.5492873191833496,
      "eval_runtime": 278.1602,
      "eval_samples_per_second": 359.505,
      "eval_steps_per_second": 44.938,
      "step": 25000
    },
    {
      "epoch": 0.08,
      "learning_rate": 7.556115641215716e-05,
      "loss": 2.7127,
      "step": 25500
    },
    {
      "epoch": 0.08,
      "learning_rate": 7.704373610081542e-05,
      "loss": 2.713,
      "step": 26000
    },
    {
      "epoch": 0.08,
      "learning_rate": 7.852631578947369e-05,
      "loss": 2.7002,
      "step": 26500
    },
    {
      "epoch": 0.08,
      "learning_rate": 8.000593031875464e-05,
      "loss": 2.7042,
      "step": 27000
    },
    {
      "epoch": 0.08,
      "learning_rate": 8.14885100074129e-05,
      "loss": 2.7054,
      "step": 27500
    },
    {
      "epoch": 0.08,
      "learning_rate": 8.297108969607117e-05,
      "loss": 2.6925,
      "step": 28000
    },
    {
      "epoch": 0.08,
      "learning_rate": 8.445366938472942e-05,
      "loss": 2.6927,
      "step": 28500
    },
    {
      "epoch": 0.09,
      "learning_rate": 8.59362490733877e-05,
      "loss": 2.6843,
      "step": 29000
    },
    {
      "epoch": 0.09,
      "learning_rate": 8.741882876204596e-05,
      "loss": 2.6848,
      "step": 29500
    },
    {
      "epoch": 0.09,
      "learning_rate": 8.890140845070423e-05,
      "loss": 2.6851,
      "step": 30000
    },
    {
      "epoch": 0.09,
      "learning_rate": 9.03839881393625e-05,
      "loss": 2.676,
      "step": 30500
    },
    {
      "epoch": 0.09,
      "learning_rate": 9.186656782802076e-05,
      "loss": 2.6811,
      "step": 31000
    },
    {
      "epoch": 0.09,
      "learning_rate": 9.334914751667902e-05,
      "loss": 2.6687,
      "step": 31500
    },
    {
      "epoch": 0.09,
      "learning_rate": 9.483172720533729e-05,
      "loss": 2.6751,
      "step": 32000
    },
    {
      "epoch": 0.1,
      "learning_rate": 9.631430689399555e-05,
      "loss": 2.6746,
      "step": 32500
    },
    {
      "epoch": 0.1,
      "learning_rate": 9.779688658265382e-05,
      "loss": 2.6662,
      "step": 33000
    },
    {
      "epoch": 0.1,
      "learning_rate": 9.927946627131208e-05,
      "loss": 2.6685,
      "step": 33500
    },
    {
      "epoch": 0.1,
      "learning_rate": 9.999982309633072e-05,
      "loss": 2.6702,
      "step": 34000
    },
    {
      "epoch": 0.1,
      "learning_rate": 9.999846516961786e-05,
      "loss": 2.6692,
      "step": 34500
    },
    {
      "epoch": 0.1,
      "learning_rate": 9.999577483170183e-05,
      "loss": 2.6732,
      "step": 35000
    },
    {
      "epoch": 0.11,
      "learning_rate": 9.9991750774992e-05,
      "loss": 2.6676,
      "step": 35500
    },
    {
      "epoch": 0.11,
      "learning_rate": 9.998638103115459e-05,
      "loss": 2.65,
      "step": 36000
    },
    {
      "epoch": 0.11,
      "learning_rate": 9.997967246921306e-05,
      "loss": 2.6493,
      "step": 36500
    },
    {
      "epoch": 0.11,
      "learning_rate": 9.997162526884725e-05,
      "loss": 2.657,
      "step": 37000
    },
    {
      "epoch": 0.11,
      "learning_rate": 9.996223964559059e-05,
      "loss": 2.6521,
      "step": 37500
    },
    {
      "epoch": 0.11,
      "learning_rate": 9.995151585082442e-05,
      "loss": 2.6412,
      "step": 38000
    },
    {
      "epoch": 0.11,
      "learning_rate": 9.993947963012904e-05,
      "loss": 2.6418,
      "step": 38500
    },
    {
      "epoch": 0.12,
      "learning_rate": 9.992608306462086e-05,
      "loss": 2.6442,
      "step": 39000
    },
    {
      "epoch": 0.12,
      "learning_rate": 9.991134929600863e-05,
      "loss": 2.6362,
      "step": 39500
    },
    {
      "epoch": 0.12,
      "learning_rate": 9.989527871891659e-05,
      "loss": 2.6354,
      "step": 40000
    },
    {
      "epoch": 0.12,
      "learning_rate": 9.987787176377355e-05,
      "loss": 2.6167,
      "step": 40500
    },
    {
      "epoch": 0.12,
      "learning_rate": 9.985912889680143e-05,
      "loss": 2.6316,
      "step": 41000
    },
    {
      "epoch": 0.12,
      "learning_rate": 9.983905062000275e-05,
      "loss": 2.6355,
      "step": 41500
    },
    {
      "epoch": 0.12,
      "learning_rate": 9.981763747114724e-05,
      "loss": 2.6183,
      "step": 42000
    },
    {
      "epoch": 0.13,
      "learning_rate": 9.979493684988302e-05,
      "loss": 2.6111,
      "step": 42500
    },
    {
      "epoch": 0.13,
      "learning_rate": 9.977085837996527e-05,
      "loss": 2.6053,
      "step": 43000
    },
    {
      "epoch": 0.13,
      "learning_rate": 9.97454990173924e-05,
      "loss": 2.6019,
      "step": 43500
    },
    {
      "epoch": 0.13,
      "learning_rate": 9.971875780087649e-05,
      "loss": 2.6166,
      "step": 44000
    },
    {
      "epoch": 0.13,
      "learning_rate": 9.969074240841182e-05,
      "loss": 2.6378,
      "step": 44500
    },
    {
      "epoch": 0.13,
      "learning_rate": 9.966134130446202e-05,
      "loss": 2.6069,
      "step": 45000
    },
    {
      "epoch": 0.13,
      "learning_rate": 9.963067287750436e-05,
      "loss": 2.6201,
      "step": 45500
    },
    {
      "epoch": 0.14,
      "learning_rate": 9.959861502967945e-05,
      "loss": 2.5973,
      "step": 46000
    },
    {
      "epoch": 0.14,
      "learning_rate": 9.956522874954218e-05,
      "loss": 2.5926,
      "step": 46500
    },
    {
      "epoch": 0.14,
      "learning_rate": 9.95305149312992e-05,
      "loss": 2.5856,
      "step": 47000
    },
    {
      "epoch": 0.14,
      "learning_rate": 9.949447450471355e-05,
      "loss": 2.5929,
      "step": 47500
    },
    {
      "epoch": 0.14,
      "learning_rate": 9.945710843507967e-05,
      "loss": 2.5851,
      "step": 48000
    },
    {
      "epoch": 0.14,
      "learning_rate": 9.94184964259314e-05,
      "loss": 2.5704,
      "step": 48500
    },
    {
      "epoch": 0.15,
      "learning_rate": 9.9378484754234e-05,
      "loss": 2.5823,
      "step": 49000
    },
    {
      "epoch": 0.15,
      "learning_rate": 9.933715054611808e-05,
      "loss": 2.5716,
      "step": 49500
    },
    {
      "epoch": 0.15,
      "learning_rate": 9.929458153797284e-05,
      "loss": 2.6217,
      "step": 50000
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.431061267852783,
      "eval_runtime": 279.1709,
      "eval_samples_per_second": 358.204,
      "eval_steps_per_second": 44.775,
      "step": 50000
    },
    {
      "epoch": 0.19,
      "learning_rate": 9.77079560136976e-05,
      "loss": 2.5993,
      "step": 50500
    },
    {
      "epoch": 0.19,
      "learning_rate": 9.76101478820416e-05,
      "loss": 2.5537,
      "step": 51000
    },
    {
      "epoch": 0.19,
      "learning_rate": 9.751034728712014e-05,
      "loss": 2.5581,
      "step": 51500
    },
    {
      "epoch": 0.19,
      "learning_rate": 9.740855840554321e-05,
      "loss": 2.6137,
      "step": 52000
    },
    {
      "epoch": 0.19,
      "learning_rate": 9.730478549712973e-05,
      "loss": 2.5886,
      "step": 52500
    },
    {
      "epoch": 0.2,
      "learning_rate": 9.719903290472921e-05,
      "loss": 2.5534,
      "step": 53000
    },
    {
      "epoch": 0.2,
      "learning_rate": 9.709130505404002e-05,
      "loss": 2.5475,
      "step": 53500
    },
    {
      "epoch": 0.2,
      "learning_rate": 9.698160645342426e-05,
      "loss": 2.5613,
      "step": 54000
    },
    {
      "epoch": 0.2,
      "learning_rate": 9.686994169371903e-05,
      "loss": 2.5487,
      "step": 54500
    },
    {
      "epoch": 0.2,
      "learning_rate": 9.675631544804424e-05,
      "loss": 2.5659,
      "step": 55000
    },
    {
      "epoch": 0.21,
      "learning_rate": 9.664073247160717e-05,
      "loss": 2.5319,
      "step": 55500
    },
    {
      "epoch": 0.21,
      "learning_rate": 9.652319760150334e-05,
      "loss": 2.5536,
      "step": 56000
    },
    {
      "epoch": 0.21,
      "learning_rate": 9.640371575651422e-05,
      "loss": 2.53,
      "step": 56500
    },
    {
      "epoch": 0.21,
      "learning_rate": 9.628229193690122e-05,
      "loss": 2.524,
      "step": 57000
    },
    {
      "epoch": 0.21,
      "learning_rate": 9.615893122419657e-05,
      "loss": 2.5219,
      "step": 57500
    },
    {
      "epoch": 0.21,
      "learning_rate": 9.60336387809906e-05,
      "loss": 2.51,
      "step": 58000
    },
    {
      "epoch": 0.22,
      "learning_rate": 9.590641985071566e-05,
      "loss": 2.5131,
      "step": 58500
    },
    {
      "epoch": 0.22,
      "learning_rate": 9.577727975742674e-05,
      "loss": 2.506,
      "step": 59000
    },
    {
      "epoch": 0.22,
      "learning_rate": 9.564622390557863e-05,
      "loss": 2.5086,
      "step": 59500
    },
    {
      "epoch": 0.22,
      "learning_rate": 9.551325777979978e-05,
      "loss": 2.4997,
      "step": 60000
    },
    {
      "epoch": 0.22,
      "learning_rate": 9.53783869446627e-05,
      "loss": 2.4888,
      "step": 60500
    },
    {
      "epoch": 0.23,
      "learning_rate": 9.524161704445116e-05,
      "loss": 2.4952,
      "step": 61000
    },
    {
      "epoch": 0.23,
      "learning_rate": 9.510295380292393e-05,
      "loss": 2.4868,
      "step": 61500
    },
    {
      "epoch": 0.23,
      "learning_rate": 9.496240302307531e-05,
      "loss": 2.4906,
      "step": 62000
    },
    {
      "epoch": 0.23,
      "learning_rate": 9.481997058689214e-05,
      "loss": 2.4815,
      "step": 62500
    },
    {
      "epoch": 0.23,
      "learning_rate": 9.467566245510782e-05,
      "loss": 2.4772,
      "step": 63000
    },
    {
      "epoch": 0.24,
      "learning_rate": 9.45294846669527e-05,
      "loss": 2.4919,
      "step": 63500
    },
    {
      "epoch": 0.24,
      "learning_rate": 9.438144333990148e-05,
      "loss": 2.4889,
      "step": 64000
    },
    {
      "epoch": 0.24,
      "learning_rate": 9.423154466941704e-05,
      "loss": 2.4811,
      "step": 64500
    },
    {
      "epoch": 0.24,
      "learning_rate": 9.407979492869132e-05,
      "loss": 2.4721,
      "step": 65000
    },
    {
      "epoch": 0.24,
      "learning_rate": 9.392620046838267e-05,
      "loss": 2.4696,
      "step": 65500
    },
    {
      "epoch": 0.24,
      "learning_rate": 9.377076771635013e-05,
      "loss": 2.4739,
      "step": 66000
    },
    {
      "epoch": 0.25,
      "learning_rate": 9.361350317738446e-05,
      "loss": 2.4704,
      "step": 66500
    },
    {
      "epoch": 0.25,
      "learning_rate": 9.345441343293581e-05,
      "loss": 2.4727,
      "step": 67000
    },
    {
      "epoch": 0.25,
      "learning_rate": 9.329350514083845e-05,
      "loss": 2.4649,
      "step": 67500
    },
    {
      "epoch": 0.25,
      "learning_rate": 9.313078503503196e-05,
      "loss": 2.4619,
      "step": 68000
    },
    {
      "epoch": 0.25,
      "learning_rate": 9.296625992527957e-05,
      "loss": 2.4577,
      "step": 68500
    },
    {
      "epoch": 0.26,
      "learning_rate": 9.279993669688308e-05,
      "loss": 2.46,
      "step": 69000
    },
    {
      "epoch": 0.26,
      "learning_rate": 9.263182231039476e-05,
      "loss": 2.4562,
      "step": 69500
    },
    {
      "epoch": 0.26,
      "learning_rate": 9.246192380132604e-05,
      "loss": 2.4431,
      "step": 70000
    },
    {
      "epoch": 0.26,
      "learning_rate": 9.229024827985306e-05,
      "loss": 2.4523,
      "step": 70500
    },
    {
      "epoch": 0.26,
      "learning_rate": 9.211680293051915e-05,
      "loss": 2.4558,
      "step": 71000
    },
    {
      "epoch": 0.27,
      "learning_rate": 9.194159501193414e-05,
      "loss": 2.4508,
      "step": 71500
    },
    {
      "epoch": 0.27,
      "learning_rate": 9.17646318564706e-05,
      "loss": 2.4526,
      "step": 72000
    },
    {
      "epoch": 0.27,
      "learning_rate": 9.158592086995692e-05,
      "loss": 2.4455,
      "step": 72500
    },
    {
      "epoch": 0.27,
      "learning_rate": 9.140546953136748e-05,
      "loss": 2.4423,
      "step": 73000
    },
    {
      "epoch": 0.27,
      "learning_rate": 9.122328539250962e-05,
      "loss": 2.4291,
      "step": 73500
    },
    {
      "epoch": 0.27,
      "learning_rate": 9.103937607770756e-05,
      "loss": 2.4521,
      "step": 74000
    },
    {
      "epoch": 0.28,
      "learning_rate": 9.085374928348338e-05,
      "loss": 2.4427,
      "step": 74500
    },
    {
      "epoch": 0.28,
      "learning_rate": 9.066641277823487e-05,
      "loss": 2.4378,
      "step": 75000
    },
    {
      "epoch": 0.28,
      "eval_loss": 2.274932622909546,
      "eval_runtime": 278.4521,
      "eval_samples_per_second": 359.128,
      "eval_steps_per_second": 44.891,
      "step": 75000
    },
    {
      "epoch": 0.28,
      "learning_rate": 9.047737440191048e-05,
      "loss": 2.4375,
      "step": 75500
    },
    {
      "epoch": 0.28,
      "learning_rate": 9.028664206568123e-05,
      "loss": 2.4262,
      "step": 76000
    },
    {
      "epoch": 0.28,
      "learning_rate": 9.009422375160955e-05,
      "loss": 2.4342,
      "step": 76500
    },
    {
      "epoch": 0.29,
      "learning_rate": 8.990012751231527e-05,
      "loss": 2.427,
      "step": 77000
    },
    {
      "epoch": 0.29,
      "learning_rate": 8.970436147063869e-05,
      "loss": 2.419,
      "step": 77500
    },
    {
      "epoch": 0.29,
      "learning_rate": 8.950693381930058e-05,
      "loss": 2.4241,
      "step": 78000
    },
    {
      "epoch": 0.29,
      "learning_rate": 8.930785282055928e-05,
      "loss": 2.4192,
      "step": 78500
    },
    {
      "epoch": 0.29,
      "learning_rate": 8.910712680586502e-05,
      "loss": 2.4179,
      "step": 79000
    },
    {
      "epoch": 0.29,
      "learning_rate": 8.890476417551119e-05,
      "loss": 2.4176,
      "step": 79500
    },
    {
      "epoch": 0.3,
      "learning_rate": 8.870077339828284e-05,
      "loss": 2.4179,
      "step": 80000
    },
    {
      "epoch": 0.3,
      "learning_rate": 8.849516301110216e-05,
      "loss": 2.418,
      "step": 80500
    },
    {
      "epoch": 0.3,
      "learning_rate": 8.828794161867136e-05,
      "loss": 2.4116,
      "step": 81000
    },
    {
      "epoch": 0.3,
      "learning_rate": 8.807911789311245e-05,
      "loss": 2.4114,
      "step": 81500
    },
    {
      "epoch": 0.3,
      "learning_rate": 8.786870057360441e-05,
      "loss": 2.4091,
      "step": 82000
    },
    {
      "epoch": 0.31,
      "learning_rate": 8.765669846601735e-05,
      "loss": 2.4125,
      "step": 82500
    },
    {
      "epoch": 0.31,
      "learning_rate": 8.744312044254409e-05,
      "loss": 2.4115,
      "step": 83000
    },
    {
      "epoch": 0.31,
      "learning_rate": 8.722797544132881e-05,
      "loss": 2.4195,
      "step": 83500
    },
    {
      "epoch": 0.31,
      "learning_rate": 8.701127246609299e-05,
      "loss": 2.4048,
      "step": 84000
    },
    {
      "epoch": 0.31,
      "learning_rate": 8.679302058575865e-05,
      "loss": 2.4075,
      "step": 84500
    },
    {
      "epoch": 0.32,
      "learning_rate": 8.657322893406876e-05,
      "loss": 2.3983,
      "step": 85000
    },
    {
      "epoch": 0.32,
      "learning_rate": 8.635190670920503e-05,
      "loss": 2.3999,
      "step": 85500
    },
    {
      "epoch": 0.32,
      "learning_rate": 8.6129063173403e-05,
      "loss": 2.4055,
      "step": 86000
    },
    {
      "epoch": 0.32,
      "learning_rate": 8.590470765256439e-05,
      "loss": 2.3899,
      "step": 86500
    },
    {
      "epoch": 0.32,
      "learning_rate": 8.567884953586675e-05,
      "loss": 2.3935,
      "step": 87000
    },
    {
      "epoch": 0.32,
      "learning_rate": 8.545149827537065e-05,
      "loss": 2.3872,
      "step": 87500
    },
    {
      "epoch": 0.33,
      "learning_rate": 8.522266338562404e-05,
      "loss": 2.3877,
      "step": 88000
    },
    {
      "epoch": 0.33,
      "learning_rate": 8.499235444326407e-05,
      "loss": 2.3939,
      "step": 88500
    },
    {
      "epoch": 0.33,
      "learning_rate": 8.476058108661639e-05,
      "loss": 2.3855,
      "step": 89000
    },
    {
      "epoch": 0.33,
      "learning_rate": 8.452735301529164e-05,
      "loss": 2.388,
      "step": 89500
    },
    {
      "epoch": 0.33,
      "learning_rate": 8.429267998977967e-05,
      "loss": 2.3801,
      "step": 90000
    },
    {
      "epoch": 0.34,
      "learning_rate": 8.4056571831041e-05,
      "loss": 2.3723,
      "step": 90500
    },
    {
      "epoch": 0.34,
      "learning_rate": 8.381903842009583e-05,
      "loss": 2.3838,
      "step": 91000
    },
    {
      "epoch": 0.34,
      "learning_rate": 8.358008969761053e-05,
      "loss": 2.3857,
      "step": 91500
    },
    {
      "epoch": 0.34,
      "learning_rate": 8.333973566348161e-05,
      "loss": 2.3678,
      "step": 92000
    },
    {
      "epoch": 0.34,
      "learning_rate": 8.309798637641725e-05,
      "loss": 2.3804,
      "step": 92500
    },
    {
      "epoch": 0.34,
      "learning_rate": 8.285485195351632e-05,
      "loss": 2.3667,
      "step": 93000
    },
    {
      "epoch": 0.35,
      "learning_rate": 8.261034256984503e-05,
      "loss": 2.3648,
      "step": 93500
    },
    {
      "epoch": 0.35,
      "learning_rate": 8.236446845801104e-05,
      "loss": 2.4219,
      "step": 94000
    },
    {
      "epoch": 0.35,
      "learning_rate": 8.211723990773533e-05,
      "loss": 2.4274,
      "step": 94500
    },
    {
      "epoch": 0.35,
      "learning_rate": 8.186866726542143e-05,
      "loss": 2.4006,
      "step": 95000
    },
    {
      "epoch": 0.35,
      "learning_rate": 8.161876093372264e-05,
      "loss": 2.3849,
      "step": 95500
    },
    {
      "epoch": 0.36,
      "learning_rate": 8.136753137110643e-05,
      "loss": 2.3937,
      "step": 96000
    },
    {
      "epoch": 0.36,
      "learning_rate": 8.111498909141696e-05,
      "loss": 2.3867,
      "step": 96500
    },
    {
      "epoch": 0.36,
      "learning_rate": 8.086114466343502e-05,
      "loss": 2.3914,
      "step": 97000
    },
    {
      "epoch": 0.36,
      "learning_rate": 8.060600871043566e-05,
      "loss": 2.3946,
      "step": 97500
    },
    {
      "epoch": 0.36,
      "learning_rate": 8.034959190974374e-05,
      "loss": 2.397,
      "step": 98000
    },
    {
      "epoch": 0.37,
      "learning_rate": 8.009190499228698e-05,
      "loss": 2.3854,
      "step": 98500
    },
    {
      "epoch": 0.37,
      "learning_rate": 7.983295874214692e-05,
      "loss": 2.4079,
      "step": 99000
    },
    {
      "epoch": 0.37,
      "learning_rate": 7.95727639961076e-05,
      "loss": 2.394,
      "step": 99500
    },
    {
      "epoch": 0.37,
      "learning_rate": 7.931133164320208e-05,
      "loss": 2.394,
      "step": 100000
    },
    {
      "epoch": 0.37,
      "eval_loss": 2.2282919883728027,
      "eval_runtime": 285.8926,
      "eval_samples_per_second": 349.782,
      "eval_steps_per_second": 43.723,
      "step": 100000
    },
    {
      "epoch": 0.37,
      "learning_rate": 7.904867262425669e-05,
      "loss": 2.3871,
      "step": 100500
    },
    {
      "epoch": 0.37,
      "learning_rate": 7.878479793143314e-05,
      "loss": 2.3768,
      "step": 101000
    },
    {
      "epoch": 0.38,
      "learning_rate": 7.851971860776864e-05,
      "loss": 2.3783,
      "step": 101500
    },
    {
      "epoch": 0.38,
      "learning_rate": 7.82534457467136e-05,
      "loss": 2.3733,
      "step": 102000
    },
    {
      "epoch": 0.38,
      "learning_rate": 7.798599049166741e-05,
      "loss": 2.3717,
      "step": 102500
    },
    {
      "epoch": 0.38,
      "learning_rate": 7.771736403551216e-05,
      "loss": 2.3689,
      "step": 103000
    },
    {
      "epoch": 0.38,
      "learning_rate": 7.744757762014416e-05,
      "loss": 2.3714,
      "step": 103500
    },
    {
      "epoch": 0.39,
      "learning_rate": 7.717664253600352e-05,
      "loss": 2.3792,
      "step": 104000
    },
    {
      "epoch": 0.39,
      "learning_rate": 7.690457012160156e-05,
      "loss": 2.3711,
      "step": 104500
    },
    {
      "epoch": 0.39,
      "learning_rate": 7.663137176304642e-05,
      "loss": 2.3829,
      "step": 105000
    },
    {
      "epoch": 0.39,
      "learning_rate": 7.635705889356646e-05,
      "loss": 2.4541,
      "step": 105500
    },
    {
      "epoch": 0.39,
      "learning_rate": 7.608164299303187e-05,
      "loss": 2.386,
      "step": 106000
    },
    {
      "epoch": 0.39,
      "learning_rate": 7.580513558747409e-05,
      "loss": 2.3839,
      "step": 106500
    },
    {
      "epoch": 0.4,
      "learning_rate": 7.552754824860368e-05,
      "loss": 2.3896,
      "step": 107000
    },
    {
      "epoch": 0.4,
      "learning_rate": 7.524889259332584e-05,
      "loss": 2.3839,
      "step": 107500
    },
    {
      "epoch": 0.4,
      "learning_rate": 7.496918028325434e-05,
      "loss": 2.3821,
      "step": 108000
    },
    {
      "epoch": 0.4,
      "learning_rate": 7.468842302422355e-05,
      "loss": 2.3797,
      "step": 108500
    },
    {
      "epoch": 0.4,
      "learning_rate": 7.440663256579836e-05,
      "loss": 2.348,
      "step": 109000
    },
    {
      "epoch": 0.41,
      "learning_rate": 7.412382070078269e-05,
      "loss": 2.3413,
      "step": 109500
    },
    {
      "epoch": 0.41,
      "learning_rate": 7.383999926472585e-05,
      "loss": 2.3497,
      "step": 110000
    },
    {
      "epoch": 0.41,
      "learning_rate": 7.355518013542717e-05,
      "loss": 2.3303,
      "step": 110500
    },
    {
      "epoch": 0.41,
      "learning_rate": 7.326937523243908e-05,
      "loss": 2.3348,
      "step": 111000
    },
    {
      "epoch": 0.41,
      "learning_rate": 7.29825965165682e-05,
      "loss": 2.3375,
      "step": 111500
    },
    {
      "epoch": 0.42,
      "learning_rate": 7.269485598937468e-05,
      "loss": 2.3271,
      "step": 112000
    },
    {
      "epoch": 0.42,
      "learning_rate": 7.240616569267015e-05,
      "loss": 2.3184,
      "step": 112500
    },
    {
      "epoch": 0.42,
      "learning_rate": 7.211653770801363e-05,
      "loss": 2.333,
      "step": 113000
    },
    {
      "epoch": 0.42,
      "learning_rate": 7.182598415620591e-05,
      "loss": 2.3712,
      "step": 113500
    },
    {
      "epoch": 0.42,
      "learning_rate": 7.153451719678243e-05,
      "loss": 2.3387,
      "step": 114000
    },
    {
      "epoch": 0.42,
      "learning_rate": 7.124214902750428e-05,
      "loss": 2.3274,
      "step": 114500
    },
    {
      "epoch": 0.43,
      "learning_rate": 7.094889188384774e-05,
      "loss": 2.326,
      "step": 115000
    },
    {
      "epoch": 0.43,
      "learning_rate": 7.065475803849241e-05,
      "loss": 2.3131,
      "step": 115500
    },
    {
      "epoch": 0.43,
      "learning_rate": 7.035975980080728e-05,
      "loss": 2.3186,
      "step": 116000
    },
    {
      "epoch": 0.43,
      "learning_rate": 7.006390951633589e-05,
      "loss": 2.3156,
      "step": 116500
    },
    {
      "epoch": 0.43,
      "learning_rate": 6.976721956627952e-05,
      "loss": 2.3211,
      "step": 117000
    },
    {
      "epoch": 0.44,
      "learning_rate": 6.946970236697905e-05,
      "loss": 2.3116,
      "step": 117500
    },
    {
      "epoch": 0.44,
      "learning_rate": 6.917137036939542e-05,
      "loss": 2.3189,
      "step": 118000
    },
    {
      "epoch": 0.44,
      "learning_rate": 6.88722360585885e-05,
      "loss": 2.3286,
      "step": 118500
    },
    {
      "epoch": 0.44,
      "learning_rate": 6.857231195319457e-05,
      "loss": 2.3186,
      "step": 119000
    },
    {
      "epoch": 0.44,
      "learning_rate": 6.827161060490248e-05,
      "loss": 2.3102,
      "step": 119500
    },
    {
      "epoch": 0.44,
      "learning_rate": 6.797014459792836e-05,
      "loss": 2.3146,
      "step": 120000
    },
    {
      "epoch": 0.45,
      "learning_rate": 6.766792654848896e-05,
      "loss": 2.2962,
      "step": 120500
    },
    {
      "epoch": 0.45,
      "learning_rate": 6.736496910427364e-05,
      "loss": 2.2984,
      "step": 121000
    },
    {
      "epoch": 0.45,
      "learning_rate": 6.70612849439151e-05,
      "loss": 2.308,
      "step": 121500
    },
    {
      "epoch": 0.45,
      "learning_rate": 6.675688677645883e-05,
      "loss": 2.3029,
      "step": 122000
    },
    {
      "epoch": 0.45,
      "learning_rate": 6.645178734083117e-05,
      "loss": 2.2933,
      "step": 122500
    },
    {
      "epoch": 0.46,
      "learning_rate": 6.614599940530619e-05,
      "loss": 2.3026,
      "step": 123000
    },
    {
      "epoch": 0.46,
      "learning_rate": 6.583953576697141e-05,
      "loss": 2.3084,
      "step": 123500
    },
    {
      "epoch": 0.46,
      "learning_rate": 6.553240925119219e-05,
      "loss": 2.2985,
      "step": 124000
    },
    {
      "epoch": 0.46,
      "learning_rate": 6.522463271107502e-05,
      "loss": 2.3035,
      "step": 124500
    },
    {
      "epoch": 0.46,
      "learning_rate": 6.49162190269296e-05,
      "loss": 2.2891,
      "step": 125000
    },
    {
      "epoch": 0.46,
      "eval_loss": 2.1396679878234863,
      "eval_runtime": 305.6778,
      "eval_samples_per_second": 327.142,
      "eval_steps_per_second": 40.893,
      "step": 125000
    },
    {
      "epoch": 0.47,
      "learning_rate": 6.460718110572983e-05,
      "loss": 2.2987,
      "step": 125500
    },
    {
      "epoch": 0.47,
      "learning_rate": 6.429753188057368e-05,
      "loss": 2.2899,
      "step": 126000
    },
    {
      "epoch": 0.47,
      "learning_rate": 6.398728431014187e-05,
      "loss": 2.2919,
      "step": 126500
    },
    {
      "epoch": 0.47,
      "learning_rate": 6.367645137815561e-05,
      "loss": 2.2878,
      "step": 127000
    },
    {
      "epoch": 0.47,
      "learning_rate": 6.336504609283325e-05,
      "loss": 2.2848,
      "step": 127500
    },
    {
      "epoch": 0.47,
      "learning_rate": 6.305308148634585e-05,
      "loss": 2.2813,
      "step": 128000
    },
    {
      "epoch": 0.48,
      "learning_rate": 6.274057061427182e-05,
      "loss": 2.2804,
      "step": 128500
    },
    {
      "epoch": 0.48,
      "learning_rate": 6.242752655505053e-05,
      "loss": 2.2888,
      "step": 129000
    },
    {
      "epoch": 0.48,
      "learning_rate": 6.211396240943499e-05,
      "loss": 2.2884,
      "step": 129500
    },
    {
      "epoch": 0.48,
      "learning_rate": 6.17998912999436e-05,
      "loss": 2.28,
      "step": 130000
    },
    {
      "epoch": 0.48,
      "learning_rate": 6.148532637031098e-05,
      "loss": 2.2855,
      "step": 130500
    },
    {
      "epoch": 0.49,
      "learning_rate": 6.117028078493787e-05,
      "loss": 2.3124,
      "step": 131000
    },
    {
      "epoch": 0.49,
      "learning_rate": 6.085476772834029e-05,
      "loss": 2.2886,
      "step": 131500
    },
    {
      "epoch": 0.49,
      "learning_rate": 6.053880040459764e-05,
      "loss": 2.3043,
      "step": 132000
    },
    {
      "epoch": 0.49,
      "learning_rate": 6.022239203680027e-05,
      "loss": 2.3534,
      "step": 132500
    },
    {
      "epoch": 0.49,
      "learning_rate": 5.990555586649599e-05,
      "loss": 2.569,
      "step": 133000
    },
    {
      "epoch": 0.49,
      "learning_rate": 5.958830515313596e-05,
      "loss": 2.341,
      "step": 133500
    },
    {
      "epoch": 0.5,
      "learning_rate": 5.927065317351976e-05,
      "loss": 2.2866,
      "step": 134000
    },
    {
      "epoch": 0.5,
      "learning_rate": 5.8952613221239826e-05,
      "loss": 2.2786,
      "step": 134500
    },
    {
      "epoch": 0.5,
      "learning_rate": 5.863419860612506e-05,
      "loss": 2.2707,
      "step": 135000
    },
    {
      "epoch": 0.5,
      "learning_rate": 5.831542265368378e-05,
      "loss": 2.277,
      "step": 135500
    },
    {
      "epoch": 0.5,
      "learning_rate": 5.799629870454619e-05,
      "loss": 2.2702,
      "step": 136000
    },
    {
      "epoch": 0.51,
      "learning_rate": 5.7676840113905974e-05,
      "loss": 2.2673,
      "step": 136500
    },
    {
      "epoch": 0.51,
      "learning_rate": 5.735706025096136e-05,
      "loss": 2.2517,
      "step": 137000
    },
    {
      "epoch": 0.51,
      "learning_rate": 5.7036972498355744e-05,
      "loss": 2.2768,
      "step": 137500
    },
    {
      "epoch": 0.51,
      "learning_rate": 5.671659025161755e-05,
      "loss": 2.2569,
      "step": 138000
    },
    {
      "epoch": 0.51,
      "learning_rate": 5.6395926918599606e-05,
      "loss": 2.2527,
      "step": 138500
    },
    {
      "epoch": 0.52,
      "learning_rate": 5.607499591891816e-05,
      "loss": 2.2662,
      "step": 139000
    },
    {
      "epoch": 0.52,
      "learning_rate": 5.5753810683391104e-05,
      "loss": 2.2446,
      "step": 139500
    },
    {
      "epoch": 0.52,
      "learning_rate": 5.5432384653476e-05,
      "loss": 2.2506,
      "step": 140000
    },
    {
      "epoch": 0.52,
      "learning_rate": 5.5110731280707605e-05,
      "loss": 2.2495,
      "step": 140500
    },
    {
      "epoch": 0.52,
      "learning_rate": 5.4788864026134824e-05,
      "loss": 2.2513,
      "step": 141000
    },
    {
      "epoch": 0.52,
      "learning_rate": 5.446679635975741e-05,
      "loss": 2.2512,
      "step": 141500
    },
    {
      "epoch": 0.53,
      "learning_rate": 5.41445417599623e-05,
      "loss": 2.2497,
      "step": 142000
    },
    {
      "epoch": 0.53,
      "learning_rate": 5.3822113712959466e-05,
      "loss": 2.2491,
      "step": 142500
    },
    {
      "epoch": 0.53,
      "learning_rate": 5.349952571221761e-05,
      "loss": 2.2425,
      "step": 143000
    },
    {
      "epoch": 0.53,
      "learning_rate": 5.3176791257899405e-05,
      "loss": 2.2491,
      "step": 143500
    },
    {
      "epoch": 0.53,
      "learning_rate": 5.285392385629653e-05,
      "loss": 2.2542,
      "step": 144000
    },
    {
      "epoch": 0.54,
      "learning_rate": 5.253093701926446e-05,
      "loss": 2.2498,
      "step": 144500
    },
    {
      "epoch": 0.54,
      "learning_rate": 5.2207844263657e-05,
      "loss": 2.2456,
      "step": 145000
    },
    {
      "epoch": 0.54,
      "learning_rate": 5.188465911076059e-05,
      "loss": 2.235,
      "step": 145500
    },
    {
      "epoch": 0.54,
      "learning_rate": 5.156139508572844e-05,
      "loss": 2.2463,
      "step": 146000
    },
    {
      "epoch": 0.54,
      "learning_rate": 5.1238065717014526e-05,
      "loss": 2.232,
      "step": 146500
    },
    {
      "epoch": 0.54,
      "learning_rate": 5.091468453580748e-05,
      "loss": 2.227,
      "step": 147000
    },
    {
      "epoch": 0.55,
      "learning_rate": 5.0591265075464167e-05,
      "loss": 2.2354,
      "step": 147500
    },
    {
      "epoch": 0.55,
      "learning_rate": 5.026782087094353e-05,
      "loss": 2.2266,
      "step": 148000
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.9944365458239946e-05,
      "loss": 2.2319,
      "step": 148500
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.9620912373816894e-05,
      "loss": 2.2383,
      "step": 149000
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.929747515404043e-05,
      "loss": 2.2315,
      "step": 149500
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.897406733461264e-05,
      "loss": 2.2286,
      "step": 150000
    },
    {
      "epoch": 0.56,
      "eval_loss": 2.0796430110931396,
      "eval_runtime": 286.5966,
      "eval_samples_per_second": 348.923,
      "eval_steps_per_second": 43.615,
      "step": 150000
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.8650702450005264e-05,
      "loss": 2.2269,
      "step": 150500
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.832739403289318e-05,
      "loss": 2.2294,
      "step": 151000
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.8004155613588214e-05,
      "loss": 2.216,
      "step": 151500
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.7681000719472726e-05,
      "loss": 2.2128,
      "step": 152000
    },
    {
      "epoch": 0.57,
      "learning_rate": 4.7357942874433634e-05,
      "loss": 2.2088,
      "step": 152500
    },
    {
      "epoch": 0.57,
      "learning_rate": 4.703499559829639e-05,
      "loss": 2.2112,
      "step": 153000
    },
    {
      "epoch": 0.57,
      "learning_rate": 4.671217240625916e-05,
      "loss": 2.2168,
      "step": 153500
    },
    {
      "epoch": 0.57,
      "learning_rate": 4.6389486808327304e-05,
      "loss": 2.2034,
      "step": 154000
    },
    {
      "epoch": 0.57,
      "learning_rate": 4.606695230874788e-05,
      "loss": 2.2045,
      "step": 154500
    },
    {
      "epoch": 0.57,
      "learning_rate": 4.5744582405444544e-05,
      "loss": 2.2043,
      "step": 155000
    },
    {
      "epoch": 0.58,
      "learning_rate": 4.542239058945272e-05,
      "loss": 2.2116,
      "step": 155500
    },
    {
      "epoch": 0.58,
      "learning_rate": 4.51003903443549e-05,
      "loss": 2.2046,
      "step": 156000
    },
    {
      "epoch": 0.58,
      "learning_rate": 4.4778595145716465e-05,
      "loss": 2.1967,
      "step": 156500
    },
    {
      "epoch": 0.58,
      "learning_rate": 4.4457018460521684e-05,
      "loss": 2.2046,
      "step": 157000
    },
    {
      "epoch": 0.58,
      "learning_rate": 4.4135673746610115e-05,
      "loss": 2.1952,
      "step": 157500
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.381457445211346e-05,
      "loss": 2.2071,
      "step": 158000
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.349373401489269e-05,
      "loss": 2.1993,
      "step": 158500
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.317316586197571e-05,
      "loss": 2.1998,
      "step": 159000
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.2852883408995515e-05,
      "loss": 2.1938,
      "step": 159500
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.253290005962863e-05,
      "loss": 2.1901,
      "step": 160000
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.221322920503423e-05,
      "loss": 2.1933,
      "step": 160500
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.1893884223293746e-05,
      "loss": 2.1884,
      "step": 161000
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.157487847885094e-05,
      "loss": 2.1855,
      "step": 161500
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.1256225321952705e-05,
      "loss": 2.1968,
      "step": 162000
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.093793808809028e-05,
      "loss": 2.186,
      "step": 162500
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.062003009744115e-05,
      "loss": 2.1905,
      "step": 163000
    },
    {
      "epoch": 0.61,
      "learning_rate": 4.0302514654311675e-05,
      "loss": 2.1828,
      "step": 163500
    },
    {
      "epoch": 0.61,
      "learning_rate": 3.998540504658027e-05,
      "loss": 2.1878,
      "step": 164000
    },
    {
      "epoch": 0.61,
      "learning_rate": 3.966871454514137e-05,
      "loss": 2.1918,
      "step": 164500
    },
    {
      "epoch": 0.61,
      "learning_rate": 3.935245640334991e-05,
      "loss": 2.182,
      "step": 165000
    },
    {
      "epoch": 0.61,
      "learning_rate": 3.903664385646685e-05,
      "loss": 2.1872,
      "step": 165500
    },
    {
      "epoch": 0.62,
      "learning_rate": 3.872129012110515e-05,
      "loss": 2.18,
      "step": 166000
    },
    {
      "epoch": 0.62,
      "learning_rate": 3.8406408394676724e-05,
      "loss": 2.1794,
      "step": 166500
    },
    {
      "epoch": 0.62,
      "learning_rate": 3.8092011854840135e-05,
      "loss": 2.1778,
      "step": 167000
    },
    {
      "epoch": 0.62,
      "learning_rate": 3.7778113658949145e-05,
      "loss": 2.1764,
      "step": 167500
    },
    {
      "epoch": 0.62,
      "learning_rate": 3.7464726943501955e-05,
      "loss": 2.1727,
      "step": 168000
    },
    {
      "epoch": 0.62,
      "learning_rate": 3.715186482359162e-05,
      "loss": 2.1718,
      "step": 168500
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.683954039235707e-05,
      "loss": 2.1766,
      "step": 169000
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.6527766720435186e-05,
      "loss": 2.1838,
      "step": 169500
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.6216556855413906e-05,
      "loss": 2.1786,
      "step": 170000
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.5905923821286006e-05,
      "loss": 2.1748,
      "step": 170500
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.559588061790419e-05,
      "loss": 2.1708,
      "step": 171000
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.528644022043701e-05,
      "loss": 2.1749,
      "step": 171500
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.497761557882584e-05,
      "loss": 2.1668,
      "step": 172000
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.466941961724301e-05,
      "loss": 2.1703,
      "step": 172500
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.4361865233550814e-05,
      "loss": 2.1682,
      "step": 173000
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.40549652987618e-05,
      "loss": 2.1705,
      "step": 173500
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.374873265650016e-05,
      "loss": 2.1671,
      "step": 174000
    },
    {
      "epoch": 0.65,
      "learning_rate": 3.3443180122464156e-05,
      "loss": 2.16,
      "step": 174500
    },
    {
      "epoch": 0.65,
      "learning_rate": 3.3138320483889874e-05,
      "loss": 2.1536,
      "step": 175000
    },
    {
      "epoch": 0.65,
      "eval_loss": 2.0123212337493896,
      "eval_runtime": 294.337,
      "eval_samples_per_second": 339.747,
      "eval_steps_per_second": 42.468,
      "step": 175000
    },
    {
      "epoch": 0.65,
      "learning_rate": 3.283416649901599e-05,
      "loss": 2.152,
      "step": 175500
    },
    {
      "epoch": 0.65,
      "learning_rate": 3.253073089654992e-05,
      "loss": 2.1519,
      "step": 176000
    },
    {
      "epoch": 0.65,
      "learning_rate": 3.222802637513508e-05,
      "loss": 2.1638,
      "step": 176500
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.192606560281948e-05,
      "loss": 2.1527,
      "step": 177000
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.162486121652556e-05,
      "loss": 2.1499,
      "step": 177500
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.1324425821521375e-05,
      "loss": 2.1614,
      "step": 178000
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.1024771990893e-05,
      "loss": 2.1556,
      "step": 178500
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.072591226501842e-05,
      "loss": 2.1514,
      "step": 179000
    },
    {
      "epoch": 0.67,
      "learning_rate": 3.042785915104267e-05,
      "loss": 2.1533,
      "step": 179500
    },
    {
      "epoch": 0.67,
      "learning_rate": 3.013062512235445e-05,
      "loss": 2.1413,
      "step": 180000
    },
    {
      "epoch": 0.67,
      "learning_rate": 2.9834222618064146e-05,
      "loss": 2.1461,
      "step": 180500
    },
    {
      "epoch": 0.67,
      "learning_rate": 2.9538664042483145e-05,
      "loss": 2.1475,
      "step": 181000
    },
    {
      "epoch": 0.67,
      "learning_rate": 2.9243961764604878e-05,
      "loss": 2.1452,
      "step": 181500
    },
    {
      "epoch": 0.67,
      "learning_rate": 2.895012811758705e-05,
      "loss": 2.1488,
      "step": 182000
    },
    {
      "epoch": 0.68,
      "learning_rate": 2.8657175398235548e-05,
      "loss": 2.1496,
      "step": 182500
    },
    {
      "epoch": 0.68,
      "learning_rate": 2.8365115866489895e-05,
      "loss": 2.1348,
      "step": 183000
    },
    {
      "epoch": 0.68,
      "learning_rate": 2.8073961744910036e-05,
      "loss": 2.1406,
      "step": 183500
    },
    {
      "epoch": 0.68,
      "learning_rate": 2.7783725218164992e-05,
      "loss": 2.1484,
      "step": 184000
    },
    {
      "epoch": 0.68,
      "learning_rate": 2.7494418432522773e-05,
      "loss": 2.1482,
      "step": 184500
    },
    {
      "epoch": 0.69,
      "learning_rate": 2.7206053495342176e-05,
      "loss": 2.1491,
      "step": 185000
    },
    {
      "epoch": 0.69,
      "learning_rate": 2.691864247456609e-05,
      "loss": 2.1316,
      "step": 185500
    },
    {
      "epoch": 0.69,
      "learning_rate": 2.6632197398216403e-05,
      "loss": 2.1459,
      "step": 186000
    },
    {
      "epoch": 0.69,
      "learning_rate": 2.6346730253890626e-05,
      "loss": 2.1308,
      "step": 186500
    },
    {
      "epoch": 0.69,
      "learning_rate": 2.6062252988260348e-05,
      "loss": 2.1389,
      "step": 187000
    },
    {
      "epoch": 0.69,
      "learning_rate": 2.5778777506571112e-05,
      "loss": 2.1374,
      "step": 187500
    },
    {
      "epoch": 0.7,
      "learning_rate": 2.5496315672144238e-05,
      "loss": 2.1322,
      "step": 188000
    },
    {
      "epoch": 0.7,
      "learning_rate": 2.521487930588044e-05,
      "loss": 2.1235,
      "step": 188500
    },
    {
      "epoch": 0.7,
      "learning_rate": 2.4934480185764976e-05,
      "loss": 2.1291,
      "step": 189000
    },
    {
      "epoch": 0.7,
      "learning_rate": 2.465513004637487e-05,
      "loss": 2.1366,
      "step": 189500
    },
    {
      "epoch": 0.7,
      "learning_rate": 2.4376840578387754e-05,
      "loss": 2.1256,
      "step": 190000
    },
    {
      "epoch": 0.71,
      "learning_rate": 2.409962342809261e-05,
      "loss": 2.1304,
      "step": 190500
    },
    {
      "epoch": 0.71,
      "learning_rate": 2.382349019690248e-05,
      "loss": 2.1229,
      "step": 191000
    },
    {
      "epoch": 0.71,
      "learning_rate": 2.3548452440868816e-05,
      "loss": 2.1268,
      "step": 191500
    },
    {
      "epoch": 0.71,
      "learning_rate": 2.3274521670197923e-05,
      "loss": 2.1278,
      "step": 192000
    },
    {
      "epoch": 0.71,
      "learning_rate": 2.300170934876934e-05,
      "loss": 2.1277,
      "step": 192500
    },
    {
      "epoch": 0.72,
      "learning_rate": 2.2730026893655916e-05,
      "loss": 2.1195,
      "step": 193000
    },
    {
      "epoch": 0.72,
      "learning_rate": 2.2459485674646187e-05,
      "loss": 2.1253,
      "step": 193500
    },
    {
      "epoch": 0.72,
      "learning_rate": 2.2190097013768403e-05,
      "loss": 2.1252,
      "step": 194000
    },
    {
      "epoch": 0.72,
      "learning_rate": 2.1921872184816817e-05,
      "loss": 2.1168,
      "step": 194500
    },
    {
      "epoch": 0.72,
      "learning_rate": 2.1654822412879798e-05,
      "loss": 2.1288,
      "step": 195000
    },
    {
      "epoch": 0.72,
      "learning_rate": 2.138895887387018e-05,
      "loss": 2.1195,
      "step": 195500
    },
    {
      "epoch": 0.73,
      "learning_rate": 2.112429269405739e-05,
      "loss": 2.1196,
      "step": 196000
    },
    {
      "epoch": 0.73,
      "learning_rate": 2.0860834949602015e-05,
      "loss": 2.1175,
      "step": 196500
    },
    {
      "epoch": 0.73,
      "learning_rate": 2.0598596666092075e-05,
      "loss": 2.1234,
      "step": 197000
    },
    {
      "epoch": 0.73,
      "learning_rate": 2.0337588818081744e-05,
      "loss": 2.1122,
      "step": 197500
    },
    {
      "epoch": 0.73,
      "learning_rate": 2.007782232863199e-05,
      "loss": 2.1185,
      "step": 198000
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.9819308068853526e-05,
      "loss": 2.1201,
      "step": 198500
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.956205685745183e-05,
      "loss": 2.1079,
      "step": 199000
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.9306079460274302e-05,
      "loss": 2.1137,
      "step": 199500
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.9051386589859843e-05,
      "loss": 2.114,
      "step": 200000
    },
    {
      "epoch": 0.74,
      "eval_loss": 1.9818874597549438,
      "eval_runtime": 287.1851,
      "eval_samples_per_second": 348.208,
      "eval_steps_per_second": 43.526,
      "step": 200000
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.879798890499046e-05,
      "loss": 2.1094,
      "step": 200500
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.8545897010245273e-05,
      "loss": 2.1185,
      "step": 201000
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.8295121455556607e-05,
      "loss": 2.1187,
      "step": 201500
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.8045672735768616e-05,
      "loss": 2.1096,
      "step": 202000
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.7797561290197957e-05,
      "loss": 2.1058,
      "step": 202500
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.755079750219699e-05,
      "loss": 2.1065,
      "step": 203000
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.7305391698719187e-05,
      "loss": 2.104,
      "step": 203500
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.706135414988701e-05,
      "loss": 2.1047,
      "step": 204000
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.6818695068562084e-05,
      "loss": 2.1033,
      "step": 204500
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.6577424609917756e-05,
      "loss": 2.1106,
      "step": 205000
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.633755287101416e-05,
      "loss": 2.1068,
      "step": 205500
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.6099089890375623e-05,
      "loss": 2.1013,
      "step": 206000
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.5862045647570574e-05,
      "loss": 2.1019,
      "step": 206500
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.562643006279392e-05,
      "loss": 2.0999,
      "step": 207000
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.5392252996451884e-05,
      "loss": 2.1059,
      "step": 207500
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.5159524248749296e-05,
      "loss": 2.1054,
      "step": 208000
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.4928253559279532e-05,
      "loss": 2.1043,
      "step": 208500
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.469845060661686e-05,
      "loss": 2.105,
      "step": 209000
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.4470125007911478e-05,
      "loss": 2.1091,
      "step": 209500
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.4243286318486915e-05,
      "loss": 2.101,
      "step": 210000
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.4017944031440283e-05,
      "loss": 2.1054,
      "step": 210500
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.3794107577244886e-05,
      "loss": 2.0959,
      "step": 211000
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.3571786323355596e-05,
      "loss": 2.0913,
      "step": 211500
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.335098957381687e-05,
      "loss": 2.0924,
      "step": 212000
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.3131726568873315e-05,
      "loss": 2.0939,
      "step": 212500
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.2914006484583013e-05,
      "loss": 2.0917,
      "step": 213000
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.2697838432433545e-05,
      "loss": 2.0899,
      "step": 213500
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.2483231458960599e-05,
      "loss": 2.0961,
      "step": 214000
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.2270194545369473e-05,
      "loss": 2.0955,
      "step": 214500
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.2058736607159133e-05,
      "loss": 2.0848,
      "step": 215000
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.1848866493749111e-05,
      "loss": 2.0841,
      "step": 215500
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.1640592988109261e-05,
      "loss": 2.088,
      "step": 216000
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.1433924806392054e-05,
      "loss": 2.092,
      "step": 216500
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.1228870597567887e-05,
      "loss": 2.0807,
      "step": 217000
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.102543894306316e-05,
      "loss": 2.0881,
      "step": 217500
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.0823638356401056e-05,
      "loss": 2.084,
      "step": 218000
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.0623477282845312e-05,
      "loss": 2.0816,
      "step": 218500
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.0424964099046813e-05,
      "loss": 2.0868,
      "step": 219000
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.0228107112692953e-05,
      "loss": 2.0883,
      "step": 219500
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.0032914562160051e-05,
      "loss": 2.0836,
      "step": 220000
    },
    {
      "epoch": 0.82,
      "learning_rate": 9.839394616168506e-06,
      "loss": 2.0877,
      "step": 220500
    },
    {
      "epoch": 0.82,
      "learning_rate": 9.647555373440976e-06,
      "loss": 2.082,
      "step": 221000
    },
    {
      "epoch": 0.82,
      "learning_rate": 9.457404862363428e-06,
      "loss": 2.0798,
      "step": 221500
    },
    {
      "epoch": 0.82,
      "learning_rate": 9.26895104064921e-06,
      "loss": 2.0855,
      "step": 222000
    },
    {
      "epoch": 0.82,
      "learning_rate": 9.082201795005968e-06,
      "loss": 2.0892,
      "step": 222500
    },
    {
      "epoch": 0.83,
      "learning_rate": 8.897164940805591e-06,
      "loss": 2.0816,
      "step": 223000
    },
    {
      "epoch": 0.83,
      "learning_rate": 8.71384822175716e-06,
      "loss": 2.0886,
      "step": 223500
    },
    {
      "epoch": 0.83,
      "learning_rate": 8.532259309582886e-06,
      "loss": 2.0848,
      "step": 224000
    },
    {
      "epoch": 0.83,
      "learning_rate": 8.35240580369701e-06,
      "loss": 2.0806,
      "step": 224500
    },
    {
      "epoch": 0.83,
      "learning_rate": 8.174295230887846e-06,
      "loss": 2.0845,
      "step": 225000
    },
    {
      "epoch": 0.83,
      "eval_loss": 1.9471186399459839,
      "eval_runtime": 284.8288,
      "eval_samples_per_second": 351.088,
      "eval_steps_per_second": 43.886,
      "step": 225000
    },
    {
      "epoch": 0.84,
      "learning_rate": 7.997935045002724e-06,
      "loss": 2.0879,
      "step": 225500
    },
    {
      "epoch": 0.84,
      "learning_rate": 7.823332626636065e-06,
      "loss": 2.0804,
      "step": 226000
    },
    {
      "epoch": 0.84,
      "learning_rate": 7.650495282820502e-06,
      "loss": 2.0835,
      "step": 226500
    },
    {
      "epoch": 0.84,
      "learning_rate": 7.479430246721092e-06,
      "loss": 2.0834,
      "step": 227000
    },
    {
      "epoch": 0.84,
      "learning_rate": 7.310144677332631e-06,
      "loss": 2.0755,
      "step": 227500
    },
    {
      "epoch": 0.85,
      "learning_rate": 7.142645659180036e-06,
      "loss": 2.0744,
      "step": 228000
    },
    {
      "epoch": 0.85,
      "learning_rate": 6.9769402020218314e-06,
      "loss": 2.073,
      "step": 228500
    },
    {
      "epoch": 0.85,
      "learning_rate": 6.813035240556842e-06,
      "loss": 2.075,
      "step": 229000
    },
    {
      "epoch": 0.85,
      "learning_rate": 6.650937634133952e-06,
      "loss": 2.0767,
      "step": 229500
    },
    {
      "epoch": 0.85,
      "learning_rate": 6.4906541664650325e-06,
      "loss": 2.0683,
      "step": 230000
    },
    {
      "epoch": 0.85,
      "learning_rate": 6.332191545341093e-06,
      "loss": 2.0783,
      "step": 230500
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 6.175556402351546e-06, | |
| "loss": 2.0665, | |
| "step": 231000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 6.020755292606617e-06, | |
| "loss": 2.0818, | |
| "step": 231500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 5.867794694463102e-06, | |
| "loss": 2.0693, | |
| "step": 232000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 5.716681009253189e-06, | |
| "loss": 2.0705, | |
| "step": 232500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 5.567420561016629e-06, | |
| "loss": 2.0818, | |
| "step": 233000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 5.420019596236003e-06, | |
| "loss": 2.0723, | |
| "step": 233500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 5.274484283575371e-06, | |
| "loss": 2.0833, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 5.1308207136220755e-06, | |
| "loss": 2.0764, | |
| "step": 234500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.989034898631872e-06, | |
| "loss": 2.0724, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.84913277227731e-06, | |
| "loss": 2.0718, | |
| "step": 235500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.711120189399443e-06, | |
| "loss": 2.0752, | |
| "step": 236000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.575002925762739e-06, | |
| "loss": 2.0718, | |
| "step": 236500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.440786677813458e-06, | |
| "loss": 2.0747, | |
| "step": 237000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.308477062441168e-06, | |
| "loss": 2.0714, | |
| "step": 237500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.17807961674373e-06, | |
| "loss": 2.0652, | |
| "step": 238000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.049599797795589e-06, | |
| "loss": 2.0726, | |
| "step": 238500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.923042982419334e-06, | |
| "loss": 2.0692, | |
| "step": 239000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.798414466960759e-06, | |
| "loss": 2.076, | |
| "step": 239500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.6757194670671513e-06, | |
| "loss": 2.0695, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.55496311746904e-06, | |
| "loss": 2.066, | |
| "step": 240500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.436150471765326e-06, | |
| "loss": 2.0671, | |
| "step": 241000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.319286502211766e-06, | |
| "loss": 2.0682, | |
| "step": 241500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.2043760995128903e-06, | |
| "loss": 2.0716, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.091424072617366e-06, | |
| "loss": 2.0684, | |
| "step": 242500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 2.9804351485166747e-06, | |
| "loss": 2.0631, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 2.8714139720473597e-06, | |
| "loss": 2.0686, | |
| "step": 243500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 2.7643651056965924e-06, | |
| "loss": 2.0651, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.659293029411264e-06, | |
| "loss": 2.0695, | |
| "step": 244500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.556202140410474e-06, | |
| "loss": 2.0704, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.4550967530015623e-06, | |
| "loss": 2.0668, | |
| "step": 245500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.3559810983994732e-06, | |
| "loss": 2.0656, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.258859324549778e-06, | |
| "loss": 2.0691, | |
| "step": 246500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.1637354959549884e-06, | |
| "loss": 2.0589, | |
| "step": 247000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.0706135935045333e-06, | |
| "loss": 2.067, | |
| "step": 247500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 1.9794975143081264e-06, | |
| "loss": 2.0654, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 1.8903910715326823e-06, | |
| "loss": 2.0635, | |
| "step": 248500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 1.8032979942427475e-06, | |
| "loss": 2.0673, | |
| "step": 249000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 1.7182219272444011e-06, | |
| "loss": 2.0662, | |
| "step": 249500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 1.635166430932772e-06, | |
| "loss": 2.0697, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_loss": 1.9344754219055176, | |
| "eval_runtime": 284.7729, | |
| "eval_samples_per_second": 351.157, | |
| "eval_steps_per_second": 43.895, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 1.5541349811430016e-06, | |
| "loss": 2.061, | |
| "step": 250500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 1.475130969004812e-06, | |
| "loss": 2.0696, | |
| "step": 251000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 1.3981577008005563e-06, | |
| "loss": 2.0691, | |
| "step": 251500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 1.3232183978268698e-06, | |
| "loss": 2.0622, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.2503161962598653e-06, | |
| "loss": 2.0714, | |
| "step": 252500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.1794541470238729e-06, | |
| "loss": 2.0664, | |
| "step": 253000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.1106352156637633e-06, | |
| "loss": 2.0606, | |
| "step": 253500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.0438622822208478e-06, | |
| "loss": 2.0645, | |
| "step": 254000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 9.791381411123513e-07, | |
| "loss": 2.0724, | |
| "step": 254500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 9.164655010144518e-07, | |
| "loss": 2.0629, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 8.558469847489314e-07, | |
| "loss": 2.0688, | |
| "step": 255500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 7.972851291734252e-07, | |
| "loss": 2.0566, | |
| "step": 256000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 7.407823850752338e-07, | |
| "loss": 2.0689, | |
| "step": 256500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 6.863411170687673e-07, | |
| "loss": 2.071, | |
| "step": 257000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 6.339636034966123e-07, | |
| "loss": 2.0665, | |
| "step": 257500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 5.836520363341258e-07, | |
| "loss": 2.0606, | |
| "step": 258000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 5.354085210977633e-07, | |
| "loss": 2.0646, | |
| "step": 258500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.892350767569276e-07, | |
| "loss": 2.0663, | |
| "step": 259000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.451336356494862e-07, | |
| "loss": 2.0683, | |
| "step": 259500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.031060434009026e-07, | |
| "loss": 2.0595, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.631540588470039e-07, | |
| "loss": 2.0691, | |
| "step": 260500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.252793539603671e-07, | |
| "loss": 2.0651, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 2.8948351378034753e-07, | |
| "loss": 2.0611, | |
| "step": 261500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 2.557680363467485e-07, | |
| "loss": 2.0733, | |
| "step": 262000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 2.2413433263713257e-07, | |
| "loss": 2.0591, | |
| "step": 262500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.9458372650776325e-07, | |
| "loss": 2.0626, | |
| "step": 263000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.6711745463821593e-07, | |
| "loss": 2.0669, | |
| "step": 263500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.4173666647959715e-07, | |
| "loss": 2.0701, | |
| "step": 264000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.1844242420647745e-07, | |
| "loss": 2.0679, | |
| "step": 264500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 9.723570267241577e-08, | |
| "loss": 2.0679, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 7.811738936916447e-08, | |
| "loss": 2.0643, | |
| "step": 265500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 6.108828438952663e-08, | |
| "loss": 2.0615, | |
| "step": 266000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.614910039389409e-08, | |
| "loss": 2.0511, | |
| "step": 266500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.3300462580387884e-08, | |
| "loss": 2.0594, | |
| "step": 267000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.254290865871811e-08, | |
| "loss": 2.0665, | |
| "step": 267500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.3876888827679679e-08, | |
| "loss": 2.0572, | |
| "step": 268000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 7.302765756300733e-09, | |
| "loss": 2.0682, | |
| "step": 268500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 2.820814568671448e-09, | |
| "loss": 2.0611, | |
| "step": 269000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.31222832436573e-10, | |
| "loss": 2.0658, | |
| "step": 269500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 269794, | |
| "total_flos": 7.264837577610363e+18, | |
| "train_loss": 1.8196985500431868, | |
| "train_runtime": 434920.5349, | |
| "train_samples_per_second": 148.879, | |
| "train_steps_per_second": 0.62 | |
| } | |
| ], | |
| "max_steps": 269794, | |
| "num_train_epochs": 1, | |
| "total_flos": 7.264837577610363e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |