mrm8488's picture
Initial commit
7e6c6d0
{
"best_metric": 0.08917281776666641,
"best_model_checkpoint": "/content/drive/MyDrive/vit-cifar10/checkpoint-263043",
"epoch": 100.0,
"global_step": 265700,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 1.9992472713586754e-05,
"loss": 0.3607,
"step": 100
},
{
"epoch": 0.08,
"learning_rate": 1.9984945427173507e-05,
"loss": 0.3215,
"step": 200
},
{
"epoch": 0.11,
"learning_rate": 1.997741814076026e-05,
"loss": 0.3187,
"step": 300
},
{
"epoch": 0.15,
"learning_rate": 1.9969890854347008e-05,
"loss": 0.3123,
"step": 400
},
{
"epoch": 0.19,
"learning_rate": 1.996236356793376e-05,
"loss": 0.3007,
"step": 500
},
{
"epoch": 0.23,
"learning_rate": 1.9954836281520513e-05,
"loss": 0.3073,
"step": 600
},
{
"epoch": 0.26,
"learning_rate": 1.9947308995107266e-05,
"loss": 0.3031,
"step": 700
},
{
"epoch": 0.3,
"learning_rate": 1.993978170869402e-05,
"loss": 0.3008,
"step": 800
},
{
"epoch": 0.34,
"learning_rate": 1.993225442228077e-05,
"loss": 0.2996,
"step": 900
},
{
"epoch": 0.38,
"learning_rate": 1.992472713586752e-05,
"loss": 0.3026,
"step": 1000
},
{
"epoch": 0.41,
"learning_rate": 1.9917199849454273e-05,
"loss": 0.3003,
"step": 1100
},
{
"epoch": 0.45,
"learning_rate": 1.9909672563041025e-05,
"loss": 0.2994,
"step": 1200
},
{
"epoch": 0.49,
"learning_rate": 1.9902145276627778e-05,
"loss": 0.2974,
"step": 1300
},
{
"epoch": 0.53,
"learning_rate": 1.989461799021453e-05,
"loss": 0.2973,
"step": 1400
},
{
"epoch": 0.56,
"learning_rate": 1.9887090703801283e-05,
"loss": 0.2995,
"step": 1500
},
{
"epoch": 0.6,
"learning_rate": 1.987956341738803e-05,
"loss": 0.2945,
"step": 1600
},
{
"epoch": 0.64,
"learning_rate": 1.9872036130974784e-05,
"loss": 0.296,
"step": 1700
},
{
"epoch": 0.68,
"learning_rate": 1.9864508844561537e-05,
"loss": 0.2904,
"step": 1800
},
{
"epoch": 0.72,
"learning_rate": 1.985698155814829e-05,
"loss": 0.2931,
"step": 1900
},
{
"epoch": 0.75,
"learning_rate": 1.9849454271735042e-05,
"loss": 0.2952,
"step": 2000
},
{
"epoch": 0.79,
"learning_rate": 1.9841926985321794e-05,
"loss": 0.2935,
"step": 2100
},
{
"epoch": 0.83,
"learning_rate": 1.9834399698908543e-05,
"loss": 0.2912,
"step": 2200
},
{
"epoch": 0.87,
"learning_rate": 1.9826872412495296e-05,
"loss": 0.2951,
"step": 2300
},
{
"epoch": 0.9,
"learning_rate": 1.981934512608205e-05,
"loss": 0.2873,
"step": 2400
},
{
"epoch": 0.94,
"learning_rate": 1.98118178396688e-05,
"loss": 0.2906,
"step": 2500
},
{
"epoch": 0.98,
"learning_rate": 1.9804290553255553e-05,
"loss": 0.289,
"step": 2600
},
{
"epoch": 1.0,
"eval_loss": 0.2940625846385956,
"eval_runtime": 45.1121,
"eval_samples_per_second": 166.253,
"eval_steps_per_second": 10.396,
"step": 2657
},
{
"epoch": 1.02,
"learning_rate": 1.9796763266842306e-05,
"loss": 0.2831,
"step": 2700
},
{
"epoch": 1.05,
"learning_rate": 1.978923598042906e-05,
"loss": 0.2941,
"step": 2800
},
{
"epoch": 1.09,
"learning_rate": 1.9781708694015808e-05,
"loss": 0.2866,
"step": 2900
},
{
"epoch": 1.13,
"learning_rate": 1.977418140760256e-05,
"loss": 0.2811,
"step": 3000
},
{
"epoch": 1.17,
"learning_rate": 1.9766654121189313e-05,
"loss": 0.2912,
"step": 3100
},
{
"epoch": 1.2,
"learning_rate": 1.9759126834776065e-05,
"loss": 0.289,
"step": 3200
},
{
"epoch": 1.24,
"learning_rate": 1.9751599548362818e-05,
"loss": 0.2907,
"step": 3300
},
{
"epoch": 1.28,
"learning_rate": 1.974407226194957e-05,
"loss": 0.2913,
"step": 3400
},
{
"epoch": 1.32,
"learning_rate": 1.973654497553632e-05,
"loss": 0.2863,
"step": 3500
},
{
"epoch": 1.35,
"learning_rate": 1.9729017689123072e-05,
"loss": 0.2822,
"step": 3600
},
{
"epoch": 1.39,
"learning_rate": 1.9721490402709824e-05,
"loss": 0.2808,
"step": 3700
},
{
"epoch": 1.43,
"learning_rate": 1.9713963116296577e-05,
"loss": 0.2836,
"step": 3800
},
{
"epoch": 1.47,
"learning_rate": 1.970643582988333e-05,
"loss": 0.2796,
"step": 3900
},
{
"epoch": 1.51,
"learning_rate": 1.9698908543470082e-05,
"loss": 0.2831,
"step": 4000
},
{
"epoch": 1.54,
"learning_rate": 1.969138125705683e-05,
"loss": 0.2878,
"step": 4100
},
{
"epoch": 1.58,
"learning_rate": 1.9683853970643583e-05,
"loss": 0.2767,
"step": 4200
},
{
"epoch": 1.62,
"learning_rate": 1.9676326684230336e-05,
"loss": 0.2835,
"step": 4300
},
{
"epoch": 1.66,
"learning_rate": 1.966879939781709e-05,
"loss": 0.2823,
"step": 4400
},
{
"epoch": 1.69,
"learning_rate": 1.966127211140384e-05,
"loss": 0.284,
"step": 4500
},
{
"epoch": 1.73,
"learning_rate": 1.9653744824990594e-05,
"loss": 0.2813,
"step": 4600
},
{
"epoch": 1.77,
"learning_rate": 1.9646217538577343e-05,
"loss": 0.2806,
"step": 4700
},
{
"epoch": 1.81,
"learning_rate": 1.9638690252164095e-05,
"loss": 0.2846,
"step": 4800
},
{
"epoch": 1.84,
"learning_rate": 1.9631162965750848e-05,
"loss": 0.2834,
"step": 4900
},
{
"epoch": 1.88,
"learning_rate": 1.96236356793376e-05,
"loss": 0.2821,
"step": 5000
},
{
"epoch": 1.92,
"learning_rate": 1.9616108392924353e-05,
"loss": 0.2837,
"step": 5100
},
{
"epoch": 1.96,
"learning_rate": 1.9608581106511105e-05,
"loss": 0.2782,
"step": 5200
},
{
"epoch": 1.99,
"learning_rate": 1.9601053820097858e-05,
"loss": 0.2858,
"step": 5300
},
{
"epoch": 2.0,
"eval_loss": 0.28088775277137756,
"eval_runtime": 45.112,
"eval_samples_per_second": 166.253,
"eval_steps_per_second": 10.396,
"step": 5314
},
{
"epoch": 2.03,
"learning_rate": 1.9593526533684607e-05,
"loss": 0.2826,
"step": 5400
},
{
"epoch": 2.07,
"learning_rate": 1.9585999247271363e-05,
"loss": 0.2858,
"step": 5500
},
{
"epoch": 2.11,
"learning_rate": 1.9578471960858112e-05,
"loss": 0.2782,
"step": 5600
},
{
"epoch": 2.15,
"learning_rate": 1.9570944674444864e-05,
"loss": 0.2779,
"step": 5700
},
{
"epoch": 2.18,
"learning_rate": 1.9563417388031617e-05,
"loss": 0.2792,
"step": 5800
},
{
"epoch": 2.22,
"learning_rate": 1.955589010161837e-05,
"loss": 0.2837,
"step": 5900
},
{
"epoch": 2.26,
"learning_rate": 1.954836281520512e-05,
"loss": 0.2792,
"step": 6000
},
{
"epoch": 2.3,
"learning_rate": 1.954083552879187e-05,
"loss": 0.2825,
"step": 6100
},
{
"epoch": 2.33,
"learning_rate": 1.9533308242378624e-05,
"loss": 0.2762,
"step": 6200
},
{
"epoch": 2.37,
"learning_rate": 1.9525780955965376e-05,
"loss": 0.283,
"step": 6300
},
{
"epoch": 2.41,
"learning_rate": 1.951825366955213e-05,
"loss": 0.2824,
"step": 6400
},
{
"epoch": 2.45,
"learning_rate": 1.951072638313888e-05,
"loss": 0.2787,
"step": 6500
},
{
"epoch": 2.48,
"learning_rate": 1.950319909672563e-05,
"loss": 0.2769,
"step": 6600
},
{
"epoch": 2.52,
"learning_rate": 1.9495671810312383e-05,
"loss": 0.2791,
"step": 6700
},
{
"epoch": 2.56,
"learning_rate": 1.9488144523899135e-05,
"loss": 0.2754,
"step": 6800
},
{
"epoch": 2.6,
"learning_rate": 1.9480617237485888e-05,
"loss": 0.2721,
"step": 6900
},
{
"epoch": 2.63,
"learning_rate": 1.947308995107264e-05,
"loss": 0.2796,
"step": 7000
},
{
"epoch": 2.67,
"learning_rate": 1.9465562664659393e-05,
"loss": 0.2771,
"step": 7100
},
{
"epoch": 2.71,
"learning_rate": 1.9458035378246142e-05,
"loss": 0.2778,
"step": 7200
},
{
"epoch": 2.75,
"learning_rate": 1.9450508091832894e-05,
"loss": 0.2773,
"step": 7300
},
{
"epoch": 2.79,
"learning_rate": 1.9442980805419647e-05,
"loss": 0.2771,
"step": 7400
},
{
"epoch": 2.82,
"learning_rate": 1.94354535190064e-05,
"loss": 0.2764,
"step": 7500
},
{
"epoch": 2.86,
"learning_rate": 1.9427926232593152e-05,
"loss": 0.2789,
"step": 7600
},
{
"epoch": 2.9,
"learning_rate": 1.9420398946179905e-05,
"loss": 0.2768,
"step": 7700
},
{
"epoch": 2.94,
"learning_rate": 1.9412871659766657e-05,
"loss": 0.2765,
"step": 7800
},
{
"epoch": 2.97,
"learning_rate": 1.9405344373353406e-05,
"loss": 0.2693,
"step": 7900
},
{
"epoch": 3.0,
"eval_loss": 0.2738477289676666,
"eval_runtime": 44.824,
"eval_samples_per_second": 167.321,
"eval_steps_per_second": 10.463,
"step": 7971
},
{
"epoch": 3.01,
"learning_rate": 1.9397817086940162e-05,
"loss": 0.277,
"step": 8000
},
{
"epoch": 3.05,
"learning_rate": 1.939028980052691e-05,
"loss": 0.2734,
"step": 8100
},
{
"epoch": 3.09,
"learning_rate": 1.9382762514113664e-05,
"loss": 0.2767,
"step": 8200
},
{
"epoch": 3.12,
"learning_rate": 1.9375235227700416e-05,
"loss": 0.2767,
"step": 8300
},
{
"epoch": 3.16,
"learning_rate": 1.936770794128717e-05,
"loss": 0.271,
"step": 8400
},
{
"epoch": 3.2,
"learning_rate": 1.9360180654873918e-05,
"loss": 0.2747,
"step": 8500
},
{
"epoch": 3.24,
"learning_rate": 1.9352653368460674e-05,
"loss": 0.2758,
"step": 8600
},
{
"epoch": 3.27,
"learning_rate": 1.9345126082047423e-05,
"loss": 0.2706,
"step": 8700
},
{
"epoch": 3.31,
"learning_rate": 1.9337598795634175e-05,
"loss": 0.2734,
"step": 8800
},
{
"epoch": 3.35,
"learning_rate": 1.9330071509220928e-05,
"loss": 0.2732,
"step": 8900
},
{
"epoch": 3.39,
"learning_rate": 1.932254422280768e-05,
"loss": 0.2724,
"step": 9000
},
{
"epoch": 3.42,
"learning_rate": 1.931501693639443e-05,
"loss": 0.2678,
"step": 9100
},
{
"epoch": 3.46,
"learning_rate": 1.9307489649981182e-05,
"loss": 0.2755,
"step": 9200
},
{
"epoch": 3.5,
"learning_rate": 1.9299962363567935e-05,
"loss": 0.264,
"step": 9300
},
{
"epoch": 3.54,
"learning_rate": 1.9292435077154687e-05,
"loss": 0.2706,
"step": 9400
},
{
"epoch": 3.58,
"learning_rate": 1.928490779074144e-05,
"loss": 0.2686,
"step": 9500
},
{
"epoch": 3.61,
"learning_rate": 1.9277380504328192e-05,
"loss": 0.2681,
"step": 9600
},
{
"epoch": 3.65,
"learning_rate": 1.926985321791494e-05,
"loss": 0.2671,
"step": 9700
},
{
"epoch": 3.69,
"learning_rate": 1.9262325931501694e-05,
"loss": 0.2668,
"step": 9800
},
{
"epoch": 3.73,
"learning_rate": 1.9254798645088446e-05,
"loss": 0.2621,
"step": 9900
},
{
"epoch": 3.76,
"learning_rate": 1.92472713586752e-05,
"loss": 0.2641,
"step": 10000
},
{
"epoch": 3.8,
"learning_rate": 1.923974407226195e-05,
"loss": 0.2645,
"step": 10100
},
{
"epoch": 3.84,
"learning_rate": 1.9232216785848704e-05,
"loss": 0.2593,
"step": 10200
},
{
"epoch": 3.88,
"learning_rate": 1.9224689499435456e-05,
"loss": 0.2604,
"step": 10300
},
{
"epoch": 3.91,
"learning_rate": 1.9217162213022205e-05,
"loss": 0.2614,
"step": 10400
},
{
"epoch": 3.95,
"learning_rate": 1.920963492660896e-05,
"loss": 0.2627,
"step": 10500
},
{
"epoch": 3.99,
"learning_rate": 1.920210764019571e-05,
"loss": 0.2578,
"step": 10600
},
{
"epoch": 4.0,
"eval_loss": 0.2545997202396393,
"eval_runtime": 44.2178,
"eval_samples_per_second": 169.615,
"eval_steps_per_second": 10.607,
"step": 10628
},
{
"epoch": 4.03,
"learning_rate": 1.9194580353782463e-05,
"loss": 0.2567,
"step": 10700
},
{
"epoch": 4.06,
"learning_rate": 1.9187053067369215e-05,
"loss": 0.2547,
"step": 10800
},
{
"epoch": 4.1,
"learning_rate": 1.9179525780955968e-05,
"loss": 0.2564,
"step": 10900
},
{
"epoch": 4.14,
"learning_rate": 1.9171998494542717e-05,
"loss": 0.2603,
"step": 11000
},
{
"epoch": 4.18,
"learning_rate": 1.9164471208129473e-05,
"loss": 0.2514,
"step": 11100
},
{
"epoch": 4.22,
"learning_rate": 1.9156943921716222e-05,
"loss": 0.2571,
"step": 11200
},
{
"epoch": 4.25,
"learning_rate": 1.9149416635302975e-05,
"loss": 0.2513,
"step": 11300
},
{
"epoch": 4.29,
"learning_rate": 1.9141889348889727e-05,
"loss": 0.2531,
"step": 11400
},
{
"epoch": 4.33,
"learning_rate": 1.913436206247648e-05,
"loss": 0.2485,
"step": 11500
},
{
"epoch": 4.37,
"learning_rate": 1.912683477606323e-05,
"loss": 0.2519,
"step": 11600
},
{
"epoch": 4.4,
"learning_rate": 1.9119307489649985e-05,
"loss": 0.2506,
"step": 11700
},
{
"epoch": 4.44,
"learning_rate": 1.9111780203236734e-05,
"loss": 0.2487,
"step": 11800
},
{
"epoch": 4.48,
"learning_rate": 1.9104252916823486e-05,
"loss": 0.2456,
"step": 11900
},
{
"epoch": 4.52,
"learning_rate": 1.909672563041024e-05,
"loss": 0.2493,
"step": 12000
},
{
"epoch": 4.55,
"learning_rate": 1.908919834399699e-05,
"loss": 0.2439,
"step": 12100
},
{
"epoch": 4.59,
"learning_rate": 1.908167105758374e-05,
"loss": 0.2434,
"step": 12200
},
{
"epoch": 4.63,
"learning_rate": 1.9074143771170493e-05,
"loss": 0.2454,
"step": 12300
},
{
"epoch": 4.67,
"learning_rate": 1.9066616484757246e-05,
"loss": 0.2406,
"step": 12400
},
{
"epoch": 4.7,
"learning_rate": 1.9059089198343998e-05,
"loss": 0.2392,
"step": 12500
},
{
"epoch": 4.74,
"learning_rate": 1.905156191193075e-05,
"loss": 0.2394,
"step": 12600
},
{
"epoch": 4.78,
"learning_rate": 1.9044034625517503e-05,
"loss": 0.2351,
"step": 12700
},
{
"epoch": 4.82,
"learning_rate": 1.9036507339104256e-05,
"loss": 0.2367,
"step": 12800
},
{
"epoch": 4.86,
"learning_rate": 1.9028980052691005e-05,
"loss": 0.233,
"step": 12900
},
{
"epoch": 4.89,
"learning_rate": 1.902145276627776e-05,
"loss": 0.2276,
"step": 13000
},
{
"epoch": 4.93,
"learning_rate": 1.901392547986451e-05,
"loss": 0.2323,
"step": 13100
},
{
"epoch": 4.97,
"learning_rate": 1.9006398193451262e-05,
"loss": 0.2211,
"step": 13200
},
{
"epoch": 5.0,
"eval_loss": 0.21532748639583588,
"eval_runtime": 44.0312,
"eval_samples_per_second": 170.334,
"eval_steps_per_second": 10.652,
"step": 13285
},
{
"epoch": 5.01,
"learning_rate": 1.8998870907038015e-05,
"loss": 0.2225,
"step": 13300
},
{
"epoch": 5.04,
"learning_rate": 1.8991343620624767e-05,
"loss": 0.2211,
"step": 13400
},
{
"epoch": 5.08,
"learning_rate": 1.8983816334211516e-05,
"loss": 0.2178,
"step": 13500
},
{
"epoch": 5.12,
"learning_rate": 1.8976289047798272e-05,
"loss": 0.2164,
"step": 13600
},
{
"epoch": 5.16,
"learning_rate": 1.896876176138502e-05,
"loss": 0.2144,
"step": 13700
},
{
"epoch": 5.19,
"learning_rate": 1.8961234474971774e-05,
"loss": 0.2093,
"step": 13800
},
{
"epoch": 5.23,
"learning_rate": 1.8953707188558526e-05,
"loss": 0.2087,
"step": 13900
},
{
"epoch": 5.27,
"learning_rate": 1.894617990214528e-05,
"loss": 0.2111,
"step": 14000
},
{
"epoch": 5.31,
"learning_rate": 1.8938652615732028e-05,
"loss": 0.2041,
"step": 14100
},
{
"epoch": 5.34,
"learning_rate": 1.8931125329318784e-05,
"loss": 0.2023,
"step": 14200
},
{
"epoch": 5.38,
"learning_rate": 1.8923598042905533e-05,
"loss": 0.2011,
"step": 14300
},
{
"epoch": 5.42,
"learning_rate": 1.8916070756492286e-05,
"loss": 0.2028,
"step": 14400
},
{
"epoch": 5.46,
"learning_rate": 1.8908543470079038e-05,
"loss": 0.198,
"step": 14500
},
{
"epoch": 5.49,
"learning_rate": 1.890101618366579e-05,
"loss": 0.1986,
"step": 14600
},
{
"epoch": 5.53,
"learning_rate": 1.889348889725254e-05,
"loss": 0.1959,
"step": 14700
},
{
"epoch": 5.57,
"learning_rate": 1.8885961610839296e-05,
"loss": 0.1953,
"step": 14800
},
{
"epoch": 5.61,
"learning_rate": 1.8878434324426048e-05,
"loss": 0.193,
"step": 14900
},
{
"epoch": 5.65,
"learning_rate": 1.8870907038012797e-05,
"loss": 0.1918,
"step": 15000
},
{
"epoch": 5.68,
"learning_rate": 1.886337975159955e-05,
"loss": 0.1911,
"step": 15100
},
{
"epoch": 5.72,
"learning_rate": 1.8855852465186302e-05,
"loss": 0.1889,
"step": 15200
},
{
"epoch": 5.76,
"learning_rate": 1.8848325178773055e-05,
"loss": 0.1879,
"step": 15300
},
{
"epoch": 5.8,
"learning_rate": 1.8840797892359804e-05,
"loss": 0.1844,
"step": 15400
},
{
"epoch": 5.83,
"learning_rate": 1.883327060594656e-05,
"loss": 0.1821,
"step": 15500
},
{
"epoch": 5.87,
"learning_rate": 1.882574331953331e-05,
"loss": 0.1808,
"step": 15600
},
{
"epoch": 5.91,
"learning_rate": 1.881821603312006e-05,
"loss": 0.1863,
"step": 15700
},
{
"epoch": 5.95,
"learning_rate": 1.8810688746706814e-05,
"loss": 0.1806,
"step": 15800
},
{
"epoch": 5.98,
"learning_rate": 1.8803161460293567e-05,
"loss": 0.1799,
"step": 15900
},
{
"epoch": 6.0,
"eval_loss": 0.1794862300157547,
"eval_runtime": 44.3468,
"eval_samples_per_second": 169.121,
"eval_steps_per_second": 10.576,
"step": 15942
},
{
"epoch": 6.02,
"learning_rate": 1.8795634173880316e-05,
"loss": 0.1773,
"step": 16000
},
{
"epoch": 6.06,
"learning_rate": 1.878810688746707e-05,
"loss": 0.1746,
"step": 16100
},
{
"epoch": 6.1,
"learning_rate": 1.878057960105382e-05,
"loss": 0.1772,
"step": 16200
},
{
"epoch": 6.13,
"learning_rate": 1.8773052314640573e-05,
"loss": 0.1724,
"step": 16300
},
{
"epoch": 6.17,
"learning_rate": 1.8765525028227326e-05,
"loss": 0.17,
"step": 16400
},
{
"epoch": 6.21,
"learning_rate": 1.8757997741814078e-05,
"loss": 0.1737,
"step": 16500
},
{
"epoch": 6.25,
"learning_rate": 1.8750470455400827e-05,
"loss": 0.1732,
"step": 16600
},
{
"epoch": 6.29,
"learning_rate": 1.8742943168987583e-05,
"loss": 0.1689,
"step": 16700
},
{
"epoch": 6.32,
"learning_rate": 1.8735415882574332e-05,
"loss": 0.1704,
"step": 16800
},
{
"epoch": 6.36,
"learning_rate": 1.8727888596161085e-05,
"loss": 0.1664,
"step": 16900
},
{
"epoch": 6.4,
"learning_rate": 1.8720361309747837e-05,
"loss": 0.1651,
"step": 17000
},
{
"epoch": 6.44,
"learning_rate": 1.871283402333459e-05,
"loss": 0.1678,
"step": 17100
},
{
"epoch": 6.47,
"learning_rate": 1.870530673692134e-05,
"loss": 0.1674,
"step": 17200
},
{
"epoch": 6.51,
"learning_rate": 1.8697779450508095e-05,
"loss": 0.1674,
"step": 17300
},
{
"epoch": 6.55,
"learning_rate": 1.8690252164094847e-05,
"loss": 0.1643,
"step": 17400
},
{
"epoch": 6.59,
"learning_rate": 1.8682724877681597e-05,
"loss": 0.1667,
"step": 17500
},
{
"epoch": 6.62,
"learning_rate": 1.867519759126835e-05,
"loss": 0.1601,
"step": 17600
},
{
"epoch": 6.66,
"learning_rate": 1.86676703048551e-05,
"loss": 0.1645,
"step": 17700
},
{
"epoch": 6.7,
"learning_rate": 1.8660143018441854e-05,
"loss": 0.1637,
"step": 17800
},
{
"epoch": 6.74,
"learning_rate": 1.8652615732028607e-05,
"loss": 0.1633,
"step": 17900
},
{
"epoch": 6.77,
"learning_rate": 1.864508844561536e-05,
"loss": 0.1651,
"step": 18000
},
{
"epoch": 6.81,
"learning_rate": 1.863756115920211e-05,
"loss": 0.1589,
"step": 18100
},
{
"epoch": 6.85,
"learning_rate": 1.863003387278886e-05,
"loss": 0.1612,
"step": 18200
},
{
"epoch": 6.89,
"learning_rate": 1.8622506586375613e-05,
"loss": 0.1629,
"step": 18300
},
{
"epoch": 6.93,
"learning_rate": 1.8614979299962366e-05,
"loss": 0.1571,
"step": 18400
},
{
"epoch": 6.96,
"learning_rate": 1.8607452013549115e-05,
"loss": 0.158,
"step": 18500
},
{
"epoch": 7.0,
"eval_loss": 0.162311390042305,
"eval_runtime": 44.6199,
"eval_samples_per_second": 168.087,
"eval_steps_per_second": 10.511,
"step": 18599
},
{
"epoch": 7.0,
"learning_rate": 1.859992472713587e-05,
"loss": 0.158,
"step": 18600
},
{
"epoch": 7.04,
"learning_rate": 1.859239744072262e-05,
"loss": 0.1588,
"step": 18700
},
{
"epoch": 7.08,
"learning_rate": 1.8584870154309373e-05,
"loss": 0.1556,
"step": 18800
},
{
"epoch": 7.11,
"learning_rate": 1.8577342867896125e-05,
"loss": 0.1552,
"step": 18900
},
{
"epoch": 7.15,
"learning_rate": 1.8569815581482878e-05,
"loss": 0.1567,
"step": 19000
},
{
"epoch": 7.19,
"learning_rate": 1.8562288295069627e-05,
"loss": 0.1533,
"step": 19100
},
{
"epoch": 7.23,
"learning_rate": 1.8554761008656383e-05,
"loss": 0.1543,
"step": 19200
},
{
"epoch": 7.26,
"learning_rate": 1.854723372224313e-05,
"loss": 0.1545,
"step": 19300
},
{
"epoch": 7.3,
"learning_rate": 1.8539706435829884e-05,
"loss": 0.1557,
"step": 19400
},
{
"epoch": 7.34,
"learning_rate": 1.8532179149416637e-05,
"loss": 0.1524,
"step": 19500
},
{
"epoch": 7.38,
"learning_rate": 1.852465186300339e-05,
"loss": 0.1538,
"step": 19600
},
{
"epoch": 7.41,
"learning_rate": 1.851712457659014e-05,
"loss": 0.1533,
"step": 19700
},
{
"epoch": 7.45,
"learning_rate": 1.8509597290176894e-05,
"loss": 0.1506,
"step": 19800
},
{
"epoch": 7.49,
"learning_rate": 1.8502070003763647e-05,
"loss": 0.1499,
"step": 19900
},
{
"epoch": 7.53,
"learning_rate": 1.8494542717350396e-05,
"loss": 0.1514,
"step": 20000
},
{
"epoch": 7.56,
"learning_rate": 1.848701543093715e-05,
"loss": 0.1497,
"step": 20100
},
{
"epoch": 7.6,
"learning_rate": 1.84794881445239e-05,
"loss": 0.151,
"step": 20200
},
{
"epoch": 7.64,
"learning_rate": 1.8471960858110653e-05,
"loss": 0.1523,
"step": 20300
},
{
"epoch": 7.68,
"learning_rate": 1.8464433571697406e-05,
"loss": 0.1484,
"step": 20400
},
{
"epoch": 7.72,
"learning_rate": 1.845690628528416e-05,
"loss": 0.15,
"step": 20500
},
{
"epoch": 7.75,
"learning_rate": 1.8449378998870908e-05,
"loss": 0.1467,
"step": 20600
},
{
"epoch": 7.79,
"learning_rate": 1.844185171245766e-05,
"loss": 0.1467,
"step": 20700
},
{
"epoch": 7.83,
"learning_rate": 1.8434324426044413e-05,
"loss": 0.1469,
"step": 20800
},
{
"epoch": 7.87,
"learning_rate": 1.8426797139631165e-05,
"loss": 0.148,
"step": 20900
},
{
"epoch": 7.9,
"learning_rate": 1.8419269853217914e-05,
"loss": 0.1484,
"step": 21000
},
{
"epoch": 7.94,
"learning_rate": 1.841174256680467e-05,
"loss": 0.1459,
"step": 21100
},
{
"epoch": 7.98,
"learning_rate": 1.840421528039142e-05,
"loss": 0.1481,
"step": 21200
},
{
"epoch": 8.0,
"eval_loss": 0.14529532194137573,
"eval_runtime": 44.2737,
"eval_samples_per_second": 169.401,
"eval_steps_per_second": 10.593,
"step": 21256
},
{
"epoch": 8.02,
"learning_rate": 1.8396687993978172e-05,
"loss": 0.1446,
"step": 21300
},
{
"epoch": 8.05,
"learning_rate": 1.8389160707564924e-05,
"loss": 0.1465,
"step": 21400
},
{
"epoch": 8.09,
"learning_rate": 1.8381633421151677e-05,
"loss": 0.1458,
"step": 21500
},
{
"epoch": 8.13,
"learning_rate": 1.8374106134738426e-05,
"loss": 0.145,
"step": 21600
},
{
"epoch": 8.17,
"learning_rate": 1.8366578848325182e-05,
"loss": 0.1457,
"step": 21700
},
{
"epoch": 8.2,
"learning_rate": 1.835905156191193e-05,
"loss": 0.1422,
"step": 21800
},
{
"epoch": 8.24,
"learning_rate": 1.8351524275498683e-05,
"loss": 0.1435,
"step": 21900
},
{
"epoch": 8.28,
"learning_rate": 1.8343996989085436e-05,
"loss": 0.1456,
"step": 22000
},
{
"epoch": 8.32,
"learning_rate": 1.833646970267219e-05,
"loss": 0.1472,
"step": 22100
},
{
"epoch": 8.36,
"learning_rate": 1.8328942416258938e-05,
"loss": 0.1434,
"step": 22200
},
{
"epoch": 8.39,
"learning_rate": 1.8321415129845694e-05,
"loss": 0.1446,
"step": 22300
},
{
"epoch": 8.43,
"learning_rate": 1.8313887843432446e-05,
"loss": 0.1464,
"step": 22400
},
{
"epoch": 8.47,
"learning_rate": 1.8306360557019195e-05,
"loss": 0.1446,
"step": 22500
},
{
"epoch": 8.51,
"learning_rate": 1.8298833270605948e-05,
"loss": 0.1428,
"step": 22600
},
{
"epoch": 8.54,
"learning_rate": 1.82913059841927e-05,
"loss": 0.1437,
"step": 22700
},
{
"epoch": 8.58,
"learning_rate": 1.8283778697779453e-05,
"loss": 0.1412,
"step": 22800
},
{
"epoch": 8.62,
"learning_rate": 1.8276251411366205e-05,
"loss": 0.1396,
"step": 22900
},
{
"epoch": 8.66,
"learning_rate": 1.8268724124952958e-05,
"loss": 0.1426,
"step": 23000
},
{
"epoch": 8.69,
"learning_rate": 1.8261196838539707e-05,
"loss": 0.1421,
"step": 23100
},
{
"epoch": 8.73,
"learning_rate": 1.825366955212646e-05,
"loss": 0.1418,
"step": 23200
},
{
"epoch": 8.77,
"learning_rate": 1.8246142265713212e-05,
"loss": 0.1403,
"step": 23300
},
{
"epoch": 8.81,
"learning_rate": 1.8238614979299964e-05,
"loss": 0.1416,
"step": 23400
},
{
"epoch": 8.84,
"learning_rate": 1.8231087692886717e-05,
"loss": 0.1403,
"step": 23500
},
{
"epoch": 8.88,
"learning_rate": 1.822356040647347e-05,
"loss": 0.1396,
"step": 23600
},
{
"epoch": 8.92,
"learning_rate": 1.821603312006022e-05,
"loss": 0.1378,
"step": 23700
},
{
"epoch": 8.96,
"learning_rate": 1.820850583364697e-05,
"loss": 0.1392,
"step": 23800
},
{
"epoch": 9.0,
"learning_rate": 1.8200978547233724e-05,
"loss": 0.1391,
"step": 23900
},
{
"epoch": 9.0,
"eval_loss": 0.13683784008026123,
"eval_runtime": 44.0792,
"eval_samples_per_second": 170.148,
"eval_steps_per_second": 10.64,
"step": 23913
},
{
"epoch": 9.03,
"learning_rate": 1.8193451260820476e-05,
"loss": 0.1407,
"step": 24000
},
{
"epoch": 9.07,
"learning_rate": 1.8185923974407225e-05,
"loss": 0.1386,
"step": 24100
},
{
"epoch": 9.11,
"learning_rate": 1.817839668799398e-05,
"loss": 0.1385,
"step": 24200
},
{
"epoch": 9.15,
"learning_rate": 1.817086940158073e-05,
"loss": 0.1403,
"step": 24300
},
{
"epoch": 9.18,
"learning_rate": 1.8163342115167483e-05,
"loss": 0.1395,
"step": 24400
},
{
"epoch": 9.22,
"learning_rate": 1.8155814828754235e-05,
"loss": 0.1374,
"step": 24500
},
{
"epoch": 9.26,
"learning_rate": 1.8148287542340988e-05,
"loss": 0.1354,
"step": 24600
},
{
"epoch": 9.3,
"learning_rate": 1.8140760255927737e-05,
"loss": 0.1367,
"step": 24700
},
{
"epoch": 9.33,
"learning_rate": 1.8133232969514493e-05,
"loss": 0.1389,
"step": 24800
},
{
"epoch": 9.37,
"learning_rate": 1.8125705683101245e-05,
"loss": 0.1355,
"step": 24900
},
{
"epoch": 9.41,
"learning_rate": 1.8118178396687994e-05,
"loss": 0.1359,
"step": 25000
},
{
"epoch": 9.45,
"learning_rate": 1.8110651110274747e-05,
"loss": 0.1351,
"step": 25100
},
{
"epoch": 9.48,
"learning_rate": 1.81031238238615e-05,
"loss": 0.1381,
"step": 25200
},
{
"epoch": 9.52,
"learning_rate": 1.8095596537448252e-05,
"loss": 0.1364,
"step": 25300
},
{
"epoch": 9.56,
"learning_rate": 1.8088069251035005e-05,
"loss": 0.1339,
"step": 25400
},
{
"epoch": 9.6,
"learning_rate": 1.8080541964621757e-05,
"loss": 0.1346,
"step": 25500
},
{
"epoch": 9.63,
"learning_rate": 1.8073014678208506e-05,
"loss": 0.1338,
"step": 25600
},
{
"epoch": 9.67,
"learning_rate": 1.806548739179526e-05,
"loss": 0.1332,
"step": 25700
},
{
"epoch": 9.71,
"learning_rate": 1.805796010538201e-05,
"loss": 0.1344,
"step": 25800
},
{
"epoch": 9.75,
"learning_rate": 1.8050432818968764e-05,
"loss": 0.134,
"step": 25900
},
{
"epoch": 9.79,
"learning_rate": 1.8042905532555516e-05,
"loss": 0.1365,
"step": 26000
},
{
"epoch": 9.82,
"learning_rate": 1.803537824614227e-05,
"loss": 0.1354,
"step": 26100
},
{
"epoch": 9.86,
"learning_rate": 1.8027850959729018e-05,
"loss": 0.1327,
"step": 26200
},
{
"epoch": 9.9,
"learning_rate": 1.802032367331577e-05,
"loss": 0.1366,
"step": 26300
},
{
"epoch": 9.94,
"learning_rate": 1.8012796386902523e-05,
"loss": 0.1343,
"step": 26400
},
{
"epoch": 9.97,
"learning_rate": 1.8005269100489275e-05,
"loss": 0.1348,
"step": 26500
},
{
"epoch": 10.0,
"eval_loss": 0.13540224730968475,
"eval_runtime": 43.8422,
"eval_samples_per_second": 171.068,
"eval_steps_per_second": 10.697,
"step": 26570
},
{
"epoch": 10.01,
"learning_rate": 1.7997741814076028e-05,
"loss": 0.134,
"step": 26600
},
{
"epoch": 10.05,
"learning_rate": 1.799021452766278e-05,
"loss": 0.1327,
"step": 26700
},
{
"epoch": 10.09,
"learning_rate": 1.798268724124953e-05,
"loss": 0.1303,
"step": 26800
},
{
"epoch": 10.12,
"learning_rate": 1.7975159954836282e-05,
"loss": 0.1343,
"step": 26900
},
{
"epoch": 10.16,
"learning_rate": 1.7967632668423035e-05,
"loss": 0.1323,
"step": 27000
},
{
"epoch": 10.2,
"learning_rate": 1.7960105382009787e-05,
"loss": 0.1318,
"step": 27100
},
{
"epoch": 10.24,
"learning_rate": 1.7952578095596536e-05,
"loss": 0.1322,
"step": 27200
},
{
"epoch": 10.27,
"learning_rate": 1.7945050809183292e-05,
"loss": 0.1318,
"step": 27300
},
{
"epoch": 10.31,
"learning_rate": 1.7937523522770045e-05,
"loss": 0.1324,
"step": 27400
},
{
"epoch": 10.35,
"learning_rate": 1.7929996236356794e-05,
"loss": 0.1306,
"step": 27500
},
{
"epoch": 10.39,
"learning_rate": 1.7922468949943546e-05,
"loss": 0.1283,
"step": 27600
},
{
"epoch": 10.43,
"learning_rate": 1.79149416635303e-05,
"loss": 0.1311,
"step": 27700
},
{
"epoch": 10.46,
"learning_rate": 1.790741437711705e-05,
"loss": 0.1313,
"step": 27800
},
{
"epoch": 10.5,
"learning_rate": 1.7899887090703804e-05,
"loss": 0.1291,
"step": 27900
},
{
"epoch": 10.54,
"learning_rate": 1.7892359804290556e-05,
"loss": 0.1291,
"step": 28000
},
{
"epoch": 10.58,
"learning_rate": 1.7884832517877305e-05,
"loss": 0.1314,
"step": 28100
},
{
"epoch": 10.61,
"learning_rate": 1.7877305231464058e-05,
"loss": 0.1294,
"step": 28200
},
{
"epoch": 10.65,
"learning_rate": 1.786977794505081e-05,
"loss": 0.1308,
"step": 28300
},
{
"epoch": 10.69,
"learning_rate": 1.7862250658637563e-05,
"loss": 0.1317,
"step": 28400
},
{
"epoch": 10.73,
"learning_rate": 1.7854723372224315e-05,
"loss": 0.129,
"step": 28500
},
{
"epoch": 10.76,
"learning_rate": 1.7847196085811068e-05,
"loss": 0.1307,
"step": 28600
},
{
"epoch": 10.8,
"learning_rate": 1.7839668799397817e-05,
"loss": 0.1302,
"step": 28700
},
{
"epoch": 10.84,
"learning_rate": 1.783214151298457e-05,
"loss": 0.1283,
"step": 28800
},
{
"epoch": 10.88,
"learning_rate": 1.7824614226571322e-05,
"loss": 0.1277,
"step": 28900
},
{
"epoch": 10.91,
"learning_rate": 1.7817086940158075e-05,
"loss": 0.1287,
"step": 29000
},
{
"epoch": 10.95,
"learning_rate": 1.7809559653744827e-05,
"loss": 0.13,
"step": 29100
},
{
"epoch": 10.99,
"learning_rate": 1.780203236733158e-05,
"loss": 0.129,
"step": 29200
},
{
"epoch": 11.0,
"eval_loss": 0.12486864626407623,
"eval_runtime": 44.5752,
"eval_samples_per_second": 168.255,
"eval_steps_per_second": 10.522,
"step": 29227
},
{
"epoch": 11.03,
"learning_rate": 1.779450508091833e-05,
"loss": 0.1259,
"step": 29300
},
{
"epoch": 11.07,
"learning_rate": 1.778697779450508e-05,
"loss": 0.128,
"step": 29400
},
{
"epoch": 11.1,
"learning_rate": 1.7779450508091834e-05,
"loss": 0.127,
"step": 29500
},
{
"epoch": 11.14,
"learning_rate": 1.7771923221678586e-05,
"loss": 0.1277,
"step": 29600
},
{
"epoch": 11.18,
"learning_rate": 1.776439593526534e-05,
"loss": 0.1271,
"step": 29700
},
{
"epoch": 11.22,
"learning_rate": 1.775686864885209e-05,
"loss": 0.1263,
"step": 29800
},
{
"epoch": 11.25,
"learning_rate": 1.7749341362438844e-05,
"loss": 0.1259,
"step": 29900
},
{
"epoch": 11.29,
"learning_rate": 1.7741814076025593e-05,
"loss": 0.1257,
"step": 30000
},
{
"epoch": 11.33,
"learning_rate": 1.7734286789612346e-05,
"loss": 0.125,
"step": 30100
},
{
"epoch": 11.37,
"learning_rate": 1.7726759503199098e-05,
"loss": 0.128,
"step": 30200
},
{
"epoch": 11.4,
"learning_rate": 1.771923221678585e-05,
"loss": 0.1271,
"step": 30300
},
{
"epoch": 11.44,
"learning_rate": 1.7711704930372603e-05,
"loss": 0.1268,
"step": 30400
},
{
"epoch": 11.48,
"learning_rate": 1.7704177643959356e-05,
"loss": 0.1262,
"step": 30500
},
{
"epoch": 11.52,
"learning_rate": 1.7696650357546105e-05,
"loss": 0.1247,
"step": 30600
},
{
"epoch": 11.55,
"learning_rate": 1.7689123071132857e-05,
"loss": 0.1243,
"step": 30700
},
{
"epoch": 11.59,
"learning_rate": 1.768159578471961e-05,
"loss": 0.1249,
"step": 30800
},
{
"epoch": 11.63,
"learning_rate": 1.7674068498306362e-05,
"loss": 0.1255,
"step": 30900
},
{
"epoch": 11.67,
"learning_rate": 1.7666541211893115e-05,
"loss": 0.1258,
"step": 31000
},
{
"epoch": 11.7,
"learning_rate": 1.7659013925479867e-05,
"loss": 0.1234,
"step": 31100
},
{
"epoch": 11.74,
"learning_rate": 1.7651486639066616e-05,
"loss": 0.1226,
"step": 31200
},
{
"epoch": 11.78,
"learning_rate": 1.764395935265337e-05,
"loss": 0.1251,
"step": 31300
},
{
"epoch": 11.82,
"learning_rate": 1.763643206624012e-05,
"loss": 0.1238,
"step": 31400
},
{
"epoch": 11.86,
"learning_rate": 1.7628904779826874e-05,
"loss": 0.1262,
"step": 31500
},
{
"epoch": 11.89,
"learning_rate": 1.7621377493413626e-05,
"loss": 0.1249,
"step": 31600
},
{
"epoch": 11.93,
"learning_rate": 1.761385020700038e-05,
"loss": 0.1224,
"step": 31700
},
{
"epoch": 11.97,
"learning_rate": 1.7606322920587128e-05,
"loss": 0.126,
"step": 31800
},
{
"epoch": 12.0,
"eval_loss": 0.12289831042289734,
"eval_runtime": 44.279,
"eval_samples_per_second": 169.381,
"eval_steps_per_second": 10.592,
"step": 31884
},
{
"epoch": 12.01,
"learning_rate": 1.759879563417388e-05,
"loss": 0.1247,
"step": 31900
},
{
"epoch": 12.04,
"learning_rate": 1.7591268347760633e-05,
"loss": 0.1224,
"step": 32000
},
{
"epoch": 12.08,
"learning_rate": 1.7583741061347386e-05,
"loss": 0.1225,
"step": 32100
},
{
"epoch": 12.12,
"learning_rate": 1.7576213774934138e-05,
"loss": 0.1247,
"step": 32200
},
{
"epoch": 12.16,
"learning_rate": 1.756868648852089e-05,
"loss": 0.1244,
"step": 32300
},
{
"epoch": 12.19,
"learning_rate": 1.7561159202107643e-05,
"loss": 0.1231,
"step": 32400
},
{
"epoch": 12.23,
"learning_rate": 1.7553631915694392e-05,
"loss": 0.1215,
"step": 32500
},
{
"epoch": 12.27,
"learning_rate": 1.7546104629281148e-05,
"loss": 0.1238,
"step": 32600
},
{
"epoch": 12.31,
"learning_rate": 1.7538577342867897e-05,
"loss": 0.1251,
"step": 32700
},
{
"epoch": 12.34,
"learning_rate": 1.753105005645465e-05,
"loss": 0.1248,
"step": 32800
},
{
"epoch": 12.38,
"learning_rate": 1.7523522770041402e-05,
"loss": 0.1227,
"step": 32900
},
{
"epoch": 12.42,
"learning_rate": 1.7515995483628155e-05,
"loss": 0.1219,
"step": 33000
},
{
"epoch": 12.46,
"learning_rate": 1.7508468197214904e-05,
"loss": 0.121,
"step": 33100
},
{
"epoch": 12.5,
"learning_rate": 1.7500940910801657e-05,
"loss": 0.1225,
"step": 33200
},
{
"epoch": 12.53,
"learning_rate": 1.749341362438841e-05,
"loss": 0.1224,
"step": 33300
},
{
"epoch": 12.57,
"learning_rate": 1.748588633797516e-05,
"loss": 0.1214,
"step": 33400
},
{
"epoch": 12.61,
"learning_rate": 1.7478359051561914e-05,
"loss": 0.1217,
"step": 33500
},
{
"epoch": 12.65,
"learning_rate": 1.7470831765148667e-05,
"loss": 0.1201,
"step": 33600
},
{
"epoch": 12.68,
"learning_rate": 1.7463304478735416e-05,
"loss": 0.1212,
"step": 33700
},
{
"epoch": 12.72,
"learning_rate": 1.7455777192322168e-05,
"loss": 0.1218,
"step": 33800
},
{
"epoch": 12.76,
"learning_rate": 1.744824990590892e-05,
"loss": 0.1221,
"step": 33900
},
{
"epoch": 12.8,
"learning_rate": 1.7440722619495673e-05,
"loss": 0.1199,
"step": 34000
},
{
"epoch": 12.83,
"learning_rate": 1.7433195333082426e-05,
"loss": 0.122,
"step": 34100
},
{
"epoch": 12.87,
"learning_rate": 1.7425668046669178e-05,
"loss": 0.12,
"step": 34200
},
{
"epoch": 12.91,
"learning_rate": 1.7418140760255927e-05,
"loss": 0.1222,
"step": 34300
},
{
"epoch": 12.95,
"learning_rate": 1.741061347384268e-05,
"loss": 0.1228,
"step": 34400
},
{
"epoch": 12.98,
"learning_rate": 1.7403086187429432e-05,
"loss": 0.1216,
"step": 34500
},
{
"epoch": 13.0,
"eval_loss": 0.11841125041246414,
"eval_runtime": 44.6411,
"eval_samples_per_second": 168.006,
"eval_steps_per_second": 10.506,
"step": 34541
},
{
"epoch": 13.02,
"learning_rate": 1.7395558901016185e-05,
"loss": 0.1195,
"step": 34600
},
{
"epoch": 13.06,
"learning_rate": 1.7388031614602937e-05,
"loss": 0.1211,
"step": 34700
},
{
"epoch": 13.1,
"learning_rate": 1.738050432818969e-05,
"loss": 0.1209,
"step": 34800
},
{
"epoch": 13.14,
"learning_rate": 1.7372977041776442e-05,
"loss": 0.122,
"step": 34900
},
{
"epoch": 13.17,
"learning_rate": 1.736544975536319e-05,
"loss": 0.1206,
"step": 35000
},
{
"epoch": 13.21,
"learning_rate": 1.7357922468949947e-05,
"loss": 0.1198,
"step": 35100
},
{
"epoch": 13.25,
"learning_rate": 1.7350395182536697e-05,
"loss": 0.1196,
"step": 35200
},
{
"epoch": 13.29,
"learning_rate": 1.734286789612345e-05,
"loss": 0.1192,
"step": 35300
},
{
"epoch": 13.32,
"learning_rate": 1.73353406097102e-05,
"loss": 0.12,
"step": 35400
},
{
"epoch": 13.36,
"learning_rate": 1.7327813323296954e-05,
"loss": 0.1179,
"step": 35500
},
{
"epoch": 13.4,
"learning_rate": 1.7320286036883703e-05,
"loss": 0.1201,
"step": 35600
},
{
"epoch": 13.44,
"learning_rate": 1.731275875047046e-05,
"loss": 0.1181,
"step": 35700
},
{
"epoch": 13.47,
"learning_rate": 1.730523146405721e-05,
"loss": 0.1185,
"step": 35800
},
{
"epoch": 13.51,
"learning_rate": 1.729770417764396e-05,
"loss": 0.1171,
"step": 35900
},
{
"epoch": 13.55,
"learning_rate": 1.7290176891230713e-05,
"loss": 0.1202,
"step": 36000
},
{
"epoch": 13.59,
"learning_rate": 1.7282649604817466e-05,
"loss": 0.1191,
"step": 36100
},
{
"epoch": 13.62,
"learning_rate": 1.7275122318404215e-05,
"loss": 0.1201,
"step": 36200
},
{
"epoch": 13.66,
"learning_rate": 1.7267595031990967e-05,
"loss": 0.1188,
"step": 36300
},
{
"epoch": 13.7,
"learning_rate": 1.726006774557772e-05,
"loss": 0.1191,
"step": 36400
},
{
"epoch": 13.74,
"learning_rate": 1.7252540459164473e-05,
"loss": 0.117,
"step": 36500
},
{
"epoch": 13.77,
"learning_rate": 1.7245013172751225e-05,
"loss": 0.1177,
"step": 36600
},
{
"epoch": 13.81,
"learning_rate": 1.7237485886337978e-05,
"loss": 0.1173,
"step": 36700
},
{
"epoch": 13.85,
"learning_rate": 1.7229958599924727e-05,
"loss": 0.1176,
"step": 36800
},
{
"epoch": 13.89,
"learning_rate": 1.722243131351148e-05,
"loss": 0.1177,
"step": 36900
},
{
"epoch": 13.93,
"learning_rate": 1.7214904027098232e-05,
"loss": 0.1168,
"step": 37000
},
{
"epoch": 13.96,
"learning_rate": 1.7207376740684984e-05,
"loss": 0.1175,
"step": 37100
},
{
"epoch": 14.0,
"eval_loss": 0.1184767335653305,
"eval_runtime": 44.9958,
"eval_samples_per_second": 166.682,
"eval_steps_per_second": 10.423,
"step": 37198
},
{
"epoch": 14.0,
"learning_rate": 1.7199849454271737e-05,
"loss": 0.1172,
"step": 37200
},
{
"epoch": 14.04,
"learning_rate": 1.719232216785849e-05,
"loss": 0.117,
"step": 37300
},
{
"epoch": 14.08,
"learning_rate": 1.7184794881445242e-05,
"loss": 0.1175,
"step": 37400
},
{
"epoch": 14.11,
"learning_rate": 1.717726759503199e-05,
"loss": 0.1164,
"step": 37500
},
{
"epoch": 14.15,
"learning_rate": 1.7169740308618747e-05,
"loss": 0.1161,
"step": 37600
},
{
"epoch": 14.19,
"learning_rate": 1.7162213022205496e-05,
"loss": 0.1183,
"step": 37700
},
{
"epoch": 14.23,
"learning_rate": 1.715468573579225e-05,
"loss": 0.1161,
"step": 37800
},
{
"epoch": 14.26,
"learning_rate": 1.7147158449379e-05,
"loss": 0.117,
"step": 37900
},
{
"epoch": 14.3,
"learning_rate": 1.7139631162965753e-05,
"loss": 0.1169,
"step": 38000
},
{
"epoch": 14.34,
"learning_rate": 1.7132103876552503e-05,
"loss": 0.1171,
"step": 38100
},
{
"epoch": 14.38,
"learning_rate": 1.712457659013926e-05,
"loss": 0.1165,
"step": 38200
},
{
"epoch": 14.41,
"learning_rate": 1.7117049303726008e-05,
"loss": 0.115,
"step": 38300
},
{
"epoch": 14.45,
"learning_rate": 1.710952201731276e-05,
"loss": 0.1164,
"step": 38400
},
{
"epoch": 14.49,
"learning_rate": 1.7101994730899513e-05,
"loss": 0.1151,
"step": 38500
},
{
"epoch": 14.53,
"learning_rate": 1.7094467444486265e-05,
"loss": 0.1163,
"step": 38600
},
{
"epoch": 14.57,
"learning_rate": 1.7086940158073014e-05,
"loss": 0.1157,
"step": 38700
},
{
"epoch": 14.6,
"learning_rate": 1.7079412871659767e-05,
"loss": 0.1154,
"step": 38800
},
{
"epoch": 14.64,
"learning_rate": 1.707188558524652e-05,
"loss": 0.1145,
"step": 38900
},
{
"epoch": 14.68,
"learning_rate": 1.7064358298833272e-05,
"loss": 0.1153,
"step": 39000
},
{
"epoch": 14.72,
"learning_rate": 1.7056831012420024e-05,
"loss": 0.1163,
"step": 39100
},
{
"epoch": 14.75,
"learning_rate": 1.7049303726006777e-05,
"loss": 0.1173,
"step": 39200
},
{
"epoch": 14.79,
"learning_rate": 1.7041776439593526e-05,
"loss": 0.1161,
"step": 39300
},
{
"epoch": 14.83,
"learning_rate": 1.703424915318028e-05,
"loss": 0.1144,
"step": 39400
},
{
"epoch": 14.87,
"learning_rate": 1.702672186676703e-05,
"loss": 0.1152,
"step": 39500
},
{
"epoch": 14.9,
"learning_rate": 1.7019194580353783e-05,
"loss": 0.1129,
"step": 39600
},
{
"epoch": 14.94,
"learning_rate": 1.7011667293940536e-05,
"loss": 0.1159,
"step": 39700
},
{
"epoch": 14.98,
"learning_rate": 1.700414000752729e-05,
"loss": 0.1137,
"step": 39800
},
{
"epoch": 15.0,
"eval_loss": 0.11463519930839539,
"eval_runtime": 44.7206,
"eval_samples_per_second": 167.708,
"eval_steps_per_second": 10.487,
"step": 39855
},
{
"epoch": 15.02,
"learning_rate": 1.699661272111404e-05,
"loss": 0.1136,
"step": 39900
},
{
"epoch": 15.05,
"learning_rate": 1.698908543470079e-05,
"loss": 0.1154,
"step": 40000
},
{
"epoch": 15.09,
"learning_rate": 1.6981558148287546e-05,
"loss": 0.1119,
"step": 40100
},
{
"epoch": 15.13,
"learning_rate": 1.6974030861874295e-05,
"loss": 0.1147,
"step": 40200
},
{
"epoch": 15.17,
"learning_rate": 1.6966503575461048e-05,
"loss": 0.1133,
"step": 40300
},
{
"epoch": 15.21,
"learning_rate": 1.69589762890478e-05,
"loss": 0.1159,
"step": 40400
},
{
"epoch": 15.24,
"learning_rate": 1.6951449002634553e-05,
"loss": 0.1123,
"step": 40500
},
{
"epoch": 15.28,
"learning_rate": 1.6943921716221302e-05,
"loss": 0.1144,
"step": 40600
},
{
"epoch": 15.32,
"learning_rate": 1.6936394429808058e-05,
"loss": 0.1156,
"step": 40700
},
{
"epoch": 15.36,
"learning_rate": 1.6928867143394807e-05,
"loss": 0.115,
"step": 40800
},
{
"epoch": 15.39,
"learning_rate": 1.692133985698156e-05,
"loss": 0.1129,
"step": 40900
},
{
"epoch": 15.43,
"learning_rate": 1.6913812570568312e-05,
"loss": 0.1136,
"step": 41000
},
{
"epoch": 15.47,
"learning_rate": 1.6906285284155064e-05,
"loss": 0.1127,
"step": 41100
},
{
"epoch": 15.51,
"learning_rate": 1.6898757997741814e-05,
"loss": 0.1119,
"step": 41200
},
{
"epoch": 15.54,
"learning_rate": 1.689123071132857e-05,
"loss": 0.1131,
"step": 41300
},
{
"epoch": 15.58,
"learning_rate": 1.688370342491532e-05,
"loss": 0.1112,
"step": 41400
},
{
"epoch": 15.62,
"learning_rate": 1.687617613850207e-05,
"loss": 0.1149,
"step": 41500
},
{
"epoch": 15.66,
"learning_rate": 1.6868648852088824e-05,
"loss": 0.1133,
"step": 41600
},
{
"epoch": 15.69,
"learning_rate": 1.6861121565675576e-05,
"loss": 0.1123,
"step": 41700
},
{
"epoch": 15.73,
"learning_rate": 1.6853594279262325e-05,
"loss": 0.1138,
"step": 41800
},
{
"epoch": 15.77,
"learning_rate": 1.6846066992849078e-05,
"loss": 0.1134,
"step": 41900
},
{
"epoch": 15.81,
"learning_rate": 1.683853970643583e-05,
"loss": 0.1138,
"step": 42000
},
{
"epoch": 15.84,
"learning_rate": 1.6831012420022583e-05,
"loss": 0.1122,
"step": 42100
},
{
"epoch": 15.88,
"learning_rate": 1.6823485133609335e-05,
"loss": 0.1127,
"step": 42200
},
{
"epoch": 15.92,
"learning_rate": 1.6815957847196088e-05,
"loss": 0.1147,
"step": 42300
},
{
"epoch": 15.96,
"learning_rate": 1.680843056078284e-05,
"loss": 0.1127,
"step": 42400
},
{
"epoch": 16.0,
"learning_rate": 1.680090327436959e-05,
"loss": 0.1125,
"step": 42500
},
{
"epoch": 16.0,
"eval_loss": 0.1117386743426323,
"eval_runtime": 44.2589,
"eval_samples_per_second": 169.457,
"eval_steps_per_second": 10.597,
"step": 42512
},
{
"epoch": 16.03,
"learning_rate": 1.6793375987956345e-05,
"loss": 0.1115,
"step": 42600
},
{
"epoch": 16.07,
"learning_rate": 1.6785848701543094e-05,
"loss": 0.1128,
"step": 42700
},
{
"epoch": 16.11,
"learning_rate": 1.6778321415129847e-05,
"loss": 0.1115,
"step": 42800
},
{
"epoch": 16.15,
"learning_rate": 1.67707941287166e-05,
"loss": 0.1119,
"step": 42900
},
{
"epoch": 16.18,
"learning_rate": 1.6763266842303352e-05,
"loss": 0.1126,
"step": 43000
},
{
"epoch": 16.22,
"learning_rate": 1.67557395558901e-05,
"loss": 0.1127,
"step": 43100
},
{
"epoch": 16.26,
"learning_rate": 1.6748212269476857e-05,
"loss": 0.1145,
"step": 43200
},
{
"epoch": 16.3,
"learning_rate": 1.6740684983063606e-05,
"loss": 0.1118,
"step": 43300
},
{
"epoch": 16.33,
"learning_rate": 1.673315769665036e-05,
"loss": 0.1129,
"step": 43400
},
{
"epoch": 16.37,
"learning_rate": 1.672563041023711e-05,
"loss": 0.113,
"step": 43500
},
{
"epoch": 16.41,
"learning_rate": 1.6718103123823864e-05,
"loss": 0.1129,
"step": 43600
},
{
"epoch": 16.45,
"learning_rate": 1.6710575837410613e-05,
"loss": 0.1098,
"step": 43700
},
{
"epoch": 16.48,
"learning_rate": 1.670304855099737e-05,
"loss": 0.112,
"step": 43800
},
{
"epoch": 16.52,
"learning_rate": 1.6695521264584118e-05,
"loss": 0.1117,
"step": 43900
},
{
"epoch": 16.56,
"learning_rate": 1.668799397817087e-05,
"loss": 0.1128,
"step": 44000
},
{
"epoch": 16.6,
"learning_rate": 1.6680466691757623e-05,
"loss": 0.1133,
"step": 44100
},
{
"epoch": 16.64,
"learning_rate": 1.6672939405344375e-05,
"loss": 0.11,
"step": 44200
},
{
"epoch": 16.67,
"learning_rate": 1.6665412118931125e-05,
"loss": 0.1138,
"step": 44300
},
{
"epoch": 16.71,
"learning_rate": 1.665788483251788e-05,
"loss": 0.1108,
"step": 44400
},
{
"epoch": 16.75,
"learning_rate": 1.665035754610463e-05,
"loss": 0.1096,
"step": 44500
},
{
"epoch": 16.79,
"learning_rate": 1.6642830259691382e-05,
"loss": 0.1092,
"step": 44600
},
{
"epoch": 16.82,
"learning_rate": 1.6635302973278135e-05,
"loss": 0.1106,
"step": 44700
},
{
"epoch": 16.86,
"learning_rate": 1.6627775686864887e-05,
"loss": 0.1118,
"step": 44800
},
{
"epoch": 16.9,
"learning_rate": 1.662024840045164e-05,
"loss": 0.1116,
"step": 44900
},
{
"epoch": 16.94,
"learning_rate": 1.661272111403839e-05,
"loss": 0.1095,
"step": 45000
},
{
"epoch": 16.97,
"learning_rate": 1.6605193827625145e-05,
"loss": 0.1112,
"step": 45100
},
{
"epoch": 17.0,
"eval_loss": 0.10999125987291336,
"eval_runtime": 44.3431,
"eval_samples_per_second": 169.136,
"eval_steps_per_second": 10.577,
"step": 45169
},
{
"epoch": 17.01,
"learning_rate": 1.6597666541211894e-05,
"loss": 0.11,
"step": 45200
},
{
"epoch": 17.05,
"learning_rate": 1.6590139254798646e-05,
"loss": 0.1107,
"step": 45300
},
{
"epoch": 17.09,
"learning_rate": 1.65826119683854e-05,
"loss": 0.1127,
"step": 45400
},
{
"epoch": 17.12,
"learning_rate": 1.657508468197215e-05,
"loss": 0.1106,
"step": 45500
},
{
"epoch": 17.16,
"learning_rate": 1.65675573955589e-05,
"loss": 0.1098,
"step": 45600
},
{
"epoch": 17.2,
"learning_rate": 1.6560030109145656e-05,
"loss": 0.111,
"step": 45700
},
{
"epoch": 17.24,
"learning_rate": 1.6552502822732405e-05,
"loss": 0.1094,
"step": 45800
},
{
"epoch": 17.28,
"learning_rate": 1.6544975536319158e-05,
"loss": 0.1098,
"step": 45900
},
{
"epoch": 17.31,
"learning_rate": 1.653744824990591e-05,
"loss": 0.11,
"step": 46000
},
{
"epoch": 17.35,
"learning_rate": 1.6529920963492663e-05,
"loss": 0.1101,
"step": 46100
},
{
"epoch": 17.39,
"learning_rate": 1.6522393677079412e-05,
"loss": 0.1113,
"step": 46200
},
{
"epoch": 17.43,
"learning_rate": 1.6514866390666168e-05,
"loss": 0.112,
"step": 46300
},
{
"epoch": 17.46,
"learning_rate": 1.6507339104252917e-05,
"loss": 0.1109,
"step": 46400
},
{
"epoch": 17.5,
"learning_rate": 1.649981181783967e-05,
"loss": 0.1108,
"step": 46500
},
{
"epoch": 17.54,
"learning_rate": 1.6492284531426422e-05,
"loss": 0.1092,
"step": 46600
},
{
"epoch": 17.58,
"learning_rate": 1.6484757245013175e-05,
"loss": 0.1097,
"step": 46700
},
{
"epoch": 17.61,
"learning_rate": 1.6477229958599924e-05,
"loss": 0.1098,
"step": 46800
},
{
"epoch": 17.65,
"learning_rate": 1.646970267218668e-05,
"loss": 0.1081,
"step": 46900
},
{
"epoch": 17.69,
"learning_rate": 1.646217538577343e-05,
"loss": 0.1085,
"step": 47000
},
{
"epoch": 17.73,
"learning_rate": 1.645464809936018e-05,
"loss": 0.1089,
"step": 47100
},
{
"epoch": 17.76,
"learning_rate": 1.6447120812946934e-05,
"loss": 0.1084,
"step": 47200
},
{
"epoch": 17.8,
"learning_rate": 1.6439593526533686e-05,
"loss": 0.1093,
"step": 47300
},
{
"epoch": 17.84,
"learning_rate": 1.643206624012044e-05,
"loss": 0.1115,
"step": 47400
},
{
"epoch": 17.88,
"learning_rate": 1.642453895370719e-05,
"loss": 0.1091,
"step": 47500
},
{
"epoch": 17.91,
"learning_rate": 1.6417011667293944e-05,
"loss": 0.11,
"step": 47600
},
{
"epoch": 17.95,
"learning_rate": 1.6409484380880693e-05,
"loss": 0.1096,
"step": 47700
},
{
"epoch": 17.99,
"learning_rate": 1.6401957094467446e-05,
"loss": 0.1108,
"step": 47800
},
{
"epoch": 18.0,
"eval_loss": 0.10891053080558777,
"eval_runtime": 44.4735,
"eval_samples_per_second": 168.64,
"eval_steps_per_second": 10.546,
"step": 47826
},
{
"epoch": 18.03,
"learning_rate": 1.6394429808054198e-05,
"loss": 0.1076,
"step": 47900
},
{
"epoch": 18.07,
"learning_rate": 1.638690252164095e-05,
"loss": 0.1092,
"step": 48000
},
{
"epoch": 18.1,
"learning_rate": 1.63793752352277e-05,
"loss": 0.1081,
"step": 48100
},
{
"epoch": 18.14,
"learning_rate": 1.6371847948814456e-05,
"loss": 0.1089,
"step": 48200
},
{
"epoch": 18.18,
"learning_rate": 1.6364320662401205e-05,
"loss": 0.1097,
"step": 48300
},
{
"epoch": 18.22,
"learning_rate": 1.6356793375987957e-05,
"loss": 0.11,
"step": 48400
},
{
"epoch": 18.25,
"learning_rate": 1.634926608957471e-05,
"loss": 0.107,
"step": 48500
},
{
"epoch": 18.29,
"learning_rate": 1.6341738803161462e-05,
"loss": 0.109,
"step": 48600
},
{
"epoch": 18.33,
"learning_rate": 1.633421151674821e-05,
"loss": 0.1092,
"step": 48700
},
{
"epoch": 18.37,
"learning_rate": 1.6326684230334967e-05,
"loss": 0.109,
"step": 48800
},
{
"epoch": 18.4,
"learning_rate": 1.6319156943921716e-05,
"loss": 0.1068,
"step": 48900
},
{
"epoch": 18.44,
"learning_rate": 1.631162965750847e-05,
"loss": 0.1091,
"step": 49000
},
{
"epoch": 18.48,
"learning_rate": 1.630410237109522e-05,
"loss": 0.1087,
"step": 49100
},
{
"epoch": 18.52,
"learning_rate": 1.6296575084681974e-05,
"loss": 0.1078,
"step": 49200
},
{
"epoch": 18.55,
"learning_rate": 1.6289047798268723e-05,
"loss": 0.109,
"step": 49300
},
{
"epoch": 18.59,
"learning_rate": 1.628152051185548e-05,
"loss": 0.1112,
"step": 49400
},
{
"epoch": 18.63,
"learning_rate": 1.6273993225442228e-05,
"loss": 0.1098,
"step": 49500
},
{
"epoch": 18.67,
"learning_rate": 1.626646593902898e-05,
"loss": 0.11,
"step": 49600
},
{
"epoch": 18.71,
"learning_rate": 1.6258938652615733e-05,
"loss": 0.1085,
"step": 49700
},
{
"epoch": 18.74,
"learning_rate": 1.6251411366202486e-05,
"loss": 0.1088,
"step": 49800
},
{
"epoch": 18.78,
"learning_rate": 1.6243884079789238e-05,
"loss": 0.1093,
"step": 49900
},
{
"epoch": 18.82,
"learning_rate": 1.623635679337599e-05,
"loss": 0.1069,
"step": 50000
},
{
"epoch": 18.86,
"learning_rate": 1.6228829506962743e-05,
"loss": 0.1082,
"step": 50100
},
{
"epoch": 18.89,
"learning_rate": 1.6221302220549492e-05,
"loss": 0.1093,
"step": 50200
},
{
"epoch": 18.93,
"learning_rate": 1.6213774934136245e-05,
"loss": 0.1074,
"step": 50300
},
{
"epoch": 18.97,
"learning_rate": 1.6206247647722997e-05,
"loss": 0.1061,
"step": 50400
},
{
"epoch": 19.0,
"eval_loss": 0.10703522711992264,
"eval_runtime": 45.2943,
"eval_samples_per_second": 165.584,
"eval_steps_per_second": 10.354,
"step": 50483
},
{
"epoch": 19.01,
"learning_rate": 1.619872036130975e-05,
"loss": 0.1082,
"step": 50500
},
{
"epoch": 19.04,
"learning_rate": 1.6191193074896502e-05,
"loss": 0.1093,
"step": 50600
},
{
"epoch": 19.08,
"learning_rate": 1.6183665788483255e-05,
"loss": 0.1078,
"step": 50700
},
{
"epoch": 19.12,
"learning_rate": 1.6176138502070004e-05,
"loss": 0.1069,
"step": 50800
},
{
"epoch": 19.16,
"learning_rate": 1.6168611215656757e-05,
"loss": 0.1092,
"step": 50900
},
{
"epoch": 19.19,
"learning_rate": 1.616108392924351e-05,
"loss": 0.1064,
"step": 51000
},
{
"epoch": 19.23,
"learning_rate": 1.615355664283026e-05,
"loss": 0.1063,
"step": 51100
},
{
"epoch": 19.27,
"learning_rate": 1.614602935641701e-05,
"loss": 0.1071,
"step": 51200
},
{
"epoch": 19.31,
"learning_rate": 1.6138502070003767e-05,
"loss": 0.1083,
"step": 51300
},
{
"epoch": 19.35,
"learning_rate": 1.6130974783590516e-05,
"loss": 0.1079,
"step": 51400
},
{
"epoch": 19.38,
"learning_rate": 1.6123447497177268e-05,
"loss": 0.1081,
"step": 51500
},
{
"epoch": 19.42,
"learning_rate": 1.611592021076402e-05,
"loss": 0.1086,
"step": 51600
},
{
"epoch": 19.46,
"learning_rate": 1.6108392924350773e-05,
"loss": 0.1073,
"step": 51700
},
{
"epoch": 19.5,
"learning_rate": 1.6100865637937522e-05,
"loss": 0.1082,
"step": 51800
},
{
"epoch": 19.53,
"learning_rate": 1.6093338351524278e-05,
"loss": 0.1081,
"step": 51900
},
{
"epoch": 19.57,
"learning_rate": 1.6085811065111027e-05,
"loss": 0.1081,
"step": 52000
},
{
"epoch": 19.61,
"learning_rate": 1.607828377869778e-05,
"loss": 0.1077,
"step": 52100
},
{
"epoch": 19.65,
"learning_rate": 1.6070756492284532e-05,
"loss": 0.108,
"step": 52200
},
{
"epoch": 19.68,
"learning_rate": 1.6063229205871285e-05,
"loss": 0.107,
"step": 52300
},
{
"epoch": 19.72,
"learning_rate": 1.6055701919458037e-05,
"loss": 0.1082,
"step": 52400
},
{
"epoch": 19.76,
"learning_rate": 1.604817463304479e-05,
"loss": 0.1082,
"step": 52500
},
{
"epoch": 19.8,
"learning_rate": 1.6040647346631542e-05,
"loss": 0.1067,
"step": 52600
},
{
"epoch": 19.83,
"learning_rate": 1.603312006021829e-05,
"loss": 0.1071,
"step": 52700
},
{
"epoch": 19.87,
"learning_rate": 1.6025592773805044e-05,
"loss": 0.1068,
"step": 52800
},
{
"epoch": 19.91,
"learning_rate": 1.6018065487391797e-05,
"loss": 0.1086,
"step": 52900
},
{
"epoch": 19.95,
"learning_rate": 1.601053820097855e-05,
"loss": 0.1085,
"step": 53000
},
{
"epoch": 19.98,
"learning_rate": 1.60030109145653e-05,
"loss": 0.1073,
"step": 53100
},
{
"epoch": 20.0,
"eval_loss": 0.10757213830947876,
"eval_runtime": 45.7115,
"eval_samples_per_second": 164.072,
"eval_steps_per_second": 10.26,
"step": 53140
},
{
"epoch": 20.02,
"learning_rate": 1.5995483628152054e-05,
"loss": 0.1084,
"step": 53200
},
{
"epoch": 20.06,
"learning_rate": 1.5987956341738803e-05,
"loss": 0.1089,
"step": 53300
},
{
"epoch": 20.1,
"learning_rate": 1.5980429055325556e-05,
"loss": 0.1085,
"step": 53400
},
{
"epoch": 20.14,
"learning_rate": 1.597290176891231e-05,
"loss": 0.1092,
"step": 53500
},
{
"epoch": 20.17,
"learning_rate": 1.596537448249906e-05,
"loss": 0.1073,
"step": 53600
},
{
"epoch": 20.21,
"learning_rate": 1.5957847196085813e-05,
"loss": 0.1087,
"step": 53700
},
{
"epoch": 20.25,
"learning_rate": 1.5950319909672566e-05,
"loss": 0.1071,
"step": 53800
},
{
"epoch": 20.29,
"learning_rate": 1.5942792623259315e-05,
"loss": 0.1061,
"step": 53900
},
{
"epoch": 20.32,
"learning_rate": 1.5935265336846068e-05,
"loss": 0.1055,
"step": 54000
},
{
"epoch": 20.36,
"learning_rate": 1.592773805043282e-05,
"loss": 0.1077,
"step": 54100
},
{
"epoch": 20.4,
"learning_rate": 1.5920210764019573e-05,
"loss": 0.108,
"step": 54200
},
{
"epoch": 20.44,
"learning_rate": 1.591268347760632e-05,
"loss": 0.1075,
"step": 54300
},
{
"epoch": 20.47,
"learning_rate": 1.5905156191193078e-05,
"loss": 0.1066,
"step": 54400
},
{
"epoch": 20.51,
"learning_rate": 1.5897628904779827e-05,
"loss": 0.1055,
"step": 54500
},
{
"epoch": 20.55,
"learning_rate": 1.589010161836658e-05,
"loss": 0.1069,
"step": 54600
},
{
"epoch": 20.59,
"learning_rate": 1.5882574331953332e-05,
"loss": 0.1068,
"step": 54700
},
{
"epoch": 20.62,
"learning_rate": 1.5875047045540084e-05,
"loss": 0.1065,
"step": 54800
},
{
"epoch": 20.66,
"learning_rate": 1.5867519759126837e-05,
"loss": 0.1053,
"step": 54900
},
{
"epoch": 20.7,
"learning_rate": 1.585999247271359e-05,
"loss": 0.1055,
"step": 55000
},
{
"epoch": 20.74,
"learning_rate": 1.5852465186300342e-05,
"loss": 0.107,
"step": 55100
},
{
"epoch": 20.78,
"learning_rate": 1.584493789988709e-05,
"loss": 0.1058,
"step": 55200
},
{
"epoch": 20.81,
"learning_rate": 1.5837410613473843e-05,
"loss": 0.1088,
"step": 55300
},
{
"epoch": 20.85,
"learning_rate": 1.5829883327060596e-05,
"loss": 0.1061,
"step": 55400
},
{
"epoch": 20.89,
"learning_rate": 1.582235604064735e-05,
"loss": 0.1066,
"step": 55500
},
{
"epoch": 20.93,
"learning_rate": 1.58148287542341e-05,
"loss": 0.1071,
"step": 55600
},
{
"epoch": 20.96,
"learning_rate": 1.5807301467820853e-05,
"loss": 0.1066,
"step": 55700
},
{
"epoch": 21.0,
"eval_loss": 0.1060996800661087,
"eval_runtime": 45.0415,
"eval_samples_per_second": 166.513,
"eval_steps_per_second": 10.413,
"step": 55797
},
{
"epoch": 21.0,
"learning_rate": 1.5799774181407603e-05,
"loss": 0.1054,
"step": 55800
},
{
"epoch": 21.04,
"learning_rate": 1.5792246894994355e-05,
"loss": 0.1053,
"step": 55900
},
{
"epoch": 21.08,
"learning_rate": 1.5784719608581108e-05,
"loss": 0.1055,
"step": 56000
},
{
"epoch": 21.11,
"learning_rate": 1.577719232216786e-05,
"loss": 0.1056,
"step": 56100
},
{
"epoch": 21.15,
"learning_rate": 1.5769665035754613e-05,
"loss": 0.106,
"step": 56200
},
{
"epoch": 21.19,
"learning_rate": 1.5762137749341365e-05,
"loss": 0.106,
"step": 56300
},
{
"epoch": 21.23,
"learning_rate": 1.5754610462928114e-05,
"loss": 0.1056,
"step": 56400
},
{
"epoch": 21.26,
"learning_rate": 1.5747083176514867e-05,
"loss": 0.1069,
"step": 56500
},
{
"epoch": 21.3,
"learning_rate": 1.573955589010162e-05,
"loss": 0.1054,
"step": 56600
},
{
"epoch": 21.34,
"learning_rate": 1.5732028603688372e-05,
"loss": 0.1087,
"step": 56700
},
{
"epoch": 21.38,
"learning_rate": 1.5724501317275124e-05,
"loss": 0.1085,
"step": 56800
},
{
"epoch": 21.42,
"learning_rate": 1.5716974030861877e-05,
"loss": 0.1052,
"step": 56900
},
{
"epoch": 21.45,
"learning_rate": 1.5709446744448626e-05,
"loss": 0.1072,
"step": 57000
},
{
"epoch": 21.49,
"learning_rate": 1.570191945803538e-05,
"loss": 0.1055,
"step": 57100
},
{
"epoch": 21.53,
"learning_rate": 1.569439217162213e-05,
"loss": 0.1067,
"step": 57200
},
{
"epoch": 21.57,
"learning_rate": 1.5686864885208884e-05,
"loss": 0.1057,
"step": 57300
},
{
"epoch": 21.6,
"learning_rate": 1.5679337598795636e-05,
"loss": 0.1057,
"step": 57400
},
{
"epoch": 21.64,
"learning_rate": 1.567181031238239e-05,
"loss": 0.106,
"step": 57500
},
{
"epoch": 21.68,
"learning_rate": 1.566428302596914e-05,
"loss": 0.1022,
"step": 57600
},
{
"epoch": 21.72,
"learning_rate": 1.565675573955589e-05,
"loss": 0.1066,
"step": 57700
},
{
"epoch": 21.75,
"learning_rate": 1.5649228453142643e-05,
"loss": 0.1063,
"step": 57800
},
{
"epoch": 21.79,
"learning_rate": 1.5641701166729395e-05,
"loss": 0.1072,
"step": 57900
},
{
"epoch": 21.83,
"learning_rate": 1.5634173880316148e-05,
"loss": 0.1044,
"step": 58000
},
{
"epoch": 21.87,
"learning_rate": 1.56266465939029e-05,
"loss": 0.1071,
"step": 58100
},
{
"epoch": 21.9,
"learning_rate": 1.5619119307489653e-05,
"loss": 0.1058,
"step": 58200
},
{
"epoch": 21.94,
"learning_rate": 1.5611592021076402e-05,
"loss": 0.1072,
"step": 58300
},
{
"epoch": 21.98,
"learning_rate": 1.5604064734663154e-05,
"loss": 0.1065,
"step": 58400
},
{
"epoch": 22.0,
"eval_loss": 0.10562047362327576,
"eval_runtime": 44.6592,
"eval_samples_per_second": 167.938,
"eval_steps_per_second": 10.502,
"step": 58454
},
{
"epoch": 22.02,
"learning_rate": 1.5596537448249907e-05,
"loss": 0.1042,
"step": 58500
},
{
"epoch": 22.05,
"learning_rate": 1.558901016183666e-05,
"loss": 0.1051,
"step": 58600
},
{
"epoch": 22.09,
"learning_rate": 1.5581482875423412e-05,
"loss": 0.1075,
"step": 58700
},
{
"epoch": 22.13,
"learning_rate": 1.5573955589010164e-05,
"loss": 0.1052,
"step": 58800
},
{
"epoch": 22.17,
"learning_rate": 1.5566428302596914e-05,
"loss": 0.1039,
"step": 58900
},
{
"epoch": 22.21,
"learning_rate": 1.5558901016183666e-05,
"loss": 0.1063,
"step": 59000
},
{
"epoch": 22.24,
"learning_rate": 1.555137372977042e-05,
"loss": 0.1048,
"step": 59100
},
{
"epoch": 22.28,
"learning_rate": 1.554384644335717e-05,
"loss": 0.1056,
"step": 59200
},
{
"epoch": 22.32,
"learning_rate": 1.5536319156943924e-05,
"loss": 0.1064,
"step": 59300
},
{
"epoch": 22.36,
"learning_rate": 1.5528791870530676e-05,
"loss": 0.1055,
"step": 59400
},
{
"epoch": 22.39,
"learning_rate": 1.5521264584117425e-05,
"loss": 0.1065,
"step": 59500
},
{
"epoch": 22.43,
"learning_rate": 1.5513737297704178e-05,
"loss": 0.1063,
"step": 59600
},
{
"epoch": 22.47,
"learning_rate": 1.550621001129093e-05,
"loss": 0.1045,
"step": 59700
},
{
"epoch": 22.51,
"learning_rate": 1.5498682724877683e-05,
"loss": 0.1041,
"step": 59800
},
{
"epoch": 22.54,
"learning_rate": 1.5491155438464435e-05,
"loss": 0.1057,
"step": 59900
},
{
"epoch": 22.58,
"learning_rate": 1.5483628152051188e-05,
"loss": 0.1048,
"step": 60000
},
{
"epoch": 22.62,
"learning_rate": 1.547610086563794e-05,
"loss": 0.1055,
"step": 60100
},
{
"epoch": 22.66,
"learning_rate": 1.546857357922469e-05,
"loss": 0.1021,
"step": 60200
},
{
"epoch": 22.69,
"learning_rate": 1.5461046292811442e-05,
"loss": 0.1044,
"step": 60300
},
{
"epoch": 22.73,
"learning_rate": 1.5453519006398194e-05,
"loss": 0.1056,
"step": 60400
},
{
"epoch": 22.77,
"learning_rate": 1.5445991719984947e-05,
"loss": 0.1057,
"step": 60500
},
{
"epoch": 22.81,
"learning_rate": 1.54384644335717e-05,
"loss": 0.1058,
"step": 60600
},
{
"epoch": 22.85,
"learning_rate": 1.5430937147158452e-05,
"loss": 0.1059,
"step": 60700
},
{
"epoch": 22.88,
"learning_rate": 1.54234098607452e-05,
"loss": 0.1058,
"step": 60800
},
{
"epoch": 22.92,
"learning_rate": 1.5415882574331954e-05,
"loss": 0.1052,
"step": 60900
},
{
"epoch": 22.96,
"learning_rate": 1.5408355287918706e-05,
"loss": 0.1062,
"step": 61000
},
{
"epoch": 23.0,
"learning_rate": 1.540082800150546e-05,
"loss": 0.1045,
"step": 61100
},
{
"epoch": 23.0,
"eval_loss": 0.10369115322828293,
"eval_runtime": 44.6965,
"eval_samples_per_second": 167.798,
"eval_steps_per_second": 10.493,
"step": 61111
},
{
"epoch": 23.03,
"learning_rate": 1.539330071509221e-05,
"loss": 0.1056,
"step": 61200
},
{
"epoch": 23.07,
"learning_rate": 1.5385773428678964e-05,
"loss": 0.1034,
"step": 61300
},
{
"epoch": 23.11,
"learning_rate": 1.5378246142265713e-05,
"loss": 0.1056,
"step": 61400
},
{
"epoch": 23.15,
"learning_rate": 1.5370718855852465e-05,
"loss": 0.1055,
"step": 61500
},
{
"epoch": 23.18,
"learning_rate": 1.5363191569439218e-05,
"loss": 0.1053,
"step": 61600
},
{
"epoch": 23.22,
"learning_rate": 1.535566428302597e-05,
"loss": 0.1049,
"step": 61700
},
{
"epoch": 23.26,
"learning_rate": 1.5348136996612723e-05,
"loss": 0.1062,
"step": 61800
},
{
"epoch": 23.3,
"learning_rate": 1.5340609710199475e-05,
"loss": 0.1038,
"step": 61900
},
{
"epoch": 23.33,
"learning_rate": 1.5333082423786225e-05,
"loss": 0.1048,
"step": 62000
},
{
"epoch": 23.37,
"learning_rate": 1.5325555137372977e-05,
"loss": 0.1054,
"step": 62100
},
{
"epoch": 23.41,
"learning_rate": 1.5318027850959733e-05,
"loss": 0.1041,
"step": 62200
},
{
"epoch": 23.45,
"learning_rate": 1.5310500564546482e-05,
"loss": 0.1048,
"step": 62300
},
{
"epoch": 23.49,
"learning_rate": 1.5302973278133235e-05,
"loss": 0.105,
"step": 62400
},
{
"epoch": 23.52,
"learning_rate": 1.5295445991719987e-05,
"loss": 0.1023,
"step": 62500
},
{
"epoch": 23.56,
"learning_rate": 1.528791870530674e-05,
"loss": 0.105,
"step": 62600
},
{
"epoch": 23.6,
"learning_rate": 1.528039141889349e-05,
"loss": 0.1046,
"step": 62700
},
{
"epoch": 23.64,
"learning_rate": 1.527286413248024e-05,
"loss": 0.1033,
"step": 62800
},
{
"epoch": 23.67,
"learning_rate": 1.5265336846066994e-05,
"loss": 0.1057,
"step": 62900
},
{
"epoch": 23.71,
"learning_rate": 1.5257809559653746e-05,
"loss": 0.1056,
"step": 63000
},
{
"epoch": 23.75,
"learning_rate": 1.5250282273240497e-05,
"loss": 0.1056,
"step": 63100
},
{
"epoch": 23.79,
"learning_rate": 1.5242754986827251e-05,
"loss": 0.1032,
"step": 63200
},
{
"epoch": 23.82,
"learning_rate": 1.5235227700414002e-05,
"loss": 0.1025,
"step": 63300
},
{
"epoch": 23.86,
"learning_rate": 1.5227700414000755e-05,
"loss": 0.1063,
"step": 63400
},
{
"epoch": 23.9,
"learning_rate": 1.5220173127587505e-05,
"loss": 0.1035,
"step": 63500
},
{
"epoch": 23.94,
"learning_rate": 1.5212645841174258e-05,
"loss": 0.104,
"step": 63600
},
{
"epoch": 23.97,
"learning_rate": 1.5205118554761009e-05,
"loss": 0.1052,
"step": 63700
},
{
"epoch": 24.0,
"eval_loss": 0.10549741983413696,
"eval_runtime": 43.683,
"eval_samples_per_second": 171.691,
"eval_steps_per_second": 10.736,
"step": 63768
},
{
"epoch": 24.01,
"learning_rate": 1.5197591268347763e-05,
"loss": 0.1051,
"step": 63800
},
{
"epoch": 24.05,
"learning_rate": 1.5190063981934514e-05,
"loss": 0.1031,
"step": 63900
},
{
"epoch": 24.09,
"learning_rate": 1.5182536695521266e-05,
"loss": 0.1057,
"step": 64000
},
{
"epoch": 24.12,
"learning_rate": 1.5175009409108017e-05,
"loss": 0.1048,
"step": 64100
},
{
"epoch": 24.16,
"learning_rate": 1.516748212269477e-05,
"loss": 0.1054,
"step": 64200
},
{
"epoch": 24.2,
"learning_rate": 1.515995483628152e-05,
"loss": 0.1042,
"step": 64300
},
{
"epoch": 24.24,
"learning_rate": 1.5152427549868273e-05,
"loss": 0.1049,
"step": 64400
},
{
"epoch": 24.28,
"learning_rate": 1.5144900263455024e-05,
"loss": 0.1039,
"step": 64500
},
{
"epoch": 24.31,
"learning_rate": 1.5137372977041778e-05,
"loss": 0.1039,
"step": 64600
},
{
"epoch": 24.35,
"learning_rate": 1.512984569062853e-05,
"loss": 0.104,
"step": 64700
},
{
"epoch": 24.39,
"learning_rate": 1.5122318404215281e-05,
"loss": 0.1039,
"step": 64800
},
{
"epoch": 24.43,
"learning_rate": 1.5114791117802034e-05,
"loss": 0.1031,
"step": 64900
},
{
"epoch": 24.46,
"learning_rate": 1.5107263831388785e-05,
"loss": 0.1019,
"step": 65000
},
{
"epoch": 24.5,
"learning_rate": 1.5099736544975539e-05,
"loss": 0.1041,
"step": 65100
},
{
"epoch": 24.54,
"learning_rate": 1.509220925856229e-05,
"loss": 0.1049,
"step": 65200
},
{
"epoch": 24.58,
"learning_rate": 1.5084681972149042e-05,
"loss": 0.1029,
"step": 65300
},
{
"epoch": 24.61,
"learning_rate": 1.5077154685735793e-05,
"loss": 0.105,
"step": 65400
},
{
"epoch": 24.65,
"learning_rate": 1.5069627399322546e-05,
"loss": 0.1041,
"step": 65500
},
{
"epoch": 24.69,
"learning_rate": 1.5062100112909296e-05,
"loss": 0.1032,
"step": 65600
},
{
"epoch": 24.73,
"learning_rate": 1.505457282649605e-05,
"loss": 0.1033,
"step": 65700
},
{
"epoch": 24.76,
"learning_rate": 1.5047045540082801e-05,
"loss": 0.1036,
"step": 65800
},
{
"epoch": 24.8,
"learning_rate": 1.5039518253669554e-05,
"loss": 0.1031,
"step": 65900
},
{
"epoch": 24.84,
"learning_rate": 1.5031990967256305e-05,
"loss": 0.1031,
"step": 66000
},
{
"epoch": 24.88,
"learning_rate": 1.5024463680843057e-05,
"loss": 0.103,
"step": 66100
},
{
"epoch": 24.92,
"learning_rate": 1.5016936394429808e-05,
"loss": 0.103,
"step": 66200
},
{
"epoch": 24.95,
"learning_rate": 1.5009409108016562e-05,
"loss": 0.103,
"step": 66300
},
{
"epoch": 24.99,
"learning_rate": 1.5001881821603313e-05,
"loss": 0.102,
"step": 66400
},
{
"epoch": 25.0,
"eval_loss": 0.10278935730457306,
"eval_runtime": 43.9205,
"eval_samples_per_second": 170.763,
"eval_steps_per_second": 10.678,
"step": 66425
},
{
"epoch": 25.03,
"learning_rate": 1.4994354535190066e-05,
"loss": 0.1033,
"step": 66500
},
{
"epoch": 25.07,
"learning_rate": 1.4986827248776816e-05,
"loss": 0.1022,
"step": 66600
},
{
"epoch": 25.1,
"learning_rate": 1.4979299962363569e-05,
"loss": 0.1028,
"step": 66700
},
{
"epoch": 25.14,
"learning_rate": 1.497177267595032e-05,
"loss": 0.1044,
"step": 66800
},
{
"epoch": 25.18,
"learning_rate": 1.4964245389537074e-05,
"loss": 0.1038,
"step": 66900
},
{
"epoch": 25.22,
"learning_rate": 1.4956718103123825e-05,
"loss": 0.1052,
"step": 67000
},
{
"epoch": 25.25,
"learning_rate": 1.4949190816710577e-05,
"loss": 0.1039,
"step": 67100
},
{
"epoch": 25.29,
"learning_rate": 1.494166353029733e-05,
"loss": 0.1039,
"step": 67200
},
{
"epoch": 25.33,
"learning_rate": 1.493413624388408e-05,
"loss": 0.1025,
"step": 67300
},
{
"epoch": 25.37,
"learning_rate": 1.4926608957470833e-05,
"loss": 0.1037,
"step": 67400
},
{
"epoch": 25.4,
"learning_rate": 1.4919081671057584e-05,
"loss": 0.1019,
"step": 67500
},
{
"epoch": 25.44,
"learning_rate": 1.4911554384644338e-05,
"loss": 0.1027,
"step": 67600
},
{
"epoch": 25.48,
"learning_rate": 1.4904027098231089e-05,
"loss": 0.1037,
"step": 67700
},
{
"epoch": 25.52,
"learning_rate": 1.4896499811817842e-05,
"loss": 0.1031,
"step": 67800
},
{
"epoch": 25.56,
"learning_rate": 1.4888972525404592e-05,
"loss": 0.1035,
"step": 67900
},
{
"epoch": 25.59,
"learning_rate": 1.4881445238991345e-05,
"loss": 0.1031,
"step": 68000
},
{
"epoch": 25.63,
"learning_rate": 1.4873917952578096e-05,
"loss": 0.1034,
"step": 68100
},
{
"epoch": 25.67,
"learning_rate": 1.486639066616485e-05,
"loss": 0.1037,
"step": 68200
},
{
"epoch": 25.71,
"learning_rate": 1.48588633797516e-05,
"loss": 0.104,
"step": 68300
},
{
"epoch": 25.74,
"learning_rate": 1.4851336093338353e-05,
"loss": 0.1036,
"step": 68400
},
{
"epoch": 25.78,
"learning_rate": 1.4843808806925104e-05,
"loss": 0.1031,
"step": 68500
},
{
"epoch": 25.82,
"learning_rate": 1.4836281520511857e-05,
"loss": 0.1027,
"step": 68600
},
{
"epoch": 25.86,
"learning_rate": 1.4828754234098607e-05,
"loss": 0.1036,
"step": 68700
},
{
"epoch": 25.89,
"learning_rate": 1.4821226947685362e-05,
"loss": 0.1023,
"step": 68800
},
{
"epoch": 25.93,
"learning_rate": 1.4813699661272112e-05,
"loss": 0.1015,
"step": 68900
},
{
"epoch": 25.97,
"learning_rate": 1.4806172374858865e-05,
"loss": 0.1025,
"step": 69000
},
{
"epoch": 26.0,
"eval_loss": 0.10342206060886383,
"eval_runtime": 45.4198,
"eval_samples_per_second": 165.126,
"eval_steps_per_second": 10.326,
"step": 69082
},
{
"epoch": 26.01,
"learning_rate": 1.4798645088445616e-05,
"loss": 0.1011,
"step": 69100
},
{
"epoch": 26.04,
"learning_rate": 1.4791117802032368e-05,
"loss": 0.1021,
"step": 69200
},
{
"epoch": 26.08,
"learning_rate": 1.4783590515619119e-05,
"loss": 0.1036,
"step": 69300
},
{
"epoch": 26.12,
"learning_rate": 1.4776063229205873e-05,
"loss": 0.1041,
"step": 69400
},
{
"epoch": 26.16,
"learning_rate": 1.4768535942792624e-05,
"loss": 0.1028,
"step": 69500
},
{
"epoch": 26.19,
"learning_rate": 1.4761008656379377e-05,
"loss": 0.1024,
"step": 69600
},
{
"epoch": 26.23,
"learning_rate": 1.4753481369966129e-05,
"loss": 0.1019,
"step": 69700
},
{
"epoch": 26.27,
"learning_rate": 1.474595408355288e-05,
"loss": 0.1022,
"step": 69800
},
{
"epoch": 26.31,
"learning_rate": 1.4738426797139634e-05,
"loss": 0.102,
"step": 69900
},
{
"epoch": 26.35,
"learning_rate": 1.4730899510726385e-05,
"loss": 0.1008,
"step": 70000
},
{
"epoch": 26.38,
"learning_rate": 1.4723372224313137e-05,
"loss": 0.1032,
"step": 70100
},
{
"epoch": 26.42,
"learning_rate": 1.4715844937899888e-05,
"loss": 0.1036,
"step": 70200
},
{
"epoch": 26.46,
"learning_rate": 1.470831765148664e-05,
"loss": 0.103,
"step": 70300
},
{
"epoch": 26.5,
"learning_rate": 1.4700790365073392e-05,
"loss": 0.1016,
"step": 70400
},
{
"epoch": 26.53,
"learning_rate": 1.4693263078660144e-05,
"loss": 0.1022,
"step": 70500
},
{
"epoch": 26.57,
"learning_rate": 1.4685735792246895e-05,
"loss": 0.1033,
"step": 70600
},
{
"epoch": 26.61,
"learning_rate": 1.4678208505833649e-05,
"loss": 0.1032,
"step": 70700
},
{
"epoch": 26.65,
"learning_rate": 1.46706812194204e-05,
"loss": 0.1026,
"step": 70800
},
{
"epoch": 26.68,
"learning_rate": 1.4663153933007152e-05,
"loss": 0.1019,
"step": 70900
},
{
"epoch": 26.72,
"learning_rate": 1.4655626646593903e-05,
"loss": 0.1035,
"step": 71000
},
{
"epoch": 26.76,
"learning_rate": 1.4648099360180656e-05,
"loss": 0.102,
"step": 71100
},
{
"epoch": 26.8,
"learning_rate": 1.4640572073767407e-05,
"loss": 0.1026,
"step": 71200
},
{
"epoch": 26.83,
"learning_rate": 1.463304478735416e-05,
"loss": 0.1023,
"step": 71300
},
{
"epoch": 26.87,
"learning_rate": 1.4625517500940912e-05,
"loss": 0.1011,
"step": 71400
},
{
"epoch": 26.91,
"learning_rate": 1.4617990214527664e-05,
"loss": 0.1037,
"step": 71500
},
{
"epoch": 26.95,
"learning_rate": 1.4610462928114415e-05,
"loss": 0.1036,
"step": 71600
},
{
"epoch": 26.99,
"learning_rate": 1.4602935641701168e-05,
"loss": 0.1037,
"step": 71700
},
{
"epoch": 27.0,
"eval_loss": 0.10246068239212036,
"eval_runtime": 45.3187,
"eval_samples_per_second": 165.495,
"eval_steps_per_second": 10.349,
"step": 71739
},
{
"epoch": 27.02,
"learning_rate": 1.4595408355287918e-05,
"loss": 0.1032,
"step": 71800
},
{
"epoch": 27.06,
"learning_rate": 1.4587881068874673e-05,
"loss": 0.1021,
"step": 71900
},
{
"epoch": 27.1,
"learning_rate": 1.4580353782461423e-05,
"loss": 0.1034,
"step": 72000
},
{
"epoch": 27.14,
"learning_rate": 1.4572826496048176e-05,
"loss": 0.1029,
"step": 72100
},
{
"epoch": 27.17,
"learning_rate": 1.4565299209634928e-05,
"loss": 0.1024,
"step": 72200
},
{
"epoch": 27.21,
"learning_rate": 1.455777192322168e-05,
"loss": 0.1028,
"step": 72300
},
{
"epoch": 27.25,
"learning_rate": 1.4550244636808433e-05,
"loss": 0.1053,
"step": 72400
},
{
"epoch": 27.29,
"learning_rate": 1.4542717350395184e-05,
"loss": 0.1013,
"step": 72500
},
{
"epoch": 27.32,
"learning_rate": 1.4535190063981937e-05,
"loss": 0.1012,
"step": 72600
},
{
"epoch": 27.36,
"learning_rate": 1.4527662777568688e-05,
"loss": 0.1024,
"step": 72700
},
{
"epoch": 27.4,
"learning_rate": 1.452013549115544e-05,
"loss": 0.1003,
"step": 72800
},
{
"epoch": 27.44,
"learning_rate": 1.4512608204742191e-05,
"loss": 0.103,
"step": 72900
},
{
"epoch": 27.47,
"learning_rate": 1.4505080918328945e-05,
"loss": 0.1012,
"step": 73000
},
{
"epoch": 27.51,
"learning_rate": 1.4497553631915696e-05,
"loss": 0.1015,
"step": 73100
},
{
"epoch": 27.55,
"learning_rate": 1.4490026345502448e-05,
"loss": 0.1032,
"step": 73200
},
{
"epoch": 27.59,
"learning_rate": 1.44824990590892e-05,
"loss": 0.1038,
"step": 73300
},
{
"epoch": 27.63,
"learning_rate": 1.4474971772675952e-05,
"loss": 0.0996,
"step": 73400
},
{
"epoch": 27.66,
"learning_rate": 1.4467444486262703e-05,
"loss": 0.1024,
"step": 73500
},
{
"epoch": 27.7,
"learning_rate": 1.4459917199849455e-05,
"loss": 0.1019,
"step": 73600
},
{
"epoch": 27.74,
"learning_rate": 1.4452389913436206e-05,
"loss": 0.1024,
"step": 73700
},
{
"epoch": 27.78,
"learning_rate": 1.444486262702296e-05,
"loss": 0.1006,
"step": 73800
},
{
"epoch": 27.81,
"learning_rate": 1.4437335340609711e-05,
"loss": 0.1014,
"step": 73900
},
{
"epoch": 27.85,
"learning_rate": 1.4429808054196463e-05,
"loss": 0.1024,
"step": 74000
},
{
"epoch": 27.89,
"learning_rate": 1.4422280767783214e-05,
"loss": 0.1023,
"step": 74100
},
{
"epoch": 27.93,
"learning_rate": 1.4414753481369967e-05,
"loss": 0.1034,
"step": 74200
},
{
"epoch": 27.96,
"learning_rate": 1.4407226194956718e-05,
"loss": 0.1022,
"step": 74300
},
{
"epoch": 28.0,
"eval_loss": 0.10144173353910446,
"eval_runtime": 45.583,
"eval_samples_per_second": 164.535,
"eval_steps_per_second": 10.289,
"step": 74396
},
{
"epoch": 28.0,
"learning_rate": 1.4399698908543472e-05,
"loss": 0.1023,
"step": 74400
},
{
"epoch": 28.04,
"learning_rate": 1.4392171622130224e-05,
"loss": 0.1017,
"step": 74500
},
{
"epoch": 28.08,
"learning_rate": 1.4384644335716975e-05,
"loss": 0.1017,
"step": 74600
},
{
"epoch": 28.11,
"learning_rate": 1.4377117049303728e-05,
"loss": 0.1008,
"step": 74700
},
{
"epoch": 28.15,
"learning_rate": 1.4369589762890478e-05,
"loss": 0.1023,
"step": 74800
},
{
"epoch": 28.19,
"learning_rate": 1.4362062476477233e-05,
"loss": 0.1022,
"step": 74900
},
{
"epoch": 28.23,
"learning_rate": 1.4354535190063984e-05,
"loss": 0.1017,
"step": 75000
},
{
"epoch": 28.26,
"learning_rate": 1.4347007903650736e-05,
"loss": 0.1012,
"step": 75100
},
{
"epoch": 28.3,
"learning_rate": 1.4339480617237487e-05,
"loss": 0.103,
"step": 75200
},
{
"epoch": 28.34,
"learning_rate": 1.433195333082424e-05,
"loss": 0.1025,
"step": 75300
},
{
"epoch": 28.38,
"learning_rate": 1.432442604441099e-05,
"loss": 0.1013,
"step": 75400
},
{
"epoch": 28.42,
"learning_rate": 1.4316898757997744e-05,
"loss": 0.1021,
"step": 75500
},
{
"epoch": 28.45,
"learning_rate": 1.4309371471584495e-05,
"loss": 0.1026,
"step": 75600
},
{
"epoch": 28.49,
"learning_rate": 1.4301844185171248e-05,
"loss": 0.1027,
"step": 75700
},
{
"epoch": 28.53,
"learning_rate": 1.4294316898757999e-05,
"loss": 0.1014,
"step": 75800
},
{
"epoch": 28.57,
"learning_rate": 1.4286789612344751e-05,
"loss": 0.1013,
"step": 75900
},
{
"epoch": 28.6,
"learning_rate": 1.4279262325931502e-05,
"loss": 0.1018,
"step": 76000
},
{
"epoch": 28.64,
"learning_rate": 1.4271735039518254e-05,
"loss": 0.102,
"step": 76100
},
{
"epoch": 28.68,
"learning_rate": 1.4264207753105005e-05,
"loss": 0.1013,
"step": 76200
},
{
"epoch": 28.72,
"learning_rate": 1.425668046669176e-05,
"loss": 0.1027,
"step": 76300
},
{
"epoch": 28.75,
"learning_rate": 1.424915318027851e-05,
"loss": 0.1004,
"step": 76400
},
{
"epoch": 28.79,
"learning_rate": 1.4241625893865263e-05,
"loss": 0.1013,
"step": 76500
},
{
"epoch": 28.83,
"learning_rate": 1.4234098607452014e-05,
"loss": 0.1021,
"step": 76600
},
{
"epoch": 28.87,
"learning_rate": 1.4226571321038766e-05,
"loss": 0.1009,
"step": 76700
},
{
"epoch": 28.9,
"learning_rate": 1.4219044034625517e-05,
"loss": 0.1012,
"step": 76800
},
{
"epoch": 28.94,
"learning_rate": 1.4211516748212271e-05,
"loss": 0.1015,
"step": 76900
},
{
"epoch": 28.98,
"learning_rate": 1.4203989461799024e-05,
"loss": 0.1026,
"step": 77000
},
{
"epoch": 29.0,
"eval_loss": 0.10109123587608337,
"eval_runtime": 45.9148,
"eval_samples_per_second": 163.346,
"eval_steps_per_second": 10.215,
"step": 77053
},
{
"epoch": 29.02,
"learning_rate": 1.4196462175385774e-05,
"loss": 0.102,
"step": 77100
},
{
"epoch": 29.06,
"learning_rate": 1.4188934888972527e-05,
"loss": 0.1028,
"step": 77200
},
{
"epoch": 29.09,
"learning_rate": 1.4181407602559278e-05,
"loss": 0.1016,
"step": 77300
},
{
"epoch": 29.13,
"learning_rate": 1.4173880316146032e-05,
"loss": 0.1021,
"step": 77400
},
{
"epoch": 29.17,
"learning_rate": 1.4166353029732783e-05,
"loss": 0.1018,
"step": 77500
},
{
"epoch": 29.21,
"learning_rate": 1.4158825743319535e-05,
"loss": 0.1004,
"step": 77600
},
{
"epoch": 29.24,
"learning_rate": 1.4151298456906286e-05,
"loss": 0.102,
"step": 77700
},
{
"epoch": 29.28,
"learning_rate": 1.4143771170493039e-05,
"loss": 0.1013,
"step": 77800
},
{
"epoch": 29.32,
"learning_rate": 1.413624388407979e-05,
"loss": 0.1014,
"step": 77900
},
{
"epoch": 29.36,
"learning_rate": 1.4128716597666544e-05,
"loss": 0.1003,
"step": 78000
},
{
"epoch": 29.39,
"learning_rate": 1.4121189311253294e-05,
"loss": 0.1009,
"step": 78100
},
{
"epoch": 29.43,
"learning_rate": 1.4113662024840047e-05,
"loss": 0.1008,
"step": 78200
},
{
"epoch": 29.47,
"learning_rate": 1.4106134738426798e-05,
"loss": 0.1015,
"step": 78300
},
{
"epoch": 29.51,
"learning_rate": 1.409860745201355e-05,
"loss": 0.1019,
"step": 78400
},
{
"epoch": 29.54,
"learning_rate": 1.4091080165600301e-05,
"loss": 0.1014,
"step": 78500
},
{
"epoch": 29.58,
"learning_rate": 1.4083552879187055e-05,
"loss": 0.1009,
"step": 78600
},
{
"epoch": 29.62,
"learning_rate": 1.4076025592773806e-05,
"loss": 0.1013,
"step": 78700
},
{
"epoch": 29.66,
"learning_rate": 1.4068498306360559e-05,
"loss": 0.1018,
"step": 78800
},
{
"epoch": 29.7,
"learning_rate": 1.406097101994731e-05,
"loss": 0.1026,
"step": 78900
},
{
"epoch": 29.73,
"learning_rate": 1.4053443733534062e-05,
"loss": 0.1005,
"step": 79000
},
{
"epoch": 29.77,
"learning_rate": 1.4045916447120813e-05,
"loss": 0.1009,
"step": 79100
},
{
"epoch": 29.81,
"learning_rate": 1.4038389160707565e-05,
"loss": 0.1001,
"step": 79200
},
{
"epoch": 29.85,
"learning_rate": 1.4030861874294316e-05,
"loss": 0.102,
"step": 79300
},
{
"epoch": 29.88,
"learning_rate": 1.402333458788107e-05,
"loss": 0.1011,
"step": 79400
},
{
"epoch": 29.92,
"learning_rate": 1.4015807301467823e-05,
"loss": 0.1011,
"step": 79500
},
{
"epoch": 29.96,
"learning_rate": 1.4008280015054574e-05,
"loss": 0.1011,
"step": 79600
},
{
"epoch": 30.0,
"learning_rate": 1.4000752728641326e-05,
"loss": 0.1022,
"step": 79700
},
{
"epoch": 30.0,
"eval_loss": 0.10009202361106873,
"eval_runtime": 45.5608,
"eval_samples_per_second": 164.615,
"eval_steps_per_second": 10.294,
"step": 79710
},
{
"epoch": 30.03,
"learning_rate": 1.3993225442228077e-05,
"loss": 0.1027,
"step": 79800
},
{
"epoch": 30.07,
"learning_rate": 1.3985698155814831e-05,
"loss": 0.1015,
"step": 79900
},
{
"epoch": 30.11,
"learning_rate": 1.3978170869401582e-05,
"loss": 0.1018,
"step": 80000
},
{
"epoch": 30.15,
"learning_rate": 1.3970643582988335e-05,
"loss": 0.1014,
"step": 80100
},
{
"epoch": 30.18,
"learning_rate": 1.3963116296575085e-05,
"loss": 0.1013,
"step": 80200
},
{
"epoch": 30.22,
"learning_rate": 1.3955589010161838e-05,
"loss": 0.0992,
"step": 80300
},
{
"epoch": 30.26,
"learning_rate": 1.3948061723748589e-05,
"loss": 0.1029,
"step": 80400
},
{
"epoch": 30.3,
"learning_rate": 1.3940534437335343e-05,
"loss": 0.1009,
"step": 80500
},
{
"epoch": 30.33,
"learning_rate": 1.3933007150922094e-05,
"loss": 0.1011,
"step": 80600
},
{
"epoch": 30.37,
"learning_rate": 1.3925479864508846e-05,
"loss": 0.1003,
"step": 80700
},
{
"epoch": 30.41,
"learning_rate": 1.3917952578095597e-05,
"loss": 0.1011,
"step": 80800
},
{
"epoch": 30.45,
"learning_rate": 1.391042529168235e-05,
"loss": 0.1003,
"step": 80900
},
{
"epoch": 30.49,
"learning_rate": 1.39028980052691e-05,
"loss": 0.1011,
"step": 81000
},
{
"epoch": 30.52,
"learning_rate": 1.3895370718855855e-05,
"loss": 0.1004,
"step": 81100
},
{
"epoch": 30.56,
"learning_rate": 1.3887843432442605e-05,
"loss": 0.1006,
"step": 81200
},
{
"epoch": 30.6,
"learning_rate": 1.3880316146029358e-05,
"loss": 0.1019,
"step": 81300
},
{
"epoch": 30.64,
"learning_rate": 1.3872788859616109e-05,
"loss": 0.1013,
"step": 81400
},
{
"epoch": 30.67,
"learning_rate": 1.3865261573202861e-05,
"loss": 0.0997,
"step": 81500
},
{
"epoch": 30.71,
"learning_rate": 1.3857734286789612e-05,
"loss": 0.1,
"step": 81600
},
{
"epoch": 30.75,
"learning_rate": 1.3850207000376366e-05,
"loss": 0.1004,
"step": 81700
},
{
"epoch": 30.79,
"learning_rate": 1.3842679713963117e-05,
"loss": 0.101,
"step": 81800
},
{
"epoch": 30.82,
"learning_rate": 1.383515242754987e-05,
"loss": 0.1003,
"step": 81900
},
{
"epoch": 30.86,
"learning_rate": 1.3827625141136622e-05,
"loss": 0.1,
"step": 82000
},
{
"epoch": 30.9,
"learning_rate": 1.3820097854723373e-05,
"loss": 0.1003,
"step": 82100
},
{
"epoch": 30.94,
"learning_rate": 1.3812570568310126e-05,
"loss": 0.0997,
"step": 82200
},
{
"epoch": 30.97,
"learning_rate": 1.3805043281896876e-05,
"loss": 0.0997,
"step": 82300
},
{
"epoch": 31.0,
"eval_loss": 0.10071013867855072,
"eval_runtime": 45.6947,
"eval_samples_per_second": 164.133,
"eval_steps_per_second": 10.264,
"step": 82367
},
{
"epoch": 31.01,
"learning_rate": 1.379751599548363e-05,
"loss": 0.0994,
"step": 82400
},
{
"epoch": 31.05,
"learning_rate": 1.3789988709070381e-05,
"loss": 0.1028,
"step": 82500
},
{
"epoch": 31.09,
"learning_rate": 1.3782461422657134e-05,
"loss": 0.1002,
"step": 82600
},
{
"epoch": 31.13,
"learning_rate": 1.3774934136243885e-05,
"loss": 0.0998,
"step": 82700
},
{
"epoch": 31.16,
"learning_rate": 1.3767406849830637e-05,
"loss": 0.1013,
"step": 82800
},
{
"epoch": 31.2,
"learning_rate": 1.3759879563417388e-05,
"loss": 0.1021,
"step": 82900
},
{
"epoch": 31.24,
"learning_rate": 1.3752352277004142e-05,
"loss": 0.1008,
"step": 83000
},
{
"epoch": 31.28,
"learning_rate": 1.3744824990590893e-05,
"loss": 0.101,
"step": 83100
},
{
"epoch": 31.31,
"learning_rate": 1.3737297704177646e-05,
"loss": 0.1016,
"step": 83200
},
{
"epoch": 31.35,
"learning_rate": 1.3729770417764396e-05,
"loss": 0.0993,
"step": 83300
},
{
"epoch": 31.39,
"learning_rate": 1.3722243131351149e-05,
"loss": 0.0993,
"step": 83400
},
{
"epoch": 31.43,
"learning_rate": 1.37147158449379e-05,
"loss": 0.0998,
"step": 83500
},
{
"epoch": 31.46,
"learning_rate": 1.3707188558524654e-05,
"loss": 0.1008,
"step": 83600
},
{
"epoch": 31.5,
"learning_rate": 1.3699661272111405e-05,
"loss": 0.0989,
"step": 83700
},
{
"epoch": 31.54,
"learning_rate": 1.3692133985698157e-05,
"loss": 0.1027,
"step": 83800
},
{
"epoch": 31.58,
"learning_rate": 1.3684606699284908e-05,
"loss": 0.1001,
"step": 83900
},
{
"epoch": 31.61,
"learning_rate": 1.367707941287166e-05,
"loss": 0.1006,
"step": 84000
},
{
"epoch": 31.65,
"learning_rate": 1.3669552126458411e-05,
"loss": 0.0991,
"step": 84100
},
{
"epoch": 31.69,
"learning_rate": 1.3662024840045166e-05,
"loss": 0.1005,
"step": 84200
},
{
"epoch": 31.73,
"learning_rate": 1.3654497553631916e-05,
"loss": 0.099,
"step": 84300
},
{
"epoch": 31.77,
"learning_rate": 1.3646970267218669e-05,
"loss": 0.1002,
"step": 84400
},
{
"epoch": 31.8,
"learning_rate": 1.3639442980805421e-05,
"loss": 0.1001,
"step": 84500
},
{
"epoch": 31.84,
"learning_rate": 1.3631915694392172e-05,
"loss": 0.0988,
"step": 84600
},
{
"epoch": 31.88,
"learning_rate": 1.3624388407978926e-05,
"loss": 0.0998,
"step": 84700
},
{
"epoch": 31.92,
"learning_rate": 1.3616861121565677e-05,
"loss": 0.0996,
"step": 84800
},
{
"epoch": 31.95,
"learning_rate": 1.360933383515243e-05,
"loss": 0.1022,
"step": 84900
},
{
"epoch": 31.99,
"learning_rate": 1.360180654873918e-05,
"loss": 0.0998,
"step": 85000
},
{
"epoch": 32.0,
"eval_loss": 0.10160314291715622,
"eval_runtime": 45.6356,
"eval_samples_per_second": 164.345,
"eval_steps_per_second": 10.277,
"step": 85024
},
{
"epoch": 32.03,
"learning_rate": 1.3594279262325933e-05,
"loss": 0.1026,
"step": 85100
},
{
"epoch": 32.07,
"learning_rate": 1.3586751975912684e-05,
"loss": 0.0999,
"step": 85200
},
{
"epoch": 32.1,
"learning_rate": 1.3579224689499436e-05,
"loss": 0.101,
"step": 85300
},
{
"epoch": 32.14,
"learning_rate": 1.3571697403086187e-05,
"loss": 0.1007,
"step": 85400
},
{
"epoch": 32.18,
"learning_rate": 1.3564170116672942e-05,
"loss": 0.1001,
"step": 85500
},
{
"epoch": 32.22,
"learning_rate": 1.3556642830259692e-05,
"loss": 0.1014,
"step": 85600
},
{
"epoch": 32.25,
"learning_rate": 1.3549115543846445e-05,
"loss": 0.0993,
"step": 85700
},
{
"epoch": 32.29,
"learning_rate": 1.3541588257433196e-05,
"loss": 0.1004,
"step": 85800
},
{
"epoch": 32.33,
"learning_rate": 1.3534060971019948e-05,
"loss": 0.1015,
"step": 85900
},
{
"epoch": 32.37,
"learning_rate": 1.3526533684606699e-05,
"loss": 0.0991,
"step": 86000
},
{
"epoch": 32.4,
"learning_rate": 1.3519006398193453e-05,
"loss": 0.0984,
"step": 86100
},
{
"epoch": 32.44,
"learning_rate": 1.3511479111780204e-05,
"loss": 0.1011,
"step": 86200
},
{
"epoch": 32.48,
"learning_rate": 1.3503951825366957e-05,
"loss": 0.1,
"step": 86300
},
{
"epoch": 32.52,
"learning_rate": 1.3496424538953707e-05,
"loss": 0.1009,
"step": 86400
},
{
"epoch": 32.56,
"learning_rate": 1.348889725254046e-05,
"loss": 0.0996,
"step": 86500
},
{
"epoch": 32.59,
"learning_rate": 1.348136996612721e-05,
"loss": 0.1003,
"step": 86600
},
{
"epoch": 32.63,
"learning_rate": 1.3473842679713965e-05,
"loss": 0.0997,
"step": 86700
},
{
"epoch": 32.67,
"learning_rate": 1.3466315393300716e-05,
"loss": 0.1016,
"step": 86800
},
{
"epoch": 32.71,
"learning_rate": 1.3458788106887468e-05,
"loss": 0.1003,
"step": 86900
},
{
"epoch": 32.74,
"learning_rate": 1.345126082047422e-05,
"loss": 0.0984,
"step": 87000
},
{
"epoch": 32.78,
"learning_rate": 1.3443733534060972e-05,
"loss": 0.1008,
"step": 87100
},
{
"epoch": 32.82,
"learning_rate": 1.3436206247647726e-05,
"loss": 0.0999,
"step": 87200
},
{
"epoch": 32.86,
"learning_rate": 1.3428678961234477e-05,
"loss": 0.0996,
"step": 87300
},
{
"epoch": 32.89,
"learning_rate": 1.3421151674821229e-05,
"loss": 0.101,
"step": 87400
},
{
"epoch": 32.93,
"learning_rate": 1.341362438840798e-05,
"loss": 0.099,
"step": 87500
},
{
"epoch": 32.97,
"learning_rate": 1.3406097101994732e-05,
"loss": 0.1019,
"step": 87600
},
{
"epoch": 33.0,
"eval_loss": 0.10076244920492172,
"eval_runtime": 45.2424,
"eval_samples_per_second": 165.774,
"eval_steps_per_second": 10.366,
"step": 87681
},
{
"epoch": 33.01,
"learning_rate": 1.3398569815581483e-05,
"loss": 0.1001,
"step": 87700
},
{
"epoch": 33.04,
"learning_rate": 1.3391042529168237e-05,
"loss": 0.1017,
"step": 87800
},
{
"epoch": 33.08,
"learning_rate": 1.3383515242754988e-05,
"loss": 0.1,
"step": 87900
},
{
"epoch": 33.12,
"learning_rate": 1.337598795634174e-05,
"loss": 0.0983,
"step": 88000
},
{
"epoch": 33.16,
"learning_rate": 1.3368460669928492e-05,
"loss": 0.1,
"step": 88100
},
{
"epoch": 33.2,
"learning_rate": 1.3360933383515244e-05,
"loss": 0.1008,
"step": 88200
},
{
"epoch": 33.23,
"learning_rate": 1.3353406097101995e-05,
"loss": 0.0998,
"step": 88300
},
{
"epoch": 33.27,
"learning_rate": 1.3345878810688747e-05,
"loss": 0.1,
"step": 88400
},
{
"epoch": 33.31,
"learning_rate": 1.3338351524275498e-05,
"loss": 0.0997,
"step": 88500
},
{
"epoch": 33.35,
"learning_rate": 1.3330824237862252e-05,
"loss": 0.0995,
"step": 88600
},
{
"epoch": 33.38,
"learning_rate": 1.3323296951449003e-05,
"loss": 0.1015,
"step": 88700
},
{
"epoch": 33.42,
"learning_rate": 1.3315769665035756e-05,
"loss": 0.0993,
"step": 88800
},
{
"epoch": 33.46,
"learning_rate": 1.3308242378622507e-05,
"loss": 0.0986,
"step": 88900
},
{
"epoch": 33.5,
"learning_rate": 1.330071509220926e-05,
"loss": 0.1003,
"step": 89000
},
{
"epoch": 33.53,
"learning_rate": 1.329318780579601e-05,
"loss": 0.0997,
"step": 89100
},
{
"epoch": 33.57,
"learning_rate": 1.3285660519382764e-05,
"loss": 0.0993,
"step": 89200
},
{
"epoch": 33.61,
"learning_rate": 1.3278133232969515e-05,
"loss": 0.1017,
"step": 89300
},
{
"epoch": 33.65,
"learning_rate": 1.3270605946556268e-05,
"loss": 0.1003,
"step": 89400
},
{
"epoch": 33.68,
"learning_rate": 1.326307866014302e-05,
"loss": 0.1012,
"step": 89500
},
{
"epoch": 33.72,
"learning_rate": 1.3255551373729771e-05,
"loss": 0.1006,
"step": 89600
},
{
"epoch": 33.76,
"learning_rate": 1.3248024087316525e-05,
"loss": 0.0976,
"step": 89700
},
{
"epoch": 33.8,
"learning_rate": 1.3240496800903276e-05,
"loss": 0.1002,
"step": 89800
},
{
"epoch": 33.84,
"learning_rate": 1.3232969514490028e-05,
"loss": 0.0984,
"step": 89900
},
{
"epoch": 33.87,
"learning_rate": 1.322544222807678e-05,
"loss": 0.0987,
"step": 90000
},
{
"epoch": 33.91,
"learning_rate": 1.3217914941663532e-05,
"loss": 0.1008,
"step": 90100
},
{
"epoch": 33.95,
"learning_rate": 1.3210387655250283e-05,
"loss": 0.1031,
"step": 90200
},
{
"epoch": 33.99,
"learning_rate": 1.3202860368837037e-05,
"loss": 0.0999,
"step": 90300
},
{
"epoch": 34.0,
"eval_loss": 0.10001099109649658,
"eval_runtime": 45.4099,
"eval_samples_per_second": 165.162,
"eval_steps_per_second": 10.328,
"step": 90338
},
{
"epoch": 34.02,
"learning_rate": 1.3195333082423788e-05,
"loss": 0.0995,
"step": 90400
},
{
"epoch": 34.06,
"learning_rate": 1.318780579601054e-05,
"loss": 0.1017,
"step": 90500
},
{
"epoch": 34.1,
"learning_rate": 1.3180278509597291e-05,
"loss": 0.101,
"step": 90600
},
{
"epoch": 34.14,
"learning_rate": 1.3172751223184043e-05,
"loss": 0.1002,
"step": 90700
},
{
"epoch": 34.17,
"learning_rate": 1.3165223936770794e-05,
"loss": 0.0988,
"step": 90800
},
{
"epoch": 34.21,
"learning_rate": 1.3157696650357548e-05,
"loss": 0.101,
"step": 90900
},
{
"epoch": 34.25,
"learning_rate": 1.31501693639443e-05,
"loss": 0.0999,
"step": 91000
},
{
"epoch": 34.29,
"learning_rate": 1.3142642077531052e-05,
"loss": 0.0994,
"step": 91100
},
{
"epoch": 34.32,
"learning_rate": 1.3135114791117803e-05,
"loss": 0.1002,
"step": 91200
},
{
"epoch": 34.36,
"learning_rate": 1.3127587504704555e-05,
"loss": 0.0987,
"step": 91300
},
{
"epoch": 34.4,
"learning_rate": 1.3120060218291306e-05,
"loss": 0.1001,
"step": 91400
},
{
"epoch": 34.44,
"learning_rate": 1.3112532931878058e-05,
"loss": 0.099,
"step": 91500
},
{
"epoch": 34.47,
"learning_rate": 1.310500564546481e-05,
"loss": 0.0987,
"step": 91600
},
{
"epoch": 34.51,
"learning_rate": 1.3097478359051563e-05,
"loss": 0.0997,
"step": 91700
},
{
"epoch": 34.55,
"learning_rate": 1.3089951072638314e-05,
"loss": 0.1008,
"step": 91800
},
{
"epoch": 34.59,
"learning_rate": 1.3082423786225067e-05,
"loss": 0.0991,
"step": 91900
},
{
"epoch": 34.63,
"learning_rate": 1.307489649981182e-05,
"loss": 0.0995,
"step": 92000
},
{
"epoch": 34.66,
"learning_rate": 1.306736921339857e-05,
"loss": 0.1001,
"step": 92100
},
{
"epoch": 34.7,
"learning_rate": 1.3059841926985324e-05,
"loss": 0.0991,
"step": 92200
},
{
"epoch": 34.74,
"learning_rate": 1.3052314640572075e-05,
"loss": 0.0977,
"step": 92300
},
{
"epoch": 34.78,
"learning_rate": 1.3044787354158828e-05,
"loss": 0.0993,
"step": 92400
},
{
"epoch": 34.81,
"learning_rate": 1.3037260067745578e-05,
"loss": 0.1001,
"step": 92500
},
{
"epoch": 34.85,
"learning_rate": 1.3029732781332331e-05,
"loss": 0.1,
"step": 92600
},
{
"epoch": 34.89,
"learning_rate": 1.3022205494919082e-05,
"loss": 0.0998,
"step": 92700
},
{
"epoch": 34.93,
"learning_rate": 1.3014678208505836e-05,
"loss": 0.0996,
"step": 92800
},
{
"epoch": 34.96,
"learning_rate": 1.3007150922092587e-05,
"loss": 0.0998,
"step": 92900
},
{
"epoch": 35.0,
"eval_loss": 0.09930834919214249,
"eval_runtime": 45.6646,
"eval_samples_per_second": 164.241,
"eval_steps_per_second": 10.271,
"step": 92995
},
{
"epoch": 35.0,
"learning_rate": 1.299962363567934e-05,
"loss": 0.1003,
"step": 93000
},
{
"epoch": 35.04,
"learning_rate": 1.299209634926609e-05,
"loss": 0.0996,
"step": 93100
},
{
"epoch": 35.08,
"learning_rate": 1.2984569062852843e-05,
"loss": 0.0986,
"step": 93200
},
{
"epoch": 35.11,
"learning_rate": 1.2977041776439594e-05,
"loss": 0.0999,
"step": 93300
},
{
"epoch": 35.15,
"learning_rate": 1.2969514490026348e-05,
"loss": 0.1006,
"step": 93400
},
{
"epoch": 35.19,
"learning_rate": 1.2961987203613099e-05,
"loss": 0.0999,
"step": 93500
},
{
"epoch": 35.23,
"learning_rate": 1.2954459917199851e-05,
"loss": 0.0984,
"step": 93600
},
{
"epoch": 35.27,
"learning_rate": 1.2946932630786602e-05,
"loss": 0.0981,
"step": 93700
},
{
"epoch": 35.3,
"learning_rate": 1.2939405344373354e-05,
"loss": 0.1004,
"step": 93800
},
{
"epoch": 35.34,
"learning_rate": 1.2931878057960105e-05,
"loss": 0.0994,
"step": 93900
},
{
"epoch": 35.38,
"learning_rate": 1.292435077154686e-05,
"loss": 0.0984,
"step": 94000
},
{
"epoch": 35.42,
"learning_rate": 1.291682348513361e-05,
"loss": 0.1002,
"step": 94100
},
{
"epoch": 35.45,
"learning_rate": 1.2909296198720363e-05,
"loss": 0.0997,
"step": 94200
},
{
"epoch": 35.49,
"learning_rate": 1.2901768912307114e-05,
"loss": 0.0977,
"step": 94300
},
{
"epoch": 35.53,
"learning_rate": 1.2894241625893866e-05,
"loss": 0.0991,
"step": 94400
},
{
"epoch": 35.57,
"learning_rate": 1.2886714339480619e-05,
"loss": 0.0981,
"step": 94500
},
{
"epoch": 35.6,
"learning_rate": 1.287918705306737e-05,
"loss": 0.0998,
"step": 94600
},
{
"epoch": 35.64,
"learning_rate": 1.2871659766654124e-05,
"loss": 0.0999,
"step": 94700
},
{
"epoch": 35.68,
"learning_rate": 1.2864132480240874e-05,
"loss": 0.0989,
"step": 94800
},
{
"epoch": 35.72,
"learning_rate": 1.2856605193827627e-05,
"loss": 0.1,
"step": 94900
},
{
"epoch": 35.75,
"learning_rate": 1.2849077907414378e-05,
"loss": 0.1003,
"step": 95000
},
{
"epoch": 35.79,
"learning_rate": 1.284155062100113e-05,
"loss": 0.0997,
"step": 95100
},
{
"epoch": 35.83,
"learning_rate": 1.2834023334587881e-05,
"loss": 0.1002,
"step": 95200
},
{
"epoch": 35.87,
"learning_rate": 1.2826496048174635e-05,
"loss": 0.0986,
"step": 95300
},
{
"epoch": 35.91,
"learning_rate": 1.2818968761761386e-05,
"loss": 0.0999,
"step": 95400
},
{
"epoch": 35.94,
"learning_rate": 1.2811441475348139e-05,
"loss": 0.1005,
"step": 95500
},
{
"epoch": 35.98,
"learning_rate": 1.280391418893489e-05,
"loss": 0.0994,
"step": 95600
},
{
"epoch": 36.0,
"eval_loss": 0.09918170422315598,
"eval_runtime": 45.6422,
"eval_samples_per_second": 164.321,
"eval_steps_per_second": 10.276,
"step": 95652
},
{
"epoch": 36.02,
"learning_rate": 1.2796386902521642e-05,
"loss": 0.0979,
"step": 95700
},
{
"epoch": 36.06,
"learning_rate": 1.2788859616108393e-05,
"loss": 0.0981,
"step": 95800
},
{
"epoch": 36.09,
"learning_rate": 1.2781332329695147e-05,
"loss": 0.0992,
"step": 95900
},
{
"epoch": 36.13,
"learning_rate": 1.2773805043281898e-05,
"loss": 0.0991,
"step": 96000
},
{
"epoch": 36.17,
"learning_rate": 1.276627775686865e-05,
"loss": 0.1003,
"step": 96100
},
{
"epoch": 36.21,
"learning_rate": 1.2758750470455401e-05,
"loss": 0.0991,
"step": 96200
},
{
"epoch": 36.24,
"learning_rate": 1.2751223184042154e-05,
"loss": 0.0985,
"step": 96300
},
{
"epoch": 36.28,
"learning_rate": 1.2743695897628904e-05,
"loss": 0.0993,
"step": 96400
},
{
"epoch": 36.32,
"learning_rate": 1.2736168611215659e-05,
"loss": 0.0998,
"step": 96500
},
{
"epoch": 36.36,
"learning_rate": 1.272864132480241e-05,
"loss": 0.1014,
"step": 96600
},
{
"epoch": 36.39,
"learning_rate": 1.2721114038389162e-05,
"loss": 0.1005,
"step": 96700
},
{
"epoch": 36.43,
"learning_rate": 1.2713586751975913e-05,
"loss": 0.0992,
"step": 96800
},
{
"epoch": 36.47,
"learning_rate": 1.2706059465562665e-05,
"loss": 0.0981,
"step": 96900
},
{
"epoch": 36.51,
"learning_rate": 1.2698532179149418e-05,
"loss": 0.0988,
"step": 97000
},
{
"epoch": 36.54,
"learning_rate": 1.2691004892736169e-05,
"loss": 0.0978,
"step": 97100
},
{
"epoch": 36.58,
"learning_rate": 1.2683477606322923e-05,
"loss": 0.0989,
"step": 97200
},
{
"epoch": 36.62,
"learning_rate": 1.2675950319909674e-05,
"loss": 0.098,
"step": 97300
},
{
"epoch": 36.66,
"learning_rate": 1.2668423033496426e-05,
"loss": 0.0979,
"step": 97400
},
{
"epoch": 36.7,
"learning_rate": 1.2660895747083177e-05,
"loss": 0.0989,
"step": 97500
},
{
"epoch": 36.73,
"learning_rate": 1.265336846066993e-05,
"loss": 0.0993,
"step": 97600
},
{
"epoch": 36.77,
"learning_rate": 1.264584117425668e-05,
"loss": 0.0988,
"step": 97700
},
{
"epoch": 36.81,
"learning_rate": 1.2638313887843435e-05,
"loss": 0.0979,
"step": 97800
},
{
"epoch": 36.85,
"learning_rate": 1.2630786601430185e-05,
"loss": 0.0982,
"step": 97900
},
{
"epoch": 36.88,
"learning_rate": 1.2623259315016938e-05,
"loss": 0.0992,
"step": 98000
},
{
"epoch": 36.92,
"learning_rate": 1.2615732028603689e-05,
"loss": 0.1,
"step": 98100
},
{
"epoch": 36.96,
"learning_rate": 1.2608204742190441e-05,
"loss": 0.0977,
"step": 98200
},
{
"epoch": 37.0,
"learning_rate": 1.2600677455777192e-05,
"loss": 0.0966,
"step": 98300
},
{
"epoch": 37.0,
"eval_loss": 0.09910181164741516,
"eval_runtime": 45.3338,
"eval_samples_per_second": 165.439,
"eval_steps_per_second": 10.345,
"step": 98309
},
{
"epoch": 37.03,
"learning_rate": 1.2593150169363946e-05,
"loss": 0.0961,
"step": 98400
},
{
"epoch": 37.07,
"learning_rate": 1.2585622882950697e-05,
"loss": 0.0995,
"step": 98500
},
{
"epoch": 37.11,
"learning_rate": 1.257809559653745e-05,
"loss": 0.0996,
"step": 98600
},
{
"epoch": 37.15,
"learning_rate": 1.25705683101242e-05,
"loss": 0.0982,
"step": 98700
},
{
"epoch": 37.18,
"learning_rate": 1.2563041023710953e-05,
"loss": 0.0985,
"step": 98800
},
{
"epoch": 37.22,
"learning_rate": 1.2555513737297704e-05,
"loss": 0.0968,
"step": 98900
},
{
"epoch": 37.26,
"learning_rate": 1.2547986450884458e-05,
"loss": 0.0982,
"step": 99000
},
{
"epoch": 37.3,
"learning_rate": 1.2540459164471209e-05,
"loss": 0.0994,
"step": 99100
},
{
"epoch": 37.34,
"learning_rate": 1.2532931878057961e-05,
"loss": 0.1004,
"step": 99200
},
{
"epoch": 37.37,
"learning_rate": 1.2525404591644712e-05,
"loss": 0.0988,
"step": 99300
},
{
"epoch": 37.41,
"learning_rate": 1.2517877305231465e-05,
"loss": 0.099,
"step": 99400
},
{
"epoch": 37.45,
"learning_rate": 1.2510350018818219e-05,
"loss": 0.0991,
"step": 99500
},
{
"epoch": 37.49,
"learning_rate": 1.250282273240497e-05,
"loss": 0.0978,
"step": 99600
},
{
"epoch": 37.52,
"learning_rate": 1.2495295445991722e-05,
"loss": 0.0981,
"step": 99700
},
{
"epoch": 37.56,
"learning_rate": 1.2487768159578473e-05,
"loss": 0.0989,
"step": 99800
},
{
"epoch": 37.6,
"learning_rate": 1.2480240873165226e-05,
"loss": 0.0982,
"step": 99900
},
{
"epoch": 37.64,
"learning_rate": 1.2472713586751976e-05,
"loss": 0.1001,
"step": 100000
},
{
"epoch": 37.67,
"learning_rate": 1.2465186300338729e-05,
"loss": 0.0972,
"step": 100100
},
{
"epoch": 37.71,
"learning_rate": 1.245765901392548e-05,
"loss": 0.0993,
"step": 100200
},
{
"epoch": 37.75,
"learning_rate": 1.2450131727512234e-05,
"loss": 0.0997,
"step": 100300
},
{
"epoch": 37.79,
"learning_rate": 1.2442604441098985e-05,
"loss": 0.0978,
"step": 100400
},
{
"epoch": 37.82,
"learning_rate": 1.2435077154685737e-05,
"loss": 0.0992,
"step": 100500
},
{
"epoch": 37.86,
"learning_rate": 1.2427549868272488e-05,
"loss": 0.0992,
"step": 100600
},
{
"epoch": 37.9,
"learning_rate": 1.242002258185924e-05,
"loss": 0.0984,
"step": 100700
},
{
"epoch": 37.94,
"learning_rate": 1.2412495295445991e-05,
"loss": 0.0983,
"step": 100800
},
{
"epoch": 37.98,
"learning_rate": 1.2404968009032746e-05,
"loss": 0.0997,
"step": 100900
},
{
"epoch": 38.0,
"eval_loss": 0.09699103981256485,
"eval_runtime": 45.3352,
"eval_samples_per_second": 165.435,
"eval_steps_per_second": 10.345,
"step": 100966
},
{
"epoch": 38.01,
"learning_rate": 1.2397440722619496e-05,
"loss": 0.0983,
"step": 101000
},
{
"epoch": 38.05,
"learning_rate": 1.2389913436206249e-05,
"loss": 0.0984,
"step": 101100
},
{
"epoch": 38.09,
"learning_rate": 1.2382386149793e-05,
"loss": 0.0971,
"step": 101200
},
{
"epoch": 38.13,
"learning_rate": 1.2374858863379752e-05,
"loss": 0.0979,
"step": 101300
},
{
"epoch": 38.16,
"learning_rate": 1.2367331576966503e-05,
"loss": 0.0992,
"step": 101400
},
{
"epoch": 38.2,
"learning_rate": 1.2359804290553257e-05,
"loss": 0.0989,
"step": 101500
},
{
"epoch": 38.24,
"learning_rate": 1.2352277004140008e-05,
"loss": 0.0988,
"step": 101600
},
{
"epoch": 38.28,
"learning_rate": 1.234474971772676e-05,
"loss": 0.098,
"step": 101700
},
{
"epoch": 38.31,
"learning_rate": 1.2337222431313511e-05,
"loss": 0.0961,
"step": 101800
},
{
"epoch": 38.35,
"learning_rate": 1.2329695144900264e-05,
"loss": 0.0978,
"step": 101900
},
{
"epoch": 38.39,
"learning_rate": 1.2322167858487018e-05,
"loss": 0.1003,
"step": 102000
},
{
"epoch": 38.43,
"learning_rate": 1.2314640572073769e-05,
"loss": 0.0989,
"step": 102100
},
{
"epoch": 38.46,
"learning_rate": 1.2307113285660521e-05,
"loss": 0.0984,
"step": 102200
},
{
"epoch": 38.5,
"learning_rate": 1.2299585999247272e-05,
"loss": 0.0985,
"step": 102300
},
{
"epoch": 38.54,
"learning_rate": 1.2292058712834025e-05,
"loss": 0.1008,
"step": 102400
},
{
"epoch": 38.58,
"learning_rate": 1.2284531426420776e-05,
"loss": 0.0969,
"step": 102500
},
{
"epoch": 38.61,
"learning_rate": 1.227700414000753e-05,
"loss": 0.0981,
"step": 102600
},
{
"epoch": 38.65,
"learning_rate": 1.226947685359428e-05,
"loss": 0.0979,
"step": 102700
},
{
"epoch": 38.69,
"learning_rate": 1.2261949567181033e-05,
"loss": 0.0994,
"step": 102800
},
{
"epoch": 38.73,
"learning_rate": 1.2254422280767784e-05,
"loss": 0.0992,
"step": 102900
},
{
"epoch": 38.77,
"learning_rate": 1.2246894994354536e-05,
"loss": 0.0981,
"step": 103000
},
{
"epoch": 38.8,
"learning_rate": 1.2239367707941287e-05,
"loss": 0.0991,
"step": 103100
},
{
"epoch": 38.84,
"learning_rate": 1.223184042152804e-05,
"loss": 0.0977,
"step": 103200
},
{
"epoch": 38.88,
"learning_rate": 1.222431313511479e-05,
"loss": 0.0979,
"step": 103300
},
{
"epoch": 38.92,
"learning_rate": 1.2216785848701545e-05,
"loss": 0.0976,
"step": 103400
},
{
"epoch": 38.95,
"learning_rate": 1.2209258562288296e-05,
"loss": 0.0996,
"step": 103500
},
{
"epoch": 38.99,
"learning_rate": 1.2201731275875048e-05,
"loss": 0.0991,
"step": 103600
},
{
"epoch": 39.0,
"eval_loss": 0.09791671484708786,
"eval_runtime": 45.4451,
"eval_samples_per_second": 165.034,
"eval_steps_per_second": 10.32,
"step": 103623
},
{
"epoch": 39.03,
"learning_rate": 1.2194203989461799e-05,
"loss": 0.098,
"step": 103700
},
{
"epoch": 39.07,
"learning_rate": 1.2186676703048552e-05,
"loss": 0.0965,
"step": 103800
},
{
"epoch": 39.1,
"learning_rate": 1.2179149416635302e-05,
"loss": 0.0979,
"step": 103900
},
{
"epoch": 39.14,
"learning_rate": 1.2171622130222057e-05,
"loss": 0.0978,
"step": 104000
},
{
"epoch": 39.18,
"learning_rate": 1.2164094843808807e-05,
"loss": 0.0996,
"step": 104100
},
{
"epoch": 39.22,
"learning_rate": 1.215656755739556e-05,
"loss": 0.0995,
"step": 104200
},
{
"epoch": 39.25,
"learning_rate": 1.2149040270982312e-05,
"loss": 0.0988,
"step": 104300
},
{
"epoch": 39.29,
"learning_rate": 1.2141512984569063e-05,
"loss": 0.0975,
"step": 104400
},
{
"epoch": 39.33,
"learning_rate": 1.2133985698155817e-05,
"loss": 0.098,
"step": 104500
},
{
"epoch": 39.37,
"learning_rate": 1.2126458411742568e-05,
"loss": 0.098,
"step": 104600
},
{
"epoch": 39.41,
"learning_rate": 1.211893112532932e-05,
"loss": 0.098,
"step": 104700
},
{
"epoch": 39.44,
"learning_rate": 1.2111403838916072e-05,
"loss": 0.0995,
"step": 104800
},
{
"epoch": 39.48,
"learning_rate": 1.2103876552502824e-05,
"loss": 0.0977,
"step": 104900
},
{
"epoch": 39.52,
"learning_rate": 1.2096349266089575e-05,
"loss": 0.0988,
"step": 105000
},
{
"epoch": 39.56,
"learning_rate": 1.2088821979676329e-05,
"loss": 0.0986,
"step": 105100
},
{
"epoch": 39.59,
"learning_rate": 1.208129469326308e-05,
"loss": 0.0987,
"step": 105200
},
{
"epoch": 39.63,
"learning_rate": 1.2073767406849832e-05,
"loss": 0.0979,
"step": 105300
},
{
"epoch": 39.67,
"learning_rate": 1.2066240120436583e-05,
"loss": 0.0963,
"step": 105400
},
{
"epoch": 39.71,
"learning_rate": 1.2058712834023336e-05,
"loss": 0.0978,
"step": 105500
},
{
"epoch": 39.74,
"learning_rate": 1.2051185547610087e-05,
"loss": 0.0989,
"step": 105600
},
{
"epoch": 39.78,
"learning_rate": 1.204365826119684e-05,
"loss": 0.0971,
"step": 105700
},
{
"epoch": 39.82,
"learning_rate": 1.2036130974783592e-05,
"loss": 0.0985,
"step": 105800
},
{
"epoch": 39.86,
"learning_rate": 1.2028603688370344e-05,
"loss": 0.0964,
"step": 105900
},
{
"epoch": 39.89,
"learning_rate": 1.2021076401957095e-05,
"loss": 0.0968,
"step": 106000
},
{
"epoch": 39.93,
"learning_rate": 1.2013549115543847e-05,
"loss": 0.0965,
"step": 106100
},
{
"epoch": 39.97,
"learning_rate": 1.2006021829130598e-05,
"loss": 0.099,
"step": 106200
},
{
"epoch": 40.0,
"eval_loss": 0.09832270443439484,
"eval_runtime": 45.1549,
"eval_samples_per_second": 166.095,
"eval_steps_per_second": 10.386,
"step": 106280
},
{
"epoch": 40.01,
"learning_rate": 1.199849454271735e-05,
"loss": 0.098,
"step": 106300
},
{
"epoch": 40.05,
"learning_rate": 1.1990967256304102e-05,
"loss": 0.098,
"step": 106400
},
{
"epoch": 40.08,
"learning_rate": 1.1983439969890856e-05,
"loss": 0.0977,
"step": 106500
},
{
"epoch": 40.12,
"learning_rate": 1.1975912683477607e-05,
"loss": 0.0994,
"step": 106600
},
{
"epoch": 40.16,
"learning_rate": 1.196838539706436e-05,
"loss": 0.0976,
"step": 106700
},
{
"epoch": 40.2,
"learning_rate": 1.1960858110651112e-05,
"loss": 0.098,
"step": 106800
},
{
"epoch": 40.23,
"learning_rate": 1.1953330824237862e-05,
"loss": 0.0978,
"step": 106900
},
{
"epoch": 40.27,
"learning_rate": 1.1945803537824617e-05,
"loss": 0.0981,
"step": 107000
},
{
"epoch": 40.31,
"learning_rate": 1.1938276251411368e-05,
"loss": 0.0971,
"step": 107100
},
{
"epoch": 40.35,
"learning_rate": 1.193074896499812e-05,
"loss": 0.0985,
"step": 107200
},
{
"epoch": 40.38,
"learning_rate": 1.1923221678584871e-05,
"loss": 0.0996,
"step": 107300
},
{
"epoch": 40.42,
"learning_rate": 1.1915694392171623e-05,
"loss": 0.0981,
"step": 107400
},
{
"epoch": 40.46,
"learning_rate": 1.1908167105758374e-05,
"loss": 0.0996,
"step": 107500
},
{
"epoch": 40.5,
"learning_rate": 1.1900639819345128e-05,
"loss": 0.0989,
"step": 107600
},
{
"epoch": 40.53,
"learning_rate": 1.189311253293188e-05,
"loss": 0.0992,
"step": 107700
},
{
"epoch": 40.57,
"learning_rate": 1.1885585246518632e-05,
"loss": 0.0974,
"step": 107800
},
{
"epoch": 40.61,
"learning_rate": 1.1878057960105383e-05,
"loss": 0.0984,
"step": 107900
},
{
"epoch": 40.65,
"learning_rate": 1.1870530673692135e-05,
"loss": 0.0984,
"step": 108000
},
{
"epoch": 40.68,
"learning_rate": 1.1863003387278886e-05,
"loss": 0.099,
"step": 108100
},
{
"epoch": 40.72,
"learning_rate": 1.185547610086564e-05,
"loss": 0.0984,
"step": 108200
},
{
"epoch": 40.76,
"learning_rate": 1.1847948814452391e-05,
"loss": 0.0966,
"step": 108300
},
{
"epoch": 40.8,
"learning_rate": 1.1840421528039143e-05,
"loss": 0.1,
"step": 108400
},
{
"epoch": 40.84,
"learning_rate": 1.1832894241625894e-05,
"loss": 0.0979,
"step": 108500
},
{
"epoch": 40.87,
"learning_rate": 1.1825366955212647e-05,
"loss": 0.0981,
"step": 108600
},
{
"epoch": 40.91,
"learning_rate": 1.1817839668799398e-05,
"loss": 0.0973,
"step": 108700
},
{
"epoch": 40.95,
"learning_rate": 1.1810312382386152e-05,
"loss": 0.0978,
"step": 108800
},
{
"epoch": 40.99,
"learning_rate": 1.1802785095972903e-05,
"loss": 0.0974,
"step": 108900
},
{
"epoch": 41.0,
"eval_loss": 0.09795571118593216,
"eval_runtime": 45.3802,
"eval_samples_per_second": 165.27,
"eval_steps_per_second": 10.335,
"step": 108937
},
{
"epoch": 41.02,
"learning_rate": 1.1795257809559655e-05,
"loss": 0.0961,
"step": 109000
},
{
"epoch": 41.06,
"learning_rate": 1.1787730523146406e-05,
"loss": 0.0989,
"step": 109100
},
{
"epoch": 41.1,
"learning_rate": 1.1780203236733158e-05,
"loss": 0.0988,
"step": 109200
},
{
"epoch": 41.14,
"learning_rate": 1.1772675950319911e-05,
"loss": 0.0979,
"step": 109300
},
{
"epoch": 41.17,
"learning_rate": 1.1765148663906662e-05,
"loss": 0.0971,
"step": 109400
},
{
"epoch": 41.21,
"learning_rate": 1.1757621377493416e-05,
"loss": 0.0965,
"step": 109500
},
{
"epoch": 41.25,
"learning_rate": 1.1750094091080167e-05,
"loss": 0.0982,
"step": 109600
},
{
"epoch": 41.29,
"learning_rate": 1.174256680466692e-05,
"loss": 0.0974,
"step": 109700
},
{
"epoch": 41.32,
"learning_rate": 1.173503951825367e-05,
"loss": 0.097,
"step": 109800
},
{
"epoch": 41.36,
"learning_rate": 1.1727512231840423e-05,
"loss": 0.0974,
"step": 109900
},
{
"epoch": 41.4,
"learning_rate": 1.1719984945427173e-05,
"loss": 0.0969,
"step": 110000
},
{
"epoch": 41.44,
"learning_rate": 1.1712457659013928e-05,
"loss": 0.0983,
"step": 110100
},
{
"epoch": 41.48,
"learning_rate": 1.1704930372600679e-05,
"loss": 0.0978,
"step": 110200
},
{
"epoch": 41.51,
"learning_rate": 1.1697403086187431e-05,
"loss": 0.0962,
"step": 110300
},
{
"epoch": 41.55,
"learning_rate": 1.1689875799774182e-05,
"loss": 0.0985,
"step": 110400
},
{
"epoch": 41.59,
"learning_rate": 1.1682348513360934e-05,
"loss": 0.0992,
"step": 110500
},
{
"epoch": 41.63,
"learning_rate": 1.1674821226947685e-05,
"loss": 0.0972,
"step": 110600
},
{
"epoch": 41.66,
"learning_rate": 1.166729394053444e-05,
"loss": 0.098,
"step": 110700
},
{
"epoch": 41.7,
"learning_rate": 1.165976665412119e-05,
"loss": 0.0991,
"step": 110800
},
{
"epoch": 41.74,
"learning_rate": 1.1652239367707943e-05,
"loss": 0.0963,
"step": 110900
},
{
"epoch": 41.78,
"learning_rate": 1.1644712081294694e-05,
"loss": 0.0983,
"step": 111000
},
{
"epoch": 41.81,
"learning_rate": 1.1637184794881446e-05,
"loss": 0.0988,
"step": 111100
},
{
"epoch": 41.85,
"learning_rate": 1.1629657508468197e-05,
"loss": 0.0977,
"step": 111200
},
{
"epoch": 41.89,
"learning_rate": 1.1622130222054951e-05,
"loss": 0.0984,
"step": 111300
},
{
"epoch": 41.93,
"learning_rate": 1.1614602935641702e-05,
"loss": 0.0974,
"step": 111400
},
{
"epoch": 41.96,
"learning_rate": 1.1607075649228454e-05,
"loss": 0.0974,
"step": 111500
},
{
"epoch": 42.0,
"eval_loss": 0.0971272811293602,
"eval_runtime": 45.4607,
"eval_samples_per_second": 164.978,
"eval_steps_per_second": 10.317,
"step": 111594
},
{
"epoch": 42.0,
"learning_rate": 1.1599548362815205e-05,
"loss": 0.0983,
"step": 111600
},
{
"epoch": 42.04,
"learning_rate": 1.1592021076401958e-05,
"loss": 0.0968,
"step": 111700
},
{
"epoch": 42.08,
"learning_rate": 1.1584493789988712e-05,
"loss": 0.0984,
"step": 111800
},
{
"epoch": 42.12,
"learning_rate": 1.1576966503575463e-05,
"loss": 0.0991,
"step": 111900
},
{
"epoch": 42.15,
"learning_rate": 1.1569439217162215e-05,
"loss": 0.0965,
"step": 112000
},
{
"epoch": 42.19,
"learning_rate": 1.1561911930748966e-05,
"loss": 0.098,
"step": 112100
},
{
"epoch": 42.23,
"learning_rate": 1.1554384644335719e-05,
"loss": 0.0979,
"step": 112200
},
{
"epoch": 42.27,
"learning_rate": 1.154685735792247e-05,
"loss": 0.0971,
"step": 112300
},
{
"epoch": 42.3,
"learning_rate": 1.1539330071509222e-05,
"loss": 0.0974,
"step": 112400
},
{
"epoch": 42.34,
"learning_rate": 1.1531802785095973e-05,
"loss": 0.0963,
"step": 112500
},
{
"epoch": 42.38,
"learning_rate": 1.1524275498682727e-05,
"loss": 0.097,
"step": 112600
},
{
"epoch": 42.42,
"learning_rate": 1.1516748212269478e-05,
"loss": 0.1001,
"step": 112700
},
{
"epoch": 42.45,
"learning_rate": 1.150922092585623e-05,
"loss": 0.0972,
"step": 112800
},
{
"epoch": 42.49,
"learning_rate": 1.1501693639442981e-05,
"loss": 0.0976,
"step": 112900
},
{
"epoch": 42.53,
"learning_rate": 1.1494166353029734e-05,
"loss": 0.0963,
"step": 113000
},
{
"epoch": 42.57,
"learning_rate": 1.1486639066616484e-05,
"loss": 0.0965,
"step": 113100
},
{
"epoch": 42.6,
"learning_rate": 1.1479111780203239e-05,
"loss": 0.0978,
"step": 113200
},
{
"epoch": 42.64,
"learning_rate": 1.147158449378999e-05,
"loss": 0.0996,
"step": 113300
},
{
"epoch": 42.68,
"learning_rate": 1.1464057207376742e-05,
"loss": 0.0965,
"step": 113400
},
{
"epoch": 42.72,
"learning_rate": 1.1456529920963493e-05,
"loss": 0.0964,
"step": 113500
},
{
"epoch": 42.75,
"learning_rate": 1.1449002634550245e-05,
"loss": 0.0979,
"step": 113600
},
{
"epoch": 42.79,
"learning_rate": 1.1441475348136996e-05,
"loss": 0.0982,
"step": 113700
},
{
"epoch": 42.83,
"learning_rate": 1.143394806172375e-05,
"loss": 0.0975,
"step": 113800
},
{
"epoch": 42.87,
"learning_rate": 1.1426420775310501e-05,
"loss": 0.0956,
"step": 113900
},
{
"epoch": 42.91,
"learning_rate": 1.1418893488897254e-05,
"loss": 0.0964,
"step": 114000
},
{
"epoch": 42.94,
"learning_rate": 1.1411366202484005e-05,
"loss": 0.0984,
"step": 114100
},
{
"epoch": 42.98,
"learning_rate": 1.1403838916070757e-05,
"loss": 0.0972,
"step": 114200
},
{
"epoch": 43.0,
"eval_loss": 0.09703872352838516,
"eval_runtime": 45.2306,
"eval_samples_per_second": 165.817,
"eval_steps_per_second": 10.369,
"step": 114251
},
{
"epoch": 43.02,
"learning_rate": 1.1396311629657511e-05,
"loss": 0.0971,
"step": 114300
},
{
"epoch": 43.06,
"learning_rate": 1.1388784343244262e-05,
"loss": 0.0988,
"step": 114400
},
{
"epoch": 43.09,
"learning_rate": 1.1381257056831015e-05,
"loss": 0.0966,
"step": 114500
},
{
"epoch": 43.13,
"learning_rate": 1.1373729770417765e-05,
"loss": 0.0964,
"step": 114600
},
{
"epoch": 43.17,
"learning_rate": 1.1366202484004518e-05,
"loss": 0.0984,
"step": 114700
},
{
"epoch": 43.21,
"learning_rate": 1.1358675197591269e-05,
"loss": 0.0978,
"step": 114800
},
{
"epoch": 43.24,
"learning_rate": 1.1351147911178021e-05,
"loss": 0.0975,
"step": 114900
},
{
"epoch": 43.28,
"learning_rate": 1.1343620624764772e-05,
"loss": 0.0976,
"step": 115000
},
{
"epoch": 43.32,
"learning_rate": 1.1336093338351526e-05,
"loss": 0.0965,
"step": 115100
},
{
"epoch": 43.36,
"learning_rate": 1.1328566051938277e-05,
"loss": 0.0961,
"step": 115200
},
{
"epoch": 43.39,
"learning_rate": 1.132103876552503e-05,
"loss": 0.0964,
"step": 115300
},
{
"epoch": 43.43,
"learning_rate": 1.131351147911178e-05,
"loss": 0.0969,
"step": 115400
},
{
"epoch": 43.47,
"learning_rate": 1.1305984192698533e-05,
"loss": 0.097,
"step": 115500
},
{
"epoch": 43.51,
"learning_rate": 1.1298456906285284e-05,
"loss": 0.0977,
"step": 115600
},
{
"epoch": 43.55,
"learning_rate": 1.1290929619872038e-05,
"loss": 0.0981,
"step": 115700
},
{
"epoch": 43.58,
"learning_rate": 1.1283402333458789e-05,
"loss": 0.0973,
"step": 115800
},
{
"epoch": 43.62,
"learning_rate": 1.1275875047045541e-05,
"loss": 0.0968,
"step": 115900
},
{
"epoch": 43.66,
"learning_rate": 1.1268347760632292e-05,
"loss": 0.0962,
"step": 116000
},
{
"epoch": 43.7,
"learning_rate": 1.1260820474219045e-05,
"loss": 0.098,
"step": 116100
},
{
"epoch": 43.73,
"learning_rate": 1.1253293187805795e-05,
"loss": 0.0974,
"step": 116200
},
{
"epoch": 43.77,
"learning_rate": 1.124576590139255e-05,
"loss": 0.0957,
"step": 116300
},
{
"epoch": 43.81,
"learning_rate": 1.12382386149793e-05,
"loss": 0.0974,
"step": 116400
},
{
"epoch": 43.85,
"learning_rate": 1.1230711328566053e-05,
"loss": 0.096,
"step": 116500
},
{
"epoch": 43.88,
"learning_rate": 1.1223184042152804e-05,
"loss": 0.0979,
"step": 116600
},
{
"epoch": 43.92,
"learning_rate": 1.1215656755739556e-05,
"loss": 0.0958,
"step": 116700
},
{
"epoch": 43.96,
"learning_rate": 1.120812946932631e-05,
"loss": 0.097,
"step": 116800
},
{
"epoch": 44.0,
"learning_rate": 1.1200602182913061e-05,
"loss": 0.0991,
"step": 116900
},
{
"epoch": 44.0,
"eval_loss": 0.09703505784273148,
"eval_runtime": 45.5627,
"eval_samples_per_second": 164.608,
"eval_steps_per_second": 10.294,
"step": 116908
},
{
"epoch": 44.03,
"learning_rate": 1.1193074896499814e-05,
"loss": 0.0984,
"step": 117000
},
{
"epoch": 44.07,
"learning_rate": 1.1185547610086565e-05,
"loss": 0.0965,
"step": 117100
},
{
"epoch": 44.11,
"learning_rate": 1.1178020323673317e-05,
"loss": 0.0957,
"step": 117200
},
{
"epoch": 44.15,
"learning_rate": 1.1170493037260068e-05,
"loss": 0.0971,
"step": 117300
},
{
"epoch": 44.19,
"learning_rate": 1.1162965750846822e-05,
"loss": 0.096,
"step": 117400
},
{
"epoch": 44.22,
"learning_rate": 1.1155438464433573e-05,
"loss": 0.0959,
"step": 117500
},
{
"epoch": 44.26,
"learning_rate": 1.1147911178020326e-05,
"loss": 0.0965,
"step": 117600
},
{
"epoch": 44.3,
"learning_rate": 1.1140383891607076e-05,
"loss": 0.0987,
"step": 117700
},
{
"epoch": 44.34,
"learning_rate": 1.1132856605193829e-05,
"loss": 0.0972,
"step": 117800
},
{
"epoch": 44.37,
"learning_rate": 1.112532931878058e-05,
"loss": 0.0962,
"step": 117900
},
{
"epoch": 44.41,
"learning_rate": 1.1117802032367332e-05,
"loss": 0.0956,
"step": 118000
},
{
"epoch": 44.45,
"learning_rate": 1.1110274745954083e-05,
"loss": 0.0974,
"step": 118100
},
{
"epoch": 44.49,
"learning_rate": 1.1102747459540837e-05,
"loss": 0.0973,
"step": 118200
},
{
"epoch": 44.52,
"learning_rate": 1.1095220173127588e-05,
"loss": 0.0952,
"step": 118300
},
{
"epoch": 44.56,
"learning_rate": 1.108769288671434e-05,
"loss": 0.0968,
"step": 118400
},
{
"epoch": 44.6,
"learning_rate": 1.1080165600301091e-05,
"loss": 0.0971,
"step": 118500
},
{
"epoch": 44.64,
"learning_rate": 1.1072638313887844e-05,
"loss": 0.1,
"step": 118600
},
{
"epoch": 44.67,
"learning_rate": 1.1065111027474595e-05,
"loss": 0.0966,
"step": 118700
},
{
"epoch": 44.71,
"learning_rate": 1.1057583741061349e-05,
"loss": 0.0973,
"step": 118800
},
{
"epoch": 44.75,
"learning_rate": 1.10500564546481e-05,
"loss": 0.0957,
"step": 118900
},
{
"epoch": 44.79,
"learning_rate": 1.1042529168234852e-05,
"loss": 0.0964,
"step": 119000
},
{
"epoch": 44.82,
"learning_rate": 1.1035001881821603e-05,
"loss": 0.0954,
"step": 119100
},
{
"epoch": 44.86,
"learning_rate": 1.1027474595408356e-05,
"loss": 0.0971,
"step": 119200
},
{
"epoch": 44.9,
"learning_rate": 1.101994730899511e-05,
"loss": 0.0973,
"step": 119300
},
{
"epoch": 44.94,
"learning_rate": 1.101242002258186e-05,
"loss": 0.0975,
"step": 119400
},
{
"epoch": 44.98,
"learning_rate": 1.1004892736168613e-05,
"loss": 0.0979,
"step": 119500
},
{
"epoch": 45.0,
"eval_loss": 0.09719178825616837,
"eval_runtime": 45.3083,
"eval_samples_per_second": 165.532,
"eval_steps_per_second": 10.351,
"step": 119565
},
{
"epoch": 45.01,
"learning_rate": 1.0997365449755364e-05,
"loss": 0.0972,
"step": 119600
},
{
"epoch": 45.05,
"learning_rate": 1.0989838163342116e-05,
"loss": 0.0977,
"step": 119700
},
{
"epoch": 45.09,
"learning_rate": 1.0982310876928867e-05,
"loss": 0.0973,
"step": 119800
},
{
"epoch": 45.13,
"learning_rate": 1.0974783590515621e-05,
"loss": 0.0954,
"step": 119900
},
{
"epoch": 45.16,
"learning_rate": 1.0967256304102372e-05,
"loss": 0.0969,
"step": 120000
},
{
"epoch": 45.2,
"learning_rate": 1.0959729017689125e-05,
"loss": 0.0958,
"step": 120100
},
{
"epoch": 45.24,
"learning_rate": 1.0952201731275876e-05,
"loss": 0.0964,
"step": 120200
},
{
"epoch": 45.28,
"learning_rate": 1.0944674444862628e-05,
"loss": 0.0961,
"step": 120300
},
{
"epoch": 45.31,
"learning_rate": 1.0937147158449379e-05,
"loss": 0.0968,
"step": 120400
},
{
"epoch": 45.35,
"learning_rate": 1.0929619872036133e-05,
"loss": 0.0973,
"step": 120500
},
{
"epoch": 45.39,
"learning_rate": 1.0922092585622884e-05,
"loss": 0.0983,
"step": 120600
},
{
"epoch": 45.43,
"learning_rate": 1.0914565299209637e-05,
"loss": 0.0979,
"step": 120700
},
{
"epoch": 45.46,
"learning_rate": 1.0907038012796387e-05,
"loss": 0.0959,
"step": 120800
},
{
"epoch": 45.5,
"learning_rate": 1.089951072638314e-05,
"loss": 0.0967,
"step": 120900
},
{
"epoch": 45.54,
"learning_rate": 1.089198343996989e-05,
"loss": 0.0978,
"step": 121000
},
{
"epoch": 45.58,
"learning_rate": 1.0884456153556643e-05,
"loss": 0.0966,
"step": 121100
},
{
"epoch": 45.62,
"learning_rate": 1.0876928867143394e-05,
"loss": 0.0967,
"step": 121200
},
{
"epoch": 45.65,
"learning_rate": 1.0869401580730148e-05,
"loss": 0.0981,
"step": 121300
},
{
"epoch": 45.69,
"learning_rate": 1.0861874294316899e-05,
"loss": 0.0955,
"step": 121400
},
{
"epoch": 45.73,
"learning_rate": 1.0854347007903652e-05,
"loss": 0.0981,
"step": 121500
},
{
"epoch": 45.77,
"learning_rate": 1.0846819721490402e-05,
"loss": 0.0963,
"step": 121600
},
{
"epoch": 45.8,
"learning_rate": 1.0839292435077155e-05,
"loss": 0.096,
"step": 121700
},
{
"epoch": 45.84,
"learning_rate": 1.0831765148663909e-05,
"loss": 0.0969,
"step": 121800
},
{
"epoch": 45.88,
"learning_rate": 1.082423786225066e-05,
"loss": 0.0959,
"step": 121900
},
{
"epoch": 45.92,
"learning_rate": 1.0816710575837412e-05,
"loss": 0.0969,
"step": 122000
},
{
"epoch": 45.95,
"learning_rate": 1.0809183289424163e-05,
"loss": 0.0968,
"step": 122100
},
{
"epoch": 45.99,
"learning_rate": 1.0801656003010916e-05,
"loss": 0.097,
"step": 122200
},
{
"epoch": 46.0,
"eval_loss": 0.09704454988241196,
"eval_runtime": 45.4128,
"eval_samples_per_second": 165.152,
"eval_steps_per_second": 10.327,
"step": 122222
},
{
"epoch": 46.03,
"learning_rate": 1.0794128716597667e-05,
"loss": 0.0977,
"step": 122300
},
{
"epoch": 46.07,
"learning_rate": 1.078660143018442e-05,
"loss": 0.0966,
"step": 122400
},
{
"epoch": 46.1,
"learning_rate": 1.0779074143771172e-05,
"loss": 0.0972,
"step": 122500
},
{
"epoch": 46.14,
"learning_rate": 1.0771546857357924e-05,
"loss": 0.0965,
"step": 122600
},
{
"epoch": 46.18,
"learning_rate": 1.0764019570944675e-05,
"loss": 0.0959,
"step": 122700
},
{
"epoch": 46.22,
"learning_rate": 1.0756492284531427e-05,
"loss": 0.0955,
"step": 122800
},
{
"epoch": 46.26,
"learning_rate": 1.0748964998118178e-05,
"loss": 0.0963,
"step": 122900
},
{
"epoch": 46.29,
"learning_rate": 1.0741437711704932e-05,
"loss": 0.0965,
"step": 123000
},
{
"epoch": 46.33,
"learning_rate": 1.0733910425291683e-05,
"loss": 0.0966,
"step": 123100
},
{
"epoch": 46.37,
"learning_rate": 1.0726383138878436e-05,
"loss": 0.0959,
"step": 123200
},
{
"epoch": 46.41,
"learning_rate": 1.0718855852465187e-05,
"loss": 0.0975,
"step": 123300
},
{
"epoch": 46.44,
"learning_rate": 1.0711328566051939e-05,
"loss": 0.0957,
"step": 123400
},
{
"epoch": 46.48,
"learning_rate": 1.070380127963869e-05,
"loss": 0.0966,
"step": 123500
},
{
"epoch": 46.52,
"learning_rate": 1.0696273993225444e-05,
"loss": 0.0973,
"step": 123600
},
{
"epoch": 46.56,
"learning_rate": 1.0688746706812195e-05,
"loss": 0.0979,
"step": 123700
},
{
"epoch": 46.59,
"learning_rate": 1.0681219420398947e-05,
"loss": 0.0983,
"step": 123800
},
{
"epoch": 46.63,
"learning_rate": 1.0673692133985698e-05,
"loss": 0.0969,
"step": 123900
},
{
"epoch": 46.67,
"learning_rate": 1.066616484757245e-05,
"loss": 0.097,
"step": 124000
},
{
"epoch": 46.71,
"learning_rate": 1.0658637561159202e-05,
"loss": 0.0969,
"step": 124100
},
{
"epoch": 46.74,
"learning_rate": 1.0651110274745954e-05,
"loss": 0.0967,
"step": 124200
},
{
"epoch": 46.78,
"learning_rate": 1.0643582988332708e-05,
"loss": 0.0955,
"step": 124300
},
{
"epoch": 46.82,
"learning_rate": 1.063605570191946e-05,
"loss": 0.0967,
"step": 124400
},
{
"epoch": 46.86,
"learning_rate": 1.0628528415506212e-05,
"loss": 0.0972,
"step": 124500
},
{
"epoch": 46.89,
"learning_rate": 1.0621001129092963e-05,
"loss": 0.0975,
"step": 124600
},
{
"epoch": 46.93,
"learning_rate": 1.0613473842679715e-05,
"loss": 0.0956,
"step": 124700
},
{
"epoch": 46.97,
"learning_rate": 1.0605946556266466e-05,
"loss": 0.0936,
"step": 124800
},
{
"epoch": 47.0,
"eval_loss": 0.096713587641716,
"eval_runtime": 45.4283,
"eval_samples_per_second": 165.095,
"eval_steps_per_second": 10.324,
"step": 124879
},
{
"epoch": 47.01,
"learning_rate": 1.059841926985322e-05,
"loss": 0.0966,
"step": 124900
},
{
"epoch": 47.05,
"learning_rate": 1.0590891983439971e-05,
"loss": 0.0958,
"step": 125000
},
{
"epoch": 47.08,
"learning_rate": 1.0583364697026723e-05,
"loss": 0.0961,
"step": 125100
},
{
"epoch": 47.12,
"learning_rate": 1.0575837410613474e-05,
"loss": 0.0955,
"step": 125200
},
{
"epoch": 47.16,
"learning_rate": 1.0568310124200227e-05,
"loss": 0.0958,
"step": 125300
},
{
"epoch": 47.2,
"learning_rate": 1.0560782837786978e-05,
"loss": 0.0961,
"step": 125400
},
{
"epoch": 47.23,
"learning_rate": 1.0553255551373732e-05,
"loss": 0.0959,
"step": 125500
},
{
"epoch": 47.27,
"learning_rate": 1.0545728264960483e-05,
"loss": 0.0954,
"step": 125600
},
{
"epoch": 47.31,
"learning_rate": 1.0538200978547235e-05,
"loss": 0.0992,
"step": 125700
},
{
"epoch": 47.35,
"learning_rate": 1.0530673692133986e-05,
"loss": 0.0967,
"step": 125800
},
{
"epoch": 47.38,
"learning_rate": 1.0523146405720738e-05,
"loss": 0.0957,
"step": 125900
},
{
"epoch": 47.42,
"learning_rate": 1.051561911930749e-05,
"loss": 0.0963,
"step": 126000
},
{
"epoch": 47.46,
"learning_rate": 1.0508091832894243e-05,
"loss": 0.0959,
"step": 126100
},
{
"epoch": 47.5,
"learning_rate": 1.0500564546480994e-05,
"loss": 0.0956,
"step": 126200
},
{
"epoch": 47.53,
"learning_rate": 1.0493037260067747e-05,
"loss": 0.0976,
"step": 126300
},
{
"epoch": 47.57,
"learning_rate": 1.0485509973654498e-05,
"loss": 0.0981,
"step": 126400
},
{
"epoch": 47.61,
"learning_rate": 1.047798268724125e-05,
"loss": 0.0982,
"step": 126500
},
{
"epoch": 47.65,
"learning_rate": 1.0470455400828001e-05,
"loss": 0.0959,
"step": 126600
},
{
"epoch": 47.69,
"learning_rate": 1.0462928114414755e-05,
"loss": 0.098,
"step": 126700
},
{
"epoch": 47.72,
"learning_rate": 1.0455400828001508e-05,
"loss": 0.0958,
"step": 126800
},
{
"epoch": 47.76,
"learning_rate": 1.0447873541588258e-05,
"loss": 0.0977,
"step": 126900
},
{
"epoch": 47.8,
"learning_rate": 1.0440346255175011e-05,
"loss": 0.0944,
"step": 127000
},
{
"epoch": 47.84,
"learning_rate": 1.0432818968761762e-05,
"loss": 0.0953,
"step": 127100
},
{
"epoch": 47.87,
"learning_rate": 1.0425291682348514e-05,
"loss": 0.0949,
"step": 127200
},
{
"epoch": 47.91,
"learning_rate": 1.0417764395935265e-05,
"loss": 0.0957,
"step": 127300
},
{
"epoch": 47.95,
"learning_rate": 1.041023710952202e-05,
"loss": 0.0976,
"step": 127400
},
{
"epoch": 47.99,
"learning_rate": 1.040270982310877e-05,
"loss": 0.0948,
"step": 127500
},
{
"epoch": 48.0,
"eval_loss": 0.0966743603348732,
"eval_runtime": 45.5618,
"eval_samples_per_second": 164.611,
"eval_steps_per_second": 10.294,
"step": 127536
},
{
"epoch": 48.02,
"learning_rate": 1.0395182536695523e-05,
"loss": 0.096,
"step": 127600
},
{
"epoch": 48.06,
"learning_rate": 1.0387655250282273e-05,
"loss": 0.0958,
"step": 127700
},
{
"epoch": 48.1,
"learning_rate": 1.0380127963869026e-05,
"loss": 0.0963,
"step": 127800
},
{
"epoch": 48.14,
"learning_rate": 1.0372600677455777e-05,
"loss": 0.095,
"step": 127900
},
{
"epoch": 48.17,
"learning_rate": 1.0365073391042531e-05,
"loss": 0.0973,
"step": 128000
},
{
"epoch": 48.21,
"learning_rate": 1.0357546104629282e-05,
"loss": 0.0958,
"step": 128100
},
{
"epoch": 48.25,
"learning_rate": 1.0350018818216034e-05,
"loss": 0.094,
"step": 128200
},
{
"epoch": 48.29,
"learning_rate": 1.0342491531802785e-05,
"loss": 0.0965,
"step": 128300
},
{
"epoch": 48.33,
"learning_rate": 1.0334964245389538e-05,
"loss": 0.0962,
"step": 128400
},
{
"epoch": 48.36,
"learning_rate": 1.0327436958976289e-05,
"loss": 0.0973,
"step": 128500
},
{
"epoch": 48.4,
"learning_rate": 1.0319909672563043e-05,
"loss": 0.0966,
"step": 128600
},
{
"epoch": 48.44,
"learning_rate": 1.0312382386149794e-05,
"loss": 0.0967,
"step": 128700
},
{
"epoch": 48.48,
"learning_rate": 1.0304855099736546e-05,
"loss": 0.0983,
"step": 128800
},
{
"epoch": 48.51,
"learning_rate": 1.0297327813323297e-05,
"loss": 0.0953,
"step": 128900
},
{
"epoch": 48.55,
"learning_rate": 1.028980052691005e-05,
"loss": 0.0951,
"step": 129000
},
{
"epoch": 48.59,
"learning_rate": 1.02822732404968e-05,
"loss": 0.0988,
"step": 129100
},
{
"epoch": 48.63,
"learning_rate": 1.0274745954083554e-05,
"loss": 0.095,
"step": 129200
},
{
"epoch": 48.66,
"learning_rate": 1.0267218667670307e-05,
"loss": 0.0956,
"step": 129300
},
{
"epoch": 48.7,
"learning_rate": 1.0259691381257058e-05,
"loss": 0.0962,
"step": 129400
},
{
"epoch": 48.74,
"learning_rate": 1.025216409484381e-05,
"loss": 0.0964,
"step": 129500
},
{
"epoch": 48.78,
"learning_rate": 1.0244636808430561e-05,
"loss": 0.0952,
"step": 129600
},
{
"epoch": 48.81,
"learning_rate": 1.0237109522017315e-05,
"loss": 0.0959,
"step": 129700
},
{
"epoch": 48.85,
"learning_rate": 1.0229582235604066e-05,
"loss": 0.0965,
"step": 129800
},
{
"epoch": 48.89,
"learning_rate": 1.0222054949190819e-05,
"loss": 0.0949,
"step": 129900
},
{
"epoch": 48.93,
"learning_rate": 1.021452766277757e-05,
"loss": 0.0966,
"step": 130000
},
{
"epoch": 48.96,
"learning_rate": 1.0207000376364322e-05,
"loss": 0.0974,
"step": 130100
},
{
"epoch": 49.0,
"eval_loss": 0.09543051570653915,
"eval_runtime": 45.608,
"eval_samples_per_second": 164.445,
"eval_steps_per_second": 10.283,
"step": 130193
},
{
"epoch": 49.0,
"learning_rate": 1.0199473089951073e-05,
"loss": 0.0959,
"step": 130200
},
{
"epoch": 49.04,
"learning_rate": 1.0191945803537825e-05,
"loss": 0.0962,
"step": 130300
},
{
"epoch": 49.08,
"learning_rate": 1.0184418517124576e-05,
"loss": 0.0954,
"step": 130400
},
{
"epoch": 49.12,
"learning_rate": 1.017689123071133e-05,
"loss": 0.0959,
"step": 130500
},
{
"epoch": 49.15,
"learning_rate": 1.0169363944298081e-05,
"loss": 0.0949,
"step": 130600
},
{
"epoch": 49.19,
"learning_rate": 1.0161836657884834e-05,
"loss": 0.0967,
"step": 130700
},
{
"epoch": 49.23,
"learning_rate": 1.0154309371471584e-05,
"loss": 0.0965,
"step": 130800
},
{
"epoch": 49.27,
"learning_rate": 1.0146782085058337e-05,
"loss": 0.0963,
"step": 130900
},
{
"epoch": 49.3,
"learning_rate": 1.0139254798645088e-05,
"loss": 0.0956,
"step": 131000
},
{
"epoch": 49.34,
"learning_rate": 1.0131727512231842e-05,
"loss": 0.0959,
"step": 131100
},
{
"epoch": 49.38,
"learning_rate": 1.0124200225818593e-05,
"loss": 0.0958,
"step": 131200
},
{
"epoch": 49.42,
"learning_rate": 1.0116672939405345e-05,
"loss": 0.0974,
"step": 131300
},
{
"epoch": 49.45,
"learning_rate": 1.0109145652992096e-05,
"loss": 0.0984,
"step": 131400
},
{
"epoch": 49.49,
"learning_rate": 1.0101618366578849e-05,
"loss": 0.0946,
"step": 131500
},
{
"epoch": 49.53,
"learning_rate": 1.00940910801656e-05,
"loss": 0.0967,
"step": 131600
},
{
"epoch": 49.57,
"learning_rate": 1.0086563793752354e-05,
"loss": 0.0968,
"step": 131700
},
{
"epoch": 49.6,
"learning_rate": 1.0079036507339106e-05,
"loss": 0.0961,
"step": 131800
},
{
"epoch": 49.64,
"learning_rate": 1.0071509220925857e-05,
"loss": 0.0946,
"step": 131900
},
{
"epoch": 49.68,
"learning_rate": 1.006398193451261e-05,
"loss": 0.0959,
"step": 132000
},
{
"epoch": 49.72,
"learning_rate": 1.005645464809936e-05,
"loss": 0.0959,
"step": 132100
},
{
"epoch": 49.76,
"learning_rate": 1.0048927361686115e-05,
"loss": 0.0974,
"step": 132200
},
{
"epoch": 49.79,
"learning_rate": 1.0041400075272865e-05,
"loss": 0.095,
"step": 132300
},
{
"epoch": 49.83,
"learning_rate": 1.0033872788859618e-05,
"loss": 0.0954,
"step": 132400
},
{
"epoch": 49.87,
"learning_rate": 1.0026345502446369e-05,
"loss": 0.0958,
"step": 132500
},
{
"epoch": 49.91,
"learning_rate": 1.0018818216033121e-05,
"loss": 0.0959,
"step": 132600
},
{
"epoch": 49.94,
"learning_rate": 1.0011290929619872e-05,
"loss": 0.095,
"step": 132700
},
{
"epoch": 49.98,
"learning_rate": 1.0003763643206625e-05,
"loss": 0.0958,
"step": 132800
},
{
"epoch": 50.0,
"eval_loss": 0.09539712220430374,
"eval_runtime": 45.0771,
"eval_samples_per_second": 166.382,
"eval_steps_per_second": 10.404,
"step": 132850
},
{
"epoch": 50.02,
"learning_rate": 9.996236356793377e-06,
"loss": 0.0956,
"step": 132900
},
{
"epoch": 50.06,
"learning_rate": 9.98870907038013e-06,
"loss": 0.0943,
"step": 133000
},
{
"epoch": 50.09,
"learning_rate": 9.98118178396688e-06,
"loss": 0.0958,
"step": 133100
},
{
"epoch": 50.13,
"learning_rate": 9.973654497553633e-06,
"loss": 0.0952,
"step": 133200
},
{
"epoch": 50.17,
"learning_rate": 9.966127211140385e-06,
"loss": 0.0969,
"step": 133300
},
{
"epoch": 50.21,
"learning_rate": 9.958599924727136e-06,
"loss": 0.0952,
"step": 133400
},
{
"epoch": 50.24,
"learning_rate": 9.951072638313889e-06,
"loss": 0.0959,
"step": 133500
},
{
"epoch": 50.28,
"learning_rate": 9.943545351900641e-06,
"loss": 0.0949,
"step": 133600
},
{
"epoch": 50.32,
"learning_rate": 9.936018065487392e-06,
"loss": 0.0964,
"step": 133700
},
{
"epoch": 50.36,
"learning_rate": 9.928490779074145e-06,
"loss": 0.0952,
"step": 133800
},
{
"epoch": 50.4,
"learning_rate": 9.920963492660897e-06,
"loss": 0.0944,
"step": 133900
},
{
"epoch": 50.43,
"learning_rate": 9.913436206247648e-06,
"loss": 0.0973,
"step": 134000
},
{
"epoch": 50.47,
"learning_rate": 9.9059089198344e-06,
"loss": 0.0949,
"step": 134100
},
{
"epoch": 50.51,
"learning_rate": 9.898381633421153e-06,
"loss": 0.096,
"step": 134200
},
{
"epoch": 50.55,
"learning_rate": 9.890854347007904e-06,
"loss": 0.0962,
"step": 134300
},
{
"epoch": 50.58,
"learning_rate": 9.883327060594656e-06,
"loss": 0.0972,
"step": 134400
},
{
"epoch": 50.62,
"learning_rate": 9.875799774181409e-06,
"loss": 0.0931,
"step": 134500
},
{
"epoch": 50.66,
"learning_rate": 9.86827248776816e-06,
"loss": 0.0961,
"step": 134600
},
{
"epoch": 50.7,
"learning_rate": 9.860745201354912e-06,
"loss": 0.0947,
"step": 134700
},
{
"epoch": 50.73,
"learning_rate": 9.853217914941665e-06,
"loss": 0.0968,
"step": 134800
},
{
"epoch": 50.77,
"learning_rate": 9.845690628528415e-06,
"loss": 0.095,
"step": 134900
},
{
"epoch": 50.81,
"learning_rate": 9.838163342115168e-06,
"loss": 0.0966,
"step": 135000
},
{
"epoch": 50.85,
"learning_rate": 9.83063605570192e-06,
"loss": 0.0966,
"step": 135100
},
{
"epoch": 50.88,
"learning_rate": 9.823108769288671e-06,
"loss": 0.0957,
"step": 135200
},
{
"epoch": 50.92,
"learning_rate": 9.815581482875424e-06,
"loss": 0.0948,
"step": 135300
},
{
"epoch": 50.96,
"learning_rate": 9.808054196462176e-06,
"loss": 0.0942,
"step": 135400
},
{
"epoch": 51.0,
"learning_rate": 9.800526910048929e-06,
"loss": 0.0948,
"step": 135500
},
{
"epoch": 51.0,
"eval_loss": 0.09547575563192368,
"eval_runtime": 45.2525,
"eval_samples_per_second": 165.737,
"eval_steps_per_second": 10.364,
"step": 135507
},
{
"epoch": 51.04,
"learning_rate": 9.792999623635681e-06,
"loss": 0.0973,
"step": 135600
},
{
"epoch": 51.07,
"learning_rate": 9.785472337222432e-06,
"loss": 0.0962,
"step": 135700
},
{
"epoch": 51.11,
"learning_rate": 9.777945050809185e-06,
"loss": 0.0957,
"step": 135800
},
{
"epoch": 51.15,
"learning_rate": 9.770417764395936e-06,
"loss": 0.0953,
"step": 135900
},
{
"epoch": 51.19,
"learning_rate": 9.762890477982688e-06,
"loss": 0.0948,
"step": 136000
},
{
"epoch": 51.22,
"learning_rate": 9.75536319156944e-06,
"loss": 0.0958,
"step": 136100
},
{
"epoch": 51.26,
"learning_rate": 9.747835905156191e-06,
"loss": 0.0956,
"step": 136200
},
{
"epoch": 51.3,
"learning_rate": 9.740308618742944e-06,
"loss": 0.0957,
"step": 136300
},
{
"epoch": 51.34,
"learning_rate": 9.732781332329696e-06,
"loss": 0.0953,
"step": 136400
},
{
"epoch": 51.37,
"learning_rate": 9.725254045916447e-06,
"loss": 0.0942,
"step": 136500
},
{
"epoch": 51.41,
"learning_rate": 9.7177267595032e-06,
"loss": 0.0952,
"step": 136600
},
{
"epoch": 51.45,
"learning_rate": 9.710199473089952e-06,
"loss": 0.0949,
"step": 136700
},
{
"epoch": 51.49,
"learning_rate": 9.702672186676703e-06,
"loss": 0.0961,
"step": 136800
},
{
"epoch": 51.52,
"learning_rate": 9.695144900263456e-06,
"loss": 0.0949,
"step": 136900
},
{
"epoch": 51.56,
"learning_rate": 9.687617613850208e-06,
"loss": 0.0955,
"step": 137000
},
{
"epoch": 51.6,
"learning_rate": 9.680090327436959e-06,
"loss": 0.0954,
"step": 137100
},
{
"epoch": 51.64,
"learning_rate": 9.672563041023711e-06,
"loss": 0.0958,
"step": 137200
},
{
"epoch": 51.67,
"learning_rate": 9.665035754610464e-06,
"loss": 0.0961,
"step": 137300
},
{
"epoch": 51.71,
"learning_rate": 9.657508468197215e-06,
"loss": 0.0971,
"step": 137400
},
{
"epoch": 51.75,
"learning_rate": 9.649981181783967e-06,
"loss": 0.0952,
"step": 137500
},
{
"epoch": 51.79,
"learning_rate": 9.64245389537072e-06,
"loss": 0.0961,
"step": 137600
},
{
"epoch": 51.83,
"learning_rate": 9.63492660895747e-06,
"loss": 0.0963,
"step": 137700
},
{
"epoch": 51.86,
"learning_rate": 9.627399322544223e-06,
"loss": 0.0952,
"step": 137800
},
{
"epoch": 51.9,
"learning_rate": 9.619872036130976e-06,
"loss": 0.0944,
"step": 137900
},
{
"epoch": 51.94,
"learning_rate": 9.612344749717728e-06,
"loss": 0.0952,
"step": 138000
},
{
"epoch": 51.98,
"learning_rate": 9.60481746330448e-06,
"loss": 0.095,
"step": 138100
},
{
"epoch": 52.0,
"eval_loss": 0.0952862873673439,
"eval_runtime": 45.261,
"eval_samples_per_second": 165.706,
"eval_steps_per_second": 10.362,
"step": 138164
},
{
"epoch": 52.01,
"learning_rate": 9.597290176891231e-06,
"loss": 0.0958,
"step": 138200
},
{
"epoch": 52.05,
"learning_rate": 9.589762890477984e-06,
"loss": 0.0955,
"step": 138300
},
{
"epoch": 52.09,
"learning_rate": 9.582235604064737e-06,
"loss": 0.0959,
"step": 138400
},
{
"epoch": 52.13,
"learning_rate": 9.574708317651487e-06,
"loss": 0.0971,
"step": 138500
},
{
"epoch": 52.16,
"learning_rate": 9.56718103123824e-06,
"loss": 0.0952,
"step": 138600
},
{
"epoch": 52.2,
"learning_rate": 9.559653744824992e-06,
"loss": 0.0955,
"step": 138700
},
{
"epoch": 52.24,
"learning_rate": 9.552126458411743e-06,
"loss": 0.0946,
"step": 138800
},
{
"epoch": 52.28,
"learning_rate": 9.544599171998496e-06,
"loss": 0.0965,
"step": 138900
},
{
"epoch": 52.31,
"learning_rate": 9.537071885585247e-06,
"loss": 0.0941,
"step": 139000
},
{
"epoch": 52.35,
"learning_rate": 9.529544599171999e-06,
"loss": 0.096,
"step": 139100
},
{
"epoch": 52.39,
"learning_rate": 9.522017312758752e-06,
"loss": 0.0928,
"step": 139200
},
{
"epoch": 52.43,
"learning_rate": 9.514490026345502e-06,
"loss": 0.096,
"step": 139300
},
{
"epoch": 52.47,
"learning_rate": 9.506962739932255e-06,
"loss": 0.0953,
"step": 139400
},
{
"epoch": 52.5,
"learning_rate": 9.499435453519007e-06,
"loss": 0.0943,
"step": 139500
},
{
"epoch": 52.54,
"learning_rate": 9.491908167105758e-06,
"loss": 0.0948,
"step": 139600
},
{
"epoch": 52.58,
"learning_rate": 9.48438088069251e-06,
"loss": 0.0953,
"step": 139700
},
{
"epoch": 52.62,
"learning_rate": 9.476853594279263e-06,
"loss": 0.0952,
"step": 139800
},
{
"epoch": 52.65,
"learning_rate": 9.469326307866014e-06,
"loss": 0.0934,
"step": 139900
},
{
"epoch": 52.69,
"learning_rate": 9.461799021452767e-06,
"loss": 0.0965,
"step": 140000
},
{
"epoch": 52.73,
"learning_rate": 9.454271735039519e-06,
"loss": 0.0962,
"step": 140100
},
{
"epoch": 52.77,
"learning_rate": 9.44674444862627e-06,
"loss": 0.0954,
"step": 140200
},
{
"epoch": 52.8,
"learning_rate": 9.439217162213024e-06,
"loss": 0.095,
"step": 140300
},
{
"epoch": 52.84,
"learning_rate": 9.431689875799775e-06,
"loss": 0.0941,
"step": 140400
},
{
"epoch": 52.88,
"learning_rate": 9.424162589386527e-06,
"loss": 0.0953,
"step": 140500
},
{
"epoch": 52.92,
"learning_rate": 9.41663530297328e-06,
"loss": 0.0963,
"step": 140600
},
{
"epoch": 52.95,
"learning_rate": 9.40910801656003e-06,
"loss": 0.0958,
"step": 140700
},
{
"epoch": 52.99,
"learning_rate": 9.401580730146783e-06,
"loss": 0.0939,
"step": 140800
},
{
"epoch": 53.0,
"eval_loss": 0.09453196078538895,
"eval_runtime": 45.2882,
"eval_samples_per_second": 165.606,
"eval_steps_per_second": 10.356,
"step": 140821
},
{
"epoch": 53.03,
"learning_rate": 9.394053443733536e-06,
"loss": 0.095,
"step": 140900
},
{
"epoch": 53.07,
"learning_rate": 9.386526157320287e-06,
"loss": 0.0955,
"step": 141000
},
{
"epoch": 53.11,
"learning_rate": 9.378998870907039e-06,
"loss": 0.0944,
"step": 141100
},
{
"epoch": 53.14,
"learning_rate": 9.371471584493792e-06,
"loss": 0.0953,
"step": 141200
},
{
"epoch": 53.18,
"learning_rate": 9.363944298080542e-06,
"loss": 0.0945,
"step": 141300
},
{
"epoch": 53.22,
"learning_rate": 9.356417011667295e-06,
"loss": 0.0959,
"step": 141400
},
{
"epoch": 53.26,
"learning_rate": 9.348889725254047e-06,
"loss": 0.0938,
"step": 141500
},
{
"epoch": 53.29,
"learning_rate": 9.341362438840798e-06,
"loss": 0.0956,
"step": 141600
},
{
"epoch": 53.33,
"learning_rate": 9.33383515242755e-06,
"loss": 0.0962,
"step": 141700
},
{
"epoch": 53.37,
"learning_rate": 9.326307866014303e-06,
"loss": 0.0959,
"step": 141800
},
{
"epoch": 53.41,
"learning_rate": 9.318780579601054e-06,
"loss": 0.0953,
"step": 141900
},
{
"epoch": 53.44,
"learning_rate": 9.311253293187807e-06,
"loss": 0.0952,
"step": 142000
},
{
"epoch": 53.48,
"learning_rate": 9.303726006774557e-06,
"loss": 0.0945,
"step": 142100
},
{
"epoch": 53.52,
"learning_rate": 9.29619872036131e-06,
"loss": 0.0958,
"step": 142200
},
{
"epoch": 53.56,
"learning_rate": 9.288671433948063e-06,
"loss": 0.0949,
"step": 142300
},
{
"epoch": 53.59,
"learning_rate": 9.281144147534813e-06,
"loss": 0.0943,
"step": 142400
},
{
"epoch": 53.63,
"learning_rate": 9.273616861121566e-06,
"loss": 0.0957,
"step": 142500
},
{
"epoch": 53.67,
"learning_rate": 9.266089574708318e-06,
"loss": 0.0949,
"step": 142600
},
{
"epoch": 53.71,
"learning_rate": 9.25856228829507e-06,
"loss": 0.0939,
"step": 142700
},
{
"epoch": 53.74,
"learning_rate": 9.251035001881823e-06,
"loss": 0.0942,
"step": 142800
},
{
"epoch": 53.78,
"learning_rate": 9.243507715468574e-06,
"loss": 0.0955,
"step": 142900
},
{
"epoch": 53.82,
"learning_rate": 9.235980429055327e-06,
"loss": 0.0935,
"step": 143000
},
{
"epoch": 53.86,
"learning_rate": 9.22845314264208e-06,
"loss": 0.0952,
"step": 143100
},
{
"epoch": 53.9,
"learning_rate": 9.22092585622883e-06,
"loss": 0.0946,
"step": 143200
},
{
"epoch": 53.93,
"learning_rate": 9.213398569815583e-06,
"loss": 0.0971,
"step": 143300
},
{
"epoch": 53.97,
"learning_rate": 9.205871283402335e-06,
"loss": 0.0961,
"step": 143400
},
{
"epoch": 54.0,
"eval_loss": 0.09483154118061066,
"eval_runtime": 45.4413,
"eval_samples_per_second": 165.048,
"eval_steps_per_second": 10.321,
"step": 143478
},
{
"epoch": 54.01,
"learning_rate": 9.198343996989086e-06,
"loss": 0.0943,
"step": 143500
},
{
"epoch": 54.05,
"learning_rate": 9.190816710575838e-06,
"loss": 0.097,
"step": 143600
},
{
"epoch": 54.08,
"learning_rate": 9.183289424162591e-06,
"loss": 0.0956,
"step": 143700
},
{
"epoch": 54.12,
"learning_rate": 9.175762137749342e-06,
"loss": 0.0945,
"step": 143800
},
{
"epoch": 54.16,
"learning_rate": 9.168234851336094e-06,
"loss": 0.0968,
"step": 143900
},
{
"epoch": 54.2,
"learning_rate": 9.160707564922847e-06,
"loss": 0.0955,
"step": 144000
},
{
"epoch": 54.23,
"learning_rate": 9.153180278509598e-06,
"loss": 0.0952,
"step": 144100
},
{
"epoch": 54.27,
"learning_rate": 9.14565299209635e-06,
"loss": 0.0941,
"step": 144200
},
{
"epoch": 54.31,
"learning_rate": 9.138125705683103e-06,
"loss": 0.0935,
"step": 144300
},
{
"epoch": 54.35,
"learning_rate": 9.130598419269853e-06,
"loss": 0.0949,
"step": 144400
},
{
"epoch": 54.38,
"learning_rate": 9.123071132856606e-06,
"loss": 0.0957,
"step": 144500
},
{
"epoch": 54.42,
"learning_rate": 9.115543846443358e-06,
"loss": 0.0958,
"step": 144600
},
{
"epoch": 54.46,
"learning_rate": 9.10801656003011e-06,
"loss": 0.0932,
"step": 144700
},
{
"epoch": 54.5,
"learning_rate": 9.100489273616862e-06,
"loss": 0.0945,
"step": 144800
},
{
"epoch": 54.54,
"learning_rate": 9.092961987203613e-06,
"loss": 0.0956,
"step": 144900
},
{
"epoch": 54.57,
"learning_rate": 9.085434700790365e-06,
"loss": 0.0949,
"step": 145000
},
{
"epoch": 54.61,
"learning_rate": 9.077907414377118e-06,
"loss": 0.0942,
"step": 145100
},
{
"epoch": 54.65,
"learning_rate": 9.070380127963868e-06,
"loss": 0.0949,
"step": 145200
},
{
"epoch": 54.69,
"learning_rate": 9.062852841550623e-06,
"loss": 0.0947,
"step": 145300
},
{
"epoch": 54.72,
"learning_rate": 9.055325555137373e-06,
"loss": 0.0952,
"step": 145400
},
{
"epoch": 54.76,
"learning_rate": 9.047798268724126e-06,
"loss": 0.096,
"step": 145500
},
{
"epoch": 54.8,
"learning_rate": 9.040270982310879e-06,
"loss": 0.0953,
"step": 145600
},
{
"epoch": 54.84,
"learning_rate": 9.03274369589763e-06,
"loss": 0.094,
"step": 145700
},
{
"epoch": 54.87,
"learning_rate": 9.025216409484382e-06,
"loss": 0.0936,
"step": 145800
},
{
"epoch": 54.91,
"learning_rate": 9.017689123071134e-06,
"loss": 0.0951,
"step": 145900
},
{
"epoch": 54.95,
"learning_rate": 9.010161836657885e-06,
"loss": 0.0959,
"step": 146000
},
{
"epoch": 54.99,
"learning_rate": 9.002634550244638e-06,
"loss": 0.0964,
"step": 146100
},
{
"epoch": 55.0,
"eval_loss": 0.09549073874950409,
"eval_runtime": 45.2457,
"eval_samples_per_second": 165.762,
"eval_steps_per_second": 10.366,
"step": 146135
},
{
"epoch": 55.02,
"learning_rate": 8.99510726383139e-06,
"loss": 0.0963,
"step": 146200
},
{
"epoch": 55.06,
"learning_rate": 8.987579977418141e-06,
"loss": 0.0962,
"step": 146300
},
{
"epoch": 55.1,
"learning_rate": 8.980052691004894e-06,
"loss": 0.0954,
"step": 146400
},
{
"epoch": 55.14,
"learning_rate": 8.972525404591646e-06,
"loss": 0.0934,
"step": 146500
},
{
"epoch": 55.18,
"learning_rate": 8.964998118178397e-06,
"loss": 0.0945,
"step": 146600
},
{
"epoch": 55.21,
"learning_rate": 8.95747083176515e-06,
"loss": 0.0936,
"step": 146700
},
{
"epoch": 55.25,
"learning_rate": 8.949943545351902e-06,
"loss": 0.095,
"step": 146800
},
{
"epoch": 55.29,
"learning_rate": 8.942416258938653e-06,
"loss": 0.094,
"step": 146900
},
{
"epoch": 55.33,
"learning_rate": 8.934888972525405e-06,
"loss": 0.0944,
"step": 147000
},
{
"epoch": 55.36,
"learning_rate": 8.927361686112158e-06,
"loss": 0.0947,
"step": 147100
},
{
"epoch": 55.4,
"learning_rate": 8.919834399698909e-06,
"loss": 0.0966,
"step": 147200
},
{
"epoch": 55.44,
"learning_rate": 8.912307113285661e-06,
"loss": 0.0933,
"step": 147300
},
{
"epoch": 55.48,
"learning_rate": 8.904779826872414e-06,
"loss": 0.0939,
"step": 147400
},
{
"epoch": 55.51,
"learning_rate": 8.897252540459164e-06,
"loss": 0.0953,
"step": 147500
},
{
"epoch": 55.55,
"learning_rate": 8.889725254045917e-06,
"loss": 0.0963,
"step": 147600
},
{
"epoch": 55.59,
"learning_rate": 8.88219796763267e-06,
"loss": 0.0947,
"step": 147700
},
{
"epoch": 55.63,
"learning_rate": 8.874670681219422e-06,
"loss": 0.0933,
"step": 147800
},
{
"epoch": 55.66,
"learning_rate": 8.867143394806173e-06,
"loss": 0.0951,
"step": 147900
},
{
"epoch": 55.7,
"learning_rate": 8.859616108392925e-06,
"loss": 0.0955,
"step": 148000
},
{
"epoch": 55.74,
"learning_rate": 8.852088821979678e-06,
"loss": 0.0943,
"step": 148100
},
{
"epoch": 55.78,
"learning_rate": 8.844561535566429e-06,
"loss": 0.0942,
"step": 148200
},
{
"epoch": 55.81,
"learning_rate": 8.837034249153181e-06,
"loss": 0.0962,
"step": 148300
},
{
"epoch": 55.85,
"learning_rate": 8.829506962739934e-06,
"loss": 0.0937,
"step": 148400
},
{
"epoch": 55.89,
"learning_rate": 8.821979676326684e-06,
"loss": 0.0944,
"step": 148500
},
{
"epoch": 55.93,
"learning_rate": 8.814452389913437e-06,
"loss": 0.0937,
"step": 148600
},
{
"epoch": 55.97,
"learning_rate": 8.80692510350019e-06,
"loss": 0.0934,
"step": 148700
},
{
"epoch": 56.0,
"eval_loss": 0.0948183611035347,
"eval_runtime": 44.9888,
"eval_samples_per_second": 166.708,
"eval_steps_per_second": 10.425,
"step": 148792
},
{
"epoch": 56.0,
"learning_rate": 8.79939781708694e-06,
"loss": 0.0939,
"step": 148800
},
{
"epoch": 56.04,
"learning_rate": 8.791870530673693e-06,
"loss": 0.0966,
"step": 148900
},
{
"epoch": 56.08,
"learning_rate": 8.784343244260445e-06,
"loss": 0.0951,
"step": 149000
},
{
"epoch": 56.12,
"learning_rate": 8.776815957847196e-06,
"loss": 0.0955,
"step": 149100
},
{
"epoch": 56.15,
"learning_rate": 8.769288671433949e-06,
"loss": 0.0959,
"step": 149200
},
{
"epoch": 56.19,
"learning_rate": 8.761761385020701e-06,
"loss": 0.0949,
"step": 149300
},
{
"epoch": 56.23,
"learning_rate": 8.754234098607452e-06,
"loss": 0.0938,
"step": 149400
},
{
"epoch": 56.27,
"learning_rate": 8.746706812194205e-06,
"loss": 0.0941,
"step": 149500
},
{
"epoch": 56.3,
"learning_rate": 8.739179525780957e-06,
"loss": 0.0939,
"step": 149600
},
{
"epoch": 56.34,
"learning_rate": 8.731652239367708e-06,
"loss": 0.0944,
"step": 149700
},
{
"epoch": 56.38,
"learning_rate": 8.72412495295446e-06,
"loss": 0.0949,
"step": 149800
},
{
"epoch": 56.42,
"learning_rate": 8.716597666541213e-06,
"loss": 0.0952,
"step": 149900
},
{
"epoch": 56.45,
"learning_rate": 8.709070380127964e-06,
"loss": 0.0967,
"step": 150000
},
{
"epoch": 56.49,
"learning_rate": 8.701543093714716e-06,
"loss": 0.0948,
"step": 150100
},
{
"epoch": 56.53,
"learning_rate": 8.694015807301469e-06,
"loss": 0.0942,
"step": 150200
},
{
"epoch": 56.57,
"learning_rate": 8.686488520888221e-06,
"loss": 0.0948,
"step": 150300
},
{
"epoch": 56.61,
"learning_rate": 8.678961234474974e-06,
"loss": 0.0954,
"step": 150400
},
{
"epoch": 56.64,
"learning_rate": 8.671433948061725e-06,
"loss": 0.0951,
"step": 150500
},
{
"epoch": 56.68,
"learning_rate": 8.663906661648477e-06,
"loss": 0.094,
"step": 150600
},
{
"epoch": 56.72,
"learning_rate": 8.65637937523523e-06,
"loss": 0.094,
"step": 150700
},
{
"epoch": 56.76,
"learning_rate": 8.64885208882198e-06,
"loss": 0.0947,
"step": 150800
},
{
"epoch": 56.79,
"learning_rate": 8.641324802408733e-06,
"loss": 0.0954,
"step": 150900
},
{
"epoch": 56.83,
"learning_rate": 8.633797515995484e-06,
"loss": 0.0941,
"step": 151000
},
{
"epoch": 56.87,
"learning_rate": 8.626270229582236e-06,
"loss": 0.0948,
"step": 151100
},
{
"epoch": 56.91,
"learning_rate": 8.618742943168989e-06,
"loss": 0.0934,
"step": 151200
},
{
"epoch": 56.94,
"learning_rate": 8.61121565675574e-06,
"loss": 0.0927,
"step": 151300
},
{
"epoch": 56.98,
"learning_rate": 8.603688370342492e-06,
"loss": 0.0965,
"step": 151400
},
{
"epoch": 57.0,
"eval_loss": 0.09426940232515335,
"eval_runtime": 45.2417,
"eval_samples_per_second": 165.776,
"eval_steps_per_second": 10.367,
"step": 151449
},
{
"epoch": 57.02,
"learning_rate": 8.596161083929245e-06,
"loss": 0.0941,
"step": 151500
},
{
"epoch": 57.06,
"learning_rate": 8.588633797515995e-06,
"loss": 0.0947,
"step": 151600
},
{
"epoch": 57.09,
"learning_rate": 8.581106511102748e-06,
"loss": 0.0932,
"step": 151700
},
{
"epoch": 57.13,
"learning_rate": 8.5735792246895e-06,
"loss": 0.0947,
"step": 151800
},
{
"epoch": 57.17,
"learning_rate": 8.566051938276251e-06,
"loss": 0.0954,
"step": 151900
},
{
"epoch": 57.21,
"learning_rate": 8.558524651863004e-06,
"loss": 0.0956,
"step": 152000
},
{
"epoch": 57.25,
"learning_rate": 8.550997365449756e-06,
"loss": 0.0939,
"step": 152100
},
{
"epoch": 57.28,
"learning_rate": 8.543470079036507e-06,
"loss": 0.0943,
"step": 152200
},
{
"epoch": 57.32,
"learning_rate": 8.53594279262326e-06,
"loss": 0.0926,
"step": 152300
},
{
"epoch": 57.36,
"learning_rate": 8.528415506210012e-06,
"loss": 0.0936,
"step": 152400
},
{
"epoch": 57.4,
"learning_rate": 8.520888219796763e-06,
"loss": 0.0942,
"step": 152500
},
{
"epoch": 57.43,
"learning_rate": 8.513360933383515e-06,
"loss": 0.0955,
"step": 152600
},
{
"epoch": 57.47,
"learning_rate": 8.505833646970268e-06,
"loss": 0.0955,
"step": 152700
},
{
"epoch": 57.51,
"learning_rate": 8.49830636055702e-06,
"loss": 0.0946,
"step": 152800
},
{
"epoch": 57.55,
"learning_rate": 8.490779074143773e-06,
"loss": 0.0964,
"step": 152900
},
{
"epoch": 57.58,
"learning_rate": 8.483251787730524e-06,
"loss": 0.094,
"step": 153000
},
{
"epoch": 57.62,
"learning_rate": 8.475724501317276e-06,
"loss": 0.0949,
"step": 153100
},
{
"epoch": 57.66,
"learning_rate": 8.468197214904029e-06,
"loss": 0.0947,
"step": 153200
},
{
"epoch": 57.7,
"learning_rate": 8.46066992849078e-06,
"loss": 0.0952,
"step": 153300
},
{
"epoch": 57.73,
"learning_rate": 8.453142642077532e-06,
"loss": 0.0939,
"step": 153400
},
{
"epoch": 57.77,
"learning_rate": 8.445615355664285e-06,
"loss": 0.0947,
"step": 153500
},
{
"epoch": 57.81,
"learning_rate": 8.438088069251036e-06,
"loss": 0.094,
"step": 153600
},
{
"epoch": 57.85,
"learning_rate": 8.430560782837788e-06,
"loss": 0.0922,
"step": 153700
},
{
"epoch": 57.88,
"learning_rate": 8.423033496424539e-06,
"loss": 0.0947,
"step": 153800
},
{
"epoch": 57.92,
"learning_rate": 8.415506210011291e-06,
"loss": 0.0948,
"step": 153900
},
{
"epoch": 57.96,
"learning_rate": 8.407978923598044e-06,
"loss": 0.0921,
"step": 154000
},
{
"epoch": 58.0,
"learning_rate": 8.400451637184795e-06,
"loss": 0.0966,
"step": 154100
},
{
"epoch": 58.0,
"eval_loss": 0.094062440097332,
"eval_runtime": 45.4411,
"eval_samples_per_second": 165.049,
"eval_steps_per_second": 10.321,
"step": 154106
},
{
"epoch": 58.04,
"learning_rate": 8.392924350771547e-06,
"loss": 0.0938,
"step": 154200
},
{
"epoch": 58.07,
"learning_rate": 8.3853970643583e-06,
"loss": 0.0938,
"step": 154300
},
{
"epoch": 58.11,
"learning_rate": 8.37786977794505e-06,
"loss": 0.0936,
"step": 154400
},
{
"epoch": 58.15,
"learning_rate": 8.370342491531803e-06,
"loss": 0.0942,
"step": 154500
},
{
"epoch": 58.19,
"learning_rate": 8.362815205118556e-06,
"loss": 0.0961,
"step": 154600
},
{
"epoch": 58.22,
"learning_rate": 8.355287918705306e-06,
"loss": 0.0945,
"step": 154700
},
{
"epoch": 58.26,
"learning_rate": 8.347760632292059e-06,
"loss": 0.0954,
"step": 154800
},
{
"epoch": 58.3,
"learning_rate": 8.340233345878811e-06,
"loss": 0.0942,
"step": 154900
},
{
"epoch": 58.34,
"learning_rate": 8.332706059465562e-06,
"loss": 0.095,
"step": 155000
},
{
"epoch": 58.37,
"learning_rate": 8.325178773052315e-06,
"loss": 0.0945,
"step": 155100
},
{
"epoch": 58.41,
"learning_rate": 8.317651486639067e-06,
"loss": 0.0937,
"step": 155200
},
{
"epoch": 58.45,
"learning_rate": 8.31012420022582e-06,
"loss": 0.0956,
"step": 155300
},
{
"epoch": 58.49,
"learning_rate": 8.302596913812572e-06,
"loss": 0.0926,
"step": 155400
},
{
"epoch": 58.52,
"learning_rate": 8.295069627399323e-06,
"loss": 0.0936,
"step": 155500
},
{
"epoch": 58.56,
"learning_rate": 8.287542340986076e-06,
"loss": 0.095,
"step": 155600
},
{
"epoch": 58.6,
"learning_rate": 8.280015054572828e-06,
"loss": 0.0933,
"step": 155700
},
{
"epoch": 58.64,
"learning_rate": 8.272487768159579e-06,
"loss": 0.0941,
"step": 155800
},
{
"epoch": 58.68,
"learning_rate": 8.264960481746331e-06,
"loss": 0.0937,
"step": 155900
},
{
"epoch": 58.71,
"learning_rate": 8.257433195333084e-06,
"loss": 0.0948,
"step": 156000
},
{
"epoch": 58.75,
"learning_rate": 8.249905908919835e-06,
"loss": 0.0947,
"step": 156100
},
{
"epoch": 58.79,
"learning_rate": 8.242378622506587e-06,
"loss": 0.0919,
"step": 156200
},
{
"epoch": 58.83,
"learning_rate": 8.23485133609334e-06,
"loss": 0.0956,
"step": 156300
},
{
"epoch": 58.86,
"learning_rate": 8.22732404968009e-06,
"loss": 0.0946,
"step": 156400
},
{
"epoch": 58.9,
"learning_rate": 8.219796763266843e-06,
"loss": 0.0934,
"step": 156500
},
{
"epoch": 58.94,
"learning_rate": 8.212269476853596e-06,
"loss": 0.0953,
"step": 156600
},
{
"epoch": 58.98,
"learning_rate": 8.204742190440347e-06,
"loss": 0.0926,
"step": 156700
},
{
"epoch": 59.0,
"eval_loss": 0.0938277319073677,
"eval_runtime": 45.0217,
"eval_samples_per_second": 166.586,
"eval_steps_per_second": 10.417,
"step": 156763
},
{
"epoch": 59.01,
"learning_rate": 8.197214904027099e-06,
"loss": 0.0939,
"step": 156800
},
{
"epoch": 59.05,
"learning_rate": 8.18968761761385e-06,
"loss": 0.0931,
"step": 156900
},
{
"epoch": 59.09,
"learning_rate": 8.182160331200602e-06,
"loss": 0.0945,
"step": 157000
},
{
"epoch": 59.13,
"learning_rate": 8.174633044787355e-06,
"loss": 0.0941,
"step": 157100
},
{
"epoch": 59.16,
"learning_rate": 8.167105758374106e-06,
"loss": 0.0937,
"step": 157200
},
{
"epoch": 59.2,
"learning_rate": 8.159578471960858e-06,
"loss": 0.0945,
"step": 157300
},
{
"epoch": 59.24,
"learning_rate": 8.15205118554761e-06,
"loss": 0.0949,
"step": 157400
},
{
"epoch": 59.28,
"learning_rate": 8.144523899134362e-06,
"loss": 0.0926,
"step": 157500
},
{
"epoch": 59.32,
"learning_rate": 8.136996612721114e-06,
"loss": 0.0943,
"step": 157600
},
{
"epoch": 59.35,
"learning_rate": 8.129469326307867e-06,
"loss": 0.0929,
"step": 157700
},
{
"epoch": 59.39,
"learning_rate": 8.121942039894619e-06,
"loss": 0.0964,
"step": 157800
},
{
"epoch": 59.43,
"learning_rate": 8.114414753481372e-06,
"loss": 0.0944,
"step": 157900
},
{
"epoch": 59.47,
"learning_rate": 8.106887467068122e-06,
"loss": 0.0943,
"step": 158000
},
{
"epoch": 59.5,
"learning_rate": 8.099360180654875e-06,
"loss": 0.0962,
"step": 158100
},
{
"epoch": 59.54,
"learning_rate": 8.091832894241627e-06,
"loss": 0.0948,
"step": 158200
},
{
"epoch": 59.58,
"learning_rate": 8.084305607828378e-06,
"loss": 0.095,
"step": 158300
},
{
"epoch": 59.62,
"learning_rate": 8.07677832141513e-06,
"loss": 0.0941,
"step": 158400
},
{
"epoch": 59.65,
"learning_rate": 8.069251035001883e-06,
"loss": 0.0949,
"step": 158500
},
{
"epoch": 59.69,
"learning_rate": 8.061723748588634e-06,
"loss": 0.0945,
"step": 158600
},
{
"epoch": 59.73,
"learning_rate": 8.054196462175387e-06,
"loss": 0.0946,
"step": 158700
},
{
"epoch": 59.77,
"learning_rate": 8.046669175762139e-06,
"loss": 0.0948,
"step": 158800
},
{
"epoch": 59.8,
"learning_rate": 8.03914188934889e-06,
"loss": 0.0946,
"step": 158900
},
{
"epoch": 59.84,
"learning_rate": 8.031614602935642e-06,
"loss": 0.0945,
"step": 159000
},
{
"epoch": 59.88,
"learning_rate": 8.024087316522395e-06,
"loss": 0.0944,
"step": 159100
},
{
"epoch": 59.92,
"learning_rate": 8.016560030109146e-06,
"loss": 0.0942,
"step": 159200
},
{
"epoch": 59.95,
"learning_rate": 8.009032743695898e-06,
"loss": 0.0937,
"step": 159300
},
{
"epoch": 59.99,
"learning_rate": 8.00150545728265e-06,
"loss": 0.0928,
"step": 159400
},
{
"epoch": 60.0,
"eval_loss": 0.09416601806879044,
"eval_runtime": 45.0453,
"eval_samples_per_second": 166.499,
"eval_steps_per_second": 10.412,
"step": 159420
},
{
"epoch": 60.03,
"learning_rate": 7.993978170869402e-06,
"loss": 0.0923,
"step": 159500
},
{
"epoch": 60.07,
"learning_rate": 7.986450884456154e-06,
"loss": 0.0956,
"step": 159600
},
{
"epoch": 60.11,
"learning_rate": 7.978923598042907e-06,
"loss": 0.0954,
"step": 159700
},
{
"epoch": 60.14,
"learning_rate": 7.971396311629657e-06,
"loss": 0.0946,
"step": 159800
},
{
"epoch": 60.18,
"learning_rate": 7.96386902521641e-06,
"loss": 0.0952,
"step": 159900
},
{
"epoch": 60.22,
"learning_rate": 7.95634173880316e-06,
"loss": 0.0932,
"step": 160000
},
{
"epoch": 60.26,
"learning_rate": 7.948814452389913e-06,
"loss": 0.0942,
"step": 160100
},
{
"epoch": 60.29,
"learning_rate": 7.941287165976666e-06,
"loss": 0.0923,
"step": 160200
},
{
"epoch": 60.33,
"learning_rate": 7.933759879563418e-06,
"loss": 0.0943,
"step": 160300
},
{
"epoch": 60.37,
"learning_rate": 7.926232593150171e-06,
"loss": 0.0926,
"step": 160400
},
{
"epoch": 60.41,
"learning_rate": 7.918705306736922e-06,
"loss": 0.0944,
"step": 160500
},
{
"epoch": 60.44,
"learning_rate": 7.911178020323674e-06,
"loss": 0.0951,
"step": 160600
},
{
"epoch": 60.48,
"learning_rate": 7.903650733910427e-06,
"loss": 0.0925,
"step": 160700
},
{
"epoch": 60.52,
"learning_rate": 7.896123447497178e-06,
"loss": 0.0938,
"step": 160800
},
{
"epoch": 60.56,
"learning_rate": 7.88859616108393e-06,
"loss": 0.0949,
"step": 160900
},
{
"epoch": 60.59,
"learning_rate": 7.881068874670683e-06,
"loss": 0.0924,
"step": 161000
},
{
"epoch": 60.63,
"learning_rate": 7.873541588257433e-06,
"loss": 0.0945,
"step": 161100
},
{
"epoch": 60.67,
"learning_rate": 7.866014301844186e-06,
"loss": 0.0935,
"step": 161200
},
{
"epoch": 60.71,
"learning_rate": 7.858487015430938e-06,
"loss": 0.095,
"step": 161300
},
{
"epoch": 60.75,
"learning_rate": 7.85095972901769e-06,
"loss": 0.0938,
"step": 161400
},
{
"epoch": 60.78,
"learning_rate": 7.843432442604442e-06,
"loss": 0.0937,
"step": 161500
},
{
"epoch": 60.82,
"learning_rate": 7.835905156191194e-06,
"loss": 0.0942,
"step": 161600
},
{
"epoch": 60.86,
"learning_rate": 7.828377869777945e-06,
"loss": 0.0955,
"step": 161700
},
{
"epoch": 60.9,
"learning_rate": 7.820850583364698e-06,
"loss": 0.0932,
"step": 161800
},
{
"epoch": 60.93,
"learning_rate": 7.81332329695145e-06,
"loss": 0.094,
"step": 161900
},
{
"epoch": 60.97,
"learning_rate": 7.805796010538201e-06,
"loss": 0.093,
"step": 162000
},
{
"epoch": 61.0,
"eval_loss": 0.09355577826499939,
"eval_runtime": 45.0615,
"eval_samples_per_second": 166.439,
"eval_steps_per_second": 10.408,
"step": 162077
},
{
"epoch": 61.01,
"learning_rate": 7.798268724124953e-06,
"loss": 0.0932,
"step": 162100
},
{
"epoch": 61.05,
"learning_rate": 7.790741437711706e-06,
"loss": 0.0942,
"step": 162200
},
{
"epoch": 61.08,
"learning_rate": 7.783214151298457e-06,
"loss": 0.0933,
"step": 162300
},
{
"epoch": 61.12,
"learning_rate": 7.77568686488521e-06,
"loss": 0.0942,
"step": 162400
},
{
"epoch": 61.16,
"learning_rate": 7.768159578471962e-06,
"loss": 0.0938,
"step": 162500
},
{
"epoch": 61.2,
"learning_rate": 7.760632292058713e-06,
"loss": 0.0948,
"step": 162600
},
{
"epoch": 61.23,
"learning_rate": 7.753105005645465e-06,
"loss": 0.0946,
"step": 162700
},
{
"epoch": 61.27,
"learning_rate": 7.745577719232218e-06,
"loss": 0.0926,
"step": 162800
},
{
"epoch": 61.31,
"learning_rate": 7.73805043281897e-06,
"loss": 0.0945,
"step": 162900
},
{
"epoch": 61.35,
"learning_rate": 7.730523146405721e-06,
"loss": 0.0923,
"step": 163000
},
{
"epoch": 61.39,
"learning_rate": 7.722995859992473e-06,
"loss": 0.0935,
"step": 163100
},
{
"epoch": 61.42,
"learning_rate": 7.715468573579226e-06,
"loss": 0.0938,
"step": 163200
},
{
"epoch": 61.46,
"learning_rate": 7.707941287165977e-06,
"loss": 0.0938,
"step": 163300
},
{
"epoch": 61.5,
"learning_rate": 7.70041400075273e-06,
"loss": 0.0929,
"step": 163400
},
{
"epoch": 61.54,
"learning_rate": 7.692886714339482e-06,
"loss": 0.0937,
"step": 163500
},
{
"epoch": 61.57,
"learning_rate": 7.685359427926233e-06,
"loss": 0.0921,
"step": 163600
},
{
"epoch": 61.61,
"learning_rate": 7.677832141512985e-06,
"loss": 0.0933,
"step": 163700
},
{
"epoch": 61.65,
"learning_rate": 7.670304855099738e-06,
"loss": 0.0929,
"step": 163800
},
{
"epoch": 61.69,
"learning_rate": 7.662777568686489e-06,
"loss": 0.0931,
"step": 163900
},
{
"epoch": 61.72,
"learning_rate": 7.655250282273241e-06,
"loss": 0.0947,
"step": 164000
},
{
"epoch": 61.76,
"learning_rate": 7.647722995859994e-06,
"loss": 0.0944,
"step": 164100
},
{
"epoch": 61.8,
"learning_rate": 7.640195709446744e-06,
"loss": 0.0929,
"step": 164200
},
{
"epoch": 61.84,
"learning_rate": 7.632668423033497e-06,
"loss": 0.0945,
"step": 164300
},
{
"epoch": 61.87,
"learning_rate": 7.6251411366202485e-06,
"loss": 0.0952,
"step": 164400
},
{
"epoch": 61.91,
"learning_rate": 7.617613850207001e-06,
"loss": 0.0939,
"step": 164500
},
{
"epoch": 61.95,
"learning_rate": 7.610086563793753e-06,
"loss": 0.0949,
"step": 164600
},
{
"epoch": 61.99,
"learning_rate": 7.602559277380504e-06,
"loss": 0.0939,
"step": 164700
},
{
"epoch": 62.0,
"eval_loss": 0.09392710030078888,
"eval_runtime": 45.1193,
"eval_samples_per_second": 166.226,
"eval_steps_per_second": 10.395,
"step": 164734
},
{
"epoch": 62.02,
"learning_rate": 7.595031990967257e-06,
"loss": 0.0937,
"step": 164800
},
{
"epoch": 62.06,
"learning_rate": 7.5875047045540086e-06,
"loss": 0.0934,
"step": 164900
},
{
"epoch": 62.1,
"learning_rate": 7.57997741814076e-06,
"loss": 0.094,
"step": 165000
},
{
"epoch": 62.14,
"learning_rate": 7.572450131727512e-06,
"loss": 0.0947,
"step": 165100
},
{
"epoch": 62.18,
"learning_rate": 7.564922845314265e-06,
"loss": 0.0926,
"step": 165200
},
{
"epoch": 62.21,
"learning_rate": 7.557395558901017e-06,
"loss": 0.0947,
"step": 165300
},
{
"epoch": 62.25,
"learning_rate": 7.5498682724877694e-06,
"loss": 0.0937,
"step": 165400
},
{
"epoch": 62.29,
"learning_rate": 7.542340986074521e-06,
"loss": 0.0914,
"step": 165500
},
{
"epoch": 62.33,
"learning_rate": 7.534813699661273e-06,
"loss": 0.0953,
"step": 165600
},
{
"epoch": 62.36,
"learning_rate": 7.527286413248025e-06,
"loss": 0.0952,
"step": 165700
},
{
"epoch": 62.4,
"learning_rate": 7.519759126834777e-06,
"loss": 0.0935,
"step": 165800
},
{
"epoch": 62.44,
"learning_rate": 7.512231840421529e-06,
"loss": 0.0947,
"step": 165900
},
{
"epoch": 62.48,
"learning_rate": 7.504704554008281e-06,
"loss": 0.0935,
"step": 166000
},
{
"epoch": 62.51,
"learning_rate": 7.497177267595033e-06,
"loss": 0.0957,
"step": 166100
},
{
"epoch": 62.55,
"learning_rate": 7.4896499811817845e-06,
"loss": 0.0926,
"step": 166200
},
{
"epoch": 62.59,
"learning_rate": 7.482122694768537e-06,
"loss": 0.0943,
"step": 166300
},
{
"epoch": 62.63,
"learning_rate": 7.474595408355289e-06,
"loss": 0.0938,
"step": 166400
},
{
"epoch": 62.66,
"learning_rate": 7.46706812194204e-06,
"loss": 0.0943,
"step": 166500
},
{
"epoch": 62.7,
"learning_rate": 7.459540835528792e-06,
"loss": 0.0924,
"step": 166600
},
{
"epoch": 62.74,
"learning_rate": 7.4520135491155445e-06,
"loss": 0.0929,
"step": 166700
},
{
"epoch": 62.78,
"learning_rate": 7.444486262702296e-06,
"loss": 0.0936,
"step": 166800
},
{
"epoch": 62.82,
"learning_rate": 7.436958976289048e-06,
"loss": 0.0931,
"step": 166900
},
{
"epoch": 62.85,
"learning_rate": 7.4294316898758e-06,
"loss": 0.0943,
"step": 167000
},
{
"epoch": 62.89,
"learning_rate": 7.421904403462552e-06,
"loss": 0.094,
"step": 167100
},
{
"epoch": 62.93,
"learning_rate": 7.414377117049304e-06,
"loss": 0.0931,
"step": 167200
},
{
"epoch": 62.97,
"learning_rate": 7.406849830636056e-06,
"loss": 0.0936,
"step": 167300
},
{
"epoch": 63.0,
"eval_loss": 0.09357059001922607,
"eval_runtime": 45.0624,
"eval_samples_per_second": 166.436,
"eval_steps_per_second": 10.408,
"step": 167391
},
{
"epoch": 63.0,
"learning_rate": 7.399322544222808e-06,
"loss": 0.0936,
"step": 167400
},
{
"epoch": 63.04,
"learning_rate": 7.3917952578095595e-06,
"loss": 0.094,
"step": 167500
},
{
"epoch": 63.08,
"learning_rate": 7.384267971396312e-06,
"loss": 0.0946,
"step": 167600
},
{
"epoch": 63.12,
"learning_rate": 7.3767406849830646e-06,
"loss": 0.0941,
"step": 167700
},
{
"epoch": 63.15,
"learning_rate": 7.369213398569817e-06,
"loss": 0.0943,
"step": 167800
},
{
"epoch": 63.19,
"learning_rate": 7.361686112156569e-06,
"loss": 0.0949,
"step": 167900
},
{
"epoch": 63.23,
"learning_rate": 7.35415882574332e-06,
"loss": 0.0945,
"step": 168000
},
{
"epoch": 63.27,
"learning_rate": 7.346631539330072e-06,
"loss": 0.0924,
"step": 168100
},
{
"epoch": 63.3,
"learning_rate": 7.3391042529168246e-06,
"loss": 0.094,
"step": 168200
},
{
"epoch": 63.34,
"learning_rate": 7.331576966503576e-06,
"loss": 0.0955,
"step": 168300
},
{
"epoch": 63.38,
"learning_rate": 7.324049680090328e-06,
"loss": 0.0937,
"step": 168400
},
{
"epoch": 63.42,
"learning_rate": 7.31652239367708e-06,
"loss": 0.0924,
"step": 168500
},
{
"epoch": 63.46,
"learning_rate": 7.308995107263832e-06,
"loss": 0.0943,
"step": 168600
},
{
"epoch": 63.49,
"learning_rate": 7.301467820850584e-06,
"loss": 0.094,
"step": 168700
},
{
"epoch": 63.53,
"learning_rate": 7.293940534437336e-06,
"loss": 0.0916,
"step": 168800
},
{
"epoch": 63.57,
"learning_rate": 7.286413248024088e-06,
"loss": 0.092,
"step": 168900
},
{
"epoch": 63.61,
"learning_rate": 7.27888596161084e-06,
"loss": 0.0951,
"step": 169000
},
{
"epoch": 63.64,
"learning_rate": 7.271358675197592e-06,
"loss": 0.0928,
"step": 169100
},
{
"epoch": 63.68,
"learning_rate": 7.263831388784344e-06,
"loss": 0.0938,
"step": 169200
},
{
"epoch": 63.72,
"learning_rate": 7.2563041023710954e-06,
"loss": 0.0936,
"step": 169300
},
{
"epoch": 63.76,
"learning_rate": 7.248776815957848e-06,
"loss": 0.0928,
"step": 169400
},
{
"epoch": 63.79,
"learning_rate": 7.2412495295446e-06,
"loss": 0.0944,
"step": 169500
},
{
"epoch": 63.83,
"learning_rate": 7.233722243131351e-06,
"loss": 0.0925,
"step": 169600
},
{
"epoch": 63.87,
"learning_rate": 7.226194956718103e-06,
"loss": 0.0932,
"step": 169700
},
{
"epoch": 63.91,
"learning_rate": 7.2186676703048555e-06,
"loss": 0.0934,
"step": 169800
},
{
"epoch": 63.94,
"learning_rate": 7.211140383891607e-06,
"loss": 0.0927,
"step": 169900
},
{
"epoch": 63.98,
"learning_rate": 7.203613097478359e-06,
"loss": 0.093,
"step": 170000
},
{
"epoch": 64.0,
"eval_loss": 0.09292689710855484,
"eval_runtime": 45.1577,
"eval_samples_per_second": 166.085,
"eval_steps_per_second": 10.386,
"step": 170048
},
{
"epoch": 64.02,
"learning_rate": 7.196085811065112e-06,
"loss": 0.0933,
"step": 170100
},
{
"epoch": 64.06,
"learning_rate": 7.188558524651864e-06,
"loss": 0.0938,
"step": 170200
},
{
"epoch": 64.09,
"learning_rate": 7.181031238238616e-06,
"loss": 0.0913,
"step": 170300
},
{
"epoch": 64.13,
"learning_rate": 7.173503951825368e-06,
"loss": 0.0919,
"step": 170400
},
{
"epoch": 64.17,
"learning_rate": 7.16597666541212e-06,
"loss": 0.0949,
"step": 170500
},
{
"epoch": 64.21,
"learning_rate": 7.158449378998872e-06,
"loss": 0.0938,
"step": 170600
},
{
"epoch": 64.25,
"learning_rate": 7.150922092585624e-06,
"loss": 0.0948,
"step": 170700
},
{
"epoch": 64.28,
"learning_rate": 7.1433948061723755e-06,
"loss": 0.093,
"step": 170800
},
{
"epoch": 64.32,
"learning_rate": 7.135867519759127e-06,
"loss": 0.0933,
"step": 170900
},
{
"epoch": 64.36,
"learning_rate": 7.12834023334588e-06,
"loss": 0.0915,
"step": 171000
},
{
"epoch": 64.4,
"learning_rate": 7.120812946932631e-06,
"loss": 0.093,
"step": 171100
},
{
"epoch": 64.43,
"learning_rate": 7.113285660519383e-06,
"loss": 0.0933,
"step": 171200
},
{
"epoch": 64.47,
"learning_rate": 7.1057583741061356e-06,
"loss": 0.0936,
"step": 171300
},
{
"epoch": 64.51,
"learning_rate": 7.098231087692887e-06,
"loss": 0.0935,
"step": 171400
},
{
"epoch": 64.55,
"learning_rate": 7.090703801279639e-06,
"loss": 0.094,
"step": 171500
},
{
"epoch": 64.58,
"learning_rate": 7.083176514866391e-06,
"loss": 0.0941,
"step": 171600
},
{
"epoch": 64.62,
"learning_rate": 7.075649228453143e-06,
"loss": 0.0946,
"step": 171700
},
{
"epoch": 64.66,
"learning_rate": 7.068121942039895e-06,
"loss": 0.0946,
"step": 171800
},
{
"epoch": 64.7,
"learning_rate": 7.060594655626647e-06,
"loss": 0.0926,
"step": 171900
},
{
"epoch": 64.73,
"learning_rate": 7.053067369213399e-06,
"loss": 0.0946,
"step": 172000
},
{
"epoch": 64.77,
"learning_rate": 7.045540082800151e-06,
"loss": 0.0937,
"step": 172100
},
{
"epoch": 64.81,
"learning_rate": 7.038012796386903e-06,
"loss": 0.0927,
"step": 172200
},
{
"epoch": 64.85,
"learning_rate": 7.030485509973655e-06,
"loss": 0.0945,
"step": 172300
},
{
"epoch": 64.89,
"learning_rate": 7.022958223560406e-06,
"loss": 0.0918,
"step": 172400
},
{
"epoch": 64.92,
"learning_rate": 7.015430937147158e-06,
"loss": 0.0923,
"step": 172500
},
{
"epoch": 64.96,
"learning_rate": 7.0079036507339114e-06,
"loss": 0.0926,
"step": 172600
},
{
"epoch": 65.0,
"learning_rate": 7.000376364320663e-06,
"loss": 0.0929,
"step": 172700
},
{
"epoch": 65.0,
"eval_loss": 0.0930134728550911,
"eval_runtime": 44.9287,
"eval_samples_per_second": 166.931,
"eval_steps_per_second": 10.439,
"step": 172705
},
{
"epoch": 65.04,
"learning_rate": 6.992849077907416e-06,
"loss": 0.0929,
"step": 172800
},
{
"epoch": 65.07,
"learning_rate": 6.985321791494167e-06,
"loss": 0.0932,
"step": 172900
},
{
"epoch": 65.11,
"learning_rate": 6.977794505080919e-06,
"loss": 0.0948,
"step": 173000
},
{
"epoch": 65.15,
"learning_rate": 6.9702672186676715e-06,
"loss": 0.093,
"step": 173100
},
{
"epoch": 65.19,
"learning_rate": 6.962739932254423e-06,
"loss": 0.0947,
"step": 173200
},
{
"epoch": 65.22,
"learning_rate": 6.955212645841175e-06,
"loss": 0.0925,
"step": 173300
},
{
"epoch": 65.26,
"learning_rate": 6.947685359427927e-06,
"loss": 0.093,
"step": 173400
},
{
"epoch": 65.3,
"learning_rate": 6.940158073014679e-06,
"loss": 0.0931,
"step": 173500
},
{
"epoch": 65.34,
"learning_rate": 6.932630786601431e-06,
"loss": 0.0944,
"step": 173600
},
{
"epoch": 65.37,
"learning_rate": 6.925103500188183e-06,
"loss": 0.0923,
"step": 173700
},
{
"epoch": 65.41,
"learning_rate": 6.917576213774935e-06,
"loss": 0.0919,
"step": 173800
},
{
"epoch": 65.45,
"learning_rate": 6.9100489273616865e-06,
"loss": 0.0945,
"step": 173900
},
{
"epoch": 65.49,
"learning_rate": 6.902521640948438e-06,
"loss": 0.0924,
"step": 174000
},
{
"epoch": 65.53,
"learning_rate": 6.894994354535191e-06,
"loss": 0.0941,
"step": 174100
},
{
"epoch": 65.56,
"learning_rate": 6.887467068121942e-06,
"loss": 0.0918,
"step": 174200
},
{
"epoch": 65.6,
"learning_rate": 6.879939781708694e-06,
"loss": 0.0935,
"step": 174300
},
{
"epoch": 65.64,
"learning_rate": 6.8724124952954465e-06,
"loss": 0.0944,
"step": 174400
},
{
"epoch": 65.68,
"learning_rate": 6.864885208882198e-06,
"loss": 0.0924,
"step": 174500
},
{
"epoch": 65.71,
"learning_rate": 6.85735792246895e-06,
"loss": 0.0927,
"step": 174600
},
{
"epoch": 65.75,
"learning_rate": 6.849830636055702e-06,
"loss": 0.094,
"step": 174700
},
{
"epoch": 65.79,
"learning_rate": 6.842303349642454e-06,
"loss": 0.0935,
"step": 174800
},
{
"epoch": 65.83,
"learning_rate": 6.834776063229206e-06,
"loss": 0.0927,
"step": 174900
},
{
"epoch": 65.86,
"learning_rate": 6.827248776815958e-06,
"loss": 0.0937,
"step": 175000
},
{
"epoch": 65.9,
"learning_rate": 6.819721490402711e-06,
"loss": 0.0938,
"step": 175100
},
{
"epoch": 65.94,
"learning_rate": 6.812194203989463e-06,
"loss": 0.0931,
"step": 175200
},
{
"epoch": 65.98,
"learning_rate": 6.804666917576215e-06,
"loss": 0.0917,
"step": 175300
},
{
"epoch": 66.0,
"eval_loss": 0.09251850843429565,
"eval_runtime": 44.9106,
"eval_samples_per_second": 166.998,
"eval_steps_per_second": 10.443,
"step": 175362
},
{
"epoch": 66.01,
"learning_rate": 6.797139631162967e-06,
"loss": 0.0936,
"step": 175400
},
{
"epoch": 66.05,
"learning_rate": 6.789612344749718e-06,
"loss": 0.0948,
"step": 175500
},
{
"epoch": 66.09,
"learning_rate": 6.782085058336471e-06,
"loss": 0.0945,
"step": 175600
},
{
"epoch": 66.13,
"learning_rate": 6.7745577719232224e-06,
"loss": 0.0937,
"step": 175700
},
{
"epoch": 66.16,
"learning_rate": 6.767030485509974e-06,
"loss": 0.0945,
"step": 175800
},
{
"epoch": 66.2,
"learning_rate": 6.759503199096727e-06,
"loss": 0.0932,
"step": 175900
},
{
"epoch": 66.24,
"learning_rate": 6.751975912683478e-06,
"loss": 0.0936,
"step": 176000
},
{
"epoch": 66.28,
"learning_rate": 6.74444862627023e-06,
"loss": 0.0933,
"step": 176100
},
{
"epoch": 66.32,
"learning_rate": 6.7369213398569825e-06,
"loss": 0.0926,
"step": 176200
},
{
"epoch": 66.35,
"learning_rate": 6.729394053443734e-06,
"loss": 0.093,
"step": 176300
},
{
"epoch": 66.39,
"learning_rate": 6.721866767030486e-06,
"loss": 0.0929,
"step": 176400
},
{
"epoch": 66.43,
"learning_rate": 6.714339480617238e-06,
"loss": 0.0934,
"step": 176500
},
{
"epoch": 66.47,
"learning_rate": 6.70681219420399e-06,
"loss": 0.0936,
"step": 176600
},
{
"epoch": 66.5,
"learning_rate": 6.699284907790742e-06,
"loss": 0.0916,
"step": 176700
},
{
"epoch": 66.54,
"learning_rate": 6.691757621377494e-06,
"loss": 0.0921,
"step": 176800
},
{
"epoch": 66.58,
"learning_rate": 6.684230334964246e-06,
"loss": 0.094,
"step": 176900
},
{
"epoch": 66.62,
"learning_rate": 6.6767030485509975e-06,
"loss": 0.0915,
"step": 177000
},
{
"epoch": 66.65,
"learning_rate": 6.669175762137749e-06,
"loss": 0.0919,
"step": 177100
},
{
"epoch": 66.69,
"learning_rate": 6.661648475724502e-06,
"loss": 0.0936,
"step": 177200
},
{
"epoch": 66.73,
"learning_rate": 6.654121189311253e-06,
"loss": 0.0927,
"step": 177300
},
{
"epoch": 66.77,
"learning_rate": 6.646593902898005e-06,
"loss": 0.0921,
"step": 177400
},
{
"epoch": 66.8,
"learning_rate": 6.6390666164847575e-06,
"loss": 0.0929,
"step": 177500
},
{
"epoch": 66.84,
"learning_rate": 6.63153933007151e-06,
"loss": 0.0934,
"step": 177600
},
{
"epoch": 66.88,
"learning_rate": 6.6240120436582625e-06,
"loss": 0.0932,
"step": 177700
},
{
"epoch": 66.92,
"learning_rate": 6.616484757245014e-06,
"loss": 0.0944,
"step": 177800
},
{
"epoch": 66.96,
"learning_rate": 6.608957470831766e-06,
"loss": 0.092,
"step": 177900
},
{
"epoch": 66.99,
"learning_rate": 6.601430184418518e-06,
"loss": 0.0948,
"step": 178000
},
{
"epoch": 67.0,
"eval_loss": 0.09316383302211761,
"eval_runtime": 44.8531,
"eval_samples_per_second": 167.212,
"eval_steps_per_second": 10.456,
"step": 178019
},
{
"epoch": 67.03,
"learning_rate": 6.59390289800527e-06,
"loss": 0.0931,
"step": 178100
},
{
"epoch": 67.07,
"learning_rate": 6.586375611592022e-06,
"loss": 0.0929,
"step": 178200
},
{
"epoch": 67.11,
"learning_rate": 6.578848325178774e-06,
"loss": 0.0933,
"step": 178300
},
{
"epoch": 67.14,
"learning_rate": 6.571321038765526e-06,
"loss": 0.0909,
"step": 178400
},
{
"epoch": 67.18,
"learning_rate": 6.5637937523522776e-06,
"loss": 0.093,
"step": 178500
},
{
"epoch": 67.22,
"learning_rate": 6.556266465939029e-06,
"loss": 0.0942,
"step": 178600
},
{
"epoch": 67.26,
"learning_rate": 6.548739179525782e-06,
"loss": 0.0926,
"step": 178700
},
{
"epoch": 67.29,
"learning_rate": 6.541211893112533e-06,
"loss": 0.0921,
"step": 178800
},
{
"epoch": 67.33,
"learning_rate": 6.533684606699285e-06,
"loss": 0.0932,
"step": 178900
},
{
"epoch": 67.37,
"learning_rate": 6.526157320286038e-06,
"loss": 0.0939,
"step": 179000
},
{
"epoch": 67.41,
"learning_rate": 6.518630033872789e-06,
"loss": 0.0943,
"step": 179100
},
{
"epoch": 67.44,
"learning_rate": 6.511102747459541e-06,
"loss": 0.0925,
"step": 179200
},
{
"epoch": 67.48,
"learning_rate": 6.5035754610462934e-06,
"loss": 0.092,
"step": 179300
},
{
"epoch": 67.52,
"learning_rate": 6.496048174633045e-06,
"loss": 0.093,
"step": 179400
},
{
"epoch": 67.56,
"learning_rate": 6.488520888219797e-06,
"loss": 0.0942,
"step": 179500
},
{
"epoch": 67.6,
"learning_rate": 6.480993601806549e-06,
"loss": 0.0933,
"step": 179600
},
{
"epoch": 67.63,
"learning_rate": 6.473466315393301e-06,
"loss": 0.0921,
"step": 179700
},
{
"epoch": 67.67,
"learning_rate": 6.465939028980053e-06,
"loss": 0.0928,
"step": 179800
},
{
"epoch": 67.71,
"learning_rate": 6.458411742566805e-06,
"loss": 0.0945,
"step": 179900
},
{
"epoch": 67.75,
"learning_rate": 6.450884456153557e-06,
"loss": 0.0925,
"step": 180000
},
{
"epoch": 67.78,
"learning_rate": 6.443357169740309e-06,
"loss": 0.0912,
"step": 180100
},
{
"epoch": 67.82,
"learning_rate": 6.435829883327062e-06,
"loss": 0.0917,
"step": 180200
},
{
"epoch": 67.86,
"learning_rate": 6.4283025969138135e-06,
"loss": 0.093,
"step": 180300
},
{
"epoch": 67.9,
"learning_rate": 6.420775310500565e-06,
"loss": 0.0928,
"step": 180400
},
{
"epoch": 67.93,
"learning_rate": 6.413248024087318e-06,
"loss": 0.0915,
"step": 180500
},
{
"epoch": 67.97,
"learning_rate": 6.405720737674069e-06,
"loss": 0.0931,
"step": 180600
},
{
"epoch": 68.0,
"eval_loss": 0.09266681969165802,
"eval_runtime": 44.9069,
"eval_samples_per_second": 167.012,
"eval_steps_per_second": 10.444,
"step": 180676
},
{
"epoch": 68.01,
"learning_rate": 6.398193451260821e-06,
"loss": 0.0939,
"step": 180700
},
{
"epoch": 68.05,
"learning_rate": 6.3906661648475735e-06,
"loss": 0.0933,
"step": 180800
},
{
"epoch": 68.08,
"learning_rate": 6.383138878434325e-06,
"loss": 0.0922,
"step": 180900
},
{
"epoch": 68.12,
"learning_rate": 6.375611592021077e-06,
"loss": 0.0922,
"step": 181000
},
{
"epoch": 68.16,
"learning_rate": 6.368084305607829e-06,
"loss": 0.0935,
"step": 181100
},
{
"epoch": 68.2,
"learning_rate": 6.360557019194581e-06,
"loss": 0.0938,
"step": 181200
},
{
"epoch": 68.23,
"learning_rate": 6.353029732781333e-06,
"loss": 0.0929,
"step": 181300
},
{
"epoch": 68.27,
"learning_rate": 6.345502446368084e-06,
"loss": 0.093,
"step": 181400
},
{
"epoch": 68.31,
"learning_rate": 6.337975159954837e-06,
"loss": 0.0932,
"step": 181500
},
{
"epoch": 68.35,
"learning_rate": 6.3304478735415885e-06,
"loss": 0.0914,
"step": 181600
},
{
"epoch": 68.39,
"learning_rate": 6.32292058712834e-06,
"loss": 0.0926,
"step": 181700
},
{
"epoch": 68.42,
"learning_rate": 6.315393300715093e-06,
"loss": 0.0935,
"step": 181800
},
{
"epoch": 68.46,
"learning_rate": 6.307866014301844e-06,
"loss": 0.0923,
"step": 181900
},
{
"epoch": 68.5,
"learning_rate": 6.300338727888596e-06,
"loss": 0.0921,
"step": 182000
},
{
"epoch": 68.54,
"learning_rate": 6.2928114414753486e-06,
"loss": 0.0929,
"step": 182100
},
{
"epoch": 68.57,
"learning_rate": 6.2852841550621e-06,
"loss": 0.0935,
"step": 182200
},
{
"epoch": 68.61,
"learning_rate": 6.277756868648852e-06,
"loss": 0.0923,
"step": 182300
},
{
"epoch": 68.65,
"learning_rate": 6.270229582235604e-06,
"loss": 0.093,
"step": 182400
},
{
"epoch": 68.69,
"learning_rate": 6.262702295822356e-06,
"loss": 0.0912,
"step": 182500
},
{
"epoch": 68.72,
"learning_rate": 6.2551750094091094e-06,
"loss": 0.0922,
"step": 182600
},
{
"epoch": 68.76,
"learning_rate": 6.247647722995861e-06,
"loss": 0.0919,
"step": 182700
},
{
"epoch": 68.8,
"learning_rate": 6.240120436582613e-06,
"loss": 0.0933,
"step": 182800
},
{
"epoch": 68.84,
"learning_rate": 6.2325931501693644e-06,
"loss": 0.0936,
"step": 182900
},
{
"epoch": 68.87,
"learning_rate": 6.225065863756117e-06,
"loss": 0.0935,
"step": 183000
},
{
"epoch": 68.91,
"learning_rate": 6.217538577342869e-06,
"loss": 0.0917,
"step": 183100
},
{
"epoch": 68.95,
"learning_rate": 6.21001129092962e-06,
"loss": 0.0916,
"step": 183200
},
{
"epoch": 68.99,
"learning_rate": 6.202484004516373e-06,
"loss": 0.0911,
"step": 183300
},
{
"epoch": 69.0,
"eval_loss": 0.092154860496521,
"eval_runtime": 44.9385,
"eval_samples_per_second": 166.895,
"eval_steps_per_second": 10.436,
"step": 183333
},
{
"epoch": 69.03,
"learning_rate": 6.1949567181031245e-06,
"loss": 0.0919,
"step": 183400
},
{
"epoch": 69.06,
"learning_rate": 6.187429431689876e-06,
"loss": 0.0931,
"step": 183500
},
{
"epoch": 69.1,
"learning_rate": 6.179902145276629e-06,
"loss": 0.0923,
"step": 183600
},
{
"epoch": 69.14,
"learning_rate": 6.17237485886338e-06,
"loss": 0.0927,
"step": 183700
},
{
"epoch": 69.18,
"learning_rate": 6.164847572450132e-06,
"loss": 0.0942,
"step": 183800
},
{
"epoch": 69.21,
"learning_rate": 6.1573202860368845e-06,
"loss": 0.0926,
"step": 183900
},
{
"epoch": 69.25,
"learning_rate": 6.149792999623636e-06,
"loss": 0.0943,
"step": 184000
},
{
"epoch": 69.29,
"learning_rate": 6.142265713210388e-06,
"loss": 0.0918,
"step": 184100
},
{
"epoch": 69.33,
"learning_rate": 6.13473842679714e-06,
"loss": 0.0933,
"step": 184200
},
{
"epoch": 69.36,
"learning_rate": 6.127211140383892e-06,
"loss": 0.093,
"step": 184300
},
{
"epoch": 69.4,
"learning_rate": 6.119683853970644e-06,
"loss": 0.0927,
"step": 184400
},
{
"epoch": 69.44,
"learning_rate": 6.112156567557395e-06,
"loss": 0.0925,
"step": 184500
},
{
"epoch": 69.48,
"learning_rate": 6.104629281144148e-06,
"loss": 0.0922,
"step": 184600
},
{
"epoch": 69.51,
"learning_rate": 6.0971019947308995e-06,
"loss": 0.0926,
"step": 184700
},
{
"epoch": 69.55,
"learning_rate": 6.089574708317651e-06,
"loss": 0.0928,
"step": 184800
},
{
"epoch": 69.59,
"learning_rate": 6.082047421904404e-06,
"loss": 0.092,
"step": 184900
},
{
"epoch": 69.63,
"learning_rate": 6.074520135491156e-06,
"loss": 0.0934,
"step": 185000
},
{
"epoch": 69.67,
"learning_rate": 6.066992849077909e-06,
"loss": 0.0927,
"step": 185100
},
{
"epoch": 69.7,
"learning_rate": 6.05946556266466e-06,
"loss": 0.0923,
"step": 185200
},
{
"epoch": 69.74,
"learning_rate": 6.051938276251412e-06,
"loss": 0.0933,
"step": 185300
},
{
"epoch": 69.78,
"learning_rate": 6.0444109898381646e-06,
"loss": 0.0914,
"step": 185400
},
{
"epoch": 69.82,
"learning_rate": 6.036883703424916e-06,
"loss": 0.0928,
"step": 185500
},
{
"epoch": 69.85,
"learning_rate": 6.029356417011668e-06,
"loss": 0.0917,
"step": 185600
},
{
"epoch": 69.89,
"learning_rate": 6.02182913059842e-06,
"loss": 0.0932,
"step": 185700
},
{
"epoch": 69.93,
"learning_rate": 6.014301844185172e-06,
"loss": 0.0917,
"step": 185800
},
{
"epoch": 69.97,
"learning_rate": 6.006774557771924e-06,
"loss": 0.0923,
"step": 185900
},
{
"epoch": 70.0,
"eval_loss": 0.0924314558506012,
"eval_runtime": 45.1886,
"eval_samples_per_second": 165.971,
"eval_steps_per_second": 10.379,
"step": 185990
},
{
"epoch": 70.0,
"learning_rate": 5.999247271358675e-06,
"loss": 0.0933,
"step": 186000
},
{
"epoch": 70.04,
"learning_rate": 5.991719984945428e-06,
"loss": 0.0918,
"step": 186100
},
{
"epoch": 70.08,
"learning_rate": 5.98419269853218e-06,
"loss": 0.0919,
"step": 186200
},
{
"epoch": 70.12,
"learning_rate": 5.976665412118931e-06,
"loss": 0.0942,
"step": 186300
},
{
"epoch": 70.15,
"learning_rate": 5.969138125705684e-06,
"loss": 0.092,
"step": 186400
},
{
"epoch": 70.19,
"learning_rate": 5.9616108392924354e-06,
"loss": 0.0923,
"step": 186500
},
{
"epoch": 70.23,
"learning_rate": 5.954083552879187e-06,
"loss": 0.0929,
"step": 186600
},
{
"epoch": 70.27,
"learning_rate": 5.94655626646594e-06,
"loss": 0.0932,
"step": 186700
},
{
"epoch": 70.3,
"learning_rate": 5.939028980052691e-06,
"loss": 0.0936,
"step": 186800
},
{
"epoch": 70.34,
"learning_rate": 5.931501693639443e-06,
"loss": 0.0931,
"step": 186900
},
{
"epoch": 70.38,
"learning_rate": 5.9239744072261955e-06,
"loss": 0.0919,
"step": 187000
},
{
"epoch": 70.42,
"learning_rate": 5.916447120812947e-06,
"loss": 0.092,
"step": 187100
},
{
"epoch": 70.46,
"learning_rate": 5.908919834399699e-06,
"loss": 0.0949,
"step": 187200
},
{
"epoch": 70.49,
"learning_rate": 5.901392547986451e-06,
"loss": 0.0931,
"step": 187300
},
{
"epoch": 70.53,
"learning_rate": 5.893865261573203e-06,
"loss": 0.0927,
"step": 187400
},
{
"epoch": 70.57,
"learning_rate": 5.8863379751599555e-06,
"loss": 0.0918,
"step": 187500
},
{
"epoch": 70.61,
"learning_rate": 5.878810688746708e-06,
"loss": 0.0916,
"step": 187600
},
{
"epoch": 70.64,
"learning_rate": 5.87128340233346e-06,
"loss": 0.0926,
"step": 187700
},
{
"epoch": 70.68,
"learning_rate": 5.863756115920211e-06,
"loss": 0.0913,
"step": 187800
},
{
"epoch": 70.72,
"learning_rate": 5.856228829506964e-06,
"loss": 0.093,
"step": 187900
},
{
"epoch": 70.76,
"learning_rate": 5.8487015430937155e-06,
"loss": 0.091,
"step": 188000
},
{
"epoch": 70.79,
"learning_rate": 5.841174256680467e-06,
"loss": 0.0941,
"step": 188100
},
{
"epoch": 70.83,
"learning_rate": 5.83364697026722e-06,
"loss": 0.0933,
"step": 188200
},
{
"epoch": 70.87,
"learning_rate": 5.826119683853971e-06,
"loss": 0.0916,
"step": 188300
},
{
"epoch": 70.91,
"learning_rate": 5.818592397440723e-06,
"loss": 0.0946,
"step": 188400
},
{
"epoch": 70.94,
"learning_rate": 5.8110651110274755e-06,
"loss": 0.0927,
"step": 188500
},
{
"epoch": 70.98,
"learning_rate": 5.803537824614227e-06,
"loss": 0.0923,
"step": 188600
},
{
"epoch": 71.0,
"eval_loss": 0.09234917163848877,
"eval_runtime": 45.0733,
"eval_samples_per_second": 166.395,
"eval_steps_per_second": 10.405,
"step": 188647
},
{
"epoch": 71.02,
"learning_rate": 5.796010538200979e-06,
"loss": 0.0929,
"step": 188700
},
{
"epoch": 71.06,
"learning_rate": 5.788483251787731e-06,
"loss": 0.0928,
"step": 188800
},
{
"epoch": 71.1,
"learning_rate": 5.780955965374483e-06,
"loss": 0.0925,
"step": 188900
},
{
"epoch": 71.13,
"learning_rate": 5.773428678961235e-06,
"loss": 0.0928,
"step": 189000
},
{
"epoch": 71.17,
"learning_rate": 5.765901392547986e-06,
"loss": 0.0943,
"step": 189100
},
{
"epoch": 71.21,
"learning_rate": 5.758374106134739e-06,
"loss": 0.092,
"step": 189200
},
{
"epoch": 71.25,
"learning_rate": 5.7508468197214906e-06,
"loss": 0.0928,
"step": 189300
},
{
"epoch": 71.28,
"learning_rate": 5.743319533308242e-06,
"loss": 0.0917,
"step": 189400
},
{
"epoch": 71.32,
"learning_rate": 5.735792246894995e-06,
"loss": 0.0919,
"step": 189500
},
{
"epoch": 71.36,
"learning_rate": 5.728264960481746e-06,
"loss": 0.0922,
"step": 189600
},
{
"epoch": 71.4,
"learning_rate": 5.720737674068498e-06,
"loss": 0.0931,
"step": 189700
},
{
"epoch": 71.43,
"learning_rate": 5.713210387655251e-06,
"loss": 0.0922,
"step": 189800
},
{
"epoch": 71.47,
"learning_rate": 5.705683101242002e-06,
"loss": 0.0908,
"step": 189900
},
{
"epoch": 71.51,
"learning_rate": 5.698155814828756e-06,
"loss": 0.0909,
"step": 190000
},
{
"epoch": 71.55,
"learning_rate": 5.690628528415507e-06,
"loss": 0.0922,
"step": 190100
},
{
"epoch": 71.58,
"learning_rate": 5.683101242002259e-06,
"loss": 0.0941,
"step": 190200
},
{
"epoch": 71.62,
"learning_rate": 5.675573955589011e-06,
"loss": 0.0931,
"step": 190300
},
{
"epoch": 71.66,
"learning_rate": 5.668046669175763e-06,
"loss": 0.092,
"step": 190400
},
{
"epoch": 71.7,
"learning_rate": 5.660519382762515e-06,
"loss": 0.0903,
"step": 190500
},
{
"epoch": 71.74,
"learning_rate": 5.6529920963492665e-06,
"loss": 0.0917,
"step": 190600
},
{
"epoch": 71.77,
"learning_rate": 5.645464809936019e-06,
"loss": 0.0911,
"step": 190700
},
{
"epoch": 71.81,
"learning_rate": 5.637937523522771e-06,
"loss": 0.0924,
"step": 190800
},
{
"epoch": 71.85,
"learning_rate": 5.630410237109522e-06,
"loss": 0.0922,
"step": 190900
},
{
"epoch": 71.89,
"learning_rate": 5.622882950696275e-06,
"loss": 0.0935,
"step": 191000
},
{
"epoch": 71.92,
"learning_rate": 5.6153556642830265e-06,
"loss": 0.0912,
"step": 191100
},
{
"epoch": 71.96,
"learning_rate": 5.607828377869778e-06,
"loss": 0.0919,
"step": 191200
},
{
"epoch": 72.0,
"learning_rate": 5.600301091456531e-06,
"loss": 0.0929,
"step": 191300
},
{
"epoch": 72.0,
"eval_loss": 0.09194895625114441,
"eval_runtime": 45.1877,
"eval_samples_per_second": 165.975,
"eval_steps_per_second": 10.379,
"step": 191304
},
{
"epoch": 72.04,
"learning_rate": 5.592773805043282e-06,
"loss": 0.0934,
"step": 191400
},
{
"epoch": 72.07,
"learning_rate": 5.585246518630034e-06,
"loss": 0.0922,
"step": 191500
},
{
"epoch": 72.11,
"learning_rate": 5.5777192322167865e-06,
"loss": 0.0934,
"step": 191600
},
{
"epoch": 72.15,
"learning_rate": 5.570191945803538e-06,
"loss": 0.0902,
"step": 191700
},
{
"epoch": 72.19,
"learning_rate": 5.56266465939029e-06,
"loss": 0.0921,
"step": 191800
},
{
"epoch": 72.22,
"learning_rate": 5.5551373729770415e-06,
"loss": 0.0922,
"step": 191900
},
{
"epoch": 72.26,
"learning_rate": 5.547610086563794e-06,
"loss": 0.0925,
"step": 192000
},
{
"epoch": 72.3,
"learning_rate": 5.540082800150546e-06,
"loss": 0.0927,
"step": 192100
},
{
"epoch": 72.34,
"learning_rate": 5.532555513737297e-06,
"loss": 0.0923,
"step": 192200
},
{
"epoch": 72.37,
"learning_rate": 5.52502822732405e-06,
"loss": 0.0924,
"step": 192300
},
{
"epoch": 72.41,
"learning_rate": 5.5175009409108015e-06,
"loss": 0.0919,
"step": 192400
},
{
"epoch": 72.45,
"learning_rate": 5.509973654497555e-06,
"loss": 0.0937,
"step": 192500
},
{
"epoch": 72.49,
"learning_rate": 5.5024463680843066e-06,
"loss": 0.0919,
"step": 192600
},
{
"epoch": 72.53,
"learning_rate": 5.494919081671058e-06,
"loss": 0.0922,
"step": 192700
},
{
"epoch": 72.56,
"learning_rate": 5.487391795257811e-06,
"loss": 0.0925,
"step": 192800
},
{
"epoch": 72.6,
"learning_rate": 5.479864508844562e-06,
"loss": 0.0919,
"step": 192900
},
{
"epoch": 72.64,
"learning_rate": 5.472337222431314e-06,
"loss": 0.0908,
"step": 193000
},
{
"epoch": 72.68,
"learning_rate": 5.464809936018067e-06,
"loss": 0.0925,
"step": 193100
},
{
"epoch": 72.71,
"learning_rate": 5.457282649604818e-06,
"loss": 0.0921,
"step": 193200
},
{
"epoch": 72.75,
"learning_rate": 5.44975536319157e-06,
"loss": 0.0909,
"step": 193300
},
{
"epoch": 72.79,
"learning_rate": 5.442228076778322e-06,
"loss": 0.0938,
"step": 193400
},
{
"epoch": 72.83,
"learning_rate": 5.434700790365074e-06,
"loss": 0.0919,
"step": 193500
},
{
"epoch": 72.86,
"learning_rate": 5.427173503951826e-06,
"loss": 0.0941,
"step": 193600
},
{
"epoch": 72.9,
"learning_rate": 5.4196462175385774e-06,
"loss": 0.0916,
"step": 193700
},
{
"epoch": 72.94,
"learning_rate": 5.41211893112533e-06,
"loss": 0.0922,
"step": 193800
},
{
"epoch": 72.98,
"learning_rate": 5.404591644712082e-06,
"loss": 0.0916,
"step": 193900
},
{
"epoch": 73.0,
"eval_loss": 0.09231603145599365,
"eval_runtime": 45.0139,
"eval_samples_per_second": 166.615,
"eval_steps_per_second": 10.419,
"step": 193961
},
{
"epoch": 73.01,
"learning_rate": 5.397064358298833e-06,
"loss": 0.093,
"step": 194000
},
{
"epoch": 73.05,
"learning_rate": 5.389537071885586e-06,
"loss": 0.0926,
"step": 194100
},
{
"epoch": 73.09,
"learning_rate": 5.3820097854723375e-06,
"loss": 0.0935,
"step": 194200
},
{
"epoch": 73.13,
"learning_rate": 5.374482499059089e-06,
"loss": 0.0924,
"step": 194300
},
{
"epoch": 73.17,
"learning_rate": 5.366955212645842e-06,
"loss": 0.0918,
"step": 194400
},
{
"epoch": 73.2,
"learning_rate": 5.359427926232593e-06,
"loss": 0.0929,
"step": 194500
},
{
"epoch": 73.24,
"learning_rate": 5.351900639819345e-06,
"loss": 0.0932,
"step": 194600
},
{
"epoch": 73.28,
"learning_rate": 5.3443733534060975e-06,
"loss": 0.092,
"step": 194700
},
{
"epoch": 73.32,
"learning_rate": 5.336846066992849e-06,
"loss": 0.0913,
"step": 194800
},
{
"epoch": 73.35,
"learning_rate": 5.329318780579601e-06,
"loss": 0.093,
"step": 194900
},
{
"epoch": 73.39,
"learning_rate": 5.321791494166354e-06,
"loss": 0.0922,
"step": 195000
},
{
"epoch": 73.43,
"learning_rate": 5.314264207753106e-06,
"loss": 0.0898,
"step": 195100
},
{
"epoch": 73.47,
"learning_rate": 5.3067369213398575e-06,
"loss": 0.0921,
"step": 195200
},
{
"epoch": 73.5,
"learning_rate": 5.29920963492661e-06,
"loss": 0.0927,
"step": 195300
},
{
"epoch": 73.54,
"learning_rate": 5.291682348513362e-06,
"loss": 0.0932,
"step": 195400
},
{
"epoch": 73.58,
"learning_rate": 5.284155062100113e-06,
"loss": 0.092,
"step": 195500
},
{
"epoch": 73.62,
"learning_rate": 5.276627775686866e-06,
"loss": 0.091,
"step": 195600
},
{
"epoch": 73.65,
"learning_rate": 5.2691004892736175e-06,
"loss": 0.093,
"step": 195700
},
{
"epoch": 73.69,
"learning_rate": 5.261573202860369e-06,
"loss": 0.0943,
"step": 195800
},
{
"epoch": 73.73,
"learning_rate": 5.254045916447122e-06,
"loss": 0.0913,
"step": 195900
},
{
"epoch": 73.77,
"learning_rate": 5.246518630033873e-06,
"loss": 0.0905,
"step": 196000
},
{
"epoch": 73.81,
"learning_rate": 5.238991343620625e-06,
"loss": 0.0905,
"step": 196100
},
{
"epoch": 73.84,
"learning_rate": 5.2314640572073776e-06,
"loss": 0.0923,
"step": 196200
},
{
"epoch": 73.88,
"learning_rate": 5.223936770794129e-06,
"loss": 0.0923,
"step": 196300
},
{
"epoch": 73.92,
"learning_rate": 5.216409484380881e-06,
"loss": 0.0927,
"step": 196400
},
{
"epoch": 73.96,
"learning_rate": 5.2088821979676326e-06,
"loss": 0.093,
"step": 196500
},
{
"epoch": 73.99,
"learning_rate": 5.201354911554385e-06,
"loss": 0.0927,
"step": 196600
},
{
"epoch": 74.0,
"eval_loss": 0.0920698270201683,
"eval_runtime": 45.1732,
"eval_samples_per_second": 166.028,
"eval_steps_per_second": 10.382,
"step": 196618
},
{
"epoch": 74.03,
"learning_rate": 5.193827625141137e-06,
"loss": 0.0911,
"step": 196700
},
{
"epoch": 74.07,
"learning_rate": 5.186300338727888e-06,
"loss": 0.0921,
"step": 196800
},
{
"epoch": 74.11,
"learning_rate": 5.178773052314641e-06,
"loss": 0.0924,
"step": 196900
},
{
"epoch": 74.14,
"learning_rate": 5.171245765901393e-06,
"loss": 0.0937,
"step": 197000
},
{
"epoch": 74.18,
"learning_rate": 5.163718479488144e-06,
"loss": 0.0927,
"step": 197100
},
{
"epoch": 74.22,
"learning_rate": 5.156191193074897e-06,
"loss": 0.0932,
"step": 197200
},
{
"epoch": 74.26,
"learning_rate": 5.1486639066616484e-06,
"loss": 0.091,
"step": 197300
},
{
"epoch": 74.29,
"learning_rate": 5.1411366202484e-06,
"loss": 0.0914,
"step": 197400
},
{
"epoch": 74.33,
"learning_rate": 5.1336093338351535e-06,
"loss": 0.0921,
"step": 197500
},
{
"epoch": 74.37,
"learning_rate": 5.126082047421905e-06,
"loss": 0.0932,
"step": 197600
},
{
"epoch": 74.41,
"learning_rate": 5.118554761008658e-06,
"loss": 0.0928,
"step": 197700
},
{
"epoch": 74.44,
"learning_rate": 5.111027474595409e-06,
"loss": 0.0918,
"step": 197800
},
{
"epoch": 74.48,
"learning_rate": 5.103500188182161e-06,
"loss": 0.0894,
"step": 197900
},
{
"epoch": 74.52,
"learning_rate": 5.095972901768913e-06,
"loss": 0.091,
"step": 198000
},
{
"epoch": 74.56,
"learning_rate": 5.088445615355665e-06,
"loss": 0.0929,
"step": 198100
},
{
"epoch": 74.6,
"learning_rate": 5.080918328942417e-06,
"loss": 0.0931,
"step": 198200
},
{
"epoch": 74.63,
"learning_rate": 5.0733910425291685e-06,
"loss": 0.092,
"step": 198300
},
{
"epoch": 74.67,
"learning_rate": 5.065863756115921e-06,
"loss": 0.0916,
"step": 198400
},
{
"epoch": 74.71,
"learning_rate": 5.058336469702673e-06,
"loss": 0.0914,
"step": 198500
},
{
"epoch": 74.75,
"learning_rate": 5.050809183289424e-06,
"loss": 0.0929,
"step": 198600
},
{
"epoch": 74.78,
"learning_rate": 5.043281896876177e-06,
"loss": 0.0913,
"step": 198700
},
{
"epoch": 74.82,
"learning_rate": 5.0357546104629285e-06,
"loss": 0.0918,
"step": 198800
},
{
"epoch": 74.86,
"learning_rate": 5.02822732404968e-06,
"loss": 0.0922,
"step": 198900
},
{
"epoch": 74.9,
"learning_rate": 5.020700037636433e-06,
"loss": 0.0918,
"step": 199000
},
{
"epoch": 74.93,
"learning_rate": 5.013172751223184e-06,
"loss": 0.0918,
"step": 199100
},
{
"epoch": 74.97,
"learning_rate": 5.005645464809936e-06,
"loss": 0.0907,
"step": 199200
},
{
"epoch": 75.0,
"eval_loss": 0.09217877686023712,
"eval_runtime": 44.7295,
"eval_samples_per_second": 167.675,
"eval_steps_per_second": 10.485,
"step": 199275
},
{
"epoch": 75.01,
"learning_rate": 4.9981181783966885e-06,
"loss": 0.0917,
"step": 199300
},
{
"epoch": 75.05,
"learning_rate": 4.99059089198344e-06,
"loss": 0.0921,
"step": 199400
},
{
"epoch": 75.08,
"learning_rate": 4.983063605570193e-06,
"loss": 0.0925,
"step": 199500
},
{
"epoch": 75.12,
"learning_rate": 4.975536319156944e-06,
"loss": 0.0928,
"step": 199600
},
{
"epoch": 75.16,
"learning_rate": 4.968009032743696e-06,
"loss": 0.092,
"step": 199700
},
{
"epoch": 75.2,
"learning_rate": 4.9604817463304486e-06,
"loss": 0.0913,
"step": 199800
},
{
"epoch": 75.24,
"learning_rate": 4.9529544599172e-06,
"loss": 0.0928,
"step": 199900
},
{
"epoch": 75.27,
"learning_rate": 4.945427173503952e-06,
"loss": 0.0917,
"step": 200000
},
{
"epoch": 75.31,
"learning_rate": 4.937899887090704e-06,
"loss": 0.0928,
"step": 200100
},
{
"epoch": 75.35,
"learning_rate": 4.930372600677456e-06,
"loss": 0.0919,
"step": 200200
},
{
"epoch": 75.39,
"learning_rate": 4.922845314264208e-06,
"loss": 0.0919,
"step": 200300
},
{
"epoch": 75.42,
"learning_rate": 4.91531802785096e-06,
"loss": 0.092,
"step": 200400
},
{
"epoch": 75.46,
"learning_rate": 4.907790741437712e-06,
"loss": 0.0902,
"step": 200500
},
{
"epoch": 75.5,
"learning_rate": 4.9002634550244644e-06,
"loss": 0.0911,
"step": 200600
},
{
"epoch": 75.54,
"learning_rate": 4.892736168611216e-06,
"loss": 0.093,
"step": 200700
},
{
"epoch": 75.57,
"learning_rate": 4.885208882197968e-06,
"loss": 0.0917,
"step": 200800
},
{
"epoch": 75.61,
"learning_rate": 4.87768159578472e-06,
"loss": 0.0911,
"step": 200900
},
{
"epoch": 75.65,
"learning_rate": 4.870154309371472e-06,
"loss": 0.0927,
"step": 201000
},
{
"epoch": 75.69,
"learning_rate": 4.862627022958224e-06,
"loss": 0.0917,
"step": 201100
},
{
"epoch": 75.72,
"learning_rate": 4.855099736544976e-06,
"loss": 0.0936,
"step": 201200
},
{
"epoch": 75.76,
"learning_rate": 4.847572450131728e-06,
"loss": 0.0938,
"step": 201300
},
{
"epoch": 75.8,
"learning_rate": 4.8400451637184795e-06,
"loss": 0.0896,
"step": 201400
},
{
"epoch": 75.84,
"learning_rate": 4.832517877305232e-06,
"loss": 0.0934,
"step": 201500
},
{
"epoch": 75.88,
"learning_rate": 4.824990590891984e-06,
"loss": 0.0902,
"step": 201600
},
{
"epoch": 75.91,
"learning_rate": 4.817463304478735e-06,
"loss": 0.0906,
"step": 201700
},
{
"epoch": 75.95,
"learning_rate": 4.809936018065488e-06,
"loss": 0.0906,
"step": 201800
},
{
"epoch": 75.99,
"learning_rate": 4.80240873165224e-06,
"loss": 0.0927,
"step": 201900
},
{
"epoch": 76.0,
"eval_loss": 0.09185120463371277,
"eval_runtime": 44.8491,
"eval_samples_per_second": 167.228,
"eval_steps_per_second": 10.457,
"step": 201932
},
{
"epoch": 76.03,
"learning_rate": 4.794881445238992e-06,
"loss": 0.0924,
"step": 202000
},
{
"epoch": 76.06,
"learning_rate": 4.787354158825744e-06,
"loss": 0.0922,
"step": 202100
},
{
"epoch": 76.1,
"learning_rate": 4.779826872412496e-06,
"loss": 0.0933,
"step": 202200
},
{
"epoch": 76.14,
"learning_rate": 4.772299585999248e-06,
"loss": 0.0922,
"step": 202300
},
{
"epoch": 76.18,
"learning_rate": 4.7647722995859995e-06,
"loss": 0.0916,
"step": 202400
},
{
"epoch": 76.21,
"learning_rate": 4.757245013172751e-06,
"loss": 0.0916,
"step": 202500
},
{
"epoch": 76.25,
"learning_rate": 4.749717726759504e-06,
"loss": 0.0931,
"step": 202600
},
{
"epoch": 76.29,
"learning_rate": 4.742190440346255e-06,
"loss": 0.0907,
"step": 202700
},
{
"epoch": 76.33,
"learning_rate": 4.734663153933007e-06,
"loss": 0.092,
"step": 202800
},
{
"epoch": 76.36,
"learning_rate": 4.7271358675197595e-06,
"loss": 0.0912,
"step": 202900
},
{
"epoch": 76.4,
"learning_rate": 4.719608581106512e-06,
"loss": 0.0924,
"step": 203000
},
{
"epoch": 76.44,
"learning_rate": 4.712081294693264e-06,
"loss": 0.0915,
"step": 203100
},
{
"epoch": 76.48,
"learning_rate": 4.704554008280015e-06,
"loss": 0.0908,
"step": 203200
},
{
"epoch": 76.51,
"learning_rate": 4.697026721866768e-06,
"loss": 0.0919,
"step": 203300
},
{
"epoch": 76.55,
"learning_rate": 4.6894994354535196e-06,
"loss": 0.0912,
"step": 203400
},
{
"epoch": 76.59,
"learning_rate": 4.681972149040271e-06,
"loss": 0.092,
"step": 203500
},
{
"epoch": 76.63,
"learning_rate": 4.674444862627024e-06,
"loss": 0.091,
"step": 203600
},
{
"epoch": 76.67,
"learning_rate": 4.666917576213775e-06,
"loss": 0.0928,
"step": 203700
},
{
"epoch": 76.7,
"learning_rate": 4.659390289800527e-06,
"loss": 0.0902,
"step": 203800
},
{
"epoch": 76.74,
"learning_rate": 4.651863003387279e-06,
"loss": 0.0921,
"step": 203900
},
{
"epoch": 76.78,
"learning_rate": 4.644335716974031e-06,
"loss": 0.0899,
"step": 204000
},
{
"epoch": 76.82,
"learning_rate": 4.636808430560783e-06,
"loss": 0.0897,
"step": 204100
},
{
"epoch": 76.85,
"learning_rate": 4.629281144147535e-06,
"loss": 0.0909,
"step": 204200
},
{
"epoch": 76.89,
"learning_rate": 4.621753857734287e-06,
"loss": 0.0933,
"step": 204300
},
{
"epoch": 76.93,
"learning_rate": 4.61422657132104e-06,
"loss": 0.0937,
"step": 204400
},
{
"epoch": 76.97,
"learning_rate": 4.606699284907791e-06,
"loss": 0.0925,
"step": 204500
},
{
"epoch": 77.0,
"eval_loss": 0.09133084863424301,
"eval_runtime": 45.172,
"eval_samples_per_second": 166.032,
"eval_steps_per_second": 10.383,
"step": 204589
},
{
"epoch": 77.0,
"learning_rate": 4.599171998494543e-06,
"loss": 0.0911,
"step": 204600
},
{
"epoch": 77.04,
"learning_rate": 4.5916447120812955e-06,
"loss": 0.0936,
"step": 204700
},
{
"epoch": 77.08,
"learning_rate": 4.584117425668047e-06,
"loss": 0.0913,
"step": 204800
},
{
"epoch": 77.12,
"learning_rate": 4.576590139254799e-06,
"loss": 0.0911,
"step": 204900
},
{
"epoch": 77.15,
"learning_rate": 4.569062852841551e-06,
"loss": 0.0921,
"step": 205000
},
{
"epoch": 77.19,
"learning_rate": 4.561535566428303e-06,
"loss": 0.0918,
"step": 205100
},
{
"epoch": 77.23,
"learning_rate": 4.554008280015055e-06,
"loss": 0.0918,
"step": 205200
},
{
"epoch": 77.27,
"learning_rate": 4.546480993601806e-06,
"loss": 0.0922,
"step": 205300
},
{
"epoch": 77.31,
"learning_rate": 4.538953707188559e-06,
"loss": 0.0918,
"step": 205400
},
{
"epoch": 77.34,
"learning_rate": 4.531426420775311e-06,
"loss": 0.0921,
"step": 205500
},
{
"epoch": 77.38,
"learning_rate": 4.523899134362063e-06,
"loss": 0.0906,
"step": 205600
},
{
"epoch": 77.42,
"learning_rate": 4.516371847948815e-06,
"loss": 0.0905,
"step": 205700
},
{
"epoch": 77.46,
"learning_rate": 4.508844561535567e-06,
"loss": 0.0913,
"step": 205800
},
{
"epoch": 77.49,
"learning_rate": 4.501317275122319e-06,
"loss": 0.0928,
"step": 205900
},
{
"epoch": 77.53,
"learning_rate": 4.4937899887090705e-06,
"loss": 0.0931,
"step": 206000
},
{
"epoch": 77.57,
"learning_rate": 4.486262702295823e-06,
"loss": 0.0925,
"step": 206100
},
{
"epoch": 77.61,
"learning_rate": 4.478735415882575e-06,
"loss": 0.0907,
"step": 206200
},
{
"epoch": 77.64,
"learning_rate": 4.471208129469326e-06,
"loss": 0.0901,
"step": 206300
},
{
"epoch": 77.68,
"learning_rate": 4.463680843056079e-06,
"loss": 0.0925,
"step": 206400
},
{
"epoch": 77.72,
"learning_rate": 4.4561535566428305e-06,
"loss": 0.0899,
"step": 206500
},
{
"epoch": 77.76,
"learning_rate": 4.448626270229582e-06,
"loss": 0.0911,
"step": 206600
},
{
"epoch": 77.79,
"learning_rate": 4.441098983816335e-06,
"loss": 0.0921,
"step": 206700
},
{
"epoch": 77.83,
"learning_rate": 4.433571697403086e-06,
"loss": 0.092,
"step": 206800
},
{
"epoch": 77.87,
"learning_rate": 4.426044410989839e-06,
"loss": 0.0925,
"step": 206900
},
{
"epoch": 77.91,
"learning_rate": 4.4185171245765906e-06,
"loss": 0.0904,
"step": 207000
},
{
"epoch": 77.95,
"learning_rate": 4.410989838163342e-06,
"loss": 0.0904,
"step": 207100
},
{
"epoch": 77.98,
"learning_rate": 4.403462551750095e-06,
"loss": 0.0921,
"step": 207200
},
{
"epoch": 78.0,
"eval_loss": 0.09170127660036087,
"eval_runtime": 44.7794,
"eval_samples_per_second": 167.488,
"eval_steps_per_second": 10.474,
"step": 207246
},
{
"epoch": 78.02,
"learning_rate": 4.395935265336846e-06,
"loss": 0.0911,
"step": 207300
},
{
"epoch": 78.06,
"learning_rate": 4.388407978923598e-06,
"loss": 0.0918,
"step": 207400
},
{
"epoch": 78.1,
"learning_rate": 4.380880692510351e-06,
"loss": 0.0918,
"step": 207500
},
{
"epoch": 78.13,
"learning_rate": 4.373353406097102e-06,
"loss": 0.0901,
"step": 207600
},
{
"epoch": 78.17,
"learning_rate": 4.365826119683854e-06,
"loss": 0.0909,
"step": 207700
},
{
"epoch": 78.21,
"learning_rate": 4.3582988332706064e-06,
"loss": 0.0924,
"step": 207800
},
{
"epoch": 78.25,
"learning_rate": 4.350771546857358e-06,
"loss": 0.093,
"step": 207900
},
{
"epoch": 78.28,
"learning_rate": 4.343244260444111e-06,
"loss": 0.0917,
"step": 208000
},
{
"epoch": 78.32,
"learning_rate": 4.335716974030862e-06,
"loss": 0.0918,
"step": 208100
},
{
"epoch": 78.36,
"learning_rate": 4.328189687617615e-06,
"loss": 0.0917,
"step": 208200
},
{
"epoch": 78.4,
"learning_rate": 4.3206624012043665e-06,
"loss": 0.0918,
"step": 208300
},
{
"epoch": 78.43,
"learning_rate": 4.313135114791118e-06,
"loss": 0.0936,
"step": 208400
},
{
"epoch": 78.47,
"learning_rate": 4.30560782837787e-06,
"loss": 0.092,
"step": 208500
},
{
"epoch": 78.51,
"learning_rate": 4.298080541964622e-06,
"loss": 0.0911,
"step": 208600
},
{
"epoch": 78.55,
"learning_rate": 4.290553255551374e-06,
"loss": 0.0909,
"step": 208700
},
{
"epoch": 78.58,
"learning_rate": 4.283025969138126e-06,
"loss": 0.0913,
"step": 208800
},
{
"epoch": 78.62,
"learning_rate": 4.275498682724878e-06,
"loss": 0.0902,
"step": 208900
},
{
"epoch": 78.66,
"learning_rate": 4.26797139631163e-06,
"loss": 0.0917,
"step": 209000
},
{
"epoch": 78.7,
"learning_rate": 4.2604441098983815e-06,
"loss": 0.092,
"step": 209100
},
{
"epoch": 78.74,
"learning_rate": 4.252916823485134e-06,
"loss": 0.0927,
"step": 209200
},
{
"epoch": 78.77,
"learning_rate": 4.2453895370718865e-06,
"loss": 0.093,
"step": 209300
},
{
"epoch": 78.81,
"learning_rate": 4.237862250658638e-06,
"loss": 0.0923,
"step": 209400
},
{
"epoch": 78.85,
"learning_rate": 4.23033496424539e-06,
"loss": 0.0911,
"step": 209500
},
{
"epoch": 78.89,
"learning_rate": 4.222807677832142e-06,
"loss": 0.0914,
"step": 209600
},
{
"epoch": 78.92,
"learning_rate": 4.215280391418894e-06,
"loss": 0.092,
"step": 209700
},
{
"epoch": 78.96,
"learning_rate": 4.207753105005646e-06,
"loss": 0.0919,
"step": 209800
},
{
"epoch": 79.0,
"learning_rate": 4.200225818592397e-06,
"loss": 0.0895,
"step": 209900
},
{
"epoch": 79.0,
"eval_loss": 0.09116315096616745,
"eval_runtime": 45.3163,
"eval_samples_per_second": 165.503,
"eval_steps_per_second": 10.349,
"step": 209903
},
{
"epoch": 79.04,
"learning_rate": 4.19269853217915e-06,
"loss": 0.0912,
"step": 210000
},
{
"epoch": 79.07,
"learning_rate": 4.1851712457659015e-06,
"loss": 0.0909,
"step": 210100
},
{
"epoch": 79.11,
"learning_rate": 4.177643959352653e-06,
"loss": 0.0911,
"step": 210200
},
{
"epoch": 79.15,
"learning_rate": 4.170116672939406e-06,
"loss": 0.0925,
"step": 210300
},
{
"epoch": 79.19,
"learning_rate": 4.162589386526157e-06,
"loss": 0.0926,
"step": 210400
},
{
"epoch": 79.22,
"learning_rate": 4.15506210011291e-06,
"loss": 0.0913,
"step": 210500
},
{
"epoch": 79.26,
"learning_rate": 4.1475348136996616e-06,
"loss": 0.0907,
"step": 210600
},
{
"epoch": 79.3,
"learning_rate": 4.140007527286414e-06,
"loss": 0.0912,
"step": 210700
},
{
"epoch": 79.34,
"learning_rate": 4.132480240873166e-06,
"loss": 0.0906,
"step": 210800
},
{
"epoch": 79.38,
"learning_rate": 4.124952954459917e-06,
"loss": 0.0914,
"step": 210900
},
{
"epoch": 79.41,
"learning_rate": 4.11742566804667e-06,
"loss": 0.0916,
"step": 211000
},
{
"epoch": 79.45,
"learning_rate": 4.109898381633422e-06,
"loss": 0.0909,
"step": 211100
},
{
"epoch": 79.49,
"learning_rate": 4.102371095220173e-06,
"loss": 0.0917,
"step": 211200
},
{
"epoch": 79.53,
"learning_rate": 4.094843808806925e-06,
"loss": 0.0917,
"step": 211300
},
{
"epoch": 79.56,
"learning_rate": 4.0873165223936774e-06,
"loss": 0.0915,
"step": 211400
},
{
"epoch": 79.6,
"learning_rate": 4.079789235980429e-06,
"loss": 0.091,
"step": 211500
},
{
"epoch": 79.64,
"learning_rate": 4.072261949567181e-06,
"loss": 0.0915,
"step": 211600
},
{
"epoch": 79.68,
"learning_rate": 4.064734663153933e-06,
"loss": 0.0919,
"step": 211700
},
{
"epoch": 79.71,
"learning_rate": 4.057207376740686e-06,
"loss": 0.0909,
"step": 211800
},
{
"epoch": 79.75,
"learning_rate": 4.0496800903274375e-06,
"loss": 0.0915,
"step": 211900
},
{
"epoch": 79.79,
"learning_rate": 4.042152803914189e-06,
"loss": 0.0904,
"step": 212000
},
{
"epoch": 79.83,
"learning_rate": 4.034625517500942e-06,
"loss": 0.09,
"step": 212100
},
{
"epoch": 79.86,
"learning_rate": 4.027098231087693e-06,
"loss": 0.0918,
"step": 212200
},
{
"epoch": 79.9,
"learning_rate": 4.019570944674445e-06,
"loss": 0.0912,
"step": 212300
},
{
"epoch": 79.94,
"learning_rate": 4.0120436582611975e-06,
"loss": 0.0903,
"step": 212400
},
{
"epoch": 79.98,
"learning_rate": 4.004516371847949e-06,
"loss": 0.0916,
"step": 212500
},
{
"epoch": 80.0,
"eval_loss": 0.09135947376489639,
"eval_runtime": 45.1657,
"eval_samples_per_second": 166.055,
"eval_steps_per_second": 10.384,
"step": 212560
},
{
"epoch": 80.02,
"learning_rate": 3.996989085434701e-06,
"loss": 0.0909,
"step": 212600
},
{
"epoch": 80.05,
"learning_rate": 3.989461799021453e-06,
"loss": 0.0912,
"step": 212700
},
{
"epoch": 80.09,
"learning_rate": 3.981934512608205e-06,
"loss": 0.091,
"step": 212800
},
{
"epoch": 80.13,
"learning_rate": 3.974407226194957e-06,
"loss": 0.0916,
"step": 212900
},
{
"epoch": 80.17,
"learning_rate": 3.966879939781709e-06,
"loss": 0.0918,
"step": 213000
},
{
"epoch": 80.2,
"learning_rate": 3.959352653368461e-06,
"loss": 0.0927,
"step": 213100
},
{
"epoch": 80.24,
"learning_rate": 3.951825366955213e-06,
"loss": 0.0928,
"step": 213200
},
{
"epoch": 80.28,
"learning_rate": 3.944298080541965e-06,
"loss": 0.0902,
"step": 213300
},
{
"epoch": 80.32,
"learning_rate": 3.936770794128717e-06,
"loss": 0.0909,
"step": 213400
},
{
"epoch": 80.35,
"learning_rate": 3.929243507715469e-06,
"loss": 0.0909,
"step": 213500
},
{
"epoch": 80.39,
"learning_rate": 3.921716221302221e-06,
"loss": 0.089,
"step": 213600
},
{
"epoch": 80.43,
"learning_rate": 3.9141889348889725e-06,
"loss": 0.0928,
"step": 213700
},
{
"epoch": 80.47,
"learning_rate": 3.906661648475725e-06,
"loss": 0.0895,
"step": 213800
},
{
"epoch": 80.5,
"learning_rate": 3.899134362062477e-06,
"loss": 0.091,
"step": 213900
},
{
"epoch": 80.54,
"learning_rate": 3.891607075649228e-06,
"loss": 0.0908,
"step": 214000
},
{
"epoch": 80.58,
"learning_rate": 3.884079789235981e-06,
"loss": 0.0921,
"step": 214100
},
{
"epoch": 80.62,
"learning_rate": 3.8765525028227326e-06,
"loss": 0.0904,
"step": 214200
},
{
"epoch": 80.65,
"learning_rate": 3.869025216409485e-06,
"loss": 0.0914,
"step": 214300
},
{
"epoch": 80.69,
"learning_rate": 3.861497929996237e-06,
"loss": 0.0898,
"step": 214400
},
{
"epoch": 80.73,
"learning_rate": 3.853970643582988e-06,
"loss": 0.0923,
"step": 214500
},
{
"epoch": 80.77,
"learning_rate": 3.846443357169741e-06,
"loss": 0.0934,
"step": 214600
},
{
"epoch": 80.81,
"learning_rate": 3.838916070756493e-06,
"loss": 0.0914,
"step": 214700
},
{
"epoch": 80.84,
"learning_rate": 3.831388784343244e-06,
"loss": 0.09,
"step": 214800
},
{
"epoch": 80.88,
"learning_rate": 3.823861497929997e-06,
"loss": 0.091,
"step": 214900
},
{
"epoch": 80.92,
"learning_rate": 3.8163342115167484e-06,
"loss": 0.0907,
"step": 215000
},
{
"epoch": 80.96,
"learning_rate": 3.8088069251035005e-06,
"loss": 0.0913,
"step": 215100
},
{
"epoch": 80.99,
"learning_rate": 3.801279638690252e-06,
"loss": 0.09,
"step": 215200
},
{
"epoch": 81.0,
"eval_loss": 0.09087579697370529,
"eval_runtime": 45.0879,
"eval_samples_per_second": 166.342,
"eval_steps_per_second": 10.402,
"step": 215217
},
{
"epoch": 81.03,
"learning_rate": 3.7937523522770043e-06,
"loss": 0.0912,
"step": 215300
},
{
"epoch": 81.07,
"learning_rate": 3.786225065863756e-06,
"loss": 0.0922,
"step": 215400
},
{
"epoch": 81.11,
"learning_rate": 3.7786977794505085e-06,
"loss": 0.0913,
"step": 215500
},
{
"epoch": 81.14,
"learning_rate": 3.7711704930372606e-06,
"loss": 0.0917,
"step": 215600
},
{
"epoch": 81.18,
"learning_rate": 3.7636432066240126e-06,
"loss": 0.0921,
"step": 215700
},
{
"epoch": 81.22,
"learning_rate": 3.7561159202107643e-06,
"loss": 0.0913,
"step": 215800
},
{
"epoch": 81.26,
"learning_rate": 3.7485886337975164e-06,
"loss": 0.0918,
"step": 215900
},
{
"epoch": 81.29,
"learning_rate": 3.7410613473842685e-06,
"loss": 0.0917,
"step": 216000
},
{
"epoch": 81.33,
"learning_rate": 3.73353406097102e-06,
"loss": 0.0888,
"step": 216100
},
{
"epoch": 81.37,
"learning_rate": 3.7260067745577722e-06,
"loss": 0.0896,
"step": 216200
},
{
"epoch": 81.41,
"learning_rate": 3.718479488144524e-06,
"loss": 0.0917,
"step": 216300
},
{
"epoch": 81.45,
"learning_rate": 3.710952201731276e-06,
"loss": 0.0918,
"step": 216400
},
{
"epoch": 81.48,
"learning_rate": 3.703424915318028e-06,
"loss": 0.0908,
"step": 216500
},
{
"epoch": 81.52,
"learning_rate": 3.6958976289047798e-06,
"loss": 0.0921,
"step": 216600
},
{
"epoch": 81.56,
"learning_rate": 3.6883703424915323e-06,
"loss": 0.0896,
"step": 216700
},
{
"epoch": 81.6,
"learning_rate": 3.6808430560782844e-06,
"loss": 0.0909,
"step": 216800
},
{
"epoch": 81.63,
"learning_rate": 3.673315769665036e-06,
"loss": 0.0912,
"step": 216900
},
{
"epoch": 81.67,
"learning_rate": 3.665788483251788e-06,
"loss": 0.0915,
"step": 217000
},
{
"epoch": 81.71,
"learning_rate": 3.65826119683854e-06,
"loss": 0.091,
"step": 217100
},
{
"epoch": 81.75,
"learning_rate": 3.650733910425292e-06,
"loss": 0.09,
"step": 217200
},
{
"epoch": 81.78,
"learning_rate": 3.643206624012044e-06,
"loss": 0.0912,
"step": 217300
},
{
"epoch": 81.82,
"learning_rate": 3.635679337598796e-06,
"loss": 0.0908,
"step": 217400
},
{
"epoch": 81.86,
"learning_rate": 3.6281520511855477e-06,
"loss": 0.0902,
"step": 217500
},
{
"epoch": 81.9,
"learning_rate": 3.6206247647723e-06,
"loss": 0.0907,
"step": 217600
},
{
"epoch": 81.93,
"learning_rate": 3.6130974783590515e-06,
"loss": 0.0892,
"step": 217700
},
{
"epoch": 81.97,
"learning_rate": 3.6055701919458036e-06,
"loss": 0.0916,
"step": 217800
},
{
"epoch": 82.0,
"eval_loss": 0.09082730859518051,
"eval_runtime": 45.1546,
"eval_samples_per_second": 166.096,
"eval_steps_per_second": 10.387,
"step": 217874
},
{
"epoch": 82.01,
"learning_rate": 3.598042905532556e-06,
"loss": 0.0915,
"step": 217900
},
{
"epoch": 82.05,
"learning_rate": 3.590515619119308e-06,
"loss": 0.091,
"step": 218000
},
{
"epoch": 82.09,
"learning_rate": 3.58298833270606e-06,
"loss": 0.0918,
"step": 218100
},
{
"epoch": 82.12,
"learning_rate": 3.575461046292812e-06,
"loss": 0.0911,
"step": 218200
},
{
"epoch": 82.16,
"learning_rate": 3.5679337598795636e-06,
"loss": 0.0907,
"step": 218300
},
{
"epoch": 82.2,
"learning_rate": 3.5604064734663157e-06,
"loss": 0.0912,
"step": 218400
},
{
"epoch": 82.24,
"learning_rate": 3.5528791870530678e-06,
"loss": 0.0915,
"step": 218500
},
{
"epoch": 82.27,
"learning_rate": 3.5453519006398194e-06,
"loss": 0.0909,
"step": 218600
},
{
"epoch": 82.31,
"learning_rate": 3.5378246142265715e-06,
"loss": 0.0915,
"step": 218700
},
{
"epoch": 82.35,
"learning_rate": 3.5302973278133236e-06,
"loss": 0.0895,
"step": 218800
},
{
"epoch": 82.39,
"learning_rate": 3.5227700414000753e-06,
"loss": 0.0909,
"step": 218900
},
{
"epoch": 82.42,
"learning_rate": 3.5152427549868274e-06,
"loss": 0.0922,
"step": 219000
},
{
"epoch": 82.46,
"learning_rate": 3.507715468573579e-06,
"loss": 0.0923,
"step": 219100
},
{
"epoch": 82.5,
"learning_rate": 3.5001881821603316e-06,
"loss": 0.0923,
"step": 219200
},
{
"epoch": 82.54,
"learning_rate": 3.4926608957470836e-06,
"loss": 0.0904,
"step": 219300
},
{
"epoch": 82.57,
"learning_rate": 3.4851336093338357e-06,
"loss": 0.0904,
"step": 219400
},
{
"epoch": 82.61,
"learning_rate": 3.4776063229205874e-06,
"loss": 0.0921,
"step": 219500
},
{
"epoch": 82.65,
"learning_rate": 3.4700790365073395e-06,
"loss": 0.0904,
"step": 219600
},
{
"epoch": 82.69,
"learning_rate": 3.4625517500940916e-06,
"loss": 0.0903,
"step": 219700
},
{
"epoch": 82.72,
"learning_rate": 3.4550244636808433e-06,
"loss": 0.0911,
"step": 219800
},
{
"epoch": 82.76,
"learning_rate": 3.4474971772675953e-06,
"loss": 0.0904,
"step": 219900
},
{
"epoch": 82.8,
"learning_rate": 3.439969890854347e-06,
"loss": 0.0898,
"step": 220000
},
{
"epoch": 82.84,
"learning_rate": 3.432442604441099e-06,
"loss": 0.0906,
"step": 220100
},
{
"epoch": 82.88,
"learning_rate": 3.424915318027851e-06,
"loss": 0.0885,
"step": 220200
},
{
"epoch": 82.91,
"learning_rate": 3.417388031614603e-06,
"loss": 0.0917,
"step": 220300
},
{
"epoch": 82.95,
"learning_rate": 3.4098607452013554e-06,
"loss": 0.0888,
"step": 220400
},
{
"epoch": 82.99,
"learning_rate": 3.4023334587881075e-06,
"loss": 0.0902,
"step": 220500
},
{
"epoch": 83.0,
"eval_loss": 0.09073475003242493,
"eval_runtime": 44.912,
"eval_samples_per_second": 166.993,
"eval_steps_per_second": 10.443,
"step": 220531
},
{
"epoch": 83.03,
"learning_rate": 3.394806172374859e-06,
"loss": 0.0911,
"step": 220600
},
{
"epoch": 83.06,
"learning_rate": 3.3872788859616112e-06,
"loss": 0.0922,
"step": 220700
},
{
"epoch": 83.1,
"learning_rate": 3.3797515995483633e-06,
"loss": 0.0911,
"step": 220800
},
{
"epoch": 83.14,
"learning_rate": 3.372224313135115e-06,
"loss": 0.0914,
"step": 220900
},
{
"epoch": 83.18,
"learning_rate": 3.364697026721867e-06,
"loss": 0.0912,
"step": 221000
},
{
"epoch": 83.21,
"learning_rate": 3.357169740308619e-06,
"loss": 0.091,
"step": 221100
},
{
"epoch": 83.25,
"learning_rate": 3.349642453895371e-06,
"loss": 0.0911,
"step": 221200
},
{
"epoch": 83.29,
"learning_rate": 3.342115167482123e-06,
"loss": 0.0896,
"step": 221300
},
{
"epoch": 83.33,
"learning_rate": 3.3345878810688746e-06,
"loss": 0.0926,
"step": 221400
},
{
"epoch": 83.36,
"learning_rate": 3.3270605946556267e-06,
"loss": 0.0896,
"step": 221500
},
{
"epoch": 83.4,
"learning_rate": 3.3195333082423788e-06,
"loss": 0.0918,
"step": 221600
},
{
"epoch": 83.44,
"learning_rate": 3.3120060218291313e-06,
"loss": 0.0904,
"step": 221700
},
{
"epoch": 83.48,
"learning_rate": 3.304478735415883e-06,
"loss": 0.0924,
"step": 221800
},
{
"epoch": 83.52,
"learning_rate": 3.296951449002635e-06,
"loss": 0.0898,
"step": 221900
},
{
"epoch": 83.55,
"learning_rate": 3.289424162589387e-06,
"loss": 0.0919,
"step": 222000
},
{
"epoch": 83.59,
"learning_rate": 3.2818968761761388e-06,
"loss": 0.0902,
"step": 222100
},
{
"epoch": 83.63,
"learning_rate": 3.274369589762891e-06,
"loss": 0.0911,
"step": 222200
},
{
"epoch": 83.67,
"learning_rate": 3.2668423033496425e-06,
"loss": 0.0907,
"step": 222300
},
{
"epoch": 83.7,
"learning_rate": 3.2593150169363946e-06,
"loss": 0.0907,
"step": 222400
},
{
"epoch": 83.74,
"learning_rate": 3.2517877305231467e-06,
"loss": 0.0894,
"step": 222500
},
{
"epoch": 83.78,
"learning_rate": 3.2442604441098984e-06,
"loss": 0.0912,
"step": 222600
},
{
"epoch": 83.82,
"learning_rate": 3.2367331576966505e-06,
"loss": 0.0919,
"step": 222700
},
{
"epoch": 83.85,
"learning_rate": 3.2292058712834026e-06,
"loss": 0.0906,
"step": 222800
},
{
"epoch": 83.89,
"learning_rate": 3.2216785848701546e-06,
"loss": 0.0902,
"step": 222900
},
{
"epoch": 83.93,
"learning_rate": 3.2141512984569067e-06,
"loss": 0.0908,
"step": 223000
},
{
"epoch": 83.97,
"learning_rate": 3.206624012043659e-06,
"loss": 0.0911,
"step": 223100
},
{
"epoch": 84.0,
"eval_loss": 0.09099774062633514,
"eval_runtime": 45.2441,
"eval_samples_per_second": 165.768,
"eval_steps_per_second": 10.366,
"step": 223188
},
{
"epoch": 84.0,
"learning_rate": 3.1990967256304105e-06,
"loss": 0.0915,
"step": 223200
},
{
"epoch": 84.04,
"learning_rate": 3.1915694392171626e-06,
"loss": 0.0904,
"step": 223300
},
{
"epoch": 84.08,
"learning_rate": 3.1840421528039147e-06,
"loss": 0.0894,
"step": 223400
},
{
"epoch": 84.12,
"learning_rate": 3.1765148663906663e-06,
"loss": 0.0901,
"step": 223500
},
{
"epoch": 84.16,
"learning_rate": 3.1689875799774184e-06,
"loss": 0.0901,
"step": 223600
},
{
"epoch": 84.19,
"learning_rate": 3.16146029356417e-06,
"loss": 0.091,
"step": 223700
},
{
"epoch": 84.23,
"learning_rate": 3.153933007150922e-06,
"loss": 0.0913,
"step": 223800
},
{
"epoch": 84.27,
"learning_rate": 3.1464057207376743e-06,
"loss": 0.091,
"step": 223900
},
{
"epoch": 84.31,
"learning_rate": 3.138878434324426e-06,
"loss": 0.0907,
"step": 224000
},
{
"epoch": 84.34,
"learning_rate": 3.131351147911178e-06,
"loss": 0.0913,
"step": 224100
},
{
"epoch": 84.38,
"learning_rate": 3.1238238614979305e-06,
"loss": 0.0898,
"step": 224200
},
{
"epoch": 84.42,
"learning_rate": 3.1162965750846822e-06,
"loss": 0.0897,
"step": 224300
},
{
"epoch": 84.46,
"learning_rate": 3.1087692886714343e-06,
"loss": 0.0897,
"step": 224400
},
{
"epoch": 84.49,
"learning_rate": 3.1012420022581864e-06,
"loss": 0.0915,
"step": 224500
},
{
"epoch": 84.53,
"learning_rate": 3.093714715844938e-06,
"loss": 0.0916,
"step": 224600
},
{
"epoch": 84.57,
"learning_rate": 3.08618742943169e-06,
"loss": 0.0932,
"step": 224700
},
{
"epoch": 84.61,
"learning_rate": 3.0786601430184422e-06,
"loss": 0.0909,
"step": 224800
},
{
"epoch": 84.64,
"learning_rate": 3.071132856605194e-06,
"loss": 0.0897,
"step": 224900
},
{
"epoch": 84.68,
"learning_rate": 3.063605570191946e-06,
"loss": 0.0914,
"step": 225000
},
{
"epoch": 84.72,
"learning_rate": 3.0560782837786977e-06,
"loss": 0.0891,
"step": 225100
},
{
"epoch": 84.76,
"learning_rate": 3.0485509973654498e-06,
"loss": 0.0896,
"step": 225200
},
{
"epoch": 84.79,
"learning_rate": 3.041023710952202e-06,
"loss": 0.0911,
"step": 225300
},
{
"epoch": 84.83,
"learning_rate": 3.0334964245389544e-06,
"loss": 0.0931,
"step": 225400
},
{
"epoch": 84.87,
"learning_rate": 3.025969138125706e-06,
"loss": 0.0922,
"step": 225500
},
{
"epoch": 84.91,
"learning_rate": 3.018441851712458e-06,
"loss": 0.0914,
"step": 225600
},
{
"epoch": 84.95,
"learning_rate": 3.01091456529921e-06,
"loss": 0.0914,
"step": 225700
},
{
"epoch": 84.98,
"learning_rate": 3.003387278885962e-06,
"loss": 0.091,
"step": 225800
},
{
"epoch": 85.0,
"eval_loss": 0.0903320163488388,
"eval_runtime": 45.2024,
"eval_samples_per_second": 165.921,
"eval_steps_per_second": 10.376,
"step": 225845
},
{
"epoch": 85.02,
"learning_rate": 2.995859992472714e-06,
"loss": 0.0899,
"step": 225900
},
{
"epoch": 85.06,
"learning_rate": 2.9883327060594656e-06,
"loss": 0.0913,
"step": 226000
},
{
"epoch": 85.1,
"learning_rate": 2.9808054196462177e-06,
"loss": 0.091,
"step": 226100
},
{
"epoch": 85.13,
"learning_rate": 2.97327813323297e-06,
"loss": 0.0931,
"step": 226200
},
{
"epoch": 85.17,
"learning_rate": 2.9657508468197215e-06,
"loss": 0.0925,
"step": 226300
},
{
"epoch": 85.21,
"learning_rate": 2.9582235604064736e-06,
"loss": 0.0903,
"step": 226400
},
{
"epoch": 85.25,
"learning_rate": 2.9506962739932257e-06,
"loss": 0.0908,
"step": 226500
},
{
"epoch": 85.28,
"learning_rate": 2.9431689875799777e-06,
"loss": 0.091,
"step": 226600
},
{
"epoch": 85.32,
"learning_rate": 2.93564170116673e-06,
"loss": 0.0901,
"step": 226700
},
{
"epoch": 85.36,
"learning_rate": 2.928114414753482e-06,
"loss": 0.0907,
"step": 226800
},
{
"epoch": 85.4,
"learning_rate": 2.9205871283402336e-06,
"loss": 0.0912,
"step": 226900
},
{
"epoch": 85.43,
"learning_rate": 2.9130598419269857e-06,
"loss": 0.0904,
"step": 227000
},
{
"epoch": 85.47,
"learning_rate": 2.9055325555137378e-06,
"loss": 0.0913,
"step": 227100
},
{
"epoch": 85.51,
"learning_rate": 2.8980052691004894e-06,
"loss": 0.0917,
"step": 227200
},
{
"epoch": 85.55,
"learning_rate": 2.8904779826872415e-06,
"loss": 0.0897,
"step": 227300
},
{
"epoch": 85.59,
"learning_rate": 2.882950696273993e-06,
"loss": 0.09,
"step": 227400
},
{
"epoch": 85.62,
"learning_rate": 2.8754234098607453e-06,
"loss": 0.0913,
"step": 227500
},
{
"epoch": 85.66,
"learning_rate": 2.8678961234474974e-06,
"loss": 0.0899,
"step": 227600
},
{
"epoch": 85.7,
"learning_rate": 2.860368837034249e-06,
"loss": 0.0905,
"step": 227700
},
{
"epoch": 85.74,
"learning_rate": 2.852841550621001e-06,
"loss": 0.0898,
"step": 227800
},
{
"epoch": 85.77,
"learning_rate": 2.8453142642077536e-06,
"loss": 0.09,
"step": 227900
},
{
"epoch": 85.81,
"learning_rate": 2.8377869777945053e-06,
"loss": 0.0908,
"step": 228000
},
{
"epoch": 85.85,
"learning_rate": 2.8302596913812574e-06,
"loss": 0.0906,
"step": 228100
},
{
"epoch": 85.89,
"learning_rate": 2.8227324049680095e-06,
"loss": 0.0904,
"step": 228200
},
{
"epoch": 85.92,
"learning_rate": 2.815205118554761e-06,
"loss": 0.0897,
"step": 228300
},
{
"epoch": 85.96,
"learning_rate": 2.8076778321415132e-06,
"loss": 0.091,
"step": 228400
},
{
"epoch": 86.0,
"learning_rate": 2.8001505457282653e-06,
"loss": 0.0903,
"step": 228500
},
{
"epoch": 86.0,
"eval_loss": 0.0905364602804184,
"eval_runtime": 45.2173,
"eval_samples_per_second": 165.866,
"eval_steps_per_second": 10.372,
"step": 228502
},
{
"epoch": 86.04,
"learning_rate": 2.792623259315017e-06,
"loss": 0.0907,
"step": 228600
},
{
"epoch": 86.07,
"learning_rate": 2.785095972901769e-06,
"loss": 0.0918,
"step": 228700
},
{
"epoch": 86.11,
"learning_rate": 2.7775686864885208e-06,
"loss": 0.0925,
"step": 228800
},
{
"epoch": 86.15,
"learning_rate": 2.770041400075273e-06,
"loss": 0.0897,
"step": 228900
},
{
"epoch": 86.19,
"learning_rate": 2.762514113662025e-06,
"loss": 0.0895,
"step": 229000
},
{
"epoch": 86.23,
"learning_rate": 2.7549868272487774e-06,
"loss": 0.088,
"step": 229100
},
{
"epoch": 86.26,
"learning_rate": 2.747459540835529e-06,
"loss": 0.0899,
"step": 229200
},
{
"epoch": 86.3,
"learning_rate": 2.739932254422281e-06,
"loss": 0.0898,
"step": 229300
},
{
"epoch": 86.34,
"learning_rate": 2.7324049680090333e-06,
"loss": 0.0904,
"step": 229400
},
{
"epoch": 86.38,
"learning_rate": 2.724877681595785e-06,
"loss": 0.0904,
"step": 229500
},
{
"epoch": 86.41,
"learning_rate": 2.717350395182537e-06,
"loss": 0.0906,
"step": 229600
},
{
"epoch": 86.45,
"learning_rate": 2.7098231087692887e-06,
"loss": 0.0899,
"step": 229700
},
{
"epoch": 86.49,
"learning_rate": 2.702295822356041e-06,
"loss": 0.0889,
"step": 229800
},
{
"epoch": 86.53,
"learning_rate": 2.694768535942793e-06,
"loss": 0.0915,
"step": 229900
},
{
"epoch": 86.56,
"learning_rate": 2.6872412495295446e-06,
"loss": 0.09,
"step": 230000
},
{
"epoch": 86.6,
"learning_rate": 2.6797139631162967e-06,
"loss": 0.0911,
"step": 230100
},
{
"epoch": 86.64,
"learning_rate": 2.6721866767030487e-06,
"loss": 0.0911,
"step": 230200
},
{
"epoch": 86.68,
"learning_rate": 2.6646593902898004e-06,
"loss": 0.0914,
"step": 230300
},
{
"epoch": 86.71,
"learning_rate": 2.657132103876553e-06,
"loss": 0.0904,
"step": 230400
},
{
"epoch": 86.75,
"learning_rate": 2.649604817463305e-06,
"loss": 0.0892,
"step": 230500
},
{
"epoch": 86.79,
"learning_rate": 2.6420775310500567e-06,
"loss": 0.0896,
"step": 230600
},
{
"epoch": 86.83,
"learning_rate": 2.6345502446368088e-06,
"loss": 0.0902,
"step": 230700
},
{
"epoch": 86.86,
"learning_rate": 2.627022958223561e-06,
"loss": 0.0907,
"step": 230800
},
{
"epoch": 86.9,
"learning_rate": 2.6194956718103125e-06,
"loss": 0.0904,
"step": 230900
},
{
"epoch": 86.94,
"learning_rate": 2.6119683853970646e-06,
"loss": 0.0906,
"step": 231000
},
{
"epoch": 86.98,
"learning_rate": 2.6044410989838163e-06,
"loss": 0.0907,
"step": 231100
},
{
"epoch": 87.0,
"eval_loss": 0.09008638560771942,
"eval_runtime": 45.2794,
"eval_samples_per_second": 165.638,
"eval_steps_per_second": 10.358,
"step": 231159
},
{
"epoch": 87.02,
"learning_rate": 2.5969138125705684e-06,
"loss": 0.0909,
"step": 231200
},
{
"epoch": 87.05,
"learning_rate": 2.5893865261573205e-06,
"loss": 0.0889,
"step": 231300
},
{
"epoch": 87.09,
"learning_rate": 2.581859239744072e-06,
"loss": 0.0905,
"step": 231400
},
{
"epoch": 87.13,
"learning_rate": 2.5743319533308242e-06,
"loss": 0.0887,
"step": 231500
},
{
"epoch": 87.17,
"learning_rate": 2.5668046669175767e-06,
"loss": 0.0914,
"step": 231600
},
{
"epoch": 87.2,
"learning_rate": 2.559277380504329e-06,
"loss": 0.0906,
"step": 231700
},
{
"epoch": 87.24,
"learning_rate": 2.5517500940910805e-06,
"loss": 0.0909,
"step": 231800
},
{
"epoch": 87.28,
"learning_rate": 2.5442228076778326e-06,
"loss": 0.09,
"step": 231900
},
{
"epoch": 87.32,
"learning_rate": 2.5366955212645842e-06,
"loss": 0.0888,
"step": 232000
},
{
"epoch": 87.35,
"learning_rate": 2.5291682348513363e-06,
"loss": 0.09,
"step": 232100
},
{
"epoch": 87.39,
"learning_rate": 2.5216409484380884e-06,
"loss": 0.0895,
"step": 232200
},
{
"epoch": 87.43,
"learning_rate": 2.51411366202484e-06,
"loss": 0.0907,
"step": 232300
},
{
"epoch": 87.47,
"learning_rate": 2.506586375611592e-06,
"loss": 0.0906,
"step": 232400
},
{
"epoch": 87.5,
"learning_rate": 2.4990590891983443e-06,
"loss": 0.0907,
"step": 232500
},
{
"epoch": 87.54,
"learning_rate": 2.4915318027850964e-06,
"loss": 0.0907,
"step": 232600
},
{
"epoch": 87.58,
"learning_rate": 2.484004516371848e-06,
"loss": 0.0907,
"step": 232700
},
{
"epoch": 87.62,
"learning_rate": 2.4764772299586e-06,
"loss": 0.0892,
"step": 232800
},
{
"epoch": 87.66,
"learning_rate": 2.468949943545352e-06,
"loss": 0.0908,
"step": 232900
},
{
"epoch": 87.69,
"learning_rate": 2.461422657132104e-06,
"loss": 0.0906,
"step": 233000
},
{
"epoch": 87.73,
"learning_rate": 2.453895370718856e-06,
"loss": 0.0907,
"step": 233100
},
{
"epoch": 87.77,
"learning_rate": 2.446368084305608e-06,
"loss": 0.0905,
"step": 233200
},
{
"epoch": 87.81,
"learning_rate": 2.43884079789236e-06,
"loss": 0.0913,
"step": 233300
},
{
"epoch": 87.84,
"learning_rate": 2.431313511479112e-06,
"loss": 0.0926,
"step": 233400
},
{
"epoch": 87.88,
"learning_rate": 2.423786225065864e-06,
"loss": 0.0924,
"step": 233500
},
{
"epoch": 87.92,
"learning_rate": 2.416258938652616e-06,
"loss": 0.0897,
"step": 233600
},
{
"epoch": 87.96,
"learning_rate": 2.4087316522393677e-06,
"loss": 0.0922,
"step": 233700
},
{
"epoch": 87.99,
"learning_rate": 2.40120436582612e-06,
"loss": 0.0908,
"step": 233800
},
{
"epoch": 88.0,
"eval_loss": 0.0906805768609047,
"eval_runtime": 44.9216,
"eval_samples_per_second": 166.958,
"eval_steps_per_second": 10.44,
"step": 233816
},
{
"epoch": 88.03,
"learning_rate": 2.393677079412872e-06,
"loss": 0.0916,
"step": 233900
},
{
"epoch": 88.07,
"learning_rate": 2.386149792999624e-06,
"loss": 0.0901,
"step": 234000
},
{
"epoch": 88.11,
"learning_rate": 2.3786225065863756e-06,
"loss": 0.0899,
"step": 234100
},
{
"epoch": 88.14,
"learning_rate": 2.3710952201731277e-06,
"loss": 0.0909,
"step": 234200
},
{
"epoch": 88.18,
"learning_rate": 2.3635679337598798e-06,
"loss": 0.0904,
"step": 234300
},
{
"epoch": 88.22,
"learning_rate": 2.356040647346632e-06,
"loss": 0.0914,
"step": 234400
},
{
"epoch": 88.26,
"learning_rate": 2.348513360933384e-06,
"loss": 0.091,
"step": 234500
},
{
"epoch": 88.3,
"learning_rate": 2.3409860745201356e-06,
"loss": 0.0919,
"step": 234600
},
{
"epoch": 88.33,
"learning_rate": 2.3334587881068877e-06,
"loss": 0.091,
"step": 234700
},
{
"epoch": 88.37,
"learning_rate": 2.3259315016936394e-06,
"loss": 0.0899,
"step": 234800
},
{
"epoch": 88.41,
"learning_rate": 2.3184042152803915e-06,
"loss": 0.0899,
"step": 234900
},
{
"epoch": 88.45,
"learning_rate": 2.3108769288671436e-06,
"loss": 0.0899,
"step": 235000
},
{
"epoch": 88.48,
"learning_rate": 2.3033496424538956e-06,
"loss": 0.0916,
"step": 235100
},
{
"epoch": 88.52,
"learning_rate": 2.2958223560406477e-06,
"loss": 0.0901,
"step": 235200
},
{
"epoch": 88.56,
"learning_rate": 2.2882950696273994e-06,
"loss": 0.0898,
"step": 235300
},
{
"epoch": 88.6,
"learning_rate": 2.2807677832141515e-06,
"loss": 0.0908,
"step": 235400
},
{
"epoch": 88.63,
"learning_rate": 2.273240496800903e-06,
"loss": 0.0891,
"step": 235500
},
{
"epoch": 88.67,
"learning_rate": 2.2657132103876557e-06,
"loss": 0.0909,
"step": 235600
},
{
"epoch": 88.71,
"learning_rate": 2.2581859239744073e-06,
"loss": 0.0889,
"step": 235700
},
{
"epoch": 88.75,
"learning_rate": 2.2506586375611594e-06,
"loss": 0.093,
"step": 235800
},
{
"epoch": 88.78,
"learning_rate": 2.2431313511479115e-06,
"loss": 0.0902,
"step": 235900
},
{
"epoch": 88.82,
"learning_rate": 2.235604064734663e-06,
"loss": 0.09,
"step": 236000
},
{
"epoch": 88.86,
"learning_rate": 2.2280767783214153e-06,
"loss": 0.0893,
"step": 236100
},
{
"epoch": 88.9,
"learning_rate": 2.2205494919081674e-06,
"loss": 0.0902,
"step": 236200
},
{
"epoch": 88.93,
"learning_rate": 2.2130222054949194e-06,
"loss": 0.0912,
"step": 236300
},
{
"epoch": 88.97,
"learning_rate": 2.205494919081671e-06,
"loss": 0.0911,
"step": 236400
},
{
"epoch": 89.0,
"eval_loss": 0.09018085896968842,
"eval_runtime": 45.1243,
"eval_samples_per_second": 166.207,
"eval_steps_per_second": 10.394,
"step": 236473
},
{
"epoch": 89.01,
"learning_rate": 2.197967632668423e-06,
"loss": 0.092,
"step": 236500
},
{
"epoch": 89.05,
"learning_rate": 2.1904403462551753e-06,
"loss": 0.0904,
"step": 236600
},
{
"epoch": 89.09,
"learning_rate": 2.182913059841927e-06,
"loss": 0.09,
"step": 236700
},
{
"epoch": 89.12,
"learning_rate": 2.175385773428679e-06,
"loss": 0.0911,
"step": 236800
},
{
"epoch": 89.16,
"learning_rate": 2.167858487015431e-06,
"loss": 0.0883,
"step": 236900
},
{
"epoch": 89.2,
"learning_rate": 2.1603312006021832e-06,
"loss": 0.0904,
"step": 237000
},
{
"epoch": 89.24,
"learning_rate": 2.152803914188935e-06,
"loss": 0.0912,
"step": 237100
},
{
"epoch": 89.27,
"learning_rate": 2.145276627775687e-06,
"loss": 0.0892,
"step": 237200
},
{
"epoch": 89.31,
"learning_rate": 2.137749341362439e-06,
"loss": 0.0912,
"step": 237300
},
{
"epoch": 89.35,
"learning_rate": 2.1302220549491907e-06,
"loss": 0.09,
"step": 237400
},
{
"epoch": 89.39,
"learning_rate": 2.1226947685359433e-06,
"loss": 0.0904,
"step": 237500
},
{
"epoch": 89.42,
"learning_rate": 2.115167482122695e-06,
"loss": 0.0913,
"step": 237600
},
{
"epoch": 89.46,
"learning_rate": 2.107640195709447e-06,
"loss": 0.0904,
"step": 237700
},
{
"epoch": 89.5,
"learning_rate": 2.1001129092961987e-06,
"loss": 0.0899,
"step": 237800
},
{
"epoch": 89.54,
"learning_rate": 2.0925856228829508e-06,
"loss": 0.0893,
"step": 237900
},
{
"epoch": 89.57,
"learning_rate": 2.085058336469703e-06,
"loss": 0.0903,
"step": 238000
},
{
"epoch": 89.61,
"learning_rate": 2.077531050056455e-06,
"loss": 0.09,
"step": 238100
},
{
"epoch": 89.65,
"learning_rate": 2.070003763643207e-06,
"loss": 0.0903,
"step": 238200
},
{
"epoch": 89.69,
"learning_rate": 2.0624764772299587e-06,
"loss": 0.0891,
"step": 238300
},
{
"epoch": 89.73,
"learning_rate": 2.054949190816711e-06,
"loss": 0.0892,
"step": 238400
},
{
"epoch": 89.76,
"learning_rate": 2.0474219044034625e-06,
"loss": 0.09,
"step": 238500
},
{
"epoch": 89.8,
"learning_rate": 2.0398946179902146e-06,
"loss": 0.0908,
"step": 238600
},
{
"epoch": 89.84,
"learning_rate": 2.0323673315769666e-06,
"loss": 0.0902,
"step": 238700
},
{
"epoch": 89.88,
"learning_rate": 2.0248400451637187e-06,
"loss": 0.0916,
"step": 238800
},
{
"epoch": 89.91,
"learning_rate": 2.017312758750471e-06,
"loss": 0.0891,
"step": 238900
},
{
"epoch": 89.95,
"learning_rate": 2.0097854723372225e-06,
"loss": 0.0898,
"step": 239000
},
{
"epoch": 89.99,
"learning_rate": 2.0022581859239746e-06,
"loss": 0.0905,
"step": 239100
},
{
"epoch": 90.0,
"eval_loss": 0.09060540050268173,
"eval_runtime": 45.1371,
"eval_samples_per_second": 166.16,
"eval_steps_per_second": 10.391,
"step": 239130
},
{
"epoch": 90.03,
"learning_rate": 1.9947308995107267e-06,
"loss": 0.0915,
"step": 239200
},
{
"epoch": 90.06,
"learning_rate": 1.9872036130974783e-06,
"loss": 0.0896,
"step": 239300
},
{
"epoch": 90.1,
"learning_rate": 1.9796763266842304e-06,
"loss": 0.0899,
"step": 239400
},
{
"epoch": 90.14,
"learning_rate": 1.9721490402709825e-06,
"loss": 0.091,
"step": 239500
},
{
"epoch": 90.18,
"learning_rate": 1.9646217538577346e-06,
"loss": 0.0894,
"step": 239600
},
{
"epoch": 90.21,
"learning_rate": 1.9570944674444863e-06,
"loss": 0.0897,
"step": 239700
},
{
"epoch": 90.25,
"learning_rate": 1.9495671810312384e-06,
"loss": 0.0905,
"step": 239800
},
{
"epoch": 90.29,
"learning_rate": 1.9420398946179905e-06,
"loss": 0.0893,
"step": 239900
},
{
"epoch": 90.33,
"learning_rate": 1.9345126082047425e-06,
"loss": 0.0904,
"step": 240000
},
{
"epoch": 90.37,
"learning_rate": 1.926985321791494e-06,
"loss": 0.0908,
"step": 240100
},
{
"epoch": 90.4,
"learning_rate": 1.9194580353782463e-06,
"loss": 0.0892,
"step": 240200
},
{
"epoch": 90.44,
"learning_rate": 1.9119307489649984e-06,
"loss": 0.0906,
"step": 240300
},
{
"epoch": 90.48,
"learning_rate": 1.9044034625517503e-06,
"loss": 0.0907,
"step": 240400
},
{
"epoch": 90.52,
"learning_rate": 1.8968761761385021e-06,
"loss": 0.0917,
"step": 240500
},
{
"epoch": 90.55,
"learning_rate": 1.8893488897252542e-06,
"loss": 0.0902,
"step": 240600
},
{
"epoch": 90.59,
"learning_rate": 1.8818216033120063e-06,
"loss": 0.0894,
"step": 240700
},
{
"epoch": 90.63,
"learning_rate": 1.8742943168987582e-06,
"loss": 0.0887,
"step": 240800
},
{
"epoch": 90.67,
"learning_rate": 1.86676703048551e-06,
"loss": 0.0913,
"step": 240900
},
{
"epoch": 90.7,
"learning_rate": 1.859239744072262e-06,
"loss": 0.091,
"step": 241000
},
{
"epoch": 90.74,
"learning_rate": 1.851712457659014e-06,
"loss": 0.0892,
"step": 241100
},
{
"epoch": 90.78,
"learning_rate": 1.8441851712457661e-06,
"loss": 0.0898,
"step": 241200
},
{
"epoch": 90.82,
"learning_rate": 1.836657884832518e-06,
"loss": 0.0888,
"step": 241300
},
{
"epoch": 90.85,
"learning_rate": 1.82913059841927e-06,
"loss": 0.0895,
"step": 241400
},
{
"epoch": 90.89,
"learning_rate": 1.821603312006022e-06,
"loss": 0.0905,
"step": 241500
},
{
"epoch": 90.93,
"learning_rate": 1.8140760255927739e-06,
"loss": 0.0906,
"step": 241600
},
{
"epoch": 90.97,
"learning_rate": 1.8065487391795257e-06,
"loss": 0.089,
"step": 241700
},
{
"epoch": 91.0,
"eval_loss": 0.0901167169213295,
"eval_runtime": 44.9942,
"eval_samples_per_second": 166.688,
"eval_steps_per_second": 10.424,
"step": 241787
},
{
"epoch": 91.0,
"learning_rate": 1.799021452766278e-06,
"loss": 0.0896,
"step": 241800
},
{
"epoch": 91.04,
"learning_rate": 1.79149416635303e-06,
"loss": 0.0903,
"step": 241900
},
{
"epoch": 91.08,
"learning_rate": 1.7839668799397818e-06,
"loss": 0.0903,
"step": 242000
},
{
"epoch": 91.12,
"learning_rate": 1.7764395935265339e-06,
"loss": 0.0887,
"step": 242100
},
{
"epoch": 91.16,
"learning_rate": 1.7689123071132858e-06,
"loss": 0.0893,
"step": 242200
},
{
"epoch": 91.19,
"learning_rate": 1.7613850207000376e-06,
"loss": 0.0905,
"step": 242300
},
{
"epoch": 91.23,
"learning_rate": 1.7538577342867895e-06,
"loss": 0.0896,
"step": 242400
},
{
"epoch": 91.27,
"learning_rate": 1.7463304478735418e-06,
"loss": 0.0897,
"step": 242500
},
{
"epoch": 91.31,
"learning_rate": 1.7388031614602937e-06,
"loss": 0.091,
"step": 242600
},
{
"epoch": 91.34,
"learning_rate": 1.7312758750470458e-06,
"loss": 0.0905,
"step": 242700
},
{
"epoch": 91.38,
"learning_rate": 1.7237485886337977e-06,
"loss": 0.0887,
"step": 242800
},
{
"epoch": 91.42,
"learning_rate": 1.7162213022205495e-06,
"loss": 0.0896,
"step": 242900
},
{
"epoch": 91.46,
"learning_rate": 1.7086940158073014e-06,
"loss": 0.0905,
"step": 243000
},
{
"epoch": 91.49,
"learning_rate": 1.7011667293940537e-06,
"loss": 0.0874,
"step": 243100
},
{
"epoch": 91.53,
"learning_rate": 1.6936394429808056e-06,
"loss": 0.0896,
"step": 243200
},
{
"epoch": 91.57,
"learning_rate": 1.6861121565675575e-06,
"loss": 0.0888,
"step": 243300
},
{
"epoch": 91.61,
"learning_rate": 1.6785848701543096e-06,
"loss": 0.0891,
"step": 243400
},
{
"epoch": 91.64,
"learning_rate": 1.6710575837410615e-06,
"loss": 0.0905,
"step": 243500
},
{
"epoch": 91.68,
"learning_rate": 1.6635302973278133e-06,
"loss": 0.0915,
"step": 243600
},
{
"epoch": 91.72,
"learning_rate": 1.6560030109145656e-06,
"loss": 0.0912,
"step": 243700
},
{
"epoch": 91.76,
"learning_rate": 1.6484757245013175e-06,
"loss": 0.0892,
"step": 243800
},
{
"epoch": 91.8,
"learning_rate": 1.6409484380880694e-06,
"loss": 0.089,
"step": 243900
},
{
"epoch": 91.83,
"learning_rate": 1.6334211516748213e-06,
"loss": 0.0894,
"step": 244000
},
{
"epoch": 91.87,
"learning_rate": 1.6258938652615734e-06,
"loss": 0.0902,
"step": 244100
},
{
"epoch": 91.91,
"learning_rate": 1.6183665788483252e-06,
"loss": 0.0898,
"step": 244200
},
{
"epoch": 91.95,
"learning_rate": 1.6108392924350773e-06,
"loss": 0.0897,
"step": 244300
},
{
"epoch": 91.98,
"learning_rate": 1.6033120060218294e-06,
"loss": 0.0908,
"step": 244400
},
{
"epoch": 92.0,
"eval_loss": 0.08964475989341736,
"eval_runtime": 43.9599,
"eval_samples_per_second": 170.61,
"eval_steps_per_second": 10.669,
"step": 244444
},
{
"epoch": 92.02,
"learning_rate": 1.5957847196085813e-06,
"loss": 0.0888,
"step": 244500
},
{
"epoch": 92.06,
"learning_rate": 1.5882574331953332e-06,
"loss": 0.0873,
"step": 244600
},
{
"epoch": 92.1,
"learning_rate": 1.580730146782085e-06,
"loss": 0.091,
"step": 244700
},
{
"epoch": 92.13,
"learning_rate": 1.5732028603688371e-06,
"loss": 0.0898,
"step": 244800
},
{
"epoch": 92.17,
"learning_rate": 1.565675573955589e-06,
"loss": 0.0895,
"step": 244900
},
{
"epoch": 92.21,
"learning_rate": 1.5581482875423411e-06,
"loss": 0.0898,
"step": 245000
},
{
"epoch": 92.25,
"learning_rate": 1.5506210011290932e-06,
"loss": 0.0912,
"step": 245100
},
{
"epoch": 92.28,
"learning_rate": 1.543093714715845e-06,
"loss": 0.09,
"step": 245200
},
{
"epoch": 92.32,
"learning_rate": 1.535566428302597e-06,
"loss": 0.0899,
"step": 245300
},
{
"epoch": 92.36,
"learning_rate": 1.5280391418893488e-06,
"loss": 0.0904,
"step": 245400
},
{
"epoch": 92.4,
"learning_rate": 1.520511855476101e-06,
"loss": 0.0907,
"step": 245500
},
{
"epoch": 92.44,
"learning_rate": 1.512984569062853e-06,
"loss": 0.0901,
"step": 245600
},
{
"epoch": 92.47,
"learning_rate": 1.505457282649605e-06,
"loss": 0.0912,
"step": 245700
},
{
"epoch": 92.51,
"learning_rate": 1.497929996236357e-06,
"loss": 0.0901,
"step": 245800
},
{
"epoch": 92.55,
"learning_rate": 1.4904027098231089e-06,
"loss": 0.091,
"step": 245900
},
{
"epoch": 92.59,
"learning_rate": 1.4828754234098607e-06,
"loss": 0.0893,
"step": 246000
},
{
"epoch": 92.62,
"learning_rate": 1.4753481369966128e-06,
"loss": 0.0899,
"step": 246100
},
{
"epoch": 92.66,
"learning_rate": 1.467820850583365e-06,
"loss": 0.0898,
"step": 246200
},
{
"epoch": 92.7,
"learning_rate": 1.4602935641701168e-06,
"loss": 0.0908,
"step": 246300
},
{
"epoch": 92.74,
"learning_rate": 1.4527662777568689e-06,
"loss": 0.0909,
"step": 246400
},
{
"epoch": 92.77,
"learning_rate": 1.4452389913436208e-06,
"loss": 0.0887,
"step": 246500
},
{
"epoch": 92.81,
"learning_rate": 1.4377117049303726e-06,
"loss": 0.089,
"step": 246600
},
{
"epoch": 92.85,
"learning_rate": 1.4301844185171245e-06,
"loss": 0.0903,
"step": 246700
},
{
"epoch": 92.89,
"learning_rate": 1.4226571321038768e-06,
"loss": 0.0889,
"step": 246800
},
{
"epoch": 92.92,
"learning_rate": 1.4151298456906287e-06,
"loss": 0.0894,
"step": 246900
},
{
"epoch": 92.96,
"learning_rate": 1.4076025592773806e-06,
"loss": 0.0913,
"step": 247000
},
{
"epoch": 93.0,
"learning_rate": 1.4000752728641327e-06,
"loss": 0.0894,
"step": 247100
},
{
"epoch": 93.0,
"eval_loss": 0.08920498192310333,
"eval_runtime": 43.79,
"eval_samples_per_second": 171.272,
"eval_steps_per_second": 10.71,
"step": 247101
},
{
"epoch": 93.04,
"learning_rate": 1.3925479864508845e-06,
"loss": 0.0895,
"step": 247200
},
{
"epoch": 93.07,
"learning_rate": 1.3850207000376364e-06,
"loss": 0.0895,
"step": 247300
},
{
"epoch": 93.11,
"learning_rate": 1.3774934136243887e-06,
"loss": 0.0912,
"step": 247400
},
{
"epoch": 93.15,
"learning_rate": 1.3699661272111406e-06,
"loss": 0.0905,
"step": 247500
},
{
"epoch": 93.19,
"learning_rate": 1.3624388407978925e-06,
"loss": 0.0893,
"step": 247600
},
{
"epoch": 93.23,
"learning_rate": 1.3549115543846444e-06,
"loss": 0.0889,
"step": 247700
},
{
"epoch": 93.26,
"learning_rate": 1.3473842679713964e-06,
"loss": 0.0902,
"step": 247800
},
{
"epoch": 93.3,
"learning_rate": 1.3398569815581483e-06,
"loss": 0.0891,
"step": 247900
},
{
"epoch": 93.34,
"learning_rate": 1.3323296951449002e-06,
"loss": 0.0896,
"step": 248000
},
{
"epoch": 93.38,
"learning_rate": 1.3248024087316525e-06,
"loss": 0.0899,
"step": 248100
},
{
"epoch": 93.41,
"learning_rate": 1.3172751223184044e-06,
"loss": 0.0898,
"step": 248200
},
{
"epoch": 93.45,
"learning_rate": 1.3097478359051563e-06,
"loss": 0.0884,
"step": 248300
},
{
"epoch": 93.49,
"learning_rate": 1.3022205494919081e-06,
"loss": 0.0902,
"step": 248400
},
{
"epoch": 93.53,
"learning_rate": 1.2946932630786602e-06,
"loss": 0.0923,
"step": 248500
},
{
"epoch": 93.56,
"learning_rate": 1.2871659766654121e-06,
"loss": 0.0896,
"step": 248600
},
{
"epoch": 93.6,
"learning_rate": 1.2796386902521644e-06,
"loss": 0.0904,
"step": 248700
},
{
"epoch": 93.64,
"learning_rate": 1.2721114038389163e-06,
"loss": 0.0911,
"step": 248800
},
{
"epoch": 93.68,
"learning_rate": 1.2645841174256682e-06,
"loss": 0.0897,
"step": 248900
},
{
"epoch": 93.71,
"learning_rate": 1.25705683101242e-06,
"loss": 0.0898,
"step": 249000
},
{
"epoch": 93.75,
"learning_rate": 1.2495295445991721e-06,
"loss": 0.0889,
"step": 249100
},
{
"epoch": 93.79,
"learning_rate": 1.242002258185924e-06,
"loss": 0.0892,
"step": 249200
},
{
"epoch": 93.83,
"learning_rate": 1.234474971772676e-06,
"loss": 0.0881,
"step": 249300
},
{
"epoch": 93.87,
"learning_rate": 1.226947685359428e-06,
"loss": 0.0904,
"step": 249400
},
{
"epoch": 93.9,
"learning_rate": 1.21942039894618e-06,
"loss": 0.0894,
"step": 249500
},
{
"epoch": 93.94,
"learning_rate": 1.211893112532932e-06,
"loss": 0.0904,
"step": 249600
},
{
"epoch": 93.98,
"learning_rate": 1.2043658261196838e-06,
"loss": 0.0899,
"step": 249700
},
{
"epoch": 94.0,
"eval_loss": 0.08932201564311981,
"eval_runtime": 43.7672,
"eval_samples_per_second": 171.361,
"eval_steps_per_second": 10.716,
"step": 249758
},
{
"epoch": 94.02,
"learning_rate": 1.196838539706436e-06,
"loss": 0.0896,
"step": 249800
},
{
"epoch": 94.05,
"learning_rate": 1.1893112532931878e-06,
"loss": 0.0895,
"step": 249900
},
{
"epoch": 94.09,
"learning_rate": 1.1817839668799399e-06,
"loss": 0.0899,
"step": 250000
},
{
"epoch": 94.13,
"learning_rate": 1.174256680466692e-06,
"loss": 0.0902,
"step": 250100
},
{
"epoch": 94.17,
"learning_rate": 1.1667293940534439e-06,
"loss": 0.0901,
"step": 250200
},
{
"epoch": 94.2,
"learning_rate": 1.1592021076401957e-06,
"loss": 0.0885,
"step": 250300
},
{
"epoch": 94.24,
"learning_rate": 1.1516748212269478e-06,
"loss": 0.0906,
"step": 250400
},
{
"epoch": 94.28,
"learning_rate": 1.1441475348136997e-06,
"loss": 0.0899,
"step": 250500
},
{
"epoch": 94.32,
"learning_rate": 1.1366202484004516e-06,
"loss": 0.0894,
"step": 250600
},
{
"epoch": 94.35,
"learning_rate": 1.1290929619872037e-06,
"loss": 0.0892,
"step": 250700
},
{
"epoch": 94.39,
"learning_rate": 1.1215656755739558e-06,
"loss": 0.0912,
"step": 250800
},
{
"epoch": 94.43,
"learning_rate": 1.1140383891607076e-06,
"loss": 0.0871,
"step": 250900
},
{
"epoch": 94.47,
"learning_rate": 1.1065111027474597e-06,
"loss": 0.0898,
"step": 251000
},
{
"epoch": 94.51,
"learning_rate": 1.0989838163342116e-06,
"loss": 0.0896,
"step": 251100
},
{
"epoch": 94.54,
"learning_rate": 1.0914565299209635e-06,
"loss": 0.0914,
"step": 251200
},
{
"epoch": 94.58,
"learning_rate": 1.0839292435077156e-06,
"loss": 0.0907,
"step": 251300
},
{
"epoch": 94.62,
"learning_rate": 1.0764019570944674e-06,
"loss": 0.0896,
"step": 251400
},
{
"epoch": 94.66,
"learning_rate": 1.0688746706812195e-06,
"loss": 0.0888,
"step": 251500
},
{
"epoch": 94.69,
"learning_rate": 1.0613473842679716e-06,
"loss": 0.0879,
"step": 251600
},
{
"epoch": 94.73,
"learning_rate": 1.0538200978547235e-06,
"loss": 0.0899,
"step": 251700
},
{
"epoch": 94.77,
"learning_rate": 1.0462928114414754e-06,
"loss": 0.0903,
"step": 251800
},
{
"epoch": 94.81,
"learning_rate": 1.0387655250282275e-06,
"loss": 0.0878,
"step": 251900
},
{
"epoch": 94.84,
"learning_rate": 1.0312382386149794e-06,
"loss": 0.0894,
"step": 252000
},
{
"epoch": 94.88,
"learning_rate": 1.0237109522017312e-06,
"loss": 0.0901,
"step": 252100
},
{
"epoch": 94.92,
"learning_rate": 1.0161836657884833e-06,
"loss": 0.0906,
"step": 252200
},
{
"epoch": 94.96,
"learning_rate": 1.0086563793752354e-06,
"loss": 0.0911,
"step": 252300
},
{
"epoch": 94.99,
"learning_rate": 1.0011290929619873e-06,
"loss": 0.0899,
"step": 252400
},
{
"epoch": 95.0,
"eval_loss": 0.08966313302516937,
"eval_runtime": 43.6014,
"eval_samples_per_second": 172.013,
"eval_steps_per_second": 10.757,
"step": 252415
},
{
"epoch": 95.03,
"learning_rate": 9.936018065487392e-07,
"loss": 0.0903,
"step": 252500
},
{
"epoch": 95.07,
"learning_rate": 9.860745201354913e-07,
"loss": 0.0907,
"step": 252600
},
{
"epoch": 95.11,
"learning_rate": 9.785472337222431e-07,
"loss": 0.0909,
"step": 252700
},
{
"epoch": 95.14,
"learning_rate": 9.710199473089952e-07,
"loss": 0.0911,
"step": 252800
},
{
"epoch": 95.18,
"learning_rate": 9.63492660895747e-07,
"loss": 0.0916,
"step": 252900
},
{
"epoch": 95.22,
"learning_rate": 9.559653744824992e-07,
"loss": 0.0908,
"step": 253000
},
{
"epoch": 95.26,
"learning_rate": 9.484380880692511e-07,
"loss": 0.0907,
"step": 253100
},
{
"epoch": 95.3,
"learning_rate": 9.409108016560032e-07,
"loss": 0.0892,
"step": 253200
},
{
"epoch": 95.33,
"learning_rate": 9.33383515242755e-07,
"loss": 0.091,
"step": 253300
},
{
"epoch": 95.37,
"learning_rate": 9.25856228829507e-07,
"loss": 0.0888,
"step": 253400
},
{
"epoch": 95.41,
"learning_rate": 9.18328942416259e-07,
"loss": 0.0892,
"step": 253500
},
{
"epoch": 95.45,
"learning_rate": 9.10801656003011e-07,
"loss": 0.09,
"step": 253600
},
{
"epoch": 95.48,
"learning_rate": 9.032743695897629e-07,
"loss": 0.0896,
"step": 253700
},
{
"epoch": 95.52,
"learning_rate": 8.95747083176515e-07,
"loss": 0.0891,
"step": 253800
},
{
"epoch": 95.56,
"learning_rate": 8.882197967632669e-07,
"loss": 0.0897,
"step": 253900
},
{
"epoch": 95.6,
"learning_rate": 8.806925103500188e-07,
"loss": 0.0893,
"step": 254000
},
{
"epoch": 95.63,
"learning_rate": 8.731652239367709e-07,
"loss": 0.0901,
"step": 254100
},
{
"epoch": 95.67,
"learning_rate": 8.656379375235229e-07,
"loss": 0.0887,
"step": 254200
},
{
"epoch": 95.71,
"learning_rate": 8.581106511102748e-07,
"loss": 0.0886,
"step": 254300
},
{
"epoch": 95.75,
"learning_rate": 8.505833646970269e-07,
"loss": 0.0907,
"step": 254400
},
{
"epoch": 95.78,
"learning_rate": 8.430560782837787e-07,
"loss": 0.0894,
"step": 254500
},
{
"epoch": 95.82,
"learning_rate": 8.355287918705307e-07,
"loss": 0.0899,
"step": 254600
},
{
"epoch": 95.86,
"learning_rate": 8.280015054572828e-07,
"loss": 0.0881,
"step": 254700
},
{
"epoch": 95.9,
"learning_rate": 8.204742190440347e-07,
"loss": 0.0902,
"step": 254800
},
{
"epoch": 95.94,
"learning_rate": 8.129469326307867e-07,
"loss": 0.0895,
"step": 254900
},
{
"epoch": 95.97,
"learning_rate": 8.054196462175387e-07,
"loss": 0.0904,
"step": 255000
},
{
"epoch": 96.0,
"eval_loss": 0.0898142084479332,
"eval_runtime": 43.5802,
"eval_samples_per_second": 172.096,
"eval_steps_per_second": 10.762,
"step": 255072
},
{
"epoch": 96.01,
"learning_rate": 7.978923598042906e-07,
"loss": 0.0897,
"step": 255100
},
{
"epoch": 96.05,
"learning_rate": 7.903650733910425e-07,
"loss": 0.0908,
"step": 255200
},
{
"epoch": 96.09,
"learning_rate": 7.828377869777945e-07,
"loss": 0.0905,
"step": 255300
},
{
"epoch": 96.12,
"learning_rate": 7.753105005645466e-07,
"loss": 0.0892,
"step": 255400
},
{
"epoch": 96.16,
"learning_rate": 7.677832141512985e-07,
"loss": 0.0891,
"step": 255500
},
{
"epoch": 96.2,
"learning_rate": 7.602559277380505e-07,
"loss": 0.0898,
"step": 255600
},
{
"epoch": 96.24,
"learning_rate": 7.527286413248026e-07,
"loss": 0.0893,
"step": 255700
},
{
"epoch": 96.27,
"learning_rate": 7.452013549115544e-07,
"loss": 0.0896,
"step": 255800
},
{
"epoch": 96.31,
"learning_rate": 7.376740684983064e-07,
"loss": 0.0897,
"step": 255900
},
{
"epoch": 96.35,
"learning_rate": 7.301467820850584e-07,
"loss": 0.0903,
"step": 256000
},
{
"epoch": 96.39,
"learning_rate": 7.226194956718104e-07,
"loss": 0.0898,
"step": 256100
},
{
"epoch": 96.42,
"learning_rate": 7.150922092585623e-07,
"loss": 0.0895,
"step": 256200
},
{
"epoch": 96.46,
"learning_rate": 7.075649228453143e-07,
"loss": 0.0902,
"step": 256300
},
{
"epoch": 96.5,
"learning_rate": 7.000376364320663e-07,
"loss": 0.0889,
"step": 256400
},
{
"epoch": 96.54,
"learning_rate": 6.925103500188182e-07,
"loss": 0.0893,
"step": 256500
},
{
"epoch": 96.58,
"learning_rate": 6.849830636055703e-07,
"loss": 0.0901,
"step": 256600
},
{
"epoch": 96.61,
"learning_rate": 6.774557771923222e-07,
"loss": 0.09,
"step": 256700
},
{
"epoch": 96.65,
"learning_rate": 6.699284907790742e-07,
"loss": 0.0889,
"step": 256800
},
{
"epoch": 96.69,
"learning_rate": 6.624012043658263e-07,
"loss": 0.0887,
"step": 256900
},
{
"epoch": 96.73,
"learning_rate": 6.548739179525781e-07,
"loss": 0.089,
"step": 257000
},
{
"epoch": 96.76,
"learning_rate": 6.473466315393301e-07,
"loss": 0.0907,
"step": 257100
},
{
"epoch": 96.8,
"learning_rate": 6.398193451260822e-07,
"loss": 0.0888,
"step": 257200
},
{
"epoch": 96.84,
"learning_rate": 6.322920587128341e-07,
"loss": 0.0882,
"step": 257300
},
{
"epoch": 96.88,
"learning_rate": 6.247647722995861e-07,
"loss": 0.0897,
"step": 257400
},
{
"epoch": 96.91,
"learning_rate": 6.17237485886338e-07,
"loss": 0.0893,
"step": 257500
},
{
"epoch": 96.95,
"learning_rate": 6.0971019947309e-07,
"loss": 0.0892,
"step": 257600
},
{
"epoch": 96.99,
"learning_rate": 6.021829130598419e-07,
"loss": 0.0906,
"step": 257700
},
{
"epoch": 97.0,
"eval_loss": 0.08935380727052689,
"eval_runtime": 43.4106,
"eval_samples_per_second": 172.769,
"eval_steps_per_second": 10.804,
"step": 257729
},
{
"epoch": 97.03,
"learning_rate": 5.946556266465939e-07,
"loss": 0.0902,
"step": 257800
},
{
"epoch": 97.06,
"learning_rate": 5.87128340233346e-07,
"loss": 0.0891,
"step": 257900
},
{
"epoch": 97.1,
"learning_rate": 5.796010538200979e-07,
"loss": 0.088,
"step": 258000
},
{
"epoch": 97.14,
"learning_rate": 5.720737674068498e-07,
"loss": 0.0905,
"step": 258100
},
{
"epoch": 97.18,
"learning_rate": 5.645464809936018e-07,
"loss": 0.088,
"step": 258200
},
{
"epoch": 97.21,
"learning_rate": 5.570191945803538e-07,
"loss": 0.0886,
"step": 258300
},
{
"epoch": 97.25,
"learning_rate": 5.494919081671058e-07,
"loss": 0.0889,
"step": 258400
},
{
"epoch": 97.29,
"learning_rate": 5.419646217538578e-07,
"loss": 0.0877,
"step": 258500
},
{
"epoch": 97.33,
"learning_rate": 5.344373353406098e-07,
"loss": 0.0904,
"step": 258600
},
{
"epoch": 97.37,
"learning_rate": 5.269100489273618e-07,
"loss": 0.0886,
"step": 258700
},
{
"epoch": 97.4,
"learning_rate": 5.193827625141137e-07,
"loss": 0.0896,
"step": 258800
},
{
"epoch": 97.44,
"learning_rate": 5.118554761008656e-07,
"loss": 0.0884,
"step": 258900
},
{
"epoch": 97.48,
"learning_rate": 5.043281896876177e-07,
"loss": 0.0896,
"step": 259000
},
{
"epoch": 97.52,
"learning_rate": 4.968009032743696e-07,
"loss": 0.0886,
"step": 259100
},
{
"epoch": 97.55,
"learning_rate": 4.892736168611216e-07,
"loss": 0.0895,
"step": 259200
},
{
"epoch": 97.59,
"learning_rate": 4.817463304478736e-07,
"loss": 0.0889,
"step": 259300
},
{
"epoch": 97.63,
"learning_rate": 4.7421904403462554e-07,
"loss": 0.0888,
"step": 259400
},
{
"epoch": 97.67,
"learning_rate": 4.666917576213775e-07,
"loss": 0.0897,
"step": 259500
},
{
"epoch": 97.7,
"learning_rate": 4.591644712081295e-07,
"loss": 0.0903,
"step": 259600
},
{
"epoch": 97.74,
"learning_rate": 4.5163718479488144e-07,
"loss": 0.0883,
"step": 259700
},
{
"epoch": 97.78,
"learning_rate": 4.4410989838163347e-07,
"loss": 0.0902,
"step": 259800
},
{
"epoch": 97.82,
"learning_rate": 4.3658261196838546e-07,
"loss": 0.09,
"step": 259900
},
{
"epoch": 97.85,
"learning_rate": 4.290553255551374e-07,
"loss": 0.0883,
"step": 260000
},
{
"epoch": 97.89,
"learning_rate": 4.2152803914188937e-07,
"loss": 0.0904,
"step": 260100
},
{
"epoch": 97.93,
"learning_rate": 4.140007527286414e-07,
"loss": 0.0891,
"step": 260200
},
{
"epoch": 97.97,
"learning_rate": 4.0647346631539334e-07,
"loss": 0.0892,
"step": 260300
},
{
"epoch": 98.0,
"eval_loss": 0.08942902088165283,
"eval_runtime": 43.3793,
"eval_samples_per_second": 172.893,
"eval_steps_per_second": 10.812,
"step": 260386
},
{
"epoch": 98.01,
"learning_rate": 3.989461799021453e-07,
"loss": 0.0902,
"step": 260400
},
{
"epoch": 98.04,
"learning_rate": 3.9141889348889725e-07,
"loss": 0.0906,
"step": 260500
},
{
"epoch": 98.08,
"learning_rate": 3.8389160707564924e-07,
"loss": 0.0889,
"step": 260600
},
{
"epoch": 98.12,
"learning_rate": 3.763643206624013e-07,
"loss": 0.0907,
"step": 260700
},
{
"epoch": 98.16,
"learning_rate": 3.688370342491532e-07,
"loss": 0.0879,
"step": 260800
},
{
"epoch": 98.19,
"learning_rate": 3.613097478359052e-07,
"loss": 0.0877,
"step": 260900
},
{
"epoch": 98.23,
"learning_rate": 3.537824614226572e-07,
"loss": 0.0895,
"step": 261000
},
{
"epoch": 98.27,
"learning_rate": 3.462551750094091e-07,
"loss": 0.0903,
"step": 261100
},
{
"epoch": 98.31,
"learning_rate": 3.387278885961611e-07,
"loss": 0.0897,
"step": 261200
},
{
"epoch": 98.34,
"learning_rate": 3.312006021829131e-07,
"loss": 0.0886,
"step": 261300
},
{
"epoch": 98.38,
"learning_rate": 3.2367331576966506e-07,
"loss": 0.0882,
"step": 261400
},
{
"epoch": 98.42,
"learning_rate": 3.1614602935641704e-07,
"loss": 0.0894,
"step": 261500
},
{
"epoch": 98.46,
"learning_rate": 3.08618742943169e-07,
"loss": 0.0901,
"step": 261600
},
{
"epoch": 98.49,
"learning_rate": 3.0109145652992096e-07,
"loss": 0.0909,
"step": 261700
},
{
"epoch": 98.53,
"learning_rate": 2.93564170116673e-07,
"loss": 0.0897,
"step": 261800
},
{
"epoch": 98.57,
"learning_rate": 2.860368837034249e-07,
"loss": 0.0907,
"step": 261900
},
{
"epoch": 98.61,
"learning_rate": 2.785095972901769e-07,
"loss": 0.0885,
"step": 262000
},
{
"epoch": 98.65,
"learning_rate": 2.709823108769289e-07,
"loss": 0.0898,
"step": 262100
},
{
"epoch": 98.68,
"learning_rate": 2.634550244636809e-07,
"loss": 0.0897,
"step": 262200
},
{
"epoch": 98.72,
"learning_rate": 2.559277380504328e-07,
"loss": 0.0887,
"step": 262300
},
{
"epoch": 98.76,
"learning_rate": 2.484004516371848e-07,
"loss": 0.0888,
"step": 262400
},
{
"epoch": 98.8,
"learning_rate": 2.408731652239368e-07,
"loss": 0.0897,
"step": 262500
},
{
"epoch": 98.83,
"learning_rate": 2.3334587881068876e-07,
"loss": 0.0894,
"step": 262600
},
{
"epoch": 98.87,
"learning_rate": 2.2581859239744072e-07,
"loss": 0.0902,
"step": 262700
},
{
"epoch": 98.91,
"learning_rate": 2.1829130598419273e-07,
"loss": 0.0898,
"step": 262800
},
{
"epoch": 98.95,
"learning_rate": 2.1076401957094469e-07,
"loss": 0.0908,
"step": 262900
},
{
"epoch": 98.98,
"learning_rate": 2.0323673315769667e-07,
"loss": 0.0881,
"step": 263000
},
{
"epoch": 99.0,
"eval_loss": 0.08917281776666641,
"eval_runtime": 43.4811,
"eval_samples_per_second": 172.489,
"eval_steps_per_second": 10.786,
"step": 263043
},
{
"epoch": 99.02,
"learning_rate": 1.9570944674444863e-07,
"loss": 0.0909,
"step": 263100
},
{
"epoch": 99.06,
"learning_rate": 1.8818216033120064e-07,
"loss": 0.0888,
"step": 263200
},
{
"epoch": 99.1,
"learning_rate": 1.806548739179526e-07,
"loss": 0.0897,
"step": 263300
},
{
"epoch": 99.13,
"learning_rate": 1.7312758750470455e-07,
"loss": 0.0897,
"step": 263400
},
{
"epoch": 99.17,
"learning_rate": 1.6560030109145656e-07,
"loss": 0.0882,
"step": 263500
},
{
"epoch": 99.21,
"learning_rate": 1.5807301467820852e-07,
"loss": 0.0889,
"step": 263600
},
{
"epoch": 99.25,
"learning_rate": 1.5054572826496048e-07,
"loss": 0.0908,
"step": 263700
},
{
"epoch": 99.28,
"learning_rate": 1.4301844185171246e-07,
"loss": 0.088,
"step": 263800
},
{
"epoch": 99.32,
"learning_rate": 1.3549115543846445e-07,
"loss": 0.0876,
"step": 263900
},
{
"epoch": 99.36,
"learning_rate": 1.279638690252164e-07,
"loss": 0.088,
"step": 264000
},
{
"epoch": 99.4,
"learning_rate": 1.204365826119684e-07,
"loss": 0.0892,
"step": 264100
},
{
"epoch": 99.44,
"learning_rate": 1.1290929619872036e-07,
"loss": 0.0895,
"step": 264200
},
{
"epoch": 99.47,
"learning_rate": 1.0538200978547234e-07,
"loss": 0.0899,
"step": 264300
},
{
"epoch": 99.51,
"learning_rate": 9.785472337222431e-08,
"loss": 0.089,
"step": 264400
},
{
"epoch": 99.55,
"learning_rate": 9.03274369589763e-08,
"loss": 0.0895,
"step": 264500
},
{
"epoch": 99.59,
"learning_rate": 8.280015054572828e-08,
"loss": 0.091,
"step": 264600
},
{
"epoch": 99.62,
"learning_rate": 7.527286413248024e-08,
"loss": 0.0896,
"step": 264700
},
{
"epoch": 99.66,
"learning_rate": 6.774557771923222e-08,
"loss": 0.0894,
"step": 264800
},
{
"epoch": 99.7,
"learning_rate": 6.02182913059842e-08,
"loss": 0.0898,
"step": 264900
},
{
"epoch": 99.74,
"learning_rate": 5.269100489273617e-08,
"loss": 0.0882,
"step": 265000
},
{
"epoch": 99.77,
"learning_rate": 4.516371847948815e-08,
"loss": 0.0901,
"step": 265100
},
{
"epoch": 99.81,
"learning_rate": 3.763643206624012e-08,
"loss": 0.0885,
"step": 265200
},
{
"epoch": 99.85,
"learning_rate": 3.01091456529921e-08,
"loss": 0.0901,
"step": 265300
},
{
"epoch": 99.89,
"learning_rate": 2.2581859239744074e-08,
"loss": 0.0897,
"step": 265400
},
{
"epoch": 99.92,
"learning_rate": 1.505457282649605e-08,
"loss": 0.0902,
"step": 265500
},
{
"epoch": 99.96,
"learning_rate": 7.527286413248024e-09,
"loss": 0.0911,
"step": 265600
},
{
"epoch": 100.0,
"learning_rate": 0.0,
"loss": 0.09,
"step": 265700
},
{
"epoch": 100.0,
"eval_loss": 0.08935302495956421,
"eval_runtime": 44.5127,
"eval_samples_per_second": 168.491,
"eval_steps_per_second": 10.536,
"step": 265700
},
{
"epoch": 100.0,
"step": 265700,
"total_flos": 3.31604966375424e+20,
"train_loss": 0.10943094944119408,
"train_runtime": 65782.603,
"train_samples_per_second": 64.607,
"train_steps_per_second": 4.039
}
],
"max_steps": 265700,
"num_train_epochs": 100,
"total_flos": 3.31604966375424e+20,
"trial_name": null,
"trial_params": null
}