{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 4649, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00021510002151000216, "grad_norm": 0.0, "learning_rate": 1.4285714285714287e-07, "loss": 1.7358, "step": 1 }, { "epoch": 0.00043020004302000433, "grad_norm": 0.0, "learning_rate": 2.8571428571428575e-07, "loss": 1.7069, "step": 2 }, { "epoch": 0.0006453000645300065, "grad_norm": 0.0, "learning_rate": 4.285714285714286e-07, "loss": 1.7119, "step": 3 }, { "epoch": 0.0008604000860400087, "grad_norm": 0.0, "learning_rate": 5.714285714285715e-07, "loss": 1.6695, "step": 4 }, { "epoch": 0.0010755001075500108, "grad_norm": 0.0, "learning_rate": 7.142857142857143e-07, "loss": 1.6631, "step": 5 }, { "epoch": 0.001290600129060013, "grad_norm": 0.0, "learning_rate": 8.571428571428572e-07, "loss": 1.7013, "step": 6 }, { "epoch": 0.001505700150570015, "grad_norm": 0.0, "learning_rate": 1.0000000000000002e-06, "loss": 1.649, "step": 7 }, { "epoch": 0.0017208001720800173, "grad_norm": 0.0, "learning_rate": 1.142857142857143e-06, "loss": 1.431, "step": 8 }, { "epoch": 0.0019359001935900194, "grad_norm": 0.0, "learning_rate": 1.2857142857142856e-06, "loss": 1.4435, "step": 9 }, { "epoch": 0.0021510002151000217, "grad_norm": 0.0, "learning_rate": 1.4285714285714286e-06, "loss": 1.3671, "step": 10 }, { "epoch": 0.0023661002366100236, "grad_norm": 0.0, "learning_rate": 1.5714285714285714e-06, "loss": 1.3399, "step": 11 }, { "epoch": 0.002581200258120026, "grad_norm": 0.0, "learning_rate": 1.7142857142857145e-06, "loss": 1.3014, "step": 12 }, { "epoch": 0.002796300279630028, "grad_norm": 0.0, "learning_rate": 1.8571428571428573e-06, "loss": 1.3198, "step": 13 }, { "epoch": 0.00301140030114003, "grad_norm": 0.0, "learning_rate": 2.0000000000000003e-06, "loss": 1.3075, "step": 14 }, { "epoch": 0.0032265003226500323, "grad_norm": 0.0, "learning_rate": 2.1428571428571427e-06, "loss": 1.3019, "step": 15 }, { "epoch": 0.0034416003441600346, "grad_norm": 0.0, "learning_rate": 2.285714285714286e-06, "loss": 1.3052, "step": 16 }, { "epoch": 0.0036567003656700365, "grad_norm": 0.0, "learning_rate": 2.428571428571429e-06, "loss": 1.1353, "step": 17 }, { "epoch": 0.003871800387180039, "grad_norm": 0.0, "learning_rate": 2.571428571428571e-06, "loss": 1.1694, "step": 18 }, { "epoch": 0.004086900408690041, "grad_norm": 0.0, "learning_rate": 2.7142857142857144e-06, "loss": 1.2111, "step": 19 }, { "epoch": 0.004302000430200043, "grad_norm": 0.0, "learning_rate": 2.8571428571428573e-06, "loss": 1.1543, "step": 20 }, { "epoch": 0.004517100451710045, "grad_norm": 0.0, "learning_rate": 3e-06, "loss": 1.1976, "step": 21 }, { "epoch": 0.004732200473220047, "grad_norm": 0.0, "learning_rate": 3.142857142857143e-06, "loss": 1.1319, "step": 22 }, { "epoch": 0.004947300494730049, "grad_norm": 0.0, "learning_rate": 3.285714285714286e-06, "loss": 1.1672, "step": 23 }, { "epoch": 0.005162400516240052, "grad_norm": 0.0, "learning_rate": 3.428571428571429e-06, "loss": 1.1673, "step": 24 }, { "epoch": 0.005377500537750054, "grad_norm": 0.0, "learning_rate": 3.5714285714285718e-06, "loss": 1.1228, "step": 25 }, { "epoch": 0.005592600559260056, "grad_norm": 0.0, "learning_rate": 3.7142857142857146e-06, "loss": 1.1347, "step": 26 }, { "epoch": 0.005807700580770058, "grad_norm": 0.0, "learning_rate": 3.857142857142858e-06, "loss": 1.1153, "step": 27 }, { "epoch": 0.00602280060228006, "grad_norm": 0.0, "learning_rate": 4.000000000000001e-06, "loss": 1.1935, "step": 28 }, { "epoch": 0.006237900623790062, "grad_norm": 0.0, "learning_rate": 4.1428571428571435e-06, "loss": 1.1182, "step": 29 }, { "epoch": 0.006453000645300065, "grad_norm": 0.0, "learning_rate": 4.2857142857142855e-06, "loss": 1.1337, "step": 30 }, { "epoch": 0.006668100666810067, "grad_norm": 0.0, "learning_rate": 4.428571428571429e-06, "loss": 0.9977, "step": 31 }, { "epoch": 0.006883200688320069, "grad_norm": 0.0, "learning_rate": 4.571428571428572e-06, "loss": 1.0817, "step": 32 }, { "epoch": 0.007098300709830071, "grad_norm": 0.0, "learning_rate": 4.714285714285715e-06, "loss": 1.0712, "step": 33 }, { "epoch": 0.007313400731340073, "grad_norm": 0.0, "learning_rate": 4.857142857142858e-06, "loss": 1.0085, "step": 34 }, { "epoch": 0.007528500752850075, "grad_norm": 0.0, "learning_rate": 5e-06, "loss": 1.1077, "step": 35 }, { "epoch": 0.007743600774360078, "grad_norm": 0.0, "learning_rate": 5.142857142857142e-06, "loss": 1.0453, "step": 36 }, { "epoch": 0.007958700795870079, "grad_norm": 0.0, "learning_rate": 5.285714285714286e-06, "loss": 1.101, "step": 37 }, { "epoch": 0.008173800817380082, "grad_norm": 0.0, "learning_rate": 5.428571428571429e-06, "loss": 1.0156, "step": 38 }, { "epoch": 0.008388900838890084, "grad_norm": 0.0, "learning_rate": 5.571428571428572e-06, "loss": 1.0343, "step": 39 }, { "epoch": 0.008604000860400087, "grad_norm": 0.0, "learning_rate": 5.7142857142857145e-06, "loss": 1.0585, "step": 40 }, { "epoch": 0.008819100881910088, "grad_norm": 0.0, "learning_rate": 5.857142857142858e-06, "loss": 1.0678, "step": 41 }, { "epoch": 0.00903420090342009, "grad_norm": 0.0, "learning_rate": 6e-06, "loss": 1.0106, "step": 42 }, { "epoch": 0.009249300924930093, "grad_norm": 0.0, "learning_rate": 6.142857142857144e-06, "loss": 1.1114, "step": 43 }, { "epoch": 0.009464400946440094, "grad_norm": 0.0, "learning_rate": 6.285714285714286e-06, "loss": 1.0184, "step": 44 }, { "epoch": 0.009679500967950097, "grad_norm": 0.0, "learning_rate": 6.4285714285714295e-06, "loss": 1.0557, "step": 45 }, { "epoch": 0.009894600989460099, "grad_norm": 0.0, "learning_rate": 6.571428571428572e-06, "loss": 1.0479, "step": 46 }, { "epoch": 0.0101097010109701, "grad_norm": 0.0, "learning_rate": 6.714285714285714e-06, "loss": 1.048, "step": 47 }, { "epoch": 0.010324801032480103, "grad_norm": 0.0, "learning_rate": 6.857142857142858e-06, "loss": 1.0314, "step": 48 }, { "epoch": 0.010539901053990105, "grad_norm": 0.0, "learning_rate": 7e-06, "loss": 1.0827, "step": 49 }, { "epoch": 0.010755001075500108, "grad_norm": 0.0, "learning_rate": 7.1428571428571436e-06, "loss": 1.0486, "step": 50 }, { "epoch": 0.01097010109701011, "grad_norm": 0.0, "learning_rate": 7.285714285714286e-06, "loss": 1.0477, "step": 51 }, { "epoch": 0.011185201118520113, "grad_norm": 0.0, "learning_rate": 7.428571428571429e-06, "loss": 1.0078, "step": 52 }, { "epoch": 0.011400301140030114, "grad_norm": 0.0, "learning_rate": 7.571428571428572e-06, "loss": 1.0178, "step": 53 }, { "epoch": 0.011615401161540116, "grad_norm": 0.0, "learning_rate": 7.714285714285716e-06, "loss": 0.9819, "step": 54 }, { "epoch": 0.011830501183050119, "grad_norm": 0.0, "learning_rate": 7.857142857142858e-06, "loss": 1.0593, "step": 55 }, { "epoch": 0.01204560120456012, "grad_norm": 0.0, "learning_rate": 8.000000000000001e-06, "loss": 1.0313, "step": 56 }, { "epoch": 0.012260701226070123, "grad_norm": 0.0, "learning_rate": 8.142857142857143e-06, "loss": 1.0131, "step": 57 }, { "epoch": 0.012475801247580125, "grad_norm": 0.0, "learning_rate": 8.285714285714287e-06, "loss": 0.9946, "step": 58 }, { "epoch": 0.012690901269090126, "grad_norm": 0.0, "learning_rate": 8.428571428571429e-06, "loss": 1.0208, "step": 59 }, { "epoch": 0.01290600129060013, "grad_norm": 0.0, "learning_rate": 8.571428571428571e-06, "loss": 1.0307, "step": 60 }, { "epoch": 0.01312110131211013, "grad_norm": 0.0, "learning_rate": 8.714285714285715e-06, "loss": 0.9715, "step": 61 }, { "epoch": 0.013336201333620134, "grad_norm": 0.0, "learning_rate": 8.857142857142858e-06, "loss": 0.9103, "step": 62 }, { "epoch": 0.013551301355130135, "grad_norm": 0.0, "learning_rate": 9e-06, "loss": 1.093, "step": 63 }, { "epoch": 0.013766401376640139, "grad_norm": 0.0, "learning_rate": 9.142857142857144e-06, "loss": 0.9808, "step": 64 }, { "epoch": 0.01398150139815014, "grad_norm": 0.0, "learning_rate": 9.285714285714288e-06, "loss": 1.0582, "step": 65 }, { "epoch": 0.014196601419660141, "grad_norm": 0.0, "learning_rate": 9.42857142857143e-06, "loss": 0.975, "step": 66 }, { "epoch": 0.014411701441170145, "grad_norm": 0.0, "learning_rate": 9.571428571428573e-06, "loss": 0.97, "step": 67 }, { "epoch": 0.014626801462680146, "grad_norm": 0.0, "learning_rate": 9.714285714285715e-06, "loss": 1.0694, "step": 68 }, { "epoch": 0.01484190148419015, "grad_norm": 0.0, "learning_rate": 9.857142857142859e-06, "loss": 1.0238, "step": 69 }, { "epoch": 0.01505700150570015, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 1.0397, "step": 70 }, { "epoch": 0.015272101527210152, "grad_norm": 0.0, "learning_rate": 1.0142857142857143e-05, "loss": 1.0694, "step": 71 }, { "epoch": 0.015487201548720155, "grad_norm": 0.0, "learning_rate": 1.0285714285714285e-05, "loss": 0.9487, "step": 72 }, { "epoch": 0.015702301570230157, "grad_norm": 0.0, "learning_rate": 1.042857142857143e-05, "loss": 0.9332, "step": 73 }, { "epoch": 0.015917401591740158, "grad_norm": 0.0, "learning_rate": 1.0571428571428572e-05, "loss": 1.0271, "step": 74 }, { "epoch": 0.016132501613250163, "grad_norm": 0.0, "learning_rate": 1.0714285714285714e-05, "loss": 1.0254, "step": 75 }, { "epoch": 0.016347601634760164, "grad_norm": 0.0, "learning_rate": 1.0857142857142858e-05, "loss": 0.9678, "step": 76 }, { "epoch": 0.016562701656270166, "grad_norm": 0.0, "learning_rate": 1.1000000000000001e-05, "loss": 1.0253, "step": 77 }, { "epoch": 0.016777801677780167, "grad_norm": 0.0, "learning_rate": 1.1142857142857143e-05, "loss": 1.0322, "step": 78 }, { "epoch": 0.01699290169929017, "grad_norm": 0.0, "learning_rate": 1.1285714285714287e-05, "loss": 0.9864, "step": 79 }, { "epoch": 0.017208001720800174, "grad_norm": 0.0, "learning_rate": 1.1428571428571429e-05, "loss": 1.017, "step": 80 }, { "epoch": 0.017423101742310175, "grad_norm": 0.0, "learning_rate": 1.1571428571428573e-05, "loss": 1.048, "step": 81 }, { "epoch": 0.017638201763820176, "grad_norm": 0.0, "learning_rate": 1.1714285714285716e-05, "loss": 1.0558, "step": 82 }, { "epoch": 0.017853301785330178, "grad_norm": 0.0, "learning_rate": 1.1857142857142858e-05, "loss": 1.0355, "step": 83 }, { "epoch": 0.01806840180684018, "grad_norm": 0.0, "learning_rate": 1.2e-05, "loss": 0.9978, "step": 84 }, { "epoch": 0.018283501828350184, "grad_norm": 0.0, "learning_rate": 1.2142857142857142e-05, "loss": 0.959, "step": 85 }, { "epoch": 0.018498601849860186, "grad_norm": 0.0, "learning_rate": 1.2285714285714288e-05, "loss": 1.0093, "step": 86 }, { "epoch": 0.018713701871370187, "grad_norm": 0.0, "learning_rate": 1.242857142857143e-05, "loss": 1.0048, "step": 87 }, { "epoch": 0.01892880189288019, "grad_norm": 0.0, "learning_rate": 1.2571428571428572e-05, "loss": 1.0063, "step": 88 }, { "epoch": 0.01914390191439019, "grad_norm": 0.0, "learning_rate": 1.2714285714285715e-05, "loss": 1.0857, "step": 89 }, { "epoch": 0.019359001935900195, "grad_norm": 0.0, "learning_rate": 1.2857142857142859e-05, "loss": 1.0478, "step": 90 }, { "epoch": 0.019574101957410196, "grad_norm": 0.0, "learning_rate": 1.3000000000000001e-05, "loss": 0.9996, "step": 91 }, { "epoch": 0.019789201978920198, "grad_norm": 0.0, "learning_rate": 1.3142857142857145e-05, "loss": 0.9492, "step": 92 }, { "epoch": 0.0200043020004302, "grad_norm": 0.0, "learning_rate": 1.3285714285714287e-05, "loss": 0.9939, "step": 93 }, { "epoch": 0.0202194020219402, "grad_norm": 0.0, "learning_rate": 1.3428571428571429e-05, "loss": 1.0402, "step": 94 }, { "epoch": 0.020434502043450205, "grad_norm": 0.0, "learning_rate": 1.3571428571428574e-05, "loss": 0.9867, "step": 95 }, { "epoch": 0.020649602064960207, "grad_norm": 0.0, "learning_rate": 1.3714285714285716e-05, "loss": 0.9301, "step": 96 }, { "epoch": 0.02086470208647021, "grad_norm": 0.0, "learning_rate": 1.3857142857142858e-05, "loss": 1.0195, "step": 97 }, { "epoch": 0.02107980210798021, "grad_norm": 0.0, "learning_rate": 1.4e-05, "loss": 0.9919, "step": 98 }, { "epoch": 0.021294902129490215, "grad_norm": 0.0, "learning_rate": 1.4142857142857145e-05, "loss": 1.043, "step": 99 }, { "epoch": 0.021510002151000216, "grad_norm": 0.0, "learning_rate": 1.4285714285714287e-05, "loss": 0.9567, "step": 100 }, { "epoch": 0.021725102172510218, "grad_norm": 0.0, "learning_rate": 1.4428571428571429e-05, "loss": 1.0453, "step": 101 }, { "epoch": 0.02194020219402022, "grad_norm": 0.0, "learning_rate": 1.4571428571428573e-05, "loss": 0.9903, "step": 102 }, { "epoch": 0.02215530221553022, "grad_norm": 0.0, "learning_rate": 1.4714285714285716e-05, "loss": 0.9636, "step": 103 }, { "epoch": 0.022370402237040225, "grad_norm": 0.0, "learning_rate": 1.4857142857142858e-05, "loss": 0.9519, "step": 104 }, { "epoch": 0.022585502258550227, "grad_norm": 0.0, "learning_rate": 1.5000000000000002e-05, "loss": 0.9829, "step": 105 }, { "epoch": 0.022800602280060228, "grad_norm": 0.0, "learning_rate": 1.5142857142857144e-05, "loss": 0.9673, "step": 106 }, { "epoch": 0.02301570230157023, "grad_norm": 0.0, "learning_rate": 1.5285714285714286e-05, "loss": 1.0144, "step": 107 }, { "epoch": 0.02323080232308023, "grad_norm": 0.0, "learning_rate": 1.542857142857143e-05, "loss": 1.0068, "step": 108 }, { "epoch": 0.023445902344590236, "grad_norm": 0.0, "learning_rate": 1.5571428571428573e-05, "loss": 1.062, "step": 109 }, { "epoch": 0.023661002366100237, "grad_norm": 0.0, "learning_rate": 1.5714285714285715e-05, "loss": 0.9843, "step": 110 }, { "epoch": 0.02387610238761024, "grad_norm": 0.0, "learning_rate": 1.5857142857142857e-05, "loss": 1.0297, "step": 111 }, { "epoch": 0.02409120240912024, "grad_norm": 0.0, "learning_rate": 1.6000000000000003e-05, "loss": 0.9894, "step": 112 }, { "epoch": 0.02430630243063024, "grad_norm": 0.0, "learning_rate": 1.6142857142857145e-05, "loss": 1.0086, "step": 113 }, { "epoch": 0.024521402452140247, "grad_norm": 0.0, "learning_rate": 1.6285714285714287e-05, "loss": 0.965, "step": 114 }, { "epoch": 0.024736502473650248, "grad_norm": 0.0, "learning_rate": 1.642857142857143e-05, "loss": 1.0184, "step": 115 }, { "epoch": 0.02495160249516025, "grad_norm": 0.0, "learning_rate": 1.6571428571428574e-05, "loss": 0.9504, "step": 116 }, { "epoch": 0.02516670251667025, "grad_norm": 0.0, "learning_rate": 1.6714285714285716e-05, "loss": 0.9846, "step": 117 }, { "epoch": 0.025381802538180252, "grad_norm": 0.0, "learning_rate": 1.6857142857142858e-05, "loss": 1.0332, "step": 118 }, { "epoch": 0.025596902559690257, "grad_norm": 0.0, "learning_rate": 1.7e-05, "loss": 1.0284, "step": 119 }, { "epoch": 0.02581200258120026, "grad_norm": 0.0, "learning_rate": 1.7142857142857142e-05, "loss": 0.9632, "step": 120 }, { "epoch": 0.02602710260271026, "grad_norm": 0.0, "learning_rate": 1.7285714285714287e-05, "loss": 0.9937, "step": 121 }, { "epoch": 0.02624220262422026, "grad_norm": 0.0, "learning_rate": 1.742857142857143e-05, "loss": 1.0101, "step": 122 }, { "epoch": 0.026457302645730263, "grad_norm": 0.0, "learning_rate": 1.757142857142857e-05, "loss": 1.02, "step": 123 }, { "epoch": 0.026672402667240268, "grad_norm": 0.0, "learning_rate": 1.7714285714285717e-05, "loss": 0.9759, "step": 124 }, { "epoch": 0.02688750268875027, "grad_norm": 0.0, "learning_rate": 1.785714285714286e-05, "loss": 1.0175, "step": 125 }, { "epoch": 0.02710260271026027, "grad_norm": 0.0, "learning_rate": 1.8e-05, "loss": 0.9886, "step": 126 }, { "epoch": 0.027317702731770272, "grad_norm": 0.0, "learning_rate": 1.8142857142857146e-05, "loss": 1.0129, "step": 127 }, { "epoch": 0.027532802753280277, "grad_norm": 0.0, "learning_rate": 1.8285714285714288e-05, "loss": 1.0234, "step": 128 }, { "epoch": 0.02774790277479028, "grad_norm": 0.0, "learning_rate": 1.842857142857143e-05, "loss": 1.0325, "step": 129 }, { "epoch": 0.02796300279630028, "grad_norm": 0.0, "learning_rate": 1.8571428571428575e-05, "loss": 1.0275, "step": 130 }, { "epoch": 0.02817810281781028, "grad_norm": 0.0, "learning_rate": 1.8714285714285717e-05, "loss": 1.025, "step": 131 }, { "epoch": 0.028393202839320283, "grad_norm": 0.0, "learning_rate": 1.885714285714286e-05, "loss": 1.0253, "step": 132 }, { "epoch": 0.028608302860830288, "grad_norm": 0.0, "learning_rate": 1.9e-05, "loss": 0.9791, "step": 133 }, { "epoch": 0.02882340288234029, "grad_norm": 0.0, "learning_rate": 1.9142857142857146e-05, "loss": 1.0389, "step": 134 }, { "epoch": 0.02903850290385029, "grad_norm": 0.0, "learning_rate": 1.928571428571429e-05, "loss": 1.0425, "step": 135 }, { "epoch": 0.029253602925360292, "grad_norm": 0.0, "learning_rate": 1.942857142857143e-05, "loss": 1.0423, "step": 136 }, { "epoch": 0.029468702946870293, "grad_norm": 0.0, "learning_rate": 1.9571428571428572e-05, "loss": 1.0444, "step": 137 }, { "epoch": 0.0296838029683803, "grad_norm": 0.0, "learning_rate": 1.9714285714285718e-05, "loss": 0.9717, "step": 138 }, { "epoch": 0.0298989029898903, "grad_norm": 0.0, "learning_rate": 1.985714285714286e-05, "loss": 1.0227, "step": 139 }, { "epoch": 0.0301140030114003, "grad_norm": 0.0, "learning_rate": 2e-05, "loss": 1.0157, "step": 140 }, { "epoch": 0.030329103032910303, "grad_norm": 0.0, "learning_rate": 1.9999997572779332e-05, "loss": 1.0007, "step": 141 }, { "epoch": 0.030544203054420304, "grad_norm": 0.0, "learning_rate": 1.9999990291118506e-05, "loss": 1.055, "step": 142 }, { "epoch": 0.03075930307593031, "grad_norm": 0.0, "learning_rate": 1.9999978155021058e-05, "loss": 1.0181, "step": 143 }, { "epoch": 0.03097440309744031, "grad_norm": 0.0, "learning_rate": 1.9999961164492875e-05, "loss": 1.0719, "step": 144 }, { "epoch": 0.031189503118950312, "grad_norm": 0.0, "learning_rate": 1.9999939319542212e-05, "loss": 0.9398, "step": 145 }, { "epoch": 0.03140460314046031, "grad_norm": 0.0, "learning_rate": 1.9999912620179666e-05, "loss": 1.0339, "step": 146 }, { "epoch": 0.03161970316197032, "grad_norm": 0.0, "learning_rate": 1.9999881066418204e-05, "loss": 1.0131, "step": 147 }, { "epoch": 0.031834803183480316, "grad_norm": 0.0, "learning_rate": 1.999984465827314e-05, "loss": 0.9945, "step": 148 }, { "epoch": 0.03204990320499032, "grad_norm": 0.0, "learning_rate": 1.999980339576215e-05, "loss": 1.0186, "step": 149 }, { "epoch": 0.032265003226500326, "grad_norm": 0.0, "learning_rate": 1.9999757278905266e-05, "loss": 1.031, "step": 150 }, { "epoch": 0.032480103248010324, "grad_norm": 0.0, "learning_rate": 1.999970630772487e-05, "loss": 1.0405, "step": 151 }, { "epoch": 0.03269520326952033, "grad_norm": 0.0, "learning_rate": 1.999965048224571e-05, "loss": 1.0178, "step": 152 }, { "epoch": 0.03291030329103033, "grad_norm": 0.0, "learning_rate": 1.9999589802494888e-05, "loss": 1.0204, "step": 153 }, { "epoch": 0.03312540331254033, "grad_norm": 0.0, "learning_rate": 1.9999524268501854e-05, "loss": 1.0537, "step": 154 }, { "epoch": 0.033340503334050337, "grad_norm": 0.0, "learning_rate": 1.9999453880298423e-05, "loss": 1.0239, "step": 155 }, { "epoch": 0.033555603355560334, "grad_norm": 0.0, "learning_rate": 1.999937863791877e-05, "loss": 1.0326, "step": 156 }, { "epoch": 0.03377070337707034, "grad_norm": 0.0, "learning_rate": 1.999929854139942e-05, "loss": 1.0052, "step": 157 }, { "epoch": 0.03398580339858034, "grad_norm": 0.0, "learning_rate": 1.9999213590779245e-05, "loss": 1.0182, "step": 158 }, { "epoch": 0.03420090342009034, "grad_norm": 0.0, "learning_rate": 1.9999123786099497e-05, "loss": 1.0151, "step": 159 }, { "epoch": 0.03441600344160035, "grad_norm": 0.0, "learning_rate": 1.9999029127403763e-05, "loss": 0.9961, "step": 160 }, { "epoch": 0.034631103463110345, "grad_norm": 0.0, "learning_rate": 1.9998929614738e-05, "loss": 1.0555, "step": 161 }, { "epoch": 0.03484620348462035, "grad_norm": 0.0, "learning_rate": 1.999882524815051e-05, "loss": 1.0047, "step": 162 }, { "epoch": 0.03506130350613035, "grad_norm": 0.0, "learning_rate": 1.999871602769196e-05, "loss": 0.9963, "step": 163 }, { "epoch": 0.03527640352764035, "grad_norm": 0.0, "learning_rate": 1.9998601953415374e-05, "loss": 0.9714, "step": 164 }, { "epoch": 0.03549150354915036, "grad_norm": 0.0, "learning_rate": 1.999848302537612e-05, "loss": 1.054, "step": 165 }, { "epoch": 0.035706603570660356, "grad_norm": 0.0, "learning_rate": 1.999835924363194e-05, "loss": 1.0144, "step": 166 }, { "epoch": 0.03592170359217036, "grad_norm": 0.0, "learning_rate": 1.9998230608242917e-05, "loss": 1.0421, "step": 167 }, { "epoch": 0.03613680361368036, "grad_norm": 0.0, "learning_rate": 1.99980971192715e-05, "loss": 0.9899, "step": 168 }, { "epoch": 0.036351903635190363, "grad_norm": 0.0, "learning_rate": 1.999795877678249e-05, "loss": 1.0686, "step": 169 }, { "epoch": 0.03656700365670037, "grad_norm": 0.0, "learning_rate": 1.9997815580843043e-05, "loss": 0.9609, "step": 170 }, { "epoch": 0.036782103678210366, "grad_norm": 0.0, "learning_rate": 1.999766753152267e-05, "loss": 1.0215, "step": 171 }, { "epoch": 0.03699720369972037, "grad_norm": 0.0, "learning_rate": 1.9997514628893247e-05, "loss": 0.9995, "step": 172 }, { "epoch": 0.03721230372123037, "grad_norm": 0.0, "learning_rate": 1.9997356873028994e-05, "loss": 1.0368, "step": 173 }, { "epoch": 0.037427403742740374, "grad_norm": 0.0, "learning_rate": 1.9997194264006498e-05, "loss": 0.9855, "step": 174 }, { "epoch": 0.03764250376425038, "grad_norm": 0.0, "learning_rate": 1.999702680190469e-05, "loss": 0.9558, "step": 175 }, { "epoch": 0.03785760378576038, "grad_norm": 0.0, "learning_rate": 1.9996854486804874e-05, "loss": 0.9937, "step": 176 }, { "epoch": 0.03807270380727038, "grad_norm": 0.0, "learning_rate": 1.9996677318790687e-05, "loss": 0.9762, "step": 177 }, { "epoch": 0.03828780382878038, "grad_norm": 0.0, "learning_rate": 1.9996495297948143e-05, "loss": 1.0705, "step": 178 }, { "epoch": 0.038502903850290385, "grad_norm": 0.0, "learning_rate": 1.9996308424365596e-05, "loss": 0.9845, "step": 179 }, { "epoch": 0.03871800387180039, "grad_norm": 0.0, "learning_rate": 1.999611669813377e-05, "loss": 1.0645, "step": 180 }, { "epoch": 0.03893310389331039, "grad_norm": 0.0, "learning_rate": 1.999592011934573e-05, "loss": 1.0481, "step": 181 }, { "epoch": 0.03914820391482039, "grad_norm": 0.0, "learning_rate": 1.999571868809691e-05, "loss": 1.0405, "step": 182 }, { "epoch": 0.03936330393633039, "grad_norm": 0.0, "learning_rate": 1.999551240448509e-05, "loss": 1.0632, "step": 183 }, { "epoch": 0.039578403957840395, "grad_norm": 0.0, "learning_rate": 1.9995301268610415e-05, "loss": 1.0103, "step": 184 }, { "epoch": 0.0397935039793504, "grad_norm": 0.0, "learning_rate": 1.999508528057537e-05, "loss": 1.0134, "step": 185 }, { "epoch": 0.0400086040008604, "grad_norm": 0.0, "learning_rate": 1.9994864440484812e-05, "loss": 0.9762, "step": 186 }, { "epoch": 0.0402237040223704, "grad_norm": 0.0, "learning_rate": 1.999463874844595e-05, "loss": 1.0603, "step": 187 }, { "epoch": 0.0404388040438804, "grad_norm": 0.0, "learning_rate": 1.9994408204568335e-05, "loss": 0.9945, "step": 188 }, { "epoch": 0.040653904065390406, "grad_norm": 0.0, "learning_rate": 1.9994172808963887e-05, "loss": 1.0014, "step": 189 }, { "epoch": 0.04086900408690041, "grad_norm": 0.0, "learning_rate": 1.999393256174688e-05, "loss": 1.0286, "step": 190 }, { "epoch": 0.04108410410841041, "grad_norm": 0.0, "learning_rate": 1.999368746303394e-05, "loss": 1.0704, "step": 191 }, { "epoch": 0.041299204129920414, "grad_norm": 0.0, "learning_rate": 1.9993437512944046e-05, "loss": 0.9998, "step": 192 }, { "epoch": 0.04151430415143041, "grad_norm": 0.0, "learning_rate": 1.9993182711598536e-05, "loss": 1.0206, "step": 193 }, { "epoch": 0.04172940417294042, "grad_norm": 0.0, "learning_rate": 1.9992923059121107e-05, "loss": 0.9683, "step": 194 }, { "epoch": 0.04194450419445042, "grad_norm": 0.0, "learning_rate": 1.9992658555637795e-05, "loss": 0.943, "step": 195 }, { "epoch": 0.04215960421596042, "grad_norm": 0.0, "learning_rate": 1.999238920127701e-05, "loss": 1.0395, "step": 196 }, { "epoch": 0.042374704237470424, "grad_norm": 0.0, "learning_rate": 1.9992114996169506e-05, "loss": 0.9962, "step": 197 }, { "epoch": 0.04258980425898043, "grad_norm": 0.0, "learning_rate": 1.9991835940448396e-05, "loss": 1.0622, "step": 198 }, { "epoch": 0.04280490428049043, "grad_norm": 0.0, "learning_rate": 1.9991552034249146e-05, "loss": 1.0278, "step": 199 }, { "epoch": 0.04302000430200043, "grad_norm": 0.0, "learning_rate": 1.9991263277709572e-05, "loss": 1.0942, "step": 200 }, { "epoch": 0.04323510432351043, "grad_norm": 0.0, "learning_rate": 1.9990969670969852e-05, "loss": 1.0118, "step": 201 }, { "epoch": 0.043450204345020435, "grad_norm": 0.0, "learning_rate": 1.9990671214172517e-05, "loss": 1.0182, "step": 202 }, { "epoch": 0.04366530436653044, "grad_norm": 0.0, "learning_rate": 1.999036790746245e-05, "loss": 1.0803, "step": 203 }, { "epoch": 0.04388040438804044, "grad_norm": 0.0, "learning_rate": 1.999005975098689e-05, "loss": 1.0718, "step": 204 }, { "epoch": 0.04409550440955044, "grad_norm": 0.0, "learning_rate": 1.998974674489543e-05, "loss": 1.0441, "step": 205 }, { "epoch": 0.04431060443106044, "grad_norm": 0.0, "learning_rate": 1.9989428889340016e-05, "loss": 1.0395, "step": 206 }, { "epoch": 0.044525704452570446, "grad_norm": 0.0, "learning_rate": 1.9989106184474948e-05, "loss": 0.9808, "step": 207 }, { "epoch": 0.04474080447408045, "grad_norm": 0.0, "learning_rate": 1.9988778630456883e-05, "loss": 1.0357, "step": 208 }, { "epoch": 0.04495590449559045, "grad_norm": 0.0, "learning_rate": 1.998844622744483e-05, "loss": 0.9994, "step": 209 }, { "epoch": 0.04517100451710045, "grad_norm": 0.0, "learning_rate": 1.9988108975600153e-05, "loss": 0.9807, "step": 210 }, { "epoch": 0.04538610453861045, "grad_norm": 0.0, "learning_rate": 1.9987766875086562e-05, "loss": 1.009, "step": 211 }, { "epoch": 0.045601204560120456, "grad_norm": 0.0, "learning_rate": 1.998741992607014e-05, "loss": 1.0176, "step": 212 }, { "epoch": 0.04581630458163046, "grad_norm": 0.0, "learning_rate": 1.9987068128719304e-05, "loss": 0.9829, "step": 213 }, { "epoch": 0.04603140460314046, "grad_norm": 0.0, "learning_rate": 1.998671148320483e-05, "loss": 1.02, "step": 214 }, { "epoch": 0.046246504624650464, "grad_norm": 0.0, "learning_rate": 1.998634998969985e-05, "loss": 0.9454, "step": 215 }, { "epoch": 0.04646160464616046, "grad_norm": 0.0, "learning_rate": 1.9985983648379857e-05, "loss": 0.9612, "step": 216 }, { "epoch": 0.04667670466767047, "grad_norm": 0.0, "learning_rate": 1.998561245942268e-05, "loss": 0.9566, "step": 217 }, { "epoch": 0.04689180468918047, "grad_norm": 0.0, "learning_rate": 1.9985236423008513e-05, "loss": 0.9382, "step": 218 }, { "epoch": 0.04710690471069047, "grad_norm": 0.0, "learning_rate": 1.9984855539319904e-05, "loss": 0.99, "step": 219 }, { "epoch": 0.047322004732200475, "grad_norm": 0.0, "learning_rate": 1.9984469808541747e-05, "loss": 1.0389, "step": 220 }, { "epoch": 0.04753710475371047, "grad_norm": 0.0, "learning_rate": 1.9984079230861296e-05, "loss": 0.9997, "step": 221 }, { "epoch": 0.04775220477522048, "grad_norm": 0.0, "learning_rate": 1.998368380646815e-05, "loss": 1.0078, "step": 222 }, { "epoch": 0.04796730479673048, "grad_norm": 0.0, "learning_rate": 1.998328353555427e-05, "loss": 0.9862, "step": 223 }, { "epoch": 0.04818240481824048, "grad_norm": 0.0, "learning_rate": 1.998287841831396e-05, "loss": 1.0607, "step": 224 }, { "epoch": 0.048397504839750485, "grad_norm": 0.0, "learning_rate": 1.998246845494389e-05, "loss": 0.9533, "step": 225 }, { "epoch": 0.04861260486126048, "grad_norm": 0.0, "learning_rate": 1.998205364564307e-05, "loss": 1.036, "step": 226 }, { "epoch": 0.04882770488277049, "grad_norm": 0.0, "learning_rate": 1.998163399061286e-05, "loss": 1.0615, "step": 227 }, { "epoch": 0.04904280490428049, "grad_norm": 0.0, "learning_rate": 1.998120949005699e-05, "loss": 1.019, "step": 228 }, { "epoch": 0.04925790492579049, "grad_norm": 0.0, "learning_rate": 1.9980780144181524e-05, "loss": 1.044, "step": 229 }, { "epoch": 0.049473004947300496, "grad_norm": 0.0, "learning_rate": 1.9980345953194892e-05, "loss": 1.0828, "step": 230 }, { "epoch": 0.049688104968810494, "grad_norm": 0.0, "learning_rate": 1.9979906917307857e-05, "loss": 1.0152, "step": 231 }, { "epoch": 0.0499032049903205, "grad_norm": 0.0, "learning_rate": 1.9979463036733562e-05, "loss": 1.0352, "step": 232 }, { "epoch": 0.050118305011830504, "grad_norm": 0.0, "learning_rate": 1.9979014311687475e-05, "loss": 1.0282, "step": 233 }, { "epoch": 0.0503334050333405, "grad_norm": 0.0, "learning_rate": 1.9978560742387437e-05, "loss": 1.0557, "step": 234 }, { "epoch": 0.05054850505485051, "grad_norm": 0.0, "learning_rate": 1.9978102329053618e-05, "loss": 0.9244, "step": 235 }, { "epoch": 0.050763605076360505, "grad_norm": 0.0, "learning_rate": 1.9977639071908564e-05, "loss": 1.0218, "step": 236 }, { "epoch": 0.05097870509787051, "grad_norm": 0.0, "learning_rate": 1.9977170971177152e-05, "loss": 1.0706, "step": 237 }, { "epoch": 0.051193805119380514, "grad_norm": 0.0, "learning_rate": 1.9976698027086622e-05, "loss": 1.0053, "step": 238 }, { "epoch": 0.05140890514089051, "grad_norm": 0.0, "learning_rate": 1.9976220239866563e-05, "loss": 0.9486, "step": 239 }, { "epoch": 0.05162400516240052, "grad_norm": 0.0, "learning_rate": 1.997573760974891e-05, "loss": 0.9735, "step": 240 }, { "epoch": 0.051839105183910515, "grad_norm": 0.0, "learning_rate": 1.9975250136967957e-05, "loss": 0.999, "step": 241 }, { "epoch": 0.05205420520542052, "grad_norm": 0.0, "learning_rate": 1.9974757821760343e-05, "loss": 1.0376, "step": 242 }, { "epoch": 0.052269305226930525, "grad_norm": 0.0, "learning_rate": 1.997426066436506e-05, "loss": 1.0271, "step": 243 }, { "epoch": 0.05248440524844052, "grad_norm": 0.0, "learning_rate": 1.997375866502345e-05, "loss": 1.0412, "step": 244 }, { "epoch": 0.05269950526995053, "grad_norm": 0.0, "learning_rate": 1.997325182397921e-05, "loss": 1.034, "step": 245 }, { "epoch": 0.052914605291460526, "grad_norm": 0.0, "learning_rate": 1.9972740141478374e-05, "loss": 0.904, "step": 246 }, { "epoch": 0.05312970531297053, "grad_norm": 0.0, "learning_rate": 1.9972223617769343e-05, "loss": 1.0233, "step": 247 }, { "epoch": 0.053344805334480536, "grad_norm": 0.0, "learning_rate": 1.9971702253102856e-05, "loss": 1.0107, "step": 248 }, { "epoch": 0.053559905355990534, "grad_norm": 0.0, "learning_rate": 1.9971176047732008e-05, "loss": 1.0228, "step": 249 }, { "epoch": 0.05377500537750054, "grad_norm": 0.0, "learning_rate": 1.9970645001912243e-05, "loss": 1.0251, "step": 250 }, { "epoch": 0.053990105399010536, "grad_norm": 0.0, "learning_rate": 1.997010911590135e-05, "loss": 0.952, "step": 251 }, { "epoch": 0.05420520542052054, "grad_norm": 0.0, "learning_rate": 1.996956838995948e-05, "loss": 1.0073, "step": 252 }, { "epoch": 0.054420305442030546, "grad_norm": 0.0, "learning_rate": 1.9969022824349115e-05, "loss": 0.9844, "step": 253 }, { "epoch": 0.054635405463540544, "grad_norm": 0.0, "learning_rate": 1.9968472419335106e-05, "loss": 1.0232, "step": 254 }, { "epoch": 0.05485050548505055, "grad_norm": 0.0, "learning_rate": 1.996791717518464e-05, "loss": 1.0286, "step": 255 }, { "epoch": 0.055065605506560554, "grad_norm": 0.0, "learning_rate": 1.9967357092167258e-05, "loss": 1.0014, "step": 256 }, { "epoch": 0.05528070552807055, "grad_norm": 0.0, "learning_rate": 1.9966792170554842e-05, "loss": 0.9896, "step": 257 }, { "epoch": 0.05549580554958056, "grad_norm": 0.0, "learning_rate": 1.996622241062164e-05, "loss": 0.9869, "step": 258 }, { "epoch": 0.055710905571090555, "grad_norm": 0.0, "learning_rate": 1.9965647812644232e-05, "loss": 1.0099, "step": 259 }, { "epoch": 0.05592600559260056, "grad_norm": 0.0, "learning_rate": 1.9965068376901556e-05, "loss": 1.0089, "step": 260 }, { "epoch": 0.056141105614110565, "grad_norm": 0.0, "learning_rate": 1.9964484103674896e-05, "loss": 0.9594, "step": 261 }, { "epoch": 0.05635620563562056, "grad_norm": 0.0, "learning_rate": 1.9963894993247884e-05, "loss": 0.9938, "step": 262 }, { "epoch": 0.05657130565713057, "grad_norm": 0.0, "learning_rate": 1.9963301045906497e-05, "loss": 1.0276, "step": 263 }, { "epoch": 0.056786405678640565, "grad_norm": 0.0, "learning_rate": 1.9962702261939067e-05, "loss": 1.0705, "step": 264 }, { "epoch": 0.05700150570015057, "grad_norm": 0.0, "learning_rate": 1.9962098641636268e-05, "loss": 1.0053, "step": 265 }, { "epoch": 0.057216605721660575, "grad_norm": 0.0, "learning_rate": 1.9961490185291128e-05, "loss": 1.0102, "step": 266 }, { "epoch": 0.05743170574317057, "grad_norm": 0.0, "learning_rate": 1.9960876893199016e-05, "loss": 0.9833, "step": 267 }, { "epoch": 0.05764680576468058, "grad_norm": 0.0, "learning_rate": 1.9960258765657646e-05, "loss": 0.9396, "step": 268 }, { "epoch": 0.057861905786190576, "grad_norm": 0.0, "learning_rate": 1.995963580296709e-05, "loss": 0.9708, "step": 269 }, { "epoch": 0.05807700580770058, "grad_norm": 0.0, "learning_rate": 1.9959008005429757e-05, "loss": 1.0087, "step": 270 }, { "epoch": 0.058292105829210586, "grad_norm": 0.0, "learning_rate": 1.995837537335042e-05, "loss": 0.9785, "step": 271 }, { "epoch": 0.058507205850720584, "grad_norm": 0.0, "learning_rate": 1.995773790703617e-05, "loss": 0.9179, "step": 272 }, { "epoch": 0.05872230587223059, "grad_norm": 0.0, "learning_rate": 1.9957095606796472e-05, "loss": 0.8997, "step": 273 }, { "epoch": 0.05893740589374059, "grad_norm": 0.0, "learning_rate": 1.9956448472943122e-05, "loss": 0.918, "step": 274 }, { "epoch": 0.05915250591525059, "grad_norm": 0.0, "learning_rate": 1.9955796505790266e-05, "loss": 1.0035, "step": 275 }, { "epoch": 0.0593676059367606, "grad_norm": 0.0, "learning_rate": 1.9955139705654405e-05, "loss": 0.9604, "step": 276 }, { "epoch": 0.059582705958270595, "grad_norm": 0.0, "learning_rate": 1.9954478072854374e-05, "loss": 0.9775, "step": 277 }, { "epoch": 0.0597978059797806, "grad_norm": 0.0, "learning_rate": 1.995381160771136e-05, "loss": 1.05, "step": 278 }, { "epoch": 0.0600129060012906, "grad_norm": 0.0, "learning_rate": 1.9953140310548892e-05, "loss": 0.9708, "step": 279 }, { "epoch": 0.0602280060228006, "grad_norm": 0.0, "learning_rate": 1.995246418169285e-05, "loss": 1.0018, "step": 280 }, { "epoch": 0.06044310604431061, "grad_norm": 0.0, "learning_rate": 1.9951783221471456e-05, "loss": 1.0487, "step": 281 }, { "epoch": 0.060658206065820605, "grad_norm": 0.0, "learning_rate": 1.995109743021528e-05, "loss": 0.9987, "step": 282 }, { "epoch": 0.06087330608733061, "grad_norm": 0.0, "learning_rate": 1.9950406808257227e-05, "loss": 0.9853, "step": 283 }, { "epoch": 0.06108840610884061, "grad_norm": 0.0, "learning_rate": 1.9949711355932565e-05, "loss": 1.0024, "step": 284 }, { "epoch": 0.06130350613035061, "grad_norm": 0.0, "learning_rate": 1.9949011073578894e-05, "loss": 0.9622, "step": 285 }, { "epoch": 0.06151860615186062, "grad_norm": 0.0, "learning_rate": 1.9948305961536163e-05, "loss": 0.9528, "step": 286 }, { "epoch": 0.061733706173370616, "grad_norm": 0.0, "learning_rate": 1.9947596020146666e-05, "loss": 1.0037, "step": 287 }, { "epoch": 0.06194880619488062, "grad_norm": 0.0, "learning_rate": 1.994688124975503e-05, "loss": 1.0771, "step": 288 }, { "epoch": 0.06216390621639062, "grad_norm": 0.0, "learning_rate": 1.994616165070825e-05, "loss": 1.0046, "step": 289 }, { "epoch": 0.062379006237900624, "grad_norm": 0.0, "learning_rate": 1.994543722335564e-05, "loss": 0.9943, "step": 290 }, { "epoch": 0.06259410625941063, "grad_norm": 0.0, "learning_rate": 1.9944707968048875e-05, "loss": 1.0105, "step": 291 }, { "epoch": 0.06280920628092063, "grad_norm": 0.0, "learning_rate": 1.9943973885141966e-05, "loss": 0.9987, "step": 292 }, { "epoch": 0.06302430630243062, "grad_norm": 0.0, "learning_rate": 1.994323497499127e-05, "loss": 1.0077, "step": 293 }, { "epoch": 0.06323940632394064, "grad_norm": 0.0, "learning_rate": 1.9942491237955485e-05, "loss": 1.0056, "step": 294 }, { "epoch": 0.06345450634545063, "grad_norm": 0.0, "learning_rate": 1.994174267439566e-05, "loss": 0.9977, "step": 295 }, { "epoch": 0.06366960636696063, "grad_norm": 0.0, "learning_rate": 1.994098928467517e-05, "loss": 0.9779, "step": 296 }, { "epoch": 0.06388470638847064, "grad_norm": 0.0, "learning_rate": 1.9940231069159753e-05, "loss": 1.0025, "step": 297 }, { "epoch": 0.06409980640998064, "grad_norm": 0.0, "learning_rate": 1.9939468028217473e-05, "loss": 0.9805, "step": 298 }, { "epoch": 0.06431490643149064, "grad_norm": 0.0, "learning_rate": 1.993870016221875e-05, "loss": 0.9834, "step": 299 }, { "epoch": 0.06453000645300065, "grad_norm": 0.0, "learning_rate": 1.9937927471536335e-05, "loss": 0.9441, "step": 300 }, { "epoch": 0.06474510647451065, "grad_norm": 0.0, "learning_rate": 1.993714995654533e-05, "loss": 1.017, "step": 301 }, { "epoch": 0.06496020649602065, "grad_norm": 0.0, "learning_rate": 1.993636761762317e-05, "loss": 0.9782, "step": 302 }, { "epoch": 0.06517530651753065, "grad_norm": 0.0, "learning_rate": 1.9935580455149645e-05, "loss": 1.0041, "step": 303 }, { "epoch": 0.06539040653904066, "grad_norm": 0.0, "learning_rate": 1.993478846950687e-05, "loss": 1.0593, "step": 304 }, { "epoch": 0.06560550656055066, "grad_norm": 0.0, "learning_rate": 1.9933991661079316e-05, "loss": 0.9384, "step": 305 }, { "epoch": 0.06582060658206065, "grad_norm": 0.0, "learning_rate": 1.9933190030253788e-05, "loss": 1.05, "step": 306 }, { "epoch": 0.06603570660357067, "grad_norm": 0.0, "learning_rate": 1.9932383577419432e-05, "loss": 0.944, "step": 307 }, { "epoch": 0.06625080662508066, "grad_norm": 0.0, "learning_rate": 1.9931572302967735e-05, "loss": 1.0194, "step": 308 }, { "epoch": 0.06646590664659066, "grad_norm": 0.0, "learning_rate": 1.9930756207292522e-05, "loss": 0.9951, "step": 309 }, { "epoch": 0.06668100666810067, "grad_norm": 0.0, "learning_rate": 1.9929935290789968e-05, "loss": 0.9343, "step": 310 }, { "epoch": 0.06689610668961067, "grad_norm": 0.0, "learning_rate": 1.9929109553858578e-05, "loss": 0.9807, "step": 311 }, { "epoch": 0.06711120671112067, "grad_norm": 0.0, "learning_rate": 1.9928278996899205e-05, "loss": 0.9783, "step": 312 }, { "epoch": 0.06732630673263067, "grad_norm": 0.0, "learning_rate": 1.9927443620315037e-05, "loss": 0.9828, "step": 313 }, { "epoch": 0.06754140675414068, "grad_norm": 0.0, "learning_rate": 1.99266034245116e-05, "loss": 0.9795, "step": 314 }, { "epoch": 0.06775650677565068, "grad_norm": 0.0, "learning_rate": 1.9925758409896763e-05, "loss": 1.0048, "step": 315 }, { "epoch": 0.06797160679716067, "grad_norm": 0.0, "learning_rate": 1.9924908576880738e-05, "loss": 1.0114, "step": 316 }, { "epoch": 0.06818670681867069, "grad_norm": 0.0, "learning_rate": 1.9924053925876064e-05, "loss": 0.9853, "step": 317 }, { "epoch": 0.06840180684018068, "grad_norm": 0.0, "learning_rate": 1.9923194457297628e-05, "loss": 0.9964, "step": 318 }, { "epoch": 0.06861690686169068, "grad_norm": 0.0, "learning_rate": 1.9922330171562657e-05, "loss": 0.9402, "step": 319 }, { "epoch": 0.0688320068832007, "grad_norm": 0.0, "learning_rate": 1.9921461069090715e-05, "loss": 0.9307, "step": 320 }, { "epoch": 0.06904710690471069, "grad_norm": 0.0, "learning_rate": 1.9920587150303697e-05, "loss": 1.0541, "step": 321 }, { "epoch": 0.06926220692622069, "grad_norm": 0.0, "learning_rate": 1.9919708415625847e-05, "loss": 0.9857, "step": 322 }, { "epoch": 0.06947730694773069, "grad_norm": 0.0, "learning_rate": 1.991882486548374e-05, "loss": 0.9511, "step": 323 }, { "epoch": 0.0696924069692407, "grad_norm": 0.0, "learning_rate": 1.9917936500306285e-05, "loss": 0.9404, "step": 324 }, { "epoch": 0.0699075069907507, "grad_norm": 0.0, "learning_rate": 1.991704332052474e-05, "loss": 0.9678, "step": 325 }, { "epoch": 0.0701226070122607, "grad_norm": 0.0, "learning_rate": 1.9916145326572696e-05, "loss": 0.9417, "step": 326 }, { "epoch": 0.07033770703377071, "grad_norm": 0.0, "learning_rate": 1.991524251888607e-05, "loss": 1.0437, "step": 327 }, { "epoch": 0.0705528070552807, "grad_norm": 0.0, "learning_rate": 1.9914334897903134e-05, "loss": 0.9753, "step": 328 }, { "epoch": 0.0707679070767907, "grad_norm": 0.0, "learning_rate": 1.991342246406448e-05, "loss": 0.9854, "step": 329 }, { "epoch": 0.07098300709830072, "grad_norm": 0.0, "learning_rate": 1.991250521781305e-05, "loss": 1.0302, "step": 330 }, { "epoch": 0.07119810711981071, "grad_norm": 0.0, "learning_rate": 1.9911583159594107e-05, "loss": 0.9938, "step": 331 }, { "epoch": 0.07141320714132071, "grad_norm": 0.0, "learning_rate": 1.991065628985527e-05, "loss": 0.9583, "step": 332 }, { "epoch": 0.07162830716283071, "grad_norm": 0.0, "learning_rate": 1.9909724609046473e-05, "loss": 0.9288, "step": 333 }, { "epoch": 0.07184340718434072, "grad_norm": 0.0, "learning_rate": 1.9908788117619997e-05, "loss": 1.0728, "step": 334 }, { "epoch": 0.07205850720585072, "grad_norm": 0.0, "learning_rate": 1.990784681603046e-05, "loss": 1.0048, "step": 335 }, { "epoch": 0.07227360722736072, "grad_norm": 0.0, "learning_rate": 1.9906900704734808e-05, "loss": 0.9559, "step": 336 }, { "epoch": 0.07248870724887073, "grad_norm": 0.0, "learning_rate": 1.990594978419233e-05, "loss": 0.9848, "step": 337 }, { "epoch": 0.07270380727038073, "grad_norm": 0.0, "learning_rate": 1.990499405486464e-05, "loss": 1.048, "step": 338 }, { "epoch": 0.07291890729189072, "grad_norm": 0.0, "learning_rate": 1.990403351721569e-05, "loss": 0.9511, "step": 339 }, { "epoch": 0.07313400731340074, "grad_norm": 0.0, "learning_rate": 1.990306817171177e-05, "loss": 1.0538, "step": 340 }, { "epoch": 0.07334910733491073, "grad_norm": 0.0, "learning_rate": 1.9902098018821504e-05, "loss": 1.0083, "step": 341 }, { "epoch": 0.07356420735642073, "grad_norm": 0.0, "learning_rate": 1.9901123059015843e-05, "loss": 0.963, "step": 342 }, { "epoch": 0.07377930737793074, "grad_norm": 0.0, "learning_rate": 1.9900143292768075e-05, "loss": 1.0131, "step": 343 }, { "epoch": 0.07399440739944074, "grad_norm": 0.0, "learning_rate": 1.9899158720553828e-05, "loss": 1.0032, "step": 344 }, { "epoch": 0.07420950742095074, "grad_norm": 0.0, "learning_rate": 1.9898169342851046e-05, "loss": 1.0037, "step": 345 }, { "epoch": 0.07442460744246074, "grad_norm": 0.0, "learning_rate": 1.9897175160140027e-05, "loss": 0.935, "step": 346 }, { "epoch": 0.07463970746397075, "grad_norm": 0.0, "learning_rate": 1.9896176172903384e-05, "loss": 1.0063, "step": 347 }, { "epoch": 0.07485480748548075, "grad_norm": 0.0, "learning_rate": 1.9895172381626072e-05, "loss": 0.9272, "step": 348 }, { "epoch": 0.07506990750699075, "grad_norm": 0.0, "learning_rate": 1.9894163786795378e-05, "loss": 1.0057, "step": 349 }, { "epoch": 0.07528500752850076, "grad_norm": 0.0, "learning_rate": 1.9893150388900918e-05, "loss": 0.9658, "step": 350 }, { "epoch": 0.07550010755001076, "grad_norm": 0.0, "learning_rate": 1.989213218843464e-05, "loss": 0.9152, "step": 351 }, { "epoch": 0.07571520757152075, "grad_norm": 0.0, "learning_rate": 1.9891109185890814e-05, "loss": 0.9858, "step": 352 }, { "epoch": 0.07593030759303077, "grad_norm": 0.0, "learning_rate": 1.9890081381766066e-05, "loss": 0.9606, "step": 353 }, { "epoch": 0.07614540761454076, "grad_norm": 0.0, "learning_rate": 1.988904877655933e-05, "loss": 1.0109, "step": 354 }, { "epoch": 0.07636050763605076, "grad_norm": 0.0, "learning_rate": 1.9888011370771874e-05, "loss": 0.9712, "step": 355 }, { "epoch": 0.07657560765756076, "grad_norm": 0.0, "learning_rate": 1.9886969164907306e-05, "loss": 0.9352, "step": 356 }, { "epoch": 0.07679070767907077, "grad_norm": 0.0, "learning_rate": 1.988592215947156e-05, "loss": 1.0107, "step": 357 }, { "epoch": 0.07700580770058077, "grad_norm": 0.0, "learning_rate": 1.9884870354972895e-05, "loss": 0.9775, "step": 358 }, { "epoch": 0.07722090772209077, "grad_norm": 0.0, "learning_rate": 1.9883813751921904e-05, "loss": 1.0029, "step": 359 }, { "epoch": 0.07743600774360078, "grad_norm": 0.0, "learning_rate": 1.988275235083151e-05, "loss": 0.9722, "step": 360 }, { "epoch": 0.07765110776511078, "grad_norm": 0.0, "learning_rate": 1.9881686152216962e-05, "loss": 1.0135, "step": 361 }, { "epoch": 0.07786620778662078, "grad_norm": 0.0, "learning_rate": 1.988061515659584e-05, "loss": 0.9172, "step": 362 }, { "epoch": 0.07808130780813079, "grad_norm": 0.0, "learning_rate": 1.9879539364488057e-05, "loss": 0.9994, "step": 363 }, { "epoch": 0.07829640782964079, "grad_norm": 0.0, "learning_rate": 1.9878458776415842e-05, "loss": 0.9632, "step": 364 }, { "epoch": 0.07851150785115078, "grad_norm": 0.0, "learning_rate": 1.9877373392903768e-05, "loss": 1.0173, "step": 365 }, { "epoch": 0.07872660787266078, "grad_norm": 0.0, "learning_rate": 1.9876283214478725e-05, "loss": 1.035, "step": 366 }, { "epoch": 0.07894170789417079, "grad_norm": 0.0, "learning_rate": 1.987518824166993e-05, "loss": 1.0533, "step": 367 }, { "epoch": 0.07915680791568079, "grad_norm": 0.0, "learning_rate": 1.987408847500894e-05, "loss": 0.9066, "step": 368 }, { "epoch": 0.07937190793719079, "grad_norm": 0.0, "learning_rate": 1.9872983915029623e-05, "loss": 1.0099, "step": 369 }, { "epoch": 0.0795870079587008, "grad_norm": 0.0, "learning_rate": 1.987187456226818e-05, "loss": 0.9315, "step": 370 }, { "epoch": 0.0798021079802108, "grad_norm": 0.0, "learning_rate": 1.9870760417263145e-05, "loss": 0.986, "step": 371 }, { "epoch": 0.0800172080017208, "grad_norm": 0.0, "learning_rate": 1.986964148055537e-05, "loss": 0.9271, "step": 372 }, { "epoch": 0.08023230802323081, "grad_norm": 0.0, "learning_rate": 1.9868517752688037e-05, "loss": 0.9642, "step": 373 }, { "epoch": 0.0804474080447408, "grad_norm": 0.0, "learning_rate": 1.9867389234206655e-05, "loss": 0.9539, "step": 374 }, { "epoch": 0.0806625080662508, "grad_norm": 0.0, "learning_rate": 1.9866255925659055e-05, "loss": 1.0291, "step": 375 }, { "epoch": 0.0808776080877608, "grad_norm": 0.0, "learning_rate": 1.986511782759539e-05, "loss": 0.9975, "step": 376 }, { "epoch": 0.08109270810927081, "grad_norm": 0.0, "learning_rate": 1.9863974940568157e-05, "loss": 0.9298, "step": 377 }, { "epoch": 0.08130780813078081, "grad_norm": 0.0, "learning_rate": 1.9862827265132147e-05, "loss": 0.9902, "step": 378 }, { "epoch": 0.08152290815229081, "grad_norm": 0.0, "learning_rate": 1.9861674801844507e-05, "loss": 1.0535, "step": 379 }, { "epoch": 0.08173800817380082, "grad_norm": 0.0, "learning_rate": 1.986051755126468e-05, "loss": 0.9997, "step": 380 }, { "epoch": 0.08195310819531082, "grad_norm": 0.0, "learning_rate": 1.9859355513954458e-05, "loss": 0.9858, "step": 381 }, { "epoch": 0.08216820821682082, "grad_norm": 0.0, "learning_rate": 1.9858188690477938e-05, "loss": 1.0153, "step": 382 }, { "epoch": 0.08238330823833083, "grad_norm": 0.0, "learning_rate": 1.985701708140155e-05, "loss": 1.0178, "step": 383 }, { "epoch": 0.08259840825984083, "grad_norm": 0.0, "learning_rate": 1.9855840687294043e-05, "loss": 0.9706, "step": 384 }, { "epoch": 0.08281350828135083, "grad_norm": 0.0, "learning_rate": 1.9854659508726497e-05, "loss": 1.0434, "step": 385 }, { "epoch": 0.08302860830286082, "grad_norm": 0.0, "learning_rate": 1.98534735462723e-05, "loss": 1.0067, "step": 386 }, { "epoch": 0.08324370832437084, "grad_norm": 0.0, "learning_rate": 1.9852282800507176e-05, "loss": 0.9681, "step": 387 }, { "epoch": 0.08345880834588083, "grad_norm": 0.0, "learning_rate": 1.9851087272009162e-05, "loss": 1.0024, "step": 388 }, { "epoch": 0.08367390836739083, "grad_norm": 0.0, "learning_rate": 1.9849886961358624e-05, "loss": 0.9706, "step": 389 }, { "epoch": 0.08388900838890084, "grad_norm": 0.0, "learning_rate": 1.9848681869138244e-05, "loss": 0.9997, "step": 390 }, { "epoch": 0.08410410841041084, "grad_norm": 0.0, "learning_rate": 1.984747199593302e-05, "loss": 0.985, "step": 391 }, { "epoch": 0.08431920843192084, "grad_norm": 0.0, "learning_rate": 1.984625734233029e-05, "loss": 0.9471, "step": 392 }, { "epoch": 0.08453430845343085, "grad_norm": 0.0, "learning_rate": 1.9845037908919697e-05, "loss": 0.9406, "step": 393 }, { "epoch": 0.08474940847494085, "grad_norm": 0.0, "learning_rate": 1.9843813696293202e-05, "loss": 0.9958, "step": 394 }, { "epoch": 0.08496450849645085, "grad_norm": 0.0, "learning_rate": 1.9842584705045097e-05, "loss": 0.9581, "step": 395 }, { "epoch": 0.08517960851796086, "grad_norm": 0.0, "learning_rate": 1.9841350935771987e-05, "loss": 0.9743, "step": 396 }, { "epoch": 0.08539470853947086, "grad_norm": 0.0, "learning_rate": 1.9840112389072796e-05, "loss": 0.9861, "step": 397 }, { "epoch": 0.08560980856098085, "grad_norm": 0.0, "learning_rate": 1.9838869065548772e-05, "loss": 0.9129, "step": 398 }, { "epoch": 0.08582490858249085, "grad_norm": 0.0, "learning_rate": 1.9837620965803483e-05, "loss": 1.0068, "step": 399 }, { "epoch": 0.08604000860400086, "grad_norm": 0.0, "learning_rate": 1.9836368090442805e-05, "loss": 0.9607, "step": 400 }, { "epoch": 0.08625510862551086, "grad_norm": 0.0, "learning_rate": 1.9835110440074938e-05, "loss": 0.9707, "step": 401 }, { "epoch": 0.08647020864702086, "grad_norm": 0.0, "learning_rate": 1.983384801531041e-05, "loss": 1.0328, "step": 402 }, { "epoch": 0.08668530866853087, "grad_norm": 0.0, "learning_rate": 1.983258081676205e-05, "loss": 1.0515, "step": 403 }, { "epoch": 0.08690040869004087, "grad_norm": 0.0, "learning_rate": 1.983130884504501e-05, "loss": 1.0055, "step": 404 }, { "epoch": 0.08711550871155087, "grad_norm": 0.0, "learning_rate": 1.9830032100776767e-05, "loss": 0.9657, "step": 405 }, { "epoch": 0.08733060873306088, "grad_norm": 0.0, "learning_rate": 1.9828750584577107e-05, "loss": 0.996, "step": 406 }, { "epoch": 0.08754570875457088, "grad_norm": 0.0, "learning_rate": 1.9827464297068137e-05, "loss": 1.04, "step": 407 }, { "epoch": 0.08776080877608088, "grad_norm": 0.0, "learning_rate": 1.9826173238874275e-05, "loss": 1.0959, "step": 408 }, { "epoch": 0.08797590879759087, "grad_norm": 0.0, "learning_rate": 1.9824877410622258e-05, "loss": 1.0074, "step": 409 }, { "epoch": 0.08819100881910089, "grad_norm": 0.0, "learning_rate": 1.982357681294114e-05, "loss": 0.981, "step": 410 }, { "epoch": 0.08840610884061088, "grad_norm": 0.0, "learning_rate": 1.9822271446462283e-05, "loss": 1.0075, "step": 411 }, { "epoch": 0.08862120886212088, "grad_norm": 0.0, "learning_rate": 1.9820961311819376e-05, "loss": 0.9987, "step": 412 }, { "epoch": 0.0888363088836309, "grad_norm": 0.0, "learning_rate": 1.9819646409648415e-05, "loss": 0.9566, "step": 413 }, { "epoch": 0.08905140890514089, "grad_norm": 0.0, "learning_rate": 1.9818326740587707e-05, "loss": 1.0033, "step": 414 }, { "epoch": 0.08926650892665089, "grad_norm": 0.0, "learning_rate": 1.9817002305277883e-05, "loss": 1.0157, "step": 415 }, { "epoch": 0.0894816089481609, "grad_norm": 0.0, "learning_rate": 1.9815673104361876e-05, "loss": 1.0215, "step": 416 }, { "epoch": 0.0896967089696709, "grad_norm": 0.0, "learning_rate": 1.9814339138484948e-05, "loss": 0.9552, "step": 417 }, { "epoch": 0.0899118089911809, "grad_norm": 0.0, "learning_rate": 1.9813000408294656e-05, "loss": 0.9753, "step": 418 }, { "epoch": 0.0901269090126909, "grad_norm": 0.0, "learning_rate": 1.9811656914440885e-05, "loss": 0.9532, "step": 419 }, { "epoch": 0.0903420090342009, "grad_norm": 0.0, "learning_rate": 1.9810308657575822e-05, "loss": 0.997, "step": 420 }, { "epoch": 0.0905571090557109, "grad_norm": 0.0, "learning_rate": 1.9808955638353973e-05, "loss": 1.0101, "step": 421 }, { "epoch": 0.0907722090772209, "grad_norm": 0.0, "learning_rate": 1.9807597857432152e-05, "loss": 0.9645, "step": 422 }, { "epoch": 0.09098730909873091, "grad_norm": 0.0, "learning_rate": 1.9806235315469485e-05, "loss": 0.9993, "step": 423 }, { "epoch": 0.09120240912024091, "grad_norm": 0.0, "learning_rate": 1.980486801312741e-05, "loss": 0.9147, "step": 424 }, { "epoch": 0.09141750914175091, "grad_norm": 0.0, "learning_rate": 1.980349595106968e-05, "loss": 0.9377, "step": 425 }, { "epoch": 0.09163260916326092, "grad_norm": 0.0, "learning_rate": 1.9802119129962352e-05, "loss": 0.8853, "step": 426 }, { "epoch": 0.09184770918477092, "grad_norm": 0.0, "learning_rate": 1.9800737550473793e-05, "loss": 1.0028, "step": 427 }, { "epoch": 0.09206280920628092, "grad_norm": 0.0, "learning_rate": 1.9799351213274688e-05, "loss": 0.9541, "step": 428 }, { "epoch": 0.09227790922779092, "grad_norm": 0.0, "learning_rate": 1.979796011903802e-05, "loss": 1.003, "step": 429 }, { "epoch": 0.09249300924930093, "grad_norm": 0.0, "learning_rate": 1.979656426843909e-05, "loss": 0.9735, "step": 430 }, { "epoch": 0.09270810927081093, "grad_norm": 0.0, "learning_rate": 1.979516366215551e-05, "loss": 1.0213, "step": 431 }, { "epoch": 0.09292320929232092, "grad_norm": 0.0, "learning_rate": 1.979375830086719e-05, "loss": 0.989, "step": 432 }, { "epoch": 0.09313830931383094, "grad_norm": 0.0, "learning_rate": 1.9792348185256356e-05, "loss": 1.0623, "step": 433 }, { "epoch": 0.09335340933534093, "grad_norm": 0.0, "learning_rate": 1.9790933316007543e-05, "loss": 0.9875, "step": 434 }, { "epoch": 0.09356850935685093, "grad_norm": 0.0, "learning_rate": 1.9789513693807584e-05, "loss": 0.9279, "step": 435 }, { "epoch": 0.09378360937836094, "grad_norm": 0.0, "learning_rate": 1.978808931934564e-05, "loss": 0.9897, "step": 436 }, { "epoch": 0.09399870939987094, "grad_norm": 0.0, "learning_rate": 1.978666019331315e-05, "loss": 0.9412, "step": 437 }, { "epoch": 0.09421380942138094, "grad_norm": 0.0, "learning_rate": 1.978522631640388e-05, "loss": 1.0034, "step": 438 }, { "epoch": 0.09442890944289094, "grad_norm": 0.0, "learning_rate": 1.97837876893139e-05, "loss": 1.0261, "step": 439 }, { "epoch": 0.09464400946440095, "grad_norm": 0.0, "learning_rate": 1.9782344312741585e-05, "loss": 1.0006, "step": 440 }, { "epoch": 0.09485910948591095, "grad_norm": 0.0, "learning_rate": 1.9780896187387606e-05, "loss": 0.9482, "step": 441 }, { "epoch": 0.09507420950742095, "grad_norm": 0.0, "learning_rate": 1.9779443313954957e-05, "loss": 0.9839, "step": 442 }, { "epoch": 0.09528930952893096, "grad_norm": 0.0, "learning_rate": 1.9777985693148912e-05, "loss": 0.9559, "step": 443 }, { "epoch": 0.09550440955044096, "grad_norm": 0.0, "learning_rate": 1.9776523325677083e-05, "loss": 0.8739, "step": 444 }, { "epoch": 0.09571950957195095, "grad_norm": 0.0, "learning_rate": 1.977505621224935e-05, "loss": 0.9316, "step": 445 }, { "epoch": 0.09593460959346096, "grad_norm": 0.0, "learning_rate": 1.9773584353577928e-05, "loss": 0.9773, "step": 446 }, { "epoch": 0.09614970961497096, "grad_norm": 0.0, "learning_rate": 1.9772107750377315e-05, "loss": 1.0232, "step": 447 }, { "epoch": 0.09636480963648096, "grad_norm": 0.0, "learning_rate": 1.977062640336432e-05, "loss": 0.9707, "step": 448 }, { "epoch": 0.09657990965799097, "grad_norm": 0.0, "learning_rate": 1.976914031325806e-05, "loss": 0.9906, "step": 449 }, { "epoch": 0.09679500967950097, "grad_norm": 0.0, "learning_rate": 1.9767649480779944e-05, "loss": 0.9911, "step": 450 }, { "epoch": 0.09701010970101097, "grad_norm": 0.0, "learning_rate": 1.9766153906653684e-05, "loss": 0.9398, "step": 451 }, { "epoch": 0.09722520972252097, "grad_norm": 0.0, "learning_rate": 1.9764653591605306e-05, "loss": 1.0622, "step": 452 }, { "epoch": 0.09744030974403098, "grad_norm": 0.0, "learning_rate": 1.976314853636312e-05, "loss": 0.9937, "step": 453 }, { "epoch": 0.09765540976554098, "grad_norm": 0.0, "learning_rate": 1.9761638741657757e-05, "loss": 0.874, "step": 454 }, { "epoch": 0.09787050978705097, "grad_norm": 0.0, "learning_rate": 1.9760124208222128e-05, "loss": 0.9683, "step": 455 }, { "epoch": 0.09808560980856099, "grad_norm": 0.0, "learning_rate": 1.9758604936791462e-05, "loss": 1.0063, "step": 456 }, { "epoch": 0.09830070983007098, "grad_norm": 0.0, "learning_rate": 1.9757080928103273e-05, "loss": 1.0373, "step": 457 }, { "epoch": 0.09851580985158098, "grad_norm": 0.0, "learning_rate": 1.9755552182897388e-05, "loss": 1.0465, "step": 458 }, { "epoch": 0.098730909873091, "grad_norm": 0.0, "learning_rate": 1.9754018701915926e-05, "loss": 0.9567, "step": 459 }, { "epoch": 0.09894600989460099, "grad_norm": 0.0, "learning_rate": 1.9752480485903306e-05, "loss": 0.9935, "step": 460 }, { "epoch": 0.09916110991611099, "grad_norm": 0.0, "learning_rate": 1.9750937535606246e-05, "loss": 0.9636, "step": 461 }, { "epoch": 0.09937620993762099, "grad_norm": 0.0, "learning_rate": 1.974938985177376e-05, "loss": 0.9897, "step": 462 }, { "epoch": 0.099591309959131, "grad_norm": 0.0, "learning_rate": 1.9747837435157168e-05, "loss": 0.9958, "step": 463 }, { "epoch": 0.099806409980641, "grad_norm": 0.0, "learning_rate": 1.974628028651007e-05, "loss": 0.9621, "step": 464 }, { "epoch": 0.100021510002151, "grad_norm": 0.0, "learning_rate": 1.974471840658839e-05, "loss": 0.9367, "step": 465 }, { "epoch": 0.10023661002366101, "grad_norm": 0.0, "learning_rate": 1.9743151796150323e-05, "loss": 1.0421, "step": 466 }, { "epoch": 0.100451710045171, "grad_norm": 0.0, "learning_rate": 1.974158045595637e-05, "loss": 0.9802, "step": 467 }, { "epoch": 0.100666810066681, "grad_norm": 0.0, "learning_rate": 1.9740004386769337e-05, "loss": 0.9193, "step": 468 }, { "epoch": 0.10088191008819102, "grad_norm": 0.0, "learning_rate": 1.9738423589354314e-05, "loss": 0.9592, "step": 469 }, { "epoch": 0.10109701010970101, "grad_norm": 0.0, "learning_rate": 1.9736838064478683e-05, "loss": 0.9638, "step": 470 }, { "epoch": 0.10131211013121101, "grad_norm": 0.0, "learning_rate": 1.9735247812912138e-05, "loss": 0.9706, "step": 471 }, { "epoch": 0.10152721015272101, "grad_norm": 0.0, "learning_rate": 1.973365283542665e-05, "loss": 0.9822, "step": 472 }, { "epoch": 0.10174231017423102, "grad_norm": 0.0, "learning_rate": 1.9732053132796496e-05, "loss": 0.9993, "step": 473 }, { "epoch": 0.10195741019574102, "grad_norm": 0.0, "learning_rate": 1.973044870579824e-05, "loss": 0.9414, "step": 474 }, { "epoch": 0.10217251021725102, "grad_norm": 0.0, "learning_rate": 1.9728839555210744e-05, "loss": 0.9384, "step": 475 }, { "epoch": 0.10238761023876103, "grad_norm": 0.0, "learning_rate": 1.9727225681815154e-05, "loss": 0.9645, "step": 476 }, { "epoch": 0.10260271026027103, "grad_norm": 0.0, "learning_rate": 1.9725607086394923e-05, "loss": 0.9927, "step": 477 }, { "epoch": 0.10281781028178102, "grad_norm": 0.0, "learning_rate": 1.972398376973579e-05, "loss": 0.9913, "step": 478 }, { "epoch": 0.10303291030329104, "grad_norm": 0.0, "learning_rate": 1.9722355732625775e-05, "loss": 0.974, "step": 479 }, { "epoch": 0.10324801032480103, "grad_norm": 0.0, "learning_rate": 1.9720722975855206e-05, "loss": 0.9496, "step": 480 }, { "epoch": 0.10346311034631103, "grad_norm": 0.0, "learning_rate": 1.9719085500216694e-05, "loss": 1.0039, "step": 481 }, { "epoch": 0.10367821036782103, "grad_norm": 0.0, "learning_rate": 1.9717443306505143e-05, "loss": 1.0526, "step": 482 }, { "epoch": 0.10389331038933104, "grad_norm": 0.0, "learning_rate": 1.971579639551774e-05, "loss": 0.9164, "step": 483 }, { "epoch": 0.10410841041084104, "grad_norm": 0.0, "learning_rate": 1.9714144768053978e-05, "loss": 0.9875, "step": 484 }, { "epoch": 0.10432351043235104, "grad_norm": 0.0, "learning_rate": 1.9712488424915625e-05, "loss": 0.9767, "step": 485 }, { "epoch": 0.10453861045386105, "grad_norm": 0.0, "learning_rate": 1.9710827366906743e-05, "loss": 0.9997, "step": 486 }, { "epoch": 0.10475371047537105, "grad_norm": 0.0, "learning_rate": 1.9709161594833682e-05, "loss": 0.9569, "step": 487 }, { "epoch": 0.10496881049688105, "grad_norm": 0.0, "learning_rate": 1.9707491109505083e-05, "loss": 0.9012, "step": 488 }, { "epoch": 0.10518391051839106, "grad_norm": 0.0, "learning_rate": 1.9705815911731873e-05, "loss": 0.9706, "step": 489 }, { "epoch": 0.10539901053990106, "grad_norm": 0.0, "learning_rate": 1.9704136002327262e-05, "loss": 0.9529, "step": 490 }, { "epoch": 0.10561411056141105, "grad_norm": 0.0, "learning_rate": 1.9702451382106763e-05, "loss": 1.0382, "step": 491 }, { "epoch": 0.10582921058292105, "grad_norm": 0.0, "learning_rate": 1.9700762051888153e-05, "loss": 1.0273, "step": 492 }, { "epoch": 0.10604431060443106, "grad_norm": 0.0, "learning_rate": 1.9699068012491515e-05, "loss": 0.9511, "step": 493 }, { "epoch": 0.10625941062594106, "grad_norm": 0.0, "learning_rate": 1.9697369264739213e-05, "loss": 0.9633, "step": 494 }, { "epoch": 0.10647451064745106, "grad_norm": 0.0, "learning_rate": 1.9695665809455885e-05, "loss": 1.0158, "step": 495 }, { "epoch": 0.10668961066896107, "grad_norm": 0.0, "learning_rate": 1.969395764746847e-05, "loss": 0.9885, "step": 496 }, { "epoch": 0.10690471069047107, "grad_norm": 0.0, "learning_rate": 1.9692244779606185e-05, "loss": 0.9199, "step": 497 }, { "epoch": 0.10711981071198107, "grad_norm": 0.0, "learning_rate": 1.9690527206700526e-05, "loss": 1.0063, "step": 498 }, { "epoch": 0.10733491073349108, "grad_norm": 0.0, "learning_rate": 1.9688804929585284e-05, "loss": 0.9782, "step": 499 }, { "epoch": 0.10755001075500108, "grad_norm": 0.0, "learning_rate": 1.9687077949096528e-05, "loss": 0.9492, "step": 500 }, { "epoch": 0.10776511077651107, "grad_norm": 0.0, "learning_rate": 1.968534626607261e-05, "loss": 0.9589, "step": 501 }, { "epoch": 0.10798021079802107, "grad_norm": 0.0, "learning_rate": 1.9683609881354164e-05, "loss": 1.066, "step": 502 }, { "epoch": 0.10819531081953108, "grad_norm": 0.0, "learning_rate": 1.9681868795784108e-05, "loss": 0.9504, "step": 503 }, { "epoch": 0.10841041084104108, "grad_norm": 0.0, "learning_rate": 1.968012301020765e-05, "loss": 0.9708, "step": 504 }, { "epoch": 0.10862551086255108, "grad_norm": 0.0, "learning_rate": 1.9678372525472252e-05, "loss": 0.9475, "step": 505 }, { "epoch": 0.10884061088406109, "grad_norm": 0.0, "learning_rate": 1.9676617342427696e-05, "loss": 0.9626, "step": 506 }, { "epoch": 0.10905571090557109, "grad_norm": 0.0, "learning_rate": 1.9674857461926015e-05, "loss": 1.0265, "step": 507 }, { "epoch": 0.10927081092708109, "grad_norm": 0.0, "learning_rate": 1.9673092884821538e-05, "loss": 0.9892, "step": 508 }, { "epoch": 0.1094859109485911, "grad_norm": 0.0, "learning_rate": 1.9671323611970864e-05, "loss": 0.9873, "step": 509 }, { "epoch": 0.1097010109701011, "grad_norm": 0.0, "learning_rate": 1.9669549644232876e-05, "loss": 0.9576, "step": 510 }, { "epoch": 0.1099161109916111, "grad_norm": 0.0, "learning_rate": 1.966777098246874e-05, "loss": 0.9919, "step": 511 }, { "epoch": 0.11013121101312111, "grad_norm": 0.0, "learning_rate": 1.9665987627541895e-05, "loss": 0.9491, "step": 512 }, { "epoch": 0.1103463110346311, "grad_norm": 0.0, "learning_rate": 1.9664199580318062e-05, "loss": 1.0039, "step": 513 }, { "epoch": 0.1105614110561411, "grad_norm": 0.0, "learning_rate": 1.9662406841665235e-05, "loss": 1.0083, "step": 514 }, { "epoch": 0.1107765110776511, "grad_norm": 0.0, "learning_rate": 1.9660609412453692e-05, "loss": 0.9387, "step": 515 }, { "epoch": 0.11099161109916111, "grad_norm": 0.0, "learning_rate": 1.9658807293555983e-05, "loss": 0.9786, "step": 516 }, { "epoch": 0.11120671112067111, "grad_norm": 0.0, "learning_rate": 1.9657000485846933e-05, "loss": 1.0162, "step": 517 }, { "epoch": 0.11142181114218111, "grad_norm": 0.0, "learning_rate": 1.9655188990203648e-05, "loss": 0.9827, "step": 518 }, { "epoch": 0.11163691116369112, "grad_norm": 0.0, "learning_rate": 1.965337280750551e-05, "loss": 0.9913, "step": 519 }, { "epoch": 0.11185201118520112, "grad_norm": 0.0, "learning_rate": 1.9651551938634173e-05, "loss": 0.9308, "step": 520 }, { "epoch": 0.11206711120671112, "grad_norm": 0.0, "learning_rate": 1.9649726384473568e-05, "loss": 0.94, "step": 521 }, { "epoch": 0.11228221122822113, "grad_norm": 0.0, "learning_rate": 1.9647896145909893e-05, "loss": 0.9687, "step": 522 }, { "epoch": 0.11249731124973113, "grad_norm": 0.0, "learning_rate": 1.964606122383164e-05, "loss": 0.9768, "step": 523 }, { "epoch": 0.11271241127124113, "grad_norm": 0.0, "learning_rate": 1.964422161912955e-05, "loss": 1.0694, "step": 524 }, { "epoch": 0.11292751129275112, "grad_norm": 0.0, "learning_rate": 1.964237733269665e-05, "loss": 0.9917, "step": 525 }, { "epoch": 0.11314261131426114, "grad_norm": 0.0, "learning_rate": 1.964052836542824e-05, "loss": 0.978, "step": 526 }, { "epoch": 0.11335771133577113, "grad_norm": 0.0, "learning_rate": 1.963867471822189e-05, "loss": 0.9658, "step": 527 }, { "epoch": 0.11357281135728113, "grad_norm": 0.0, "learning_rate": 1.9636816391977444e-05, "loss": 0.985, "step": 528 }, { "epoch": 0.11378791137879114, "grad_norm": 0.0, "learning_rate": 1.9634953387597014e-05, "loss": 0.9812, "step": 529 }, { "epoch": 0.11400301140030114, "grad_norm": 0.0, "learning_rate": 1.963308570598498e-05, "loss": 0.9539, "step": 530 }, { "epoch": 0.11421811142181114, "grad_norm": 0.0, "learning_rate": 1.9631213348048005e-05, "loss": 1.0291, "step": 531 }, { "epoch": 0.11443321144332115, "grad_norm": 0.0, "learning_rate": 1.962933631469501e-05, "loss": 0.9588, "step": 532 }, { "epoch": 0.11464831146483115, "grad_norm": 0.0, "learning_rate": 1.9627454606837187e-05, "loss": 0.9728, "step": 533 }, { "epoch": 0.11486341148634115, "grad_norm": 0.0, "learning_rate": 1.9625568225388006e-05, "loss": 0.9007, "step": 534 }, { "epoch": 0.11507851150785114, "grad_norm": 0.0, "learning_rate": 1.9623677171263198e-05, "loss": 0.9612, "step": 535 }, { "epoch": 0.11529361152936116, "grad_norm": 0.0, "learning_rate": 1.962178144538076e-05, "loss": 0.9766, "step": 536 }, { "epoch": 0.11550871155087115, "grad_norm": 0.0, "learning_rate": 1.961988104866097e-05, "loss": 0.9679, "step": 537 }, { "epoch": 0.11572381157238115, "grad_norm": 0.0, "learning_rate": 1.9617975982026352e-05, "loss": 0.987, "step": 538 }, { "epoch": 0.11593891159389116, "grad_norm": 0.0, "learning_rate": 1.961606624640172e-05, "loss": 0.9963, "step": 539 }, { "epoch": 0.11615401161540116, "grad_norm": 0.0, "learning_rate": 1.961415184271414e-05, "loss": 0.9571, "step": 540 }, { "epoch": 0.11636911163691116, "grad_norm": 0.0, "learning_rate": 1.961223277189295e-05, "loss": 0.9364, "step": 541 }, { "epoch": 0.11658421165842117, "grad_norm": 0.0, "learning_rate": 1.9610309034869743e-05, "loss": 1.0093, "step": 542 }, { "epoch": 0.11679931167993117, "grad_norm": 0.0, "learning_rate": 1.96083806325784e-05, "loss": 0.9627, "step": 543 }, { "epoch": 0.11701441170144117, "grad_norm": 0.0, "learning_rate": 1.960644756595504e-05, "loss": 0.9874, "step": 544 }, { "epoch": 0.11722951172295117, "grad_norm": 0.0, "learning_rate": 1.9604509835938066e-05, "loss": 0.9323, "step": 545 }, { "epoch": 0.11744461174446118, "grad_norm": 0.0, "learning_rate": 1.9602567443468136e-05, "loss": 0.9896, "step": 546 }, { "epoch": 0.11765971176597118, "grad_norm": 0.0, "learning_rate": 1.9600620389488173e-05, "loss": 0.924, "step": 547 }, { "epoch": 0.11787481178748117, "grad_norm": 0.0, "learning_rate": 1.9598668674943362e-05, "loss": 0.9835, "step": 548 }, { "epoch": 0.11808991180899119, "grad_norm": 0.0, "learning_rate": 1.959671230078115e-05, "loss": 0.9345, "step": 549 }, { "epoch": 0.11830501183050118, "grad_norm": 0.0, "learning_rate": 1.9594751267951256e-05, "loss": 1.0156, "step": 550 }, { "epoch": 0.11852011185201118, "grad_norm": 0.0, "learning_rate": 1.959278557740564e-05, "loss": 0.9889, "step": 551 }, { "epoch": 0.1187352118735212, "grad_norm": 0.0, "learning_rate": 1.959081523009854e-05, "loss": 0.9342, "step": 552 }, { "epoch": 0.11895031189503119, "grad_norm": 0.0, "learning_rate": 1.9588840226986453e-05, "loss": 0.9374, "step": 553 }, { "epoch": 0.11916541191654119, "grad_norm": 0.0, "learning_rate": 1.9586860569028126e-05, "loss": 0.9053, "step": 554 }, { "epoch": 0.11938051193805119, "grad_norm": 0.0, "learning_rate": 1.958487625718458e-05, "loss": 1.0175, "step": 555 }, { "epoch": 0.1195956119595612, "grad_norm": 0.0, "learning_rate": 1.958288729241908e-05, "loss": 0.9451, "step": 556 }, { "epoch": 0.1198107119810712, "grad_norm": 0.0, "learning_rate": 1.958089367569716e-05, "loss": 0.9003, "step": 557 }, { "epoch": 0.1200258120025812, "grad_norm": 0.0, "learning_rate": 1.9578895407986612e-05, "loss": 0.9626, "step": 558 }, { "epoch": 0.1202409120240912, "grad_norm": 0.0, "learning_rate": 1.9576892490257483e-05, "loss": 0.9562, "step": 559 }, { "epoch": 0.1204560120456012, "grad_norm": 0.0, "learning_rate": 1.9574884923482073e-05, "loss": 1.0095, "step": 560 }, { "epoch": 0.1206711120671112, "grad_norm": 0.0, "learning_rate": 1.957287270863495e-05, "loss": 0.96, "step": 561 }, { "epoch": 0.12088621208862121, "grad_norm": 0.0, "learning_rate": 1.9570855846692924e-05, "loss": 0.9843, "step": 562 }, { "epoch": 0.12110131211013121, "grad_norm": 0.0, "learning_rate": 1.956883433863508e-05, "loss": 0.9568, "step": 563 }, { "epoch": 0.12131641213164121, "grad_norm": 0.0, "learning_rate": 1.9566808185442735e-05, "loss": 0.9515, "step": 564 }, { "epoch": 0.12153151215315122, "grad_norm": 0.0, "learning_rate": 1.956477738809948e-05, "loss": 0.961, "step": 565 }, { "epoch": 0.12174661217466122, "grad_norm": 0.0, "learning_rate": 1.9562741947591154e-05, "loss": 0.897, "step": 566 }, { "epoch": 0.12196171219617122, "grad_norm": 0.0, "learning_rate": 1.9560701864905844e-05, "loss": 0.9255, "step": 567 }, { "epoch": 0.12217681221768122, "grad_norm": 0.0, "learning_rate": 1.9558657141033905e-05, "loss": 0.9643, "step": 568 }, { "epoch": 0.12239191223919123, "grad_norm": 0.0, "learning_rate": 1.955660777696793e-05, "loss": 0.9389, "step": 569 }, { "epoch": 0.12260701226070123, "grad_norm": 0.0, "learning_rate": 1.9554553773702773e-05, "loss": 0.9991, "step": 570 }, { "epoch": 0.12282211228221122, "grad_norm": 0.0, "learning_rate": 1.9552495132235537e-05, "loss": 0.9408, "step": 571 }, { "epoch": 0.12303721230372124, "grad_norm": 0.0, "learning_rate": 1.9550431853565577e-05, "loss": 1.0082, "step": 572 }, { "epoch": 0.12325231232523123, "grad_norm": 0.0, "learning_rate": 1.95483639386945e-05, "loss": 1.0204, "step": 573 }, { "epoch": 0.12346741234674123, "grad_norm": 0.0, "learning_rate": 1.954629138862616e-05, "loss": 1.0123, "step": 574 }, { "epoch": 0.12368251236825124, "grad_norm": 0.0, "learning_rate": 1.9544214204366675e-05, "loss": 0.9291, "step": 575 }, { "epoch": 0.12389761238976124, "grad_norm": 0.0, "learning_rate": 1.9542132386924387e-05, "loss": 0.9825, "step": 576 }, { "epoch": 0.12411271241127124, "grad_norm": 0.0, "learning_rate": 1.9540045937309914e-05, "loss": 1.0535, "step": 577 }, { "epoch": 0.12432781243278124, "grad_norm": 0.0, "learning_rate": 1.9537954856536104e-05, "loss": 0.9593, "step": 578 }, { "epoch": 0.12454291245429125, "grad_norm": 0.0, "learning_rate": 1.953585914561806e-05, "loss": 0.917, "step": 579 }, { "epoch": 0.12475801247580125, "grad_norm": 0.0, "learning_rate": 1.953375880557314e-05, "loss": 0.9554, "step": 580 }, { "epoch": 0.12497311249731125, "grad_norm": 0.0, "learning_rate": 1.9531653837420932e-05, "loss": 0.956, "step": 581 }, { "epoch": 0.12518821251882126, "grad_norm": 0.0, "learning_rate": 1.9529544242183288e-05, "loss": 0.956, "step": 582 }, { "epoch": 0.12540331254033124, "grad_norm": 0.0, "learning_rate": 1.9527430020884293e-05, "loss": 0.9564, "step": 583 }, { "epoch": 0.12561841256184125, "grad_norm": 0.0, "learning_rate": 1.9525311174550286e-05, "loss": 1.0534, "step": 584 }, { "epoch": 0.12583351258335126, "grad_norm": 0.0, "learning_rate": 1.952318770420985e-05, "loss": 0.9951, "step": 585 }, { "epoch": 0.12604861260486125, "grad_norm": 0.0, "learning_rate": 1.9521059610893805e-05, "loss": 0.9304, "step": 586 }, { "epoch": 0.12626371262637126, "grad_norm": 0.0, "learning_rate": 1.9518926895635232e-05, "loss": 0.9684, "step": 587 }, { "epoch": 0.12647881264788127, "grad_norm": 0.0, "learning_rate": 1.951678955946944e-05, "loss": 1.0583, "step": 588 }, { "epoch": 0.12669391266939126, "grad_norm": 0.0, "learning_rate": 1.951464760343398e-05, "loss": 0.9435, "step": 589 }, { "epoch": 0.12690901269090127, "grad_norm": 0.0, "learning_rate": 1.951250102856866e-05, "loss": 1.0754, "step": 590 }, { "epoch": 0.12712411271241128, "grad_norm": 0.0, "learning_rate": 1.9510349835915517e-05, "loss": 0.9779, "step": 591 }, { "epoch": 0.12733921273392126, "grad_norm": 0.0, "learning_rate": 1.9508194026518837e-05, "loss": 0.9579, "step": 592 }, { "epoch": 0.12755431275543128, "grad_norm": 0.0, "learning_rate": 1.9506033601425146e-05, "loss": 1.0214, "step": 593 }, { "epoch": 0.1277694127769413, "grad_norm": 0.0, "learning_rate": 1.950386856168321e-05, "loss": 0.9662, "step": 594 }, { "epoch": 0.12798451279845127, "grad_norm": 0.0, "learning_rate": 1.9501698908344036e-05, "loss": 0.9056, "step": 595 }, { "epoch": 0.12819961281996128, "grad_norm": 0.0, "learning_rate": 1.9499524642460868e-05, "loss": 0.9119, "step": 596 }, { "epoch": 0.1284147128414713, "grad_norm": 0.0, "learning_rate": 1.9497345765089188e-05, "loss": 0.9728, "step": 597 }, { "epoch": 0.12862981286298128, "grad_norm": 0.0, "learning_rate": 1.949516227728672e-05, "loss": 1.044, "step": 598 }, { "epoch": 0.1288449128844913, "grad_norm": 0.0, "learning_rate": 1.9492974180113425e-05, "loss": 0.9659, "step": 599 }, { "epoch": 0.1290600129060013, "grad_norm": 0.0, "learning_rate": 1.9490781474631508e-05, "loss": 1.0224, "step": 600 }, { "epoch": 0.1292751129275113, "grad_norm": 0.0, "learning_rate": 1.94885841619054e-05, "loss": 0.9803, "step": 601 }, { "epoch": 0.1294902129490213, "grad_norm": 0.0, "learning_rate": 1.9486382243001773e-05, "loss": 1.012, "step": 602 }, { "epoch": 0.12970531297053128, "grad_norm": 0.0, "learning_rate": 1.948417571898954e-05, "loss": 0.9761, "step": 603 }, { "epoch": 0.1299204129920413, "grad_norm": 0.0, "learning_rate": 1.9481964590939836e-05, "loss": 0.9613, "step": 604 }, { "epoch": 0.1301355130135513, "grad_norm": 0.0, "learning_rate": 1.947974885992605e-05, "loss": 0.9733, "step": 605 }, { "epoch": 0.1303506130350613, "grad_norm": 0.0, "learning_rate": 1.947752852702379e-05, "loss": 0.9728, "step": 606 }, { "epoch": 0.1305657130565713, "grad_norm": 0.0, "learning_rate": 1.9475303593310904e-05, "loss": 0.9834, "step": 607 }, { "epoch": 0.13078081307808131, "grad_norm": 0.0, "learning_rate": 1.9473074059867476e-05, "loss": 0.9025, "step": 608 }, { "epoch": 0.1309959130995913, "grad_norm": 0.0, "learning_rate": 1.9470839927775817e-05, "loss": 0.9469, "step": 609 }, { "epoch": 0.1312110131211013, "grad_norm": 0.0, "learning_rate": 1.9468601198120476e-05, "loss": 0.9362, "step": 610 }, { "epoch": 0.13142611314261132, "grad_norm": 0.0, "learning_rate": 1.946635787198823e-05, "loss": 1.0223, "step": 611 }, { "epoch": 0.1316412131641213, "grad_norm": 0.0, "learning_rate": 1.9464109950468088e-05, "loss": 1.0169, "step": 612 }, { "epoch": 0.13185631318563132, "grad_norm": 0.0, "learning_rate": 1.946185743465129e-05, "loss": 1.0391, "step": 613 }, { "epoch": 0.13207141320714133, "grad_norm": 0.0, "learning_rate": 1.9459600325631306e-05, "loss": 1.0028, "step": 614 }, { "epoch": 0.13228651322865131, "grad_norm": 0.0, "learning_rate": 1.9457338624503838e-05, "loss": 1.0014, "step": 615 }, { "epoch": 0.13250161325016133, "grad_norm": 0.0, "learning_rate": 1.9455072332366817e-05, "loss": 0.9304, "step": 616 }, { "epoch": 0.13271671327167134, "grad_norm": 0.0, "learning_rate": 1.9452801450320398e-05, "loss": 1.0144, "step": 617 }, { "epoch": 0.13293181329318132, "grad_norm": 0.0, "learning_rate": 1.9450525979466967e-05, "loss": 0.9322, "step": 618 }, { "epoch": 0.13314691331469133, "grad_norm": 0.0, "learning_rate": 1.944824592091114e-05, "loss": 0.96, "step": 619 }, { "epoch": 0.13336201333620135, "grad_norm": 0.0, "learning_rate": 1.9445961275759758e-05, "loss": 0.9633, "step": 620 }, { "epoch": 0.13357711335771133, "grad_norm": 0.0, "learning_rate": 1.9443672045121887e-05, "loss": 0.8974, "step": 621 }, { "epoch": 0.13379221337922134, "grad_norm": 0.0, "learning_rate": 1.9441378230108826e-05, "loss": 0.9706, "step": 622 }, { "epoch": 0.13400731340073135, "grad_norm": 0.0, "learning_rate": 1.9439079831834087e-05, "loss": 0.9223, "step": 623 }, { "epoch": 0.13422241342224134, "grad_norm": 0.0, "learning_rate": 1.9436776851413416e-05, "loss": 0.9665, "step": 624 }, { "epoch": 0.13443751344375135, "grad_norm": 0.0, "learning_rate": 1.9434469289964782e-05, "loss": 0.9267, "step": 625 }, { "epoch": 0.13465261346526133, "grad_norm": 0.0, "learning_rate": 1.943215714860838e-05, "loss": 0.9456, "step": 626 }, { "epoch": 0.13486771348677135, "grad_norm": 0.0, "learning_rate": 1.942984042846662e-05, "loss": 0.9737, "step": 627 }, { "epoch": 0.13508281350828136, "grad_norm": 0.0, "learning_rate": 1.9427519130664145e-05, "loss": 0.9565, "step": 628 }, { "epoch": 0.13529791352979134, "grad_norm": 0.0, "learning_rate": 1.9425193256327812e-05, "loss": 0.9381, "step": 629 }, { "epoch": 0.13551301355130135, "grad_norm": 0.0, "learning_rate": 1.9422862806586703e-05, "loss": 1.0323, "step": 630 }, { "epoch": 0.13572811357281137, "grad_norm": 0.0, "learning_rate": 1.9420527782572126e-05, "loss": 0.995, "step": 631 }, { "epoch": 0.13594321359432135, "grad_norm": 0.0, "learning_rate": 1.94181881854176e-05, "loss": 0.9846, "step": 632 }, { "epoch": 0.13615831361583136, "grad_norm": 0.0, "learning_rate": 1.9415844016258867e-05, "loss": 0.8896, "step": 633 }, { "epoch": 0.13637341363734137, "grad_norm": 0.0, "learning_rate": 1.9413495276233896e-05, "loss": 1.008, "step": 634 }, { "epoch": 0.13658851365885136, "grad_norm": 0.0, "learning_rate": 1.9411141966482865e-05, "loss": 0.919, "step": 635 }, { "epoch": 0.13680361368036137, "grad_norm": 0.0, "learning_rate": 1.9408784088148177e-05, "loss": 0.9941, "step": 636 }, { "epoch": 0.13701871370187138, "grad_norm": 0.0, "learning_rate": 1.9406421642374448e-05, "loss": 0.9555, "step": 637 }, { "epoch": 0.13723381372338136, "grad_norm": 0.0, "learning_rate": 1.9404054630308513e-05, "loss": 1.0118, "step": 638 }, { "epoch": 0.13744891374489138, "grad_norm": 0.0, "learning_rate": 1.9401683053099426e-05, "loss": 0.9635, "step": 639 }, { "epoch": 0.1376640137664014, "grad_norm": 0.0, "learning_rate": 1.9399306911898458e-05, "loss": 0.9059, "step": 640 }, { "epoch": 0.13787911378791137, "grad_norm": 0.0, "learning_rate": 1.9396926207859085e-05, "loss": 0.9824, "step": 641 }, { "epoch": 0.13809421380942138, "grad_norm": 0.0, "learning_rate": 1.9394540942137012e-05, "loss": 0.8592, "step": 642 }, { "epoch": 0.1383093138309314, "grad_norm": 0.0, "learning_rate": 1.939215111589015e-05, "loss": 0.9135, "step": 643 }, { "epoch": 0.13852441385244138, "grad_norm": 0.0, "learning_rate": 1.9389756730278625e-05, "loss": 1.0415, "step": 644 }, { "epoch": 0.1387395138739514, "grad_norm": 0.0, "learning_rate": 1.9387357786464785e-05, "loss": 0.9565, "step": 645 }, { "epoch": 0.13895461389546138, "grad_norm": 0.0, "learning_rate": 1.9384954285613173e-05, "loss": 0.9817, "step": 646 }, { "epoch": 0.1391697139169714, "grad_norm": 0.0, "learning_rate": 1.9382546228890558e-05, "loss": 0.991, "step": 647 }, { "epoch": 0.1393848139384814, "grad_norm": 0.0, "learning_rate": 1.938013361746592e-05, "loss": 0.9469, "step": 648 }, { "epoch": 0.13959991395999138, "grad_norm": 0.0, "learning_rate": 1.937771645251044e-05, "loss": 1.0116, "step": 649 }, { "epoch": 0.1398150139815014, "grad_norm": 0.0, "learning_rate": 1.9375294735197525e-05, "loss": 1.0716, "step": 650 }, { "epoch": 0.1400301140030114, "grad_norm": 0.0, "learning_rate": 1.9372868466702782e-05, "loss": 1.0193, "step": 651 }, { "epoch": 0.1402452140245214, "grad_norm": 0.0, "learning_rate": 1.9370437648204023e-05, "loss": 0.9365, "step": 652 }, { "epoch": 0.1404603140460314, "grad_norm": 0.0, "learning_rate": 1.936800228088128e-05, "loss": 0.9969, "step": 653 }, { "epoch": 0.14067541406754142, "grad_norm": 0.0, "learning_rate": 1.9365562365916782e-05, "loss": 0.9321, "step": 654 }, { "epoch": 0.1408905140890514, "grad_norm": 0.0, "learning_rate": 1.936311790449498e-05, "loss": 0.9712, "step": 655 }, { "epoch": 0.1411056141105614, "grad_norm": 0.0, "learning_rate": 1.9360668897802518e-05, "loss": 0.9019, "step": 656 }, { "epoch": 0.14132071413207142, "grad_norm": 0.0, "learning_rate": 1.935821534702825e-05, "loss": 0.9961, "step": 657 }, { "epoch": 0.1415358141535814, "grad_norm": 0.0, "learning_rate": 1.9355757253363243e-05, "loss": 1.0064, "step": 658 }, { "epoch": 0.14175091417509142, "grad_norm": 0.0, "learning_rate": 1.935329461800076e-05, "loss": 0.9784, "step": 659 }, { "epoch": 0.14196601419660143, "grad_norm": 0.0, "learning_rate": 1.935082744213628e-05, "loss": 0.9261, "step": 660 }, { "epoch": 0.14218111421811142, "grad_norm": 0.0, "learning_rate": 1.9348355726967466e-05, "loss": 0.9926, "step": 661 }, { "epoch": 0.14239621423962143, "grad_norm": 0.0, "learning_rate": 1.9345879473694207e-05, "loss": 0.9174, "step": 662 }, { "epoch": 0.14261131426113144, "grad_norm": 0.0, "learning_rate": 1.9343398683518586e-05, "loss": 0.9817, "step": 663 }, { "epoch": 0.14282641428264142, "grad_norm": 0.0, "learning_rate": 1.9340913357644885e-05, "loss": 0.9449, "step": 664 }, { "epoch": 0.14304151430415143, "grad_norm": 0.0, "learning_rate": 1.933842349727959e-05, "loss": 0.9495, "step": 665 }, { "epoch": 0.14325661432566142, "grad_norm": 0.0, "learning_rate": 1.9335929103631396e-05, "loss": 0.9728, "step": 666 }, { "epoch": 0.14347171434717143, "grad_norm": 0.0, "learning_rate": 1.9333430177911182e-05, "loss": 0.9882, "step": 667 }, { "epoch": 0.14368681436868144, "grad_norm": 0.0, "learning_rate": 1.933092672133204e-05, "loss": 0.9224, "step": 668 }, { "epoch": 0.14390191439019143, "grad_norm": 0.0, "learning_rate": 1.9328418735109268e-05, "loss": 0.9605, "step": 669 }, { "epoch": 0.14411701441170144, "grad_norm": 0.0, "learning_rate": 1.932590622046034e-05, "loss": 0.987, "step": 670 }, { "epoch": 0.14433211443321145, "grad_norm": 0.0, "learning_rate": 1.9323389178604942e-05, "loss": 1.011, "step": 671 }, { "epoch": 0.14454721445472143, "grad_norm": 0.0, "learning_rate": 1.9320867610764965e-05, "loss": 0.9035, "step": 672 }, { "epoch": 0.14476231447623145, "grad_norm": 0.0, "learning_rate": 1.9318341518164487e-05, "loss": 0.9882, "step": 673 }, { "epoch": 0.14497741449774146, "grad_norm": 0.0, "learning_rate": 1.9315810902029786e-05, "loss": 1.0032, "step": 674 }, { "epoch": 0.14519251451925144, "grad_norm": 0.0, "learning_rate": 1.931327576358933e-05, "loss": 0.9605, "step": 675 }, { "epoch": 0.14540761454076145, "grad_norm": 0.0, "learning_rate": 1.931073610407379e-05, "loss": 0.9775, "step": 676 }, { "epoch": 0.14562271456227147, "grad_norm": 0.0, "learning_rate": 1.9308191924716027e-05, "loss": 1.0099, "step": 677 }, { "epoch": 0.14583781458378145, "grad_norm": 0.0, "learning_rate": 1.9305643226751104e-05, "loss": 0.9651, "step": 678 }, { "epoch": 0.14605291460529146, "grad_norm": 0.0, "learning_rate": 1.930309001141626e-05, "loss": 0.9723, "step": 679 }, { "epoch": 0.14626801462680147, "grad_norm": 0.0, "learning_rate": 1.930053227995095e-05, "loss": 0.929, "step": 680 }, { "epoch": 0.14648311464831146, "grad_norm": 0.0, "learning_rate": 1.9297970033596805e-05, "loss": 0.9722, "step": 681 }, { "epoch": 0.14669821466982147, "grad_norm": 0.0, "learning_rate": 1.929540327359765e-05, "loss": 0.9512, "step": 682 }, { "epoch": 0.14691331469133148, "grad_norm": 0.0, "learning_rate": 1.9292832001199507e-05, "loss": 0.9743, "step": 683 }, { "epoch": 0.14712841471284147, "grad_norm": 0.0, "learning_rate": 1.9290256217650588e-05, "loss": 0.9437, "step": 684 }, { "epoch": 0.14734351473435148, "grad_norm": 0.0, "learning_rate": 1.9287675924201284e-05, "loss": 0.9697, "step": 685 }, { "epoch": 0.1475586147558615, "grad_norm": 0.0, "learning_rate": 1.928509112210419e-05, "loss": 0.9757, "step": 686 }, { "epoch": 0.14777371477737147, "grad_norm": 0.0, "learning_rate": 1.928250181261408e-05, "loss": 1.0089, "step": 687 }, { "epoch": 0.14798881479888149, "grad_norm": 0.0, "learning_rate": 1.9279907996987917e-05, "loss": 0.9284, "step": 688 }, { "epoch": 0.14820391482039147, "grad_norm": 0.0, "learning_rate": 1.927730967648486e-05, "loss": 0.9764, "step": 689 }, { "epoch": 0.14841901484190148, "grad_norm": 0.0, "learning_rate": 1.927470685236624e-05, "loss": 0.947, "step": 690 }, { "epoch": 0.1486341148634115, "grad_norm": 0.0, "learning_rate": 1.927209952589559e-05, "loss": 0.9526, "step": 691 }, { "epoch": 0.14884921488492148, "grad_norm": 0.0, "learning_rate": 1.926948769833862e-05, "loss": 0.9571, "step": 692 }, { "epoch": 0.1490643149064315, "grad_norm": 0.0, "learning_rate": 1.9266871370963223e-05, "loss": 0.9534, "step": 693 }, { "epoch": 0.1492794149279415, "grad_norm": 0.0, "learning_rate": 1.9264250545039485e-05, "loss": 0.9444, "step": 694 }, { "epoch": 0.14949451494945148, "grad_norm": 0.0, "learning_rate": 1.9261625221839664e-05, "loss": 0.9247, "step": 695 }, { "epoch": 0.1497096149709615, "grad_norm": 0.0, "learning_rate": 1.9258995402638212e-05, "loss": 1.0066, "step": 696 }, { "epoch": 0.1499247149924715, "grad_norm": 0.0, "learning_rate": 1.925636108871176e-05, "loss": 0.9466, "step": 697 }, { "epoch": 0.1501398150139815, "grad_norm": 0.0, "learning_rate": 1.925372228133912e-05, "loss": 0.9663, "step": 698 }, { "epoch": 0.1503549150354915, "grad_norm": 0.0, "learning_rate": 1.925107898180128e-05, "loss": 0.9395, "step": 699 }, { "epoch": 0.15057001505700152, "grad_norm": 0.0, "learning_rate": 1.9248431191381418e-05, "loss": 0.9515, "step": 700 }, { "epoch": 0.1507851150785115, "grad_norm": 0.0, "learning_rate": 1.9245778911364894e-05, "loss": 0.9927, "step": 701 }, { "epoch": 0.1510002151000215, "grad_norm": 0.0, "learning_rate": 1.9243122143039232e-05, "loss": 1.0028, "step": 702 }, { "epoch": 0.15121531512153152, "grad_norm": 0.0, "learning_rate": 1.924046088769415e-05, "loss": 1.0198, "step": 703 }, { "epoch": 0.1514304151430415, "grad_norm": 0.0, "learning_rate": 1.923779514662154e-05, "loss": 0.9357, "step": 704 }, { "epoch": 0.15164551516455152, "grad_norm": 0.0, "learning_rate": 1.923512492111547e-05, "loss": 0.9594, "step": 705 }, { "epoch": 0.15186061518606153, "grad_norm": 0.0, "learning_rate": 1.923245021247218e-05, "loss": 0.8893, "step": 706 }, { "epoch": 0.15207571520757152, "grad_norm": 0.0, "learning_rate": 1.9229771021990094e-05, "loss": 0.9322, "step": 707 }, { "epoch": 0.15229081522908153, "grad_norm": 0.0, "learning_rate": 1.9227087350969815e-05, "loss": 0.9436, "step": 708 }, { "epoch": 0.1525059152505915, "grad_norm": 0.0, "learning_rate": 1.922439920071411e-05, "loss": 0.9351, "step": 709 }, { "epoch": 0.15272101527210152, "grad_norm": 0.0, "learning_rate": 1.9221706572527925e-05, "loss": 1.0172, "step": 710 }, { "epoch": 0.15293611529361154, "grad_norm": 0.0, "learning_rate": 1.9219009467718385e-05, "loss": 0.984, "step": 711 }, { "epoch": 0.15315121531512152, "grad_norm": 0.0, "learning_rate": 1.9216307887594777e-05, "loss": 0.9751, "step": 712 }, { "epoch": 0.15336631533663153, "grad_norm": 0.0, "learning_rate": 1.9213601833468573e-05, "loss": 0.9742, "step": 713 }, { "epoch": 0.15358141535814154, "grad_norm": 0.0, "learning_rate": 1.9210891306653413e-05, "loss": 0.9819, "step": 714 }, { "epoch": 0.15379651537965153, "grad_norm": 0.0, "learning_rate": 1.9208176308465098e-05, "loss": 0.862, "step": 715 }, { "epoch": 0.15401161540116154, "grad_norm": 0.0, "learning_rate": 1.9205456840221614e-05, "loss": 0.9897, "step": 716 }, { "epoch": 0.15422671542267155, "grad_norm": 0.0, "learning_rate": 1.920273290324311e-05, "loss": 0.9263, "step": 717 }, { "epoch": 0.15444181544418153, "grad_norm": 0.0, "learning_rate": 1.9200004498851903e-05, "loss": 0.9596, "step": 718 }, { "epoch": 0.15465691546569155, "grad_norm": 0.0, "learning_rate": 1.9197271628372484e-05, "loss": 1.0044, "step": 719 }, { "epoch": 0.15487201548720156, "grad_norm": 0.0, "learning_rate": 1.9194534293131507e-05, "loss": 0.9987, "step": 720 }, { "epoch": 0.15508711550871154, "grad_norm": 0.0, "learning_rate": 1.9191792494457793e-05, "loss": 0.9305, "step": 721 }, { "epoch": 0.15530221553022155, "grad_norm": 0.0, "learning_rate": 1.918904623368234e-05, "loss": 0.9187, "step": 722 }, { "epoch": 0.15551731555173157, "grad_norm": 0.0, "learning_rate": 1.91862955121383e-05, "loss": 1.0113, "step": 723 }, { "epoch": 0.15573241557324155, "grad_norm": 0.0, "learning_rate": 1.918354033116099e-05, "loss": 1.007, "step": 724 }, { "epoch": 0.15594751559475156, "grad_norm": 0.0, "learning_rate": 1.9180780692087903e-05, "loss": 0.9315, "step": 725 }, { "epoch": 0.15616261561626157, "grad_norm": 0.0, "learning_rate": 1.9178016596258688e-05, "loss": 0.9444, "step": 726 }, { "epoch": 0.15637771563777156, "grad_norm": 0.0, "learning_rate": 1.9175248045015153e-05, "loss": 0.9399, "step": 727 }, { "epoch": 0.15659281565928157, "grad_norm": 0.0, "learning_rate": 1.9172475039701283e-05, "loss": 0.9602, "step": 728 }, { "epoch": 0.15680791568079158, "grad_norm": 0.0, "learning_rate": 1.9169697581663212e-05, "loss": 0.9955, "step": 729 }, { "epoch": 0.15702301570230157, "grad_norm": 0.0, "learning_rate": 1.9166915672249246e-05, "loss": 0.9357, "step": 730 }, { "epoch": 0.15723811572381158, "grad_norm": 0.0, "learning_rate": 1.916412931280984e-05, "loss": 0.9672, "step": 731 }, { "epoch": 0.15745321574532156, "grad_norm": 0.0, "learning_rate": 1.9161338504697622e-05, "loss": 0.8958, "step": 732 }, { "epoch": 0.15766831576683157, "grad_norm": 0.0, "learning_rate": 1.915854324926737e-05, "loss": 0.9375, "step": 733 }, { "epoch": 0.15788341578834159, "grad_norm": 0.0, "learning_rate": 1.9155743547876026e-05, "loss": 0.9313, "step": 734 }, { "epoch": 0.15809851580985157, "grad_norm": 0.0, "learning_rate": 1.9152939401882684e-05, "loss": 1.0264, "step": 735 }, { "epoch": 0.15831361583136158, "grad_norm": 0.0, "learning_rate": 1.9150130812648607e-05, "loss": 0.9898, "step": 736 }, { "epoch": 0.1585287158528716, "grad_norm": 0.0, "learning_rate": 1.9147317781537204e-05, "loss": 1.0068, "step": 737 }, { "epoch": 0.15874381587438158, "grad_norm": 0.0, "learning_rate": 1.9144500309914047e-05, "loss": 0.9449, "step": 738 }, { "epoch": 0.1589589158958916, "grad_norm": 0.0, "learning_rate": 1.9141678399146858e-05, "loss": 0.9143, "step": 739 }, { "epoch": 0.1591740159174016, "grad_norm": 0.0, "learning_rate": 1.913885205060552e-05, "loss": 1.0497, "step": 740 }, { "epoch": 0.15938911593891159, "grad_norm": 0.0, "learning_rate": 1.9136021265662067e-05, "loss": 0.9597, "step": 741 }, { "epoch": 0.1596042159604216, "grad_norm": 0.0, "learning_rate": 1.9133186045690682e-05, "loss": 0.9295, "step": 742 }, { "epoch": 0.1598193159819316, "grad_norm": 0.0, "learning_rate": 1.913034639206771e-05, "loss": 0.9438, "step": 743 }, { "epoch": 0.1600344160034416, "grad_norm": 0.0, "learning_rate": 1.9127502306171644e-05, "loss": 1.0477, "step": 744 }, { "epoch": 0.1602495160249516, "grad_norm": 0.0, "learning_rate": 1.9124653789383134e-05, "loss": 0.9176, "step": 745 }, { "epoch": 0.16046461604646162, "grad_norm": 0.0, "learning_rate": 1.9121800843084964e-05, "loss": 0.9352, "step": 746 }, { "epoch": 0.1606797160679716, "grad_norm": 0.0, "learning_rate": 1.911894346866209e-05, "loss": 0.9934, "step": 747 }, { "epoch": 0.1608948160894816, "grad_norm": 0.0, "learning_rate": 1.9116081667501603e-05, "loss": 0.9692, "step": 748 }, { "epoch": 0.16110991611099162, "grad_norm": 0.0, "learning_rate": 1.9113215440992752e-05, "loss": 0.9515, "step": 749 }, { "epoch": 0.1613250161325016, "grad_norm": 0.0, "learning_rate": 1.911034479052692e-05, "loss": 1.0278, "step": 750 }, { "epoch": 0.16154011615401162, "grad_norm": 0.0, "learning_rate": 1.910746971749766e-05, "loss": 0.9481, "step": 751 }, { "epoch": 0.1617552161755216, "grad_norm": 0.0, "learning_rate": 1.9104590223300656e-05, "loss": 1.0185, "step": 752 }, { "epoch": 0.16197031619703162, "grad_norm": 0.0, "learning_rate": 1.9101706309333732e-05, "loss": 0.9761, "step": 753 }, { "epoch": 0.16218541621854163, "grad_norm": 0.0, "learning_rate": 1.909881797699688e-05, "loss": 0.9853, "step": 754 }, { "epoch": 0.1624005162400516, "grad_norm": 0.0, "learning_rate": 1.909592522769221e-05, "loss": 0.9937, "step": 755 }, { "epoch": 0.16261561626156162, "grad_norm": 0.0, "learning_rate": 1.9093028062824005e-05, "loss": 0.9799, "step": 756 }, { "epoch": 0.16283071628307164, "grad_norm": 0.0, "learning_rate": 1.9090126483798667e-05, "loss": 0.994, "step": 757 }, { "epoch": 0.16304581630458162, "grad_norm": 0.0, "learning_rate": 1.9087220492024756e-05, "loss": 0.9675, "step": 758 }, { "epoch": 0.16326091632609163, "grad_norm": 0.0, "learning_rate": 1.9084310088912964e-05, "loss": 0.985, "step": 759 }, { "epoch": 0.16347601634760164, "grad_norm": 0.0, "learning_rate": 1.908139527587613e-05, "loss": 0.9446, "step": 760 }, { "epoch": 0.16369111636911163, "grad_norm": 0.0, "learning_rate": 1.9078476054329237e-05, "loss": 0.9707, "step": 761 }, { "epoch": 0.16390621639062164, "grad_norm": 0.0, "learning_rate": 1.90755524256894e-05, "loss": 1.0313, "step": 762 }, { "epoch": 0.16412131641213165, "grad_norm": 0.0, "learning_rate": 1.907262439137588e-05, "loss": 0.984, "step": 763 }, { "epoch": 0.16433641643364164, "grad_norm": 0.0, "learning_rate": 1.9069691952810073e-05, "loss": 1.0376, "step": 764 }, { "epoch": 0.16455151645515165, "grad_norm": 0.0, "learning_rate": 1.906675511141551e-05, "loss": 0.9401, "step": 765 }, { "epoch": 0.16476661647666166, "grad_norm": 0.0, "learning_rate": 1.906381386861787e-05, "loss": 0.9058, "step": 766 }, { "epoch": 0.16498171649817164, "grad_norm": 0.0, "learning_rate": 1.9060868225844955e-05, "loss": 0.9999, "step": 767 }, { "epoch": 0.16519681651968166, "grad_norm": 0.0, "learning_rate": 1.905791818452672e-05, "loss": 0.9403, "step": 768 }, { "epoch": 0.16541191654119167, "grad_norm": 0.0, "learning_rate": 1.9054963746095235e-05, "loss": 0.9356, "step": 769 }, { "epoch": 0.16562701656270165, "grad_norm": 0.0, "learning_rate": 1.9052004911984722e-05, "loss": 0.9319, "step": 770 }, { "epoch": 0.16584211658421166, "grad_norm": 0.0, "learning_rate": 1.9049041683631526e-05, "loss": 0.9677, "step": 771 }, { "epoch": 0.16605721660572165, "grad_norm": 0.0, "learning_rate": 1.9046074062474127e-05, "loss": 0.8562, "step": 772 }, { "epoch": 0.16627231662723166, "grad_norm": 0.0, "learning_rate": 1.9043102049953148e-05, "loss": 0.9725, "step": 773 }, { "epoch": 0.16648741664874167, "grad_norm": 0.0, "learning_rate": 1.9040125647511324e-05, "loss": 1.0131, "step": 774 }, { "epoch": 0.16670251667025165, "grad_norm": 0.0, "learning_rate": 1.903714485659354e-05, "loss": 0.9786, "step": 775 }, { "epoch": 0.16691761669176167, "grad_norm": 0.0, "learning_rate": 1.9034159678646798e-05, "loss": 0.9412, "step": 776 }, { "epoch": 0.16713271671327168, "grad_norm": 0.0, "learning_rate": 1.9031170115120237e-05, "loss": 1.0132, "step": 777 }, { "epoch": 0.16734781673478166, "grad_norm": 0.0, "learning_rate": 1.9028176167465125e-05, "loss": 0.9767, "step": 778 }, { "epoch": 0.16756291675629167, "grad_norm": 0.0, "learning_rate": 1.9025177837134858e-05, "loss": 0.9359, "step": 779 }, { "epoch": 0.1677780167778017, "grad_norm": 0.0, "learning_rate": 1.902217512558495e-05, "loss": 0.9634, "step": 780 }, { "epoch": 0.16799311679931167, "grad_norm": 0.0, "learning_rate": 1.9019168034273062e-05, "loss": 0.9774, "step": 781 }, { "epoch": 0.16820821682082168, "grad_norm": 0.0, "learning_rate": 1.9016156564658955e-05, "loss": 0.9266, "step": 782 }, { "epoch": 0.1684233168423317, "grad_norm": 0.0, "learning_rate": 1.901314071820454e-05, "loss": 0.904, "step": 783 }, { "epoch": 0.16863841686384168, "grad_norm": 0.0, "learning_rate": 1.9010120496373834e-05, "loss": 0.9772, "step": 784 }, { "epoch": 0.1688535168853517, "grad_norm": 0.0, "learning_rate": 1.900709590063299e-05, "loss": 0.9358, "step": 785 }, { "epoch": 0.1690686169068617, "grad_norm": 0.0, "learning_rate": 1.900406693245028e-05, "loss": 0.9484, "step": 786 }, { "epoch": 0.16928371692837169, "grad_norm": 0.0, "learning_rate": 1.9001033593296103e-05, "loss": 0.9448, "step": 787 }, { "epoch": 0.1694988169498817, "grad_norm": 0.0, "learning_rate": 1.8997995884642968e-05, "loss": 0.9405, "step": 788 }, { "epoch": 0.1697139169713917, "grad_norm": 0.0, "learning_rate": 1.8994953807965516e-05, "loss": 0.9512, "step": 789 }, { "epoch": 0.1699290169929017, "grad_norm": 0.0, "learning_rate": 1.8991907364740508e-05, "loss": 0.907, "step": 790 }, { "epoch": 0.1701441170144117, "grad_norm": 0.0, "learning_rate": 1.898885655644682e-05, "loss": 0.9527, "step": 791 }, { "epoch": 0.17035921703592172, "grad_norm": 0.0, "learning_rate": 1.8985801384565447e-05, "loss": 0.8886, "step": 792 }, { "epoch": 0.1705743170574317, "grad_norm": 0.0, "learning_rate": 1.8982741850579504e-05, "loss": 0.9625, "step": 793 }, { "epoch": 0.1707894170789417, "grad_norm": 0.0, "learning_rate": 1.8979677955974226e-05, "loss": 0.9867, "step": 794 }, { "epoch": 0.1710045171004517, "grad_norm": 0.0, "learning_rate": 1.8976609702236963e-05, "loss": 0.9973, "step": 795 }, { "epoch": 0.1712196171219617, "grad_norm": 0.0, "learning_rate": 1.897353709085718e-05, "loss": 0.9262, "step": 796 }, { "epoch": 0.17143471714347172, "grad_norm": 0.0, "learning_rate": 1.897046012332646e-05, "loss": 0.9404, "step": 797 }, { "epoch": 0.1716498171649817, "grad_norm": 0.0, "learning_rate": 1.8967378801138497e-05, "loss": 0.9322, "step": 798 }, { "epoch": 0.17186491718649172, "grad_norm": 0.0, "learning_rate": 1.89642931257891e-05, "loss": 0.9075, "step": 799 }, { "epoch": 0.17208001720800173, "grad_norm": 0.0, "learning_rate": 1.8961203098776194e-05, "loss": 0.9317, "step": 800 }, { "epoch": 0.1722951172295117, "grad_norm": 0.0, "learning_rate": 1.895810872159981e-05, "loss": 0.9474, "step": 801 }, { "epoch": 0.17251021725102172, "grad_norm": 0.0, "learning_rate": 1.8955009995762106e-05, "loss": 0.9365, "step": 802 }, { "epoch": 0.17272531727253174, "grad_norm": 0.0, "learning_rate": 1.895190692276733e-05, "loss": 1.0494, "step": 803 }, { "epoch": 0.17294041729404172, "grad_norm": 0.0, "learning_rate": 1.894879950412185e-05, "loss": 0.917, "step": 804 }, { "epoch": 0.17315551731555173, "grad_norm": 0.0, "learning_rate": 1.894568774133415e-05, "loss": 0.9321, "step": 805 }, { "epoch": 0.17337061733706174, "grad_norm": 0.0, "learning_rate": 1.8942571635914814e-05, "loss": 1.0108, "step": 806 }, { "epoch": 0.17358571735857173, "grad_norm": 0.0, "learning_rate": 1.8939451189376536e-05, "loss": 1.0044, "step": 807 }, { "epoch": 0.17380081738008174, "grad_norm": 0.0, "learning_rate": 1.8936326403234125e-05, "loss": 0.9226, "step": 808 }, { "epoch": 0.17401591740159175, "grad_norm": 0.0, "learning_rate": 1.893319727900448e-05, "loss": 0.9332, "step": 809 }, { "epoch": 0.17423101742310174, "grad_norm": 0.0, "learning_rate": 1.8930063818206626e-05, "loss": 0.9785, "step": 810 }, { "epoch": 0.17444611744461175, "grad_norm": 0.0, "learning_rate": 1.8926926022361674e-05, "loss": 0.9927, "step": 811 }, { "epoch": 0.17466121746612176, "grad_norm": 0.0, "learning_rate": 1.8923783892992857e-05, "loss": 1.0406, "step": 812 }, { "epoch": 0.17487631748763174, "grad_norm": 0.0, "learning_rate": 1.8920637431625497e-05, "loss": 0.9408, "step": 813 }, { "epoch": 0.17509141750914176, "grad_norm": 0.0, "learning_rate": 1.891748663978703e-05, "loss": 0.8625, "step": 814 }, { "epoch": 0.17530651753065174, "grad_norm": 0.0, "learning_rate": 1.8914331519006985e-05, "loss": 0.9882, "step": 815 }, { "epoch": 0.17552161755216175, "grad_norm": 0.0, "learning_rate": 1.8911172070817e-05, "loss": 0.9626, "step": 816 }, { "epoch": 0.17573671757367176, "grad_norm": 0.0, "learning_rate": 1.8908008296750808e-05, "loss": 0.9714, "step": 817 }, { "epoch": 0.17595181759518175, "grad_norm": 0.0, "learning_rate": 1.890484019834425e-05, "loss": 0.9162, "step": 818 }, { "epoch": 0.17616691761669176, "grad_norm": 0.0, "learning_rate": 1.8901667777135252e-05, "loss": 0.9357, "step": 819 }, { "epoch": 0.17638201763820177, "grad_norm": 0.0, "learning_rate": 1.8898491034663852e-05, "loss": 0.9579, "step": 820 }, { "epoch": 0.17659711765971176, "grad_norm": 0.0, "learning_rate": 1.8895309972472184e-05, "loss": 0.9939, "step": 821 }, { "epoch": 0.17681221768122177, "grad_norm": 0.0, "learning_rate": 1.8892124592104474e-05, "loss": 0.9175, "step": 822 }, { "epoch": 0.17702731770273178, "grad_norm": 0.0, "learning_rate": 1.8888934895107042e-05, "loss": 1.0075, "step": 823 }, { "epoch": 0.17724241772424176, "grad_norm": 0.0, "learning_rate": 1.888574088302831e-05, "loss": 0.898, "step": 824 }, { "epoch": 0.17745751774575177, "grad_norm": 0.0, "learning_rate": 1.8882542557418795e-05, "loss": 0.9271, "step": 825 }, { "epoch": 0.1776726177672618, "grad_norm": 0.0, "learning_rate": 1.8879339919831103e-05, "loss": 0.9169, "step": 826 }, { "epoch": 0.17788771778877177, "grad_norm": 0.0, "learning_rate": 1.887613297181994e-05, "loss": 0.8951, "step": 827 }, { "epoch": 0.17810281781028178, "grad_norm": 0.0, "learning_rate": 1.887292171494209e-05, "loss": 0.9792, "step": 828 }, { "epoch": 0.1783179178317918, "grad_norm": 0.0, "learning_rate": 1.886970615075645e-05, "loss": 0.9336, "step": 829 }, { "epoch": 0.17853301785330178, "grad_norm": 0.0, "learning_rate": 1.8866486280823994e-05, "loss": 0.9176, "step": 830 }, { "epoch": 0.1787481178748118, "grad_norm": 0.0, "learning_rate": 1.8863262106707783e-05, "loss": 0.9817, "step": 831 }, { "epoch": 0.1789632178963218, "grad_norm": 0.0, "learning_rate": 1.886003362997298e-05, "loss": 0.9486, "step": 832 }, { "epoch": 0.1791783179178318, "grad_norm": 0.0, "learning_rate": 1.8856800852186826e-05, "loss": 0.9598, "step": 833 }, { "epoch": 0.1793934179393418, "grad_norm": 0.0, "learning_rate": 1.8853563774918657e-05, "loss": 0.9337, "step": 834 }, { "epoch": 0.17960851796085178, "grad_norm": 0.0, "learning_rate": 1.885032239973989e-05, "loss": 0.944, "step": 835 }, { "epoch": 0.1798236179823618, "grad_norm": 0.0, "learning_rate": 1.8847076728224036e-05, "loss": 0.9886, "step": 836 }, { "epoch": 0.1800387180038718, "grad_norm": 0.0, "learning_rate": 1.884382676194668e-05, "loss": 0.8831, "step": 837 }, { "epoch": 0.1802538180253818, "grad_norm": 0.0, "learning_rate": 1.884057250248551e-05, "loss": 0.9573, "step": 838 }, { "epoch": 0.1804689180468918, "grad_norm": 0.0, "learning_rate": 1.8837313951420274e-05, "loss": 0.9467, "step": 839 }, { "epoch": 0.1806840180684018, "grad_norm": 0.0, "learning_rate": 1.8834051110332824e-05, "loss": 1.0003, "step": 840 }, { "epoch": 0.1808991180899118, "grad_norm": 0.0, "learning_rate": 1.883078398080709e-05, "loss": 0.922, "step": 841 }, { "epoch": 0.1811142181114218, "grad_norm": 0.0, "learning_rate": 1.8827512564429076e-05, "loss": 0.8729, "step": 842 }, { "epoch": 0.18132931813293182, "grad_norm": 0.0, "learning_rate": 1.8824236862786873e-05, "loss": 0.9041, "step": 843 }, { "epoch": 0.1815444181544418, "grad_norm": 0.0, "learning_rate": 1.8820956877470647e-05, "loss": 0.9577, "step": 844 }, { "epoch": 0.18175951817595182, "grad_norm": 0.0, "learning_rate": 1.8817672610072656e-05, "loss": 0.9796, "step": 845 }, { "epoch": 0.18197461819746183, "grad_norm": 0.0, "learning_rate": 1.8814384062187225e-05, "loss": 0.8799, "step": 846 }, { "epoch": 0.1821897182189718, "grad_norm": 0.0, "learning_rate": 1.881109123541076e-05, "loss": 0.9472, "step": 847 }, { "epoch": 0.18240481824048183, "grad_norm": 0.0, "learning_rate": 1.8807794131341742e-05, "loss": 0.9695, "step": 848 }, { "epoch": 0.18261991826199184, "grad_norm": 0.0, "learning_rate": 1.880449275158073e-05, "loss": 0.9385, "step": 849 }, { "epoch": 0.18283501828350182, "grad_norm": 0.0, "learning_rate": 1.8801187097730367e-05, "loss": 0.9742, "step": 850 }, { "epoch": 0.18305011830501183, "grad_norm": 0.0, "learning_rate": 1.8797877171395357e-05, "loss": 0.9294, "step": 851 }, { "epoch": 0.18326521832652184, "grad_norm": 0.0, "learning_rate": 1.8794562974182483e-05, "loss": 0.9989, "step": 852 }, { "epoch": 0.18348031834803183, "grad_norm": 0.0, "learning_rate": 1.879124450770061e-05, "loss": 0.9422, "step": 853 }, { "epoch": 0.18369541836954184, "grad_norm": 0.0, "learning_rate": 1.8787921773560656e-05, "loss": 0.9025, "step": 854 }, { "epoch": 0.18391051839105185, "grad_norm": 0.0, "learning_rate": 1.8784594773375636e-05, "loss": 0.9048, "step": 855 }, { "epoch": 0.18412561841256184, "grad_norm": 0.0, "learning_rate": 1.8781263508760617e-05, "loss": 1.0172, "step": 856 }, { "epoch": 0.18434071843407185, "grad_norm": 0.0, "learning_rate": 1.877792798133274e-05, "loss": 0.8975, "step": 857 }, { "epoch": 0.18455581845558183, "grad_norm": 0.0, "learning_rate": 1.877458819271122e-05, "loss": 0.9543, "step": 858 }, { "epoch": 0.18477091847709184, "grad_norm": 0.0, "learning_rate": 1.8771244144517337e-05, "loss": 0.9786, "step": 859 }, { "epoch": 0.18498601849860186, "grad_norm": 0.0, "learning_rate": 1.8767895838374438e-05, "loss": 0.9653, "step": 860 }, { "epoch": 0.18520111852011184, "grad_norm": 0.0, "learning_rate": 1.8764543275907946e-05, "loss": 0.9196, "step": 861 }, { "epoch": 0.18541621854162185, "grad_norm": 0.0, "learning_rate": 1.876118645874533e-05, "loss": 0.9493, "step": 862 }, { "epoch": 0.18563131856313186, "grad_norm": 0.0, "learning_rate": 1.8757825388516148e-05, "loss": 0.9833, "step": 863 }, { "epoch": 0.18584641858464185, "grad_norm": 0.0, "learning_rate": 1.8754460066852003e-05, "loss": 0.8927, "step": 864 }, { "epoch": 0.18606151860615186, "grad_norm": 0.0, "learning_rate": 1.875109049538658e-05, "loss": 0.9621, "step": 865 }, { "epoch": 0.18627661862766187, "grad_norm": 0.0, "learning_rate": 1.8747716675755608e-05, "loss": 0.924, "step": 866 }, { "epoch": 0.18649171864917186, "grad_norm": 0.0, "learning_rate": 1.8744338609596897e-05, "loss": 0.9956, "step": 867 }, { "epoch": 0.18670681867068187, "grad_norm": 0.0, "learning_rate": 1.8740956298550307e-05, "loss": 0.9217, "step": 868 }, { "epoch": 0.18692191869219188, "grad_norm": 0.0, "learning_rate": 1.8737569744257754e-05, "loss": 0.9127, "step": 869 }, { "epoch": 0.18713701871370186, "grad_norm": 0.0, "learning_rate": 1.873417894836323e-05, "loss": 1.0116, "step": 870 }, { "epoch": 0.18735211873521188, "grad_norm": 0.0, "learning_rate": 1.8730783912512772e-05, "loss": 0.8967, "step": 871 }, { "epoch": 0.1875672187567219, "grad_norm": 0.0, "learning_rate": 1.8727384638354484e-05, "loss": 0.9531, "step": 872 }, { "epoch": 0.18778231877823187, "grad_norm": 0.0, "learning_rate": 1.872398112753852e-05, "loss": 0.951, "step": 873 }, { "epoch": 0.18799741879974188, "grad_norm": 0.0, "learning_rate": 1.8720573381717096e-05, "loss": 0.9948, "step": 874 }, { "epoch": 0.1882125188212519, "grad_norm": 0.0, "learning_rate": 1.871716140254448e-05, "loss": 0.9679, "step": 875 }, { "epoch": 0.18842761884276188, "grad_norm": 0.0, "learning_rate": 1.8713745191677003e-05, "loss": 0.9771, "step": 876 }, { "epoch": 0.1886427188642719, "grad_norm": 0.0, "learning_rate": 1.8710324750773038e-05, "loss": 0.9381, "step": 877 }, { "epoch": 0.18885781888578188, "grad_norm": 0.0, "learning_rate": 1.870690008149302e-05, "loss": 0.9599, "step": 878 }, { "epoch": 0.1890729189072919, "grad_norm": 0.0, "learning_rate": 1.8703471185499438e-05, "loss": 0.9848, "step": 879 }, { "epoch": 0.1892880189288019, "grad_norm": 0.0, "learning_rate": 1.8700038064456823e-05, "loss": 0.9242, "step": 880 }, { "epoch": 0.18950311895031188, "grad_norm": 0.0, "learning_rate": 1.869660072003177e-05, "loss": 0.9521, "step": 881 }, { "epoch": 0.1897182189718219, "grad_norm": 0.0, "learning_rate": 1.8693159153892917e-05, "loss": 0.9977, "step": 882 }, { "epoch": 0.1899333189933319, "grad_norm": 0.0, "learning_rate": 1.868971336771095e-05, "loss": 0.9653, "step": 883 }, { "epoch": 0.1901484190148419, "grad_norm": 0.0, "learning_rate": 1.86862633631586e-05, "loss": 0.9261, "step": 884 }, { "epoch": 0.1903635190363519, "grad_norm": 0.0, "learning_rate": 1.868280914191066e-05, "loss": 0.9821, "step": 885 }, { "epoch": 0.19057861905786191, "grad_norm": 0.0, "learning_rate": 1.867935070564396e-05, "loss": 0.9399, "step": 886 }, { "epoch": 0.1907937190793719, "grad_norm": 0.0, "learning_rate": 1.8675888056037378e-05, "loss": 0.9537, "step": 887 }, { "epoch": 0.1910088191008819, "grad_norm": 0.0, "learning_rate": 1.867242119477183e-05, "loss": 0.9473, "step": 888 }, { "epoch": 0.19122391912239192, "grad_norm": 0.0, "learning_rate": 1.8668950123530293e-05, "loss": 0.9841, "step": 889 }, { "epoch": 0.1914390191439019, "grad_norm": 0.0, "learning_rate": 1.866547484399777e-05, "loss": 0.9584, "step": 890 }, { "epoch": 0.19165411916541192, "grad_norm": 0.0, "learning_rate": 1.8661995357861325e-05, "loss": 0.9258, "step": 891 }, { "epoch": 0.19186921918692193, "grad_norm": 0.0, "learning_rate": 1.8658511666810043e-05, "loss": 0.9893, "step": 892 }, { "epoch": 0.19208431920843191, "grad_norm": 0.0, "learning_rate": 1.8655023772535064e-05, "loss": 0.9038, "step": 893 }, { "epoch": 0.19229941922994193, "grad_norm": 0.0, "learning_rate": 1.8651531676729574e-05, "loss": 0.9736, "step": 894 }, { "epoch": 0.19251451925145194, "grad_norm": 0.0, "learning_rate": 1.8648035381088784e-05, "loss": 0.914, "step": 895 }, { "epoch": 0.19272961927296192, "grad_norm": 0.0, "learning_rate": 1.8644534887309947e-05, "loss": 0.9967, "step": 896 }, { "epoch": 0.19294471929447193, "grad_norm": 0.0, "learning_rate": 1.864103019709236e-05, "loss": 0.9352, "step": 897 }, { "epoch": 0.19315981931598195, "grad_norm": 0.0, "learning_rate": 1.863752131213736e-05, "loss": 0.8926, "step": 898 }, { "epoch": 0.19337491933749193, "grad_norm": 0.0, "learning_rate": 1.863400823414831e-05, "loss": 0.9282, "step": 899 }, { "epoch": 0.19359001935900194, "grad_norm": 0.0, "learning_rate": 1.863049096483061e-05, "loss": 0.9704, "step": 900 }, { "epoch": 0.19380511938051193, "grad_norm": 0.0, "learning_rate": 1.86269695058917e-05, "loss": 0.9286, "step": 901 }, { "epoch": 0.19402021940202194, "grad_norm": 0.0, "learning_rate": 1.8623443859041055e-05, "loss": 1.0141, "step": 902 }, { "epoch": 0.19423531942353195, "grad_norm": 0.0, "learning_rate": 1.8619914025990175e-05, "loss": 0.9915, "step": 903 }, { "epoch": 0.19445041944504193, "grad_norm": 0.0, "learning_rate": 1.86163800084526e-05, "loss": 0.8908, "step": 904 }, { "epoch": 0.19466551946655195, "grad_norm": 0.0, "learning_rate": 1.8612841808143897e-05, "loss": 0.9852, "step": 905 }, { "epoch": 0.19488061948806196, "grad_norm": 0.0, "learning_rate": 1.8609299426781662e-05, "loss": 0.9748, "step": 906 }, { "epoch": 0.19509571950957194, "grad_norm": 0.0, "learning_rate": 1.860575286608553e-05, "loss": 0.926, "step": 907 }, { "epoch": 0.19531081953108195, "grad_norm": 0.0, "learning_rate": 1.860220212777715e-05, "loss": 0.9451, "step": 908 }, { "epoch": 0.19552591955259196, "grad_norm": 0.0, "learning_rate": 1.8598647213580215e-05, "loss": 0.9022, "step": 909 }, { "epoch": 0.19574101957410195, "grad_norm": 0.0, "learning_rate": 1.859508812522043e-05, "loss": 0.9398, "step": 910 }, { "epoch": 0.19595611959561196, "grad_norm": 0.0, "learning_rate": 1.8591524864425534e-05, "loss": 0.9221, "step": 911 }, { "epoch": 0.19617121961712197, "grad_norm": 0.0, "learning_rate": 1.8587957432925297e-05, "loss": 0.838, "step": 912 }, { "epoch": 0.19638631963863196, "grad_norm": 0.0, "learning_rate": 1.8584385832451502e-05, "loss": 0.9448, "step": 913 }, { "epoch": 0.19660141966014197, "grad_norm": 0.0, "learning_rate": 1.8580810064737967e-05, "loss": 0.9553, "step": 914 }, { "epoch": 0.19681651968165198, "grad_norm": 0.0, "learning_rate": 1.857723013152052e-05, "loss": 0.9885, "step": 915 }, { "epoch": 0.19703161970316196, "grad_norm": 0.0, "learning_rate": 1.8573646034537027e-05, "loss": 0.9617, "step": 916 }, { "epoch": 0.19724671972467198, "grad_norm": 0.0, "learning_rate": 1.857005777552736e-05, "loss": 0.9704, "step": 917 }, { "epoch": 0.197461819746182, "grad_norm": 0.0, "learning_rate": 1.8566465356233425e-05, "loss": 0.9196, "step": 918 }, { "epoch": 0.19767691976769197, "grad_norm": 0.0, "learning_rate": 1.8562868778399132e-05, "loss": 0.9519, "step": 919 }, { "epoch": 0.19789201978920198, "grad_norm": 0.0, "learning_rate": 1.8559268043770427e-05, "loss": 0.9744, "step": 920 }, { "epoch": 0.19810711981071197, "grad_norm": 0.0, "learning_rate": 1.8555663154095257e-05, "loss": 0.9771, "step": 921 }, { "epoch": 0.19832221983222198, "grad_norm": 0.0, "learning_rate": 1.8552054111123607e-05, "loss": 0.8906, "step": 922 }, { "epoch": 0.198537319853732, "grad_norm": 0.0, "learning_rate": 1.8548440916607454e-05, "loss": 0.9273, "step": 923 }, { "epoch": 0.19875241987524198, "grad_norm": 0.0, "learning_rate": 1.8544823572300805e-05, "loss": 1.0007, "step": 924 }, { "epoch": 0.198967519896752, "grad_norm": 0.0, "learning_rate": 1.854120207995968e-05, "loss": 0.9524, "step": 925 }, { "epoch": 0.199182619918262, "grad_norm": 0.0, "learning_rate": 1.8537576441342114e-05, "loss": 0.9305, "step": 926 }, { "epoch": 0.19939771993977198, "grad_norm": 0.0, "learning_rate": 1.853394665820815e-05, "loss": 0.9384, "step": 927 }, { "epoch": 0.199612819961282, "grad_norm": 0.0, "learning_rate": 1.8530312732319838e-05, "loss": 0.9571, "step": 928 }, { "epoch": 0.199827919982792, "grad_norm": 0.0, "learning_rate": 1.852667466544126e-05, "loss": 0.9302, "step": 929 }, { "epoch": 0.200043020004302, "grad_norm": 0.0, "learning_rate": 1.8523032459338482e-05, "loss": 0.9765, "step": 930 }, { "epoch": 0.200258120025812, "grad_norm": 0.0, "learning_rate": 1.8519386115779595e-05, "loss": 0.9197, "step": 931 }, { "epoch": 0.20047322004732201, "grad_norm": 0.0, "learning_rate": 1.8515735636534694e-05, "loss": 0.9014, "step": 932 }, { "epoch": 0.200688320068832, "grad_norm": 0.0, "learning_rate": 1.851208102337589e-05, "loss": 1.0676, "step": 933 }, { "epoch": 0.200903420090342, "grad_norm": 0.0, "learning_rate": 1.8508422278077285e-05, "loss": 0.9074, "step": 934 }, { "epoch": 0.20111852011185202, "grad_norm": 0.0, "learning_rate": 1.8504759402414996e-05, "loss": 0.9133, "step": 935 }, { "epoch": 0.201333620133362, "grad_norm": 0.0, "learning_rate": 1.8501092398167152e-05, "loss": 1.0261, "step": 936 }, { "epoch": 0.20154872015487202, "grad_norm": 0.0, "learning_rate": 1.8497421267113874e-05, "loss": 0.9333, "step": 937 }, { "epoch": 0.20176382017638203, "grad_norm": 0.0, "learning_rate": 1.8493746011037288e-05, "loss": 1.0048, "step": 938 }, { "epoch": 0.20197892019789201, "grad_norm": 0.0, "learning_rate": 1.849006663172153e-05, "loss": 0.9366, "step": 939 }, { "epoch": 0.20219402021940203, "grad_norm": 0.0, "learning_rate": 1.8486383130952733e-05, "loss": 0.8922, "step": 940 }, { "epoch": 0.202409120240912, "grad_norm": 0.0, "learning_rate": 1.8482695510519027e-05, "loss": 0.8667, "step": 941 }, { "epoch": 0.20262422026242202, "grad_norm": 0.0, "learning_rate": 1.8479003772210548e-05, "loss": 0.916, "step": 942 }, { "epoch": 0.20283932028393203, "grad_norm": 0.0, "learning_rate": 1.847530791781943e-05, "loss": 0.9862, "step": 943 }, { "epoch": 0.20305442030544202, "grad_norm": 0.0, "learning_rate": 1.84716079491398e-05, "loss": 0.9666, "step": 944 }, { "epoch": 0.20326952032695203, "grad_norm": 0.0, "learning_rate": 1.846790386796779e-05, "loss": 0.9509, "step": 945 }, { "epoch": 0.20348462034846204, "grad_norm": 0.0, "learning_rate": 1.8464195676101527e-05, "loss": 0.9678, "step": 946 }, { "epoch": 0.20369972036997203, "grad_norm": 0.0, "learning_rate": 1.8460483375341123e-05, "loss": 0.9917, "step": 947 }, { "epoch": 0.20391482039148204, "grad_norm": 0.0, "learning_rate": 1.84567669674887e-05, "loss": 0.926, "step": 948 }, { "epoch": 0.20412992041299205, "grad_norm": 0.0, "learning_rate": 1.845304645434836e-05, "loss": 0.9559, "step": 949 }, { "epoch": 0.20434502043450203, "grad_norm": 0.0, "learning_rate": 1.8449321837726207e-05, "loss": 0.9758, "step": 950 }, { "epoch": 0.20456012045601205, "grad_norm": 0.0, "learning_rate": 1.8445593119430337e-05, "loss": 0.9428, "step": 951 }, { "epoch": 0.20477522047752206, "grad_norm": 0.0, "learning_rate": 1.844186030127083e-05, "loss": 0.9387, "step": 952 }, { "epoch": 0.20499032049903204, "grad_norm": 0.0, "learning_rate": 1.8438123385059767e-05, "loss": 0.9684, "step": 953 }, { "epoch": 0.20520542052054205, "grad_norm": 0.0, "learning_rate": 1.84343823726112e-05, "loss": 1.0092, "step": 954 }, { "epoch": 0.20542052054205207, "grad_norm": 0.0, "learning_rate": 1.8430637265741196e-05, "loss": 0.9063, "step": 955 }, { "epoch": 0.20563562056356205, "grad_norm": 0.0, "learning_rate": 1.8426888066267787e-05, "loss": 0.9236, "step": 956 }, { "epoch": 0.20585072058507206, "grad_norm": 0.0, "learning_rate": 1.8423134776011e-05, "loss": 0.8681, "step": 957 }, { "epoch": 0.20606582060658207, "grad_norm": 0.0, "learning_rate": 1.841937739679285e-05, "loss": 0.9779, "step": 958 }, { "epoch": 0.20628092062809206, "grad_norm": 0.0, "learning_rate": 1.8415615930437335e-05, "loss": 0.8851, "step": 959 }, { "epoch": 0.20649602064960207, "grad_norm": 0.0, "learning_rate": 1.8411850378770436e-05, "loss": 0.9396, "step": 960 }, { "epoch": 0.20671112067111208, "grad_norm": 0.0, "learning_rate": 1.8408080743620117e-05, "loss": 0.9375, "step": 961 }, { "epoch": 0.20692622069262206, "grad_norm": 0.0, "learning_rate": 1.8404307026816326e-05, "loss": 0.8708, "step": 962 }, { "epoch": 0.20714132071413208, "grad_norm": 0.0, "learning_rate": 1.8400529230190992e-05, "loss": 0.9968, "step": 963 }, { "epoch": 0.20735642073564206, "grad_norm": 0.0, "learning_rate": 1.839674735557802e-05, "loss": 0.9683, "step": 964 }, { "epoch": 0.20757152075715207, "grad_norm": 0.0, "learning_rate": 1.8392961404813307e-05, "loss": 0.9492, "step": 965 }, { "epoch": 0.20778662077866208, "grad_norm": 0.0, "learning_rate": 1.8389171379734714e-05, "loss": 0.9667, "step": 966 }, { "epoch": 0.20800172080017207, "grad_norm": 0.0, "learning_rate": 1.838537728218209e-05, "loss": 1.0232, "step": 967 }, { "epoch": 0.20821682082168208, "grad_norm": 0.0, "learning_rate": 1.8381579113997255e-05, "loss": 0.9801, "step": 968 }, { "epoch": 0.2084319208431921, "grad_norm": 0.0, "learning_rate": 1.837777687702401e-05, "loss": 0.8881, "step": 969 }, { "epoch": 0.20864702086470208, "grad_norm": 0.0, "learning_rate": 1.8373970573108125e-05, "loss": 0.9856, "step": 970 }, { "epoch": 0.2088621208862121, "grad_norm": 0.0, "learning_rate": 1.8370160204097352e-05, "loss": 0.9458, "step": 971 }, { "epoch": 0.2090772209077221, "grad_norm": 0.0, "learning_rate": 1.836634577184141e-05, "loss": 0.9579, "step": 972 }, { "epoch": 0.20929232092923208, "grad_norm": 0.0, "learning_rate": 1.8362527278191993e-05, "loss": 0.9369, "step": 973 }, { "epoch": 0.2095074209507421, "grad_norm": 0.0, "learning_rate": 1.8358704725002766e-05, "loss": 0.9378, "step": 974 }, { "epoch": 0.2097225209722521, "grad_norm": 0.0, "learning_rate": 1.8354878114129368e-05, "loss": 0.9235, "step": 975 }, { "epoch": 0.2099376209937621, "grad_norm": 0.0, "learning_rate": 1.83510474474294e-05, "loss": 0.9467, "step": 976 }, { "epoch": 0.2101527210152721, "grad_norm": 0.0, "learning_rate": 1.8347212726762436e-05, "loss": 0.969, "step": 977 }, { "epoch": 0.21036782103678212, "grad_norm": 0.0, "learning_rate": 1.8343373953990028e-05, "loss": 0.9659, "step": 978 }, { "epoch": 0.2105829210582921, "grad_norm": 0.0, "learning_rate": 1.8339531130975676e-05, "loss": 0.9662, "step": 979 }, { "epoch": 0.2107980210798021, "grad_norm": 0.0, "learning_rate": 1.8335684259584855e-05, "loss": 0.9583, "step": 980 }, { "epoch": 0.21101312110131212, "grad_norm": 0.0, "learning_rate": 1.8331833341685017e-05, "loss": 0.9931, "step": 981 }, { "epoch": 0.2112282211228221, "grad_norm": 0.0, "learning_rate": 1.832797837914556e-05, "loss": 0.8541, "step": 982 }, { "epoch": 0.21144332114433212, "grad_norm": 0.0, "learning_rate": 1.832411937383785e-05, "loss": 0.9403, "step": 983 }, { "epoch": 0.2116584211658421, "grad_norm": 0.0, "learning_rate": 1.8320256327635225e-05, "loss": 0.9599, "step": 984 }, { "epoch": 0.21187352118735212, "grad_norm": 0.0, "learning_rate": 1.831638924241298e-05, "loss": 0.9212, "step": 985 }, { "epoch": 0.21208862120886213, "grad_norm": 0.0, "learning_rate": 1.8312518120048354e-05, "loss": 0.9633, "step": 986 }, { "epoch": 0.2123037212303721, "grad_norm": 0.0, "learning_rate": 1.8308642962420577e-05, "loss": 0.9901, "step": 987 }, { "epoch": 0.21251882125188212, "grad_norm": 0.0, "learning_rate": 1.8304763771410815e-05, "loss": 0.9193, "step": 988 }, { "epoch": 0.21273392127339213, "grad_norm": 0.0, "learning_rate": 1.83008805489022e-05, "loss": 0.9436, "step": 989 }, { "epoch": 0.21294902129490212, "grad_norm": 0.0, "learning_rate": 1.8296993296779818e-05, "loss": 0.9382, "step": 990 }, { "epoch": 0.21316412131641213, "grad_norm": 0.0, "learning_rate": 1.8293102016930713e-05, "loss": 0.8616, "step": 991 }, { "epoch": 0.21337922133792214, "grad_norm": 0.0, "learning_rate": 1.8289206711243883e-05, "loss": 0.8718, "step": 992 }, { "epoch": 0.21359432135943213, "grad_norm": 0.0, "learning_rate": 1.8285307381610283e-05, "loss": 0.913, "step": 993 }, { "epoch": 0.21380942138094214, "grad_norm": 0.0, "learning_rate": 1.828140402992282e-05, "loss": 0.9259, "step": 994 }, { "epoch": 0.21402452140245215, "grad_norm": 0.0, "learning_rate": 1.827749665807635e-05, "loss": 0.9289, "step": 995 }, { "epoch": 0.21423962142396213, "grad_norm": 0.0, "learning_rate": 1.8273585267967692e-05, "loss": 0.956, "step": 996 }, { "epoch": 0.21445472144547215, "grad_norm": 0.0, "learning_rate": 1.8269669861495595e-05, "loss": 0.9649, "step": 997 }, { "epoch": 0.21466982146698216, "grad_norm": 0.0, "learning_rate": 1.8265750440560783e-05, "loss": 0.9886, "step": 998 }, { "epoch": 0.21488492148849214, "grad_norm": 0.0, "learning_rate": 1.8261827007065904e-05, "loss": 0.9226, "step": 999 }, { "epoch": 0.21510002151000215, "grad_norm": 0.0, "learning_rate": 1.8257899562915577e-05, "loss": 0.9202, "step": 1000 }, { "epoch": 0.21531512153151217, "grad_norm": 0.0, "learning_rate": 1.8253968110016343e-05, "loss": 0.9085, "step": 1001 }, { "epoch": 0.21553022155302215, "grad_norm": 0.0, "learning_rate": 1.8250032650276718e-05, "loss": 1.0105, "step": 1002 }, { "epoch": 0.21574532157453216, "grad_norm": 0.0, "learning_rate": 1.8246093185607137e-05, "loss": 0.9581, "step": 1003 }, { "epoch": 0.21596042159604215, "grad_norm": 0.0, "learning_rate": 1.8242149717919993e-05, "loss": 0.9517, "step": 1004 }, { "epoch": 0.21617552161755216, "grad_norm": 0.0, "learning_rate": 1.8238202249129625e-05, "loss": 0.9074, "step": 1005 }, { "epoch": 0.21639062163906217, "grad_norm": 0.0, "learning_rate": 1.82342507811523e-05, "loss": 0.9313, "step": 1006 }, { "epoch": 0.21660572166057215, "grad_norm": 0.0, "learning_rate": 1.8230295315906237e-05, "loss": 0.8858, "step": 1007 }, { "epoch": 0.21682082168208217, "grad_norm": 0.0, "learning_rate": 1.8226335855311598e-05, "loss": 0.9052, "step": 1008 }, { "epoch": 0.21703592170359218, "grad_norm": 0.0, "learning_rate": 1.8222372401290475e-05, "loss": 0.9322, "step": 1009 }, { "epoch": 0.21725102172510216, "grad_norm": 0.0, "learning_rate": 1.8218404955766905e-05, "loss": 0.9991, "step": 1010 }, { "epoch": 0.21746612174661217, "grad_norm": 0.0, "learning_rate": 1.821443352066686e-05, "loss": 0.9553, "step": 1011 }, { "epoch": 0.21768122176812219, "grad_norm": 0.0, "learning_rate": 1.8210458097918255e-05, "loss": 0.9855, "step": 1012 }, { "epoch": 0.21789632178963217, "grad_norm": 0.0, "learning_rate": 1.820647868945093e-05, "loss": 0.9742, "step": 1013 }, { "epoch": 0.21811142181114218, "grad_norm": 0.0, "learning_rate": 1.820249529719667e-05, "loss": 0.9182, "step": 1014 }, { "epoch": 0.2183265218326522, "grad_norm": 0.0, "learning_rate": 1.8198507923089185e-05, "loss": 0.9222, "step": 1015 }, { "epoch": 0.21854162185416218, "grad_norm": 0.0, "learning_rate": 1.819451656906412e-05, "loss": 0.9579, "step": 1016 }, { "epoch": 0.2187567218756722, "grad_norm": 0.0, "learning_rate": 1.8190521237059066e-05, "loss": 0.9256, "step": 1017 }, { "epoch": 0.2189718218971822, "grad_norm": 0.0, "learning_rate": 1.8186521929013523e-05, "loss": 0.9561, "step": 1018 }, { "epoch": 0.21918692191869218, "grad_norm": 0.0, "learning_rate": 1.818251864686893e-05, "loss": 0.9457, "step": 1019 }, { "epoch": 0.2194020219402022, "grad_norm": 0.0, "learning_rate": 1.8178511392568667e-05, "loss": 0.9749, "step": 1020 }, { "epoch": 0.2196171219617122, "grad_norm": 0.0, "learning_rate": 1.817450016805803e-05, "loss": 1.0254, "step": 1021 }, { "epoch": 0.2198322219832222, "grad_norm": 0.0, "learning_rate": 1.8170484975284233e-05, "loss": 0.9185, "step": 1022 }, { "epoch": 0.2200473220047322, "grad_norm": 0.0, "learning_rate": 1.8166465816196436e-05, "loss": 0.9991, "step": 1023 }, { "epoch": 0.22026242202624222, "grad_norm": 0.0, "learning_rate": 1.8162442692745722e-05, "loss": 0.9264, "step": 1024 }, { "epoch": 0.2204775220477522, "grad_norm": 0.0, "learning_rate": 1.8158415606885083e-05, "loss": 0.9932, "step": 1025 }, { "epoch": 0.2206926220692622, "grad_norm": 0.0, "learning_rate": 1.815438456056945e-05, "loss": 0.9195, "step": 1026 }, { "epoch": 0.2209077220907722, "grad_norm": 0.0, "learning_rate": 1.8150349555755666e-05, "loss": 0.9347, "step": 1027 }, { "epoch": 0.2211228221122822, "grad_norm": 0.0, "learning_rate": 1.8146310594402504e-05, "loss": 0.914, "step": 1028 }, { "epoch": 0.22133792213379222, "grad_norm": 0.0, "learning_rate": 1.8142267678470654e-05, "loss": 0.9449, "step": 1029 }, { "epoch": 0.2215530221553022, "grad_norm": 0.0, "learning_rate": 1.8138220809922726e-05, "loss": 1.0143, "step": 1030 }, { "epoch": 0.22176812217681222, "grad_norm": 0.0, "learning_rate": 1.8134169990723246e-05, "loss": 0.9446, "step": 1031 }, { "epoch": 0.22198322219832223, "grad_norm": 0.0, "learning_rate": 1.8130115222838663e-05, "loss": 0.8984, "step": 1032 }, { "epoch": 0.2221983222198322, "grad_norm": 0.0, "learning_rate": 1.8126056508237338e-05, "loss": 0.9128, "step": 1033 }, { "epoch": 0.22241342224134222, "grad_norm": 0.0, "learning_rate": 1.8121993848889553e-05, "loss": 0.9323, "step": 1034 }, { "epoch": 0.22262852226285224, "grad_norm": 0.0, "learning_rate": 1.81179272467675e-05, "loss": 0.9356, "step": 1035 }, { "epoch": 0.22284362228436222, "grad_norm": 0.0, "learning_rate": 1.811385670384529e-05, "loss": 0.9263, "step": 1036 }, { "epoch": 0.22305872230587223, "grad_norm": 0.0, "learning_rate": 1.810978222209894e-05, "loss": 0.9717, "step": 1037 }, { "epoch": 0.22327382232738224, "grad_norm": 0.0, "learning_rate": 1.8105703803506384e-05, "loss": 0.939, "step": 1038 }, { "epoch": 0.22348892234889223, "grad_norm": 0.0, "learning_rate": 1.8101621450047467e-05, "loss": 0.9302, "step": 1039 }, { "epoch": 0.22370402237040224, "grad_norm": 0.0, "learning_rate": 1.8097535163703947e-05, "loss": 0.9615, "step": 1040 }, { "epoch": 0.22391912239191225, "grad_norm": 0.0, "learning_rate": 1.8093444946459483e-05, "loss": 0.9158, "step": 1041 }, { "epoch": 0.22413422241342223, "grad_norm": 0.0, "learning_rate": 1.808935080029965e-05, "loss": 0.9593, "step": 1042 }, { "epoch": 0.22434932243493225, "grad_norm": 0.0, "learning_rate": 1.8085252727211927e-05, "loss": 0.8494, "step": 1043 }, { "epoch": 0.22456442245644226, "grad_norm": 0.0, "learning_rate": 1.8081150729185696e-05, "loss": 0.894, "step": 1044 }, { "epoch": 0.22477952247795224, "grad_norm": 0.0, "learning_rate": 1.8077044808212255e-05, "loss": 0.941, "step": 1045 }, { "epoch": 0.22499462249946225, "grad_norm": 0.0, "learning_rate": 1.807293496628479e-05, "loss": 0.9897, "step": 1046 }, { "epoch": 0.22520972252097224, "grad_norm": 0.0, "learning_rate": 1.8068821205398408e-05, "loss": 0.9227, "step": 1047 }, { "epoch": 0.22542482254248225, "grad_norm": 0.0, "learning_rate": 1.8064703527550102e-05, "loss": 0.9508, "step": 1048 }, { "epoch": 0.22563992256399226, "grad_norm": 0.0, "learning_rate": 1.8060581934738783e-05, "loss": 0.941, "step": 1049 }, { "epoch": 0.22585502258550225, "grad_norm": 0.0, "learning_rate": 1.805645642896525e-05, "loss": 0.9313, "step": 1050 }, { "epoch": 0.22607012260701226, "grad_norm": 0.0, "learning_rate": 1.8052327012232203e-05, "loss": 0.9778, "step": 1051 }, { "epoch": 0.22628522262852227, "grad_norm": 0.0, "learning_rate": 1.8048193686544248e-05, "loss": 0.9311, "step": 1052 }, { "epoch": 0.22650032265003225, "grad_norm": 0.0, "learning_rate": 1.8044056453907882e-05, "loss": 0.9217, "step": 1053 }, { "epoch": 0.22671542267154227, "grad_norm": 0.0, "learning_rate": 1.80399153163315e-05, "loss": 0.939, "step": 1054 }, { "epoch": 0.22693052269305228, "grad_norm": 0.0, "learning_rate": 1.803577027582539e-05, "loss": 0.9667, "step": 1055 }, { "epoch": 0.22714562271456226, "grad_norm": 0.0, "learning_rate": 1.8031621334401743e-05, "loss": 1.0064, "step": 1056 }, { "epoch": 0.22736072273607227, "grad_norm": 0.0, "learning_rate": 1.8027468494074632e-05, "loss": 0.9452, "step": 1057 }, { "epoch": 0.22757582275758229, "grad_norm": 0.0, "learning_rate": 1.8023311756860036e-05, "loss": 0.9721, "step": 1058 }, { "epoch": 0.22779092277909227, "grad_norm": 0.0, "learning_rate": 1.8019151124775815e-05, "loss": 0.9272, "step": 1059 }, { "epoch": 0.22800602280060228, "grad_norm": 0.0, "learning_rate": 1.8014986599841724e-05, "loss": 0.8523, "step": 1060 }, { "epoch": 0.2282211228221123, "grad_norm": 0.0, "learning_rate": 1.8010818184079405e-05, "loss": 0.861, "step": 1061 }, { "epoch": 0.22843622284362228, "grad_norm": 0.0, "learning_rate": 1.8006645879512396e-05, "loss": 0.9106, "step": 1062 }, { "epoch": 0.2286513228651323, "grad_norm": 0.0, "learning_rate": 1.8002469688166115e-05, "loss": 0.8722, "step": 1063 }, { "epoch": 0.2288664228866423, "grad_norm": 0.0, "learning_rate": 1.7998289612067864e-05, "loss": 0.9003, "step": 1064 }, { "epoch": 0.22908152290815229, "grad_norm": 0.0, "learning_rate": 1.7994105653246847e-05, "loss": 0.946, "step": 1065 }, { "epoch": 0.2292966229296623, "grad_norm": 0.0, "learning_rate": 1.7989917813734136e-05, "loss": 0.9115, "step": 1066 }, { "epoch": 0.2295117229511723, "grad_norm": 0.0, "learning_rate": 1.798572609556269e-05, "loss": 0.939, "step": 1067 }, { "epoch": 0.2297268229726823, "grad_norm": 0.0, "learning_rate": 1.7981530500767365e-05, "loss": 0.9573, "step": 1068 }, { "epoch": 0.2299419229941923, "grad_norm": 0.0, "learning_rate": 1.7977331031384878e-05, "loss": 0.9662, "step": 1069 }, { "epoch": 0.2301570230157023, "grad_norm": 0.0, "learning_rate": 1.7973127689453838e-05, "loss": 0.9753, "step": 1070 }, { "epoch": 0.2303721230372123, "grad_norm": 0.0, "learning_rate": 1.7968920477014737e-05, "loss": 0.997, "step": 1071 }, { "epoch": 0.2305872230587223, "grad_norm": 0.0, "learning_rate": 1.796470939610994e-05, "loss": 0.9278, "step": 1072 }, { "epoch": 0.2308023230802323, "grad_norm": 0.0, "learning_rate": 1.7960494448783685e-05, "loss": 0.8825, "step": 1073 }, { "epoch": 0.2310174231017423, "grad_norm": 0.0, "learning_rate": 1.79562756370821e-05, "loss": 0.8712, "step": 1074 }, { "epoch": 0.23123252312325232, "grad_norm": 0.0, "learning_rate": 1.7952052963053187e-05, "loss": 0.933, "step": 1075 }, { "epoch": 0.2314476231447623, "grad_norm": 0.0, "learning_rate": 1.794782642874681e-05, "loss": 0.9392, "step": 1076 }, { "epoch": 0.23166272316627232, "grad_norm": 0.0, "learning_rate": 1.7943596036214715e-05, "loss": 0.8855, "step": 1077 }, { "epoch": 0.23187782318778233, "grad_norm": 0.0, "learning_rate": 1.793936178751053e-05, "loss": 0.9453, "step": 1078 }, { "epoch": 0.2320929232092923, "grad_norm": 0.0, "learning_rate": 1.7935123684689735e-05, "loss": 0.9418, "step": 1079 }, { "epoch": 0.23230802323080232, "grad_norm": 0.0, "learning_rate": 1.7930881729809702e-05, "loss": 0.8568, "step": 1080 }, { "epoch": 0.23252312325231234, "grad_norm": 0.0, "learning_rate": 1.7926635924929656e-05, "loss": 0.9345, "step": 1081 }, { "epoch": 0.23273822327382232, "grad_norm": 0.0, "learning_rate": 1.79223862721107e-05, "loss": 0.9046, "step": 1082 }, { "epoch": 0.23295332329533233, "grad_norm": 0.0, "learning_rate": 1.7918132773415807e-05, "loss": 0.9445, "step": 1083 }, { "epoch": 0.23316842331684234, "grad_norm": 0.0, "learning_rate": 1.7913875430909807e-05, "loss": 0.867, "step": 1084 }, { "epoch": 0.23338352333835233, "grad_norm": 0.0, "learning_rate": 1.7909614246659406e-05, "loss": 0.9717, "step": 1085 }, { "epoch": 0.23359862335986234, "grad_norm": 0.0, "learning_rate": 1.7905349222733172e-05, "loss": 0.951, "step": 1086 }, { "epoch": 0.23381372338137235, "grad_norm": 0.0, "learning_rate": 1.790108036120153e-05, "loss": 0.9438, "step": 1087 }, { "epoch": 0.23402882340288234, "grad_norm": 0.0, "learning_rate": 1.789680766413678e-05, "loss": 0.945, "step": 1088 }, { "epoch": 0.23424392342439235, "grad_norm": 0.0, "learning_rate": 1.7892531133613074e-05, "loss": 0.9401, "step": 1089 }, { "epoch": 0.23445902344590233, "grad_norm": 0.0, "learning_rate": 1.788825077170643e-05, "loss": 0.9067, "step": 1090 }, { "epoch": 0.23467412346741234, "grad_norm": 0.0, "learning_rate": 1.7883966580494722e-05, "loss": 0.9348, "step": 1091 }, { "epoch": 0.23488922348892236, "grad_norm": 0.0, "learning_rate": 1.7879678562057688e-05, "loss": 0.9277, "step": 1092 }, { "epoch": 0.23510432351043234, "grad_norm": 0.0, "learning_rate": 1.7875386718476924e-05, "loss": 0.9813, "step": 1093 }, { "epoch": 0.23531942353194235, "grad_norm": 0.0, "learning_rate": 1.7871091051835873e-05, "loss": 0.8442, "step": 1094 }, { "epoch": 0.23553452355345236, "grad_norm": 0.0, "learning_rate": 1.7866791564219853e-05, "loss": 0.8753, "step": 1095 }, { "epoch": 0.23574962357496235, "grad_norm": 0.0, "learning_rate": 1.786248825771601e-05, "loss": 0.8609, "step": 1096 }, { "epoch": 0.23596472359647236, "grad_norm": 0.0, "learning_rate": 1.785818113441337e-05, "loss": 0.9574, "step": 1097 }, { "epoch": 0.23617982361798237, "grad_norm": 0.0, "learning_rate": 1.78538701964028e-05, "loss": 0.9111, "step": 1098 }, { "epoch": 0.23639492363949235, "grad_norm": 0.0, "learning_rate": 1.784955544577701e-05, "loss": 0.9431, "step": 1099 }, { "epoch": 0.23661002366100237, "grad_norm": 0.0, "learning_rate": 1.7845236884630583e-05, "loss": 0.9375, "step": 1100 }, { "epoch": 0.23682512368251238, "grad_norm": 0.0, "learning_rate": 1.7840914515059934e-05, "loss": 0.9123, "step": 1101 }, { "epoch": 0.23704022370402236, "grad_norm": 0.0, "learning_rate": 1.7836588339163324e-05, "loss": 0.9345, "step": 1102 }, { "epoch": 0.23725532372553237, "grad_norm": 0.0, "learning_rate": 1.7832258359040885e-05, "loss": 0.9212, "step": 1103 }, { "epoch": 0.2374704237470424, "grad_norm": 0.0, "learning_rate": 1.7827924576794568e-05, "loss": 0.811, "step": 1104 }, { "epoch": 0.23768552376855237, "grad_norm": 0.0, "learning_rate": 1.782358699452819e-05, "loss": 0.9544, "step": 1105 }, { "epoch": 0.23790062379006238, "grad_norm": 0.0, "learning_rate": 1.78192456143474e-05, "loss": 0.9565, "step": 1106 }, { "epoch": 0.2381157238115724, "grad_norm": 0.0, "learning_rate": 1.7814900438359694e-05, "loss": 0.9163, "step": 1107 }, { "epoch": 0.23833082383308238, "grad_norm": 0.0, "learning_rate": 1.781055146867442e-05, "loss": 0.9068, "step": 1108 }, { "epoch": 0.2385459238545924, "grad_norm": 0.0, "learning_rate": 1.7806198707402754e-05, "loss": 0.8795, "step": 1109 }, { "epoch": 0.23876102387610237, "grad_norm": 0.0, "learning_rate": 1.7801842156657718e-05, "loss": 0.9681, "step": 1110 }, { "epoch": 0.23897612389761239, "grad_norm": 0.0, "learning_rate": 1.7797481818554177e-05, "loss": 0.9561, "step": 1111 }, { "epoch": 0.2391912239191224, "grad_norm": 0.0, "learning_rate": 1.7793117695208828e-05, "loss": 0.9202, "step": 1112 }, { "epoch": 0.23940632394063238, "grad_norm": 0.0, "learning_rate": 1.7788749788740213e-05, "loss": 0.897, "step": 1113 }, { "epoch": 0.2396214239621424, "grad_norm": 0.0, "learning_rate": 1.7784378101268703e-05, "loss": 0.9249, "step": 1114 }, { "epoch": 0.2398365239836524, "grad_norm": 0.0, "learning_rate": 1.7780002634916512e-05, "loss": 0.9146, "step": 1115 }, { "epoch": 0.2400516240051624, "grad_norm": 0.0, "learning_rate": 1.7775623391807683e-05, "loss": 0.9302, "step": 1116 }, { "epoch": 0.2402667240266724, "grad_norm": 0.0, "learning_rate": 1.777124037406809e-05, "loss": 0.9865, "step": 1117 }, { "epoch": 0.2404818240481824, "grad_norm": 0.0, "learning_rate": 1.7766853583825447e-05, "loss": 0.9255, "step": 1118 }, { "epoch": 0.2406969240696924, "grad_norm": 0.0, "learning_rate": 1.7762463023209296e-05, "loss": 0.9343, "step": 1119 }, { "epoch": 0.2409120240912024, "grad_norm": 0.0, "learning_rate": 1.7758068694351007e-05, "loss": 0.957, "step": 1120 }, { "epoch": 0.24112712411271242, "grad_norm": 0.0, "learning_rate": 1.7753670599383784e-05, "loss": 0.9204, "step": 1121 }, { "epoch": 0.2413422241342224, "grad_norm": 0.0, "learning_rate": 1.7749268740442652e-05, "loss": 0.947, "step": 1122 }, { "epoch": 0.24155732415573242, "grad_norm": 0.0, "learning_rate": 1.774486311966447e-05, "loss": 0.8804, "step": 1123 }, { "epoch": 0.24177242417724243, "grad_norm": 0.0, "learning_rate": 1.7740453739187922e-05, "loss": 0.9007, "step": 1124 }, { "epoch": 0.2419875241987524, "grad_norm": 0.0, "learning_rate": 1.7736040601153517e-05, "loss": 0.949, "step": 1125 }, { "epoch": 0.24220262422026242, "grad_norm": 0.0, "learning_rate": 1.7731623707703584e-05, "loss": 0.9155, "step": 1126 }, { "epoch": 0.24241772424177244, "grad_norm": 0.0, "learning_rate": 1.7727203060982273e-05, "loss": 0.9138, "step": 1127 }, { "epoch": 0.24263282426328242, "grad_norm": 0.0, "learning_rate": 1.7722778663135568e-05, "loss": 0.8795, "step": 1128 }, { "epoch": 0.24284792428479243, "grad_norm": 0.0, "learning_rate": 1.771835051631127e-05, "loss": 0.9158, "step": 1129 }, { "epoch": 0.24306302430630244, "grad_norm": 0.0, "learning_rate": 1.771391862265899e-05, "loss": 0.8936, "step": 1130 }, { "epoch": 0.24327812432781243, "grad_norm": 0.0, "learning_rate": 1.770948298433017e-05, "loss": 1.0101, "step": 1131 }, { "epoch": 0.24349322434932244, "grad_norm": 0.0, "learning_rate": 1.7705043603478058e-05, "loss": 0.943, "step": 1132 }, { "epoch": 0.24370832437083242, "grad_norm": 0.0, "learning_rate": 1.7700600482257732e-05, "loss": 0.8986, "step": 1133 }, { "epoch": 0.24392342439234244, "grad_norm": 0.0, "learning_rate": 1.7696153622826077e-05, "loss": 0.9182, "step": 1134 }, { "epoch": 0.24413852441385245, "grad_norm": 0.0, "learning_rate": 1.769170302734179e-05, "loss": 0.8997, "step": 1135 }, { "epoch": 0.24435362443536243, "grad_norm": 0.0, "learning_rate": 1.7687248697965395e-05, "loss": 0.9676, "step": 1136 }, { "epoch": 0.24456872445687244, "grad_norm": 0.0, "learning_rate": 1.7682790636859217e-05, "loss": 0.9192, "step": 1137 }, { "epoch": 0.24478382447838246, "grad_norm": 0.0, "learning_rate": 1.767832884618739e-05, "loss": 0.9487, "step": 1138 }, { "epoch": 0.24499892449989244, "grad_norm": 0.0, "learning_rate": 1.767386332811587e-05, "loss": 0.8918, "step": 1139 }, { "epoch": 0.24521402452140245, "grad_norm": 0.0, "learning_rate": 1.7669394084812417e-05, "loss": 0.9278, "step": 1140 }, { "epoch": 0.24542912454291246, "grad_norm": 0.0, "learning_rate": 1.7664921118446595e-05, "loss": 0.9632, "step": 1141 }, { "epoch": 0.24564422456442245, "grad_norm": 0.0, "learning_rate": 1.766044443118978e-05, "loss": 0.8185, "step": 1142 }, { "epoch": 0.24585932458593246, "grad_norm": 0.0, "learning_rate": 1.7655964025215158e-05, "loss": 0.8948, "step": 1143 }, { "epoch": 0.24607442460744247, "grad_norm": 0.0, "learning_rate": 1.765147990269771e-05, "loss": 0.8928, "step": 1144 }, { "epoch": 0.24628952462895246, "grad_norm": 0.0, "learning_rate": 1.7646992065814233e-05, "loss": 0.8884, "step": 1145 }, { "epoch": 0.24650462465046247, "grad_norm": 0.0, "learning_rate": 1.7642500516743318e-05, "loss": 0.9392, "step": 1146 }, { "epoch": 0.24671972467197248, "grad_norm": 0.0, "learning_rate": 1.7638005257665356e-05, "loss": 0.9711, "step": 1147 }, { "epoch": 0.24693482469348246, "grad_norm": 0.0, "learning_rate": 1.763350629076255e-05, "loss": 0.9221, "step": 1148 }, { "epoch": 0.24714992471499247, "grad_norm": 0.0, "learning_rate": 1.76290036182189e-05, "loss": 0.9367, "step": 1149 }, { "epoch": 0.2473650247365025, "grad_norm": 0.0, "learning_rate": 1.7624497242220197e-05, "loss": 0.9145, "step": 1150 }, { "epoch": 0.24758012475801247, "grad_norm": 0.0, "learning_rate": 1.7619987164954034e-05, "loss": 0.9248, "step": 1151 }, { "epoch": 0.24779522477952248, "grad_norm": 0.0, "learning_rate": 1.7615473388609806e-05, "loss": 0.9956, "step": 1152 }, { "epoch": 0.24801032480103247, "grad_norm": 0.0, "learning_rate": 1.7610955915378692e-05, "loss": 0.9594, "step": 1153 }, { "epoch": 0.24822542482254248, "grad_norm": 0.0, "learning_rate": 1.760643474745368e-05, "loss": 0.9701, "step": 1154 }, { "epoch": 0.2484405248440525, "grad_norm": 0.0, "learning_rate": 1.7601909887029544e-05, "loss": 0.9441, "step": 1155 }, { "epoch": 0.24865562486556247, "grad_norm": 0.0, "learning_rate": 1.7597381336302848e-05, "loss": 0.9425, "step": 1156 }, { "epoch": 0.2488707248870725, "grad_norm": 0.0, "learning_rate": 1.759284909747195e-05, "loss": 0.8754, "step": 1157 }, { "epoch": 0.2490858249085825, "grad_norm": 0.0, "learning_rate": 1.7588313172737002e-05, "loss": 0.936, "step": 1158 }, { "epoch": 0.24930092493009248, "grad_norm": 0.0, "learning_rate": 1.7583773564299938e-05, "loss": 0.9462, "step": 1159 }, { "epoch": 0.2495160249516025, "grad_norm": 0.0, "learning_rate": 1.7579230274364486e-05, "loss": 0.8976, "step": 1160 }, { "epoch": 0.2497311249731125, "grad_norm": 0.0, "learning_rate": 1.7574683305136162e-05, "loss": 0.9256, "step": 1161 }, { "epoch": 0.2499462249946225, "grad_norm": 0.0, "learning_rate": 1.7570132658822264e-05, "loss": 0.9317, "step": 1162 }, { "epoch": 0.2501613250161325, "grad_norm": 0.0, "learning_rate": 1.7565578337631876e-05, "loss": 0.8749, "step": 1163 }, { "epoch": 0.2503764250376425, "grad_norm": 0.0, "learning_rate": 1.7561020343775862e-05, "loss": 0.8859, "step": 1164 }, { "epoch": 0.2505915250591525, "grad_norm": 0.0, "learning_rate": 1.755645867946688e-05, "loss": 0.9065, "step": 1165 }, { "epoch": 0.2508066250806625, "grad_norm": 0.0, "learning_rate": 1.7551893346919363e-05, "loss": 0.9135, "step": 1166 }, { "epoch": 0.2510217251021725, "grad_norm": 0.0, "learning_rate": 1.7547324348349523e-05, "loss": 0.957, "step": 1167 }, { "epoch": 0.2512368251236825, "grad_norm": 0.0, "learning_rate": 1.754275168597535e-05, "loss": 0.9988, "step": 1168 }, { "epoch": 0.2514519251451925, "grad_norm": 0.0, "learning_rate": 1.7538175362016622e-05, "loss": 0.9433, "step": 1169 }, { "epoch": 0.25166702516670253, "grad_norm": 0.0, "learning_rate": 1.753359537869489e-05, "loss": 1.026, "step": 1170 }, { "epoch": 0.2518821251882125, "grad_norm": 0.0, "learning_rate": 1.752901173823347e-05, "loss": 0.9297, "step": 1171 }, { "epoch": 0.2520972252097225, "grad_norm": 0.0, "learning_rate": 1.752442444285747e-05, "loss": 0.9677, "step": 1172 }, { "epoch": 0.25231232523123254, "grad_norm": 0.0, "learning_rate": 1.751983349479377e-05, "loss": 0.8354, "step": 1173 }, { "epoch": 0.2525274252527425, "grad_norm": 0.0, "learning_rate": 1.751523889627101e-05, "loss": 0.9229, "step": 1174 }, { "epoch": 0.2527425252742525, "grad_norm": 0.0, "learning_rate": 1.7510640649519617e-05, "loss": 0.9658, "step": 1175 }, { "epoch": 0.25295762529576254, "grad_norm": 0.0, "learning_rate": 1.750603875677178e-05, "loss": 0.9644, "step": 1176 }, { "epoch": 0.25317272531727253, "grad_norm": 0.0, "learning_rate": 1.7501433220261463e-05, "loss": 0.954, "step": 1177 }, { "epoch": 0.2533878253387825, "grad_norm": 0.0, "learning_rate": 1.7496824042224393e-05, "loss": 0.8957, "step": 1178 }, { "epoch": 0.25360292536029255, "grad_norm": 0.0, "learning_rate": 1.749221122489807e-05, "loss": 0.9355, "step": 1179 }, { "epoch": 0.25381802538180254, "grad_norm": 0.0, "learning_rate": 1.7487594770521764e-05, "loss": 0.8626, "step": 1180 }, { "epoch": 0.2540331254033125, "grad_norm": 0.0, "learning_rate": 1.7482974681336502e-05, "loss": 0.9312, "step": 1181 }, { "epoch": 0.25424822542482256, "grad_norm": 0.0, "learning_rate": 1.7478350959585074e-05, "loss": 0.9908, "step": 1182 }, { "epoch": 0.25446332544633254, "grad_norm": 0.0, "learning_rate": 1.7473723607512048e-05, "loss": 0.9033, "step": 1183 }, { "epoch": 0.25467842546784253, "grad_norm": 0.0, "learning_rate": 1.7469092627363738e-05, "loss": 0.8185, "step": 1184 }, { "epoch": 0.25489352548935257, "grad_norm": 0.0, "learning_rate": 1.746445802138823e-05, "loss": 0.9368, "step": 1185 }, { "epoch": 0.25510862551086255, "grad_norm": 0.0, "learning_rate": 1.745981979183536e-05, "loss": 0.9934, "step": 1186 }, { "epoch": 0.25532372553237254, "grad_norm": 0.0, "learning_rate": 1.7455177940956743e-05, "loss": 0.9105, "step": 1187 }, { "epoch": 0.2555388255538826, "grad_norm": 0.0, "learning_rate": 1.745053247100572e-05, "loss": 0.9465, "step": 1188 }, { "epoch": 0.25575392557539256, "grad_norm": 0.0, "learning_rate": 1.7445883384237423e-05, "loss": 0.9586, "step": 1189 }, { "epoch": 0.25596902559690254, "grad_norm": 0.0, "learning_rate": 1.7441230682908713e-05, "loss": 0.9355, "step": 1190 }, { "epoch": 0.2561841256184126, "grad_norm": 0.0, "learning_rate": 1.7436574369278224e-05, "loss": 0.8793, "step": 1191 }, { "epoch": 0.25639922563992257, "grad_norm": 0.0, "learning_rate": 1.7431914445606334e-05, "loss": 0.9604, "step": 1192 }, { "epoch": 0.25661432566143255, "grad_norm": 0.0, "learning_rate": 1.742725091415517e-05, "loss": 0.8647, "step": 1193 }, { "epoch": 0.2568294256829426, "grad_norm": 0.0, "learning_rate": 1.7422583777188625e-05, "loss": 0.9573, "step": 1194 }, { "epoch": 0.2570445257044526, "grad_norm": 0.0, "learning_rate": 1.7417913036972328e-05, "loss": 0.9553, "step": 1195 }, { "epoch": 0.25725962572596256, "grad_norm": 0.0, "learning_rate": 1.741323869577366e-05, "loss": 0.911, "step": 1196 }, { "epoch": 0.2574747257474726, "grad_norm": 0.0, "learning_rate": 1.7408560755861762e-05, "loss": 0.9172, "step": 1197 }, { "epoch": 0.2576898257689826, "grad_norm": 0.0, "learning_rate": 1.7403879219507504e-05, "loss": 0.8859, "step": 1198 }, { "epoch": 0.25790492579049257, "grad_norm": 0.0, "learning_rate": 1.7399194088983513e-05, "loss": 0.9185, "step": 1199 }, { "epoch": 0.2581200258120026, "grad_norm": 0.0, "learning_rate": 1.7394505366564155e-05, "loss": 0.8845, "step": 1200 }, { "epoch": 0.2583351258335126, "grad_norm": 0.0, "learning_rate": 1.738981305452555e-05, "loss": 0.9572, "step": 1201 }, { "epoch": 0.2585502258550226, "grad_norm": 0.0, "learning_rate": 1.7385117155145547e-05, "loss": 0.8483, "step": 1202 }, { "epoch": 0.2587653258765326, "grad_norm": 0.0, "learning_rate": 1.7380417670703744e-05, "loss": 0.928, "step": 1203 }, { "epoch": 0.2589804258980426, "grad_norm": 0.0, "learning_rate": 1.7375714603481483e-05, "loss": 0.9026, "step": 1204 }, { "epoch": 0.2591955259195526, "grad_norm": 0.0, "learning_rate": 1.7371007955761833e-05, "loss": 0.9375, "step": 1205 }, { "epoch": 0.25941062594106257, "grad_norm": 0.0, "learning_rate": 1.736629772982961e-05, "loss": 0.9088, "step": 1206 }, { "epoch": 0.2596257259625726, "grad_norm": 0.0, "learning_rate": 1.7361583927971367e-05, "loss": 0.9633, "step": 1207 }, { "epoch": 0.2598408259840826, "grad_norm": 0.0, "learning_rate": 1.7356866552475397e-05, "loss": 0.9222, "step": 1208 }, { "epoch": 0.2600559260055926, "grad_norm": 0.0, "learning_rate": 1.7352145605631714e-05, "loss": 0.945, "step": 1209 }, { "epoch": 0.2602710260271026, "grad_norm": 0.0, "learning_rate": 1.734742108973208e-05, "loss": 0.899, "step": 1210 }, { "epoch": 0.2604861260486126, "grad_norm": 0.0, "learning_rate": 1.7342693007069974e-05, "loss": 0.928, "step": 1211 }, { "epoch": 0.2607012260701226, "grad_norm": 0.0, "learning_rate": 1.7337961359940633e-05, "loss": 0.9018, "step": 1212 }, { "epoch": 0.2609163260916326, "grad_norm": 0.0, "learning_rate": 1.7333226150640988e-05, "loss": 0.8354, "step": 1213 }, { "epoch": 0.2611314261131426, "grad_norm": 0.0, "learning_rate": 1.732848738146973e-05, "loss": 0.908, "step": 1214 }, { "epoch": 0.2613465261346526, "grad_norm": 0.0, "learning_rate": 1.7323745054727264e-05, "loss": 0.9702, "step": 1215 }, { "epoch": 0.26156162615616263, "grad_norm": 0.0, "learning_rate": 1.731899917271573e-05, "loss": 0.922, "step": 1216 }, { "epoch": 0.2617767261776726, "grad_norm": 0.0, "learning_rate": 1.7314249737738977e-05, "loss": 0.9257, "step": 1217 }, { "epoch": 0.2619918261991826, "grad_norm": 0.0, "learning_rate": 1.73094967521026e-05, "loss": 0.8946, "step": 1218 }, { "epoch": 0.26220692622069264, "grad_norm": 0.0, "learning_rate": 1.73047402181139e-05, "loss": 1.0352, "step": 1219 }, { "epoch": 0.2624220262422026, "grad_norm": 0.0, "learning_rate": 1.729998013808192e-05, "loss": 0.906, "step": 1220 }, { "epoch": 0.2626371262637126, "grad_norm": 0.0, "learning_rate": 1.7295216514317407e-05, "loss": 0.8958, "step": 1221 }, { "epoch": 0.26285222628522265, "grad_norm": 0.0, "learning_rate": 1.729044934913283e-05, "loss": 0.9234, "step": 1222 }, { "epoch": 0.26306732630673263, "grad_norm": 0.0, "learning_rate": 1.7285678644842384e-05, "loss": 0.9542, "step": 1223 }, { "epoch": 0.2632824263282426, "grad_norm": 0.0, "learning_rate": 1.728090440376198e-05, "loss": 0.9554, "step": 1224 }, { "epoch": 0.26349752634975265, "grad_norm": 0.0, "learning_rate": 1.727612662820925e-05, "loss": 0.9607, "step": 1225 }, { "epoch": 0.26371262637126264, "grad_norm": 0.0, "learning_rate": 1.7271345320503533e-05, "loss": 0.9878, "step": 1226 }, { "epoch": 0.2639277263927726, "grad_norm": 0.0, "learning_rate": 1.7266560482965883e-05, "loss": 0.9319, "step": 1227 }, { "epoch": 0.26414282641428266, "grad_norm": 0.0, "learning_rate": 1.7261772117919077e-05, "loss": 0.963, "step": 1228 }, { "epoch": 0.26435792643579265, "grad_norm": 0.0, "learning_rate": 1.7256980227687596e-05, "loss": 0.9772, "step": 1229 }, { "epoch": 0.26457302645730263, "grad_norm": 0.0, "learning_rate": 1.7252184814597633e-05, "loss": 0.9487, "step": 1230 }, { "epoch": 0.26478812647881267, "grad_norm": 0.0, "learning_rate": 1.7247385880977097e-05, "loss": 0.8952, "step": 1231 }, { "epoch": 0.26500322650032265, "grad_norm": 0.0, "learning_rate": 1.7242583429155602e-05, "loss": 0.9095, "step": 1232 }, { "epoch": 0.26521832652183264, "grad_norm": 0.0, "learning_rate": 1.7237777461464468e-05, "loss": 0.8984, "step": 1233 }, { "epoch": 0.2654334265433427, "grad_norm": 0.0, "learning_rate": 1.7232967980236724e-05, "loss": 0.9582, "step": 1234 }, { "epoch": 0.26564852656485266, "grad_norm": 0.0, "learning_rate": 1.722815498780711e-05, "loss": 0.8788, "step": 1235 }, { "epoch": 0.26586362658636264, "grad_norm": 0.0, "learning_rate": 1.7223338486512054e-05, "loss": 0.862, "step": 1236 }, { "epoch": 0.2660787266078727, "grad_norm": 0.0, "learning_rate": 1.721851847868971e-05, "loss": 0.9502, "step": 1237 }, { "epoch": 0.26629382662938267, "grad_norm": 0.0, "learning_rate": 1.7213694966679913e-05, "loss": 0.934, "step": 1238 }, { "epoch": 0.26650892665089265, "grad_norm": 0.0, "learning_rate": 1.7208867952824215e-05, "loss": 0.8429, "step": 1239 }, { "epoch": 0.2667240266724027, "grad_norm": 0.0, "learning_rate": 1.7204037439465858e-05, "loss": 0.9647, "step": 1240 }, { "epoch": 0.2669391266939127, "grad_norm": 0.0, "learning_rate": 1.7199203428949788e-05, "loss": 0.9493, "step": 1241 }, { "epoch": 0.26715422671542266, "grad_norm": 0.0, "learning_rate": 1.7194365923622646e-05, "loss": 0.9742, "step": 1242 }, { "epoch": 0.2673693267369327, "grad_norm": 0.0, "learning_rate": 1.7189524925832772e-05, "loss": 1.0073, "step": 1243 }, { "epoch": 0.2675844267584427, "grad_norm": 0.0, "learning_rate": 1.71846804379302e-05, "loss": 0.908, "step": 1244 }, { "epoch": 0.26779952677995267, "grad_norm": 0.0, "learning_rate": 1.7179832462266654e-05, "loss": 0.8983, "step": 1245 }, { "epoch": 0.2680146268014627, "grad_norm": 0.0, "learning_rate": 1.7174981001195562e-05, "loss": 0.8735, "step": 1246 }, { "epoch": 0.2682297268229727, "grad_norm": 0.0, "learning_rate": 1.717012605707203e-05, "loss": 0.9209, "step": 1247 }, { "epoch": 0.2684448268444827, "grad_norm": 0.0, "learning_rate": 1.7165267632252872e-05, "loss": 0.9847, "step": 1248 }, { "epoch": 0.26865992686599266, "grad_norm": 0.0, "learning_rate": 1.7160405729096572e-05, "loss": 0.9389, "step": 1249 }, { "epoch": 0.2688750268875027, "grad_norm": 0.0, "learning_rate": 1.715554034996332e-05, "loss": 0.9288, "step": 1250 }, { "epoch": 0.2690901269090127, "grad_norm": 0.0, "learning_rate": 1.7150671497214976e-05, "loss": 0.9833, "step": 1251 }, { "epoch": 0.26930522693052267, "grad_norm": 0.0, "learning_rate": 1.714579917321511e-05, "loss": 0.9838, "step": 1252 }, { "epoch": 0.2695203269520327, "grad_norm": 0.0, "learning_rate": 1.7140923380328947e-05, "loss": 0.9452, "step": 1253 }, { "epoch": 0.2697354269735427, "grad_norm": 0.0, "learning_rate": 1.7136044120923427e-05, "loss": 0.9357, "step": 1254 }, { "epoch": 0.2699505269950527, "grad_norm": 0.0, "learning_rate": 1.7131161397367146e-05, "loss": 0.8808, "step": 1255 }, { "epoch": 0.2701656270165627, "grad_norm": 0.0, "learning_rate": 1.7126275212030403e-05, "loss": 0.947, "step": 1256 }, { "epoch": 0.2703807270380727, "grad_norm": 0.0, "learning_rate": 1.7121385567285162e-05, "loss": 0.9193, "step": 1257 }, { "epoch": 0.2705958270595827, "grad_norm": 0.0, "learning_rate": 1.7116492465505072e-05, "loss": 0.9182, "step": 1258 }, { "epoch": 0.2708109270810927, "grad_norm": 0.0, "learning_rate": 1.7111595909065466e-05, "loss": 1.0001, "step": 1259 }, { "epoch": 0.2710260271026027, "grad_norm": 0.0, "learning_rate": 1.7106695900343344e-05, "loss": 0.8793, "step": 1260 }, { "epoch": 0.2712411271241127, "grad_norm": 0.0, "learning_rate": 1.7101792441717386e-05, "loss": 0.95, "step": 1261 }, { "epoch": 0.27145622714562273, "grad_norm": 0.0, "learning_rate": 1.709688553556795e-05, "loss": 0.8618, "step": 1262 }, { "epoch": 0.2716713271671327, "grad_norm": 0.0, "learning_rate": 1.7091975184277066e-05, "loss": 0.8944, "step": 1263 }, { "epoch": 0.2718864271886427, "grad_norm": 0.0, "learning_rate": 1.708706139022843e-05, "loss": 0.9969, "step": 1264 }, { "epoch": 0.27210152721015274, "grad_norm": 0.0, "learning_rate": 1.708214415580742e-05, "loss": 0.9293, "step": 1265 }, { "epoch": 0.2723166272316627, "grad_norm": 0.0, "learning_rate": 1.7077223483401072e-05, "loss": 0.9909, "step": 1266 }, { "epoch": 0.2725317272531727, "grad_norm": 0.0, "learning_rate": 1.7072299375398107e-05, "loss": 0.9401, "step": 1267 }, { "epoch": 0.27274682727468275, "grad_norm": 0.0, "learning_rate": 1.7067371834188895e-05, "loss": 0.8919, "step": 1268 }, { "epoch": 0.27296192729619273, "grad_norm": 0.0, "learning_rate": 1.7062440862165486e-05, "loss": 0.9286, "step": 1269 }, { "epoch": 0.2731770273177027, "grad_norm": 0.0, "learning_rate": 1.705750646172159e-05, "loss": 0.9373, "step": 1270 }, { "epoch": 0.27339212733921275, "grad_norm": 0.0, "learning_rate": 1.705256863525259e-05, "loss": 0.8596, "step": 1271 }, { "epoch": 0.27360722736072274, "grad_norm": 0.0, "learning_rate": 1.7047627385155513e-05, "loss": 1.0048, "step": 1272 }, { "epoch": 0.2738223273822327, "grad_norm": 0.0, "learning_rate": 1.7042682713829067e-05, "loss": 0.85, "step": 1273 }, { "epoch": 0.27403742740374276, "grad_norm": 0.0, "learning_rate": 1.7037734623673616e-05, "loss": 0.9022, "step": 1274 }, { "epoch": 0.27425252742525275, "grad_norm": 0.0, "learning_rate": 1.7032783117091172e-05, "loss": 0.8225, "step": 1275 }, { "epoch": 0.27446762744676273, "grad_norm": 0.0, "learning_rate": 1.7027828196485426e-05, "loss": 0.9309, "step": 1276 }, { "epoch": 0.27468272746827277, "grad_norm": 0.0, "learning_rate": 1.702286986426171e-05, "loss": 0.9522, "step": 1277 }, { "epoch": 0.27489782748978275, "grad_norm": 0.0, "learning_rate": 1.7017908122827015e-05, "loss": 0.9399, "step": 1278 }, { "epoch": 0.27511292751129274, "grad_norm": 0.0, "learning_rate": 1.7012942974589993e-05, "loss": 0.8938, "step": 1279 }, { "epoch": 0.2753280275328028, "grad_norm": 0.0, "learning_rate": 1.700797442196094e-05, "loss": 0.9334, "step": 1280 }, { "epoch": 0.27554312755431276, "grad_norm": 0.0, "learning_rate": 1.700300246735182e-05, "loss": 0.9104, "step": 1281 }, { "epoch": 0.27575822757582275, "grad_norm": 0.0, "learning_rate": 1.6998027113176232e-05, "loss": 0.9384, "step": 1282 }, { "epoch": 0.2759733275973328, "grad_norm": 0.0, "learning_rate": 1.699304836184944e-05, "loss": 0.9585, "step": 1283 }, { "epoch": 0.27618842761884277, "grad_norm": 0.0, "learning_rate": 1.6988066215788336e-05, "loss": 0.8913, "step": 1284 }, { "epoch": 0.27640352764035275, "grad_norm": 0.0, "learning_rate": 1.6983080677411488e-05, "loss": 0.9649, "step": 1285 }, { "epoch": 0.2766186276618628, "grad_norm": 0.0, "learning_rate": 1.6978091749139088e-05, "loss": 0.9119, "step": 1286 }, { "epoch": 0.2768337276833728, "grad_norm": 0.0, "learning_rate": 1.6973099433392985e-05, "loss": 0.9106, "step": 1287 }, { "epoch": 0.27704882770488276, "grad_norm": 0.0, "learning_rate": 1.696810373259667e-05, "loss": 0.9041, "step": 1288 }, { "epoch": 0.2772639277263928, "grad_norm": 0.0, "learning_rate": 1.696310464917527e-05, "loss": 0.9965, "step": 1289 }, { "epoch": 0.2774790277479028, "grad_norm": 0.0, "learning_rate": 1.6958102185555572e-05, "loss": 0.9375, "step": 1290 }, { "epoch": 0.27769412776941277, "grad_norm": 0.0, "learning_rate": 1.6953096344165988e-05, "loss": 0.9413, "step": 1291 }, { "epoch": 0.27790922779092275, "grad_norm": 0.0, "learning_rate": 1.6948087127436573e-05, "loss": 0.9131, "step": 1292 }, { "epoch": 0.2781243278124328, "grad_norm": 0.0, "learning_rate": 1.6943074537799015e-05, "loss": 0.9294, "step": 1293 }, { "epoch": 0.2783394278339428, "grad_norm": 0.0, "learning_rate": 1.693805857768666e-05, "loss": 0.8807, "step": 1294 }, { "epoch": 0.27855452785545276, "grad_norm": 0.0, "learning_rate": 1.693303924953447e-05, "loss": 0.9086, "step": 1295 }, { "epoch": 0.2787696278769628, "grad_norm": 0.0, "learning_rate": 1.6928016555779047e-05, "loss": 0.9306, "step": 1296 }, { "epoch": 0.2789847278984728, "grad_norm": 0.0, "learning_rate": 1.6922990498858624e-05, "loss": 0.9291, "step": 1297 }, { "epoch": 0.27919982791998277, "grad_norm": 0.0, "learning_rate": 1.691796108121308e-05, "loss": 0.9442, "step": 1298 }, { "epoch": 0.2794149279414928, "grad_norm": 0.0, "learning_rate": 1.6912928305283914e-05, "loss": 0.8944, "step": 1299 }, { "epoch": 0.2796300279630028, "grad_norm": 0.0, "learning_rate": 1.6907892173514255e-05, "loss": 0.948, "step": 1300 }, { "epoch": 0.2798451279845128, "grad_norm": 0.0, "learning_rate": 1.6902852688348866e-05, "loss": 0.9667, "step": 1301 }, { "epoch": 0.2800602280060228, "grad_norm": 0.0, "learning_rate": 1.689780985223413e-05, "loss": 0.9982, "step": 1302 }, { "epoch": 0.2802753280275328, "grad_norm": 0.0, "learning_rate": 1.6892763667618068e-05, "loss": 0.8957, "step": 1303 }, { "epoch": 0.2804904280490428, "grad_norm": 0.0, "learning_rate": 1.6887714136950322e-05, "loss": 0.9446, "step": 1304 }, { "epoch": 0.2807055280705528, "grad_norm": 0.0, "learning_rate": 1.6882661262682153e-05, "loss": 0.9011, "step": 1305 }, { "epoch": 0.2809206280920628, "grad_norm": 0.0, "learning_rate": 1.6877605047266447e-05, "loss": 0.9229, "step": 1306 }, { "epoch": 0.2811357281135728, "grad_norm": 0.0, "learning_rate": 1.6872545493157718e-05, "loss": 0.9351, "step": 1307 }, { "epoch": 0.28135082813508283, "grad_norm": 0.0, "learning_rate": 1.68674826028121e-05, "loss": 0.9743, "step": 1308 }, { "epoch": 0.2815659281565928, "grad_norm": 0.0, "learning_rate": 1.686241637868734e-05, "loss": 0.8686, "step": 1309 }, { "epoch": 0.2817810281781028, "grad_norm": 0.0, "learning_rate": 1.68573468232428e-05, "loss": 0.9688, "step": 1310 }, { "epoch": 0.28199612819961284, "grad_norm": 0.0, "learning_rate": 1.6852273938939475e-05, "loss": 1.0057, "step": 1311 }, { "epoch": 0.2822112282211228, "grad_norm": 0.0, "learning_rate": 1.6847197728239967e-05, "loss": 0.9126, "step": 1312 }, { "epoch": 0.2824263282426328, "grad_norm": 0.0, "learning_rate": 1.684211819360849e-05, "loss": 0.981, "step": 1313 }, { "epoch": 0.28264142826414285, "grad_norm": 0.0, "learning_rate": 1.6837035337510873e-05, "loss": 0.8926, "step": 1314 }, { "epoch": 0.28285652828565283, "grad_norm": 0.0, "learning_rate": 1.6831949162414557e-05, "loss": 0.8593, "step": 1315 }, { "epoch": 0.2830716283071628, "grad_norm": 0.0, "learning_rate": 1.6826859670788604e-05, "loss": 0.857, "step": 1316 }, { "epoch": 0.28328672832867285, "grad_norm": 0.0, "learning_rate": 1.682176686510367e-05, "loss": 0.9312, "step": 1317 }, { "epoch": 0.28350182835018284, "grad_norm": 0.0, "learning_rate": 1.6816670747832032e-05, "loss": 0.9467, "step": 1318 }, { "epoch": 0.2837169283716928, "grad_norm": 0.0, "learning_rate": 1.6811571321447568e-05, "loss": 0.9489, "step": 1319 }, { "epoch": 0.28393202839320286, "grad_norm": 0.0, "learning_rate": 1.680646858842576e-05, "loss": 0.9092, "step": 1320 }, { "epoch": 0.28414712841471285, "grad_norm": 0.0, "learning_rate": 1.6801362551243713e-05, "loss": 0.8818, "step": 1321 }, { "epoch": 0.28436222843622283, "grad_norm": 0.0, "learning_rate": 1.679625321238011e-05, "loss": 0.9222, "step": 1322 }, { "epoch": 0.28457732845773287, "grad_norm": 0.0, "learning_rate": 1.6791140574315256e-05, "loss": 0.912, "step": 1323 }, { "epoch": 0.28479242847924285, "grad_norm": 0.0, "learning_rate": 1.6786024639531048e-05, "loss": 0.9131, "step": 1324 }, { "epoch": 0.28500752850075284, "grad_norm": 0.0, "learning_rate": 1.6780905410510992e-05, "loss": 0.8294, "step": 1325 }, { "epoch": 0.2852226285222629, "grad_norm": 0.0, "learning_rate": 1.677578288974018e-05, "loss": 0.9187, "step": 1326 }, { "epoch": 0.28543772854377286, "grad_norm": 0.0, "learning_rate": 1.6770657079705313e-05, "loss": 0.8773, "step": 1327 }, { "epoch": 0.28565282856528285, "grad_norm": 0.0, "learning_rate": 1.676552798289469e-05, "loss": 0.9423, "step": 1328 }, { "epoch": 0.2858679285867929, "grad_norm": 0.0, "learning_rate": 1.6760395601798192e-05, "loss": 0.9262, "step": 1329 }, { "epoch": 0.28608302860830287, "grad_norm": 0.0, "learning_rate": 1.675525993890731e-05, "loss": 0.8916, "step": 1330 }, { "epoch": 0.28629812862981285, "grad_norm": 0.0, "learning_rate": 1.6750120996715123e-05, "loss": 0.9801, "step": 1331 }, { "epoch": 0.28651322865132284, "grad_norm": 0.0, "learning_rate": 1.6744978777716294e-05, "loss": 0.8652, "step": 1332 }, { "epoch": 0.2867283286728329, "grad_norm": 0.0, "learning_rate": 1.6739833284407083e-05, "loss": 0.9483, "step": 1333 }, { "epoch": 0.28694342869434286, "grad_norm": 0.0, "learning_rate": 1.6734684519285346e-05, "loss": 0.8792, "step": 1334 }, { "epoch": 0.28715852871585285, "grad_norm": 0.0, "learning_rate": 1.6729532484850516e-05, "loss": 0.9063, "step": 1335 }, { "epoch": 0.2873736287373629, "grad_norm": 0.0, "learning_rate": 1.672437718360362e-05, "loss": 0.8741, "step": 1336 }, { "epoch": 0.28758872875887287, "grad_norm": 0.0, "learning_rate": 1.6719218618047263e-05, "loss": 0.9216, "step": 1337 }, { "epoch": 0.28780382878038285, "grad_norm": 0.0, "learning_rate": 1.6714056790685652e-05, "loss": 0.9989, "step": 1338 }, { "epoch": 0.2880189288018929, "grad_norm": 0.0, "learning_rate": 1.6708891704024557e-05, "loss": 0.9175, "step": 1339 }, { "epoch": 0.2882340288234029, "grad_norm": 0.0, "learning_rate": 1.6703723360571338e-05, "loss": 0.9802, "step": 1340 }, { "epoch": 0.28844912884491286, "grad_norm": 0.0, "learning_rate": 1.6698551762834944e-05, "loss": 0.919, "step": 1341 }, { "epoch": 0.2886642288664229, "grad_norm": 0.0, "learning_rate": 1.6693376913325892e-05, "loss": 0.8843, "step": 1342 }, { "epoch": 0.2888793288879329, "grad_norm": 0.0, "learning_rate": 1.6688198814556283e-05, "loss": 0.9027, "step": 1343 }, { "epoch": 0.28909442890944287, "grad_norm": 0.0, "learning_rate": 1.6683017469039794e-05, "loss": 0.9616, "step": 1344 }, { "epoch": 0.2893095289309529, "grad_norm": 0.0, "learning_rate": 1.667783287929168e-05, "loss": 0.9386, "step": 1345 }, { "epoch": 0.2895246289524629, "grad_norm": 0.0, "learning_rate": 1.667264504782877e-05, "loss": 0.9134, "step": 1346 }, { "epoch": 0.2897397289739729, "grad_norm": 0.0, "learning_rate": 1.6667453977169467e-05, "loss": 0.895, "step": 1347 }, { "epoch": 0.2899548289954829, "grad_norm": 0.0, "learning_rate": 1.6662259669833742e-05, "loss": 0.9209, "step": 1348 }, { "epoch": 0.2901699290169929, "grad_norm": 0.0, "learning_rate": 1.6657062128343143e-05, "loss": 0.8852, "step": 1349 }, { "epoch": 0.2903850290385029, "grad_norm": 0.0, "learning_rate": 1.665186135522079e-05, "loss": 0.8701, "step": 1350 }, { "epoch": 0.2906001290600129, "grad_norm": 0.0, "learning_rate": 1.6646657352991362e-05, "loss": 0.9703, "step": 1351 }, { "epoch": 0.2908152290815229, "grad_norm": 0.0, "learning_rate": 1.6641450124181115e-05, "loss": 0.8965, "step": 1352 }, { "epoch": 0.2910303291030329, "grad_norm": 0.0, "learning_rate": 1.6636239671317865e-05, "loss": 0.9075, "step": 1353 }, { "epoch": 0.29124542912454293, "grad_norm": 0.0, "learning_rate": 1.6631025996930998e-05, "loss": 0.9386, "step": 1354 }, { "epoch": 0.2914605291460529, "grad_norm": 0.0, "learning_rate": 1.6625809103551462e-05, "loss": 1.0201, "step": 1355 }, { "epoch": 0.2916756291675629, "grad_norm": 0.0, "learning_rate": 1.6620588993711764e-05, "loss": 0.9316, "step": 1356 }, { "epoch": 0.29189072918907294, "grad_norm": 0.0, "learning_rate": 1.661536566994598e-05, "loss": 0.9296, "step": 1357 }, { "epoch": 0.2921058292105829, "grad_norm": 0.0, "learning_rate": 1.661013913478974e-05, "loss": 0.9103, "step": 1358 }, { "epoch": 0.2923209292320929, "grad_norm": 0.0, "learning_rate": 1.660490939078023e-05, "loss": 0.8853, "step": 1359 }, { "epoch": 0.29253602925360295, "grad_norm": 0.0, "learning_rate": 1.6599676440456208e-05, "loss": 0.8812, "step": 1360 }, { "epoch": 0.29275112927511293, "grad_norm": 0.0, "learning_rate": 1.659444028635797e-05, "loss": 0.9172, "step": 1361 }, { "epoch": 0.2929662292966229, "grad_norm": 0.0, "learning_rate": 1.6589200931027384e-05, "loss": 0.9163, "step": 1362 }, { "epoch": 0.29318132931813295, "grad_norm": 0.0, "learning_rate": 1.6583958377007857e-05, "loss": 0.9041, "step": 1363 }, { "epoch": 0.29339642933964294, "grad_norm": 0.0, "learning_rate": 1.6578712626844365e-05, "loss": 0.8541, "step": 1364 }, { "epoch": 0.2936115293611529, "grad_norm": 0.0, "learning_rate": 1.6573463683083417e-05, "loss": 0.8958, "step": 1365 }, { "epoch": 0.29382662938266296, "grad_norm": 0.0, "learning_rate": 1.656821154827309e-05, "loss": 0.9218, "step": 1366 }, { "epoch": 0.29404172940417295, "grad_norm": 0.0, "learning_rate": 1.6562956224962996e-05, "loss": 0.8825, "step": 1367 }, { "epoch": 0.29425682942568293, "grad_norm": 0.0, "learning_rate": 1.65576977157043e-05, "loss": 0.9556, "step": 1368 }, { "epoch": 0.29447192944719297, "grad_norm": 0.0, "learning_rate": 1.6552436023049724e-05, "loss": 0.9329, "step": 1369 }, { "epoch": 0.29468702946870295, "grad_norm": 0.0, "learning_rate": 1.6547171149553517e-05, "loss": 0.9513, "step": 1370 }, { "epoch": 0.29490212949021294, "grad_norm": 0.0, "learning_rate": 1.6541903097771485e-05, "loss": 0.9487, "step": 1371 }, { "epoch": 0.295117229511723, "grad_norm": 0.0, "learning_rate": 1.6536631870260967e-05, "loss": 0.9583, "step": 1372 }, { "epoch": 0.29533232953323296, "grad_norm": 0.0, "learning_rate": 1.653135746958086e-05, "loss": 0.8895, "step": 1373 }, { "epoch": 0.29554742955474295, "grad_norm": 0.0, "learning_rate": 1.652607989829158e-05, "loss": 0.9609, "step": 1374 }, { "epoch": 0.29576252957625293, "grad_norm": 0.0, "learning_rate": 1.65207991589551e-05, "loss": 0.8839, "step": 1375 }, { "epoch": 0.29597762959776297, "grad_norm": 0.0, "learning_rate": 1.6515515254134918e-05, "loss": 0.9636, "step": 1376 }, { "epoch": 0.29619272961927295, "grad_norm": 0.0, "learning_rate": 1.6510228186396085e-05, "loss": 0.9409, "step": 1377 }, { "epoch": 0.29640782964078294, "grad_norm": 0.0, "learning_rate": 1.6504937958305168e-05, "loss": 0.9473, "step": 1378 }, { "epoch": 0.296622929662293, "grad_norm": 0.0, "learning_rate": 1.6499644572430277e-05, "loss": 0.8595, "step": 1379 }, { "epoch": 0.29683802968380296, "grad_norm": 0.0, "learning_rate": 1.649434803134106e-05, "loss": 0.9355, "step": 1380 }, { "epoch": 0.29705312970531295, "grad_norm": 0.0, "learning_rate": 1.648904833760869e-05, "loss": 0.8648, "step": 1381 }, { "epoch": 0.297268229726823, "grad_norm": 0.0, "learning_rate": 1.648374549380587e-05, "loss": 0.9624, "step": 1382 }, { "epoch": 0.29748332974833297, "grad_norm": 0.0, "learning_rate": 1.6478439502506834e-05, "loss": 0.8119, "step": 1383 }, { "epoch": 0.29769842976984295, "grad_norm": 0.0, "learning_rate": 1.647313036628735e-05, "loss": 0.877, "step": 1384 }, { "epoch": 0.297913529791353, "grad_norm": 0.0, "learning_rate": 1.64678180877247e-05, "loss": 0.9126, "step": 1385 }, { "epoch": 0.298128629812863, "grad_norm": 0.0, "learning_rate": 1.6462502669397703e-05, "loss": 0.9155, "step": 1386 }, { "epoch": 0.29834372983437296, "grad_norm": 0.0, "learning_rate": 1.64571841138867e-05, "loss": 0.9081, "step": 1387 }, { "epoch": 0.298558829855883, "grad_norm": 0.0, "learning_rate": 1.6451862423773545e-05, "loss": 0.8428, "step": 1388 }, { "epoch": 0.298773929877393, "grad_norm": 0.0, "learning_rate": 1.6446537601641628e-05, "loss": 0.9321, "step": 1389 }, { "epoch": 0.29898902989890297, "grad_norm": 0.0, "learning_rate": 1.6441209650075848e-05, "loss": 0.971, "step": 1390 }, { "epoch": 0.299204129920413, "grad_norm": 0.0, "learning_rate": 1.643587857166263e-05, "loss": 0.9492, "step": 1391 }, { "epoch": 0.299419229941923, "grad_norm": 0.0, "learning_rate": 1.6430544368989915e-05, "loss": 0.9742, "step": 1392 }, { "epoch": 0.299634329963433, "grad_norm": 0.0, "learning_rate": 1.6425207044647163e-05, "loss": 0.9534, "step": 1393 }, { "epoch": 0.299849429984943, "grad_norm": 0.0, "learning_rate": 1.641986660122534e-05, "loss": 0.9522, "step": 1394 }, { "epoch": 0.300064530006453, "grad_norm": 0.0, "learning_rate": 1.641452304131694e-05, "loss": 0.954, "step": 1395 }, { "epoch": 0.300279630027963, "grad_norm": 0.0, "learning_rate": 1.6409176367515957e-05, "loss": 0.9263, "step": 1396 }, { "epoch": 0.300494730049473, "grad_norm": 0.0, "learning_rate": 1.6403826582417908e-05, "loss": 0.916, "step": 1397 }, { "epoch": 0.300709830070983, "grad_norm": 0.0, "learning_rate": 1.6398473688619807e-05, "loss": 0.8708, "step": 1398 }, { "epoch": 0.300924930092493, "grad_norm": 0.0, "learning_rate": 1.6393117688720193e-05, "loss": 0.908, "step": 1399 }, { "epoch": 0.30114003011400303, "grad_norm": 0.0, "learning_rate": 1.63877585853191e-05, "loss": 0.9462, "step": 1400 }, { "epoch": 0.301355130135513, "grad_norm": 0.0, "learning_rate": 1.6382396381018076e-05, "loss": 0.8776, "step": 1401 }, { "epoch": 0.301570230157023, "grad_norm": 0.0, "learning_rate": 1.6377031078420168e-05, "loss": 0.8947, "step": 1402 }, { "epoch": 0.30178533017853304, "grad_norm": 0.0, "learning_rate": 1.6371662680129934e-05, "loss": 0.9212, "step": 1403 }, { "epoch": 0.302000430200043, "grad_norm": 0.0, "learning_rate": 1.636629118875343e-05, "loss": 0.9199, "step": 1404 }, { "epoch": 0.302215530221553, "grad_norm": 0.0, "learning_rate": 1.6360916606898217e-05, "loss": 0.8549, "step": 1405 }, { "epoch": 0.30243063024306305, "grad_norm": 0.0, "learning_rate": 1.6355538937173353e-05, "loss": 0.9042, "step": 1406 }, { "epoch": 0.30264573026457303, "grad_norm": 0.0, "learning_rate": 1.6350158182189394e-05, "loss": 0.898, "step": 1407 }, { "epoch": 0.302860830286083, "grad_norm": 0.0, "learning_rate": 1.6344774344558402e-05, "loss": 0.9481, "step": 1408 }, { "epoch": 0.30307593030759306, "grad_norm": 0.0, "learning_rate": 1.633938742689392e-05, "loss": 0.8741, "step": 1409 }, { "epoch": 0.30329103032910304, "grad_norm": 0.0, "learning_rate": 1.6333997431811005e-05, "loss": 0.9448, "step": 1410 }, { "epoch": 0.303506130350613, "grad_norm": 0.0, "learning_rate": 1.632860436192619e-05, "loss": 0.929, "step": 1411 }, { "epoch": 0.30372123037212306, "grad_norm": 0.0, "learning_rate": 1.6323208219857517e-05, "loss": 0.9088, "step": 1412 }, { "epoch": 0.30393633039363305, "grad_norm": 0.0, "learning_rate": 1.6317809008224506e-05, "loss": 0.9112, "step": 1413 }, { "epoch": 0.30415143041514303, "grad_norm": 0.0, "learning_rate": 1.6312406729648177e-05, "loss": 0.8788, "step": 1414 }, { "epoch": 0.30436653043665307, "grad_norm": 0.0, "learning_rate": 1.630700138675103e-05, "loss": 0.9504, "step": 1415 }, { "epoch": 0.30458163045816306, "grad_norm": 0.0, "learning_rate": 1.6301592982157055e-05, "loss": 0.8992, "step": 1416 }, { "epoch": 0.30479673047967304, "grad_norm": 0.0, "learning_rate": 1.629618151849174e-05, "loss": 0.8909, "step": 1417 }, { "epoch": 0.305011830501183, "grad_norm": 0.0, "learning_rate": 1.629076699838204e-05, "loss": 0.9028, "step": 1418 }, { "epoch": 0.30522693052269306, "grad_norm": 0.0, "learning_rate": 1.6285349424456408e-05, "loss": 0.894, "step": 1419 }, { "epoch": 0.30544203054420305, "grad_norm": 0.0, "learning_rate": 1.6279928799344765e-05, "loss": 0.9512, "step": 1420 }, { "epoch": 0.30565713056571303, "grad_norm": 0.0, "learning_rate": 1.627450512567853e-05, "loss": 0.892, "step": 1421 }, { "epoch": 0.30587223058722307, "grad_norm": 0.0, "learning_rate": 1.626907840609059e-05, "loss": 1.0079, "step": 1422 }, { "epoch": 0.30608733060873305, "grad_norm": 0.0, "learning_rate": 1.6263648643215315e-05, "loss": 0.8812, "step": 1423 }, { "epoch": 0.30630243063024304, "grad_norm": 0.0, "learning_rate": 1.625821583968855e-05, "loss": 0.8621, "step": 1424 }, { "epoch": 0.3065175306517531, "grad_norm": 0.0, "learning_rate": 1.625277999814762e-05, "loss": 0.8752, "step": 1425 }, { "epoch": 0.30673263067326306, "grad_norm": 0.0, "learning_rate": 1.6247341121231318e-05, "loss": 0.9189, "step": 1426 }, { "epoch": 0.30694773069477305, "grad_norm": 0.0, "learning_rate": 1.624189921157992e-05, "loss": 0.8413, "step": 1427 }, { "epoch": 0.3071628307162831, "grad_norm": 0.0, "learning_rate": 1.6236454271835167e-05, "loss": 0.8928, "step": 1428 }, { "epoch": 0.30737793073779307, "grad_norm": 0.0, "learning_rate": 1.6231006304640272e-05, "loss": 0.9025, "step": 1429 }, { "epoch": 0.30759303075930305, "grad_norm": 0.0, "learning_rate": 1.622555531263992e-05, "loss": 0.969, "step": 1430 }, { "epoch": 0.3078081307808131, "grad_norm": 0.0, "learning_rate": 1.6220101298480265e-05, "loss": 0.9554, "step": 1431 }, { "epoch": 0.3080232308023231, "grad_norm": 0.0, "learning_rate": 1.621464426480892e-05, "loss": 0.8342, "step": 1432 }, { "epoch": 0.30823833082383306, "grad_norm": 0.0, "learning_rate": 1.620918421427498e-05, "loss": 0.9493, "step": 1433 }, { "epoch": 0.3084534308453431, "grad_norm": 0.0, "learning_rate": 1.6203721149528982e-05, "loss": 0.9256, "step": 1434 }, { "epoch": 0.3086685308668531, "grad_norm": 0.0, "learning_rate": 1.619825507322295e-05, "loss": 0.8972, "step": 1435 }, { "epoch": 0.30888363088836307, "grad_norm": 0.0, "learning_rate": 1.6192785988010354e-05, "loss": 0.9969, "step": 1436 }, { "epoch": 0.3090987309098731, "grad_norm": 0.0, "learning_rate": 1.6187313896546125e-05, "loss": 1.0032, "step": 1437 }, { "epoch": 0.3093138309313831, "grad_norm": 0.0, "learning_rate": 1.6181838801486663e-05, "loss": 0.9535, "step": 1438 }, { "epoch": 0.3095289309528931, "grad_norm": 0.0, "learning_rate": 1.6176360705489826e-05, "loss": 0.9267, "step": 1439 }, { "epoch": 0.3097440309744031, "grad_norm": 0.0, "learning_rate": 1.6170879611214913e-05, "loss": 0.8917, "step": 1440 }, { "epoch": 0.3099591309959131, "grad_norm": 0.0, "learning_rate": 1.616539552132269e-05, "loss": 0.9056, "step": 1441 }, { "epoch": 0.3101742310174231, "grad_norm": 0.0, "learning_rate": 1.6159908438475383e-05, "loss": 0.9469, "step": 1442 }, { "epoch": 0.3103893310389331, "grad_norm": 0.0, "learning_rate": 1.6154418365336662e-05, "loss": 0.8631, "step": 1443 }, { "epoch": 0.3106044310604431, "grad_norm": 0.0, "learning_rate": 1.6148925304571647e-05, "loss": 1.0097, "step": 1444 }, { "epoch": 0.3108195310819531, "grad_norm": 0.0, "learning_rate": 1.6143429258846917e-05, "loss": 0.9314, "step": 1445 }, { "epoch": 0.31103463110346313, "grad_norm": 0.0, "learning_rate": 1.613793023083049e-05, "loss": 0.9324, "step": 1446 }, { "epoch": 0.3112497311249731, "grad_norm": 0.0, "learning_rate": 1.613242822319184e-05, "loss": 0.9114, "step": 1447 }, { "epoch": 0.3114648311464831, "grad_norm": 0.0, "learning_rate": 1.612692323860189e-05, "loss": 0.8678, "step": 1448 }, { "epoch": 0.31167993116799314, "grad_norm": 0.0, "learning_rate": 1.6121415279732992e-05, "loss": 0.9051, "step": 1449 }, { "epoch": 0.3118950311895031, "grad_norm": 0.0, "learning_rate": 1.611590434925896e-05, "loss": 0.9497, "step": 1450 }, { "epoch": 0.3121101312110131, "grad_norm": 0.0, "learning_rate": 1.6110390449855034e-05, "loss": 0.9334, "step": 1451 }, { "epoch": 0.31232523123252315, "grad_norm": 0.0, "learning_rate": 1.6104873584197914e-05, "loss": 0.9045, "step": 1452 }, { "epoch": 0.31254033125403313, "grad_norm": 0.0, "learning_rate": 1.6099353754965723e-05, "loss": 0.8545, "step": 1453 }, { "epoch": 0.3127554312755431, "grad_norm": 0.0, "learning_rate": 1.6093830964838036e-05, "loss": 0.9174, "step": 1454 }, { "epoch": 0.31297053129705316, "grad_norm": 0.0, "learning_rate": 1.6088305216495855e-05, "loss": 0.9474, "step": 1455 }, { "epoch": 0.31318563131856314, "grad_norm": 0.0, "learning_rate": 1.608277651262162e-05, "loss": 0.9851, "step": 1456 }, { "epoch": 0.3134007313400731, "grad_norm": 0.0, "learning_rate": 1.6077244855899213e-05, "loss": 0.8797, "step": 1457 }, { "epoch": 0.31361583136158316, "grad_norm": 0.0, "learning_rate": 1.6071710249013946e-05, "loss": 0.9422, "step": 1458 }, { "epoch": 0.31383093138309315, "grad_norm": 0.0, "learning_rate": 1.6066172694652552e-05, "loss": 0.9564, "step": 1459 }, { "epoch": 0.31404603140460313, "grad_norm": 0.0, "learning_rate": 1.606063219550321e-05, "loss": 0.8899, "step": 1460 }, { "epoch": 0.3142611314261131, "grad_norm": 0.0, "learning_rate": 1.6055088754255526e-05, "loss": 0.9071, "step": 1461 }, { "epoch": 0.31447623144762316, "grad_norm": 0.0, "learning_rate": 1.6049542373600525e-05, "loss": 0.9252, "step": 1462 }, { "epoch": 0.31469133146913314, "grad_norm": 0.0, "learning_rate": 1.604399305623067e-05, "loss": 0.9716, "step": 1463 }, { "epoch": 0.3149064314906431, "grad_norm": 0.0, "learning_rate": 1.6038440804839842e-05, "loss": 0.8837, "step": 1464 }, { "epoch": 0.31512153151215316, "grad_norm": 0.0, "learning_rate": 1.6032885622123347e-05, "loss": 0.8724, "step": 1465 }, { "epoch": 0.31533663153366315, "grad_norm": 0.0, "learning_rate": 1.602732751077792e-05, "loss": 0.9012, "step": 1466 }, { "epoch": 0.31555173155517313, "grad_norm": 0.0, "learning_rate": 1.6021766473501707e-05, "loss": 0.9034, "step": 1467 }, { "epoch": 0.31576683157668317, "grad_norm": 0.0, "learning_rate": 1.601620251299429e-05, "loss": 0.9144, "step": 1468 }, { "epoch": 0.31598193159819316, "grad_norm": 0.0, "learning_rate": 1.6010635631956653e-05, "loss": 0.9067, "step": 1469 }, { "epoch": 0.31619703161970314, "grad_norm": 0.0, "learning_rate": 1.600506583309121e-05, "loss": 0.9066, "step": 1470 }, { "epoch": 0.3164121316412132, "grad_norm": 0.0, "learning_rate": 1.5999493119101786e-05, "loss": 0.9147, "step": 1471 }, { "epoch": 0.31662723166272316, "grad_norm": 0.0, "learning_rate": 1.5993917492693623e-05, "loss": 0.8579, "step": 1472 }, { "epoch": 0.31684233168423315, "grad_norm": 0.0, "learning_rate": 1.5988338956573377e-05, "loss": 0.9639, "step": 1473 }, { "epoch": 0.3170574317057432, "grad_norm": 0.0, "learning_rate": 1.5982757513449114e-05, "loss": 0.8842, "step": 1474 }, { "epoch": 0.31727253172725317, "grad_norm": 0.0, "learning_rate": 1.5977173166030312e-05, "loss": 0.9394, "step": 1475 }, { "epoch": 0.31748763174876315, "grad_norm": 0.0, "learning_rate": 1.5971585917027864e-05, "loss": 0.9168, "step": 1476 }, { "epoch": 0.3177027317702732, "grad_norm": 0.0, "learning_rate": 1.596599576915406e-05, "loss": 0.8517, "step": 1477 }, { "epoch": 0.3179178317917832, "grad_norm": 0.0, "learning_rate": 1.596040272512261e-05, "loss": 0.9097, "step": 1478 }, { "epoch": 0.31813293181329316, "grad_norm": 0.0, "learning_rate": 1.5954806787648624e-05, "loss": 0.9095, "step": 1479 }, { "epoch": 0.3183480318348032, "grad_norm": 0.0, "learning_rate": 1.5949207959448614e-05, "loss": 0.841, "step": 1480 }, { "epoch": 0.3185631318563132, "grad_norm": 0.0, "learning_rate": 1.5943606243240504e-05, "loss": 0.8788, "step": 1481 }, { "epoch": 0.31877823187782317, "grad_norm": 0.0, "learning_rate": 1.5938001641743608e-05, "loss": 0.9123, "step": 1482 }, { "epoch": 0.3189933318993332, "grad_norm": 0.0, "learning_rate": 1.5932394157678646e-05, "loss": 0.9195, "step": 1483 }, { "epoch": 0.3192084319208432, "grad_norm": 0.0, "learning_rate": 1.592678379376775e-05, "loss": 0.8798, "step": 1484 }, { "epoch": 0.3194235319423532, "grad_norm": 0.0, "learning_rate": 1.5921170552734423e-05, "loss": 0.9812, "step": 1485 }, { "epoch": 0.3196386319638632, "grad_norm": 0.0, "learning_rate": 1.591555443730359e-05, "loss": 0.943, "step": 1486 }, { "epoch": 0.3198537319853732, "grad_norm": 0.0, "learning_rate": 1.590993545020156e-05, "loss": 0.9324, "step": 1487 }, { "epoch": 0.3200688320068832, "grad_norm": 0.0, "learning_rate": 1.590431359415603e-05, "loss": 0.9272, "step": 1488 }, { "epoch": 0.3202839320283932, "grad_norm": 0.0, "learning_rate": 1.589868887189611e-05, "loss": 0.8967, "step": 1489 }, { "epoch": 0.3204990320499032, "grad_norm": 0.0, "learning_rate": 1.5893061286152275e-05, "loss": 0.876, "step": 1490 }, { "epoch": 0.3207141320714132, "grad_norm": 0.0, "learning_rate": 1.588743083965641e-05, "loss": 0.8617, "step": 1491 }, { "epoch": 0.32092923209292323, "grad_norm": 0.0, "learning_rate": 1.5881797535141785e-05, "loss": 0.9914, "step": 1492 }, { "epoch": 0.3211443321144332, "grad_norm": 0.0, "learning_rate": 1.587616137534305e-05, "loss": 0.93, "step": 1493 }, { "epoch": 0.3213594321359432, "grad_norm": 0.0, "learning_rate": 1.5870522362996244e-05, "loss": 0.9264, "step": 1494 }, { "epoch": 0.32157453215745324, "grad_norm": 0.0, "learning_rate": 1.58648805008388e-05, "loss": 0.9505, "step": 1495 }, { "epoch": 0.3217896321789632, "grad_norm": 0.0, "learning_rate": 1.585923579160952e-05, "loss": 0.9704, "step": 1496 }, { "epoch": 0.3220047322004732, "grad_norm": 0.0, "learning_rate": 1.58535882380486e-05, "loss": 0.9238, "step": 1497 }, { "epoch": 0.32221983222198325, "grad_norm": 0.0, "learning_rate": 1.5847937842897606e-05, "loss": 0.9232, "step": 1498 }, { "epoch": 0.32243493224349323, "grad_norm": 0.0, "learning_rate": 1.584228460889949e-05, "loss": 0.8184, "step": 1499 }, { "epoch": 0.3226500322650032, "grad_norm": 0.0, "learning_rate": 1.583662853879859e-05, "loss": 0.9114, "step": 1500 }, { "epoch": 0.3228651322865132, "grad_norm": 0.0, "learning_rate": 1.58309696353406e-05, "loss": 0.8585, "step": 1501 }, { "epoch": 0.32308023230802324, "grad_norm": 0.0, "learning_rate": 1.5825307901272612e-05, "loss": 0.8938, "step": 1502 }, { "epoch": 0.3232953323295332, "grad_norm": 0.0, "learning_rate": 1.5819643339343077e-05, "loss": 0.9272, "step": 1503 }, { "epoch": 0.3235104323510432, "grad_norm": 0.0, "learning_rate": 1.5813975952301815e-05, "loss": 0.9421, "step": 1504 }, { "epoch": 0.32372553237255325, "grad_norm": 0.0, "learning_rate": 1.5808305742900044e-05, "loss": 0.9263, "step": 1505 }, { "epoch": 0.32394063239406323, "grad_norm": 0.0, "learning_rate": 1.580263271389032e-05, "loss": 0.9315, "step": 1506 }, { "epoch": 0.3241557324155732, "grad_norm": 0.0, "learning_rate": 1.5796956868026582e-05, "loss": 0.9453, "step": 1507 }, { "epoch": 0.32437083243708326, "grad_norm": 0.0, "learning_rate": 1.5791278208064142e-05, "loss": 0.9371, "step": 1508 }, { "epoch": 0.32458593245859324, "grad_norm": 0.0, "learning_rate": 1.578559673675967e-05, "loss": 0.9713, "step": 1509 }, { "epoch": 0.3248010324801032, "grad_norm": 0.0, "learning_rate": 1.5779912456871202e-05, "loss": 0.9865, "step": 1510 }, { "epoch": 0.32501613250161326, "grad_norm": 0.0, "learning_rate": 1.5774225371158142e-05, "loss": 0.8617, "step": 1511 }, { "epoch": 0.32523123252312325, "grad_norm": 0.0, "learning_rate": 1.5768535482381247e-05, "loss": 0.9286, "step": 1512 }, { "epoch": 0.32544633254463323, "grad_norm": 0.0, "learning_rate": 1.5762842793302638e-05, "loss": 0.9245, "step": 1513 }, { "epoch": 0.32566143256614327, "grad_norm": 0.0, "learning_rate": 1.5757147306685806e-05, "loss": 0.9326, "step": 1514 }, { "epoch": 0.32587653258765326, "grad_norm": 0.0, "learning_rate": 1.575144902529559e-05, "loss": 0.9703, "step": 1515 }, { "epoch": 0.32609163260916324, "grad_norm": 0.0, "learning_rate": 1.5745747951898183e-05, "loss": 0.8982, "step": 1516 }, { "epoch": 0.3263067326306733, "grad_norm": 0.0, "learning_rate": 1.5740044089261137e-05, "loss": 0.9359, "step": 1517 }, { "epoch": 0.32652183265218326, "grad_norm": 0.0, "learning_rate": 1.5734337440153364e-05, "loss": 0.9328, "step": 1518 }, { "epoch": 0.32673693267369325, "grad_norm": 0.0, "learning_rate": 1.5728628007345118e-05, "loss": 0.9153, "step": 1519 }, { "epoch": 0.3269520326952033, "grad_norm": 0.0, "learning_rate": 1.5722915793608017e-05, "loss": 0.8988, "step": 1520 }, { "epoch": 0.32716713271671327, "grad_norm": 0.0, "learning_rate": 1.571720080171501e-05, "loss": 0.9051, "step": 1521 }, { "epoch": 0.32738223273822326, "grad_norm": 0.0, "learning_rate": 1.5711483034440414e-05, "loss": 0.9722, "step": 1522 }, { "epoch": 0.3275973327597333, "grad_norm": 0.0, "learning_rate": 1.570576249455989e-05, "loss": 0.8677, "step": 1523 }, { "epoch": 0.3278124327812433, "grad_norm": 0.0, "learning_rate": 1.570003918485043e-05, "loss": 0.9314, "step": 1524 }, { "epoch": 0.32802753280275326, "grad_norm": 0.0, "learning_rate": 1.5694313108090388e-05, "loss": 0.9354, "step": 1525 }, { "epoch": 0.3282426328242633, "grad_norm": 0.0, "learning_rate": 1.5688584267059453e-05, "loss": 0.8223, "step": 1526 }, { "epoch": 0.3284577328457733, "grad_norm": 0.0, "learning_rate": 1.5682852664538656e-05, "loss": 0.947, "step": 1527 }, { "epoch": 0.32867283286728327, "grad_norm": 0.0, "learning_rate": 1.567711830331037e-05, "loss": 0.9461, "step": 1528 }, { "epoch": 0.3288879328887933, "grad_norm": 0.0, "learning_rate": 1.567138118615831e-05, "loss": 0.8489, "step": 1529 }, { "epoch": 0.3291030329103033, "grad_norm": 0.0, "learning_rate": 1.5665641315867524e-05, "loss": 0.9581, "step": 1530 }, { "epoch": 0.3293181329318133, "grad_norm": 0.0, "learning_rate": 1.5659898695224396e-05, "loss": 0.909, "step": 1531 }, { "epoch": 0.3295332329533233, "grad_norm": 0.0, "learning_rate": 1.565415332701665e-05, "loss": 0.8668, "step": 1532 }, { "epoch": 0.3297483329748333, "grad_norm": 0.0, "learning_rate": 1.564840521403334e-05, "loss": 0.8681, "step": 1533 }, { "epoch": 0.3299634329963433, "grad_norm": 0.0, "learning_rate": 1.5642654359064855e-05, "loss": 0.8771, "step": 1534 }, { "epoch": 0.3301785330178533, "grad_norm": 0.0, "learning_rate": 1.5636900764902914e-05, "loss": 0.9462, "step": 1535 }, { "epoch": 0.3303936330393633, "grad_norm": 0.0, "learning_rate": 1.5631144434340566e-05, "loss": 0.9157, "step": 1536 }, { "epoch": 0.3306087330608733, "grad_norm": 0.0, "learning_rate": 1.5625385370172185e-05, "loss": 0.8628, "step": 1537 }, { "epoch": 0.33082383308238333, "grad_norm": 0.0, "learning_rate": 1.561962357519348e-05, "loss": 0.9435, "step": 1538 }, { "epoch": 0.3310389331038933, "grad_norm": 0.0, "learning_rate": 1.561385905220147e-05, "loss": 0.8705, "step": 1539 }, { "epoch": 0.3312540331254033, "grad_norm": 0.0, "learning_rate": 1.560809180399452e-05, "loss": 0.8969, "step": 1540 }, { "epoch": 0.33146913314691334, "grad_norm": 0.0, "learning_rate": 1.5602321833372304e-05, "loss": 0.8876, "step": 1541 }, { "epoch": 0.3316842331684233, "grad_norm": 0.0, "learning_rate": 1.5596549143135818e-05, "loss": 0.9106, "step": 1542 }, { "epoch": 0.3318993331899333, "grad_norm": 0.0, "learning_rate": 1.5590773736087382e-05, "loss": 0.9114, "step": 1543 }, { "epoch": 0.3321144332114433, "grad_norm": 0.0, "learning_rate": 1.5584995615030634e-05, "loss": 0.9097, "step": 1544 }, { "epoch": 0.33232953323295333, "grad_norm": 0.0, "learning_rate": 1.5579214782770526e-05, "loss": 0.9352, "step": 1545 }, { "epoch": 0.3325446332544633, "grad_norm": 0.0, "learning_rate": 1.5573431242113333e-05, "loss": 0.8571, "step": 1546 }, { "epoch": 0.3327597332759733, "grad_norm": 0.0, "learning_rate": 1.556764499586664e-05, "loss": 0.8615, "step": 1547 }, { "epoch": 0.33297483329748334, "grad_norm": 0.0, "learning_rate": 1.5561856046839347e-05, "loss": 0.8344, "step": 1548 }, { "epoch": 0.3331899333189933, "grad_norm": 0.0, "learning_rate": 1.555606439784166e-05, "loss": 0.9101, "step": 1549 }, { "epoch": 0.3334050333405033, "grad_norm": 0.0, "learning_rate": 1.555027005168511e-05, "loss": 0.9512, "step": 1550 }, { "epoch": 0.33362013336201335, "grad_norm": 0.0, "learning_rate": 1.5544473011182518e-05, "loss": 0.8925, "step": 1551 }, { "epoch": 0.33383523338352333, "grad_norm": 0.0, "learning_rate": 1.5538673279148033e-05, "loss": 0.8816, "step": 1552 }, { "epoch": 0.3340503334050333, "grad_norm": 0.0, "learning_rate": 1.5532870858397093e-05, "loss": 0.8818, "step": 1553 }, { "epoch": 0.33426543342654336, "grad_norm": 0.0, "learning_rate": 1.5527065751746454e-05, "loss": 0.9292, "step": 1554 }, { "epoch": 0.33448053344805334, "grad_norm": 0.0, "learning_rate": 1.5521257962014172e-05, "loss": 0.8579, "step": 1555 }, { "epoch": 0.3346956334695633, "grad_norm": 0.0, "learning_rate": 1.5515447492019598e-05, "loss": 0.9385, "step": 1556 }, { "epoch": 0.33491073349107336, "grad_norm": 0.0, "learning_rate": 1.5509634344583395e-05, "loss": 0.8887, "step": 1557 }, { "epoch": 0.33512583351258335, "grad_norm": 0.0, "learning_rate": 1.5503818522527525e-05, "loss": 0.8728, "step": 1558 }, { "epoch": 0.33534093353409333, "grad_norm": 0.0, "learning_rate": 1.549800002867524e-05, "loss": 0.8752, "step": 1559 }, { "epoch": 0.3355560335556034, "grad_norm": 0.0, "learning_rate": 1.5492178865851092e-05, "loss": 0.9537, "step": 1560 }, { "epoch": 0.33577113357711336, "grad_norm": 0.0, "learning_rate": 1.548635503688093e-05, "loss": 0.9951, "step": 1561 }, { "epoch": 0.33598623359862334, "grad_norm": 0.0, "learning_rate": 1.5480528544591907e-05, "loss": 0.897, "step": 1562 }, { "epoch": 0.3362013336201334, "grad_norm": 0.0, "learning_rate": 1.5474699391812447e-05, "loss": 0.8981, "step": 1563 }, { "epoch": 0.33641643364164336, "grad_norm": 0.0, "learning_rate": 1.5468867581372288e-05, "loss": 0.9459, "step": 1564 }, { "epoch": 0.33663153366315335, "grad_norm": 0.0, "learning_rate": 1.546303311610244e-05, "loss": 0.9658, "step": 1565 }, { "epoch": 0.3368466336846634, "grad_norm": 0.0, "learning_rate": 1.545719599883522e-05, "loss": 0.897, "step": 1566 }, { "epoch": 0.33706173370617337, "grad_norm": 0.0, "learning_rate": 1.545135623240421e-05, "loss": 0.9418, "step": 1567 }, { "epoch": 0.33727683372768336, "grad_norm": 0.0, "learning_rate": 1.54455138196443e-05, "loss": 0.8786, "step": 1568 }, { "epoch": 0.3374919337491934, "grad_norm": 0.0, "learning_rate": 1.5439668763391652e-05, "loss": 0.9096, "step": 1569 }, { "epoch": 0.3377070337707034, "grad_norm": 0.0, "learning_rate": 1.5433821066483713e-05, "loss": 0.9589, "step": 1570 }, { "epoch": 0.33792213379221336, "grad_norm": 0.0, "learning_rate": 1.5427970731759214e-05, "loss": 0.9007, "step": 1571 }, { "epoch": 0.3381372338137234, "grad_norm": 0.0, "learning_rate": 1.5422117762058167e-05, "loss": 0.915, "step": 1572 }, { "epoch": 0.3383523338352334, "grad_norm": 0.0, "learning_rate": 1.541626216022186e-05, "loss": 0.8077, "step": 1573 }, { "epoch": 0.33856743385674337, "grad_norm": 0.0, "learning_rate": 1.5410403929092858e-05, "loss": 0.8856, "step": 1574 }, { "epoch": 0.3387825338782534, "grad_norm": 0.0, "learning_rate": 1.5404543071515008e-05, "loss": 0.891, "step": 1575 }, { "epoch": 0.3389976338997634, "grad_norm": 0.0, "learning_rate": 1.5398679590333434e-05, "loss": 0.9325, "step": 1576 }, { "epoch": 0.3392127339212734, "grad_norm": 0.0, "learning_rate": 1.539281348839452e-05, "loss": 0.9244, "step": 1577 }, { "epoch": 0.3394278339427834, "grad_norm": 0.0, "learning_rate": 1.5386944768545933e-05, "loss": 0.9226, "step": 1578 }, { "epoch": 0.3396429339642934, "grad_norm": 0.0, "learning_rate": 1.5381073433636614e-05, "loss": 0.853, "step": 1579 }, { "epoch": 0.3398580339858034, "grad_norm": 0.0, "learning_rate": 1.537519948651676e-05, "loss": 0.9198, "step": 1580 }, { "epoch": 0.3400731340073134, "grad_norm": 0.0, "learning_rate": 1.5369322930037852e-05, "loss": 0.8991, "step": 1581 }, { "epoch": 0.3402882340288234, "grad_norm": 0.0, "learning_rate": 1.5363443767052624e-05, "loss": 0.8874, "step": 1582 }, { "epoch": 0.3405033340503334, "grad_norm": 0.0, "learning_rate": 1.5357562000415086e-05, "loss": 0.8475, "step": 1583 }, { "epoch": 0.34071843407184343, "grad_norm": 0.0, "learning_rate": 1.5351677632980503e-05, "loss": 0.8716, "step": 1584 }, { "epoch": 0.3409335340933534, "grad_norm": 0.0, "learning_rate": 1.5345790667605403e-05, "loss": 0.8565, "step": 1585 }, { "epoch": 0.3411486341148634, "grad_norm": 0.0, "learning_rate": 1.533990110714759e-05, "loss": 0.9228, "step": 1586 }, { "epoch": 0.3413637341363734, "grad_norm": 0.0, "learning_rate": 1.5334008954466108e-05, "loss": 0.8969, "step": 1587 }, { "epoch": 0.3415788341578834, "grad_norm": 0.0, "learning_rate": 1.532811421242127e-05, "loss": 0.9425, "step": 1588 }, { "epoch": 0.3417939341793934, "grad_norm": 0.0, "learning_rate": 1.5322216883874643e-05, "loss": 0.9148, "step": 1589 }, { "epoch": 0.3420090342009034, "grad_norm": 0.0, "learning_rate": 1.5316316971689053e-05, "loss": 0.8639, "step": 1590 }, { "epoch": 0.34222413422241343, "grad_norm": 0.0, "learning_rate": 1.5310414478728578e-05, "loss": 0.8555, "step": 1591 }, { "epoch": 0.3424392342439234, "grad_norm": 0.0, "learning_rate": 1.5304509407858545e-05, "loss": 0.9333, "step": 1592 }, { "epoch": 0.3426543342654334, "grad_norm": 0.0, "learning_rate": 1.529860176194554e-05, "loss": 0.841, "step": 1593 }, { "epoch": 0.34286943428694344, "grad_norm": 0.0, "learning_rate": 1.529269154385739e-05, "loss": 0.9344, "step": 1594 }, { "epoch": 0.3430845343084534, "grad_norm": 0.0, "learning_rate": 1.5286778756463184e-05, "loss": 0.8864, "step": 1595 }, { "epoch": 0.3432996343299634, "grad_norm": 0.0, "learning_rate": 1.5280863402633242e-05, "loss": 0.908, "step": 1596 }, { "epoch": 0.34351473435147345, "grad_norm": 0.0, "learning_rate": 1.5274945485239143e-05, "loss": 0.8776, "step": 1597 }, { "epoch": 0.34372983437298343, "grad_norm": 0.0, "learning_rate": 1.52690250071537e-05, "loss": 0.8394, "step": 1598 }, { "epoch": 0.3439449343944934, "grad_norm": 0.0, "learning_rate": 1.526310197125098e-05, "loss": 0.9653, "step": 1599 }, { "epoch": 0.34416003441600346, "grad_norm": 0.0, "learning_rate": 1.5257176380406277e-05, "loss": 0.9606, "step": 1600 }, { "epoch": 0.34437513443751344, "grad_norm": 0.0, "learning_rate": 1.5251248237496148e-05, "loss": 0.9177, "step": 1601 }, { "epoch": 0.3445902344590234, "grad_norm": 0.0, "learning_rate": 1.5245317545398363e-05, "loss": 0.9433, "step": 1602 }, { "epoch": 0.34480533448053347, "grad_norm": 0.0, "learning_rate": 1.5239384306991948e-05, "loss": 0.8683, "step": 1603 }, { "epoch": 0.34502043450204345, "grad_norm": 0.0, "learning_rate": 1.5233448525157161e-05, "loss": 0.9103, "step": 1604 }, { "epoch": 0.34523553452355343, "grad_norm": 0.0, "learning_rate": 1.5227510202775486e-05, "loss": 0.8916, "step": 1605 }, { "epoch": 0.3454506345450635, "grad_norm": 0.0, "learning_rate": 1.5221569342729648e-05, "loss": 0.9314, "step": 1606 }, { "epoch": 0.34566573456657346, "grad_norm": 0.0, "learning_rate": 1.5215625947903604e-05, "loss": 0.88, "step": 1607 }, { "epoch": 0.34588083458808344, "grad_norm": 0.0, "learning_rate": 1.5209680021182545e-05, "loss": 0.8724, "step": 1608 }, { "epoch": 0.3460959346095935, "grad_norm": 0.0, "learning_rate": 1.5203731565452878e-05, "loss": 0.8724, "step": 1609 }, { "epoch": 0.34631103463110346, "grad_norm": 0.0, "learning_rate": 1.5197780583602249e-05, "loss": 0.9571, "step": 1610 }, { "epoch": 0.34652613465261345, "grad_norm": 0.0, "learning_rate": 1.519182707851953e-05, "loss": 0.9214, "step": 1611 }, { "epoch": 0.3467412346741235, "grad_norm": 0.0, "learning_rate": 1.5185871053094814e-05, "loss": 0.844, "step": 1612 }, { "epoch": 0.3469563346956335, "grad_norm": 0.0, "learning_rate": 1.5179912510219413e-05, "loss": 0.931, "step": 1613 }, { "epoch": 0.34717143471714346, "grad_norm": 0.0, "learning_rate": 1.5173951452785873e-05, "loss": 0.906, "step": 1614 }, { "epoch": 0.3473865347386535, "grad_norm": 0.0, "learning_rate": 1.5167987883687953e-05, "loss": 0.8774, "step": 1615 }, { "epoch": 0.3476016347601635, "grad_norm": 0.0, "learning_rate": 1.5162021805820633e-05, "loss": 0.8961, "step": 1616 }, { "epoch": 0.34781673478167346, "grad_norm": 0.0, "learning_rate": 1.515605322208011e-05, "loss": 0.9146, "step": 1617 }, { "epoch": 0.3480318348031835, "grad_norm": 0.0, "learning_rate": 1.5150082135363797e-05, "loss": 0.8896, "step": 1618 }, { "epoch": 0.3482469348246935, "grad_norm": 0.0, "learning_rate": 1.5144108548570322e-05, "loss": 0.8743, "step": 1619 }, { "epoch": 0.34846203484620347, "grad_norm": 0.0, "learning_rate": 1.513813246459953e-05, "loss": 0.9181, "step": 1620 }, { "epoch": 0.3486771348677135, "grad_norm": 0.0, "learning_rate": 1.5132153886352478e-05, "loss": 0.913, "step": 1621 }, { "epoch": 0.3488922348892235, "grad_norm": 0.0, "learning_rate": 1.5126172816731424e-05, "loss": 0.91, "step": 1622 }, { "epoch": 0.3491073349107335, "grad_norm": 0.0, "learning_rate": 1.512018925863985e-05, "loss": 0.9378, "step": 1623 }, { "epoch": 0.3493224349322435, "grad_norm": 0.0, "learning_rate": 1.5114203214982439e-05, "loss": 0.9625, "step": 1624 }, { "epoch": 0.3495375349537535, "grad_norm": 0.0, "learning_rate": 1.510821468866508e-05, "loss": 0.976, "step": 1625 }, { "epoch": 0.3497526349752635, "grad_norm": 0.0, "learning_rate": 1.5102223682594867e-05, "loss": 0.9279, "step": 1626 }, { "epoch": 0.3499677349967735, "grad_norm": 0.0, "learning_rate": 1.5096230199680096e-05, "loss": 0.8795, "step": 1627 }, { "epoch": 0.3501828350182835, "grad_norm": 0.0, "learning_rate": 1.5090234242830271e-05, "loss": 0.9363, "step": 1628 }, { "epoch": 0.3503979350397935, "grad_norm": 0.0, "learning_rate": 1.5084235814956097e-05, "loss": 0.9092, "step": 1629 }, { "epoch": 0.3506130350613035, "grad_norm": 0.0, "learning_rate": 1.507823491896947e-05, "loss": 0.838, "step": 1630 }, { "epoch": 0.3508281350828135, "grad_norm": 0.0, "learning_rate": 1.5072231557783492e-05, "loss": 0.9305, "step": 1631 }, { "epoch": 0.3510432351043235, "grad_norm": 0.0, "learning_rate": 1.5066225734312462e-05, "loss": 0.8745, "step": 1632 }, { "epoch": 0.3512583351258335, "grad_norm": 0.0, "learning_rate": 1.506021745147187e-05, "loss": 0.8888, "step": 1633 }, { "epoch": 0.3514734351473435, "grad_norm": 0.0, "learning_rate": 1.50542067121784e-05, "loss": 0.9076, "step": 1634 }, { "epoch": 0.3516885351688535, "grad_norm": 0.0, "learning_rate": 1.5048193519349929e-05, "loss": 0.8917, "step": 1635 }, { "epoch": 0.3519036351903635, "grad_norm": 0.0, "learning_rate": 1.5042177875905532e-05, "loss": 0.8241, "step": 1636 }, { "epoch": 0.35211873521187353, "grad_norm": 0.0, "learning_rate": 1.5036159784765464e-05, "loss": 0.9461, "step": 1637 }, { "epoch": 0.3523338352333835, "grad_norm": 0.0, "learning_rate": 1.5030139248851172e-05, "loss": 0.9496, "step": 1638 }, { "epoch": 0.3525489352548935, "grad_norm": 0.0, "learning_rate": 1.5024116271085291e-05, "loss": 0.8511, "step": 1639 }, { "epoch": 0.35276403527640354, "grad_norm": 0.0, "learning_rate": 1.5018090854391638e-05, "loss": 0.9623, "step": 1640 }, { "epoch": 0.3529791352979135, "grad_norm": 0.0, "learning_rate": 1.5012063001695218e-05, "loss": 0.8241, "step": 1641 }, { "epoch": 0.3531942353194235, "grad_norm": 0.0, "learning_rate": 1.5006032715922216e-05, "loss": 0.9055, "step": 1642 }, { "epoch": 0.35340933534093355, "grad_norm": 0.0, "learning_rate": 1.5000000000000002e-05, "loss": 0.9323, "step": 1643 }, { "epoch": 0.35362443536244353, "grad_norm": 0.0, "learning_rate": 1.4993964856857117e-05, "loss": 0.9202, "step": 1644 }, { "epoch": 0.3538395353839535, "grad_norm": 0.0, "learning_rate": 1.4987927289423289e-05, "loss": 0.9508, "step": 1645 }, { "epoch": 0.35405463540546356, "grad_norm": 0.0, "learning_rate": 1.498188730062942e-05, "loss": 0.9508, "step": 1646 }, { "epoch": 0.35426973542697354, "grad_norm": 0.0, "learning_rate": 1.497584489340759e-05, "loss": 0.881, "step": 1647 }, { "epoch": 0.3544848354484835, "grad_norm": 0.0, "learning_rate": 1.4969800070691042e-05, "loss": 0.8666, "step": 1648 }, { "epoch": 0.35469993546999357, "grad_norm": 0.0, "learning_rate": 1.4963752835414204e-05, "loss": 0.9159, "step": 1649 }, { "epoch": 0.35491503549150355, "grad_norm": 0.0, "learning_rate": 1.4957703190512674e-05, "loss": 0.9165, "step": 1650 }, { "epoch": 0.35513013551301353, "grad_norm": 0.0, "learning_rate": 1.495165113892321e-05, "loss": 0.9315, "step": 1651 }, { "epoch": 0.3553452355345236, "grad_norm": 0.0, "learning_rate": 1.4945596683583754e-05, "loss": 0.8489, "step": 1652 }, { "epoch": 0.35556033555603356, "grad_norm": 0.0, "learning_rate": 1.4939539827433396e-05, "loss": 0.9258, "step": 1653 }, { "epoch": 0.35577543557754354, "grad_norm": 0.0, "learning_rate": 1.4933480573412412e-05, "loss": 0.9549, "step": 1654 }, { "epoch": 0.3559905355990536, "grad_norm": 0.0, "learning_rate": 1.492741892446222e-05, "loss": 0.9642, "step": 1655 }, { "epoch": 0.35620563562056357, "grad_norm": 0.0, "learning_rate": 1.4921354883525419e-05, "loss": 0.9176, "step": 1656 }, { "epoch": 0.35642073564207355, "grad_norm": 0.0, "learning_rate": 1.4915288453545758e-05, "loss": 0.8901, "step": 1657 }, { "epoch": 0.3566358356635836, "grad_norm": 0.0, "learning_rate": 1.4909219637468153e-05, "loss": 0.9048, "step": 1658 }, { "epoch": 0.3568509356850936, "grad_norm": 0.0, "learning_rate": 1.4903148438238676e-05, "loss": 0.9159, "step": 1659 }, { "epoch": 0.35706603570660356, "grad_norm": 0.0, "learning_rate": 1.4897074858804553e-05, "loss": 0.9546, "step": 1660 }, { "epoch": 0.3572811357281136, "grad_norm": 0.0, "learning_rate": 1.4890998902114166e-05, "loss": 0.9624, "step": 1661 }, { "epoch": 0.3574962357496236, "grad_norm": 0.0, "learning_rate": 1.4884920571117057e-05, "loss": 0.9097, "step": 1662 }, { "epoch": 0.35771133577113357, "grad_norm": 0.0, "learning_rate": 1.4878839868763912e-05, "loss": 0.9043, "step": 1663 }, { "epoch": 0.3579264357926436, "grad_norm": 0.0, "learning_rate": 1.4872756798006576e-05, "loss": 0.8842, "step": 1664 }, { "epoch": 0.3581415358141536, "grad_norm": 0.0, "learning_rate": 1.4866671361798035e-05, "loss": 0.9411, "step": 1665 }, { "epoch": 0.3583566358356636, "grad_norm": 0.0, "learning_rate": 1.486058356309243e-05, "loss": 0.8752, "step": 1666 }, { "epoch": 0.3585717358571736, "grad_norm": 0.0, "learning_rate": 1.4854493404845051e-05, "loss": 0.8982, "step": 1667 }, { "epoch": 0.3587868358786836, "grad_norm": 0.0, "learning_rate": 1.4848400890012327e-05, "loss": 0.9417, "step": 1668 }, { "epoch": 0.3590019359001936, "grad_norm": 0.0, "learning_rate": 1.4842306021551833e-05, "loss": 0.9476, "step": 1669 }, { "epoch": 0.35921703592170356, "grad_norm": 0.0, "learning_rate": 1.4836208802422287e-05, "loss": 0.8808, "step": 1670 }, { "epoch": 0.3594321359432136, "grad_norm": 0.0, "learning_rate": 1.4830109235583548e-05, "loss": 0.9058, "step": 1671 }, { "epoch": 0.3596472359647236, "grad_norm": 0.0, "learning_rate": 1.4824007323996616e-05, "loss": 0.9863, "step": 1672 }, { "epoch": 0.3598623359862336, "grad_norm": 0.0, "learning_rate": 1.481790307062363e-05, "loss": 0.8945, "step": 1673 }, { "epoch": 0.3600774360077436, "grad_norm": 0.0, "learning_rate": 1.481179647842786e-05, "loss": 0.908, "step": 1674 }, { "epoch": 0.3602925360292536, "grad_norm": 0.0, "learning_rate": 1.4805687550373719e-05, "loss": 0.9296, "step": 1675 }, { "epoch": 0.3605076360507636, "grad_norm": 0.0, "learning_rate": 1.4799576289426747e-05, "loss": 0.9258, "step": 1676 }, { "epoch": 0.3607227360722736, "grad_norm": 0.0, "learning_rate": 1.4793462698553622e-05, "loss": 0.8166, "step": 1677 }, { "epoch": 0.3609378360937836, "grad_norm": 0.0, "learning_rate": 1.4787346780722152e-05, "loss": 0.9232, "step": 1678 }, { "epoch": 0.3611529361152936, "grad_norm": 0.0, "learning_rate": 1.4781228538901267e-05, "loss": 0.8803, "step": 1679 }, { "epoch": 0.3613680361368036, "grad_norm": 0.0, "learning_rate": 1.477510797606104e-05, "loss": 0.9108, "step": 1680 }, { "epoch": 0.3615831361583136, "grad_norm": 0.0, "learning_rate": 1.4768985095172657e-05, "loss": 0.8896, "step": 1681 }, { "epoch": 0.3617982361798236, "grad_norm": 0.0, "learning_rate": 1.4762859899208437e-05, "loss": 0.9184, "step": 1682 }, { "epoch": 0.36201333620133364, "grad_norm": 0.0, "learning_rate": 1.4756732391141819e-05, "loss": 0.9055, "step": 1683 }, { "epoch": 0.3622284362228436, "grad_norm": 0.0, "learning_rate": 1.4750602573947368e-05, "loss": 0.8782, "step": 1684 }, { "epoch": 0.3624435362443536, "grad_norm": 0.0, "learning_rate": 1.4744470450600766e-05, "loss": 0.9242, "step": 1685 }, { "epoch": 0.36265863626586364, "grad_norm": 0.0, "learning_rate": 1.4738336024078817e-05, "loss": 0.8612, "step": 1686 }, { "epoch": 0.3628737362873736, "grad_norm": 0.0, "learning_rate": 1.4732199297359439e-05, "loss": 0.8717, "step": 1687 }, { "epoch": 0.3630888363088836, "grad_norm": 0.0, "learning_rate": 1.4726060273421675e-05, "loss": 0.9059, "step": 1688 }, { "epoch": 0.36330393633039365, "grad_norm": 0.0, "learning_rate": 1.4719918955245676e-05, "loss": 0.8368, "step": 1689 }, { "epoch": 0.36351903635190363, "grad_norm": 0.0, "learning_rate": 1.4713775345812706e-05, "loss": 0.9615, "step": 1690 }, { "epoch": 0.3637341363734136, "grad_norm": 0.0, "learning_rate": 1.4707629448105155e-05, "loss": 0.908, "step": 1691 }, { "epoch": 0.36394923639492366, "grad_norm": 0.0, "learning_rate": 1.4701481265106499e-05, "loss": 0.8675, "step": 1692 }, { "epoch": 0.36416433641643364, "grad_norm": 0.0, "learning_rate": 1.4695330799801342e-05, "loss": 0.8997, "step": 1693 }, { "epoch": 0.3643794364379436, "grad_norm": 0.0, "learning_rate": 1.4689178055175396e-05, "loss": 0.9233, "step": 1694 }, { "epoch": 0.36459453645945367, "grad_norm": 0.0, "learning_rate": 1.4683023034215472e-05, "loss": 0.9689, "step": 1695 }, { "epoch": 0.36480963648096365, "grad_norm": 0.0, "learning_rate": 1.4676865739909484e-05, "loss": 0.9131, "step": 1696 }, { "epoch": 0.36502473650247363, "grad_norm": 0.0, "learning_rate": 1.4670706175246463e-05, "loss": 0.9643, "step": 1697 }, { "epoch": 0.3652398365239837, "grad_norm": 0.0, "learning_rate": 1.4664544343216529e-05, "loss": 0.8614, "step": 1698 }, { "epoch": 0.36545493654549366, "grad_norm": 0.0, "learning_rate": 1.4658380246810907e-05, "loss": 0.9461, "step": 1699 }, { "epoch": 0.36567003656700364, "grad_norm": 0.0, "learning_rate": 1.465221388902192e-05, "loss": 0.8719, "step": 1700 }, { "epoch": 0.3658851365885137, "grad_norm": 0.0, "learning_rate": 1.4646045272842995e-05, "loss": 0.8892, "step": 1701 }, { "epoch": 0.36610023661002367, "grad_norm": 0.0, "learning_rate": 1.4639874401268646e-05, "loss": 0.9194, "step": 1702 }, { "epoch": 0.36631533663153365, "grad_norm": 0.0, "learning_rate": 1.4633701277294487e-05, "loss": 0.8924, "step": 1703 }, { "epoch": 0.3665304366530437, "grad_norm": 0.0, "learning_rate": 1.4627525903917227e-05, "loss": 0.924, "step": 1704 }, { "epoch": 0.3667455366745537, "grad_norm": 0.0, "learning_rate": 1.4621348284134666e-05, "loss": 0.8815, "step": 1705 }, { "epoch": 0.36696063669606366, "grad_norm": 0.0, "learning_rate": 1.4615168420945689e-05, "loss": 0.9028, "step": 1706 }, { "epoch": 0.3671757367175737, "grad_norm": 0.0, "learning_rate": 1.4608986317350277e-05, "loss": 0.8602, "step": 1707 }, { "epoch": 0.3673908367390837, "grad_norm": 0.0, "learning_rate": 1.4602801976349497e-05, "loss": 0.9523, "step": 1708 }, { "epoch": 0.36760593676059367, "grad_norm": 0.0, "learning_rate": 1.4596615400945498e-05, "loss": 0.9049, "step": 1709 }, { "epoch": 0.3678210367821037, "grad_norm": 0.0, "learning_rate": 1.4590426594141513e-05, "loss": 0.9085, "step": 1710 }, { "epoch": 0.3680361368036137, "grad_norm": 0.0, "learning_rate": 1.4584235558941872e-05, "loss": 0.8336, "step": 1711 }, { "epoch": 0.3682512368251237, "grad_norm": 0.0, "learning_rate": 1.4578042298351973e-05, "loss": 0.8548, "step": 1712 }, { "epoch": 0.36846633684663366, "grad_norm": 0.0, "learning_rate": 1.4571846815378297e-05, "loss": 0.9599, "step": 1713 }, { "epoch": 0.3686814368681437, "grad_norm": 0.0, "learning_rate": 1.4565649113028399e-05, "loss": 0.9374, "step": 1714 }, { "epoch": 0.3688965368896537, "grad_norm": 0.0, "learning_rate": 1.4559449194310926e-05, "loss": 0.9406, "step": 1715 }, { "epoch": 0.36911163691116367, "grad_norm": 0.0, "learning_rate": 1.455324706223559e-05, "loss": 0.9525, "step": 1716 }, { "epoch": 0.3693267369326737, "grad_norm": 0.0, "learning_rate": 1.4547042719813177e-05, "loss": 0.8807, "step": 1717 }, { "epoch": 0.3695418369541837, "grad_norm": 0.0, "learning_rate": 1.4540836170055552e-05, "loss": 0.8947, "step": 1718 }, { "epoch": 0.3697569369756937, "grad_norm": 0.0, "learning_rate": 1.4534627415975646e-05, "loss": 0.8799, "step": 1719 }, { "epoch": 0.3699720369972037, "grad_norm": 0.0, "learning_rate": 1.4528416460587465e-05, "loss": 0.8556, "step": 1720 }, { "epoch": 0.3701871370187137, "grad_norm": 0.0, "learning_rate": 1.4522203306906076e-05, "loss": 1.0023, "step": 1721 }, { "epoch": 0.3704022370402237, "grad_norm": 0.0, "learning_rate": 1.451598795794762e-05, "loss": 0.9314, "step": 1722 }, { "epoch": 0.3706173370617337, "grad_norm": 0.0, "learning_rate": 1.4509770416729301e-05, "loss": 0.9, "step": 1723 }, { "epoch": 0.3708324370832437, "grad_norm": 0.0, "learning_rate": 1.450355068626939e-05, "loss": 0.8956, "step": 1724 }, { "epoch": 0.3710475371047537, "grad_norm": 0.0, "learning_rate": 1.449732876958722e-05, "loss": 0.9443, "step": 1725 }, { "epoch": 0.37126263712626373, "grad_norm": 0.0, "learning_rate": 1.449110466970318e-05, "loss": 0.9341, "step": 1726 }, { "epoch": 0.3714777371477737, "grad_norm": 0.0, "learning_rate": 1.4484878389638728e-05, "loss": 0.9174, "step": 1727 }, { "epoch": 0.3716928371692837, "grad_norm": 0.0, "learning_rate": 1.4478649932416367e-05, "loss": 0.9566, "step": 1728 }, { "epoch": 0.37190793719079374, "grad_norm": 0.0, "learning_rate": 1.4472419301059673e-05, "loss": 0.8323, "step": 1729 }, { "epoch": 0.3721230372123037, "grad_norm": 0.0, "learning_rate": 1.4466186498593265e-05, "loss": 0.9201, "step": 1730 }, { "epoch": 0.3723381372338137, "grad_norm": 0.0, "learning_rate": 1.4459951528042821e-05, "loss": 0.9433, "step": 1731 }, { "epoch": 0.37255323725532374, "grad_norm": 0.0, "learning_rate": 1.4453714392435069e-05, "loss": 0.9156, "step": 1732 }, { "epoch": 0.37276833727683373, "grad_norm": 0.0, "learning_rate": 1.4447475094797796e-05, "loss": 0.8842, "step": 1733 }, { "epoch": 0.3729834372983437, "grad_norm": 0.0, "learning_rate": 1.444123363815983e-05, "loss": 0.9086, "step": 1734 }, { "epoch": 0.37319853731985375, "grad_norm": 0.0, "learning_rate": 1.4434990025551046e-05, "loss": 0.9427, "step": 1735 }, { "epoch": 0.37341363734136374, "grad_norm": 0.0, "learning_rate": 1.442874426000237e-05, "loss": 0.9385, "step": 1736 }, { "epoch": 0.3736287373628737, "grad_norm": 0.0, "learning_rate": 1.4422496344545776e-05, "loss": 0.8722, "step": 1737 }, { "epoch": 0.37384383738438376, "grad_norm": 0.0, "learning_rate": 1.4416246282214275e-05, "loss": 0.8745, "step": 1738 }, { "epoch": 0.37405893740589374, "grad_norm": 0.0, "learning_rate": 1.4409994076041922e-05, "loss": 0.9344, "step": 1739 }, { "epoch": 0.3742740374274037, "grad_norm": 0.0, "learning_rate": 1.4403739729063817e-05, "loss": 0.8542, "step": 1740 }, { "epoch": 0.37448913744891377, "grad_norm": 0.0, "learning_rate": 1.4397483244316095e-05, "loss": 0.9249, "step": 1741 }, { "epoch": 0.37470423747042375, "grad_norm": 0.0, "learning_rate": 1.4391224624835928e-05, "loss": 0.8548, "step": 1742 }, { "epoch": 0.37491933749193374, "grad_norm": 0.0, "learning_rate": 1.4384963873661528e-05, "loss": 0.9514, "step": 1743 }, { "epoch": 0.3751344375134438, "grad_norm": 0.0, "learning_rate": 1.437870099383214e-05, "loss": 0.8714, "step": 1744 }, { "epoch": 0.37534953753495376, "grad_norm": 0.0, "learning_rate": 1.437243598838804e-05, "loss": 0.8158, "step": 1745 }, { "epoch": 0.37556463755646374, "grad_norm": 0.0, "learning_rate": 1.436616886037054e-05, "loss": 0.8196, "step": 1746 }, { "epoch": 0.3757797375779738, "grad_norm": 0.0, "learning_rate": 1.4359899612821981e-05, "loss": 0.9784, "step": 1747 }, { "epoch": 0.37599483759948377, "grad_norm": 0.0, "learning_rate": 1.435362824878573e-05, "loss": 0.9334, "step": 1748 }, { "epoch": 0.37620993762099375, "grad_norm": 0.0, "learning_rate": 1.4347354771306193e-05, "loss": 0.9912, "step": 1749 }, { "epoch": 0.3764250376425038, "grad_norm": 0.0, "learning_rate": 1.4341079183428776e-05, "loss": 0.9196, "step": 1750 }, { "epoch": 0.3766401376640138, "grad_norm": 0.0, "learning_rate": 1.4334801488199941e-05, "loss": 0.9339, "step": 1751 }, { "epoch": 0.37685523768552376, "grad_norm": 0.0, "learning_rate": 1.4328521688667152e-05, "loss": 0.9069, "step": 1752 }, { "epoch": 0.3770703377070338, "grad_norm": 0.0, "learning_rate": 1.4322239787878902e-05, "loss": 1.013, "step": 1753 }, { "epoch": 0.3772854377285438, "grad_norm": 0.0, "learning_rate": 1.43159557888847e-05, "loss": 0.8566, "step": 1754 }, { "epoch": 0.37750053775005377, "grad_norm": 0.0, "learning_rate": 1.4309669694735085e-05, "loss": 0.8762, "step": 1755 }, { "epoch": 0.37771563777156375, "grad_norm": 0.0, "learning_rate": 1.4303381508481595e-05, "loss": 0.8951, "step": 1756 }, { "epoch": 0.3779307377930738, "grad_norm": 0.0, "learning_rate": 1.4297091233176796e-05, "loss": 0.9563, "step": 1757 }, { "epoch": 0.3781458378145838, "grad_norm": 0.0, "learning_rate": 1.4290798871874267e-05, "loss": 0.9086, "step": 1758 }, { "epoch": 0.37836093783609376, "grad_norm": 0.0, "learning_rate": 1.4284504427628598e-05, "loss": 0.8805, "step": 1759 }, { "epoch": 0.3785760378576038, "grad_norm": 0.0, "learning_rate": 1.4278207903495388e-05, "loss": 0.8947, "step": 1760 }, { "epoch": 0.3787911378791138, "grad_norm": 0.0, "learning_rate": 1.427190930253125e-05, "loss": 0.9048, "step": 1761 }, { "epoch": 0.37900623790062377, "grad_norm": 0.0, "learning_rate": 1.4265608627793803e-05, "loss": 0.9219, "step": 1762 }, { "epoch": 0.3792213379221338, "grad_norm": 0.0, "learning_rate": 1.425930588234167e-05, "loss": 0.9345, "step": 1763 }, { "epoch": 0.3794364379436438, "grad_norm": 0.0, "learning_rate": 1.4253001069234483e-05, "loss": 0.8421, "step": 1764 }, { "epoch": 0.3796515379651538, "grad_norm": 0.0, "learning_rate": 1.4246694191532874e-05, "loss": 0.8909, "step": 1765 }, { "epoch": 0.3798666379866638, "grad_norm": 0.0, "learning_rate": 1.4240385252298485e-05, "loss": 0.856, "step": 1766 }, { "epoch": 0.3800817380081738, "grad_norm": 0.0, "learning_rate": 1.423407425459395e-05, "loss": 0.8585, "step": 1767 }, { "epoch": 0.3802968380296838, "grad_norm": 0.0, "learning_rate": 1.4227761201482908e-05, "loss": 0.8645, "step": 1768 }, { "epoch": 0.3805119380511938, "grad_norm": 0.0, "learning_rate": 1.4221446096029992e-05, "loss": 0.9566, "step": 1769 }, { "epoch": 0.3807270380727038, "grad_norm": 0.0, "learning_rate": 1.4215128941300835e-05, "loss": 0.9548, "step": 1770 }, { "epoch": 0.3809421380942138, "grad_norm": 0.0, "learning_rate": 1.420880974036206e-05, "loss": 0.8953, "step": 1771 }, { "epoch": 0.38115723811572383, "grad_norm": 0.0, "learning_rate": 1.4202488496281283e-05, "loss": 0.9062, "step": 1772 }, { "epoch": 0.3813723381372338, "grad_norm": 0.0, "learning_rate": 1.4196165212127122e-05, "loss": 0.8989, "step": 1773 }, { "epoch": 0.3815874381587438, "grad_norm": 0.0, "learning_rate": 1.4189839890969174e-05, "loss": 0.8809, "step": 1774 }, { "epoch": 0.38180253818025384, "grad_norm": 0.0, "learning_rate": 1.4183512535878032e-05, "loss": 0.947, "step": 1775 }, { "epoch": 0.3820176382017638, "grad_norm": 0.0, "learning_rate": 1.4177183149925266e-05, "loss": 0.9095, "step": 1776 }, { "epoch": 0.3822327382232738, "grad_norm": 0.0, "learning_rate": 1.4170851736183452e-05, "loss": 0.8512, "step": 1777 }, { "epoch": 0.38244783824478384, "grad_norm": 0.0, "learning_rate": 1.4164518297726129e-05, "loss": 0.862, "step": 1778 }, { "epoch": 0.38266293826629383, "grad_norm": 0.0, "learning_rate": 1.4158182837627827e-05, "loss": 0.8526, "step": 1779 }, { "epoch": 0.3828780382878038, "grad_norm": 0.0, "learning_rate": 1.4151845358964058e-05, "loss": 0.856, "step": 1780 }, { "epoch": 0.38309313830931385, "grad_norm": 0.0, "learning_rate": 1.4145505864811319e-05, "loss": 0.9, "step": 1781 }, { "epoch": 0.38330823833082384, "grad_norm": 0.0, "learning_rate": 1.4139164358247074e-05, "loss": 0.8542, "step": 1782 }, { "epoch": 0.3835233383523338, "grad_norm": 0.0, "learning_rate": 1.4132820842349778e-05, "loss": 0.882, "step": 1783 }, { "epoch": 0.38373843837384386, "grad_norm": 0.0, "learning_rate": 1.4126475320198845e-05, "loss": 0.8639, "step": 1784 }, { "epoch": 0.38395353839535384, "grad_norm": 0.0, "learning_rate": 1.4120127794874675e-05, "loss": 0.8563, "step": 1785 }, { "epoch": 0.38416863841686383, "grad_norm": 0.0, "learning_rate": 1.411377826945864e-05, "loss": 0.8972, "step": 1786 }, { "epoch": 0.38438373843837387, "grad_norm": 0.0, "learning_rate": 1.4107426747033075e-05, "loss": 0.8981, "step": 1787 }, { "epoch": 0.38459883845988385, "grad_norm": 0.0, "learning_rate": 1.4101073230681292e-05, "loss": 0.8329, "step": 1788 }, { "epoch": 0.38481393848139384, "grad_norm": 0.0, "learning_rate": 1.4094717723487565e-05, "loss": 0.8726, "step": 1789 }, { "epoch": 0.3850290385029039, "grad_norm": 0.0, "learning_rate": 1.4088360228537144e-05, "loss": 0.8814, "step": 1790 }, { "epoch": 0.38524413852441386, "grad_norm": 0.0, "learning_rate": 1.4082000748916232e-05, "loss": 0.8579, "step": 1791 }, { "epoch": 0.38545923854592384, "grad_norm": 0.0, "learning_rate": 1.4075639287712006e-05, "loss": 0.9411, "step": 1792 }, { "epoch": 0.3856743385674339, "grad_norm": 0.0, "learning_rate": 1.4069275848012596e-05, "loss": 0.9025, "step": 1793 }, { "epoch": 0.38588943858894387, "grad_norm": 0.0, "learning_rate": 1.4062910432907094e-05, "loss": 0.9487, "step": 1794 }, { "epoch": 0.38610453861045385, "grad_norm": 0.0, "learning_rate": 1.4056543045485558e-05, "loss": 0.9148, "step": 1795 }, { "epoch": 0.3863196386319639, "grad_norm": 0.0, "learning_rate": 1.4050173688839e-05, "loss": 0.8768, "step": 1796 }, { "epoch": 0.3865347386534739, "grad_norm": 0.0, "learning_rate": 1.4043802366059383e-05, "loss": 0.9105, "step": 1797 }, { "epoch": 0.38674983867498386, "grad_norm": 0.0, "learning_rate": 1.4037429080239629e-05, "loss": 0.9424, "step": 1798 }, { "epoch": 0.38696493869649384, "grad_norm": 0.0, "learning_rate": 1.4031053834473614e-05, "loss": 0.8852, "step": 1799 }, { "epoch": 0.3871800387180039, "grad_norm": 0.0, "learning_rate": 1.4024676631856162e-05, "loss": 0.9939, "step": 1800 }, { "epoch": 0.38739513873951387, "grad_norm": 0.0, "learning_rate": 1.4018297475483049e-05, "loss": 0.871, "step": 1801 }, { "epoch": 0.38761023876102385, "grad_norm": 0.0, "learning_rate": 1.4011916368451e-05, "loss": 0.8778, "step": 1802 }, { "epoch": 0.3878253387825339, "grad_norm": 0.0, "learning_rate": 1.4005533313857686e-05, "loss": 0.906, "step": 1803 }, { "epoch": 0.3880404388040439, "grad_norm": 0.0, "learning_rate": 1.3999148314801722e-05, "loss": 0.9358, "step": 1804 }, { "epoch": 0.38825553882555386, "grad_norm": 0.0, "learning_rate": 1.399276137438267e-05, "loss": 0.8868, "step": 1805 }, { "epoch": 0.3884706388470639, "grad_norm": 0.0, "learning_rate": 1.398637249570103e-05, "loss": 0.8854, "step": 1806 }, { "epoch": 0.3886857388685739, "grad_norm": 0.0, "learning_rate": 1.397998168185825e-05, "loss": 0.8583, "step": 1807 }, { "epoch": 0.38890083889008387, "grad_norm": 0.0, "learning_rate": 1.397358893595671e-05, "loss": 0.9139, "step": 1808 }, { "epoch": 0.3891159389115939, "grad_norm": 0.0, "learning_rate": 1.396719426109973e-05, "loss": 0.9681, "step": 1809 }, { "epoch": 0.3893310389331039, "grad_norm": 0.0, "learning_rate": 1.396079766039157e-05, "loss": 0.9382, "step": 1810 }, { "epoch": 0.3895461389546139, "grad_norm": 0.0, "learning_rate": 1.395439913693742e-05, "loss": 0.9427, "step": 1811 }, { "epoch": 0.3897612389761239, "grad_norm": 0.0, "learning_rate": 1.3947998693843407e-05, "loss": 0.9697, "step": 1812 }, { "epoch": 0.3899763389976339, "grad_norm": 0.0, "learning_rate": 1.3941596334216592e-05, "loss": 0.8143, "step": 1813 }, { "epoch": 0.3901914390191439, "grad_norm": 0.0, "learning_rate": 1.3935192061164957e-05, "loss": 0.819, "step": 1814 }, { "epoch": 0.3904065390406539, "grad_norm": 0.0, "learning_rate": 1.392878587779742e-05, "loss": 0.8667, "step": 1815 }, { "epoch": 0.3906216390621639, "grad_norm": 0.0, "learning_rate": 1.3922377787223828e-05, "loss": 0.8553, "step": 1816 }, { "epoch": 0.3908367390836739, "grad_norm": 0.0, "learning_rate": 1.3915967792554947e-05, "loss": 0.9214, "step": 1817 }, { "epoch": 0.39105183910518393, "grad_norm": 0.0, "learning_rate": 1.3909555896902478e-05, "loss": 0.8689, "step": 1818 }, { "epoch": 0.3912669391266939, "grad_norm": 0.0, "learning_rate": 1.390314210337903e-05, "loss": 0.8627, "step": 1819 }, { "epoch": 0.3914820391482039, "grad_norm": 0.0, "learning_rate": 1.3896726415098147e-05, "loss": 0.9638, "step": 1820 }, { "epoch": 0.39169713916971394, "grad_norm": 0.0, "learning_rate": 1.3890308835174284e-05, "loss": 0.9366, "step": 1821 }, { "epoch": 0.3919122391912239, "grad_norm": 0.0, "learning_rate": 1.388388936672282e-05, "loss": 0.8771, "step": 1822 }, { "epoch": 0.3921273392127339, "grad_norm": 0.0, "learning_rate": 1.3877468012860047e-05, "loss": 0.8946, "step": 1823 }, { "epoch": 0.39234243923424394, "grad_norm": 0.0, "learning_rate": 1.3871044776703173e-05, "loss": 0.8707, "step": 1824 }, { "epoch": 0.39255753925575393, "grad_norm": 0.0, "learning_rate": 1.3864619661370321e-05, "loss": 0.9297, "step": 1825 }, { "epoch": 0.3927726392772639, "grad_norm": 0.0, "learning_rate": 1.3858192669980529e-05, "loss": 0.905, "step": 1826 }, { "epoch": 0.39298773929877395, "grad_norm": 0.0, "learning_rate": 1.3851763805653735e-05, "loss": 0.8928, "step": 1827 }, { "epoch": 0.39320283932028394, "grad_norm": 0.0, "learning_rate": 1.38453330715108e-05, "loss": 0.8528, "step": 1828 }, { "epoch": 0.3934179393417939, "grad_norm": 0.0, "learning_rate": 1.3838900470673482e-05, "loss": 0.9355, "step": 1829 }, { "epoch": 0.39363303936330396, "grad_norm": 0.0, "learning_rate": 1.3832466006264451e-05, "loss": 0.9155, "step": 1830 }, { "epoch": 0.39384813938481394, "grad_norm": 0.0, "learning_rate": 1.382602968140728e-05, "loss": 0.8896, "step": 1831 }, { "epoch": 0.39406323940632393, "grad_norm": 0.0, "learning_rate": 1.3819591499226446e-05, "loss": 0.8921, "step": 1832 }, { "epoch": 0.39427833942783397, "grad_norm": 0.0, "learning_rate": 1.3813151462847323e-05, "loss": 0.8622, "step": 1833 }, { "epoch": 0.39449343944934395, "grad_norm": 0.0, "learning_rate": 1.3806709575396195e-05, "loss": 0.8891, "step": 1834 }, { "epoch": 0.39470853947085394, "grad_norm": 0.0, "learning_rate": 1.3800265840000236e-05, "loss": 0.9194, "step": 1835 }, { "epoch": 0.394923639492364, "grad_norm": 0.0, "learning_rate": 1.3793820259787519e-05, "loss": 0.9322, "step": 1836 }, { "epoch": 0.39513873951387396, "grad_norm": 0.0, "learning_rate": 1.3787372837887007e-05, "loss": 0.9211, "step": 1837 }, { "epoch": 0.39535383953538394, "grad_norm": 0.0, "learning_rate": 1.3780923577428574e-05, "loss": 0.8446, "step": 1838 }, { "epoch": 0.39556893955689393, "grad_norm": 0.0, "learning_rate": 1.3774472481542969e-05, "loss": 0.8529, "step": 1839 }, { "epoch": 0.39578403957840397, "grad_norm": 0.0, "learning_rate": 1.376801955336184e-05, "loss": 0.9241, "step": 1840 }, { "epoch": 0.39599913959991395, "grad_norm": 0.0, "learning_rate": 1.3761564796017724e-05, "loss": 0.905, "step": 1841 }, { "epoch": 0.39621423962142394, "grad_norm": 0.0, "learning_rate": 1.3755108212644044e-05, "loss": 0.9177, "step": 1842 }, { "epoch": 0.396429339642934, "grad_norm": 0.0, "learning_rate": 1.3748649806375112e-05, "loss": 0.9038, "step": 1843 }, { "epoch": 0.39664443966444396, "grad_norm": 0.0, "learning_rate": 1.3742189580346122e-05, "loss": 0.873, "step": 1844 }, { "epoch": 0.39685953968595394, "grad_norm": 0.0, "learning_rate": 1.3735727537693152e-05, "loss": 0.8978, "step": 1845 }, { "epoch": 0.397074639707464, "grad_norm": 0.0, "learning_rate": 1.3729263681553165e-05, "loss": 0.9516, "step": 1846 }, { "epoch": 0.39728973972897397, "grad_norm": 0.0, "learning_rate": 1.3722798015064003e-05, "loss": 0.8865, "step": 1847 }, { "epoch": 0.39750483975048395, "grad_norm": 0.0, "learning_rate": 1.371633054136438e-05, "loss": 0.874, "step": 1848 }, { "epoch": 0.397719939771994, "grad_norm": 0.0, "learning_rate": 1.37098612635939e-05, "loss": 0.8982, "step": 1849 }, { "epoch": 0.397935039793504, "grad_norm": 0.0, "learning_rate": 1.3703390184893034e-05, "loss": 0.8921, "step": 1850 }, { "epoch": 0.39815013981501396, "grad_norm": 0.0, "learning_rate": 1.3696917308403123e-05, "loss": 0.875, "step": 1851 }, { "epoch": 0.398365239836524, "grad_norm": 0.0, "learning_rate": 1.3690442637266398e-05, "loss": 0.9117, "step": 1852 }, { "epoch": 0.398580339858034, "grad_norm": 0.0, "learning_rate": 1.3683966174625942e-05, "loss": 0.8856, "step": 1853 }, { "epoch": 0.39879543987954397, "grad_norm": 0.0, "learning_rate": 1.3677487923625719e-05, "loss": 0.8696, "step": 1854 }, { "epoch": 0.399010539901054, "grad_norm": 0.0, "learning_rate": 1.3671007887410553e-05, "loss": 0.843, "step": 1855 }, { "epoch": 0.399225639922564, "grad_norm": 0.0, "learning_rate": 1.3664526069126148e-05, "loss": 0.8491, "step": 1856 }, { "epoch": 0.399440739944074, "grad_norm": 0.0, "learning_rate": 1.365804247191906e-05, "loss": 0.9062, "step": 1857 }, { "epoch": 0.399655839965584, "grad_norm": 0.0, "learning_rate": 1.3651557098936712e-05, "loss": 0.9104, "step": 1858 }, { "epoch": 0.399870939987094, "grad_norm": 0.0, "learning_rate": 1.364506995332739e-05, "loss": 0.8826, "step": 1859 }, { "epoch": 0.400086040008604, "grad_norm": 0.0, "learning_rate": 1.3638581038240242e-05, "loss": 0.9801, "step": 1860 }, { "epoch": 0.400301140030114, "grad_norm": 0.0, "learning_rate": 1.3632090356825279e-05, "loss": 0.9257, "step": 1861 }, { "epoch": 0.400516240051624, "grad_norm": 0.0, "learning_rate": 1.3625597912233355e-05, "loss": 0.9084, "step": 1862 }, { "epoch": 0.400731340073134, "grad_norm": 0.0, "learning_rate": 1.3619103707616198e-05, "loss": 0.8436, "step": 1863 }, { "epoch": 0.40094644009464403, "grad_norm": 0.0, "learning_rate": 1.3612607746126375e-05, "loss": 0.883, "step": 1864 }, { "epoch": 0.401161540116154, "grad_norm": 0.0, "learning_rate": 1.3606110030917311e-05, "loss": 0.9265, "step": 1865 }, { "epoch": 0.401376640137664, "grad_norm": 0.0, "learning_rate": 1.3599610565143291e-05, "loss": 0.8932, "step": 1866 }, { "epoch": 0.40159174015917404, "grad_norm": 0.0, "learning_rate": 1.3593109351959439e-05, "loss": 0.898, "step": 1867 }, { "epoch": 0.401806840180684, "grad_norm": 0.0, "learning_rate": 1.3586606394521729e-05, "loss": 0.8936, "step": 1868 }, { "epoch": 0.402021940202194, "grad_norm": 0.0, "learning_rate": 1.3580101695986985e-05, "loss": 0.8906, "step": 1869 }, { "epoch": 0.40223704022370405, "grad_norm": 0.0, "learning_rate": 1.3573595259512877e-05, "loss": 0.8548, "step": 1870 }, { "epoch": 0.40245214024521403, "grad_norm": 0.0, "learning_rate": 1.3567087088257913e-05, "loss": 0.9231, "step": 1871 }, { "epoch": 0.402667240266724, "grad_norm": 0.0, "learning_rate": 1.356057718538145e-05, "loss": 0.9209, "step": 1872 }, { "epoch": 0.40288234028823405, "grad_norm": 0.0, "learning_rate": 1.3554065554043675e-05, "loss": 0.9232, "step": 1873 }, { "epoch": 0.40309744030974404, "grad_norm": 0.0, "learning_rate": 1.3547552197405632e-05, "loss": 0.8643, "step": 1874 }, { "epoch": 0.403312540331254, "grad_norm": 0.0, "learning_rate": 1.3541037118629181e-05, "loss": 0.9196, "step": 1875 }, { "epoch": 0.40352764035276406, "grad_norm": 0.0, "learning_rate": 1.3534520320877036e-05, "loss": 0.8398, "step": 1876 }, { "epoch": 0.40374274037427405, "grad_norm": 0.0, "learning_rate": 1.3528001807312734e-05, "loss": 0.8372, "step": 1877 }, { "epoch": 0.40395784039578403, "grad_norm": 0.0, "learning_rate": 1.3521481581100657e-05, "loss": 0.8664, "step": 1878 }, { "epoch": 0.40417294041729407, "grad_norm": 0.0, "learning_rate": 1.3514959645406e-05, "loss": 0.8969, "step": 1879 }, { "epoch": 0.40438804043880405, "grad_norm": 0.0, "learning_rate": 1.3508436003394804e-05, "loss": 0.9254, "step": 1880 }, { "epoch": 0.40460314046031404, "grad_norm": 0.0, "learning_rate": 1.3501910658233932e-05, "loss": 0.9016, "step": 1881 }, { "epoch": 0.404818240481824, "grad_norm": 0.0, "learning_rate": 1.3495383613091073e-05, "loss": 0.9003, "step": 1882 }, { "epoch": 0.40503334050333406, "grad_norm": 0.0, "learning_rate": 1.3488854871134746e-05, "loss": 0.8732, "step": 1883 }, { "epoch": 0.40524844052484404, "grad_norm": 0.0, "learning_rate": 1.348232443553429e-05, "loss": 0.9221, "step": 1884 }, { "epoch": 0.40546354054635403, "grad_norm": 0.0, "learning_rate": 1.3475792309459862e-05, "loss": 0.8695, "step": 1885 }, { "epoch": 0.40567864056786407, "grad_norm": 0.0, "learning_rate": 1.3469258496082452e-05, "loss": 0.9422, "step": 1886 }, { "epoch": 0.40589374058937405, "grad_norm": 0.0, "learning_rate": 1.3462722998573854e-05, "loss": 0.9474, "step": 1887 }, { "epoch": 0.40610884061088404, "grad_norm": 0.0, "learning_rate": 1.345618582010669e-05, "loss": 0.9389, "step": 1888 }, { "epoch": 0.4063239406323941, "grad_norm": 0.0, "learning_rate": 1.3449646963854396e-05, "loss": 0.8372, "step": 1889 }, { "epoch": 0.40653904065390406, "grad_norm": 0.0, "learning_rate": 1.3443106432991218e-05, "loss": 0.9275, "step": 1890 }, { "epoch": 0.40675414067541404, "grad_norm": 0.0, "learning_rate": 1.3436564230692221e-05, "loss": 0.8733, "step": 1891 }, { "epoch": 0.4069692406969241, "grad_norm": 0.0, "learning_rate": 1.3430020360133283e-05, "loss": 0.9065, "step": 1892 }, { "epoch": 0.40718434071843407, "grad_norm": 0.0, "learning_rate": 1.3423474824491081e-05, "loss": 0.904, "step": 1893 }, { "epoch": 0.40739944073994405, "grad_norm": 0.0, "learning_rate": 1.3416927626943105e-05, "loss": 0.894, "step": 1894 }, { "epoch": 0.4076145407614541, "grad_norm": 0.0, "learning_rate": 1.341037877066766e-05, "loss": 0.9006, "step": 1895 }, { "epoch": 0.4078296407829641, "grad_norm": 0.0, "learning_rate": 1.3403828258843847e-05, "loss": 0.8571, "step": 1896 }, { "epoch": 0.40804474080447406, "grad_norm": 0.0, "learning_rate": 1.3397276094651575e-05, "loss": 0.9183, "step": 1897 }, { "epoch": 0.4082598408259841, "grad_norm": 0.0, "learning_rate": 1.3390722281271553e-05, "loss": 0.9314, "step": 1898 }, { "epoch": 0.4084749408474941, "grad_norm": 0.0, "learning_rate": 1.338416682188529e-05, "loss": 0.8895, "step": 1899 }, { "epoch": 0.40869004086900407, "grad_norm": 0.0, "learning_rate": 1.3377609719675096e-05, "loss": 0.8404, "step": 1900 }, { "epoch": 0.4089051408905141, "grad_norm": 0.0, "learning_rate": 1.3371050977824077e-05, "loss": 0.9122, "step": 1901 }, { "epoch": 0.4091202409120241, "grad_norm": 0.0, "learning_rate": 1.3364490599516135e-05, "loss": 0.9219, "step": 1902 }, { "epoch": 0.4093353409335341, "grad_norm": 0.0, "learning_rate": 1.3357928587935968e-05, "loss": 0.9313, "step": 1903 }, { "epoch": 0.4095504409550441, "grad_norm": 0.0, "learning_rate": 1.3351364946269071e-05, "loss": 0.9514, "step": 1904 }, { "epoch": 0.4097655409765541, "grad_norm": 0.0, "learning_rate": 1.334479967770172e-05, "loss": 0.8505, "step": 1905 }, { "epoch": 0.4099806409980641, "grad_norm": 0.0, "learning_rate": 1.3338232785420988e-05, "loss": 0.8356, "step": 1906 }, { "epoch": 0.4101957410195741, "grad_norm": 0.0, "learning_rate": 1.3331664272614733e-05, "loss": 0.8273, "step": 1907 }, { "epoch": 0.4104108410410841, "grad_norm": 0.0, "learning_rate": 1.3325094142471602e-05, "loss": 0.8633, "step": 1908 }, { "epoch": 0.4106259410625941, "grad_norm": 0.0, "learning_rate": 1.3318522398181023e-05, "loss": 0.8923, "step": 1909 }, { "epoch": 0.41084104108410413, "grad_norm": 0.0, "learning_rate": 1.3311949042933216e-05, "loss": 0.9089, "step": 1910 }, { "epoch": 0.4110561411056141, "grad_norm": 0.0, "learning_rate": 1.3305374079919178e-05, "loss": 0.8617, "step": 1911 }, { "epoch": 0.4112712411271241, "grad_norm": 0.0, "learning_rate": 1.3298797512330677e-05, "loss": 0.8599, "step": 1912 }, { "epoch": 0.41148634114863414, "grad_norm": 0.0, "learning_rate": 1.329221934336028e-05, "loss": 0.9471, "step": 1913 }, { "epoch": 0.4117014411701441, "grad_norm": 0.0, "learning_rate": 1.3285639576201315e-05, "loss": 0.8817, "step": 1914 }, { "epoch": 0.4119165411916541, "grad_norm": 0.0, "learning_rate": 1.3279058214047895e-05, "loss": 0.8725, "step": 1915 }, { "epoch": 0.41213164121316415, "grad_norm": 0.0, "learning_rate": 1.3272475260094897e-05, "loss": 0.9, "step": 1916 }, { "epoch": 0.41234674123467413, "grad_norm": 0.0, "learning_rate": 1.3265890717537983e-05, "loss": 0.9205, "step": 1917 }, { "epoch": 0.4125618412561841, "grad_norm": 0.0, "learning_rate": 1.3259304589573581e-05, "loss": 0.9393, "step": 1918 }, { "epoch": 0.41277694127769415, "grad_norm": 0.0, "learning_rate": 1.3252716879398884e-05, "loss": 0.9646, "step": 1919 }, { "epoch": 0.41299204129920414, "grad_norm": 0.0, "learning_rate": 1.3246127590211859e-05, "loss": 0.8889, "step": 1920 }, { "epoch": 0.4132071413207141, "grad_norm": 0.0, "learning_rate": 1.323953672521124e-05, "loss": 0.9257, "step": 1921 }, { "epoch": 0.41342224134222416, "grad_norm": 0.0, "learning_rate": 1.323294428759652e-05, "loss": 0.9172, "step": 1922 }, { "epoch": 0.41363734136373415, "grad_norm": 0.0, "learning_rate": 1.3226350280567965e-05, "loss": 0.8958, "step": 1923 }, { "epoch": 0.41385244138524413, "grad_norm": 0.0, "learning_rate": 1.3219754707326592e-05, "loss": 0.8886, "step": 1924 }, { "epoch": 0.4140675414067541, "grad_norm": 0.0, "learning_rate": 1.321315757107418e-05, "loss": 0.9036, "step": 1925 }, { "epoch": 0.41428264142826415, "grad_norm": 0.0, "learning_rate": 1.320655887501328e-05, "loss": 0.921, "step": 1926 }, { "epoch": 0.41449774144977414, "grad_norm": 0.0, "learning_rate": 1.3199958622347183e-05, "loss": 0.9295, "step": 1927 }, { "epoch": 0.4147128414712841, "grad_norm": 0.0, "learning_rate": 1.3193356816279945e-05, "loss": 0.8753, "step": 1928 }, { "epoch": 0.41492794149279416, "grad_norm": 0.0, "learning_rate": 1.3186753460016376e-05, "loss": 0.8717, "step": 1929 }, { "epoch": 0.41514304151430415, "grad_norm": 0.0, "learning_rate": 1.3180148556762031e-05, "loss": 0.857, "step": 1930 }, { "epoch": 0.41535814153581413, "grad_norm": 0.0, "learning_rate": 1.3173542109723227e-05, "loss": 0.888, "step": 1931 }, { "epoch": 0.41557324155732417, "grad_norm": 0.0, "learning_rate": 1.3166934122107025e-05, "loss": 0.8493, "step": 1932 }, { "epoch": 0.41578834157883415, "grad_norm": 0.0, "learning_rate": 1.3160324597121228e-05, "loss": 0.8767, "step": 1933 }, { "epoch": 0.41600344160034414, "grad_norm": 0.0, "learning_rate": 1.3153713537974396e-05, "loss": 0.9203, "step": 1934 }, { "epoch": 0.4162185416218542, "grad_norm": 0.0, "learning_rate": 1.3147100947875826e-05, "loss": 0.8639, "step": 1935 }, { "epoch": 0.41643364164336416, "grad_norm": 0.0, "learning_rate": 1.3140486830035568e-05, "loss": 0.8801, "step": 1936 }, { "epoch": 0.41664874166487414, "grad_norm": 0.0, "learning_rate": 1.31338711876644e-05, "loss": 0.9866, "step": 1937 }, { "epoch": 0.4168638416863842, "grad_norm": 0.0, "learning_rate": 1.3127254023973846e-05, "loss": 0.9479, "step": 1938 }, { "epoch": 0.41707894170789417, "grad_norm": 0.0, "learning_rate": 1.3120635342176171e-05, "loss": 0.9101, "step": 1939 }, { "epoch": 0.41729404172940415, "grad_norm": 0.0, "learning_rate": 1.311401514548438e-05, "loss": 0.8897, "step": 1940 }, { "epoch": 0.4175091417509142, "grad_norm": 0.0, "learning_rate": 1.3107393437112201e-05, "loss": 0.8602, "step": 1941 }, { "epoch": 0.4177242417724242, "grad_norm": 0.0, "learning_rate": 1.310077022027411e-05, "loss": 0.9395, "step": 1942 }, { "epoch": 0.41793934179393416, "grad_norm": 0.0, "learning_rate": 1.3094145498185305e-05, "loss": 0.8834, "step": 1943 }, { "epoch": 0.4181544418154442, "grad_norm": 0.0, "learning_rate": 1.3087519274061718e-05, "loss": 0.8549, "step": 1944 }, { "epoch": 0.4183695418369542, "grad_norm": 0.0, "learning_rate": 1.3080891551120014e-05, "loss": 0.9326, "step": 1945 }, { "epoch": 0.41858464185846417, "grad_norm": 0.0, "learning_rate": 1.3074262332577579e-05, "loss": 0.8703, "step": 1946 }, { "epoch": 0.4187997418799742, "grad_norm": 0.0, "learning_rate": 1.3067631621652525e-05, "loss": 0.8631, "step": 1947 }, { "epoch": 0.4190148419014842, "grad_norm": 0.0, "learning_rate": 1.30609994215637e-05, "loss": 0.8574, "step": 1948 }, { "epoch": 0.4192299419229942, "grad_norm": 0.0, "learning_rate": 1.3054365735530666e-05, "loss": 0.9482, "step": 1949 }, { "epoch": 0.4194450419445042, "grad_norm": 0.0, "learning_rate": 1.30477305667737e-05, "loss": 0.9064, "step": 1950 }, { "epoch": 0.4196601419660142, "grad_norm": 0.0, "learning_rate": 1.3041093918513812e-05, "loss": 0.9263, "step": 1951 }, { "epoch": 0.4198752419875242, "grad_norm": 0.0, "learning_rate": 1.3034455793972719e-05, "loss": 0.9309, "step": 1952 }, { "epoch": 0.4200903420090342, "grad_norm": 0.0, "learning_rate": 1.3027816196372862e-05, "loss": 0.9047, "step": 1953 }, { "epoch": 0.4203054420305442, "grad_norm": 0.0, "learning_rate": 1.3021175128937395e-05, "loss": 0.9666, "step": 1954 }, { "epoch": 0.4205205420520542, "grad_norm": 0.0, "learning_rate": 1.3014532594890185e-05, "loss": 0.9032, "step": 1955 }, { "epoch": 0.42073564207356423, "grad_norm": 0.0, "learning_rate": 1.3007888597455811e-05, "loss": 0.9053, "step": 1956 }, { "epoch": 0.4209507420950742, "grad_norm": 0.0, "learning_rate": 1.3001243139859562e-05, "loss": 0.8876, "step": 1957 }, { "epoch": 0.4211658421165842, "grad_norm": 0.0, "learning_rate": 1.2994596225327442e-05, "loss": 0.8634, "step": 1958 }, { "epoch": 0.42138094213809424, "grad_norm": 0.0, "learning_rate": 1.298794785708615e-05, "loss": 0.9313, "step": 1959 }, { "epoch": 0.4215960421596042, "grad_norm": 0.0, "learning_rate": 1.2981298038363095e-05, "loss": 0.8807, "step": 1960 }, { "epoch": 0.4218111421811142, "grad_norm": 0.0, "learning_rate": 1.2974646772386399e-05, "loss": 0.8325, "step": 1961 }, { "epoch": 0.42202624220262425, "grad_norm": 0.0, "learning_rate": 1.2967994062384878e-05, "loss": 0.8688, "step": 1962 }, { "epoch": 0.42224134222413423, "grad_norm": 0.0, "learning_rate": 1.2961339911588051e-05, "loss": 0.9461, "step": 1963 }, { "epoch": 0.4224564422456442, "grad_norm": 0.0, "learning_rate": 1.2954684323226137e-05, "loss": 0.9472, "step": 1964 }, { "epoch": 0.42267154226715425, "grad_norm": 0.0, "learning_rate": 1.294802730053005e-05, "loss": 0.9034, "step": 1965 }, { "epoch": 0.42288664228866424, "grad_norm": 0.0, "learning_rate": 1.2941368846731405e-05, "loss": 0.898, "step": 1966 }, { "epoch": 0.4231017423101742, "grad_norm": 0.0, "learning_rate": 1.2934708965062508e-05, "loss": 0.8332, "step": 1967 }, { "epoch": 0.4233168423316842, "grad_norm": 0.0, "learning_rate": 1.2928047658756362e-05, "loss": 0.9275, "step": 1968 }, { "epoch": 0.42353194235319425, "grad_norm": 0.0, "learning_rate": 1.2921384931046651e-05, "loss": 0.8734, "step": 1969 }, { "epoch": 0.42374704237470423, "grad_norm": 0.0, "learning_rate": 1.291472078516777e-05, "loss": 0.8414, "step": 1970 }, { "epoch": 0.4239621423962142, "grad_norm": 0.0, "learning_rate": 1.2908055224354778e-05, "loss": 0.9052, "step": 1971 }, { "epoch": 0.42417724241772425, "grad_norm": 0.0, "learning_rate": 1.2901388251843441e-05, "loss": 0.844, "step": 1972 }, { "epoch": 0.42439234243923424, "grad_norm": 0.0, "learning_rate": 1.2894719870870197e-05, "loss": 0.8645, "step": 1973 }, { "epoch": 0.4246074424607442, "grad_norm": 0.0, "learning_rate": 1.288805008467217e-05, "loss": 0.858, "step": 1974 }, { "epoch": 0.42482254248225426, "grad_norm": 0.0, "learning_rate": 1.2881378896487174e-05, "loss": 0.9439, "step": 1975 }, { "epoch": 0.42503764250376425, "grad_norm": 0.0, "learning_rate": 1.2874706309553697e-05, "loss": 0.9266, "step": 1976 }, { "epoch": 0.42525274252527423, "grad_norm": 0.0, "learning_rate": 1.2868032327110904e-05, "loss": 0.8704, "step": 1977 }, { "epoch": 0.42546784254678427, "grad_norm": 0.0, "learning_rate": 1.2861356952398643e-05, "loss": 0.9228, "step": 1978 }, { "epoch": 0.42568294256829425, "grad_norm": 0.0, "learning_rate": 1.2854680188657439e-05, "loss": 0.895, "step": 1979 }, { "epoch": 0.42589804258980424, "grad_norm": 0.0, "learning_rate": 1.284800203912848e-05, "loss": 0.8705, "step": 1980 }, { "epoch": 0.4261131426113143, "grad_norm": 0.0, "learning_rate": 1.284132250705364e-05, "loss": 0.9459, "step": 1981 }, { "epoch": 0.42632824263282426, "grad_norm": 0.0, "learning_rate": 1.2834641595675454e-05, "loss": 0.8987, "step": 1982 }, { "epoch": 0.42654334265433425, "grad_norm": 0.0, "learning_rate": 1.2827959308237137e-05, "loss": 0.9587, "step": 1983 }, { "epoch": 0.4267584426758443, "grad_norm": 0.0, "learning_rate": 1.2821275647982563e-05, "loss": 0.8533, "step": 1984 }, { "epoch": 0.42697354269735427, "grad_norm": 0.0, "learning_rate": 1.2814590618156278e-05, "loss": 0.9141, "step": 1985 }, { "epoch": 0.42718864271886425, "grad_norm": 0.0, "learning_rate": 1.2807904222003489e-05, "loss": 0.858, "step": 1986 }, { "epoch": 0.4274037427403743, "grad_norm": 0.0, "learning_rate": 1.2801216462770065e-05, "loss": 0.8622, "step": 1987 }, { "epoch": 0.4276188427618843, "grad_norm": 0.0, "learning_rate": 1.2794527343702546e-05, "loss": 0.9009, "step": 1988 }, { "epoch": 0.42783394278339426, "grad_norm": 0.0, "learning_rate": 1.2787836868048118e-05, "loss": 0.8341, "step": 1989 }, { "epoch": 0.4280490428049043, "grad_norm": 0.0, "learning_rate": 1.278114503905464e-05, "loss": 0.8735, "step": 1990 }, { "epoch": 0.4282641428264143, "grad_norm": 0.0, "learning_rate": 1.2774451859970614e-05, "loss": 0.9083, "step": 1991 }, { "epoch": 0.42847924284792427, "grad_norm": 0.0, "learning_rate": 1.2767757334045209e-05, "loss": 0.8446, "step": 1992 }, { "epoch": 0.4286943428694343, "grad_norm": 0.0, "learning_rate": 1.2761061464528246e-05, "loss": 0.9031, "step": 1993 }, { "epoch": 0.4289094428909443, "grad_norm": 0.0, "learning_rate": 1.2754364254670192e-05, "loss": 0.8702, "step": 1994 }, { "epoch": 0.4291245429124543, "grad_norm": 0.0, "learning_rate": 1.2747665707722168e-05, "loss": 0.9025, "step": 1995 }, { "epoch": 0.4293396429339643, "grad_norm": 0.0, "learning_rate": 1.2740965826935945e-05, "loss": 1.0105, "step": 1996 }, { "epoch": 0.4295547429554743, "grad_norm": 0.0, "learning_rate": 1.2734264615563941e-05, "loss": 0.8614, "step": 1997 }, { "epoch": 0.4297698429769843, "grad_norm": 0.0, "learning_rate": 1.2727562076859218e-05, "loss": 0.8942, "step": 1998 }, { "epoch": 0.4299849429984943, "grad_norm": 0.0, "learning_rate": 1.2720858214075486e-05, "loss": 0.8377, "step": 1999 }, { "epoch": 0.4302000430200043, "grad_norm": 0.0, "learning_rate": 1.2714153030467095e-05, "loss": 0.8911, "step": 2000 }, { "epoch": 0.4304151430415143, "grad_norm": 0.0, "learning_rate": 1.2707446529289043e-05, "loss": 0.8217, "step": 2001 }, { "epoch": 0.43063024306302433, "grad_norm": 0.0, "learning_rate": 1.2700738713796952e-05, "loss": 0.8554, "step": 2002 }, { "epoch": 0.4308453430845343, "grad_norm": 0.0, "learning_rate": 1.2694029587247097e-05, "loss": 0.9307, "step": 2003 }, { "epoch": 0.4310604431060443, "grad_norm": 0.0, "learning_rate": 1.2687319152896377e-05, "loss": 0.9314, "step": 2004 }, { "epoch": 0.43127554312755434, "grad_norm": 0.0, "learning_rate": 1.2680607414002346e-05, "loss": 0.8944, "step": 2005 }, { "epoch": 0.4314906431490643, "grad_norm": 0.0, "learning_rate": 1.2673894373823171e-05, "loss": 0.8735, "step": 2006 }, { "epoch": 0.4317057431705743, "grad_norm": 0.0, "learning_rate": 1.2667180035617657e-05, "loss": 0.8985, "step": 2007 }, { "epoch": 0.4319208431920843, "grad_norm": 0.0, "learning_rate": 1.2660464402645241e-05, "loss": 0.9021, "step": 2008 }, { "epoch": 0.43213594321359433, "grad_norm": 0.0, "learning_rate": 1.2653747478165988e-05, "loss": 0.8961, "step": 2009 }, { "epoch": 0.4323510432351043, "grad_norm": 0.0, "learning_rate": 1.2647029265440592e-05, "loss": 0.8643, "step": 2010 }, { "epoch": 0.4325661432566143, "grad_norm": 0.0, "learning_rate": 1.2640309767730363e-05, "loss": 0.878, "step": 2011 }, { "epoch": 0.43278124327812434, "grad_norm": 0.0, "learning_rate": 1.263358898829725e-05, "loss": 0.9013, "step": 2012 }, { "epoch": 0.4329963432996343, "grad_norm": 0.0, "learning_rate": 1.2626866930403808e-05, "loss": 0.887, "step": 2013 }, { "epoch": 0.4332114433211443, "grad_norm": 0.0, "learning_rate": 1.2620143597313229e-05, "loss": 0.9073, "step": 2014 }, { "epoch": 0.43342654334265435, "grad_norm": 0.0, "learning_rate": 1.2613418992289307e-05, "loss": 0.8922, "step": 2015 }, { "epoch": 0.43364164336416433, "grad_norm": 0.0, "learning_rate": 1.2606693118596474e-05, "loss": 0.8977, "step": 2016 }, { "epoch": 0.4338567433856743, "grad_norm": 0.0, "learning_rate": 1.2599965979499751e-05, "loss": 0.8869, "step": 2017 }, { "epoch": 0.43407184340718435, "grad_norm": 0.0, "learning_rate": 1.2593237578264802e-05, "loss": 0.857, "step": 2018 }, { "epoch": 0.43428694342869434, "grad_norm": 0.0, "learning_rate": 1.2586507918157878e-05, "loss": 0.8149, "step": 2019 }, { "epoch": 0.4345020434502043, "grad_norm": 0.0, "learning_rate": 1.2579777002445862e-05, "loss": 0.8296, "step": 2020 }, { "epoch": 0.43471714347171436, "grad_norm": 0.0, "learning_rate": 1.257304483439623e-05, "loss": 0.9026, "step": 2021 }, { "epoch": 0.43493224349322435, "grad_norm": 0.0, "learning_rate": 1.2566311417277082e-05, "loss": 0.8382, "step": 2022 }, { "epoch": 0.43514734351473433, "grad_norm": 0.0, "learning_rate": 1.2559576754357111e-05, "loss": 0.9148, "step": 2023 }, { "epoch": 0.43536244353624437, "grad_norm": 0.0, "learning_rate": 1.2552840848905619e-05, "loss": 0.9289, "step": 2024 }, { "epoch": 0.43557754355775435, "grad_norm": 0.0, "learning_rate": 1.2546103704192511e-05, "loss": 0.8939, "step": 2025 }, { "epoch": 0.43579264357926434, "grad_norm": 0.0, "learning_rate": 1.2539365323488298e-05, "loss": 0.8948, "step": 2026 }, { "epoch": 0.4360077436007744, "grad_norm": 0.0, "learning_rate": 1.2532625710064084e-05, "loss": 0.8556, "step": 2027 }, { "epoch": 0.43622284362228436, "grad_norm": 0.0, "learning_rate": 1.2525884867191579e-05, "loss": 0.8354, "step": 2028 }, { "epoch": 0.43643794364379435, "grad_norm": 0.0, "learning_rate": 1.251914279814308e-05, "loss": 0.8729, "step": 2029 }, { "epoch": 0.4366530436653044, "grad_norm": 0.0, "learning_rate": 1.251239950619149e-05, "loss": 0.8712, "step": 2030 }, { "epoch": 0.43686814368681437, "grad_norm": 0.0, "learning_rate": 1.2505654994610299e-05, "loss": 0.9243, "step": 2031 }, { "epoch": 0.43708324370832435, "grad_norm": 0.0, "learning_rate": 1.249890926667359e-05, "loss": 0.8773, "step": 2032 }, { "epoch": 0.4372983437298344, "grad_norm": 0.0, "learning_rate": 1.2492162325656035e-05, "loss": 0.893, "step": 2033 }, { "epoch": 0.4375134437513444, "grad_norm": 0.0, "learning_rate": 1.24854141748329e-05, "loss": 0.8242, "step": 2034 }, { "epoch": 0.43772854377285436, "grad_norm": 0.0, "learning_rate": 1.247866481748003e-05, "loss": 0.8699, "step": 2035 }, { "epoch": 0.4379436437943644, "grad_norm": 0.0, "learning_rate": 1.2471914256873871e-05, "loss": 0.9406, "step": 2036 }, { "epoch": 0.4381587438158744, "grad_norm": 0.0, "learning_rate": 1.2465162496291437e-05, "loss": 0.9143, "step": 2037 }, { "epoch": 0.43837384383738437, "grad_norm": 0.0, "learning_rate": 1.245840953901033e-05, "loss": 0.9109, "step": 2038 }, { "epoch": 0.4385889438588944, "grad_norm": 0.0, "learning_rate": 1.245165538830873e-05, "loss": 0.8434, "step": 2039 }, { "epoch": 0.4388040438804044, "grad_norm": 0.0, "learning_rate": 1.2444900047465409e-05, "loss": 0.8804, "step": 2040 }, { "epoch": 0.4390191439019144, "grad_norm": 0.0, "learning_rate": 1.2438143519759701e-05, "loss": 0.8827, "step": 2041 }, { "epoch": 0.4392342439234244, "grad_norm": 0.0, "learning_rate": 1.2431385808471523e-05, "loss": 0.8478, "step": 2042 }, { "epoch": 0.4394493439449344, "grad_norm": 0.0, "learning_rate": 1.242462691688137e-05, "loss": 0.9132, "step": 2043 }, { "epoch": 0.4396644439664444, "grad_norm": 0.0, "learning_rate": 1.2417866848270304e-05, "loss": 0.8799, "step": 2044 }, { "epoch": 0.4398795439879544, "grad_norm": 0.0, "learning_rate": 1.241110560591996e-05, "loss": 0.9045, "step": 2045 }, { "epoch": 0.4400946440094644, "grad_norm": 0.0, "learning_rate": 1.2404343193112543e-05, "loss": 0.9109, "step": 2046 }, { "epoch": 0.4403097440309744, "grad_norm": 0.0, "learning_rate": 1.2397579613130829e-05, "loss": 0.932, "step": 2047 }, { "epoch": 0.44052484405248443, "grad_norm": 0.0, "learning_rate": 1.2390814869258155e-05, "loss": 0.8809, "step": 2048 }, { "epoch": 0.4407399440739944, "grad_norm": 0.0, "learning_rate": 1.2384048964778428e-05, "loss": 0.863, "step": 2049 }, { "epoch": 0.4409550440955044, "grad_norm": 0.0, "learning_rate": 1.2377281902976116e-05, "loss": 0.8786, "step": 2050 }, { "epoch": 0.4411701441170144, "grad_norm": 0.0, "learning_rate": 1.2370513687136254e-05, "loss": 0.839, "step": 2051 }, { "epoch": 0.4413852441385244, "grad_norm": 0.0, "learning_rate": 1.2363744320544427e-05, "loss": 0.8945, "step": 2052 }, { "epoch": 0.4416003441600344, "grad_norm": 0.0, "learning_rate": 1.2356973806486788e-05, "loss": 0.8032, "step": 2053 }, { "epoch": 0.4418154441815444, "grad_norm": 0.0, "learning_rate": 1.2350202148250039e-05, "loss": 0.911, "step": 2054 }, { "epoch": 0.44203054420305443, "grad_norm": 0.0, "learning_rate": 1.2343429349121445e-05, "loss": 0.9078, "step": 2055 }, { "epoch": 0.4422456442245644, "grad_norm": 0.0, "learning_rate": 1.233665541238882e-05, "loss": 0.8117, "step": 2056 }, { "epoch": 0.4424607442460744, "grad_norm": 0.0, "learning_rate": 1.2329880341340533e-05, "loss": 0.9069, "step": 2057 }, { "epoch": 0.44267584426758444, "grad_norm": 0.0, "learning_rate": 1.2323104139265503e-05, "loss": 0.8582, "step": 2058 }, { "epoch": 0.4428909442890944, "grad_norm": 0.0, "learning_rate": 1.2316326809453201e-05, "loss": 0.9267, "step": 2059 }, { "epoch": 0.4431060443106044, "grad_norm": 0.0, "learning_rate": 1.2309548355193632e-05, "loss": 0.9155, "step": 2060 }, { "epoch": 0.44332114433211445, "grad_norm": 0.0, "learning_rate": 1.2302768779777363e-05, "loss": 0.8911, "step": 2061 }, { "epoch": 0.44353624435362443, "grad_norm": 0.0, "learning_rate": 1.22959880864955e-05, "loss": 0.9129, "step": 2062 }, { "epoch": 0.4437513443751344, "grad_norm": 0.0, "learning_rate": 1.228920627863969e-05, "loss": 0.9005, "step": 2063 }, { "epoch": 0.44396644439664446, "grad_norm": 0.0, "learning_rate": 1.228242335950212e-05, "loss": 0.9429, "step": 2064 }, { "epoch": 0.44418154441815444, "grad_norm": 0.0, "learning_rate": 1.2275639332375522e-05, "loss": 0.8289, "step": 2065 }, { "epoch": 0.4443966444396644, "grad_norm": 0.0, "learning_rate": 1.2268854200553158e-05, "loss": 0.8953, "step": 2066 }, { "epoch": 0.44461174446117446, "grad_norm": 0.0, "learning_rate": 1.2262067967328833e-05, "loss": 0.9239, "step": 2067 }, { "epoch": 0.44482684448268445, "grad_norm": 0.0, "learning_rate": 1.2255280635996882e-05, "loss": 0.9185, "step": 2068 }, { "epoch": 0.44504194450419443, "grad_norm": 0.0, "learning_rate": 1.2248492209852178e-05, "loss": 0.9006, "step": 2069 }, { "epoch": 0.44525704452570447, "grad_norm": 0.0, "learning_rate": 1.2241702692190119e-05, "loss": 0.8943, "step": 2070 }, { "epoch": 0.44547214454721445, "grad_norm": 0.0, "learning_rate": 1.223491208630664e-05, "loss": 0.8673, "step": 2071 }, { "epoch": 0.44568724456872444, "grad_norm": 0.0, "learning_rate": 1.2228120395498201e-05, "loss": 0.8943, "step": 2072 }, { "epoch": 0.4459023445902345, "grad_norm": 0.0, "learning_rate": 1.2221327623061784e-05, "loss": 0.8581, "step": 2073 }, { "epoch": 0.44611744461174446, "grad_norm": 0.0, "learning_rate": 1.2214533772294907e-05, "loss": 0.8938, "step": 2074 }, { "epoch": 0.44633254463325445, "grad_norm": 0.0, "learning_rate": 1.22077388464956e-05, "loss": 0.9212, "step": 2075 }, { "epoch": 0.4465476446547645, "grad_norm": 0.0, "learning_rate": 1.2200942848962422e-05, "loss": 0.8535, "step": 2076 }, { "epoch": 0.44676274467627447, "grad_norm": 0.0, "learning_rate": 1.2194145782994448e-05, "loss": 0.8934, "step": 2077 }, { "epoch": 0.44697784469778445, "grad_norm": 0.0, "learning_rate": 1.2187347651891277e-05, "loss": 0.9103, "step": 2078 }, { "epoch": 0.4471929447192945, "grad_norm": 0.0, "learning_rate": 1.218054845895302e-05, "loss": 0.9244, "step": 2079 }, { "epoch": 0.4474080447408045, "grad_norm": 0.0, "learning_rate": 1.2173748207480305e-05, "loss": 0.8724, "step": 2080 }, { "epoch": 0.44762314476231446, "grad_norm": 0.0, "learning_rate": 1.2166946900774275e-05, "loss": 0.934, "step": 2081 }, { "epoch": 0.4478382447838245, "grad_norm": 0.0, "learning_rate": 1.2160144542136585e-05, "loss": 0.8899, "step": 2082 }, { "epoch": 0.4480533448053345, "grad_norm": 0.0, "learning_rate": 1.2153341134869393e-05, "loss": 0.8439, "step": 2083 }, { "epoch": 0.44826844482684447, "grad_norm": 0.0, "learning_rate": 1.2146536682275388e-05, "loss": 0.8432, "step": 2084 }, { "epoch": 0.4484835448483545, "grad_norm": 0.0, "learning_rate": 1.2139731187657737e-05, "loss": 0.8284, "step": 2085 }, { "epoch": 0.4486986448698645, "grad_norm": 0.0, "learning_rate": 1.2132924654320137e-05, "loss": 0.8995, "step": 2086 }, { "epoch": 0.4489137448913745, "grad_norm": 0.0, "learning_rate": 1.2126117085566776e-05, "loss": 0.9141, "step": 2087 }, { "epoch": 0.4491288449128845, "grad_norm": 0.0, "learning_rate": 1.2119308484702345e-05, "loss": 0.8811, "step": 2088 }, { "epoch": 0.4493439449343945, "grad_norm": 0.0, "learning_rate": 1.2112498855032046e-05, "loss": 0.9046, "step": 2089 }, { "epoch": 0.4495590449559045, "grad_norm": 0.0, "learning_rate": 1.210568819986157e-05, "loss": 0.8966, "step": 2090 }, { "epoch": 0.4497741449774145, "grad_norm": 0.0, "learning_rate": 1.209887652249711e-05, "loss": 0.9485, "step": 2091 }, { "epoch": 0.4499892449989245, "grad_norm": 0.0, "learning_rate": 1.2092063826245352e-05, "loss": 0.8954, "step": 2092 }, { "epoch": 0.4502043450204345, "grad_norm": 0.0, "learning_rate": 1.2085250114413484e-05, "loss": 0.9482, "step": 2093 }, { "epoch": 0.4504194450419445, "grad_norm": 0.0, "learning_rate": 1.2078435390309182e-05, "loss": 0.8764, "step": 2094 }, { "epoch": 0.4506345450634545, "grad_norm": 0.0, "learning_rate": 1.2071619657240614e-05, "loss": 0.8702, "step": 2095 }, { "epoch": 0.4508496450849645, "grad_norm": 0.0, "learning_rate": 1.2064802918516433e-05, "loss": 0.9547, "step": 2096 }, { "epoch": 0.4510647451064745, "grad_norm": 0.0, "learning_rate": 1.2057985177445792e-05, "loss": 0.8725, "step": 2097 }, { "epoch": 0.4512798451279845, "grad_norm": 0.0, "learning_rate": 1.2051166437338318e-05, "loss": 0.8611, "step": 2098 }, { "epoch": 0.4514949451494945, "grad_norm": 0.0, "learning_rate": 1.2044346701504129e-05, "loss": 0.8809, "step": 2099 }, { "epoch": 0.4517100451710045, "grad_norm": 0.0, "learning_rate": 1.2037525973253826e-05, "loss": 0.8596, "step": 2100 }, { "epoch": 0.45192514519251453, "grad_norm": 0.0, "learning_rate": 1.2030704255898492e-05, "loss": 0.872, "step": 2101 }, { "epoch": 0.4521402452140245, "grad_norm": 0.0, "learning_rate": 1.2023881552749693e-05, "loss": 0.9014, "step": 2102 }, { "epoch": 0.4523553452355345, "grad_norm": 0.0, "learning_rate": 1.2017057867119464e-05, "loss": 0.8686, "step": 2103 }, { "epoch": 0.45257044525704454, "grad_norm": 0.0, "learning_rate": 1.2010233202320327e-05, "loss": 0.8744, "step": 2104 }, { "epoch": 0.4527855452785545, "grad_norm": 0.0, "learning_rate": 1.2003407561665272e-05, "loss": 0.8556, "step": 2105 }, { "epoch": 0.4530006453000645, "grad_norm": 0.0, "learning_rate": 1.1996580948467773e-05, "loss": 0.8656, "step": 2106 }, { "epoch": 0.45321574532157455, "grad_norm": 0.0, "learning_rate": 1.1989753366041763e-05, "loss": 0.8919, "step": 2107 }, { "epoch": 0.45343084534308453, "grad_norm": 0.0, "learning_rate": 1.1982924817701656e-05, "loss": 0.8446, "step": 2108 }, { "epoch": 0.4536459453645945, "grad_norm": 0.0, "learning_rate": 1.1976095306762328e-05, "loss": 0.9105, "step": 2109 }, { "epoch": 0.45386104538610456, "grad_norm": 0.0, "learning_rate": 1.1969264836539124e-05, "loss": 0.8933, "step": 2110 }, { "epoch": 0.45407614540761454, "grad_norm": 0.0, "learning_rate": 1.196243341034786e-05, "loss": 0.9292, "step": 2111 }, { "epoch": 0.4542912454291245, "grad_norm": 0.0, "learning_rate": 1.1955601031504808e-05, "loss": 0.8886, "step": 2112 }, { "epoch": 0.45450634545063456, "grad_norm": 0.0, "learning_rate": 1.194876770332671e-05, "loss": 0.7866, "step": 2113 }, { "epoch": 0.45472144547214455, "grad_norm": 0.0, "learning_rate": 1.1941933429130759e-05, "loss": 0.8492, "step": 2114 }, { "epoch": 0.45493654549365453, "grad_norm": 0.0, "learning_rate": 1.193509821223462e-05, "loss": 0.9262, "step": 2115 }, { "epoch": 0.45515164551516457, "grad_norm": 0.0, "learning_rate": 1.1928262055956403e-05, "loss": 0.8522, "step": 2116 }, { "epoch": 0.45536674553667456, "grad_norm": 0.0, "learning_rate": 1.1921424963614686e-05, "loss": 0.8842, "step": 2117 }, { "epoch": 0.45558184555818454, "grad_norm": 0.0, "learning_rate": 1.191458693852849e-05, "loss": 0.8609, "step": 2118 }, { "epoch": 0.4557969455796946, "grad_norm": 0.0, "learning_rate": 1.1907747984017295e-05, "loss": 0.9428, "step": 2119 }, { "epoch": 0.45601204560120456, "grad_norm": 0.0, "learning_rate": 1.1900908103401037e-05, "loss": 0.843, "step": 2120 }, { "epoch": 0.45622714562271455, "grad_norm": 0.0, "learning_rate": 1.1894067300000088e-05, "loss": 0.8847, "step": 2121 }, { "epoch": 0.4564422456442246, "grad_norm": 0.0, "learning_rate": 1.1887225577135279e-05, "loss": 0.935, "step": 2122 }, { "epoch": 0.45665734566573457, "grad_norm": 0.0, "learning_rate": 1.1880382938127886e-05, "loss": 0.959, "step": 2123 }, { "epoch": 0.45687244568724455, "grad_norm": 0.0, "learning_rate": 1.1873539386299629e-05, "loss": 0.8504, "step": 2124 }, { "epoch": 0.4570875457087546, "grad_norm": 0.0, "learning_rate": 1.1866694924972667e-05, "loss": 0.8217, "step": 2125 }, { "epoch": 0.4573026457302646, "grad_norm": 0.0, "learning_rate": 1.1859849557469603e-05, "loss": 0.8707, "step": 2126 }, { "epoch": 0.45751774575177456, "grad_norm": 0.0, "learning_rate": 1.185300328711348e-05, "loss": 0.8451, "step": 2127 }, { "epoch": 0.4577328457732846, "grad_norm": 0.0, "learning_rate": 1.1846156117227783e-05, "loss": 0.8761, "step": 2128 }, { "epoch": 0.4579479457947946, "grad_norm": 0.0, "learning_rate": 1.1839308051136432e-05, "loss": 0.8715, "step": 2129 }, { "epoch": 0.45816304581630457, "grad_norm": 0.0, "learning_rate": 1.1832459092163775e-05, "loss": 0.8835, "step": 2130 }, { "epoch": 0.4583781458378146, "grad_norm": 0.0, "learning_rate": 1.1825609243634601e-05, "loss": 0.8821, "step": 2131 }, { "epoch": 0.4585932458593246, "grad_norm": 0.0, "learning_rate": 1.1818758508874133e-05, "loss": 0.8317, "step": 2132 }, { "epoch": 0.4588083458808346, "grad_norm": 0.0, "learning_rate": 1.1811906891208014e-05, "loss": 0.8879, "step": 2133 }, { "epoch": 0.4590234459023446, "grad_norm": 0.0, "learning_rate": 1.1805054393962323e-05, "loss": 0.8923, "step": 2134 }, { "epoch": 0.4592385459238546, "grad_norm": 0.0, "learning_rate": 1.1798201020463567e-05, "loss": 0.8637, "step": 2135 }, { "epoch": 0.4594536459453646, "grad_norm": 0.0, "learning_rate": 1.1791346774038673e-05, "loss": 0.885, "step": 2136 }, { "epoch": 0.45966874596687457, "grad_norm": 0.0, "learning_rate": 1.1784491658014996e-05, "loss": 0.8509, "step": 2137 }, { "epoch": 0.4598838459883846, "grad_norm": 0.0, "learning_rate": 1.1777635675720313e-05, "loss": 0.8679, "step": 2138 }, { "epoch": 0.4600989460098946, "grad_norm": 0.0, "learning_rate": 1.1770778830482824e-05, "loss": 0.9006, "step": 2139 }, { "epoch": 0.4603140460314046, "grad_norm": 0.0, "learning_rate": 1.1763921125631132e-05, "loss": 0.8925, "step": 2140 }, { "epoch": 0.4605291460529146, "grad_norm": 0.0, "learning_rate": 1.1757062564494282e-05, "loss": 0.868, "step": 2141 }, { "epoch": 0.4607442460744246, "grad_norm": 0.0, "learning_rate": 1.1750203150401718e-05, "loss": 0.8293, "step": 2142 }, { "epoch": 0.4609593460959346, "grad_norm": 0.0, "learning_rate": 1.17433428866833e-05, "loss": 0.8904, "step": 2143 }, { "epoch": 0.4611744461174446, "grad_norm": 0.0, "learning_rate": 1.1736481776669307e-05, "loss": 0.8626, "step": 2144 }, { "epoch": 0.4613895461389546, "grad_norm": 0.0, "learning_rate": 1.1729619823690419e-05, "loss": 0.9423, "step": 2145 }, { "epoch": 0.4616046461604646, "grad_norm": 0.0, "learning_rate": 1.1722757031077737e-05, "loss": 0.8748, "step": 2146 }, { "epoch": 0.46181974618197463, "grad_norm": 0.0, "learning_rate": 1.171589340216276e-05, "loss": 0.8545, "step": 2147 }, { "epoch": 0.4620348462034846, "grad_norm": 0.0, "learning_rate": 1.1709028940277396e-05, "loss": 1.0443, "step": 2148 }, { "epoch": 0.4622499462249946, "grad_norm": 0.0, "learning_rate": 1.1702163648753957e-05, "loss": 0.8402, "step": 2149 }, { "epoch": 0.46246504624650464, "grad_norm": 0.0, "learning_rate": 1.1695297530925162e-05, "loss": 0.8344, "step": 2150 }, { "epoch": 0.4626801462680146, "grad_norm": 0.0, "learning_rate": 1.1688430590124128e-05, "loss": 0.8912, "step": 2151 }, { "epoch": 0.4628952462895246, "grad_norm": 0.0, "learning_rate": 1.1681562829684365e-05, "loss": 0.8248, "step": 2152 }, { "epoch": 0.46311034631103465, "grad_norm": 0.0, "learning_rate": 1.1674694252939795e-05, "loss": 0.9114, "step": 2153 }, { "epoch": 0.46332544633254463, "grad_norm": 0.0, "learning_rate": 1.1667824863224722e-05, "loss": 0.8476, "step": 2154 }, { "epoch": 0.4635405463540546, "grad_norm": 0.0, "learning_rate": 1.1660954663873856e-05, "loss": 0.8927, "step": 2155 }, { "epoch": 0.46375564637556466, "grad_norm": 0.0, "learning_rate": 1.1654083658222287e-05, "loss": 0.8641, "step": 2156 }, { "epoch": 0.46397074639707464, "grad_norm": 0.0, "learning_rate": 1.1647211849605516e-05, "loss": 0.838, "step": 2157 }, { "epoch": 0.4641858464185846, "grad_norm": 0.0, "learning_rate": 1.1640339241359408e-05, "loss": 0.8505, "step": 2158 }, { "epoch": 0.46440094644009466, "grad_norm": 0.0, "learning_rate": 1.1633465836820243e-05, "loss": 0.8776, "step": 2159 }, { "epoch": 0.46461604646160465, "grad_norm": 0.0, "learning_rate": 1.1626591639324673e-05, "loss": 0.8378, "step": 2160 }, { "epoch": 0.46483114648311463, "grad_norm": 0.0, "learning_rate": 1.1619716652209728e-05, "loss": 0.9603, "step": 2161 }, { "epoch": 0.46504624650462467, "grad_norm": 0.0, "learning_rate": 1.1612840878812839e-05, "loss": 0.9032, "step": 2162 }, { "epoch": 0.46526134652613466, "grad_norm": 0.0, "learning_rate": 1.1605964322471806e-05, "loss": 0.8626, "step": 2163 }, { "epoch": 0.46547644654764464, "grad_norm": 0.0, "learning_rate": 1.1599086986524815e-05, "loss": 0.8337, "step": 2164 }, { "epoch": 0.4656915465691547, "grad_norm": 0.0, "learning_rate": 1.1592208874310426e-05, "loss": 0.8288, "step": 2165 }, { "epoch": 0.46590664659066466, "grad_norm": 0.0, "learning_rate": 1.158532998916758e-05, "loss": 0.8519, "step": 2166 }, { "epoch": 0.46612174661217465, "grad_norm": 0.0, "learning_rate": 1.1578450334435592e-05, "loss": 0.9159, "step": 2167 }, { "epoch": 0.4663368466336847, "grad_norm": 0.0, "learning_rate": 1.1571569913454145e-05, "loss": 0.8848, "step": 2168 }, { "epoch": 0.46655194665519467, "grad_norm": 0.0, "learning_rate": 1.1564688729563306e-05, "loss": 0.828, "step": 2169 }, { "epoch": 0.46676704667670466, "grad_norm": 0.0, "learning_rate": 1.1557806786103502e-05, "loss": 0.8688, "step": 2170 }, { "epoch": 0.4669821466982147, "grad_norm": 0.0, "learning_rate": 1.1550924086415532e-05, "loss": 0.8533, "step": 2171 }, { "epoch": 0.4671972467197247, "grad_norm": 0.0, "learning_rate": 1.1544040633840563e-05, "loss": 0.9031, "step": 2172 }, { "epoch": 0.46741234674123466, "grad_norm": 0.0, "learning_rate": 1.1537156431720125e-05, "loss": 0.9393, "step": 2173 }, { "epoch": 0.4676274467627447, "grad_norm": 0.0, "learning_rate": 1.1530271483396115e-05, "loss": 0.8975, "step": 2174 }, { "epoch": 0.4678425467842547, "grad_norm": 0.0, "learning_rate": 1.1523385792210792e-05, "loss": 0.8594, "step": 2175 }, { "epoch": 0.46805764680576467, "grad_norm": 0.0, "learning_rate": 1.1516499361506772e-05, "loss": 0.8952, "step": 2176 }, { "epoch": 0.46827274682727466, "grad_norm": 0.0, "learning_rate": 1.1509612194627033e-05, "loss": 0.8499, "step": 2177 }, { "epoch": 0.4684878468487847, "grad_norm": 0.0, "learning_rate": 1.150272429491491e-05, "loss": 0.8694, "step": 2178 }, { "epoch": 0.4687029468702947, "grad_norm": 0.0, "learning_rate": 1.1495835665714096e-05, "loss": 0.8989, "step": 2179 }, { "epoch": 0.46891804689180466, "grad_norm": 0.0, "learning_rate": 1.1488946310368629e-05, "loss": 0.8317, "step": 2180 }, { "epoch": 0.4691331469133147, "grad_norm": 0.0, "learning_rate": 1.1482056232222913e-05, "loss": 0.9092, "step": 2181 }, { "epoch": 0.4693482469348247, "grad_norm": 0.0, "learning_rate": 1.1475165434621696e-05, "loss": 0.8507, "step": 2182 }, { "epoch": 0.46956334695633467, "grad_norm": 0.0, "learning_rate": 1.146827392091007e-05, "loss": 0.878, "step": 2183 }, { "epoch": 0.4697784469778447, "grad_norm": 0.0, "learning_rate": 1.146138169443348e-05, "loss": 0.9397, "step": 2184 }, { "epoch": 0.4699935469993547, "grad_norm": 0.0, "learning_rate": 1.145448875853772e-05, "loss": 0.8657, "step": 2185 }, { "epoch": 0.4702086470208647, "grad_norm": 0.0, "learning_rate": 1.1447595116568928e-05, "loss": 0.9339, "step": 2186 }, { "epoch": 0.4704237470423747, "grad_norm": 0.0, "learning_rate": 1.1440700771873576e-05, "loss": 0.9026, "step": 2187 }, { "epoch": 0.4706388470638847, "grad_norm": 0.0, "learning_rate": 1.1433805727798488e-05, "loss": 0.918, "step": 2188 }, { "epoch": 0.4708539470853947, "grad_norm": 0.0, "learning_rate": 1.142690998769082e-05, "loss": 0.8799, "step": 2189 }, { "epoch": 0.4710690471069047, "grad_norm": 0.0, "learning_rate": 1.1420013554898068e-05, "loss": 0.8194, "step": 2190 }, { "epoch": 0.4712841471284147, "grad_norm": 0.0, "learning_rate": 1.1413116432768067e-05, "loss": 0.9264, "step": 2191 }, { "epoch": 0.4714992471499247, "grad_norm": 0.0, "learning_rate": 1.1406218624648986e-05, "loss": 0.8475, "step": 2192 }, { "epoch": 0.47171434717143473, "grad_norm": 0.0, "learning_rate": 1.1399320133889318e-05, "loss": 0.8545, "step": 2193 }, { "epoch": 0.4719294471929447, "grad_norm": 0.0, "learning_rate": 1.1392420963837902e-05, "loss": 0.9143, "step": 2194 }, { "epoch": 0.4721445472144547, "grad_norm": 0.0, "learning_rate": 1.1385521117843898e-05, "loss": 0.8584, "step": 2195 }, { "epoch": 0.47235964723596474, "grad_norm": 0.0, "learning_rate": 1.1378620599256797e-05, "loss": 0.8303, "step": 2196 }, { "epoch": 0.4725747472574747, "grad_norm": 0.0, "learning_rate": 1.1371719411426412e-05, "loss": 0.9243, "step": 2197 }, { "epoch": 0.4727898472789847, "grad_norm": 0.0, "learning_rate": 1.1364817557702886e-05, "loss": 0.8806, "step": 2198 }, { "epoch": 0.47300494730049475, "grad_norm": 0.0, "learning_rate": 1.1357915041436685e-05, "loss": 0.8115, "step": 2199 }, { "epoch": 0.47322004732200473, "grad_norm": 0.0, "learning_rate": 1.135101186597859e-05, "loss": 0.8922, "step": 2200 }, { "epoch": 0.4734351473435147, "grad_norm": 0.0, "learning_rate": 1.134410803467971e-05, "loss": 0.8904, "step": 2201 }, { "epoch": 0.47365024736502476, "grad_norm": 0.0, "learning_rate": 1.133720355089147e-05, "loss": 0.8229, "step": 2202 }, { "epoch": 0.47386534738653474, "grad_norm": 0.0, "learning_rate": 1.1330298417965614e-05, "loss": 0.8808, "step": 2203 }, { "epoch": 0.4740804474080447, "grad_norm": 0.0, "learning_rate": 1.1323392639254194e-05, "loss": 0.9052, "step": 2204 }, { "epoch": 0.47429554742955476, "grad_norm": 0.0, "learning_rate": 1.1316486218109579e-05, "loss": 0.9364, "step": 2205 }, { "epoch": 0.47451064745106475, "grad_norm": 0.0, "learning_rate": 1.1309579157884451e-05, "loss": 0.895, "step": 2206 }, { "epoch": 0.47472574747257473, "grad_norm": 0.0, "learning_rate": 1.1302671461931805e-05, "loss": 0.907, "step": 2207 }, { "epoch": 0.4749408474940848, "grad_norm": 0.0, "learning_rate": 1.129576313360494e-05, "loss": 0.8734, "step": 2208 }, { "epoch": 0.47515594751559476, "grad_norm": 0.0, "learning_rate": 1.1288854176257464e-05, "loss": 0.8928, "step": 2209 }, { "epoch": 0.47537104753710474, "grad_norm": 0.0, "learning_rate": 1.1281944593243289e-05, "loss": 0.8854, "step": 2210 }, { "epoch": 0.4755861475586148, "grad_norm": 0.0, "learning_rate": 1.127503438791663e-05, "loss": 0.8162, "step": 2211 }, { "epoch": 0.47580124758012476, "grad_norm": 0.0, "learning_rate": 1.1268123563632007e-05, "loss": 0.9023, "step": 2212 }, { "epoch": 0.47601634760163475, "grad_norm": 0.0, "learning_rate": 1.126121212374424e-05, "loss": 0.945, "step": 2213 }, { "epoch": 0.4762314476231448, "grad_norm": 0.0, "learning_rate": 1.1254300071608448e-05, "loss": 0.8933, "step": 2214 }, { "epoch": 0.47644654764465477, "grad_norm": 0.0, "learning_rate": 1.1247387410580042e-05, "loss": 0.9003, "step": 2215 }, { "epoch": 0.47666164766616476, "grad_norm": 0.0, "learning_rate": 1.1240474144014735e-05, "loss": 0.8463, "step": 2216 }, { "epoch": 0.4768767476876748, "grad_norm": 0.0, "learning_rate": 1.1233560275268534e-05, "loss": 0.8618, "step": 2217 }, { "epoch": 0.4770918477091848, "grad_norm": 0.0, "learning_rate": 1.1226645807697737e-05, "loss": 0.8648, "step": 2218 }, { "epoch": 0.47730694773069476, "grad_norm": 0.0, "learning_rate": 1.1219730744658921e-05, "loss": 0.8221, "step": 2219 }, { "epoch": 0.47752204775220475, "grad_norm": 0.0, "learning_rate": 1.1212815089508973e-05, "loss": 0.8839, "step": 2220 }, { "epoch": 0.4777371477737148, "grad_norm": 0.0, "learning_rate": 1.1205898845605056e-05, "loss": 0.8595, "step": 2221 }, { "epoch": 0.47795224779522477, "grad_norm": 0.0, "learning_rate": 1.1198982016304618e-05, "loss": 0.9479, "step": 2222 }, { "epoch": 0.47816734781673476, "grad_norm": 0.0, "learning_rate": 1.1192064604965396e-05, "loss": 0.8804, "step": 2223 }, { "epoch": 0.4783824478382448, "grad_norm": 0.0, "learning_rate": 1.11851466149454e-05, "loss": 0.8391, "step": 2224 }, { "epoch": 0.4785975478597548, "grad_norm": 0.0, "learning_rate": 1.1178228049602938e-05, "loss": 0.8551, "step": 2225 }, { "epoch": 0.47881264788126476, "grad_norm": 0.0, "learning_rate": 1.1171308912296576e-05, "loss": 0.9221, "step": 2226 }, { "epoch": 0.4790277479027748, "grad_norm": 0.0, "learning_rate": 1.1164389206385177e-05, "loss": 0.8239, "step": 2227 }, { "epoch": 0.4792428479242848, "grad_norm": 0.0, "learning_rate": 1.1157468935227866e-05, "loss": 0.8526, "step": 2228 }, { "epoch": 0.47945794794579477, "grad_norm": 0.0, "learning_rate": 1.1150548102184051e-05, "loss": 0.8904, "step": 2229 }, { "epoch": 0.4796730479673048, "grad_norm": 0.0, "learning_rate": 1.1143626710613411e-05, "loss": 0.8323, "step": 2230 }, { "epoch": 0.4798881479888148, "grad_norm": 0.0, "learning_rate": 1.1136704763875895e-05, "loss": 0.8685, "step": 2231 }, { "epoch": 0.4801032480103248, "grad_norm": 0.0, "learning_rate": 1.1129782265331716e-05, "loss": 0.8913, "step": 2232 }, { "epoch": 0.4803183480318348, "grad_norm": 0.0, "learning_rate": 1.1122859218341366e-05, "loss": 0.8004, "step": 2233 }, { "epoch": 0.4805334480533448, "grad_norm": 0.0, "learning_rate": 1.1115935626265596e-05, "loss": 0.9043, "step": 2234 }, { "epoch": 0.4807485480748548, "grad_norm": 0.0, "learning_rate": 1.110901149246542e-05, "loss": 0.8763, "step": 2235 }, { "epoch": 0.4809636480963648, "grad_norm": 0.0, "learning_rate": 1.1102086820302122e-05, "loss": 0.9019, "step": 2236 }, { "epoch": 0.4811787481178748, "grad_norm": 0.0, "learning_rate": 1.1095161613137242e-05, "loss": 0.8568, "step": 2237 }, { "epoch": 0.4813938481393848, "grad_norm": 0.0, "learning_rate": 1.1088235874332583e-05, "loss": 0.9326, "step": 2238 }, { "epoch": 0.48160894816089483, "grad_norm": 0.0, "learning_rate": 1.1081309607250202e-05, "loss": 0.8604, "step": 2239 }, { "epoch": 0.4818240481824048, "grad_norm": 0.0, "learning_rate": 1.1074382815252418e-05, "loss": 0.8528, "step": 2240 }, { "epoch": 0.4820391482039148, "grad_norm": 0.0, "learning_rate": 1.1067455501701794e-05, "loss": 0.8873, "step": 2241 }, { "epoch": 0.48225424822542484, "grad_norm": 0.0, "learning_rate": 1.1060527669961163e-05, "loss": 0.8507, "step": 2242 }, { "epoch": 0.4824693482469348, "grad_norm": 0.0, "learning_rate": 1.1053599323393594e-05, "loss": 0.8448, "step": 2243 }, { "epoch": 0.4826844482684448, "grad_norm": 0.0, "learning_rate": 1.1046670465362415e-05, "loss": 0.9194, "step": 2244 }, { "epoch": 0.48289954828995485, "grad_norm": 0.0, "learning_rate": 1.1039741099231198e-05, "loss": 0.8994, "step": 2245 }, { "epoch": 0.48311464831146483, "grad_norm": 0.0, "learning_rate": 1.1032811228363766e-05, "loss": 0.9294, "step": 2246 }, { "epoch": 0.4833297483329748, "grad_norm": 0.0, "learning_rate": 1.102588085612418e-05, "loss": 0.8856, "step": 2247 }, { "epoch": 0.48354484835448486, "grad_norm": 0.0, "learning_rate": 1.1018949985876753e-05, "loss": 0.8836, "step": 2248 }, { "epoch": 0.48375994837599484, "grad_norm": 0.0, "learning_rate": 1.1012018620986028e-05, "loss": 0.8849, "step": 2249 }, { "epoch": 0.4839750483975048, "grad_norm": 0.0, "learning_rate": 1.1005086764816803e-05, "loss": 0.9167, "step": 2250 }, { "epoch": 0.48419014841901487, "grad_norm": 0.0, "learning_rate": 1.0998154420734104e-05, "loss": 0.9103, "step": 2251 }, { "epoch": 0.48440524844052485, "grad_norm": 0.0, "learning_rate": 1.0991221592103198e-05, "loss": 0.9301, "step": 2252 }, { "epoch": 0.48462034846203483, "grad_norm": 0.0, "learning_rate": 1.0984288282289586e-05, "loss": 0.8033, "step": 2253 }, { "epoch": 0.4848354484835449, "grad_norm": 0.0, "learning_rate": 1.0977354494659e-05, "loss": 0.8578, "step": 2254 }, { "epoch": 0.48505054850505486, "grad_norm": 0.0, "learning_rate": 1.0970420232577408e-05, "loss": 0.7873, "step": 2255 }, { "epoch": 0.48526564852656484, "grad_norm": 0.0, "learning_rate": 1.0963485499411007e-05, "loss": 0.8104, "step": 2256 }, { "epoch": 0.4854807485480749, "grad_norm": 0.0, "learning_rate": 1.0956550298526223e-05, "loss": 0.8278, "step": 2257 }, { "epoch": 0.48569584856958486, "grad_norm": 0.0, "learning_rate": 1.0949614633289708e-05, "loss": 0.8232, "step": 2258 }, { "epoch": 0.48591094859109485, "grad_norm": 0.0, "learning_rate": 1.0942678507068338e-05, "loss": 0.8406, "step": 2259 }, { "epoch": 0.4861260486126049, "grad_norm": 0.0, "learning_rate": 1.0935741923229219e-05, "loss": 0.893, "step": 2260 }, { "epoch": 0.4863411486341149, "grad_norm": 0.0, "learning_rate": 1.0928804885139674e-05, "loss": 0.8891, "step": 2261 }, { "epoch": 0.48655624865562486, "grad_norm": 0.0, "learning_rate": 1.0921867396167244e-05, "loss": 0.898, "step": 2262 }, { "epoch": 0.48677134867713484, "grad_norm": 0.0, "learning_rate": 1.0914929459679695e-05, "loss": 0.9301, "step": 2263 }, { "epoch": 0.4869864486986449, "grad_norm": 0.0, "learning_rate": 1.0907991079045006e-05, "loss": 0.887, "step": 2264 }, { "epoch": 0.48720154872015486, "grad_norm": 0.0, "learning_rate": 1.0901052257631376e-05, "loss": 0.8608, "step": 2265 }, { "epoch": 0.48741664874166485, "grad_norm": 0.0, "learning_rate": 1.0894112998807213e-05, "loss": 0.875, "step": 2266 }, { "epoch": 0.4876317487631749, "grad_norm": 0.0, "learning_rate": 1.088717330594114e-05, "loss": 0.8911, "step": 2267 }, { "epoch": 0.48784684878468487, "grad_norm": 0.0, "learning_rate": 1.088023318240199e-05, "loss": 0.8593, "step": 2268 }, { "epoch": 0.48806194880619486, "grad_norm": 0.0, "learning_rate": 1.0873292631558805e-05, "loss": 0.8642, "step": 2269 }, { "epoch": 0.4882770488277049, "grad_norm": 0.0, "learning_rate": 1.0866351656780835e-05, "loss": 0.8849, "step": 2270 }, { "epoch": 0.4884921488492149, "grad_norm": 0.0, "learning_rate": 1.0859410261437538e-05, "loss": 0.8878, "step": 2271 }, { "epoch": 0.48870724887072486, "grad_norm": 0.0, "learning_rate": 1.0852468448898567e-05, "loss": 0.8799, "step": 2272 }, { "epoch": 0.4889223488922349, "grad_norm": 0.0, "learning_rate": 1.0845526222533791e-05, "loss": 0.8824, "step": 2273 }, { "epoch": 0.4891374489137449, "grad_norm": 0.0, "learning_rate": 1.083858358571327e-05, "loss": 0.8392, "step": 2274 }, { "epoch": 0.48935254893525487, "grad_norm": 0.0, "learning_rate": 1.0831640541807267e-05, "loss": 0.9158, "step": 2275 }, { "epoch": 0.4895676489567649, "grad_norm": 0.0, "learning_rate": 1.082469709418624e-05, "loss": 0.8844, "step": 2276 }, { "epoch": 0.4897827489782749, "grad_norm": 0.0, "learning_rate": 1.081775324622085e-05, "loss": 0.889, "step": 2277 }, { "epoch": 0.4899978489997849, "grad_norm": 0.0, "learning_rate": 1.0810809001281942e-05, "loss": 0.8793, "step": 2278 }, { "epoch": 0.4902129490212949, "grad_norm": 0.0, "learning_rate": 1.0803864362740562e-05, "loss": 0.8235, "step": 2279 }, { "epoch": 0.4904280490428049, "grad_norm": 0.0, "learning_rate": 1.079691933396794e-05, "loss": 0.9378, "step": 2280 }, { "epoch": 0.4906431490643149, "grad_norm": 0.0, "learning_rate": 1.0789973918335503e-05, "loss": 0.8523, "step": 2281 }, { "epoch": 0.4908582490858249, "grad_norm": 0.0, "learning_rate": 1.0783028119214863e-05, "loss": 0.8547, "step": 2282 }, { "epoch": 0.4910733491073349, "grad_norm": 0.0, "learning_rate": 1.0776081939977816e-05, "loss": 0.8969, "step": 2283 }, { "epoch": 0.4912884491288449, "grad_norm": 0.0, "learning_rate": 1.0769135383996345e-05, "loss": 0.8918, "step": 2284 }, { "epoch": 0.49150354915035493, "grad_norm": 0.0, "learning_rate": 1.076218845464261e-05, "loss": 0.923, "step": 2285 }, { "epoch": 0.4917186491718649, "grad_norm": 0.0, "learning_rate": 1.0755241155288965e-05, "loss": 0.8356, "step": 2286 }, { "epoch": 0.4919337491933749, "grad_norm": 0.0, "learning_rate": 1.0748293489307931e-05, "loss": 0.8584, "step": 2287 }, { "epoch": 0.49214884921488494, "grad_norm": 0.0, "learning_rate": 1.074134546007221e-05, "loss": 0.8978, "step": 2288 }, { "epoch": 0.4923639492363949, "grad_norm": 0.0, "learning_rate": 1.0734397070954688e-05, "loss": 0.8767, "step": 2289 }, { "epoch": 0.4925790492579049, "grad_norm": 0.0, "learning_rate": 1.0727448325328414e-05, "loss": 0.819, "step": 2290 }, { "epoch": 0.49279414927941495, "grad_norm": 0.0, "learning_rate": 1.0720499226566616e-05, "loss": 0.893, "step": 2291 }, { "epoch": 0.49300924930092493, "grad_norm": 0.0, "learning_rate": 1.0713549778042697e-05, "loss": 0.9013, "step": 2292 }, { "epoch": 0.4932243493224349, "grad_norm": 0.0, "learning_rate": 1.0706599983130226e-05, "loss": 0.8755, "step": 2293 }, { "epoch": 0.49343944934394496, "grad_norm": 0.0, "learning_rate": 1.0699649845202935e-05, "loss": 0.9106, "step": 2294 }, { "epoch": 0.49365454936545494, "grad_norm": 0.0, "learning_rate": 1.0692699367634733e-05, "loss": 0.85, "step": 2295 }, { "epoch": 0.4938696493869649, "grad_norm": 0.0, "learning_rate": 1.0685748553799687e-05, "loss": 0.8947, "step": 2296 }, { "epoch": 0.49408474940847497, "grad_norm": 0.0, "learning_rate": 1.0678797407072027e-05, "loss": 0.9221, "step": 2297 }, { "epoch": 0.49429984942998495, "grad_norm": 0.0, "learning_rate": 1.067184593082615e-05, "loss": 0.8267, "step": 2298 }, { "epoch": 0.49451494945149493, "grad_norm": 0.0, "learning_rate": 1.0664894128436607e-05, "loss": 0.9163, "step": 2299 }, { "epoch": 0.494730049473005, "grad_norm": 0.0, "learning_rate": 1.0657942003278107e-05, "loss": 0.8488, "step": 2300 }, { "epoch": 0.49494514949451496, "grad_norm": 0.0, "learning_rate": 1.0650989558725524e-05, "loss": 0.8342, "step": 2301 }, { "epoch": 0.49516024951602494, "grad_norm": 0.0, "learning_rate": 1.0644036798153878e-05, "loss": 0.8952, "step": 2302 }, { "epoch": 0.495375349537535, "grad_norm": 0.0, "learning_rate": 1.0637083724938344e-05, "loss": 0.8305, "step": 2303 }, { "epoch": 0.49559044955904497, "grad_norm": 0.0, "learning_rate": 1.0630130342454254e-05, "loss": 0.8448, "step": 2304 }, { "epoch": 0.49580554958055495, "grad_norm": 0.0, "learning_rate": 1.0623176654077091e-05, "loss": 0.8784, "step": 2305 }, { "epoch": 0.49602064960206493, "grad_norm": 0.0, "learning_rate": 1.0616222663182474e-05, "loss": 0.8459, "step": 2306 }, { "epoch": 0.496235749623575, "grad_norm": 0.0, "learning_rate": 1.0609268373146178e-05, "loss": 0.8805, "step": 2307 }, { "epoch": 0.49645084964508496, "grad_norm": 0.0, "learning_rate": 1.0602313787344125e-05, "loss": 0.8233, "step": 2308 }, { "epoch": 0.49666594966659494, "grad_norm": 0.0, "learning_rate": 1.0595358909152377e-05, "loss": 0.9001, "step": 2309 }, { "epoch": 0.496881049688105, "grad_norm": 0.0, "learning_rate": 1.0588403741947141e-05, "loss": 0.9346, "step": 2310 }, { "epoch": 0.49709614970961497, "grad_norm": 0.0, "learning_rate": 1.0581448289104759e-05, "loss": 0.8904, "step": 2311 }, { "epoch": 0.49731124973112495, "grad_norm": 0.0, "learning_rate": 1.0574492554001718e-05, "loss": 0.8416, "step": 2312 }, { "epoch": 0.497526349752635, "grad_norm": 0.0, "learning_rate": 1.0567536540014636e-05, "loss": 0.8833, "step": 2313 }, { "epoch": 0.497741449774145, "grad_norm": 0.0, "learning_rate": 1.0560580250520269e-05, "loss": 0.8973, "step": 2314 }, { "epoch": 0.49795654979565496, "grad_norm": 0.0, "learning_rate": 1.0553623688895509e-05, "loss": 0.8584, "step": 2315 }, { "epoch": 0.498171649817165, "grad_norm": 0.0, "learning_rate": 1.0546666858517375e-05, "loss": 0.9071, "step": 2316 }, { "epoch": 0.498386749838675, "grad_norm": 0.0, "learning_rate": 1.0539709762763021e-05, "loss": 0.8378, "step": 2317 }, { "epoch": 0.49860184986018496, "grad_norm": 0.0, "learning_rate": 1.0532752405009733e-05, "loss": 0.9124, "step": 2318 }, { "epoch": 0.498816949881695, "grad_norm": 0.0, "learning_rate": 1.0525794788634912e-05, "loss": 0.861, "step": 2319 }, { "epoch": 0.499032049903205, "grad_norm": 0.0, "learning_rate": 1.0518836917016095e-05, "loss": 0.8088, "step": 2320 }, { "epoch": 0.499247149924715, "grad_norm": 0.0, "learning_rate": 1.0511878793530943e-05, "loss": 0.7763, "step": 2321 }, { "epoch": 0.499462249946225, "grad_norm": 0.0, "learning_rate": 1.0504920421557232e-05, "loss": 0.8514, "step": 2322 }, { "epoch": 0.499677349967735, "grad_norm": 0.0, "learning_rate": 1.0497961804472866e-05, "loss": 0.837, "step": 2323 }, { "epoch": 0.499892449989245, "grad_norm": 0.0, "learning_rate": 1.0491002945655862e-05, "loss": 0.8872, "step": 2324 }, { "epoch": 0.500107550010755, "grad_norm": 0.0, "learning_rate": 1.0484043848484356e-05, "loss": 0.8688, "step": 2325 }, { "epoch": 0.500322650032265, "grad_norm": 0.0, "learning_rate": 1.0477084516336607e-05, "loss": 0.8793, "step": 2326 }, { "epoch": 0.500537750053775, "grad_norm": 0.0, "learning_rate": 1.0470124952590976e-05, "loss": 0.84, "step": 2327 }, { "epoch": 0.500752850075285, "grad_norm": 0.0, "learning_rate": 1.0463165160625947e-05, "loss": 0.8786, "step": 2328 }, { "epoch": 0.500967950096795, "grad_norm": 0.0, "learning_rate": 1.0456205143820105e-05, "loss": 0.8552, "step": 2329 }, { "epoch": 0.501183050118305, "grad_norm": 0.0, "learning_rate": 1.0449244905552153e-05, "loss": 0.9268, "step": 2330 }, { "epoch": 0.501398150139815, "grad_norm": 0.0, "learning_rate": 1.0442284449200899e-05, "loss": 0.8553, "step": 2331 }, { "epoch": 0.501613250161325, "grad_norm": 0.0, "learning_rate": 1.0435323778145253e-05, "loss": 0.8732, "step": 2332 }, { "epoch": 0.501828350182835, "grad_norm": 0.0, "learning_rate": 1.0428362895764231e-05, "loss": 0.887, "step": 2333 }, { "epoch": 0.502043450204345, "grad_norm": 0.0, "learning_rate": 1.0421401805436954e-05, "loss": 0.8639, "step": 2334 }, { "epoch": 0.502258550225855, "grad_norm": 0.0, "learning_rate": 1.0414440510542645e-05, "loss": 0.8642, "step": 2335 }, { "epoch": 0.502473650247365, "grad_norm": 0.0, "learning_rate": 1.0407479014460622e-05, "loss": 0.7644, "step": 2336 }, { "epoch": 0.502688750268875, "grad_norm": 0.0, "learning_rate": 1.04005173205703e-05, "loss": 0.8605, "step": 2337 }, { "epoch": 0.502903850290385, "grad_norm": 0.0, "learning_rate": 1.0393555432251193e-05, "loss": 0.8586, "step": 2338 }, { "epoch": 0.503118950311895, "grad_norm": 0.0, "learning_rate": 1.038659335288291e-05, "loss": 0.8196, "step": 2339 }, { "epoch": 0.5033340503334051, "grad_norm": 0.0, "learning_rate": 1.0379631085845154e-05, "loss": 0.9193, "step": 2340 }, { "epoch": 0.503549150354915, "grad_norm": 0.0, "learning_rate": 1.0372668634517718e-05, "loss": 0.8456, "step": 2341 }, { "epoch": 0.503764250376425, "grad_norm": 0.0, "learning_rate": 1.0365706002280471e-05, "loss": 0.8803, "step": 2342 }, { "epoch": 0.5039793503979351, "grad_norm": 0.0, "learning_rate": 1.0358743192513396e-05, "loss": 0.8846, "step": 2343 }, { "epoch": 0.504194450419445, "grad_norm": 0.0, "learning_rate": 1.0351780208596542e-05, "loss": 0.8255, "step": 2344 }, { "epoch": 0.504409550440955, "grad_norm": 0.0, "learning_rate": 1.034481705391005e-05, "loss": 0.8976, "step": 2345 }, { "epoch": 0.5046246504624651, "grad_norm": 0.0, "learning_rate": 1.033785373183414e-05, "loss": 0.8455, "step": 2346 }, { "epoch": 0.504839750483975, "grad_norm": 0.0, "learning_rate": 1.0330890245749119e-05, "loss": 0.8546, "step": 2347 }, { "epoch": 0.505054850505485, "grad_norm": 0.0, "learning_rate": 1.032392659903537e-05, "loss": 0.8477, "step": 2348 }, { "epoch": 0.5052699505269951, "grad_norm": 0.0, "learning_rate": 1.0316962795073353e-05, "loss": 0.8814, "step": 2349 }, { "epoch": 0.505485050548505, "grad_norm": 0.0, "learning_rate": 1.0309998837243607e-05, "loss": 0.9282, "step": 2350 }, { "epoch": 0.505700150570015, "grad_norm": 0.0, "learning_rate": 1.0303034728926741e-05, "loss": 0.8466, "step": 2351 }, { "epoch": 0.5059152505915251, "grad_norm": 0.0, "learning_rate": 1.0296070473503448e-05, "loss": 0.7954, "step": 2352 }, { "epoch": 0.506130350613035, "grad_norm": 0.0, "learning_rate": 1.0289106074354477e-05, "loss": 0.8294, "step": 2353 }, { "epoch": 0.5063454506345451, "grad_norm": 0.0, "learning_rate": 1.0282141534860662e-05, "loss": 0.8601, "step": 2354 }, { "epoch": 0.5065605506560551, "grad_norm": 0.0, "learning_rate": 1.0275176858402891e-05, "loss": 0.818, "step": 2355 }, { "epoch": 0.506775650677565, "grad_norm": 0.0, "learning_rate": 1.026821204836213e-05, "loss": 0.8692, "step": 2356 }, { "epoch": 0.5069907506990751, "grad_norm": 0.0, "learning_rate": 1.0261247108119403e-05, "loss": 0.8231, "step": 2357 }, { "epoch": 0.5072058507205851, "grad_norm": 0.0, "learning_rate": 1.02542820410558e-05, "loss": 0.8108, "step": 2358 }, { "epoch": 0.507420950742095, "grad_norm": 0.0, "learning_rate": 1.0247316850552474e-05, "loss": 0.864, "step": 2359 }, { "epoch": 0.5076360507636051, "grad_norm": 0.0, "learning_rate": 1.0240351539990628e-05, "loss": 0.8383, "step": 2360 }, { "epoch": 0.5078511507851151, "grad_norm": 0.0, "learning_rate": 1.023338611275154e-05, "loss": 0.8019, "step": 2361 }, { "epoch": 0.508066250806625, "grad_norm": 0.0, "learning_rate": 1.0226420572216534e-05, "loss": 0.8269, "step": 2362 }, { "epoch": 0.5082813508281351, "grad_norm": 0.0, "learning_rate": 1.0219454921766991e-05, "loss": 0.8248, "step": 2363 }, { "epoch": 0.5084964508496451, "grad_norm": 0.0, "learning_rate": 1.0212489164784338e-05, "loss": 0.9252, "step": 2364 }, { "epoch": 0.508711550871155, "grad_norm": 0.0, "learning_rate": 1.020552330465007e-05, "loss": 0.7875, "step": 2365 }, { "epoch": 0.5089266508926651, "grad_norm": 0.0, "learning_rate": 1.0198557344745718e-05, "loss": 0.9084, "step": 2366 }, { "epoch": 0.5091417509141751, "grad_norm": 0.0, "learning_rate": 1.0191591288452872e-05, "loss": 0.8841, "step": 2367 }, { "epoch": 0.5093568509356851, "grad_norm": 0.0, "learning_rate": 1.0184625139153155e-05, "loss": 0.9188, "step": 2368 }, { "epoch": 0.5095719509571951, "grad_norm": 0.0, "learning_rate": 1.0177658900228249e-05, "loss": 0.78, "step": 2369 }, { "epoch": 0.5097870509787051, "grad_norm": 0.0, "learning_rate": 1.017069257505987e-05, "loss": 0.8492, "step": 2370 }, { "epoch": 0.5100021510002151, "grad_norm": 0.0, "learning_rate": 1.0163726167029785e-05, "loss": 0.8516, "step": 2371 }, { "epoch": 0.5102172510217251, "grad_norm": 0.0, "learning_rate": 1.015675967951979e-05, "loss": 0.9341, "step": 2372 }, { "epoch": 0.5104323510432351, "grad_norm": 0.0, "learning_rate": 1.0149793115911725e-05, "loss": 0.9071, "step": 2373 }, { "epoch": 0.5106474510647451, "grad_norm": 0.0, "learning_rate": 1.0142826479587475e-05, "loss": 0.88, "step": 2374 }, { "epoch": 0.5108625510862551, "grad_norm": 0.0, "learning_rate": 1.0135859773928949e-05, "loss": 0.8542, "step": 2375 }, { "epoch": 0.5110776511077652, "grad_norm": 0.0, "learning_rate": 1.0128893002318092e-05, "loss": 0.7619, "step": 2376 }, { "epoch": 0.5112927511292751, "grad_norm": 0.0, "learning_rate": 1.0121926168136884e-05, "loss": 0.8506, "step": 2377 }, { "epoch": 0.5115078511507851, "grad_norm": 0.0, "learning_rate": 1.011495927476733e-05, "loss": 0.8479, "step": 2378 }, { "epoch": 0.5117229511722952, "grad_norm": 0.0, "learning_rate": 1.0107992325591474e-05, "loss": 0.9215, "step": 2379 }, { "epoch": 0.5119380511938051, "grad_norm": 0.0, "learning_rate": 1.0101025323991375e-05, "loss": 0.8843, "step": 2380 }, { "epoch": 0.5121531512153151, "grad_norm": 0.0, "learning_rate": 1.0094058273349125e-05, "loss": 0.8613, "step": 2381 }, { "epoch": 0.5123682512368252, "grad_norm": 0.0, "learning_rate": 1.0087091177046838e-05, "loss": 0.8483, "step": 2382 }, { "epoch": 0.5125833512583351, "grad_norm": 0.0, "learning_rate": 1.008012403846665e-05, "loss": 0.8484, "step": 2383 }, { "epoch": 0.5127984512798451, "grad_norm": 0.0, "learning_rate": 1.007315686099072e-05, "loss": 0.8849, "step": 2384 }, { "epoch": 0.5130135513013552, "grad_norm": 0.0, "learning_rate": 1.006618964800122e-05, "loss": 0.7923, "step": 2385 }, { "epoch": 0.5132286513228651, "grad_norm": 0.0, "learning_rate": 1.0059222402880339e-05, "loss": 0.9168, "step": 2386 }, { "epoch": 0.5134437513443751, "grad_norm": 0.0, "learning_rate": 1.0052255129010293e-05, "loss": 0.8649, "step": 2387 }, { "epoch": 0.5136588513658852, "grad_norm": 0.0, "learning_rate": 1.0045287829773302e-05, "loss": 0.8523, "step": 2388 }, { "epoch": 0.5138739513873951, "grad_norm": 0.0, "learning_rate": 1.00383205085516e-05, "loss": 0.7904, "step": 2389 }, { "epoch": 0.5140890514089052, "grad_norm": 0.0, "learning_rate": 1.0031353168727432e-05, "loss": 0.9191, "step": 2390 }, { "epoch": 0.5143041514304152, "grad_norm": 0.0, "learning_rate": 1.0024385813683054e-05, "loss": 0.8608, "step": 2391 }, { "epoch": 0.5145192514519251, "grad_norm": 0.0, "learning_rate": 1.0017418446800722e-05, "loss": 0.9278, "step": 2392 }, { "epoch": 0.5147343514734352, "grad_norm": 0.0, "learning_rate": 1.0010451071462707e-05, "loss": 0.7781, "step": 2393 }, { "epoch": 0.5149494514949452, "grad_norm": 0.0, "learning_rate": 1.0003483691051283e-05, "loss": 0.8558, "step": 2394 }, { "epoch": 0.5151645515164551, "grad_norm": 0.0, "learning_rate": 9.996516308948722e-06, "loss": 0.8254, "step": 2395 }, { "epoch": 0.5153796515379652, "grad_norm": 0.0, "learning_rate": 9.989548928537294e-06, "loss": 0.8226, "step": 2396 }, { "epoch": 0.5155947515594752, "grad_norm": 0.0, "learning_rate": 9.98258155319928e-06, "loss": 0.8539, "step": 2397 }, { "epoch": 0.5158098515809851, "grad_norm": 0.0, "learning_rate": 9.975614186316952e-06, "loss": 0.888, "step": 2398 }, { "epoch": 0.5160249516024952, "grad_norm": 0.0, "learning_rate": 9.96864683127257e-06, "loss": 0.9246, "step": 2399 }, { "epoch": 0.5162400516240052, "grad_norm": 0.0, "learning_rate": 9.961679491448403e-06, "loss": 0.8201, "step": 2400 }, { "epoch": 0.5164551516455151, "grad_norm": 0.0, "learning_rate": 9.9547121702267e-06, "loss": 0.8765, "step": 2401 }, { "epoch": 0.5166702516670252, "grad_norm": 0.0, "learning_rate": 9.947744870989705e-06, "loss": 0.8334, "step": 2402 }, { "epoch": 0.5168853516885352, "grad_norm": 0.0, "learning_rate": 9.940777597119663e-06, "loss": 0.8789, "step": 2403 }, { "epoch": 0.5171004517100452, "grad_norm": 0.0, "learning_rate": 9.933810351998784e-06, "loss": 0.9486, "step": 2404 }, { "epoch": 0.5173155517315552, "grad_norm": 0.0, "learning_rate": 9.926843139009286e-06, "loss": 0.7912, "step": 2405 }, { "epoch": 0.5175306517530652, "grad_norm": 0.0, "learning_rate": 9.919875961533355e-06, "loss": 0.8211, "step": 2406 }, { "epoch": 0.5177457517745752, "grad_norm": 0.0, "learning_rate": 9.912908822953164e-06, "loss": 0.844, "step": 2407 }, { "epoch": 0.5179608517960852, "grad_norm": 0.0, "learning_rate": 9.90594172665088e-06, "loss": 0.8888, "step": 2408 }, { "epoch": 0.5181759518175952, "grad_norm": 0.0, "learning_rate": 9.898974676008628e-06, "loss": 0.9058, "step": 2409 }, { "epoch": 0.5183910518391052, "grad_norm": 0.0, "learning_rate": 9.892007674408527e-06, "loss": 0.8364, "step": 2410 }, { "epoch": 0.5186061518606152, "grad_norm": 0.0, "learning_rate": 9.885040725232671e-06, "loss": 0.9053, "step": 2411 }, { "epoch": 0.5188212518821251, "grad_norm": 0.0, "learning_rate": 9.87807383186312e-06, "loss": 0.8589, "step": 2412 }, { "epoch": 0.5190363519036352, "grad_norm": 0.0, "learning_rate": 9.87110699768191e-06, "loss": 0.8817, "step": 2413 }, { "epoch": 0.5192514519251452, "grad_norm": 0.0, "learning_rate": 9.864140226071054e-06, "loss": 0.8529, "step": 2414 }, { "epoch": 0.5194665519466551, "grad_norm": 0.0, "learning_rate": 9.857173520412524e-06, "loss": 0.7802, "step": 2415 }, { "epoch": 0.5196816519681652, "grad_norm": 0.0, "learning_rate": 9.850206884088276e-06, "loss": 0.8609, "step": 2416 }, { "epoch": 0.5198967519896752, "grad_norm": 0.0, "learning_rate": 9.843240320480213e-06, "loss": 0.8421, "step": 2417 }, { "epoch": 0.5201118520111851, "grad_norm": 0.0, "learning_rate": 9.83627383297022e-06, "loss": 0.8831, "step": 2418 }, { "epoch": 0.5203269520326952, "grad_norm": 0.0, "learning_rate": 9.829307424940133e-06, "loss": 0.8703, "step": 2419 }, { "epoch": 0.5205420520542052, "grad_norm": 0.0, "learning_rate": 9.822341099771755e-06, "loss": 0.9144, "step": 2420 }, { "epoch": 0.5207571520757152, "grad_norm": 0.0, "learning_rate": 9.81537486084685e-06, "loss": 0.8334, "step": 2421 }, { "epoch": 0.5209722520972252, "grad_norm": 0.0, "learning_rate": 9.808408711547132e-06, "loss": 0.886, "step": 2422 }, { "epoch": 0.5211873521187352, "grad_norm": 0.0, "learning_rate": 9.801442655254285e-06, "loss": 0.895, "step": 2423 }, { "epoch": 0.5214024521402452, "grad_norm": 0.0, "learning_rate": 9.794476695349933e-06, "loss": 0.8665, "step": 2424 }, { "epoch": 0.5216175521617552, "grad_norm": 0.0, "learning_rate": 9.787510835215662e-06, "loss": 0.944, "step": 2425 }, { "epoch": 0.5218326521832652, "grad_norm": 0.0, "learning_rate": 9.780545078233015e-06, "loss": 0.8481, "step": 2426 }, { "epoch": 0.5220477522047752, "grad_norm": 0.0, "learning_rate": 9.77357942778347e-06, "loss": 0.7987, "step": 2427 }, { "epoch": 0.5222628522262852, "grad_norm": 0.0, "learning_rate": 9.76661388724846e-06, "loss": 0.8183, "step": 2428 }, { "epoch": 0.5224779522477953, "grad_norm": 0.0, "learning_rate": 9.759648460009376e-06, "loss": 0.8736, "step": 2429 }, { "epoch": 0.5226930522693052, "grad_norm": 0.0, "learning_rate": 9.752683149447531e-06, "loss": 0.9153, "step": 2430 }, { "epoch": 0.5229081522908152, "grad_norm": 0.0, "learning_rate": 9.745717958944204e-06, "loss": 0.9107, "step": 2431 }, { "epoch": 0.5231232523123253, "grad_norm": 0.0, "learning_rate": 9.7387528918806e-06, "loss": 0.8963, "step": 2432 }, { "epoch": 0.5233383523338352, "grad_norm": 0.0, "learning_rate": 9.73178795163787e-06, "loss": 0.8101, "step": 2433 }, { "epoch": 0.5235534523553452, "grad_norm": 0.0, "learning_rate": 9.72482314159711e-06, "loss": 0.8871, "step": 2434 }, { "epoch": 0.5237685523768553, "grad_norm": 0.0, "learning_rate": 9.717858465139341e-06, "loss": 0.8651, "step": 2435 }, { "epoch": 0.5239836523983652, "grad_norm": 0.0, "learning_rate": 9.710893925645528e-06, "loss": 0.8565, "step": 2436 }, { "epoch": 0.5241987524198752, "grad_norm": 0.0, "learning_rate": 9.703929526496555e-06, "loss": 0.8544, "step": 2437 }, { "epoch": 0.5244138524413853, "grad_norm": 0.0, "learning_rate": 9.696965271073259e-06, "loss": 0.9293, "step": 2438 }, { "epoch": 0.5246289524628952, "grad_norm": 0.0, "learning_rate": 9.690001162756398e-06, "loss": 0.8482, "step": 2439 }, { "epoch": 0.5248440524844052, "grad_norm": 0.0, "learning_rate": 9.683037204926648e-06, "loss": 0.948, "step": 2440 }, { "epoch": 0.5250591525059153, "grad_norm": 0.0, "learning_rate": 9.676073400964631e-06, "loss": 0.873, "step": 2441 }, { "epoch": 0.5252742525274252, "grad_norm": 0.0, "learning_rate": 9.669109754250884e-06, "loss": 0.7878, "step": 2442 }, { "epoch": 0.5254893525489353, "grad_norm": 0.0, "learning_rate": 9.662146268165862e-06, "loss": 0.9263, "step": 2443 }, { "epoch": 0.5257044525704453, "grad_norm": 0.0, "learning_rate": 9.655182946089957e-06, "loss": 0.8667, "step": 2444 }, { "epoch": 0.5259195525919552, "grad_norm": 0.0, "learning_rate": 9.648219791403461e-06, "loss": 0.8843, "step": 2445 }, { "epoch": 0.5261346526134653, "grad_norm": 0.0, "learning_rate": 9.641256807486604e-06, "loss": 0.941, "step": 2446 }, { "epoch": 0.5263497526349753, "grad_norm": 0.0, "learning_rate": 9.63429399771953e-06, "loss": 0.829, "step": 2447 }, { "epoch": 0.5265648526564852, "grad_norm": 0.0, "learning_rate": 9.627331365482286e-06, "loss": 0.8584, "step": 2448 }, { "epoch": 0.5267799526779953, "grad_norm": 0.0, "learning_rate": 9.620368914154848e-06, "loss": 0.8885, "step": 2449 }, { "epoch": 0.5269950526995053, "grad_norm": 0.0, "learning_rate": 9.613406647117092e-06, "loss": 0.875, "step": 2450 }, { "epoch": 0.5272101527210152, "grad_norm": 0.0, "learning_rate": 9.606444567748809e-06, "loss": 0.824, "step": 2451 }, { "epoch": 0.5274252527425253, "grad_norm": 0.0, "learning_rate": 9.599482679429705e-06, "loss": 0.8999, "step": 2452 }, { "epoch": 0.5276403527640353, "grad_norm": 0.0, "learning_rate": 9.592520985539382e-06, "loss": 0.9078, "step": 2453 }, { "epoch": 0.5278554527855452, "grad_norm": 0.0, "learning_rate": 9.585559489457355e-06, "loss": 0.9486, "step": 2454 }, { "epoch": 0.5280705528070553, "grad_norm": 0.0, "learning_rate": 9.578598194563047e-06, "loss": 0.8582, "step": 2455 }, { "epoch": 0.5282856528285653, "grad_norm": 0.0, "learning_rate": 9.57163710423577e-06, "loss": 0.9133, "step": 2456 }, { "epoch": 0.5285007528500753, "grad_norm": 0.0, "learning_rate": 9.56467622185475e-06, "loss": 0.8242, "step": 2457 }, { "epoch": 0.5287158528715853, "grad_norm": 0.0, "learning_rate": 9.557715550799104e-06, "loss": 0.9218, "step": 2458 }, { "epoch": 0.5289309528930953, "grad_norm": 0.0, "learning_rate": 9.550755094447848e-06, "loss": 0.9028, "step": 2459 }, { "epoch": 0.5291460529146053, "grad_norm": 0.0, "learning_rate": 9.543794856179899e-06, "loss": 0.8956, "step": 2460 }, { "epoch": 0.5293611529361153, "grad_norm": 0.0, "learning_rate": 9.536834839374056e-06, "loss": 0.9174, "step": 2461 }, { "epoch": 0.5295762529576253, "grad_norm": 0.0, "learning_rate": 9.529875047409027e-06, "loss": 0.9135, "step": 2462 }, { "epoch": 0.5297913529791353, "grad_norm": 0.0, "learning_rate": 9.522915483663395e-06, "loss": 0.8229, "step": 2463 }, { "epoch": 0.5300064530006453, "grad_norm": 0.0, "learning_rate": 9.515956151515646e-06, "loss": 0.9024, "step": 2464 }, { "epoch": 0.5302215530221553, "grad_norm": 0.0, "learning_rate": 9.508997054344145e-06, "loss": 0.8961, "step": 2465 }, { "epoch": 0.5304366530436653, "grad_norm": 0.0, "learning_rate": 9.502038195527138e-06, "loss": 0.9333, "step": 2466 }, { "epoch": 0.5306517530651753, "grad_norm": 0.0, "learning_rate": 9.495079578442768e-06, "loss": 0.8727, "step": 2467 }, { "epoch": 0.5308668530866854, "grad_norm": 0.0, "learning_rate": 9.48812120646906e-06, "loss": 0.9574, "step": 2468 }, { "epoch": 0.5310819531081953, "grad_norm": 0.0, "learning_rate": 9.481163082983905e-06, "loss": 0.7898, "step": 2469 }, { "epoch": 0.5312970531297053, "grad_norm": 0.0, "learning_rate": 9.474205211365092e-06, "loss": 0.8729, "step": 2470 }, { "epoch": 0.5315121531512154, "grad_norm": 0.0, "learning_rate": 9.467247594990272e-06, "loss": 0.8597, "step": 2471 }, { "epoch": 0.5317272531727253, "grad_norm": 0.0, "learning_rate": 9.460290237236979e-06, "loss": 0.8334, "step": 2472 }, { "epoch": 0.5319423531942353, "grad_norm": 0.0, "learning_rate": 9.453333141482629e-06, "loss": 0.8653, "step": 2473 }, { "epoch": 0.5321574532157454, "grad_norm": 0.0, "learning_rate": 9.446376311104494e-06, "loss": 0.8342, "step": 2474 }, { "epoch": 0.5323725532372553, "grad_norm": 0.0, "learning_rate": 9.439419749479738e-06, "loss": 0.8734, "step": 2475 }, { "epoch": 0.5325876532587653, "grad_norm": 0.0, "learning_rate": 9.432463459985367e-06, "loss": 0.8927, "step": 2476 }, { "epoch": 0.5328027532802754, "grad_norm": 0.0, "learning_rate": 9.425507445998286e-06, "loss": 0.9243, "step": 2477 }, { "epoch": 0.5330178533017853, "grad_norm": 0.0, "learning_rate": 9.418551710895243e-06, "loss": 0.8621, "step": 2478 }, { "epoch": 0.5332329533232953, "grad_norm": 0.0, "learning_rate": 9.411596258052862e-06, "loss": 0.8135, "step": 2479 }, { "epoch": 0.5334480533448054, "grad_norm": 0.0, "learning_rate": 9.404641090847628e-06, "loss": 0.8087, "step": 2480 }, { "epoch": 0.5336631533663153, "grad_norm": 0.0, "learning_rate": 9.397686212655878e-06, "loss": 0.9029, "step": 2481 }, { "epoch": 0.5338782533878254, "grad_norm": 0.0, "learning_rate": 9.390731626853823e-06, "loss": 0.8882, "step": 2482 }, { "epoch": 0.5340933534093354, "grad_norm": 0.0, "learning_rate": 9.383777336817532e-06, "loss": 0.9257, "step": 2483 }, { "epoch": 0.5343084534308453, "grad_norm": 0.0, "learning_rate": 9.376823345922912e-06, "loss": 0.8271, "step": 2484 }, { "epoch": 0.5345235534523554, "grad_norm": 0.0, "learning_rate": 9.369869657545744e-06, "loss": 0.8889, "step": 2485 }, { "epoch": 0.5347386534738654, "grad_norm": 0.0, "learning_rate": 9.36291627506166e-06, "loss": 0.8463, "step": 2486 }, { "epoch": 0.5349537534953753, "grad_norm": 0.0, "learning_rate": 9.355963201846125e-06, "loss": 0.8603, "step": 2487 }, { "epoch": 0.5351688535168854, "grad_norm": 0.0, "learning_rate": 9.349010441274481e-06, "loss": 0.8922, "step": 2488 }, { "epoch": 0.5353839535383954, "grad_norm": 0.0, "learning_rate": 9.342057996721894e-06, "loss": 0.8582, "step": 2489 }, { "epoch": 0.5355990535599053, "grad_norm": 0.0, "learning_rate": 9.335105871563394e-06, "loss": 0.8989, "step": 2490 }, { "epoch": 0.5358141535814154, "grad_norm": 0.0, "learning_rate": 9.328154069173853e-06, "loss": 0.8268, "step": 2491 }, { "epoch": 0.5360292536029254, "grad_norm": 0.0, "learning_rate": 9.321202592927974e-06, "loss": 0.8695, "step": 2492 }, { "epoch": 0.5362443536244353, "grad_norm": 0.0, "learning_rate": 9.314251446200317e-06, "loss": 0.828, "step": 2493 }, { "epoch": 0.5364594536459454, "grad_norm": 0.0, "learning_rate": 9.307300632365269e-06, "loss": 0.8644, "step": 2494 }, { "epoch": 0.5366745536674553, "grad_norm": 0.0, "learning_rate": 9.300350154797065e-06, "loss": 0.8526, "step": 2495 }, { "epoch": 0.5368896536889654, "grad_norm": 0.0, "learning_rate": 9.293400016869779e-06, "loss": 0.9347, "step": 2496 }, { "epoch": 0.5371047537104754, "grad_norm": 0.0, "learning_rate": 9.286450221957304e-06, "loss": 0.8549, "step": 2497 }, { "epoch": 0.5373198537319853, "grad_norm": 0.0, "learning_rate": 9.279500773433385e-06, "loss": 0.9085, "step": 2498 }, { "epoch": 0.5375349537534954, "grad_norm": 0.0, "learning_rate": 9.272551674671591e-06, "loss": 0.9082, "step": 2499 }, { "epoch": 0.5377500537750054, "grad_norm": 0.0, "learning_rate": 9.265602929045317e-06, "loss": 0.8932, "step": 2500 }, { "epoch": 0.5379651537965153, "grad_norm": 0.0, "learning_rate": 9.258654539927795e-06, "loss": 0.8949, "step": 2501 }, { "epoch": 0.5381802538180254, "grad_norm": 0.0, "learning_rate": 9.251706510692074e-06, "loss": 0.8988, "step": 2502 }, { "epoch": 0.5383953538395354, "grad_norm": 0.0, "learning_rate": 9.244758844711035e-06, "loss": 0.862, "step": 2503 }, { "epoch": 0.5386104538610453, "grad_norm": 0.0, "learning_rate": 9.237811545357393e-06, "loss": 0.8028, "step": 2504 }, { "epoch": 0.5388255538825554, "grad_norm": 0.0, "learning_rate": 9.230864616003657e-06, "loss": 0.8007, "step": 2505 }, { "epoch": 0.5390406539040654, "grad_norm": 0.0, "learning_rate": 9.223918060022189e-06, "loss": 0.795, "step": 2506 }, { "epoch": 0.5392557539255753, "grad_norm": 0.0, "learning_rate": 9.21697188078514e-06, "loss": 0.8831, "step": 2507 }, { "epoch": 0.5394708539470854, "grad_norm": 0.0, "learning_rate": 9.210026081664499e-06, "loss": 0.8231, "step": 2508 }, { "epoch": 0.5396859539685954, "grad_norm": 0.0, "learning_rate": 9.203080666032064e-06, "loss": 0.7832, "step": 2509 }, { "epoch": 0.5399010539901054, "grad_norm": 0.0, "learning_rate": 9.196135637259443e-06, "loss": 0.8945, "step": 2510 }, { "epoch": 0.5401161540116154, "grad_norm": 0.0, "learning_rate": 9.189190998718058e-06, "loss": 0.8874, "step": 2511 }, { "epoch": 0.5403312540331254, "grad_norm": 0.0, "learning_rate": 9.182246753779154e-06, "loss": 0.9276, "step": 2512 }, { "epoch": 0.5405463540546354, "grad_norm": 0.0, "learning_rate": 9.175302905813761e-06, "loss": 0.8788, "step": 2513 }, { "epoch": 0.5407614540761454, "grad_norm": 0.0, "learning_rate": 9.168359458192737e-06, "loss": 0.8725, "step": 2514 }, { "epoch": 0.5409765540976554, "grad_norm": 0.0, "learning_rate": 9.161416414286734e-06, "loss": 0.9063, "step": 2515 }, { "epoch": 0.5411916541191654, "grad_norm": 0.0, "learning_rate": 9.154473777466209e-06, "loss": 0.9505, "step": 2516 }, { "epoch": 0.5414067541406754, "grad_norm": 0.0, "learning_rate": 9.147531551101436e-06, "loss": 0.8973, "step": 2517 }, { "epoch": 0.5416218541621854, "grad_norm": 0.0, "learning_rate": 9.140589738562465e-06, "loss": 0.9543, "step": 2518 }, { "epoch": 0.5418369541836954, "grad_norm": 0.0, "learning_rate": 9.133648343219168e-06, "loss": 0.8104, "step": 2519 }, { "epoch": 0.5420520542052054, "grad_norm": 0.0, "learning_rate": 9.126707368441196e-06, "loss": 0.899, "step": 2520 }, { "epoch": 0.5422671542267155, "grad_norm": 0.0, "learning_rate": 9.119766817598013e-06, "loss": 0.9535, "step": 2521 }, { "epoch": 0.5424822542482254, "grad_norm": 0.0, "learning_rate": 9.112826694058864e-06, "loss": 0.8499, "step": 2522 }, { "epoch": 0.5426973542697354, "grad_norm": 0.0, "learning_rate": 9.10588700119279e-06, "loss": 0.7899, "step": 2523 }, { "epoch": 0.5429124542912455, "grad_norm": 0.0, "learning_rate": 9.098947742368629e-06, "loss": 0.9355, "step": 2524 }, { "epoch": 0.5431275543127554, "grad_norm": 0.0, "learning_rate": 9.092008920954997e-06, "loss": 0.7915, "step": 2525 }, { "epoch": 0.5433426543342654, "grad_norm": 0.0, "learning_rate": 9.085070540320306e-06, "loss": 0.8155, "step": 2526 }, { "epoch": 0.5435577543557755, "grad_norm": 0.0, "learning_rate": 9.07813260383276e-06, "loss": 0.8327, "step": 2527 }, { "epoch": 0.5437728543772854, "grad_norm": 0.0, "learning_rate": 9.07119511486033e-06, "loss": 0.8158, "step": 2528 }, { "epoch": 0.5439879543987954, "grad_norm": 0.0, "learning_rate": 9.064258076770783e-06, "loss": 0.8309, "step": 2529 }, { "epoch": 0.5442030544203055, "grad_norm": 0.0, "learning_rate": 9.057321492931665e-06, "loss": 0.904, "step": 2530 }, { "epoch": 0.5444181544418154, "grad_norm": 0.0, "learning_rate": 9.050385366710295e-06, "loss": 0.8524, "step": 2531 }, { "epoch": 0.5446332544633254, "grad_norm": 0.0, "learning_rate": 9.043449701473782e-06, "loss": 0.8372, "step": 2532 }, { "epoch": 0.5448483544848355, "grad_norm": 0.0, "learning_rate": 9.036514500588995e-06, "loss": 0.906, "step": 2533 }, { "epoch": 0.5450634545063454, "grad_norm": 0.0, "learning_rate": 9.029579767422592e-06, "loss": 0.9143, "step": 2534 }, { "epoch": 0.5452785545278555, "grad_norm": 0.0, "learning_rate": 9.022645505341004e-06, "loss": 0.903, "step": 2535 }, { "epoch": 0.5454936545493655, "grad_norm": 0.0, "learning_rate": 9.015711717710418e-06, "loss": 0.8228, "step": 2536 }, { "epoch": 0.5457087545708754, "grad_norm": 0.0, "learning_rate": 9.008778407896807e-06, "loss": 0.8041, "step": 2537 }, { "epoch": 0.5459238545923855, "grad_norm": 0.0, "learning_rate": 9.001845579265897e-06, "loss": 0.8577, "step": 2538 }, { "epoch": 0.5461389546138955, "grad_norm": 0.0, "learning_rate": 8.994913235183197e-06, "loss": 0.8023, "step": 2539 }, { "epoch": 0.5463540546354054, "grad_norm": 0.0, "learning_rate": 8.987981379013975e-06, "loss": 0.8464, "step": 2540 }, { "epoch": 0.5465691546569155, "grad_norm": 0.0, "learning_rate": 8.981050014123252e-06, "loss": 0.8056, "step": 2541 }, { "epoch": 0.5467842546784255, "grad_norm": 0.0, "learning_rate": 8.974119143875822e-06, "loss": 0.8353, "step": 2542 }, { "epoch": 0.5469993546999354, "grad_norm": 0.0, "learning_rate": 8.967188771636237e-06, "loss": 0.8248, "step": 2543 }, { "epoch": 0.5472144547214455, "grad_norm": 0.0, "learning_rate": 8.960258900768804e-06, "loss": 0.8765, "step": 2544 }, { "epoch": 0.5474295547429555, "grad_norm": 0.0, "learning_rate": 8.95332953463759e-06, "loss": 0.8365, "step": 2545 }, { "epoch": 0.5476446547644654, "grad_norm": 0.0, "learning_rate": 8.946400676606408e-06, "loss": 0.8718, "step": 2546 }, { "epoch": 0.5478597547859755, "grad_norm": 0.0, "learning_rate": 8.939472330038839e-06, "loss": 0.8499, "step": 2547 }, { "epoch": 0.5480748548074855, "grad_norm": 0.0, "learning_rate": 8.932544498298207e-06, "loss": 0.8396, "step": 2548 }, { "epoch": 0.5482899548289955, "grad_norm": 0.0, "learning_rate": 8.925617184747586e-06, "loss": 0.8084, "step": 2549 }, { "epoch": 0.5485050548505055, "grad_norm": 0.0, "learning_rate": 8.9186903927498e-06, "loss": 0.8743, "step": 2550 }, { "epoch": 0.5487201548720155, "grad_norm": 0.0, "learning_rate": 8.91176412566742e-06, "loss": 0.82, "step": 2551 }, { "epoch": 0.5489352548935255, "grad_norm": 0.0, "learning_rate": 8.904838386862758e-06, "loss": 0.804, "step": 2552 }, { "epoch": 0.5491503549150355, "grad_norm": 0.0, "learning_rate": 8.897913179697881e-06, "loss": 0.8273, "step": 2553 }, { "epoch": 0.5493654549365455, "grad_norm": 0.0, "learning_rate": 8.890988507534582e-06, "loss": 0.8758, "step": 2554 }, { "epoch": 0.5495805549580555, "grad_norm": 0.0, "learning_rate": 8.884064373734406e-06, "loss": 0.8227, "step": 2555 }, { "epoch": 0.5497956549795655, "grad_norm": 0.0, "learning_rate": 8.877140781658637e-06, "loss": 0.8791, "step": 2556 }, { "epoch": 0.5500107550010755, "grad_norm": 0.0, "learning_rate": 8.870217734668287e-06, "loss": 0.8542, "step": 2557 }, { "epoch": 0.5502258550225855, "grad_norm": 0.0, "learning_rate": 8.86329523612411e-06, "loss": 0.7885, "step": 2558 }, { "epoch": 0.5504409550440955, "grad_norm": 0.0, "learning_rate": 8.85637328938659e-06, "loss": 0.8615, "step": 2559 }, { "epoch": 0.5506560550656056, "grad_norm": 0.0, "learning_rate": 8.849451897815949e-06, "loss": 0.8867, "step": 2560 }, { "epoch": 0.5508711550871155, "grad_norm": 0.0, "learning_rate": 8.842531064772137e-06, "loss": 0.8717, "step": 2561 }, { "epoch": 0.5510862551086255, "grad_norm": 0.0, "learning_rate": 8.835610793614824e-06, "loss": 0.8679, "step": 2562 }, { "epoch": 0.5513013551301356, "grad_norm": 0.0, "learning_rate": 8.828691087703429e-06, "loss": 0.9234, "step": 2563 }, { "epoch": 0.5515164551516455, "grad_norm": 0.0, "learning_rate": 8.821771950397065e-06, "loss": 0.8331, "step": 2564 }, { "epoch": 0.5517315551731555, "grad_norm": 0.0, "learning_rate": 8.814853385054602e-06, "loss": 0.863, "step": 2565 }, { "epoch": 0.5519466551946656, "grad_norm": 0.0, "learning_rate": 8.80793539503461e-06, "loss": 0.9109, "step": 2566 }, { "epoch": 0.5521617552161755, "grad_norm": 0.0, "learning_rate": 8.801017983695383e-06, "loss": 0.9305, "step": 2567 }, { "epoch": 0.5523768552376855, "grad_norm": 0.0, "learning_rate": 8.794101154394943e-06, "loss": 0.8877, "step": 2568 }, { "epoch": 0.5525919552591956, "grad_norm": 0.0, "learning_rate": 8.787184910491028e-06, "loss": 0.9097, "step": 2569 }, { "epoch": 0.5528070552807055, "grad_norm": 0.0, "learning_rate": 8.78026925534108e-06, "loss": 0.8391, "step": 2570 }, { "epoch": 0.5530221553022155, "grad_norm": 0.0, "learning_rate": 8.77335419230227e-06, "loss": 0.8835, "step": 2571 }, { "epoch": 0.5532372553237256, "grad_norm": 0.0, "learning_rate": 8.766439724731469e-06, "loss": 0.8623, "step": 2572 }, { "epoch": 0.5534523553452355, "grad_norm": 0.0, "learning_rate": 8.759525855985265e-06, "loss": 0.9333, "step": 2573 }, { "epoch": 0.5536674553667456, "grad_norm": 0.0, "learning_rate": 8.752612589419963e-06, "loss": 0.8806, "step": 2574 }, { "epoch": 0.5538825553882556, "grad_norm": 0.0, "learning_rate": 8.745699928391555e-06, "loss": 0.8479, "step": 2575 }, { "epoch": 0.5540976554097655, "grad_norm": 0.0, "learning_rate": 8.738787876255763e-06, "loss": 0.8844, "step": 2576 }, { "epoch": 0.5543127554312756, "grad_norm": 0.0, "learning_rate": 8.731876436367994e-06, "loss": 0.885, "step": 2577 }, { "epoch": 0.5545278554527856, "grad_norm": 0.0, "learning_rate": 8.724965612083373e-06, "loss": 0.8964, "step": 2578 }, { "epoch": 0.5547429554742955, "grad_norm": 0.0, "learning_rate": 8.718055406756714e-06, "loss": 0.8766, "step": 2579 }, { "epoch": 0.5549580554958056, "grad_norm": 0.0, "learning_rate": 8.71114582374254e-06, "loss": 0.9016, "step": 2580 }, { "epoch": 0.5551731555173155, "grad_norm": 0.0, "learning_rate": 8.704236866395066e-06, "loss": 0.9271, "step": 2581 }, { "epoch": 0.5553882555388255, "grad_norm": 0.0, "learning_rate": 8.697328538068197e-06, "loss": 0.7914, "step": 2582 }, { "epoch": 0.5556033555603356, "grad_norm": 0.0, "learning_rate": 8.69042084211555e-06, "loss": 0.8581, "step": 2583 }, { "epoch": 0.5558184555818455, "grad_norm": 0.0, "learning_rate": 8.683513781890426e-06, "loss": 0.8297, "step": 2584 }, { "epoch": 0.5560335556033555, "grad_norm": 0.0, "learning_rate": 8.67660736074581e-06, "loss": 0.8946, "step": 2585 }, { "epoch": 0.5562486556248656, "grad_norm": 0.0, "learning_rate": 8.669701582034389e-06, "loss": 0.8954, "step": 2586 }, { "epoch": 0.5564637556463755, "grad_norm": 0.0, "learning_rate": 8.662796449108532e-06, "loss": 0.875, "step": 2587 }, { "epoch": 0.5566788556678856, "grad_norm": 0.0, "learning_rate": 8.655891965320292e-06, "loss": 0.8429, "step": 2588 }, { "epoch": 0.5568939556893956, "grad_norm": 0.0, "learning_rate": 8.648988134021416e-06, "loss": 0.8863, "step": 2589 }, { "epoch": 0.5571090557109055, "grad_norm": 0.0, "learning_rate": 8.64208495856332e-06, "loss": 0.8868, "step": 2590 }, { "epoch": 0.5573241557324156, "grad_norm": 0.0, "learning_rate": 8.635182442297114e-06, "loss": 0.8534, "step": 2591 }, { "epoch": 0.5575392557539256, "grad_norm": 0.0, "learning_rate": 8.62828058857359e-06, "loss": 0.888, "step": 2592 }, { "epoch": 0.5577543557754355, "grad_norm": 0.0, "learning_rate": 8.621379400743206e-06, "loss": 0.8966, "step": 2593 }, { "epoch": 0.5579694557969456, "grad_norm": 0.0, "learning_rate": 8.614478882156104e-06, "loss": 0.9128, "step": 2594 }, { "epoch": 0.5581845558184556, "grad_norm": 0.0, "learning_rate": 8.607579036162101e-06, "loss": 0.8651, "step": 2595 }, { "epoch": 0.5583996558399655, "grad_norm": 0.0, "learning_rate": 8.600679866110682e-06, "loss": 0.874, "step": 2596 }, { "epoch": 0.5586147558614756, "grad_norm": 0.0, "learning_rate": 8.59378137535102e-06, "loss": 0.8994, "step": 2597 }, { "epoch": 0.5588298558829856, "grad_norm": 0.0, "learning_rate": 8.586883567231935e-06, "loss": 0.837, "step": 2598 }, { "epoch": 0.5590449559044955, "grad_norm": 0.0, "learning_rate": 8.579986445101932e-06, "loss": 0.8963, "step": 2599 }, { "epoch": 0.5592600559260056, "grad_norm": 0.0, "learning_rate": 8.573090012309181e-06, "loss": 0.9604, "step": 2600 }, { "epoch": 0.5594751559475156, "grad_norm": 0.0, "learning_rate": 8.566194272201515e-06, "loss": 0.8098, "step": 2601 }, { "epoch": 0.5596902559690256, "grad_norm": 0.0, "learning_rate": 8.559299228126427e-06, "loss": 0.8612, "step": 2602 }, { "epoch": 0.5599053559905356, "grad_norm": 0.0, "learning_rate": 8.552404883431075e-06, "loss": 0.7856, "step": 2603 }, { "epoch": 0.5601204560120456, "grad_norm": 0.0, "learning_rate": 8.54551124146228e-06, "loss": 0.9112, "step": 2604 }, { "epoch": 0.5603355560335556, "grad_norm": 0.0, "learning_rate": 8.538618305566523e-06, "loss": 0.8754, "step": 2605 }, { "epoch": 0.5605506560550656, "grad_norm": 0.0, "learning_rate": 8.531726079089934e-06, "loss": 0.8725, "step": 2606 }, { "epoch": 0.5607657560765756, "grad_norm": 0.0, "learning_rate": 8.524834565378309e-06, "loss": 0.8443, "step": 2607 }, { "epoch": 0.5609808560980856, "grad_norm": 0.0, "learning_rate": 8.517943767777089e-06, "loss": 0.8287, "step": 2608 }, { "epoch": 0.5611959561195956, "grad_norm": 0.0, "learning_rate": 8.511053689631371e-06, "loss": 0.87, "step": 2609 }, { "epoch": 0.5614110561411056, "grad_norm": 0.0, "learning_rate": 8.504164334285909e-06, "loss": 0.8951, "step": 2610 }, { "epoch": 0.5616261561626156, "grad_norm": 0.0, "learning_rate": 8.497275705085091e-06, "loss": 0.8959, "step": 2611 }, { "epoch": 0.5618412561841256, "grad_norm": 0.0, "learning_rate": 8.490387805372965e-06, "loss": 0.8058, "step": 2612 }, { "epoch": 0.5620563562056357, "grad_norm": 0.0, "learning_rate": 8.48350063849323e-06, "loss": 0.8447, "step": 2613 }, { "epoch": 0.5622714562271456, "grad_norm": 0.0, "learning_rate": 8.476614207789208e-06, "loss": 0.8771, "step": 2614 }, { "epoch": 0.5624865562486556, "grad_norm": 0.0, "learning_rate": 8.469728516603886e-06, "loss": 0.9087, "step": 2615 }, { "epoch": 0.5627016562701657, "grad_norm": 0.0, "learning_rate": 8.462843568279877e-06, "loss": 0.8484, "step": 2616 }, { "epoch": 0.5629167562916756, "grad_norm": 0.0, "learning_rate": 8.455959366159439e-06, "loss": 0.9005, "step": 2617 }, { "epoch": 0.5631318563131856, "grad_norm": 0.0, "learning_rate": 8.449075913584472e-06, "loss": 0.8341, "step": 2618 }, { "epoch": 0.5633469563346957, "grad_norm": 0.0, "learning_rate": 8.4421932138965e-06, "loss": 0.8462, "step": 2619 }, { "epoch": 0.5635620563562056, "grad_norm": 0.0, "learning_rate": 8.435311270436697e-06, "loss": 0.8988, "step": 2620 }, { "epoch": 0.5637771563777156, "grad_norm": 0.0, "learning_rate": 8.428430086545857e-06, "loss": 0.874, "step": 2621 }, { "epoch": 0.5639922563992257, "grad_norm": 0.0, "learning_rate": 8.421549665564412e-06, "loss": 0.8119, "step": 2622 }, { "epoch": 0.5642073564207356, "grad_norm": 0.0, "learning_rate": 8.414670010832425e-06, "loss": 0.8327, "step": 2623 }, { "epoch": 0.5644224564422456, "grad_norm": 0.0, "learning_rate": 8.407791125689577e-06, "loss": 0.865, "step": 2624 }, { "epoch": 0.5646375564637557, "grad_norm": 0.0, "learning_rate": 8.400913013475186e-06, "loss": 0.8528, "step": 2625 }, { "epoch": 0.5648526564852656, "grad_norm": 0.0, "learning_rate": 8.394035677528197e-06, "loss": 0.8631, "step": 2626 }, { "epoch": 0.5650677565067757, "grad_norm": 0.0, "learning_rate": 8.387159121187163e-06, "loss": 0.8367, "step": 2627 }, { "epoch": 0.5652828565282857, "grad_norm": 0.0, "learning_rate": 8.380283347790275e-06, "loss": 0.8277, "step": 2628 }, { "epoch": 0.5654979565497956, "grad_norm": 0.0, "learning_rate": 8.37340836067533e-06, "loss": 0.8245, "step": 2629 }, { "epoch": 0.5657130565713057, "grad_norm": 0.0, "learning_rate": 8.366534163179757e-06, "loss": 0.9448, "step": 2630 }, { "epoch": 0.5659281565928157, "grad_norm": 0.0, "learning_rate": 8.359660758640595e-06, "loss": 0.923, "step": 2631 }, { "epoch": 0.5661432566143256, "grad_norm": 0.0, "learning_rate": 8.352788150394489e-06, "loss": 0.9228, "step": 2632 }, { "epoch": 0.5663583566358357, "grad_norm": 0.0, "learning_rate": 8.345916341777716e-06, "loss": 0.8602, "step": 2633 }, { "epoch": 0.5665734566573457, "grad_norm": 0.0, "learning_rate": 8.339045336126149e-06, "loss": 0.8616, "step": 2634 }, { "epoch": 0.5667885566788556, "grad_norm": 0.0, "learning_rate": 8.332175136775278e-06, "loss": 0.9217, "step": 2635 }, { "epoch": 0.5670036567003657, "grad_norm": 0.0, "learning_rate": 8.325305747060208e-06, "loss": 0.9061, "step": 2636 }, { "epoch": 0.5672187567218757, "grad_norm": 0.0, "learning_rate": 8.318437170315636e-06, "loss": 0.8975, "step": 2637 }, { "epoch": 0.5674338567433856, "grad_norm": 0.0, "learning_rate": 8.311569409875876e-06, "loss": 0.8693, "step": 2638 }, { "epoch": 0.5676489567648957, "grad_norm": 0.0, "learning_rate": 8.30470246907484e-06, "loss": 0.8535, "step": 2639 }, { "epoch": 0.5678640567864057, "grad_norm": 0.0, "learning_rate": 8.297836351246044e-06, "loss": 0.8347, "step": 2640 }, { "epoch": 0.5680791568079157, "grad_norm": 0.0, "learning_rate": 8.29097105972261e-06, "loss": 0.8176, "step": 2641 }, { "epoch": 0.5682942568294257, "grad_norm": 0.0, "learning_rate": 8.284106597837244e-06, "loss": 0.8421, "step": 2642 }, { "epoch": 0.5685093568509357, "grad_norm": 0.0, "learning_rate": 8.277242968922267e-06, "loss": 0.8442, "step": 2643 }, { "epoch": 0.5687244568724457, "grad_norm": 0.0, "learning_rate": 8.270380176309584e-06, "loss": 0.7927, "step": 2644 }, { "epoch": 0.5689395568939557, "grad_norm": 0.0, "learning_rate": 8.263518223330698e-06, "loss": 0.8576, "step": 2645 }, { "epoch": 0.5691546569154657, "grad_norm": 0.0, "learning_rate": 8.256657113316705e-06, "loss": 0.8623, "step": 2646 }, { "epoch": 0.5693697569369757, "grad_norm": 0.0, "learning_rate": 8.249796849598286e-06, "loss": 0.8651, "step": 2647 }, { "epoch": 0.5695848569584857, "grad_norm": 0.0, "learning_rate": 8.242937435505718e-06, "loss": 0.8166, "step": 2648 }, { "epoch": 0.5697999569799957, "grad_norm": 0.0, "learning_rate": 8.236078874368871e-06, "loss": 0.8651, "step": 2649 }, { "epoch": 0.5700150570015057, "grad_norm": 0.0, "learning_rate": 8.22922116951718e-06, "loss": 0.8719, "step": 2650 }, { "epoch": 0.5702301570230157, "grad_norm": 0.0, "learning_rate": 8.222364324279689e-06, "loss": 0.8648, "step": 2651 }, { "epoch": 0.5704452570445258, "grad_norm": 0.0, "learning_rate": 8.215508341985007e-06, "loss": 0.8439, "step": 2652 }, { "epoch": 0.5706603570660357, "grad_norm": 0.0, "learning_rate": 8.208653225961329e-06, "loss": 0.8927, "step": 2653 }, { "epoch": 0.5708754570875457, "grad_norm": 0.0, "learning_rate": 8.201798979536438e-06, "loss": 0.9096, "step": 2654 }, { "epoch": 0.5710905571090558, "grad_norm": 0.0, "learning_rate": 8.194945606037679e-06, "loss": 0.9074, "step": 2655 }, { "epoch": 0.5713056571305657, "grad_norm": 0.0, "learning_rate": 8.188093108791988e-06, "loss": 0.8662, "step": 2656 }, { "epoch": 0.5715207571520757, "grad_norm": 0.0, "learning_rate": 8.18124149112587e-06, "loss": 0.9003, "step": 2657 }, { "epoch": 0.5717358571735858, "grad_norm": 0.0, "learning_rate": 8.1743907563654e-06, "loss": 0.903, "step": 2658 }, { "epoch": 0.5719509571950957, "grad_norm": 0.0, "learning_rate": 8.167540907836229e-06, "loss": 0.8488, "step": 2659 }, { "epoch": 0.5721660572166057, "grad_norm": 0.0, "learning_rate": 8.160691948863572e-06, "loss": 0.7802, "step": 2660 }, { "epoch": 0.5723811572381158, "grad_norm": 0.0, "learning_rate": 8.153843882772217e-06, "loss": 0.865, "step": 2661 }, { "epoch": 0.5725962572596257, "grad_norm": 0.0, "learning_rate": 8.146996712886524e-06, "loss": 0.8329, "step": 2662 }, { "epoch": 0.5728113572811357, "grad_norm": 0.0, "learning_rate": 8.1401504425304e-06, "loss": 0.8109, "step": 2663 }, { "epoch": 0.5730264573026457, "grad_norm": 0.0, "learning_rate": 8.133305075027338e-06, "loss": 0.8447, "step": 2664 }, { "epoch": 0.5732415573241557, "grad_norm": 0.0, "learning_rate": 8.126460613700373e-06, "loss": 0.8525, "step": 2665 }, { "epoch": 0.5734566573456658, "grad_norm": 0.0, "learning_rate": 8.119617061872115e-06, "loss": 0.8316, "step": 2666 }, { "epoch": 0.5736717573671757, "grad_norm": 0.0, "learning_rate": 8.112774422864724e-06, "loss": 0.8237, "step": 2667 }, { "epoch": 0.5738868573886857, "grad_norm": 0.0, "learning_rate": 8.105932699999916e-06, "loss": 0.8759, "step": 2668 }, { "epoch": 0.5741019574101958, "grad_norm": 0.0, "learning_rate": 8.099091896598964e-06, "loss": 0.9352, "step": 2669 }, { "epoch": 0.5743170574317057, "grad_norm": 0.0, "learning_rate": 8.092252015982706e-06, "loss": 0.8651, "step": 2670 }, { "epoch": 0.5745321574532157, "grad_norm": 0.0, "learning_rate": 8.085413061471511e-06, "loss": 0.8765, "step": 2671 }, { "epoch": 0.5747472574747258, "grad_norm": 0.0, "learning_rate": 8.07857503638532e-06, "loss": 0.9265, "step": 2672 }, { "epoch": 0.5749623574962357, "grad_norm": 0.0, "learning_rate": 8.0717379440436e-06, "loss": 0.8242, "step": 2673 }, { "epoch": 0.5751774575177457, "grad_norm": 0.0, "learning_rate": 8.064901787765382e-06, "loss": 0.8316, "step": 2674 }, { "epoch": 0.5753925575392558, "grad_norm": 0.0, "learning_rate": 8.058066570869244e-06, "loss": 0.8467, "step": 2675 }, { "epoch": 0.5756076575607657, "grad_norm": 0.0, "learning_rate": 8.051232296673292e-06, "loss": 0.7885, "step": 2676 }, { "epoch": 0.5758227575822757, "grad_norm": 0.0, "learning_rate": 8.044398968495195e-06, "loss": 0.9196, "step": 2677 }, { "epoch": 0.5760378576037858, "grad_norm": 0.0, "learning_rate": 8.037566589652141e-06, "loss": 0.8533, "step": 2678 }, { "epoch": 0.5762529576252957, "grad_norm": 0.0, "learning_rate": 8.030735163460877e-06, "loss": 0.8657, "step": 2679 }, { "epoch": 0.5764680576468058, "grad_norm": 0.0, "learning_rate": 8.023904693237675e-06, "loss": 0.8595, "step": 2680 }, { "epoch": 0.5766831576683158, "grad_norm": 0.0, "learning_rate": 8.017075182298348e-06, "loss": 0.8275, "step": 2681 }, { "epoch": 0.5768982576898257, "grad_norm": 0.0, "learning_rate": 8.01024663395824e-06, "loss": 0.8751, "step": 2682 }, { "epoch": 0.5771133577113358, "grad_norm": 0.0, "learning_rate": 8.003419051532232e-06, "loss": 0.8648, "step": 2683 }, { "epoch": 0.5773284577328458, "grad_norm": 0.0, "learning_rate": 7.996592438334728e-06, "loss": 0.8202, "step": 2684 }, { "epoch": 0.5775435577543557, "grad_norm": 0.0, "learning_rate": 7.989766797679678e-06, "loss": 0.9222, "step": 2685 }, { "epoch": 0.5777586577758658, "grad_norm": 0.0, "learning_rate": 7.98294213288054e-06, "loss": 0.9228, "step": 2686 }, { "epoch": 0.5779737577973758, "grad_norm": 0.0, "learning_rate": 7.97611844725031e-06, "loss": 0.9316, "step": 2687 }, { "epoch": 0.5781888578188857, "grad_norm": 0.0, "learning_rate": 7.969295744101512e-06, "loss": 0.9193, "step": 2688 }, { "epoch": 0.5784039578403958, "grad_norm": 0.0, "learning_rate": 7.962474026746176e-06, "loss": 0.8412, "step": 2689 }, { "epoch": 0.5786190578619058, "grad_norm": 0.0, "learning_rate": 7.955653298495876e-06, "loss": 0.8426, "step": 2690 }, { "epoch": 0.5788341578834157, "grad_norm": 0.0, "learning_rate": 7.948833562661686e-06, "loss": 0.843, "step": 2691 }, { "epoch": 0.5790492579049258, "grad_norm": 0.0, "learning_rate": 7.94201482255421e-06, "loss": 0.8602, "step": 2692 }, { "epoch": 0.5792643579264358, "grad_norm": 0.0, "learning_rate": 7.935197081483568e-06, "loss": 0.8297, "step": 2693 }, { "epoch": 0.5794794579479458, "grad_norm": 0.0, "learning_rate": 7.928380342759389e-06, "loss": 0.8953, "step": 2694 }, { "epoch": 0.5796945579694558, "grad_norm": 0.0, "learning_rate": 7.92156460969082e-06, "loss": 0.8546, "step": 2695 }, { "epoch": 0.5799096579909658, "grad_norm": 0.0, "learning_rate": 7.914749885586517e-06, "loss": 0.8262, "step": 2696 }, { "epoch": 0.5801247580124758, "grad_norm": 0.0, "learning_rate": 7.90793617375465e-06, "loss": 0.9249, "step": 2697 }, { "epoch": 0.5803398580339858, "grad_norm": 0.0, "learning_rate": 7.901123477502895e-06, "loss": 0.8565, "step": 2698 }, { "epoch": 0.5805549580554958, "grad_norm": 0.0, "learning_rate": 7.894311800138432e-06, "loss": 0.7988, "step": 2699 }, { "epoch": 0.5807700580770058, "grad_norm": 0.0, "learning_rate": 7.887501144967954e-06, "loss": 0.8485, "step": 2700 }, { "epoch": 0.5809851580985158, "grad_norm": 0.0, "learning_rate": 7.880691515297656e-06, "loss": 0.8233, "step": 2701 }, { "epoch": 0.5812002581200258, "grad_norm": 0.0, "learning_rate": 7.87388291443323e-06, "loss": 0.8323, "step": 2702 }, { "epoch": 0.5814153581415358, "grad_norm": 0.0, "learning_rate": 7.867075345679868e-06, "loss": 0.8995, "step": 2703 }, { "epoch": 0.5816304581630458, "grad_norm": 0.0, "learning_rate": 7.860268812342265e-06, "loss": 0.8623, "step": 2704 }, { "epoch": 0.5818455581845559, "grad_norm": 0.0, "learning_rate": 7.853463317724614e-06, "loss": 0.8695, "step": 2705 }, { "epoch": 0.5820606582060658, "grad_norm": 0.0, "learning_rate": 7.846658865130608e-06, "loss": 0.8502, "step": 2706 }, { "epoch": 0.5822757582275758, "grad_norm": 0.0, "learning_rate": 7.839855457863419e-06, "loss": 0.8688, "step": 2707 }, { "epoch": 0.5824908582490859, "grad_norm": 0.0, "learning_rate": 7.83305309922573e-06, "loss": 0.8386, "step": 2708 }, { "epoch": 0.5827059582705958, "grad_norm": 0.0, "learning_rate": 7.826251792519697e-06, "loss": 0.8422, "step": 2709 }, { "epoch": 0.5829210582921058, "grad_norm": 0.0, "learning_rate": 7.819451541046982e-06, "loss": 0.9093, "step": 2710 }, { "epoch": 0.5831361583136159, "grad_norm": 0.0, "learning_rate": 7.812652348108726e-06, "loss": 0.8475, "step": 2711 }, { "epoch": 0.5833512583351258, "grad_norm": 0.0, "learning_rate": 7.805854217005553e-06, "loss": 0.8771, "step": 2712 }, { "epoch": 0.5835663583566358, "grad_norm": 0.0, "learning_rate": 7.799057151037577e-06, "loss": 0.8727, "step": 2713 }, { "epoch": 0.5837814583781459, "grad_norm": 0.0, "learning_rate": 7.792261153504403e-06, "loss": 0.8516, "step": 2714 }, { "epoch": 0.5839965583996558, "grad_norm": 0.0, "learning_rate": 7.785466227705093e-06, "loss": 0.8151, "step": 2715 }, { "epoch": 0.5842116584211658, "grad_norm": 0.0, "learning_rate": 7.778672376938217e-06, "loss": 0.8482, "step": 2716 }, { "epoch": 0.5844267584426759, "grad_norm": 0.0, "learning_rate": 7.771879604501802e-06, "loss": 0.8717, "step": 2717 }, { "epoch": 0.5846418584641858, "grad_norm": 0.0, "learning_rate": 7.76508791369336e-06, "loss": 0.8036, "step": 2718 }, { "epoch": 0.5848569584856959, "grad_norm": 0.0, "learning_rate": 7.758297307809882e-06, "loss": 0.7963, "step": 2719 }, { "epoch": 0.5850720585072059, "grad_norm": 0.0, "learning_rate": 7.751507790147825e-06, "loss": 0.8934, "step": 2720 }, { "epoch": 0.5852871585287158, "grad_norm": 0.0, "learning_rate": 7.744719364003123e-06, "loss": 0.8517, "step": 2721 }, { "epoch": 0.5855022585502259, "grad_norm": 0.0, "learning_rate": 7.73793203267117e-06, "loss": 0.854, "step": 2722 }, { "epoch": 0.5857173585717359, "grad_norm": 0.0, "learning_rate": 7.731145799446846e-06, "loss": 0.883, "step": 2723 }, { "epoch": 0.5859324585932458, "grad_norm": 0.0, "learning_rate": 7.724360667624482e-06, "loss": 0.7992, "step": 2724 }, { "epoch": 0.5861475586147559, "grad_norm": 0.0, "learning_rate": 7.717576640497883e-06, "loss": 0.8688, "step": 2725 }, { "epoch": 0.5863626586362659, "grad_norm": 0.0, "learning_rate": 7.710793721360312e-06, "loss": 0.8981, "step": 2726 }, { "epoch": 0.5865777586577758, "grad_norm": 0.0, "learning_rate": 7.704011913504503e-06, "loss": 0.9244, "step": 2727 }, { "epoch": 0.5867928586792859, "grad_norm": 0.0, "learning_rate": 7.697231220222638e-06, "loss": 0.818, "step": 2728 }, { "epoch": 0.5870079587007959, "grad_norm": 0.0, "learning_rate": 7.690451644806372e-06, "loss": 0.8948, "step": 2729 }, { "epoch": 0.5872230587223058, "grad_norm": 0.0, "learning_rate": 7.683673190546804e-06, "loss": 0.7731, "step": 2730 }, { "epoch": 0.5874381587438159, "grad_norm": 0.0, "learning_rate": 7.676895860734497e-06, "loss": 0.8302, "step": 2731 }, { "epoch": 0.5876532587653259, "grad_norm": 0.0, "learning_rate": 7.670119658659469e-06, "loss": 0.8866, "step": 2732 }, { "epoch": 0.5878683587868359, "grad_norm": 0.0, "learning_rate": 7.663344587611181e-06, "loss": 0.7838, "step": 2733 }, { "epoch": 0.5880834588083459, "grad_norm": 0.0, "learning_rate": 7.65657065087856e-06, "loss": 0.9152, "step": 2734 }, { "epoch": 0.5882985588298559, "grad_norm": 0.0, "learning_rate": 7.649797851749965e-06, "loss": 0.8715, "step": 2735 }, { "epoch": 0.5885136588513659, "grad_norm": 0.0, "learning_rate": 7.643026193513214e-06, "loss": 0.8743, "step": 2736 }, { "epoch": 0.5887287588728759, "grad_norm": 0.0, "learning_rate": 7.636255679455575e-06, "loss": 0.8119, "step": 2737 }, { "epoch": 0.5889438588943859, "grad_norm": 0.0, "learning_rate": 7.629486312863749e-06, "loss": 0.7652, "step": 2738 }, { "epoch": 0.5891589589158959, "grad_norm": 0.0, "learning_rate": 7.622718097023884e-06, "loss": 0.8223, "step": 2739 }, { "epoch": 0.5893740589374059, "grad_norm": 0.0, "learning_rate": 7.615951035221576e-06, "loss": 0.8735, "step": 2740 }, { "epoch": 0.589589158958916, "grad_norm": 0.0, "learning_rate": 7.6091851307418475e-06, "loss": 0.8891, "step": 2741 }, { "epoch": 0.5898042589804259, "grad_norm": 0.0, "learning_rate": 7.602420386869177e-06, "loss": 0.8816, "step": 2742 }, { "epoch": 0.5900193590019359, "grad_norm": 0.0, "learning_rate": 7.59565680688746e-06, "loss": 0.78, "step": 2743 }, { "epoch": 0.590234459023446, "grad_norm": 0.0, "learning_rate": 7.588894394080044e-06, "loss": 0.8248, "step": 2744 }, { "epoch": 0.5904495590449559, "grad_norm": 0.0, "learning_rate": 7.5821331517297005e-06, "loss": 0.8889, "step": 2745 }, { "epoch": 0.5906646590664659, "grad_norm": 0.0, "learning_rate": 7.575373083118633e-06, "loss": 0.9106, "step": 2746 }, { "epoch": 0.590879759087976, "grad_norm": 0.0, "learning_rate": 7.568614191528482e-06, "loss": 0.8522, "step": 2747 }, { "epoch": 0.5910948591094859, "grad_norm": 0.0, "learning_rate": 7.5618564802403025e-06, "loss": 0.8594, "step": 2748 }, { "epoch": 0.5913099591309959, "grad_norm": 0.0, "learning_rate": 7.555099952534593e-06, "loss": 0.8711, "step": 2749 }, { "epoch": 0.5915250591525059, "grad_norm": 0.0, "learning_rate": 7.548344611691272e-06, "loss": 0.8924, "step": 2750 }, { "epoch": 0.5917401591740159, "grad_norm": 0.0, "learning_rate": 7.541590460989674e-06, "loss": 0.8712, "step": 2751 }, { "epoch": 0.5919552591955259, "grad_norm": 0.0, "learning_rate": 7.534837503708567e-06, "loss": 0.8506, "step": 2752 }, { "epoch": 0.5921703592170359, "grad_norm": 0.0, "learning_rate": 7.528085743126132e-06, "loss": 0.8605, "step": 2753 }, { "epoch": 0.5923854592385459, "grad_norm": 0.0, "learning_rate": 7.521335182519968e-06, "loss": 0.9168, "step": 2754 }, { "epoch": 0.592600559260056, "grad_norm": 0.0, "learning_rate": 7.514585825167106e-06, "loss": 0.8068, "step": 2755 }, { "epoch": 0.5928156592815659, "grad_norm": 0.0, "learning_rate": 7.507837674343969e-06, "loss": 0.8453, "step": 2756 }, { "epoch": 0.5930307593030759, "grad_norm": 0.0, "learning_rate": 7.501090733326413e-06, "loss": 0.8881, "step": 2757 }, { "epoch": 0.593245859324586, "grad_norm": 0.0, "learning_rate": 7.494345005389705e-06, "loss": 0.8327, "step": 2758 }, { "epoch": 0.5934609593460959, "grad_norm": 0.0, "learning_rate": 7.487600493808513e-06, "loss": 0.9437, "step": 2759 }, { "epoch": 0.5936760593676059, "grad_norm": 0.0, "learning_rate": 7.480857201856923e-06, "loss": 0.875, "step": 2760 }, { "epoch": 0.593891159389116, "grad_norm": 0.0, "learning_rate": 7.474115132808425e-06, "loss": 0.8221, "step": 2761 }, { "epoch": 0.5941062594106259, "grad_norm": 0.0, "learning_rate": 7.467374289935917e-06, "loss": 0.8822, "step": 2762 }, { "epoch": 0.5943213594321359, "grad_norm": 0.0, "learning_rate": 7.460634676511706e-06, "loss": 0.7984, "step": 2763 }, { "epoch": 0.594536459453646, "grad_norm": 0.0, "learning_rate": 7.4538962958074915e-06, "loss": 0.799, "step": 2764 }, { "epoch": 0.5947515594751559, "grad_norm": 0.0, "learning_rate": 7.447159151094388e-06, "loss": 0.894, "step": 2765 }, { "epoch": 0.5949666594966659, "grad_norm": 0.0, "learning_rate": 7.440423245642892e-06, "loss": 0.8448, "step": 2766 }, { "epoch": 0.595181759518176, "grad_norm": 0.0, "learning_rate": 7.43368858272292e-06, "loss": 0.899, "step": 2767 }, { "epoch": 0.5953968595396859, "grad_norm": 0.0, "learning_rate": 7.426955165603773e-06, "loss": 0.8583, "step": 2768 }, { "epoch": 0.595611959561196, "grad_norm": 0.0, "learning_rate": 7.420222997554142e-06, "loss": 0.8848, "step": 2769 }, { "epoch": 0.595827059582706, "grad_norm": 0.0, "learning_rate": 7.413492081842122e-06, "loss": 0.8996, "step": 2770 }, { "epoch": 0.5960421596042159, "grad_norm": 0.0, "learning_rate": 7.406762421735203e-06, "loss": 0.9123, "step": 2771 }, { "epoch": 0.596257259625726, "grad_norm": 0.0, "learning_rate": 7.400034020500249e-06, "loss": 0.79, "step": 2772 }, { "epoch": 0.596472359647236, "grad_norm": 0.0, "learning_rate": 7.393306881403531e-06, "loss": 0.8646, "step": 2773 }, { "epoch": 0.5966874596687459, "grad_norm": 0.0, "learning_rate": 7.386581007710694e-06, "loss": 0.883, "step": 2774 }, { "epoch": 0.596902559690256, "grad_norm": 0.0, "learning_rate": 7.379856402686773e-06, "loss": 0.8428, "step": 2775 }, { "epoch": 0.597117659711766, "grad_norm": 0.0, "learning_rate": 7.3731330695961945e-06, "loss": 0.8113, "step": 2776 }, { "epoch": 0.5973327597332759, "grad_norm": 0.0, "learning_rate": 7.366411011702754e-06, "loss": 0.8849, "step": 2777 }, { "epoch": 0.597547859754786, "grad_norm": 0.0, "learning_rate": 7.35969023226964e-06, "loss": 0.807, "step": 2778 }, { "epoch": 0.597762959776296, "grad_norm": 0.0, "learning_rate": 7.352970734559412e-06, "loss": 0.905, "step": 2779 }, { "epoch": 0.5979780597978059, "grad_norm": 0.0, "learning_rate": 7.346252521834012e-06, "loss": 0.8298, "step": 2780 }, { "epoch": 0.598193159819316, "grad_norm": 0.0, "learning_rate": 7.339535597354762e-06, "loss": 0.7963, "step": 2781 }, { "epoch": 0.598408259840826, "grad_norm": 0.0, "learning_rate": 7.332819964382348e-06, "loss": 0.9024, "step": 2782 }, { "epoch": 0.598623359862336, "grad_norm": 0.0, "learning_rate": 7.326105626176835e-06, "loss": 0.8406, "step": 2783 }, { "epoch": 0.598838459883846, "grad_norm": 0.0, "learning_rate": 7.319392585997655e-06, "loss": 0.8384, "step": 2784 }, { "epoch": 0.599053559905356, "grad_norm": 0.0, "learning_rate": 7.312680847103622e-06, "loss": 0.8416, "step": 2785 }, { "epoch": 0.599268659926866, "grad_norm": 0.0, "learning_rate": 7.30597041275291e-06, "loss": 0.8361, "step": 2786 }, { "epoch": 0.599483759948376, "grad_norm": 0.0, "learning_rate": 7.299261286203052e-06, "loss": 0.8793, "step": 2787 }, { "epoch": 0.599698859969886, "grad_norm": 0.0, "learning_rate": 7.292553470710962e-06, "loss": 0.811, "step": 2788 }, { "epoch": 0.599913959991396, "grad_norm": 0.0, "learning_rate": 7.285846969532907e-06, "loss": 0.9478, "step": 2789 }, { "epoch": 0.600129060012906, "grad_norm": 0.0, "learning_rate": 7.279141785924515e-06, "loss": 0.844, "step": 2790 }, { "epoch": 0.600344160034416, "grad_norm": 0.0, "learning_rate": 7.2724379231407875e-06, "loss": 0.8343, "step": 2791 }, { "epoch": 0.600559260055926, "grad_norm": 0.0, "learning_rate": 7.265735384436064e-06, "loss": 0.9276, "step": 2792 }, { "epoch": 0.600774360077436, "grad_norm": 0.0, "learning_rate": 7.2590341730640565e-06, "loss": 0.8287, "step": 2793 }, { "epoch": 0.600989460098946, "grad_norm": 0.0, "learning_rate": 7.2523342922778364e-06, "loss": 0.8585, "step": 2794 }, { "epoch": 0.601204560120456, "grad_norm": 0.0, "learning_rate": 7.24563574532981e-06, "loss": 0.8326, "step": 2795 }, { "epoch": 0.601419660141966, "grad_norm": 0.0, "learning_rate": 7.2389385354717575e-06, "loss": 0.8467, "step": 2796 }, { "epoch": 0.6016347601634761, "grad_norm": 0.0, "learning_rate": 7.2322426659547925e-06, "loss": 0.8483, "step": 2797 }, { "epoch": 0.601849860184986, "grad_norm": 0.0, "learning_rate": 7.225548140029388e-06, "loss": 0.9004, "step": 2798 }, { "epoch": 0.602064960206496, "grad_norm": 0.0, "learning_rate": 7.218854960945366e-06, "loss": 0.8664, "step": 2799 }, { "epoch": 0.6022800602280061, "grad_norm": 0.0, "learning_rate": 7.212163131951885e-06, "loss": 0.8856, "step": 2800 }, { "epoch": 0.602495160249516, "grad_norm": 0.0, "learning_rate": 7.205472656297457e-06, "loss": 0.7894, "step": 2801 }, { "epoch": 0.602710260271026, "grad_norm": 0.0, "learning_rate": 7.198783537229937e-06, "loss": 0.8437, "step": 2802 }, { "epoch": 0.6029253602925361, "grad_norm": 0.0, "learning_rate": 7.192095777996515e-06, "loss": 0.8406, "step": 2803 }, { "epoch": 0.603140460314046, "grad_norm": 0.0, "learning_rate": 7.185409381843727e-06, "loss": 0.8543, "step": 2804 }, { "epoch": 0.603355560335556, "grad_norm": 0.0, "learning_rate": 7.1787243520174385e-06, "loss": 0.9414, "step": 2805 }, { "epoch": 0.6035706603570661, "grad_norm": 0.0, "learning_rate": 7.172040691762864e-06, "loss": 0.8442, "step": 2806 }, { "epoch": 0.603785760378576, "grad_norm": 0.0, "learning_rate": 7.1653584043245495e-06, "loss": 0.8459, "step": 2807 }, { "epoch": 0.604000860400086, "grad_norm": 0.0, "learning_rate": 7.158677492946364e-06, "loss": 0.7783, "step": 2808 }, { "epoch": 0.6042159604215961, "grad_norm": 0.0, "learning_rate": 7.151997960871526e-06, "loss": 0.8384, "step": 2809 }, { "epoch": 0.604431060443106, "grad_norm": 0.0, "learning_rate": 7.1453198113425655e-06, "loss": 0.8171, "step": 2810 }, { "epoch": 0.6046461604646161, "grad_norm": 0.0, "learning_rate": 7.138643047601359e-06, "loss": 0.8294, "step": 2811 }, { "epoch": 0.6048612604861261, "grad_norm": 0.0, "learning_rate": 7.131967672889101e-06, "loss": 0.842, "step": 2812 }, { "epoch": 0.605076360507636, "grad_norm": 0.0, "learning_rate": 7.125293690446307e-06, "loss": 0.8082, "step": 2813 }, { "epoch": 0.6052914605291461, "grad_norm": 0.0, "learning_rate": 7.118621103512826e-06, "loss": 0.847, "step": 2814 }, { "epoch": 0.6055065605506561, "grad_norm": 0.0, "learning_rate": 7.111949915327833e-06, "loss": 0.9022, "step": 2815 }, { "epoch": 0.605721660572166, "grad_norm": 0.0, "learning_rate": 7.105280129129806e-06, "loss": 0.7914, "step": 2816 }, { "epoch": 0.6059367605936761, "grad_norm": 0.0, "learning_rate": 7.098611748156563e-06, "loss": 0.8389, "step": 2817 }, { "epoch": 0.6061518606151861, "grad_norm": 0.0, "learning_rate": 7.091944775645224e-06, "loss": 0.8014, "step": 2818 }, { "epoch": 0.606366960636696, "grad_norm": 0.0, "learning_rate": 7.085279214832233e-06, "loss": 0.8874, "step": 2819 }, { "epoch": 0.6065820606582061, "grad_norm": 0.0, "learning_rate": 7.07861506895335e-06, "loss": 0.8365, "step": 2820 }, { "epoch": 0.6067971606797161, "grad_norm": 0.0, "learning_rate": 7.071952341243642e-06, "loss": 0.7908, "step": 2821 }, { "epoch": 0.607012260701226, "grad_norm": 0.0, "learning_rate": 7.065291034937496e-06, "loss": 0.8674, "step": 2822 }, { "epoch": 0.6072273607227361, "grad_norm": 0.0, "learning_rate": 7.058631153268598e-06, "loss": 0.9119, "step": 2823 }, { "epoch": 0.6074424607442461, "grad_norm": 0.0, "learning_rate": 7.051972699469953e-06, "loss": 0.8452, "step": 2824 }, { "epoch": 0.607657560765756, "grad_norm": 0.0, "learning_rate": 7.045315676773866e-06, "loss": 0.8107, "step": 2825 }, { "epoch": 0.6078726607872661, "grad_norm": 0.0, "learning_rate": 7.038660088411951e-06, "loss": 0.9055, "step": 2826 }, { "epoch": 0.6080877608087761, "grad_norm": 0.0, "learning_rate": 7.0320059376151225e-06, "loss": 0.7948, "step": 2827 }, { "epoch": 0.6083028608302861, "grad_norm": 0.0, "learning_rate": 7.025353227613604e-06, "loss": 0.8479, "step": 2828 }, { "epoch": 0.6085179608517961, "grad_norm": 0.0, "learning_rate": 7.018701961636907e-06, "loss": 0.7937, "step": 2829 }, { "epoch": 0.6087330608733061, "grad_norm": 0.0, "learning_rate": 7.012052142913857e-06, "loss": 0.833, "step": 2830 }, { "epoch": 0.6089481608948161, "grad_norm": 0.0, "learning_rate": 7.00540377467256e-06, "loss": 0.8496, "step": 2831 }, { "epoch": 0.6091632609163261, "grad_norm": 0.0, "learning_rate": 6.998756860140437e-06, "loss": 0.8469, "step": 2832 }, { "epoch": 0.609378360937836, "grad_norm": 0.0, "learning_rate": 6.9921114025441916e-06, "loss": 0.8344, "step": 2833 }, { "epoch": 0.6095934609593461, "grad_norm": 0.0, "learning_rate": 6.985467405109816e-06, "loss": 0.8494, "step": 2834 }, { "epoch": 0.6098085609808561, "grad_norm": 0.0, "learning_rate": 6.978824871062609e-06, "loss": 0.8512, "step": 2835 }, { "epoch": 0.610023661002366, "grad_norm": 0.0, "learning_rate": 6.97218380362714e-06, "loss": 0.8474, "step": 2836 }, { "epoch": 0.6102387610238761, "grad_norm": 0.0, "learning_rate": 6.965544206027283e-06, "loss": 0.845, "step": 2837 }, { "epoch": 0.6104538610453861, "grad_norm": 0.0, "learning_rate": 6.958906081486192e-06, "loss": 0.8425, "step": 2838 }, { "epoch": 0.610668961066896, "grad_norm": 0.0, "learning_rate": 6.952269433226303e-06, "loss": 0.8225, "step": 2839 }, { "epoch": 0.6108840610884061, "grad_norm": 0.0, "learning_rate": 6.945634264469338e-06, "loss": 0.809, "step": 2840 }, { "epoch": 0.6110991611099161, "grad_norm": 0.0, "learning_rate": 6.939000578436301e-06, "loss": 0.7901, "step": 2841 }, { "epoch": 0.6113142611314261, "grad_norm": 0.0, "learning_rate": 6.932368378347473e-06, "loss": 0.8921, "step": 2842 }, { "epoch": 0.6115293611529361, "grad_norm": 0.0, "learning_rate": 6.925737667422427e-06, "loss": 0.9269, "step": 2843 }, { "epoch": 0.6117444611744461, "grad_norm": 0.0, "learning_rate": 6.91910844887999e-06, "loss": 0.8344, "step": 2844 }, { "epoch": 0.6119595611959561, "grad_norm": 0.0, "learning_rate": 6.912480725938284e-06, "loss": 0.8528, "step": 2845 }, { "epoch": 0.6121746612174661, "grad_norm": 0.0, "learning_rate": 6.9058545018146986e-06, "loss": 0.8395, "step": 2846 }, { "epoch": 0.6123897612389761, "grad_norm": 0.0, "learning_rate": 6.899229779725893e-06, "loss": 0.7104, "step": 2847 }, { "epoch": 0.6126048612604861, "grad_norm": 0.0, "learning_rate": 6.892606562887802e-06, "loss": 0.8638, "step": 2848 }, { "epoch": 0.6128199612819961, "grad_norm": 0.0, "learning_rate": 6.885984854515624e-06, "loss": 0.9188, "step": 2849 }, { "epoch": 0.6130350613035062, "grad_norm": 0.0, "learning_rate": 6.879364657823828e-06, "loss": 0.8767, "step": 2850 }, { "epoch": 0.6132501613250161, "grad_norm": 0.0, "learning_rate": 6.872745976026158e-06, "loss": 0.8117, "step": 2851 }, { "epoch": 0.6134652613465261, "grad_norm": 0.0, "learning_rate": 6.866128812335603e-06, "loss": 0.892, "step": 2852 }, { "epoch": 0.6136803613680362, "grad_norm": 0.0, "learning_rate": 6.859513169964436e-06, "loss": 0.8277, "step": 2853 }, { "epoch": 0.6138954613895461, "grad_norm": 0.0, "learning_rate": 6.8528990521241755e-06, "loss": 0.834, "step": 2854 }, { "epoch": 0.6141105614110561, "grad_norm": 0.0, "learning_rate": 6.846286462025606e-06, "loss": 0.8298, "step": 2855 }, { "epoch": 0.6143256614325662, "grad_norm": 0.0, "learning_rate": 6.8396754028787775e-06, "loss": 0.8591, "step": 2856 }, { "epoch": 0.6145407614540761, "grad_norm": 0.0, "learning_rate": 6.833065877892979e-06, "loss": 0.8147, "step": 2857 }, { "epoch": 0.6147558614755861, "grad_norm": 0.0, "learning_rate": 6.826457890276773e-06, "loss": 0.8383, "step": 2858 }, { "epoch": 0.6149709614970962, "grad_norm": 0.0, "learning_rate": 6.819851443237971e-06, "loss": 0.8019, "step": 2859 }, { "epoch": 0.6151860615186061, "grad_norm": 0.0, "learning_rate": 6.813246539983629e-06, "loss": 0.7938, "step": 2860 }, { "epoch": 0.6154011615401161, "grad_norm": 0.0, "learning_rate": 6.8066431837200574e-06, "loss": 0.802, "step": 2861 }, { "epoch": 0.6156162615616262, "grad_norm": 0.0, "learning_rate": 6.80004137765282e-06, "loss": 0.882, "step": 2862 }, { "epoch": 0.6158313615831361, "grad_norm": 0.0, "learning_rate": 6.793441124986722e-06, "loss": 0.8473, "step": 2863 }, { "epoch": 0.6160464616046462, "grad_norm": 0.0, "learning_rate": 6.7868424289258216e-06, "loss": 0.8213, "step": 2864 }, { "epoch": 0.6162615616261562, "grad_norm": 0.0, "learning_rate": 6.780245292673411e-06, "loss": 0.8285, "step": 2865 }, { "epoch": 0.6164766616476661, "grad_norm": 0.0, "learning_rate": 6.7736497194320405e-06, "loss": 0.8407, "step": 2866 }, { "epoch": 0.6166917616691762, "grad_norm": 0.0, "learning_rate": 6.767055712403481e-06, "loss": 0.8613, "step": 2867 }, { "epoch": 0.6169068616906862, "grad_norm": 0.0, "learning_rate": 6.7604632747887625e-06, "loss": 0.9103, "step": 2868 }, { "epoch": 0.6171219617121961, "grad_norm": 0.0, "learning_rate": 6.753872409788145e-06, "loss": 0.8777, "step": 2869 }, { "epoch": 0.6173370617337062, "grad_norm": 0.0, "learning_rate": 6.74728312060112e-06, "loss": 0.8276, "step": 2870 }, { "epoch": 0.6175521617552162, "grad_norm": 0.0, "learning_rate": 6.740695410426421e-06, "loss": 0.8387, "step": 2871 }, { "epoch": 0.6177672617767261, "grad_norm": 0.0, "learning_rate": 6.73410928246202e-06, "loss": 0.8302, "step": 2872 }, { "epoch": 0.6179823617982362, "grad_norm": 0.0, "learning_rate": 6.727524739905104e-06, "loss": 0.7588, "step": 2873 }, { "epoch": 0.6181974618197462, "grad_norm": 0.0, "learning_rate": 6.7209417859521105e-06, "loss": 0.9008, "step": 2874 }, { "epoch": 0.6184125618412561, "grad_norm": 0.0, "learning_rate": 6.714360423798688e-06, "loss": 0.8848, "step": 2875 }, { "epoch": 0.6186276618627662, "grad_norm": 0.0, "learning_rate": 6.707780656639722e-06, "loss": 0.7437, "step": 2876 }, { "epoch": 0.6188427618842762, "grad_norm": 0.0, "learning_rate": 6.701202487669325e-06, "loss": 0.7813, "step": 2877 }, { "epoch": 0.6190578619057862, "grad_norm": 0.0, "learning_rate": 6.694625920080826e-06, "loss": 0.7958, "step": 2878 }, { "epoch": 0.6192729619272962, "grad_norm": 0.0, "learning_rate": 6.688050957066787e-06, "loss": 0.8511, "step": 2879 }, { "epoch": 0.6194880619488062, "grad_norm": 0.0, "learning_rate": 6.681477601818977e-06, "loss": 0.8998, "step": 2880 }, { "epoch": 0.6197031619703162, "grad_norm": 0.0, "learning_rate": 6.674905857528398e-06, "loss": 0.8493, "step": 2881 }, { "epoch": 0.6199182619918262, "grad_norm": 0.0, "learning_rate": 6.6683357273852686e-06, "loss": 0.8259, "step": 2882 }, { "epoch": 0.6201333620133362, "grad_norm": 0.0, "learning_rate": 6.661767214579015e-06, "loss": 0.8647, "step": 2883 }, { "epoch": 0.6203484620348462, "grad_norm": 0.0, "learning_rate": 6.655200322298284e-06, "loss": 0.7311, "step": 2884 }, { "epoch": 0.6205635620563562, "grad_norm": 0.0, "learning_rate": 6.648635053730931e-06, "loss": 0.8312, "step": 2885 }, { "epoch": 0.6207786620778663, "grad_norm": 0.0, "learning_rate": 6.642071412064031e-06, "loss": 0.8005, "step": 2886 }, { "epoch": 0.6209937620993762, "grad_norm": 0.0, "learning_rate": 6.635509400483868e-06, "loss": 0.8456, "step": 2887 }, { "epoch": 0.6212088621208862, "grad_norm": 0.0, "learning_rate": 6.628949022175928e-06, "loss": 0.8588, "step": 2888 }, { "epoch": 0.6214239621423963, "grad_norm": 0.0, "learning_rate": 6.622390280324908e-06, "loss": 0.8227, "step": 2889 }, { "epoch": 0.6216390621639062, "grad_norm": 0.0, "learning_rate": 6.615833178114715e-06, "loss": 0.8096, "step": 2890 }, { "epoch": 0.6218541621854162, "grad_norm": 0.0, "learning_rate": 6.60927771872845e-06, "loss": 0.857, "step": 2891 }, { "epoch": 0.6220692622069263, "grad_norm": 0.0, "learning_rate": 6.60272390534843e-06, "loss": 0.8402, "step": 2892 }, { "epoch": 0.6222843622284362, "grad_norm": 0.0, "learning_rate": 6.596171741156154e-06, "loss": 0.8407, "step": 2893 }, { "epoch": 0.6224994622499462, "grad_norm": 0.0, "learning_rate": 6.58962122933234e-06, "loss": 0.8005, "step": 2894 }, { "epoch": 0.6227145622714563, "grad_norm": 0.0, "learning_rate": 6.583072373056898e-06, "loss": 0.8362, "step": 2895 }, { "epoch": 0.6229296622929662, "grad_norm": 0.0, "learning_rate": 6.576525175508922e-06, "loss": 0.8709, "step": 2896 }, { "epoch": 0.6231447623144762, "grad_norm": 0.0, "learning_rate": 6.5699796398667215e-06, "loss": 0.9053, "step": 2897 }, { "epoch": 0.6233598623359863, "grad_norm": 0.0, "learning_rate": 6.56343576930778e-06, "loss": 0.8996, "step": 2898 }, { "epoch": 0.6235749623574962, "grad_norm": 0.0, "learning_rate": 6.556893567008782e-06, "loss": 0.8484, "step": 2899 }, { "epoch": 0.6237900623790062, "grad_norm": 0.0, "learning_rate": 6.5503530361456095e-06, "loss": 0.8743, "step": 2900 }, { "epoch": 0.6240051624005163, "grad_norm": 0.0, "learning_rate": 6.543814179893312e-06, "loss": 0.8366, "step": 2901 }, { "epoch": 0.6242202624220262, "grad_norm": 0.0, "learning_rate": 6.5372770014261465e-06, "loss": 0.898, "step": 2902 }, { "epoch": 0.6244353624435363, "grad_norm": 0.0, "learning_rate": 6.530741503917551e-06, "loss": 0.8121, "step": 2903 }, { "epoch": 0.6246504624650463, "grad_norm": 0.0, "learning_rate": 6.524207690540138e-06, "loss": 0.8526, "step": 2904 }, { "epoch": 0.6248655624865562, "grad_norm": 0.0, "learning_rate": 6.517675564465713e-06, "loss": 0.8216, "step": 2905 }, { "epoch": 0.6250806625080663, "grad_norm": 0.0, "learning_rate": 6.5111451288652565e-06, "loss": 0.8103, "step": 2906 }, { "epoch": 0.6252957625295763, "grad_norm": 0.0, "learning_rate": 6.504616386908926e-06, "loss": 0.898, "step": 2907 }, { "epoch": 0.6255108625510862, "grad_norm": 0.0, "learning_rate": 6.498089341766072e-06, "loss": 0.8459, "step": 2908 }, { "epoch": 0.6257259625725963, "grad_norm": 0.0, "learning_rate": 6.491563996605198e-06, "loss": 0.884, "step": 2909 }, { "epoch": 0.6259410625941063, "grad_norm": 0.0, "learning_rate": 6.485040354594004e-06, "loss": 0.8081, "step": 2910 }, { "epoch": 0.6261561626156162, "grad_norm": 0.0, "learning_rate": 6.478518418899347e-06, "loss": 0.8372, "step": 2911 }, { "epoch": 0.6263712626371263, "grad_norm": 0.0, "learning_rate": 6.471998192687266e-06, "loss": 0.8997, "step": 2912 }, { "epoch": 0.6265863626586363, "grad_norm": 0.0, "learning_rate": 6.465479679122968e-06, "loss": 0.8349, "step": 2913 }, { "epoch": 0.6268014626801462, "grad_norm": 0.0, "learning_rate": 6.4589628813708215e-06, "loss": 0.8369, "step": 2914 }, { "epoch": 0.6270165627016563, "grad_norm": 0.0, "learning_rate": 6.4524478025943704e-06, "loss": 0.7884, "step": 2915 }, { "epoch": 0.6272316627231663, "grad_norm": 0.0, "learning_rate": 6.445934445956328e-06, "loss": 0.8948, "step": 2916 }, { "epoch": 0.6274467627446763, "grad_norm": 0.0, "learning_rate": 6.439422814618553e-06, "loss": 0.898, "step": 2917 }, { "epoch": 0.6276618627661863, "grad_norm": 0.0, "learning_rate": 6.4329129117420904e-06, "loss": 0.8429, "step": 2918 }, { "epoch": 0.6278769627876962, "grad_norm": 0.0, "learning_rate": 6.426404740487127e-06, "loss": 0.8854, "step": 2919 }, { "epoch": 0.6280920628092063, "grad_norm": 0.0, "learning_rate": 6.4198983040130154e-06, "loss": 0.8242, "step": 2920 }, { "epoch": 0.6283071628307163, "grad_norm": 0.0, "learning_rate": 6.413393605478275e-06, "loss": 0.8895, "step": 2921 }, { "epoch": 0.6285222628522262, "grad_norm": 0.0, "learning_rate": 6.406890648040563e-06, "loss": 0.8238, "step": 2922 }, { "epoch": 0.6287373628737363, "grad_norm": 0.0, "learning_rate": 6.400389434856713e-06, "loss": 0.8739, "step": 2923 }, { "epoch": 0.6289524628952463, "grad_norm": 0.0, "learning_rate": 6.39388996908269e-06, "loss": 0.8357, "step": 2924 }, { "epoch": 0.6291675629167562, "grad_norm": 0.0, "learning_rate": 6.3873922538736296e-06, "loss": 0.8206, "step": 2925 }, { "epoch": 0.6293826629382663, "grad_norm": 0.0, "learning_rate": 6.380896292383807e-06, "loss": 0.845, "step": 2926 }, { "epoch": 0.6295977629597763, "grad_norm": 0.0, "learning_rate": 6.374402087766647e-06, "loss": 0.7972, "step": 2927 }, { "epoch": 0.6298128629812862, "grad_norm": 0.0, "learning_rate": 6.367909643174723e-06, "loss": 0.7858, "step": 2928 }, { "epoch": 0.6300279630027963, "grad_norm": 0.0, "learning_rate": 6.36141896175976e-06, "loss": 0.8009, "step": 2929 }, { "epoch": 0.6302430630243063, "grad_norm": 0.0, "learning_rate": 6.354930046672612e-06, "loss": 0.9034, "step": 2930 }, { "epoch": 0.6304581630458163, "grad_norm": 0.0, "learning_rate": 6.348442901063294e-06, "loss": 0.91, "step": 2931 }, { "epoch": 0.6306732630673263, "grad_norm": 0.0, "learning_rate": 6.341957528080944e-06, "loss": 0.8292, "step": 2932 }, { "epoch": 0.6308883630888363, "grad_norm": 0.0, "learning_rate": 6.335473930873854e-06, "loss": 0.8421, "step": 2933 }, { "epoch": 0.6311034631103463, "grad_norm": 0.0, "learning_rate": 6.32899211258945e-06, "loss": 0.8132, "step": 2934 }, { "epoch": 0.6313185631318563, "grad_norm": 0.0, "learning_rate": 6.3225120763742854e-06, "loss": 0.8489, "step": 2935 }, { "epoch": 0.6315336631533663, "grad_norm": 0.0, "learning_rate": 6.316033825374064e-06, "loss": 0.8585, "step": 2936 }, { "epoch": 0.6317487631748763, "grad_norm": 0.0, "learning_rate": 6.309557362733605e-06, "loss": 0.8627, "step": 2937 }, { "epoch": 0.6319638631963863, "grad_norm": 0.0, "learning_rate": 6.303082691596876e-06, "loss": 0.8421, "step": 2938 }, { "epoch": 0.6321789632178964, "grad_norm": 0.0, "learning_rate": 6.296609815106971e-06, "loss": 0.8541, "step": 2939 }, { "epoch": 0.6323940632394063, "grad_norm": 0.0, "learning_rate": 6.290138736406103e-06, "loss": 0.8735, "step": 2940 }, { "epoch": 0.6326091632609163, "grad_norm": 0.0, "learning_rate": 6.283669458635622e-06, "loss": 0.8449, "step": 2941 }, { "epoch": 0.6328242632824264, "grad_norm": 0.0, "learning_rate": 6.277201984936001e-06, "loss": 0.897, "step": 2942 }, { "epoch": 0.6330393633039363, "grad_norm": 0.0, "learning_rate": 6.270736318446835e-06, "loss": 0.8033, "step": 2943 }, { "epoch": 0.6332544633254463, "grad_norm": 0.0, "learning_rate": 6.264272462306851e-06, "loss": 0.8205, "step": 2944 }, { "epoch": 0.6334695633469564, "grad_norm": 0.0, "learning_rate": 6.25781041965388e-06, "loss": 0.7984, "step": 2945 }, { "epoch": 0.6336846633684663, "grad_norm": 0.0, "learning_rate": 6.25135019362489e-06, "loss": 0.8499, "step": 2946 }, { "epoch": 0.6338997633899763, "grad_norm": 0.0, "learning_rate": 6.244891787355958e-06, "loss": 0.8285, "step": 2947 }, { "epoch": 0.6341148634114864, "grad_norm": 0.0, "learning_rate": 6.238435203982278e-06, "loss": 0.8323, "step": 2948 }, { "epoch": 0.6343299634329963, "grad_norm": 0.0, "learning_rate": 6.231980446638165e-06, "loss": 0.8112, "step": 2949 }, { "epoch": 0.6345450634545063, "grad_norm": 0.0, "learning_rate": 6.225527518457035e-06, "loss": 0.8998, "step": 2950 }, { "epoch": 0.6347601634760164, "grad_norm": 0.0, "learning_rate": 6.2190764225714284e-06, "loss": 0.8231, "step": 2951 }, { "epoch": 0.6349752634975263, "grad_norm": 0.0, "learning_rate": 6.212627162112996e-06, "loss": 0.8248, "step": 2952 }, { "epoch": 0.6351903635190363, "grad_norm": 0.0, "learning_rate": 6.206179740212486e-06, "loss": 0.8245, "step": 2953 }, { "epoch": 0.6354054635405464, "grad_norm": 0.0, "learning_rate": 6.199734159999768e-06, "loss": 0.8598, "step": 2954 }, { "epoch": 0.6356205635620563, "grad_norm": 0.0, "learning_rate": 6.193290424603809e-06, "loss": 0.9056, "step": 2955 }, { "epoch": 0.6358356635835664, "grad_norm": 0.0, "learning_rate": 6.186848537152677e-06, "loss": 0.7958, "step": 2956 }, { "epoch": 0.6360507636050764, "grad_norm": 0.0, "learning_rate": 6.180408500773558e-06, "loss": 0.8703, "step": 2957 }, { "epoch": 0.6362658636265863, "grad_norm": 0.0, "learning_rate": 6.173970318592722e-06, "loss": 0.8696, "step": 2958 }, { "epoch": 0.6364809636480964, "grad_norm": 0.0, "learning_rate": 6.1675339937355505e-06, "loss": 0.8716, "step": 2959 }, { "epoch": 0.6366960636696064, "grad_norm": 0.0, "learning_rate": 6.1610995293265215e-06, "loss": 0.8182, "step": 2960 }, { "epoch": 0.6369111636911163, "grad_norm": 0.0, "learning_rate": 6.154666928489201e-06, "loss": 0.8568, "step": 2961 }, { "epoch": 0.6371262637126264, "grad_norm": 0.0, "learning_rate": 6.148236194346268e-06, "loss": 0.8905, "step": 2962 }, { "epoch": 0.6373413637341364, "grad_norm": 0.0, "learning_rate": 6.141807330019476e-06, "loss": 0.8988, "step": 2963 }, { "epoch": 0.6375564637556463, "grad_norm": 0.0, "learning_rate": 6.135380338629679e-06, "loss": 0.8335, "step": 2964 }, { "epoch": 0.6377715637771564, "grad_norm": 0.0, "learning_rate": 6.128955223296831e-06, "loss": 0.8511, "step": 2965 }, { "epoch": 0.6379866637986664, "grad_norm": 0.0, "learning_rate": 6.122531987139955e-06, "loss": 0.8815, "step": 2966 }, { "epoch": 0.6382017638201763, "grad_norm": 0.0, "learning_rate": 6.1161106332771844e-06, "loss": 0.8605, "step": 2967 }, { "epoch": 0.6384168638416864, "grad_norm": 0.0, "learning_rate": 6.109691164825718e-06, "loss": 0.8377, "step": 2968 }, { "epoch": 0.6386319638631964, "grad_norm": 0.0, "learning_rate": 6.103273584901857e-06, "loss": 0.8987, "step": 2969 }, { "epoch": 0.6388470638847064, "grad_norm": 0.0, "learning_rate": 6.096857896620975e-06, "loss": 0.8334, "step": 2970 }, { "epoch": 0.6390621639062164, "grad_norm": 0.0, "learning_rate": 6.090444103097526e-06, "loss": 0.7861, "step": 2971 }, { "epoch": 0.6392772639277264, "grad_norm": 0.0, "learning_rate": 6.084032207445052e-06, "loss": 0.8797, "step": 2972 }, { "epoch": 0.6394923639492364, "grad_norm": 0.0, "learning_rate": 6.077622212776175e-06, "loss": 0.8464, "step": 2973 }, { "epoch": 0.6397074639707464, "grad_norm": 0.0, "learning_rate": 6.071214122202581e-06, "loss": 0.8037, "step": 2974 }, { "epoch": 0.6399225639922564, "grad_norm": 0.0, "learning_rate": 6.064807938835046e-06, "loss": 0.8502, "step": 2975 }, { "epoch": 0.6401376640137664, "grad_norm": 0.0, "learning_rate": 6.05840366578341e-06, "loss": 0.8899, "step": 2976 }, { "epoch": 0.6403527640352764, "grad_norm": 0.0, "learning_rate": 6.052001306156593e-06, "loss": 0.8797, "step": 2977 }, { "epoch": 0.6405678640567865, "grad_norm": 0.0, "learning_rate": 6.045600863062584e-06, "loss": 0.8293, "step": 2978 }, { "epoch": 0.6407829640782964, "grad_norm": 0.0, "learning_rate": 6.039202339608432e-06, "loss": 0.8175, "step": 2979 }, { "epoch": 0.6409980640998064, "grad_norm": 0.0, "learning_rate": 6.0328057389002736e-06, "loss": 0.8794, "step": 2980 }, { "epoch": 0.6412131641213165, "grad_norm": 0.0, "learning_rate": 6.0264110640432935e-06, "loss": 0.8181, "step": 2981 }, { "epoch": 0.6414282641428264, "grad_norm": 0.0, "learning_rate": 6.0200183181417515e-06, "loss": 0.8109, "step": 2982 }, { "epoch": 0.6416433641643364, "grad_norm": 0.0, "learning_rate": 6.0136275042989714e-06, "loss": 0.8998, "step": 2983 }, { "epoch": 0.6418584641858465, "grad_norm": 0.0, "learning_rate": 6.007238625617333e-06, "loss": 0.8342, "step": 2984 }, { "epoch": 0.6420735642073564, "grad_norm": 0.0, "learning_rate": 6.000851685198283e-06, "loss": 0.8358, "step": 2985 }, { "epoch": 0.6422886642288664, "grad_norm": 0.0, "learning_rate": 5.994466686142317e-06, "loss": 0.8734, "step": 2986 }, { "epoch": 0.6425037642503765, "grad_norm": 0.0, "learning_rate": 5.988083631549001e-06, "loss": 0.7831, "step": 2987 }, { "epoch": 0.6427188642718864, "grad_norm": 0.0, "learning_rate": 5.9817025245169545e-06, "loss": 0.8622, "step": 2988 }, { "epoch": 0.6429339642933964, "grad_norm": 0.0, "learning_rate": 5.975323368143841e-06, "loss": 0.8557, "step": 2989 }, { "epoch": 0.6431490643149065, "grad_norm": 0.0, "learning_rate": 5.968946165526389e-06, "loss": 0.8482, "step": 2990 }, { "epoch": 0.6433641643364164, "grad_norm": 0.0, "learning_rate": 5.962570919760374e-06, "loss": 0.7818, "step": 2991 }, { "epoch": 0.6435792643579265, "grad_norm": 0.0, "learning_rate": 5.956197633940622e-06, "loss": 0.8405, "step": 2992 }, { "epoch": 0.6437943643794365, "grad_norm": 0.0, "learning_rate": 5.949826311161007e-06, "loss": 0.8622, "step": 2993 }, { "epoch": 0.6440094644009464, "grad_norm": 0.0, "learning_rate": 5.943456954514445e-06, "loss": 0.8347, "step": 2994 }, { "epoch": 0.6442245644224565, "grad_norm": 0.0, "learning_rate": 5.937089567092909e-06, "loss": 0.8929, "step": 2995 }, { "epoch": 0.6444396644439665, "grad_norm": 0.0, "learning_rate": 5.930724151987411e-06, "loss": 0.8756, "step": 2996 }, { "epoch": 0.6446547644654764, "grad_norm": 0.0, "learning_rate": 5.9243607122879965e-06, "loss": 0.8236, "step": 2997 }, { "epoch": 0.6448698644869865, "grad_norm": 0.0, "learning_rate": 5.9179992510837706e-06, "loss": 0.8755, "step": 2998 }, { "epoch": 0.6450849645084965, "grad_norm": 0.0, "learning_rate": 5.9116397714628584e-06, "loss": 0.8934, "step": 2999 }, { "epoch": 0.6453000645300064, "grad_norm": 0.0, "learning_rate": 5.905282276512434e-06, "loss": 0.8067, "step": 3000 }, { "epoch": 0.6455151645515165, "grad_norm": 0.0, "learning_rate": 5.898926769318711e-06, "loss": 0.8725, "step": 3001 }, { "epoch": 0.6457302645730264, "grad_norm": 0.0, "learning_rate": 5.892573252966926e-06, "loss": 0.8711, "step": 3002 }, { "epoch": 0.6459453645945364, "grad_norm": 0.0, "learning_rate": 5.88622173054136e-06, "loss": 0.8776, "step": 3003 }, { "epoch": 0.6461604646160465, "grad_norm": 0.0, "learning_rate": 5.879872205125325e-06, "loss": 0.833, "step": 3004 }, { "epoch": 0.6463755646375564, "grad_norm": 0.0, "learning_rate": 5.873524679801157e-06, "loss": 0.8289, "step": 3005 }, { "epoch": 0.6465906646590664, "grad_norm": 0.0, "learning_rate": 5.867179157650225e-06, "loss": 0.8093, "step": 3006 }, { "epoch": 0.6468057646805765, "grad_norm": 0.0, "learning_rate": 5.860835641752927e-06, "loss": 0.8745, "step": 3007 }, { "epoch": 0.6470208647020864, "grad_norm": 0.0, "learning_rate": 5.854494135188682e-06, "loss": 0.832, "step": 3008 }, { "epoch": 0.6472359647235965, "grad_norm": 0.0, "learning_rate": 5.848154641035943e-06, "loss": 0.7589, "step": 3009 }, { "epoch": 0.6474510647451065, "grad_norm": 0.0, "learning_rate": 5.841817162372176e-06, "loss": 0.8568, "step": 3010 }, { "epoch": 0.6476661647666164, "grad_norm": 0.0, "learning_rate": 5.835481702273879e-06, "loss": 0.7875, "step": 3011 }, { "epoch": 0.6478812647881265, "grad_norm": 0.0, "learning_rate": 5.829148263816552e-06, "loss": 0.8462, "step": 3012 }, { "epoch": 0.6480963648096365, "grad_norm": 0.0, "learning_rate": 5.822816850074733e-06, "loss": 0.7954, "step": 3013 }, { "epoch": 0.6483114648311464, "grad_norm": 0.0, "learning_rate": 5.816487464121974e-06, "loss": 0.8949, "step": 3014 }, { "epoch": 0.6485265648526565, "grad_norm": 0.0, "learning_rate": 5.810160109030829e-06, "loss": 0.8385, "step": 3015 }, { "epoch": 0.6487416648741665, "grad_norm": 0.0, "learning_rate": 5.80383478787288e-06, "loss": 0.8188, "step": 3016 }, { "epoch": 0.6489567648956764, "grad_norm": 0.0, "learning_rate": 5.79751150371872e-06, "loss": 0.8876, "step": 3017 }, { "epoch": 0.6491718649171865, "grad_norm": 0.0, "learning_rate": 5.791190259637945e-06, "loss": 0.8429, "step": 3018 }, { "epoch": 0.6493869649386965, "grad_norm": 0.0, "learning_rate": 5.784871058699171e-06, "loss": 0.8433, "step": 3019 }, { "epoch": 0.6496020649602064, "grad_norm": 0.0, "learning_rate": 5.77855390397001e-06, "loss": 0.899, "step": 3020 }, { "epoch": 0.6498171649817165, "grad_norm": 0.0, "learning_rate": 5.7722387985170915e-06, "loss": 0.9477, "step": 3021 }, { "epoch": 0.6500322650032265, "grad_norm": 0.0, "learning_rate": 5.765925745406051e-06, "loss": 0.8159, "step": 3022 }, { "epoch": 0.6502473650247365, "grad_norm": 0.0, "learning_rate": 5.759614747701519e-06, "loss": 0.8309, "step": 3023 }, { "epoch": 0.6504624650462465, "grad_norm": 0.0, "learning_rate": 5.753305808467126e-06, "loss": 0.8394, "step": 3024 }, { "epoch": 0.6506775650677565, "grad_norm": 0.0, "learning_rate": 5.7469989307655225e-06, "loss": 0.9191, "step": 3025 }, { "epoch": 0.6508926650892665, "grad_norm": 0.0, "learning_rate": 5.740694117658334e-06, "loss": 0.8686, "step": 3026 }, { "epoch": 0.6511077651107765, "grad_norm": 0.0, "learning_rate": 5.734391372206203e-06, "loss": 0.9043, "step": 3027 }, { "epoch": 0.6513228651322865, "grad_norm": 0.0, "learning_rate": 5.728090697468751e-06, "loss": 0.8462, "step": 3028 }, { "epoch": 0.6515379651537965, "grad_norm": 0.0, "learning_rate": 5.7217920965046115e-06, "loss": 0.8432, "step": 3029 }, { "epoch": 0.6517530651753065, "grad_norm": 0.0, "learning_rate": 5.715495572371405e-06, "loss": 0.8181, "step": 3030 }, { "epoch": 0.6519681651968166, "grad_norm": 0.0, "learning_rate": 5.709201128125733e-06, "loss": 0.863, "step": 3031 }, { "epoch": 0.6521832652183265, "grad_norm": 0.0, "learning_rate": 5.7029087668232075e-06, "loss": 0.8946, "step": 3032 }, { "epoch": 0.6523983652398365, "grad_norm": 0.0, "learning_rate": 5.696618491518409e-06, "loss": 0.873, "step": 3033 }, { "epoch": 0.6526134652613466, "grad_norm": 0.0, "learning_rate": 5.690330305264917e-06, "loss": 0.8392, "step": 3034 }, { "epoch": 0.6528285652828565, "grad_norm": 0.0, "learning_rate": 5.6840442111153e-06, "loss": 0.8225, "step": 3035 }, { "epoch": 0.6530436653043665, "grad_norm": 0.0, "learning_rate": 5.6777602121211e-06, "loss": 0.7992, "step": 3036 }, { "epoch": 0.6532587653258766, "grad_norm": 0.0, "learning_rate": 5.6714783113328505e-06, "loss": 0.8237, "step": 3037 }, { "epoch": 0.6534738653473865, "grad_norm": 0.0, "learning_rate": 5.665198511800063e-06, "loss": 0.7702, "step": 3038 }, { "epoch": 0.6536889653688965, "grad_norm": 0.0, "learning_rate": 5.658920816571225e-06, "loss": 0.7917, "step": 3039 }, { "epoch": 0.6539040653904066, "grad_norm": 0.0, "learning_rate": 5.652645228693814e-06, "loss": 0.817, "step": 3040 }, { "epoch": 0.6541191654119165, "grad_norm": 0.0, "learning_rate": 5.64637175121427e-06, "loss": 0.8304, "step": 3041 }, { "epoch": 0.6543342654334265, "grad_norm": 0.0, "learning_rate": 5.640100387178023e-06, "loss": 0.8664, "step": 3042 }, { "epoch": 0.6545493654549366, "grad_norm": 0.0, "learning_rate": 5.633831139629462e-06, "loss": 0.8887, "step": 3043 }, { "epoch": 0.6547644654764465, "grad_norm": 0.0, "learning_rate": 5.627564011611961e-06, "loss": 0.8324, "step": 3044 }, { "epoch": 0.6549795654979566, "grad_norm": 0.0, "learning_rate": 5.621299006167864e-06, "loss": 0.8533, "step": 3045 }, { "epoch": 0.6551946655194666, "grad_norm": 0.0, "learning_rate": 5.615036126338473e-06, "loss": 0.8105, "step": 3046 }, { "epoch": 0.6554097655409765, "grad_norm": 0.0, "learning_rate": 5.6087753751640725e-06, "loss": 0.8294, "step": 3047 }, { "epoch": 0.6556248655624866, "grad_norm": 0.0, "learning_rate": 5.602516755683908e-06, "loss": 0.8281, "step": 3048 }, { "epoch": 0.6558399655839966, "grad_norm": 0.0, "learning_rate": 5.5962602709361825e-06, "loss": 0.8778, "step": 3049 }, { "epoch": 0.6560550656055065, "grad_norm": 0.0, "learning_rate": 5.590005923958079e-06, "loss": 0.849, "step": 3050 }, { "epoch": 0.6562701656270166, "grad_norm": 0.0, "learning_rate": 5.583753717785727e-06, "loss": 0.9144, "step": 3051 }, { "epoch": 0.6564852656485266, "grad_norm": 0.0, "learning_rate": 5.577503655454228e-06, "loss": 0.863, "step": 3052 }, { "epoch": 0.6567003656700365, "grad_norm": 0.0, "learning_rate": 5.5712557399976345e-06, "loss": 0.86, "step": 3053 }, { "epoch": 0.6569154656915466, "grad_norm": 0.0, "learning_rate": 5.565009974448958e-06, "loss": 0.7987, "step": 3054 }, { "epoch": 0.6571305657130566, "grad_norm": 0.0, "learning_rate": 5.558766361840177e-06, "loss": 0.8447, "step": 3055 }, { "epoch": 0.6573456657345665, "grad_norm": 0.0, "learning_rate": 5.552524905202208e-06, "loss": 0.8768, "step": 3056 }, { "epoch": 0.6575607657560766, "grad_norm": 0.0, "learning_rate": 5.546285607564932e-06, "loss": 0.9048, "step": 3057 }, { "epoch": 0.6577758657775866, "grad_norm": 0.0, "learning_rate": 5.540048471957184e-06, "loss": 0.846, "step": 3058 }, { "epoch": 0.6579909657990965, "grad_norm": 0.0, "learning_rate": 5.533813501406739e-06, "loss": 0.8433, "step": 3059 }, { "epoch": 0.6582060658206066, "grad_norm": 0.0, "learning_rate": 5.527580698940329e-06, "loss": 0.7767, "step": 3060 }, { "epoch": 0.6584211658421166, "grad_norm": 0.0, "learning_rate": 5.521350067583635e-06, "loss": 0.8352, "step": 3061 }, { "epoch": 0.6586362658636266, "grad_norm": 0.0, "learning_rate": 5.515121610361275e-06, "loss": 0.8568, "step": 3062 }, { "epoch": 0.6588513658851366, "grad_norm": 0.0, "learning_rate": 5.508895330296822e-06, "loss": 0.862, "step": 3063 }, { "epoch": 0.6590664659066466, "grad_norm": 0.0, "learning_rate": 5.502671230412782e-06, "loss": 0.8574, "step": 3064 }, { "epoch": 0.6592815659281566, "grad_norm": 0.0, "learning_rate": 5.496449313730608e-06, "loss": 0.7818, "step": 3065 }, { "epoch": 0.6594966659496666, "grad_norm": 0.0, "learning_rate": 5.4902295832707005e-06, "loss": 0.8683, "step": 3066 }, { "epoch": 0.6597117659711766, "grad_norm": 0.0, "learning_rate": 5.484012042052385e-06, "loss": 0.826, "step": 3067 }, { "epoch": 0.6599268659926866, "grad_norm": 0.0, "learning_rate": 5.477796693093933e-06, "loss": 0.8082, "step": 3068 }, { "epoch": 0.6601419660141966, "grad_norm": 0.0, "learning_rate": 5.471583539412541e-06, "loss": 0.8154, "step": 3069 }, { "epoch": 0.6603570660357067, "grad_norm": 0.0, "learning_rate": 5.4653725840243555e-06, "loss": 0.8494, "step": 3070 }, { "epoch": 0.6605721660572166, "grad_norm": 0.0, "learning_rate": 5.459163829944451e-06, "loss": 0.8576, "step": 3071 }, { "epoch": 0.6607872660787266, "grad_norm": 0.0, "learning_rate": 5.452957280186824e-06, "loss": 0.8735, "step": 3072 }, { "epoch": 0.6610023661002367, "grad_norm": 0.0, "learning_rate": 5.44675293776441e-06, "loss": 0.8773, "step": 3073 }, { "epoch": 0.6612174661217466, "grad_norm": 0.0, "learning_rate": 5.440550805689075e-06, "loss": 0.8933, "step": 3074 }, { "epoch": 0.6614325661432566, "grad_norm": 0.0, "learning_rate": 5.434350886971602e-06, "loss": 0.8691, "step": 3075 }, { "epoch": 0.6616476661647667, "grad_norm": 0.0, "learning_rate": 5.42815318462171e-06, "loss": 0.806, "step": 3076 }, { "epoch": 0.6618627661862766, "grad_norm": 0.0, "learning_rate": 5.421957701648028e-06, "loss": 0.8656, "step": 3077 }, { "epoch": 0.6620778662077866, "grad_norm": 0.0, "learning_rate": 5.415764441058127e-06, "loss": 0.8227, "step": 3078 }, { "epoch": 0.6622929662292967, "grad_norm": 0.0, "learning_rate": 5.409573405858487e-06, "loss": 0.8533, "step": 3079 }, { "epoch": 0.6625080662508066, "grad_norm": 0.0, "learning_rate": 5.403384599054504e-06, "loss": 0.8145, "step": 3080 }, { "epoch": 0.6627231662723166, "grad_norm": 0.0, "learning_rate": 5.397198023650508e-06, "loss": 0.8138, "step": 3081 }, { "epoch": 0.6629382662938267, "grad_norm": 0.0, "learning_rate": 5.391013682649726e-06, "loss": 0.8171, "step": 3082 }, { "epoch": 0.6631533663153366, "grad_norm": 0.0, "learning_rate": 5.384831579054313e-06, "loss": 0.899, "step": 3083 }, { "epoch": 0.6633684663368467, "grad_norm": 0.0, "learning_rate": 5.378651715865338e-06, "loss": 0.8329, "step": 3084 }, { "epoch": 0.6635835663583567, "grad_norm": 0.0, "learning_rate": 5.372474096082774e-06, "loss": 0.8482, "step": 3085 }, { "epoch": 0.6637986663798666, "grad_norm": 0.0, "learning_rate": 5.3662987227055115e-06, "loss": 0.8869, "step": 3086 }, { "epoch": 0.6640137664013767, "grad_norm": 0.0, "learning_rate": 5.360125598731358e-06, "loss": 0.9081, "step": 3087 }, { "epoch": 0.6642288664228866, "grad_norm": 0.0, "learning_rate": 5.3539547271570066e-06, "loss": 0.8684, "step": 3088 }, { "epoch": 0.6644439664443966, "grad_norm": 0.0, "learning_rate": 5.347786110978084e-06, "loss": 0.8483, "step": 3089 }, { "epoch": 0.6646590664659067, "grad_norm": 0.0, "learning_rate": 5.341619753189096e-06, "loss": 0.8351, "step": 3090 }, { "epoch": 0.6648741664874166, "grad_norm": 0.0, "learning_rate": 5.3354556567834725e-06, "loss": 0.8166, "step": 3091 }, { "epoch": 0.6650892665089266, "grad_norm": 0.0, "learning_rate": 5.329293824753539e-06, "loss": 0.8045, "step": 3092 }, { "epoch": 0.6653043665304367, "grad_norm": 0.0, "learning_rate": 5.323134260090516e-06, "loss": 0.8755, "step": 3093 }, { "epoch": 0.6655194665519466, "grad_norm": 0.0, "learning_rate": 5.316976965784532e-06, "loss": 0.7934, "step": 3094 }, { "epoch": 0.6657345665734566, "grad_norm": 0.0, "learning_rate": 5.310821944824606e-06, "loss": 0.8449, "step": 3095 }, { "epoch": 0.6659496665949667, "grad_norm": 0.0, "learning_rate": 5.304669200198661e-06, "loss": 0.8289, "step": 3096 }, { "epoch": 0.6661647666164766, "grad_norm": 0.0, "learning_rate": 5.2985187348935085e-06, "loss": 0.8233, "step": 3097 }, { "epoch": 0.6663798666379867, "grad_norm": 0.0, "learning_rate": 5.292370551894852e-06, "loss": 0.7926, "step": 3098 }, { "epoch": 0.6665949666594967, "grad_norm": 0.0, "learning_rate": 5.286224654187298e-06, "loss": 0.871, "step": 3099 }, { "epoch": 0.6668100666810066, "grad_norm": 0.0, "learning_rate": 5.2800810447543285e-06, "loss": 0.849, "step": 3100 }, { "epoch": 0.6670251667025167, "grad_norm": 0.0, "learning_rate": 5.273939726578328e-06, "loss": 0.7902, "step": 3101 }, { "epoch": 0.6672402667240267, "grad_norm": 0.0, "learning_rate": 5.267800702640565e-06, "loss": 0.9172, "step": 3102 }, { "epoch": 0.6674553667455366, "grad_norm": 0.0, "learning_rate": 5.2616639759211875e-06, "loss": 0.8348, "step": 3103 }, { "epoch": 0.6676704667670467, "grad_norm": 0.0, "learning_rate": 5.255529549399234e-06, "loss": 0.84, "step": 3104 }, { "epoch": 0.6678855667885567, "grad_norm": 0.0, "learning_rate": 5.249397426052634e-06, "loss": 0.8609, "step": 3105 }, { "epoch": 0.6681006668100666, "grad_norm": 0.0, "learning_rate": 5.24326760885818e-06, "loss": 0.8494, "step": 3106 }, { "epoch": 0.6683157668315767, "grad_norm": 0.0, "learning_rate": 5.2371401007915646e-06, "loss": 0.8432, "step": 3107 }, { "epoch": 0.6685308668530867, "grad_norm": 0.0, "learning_rate": 5.231014904827343e-06, "loss": 0.8151, "step": 3108 }, { "epoch": 0.6687459668745966, "grad_norm": 0.0, "learning_rate": 5.224892023938963e-06, "loss": 0.8092, "step": 3109 }, { "epoch": 0.6689610668961067, "grad_norm": 0.0, "learning_rate": 5.218771461098733e-06, "loss": 0.83, "step": 3110 }, { "epoch": 0.6691761669176167, "grad_norm": 0.0, "learning_rate": 5.212653219277854e-06, "loss": 0.8679, "step": 3111 }, { "epoch": 0.6693912669391267, "grad_norm": 0.0, "learning_rate": 5.2065373014463835e-06, "loss": 0.8994, "step": 3112 }, { "epoch": 0.6696063669606367, "grad_norm": 0.0, "learning_rate": 5.200423710573257e-06, "loss": 0.8915, "step": 3113 }, { "epoch": 0.6698214669821467, "grad_norm": 0.0, "learning_rate": 5.1943124496262844e-06, "loss": 0.823, "step": 3114 }, { "epoch": 0.6700365670036567, "grad_norm": 0.0, "learning_rate": 5.188203521572144e-06, "loss": 0.8799, "step": 3115 }, { "epoch": 0.6702516670251667, "grad_norm": 0.0, "learning_rate": 5.182096929376372e-06, "loss": 0.8864, "step": 3116 }, { "epoch": 0.6704667670466767, "grad_norm": 0.0, "learning_rate": 5.175992676003383e-06, "loss": 0.8487, "step": 3117 }, { "epoch": 0.6706818670681867, "grad_norm": 0.0, "learning_rate": 5.169890764416454e-06, "loss": 0.8256, "step": 3118 }, { "epoch": 0.6708969670896967, "grad_norm": 0.0, "learning_rate": 5.163791197577714e-06, "loss": 0.8291, "step": 3119 }, { "epoch": 0.6711120671112067, "grad_norm": 0.0, "learning_rate": 5.157693978448171e-06, "loss": 0.8803, "step": 3120 }, { "epoch": 0.6713271671327167, "grad_norm": 0.0, "learning_rate": 5.151599109987675e-06, "loss": 0.8441, "step": 3121 }, { "epoch": 0.6715422671542267, "grad_norm": 0.0, "learning_rate": 5.145506595154947e-06, "loss": 0.8248, "step": 3122 }, { "epoch": 0.6717573671757368, "grad_norm": 0.0, "learning_rate": 5.139416436907571e-06, "loss": 0.7916, "step": 3123 }, { "epoch": 0.6719724671972467, "grad_norm": 0.0, "learning_rate": 5.133328638201969e-06, "loss": 0.8352, "step": 3124 }, { "epoch": 0.6721875672187567, "grad_norm": 0.0, "learning_rate": 5.127243201993428e-06, "loss": 0.7451, "step": 3125 }, { "epoch": 0.6724026672402668, "grad_norm": 0.0, "learning_rate": 5.1211601312360915e-06, "loss": 0.8733, "step": 3126 }, { "epoch": 0.6726177672617767, "grad_norm": 0.0, "learning_rate": 5.115079428882945e-06, "loss": 0.8224, "step": 3127 }, { "epoch": 0.6728328672832867, "grad_norm": 0.0, "learning_rate": 5.109001097885837e-06, "loss": 0.8127, "step": 3128 }, { "epoch": 0.6730479673047968, "grad_norm": 0.0, "learning_rate": 5.10292514119545e-06, "loss": 0.8286, "step": 3129 }, { "epoch": 0.6732630673263067, "grad_norm": 0.0, "learning_rate": 5.0968515617613244e-06, "loss": 0.8655, "step": 3130 }, { "epoch": 0.6734781673478167, "grad_norm": 0.0, "learning_rate": 5.090780362531849e-06, "loss": 0.8389, "step": 3131 }, { "epoch": 0.6736932673693268, "grad_norm": 0.0, "learning_rate": 5.0847115464542425e-06, "loss": 0.8908, "step": 3132 }, { "epoch": 0.6739083673908367, "grad_norm": 0.0, "learning_rate": 5.0786451164745855e-06, "loss": 0.8385, "step": 3133 }, { "epoch": 0.6741234674123467, "grad_norm": 0.0, "learning_rate": 5.0725810755377825e-06, "loss": 0.8454, "step": 3134 }, { "epoch": 0.6743385674338568, "grad_norm": 0.0, "learning_rate": 5.066519426587591e-06, "loss": 0.8656, "step": 3135 }, { "epoch": 0.6745536674553667, "grad_norm": 0.0, "learning_rate": 5.060460172566605e-06, "loss": 0.8441, "step": 3136 }, { "epoch": 0.6747687674768768, "grad_norm": 0.0, "learning_rate": 5.054403316416247e-06, "loss": 0.8368, "step": 3137 }, { "epoch": 0.6749838674983868, "grad_norm": 0.0, "learning_rate": 5.048348861076791e-06, "loss": 0.8121, "step": 3138 }, { "epoch": 0.6751989675198967, "grad_norm": 0.0, "learning_rate": 5.042296809487332e-06, "loss": 0.812, "step": 3139 }, { "epoch": 0.6754140675414068, "grad_norm": 0.0, "learning_rate": 5.036247164585799e-06, "loss": 0.864, "step": 3140 }, { "epoch": 0.6756291675629168, "grad_norm": 0.0, "learning_rate": 5.0301999293089635e-06, "loss": 0.8055, "step": 3141 }, { "epoch": 0.6758442675844267, "grad_norm": 0.0, "learning_rate": 5.024155106592415e-06, "loss": 0.8351, "step": 3142 }, { "epoch": 0.6760593676059368, "grad_norm": 0.0, "learning_rate": 5.018112699370584e-06, "loss": 0.82, "step": 3143 }, { "epoch": 0.6762744676274468, "grad_norm": 0.0, "learning_rate": 5.012072710576714e-06, "loss": 0.901, "step": 3144 }, { "epoch": 0.6764895676489567, "grad_norm": 0.0, "learning_rate": 5.006035143142884e-06, "loss": 0.863, "step": 3145 }, { "epoch": 0.6767046676704668, "grad_norm": 0.0, "learning_rate": 5.000000000000003e-06, "loss": 0.8716, "step": 3146 }, { "epoch": 0.6769197676919768, "grad_norm": 0.0, "learning_rate": 4.993967284077785e-06, "loss": 0.8804, "step": 3147 }, { "epoch": 0.6771348677134867, "grad_norm": 0.0, "learning_rate": 4.987936998304782e-06, "loss": 0.838, "step": 3148 }, { "epoch": 0.6773499677349968, "grad_norm": 0.0, "learning_rate": 4.981909145608365e-06, "loss": 0.8272, "step": 3149 }, { "epoch": 0.6775650677565068, "grad_norm": 0.0, "learning_rate": 4.9758837289147095e-06, "loss": 0.8702, "step": 3150 }, { "epoch": 0.6777801677780168, "grad_norm": 0.0, "learning_rate": 4.9698607511488306e-06, "loss": 0.8694, "step": 3151 }, { "epoch": 0.6779952677995268, "grad_norm": 0.0, "learning_rate": 4.963840215234536e-06, "loss": 0.8452, "step": 3152 }, { "epoch": 0.6782103678210368, "grad_norm": 0.0, "learning_rate": 4.95782212409447e-06, "loss": 0.8548, "step": 3153 }, { "epoch": 0.6784254678425468, "grad_norm": 0.0, "learning_rate": 4.9518064806500745e-06, "loss": 0.7569, "step": 3154 }, { "epoch": 0.6786405678640568, "grad_norm": 0.0, "learning_rate": 4.945793287821604e-06, "loss": 0.8506, "step": 3155 }, { "epoch": 0.6788556678855668, "grad_norm": 0.0, "learning_rate": 4.9397825485281355e-06, "loss": 0.8379, "step": 3156 }, { "epoch": 0.6790707679070768, "grad_norm": 0.0, "learning_rate": 4.933774265687541e-06, "loss": 0.7818, "step": 3157 }, { "epoch": 0.6792858679285868, "grad_norm": 0.0, "learning_rate": 4.9277684422165085e-06, "loss": 0.8231, "step": 3158 }, { "epoch": 0.6795009679500968, "grad_norm": 0.0, "learning_rate": 4.921765081030534e-06, "loss": 0.8598, "step": 3159 }, { "epoch": 0.6797160679716068, "grad_norm": 0.0, "learning_rate": 4.915764185043906e-06, "loss": 0.8359, "step": 3160 }, { "epoch": 0.6799311679931168, "grad_norm": 0.0, "learning_rate": 4.909765757169729e-06, "loss": 0.8303, "step": 3161 }, { "epoch": 0.6801462680146269, "grad_norm": 0.0, "learning_rate": 4.9037698003199075e-06, "loss": 0.8538, "step": 3162 }, { "epoch": 0.6803613680361368, "grad_norm": 0.0, "learning_rate": 4.897776317405135e-06, "loss": 0.9153, "step": 3163 }, { "epoch": 0.6805764680576468, "grad_norm": 0.0, "learning_rate": 4.891785311334924e-06, "loss": 0.8292, "step": 3164 }, { "epoch": 0.6807915680791569, "grad_norm": 0.0, "learning_rate": 4.88579678501756e-06, "loss": 0.8085, "step": 3165 }, { "epoch": 0.6810066681006668, "grad_norm": 0.0, "learning_rate": 4.879810741360147e-06, "loss": 0.8724, "step": 3166 }, { "epoch": 0.6812217681221768, "grad_norm": 0.0, "learning_rate": 4.873827183268577e-06, "loss": 0.9115, "step": 3167 }, { "epoch": 0.6814368681436869, "grad_norm": 0.0, "learning_rate": 4.867846113647527e-06, "loss": 0.9, "step": 3168 }, { "epoch": 0.6816519681651968, "grad_norm": 0.0, "learning_rate": 4.861867535400472e-06, "loss": 0.8817, "step": 3169 }, { "epoch": 0.6818670681867068, "grad_norm": 0.0, "learning_rate": 4.855891451429682e-06, "loss": 0.8378, "step": 3170 }, { "epoch": 0.6820821682082168, "grad_norm": 0.0, "learning_rate": 4.849917864636207e-06, "loss": 0.8694, "step": 3171 }, { "epoch": 0.6822972682297268, "grad_norm": 0.0, "learning_rate": 4.843946777919896e-06, "loss": 0.8698, "step": 3172 }, { "epoch": 0.6825123682512368, "grad_norm": 0.0, "learning_rate": 4.83797819417937e-06, "loss": 0.8393, "step": 3173 }, { "epoch": 0.6827274682727468, "grad_norm": 0.0, "learning_rate": 4.832012116312047e-06, "loss": 0.8354, "step": 3174 }, { "epoch": 0.6829425682942568, "grad_norm": 0.0, "learning_rate": 4.826048547214129e-06, "loss": 0.8483, "step": 3175 }, { "epoch": 0.6831576683157669, "grad_norm": 0.0, "learning_rate": 4.820087489780588e-06, "loss": 0.8375, "step": 3176 }, { "epoch": 0.6833727683372768, "grad_norm": 0.0, "learning_rate": 4.814128946905192e-06, "loss": 0.8145, "step": 3177 }, { "epoch": 0.6835878683587868, "grad_norm": 0.0, "learning_rate": 4.808172921480472e-06, "loss": 0.7654, "step": 3178 }, { "epoch": 0.6838029683802969, "grad_norm": 0.0, "learning_rate": 4.802219416397749e-06, "loss": 0.8767, "step": 3179 }, { "epoch": 0.6840180684018068, "grad_norm": 0.0, "learning_rate": 4.796268434547125e-06, "loss": 0.8729, "step": 3180 }, { "epoch": 0.6842331684233168, "grad_norm": 0.0, "learning_rate": 4.790319978817456e-06, "loss": 0.8084, "step": 3181 }, { "epoch": 0.6844482684448269, "grad_norm": 0.0, "learning_rate": 4.784374052096396e-06, "loss": 0.8274, "step": 3182 }, { "epoch": 0.6846633684663368, "grad_norm": 0.0, "learning_rate": 4.778430657270356e-06, "loss": 0.8427, "step": 3183 }, { "epoch": 0.6848784684878468, "grad_norm": 0.0, "learning_rate": 4.772489797224518e-06, "loss": 0.853, "step": 3184 }, { "epoch": 0.6850935685093569, "grad_norm": 0.0, "learning_rate": 4.7665514748428446e-06, "loss": 0.9108, "step": 3185 }, { "epoch": 0.6853086685308668, "grad_norm": 0.0, "learning_rate": 4.760615693008053e-06, "loss": 0.8282, "step": 3186 }, { "epoch": 0.6855237685523768, "grad_norm": 0.0, "learning_rate": 4.754682454601637e-06, "loss": 0.8039, "step": 3187 }, { "epoch": 0.6857388685738869, "grad_norm": 0.0, "learning_rate": 4.748751762503856e-06, "loss": 0.8242, "step": 3188 }, { "epoch": 0.6859539685953968, "grad_norm": 0.0, "learning_rate": 4.742823619593723e-06, "loss": 0.8481, "step": 3189 }, { "epoch": 0.6861690686169069, "grad_norm": 0.0, "learning_rate": 4.7368980287490265e-06, "loss": 0.8304, "step": 3190 }, { "epoch": 0.6863841686384169, "grad_norm": 0.0, "learning_rate": 4.730974992846304e-06, "loss": 0.828, "step": 3191 }, { "epoch": 0.6865992686599268, "grad_norm": 0.0, "learning_rate": 4.72505451476086e-06, "loss": 0.858, "step": 3192 }, { "epoch": 0.6868143686814369, "grad_norm": 0.0, "learning_rate": 4.71913659736676e-06, "loss": 0.8828, "step": 3193 }, { "epoch": 0.6870294687029469, "grad_norm": 0.0, "learning_rate": 4.7132212435368166e-06, "loss": 0.8881, "step": 3194 }, { "epoch": 0.6872445687244568, "grad_norm": 0.0, "learning_rate": 4.70730845614261e-06, "loss": 0.7677, "step": 3195 }, { "epoch": 0.6874596687459669, "grad_norm": 0.0, "learning_rate": 4.701398238054461e-06, "loss": 0.8702, "step": 3196 }, { "epoch": 0.6876747687674769, "grad_norm": 0.0, "learning_rate": 4.695490592141457e-06, "loss": 0.8214, "step": 3197 }, { "epoch": 0.6878898687889868, "grad_norm": 0.0, "learning_rate": 4.689585521271427e-06, "loss": 0.8396, "step": 3198 }, { "epoch": 0.6881049688104969, "grad_norm": 0.0, "learning_rate": 4.683683028310949e-06, "loss": 0.8332, "step": 3199 }, { "epoch": 0.6883200688320069, "grad_norm": 0.0, "learning_rate": 4.677783116125361e-06, "loss": 0.8511, "step": 3200 }, { "epoch": 0.6885351688535168, "grad_norm": 0.0, "learning_rate": 4.671885787578735e-06, "loss": 0.8002, "step": 3201 }, { "epoch": 0.6887502688750269, "grad_norm": 0.0, "learning_rate": 4.665991045533895e-06, "loss": 0.85, "step": 3202 }, { "epoch": 0.6889653688965369, "grad_norm": 0.0, "learning_rate": 4.660098892852416e-06, "loss": 0.8104, "step": 3203 }, { "epoch": 0.6891804689180469, "grad_norm": 0.0, "learning_rate": 4.654209332394598e-06, "loss": 0.8423, "step": 3204 }, { "epoch": 0.6893955689395569, "grad_norm": 0.0, "learning_rate": 4.648322367019501e-06, "loss": 0.8331, "step": 3205 }, { "epoch": 0.6896106689610669, "grad_norm": 0.0, "learning_rate": 4.642437999584919e-06, "loss": 0.8494, "step": 3206 }, { "epoch": 0.6898257689825769, "grad_norm": 0.0, "learning_rate": 4.636556232947377e-06, "loss": 0.8511, "step": 3207 }, { "epoch": 0.6900408690040869, "grad_norm": 0.0, "learning_rate": 4.630677069962151e-06, "loss": 0.8809, "step": 3208 }, { "epoch": 0.6902559690255969, "grad_norm": 0.0, "learning_rate": 4.62480051348324e-06, "loss": 0.7776, "step": 3209 }, { "epoch": 0.6904710690471069, "grad_norm": 0.0, "learning_rate": 4.61892656636339e-06, "loss": 0.8266, "step": 3210 }, { "epoch": 0.6906861690686169, "grad_norm": 0.0, "learning_rate": 4.613055231454066e-06, "loss": 0.8553, "step": 3211 }, { "epoch": 0.690901269090127, "grad_norm": 0.0, "learning_rate": 4.607186511605483e-06, "loss": 0.9176, "step": 3212 }, { "epoch": 0.6911163691116369, "grad_norm": 0.0, "learning_rate": 4.601320409666573e-06, "loss": 0.788, "step": 3213 }, { "epoch": 0.6913314691331469, "grad_norm": 0.0, "learning_rate": 4.595456928484995e-06, "loss": 0.7917, "step": 3214 }, { "epoch": 0.691546569154657, "grad_norm": 0.0, "learning_rate": 4.589596070907146e-06, "loss": 0.7714, "step": 3215 }, { "epoch": 0.6917616691761669, "grad_norm": 0.0, "learning_rate": 4.5837378397781476e-06, "loss": 0.8474, "step": 3216 }, { "epoch": 0.6919767691976769, "grad_norm": 0.0, "learning_rate": 4.577882237941838e-06, "loss": 0.8369, "step": 3217 }, { "epoch": 0.692191869219187, "grad_norm": 0.0, "learning_rate": 4.572029268240787e-06, "loss": 0.8387, "step": 3218 }, { "epoch": 0.6924069692406969, "grad_norm": 0.0, "learning_rate": 4.566178933516291e-06, "loss": 0.8343, "step": 3219 }, { "epoch": 0.6926220692622069, "grad_norm": 0.0, "learning_rate": 4.56033123660835e-06, "loss": 0.8054, "step": 3220 }, { "epoch": 0.692837169283717, "grad_norm": 0.0, "learning_rate": 4.554486180355703e-06, "loss": 0.8458, "step": 3221 }, { "epoch": 0.6930522693052269, "grad_norm": 0.0, "learning_rate": 4.54864376759579e-06, "loss": 0.8086, "step": 3222 }, { "epoch": 0.6932673693267369, "grad_norm": 0.0, "learning_rate": 4.542804001164781e-06, "loss": 0.8073, "step": 3223 }, { "epoch": 0.693482469348247, "grad_norm": 0.0, "learning_rate": 4.53696688389756e-06, "loss": 0.8341, "step": 3224 }, { "epoch": 0.6936975693697569, "grad_norm": 0.0, "learning_rate": 4.531132418627716e-06, "loss": 0.8406, "step": 3225 }, { "epoch": 0.693912669391267, "grad_norm": 0.0, "learning_rate": 4.525300608187554e-06, "loss": 0.7983, "step": 3226 }, { "epoch": 0.694127769412777, "grad_norm": 0.0, "learning_rate": 4.519471455408099e-06, "loss": 0.7661, "step": 3227 }, { "epoch": 0.6943428694342869, "grad_norm": 0.0, "learning_rate": 4.513644963119071e-06, "loss": 0.8748, "step": 3228 }, { "epoch": 0.694557969455797, "grad_norm": 0.0, "learning_rate": 4.507821134148914e-06, "loss": 0.8565, "step": 3229 }, { "epoch": 0.694773069477307, "grad_norm": 0.0, "learning_rate": 4.5019999713247646e-06, "loss": 0.8136, "step": 3230 }, { "epoch": 0.6949881694988169, "grad_norm": 0.0, "learning_rate": 4.4961814774724765e-06, "loss": 0.7948, "step": 3231 }, { "epoch": 0.695203269520327, "grad_norm": 0.0, "learning_rate": 4.490365655416606e-06, "loss": 0.8567, "step": 3232 }, { "epoch": 0.695418369541837, "grad_norm": 0.0, "learning_rate": 4.484552507980403e-06, "loss": 0.874, "step": 3233 }, { "epoch": 0.6956334695633469, "grad_norm": 0.0, "learning_rate": 4.478742037985833e-06, "loss": 0.8973, "step": 3234 }, { "epoch": 0.695848569584857, "grad_norm": 0.0, "learning_rate": 4.472934248253546e-06, "loss": 0.8206, "step": 3235 }, { "epoch": 0.696063669606367, "grad_norm": 0.0, "learning_rate": 4.467129141602906e-06, "loss": 0.8304, "step": 3236 }, { "epoch": 0.6962787696278769, "grad_norm": 0.0, "learning_rate": 4.461326720851971e-06, "loss": 0.7878, "step": 3237 }, { "epoch": 0.696493869649387, "grad_norm": 0.0, "learning_rate": 4.455526988817481e-06, "loss": 0.8381, "step": 3238 }, { "epoch": 0.696708969670897, "grad_norm": 0.0, "learning_rate": 4.4497299483148935e-06, "loss": 0.9021, "step": 3239 }, { "epoch": 0.6969240696924069, "grad_norm": 0.0, "learning_rate": 4.443935602158339e-06, "loss": 0.8291, "step": 3240 }, { "epoch": 0.697139169713917, "grad_norm": 0.0, "learning_rate": 4.438143953160656e-06, "loss": 0.8662, "step": 3241 }, { "epoch": 0.697354269735427, "grad_norm": 0.0, "learning_rate": 4.4323550041333644e-06, "loss": 0.7841, "step": 3242 }, { "epoch": 0.697569369756937, "grad_norm": 0.0, "learning_rate": 4.426568757886669e-06, "loss": 0.8215, "step": 3243 }, { "epoch": 0.697784469778447, "grad_norm": 0.0, "learning_rate": 4.4207852172294775e-06, "loss": 0.7716, "step": 3244 }, { "epoch": 0.697999569799957, "grad_norm": 0.0, "learning_rate": 4.41500438496937e-06, "loss": 0.7887, "step": 3245 }, { "epoch": 0.698214669821467, "grad_norm": 0.0, "learning_rate": 4.40922626391262e-06, "loss": 0.7826, "step": 3246 }, { "epoch": 0.698429769842977, "grad_norm": 0.0, "learning_rate": 4.403450856864185e-06, "loss": 0.8297, "step": 3247 }, { "epoch": 0.698644869864487, "grad_norm": 0.0, "learning_rate": 4.397678166627698e-06, "loss": 0.8251, "step": 3248 }, { "epoch": 0.698859969885997, "grad_norm": 0.0, "learning_rate": 4.39190819600548e-06, "loss": 0.8658, "step": 3249 }, { "epoch": 0.699075069907507, "grad_norm": 0.0, "learning_rate": 4.386140947798532e-06, "loss": 0.8179, "step": 3250 }, { "epoch": 0.699290169929017, "grad_norm": 0.0, "learning_rate": 4.380376424806524e-06, "loss": 0.8295, "step": 3251 }, { "epoch": 0.699505269950527, "grad_norm": 0.0, "learning_rate": 4.374614629827818e-06, "loss": 0.8679, "step": 3252 }, { "epoch": 0.699720369972037, "grad_norm": 0.0, "learning_rate": 4.368855565659434e-06, "loss": 0.7626, "step": 3253 }, { "epoch": 0.699935469993547, "grad_norm": 0.0, "learning_rate": 4.3630992350970866e-06, "loss": 0.799, "step": 3254 }, { "epoch": 0.700150570015057, "grad_norm": 0.0, "learning_rate": 4.357345640935145e-06, "loss": 0.7635, "step": 3255 }, { "epoch": 0.700365670036567, "grad_norm": 0.0, "learning_rate": 4.351594785966662e-06, "loss": 0.8364, "step": 3256 }, { "epoch": 0.700580770058077, "grad_norm": 0.0, "learning_rate": 4.345846672983355e-06, "loss": 0.8391, "step": 3257 }, { "epoch": 0.700795870079587, "grad_norm": 0.0, "learning_rate": 4.340101304775607e-06, "loss": 0.8575, "step": 3258 }, { "epoch": 0.701010970101097, "grad_norm": 0.0, "learning_rate": 4.334358684132478e-06, "loss": 0.7963, "step": 3259 }, { "epoch": 0.701226070122607, "grad_norm": 0.0, "learning_rate": 4.3286188138416915e-06, "loss": 0.7969, "step": 3260 }, { "epoch": 0.701441170144117, "grad_norm": 0.0, "learning_rate": 4.322881696689629e-06, "loss": 0.8534, "step": 3261 }, { "epoch": 0.701656270165627, "grad_norm": 0.0, "learning_rate": 4.317147335461343e-06, "loss": 0.7832, "step": 3262 }, { "epoch": 0.701871370187137, "grad_norm": 0.0, "learning_rate": 4.3114157329405494e-06, "loss": 0.7804, "step": 3263 }, { "epoch": 0.702086470208647, "grad_norm": 0.0, "learning_rate": 4.305686891909612e-06, "loss": 0.8536, "step": 3264 }, { "epoch": 0.702301570230157, "grad_norm": 0.0, "learning_rate": 4.299960815149572e-06, "loss": 0.8466, "step": 3265 }, { "epoch": 0.702516670251667, "grad_norm": 0.0, "learning_rate": 4.294237505440112e-06, "loss": 0.8537, "step": 3266 }, { "epoch": 0.702731770273177, "grad_norm": 0.0, "learning_rate": 4.288516965559584e-06, "loss": 0.8203, "step": 3267 }, { "epoch": 0.702946870294687, "grad_norm": 0.0, "learning_rate": 4.282799198284992e-06, "loss": 0.8788, "step": 3268 }, { "epoch": 0.703161970316197, "grad_norm": 0.0, "learning_rate": 4.27708420639199e-06, "loss": 0.8223, "step": 3269 }, { "epoch": 0.703377070337707, "grad_norm": 0.0, "learning_rate": 4.271371992654884e-06, "loss": 0.7915, "step": 3270 }, { "epoch": 0.7035921703592171, "grad_norm": 0.0, "learning_rate": 4.265662559846641e-06, "loss": 0.7878, "step": 3271 }, { "epoch": 0.703807270380727, "grad_norm": 0.0, "learning_rate": 4.2599559107388645e-06, "loss": 0.7698, "step": 3272 }, { "epoch": 0.704022370402237, "grad_norm": 0.0, "learning_rate": 4.2542520481018225e-06, "loss": 0.8609, "step": 3273 }, { "epoch": 0.7042374704237471, "grad_norm": 0.0, "learning_rate": 4.248550974704413e-06, "loss": 0.8342, "step": 3274 }, { "epoch": 0.704452570445257, "grad_norm": 0.0, "learning_rate": 4.242852693314193e-06, "loss": 0.8291, "step": 3275 }, { "epoch": 0.704667670466767, "grad_norm": 0.0, "learning_rate": 4.237157206697363e-06, "loss": 0.8225, "step": 3276 }, { "epoch": 0.7048827704882771, "grad_norm": 0.0, "learning_rate": 4.231464517618757e-06, "loss": 0.8702, "step": 3277 }, { "epoch": 0.705097870509787, "grad_norm": 0.0, "learning_rate": 4.225774628841864e-06, "loss": 0.8674, "step": 3278 }, { "epoch": 0.705312970531297, "grad_norm": 0.0, "learning_rate": 4.220087543128799e-06, "loss": 0.8267, "step": 3279 }, { "epoch": 0.7055280705528071, "grad_norm": 0.0, "learning_rate": 4.21440326324033e-06, "loss": 0.8774, "step": 3280 }, { "epoch": 0.705743170574317, "grad_norm": 0.0, "learning_rate": 4.2087217919358604e-06, "loss": 0.8217, "step": 3281 }, { "epoch": 0.705958270595827, "grad_norm": 0.0, "learning_rate": 4.203043131973419e-06, "loss": 0.8131, "step": 3282 }, { "epoch": 0.7061733706173371, "grad_norm": 0.0, "learning_rate": 4.197367286109685e-06, "loss": 0.8325, "step": 3283 }, { "epoch": 0.706388470638847, "grad_norm": 0.0, "learning_rate": 4.191694257099962e-06, "loss": 0.7934, "step": 3284 }, { "epoch": 0.7066035706603571, "grad_norm": 0.0, "learning_rate": 4.1860240476981855e-06, "loss": 0.7856, "step": 3285 }, { "epoch": 0.7068186706818671, "grad_norm": 0.0, "learning_rate": 4.180356660656931e-06, "loss": 0.8334, "step": 3286 }, { "epoch": 0.707033770703377, "grad_norm": 0.0, "learning_rate": 4.174692098727392e-06, "loss": 0.8568, "step": 3287 }, { "epoch": 0.7072488707248871, "grad_norm": 0.0, "learning_rate": 4.1690303646594e-06, "loss": 0.8201, "step": 3288 }, { "epoch": 0.7074639707463971, "grad_norm": 0.0, "learning_rate": 4.163371461201415e-06, "loss": 0.8519, "step": 3289 }, { "epoch": 0.707679070767907, "grad_norm": 0.0, "learning_rate": 4.157715391100509e-06, "loss": 0.8896, "step": 3290 }, { "epoch": 0.7078941707894171, "grad_norm": 0.0, "learning_rate": 4.152062157102399e-06, "loss": 0.8375, "step": 3291 }, { "epoch": 0.7081092708109271, "grad_norm": 0.0, "learning_rate": 4.146411761951405e-06, "loss": 0.8848, "step": 3292 }, { "epoch": 0.708324370832437, "grad_norm": 0.0, "learning_rate": 4.1407642083904805e-06, "loss": 0.8201, "step": 3293 }, { "epoch": 0.7085394708539471, "grad_norm": 0.0, "learning_rate": 4.135119499161203e-06, "loss": 0.8292, "step": 3294 }, { "epoch": 0.7087545708754571, "grad_norm": 0.0, "learning_rate": 4.129477637003756e-06, "loss": 0.8072, "step": 3295 }, { "epoch": 0.708969670896967, "grad_norm": 0.0, "learning_rate": 4.123838624656954e-06, "loss": 0.8247, "step": 3296 }, { "epoch": 0.7091847709184771, "grad_norm": 0.0, "learning_rate": 4.1182024648582174e-06, "loss": 0.759, "step": 3297 }, { "epoch": 0.7093998709399871, "grad_norm": 0.0, "learning_rate": 4.112569160343592e-06, "loss": 0.8011, "step": 3298 }, { "epoch": 0.7096149709614971, "grad_norm": 0.0, "learning_rate": 4.10693871384773e-06, "loss": 0.8264, "step": 3299 }, { "epoch": 0.7098300709830071, "grad_norm": 0.0, "learning_rate": 4.101311128103895e-06, "loss": 0.8311, "step": 3300 }, { "epoch": 0.7100451710045171, "grad_norm": 0.0, "learning_rate": 4.095686405843974e-06, "loss": 0.8887, "step": 3301 }, { "epoch": 0.7102602710260271, "grad_norm": 0.0, "learning_rate": 4.090064549798445e-06, "loss": 0.8738, "step": 3302 }, { "epoch": 0.7104753710475371, "grad_norm": 0.0, "learning_rate": 4.084445562696412e-06, "loss": 0.8054, "step": 3303 }, { "epoch": 0.7106904710690471, "grad_norm": 0.0, "learning_rate": 4.07882944726558e-06, "loss": 0.8303, "step": 3304 }, { "epoch": 0.7109055710905571, "grad_norm": 0.0, "learning_rate": 4.073216206232253e-06, "loss": 0.806, "step": 3305 }, { "epoch": 0.7111206711120671, "grad_norm": 0.0, "learning_rate": 4.067605842321351e-06, "loss": 0.8685, "step": 3306 }, { "epoch": 0.7113357711335772, "grad_norm": 0.0, "learning_rate": 4.061998358256395e-06, "loss": 0.8806, "step": 3307 }, { "epoch": 0.7115508711550871, "grad_norm": 0.0, "learning_rate": 4.056393756759498e-06, "loss": 0.7702, "step": 3308 }, { "epoch": 0.7117659711765971, "grad_norm": 0.0, "learning_rate": 4.050792040551388e-06, "loss": 0.8558, "step": 3309 }, { "epoch": 0.7119810711981072, "grad_norm": 0.0, "learning_rate": 4.045193212351377e-06, "loss": 0.8164, "step": 3310 }, { "epoch": 0.7121961712196171, "grad_norm": 0.0, "learning_rate": 4.039597274877388e-06, "loss": 0.7863, "step": 3311 }, { "epoch": 0.7124112712411271, "grad_norm": 0.0, "learning_rate": 4.03400423084594e-06, "loss": 0.7666, "step": 3312 }, { "epoch": 0.7126263712626372, "grad_norm": 0.0, "learning_rate": 4.028414082972141e-06, "loss": 0.8864, "step": 3313 }, { "epoch": 0.7128414712841471, "grad_norm": 0.0, "learning_rate": 4.022826833969692e-06, "loss": 0.8624, "step": 3314 }, { "epoch": 0.7130565713056571, "grad_norm": 0.0, "learning_rate": 4.017242486550889e-06, "loss": 0.8247, "step": 3315 }, { "epoch": 0.7132716713271672, "grad_norm": 0.0, "learning_rate": 4.011661043426624e-06, "loss": 0.9008, "step": 3316 }, { "epoch": 0.7134867713486771, "grad_norm": 0.0, "learning_rate": 4.006082507306379e-06, "loss": 0.7618, "step": 3317 }, { "epoch": 0.7137018713701871, "grad_norm": 0.0, "learning_rate": 4.000506880898215e-06, "loss": 0.9087, "step": 3318 }, { "epoch": 0.7139169713916972, "grad_norm": 0.0, "learning_rate": 3.99493416690879e-06, "loss": 0.8943, "step": 3319 }, { "epoch": 0.7141320714132071, "grad_norm": 0.0, "learning_rate": 3.98936436804335e-06, "loss": 0.9004, "step": 3320 }, { "epoch": 0.7143471714347172, "grad_norm": 0.0, "learning_rate": 3.983797487005713e-06, "loss": 0.8558, "step": 3321 }, { "epoch": 0.7145622714562272, "grad_norm": 0.0, "learning_rate": 3.978233526498296e-06, "loss": 0.8216, "step": 3322 }, { "epoch": 0.7147773714777371, "grad_norm": 0.0, "learning_rate": 3.972672489222083e-06, "loss": 0.8233, "step": 3323 }, { "epoch": 0.7149924714992472, "grad_norm": 0.0, "learning_rate": 3.967114377876654e-06, "loss": 0.8813, "step": 3324 }, { "epoch": 0.7152075715207572, "grad_norm": 0.0, "learning_rate": 3.961559195160162e-06, "loss": 0.8895, "step": 3325 }, { "epoch": 0.7154226715422671, "grad_norm": 0.0, "learning_rate": 3.956006943769331e-06, "loss": 0.816, "step": 3326 }, { "epoch": 0.7156377715637772, "grad_norm": 0.0, "learning_rate": 3.950457626399475e-06, "loss": 0.8239, "step": 3327 }, { "epoch": 0.7158528715852872, "grad_norm": 0.0, "learning_rate": 3.944911245744479e-06, "loss": 0.778, "step": 3328 }, { "epoch": 0.7160679716067971, "grad_norm": 0.0, "learning_rate": 3.9393678044967895e-06, "loss": 0.8406, "step": 3329 }, { "epoch": 0.7162830716283072, "grad_norm": 0.0, "learning_rate": 3.933827305347452e-06, "loss": 0.816, "step": 3330 }, { "epoch": 0.7164981716498172, "grad_norm": 0.0, "learning_rate": 3.928289750986059e-06, "loss": 0.8223, "step": 3331 }, { "epoch": 0.7167132716713271, "grad_norm": 0.0, "learning_rate": 3.922755144100786e-06, "loss": 0.811, "step": 3332 }, { "epoch": 0.7169283716928372, "grad_norm": 0.0, "learning_rate": 3.917223487378381e-06, "loss": 0.8512, "step": 3333 }, { "epoch": 0.7171434717143472, "grad_norm": 0.0, "learning_rate": 3.911694783504148e-06, "loss": 0.7962, "step": 3334 }, { "epoch": 0.7173585717358572, "grad_norm": 0.0, "learning_rate": 3.9061690351619675e-06, "loss": 0.8626, "step": 3335 }, { "epoch": 0.7175736717573672, "grad_norm": 0.0, "learning_rate": 3.900646245034277e-06, "loss": 0.837, "step": 3336 }, { "epoch": 0.7177887717788772, "grad_norm": 0.0, "learning_rate": 3.895126415802088e-06, "loss": 0.7966, "step": 3337 }, { "epoch": 0.7180038718003872, "grad_norm": 0.0, "learning_rate": 3.889609550144968e-06, "loss": 0.799, "step": 3338 }, { "epoch": 0.7182189718218972, "grad_norm": 0.0, "learning_rate": 3.8840956507410445e-06, "loss": 0.7719, "step": 3339 }, { "epoch": 0.7184340718434071, "grad_norm": 0.0, "learning_rate": 3.878584720267011e-06, "loss": 0.8472, "step": 3340 }, { "epoch": 0.7186491718649172, "grad_norm": 0.0, "learning_rate": 3.873076761398112e-06, "loss": 0.8382, "step": 3341 }, { "epoch": 0.7188642718864272, "grad_norm": 0.0, "learning_rate": 3.86757177680816e-06, "loss": 0.88, "step": 3342 }, { "epoch": 0.7190793719079371, "grad_norm": 0.0, "learning_rate": 3.862069769169514e-06, "loss": 0.8857, "step": 3343 }, { "epoch": 0.7192944719294472, "grad_norm": 0.0, "learning_rate": 3.856570741153087e-06, "loss": 0.8266, "step": 3344 }, { "epoch": 0.7195095719509572, "grad_norm": 0.0, "learning_rate": 3.851074695428357e-06, "loss": 0.871, "step": 3345 }, { "epoch": 0.7197246719724671, "grad_norm": 0.0, "learning_rate": 3.845581634663342e-06, "loss": 0.7817, "step": 3346 }, { "epoch": 0.7199397719939772, "grad_norm": 0.0, "learning_rate": 3.840091561524618e-06, "loss": 0.8528, "step": 3347 }, { "epoch": 0.7201548720154872, "grad_norm": 0.0, "learning_rate": 3.834604478677313e-06, "loss": 0.7806, "step": 3348 }, { "epoch": 0.7203699720369972, "grad_norm": 0.0, "learning_rate": 3.829120388785091e-06, "loss": 0.8111, "step": 3349 }, { "epoch": 0.7205850720585072, "grad_norm": 0.0, "learning_rate": 3.823639294510176e-06, "loss": 0.8745, "step": 3350 }, { "epoch": 0.7208001720800172, "grad_norm": 0.0, "learning_rate": 3.818161198513337e-06, "loss": 0.8368, "step": 3351 }, { "epoch": 0.7210152721015272, "grad_norm": 0.0, "learning_rate": 3.8126861034538754e-06, "loss": 0.8364, "step": 3352 }, { "epoch": 0.7212303721230372, "grad_norm": 0.0, "learning_rate": 3.8072140119896507e-06, "loss": 0.8579, "step": 3353 }, { "epoch": 0.7214454721445472, "grad_norm": 0.0, "learning_rate": 3.801744926777051e-06, "loss": 0.8299, "step": 3354 }, { "epoch": 0.7216605721660572, "grad_norm": 0.0, "learning_rate": 3.7962788504710203e-06, "loss": 0.8566, "step": 3355 }, { "epoch": 0.7218756721875672, "grad_norm": 0.0, "learning_rate": 3.790815785725024e-06, "loss": 0.7475, "step": 3356 }, { "epoch": 0.7220907722090772, "grad_norm": 0.0, "learning_rate": 3.785355735191082e-06, "loss": 0.8159, "step": 3357 }, { "epoch": 0.7223058722305872, "grad_norm": 0.0, "learning_rate": 3.779898701519741e-06, "loss": 0.7605, "step": 3358 }, { "epoch": 0.7225209722520972, "grad_norm": 0.0, "learning_rate": 3.774444687360083e-06, "loss": 0.8236, "step": 3359 }, { "epoch": 0.7227360722736073, "grad_norm": 0.0, "learning_rate": 3.7689936953597306e-06, "loss": 0.8237, "step": 3360 }, { "epoch": 0.7229511722951172, "grad_norm": 0.0, "learning_rate": 3.763545728164838e-06, "loss": 0.8342, "step": 3361 }, { "epoch": 0.7231662723166272, "grad_norm": 0.0, "learning_rate": 3.7581007884200826e-06, "loss": 0.8037, "step": 3362 }, { "epoch": 0.7233813723381373, "grad_norm": 0.0, "learning_rate": 3.7526588787686835e-06, "loss": 0.8401, "step": 3363 }, { "epoch": 0.7235964723596472, "grad_norm": 0.0, "learning_rate": 3.747220001852384e-06, "loss": 0.8922, "step": 3364 }, { "epoch": 0.7238115723811572, "grad_norm": 0.0, "learning_rate": 3.7417841603114515e-06, "loss": 0.814, "step": 3365 }, { "epoch": 0.7240266724026673, "grad_norm": 0.0, "learning_rate": 3.736351356784689e-06, "loss": 0.8206, "step": 3366 }, { "epoch": 0.7242417724241772, "grad_norm": 0.0, "learning_rate": 3.7309215939094114e-06, "loss": 0.8887, "step": 3367 }, { "epoch": 0.7244568724456872, "grad_norm": 0.0, "learning_rate": 3.7254948743214693e-06, "loss": 0.8598, "step": 3368 }, { "epoch": 0.7246719724671973, "grad_norm": 0.0, "learning_rate": 3.7200712006552354e-06, "loss": 0.789, "step": 3369 }, { "epoch": 0.7248870724887072, "grad_norm": 0.0, "learning_rate": 3.7146505755435967e-06, "loss": 0.8248, "step": 3370 }, { "epoch": 0.7251021725102172, "grad_norm": 0.0, "learning_rate": 3.709233001617961e-06, "loss": 0.7695, "step": 3371 }, { "epoch": 0.7253172725317273, "grad_norm": 0.0, "learning_rate": 3.7038184815082634e-06, "loss": 0.8132, "step": 3372 }, { "epoch": 0.7255323725532372, "grad_norm": 0.0, "learning_rate": 3.6984070178429456e-06, "loss": 0.8293, "step": 3373 }, { "epoch": 0.7257474725747473, "grad_norm": 0.0, "learning_rate": 3.692998613248977e-06, "loss": 0.8438, "step": 3374 }, { "epoch": 0.7259625725962573, "grad_norm": 0.0, "learning_rate": 3.687593270351828e-06, "loss": 0.8776, "step": 3375 }, { "epoch": 0.7261776726177672, "grad_norm": 0.0, "learning_rate": 3.682190991775495e-06, "loss": 0.8213, "step": 3376 }, { "epoch": 0.7263927726392773, "grad_norm": 0.0, "learning_rate": 3.676791780142487e-06, "loss": 0.8083, "step": 3377 }, { "epoch": 0.7266078726607873, "grad_norm": 0.0, "learning_rate": 3.6713956380738115e-06, "loss": 0.8426, "step": 3378 }, { "epoch": 0.7268229726822972, "grad_norm": 0.0, "learning_rate": 3.666002568189e-06, "loss": 0.8066, "step": 3379 }, { "epoch": 0.7270380727038073, "grad_norm": 0.0, "learning_rate": 3.6606125731060815e-06, "loss": 0.8539, "step": 3380 }, { "epoch": 0.7272531727253173, "grad_norm": 0.0, "learning_rate": 3.6552256554416e-06, "loss": 0.8771, "step": 3381 }, { "epoch": 0.7274682727468272, "grad_norm": 0.0, "learning_rate": 3.6498418178106065e-06, "loss": 0.8585, "step": 3382 }, { "epoch": 0.7276833727683373, "grad_norm": 0.0, "learning_rate": 3.6444610628266466e-06, "loss": 0.8217, "step": 3383 }, { "epoch": 0.7278984727898473, "grad_norm": 0.0, "learning_rate": 3.639083393101783e-06, "loss": 0.8084, "step": 3384 }, { "epoch": 0.7281135728113572, "grad_norm": 0.0, "learning_rate": 3.6337088112465725e-06, "loss": 0.8811, "step": 3385 }, { "epoch": 0.7283286728328673, "grad_norm": 0.0, "learning_rate": 3.6283373198700676e-06, "loss": 0.8207, "step": 3386 }, { "epoch": 0.7285437728543773, "grad_norm": 0.0, "learning_rate": 3.6229689215798357e-06, "loss": 0.8738, "step": 3387 }, { "epoch": 0.7287588728758873, "grad_norm": 0.0, "learning_rate": 3.6176036189819273e-06, "loss": 0.8333, "step": 3388 }, { "epoch": 0.7289739728973973, "grad_norm": 0.0, "learning_rate": 3.6122414146809014e-06, "loss": 0.8149, "step": 3389 }, { "epoch": 0.7291890729189073, "grad_norm": 0.0, "learning_rate": 3.6068823112798114e-06, "loss": 0.8419, "step": 3390 }, { "epoch": 0.7294041729404173, "grad_norm": 0.0, "learning_rate": 3.6015263113801945e-06, "loss": 0.8335, "step": 3391 }, { "epoch": 0.7296192729619273, "grad_norm": 0.0, "learning_rate": 3.596173417582097e-06, "loss": 0.8499, "step": 3392 }, { "epoch": 0.7298343729834373, "grad_norm": 0.0, "learning_rate": 3.590823632484045e-06, "loss": 0.6876, "step": 3393 }, { "epoch": 0.7300494730049473, "grad_norm": 0.0, "learning_rate": 3.5854769586830608e-06, "loss": 0.8232, "step": 3394 }, { "epoch": 0.7302645730264573, "grad_norm": 0.0, "learning_rate": 3.5801333987746612e-06, "loss": 0.8097, "step": 3395 }, { "epoch": 0.7304796730479673, "grad_norm": 0.0, "learning_rate": 3.5747929553528384e-06, "loss": 0.847, "step": 3396 }, { "epoch": 0.7306947730694773, "grad_norm": 0.0, "learning_rate": 3.569455631010086e-06, "loss": 0.8704, "step": 3397 }, { "epoch": 0.7309098730909873, "grad_norm": 0.0, "learning_rate": 3.564121428337369e-06, "loss": 0.8367, "step": 3398 }, { "epoch": 0.7311249731124974, "grad_norm": 0.0, "learning_rate": 3.5587903499241538e-06, "loss": 0.7935, "step": 3399 }, { "epoch": 0.7313400731340073, "grad_norm": 0.0, "learning_rate": 3.5534623983583772e-06, "loss": 0.8496, "step": 3400 }, { "epoch": 0.7315551731555173, "grad_norm": 0.0, "learning_rate": 3.548137576226458e-06, "loss": 0.8425, "step": 3401 }, { "epoch": 0.7317702731770274, "grad_norm": 0.0, "learning_rate": 3.5428158861133055e-06, "loss": 0.7775, "step": 3402 }, { "epoch": 0.7319853731985373, "grad_norm": 0.0, "learning_rate": 3.5374973306022987e-06, "loss": 0.8436, "step": 3403 }, { "epoch": 0.7322004732200473, "grad_norm": 0.0, "learning_rate": 3.532181912275301e-06, "loss": 0.8115, "step": 3404 }, { "epoch": 0.7324155732415574, "grad_norm": 0.0, "learning_rate": 3.5268696337126553e-06, "loss": 0.8104, "step": 3405 }, { "epoch": 0.7326306732630673, "grad_norm": 0.0, "learning_rate": 3.5215604974931685e-06, "loss": 0.8203, "step": 3406 }, { "epoch": 0.7328457732845773, "grad_norm": 0.0, "learning_rate": 3.5162545061941332e-06, "loss": 0.8042, "step": 3407 }, { "epoch": 0.7330608733060874, "grad_norm": 0.0, "learning_rate": 3.510951662391314e-06, "loss": 0.7958, "step": 3408 }, { "epoch": 0.7332759733275973, "grad_norm": 0.0, "learning_rate": 3.5056519686589417e-06, "loss": 0.8623, "step": 3409 }, { "epoch": 0.7334910733491073, "grad_norm": 0.0, "learning_rate": 3.5003554275697247e-06, "loss": 0.8751, "step": 3410 }, { "epoch": 0.7337061733706174, "grad_norm": 0.0, "learning_rate": 3.4950620416948343e-06, "loss": 0.839, "step": 3411 }, { "epoch": 0.7339212733921273, "grad_norm": 0.0, "learning_rate": 3.4897718136039148e-06, "loss": 0.8098, "step": 3412 }, { "epoch": 0.7341363734136374, "grad_norm": 0.0, "learning_rate": 3.4844847458650797e-06, "loss": 0.8747, "step": 3413 }, { "epoch": 0.7343514734351474, "grad_norm": 0.0, "learning_rate": 3.479200841044903e-06, "loss": 0.8372, "step": 3414 }, { "epoch": 0.7345665734566573, "grad_norm": 0.0, "learning_rate": 3.4739201017084243e-06, "loss": 0.7872, "step": 3415 }, { "epoch": 0.7347816734781674, "grad_norm": 0.0, "learning_rate": 3.468642530419144e-06, "loss": 0.8288, "step": 3416 }, { "epoch": 0.7349967734996774, "grad_norm": 0.0, "learning_rate": 3.4633681297390333e-06, "loss": 0.8468, "step": 3417 }, { "epoch": 0.7352118735211873, "grad_norm": 0.0, "learning_rate": 3.4580969022285193e-06, "loss": 0.7515, "step": 3418 }, { "epoch": 0.7354269735426974, "grad_norm": 0.0, "learning_rate": 3.4528288504464847e-06, "loss": 0.8337, "step": 3419 }, { "epoch": 0.7356420735642074, "grad_norm": 0.0, "learning_rate": 3.4475639769502765e-06, "loss": 0.8437, "step": 3420 }, { "epoch": 0.7358571735857173, "grad_norm": 0.0, "learning_rate": 3.4423022842957e-06, "loss": 0.8372, "step": 3421 }, { "epoch": 0.7360722736072274, "grad_norm": 0.0, "learning_rate": 3.437043775037007e-06, "loss": 0.7745, "step": 3422 }, { "epoch": 0.7362873736287374, "grad_norm": 0.0, "learning_rate": 3.431788451726914e-06, "loss": 0.7843, "step": 3423 }, { "epoch": 0.7365024736502473, "grad_norm": 0.0, "learning_rate": 3.4265363169165843e-06, "loss": 0.8468, "step": 3424 }, { "epoch": 0.7367175736717574, "grad_norm": 0.0, "learning_rate": 3.4212873731556364e-06, "loss": 0.782, "step": 3425 }, { "epoch": 0.7369326736932673, "grad_norm": 0.0, "learning_rate": 3.4160416229921432e-06, "loss": 0.8175, "step": 3426 }, { "epoch": 0.7371477737147774, "grad_norm": 0.0, "learning_rate": 3.4107990689726166e-06, "loss": 0.859, "step": 3427 }, { "epoch": 0.7373628737362874, "grad_norm": 0.0, "learning_rate": 3.4055597136420314e-06, "loss": 0.8347, "step": 3428 }, { "epoch": 0.7375779737577973, "grad_norm": 0.0, "learning_rate": 3.4003235595437976e-06, "loss": 0.8341, "step": 3429 }, { "epoch": 0.7377930737793074, "grad_norm": 0.0, "learning_rate": 3.395090609219772e-06, "loss": 0.8031, "step": 3430 }, { "epoch": 0.7380081738008174, "grad_norm": 0.0, "learning_rate": 3.389860865210267e-06, "loss": 0.8352, "step": 3431 }, { "epoch": 0.7382232738223273, "grad_norm": 0.0, "learning_rate": 3.384634330054024e-06, "loss": 0.8204, "step": 3432 }, { "epoch": 0.7384383738438374, "grad_norm": 0.0, "learning_rate": 3.379411006288237e-06, "loss": 0.8, "step": 3433 }, { "epoch": 0.7386534738653474, "grad_norm": 0.0, "learning_rate": 3.3741908964485415e-06, "loss": 0.9057, "step": 3434 }, { "epoch": 0.7388685738868573, "grad_norm": 0.0, "learning_rate": 3.368974003069003e-06, "loss": 0.8363, "step": 3435 }, { "epoch": 0.7390836739083674, "grad_norm": 0.0, "learning_rate": 3.3637603286821376e-06, "loss": 0.849, "step": 3436 }, { "epoch": 0.7392987739298774, "grad_norm": 0.0, "learning_rate": 3.358549875818887e-06, "loss": 0.8682, "step": 3437 }, { "epoch": 0.7395138739513873, "grad_norm": 0.0, "learning_rate": 3.3533426470086384e-06, "loss": 0.8456, "step": 3438 }, { "epoch": 0.7397289739728974, "grad_norm": 0.0, "learning_rate": 3.348138644779212e-06, "loss": 0.79, "step": 3439 }, { "epoch": 0.7399440739944074, "grad_norm": 0.0, "learning_rate": 3.3429378716568563e-06, "loss": 0.8464, "step": 3440 }, { "epoch": 0.7401591740159174, "grad_norm": 0.0, "learning_rate": 3.3377403301662605e-06, "loss": 0.7899, "step": 3441 }, { "epoch": 0.7403742740374274, "grad_norm": 0.0, "learning_rate": 3.332546022830535e-06, "loss": 0.7862, "step": 3442 }, { "epoch": 0.7405893740589374, "grad_norm": 0.0, "learning_rate": 3.327354952171232e-06, "loss": 0.7663, "step": 3443 }, { "epoch": 0.7408044740804474, "grad_norm": 0.0, "learning_rate": 3.322167120708324e-06, "loss": 0.8582, "step": 3444 }, { "epoch": 0.7410195741019574, "grad_norm": 0.0, "learning_rate": 3.316982530960209e-06, "loss": 0.772, "step": 3445 }, { "epoch": 0.7412346741234674, "grad_norm": 0.0, "learning_rate": 3.3118011854437226e-06, "loss": 0.7947, "step": 3446 }, { "epoch": 0.7414497741449774, "grad_norm": 0.0, "learning_rate": 3.3066230866741124e-06, "loss": 0.8254, "step": 3447 }, { "epoch": 0.7416648741664874, "grad_norm": 0.0, "learning_rate": 3.301448237165058e-06, "loss": 0.7802, "step": 3448 }, { "epoch": 0.7418799741879974, "grad_norm": 0.0, "learning_rate": 3.296276639428665e-06, "loss": 0.8157, "step": 3449 }, { "epoch": 0.7420950742095074, "grad_norm": 0.0, "learning_rate": 3.2911082959754472e-06, "loss": 0.8011, "step": 3450 }, { "epoch": 0.7423101742310174, "grad_norm": 0.0, "learning_rate": 3.2859432093143497e-06, "loss": 0.8719, "step": 3451 }, { "epoch": 0.7425252742525275, "grad_norm": 0.0, "learning_rate": 3.2807813819527377e-06, "loss": 0.8872, "step": 3452 }, { "epoch": 0.7427403742740374, "grad_norm": 0.0, "learning_rate": 3.2756228163963845e-06, "loss": 0.84, "step": 3453 }, { "epoch": 0.7429554742955474, "grad_norm": 0.0, "learning_rate": 3.2704675151494893e-06, "loss": 0.8048, "step": 3454 }, { "epoch": 0.7431705743170575, "grad_norm": 0.0, "learning_rate": 3.265315480714657e-06, "loss": 0.7626, "step": 3455 }, { "epoch": 0.7433856743385674, "grad_norm": 0.0, "learning_rate": 3.2601667155929207e-06, "loss": 0.9071, "step": 3456 }, { "epoch": 0.7436007743600774, "grad_norm": 0.0, "learning_rate": 3.2550212222837106e-06, "loss": 0.8086, "step": 3457 }, { "epoch": 0.7438158743815875, "grad_norm": 0.0, "learning_rate": 3.2498790032848836e-06, "loss": 0.9005, "step": 3458 }, { "epoch": 0.7440309744030974, "grad_norm": 0.0, "learning_rate": 3.244740061092695e-06, "loss": 0.7694, "step": 3459 }, { "epoch": 0.7442460744246074, "grad_norm": 0.0, "learning_rate": 3.239604398201811e-06, "loss": 0.8148, "step": 3460 }, { "epoch": 0.7444611744461175, "grad_norm": 0.0, "learning_rate": 3.2344720171053134e-06, "loss": 0.7759, "step": 3461 }, { "epoch": 0.7446762744676274, "grad_norm": 0.0, "learning_rate": 3.2293429202946904e-06, "loss": 0.8366, "step": 3462 }, { "epoch": 0.7448913744891374, "grad_norm": 0.0, "learning_rate": 3.224217110259823e-06, "loss": 0.8395, "step": 3463 }, { "epoch": 0.7451064745106475, "grad_norm": 0.0, "learning_rate": 3.2190945894890103e-06, "loss": 0.7455, "step": 3464 }, { "epoch": 0.7453215745321574, "grad_norm": 0.0, "learning_rate": 3.213975360468954e-06, "loss": 0.7668, "step": 3465 }, { "epoch": 0.7455366745536675, "grad_norm": 0.0, "learning_rate": 3.208859425684745e-06, "loss": 0.8569, "step": 3466 }, { "epoch": 0.7457517745751775, "grad_norm": 0.0, "learning_rate": 3.2037467876198925e-06, "loss": 0.8355, "step": 3467 }, { "epoch": 0.7459668745966874, "grad_norm": 0.0, "learning_rate": 3.1986374487562887e-06, "loss": 0.7813, "step": 3468 }, { "epoch": 0.7461819746181975, "grad_norm": 0.0, "learning_rate": 3.1935314115742376e-06, "loss": 0.8324, "step": 3469 }, { "epoch": 0.7463970746397075, "grad_norm": 0.0, "learning_rate": 3.1884286785524356e-06, "loss": 0.825, "step": 3470 }, { "epoch": 0.7466121746612174, "grad_norm": 0.0, "learning_rate": 3.1833292521679725e-06, "loss": 0.8461, "step": 3471 }, { "epoch": 0.7468272746827275, "grad_norm": 0.0, "learning_rate": 3.178233134896331e-06, "loss": 0.7733, "step": 3472 }, { "epoch": 0.7470423747042375, "grad_norm": 0.0, "learning_rate": 3.1731403292114006e-06, "loss": 0.8148, "step": 3473 }, { "epoch": 0.7472574747257474, "grad_norm": 0.0, "learning_rate": 3.1680508375854434e-06, "loss": 0.8552, "step": 3474 }, { "epoch": 0.7474725747472575, "grad_norm": 0.0, "learning_rate": 3.162964662489132e-06, "loss": 0.7629, "step": 3475 }, { "epoch": 0.7476876747687675, "grad_norm": 0.0, "learning_rate": 3.1578818063915128e-06, "loss": 0.8359, "step": 3476 }, { "epoch": 0.7479027747902774, "grad_norm": 0.0, "learning_rate": 3.1528022717600338e-06, "loss": 0.7786, "step": 3477 }, { "epoch": 0.7481178748117875, "grad_norm": 0.0, "learning_rate": 3.1477260610605255e-06, "loss": 0.8366, "step": 3478 }, { "epoch": 0.7483329748332975, "grad_norm": 0.0, "learning_rate": 3.1426531767572e-06, "loss": 0.873, "step": 3479 }, { "epoch": 0.7485480748548075, "grad_norm": 0.0, "learning_rate": 3.1375836213126653e-06, "loss": 0.8455, "step": 3480 }, { "epoch": 0.7487631748763175, "grad_norm": 0.0, "learning_rate": 3.132517397187902e-06, "loss": 0.866, "step": 3481 }, { "epoch": 0.7489782748978275, "grad_norm": 0.0, "learning_rate": 3.1274545068422803e-06, "loss": 0.8536, "step": 3482 }, { "epoch": 0.7491933749193375, "grad_norm": 0.0, "learning_rate": 3.122394952733554e-06, "loss": 0.8552, "step": 3483 }, { "epoch": 0.7494084749408475, "grad_norm": 0.0, "learning_rate": 3.1173387373178487e-06, "loss": 0.9048, "step": 3484 }, { "epoch": 0.7496235749623575, "grad_norm": 0.0, "learning_rate": 3.112285863049681e-06, "loss": 0.8447, "step": 3485 }, { "epoch": 0.7498386749838675, "grad_norm": 0.0, "learning_rate": 3.1072363323819345e-06, "loss": 0.8221, "step": 3486 }, { "epoch": 0.7500537750053775, "grad_norm": 0.0, "learning_rate": 3.102190147765871e-06, "loss": 0.8438, "step": 3487 }, { "epoch": 0.7502688750268875, "grad_norm": 0.0, "learning_rate": 3.09714731165114e-06, "loss": 0.8687, "step": 3488 }, { "epoch": 0.7504839750483975, "grad_norm": 0.0, "learning_rate": 3.0921078264857474e-06, "loss": 0.7132, "step": 3489 }, { "epoch": 0.7506990750699075, "grad_norm": 0.0, "learning_rate": 3.0870716947160872e-06, "loss": 0.8303, "step": 3490 }, { "epoch": 0.7509141750914176, "grad_norm": 0.0, "learning_rate": 3.0820389187869214e-06, "loss": 0.8486, "step": 3491 }, { "epoch": 0.7511292751129275, "grad_norm": 0.0, "learning_rate": 3.0770095011413757e-06, "loss": 0.8279, "step": 3492 }, { "epoch": 0.7513443751344375, "grad_norm": 0.0, "learning_rate": 3.0719834442209596e-06, "loss": 0.9094, "step": 3493 }, { "epoch": 0.7515594751559476, "grad_norm": 0.0, "learning_rate": 3.066960750465533e-06, "loss": 0.8591, "step": 3494 }, { "epoch": 0.7517745751774575, "grad_norm": 0.0, "learning_rate": 3.061941422313339e-06, "loss": 0.8609, "step": 3495 }, { "epoch": 0.7519896751989675, "grad_norm": 0.0, "learning_rate": 3.0569254622009846e-06, "loss": 0.8767, "step": 3496 }, { "epoch": 0.7522047752204776, "grad_norm": 0.0, "learning_rate": 3.0519128725634294e-06, "loss": 0.8506, "step": 3497 }, { "epoch": 0.7524198752419875, "grad_norm": 0.0, "learning_rate": 3.0469036558340147e-06, "loss": 0.7673, "step": 3498 }, { "epoch": 0.7526349752634975, "grad_norm": 0.0, "learning_rate": 3.0418978144444267e-06, "loss": 0.8089, "step": 3499 }, { "epoch": 0.7528500752850076, "grad_norm": 0.0, "learning_rate": 3.036895350824729e-06, "loss": 0.7183, "step": 3500 }, { "epoch": 0.7530651753065175, "grad_norm": 0.0, "learning_rate": 3.0318962674033358e-06, "loss": 0.801, "step": 3501 }, { "epoch": 0.7532802753280275, "grad_norm": 0.0, "learning_rate": 3.0269005666070185e-06, "loss": 0.8857, "step": 3502 }, { "epoch": 0.7534953753495376, "grad_norm": 0.0, "learning_rate": 3.021908250860918e-06, "loss": 0.8514, "step": 3503 }, { "epoch": 0.7537104753710475, "grad_norm": 0.0, "learning_rate": 3.0169193225885162e-06, "loss": 0.874, "step": 3504 }, { "epoch": 0.7539255753925576, "grad_norm": 0.0, "learning_rate": 3.011933784211666e-06, "loss": 0.8266, "step": 3505 }, { "epoch": 0.7541406754140676, "grad_norm": 0.0, "learning_rate": 3.006951638150567e-06, "loss": 0.853, "step": 3506 }, { "epoch": 0.7543557754355775, "grad_norm": 0.0, "learning_rate": 3.0019728868237695e-06, "loss": 0.8087, "step": 3507 }, { "epoch": 0.7545708754570876, "grad_norm": 0.0, "learning_rate": 2.9969975326481816e-06, "loss": 0.8405, "step": 3508 }, { "epoch": 0.7547859754785975, "grad_norm": 0.0, "learning_rate": 2.9920255780390615e-06, "loss": 0.8382, "step": 3509 }, { "epoch": 0.7550010755001075, "grad_norm": 0.0, "learning_rate": 2.9870570254100097e-06, "loss": 0.8585, "step": 3510 }, { "epoch": 0.7552161755216176, "grad_norm": 0.0, "learning_rate": 2.9820918771729877e-06, "loss": 0.8345, "step": 3511 }, { "epoch": 0.7554312755431275, "grad_norm": 0.0, "learning_rate": 2.977130135738292e-06, "loss": 0.836, "step": 3512 }, { "epoch": 0.7556463755646375, "grad_norm": 0.0, "learning_rate": 2.972171803514573e-06, "loss": 0.8135, "step": 3513 }, { "epoch": 0.7558614755861476, "grad_norm": 0.0, "learning_rate": 2.9672168829088267e-06, "loss": 0.7953, "step": 3514 }, { "epoch": 0.7560765756076575, "grad_norm": 0.0, "learning_rate": 2.9622653763263874e-06, "loss": 0.7773, "step": 3515 }, { "epoch": 0.7562916756291675, "grad_norm": 0.0, "learning_rate": 2.9573172861709366e-06, "loss": 0.8097, "step": 3516 }, { "epoch": 0.7565067756506776, "grad_norm": 0.0, "learning_rate": 2.9523726148444896e-06, "loss": 0.8094, "step": 3517 }, { "epoch": 0.7567218756721875, "grad_norm": 0.0, "learning_rate": 2.947431364747414e-06, "loss": 0.8642, "step": 3518 }, { "epoch": 0.7569369756936976, "grad_norm": 0.0, "learning_rate": 2.9424935382784115e-06, "loss": 0.8092, "step": 3519 }, { "epoch": 0.7571520757152076, "grad_norm": 0.0, "learning_rate": 2.937559137834517e-06, "loss": 0.878, "step": 3520 }, { "epoch": 0.7573671757367175, "grad_norm": 0.0, "learning_rate": 2.9326281658111066e-06, "loss": 0.812, "step": 3521 }, { "epoch": 0.7575822757582276, "grad_norm": 0.0, "learning_rate": 2.927700624601898e-06, "loss": 0.8289, "step": 3522 }, { "epoch": 0.7577973757797376, "grad_norm": 0.0, "learning_rate": 2.9227765165989287e-06, "loss": 0.862, "step": 3523 }, { "epoch": 0.7580124758012475, "grad_norm": 0.0, "learning_rate": 2.9178558441925843e-06, "loss": 0.8011, "step": 3524 }, { "epoch": 0.7582275758227576, "grad_norm": 0.0, "learning_rate": 2.912938609771572e-06, "loss": 0.8331, "step": 3525 }, { "epoch": 0.7584426758442676, "grad_norm": 0.0, "learning_rate": 2.9080248157229353e-06, "loss": 0.8612, "step": 3526 }, { "epoch": 0.7586577758657775, "grad_norm": 0.0, "learning_rate": 2.903114464432051e-06, "loss": 0.808, "step": 3527 }, { "epoch": 0.7588728758872876, "grad_norm": 0.0, "learning_rate": 2.898207558282613e-06, "loss": 0.8355, "step": 3528 }, { "epoch": 0.7590879759087976, "grad_norm": 0.0, "learning_rate": 2.8933040996566585e-06, "loss": 0.8005, "step": 3529 }, { "epoch": 0.7593030759303075, "grad_norm": 0.0, "learning_rate": 2.8884040909345378e-06, "loss": 0.8613, "step": 3530 }, { "epoch": 0.7595181759518176, "grad_norm": 0.0, "learning_rate": 2.8835075344949294e-06, "loss": 0.8992, "step": 3531 }, { "epoch": 0.7597332759733276, "grad_norm": 0.0, "learning_rate": 2.8786144327148426e-06, "loss": 0.8036, "step": 3532 }, { "epoch": 0.7599483759948376, "grad_norm": 0.0, "learning_rate": 2.8737247879696007e-06, "loss": 0.7626, "step": 3533 }, { "epoch": 0.7601634760163476, "grad_norm": 0.0, "learning_rate": 2.868838602632854e-06, "loss": 0.8128, "step": 3534 }, { "epoch": 0.7603785760378576, "grad_norm": 0.0, "learning_rate": 2.8639558790765775e-06, "loss": 0.7767, "step": 3535 }, { "epoch": 0.7605936760593676, "grad_norm": 0.0, "learning_rate": 2.859076619671053e-06, "loss": 0.8617, "step": 3536 }, { "epoch": 0.7608087760808776, "grad_norm": 0.0, "learning_rate": 2.8542008267848963e-06, "loss": 0.781, "step": 3537 }, { "epoch": 0.7610238761023876, "grad_norm": 0.0, "learning_rate": 2.849328502785025e-06, "loss": 0.8324, "step": 3538 }, { "epoch": 0.7612389761238976, "grad_norm": 0.0, "learning_rate": 2.844459650036683e-06, "loss": 0.8358, "step": 3539 }, { "epoch": 0.7614540761454076, "grad_norm": 0.0, "learning_rate": 2.8395942709034297e-06, "loss": 0.8146, "step": 3540 }, { "epoch": 0.7616691761669176, "grad_norm": 0.0, "learning_rate": 2.8347323677471296e-06, "loss": 0.8573, "step": 3541 }, { "epoch": 0.7618842761884276, "grad_norm": 0.0, "learning_rate": 2.829873942927971e-06, "loss": 0.8516, "step": 3542 }, { "epoch": 0.7620993762099376, "grad_norm": 0.0, "learning_rate": 2.8250189988044396e-06, "loss": 0.8017, "step": 3543 }, { "epoch": 0.7623144762314477, "grad_norm": 0.0, "learning_rate": 2.820167537733348e-06, "loss": 0.7607, "step": 3544 }, { "epoch": 0.7625295762529576, "grad_norm": 0.0, "learning_rate": 2.815319562069806e-06, "loss": 0.7945, "step": 3545 }, { "epoch": 0.7627446762744676, "grad_norm": 0.0, "learning_rate": 2.8104750741672314e-06, "loss": 0.7912, "step": 3546 }, { "epoch": 0.7629597762959777, "grad_norm": 0.0, "learning_rate": 2.805634076377356e-06, "loss": 0.8817, "step": 3547 }, { "epoch": 0.7631748763174876, "grad_norm": 0.0, "learning_rate": 2.8007965710502162e-06, "loss": 0.8237, "step": 3548 }, { "epoch": 0.7633899763389976, "grad_norm": 0.0, "learning_rate": 2.7959625605341446e-06, "loss": 0.8073, "step": 3549 }, { "epoch": 0.7636050763605077, "grad_norm": 0.0, "learning_rate": 2.7911320471757887e-06, "loss": 0.8401, "step": 3550 }, { "epoch": 0.7638201763820176, "grad_norm": 0.0, "learning_rate": 2.7863050333200894e-06, "loss": 0.8643, "step": 3551 }, { "epoch": 0.7640352764035276, "grad_norm": 0.0, "learning_rate": 2.781481521310292e-06, "loss": 0.8028, "step": 3552 }, { "epoch": 0.7642503764250377, "grad_norm": 0.0, "learning_rate": 2.776661513487947e-06, "loss": 0.8299, "step": 3553 }, { "epoch": 0.7644654764465476, "grad_norm": 0.0, "learning_rate": 2.771845012192892e-06, "loss": 0.797, "step": 3554 }, { "epoch": 0.7646805764680576, "grad_norm": 0.0, "learning_rate": 2.767032019763277e-06, "loss": 0.7717, "step": 3555 }, { "epoch": 0.7648956764895677, "grad_norm": 0.0, "learning_rate": 2.762222538535533e-06, "loss": 0.7942, "step": 3556 }, { "epoch": 0.7651107765110776, "grad_norm": 0.0, "learning_rate": 2.757416570844401e-06, "loss": 0.8323, "step": 3557 }, { "epoch": 0.7653258765325877, "grad_norm": 0.0, "learning_rate": 2.752614119022904e-06, "loss": 0.852, "step": 3558 }, { "epoch": 0.7655409765540977, "grad_norm": 0.0, "learning_rate": 2.7478151854023704e-06, "loss": 0.8417, "step": 3559 }, { "epoch": 0.7657560765756076, "grad_norm": 0.0, "learning_rate": 2.743019772312412e-06, "loss": 0.8497, "step": 3560 }, { "epoch": 0.7659711765971177, "grad_norm": 0.0, "learning_rate": 2.738227882080928e-06, "loss": 0.8251, "step": 3561 }, { "epoch": 0.7661862766186277, "grad_norm": 0.0, "learning_rate": 2.73343951703412e-06, "loss": 0.7889, "step": 3562 }, { "epoch": 0.7664013766401376, "grad_norm": 0.0, "learning_rate": 2.728654679496473e-06, "loss": 0.8425, "step": 3563 }, { "epoch": 0.7666164766616477, "grad_norm": 0.0, "learning_rate": 2.7238733717907518e-06, "loss": 0.7793, "step": 3564 }, { "epoch": 0.7668315766831577, "grad_norm": 0.0, "learning_rate": 2.7190955962380184e-06, "loss": 0.7888, "step": 3565 }, { "epoch": 0.7670466767046676, "grad_norm": 0.0, "learning_rate": 2.7143213551576177e-06, "loss": 0.7984, "step": 3566 }, { "epoch": 0.7672617767261777, "grad_norm": 0.0, "learning_rate": 2.709550650867172e-06, "loss": 0.8385, "step": 3567 }, { "epoch": 0.7674768767476877, "grad_norm": 0.0, "learning_rate": 2.7047834856825972e-06, "loss": 0.7361, "step": 3568 }, { "epoch": 0.7676919767691976, "grad_norm": 0.0, "learning_rate": 2.7000198619180795e-06, "loss": 0.7814, "step": 3569 }, { "epoch": 0.7679070767907077, "grad_norm": 0.0, "learning_rate": 2.695259781886096e-06, "loss": 0.8246, "step": 3570 }, { "epoch": 0.7681221768122177, "grad_norm": 0.0, "learning_rate": 2.6905032478974025e-06, "loss": 0.7805, "step": 3571 }, { "epoch": 0.7683372768337277, "grad_norm": 0.0, "learning_rate": 2.6857502622610267e-06, "loss": 0.7737, "step": 3572 }, { "epoch": 0.7685523768552377, "grad_norm": 0.0, "learning_rate": 2.681000827284275e-06, "loss": 0.7525, "step": 3573 }, { "epoch": 0.7687674768767477, "grad_norm": 0.0, "learning_rate": 2.6762549452727383e-06, "loss": 0.8155, "step": 3574 }, { "epoch": 0.7689825768982577, "grad_norm": 0.0, "learning_rate": 2.671512618530272e-06, "loss": 0.8516, "step": 3575 }, { "epoch": 0.7691976769197677, "grad_norm": 0.0, "learning_rate": 2.6667738493590165e-06, "loss": 0.8182, "step": 3576 }, { "epoch": 0.7694127769412777, "grad_norm": 0.0, "learning_rate": 2.6620386400593733e-06, "loss": 0.8184, "step": 3577 }, { "epoch": 0.7696278769627877, "grad_norm": 0.0, "learning_rate": 2.6573069929300243e-06, "loss": 0.8455, "step": 3578 }, { "epoch": 0.7698429769842977, "grad_norm": 0.0, "learning_rate": 2.6525789102679256e-06, "loss": 0.76, "step": 3579 }, { "epoch": 0.7700580770058078, "grad_norm": 0.0, "learning_rate": 2.647854394368288e-06, "loss": 0.7416, "step": 3580 }, { "epoch": 0.7702731770273177, "grad_norm": 0.0, "learning_rate": 2.6431334475246074e-06, "loss": 0.7753, "step": 3581 }, { "epoch": 0.7704882770488277, "grad_norm": 0.0, "learning_rate": 2.6384160720286334e-06, "loss": 0.7652, "step": 3582 }, { "epoch": 0.7707033770703378, "grad_norm": 0.0, "learning_rate": 2.6337022701703907e-06, "loss": 0.8328, "step": 3583 }, { "epoch": 0.7709184770918477, "grad_norm": 0.0, "learning_rate": 2.628992044238172e-06, "loss": 0.831, "step": 3584 }, { "epoch": 0.7711335771133577, "grad_norm": 0.0, "learning_rate": 2.624285396518521e-06, "loss": 0.8608, "step": 3585 }, { "epoch": 0.7713486771348678, "grad_norm": 0.0, "learning_rate": 2.6195823292962587e-06, "loss": 0.8395, "step": 3586 }, { "epoch": 0.7715637771563777, "grad_norm": 0.0, "learning_rate": 2.6148828448544574e-06, "loss": 0.7714, "step": 3587 }, { "epoch": 0.7717788771778877, "grad_norm": 0.0, "learning_rate": 2.610186945474453e-06, "loss": 0.812, "step": 3588 }, { "epoch": 0.7719939771993978, "grad_norm": 0.0, "learning_rate": 2.6054946334358478e-06, "loss": 0.8331, "step": 3589 }, { "epoch": 0.7722090772209077, "grad_norm": 0.0, "learning_rate": 2.6008059110164906e-06, "loss": 0.8208, "step": 3590 }, { "epoch": 0.7724241772424177, "grad_norm": 0.0, "learning_rate": 2.596120780492497e-06, "loss": 0.7524, "step": 3591 }, { "epoch": 0.7726392772639278, "grad_norm": 0.0, "learning_rate": 2.5914392441382408e-06, "loss": 0.7573, "step": 3592 }, { "epoch": 0.7728543772854377, "grad_norm": 0.0, "learning_rate": 2.5867613042263384e-06, "loss": 0.88, "step": 3593 }, { "epoch": 0.7730694773069477, "grad_norm": 0.0, "learning_rate": 2.5820869630276747e-06, "loss": 0.8658, "step": 3594 }, { "epoch": 0.7732845773284577, "grad_norm": 0.0, "learning_rate": 2.5774162228113765e-06, "loss": 0.8739, "step": 3595 }, { "epoch": 0.7734996773499677, "grad_norm": 0.0, "learning_rate": 2.572749085844829e-06, "loss": 0.8657, "step": 3596 }, { "epoch": 0.7737147773714778, "grad_norm": 0.0, "learning_rate": 2.568085554393669e-06, "loss": 0.8226, "step": 3597 }, { "epoch": 0.7739298773929877, "grad_norm": 0.0, "learning_rate": 2.5634256307217766e-06, "loss": 0.814, "step": 3598 }, { "epoch": 0.7741449774144977, "grad_norm": 0.0, "learning_rate": 2.5587693170912876e-06, "loss": 0.7922, "step": 3599 }, { "epoch": 0.7743600774360078, "grad_norm": 0.0, "learning_rate": 2.5541166157625783e-06, "loss": 0.7878, "step": 3600 }, { "epoch": 0.7745751774575177, "grad_norm": 0.0, "learning_rate": 2.5494675289942806e-06, "loss": 0.8881, "step": 3601 }, { "epoch": 0.7747902774790277, "grad_norm": 0.0, "learning_rate": 2.5448220590432604e-06, "loss": 0.8459, "step": 3602 }, { "epoch": 0.7750053775005378, "grad_norm": 0.0, "learning_rate": 2.54018020816464e-06, "loss": 0.8535, "step": 3603 }, { "epoch": 0.7752204775220477, "grad_norm": 0.0, "learning_rate": 2.535541978611775e-06, "loss": 0.8069, "step": 3604 }, { "epoch": 0.7754355775435577, "grad_norm": 0.0, "learning_rate": 2.5309073726362656e-06, "loss": 0.8135, "step": 3605 }, { "epoch": 0.7756506775650678, "grad_norm": 0.0, "learning_rate": 2.5262763924879543e-06, "loss": 0.8008, "step": 3606 }, { "epoch": 0.7758657775865777, "grad_norm": 0.0, "learning_rate": 2.5216490404149286e-06, "loss": 0.8362, "step": 3607 }, { "epoch": 0.7760808776080877, "grad_norm": 0.0, "learning_rate": 2.517025318663502e-06, "loss": 0.8018, "step": 3608 }, { "epoch": 0.7762959776295978, "grad_norm": 0.0, "learning_rate": 2.5124052294782353e-06, "loss": 0.8111, "step": 3609 }, { "epoch": 0.7765110776511077, "grad_norm": 0.0, "learning_rate": 2.5077887751019294e-06, "loss": 0.8349, "step": 3610 }, { "epoch": 0.7767261776726178, "grad_norm": 0.0, "learning_rate": 2.5031759577756077e-06, "loss": 0.8328, "step": 3611 }, { "epoch": 0.7769412776941278, "grad_norm": 0.0, "learning_rate": 2.4985667797385403e-06, "loss": 0.84, "step": 3612 }, { "epoch": 0.7771563777156377, "grad_norm": 0.0, "learning_rate": 2.493961243228221e-06, "loss": 0.7584, "step": 3613 }, { "epoch": 0.7773714777371478, "grad_norm": 0.0, "learning_rate": 2.4893593504803824e-06, "loss": 0.7885, "step": 3614 }, { "epoch": 0.7775865777586578, "grad_norm": 0.0, "learning_rate": 2.484761103728991e-06, "loss": 0.8381, "step": 3615 }, { "epoch": 0.7778016777801677, "grad_norm": 0.0, "learning_rate": 2.4801665052062342e-06, "loss": 0.7956, "step": 3616 }, { "epoch": 0.7780167778016778, "grad_norm": 0.0, "learning_rate": 2.47557555714253e-06, "loss": 0.8226, "step": 3617 }, { "epoch": 0.7782318778231878, "grad_norm": 0.0, "learning_rate": 2.4709882617665337e-06, "loss": 0.8299, "step": 3618 }, { "epoch": 0.7784469778446977, "grad_norm": 0.0, "learning_rate": 2.466404621305115e-06, "loss": 0.868, "step": 3619 }, { "epoch": 0.7786620778662078, "grad_norm": 0.0, "learning_rate": 2.461824637983381e-06, "loss": 0.8727, "step": 3620 }, { "epoch": 0.7788771778877178, "grad_norm": 0.0, "learning_rate": 2.4572483140246517e-06, "loss": 0.8722, "step": 3621 }, { "epoch": 0.7790922779092277, "grad_norm": 0.0, "learning_rate": 2.452675651650479e-06, "loss": 0.8033, "step": 3622 }, { "epoch": 0.7793073779307378, "grad_norm": 0.0, "learning_rate": 2.44810665308064e-06, "loss": 0.7941, "step": 3623 }, { "epoch": 0.7795224779522478, "grad_norm": 0.0, "learning_rate": 2.4435413205331204e-06, "loss": 0.8158, "step": 3624 }, { "epoch": 0.7797375779737578, "grad_norm": 0.0, "learning_rate": 2.4389796562241408e-06, "loss": 0.8143, "step": 3625 }, { "epoch": 0.7799526779952678, "grad_norm": 0.0, "learning_rate": 2.434421662368128e-06, "loss": 0.8271, "step": 3626 }, { "epoch": 0.7801677780167778, "grad_norm": 0.0, "learning_rate": 2.4298673411777387e-06, "loss": 0.8798, "step": 3627 }, { "epoch": 0.7803828780382878, "grad_norm": 0.0, "learning_rate": 2.4253166948638394e-06, "loss": 0.8493, "step": 3628 }, { "epoch": 0.7805979780597978, "grad_norm": 0.0, "learning_rate": 2.4207697256355144e-06, "loss": 0.831, "step": 3629 }, { "epoch": 0.7808130780813078, "grad_norm": 0.0, "learning_rate": 2.4162264357000652e-06, "loss": 0.8398, "step": 3630 }, { "epoch": 0.7810281781028178, "grad_norm": 0.0, "learning_rate": 2.411686827263003e-06, "loss": 0.8504, "step": 3631 }, { "epoch": 0.7812432781243278, "grad_norm": 0.0, "learning_rate": 2.4071509025280526e-06, "loss": 0.8095, "step": 3632 }, { "epoch": 0.7814583781458379, "grad_norm": 0.0, "learning_rate": 2.402618663697157e-06, "loss": 0.8074, "step": 3633 }, { "epoch": 0.7816734781673478, "grad_norm": 0.0, "learning_rate": 2.3980901129704593e-06, "loss": 0.8497, "step": 3634 }, { "epoch": 0.7818885781888578, "grad_norm": 0.0, "learning_rate": 2.3935652525463206e-06, "loss": 0.9335, "step": 3635 }, { "epoch": 0.7821036782103679, "grad_norm": 0.0, "learning_rate": 2.3890440846213103e-06, "loss": 0.8081, "step": 3636 }, { "epoch": 0.7823187782318778, "grad_norm": 0.0, "learning_rate": 2.384526611390198e-06, "loss": 0.794, "step": 3637 }, { "epoch": 0.7825338782533878, "grad_norm": 0.0, "learning_rate": 2.3800128350459695e-06, "loss": 0.8225, "step": 3638 }, { "epoch": 0.7827489782748979, "grad_norm": 0.0, "learning_rate": 2.375502757779806e-06, "loss": 0.7981, "step": 3639 }, { "epoch": 0.7829640782964078, "grad_norm": 0.0, "learning_rate": 2.370996381781101e-06, "loss": 0.8056, "step": 3640 }, { "epoch": 0.7831791783179178, "grad_norm": 0.0, "learning_rate": 2.3664937092374497e-06, "loss": 0.8251, "step": 3641 }, { "epoch": 0.7833942783394279, "grad_norm": 0.0, "learning_rate": 2.3619947423346447e-06, "loss": 0.8447, "step": 3642 }, { "epoch": 0.7836093783609378, "grad_norm": 0.0, "learning_rate": 2.3574994832566868e-06, "loss": 0.8418, "step": 3643 }, { "epoch": 0.7838244783824478, "grad_norm": 0.0, "learning_rate": 2.3530079341857682e-06, "loss": 0.7876, "step": 3644 }, { "epoch": 0.7840395784039579, "grad_norm": 0.0, "learning_rate": 2.3485200973022904e-06, "loss": 0.9171, "step": 3645 }, { "epoch": 0.7842546784254678, "grad_norm": 0.0, "learning_rate": 2.3440359747848452e-06, "loss": 0.875, "step": 3646 }, { "epoch": 0.7844697784469779, "grad_norm": 0.0, "learning_rate": 2.339555568810221e-06, "loss": 0.8066, "step": 3647 }, { "epoch": 0.7846848784684879, "grad_norm": 0.0, "learning_rate": 2.3350788815534063e-06, "loss": 0.8182, "step": 3648 }, { "epoch": 0.7848999784899978, "grad_norm": 0.0, "learning_rate": 2.330605915187587e-06, "loss": 0.8235, "step": 3649 }, { "epoch": 0.7851150785115079, "grad_norm": 0.0, "learning_rate": 2.3261366718841307e-06, "loss": 0.8242, "step": 3650 }, { "epoch": 0.7853301785330179, "grad_norm": 0.0, "learning_rate": 2.3216711538126134e-06, "loss": 0.7175, "step": 3651 }, { "epoch": 0.7855452785545278, "grad_norm": 0.0, "learning_rate": 2.3172093631407864e-06, "loss": 0.8065, "step": 3652 }, { "epoch": 0.7857603785760379, "grad_norm": 0.0, "learning_rate": 2.312751302034605e-06, "loss": 0.7602, "step": 3653 }, { "epoch": 0.7859754785975479, "grad_norm": 0.0, "learning_rate": 2.3082969726582107e-06, "loss": 0.8188, "step": 3654 }, { "epoch": 0.7861905786190578, "grad_norm": 0.0, "learning_rate": 2.303846377173925e-06, "loss": 0.8467, "step": 3655 }, { "epoch": 0.7864056786405679, "grad_norm": 0.0, "learning_rate": 2.29939951774227e-06, "loss": 0.8326, "step": 3656 }, { "epoch": 0.7866207786620779, "grad_norm": 0.0, "learning_rate": 2.2949563965219424e-06, "loss": 0.8065, "step": 3657 }, { "epoch": 0.7868358786835878, "grad_norm": 0.0, "learning_rate": 2.290517015669833e-06, "loss": 0.797, "step": 3658 }, { "epoch": 0.7870509787050979, "grad_norm": 0.0, "learning_rate": 2.2860813773410105e-06, "loss": 0.8049, "step": 3659 }, { "epoch": 0.7872660787266079, "grad_norm": 0.0, "learning_rate": 2.2816494836887326e-06, "loss": 0.8465, "step": 3660 }, { "epoch": 0.7874811787481178, "grad_norm": 0.0, "learning_rate": 2.2772213368644334e-06, "loss": 0.8664, "step": 3661 }, { "epoch": 0.7876962787696279, "grad_norm": 0.0, "learning_rate": 2.2727969390177306e-06, "loss": 0.8373, "step": 3662 }, { "epoch": 0.7879113787911379, "grad_norm": 0.0, "learning_rate": 2.2683762922964214e-06, "loss": 0.8594, "step": 3663 }, { "epoch": 0.7881264788126479, "grad_norm": 0.0, "learning_rate": 2.263959398846488e-06, "loss": 0.8283, "step": 3664 }, { "epoch": 0.7883415788341579, "grad_norm": 0.0, "learning_rate": 2.25954626081208e-06, "loss": 0.8552, "step": 3665 }, { "epoch": 0.7885566788556679, "grad_norm": 0.0, "learning_rate": 2.25513688033553e-06, "loss": 0.8011, "step": 3666 }, { "epoch": 0.7887717788771779, "grad_norm": 0.0, "learning_rate": 2.2507312595573505e-06, "loss": 0.8083, "step": 3667 }, { "epoch": 0.7889868788986879, "grad_norm": 0.0, "learning_rate": 2.2463294006162185e-06, "loss": 0.861, "step": 3668 }, { "epoch": 0.7892019789201979, "grad_norm": 0.0, "learning_rate": 2.2419313056489954e-06, "loss": 0.7971, "step": 3669 }, { "epoch": 0.7894170789417079, "grad_norm": 0.0, "learning_rate": 2.237536976790706e-06, "loss": 0.8752, "step": 3670 }, { "epoch": 0.7896321789632179, "grad_norm": 0.0, "learning_rate": 2.2331464161745532e-06, "loss": 0.784, "step": 3671 }, { "epoch": 0.789847278984728, "grad_norm": 0.0, "learning_rate": 2.228759625931912e-06, "loss": 0.7979, "step": 3672 }, { "epoch": 0.7900623790062379, "grad_norm": 0.0, "learning_rate": 2.22437660819232e-06, "loss": 0.839, "step": 3673 }, { "epoch": 0.7902774790277479, "grad_norm": 0.0, "learning_rate": 2.2199973650834903e-06, "loss": 0.8222, "step": 3674 }, { "epoch": 0.790492579049258, "grad_norm": 0.0, "learning_rate": 2.2156218987313003e-06, "loss": 0.7779, "step": 3675 }, { "epoch": 0.7907076790707679, "grad_norm": 0.0, "learning_rate": 2.21125021125979e-06, "loss": 0.8214, "step": 3676 }, { "epoch": 0.7909227790922779, "grad_norm": 0.0, "learning_rate": 2.206882304791176e-06, "loss": 0.8106, "step": 3677 }, { "epoch": 0.7911378791137879, "grad_norm": 0.0, "learning_rate": 2.2025181814458276e-06, "loss": 0.8231, "step": 3678 }, { "epoch": 0.7913529791352979, "grad_norm": 0.0, "learning_rate": 2.1981578433422844e-06, "loss": 0.8598, "step": 3679 }, { "epoch": 0.7915680791568079, "grad_norm": 0.0, "learning_rate": 2.1938012925972507e-06, "loss": 0.8025, "step": 3680 }, { "epoch": 0.7917831791783179, "grad_norm": 0.0, "learning_rate": 2.189448531325582e-06, "loss": 0.7999, "step": 3681 }, { "epoch": 0.7919982791998279, "grad_norm": 0.0, "learning_rate": 2.1850995616403083e-06, "loss": 0.7811, "step": 3682 }, { "epoch": 0.7922133792213379, "grad_norm": 0.0, "learning_rate": 2.1807543856526037e-06, "loss": 0.7173, "step": 3683 }, { "epoch": 0.7924284792428479, "grad_norm": 0.0, "learning_rate": 2.1764130054718123e-06, "loss": 0.8026, "step": 3684 }, { "epoch": 0.7926435792643579, "grad_norm": 0.0, "learning_rate": 2.1720754232054333e-06, "loss": 0.792, "step": 3685 }, { "epoch": 0.792858679285868, "grad_norm": 0.0, "learning_rate": 2.167741640959117e-06, "loss": 0.7994, "step": 3686 }, { "epoch": 0.7930737793073779, "grad_norm": 0.0, "learning_rate": 2.163411660836676e-06, "loss": 0.8022, "step": 3687 }, { "epoch": 0.7932888793288879, "grad_norm": 0.0, "learning_rate": 2.15908548494007e-06, "loss": 0.8241, "step": 3688 }, { "epoch": 0.793503979350398, "grad_norm": 0.0, "learning_rate": 2.1547631153694193e-06, "loss": 0.7797, "step": 3689 }, { "epoch": 0.7937190793719079, "grad_norm": 0.0, "learning_rate": 2.1504445542229923e-06, "loss": 0.8033, "step": 3690 }, { "epoch": 0.7939341793934179, "grad_norm": 0.0, "learning_rate": 2.1461298035972055e-06, "loss": 0.802, "step": 3691 }, { "epoch": 0.794149279414928, "grad_norm": 0.0, "learning_rate": 2.1418188655866314e-06, "loss": 0.7829, "step": 3692 }, { "epoch": 0.7943643794364379, "grad_norm": 0.0, "learning_rate": 2.137511742283992e-06, "loss": 0.8226, "step": 3693 }, { "epoch": 0.7945794794579479, "grad_norm": 0.0, "learning_rate": 2.1332084357801508e-06, "loss": 0.7864, "step": 3694 }, { "epoch": 0.794794579479458, "grad_norm": 0.0, "learning_rate": 2.128908948164128e-06, "loss": 0.829, "step": 3695 }, { "epoch": 0.7950096795009679, "grad_norm": 0.0, "learning_rate": 2.124613281523079e-06, "loss": 0.8479, "step": 3696 }, { "epoch": 0.7952247795224779, "grad_norm": 0.0, "learning_rate": 2.120321437942312e-06, "loss": 0.8191, "step": 3697 }, { "epoch": 0.795439879543988, "grad_norm": 0.0, "learning_rate": 2.11603341950528e-06, "loss": 0.8431, "step": 3698 }, { "epoch": 0.7956549795654979, "grad_norm": 0.0, "learning_rate": 2.1117492282935726e-06, "loss": 0.822, "step": 3699 }, { "epoch": 0.795870079587008, "grad_norm": 0.0, "learning_rate": 2.1074688663869302e-06, "loss": 0.7946, "step": 3700 }, { "epoch": 0.796085179608518, "grad_norm": 0.0, "learning_rate": 2.103192335863222e-06, "loss": 0.8457, "step": 3701 }, { "epoch": 0.7963002796300279, "grad_norm": 0.0, "learning_rate": 2.0989196387984724e-06, "loss": 0.7922, "step": 3702 }, { "epoch": 0.796515379651538, "grad_norm": 0.0, "learning_rate": 2.09465077726683e-06, "loss": 0.8217, "step": 3703 }, { "epoch": 0.796730479673048, "grad_norm": 0.0, "learning_rate": 2.090385753340596e-06, "loss": 0.7897, "step": 3704 }, { "epoch": 0.7969455796945579, "grad_norm": 0.0, "learning_rate": 2.0861245690901953e-06, "loss": 0.8298, "step": 3705 }, { "epoch": 0.797160679716068, "grad_norm": 0.0, "learning_rate": 2.0818672265841955e-06, "loss": 0.8627, "step": 3706 }, { "epoch": 0.797375779737578, "grad_norm": 0.0, "learning_rate": 2.0776137278893005e-06, "loss": 0.7784, "step": 3707 }, { "epoch": 0.7975908797590879, "grad_norm": 0.0, "learning_rate": 2.0733640750703475e-06, "loss": 0.8231, "step": 3708 }, { "epoch": 0.797805979780598, "grad_norm": 0.0, "learning_rate": 2.069118270190301e-06, "loss": 0.819, "step": 3709 }, { "epoch": 0.798021079802108, "grad_norm": 0.0, "learning_rate": 2.064876315310265e-06, "loss": 0.8809, "step": 3710 }, { "epoch": 0.7982361798236179, "grad_norm": 0.0, "learning_rate": 2.060638212489474e-06, "loss": 0.8523, "step": 3711 }, { "epoch": 0.798451279845128, "grad_norm": 0.0, "learning_rate": 2.056403963785284e-06, "loss": 0.8283, "step": 3712 }, { "epoch": 0.798666379866638, "grad_norm": 0.0, "learning_rate": 2.0521735712531934e-06, "loss": 0.8472, "step": 3713 }, { "epoch": 0.798881479888148, "grad_norm": 0.0, "learning_rate": 2.0479470369468146e-06, "loss": 0.8135, "step": 3714 }, { "epoch": 0.799096579909658, "grad_norm": 0.0, "learning_rate": 2.043724362917897e-06, "loss": 0.8779, "step": 3715 }, { "epoch": 0.799311679931168, "grad_norm": 0.0, "learning_rate": 2.0395055512163166e-06, "loss": 0.8614, "step": 3716 }, { "epoch": 0.799526779952678, "grad_norm": 0.0, "learning_rate": 2.0352906038900666e-06, "loss": 0.8691, "step": 3717 }, { "epoch": 0.799741879974188, "grad_norm": 0.0, "learning_rate": 2.0310795229852653e-06, "loss": 0.8217, "step": 3718 }, { "epoch": 0.799956979995698, "grad_norm": 0.0, "learning_rate": 2.0268723105461653e-06, "loss": 0.7685, "step": 3719 }, { "epoch": 0.800172080017208, "grad_norm": 0.0, "learning_rate": 2.0226689686151244e-06, "loss": 0.8518, "step": 3720 }, { "epoch": 0.800387180038718, "grad_norm": 0.0, "learning_rate": 2.018469499232639e-06, "loss": 0.7874, "step": 3721 }, { "epoch": 0.800602280060228, "grad_norm": 0.0, "learning_rate": 2.0142739044373093e-06, "loss": 0.8481, "step": 3722 }, { "epoch": 0.800817380081738, "grad_norm": 0.0, "learning_rate": 2.010082186265866e-06, "loss": 0.8537, "step": 3723 }, { "epoch": 0.801032480103248, "grad_norm": 0.0, "learning_rate": 2.0058943467531556e-06, "loss": 0.843, "step": 3724 }, { "epoch": 0.801247580124758, "grad_norm": 0.0, "learning_rate": 2.0017103879321355e-06, "loss": 0.8251, "step": 3725 }, { "epoch": 0.801462680146268, "grad_norm": 0.0, "learning_rate": 1.9975303118338894e-06, "loss": 0.8339, "step": 3726 }, { "epoch": 0.801677780167778, "grad_norm": 0.0, "learning_rate": 1.993354120487605e-06, "loss": 0.8119, "step": 3727 }, { "epoch": 0.8018928801892881, "grad_norm": 0.0, "learning_rate": 1.9891818159205933e-06, "loss": 0.8556, "step": 3728 }, { "epoch": 0.802107980210798, "grad_norm": 0.0, "learning_rate": 1.985013400158278e-06, "loss": 0.8342, "step": 3729 }, { "epoch": 0.802323080232308, "grad_norm": 0.0, "learning_rate": 1.980848875224185e-06, "loss": 0.8769, "step": 3730 }, { "epoch": 0.8025381802538181, "grad_norm": 0.0, "learning_rate": 1.976688243139965e-06, "loss": 0.8412, "step": 3731 }, { "epoch": 0.802753280275328, "grad_norm": 0.0, "learning_rate": 1.9725315059253704e-06, "loss": 0.8078, "step": 3732 }, { "epoch": 0.802968380296838, "grad_norm": 0.0, "learning_rate": 1.9683786655982607e-06, "loss": 0.8519, "step": 3733 }, { "epoch": 0.8031834803183481, "grad_norm": 0.0, "learning_rate": 1.9642297241746146e-06, "loss": 0.8288, "step": 3734 }, { "epoch": 0.803398580339858, "grad_norm": 0.0, "learning_rate": 1.960084683668504e-06, "loss": 0.8144, "step": 3735 }, { "epoch": 0.803613680361368, "grad_norm": 0.0, "learning_rate": 1.9559435460921207e-06, "loss": 0.8163, "step": 3736 }, { "epoch": 0.8038287803828781, "grad_norm": 0.0, "learning_rate": 1.951806313455754e-06, "loss": 0.8689, "step": 3737 }, { "epoch": 0.804043880404388, "grad_norm": 0.0, "learning_rate": 1.947672987767798e-06, "loss": 0.7966, "step": 3738 }, { "epoch": 0.804258980425898, "grad_norm": 0.0, "learning_rate": 1.943543571034754e-06, "loss": 0.8034, "step": 3739 }, { "epoch": 0.8044740804474081, "grad_norm": 0.0, "learning_rate": 1.9394180652612193e-06, "loss": 0.8835, "step": 3740 }, { "epoch": 0.804689180468918, "grad_norm": 0.0, "learning_rate": 1.935296472449898e-06, "loss": 0.8549, "step": 3741 }, { "epoch": 0.8049042804904281, "grad_norm": 0.0, "learning_rate": 1.931178794601596e-06, "loss": 0.7959, "step": 3742 }, { "epoch": 0.8051193805119381, "grad_norm": 0.0, "learning_rate": 1.9270650337152107e-06, "loss": 0.8529, "step": 3743 }, { "epoch": 0.805334480533448, "grad_norm": 0.0, "learning_rate": 1.9229551917877485e-06, "loss": 0.789, "step": 3744 }, { "epoch": 0.8055495805549581, "grad_norm": 0.0, "learning_rate": 1.9188492708143035e-06, "loss": 0.7346, "step": 3745 }, { "epoch": 0.8057646805764681, "grad_norm": 0.0, "learning_rate": 1.9147472727880757e-06, "loss": 0.8305, "step": 3746 }, { "epoch": 0.805979780597978, "grad_norm": 0.0, "learning_rate": 1.910649199700353e-06, "loss": 0.7864, "step": 3747 }, { "epoch": 0.8061948806194881, "grad_norm": 0.0, "learning_rate": 1.9065550535405186e-06, "loss": 0.8718, "step": 3748 }, { "epoch": 0.8064099806409981, "grad_norm": 0.0, "learning_rate": 1.902464836296054e-06, "loss": 0.8523, "step": 3749 }, { "epoch": 0.806625080662508, "grad_norm": 0.0, "learning_rate": 1.8983785499525342e-06, "loss": 0.7193, "step": 3750 }, { "epoch": 0.8068401806840181, "grad_norm": 0.0, "learning_rate": 1.894296196493618e-06, "loss": 0.8429, "step": 3751 }, { "epoch": 0.8070552807055281, "grad_norm": 0.0, "learning_rate": 1.8902177779010645e-06, "loss": 0.7944, "step": 3752 }, { "epoch": 0.807270380727038, "grad_norm": 0.0, "learning_rate": 1.8861432961547133e-06, "loss": 0.8179, "step": 3753 }, { "epoch": 0.8074854807485481, "grad_norm": 0.0, "learning_rate": 1.8820727532325e-06, "loss": 0.8205, "step": 3754 }, { "epoch": 0.8077005807700581, "grad_norm": 0.0, "learning_rate": 1.8780061511104487e-06, "loss": 0.8612, "step": 3755 }, { "epoch": 0.8079156807915681, "grad_norm": 0.0, "learning_rate": 1.873943491762662e-06, "loss": 0.7069, "step": 3756 }, { "epoch": 0.8081307808130781, "grad_norm": 0.0, "learning_rate": 1.8698847771613393e-06, "loss": 0.8023, "step": 3757 }, { "epoch": 0.8083458808345881, "grad_norm": 0.0, "learning_rate": 1.8658300092767546e-06, "loss": 0.8138, "step": 3758 }, { "epoch": 0.8085609808560981, "grad_norm": 0.0, "learning_rate": 1.8617791900772742e-06, "loss": 0.8523, "step": 3759 }, { "epoch": 0.8087760808776081, "grad_norm": 0.0, "learning_rate": 1.8577323215293463e-06, "loss": 0.8854, "step": 3760 }, { "epoch": 0.8089911808991181, "grad_norm": 0.0, "learning_rate": 1.8536894055974974e-06, "loss": 0.8802, "step": 3761 }, { "epoch": 0.8092062809206281, "grad_norm": 0.0, "learning_rate": 1.8496504442443375e-06, "loss": 0.7475, "step": 3762 }, { "epoch": 0.8094213809421381, "grad_norm": 0.0, "learning_rate": 1.8456154394305536e-06, "loss": 0.8383, "step": 3763 }, { "epoch": 0.809636480963648, "grad_norm": 0.0, "learning_rate": 1.8415843931149192e-06, "loss": 0.8523, "step": 3764 }, { "epoch": 0.8098515809851581, "grad_norm": 0.0, "learning_rate": 1.8375573072542819e-06, "loss": 0.7621, "step": 3765 }, { "epoch": 0.8100666810066681, "grad_norm": 0.0, "learning_rate": 1.8335341838035635e-06, "loss": 0.8384, "step": 3766 }, { "epoch": 0.810281781028178, "grad_norm": 0.0, "learning_rate": 1.829515024715769e-06, "loss": 0.7647, "step": 3767 }, { "epoch": 0.8104968810496881, "grad_norm": 0.0, "learning_rate": 1.825499831941977e-06, "loss": 0.8093, "step": 3768 }, { "epoch": 0.8107119810711981, "grad_norm": 0.0, "learning_rate": 1.8214886074313343e-06, "loss": 0.8168, "step": 3769 }, { "epoch": 0.8109270810927081, "grad_norm": 0.0, "learning_rate": 1.817481353131071e-06, "loss": 0.7762, "step": 3770 }, { "epoch": 0.8111421811142181, "grad_norm": 0.0, "learning_rate": 1.8134780709864808e-06, "loss": 0.7641, "step": 3771 }, { "epoch": 0.8113572811357281, "grad_norm": 0.0, "learning_rate": 1.8094787629409361e-06, "loss": 0.8354, "step": 3772 }, { "epoch": 0.8115723811572381, "grad_norm": 0.0, "learning_rate": 1.8054834309358792e-06, "loss": 0.8101, "step": 3773 }, { "epoch": 0.8117874811787481, "grad_norm": 0.0, "learning_rate": 1.801492076910817e-06, "loss": 0.8, "step": 3774 }, { "epoch": 0.8120025812002581, "grad_norm": 0.0, "learning_rate": 1.7975047028033333e-06, "loss": 0.839, "step": 3775 }, { "epoch": 0.8122176812217681, "grad_norm": 0.0, "learning_rate": 1.7935213105490722e-06, "loss": 0.7587, "step": 3776 }, { "epoch": 0.8124327812432781, "grad_norm": 0.0, "learning_rate": 1.7895419020817462e-06, "loss": 0.8239, "step": 3777 }, { "epoch": 0.8126478812647882, "grad_norm": 0.0, "learning_rate": 1.7855664793331406e-06, "loss": 0.856, "step": 3778 }, { "epoch": 0.8128629812862981, "grad_norm": 0.0, "learning_rate": 1.7815950442330965e-06, "loss": 0.8396, "step": 3779 }, { "epoch": 0.8130780813078081, "grad_norm": 0.0, "learning_rate": 1.777627598709526e-06, "loss": 0.8679, "step": 3780 }, { "epoch": 0.8132931813293182, "grad_norm": 0.0, "learning_rate": 1.773664144688405e-06, "loss": 0.8356, "step": 3781 }, { "epoch": 0.8135082813508281, "grad_norm": 0.0, "learning_rate": 1.7697046840937638e-06, "loss": 0.7967, "step": 3782 }, { "epoch": 0.8137233813723381, "grad_norm": 0.0, "learning_rate": 1.7657492188477033e-06, "loss": 0.8001, "step": 3783 }, { "epoch": 0.8139384813938482, "grad_norm": 0.0, "learning_rate": 1.7617977508703777e-06, "loss": 0.8136, "step": 3784 }, { "epoch": 0.8141535814153581, "grad_norm": 0.0, "learning_rate": 1.7578502820800048e-06, "loss": 0.774, "step": 3785 }, { "epoch": 0.8143686814368681, "grad_norm": 0.0, "learning_rate": 1.7539068143928638e-06, "loss": 0.9083, "step": 3786 }, { "epoch": 0.8145837814583782, "grad_norm": 0.0, "learning_rate": 1.7499673497232827e-06, "loss": 0.8309, "step": 3787 }, { "epoch": 0.8147988814798881, "grad_norm": 0.0, "learning_rate": 1.746031889983657e-06, "loss": 0.8301, "step": 3788 }, { "epoch": 0.8150139815013981, "grad_norm": 0.0, "learning_rate": 1.7421004370844263e-06, "loss": 0.7986, "step": 3789 }, { "epoch": 0.8152290815229082, "grad_norm": 0.0, "learning_rate": 1.7381729929340974e-06, "loss": 0.833, "step": 3790 }, { "epoch": 0.8154441815444181, "grad_norm": 0.0, "learning_rate": 1.7342495594392227e-06, "loss": 0.83, "step": 3791 }, { "epoch": 0.8156592815659282, "grad_norm": 0.0, "learning_rate": 1.7303301385044058e-06, "loss": 0.7958, "step": 3792 }, { "epoch": 0.8158743815874382, "grad_norm": 0.0, "learning_rate": 1.726414732032311e-06, "loss": 0.7968, "step": 3793 }, { "epoch": 0.8160894816089481, "grad_norm": 0.0, "learning_rate": 1.7225033419236502e-06, "loss": 0.7709, "step": 3794 }, { "epoch": 0.8163045816304582, "grad_norm": 0.0, "learning_rate": 1.7185959700771816e-06, "loss": 0.8252, "step": 3795 }, { "epoch": 0.8165196816519682, "grad_norm": 0.0, "learning_rate": 1.7146926183897194e-06, "loss": 0.8365, "step": 3796 }, { "epoch": 0.8167347816734781, "grad_norm": 0.0, "learning_rate": 1.7107932887561196e-06, "loss": 0.7973, "step": 3797 }, { "epoch": 0.8169498816949882, "grad_norm": 0.0, "learning_rate": 1.7068979830692889e-06, "loss": 0.7536, "step": 3798 }, { "epoch": 0.8171649817164982, "grad_norm": 0.0, "learning_rate": 1.703006703220186e-06, "loss": 0.8416, "step": 3799 }, { "epoch": 0.8173800817380081, "grad_norm": 0.0, "learning_rate": 1.6991194510978015e-06, "loss": 0.8067, "step": 3800 }, { "epoch": 0.8175951817595182, "grad_norm": 0.0, "learning_rate": 1.6952362285891866e-06, "loss": 0.7911, "step": 3801 }, { "epoch": 0.8178102817810282, "grad_norm": 0.0, "learning_rate": 1.691357037579423e-06, "loss": 0.8292, "step": 3802 }, { "epoch": 0.8180253818025381, "grad_norm": 0.0, "learning_rate": 1.6874818799516468e-06, "loss": 0.7846, "step": 3803 }, { "epoch": 0.8182404818240482, "grad_norm": 0.0, "learning_rate": 1.6836107575870254e-06, "loss": 0.8502, "step": 3804 }, { "epoch": 0.8184555818455582, "grad_norm": 0.0, "learning_rate": 1.6797436723647775e-06, "loss": 0.8255, "step": 3805 }, { "epoch": 0.8186706818670682, "grad_norm": 0.0, "learning_rate": 1.675880626162153e-06, "loss": 0.8836, "step": 3806 }, { "epoch": 0.8188857818885782, "grad_norm": 0.0, "learning_rate": 1.6720216208544448e-06, "loss": 0.755, "step": 3807 }, { "epoch": 0.8191008819100882, "grad_norm": 0.0, "learning_rate": 1.6681666583149846e-06, "loss": 0.8141, "step": 3808 }, { "epoch": 0.8193159819315982, "grad_norm": 0.0, "learning_rate": 1.6643157404151468e-06, "loss": 0.8652, "step": 3809 }, { "epoch": 0.8195310819531082, "grad_norm": 0.0, "learning_rate": 1.6604688690243288e-06, "loss": 0.8787, "step": 3810 }, { "epoch": 0.8197461819746182, "grad_norm": 0.0, "learning_rate": 1.6566260460099749e-06, "loss": 0.8508, "step": 3811 }, { "epoch": 0.8199612819961282, "grad_norm": 0.0, "learning_rate": 1.652787273237565e-06, "loss": 0.7976, "step": 3812 }, { "epoch": 0.8201763820176382, "grad_norm": 0.0, "learning_rate": 1.648952552570603e-06, "loss": 0.7554, "step": 3813 }, { "epoch": 0.8203914820391482, "grad_norm": 0.0, "learning_rate": 1.6451218858706374e-06, "loss": 0.7877, "step": 3814 }, { "epoch": 0.8206065820606582, "grad_norm": 0.0, "learning_rate": 1.6412952749972355e-06, "loss": 0.8204, "step": 3815 }, { "epoch": 0.8208216820821682, "grad_norm": 0.0, "learning_rate": 1.6374727218080078e-06, "loss": 0.8325, "step": 3816 }, { "epoch": 0.8210367821036783, "grad_norm": 0.0, "learning_rate": 1.633654228158592e-06, "loss": 0.7842, "step": 3817 }, { "epoch": 0.8212518821251882, "grad_norm": 0.0, "learning_rate": 1.6298397959026513e-06, "loss": 0.7846, "step": 3818 }, { "epoch": 0.8214669821466982, "grad_norm": 0.0, "learning_rate": 1.6260294268918763e-06, "loss": 0.7951, "step": 3819 }, { "epoch": 0.8216820821682083, "grad_norm": 0.0, "learning_rate": 1.6222231229759945e-06, "loss": 0.8135, "step": 3820 }, { "epoch": 0.8218971821897182, "grad_norm": 0.0, "learning_rate": 1.6184208860027473e-06, "loss": 0.8149, "step": 3821 }, { "epoch": 0.8221122822112282, "grad_norm": 0.0, "learning_rate": 1.6146227178179142e-06, "loss": 0.8447, "step": 3822 }, { "epoch": 0.8223273822327383, "grad_norm": 0.0, "learning_rate": 1.610828620265289e-06, "loss": 0.8233, "step": 3823 }, { "epoch": 0.8225424822542482, "grad_norm": 0.0, "learning_rate": 1.6070385951866951e-06, "loss": 0.796, "step": 3824 }, { "epoch": 0.8227575822757582, "grad_norm": 0.0, "learning_rate": 1.603252644421982e-06, "loss": 0.836, "step": 3825 }, { "epoch": 0.8229726822972683, "grad_norm": 0.0, "learning_rate": 1.5994707698090118e-06, "loss": 0.7795, "step": 3826 }, { "epoch": 0.8231877823187782, "grad_norm": 0.0, "learning_rate": 1.595692973183679e-06, "loss": 0.8969, "step": 3827 }, { "epoch": 0.8234028823402882, "grad_norm": 0.0, "learning_rate": 1.5919192563798857e-06, "loss": 0.8371, "step": 3828 }, { "epoch": 0.8236179823617983, "grad_norm": 0.0, "learning_rate": 1.5881496212295654e-06, "loss": 0.858, "step": 3829 }, { "epoch": 0.8238330823833082, "grad_norm": 0.0, "learning_rate": 1.5843840695626666e-06, "loss": 0.8495, "step": 3830 }, { "epoch": 0.8240481824048183, "grad_norm": 0.0, "learning_rate": 1.5806226032071503e-06, "loss": 0.8382, "step": 3831 }, { "epoch": 0.8242632824263283, "grad_norm": 0.0, "learning_rate": 1.5768652239890014e-06, "loss": 0.8301, "step": 3832 }, { "epoch": 0.8244783824478382, "grad_norm": 0.0, "learning_rate": 1.5731119337322176e-06, "loss": 0.7716, "step": 3833 }, { "epoch": 0.8246934824693483, "grad_norm": 0.0, "learning_rate": 1.569362734258807e-06, "loss": 0.7937, "step": 3834 }, { "epoch": 0.8249085824908583, "grad_norm": 0.0, "learning_rate": 1.5656176273888025e-06, "loss": 0.8507, "step": 3835 }, { "epoch": 0.8251236825123682, "grad_norm": 0.0, "learning_rate": 1.5618766149402387e-06, "loss": 0.868, "step": 3836 }, { "epoch": 0.8253387825338783, "grad_norm": 0.0, "learning_rate": 1.5581396987291719e-06, "loss": 0.7863, "step": 3837 }, { "epoch": 0.8255538825553883, "grad_norm": 0.0, "learning_rate": 1.5544068805696666e-06, "loss": 0.8192, "step": 3838 }, { "epoch": 0.8257689825768982, "grad_norm": 0.0, "learning_rate": 1.5506781622737943e-06, "loss": 0.771, "step": 3839 }, { "epoch": 0.8259840825984083, "grad_norm": 0.0, "learning_rate": 1.5469535456516437e-06, "loss": 0.7906, "step": 3840 }, { "epoch": 0.8261991826199183, "grad_norm": 0.0, "learning_rate": 1.5432330325113043e-06, "loss": 0.8307, "step": 3841 }, { "epoch": 0.8264142826414282, "grad_norm": 0.0, "learning_rate": 1.5395166246588777e-06, "loss": 0.8545, "step": 3842 }, { "epoch": 0.8266293826629383, "grad_norm": 0.0, "learning_rate": 1.5358043238984755e-06, "loss": 0.7877, "step": 3843 }, { "epoch": 0.8268444826844483, "grad_norm": 0.0, "learning_rate": 1.532096132032208e-06, "loss": 0.8378, "step": 3844 }, { "epoch": 0.8270595827059583, "grad_norm": 0.0, "learning_rate": 1.5283920508602e-06, "loss": 0.8509, "step": 3845 }, { "epoch": 0.8272746827274683, "grad_norm": 0.0, "learning_rate": 1.524692082180571e-06, "loss": 0.8141, "step": 3846 }, { "epoch": 0.8274897827489782, "grad_norm": 0.0, "learning_rate": 1.520996227789454e-06, "loss": 0.8449, "step": 3847 }, { "epoch": 0.8277048827704883, "grad_norm": 0.0, "learning_rate": 1.5173044894809764e-06, "loss": 0.8414, "step": 3848 }, { "epoch": 0.8279199827919983, "grad_norm": 0.0, "learning_rate": 1.5136168690472696e-06, "loss": 0.7887, "step": 3849 }, { "epoch": 0.8281350828135082, "grad_norm": 0.0, "learning_rate": 1.50993336827847e-06, "loss": 0.8195, "step": 3850 }, { "epoch": 0.8283501828350183, "grad_norm": 0.0, "learning_rate": 1.5062539889627138e-06, "loss": 0.7593, "step": 3851 }, { "epoch": 0.8285652828565283, "grad_norm": 0.0, "learning_rate": 1.5025787328861275e-06, "loss": 0.8195, "step": 3852 }, { "epoch": 0.8287803828780382, "grad_norm": 0.0, "learning_rate": 1.4989076018328497e-06, "loss": 0.7996, "step": 3853 }, { "epoch": 0.8289954828995483, "grad_norm": 0.0, "learning_rate": 1.4952405975850025e-06, "loss": 0.8357, "step": 3854 }, { "epoch": 0.8292105829210583, "grad_norm": 0.0, "learning_rate": 1.4915777219227156e-06, "loss": 0.8535, "step": 3855 }, { "epoch": 0.8294256829425682, "grad_norm": 0.0, "learning_rate": 1.4879189766241131e-06, "loss": 0.7645, "step": 3856 }, { "epoch": 0.8296407829640783, "grad_norm": 0.0, "learning_rate": 1.4842643634653054e-06, "loss": 0.7997, "step": 3857 }, { "epoch": 0.8298558829855883, "grad_norm": 0.0, "learning_rate": 1.4806138842204088e-06, "loss": 0.7804, "step": 3858 }, { "epoch": 0.8300709830070983, "grad_norm": 0.0, "learning_rate": 1.4769675406615213e-06, "loss": 0.7987, "step": 3859 }, { "epoch": 0.8302860830286083, "grad_norm": 0.0, "learning_rate": 1.4733253345587429e-06, "loss": 0.8513, "step": 3860 }, { "epoch": 0.8305011830501183, "grad_norm": 0.0, "learning_rate": 1.4696872676801621e-06, "loss": 0.8596, "step": 3861 }, { "epoch": 0.8307162830716283, "grad_norm": 0.0, "learning_rate": 1.4660533417918554e-06, "loss": 0.8581, "step": 3862 }, { "epoch": 0.8309313830931383, "grad_norm": 0.0, "learning_rate": 1.4624235586578895e-06, "loss": 0.7849, "step": 3863 }, { "epoch": 0.8311464831146483, "grad_norm": 0.0, "learning_rate": 1.4587979200403213e-06, "loss": 0.8473, "step": 3864 }, { "epoch": 0.8313615831361583, "grad_norm": 0.0, "learning_rate": 1.4551764276991965e-06, "loss": 0.8544, "step": 3865 }, { "epoch": 0.8315766831576683, "grad_norm": 0.0, "learning_rate": 1.4515590833925508e-06, "loss": 0.7443, "step": 3866 }, { "epoch": 0.8317917831791783, "grad_norm": 0.0, "learning_rate": 1.4479458888763964e-06, "loss": 0.8218, "step": 3867 }, { "epoch": 0.8320068832006883, "grad_norm": 0.0, "learning_rate": 1.4443368459047413e-06, "loss": 0.7359, "step": 3868 }, { "epoch": 0.8322219832221983, "grad_norm": 0.0, "learning_rate": 1.4407319562295763e-06, "loss": 0.7972, "step": 3869 }, { "epoch": 0.8324370832437084, "grad_norm": 0.0, "learning_rate": 1.437131221600868e-06, "loss": 0.7999, "step": 3870 }, { "epoch": 0.8326521832652183, "grad_norm": 0.0, "learning_rate": 1.4335346437665787e-06, "loss": 0.8433, "step": 3871 }, { "epoch": 0.8328672832867283, "grad_norm": 0.0, "learning_rate": 1.4299422244726403e-06, "loss": 0.8353, "step": 3872 }, { "epoch": 0.8330823833082384, "grad_norm": 0.0, "learning_rate": 1.4263539654629732e-06, "loss": 0.8036, "step": 3873 }, { "epoch": 0.8332974833297483, "grad_norm": 0.0, "learning_rate": 1.4227698684794799e-06, "loss": 0.8485, "step": 3874 }, { "epoch": 0.8335125833512583, "grad_norm": 0.0, "learning_rate": 1.4191899352620342e-06, "loss": 0.7747, "step": 3875 }, { "epoch": 0.8337276833727684, "grad_norm": 0.0, "learning_rate": 1.415614167548498e-06, "loss": 0.8214, "step": 3876 }, { "epoch": 0.8339427833942783, "grad_norm": 0.0, "learning_rate": 1.4120425670747061e-06, "loss": 0.7688, "step": 3877 }, { "epoch": 0.8341578834157883, "grad_norm": 0.0, "learning_rate": 1.4084751355744675e-06, "loss": 0.7696, "step": 3878 }, { "epoch": 0.8343729834372984, "grad_norm": 0.0, "learning_rate": 1.4049118747795754e-06, "loss": 0.7973, "step": 3879 }, { "epoch": 0.8345880834588083, "grad_norm": 0.0, "learning_rate": 1.4013527864197883e-06, "loss": 0.8324, "step": 3880 }, { "epoch": 0.8348031834803183, "grad_norm": 0.0, "learning_rate": 1.3977978722228502e-06, "loss": 0.8056, "step": 3881 }, { "epoch": 0.8350182835018284, "grad_norm": 0.0, "learning_rate": 1.3942471339144726e-06, "loss": 0.8496, "step": 3882 }, { "epoch": 0.8352333835233383, "grad_norm": 0.0, "learning_rate": 1.3907005732183364e-06, "loss": 0.8699, "step": 3883 }, { "epoch": 0.8354484835448484, "grad_norm": 0.0, "learning_rate": 1.387158191856105e-06, "loss": 0.838, "step": 3884 }, { "epoch": 0.8356635835663584, "grad_norm": 0.0, "learning_rate": 1.3836199915474003e-06, "loss": 0.8418, "step": 3885 }, { "epoch": 0.8358786835878683, "grad_norm": 0.0, "learning_rate": 1.3800859740098238e-06, "loss": 0.8623, "step": 3886 }, { "epoch": 0.8360937836093784, "grad_norm": 0.0, "learning_rate": 1.3765561409589468e-06, "loss": 0.7972, "step": 3887 }, { "epoch": 0.8363088836308884, "grad_norm": 0.0, "learning_rate": 1.3730304941082994e-06, "loss": 0.8416, "step": 3888 }, { "epoch": 0.8365239836523983, "grad_norm": 0.0, "learning_rate": 1.3695090351693929e-06, "loss": 0.874, "step": 3889 }, { "epoch": 0.8367390836739084, "grad_norm": 0.0, "learning_rate": 1.365991765851693e-06, "loss": 0.7933, "step": 3890 }, { "epoch": 0.8369541836954184, "grad_norm": 0.0, "learning_rate": 1.3624786878626416e-06, "loss": 0.8634, "step": 3891 }, { "epoch": 0.8371692837169283, "grad_norm": 0.0, "learning_rate": 1.3589698029076415e-06, "loss": 0.7776, "step": 3892 }, { "epoch": 0.8373843837384384, "grad_norm": 0.0, "learning_rate": 1.3554651126900564e-06, "loss": 0.8601, "step": 3893 }, { "epoch": 0.8375994837599484, "grad_norm": 0.0, "learning_rate": 1.3519646189112202e-06, "loss": 0.7494, "step": 3894 }, { "epoch": 0.8378145837814583, "grad_norm": 0.0, "learning_rate": 1.34846832327043e-06, "loss": 0.7402, "step": 3895 }, { "epoch": 0.8380296838029684, "grad_norm": 0.0, "learning_rate": 1.3449762274649358e-06, "loss": 0.7525, "step": 3896 }, { "epoch": 0.8382447838244784, "grad_norm": 0.0, "learning_rate": 1.3414883331899608e-06, "loss": 0.8283, "step": 3897 }, { "epoch": 0.8384598838459884, "grad_norm": 0.0, "learning_rate": 1.3380046421386795e-06, "loss": 0.7787, "step": 3898 }, { "epoch": 0.8386749838674984, "grad_norm": 0.0, "learning_rate": 1.3345251560022288e-06, "loss": 0.8363, "step": 3899 }, { "epoch": 0.8388900838890084, "grad_norm": 0.0, "learning_rate": 1.331049876469709e-06, "loss": 0.7725, "step": 3900 }, { "epoch": 0.8391051839105184, "grad_norm": 0.0, "learning_rate": 1.3275788052281692e-06, "loss": 0.7987, "step": 3901 }, { "epoch": 0.8393202839320284, "grad_norm": 0.0, "learning_rate": 1.324111943962626e-06, "loss": 0.7787, "step": 3902 }, { "epoch": 0.8395353839535384, "grad_norm": 0.0, "learning_rate": 1.3206492943560401e-06, "loss": 0.7812, "step": 3903 }, { "epoch": 0.8397504839750484, "grad_norm": 0.0, "learning_rate": 1.3171908580893399e-06, "loss": 0.817, "step": 3904 }, { "epoch": 0.8399655839965584, "grad_norm": 0.0, "learning_rate": 1.3137366368413995e-06, "loss": 0.8107, "step": 3905 }, { "epoch": 0.8401806840180684, "grad_norm": 0.0, "learning_rate": 1.310286632289055e-06, "loss": 0.7542, "step": 3906 }, { "epoch": 0.8403957840395784, "grad_norm": 0.0, "learning_rate": 1.3068408461070868e-06, "loss": 0.8749, "step": 3907 }, { "epoch": 0.8406108840610884, "grad_norm": 0.0, "learning_rate": 1.3033992799682304e-06, "loss": 0.8151, "step": 3908 }, { "epoch": 0.8408259840825985, "grad_norm": 0.0, "learning_rate": 1.299961935543176e-06, "loss": 0.8267, "step": 3909 }, { "epoch": 0.8410410841041084, "grad_norm": 0.0, "learning_rate": 1.296528814500565e-06, "loss": 0.8204, "step": 3910 }, { "epoch": 0.8412561841256184, "grad_norm": 0.0, "learning_rate": 1.2930999185069804e-06, "loss": 0.7437, "step": 3911 }, { "epoch": 0.8414712841471285, "grad_norm": 0.0, "learning_rate": 1.2896752492269627e-06, "loss": 0.7713, "step": 3912 }, { "epoch": 0.8416863841686384, "grad_norm": 0.0, "learning_rate": 1.2862548083229998e-06, "loss": 0.7972, "step": 3913 }, { "epoch": 0.8419014841901484, "grad_norm": 0.0, "learning_rate": 1.2828385974555203e-06, "loss": 0.798, "step": 3914 }, { "epoch": 0.8421165842116585, "grad_norm": 0.0, "learning_rate": 1.2794266182829073e-06, "loss": 0.8511, "step": 3915 }, { "epoch": 0.8423316842331684, "grad_norm": 0.0, "learning_rate": 1.276018872461482e-06, "loss": 0.8369, "step": 3916 }, { "epoch": 0.8425467842546784, "grad_norm": 0.0, "learning_rate": 1.2726153616455173e-06, "loss": 0.7553, "step": 3917 }, { "epoch": 0.8427618842761885, "grad_norm": 0.0, "learning_rate": 1.2692160874872295e-06, "loss": 0.8025, "step": 3918 }, { "epoch": 0.8429769842976984, "grad_norm": 0.0, "learning_rate": 1.265821051636773e-06, "loss": 0.767, "step": 3919 }, { "epoch": 0.8431920843192084, "grad_norm": 0.0, "learning_rate": 1.2624302557422475e-06, "loss": 0.801, "step": 3920 }, { "epoch": 0.8434071843407185, "grad_norm": 0.0, "learning_rate": 1.2590437014496993e-06, "loss": 0.8205, "step": 3921 }, { "epoch": 0.8436222843622284, "grad_norm": 0.0, "learning_rate": 1.2556613904031055e-06, "loss": 0.8021, "step": 3922 }, { "epoch": 0.8438373843837385, "grad_norm": 0.0, "learning_rate": 1.2522833242443954e-06, "loss": 0.8669, "step": 3923 }, { "epoch": 0.8440524844052485, "grad_norm": 0.0, "learning_rate": 1.2489095046134248e-06, "loss": 0.821, "step": 3924 }, { "epoch": 0.8442675844267584, "grad_norm": 0.0, "learning_rate": 1.245539933147999e-06, "loss": 0.7905, "step": 3925 }, { "epoch": 0.8444826844482685, "grad_norm": 0.0, "learning_rate": 1.2421746114838573e-06, "loss": 0.7471, "step": 3926 }, { "epoch": 0.8446977844697785, "grad_norm": 0.0, "learning_rate": 1.238813541254672e-06, "loss": 0.7795, "step": 3927 }, { "epoch": 0.8449128844912884, "grad_norm": 0.0, "learning_rate": 1.235456724092059e-06, "loss": 0.8178, "step": 3928 }, { "epoch": 0.8451279845127985, "grad_norm": 0.0, "learning_rate": 1.2321041616255613e-06, "loss": 0.7856, "step": 3929 }, { "epoch": 0.8453430845343085, "grad_norm": 0.0, "learning_rate": 1.228755855482663e-06, "loss": 0.8766, "step": 3930 }, { "epoch": 0.8455581845558184, "grad_norm": 0.0, "learning_rate": 1.22541180728878e-06, "loss": 0.7484, "step": 3931 }, { "epoch": 0.8457732845773285, "grad_norm": 0.0, "learning_rate": 1.2220720186672585e-06, "loss": 0.7785, "step": 3932 }, { "epoch": 0.8459883845988384, "grad_norm": 0.0, "learning_rate": 1.218736491239384e-06, "loss": 0.8349, "step": 3933 }, { "epoch": 0.8462034846203484, "grad_norm": 0.0, "learning_rate": 1.2154052266243655e-06, "loss": 0.8415, "step": 3934 }, { "epoch": 0.8464185846418585, "grad_norm": 0.0, "learning_rate": 1.2120782264393427e-06, "loss": 0.7593, "step": 3935 }, { "epoch": 0.8466336846633684, "grad_norm": 0.0, "learning_rate": 1.208755492299395e-06, "loss": 0.7303, "step": 3936 }, { "epoch": 0.8468487846848785, "grad_norm": 0.0, "learning_rate": 1.2054370258175186e-06, "loss": 0.7787, "step": 3937 }, { "epoch": 0.8470638847063885, "grad_norm": 0.0, "learning_rate": 1.2021228286046461e-06, "loss": 0.798, "step": 3938 }, { "epoch": 0.8472789847278984, "grad_norm": 0.0, "learning_rate": 1.198812902269636e-06, "loss": 0.8734, "step": 3939 }, { "epoch": 0.8474940847494085, "grad_norm": 0.0, "learning_rate": 1.1955072484192697e-06, "loss": 0.7668, "step": 3940 }, { "epoch": 0.8477091847709185, "grad_norm": 0.0, "learning_rate": 1.1922058686582616e-06, "loss": 0.8076, "step": 3941 }, { "epoch": 0.8479242847924284, "grad_norm": 0.0, "learning_rate": 1.1889087645892427e-06, "loss": 0.7927, "step": 3942 }, { "epoch": 0.8481393848139385, "grad_norm": 0.0, "learning_rate": 1.1856159378127752e-06, "loss": 0.8189, "step": 3943 }, { "epoch": 0.8483544848354485, "grad_norm": 0.0, "learning_rate": 1.1823273899273436e-06, "loss": 0.8118, "step": 3944 }, { "epoch": 0.8485695848569584, "grad_norm": 0.0, "learning_rate": 1.179043122529352e-06, "loss": 0.7799, "step": 3945 }, { "epoch": 0.8487846848784685, "grad_norm": 0.0, "learning_rate": 1.1757631372131307e-06, "loss": 0.8211, "step": 3946 }, { "epoch": 0.8489997848999785, "grad_norm": 0.0, "learning_rate": 1.1724874355709259e-06, "loss": 0.7756, "step": 3947 }, { "epoch": 0.8492148849214884, "grad_norm": 0.0, "learning_rate": 1.1692160191929125e-06, "loss": 0.8393, "step": 3948 }, { "epoch": 0.8494299849429985, "grad_norm": 0.0, "learning_rate": 1.1659488896671777e-06, "loss": 0.7857, "step": 3949 }, { "epoch": 0.8496450849645085, "grad_norm": 0.0, "learning_rate": 1.1626860485797298e-06, "loss": 0.8501, "step": 3950 }, { "epoch": 0.8498601849860185, "grad_norm": 0.0, "learning_rate": 1.1594274975144948e-06, "loss": 0.7444, "step": 3951 }, { "epoch": 0.8500752850075285, "grad_norm": 0.0, "learning_rate": 1.156173238053322e-06, "loss": 0.7623, "step": 3952 }, { "epoch": 0.8502903850290385, "grad_norm": 0.0, "learning_rate": 1.1529232717759676e-06, "loss": 0.8747, "step": 3953 }, { "epoch": 0.8505054850505485, "grad_norm": 0.0, "learning_rate": 1.1496776002601128e-06, "loss": 0.8333, "step": 3954 }, { "epoch": 0.8507205850720585, "grad_norm": 0.0, "learning_rate": 1.1464362250813454e-06, "loss": 0.8083, "step": 3955 }, { "epoch": 0.8509356850935685, "grad_norm": 0.0, "learning_rate": 1.1431991478131754e-06, "loss": 0.8023, "step": 3956 }, { "epoch": 0.8511507851150785, "grad_norm": 0.0, "learning_rate": 1.1399663700270225e-06, "loss": 0.8206, "step": 3957 }, { "epoch": 0.8513658851365885, "grad_norm": 0.0, "learning_rate": 1.1367378932922179e-06, "loss": 0.8124, "step": 3958 }, { "epoch": 0.8515809851580985, "grad_norm": 0.0, "learning_rate": 1.1335137191760094e-06, "loss": 0.9286, "step": 3959 }, { "epoch": 0.8517960851796085, "grad_norm": 0.0, "learning_rate": 1.13029384924355e-06, "loss": 0.8024, "step": 3960 }, { "epoch": 0.8520111852011185, "grad_norm": 0.0, "learning_rate": 1.1270782850579077e-06, "loss": 0.8163, "step": 3961 }, { "epoch": 0.8522262852226286, "grad_norm": 0.0, "learning_rate": 1.1238670281800635e-06, "loss": 0.7791, "step": 3962 }, { "epoch": 0.8524413852441385, "grad_norm": 0.0, "learning_rate": 1.1206600801688983e-06, "loss": 0.8231, "step": 3963 }, { "epoch": 0.8526564852656485, "grad_norm": 0.0, "learning_rate": 1.1174574425812068e-06, "loss": 0.8355, "step": 3964 }, { "epoch": 0.8528715852871586, "grad_norm": 0.0, "learning_rate": 1.1142591169716931e-06, "loss": 0.8274, "step": 3965 }, { "epoch": 0.8530866853086685, "grad_norm": 0.0, "learning_rate": 1.1110651048929621e-06, "loss": 0.8579, "step": 3966 }, { "epoch": 0.8533017853301785, "grad_norm": 0.0, "learning_rate": 1.1078754078955322e-06, "loss": 0.8434, "step": 3967 }, { "epoch": 0.8535168853516886, "grad_norm": 0.0, "learning_rate": 1.1046900275278183e-06, "loss": 0.8402, "step": 3968 }, { "epoch": 0.8537319853731985, "grad_norm": 0.0, "learning_rate": 1.1015089653361488e-06, "loss": 0.8539, "step": 3969 }, { "epoch": 0.8539470853947085, "grad_norm": 0.0, "learning_rate": 1.0983322228647509e-06, "loss": 0.7604, "step": 3970 }, { "epoch": 0.8541621854162186, "grad_norm": 0.0, "learning_rate": 1.0951598016557542e-06, "loss": 0.7819, "step": 3971 }, { "epoch": 0.8543772854377285, "grad_norm": 0.0, "learning_rate": 1.0919917032491935e-06, "loss": 0.7882, "step": 3972 }, { "epoch": 0.8545923854592385, "grad_norm": 0.0, "learning_rate": 1.0888279291830017e-06, "loss": 0.7842, "step": 3973 }, { "epoch": 0.8548074854807486, "grad_norm": 0.0, "learning_rate": 1.085668480993015e-06, "loss": 0.7693, "step": 3974 }, { "epoch": 0.8550225855022585, "grad_norm": 0.0, "learning_rate": 1.0825133602129722e-06, "loss": 0.8036, "step": 3975 }, { "epoch": 0.8552376855237686, "grad_norm": 0.0, "learning_rate": 1.0793625683745024e-06, "loss": 0.8822, "step": 3976 }, { "epoch": 0.8554527855452786, "grad_norm": 0.0, "learning_rate": 1.0762161070071441e-06, "loss": 0.8464, "step": 3977 }, { "epoch": 0.8556678855667885, "grad_norm": 0.0, "learning_rate": 1.073073977638327e-06, "loss": 0.8268, "step": 3978 }, { "epoch": 0.8558829855882986, "grad_norm": 0.0, "learning_rate": 1.0699361817933761e-06, "loss": 0.7827, "step": 3979 }, { "epoch": 0.8560980856098086, "grad_norm": 0.0, "learning_rate": 1.0668027209955212e-06, "loss": 0.7975, "step": 3980 }, { "epoch": 0.8563131856313185, "grad_norm": 0.0, "learning_rate": 1.0636735967658785e-06, "loss": 0.8543, "step": 3981 }, { "epoch": 0.8565282856528286, "grad_norm": 0.0, "learning_rate": 1.060548810623464e-06, "loss": 0.793, "step": 3982 }, { "epoch": 0.8567433856743386, "grad_norm": 0.0, "learning_rate": 1.057428364085189e-06, "loss": 0.8201, "step": 3983 }, { "epoch": 0.8569584856958485, "grad_norm": 0.0, "learning_rate": 1.0543122586658528e-06, "loss": 0.844, "step": 3984 }, { "epoch": 0.8571735857173586, "grad_norm": 0.0, "learning_rate": 1.051200495878153e-06, "loss": 0.764, "step": 3985 }, { "epoch": 0.8573886857388686, "grad_norm": 0.0, "learning_rate": 1.0480930772326735e-06, "loss": 0.8134, "step": 3986 }, { "epoch": 0.8576037857603785, "grad_norm": 0.0, "learning_rate": 1.0449900042378957e-06, "loss": 0.8264, "step": 3987 }, { "epoch": 0.8578188857818886, "grad_norm": 0.0, "learning_rate": 1.0418912784001878e-06, "loss": 0.8634, "step": 3988 }, { "epoch": 0.8580339858033986, "grad_norm": 0.0, "learning_rate": 1.0387969012238064e-06, "loss": 0.8592, "step": 3989 }, { "epoch": 0.8582490858249086, "grad_norm": 0.0, "learning_rate": 1.0357068742109e-06, "loss": 0.8129, "step": 3990 }, { "epoch": 0.8584641858464186, "grad_norm": 0.0, "learning_rate": 1.0326211988615031e-06, "loss": 0.7132, "step": 3991 }, { "epoch": 0.8586792858679286, "grad_norm": 0.0, "learning_rate": 1.029539876673541e-06, "loss": 0.8411, "step": 3992 }, { "epoch": 0.8588943858894386, "grad_norm": 0.0, "learning_rate": 1.0264629091428213e-06, "loss": 0.7886, "step": 3993 }, { "epoch": 0.8591094859109486, "grad_norm": 0.0, "learning_rate": 1.0233902977630383e-06, "loss": 0.863, "step": 3994 }, { "epoch": 0.8593245859324586, "grad_norm": 0.0, "learning_rate": 1.0203220440257756e-06, "loss": 0.8503, "step": 3995 }, { "epoch": 0.8595396859539686, "grad_norm": 0.0, "learning_rate": 1.0172581494204992e-06, "loss": 0.7925, "step": 3996 }, { "epoch": 0.8597547859754786, "grad_norm": 0.0, "learning_rate": 1.0141986154345573e-06, "loss": 0.8009, "step": 3997 }, { "epoch": 0.8599698859969886, "grad_norm": 0.0, "learning_rate": 1.0111434435531852e-06, "loss": 0.8016, "step": 3998 }, { "epoch": 0.8601849860184986, "grad_norm": 0.0, "learning_rate": 1.0080926352594945e-06, "loss": 0.8077, "step": 3999 }, { "epoch": 0.8604000860400086, "grad_norm": 0.0, "learning_rate": 1.0050461920344846e-06, "loss": 0.7732, "step": 4000 }, { "epoch": 0.8606151860615187, "grad_norm": 0.0, "learning_rate": 1.0020041153570349e-06, "loss": 0.769, "step": 4001 }, { "epoch": 0.8608302860830286, "grad_norm": 0.0, "learning_rate": 9.989664067038996e-07, "loss": 0.8817, "step": 4002 }, { "epoch": 0.8610453861045386, "grad_norm": 0.0, "learning_rate": 9.959330675497202e-07, "loss": 0.7599, "step": 4003 }, { "epoch": 0.8612604861260487, "grad_norm": 0.0, "learning_rate": 9.929040993670113e-07, "loss": 0.7567, "step": 4004 }, { "epoch": 0.8614755861475586, "grad_norm": 0.0, "learning_rate": 9.898795036261688e-07, "loss": 0.7729, "step": 4005 }, { "epoch": 0.8616906861690686, "grad_norm": 0.0, "learning_rate": 9.868592817954637e-07, "loss": 0.7936, "step": 4006 }, { "epoch": 0.8619057861905787, "grad_norm": 0.0, "learning_rate": 9.838434353410476e-07, "loss": 0.7803, "step": 4007 }, { "epoch": 0.8621208862120886, "grad_norm": 0.0, "learning_rate": 9.808319657269417e-07, "loss": 0.8321, "step": 4008 }, { "epoch": 0.8623359862335986, "grad_norm": 0.0, "learning_rate": 9.77824874415051e-07, "loss": 0.8148, "step": 4009 }, { "epoch": 0.8625510862551087, "grad_norm": 0.0, "learning_rate": 9.748221628651445e-07, "loss": 0.856, "step": 4010 }, { "epoch": 0.8627661862766186, "grad_norm": 0.0, "learning_rate": 9.718238325348761e-07, "loss": 0.8032, "step": 4011 }, { "epoch": 0.8629812862981286, "grad_norm": 0.0, "learning_rate": 9.68829884879764e-07, "loss": 0.7729, "step": 4012 }, { "epoch": 0.8631963863196387, "grad_norm": 0.0, "learning_rate": 9.658403213532042e-07, "loss": 0.7833, "step": 4013 }, { "epoch": 0.8634114863411486, "grad_norm": 0.0, "learning_rate": 9.628551434064648e-07, "loss": 0.7611, "step": 4014 }, { "epoch": 0.8636265863626587, "grad_norm": 0.0, "learning_rate": 9.598743524886777e-07, "loss": 0.8537, "step": 4015 }, { "epoch": 0.8638416863841686, "grad_norm": 0.0, "learning_rate": 9.568979500468567e-07, "loss": 0.8364, "step": 4016 }, { "epoch": 0.8640567864056786, "grad_norm": 0.0, "learning_rate": 9.539259375258737e-07, "loss": 0.8443, "step": 4017 }, { "epoch": 0.8642718864271887, "grad_norm": 0.0, "learning_rate": 9.509583163684755e-07, "loss": 0.8187, "step": 4018 }, { "epoch": 0.8644869864486986, "grad_norm": 0.0, "learning_rate": 9.47995088015281e-07, "loss": 0.8419, "step": 4019 }, { "epoch": 0.8647020864702086, "grad_norm": 0.0, "learning_rate": 9.450362539047675e-07, "loss": 0.8777, "step": 4020 }, { "epoch": 0.8649171864917187, "grad_norm": 0.0, "learning_rate": 9.420818154732836e-07, "loss": 0.7702, "step": 4021 }, { "epoch": 0.8651322865132286, "grad_norm": 0.0, "learning_rate": 9.391317741550465e-07, "loss": 0.7896, "step": 4022 }, { "epoch": 0.8653473865347386, "grad_norm": 0.0, "learning_rate": 9.361861313821341e-07, "loss": 0.7701, "step": 4023 }, { "epoch": 0.8655624865562487, "grad_norm": 0.0, "learning_rate": 9.332448885844936e-07, "loss": 0.8665, "step": 4024 }, { "epoch": 0.8657775865777586, "grad_norm": 0.0, "learning_rate": 9.303080471899318e-07, "loss": 0.8586, "step": 4025 }, { "epoch": 0.8659926865992686, "grad_norm": 0.0, "learning_rate": 9.273756086241226e-07, "loss": 0.808, "step": 4026 }, { "epoch": 0.8662077866207787, "grad_norm": 0.0, "learning_rate": 9.244475743106019e-07, "loss": 0.81, "step": 4027 }, { "epoch": 0.8664228866422886, "grad_norm": 0.0, "learning_rate": 9.215239456707636e-07, "loss": 0.7797, "step": 4028 }, { "epoch": 0.8666379866637987, "grad_norm": 0.0, "learning_rate": 9.186047241238715e-07, "loss": 0.7424, "step": 4029 }, { "epoch": 0.8668530866853087, "grad_norm": 0.0, "learning_rate": 9.156899110870376e-07, "loss": 0.7831, "step": 4030 }, { "epoch": 0.8670681867068186, "grad_norm": 0.0, "learning_rate": 9.127795079752455e-07, "loss": 0.8355, "step": 4031 }, { "epoch": 0.8672832867283287, "grad_norm": 0.0, "learning_rate": 9.098735162013339e-07, "loss": 0.7995, "step": 4032 }, { "epoch": 0.8674983867498387, "grad_norm": 0.0, "learning_rate": 9.069719371759966e-07, "loss": 0.8055, "step": 4033 }, { "epoch": 0.8677134867713486, "grad_norm": 0.0, "learning_rate": 9.040747723077903e-07, "loss": 0.7507, "step": 4034 }, { "epoch": 0.8679285867928587, "grad_norm": 0.0, "learning_rate": 9.011820230031243e-07, "loss": 0.772, "step": 4035 }, { "epoch": 0.8681436868143687, "grad_norm": 0.0, "learning_rate": 8.982936906662698e-07, "loss": 0.8713, "step": 4036 }, { "epoch": 0.8683587868358786, "grad_norm": 0.0, "learning_rate": 8.954097766993497e-07, "loss": 0.8435, "step": 4037 }, { "epoch": 0.8685738868573887, "grad_norm": 0.0, "learning_rate": 8.925302825023407e-07, "loss": 0.8354, "step": 4038 }, { "epoch": 0.8687889868788987, "grad_norm": 0.0, "learning_rate": 8.896552094730781e-07, "loss": 0.8245, "step": 4039 }, { "epoch": 0.8690040869004086, "grad_norm": 0.0, "learning_rate": 8.86784559007251e-07, "loss": 0.7636, "step": 4040 }, { "epoch": 0.8692191869219187, "grad_norm": 0.0, "learning_rate": 8.839183324983969e-07, "loss": 0.8425, "step": 4041 }, { "epoch": 0.8694342869434287, "grad_norm": 0.0, "learning_rate": 8.810565313379116e-07, "loss": 0.7812, "step": 4042 }, { "epoch": 0.8696493869649387, "grad_norm": 0.0, "learning_rate": 8.781991569150361e-07, "loss": 0.805, "step": 4043 }, { "epoch": 0.8698644869864487, "grad_norm": 0.0, "learning_rate": 8.753462106168675e-07, "loss": 0.8906, "step": 4044 }, { "epoch": 0.8700795870079587, "grad_norm": 0.0, "learning_rate": 8.724976938283558e-07, "loss": 0.7649, "step": 4045 }, { "epoch": 0.8702946870294687, "grad_norm": 0.0, "learning_rate": 8.696536079322904e-07, "loss": 0.8462, "step": 4046 }, { "epoch": 0.8705097870509787, "grad_norm": 0.0, "learning_rate": 8.668139543093201e-07, "loss": 0.8447, "step": 4047 }, { "epoch": 0.8707248870724887, "grad_norm": 0.0, "learning_rate": 8.639787343379369e-07, "loss": 0.8451, "step": 4048 }, { "epoch": 0.8709399870939987, "grad_norm": 0.0, "learning_rate": 8.61147949394483e-07, "loss": 0.7721, "step": 4049 }, { "epoch": 0.8711550871155087, "grad_norm": 0.0, "learning_rate": 8.583216008531425e-07, "loss": 0.8067, "step": 4050 }, { "epoch": 0.8713701871370187, "grad_norm": 0.0, "learning_rate": 8.554996900859558e-07, "loss": 0.7619, "step": 4051 }, { "epoch": 0.8715852871585287, "grad_norm": 0.0, "learning_rate": 8.526822184627959e-07, "loss": 0.8198, "step": 4052 }, { "epoch": 0.8718003871800387, "grad_norm": 0.0, "learning_rate": 8.498691873513942e-07, "loss": 0.8013, "step": 4053 }, { "epoch": 0.8720154872015488, "grad_norm": 0.0, "learning_rate": 8.470605981173163e-07, "loss": 0.7863, "step": 4054 }, { "epoch": 0.8722305872230587, "grad_norm": 0.0, "learning_rate": 8.442564521239782e-07, "loss": 0.7414, "step": 4055 }, { "epoch": 0.8724456872445687, "grad_norm": 0.0, "learning_rate": 8.414567507326321e-07, "loss": 0.7875, "step": 4056 }, { "epoch": 0.8726607872660788, "grad_norm": 0.0, "learning_rate": 8.386614953023786e-07, "loss": 0.8031, "step": 4057 }, { "epoch": 0.8728758872875887, "grad_norm": 0.0, "learning_rate": 8.358706871901612e-07, "loss": 0.7616, "step": 4058 }, { "epoch": 0.8730909873090987, "grad_norm": 0.0, "learning_rate": 8.330843277507561e-07, "loss": 0.8623, "step": 4059 }, { "epoch": 0.8733060873306088, "grad_norm": 0.0, "learning_rate": 8.303024183367892e-07, "loss": 0.77, "step": 4060 }, { "epoch": 0.8735211873521187, "grad_norm": 0.0, "learning_rate": 8.27524960298719e-07, "loss": 0.7915, "step": 4061 }, { "epoch": 0.8737362873736287, "grad_norm": 0.0, "learning_rate": 8.247519549848482e-07, "loss": 0.8058, "step": 4062 }, { "epoch": 0.8739513873951388, "grad_norm": 0.0, "learning_rate": 8.219834037413165e-07, "loss": 0.768, "step": 4063 }, { "epoch": 0.8741664874166487, "grad_norm": 0.0, "learning_rate": 8.192193079121003e-07, "loss": 0.82, "step": 4064 }, { "epoch": 0.8743815874381587, "grad_norm": 0.0, "learning_rate": 8.164596688390114e-07, "loss": 0.8474, "step": 4065 }, { "epoch": 0.8745966874596688, "grad_norm": 0.0, "learning_rate": 8.13704487861704e-07, "loss": 0.7552, "step": 4066 }, { "epoch": 0.8748117874811787, "grad_norm": 0.0, "learning_rate": 8.109537663176603e-07, "loss": 0.8302, "step": 4067 }, { "epoch": 0.8750268875026888, "grad_norm": 0.0, "learning_rate": 8.082075055422067e-07, "loss": 0.824, "step": 4068 }, { "epoch": 0.8752419875241988, "grad_norm": 0.0, "learning_rate": 8.054657068684957e-07, "loss": 0.7457, "step": 4069 }, { "epoch": 0.8754570875457087, "grad_norm": 0.0, "learning_rate": 8.027283716275181e-07, "loss": 0.8522, "step": 4070 }, { "epoch": 0.8756721875672188, "grad_norm": 0.0, "learning_rate": 7.999955011480997e-07, "loss": 0.8092, "step": 4071 }, { "epoch": 0.8758872875887288, "grad_norm": 0.0, "learning_rate": 7.972670967568929e-07, "loss": 0.8704, "step": 4072 }, { "epoch": 0.8761023876102387, "grad_norm": 0.0, "learning_rate": 7.945431597783904e-07, "loss": 0.8121, "step": 4073 }, { "epoch": 0.8763174876317488, "grad_norm": 0.0, "learning_rate": 7.918236915349054e-07, "loss": 0.8276, "step": 4074 }, { "epoch": 0.8765325876532588, "grad_norm": 0.0, "learning_rate": 7.891086933465908e-07, "loss": 0.8533, "step": 4075 }, { "epoch": 0.8767476876747687, "grad_norm": 0.0, "learning_rate": 7.86398166531428e-07, "loss": 0.7981, "step": 4076 }, { "epoch": 0.8769627876962788, "grad_norm": 0.0, "learning_rate": 7.836921124052244e-07, "loss": 0.8322, "step": 4077 }, { "epoch": 0.8771778877177888, "grad_norm": 0.0, "learning_rate": 7.809905322816192e-07, "loss": 0.8279, "step": 4078 }, { "epoch": 0.8773929877392987, "grad_norm": 0.0, "learning_rate": 7.782934274720777e-07, "loss": 0.7878, "step": 4079 }, { "epoch": 0.8776080877608088, "grad_norm": 0.0, "learning_rate": 7.756007992858928e-07, "loss": 0.871, "step": 4080 }, { "epoch": 0.8778231877823188, "grad_norm": 0.0, "learning_rate": 7.729126490301885e-07, "loss": 0.7948, "step": 4081 }, { "epoch": 0.8780382878038288, "grad_norm": 0.0, "learning_rate": 7.70228978009907e-07, "loss": 0.8043, "step": 4082 }, { "epoch": 0.8782533878253388, "grad_norm": 0.0, "learning_rate": 7.675497875278226e-07, "loss": 0.823, "step": 4083 }, { "epoch": 0.8784684878468488, "grad_norm": 0.0, "learning_rate": 7.648750788845338e-07, "loss": 0.8113, "step": 4084 }, { "epoch": 0.8786835878683588, "grad_norm": 0.0, "learning_rate": 7.62204853378461e-07, "loss": 0.8034, "step": 4085 }, { "epoch": 0.8788986878898688, "grad_norm": 0.0, "learning_rate": 7.595391123058515e-07, "loss": 0.7887, "step": 4086 }, { "epoch": 0.8791137879113788, "grad_norm": 0.0, "learning_rate": 7.568778569607693e-07, "loss": 0.8328, "step": 4087 }, { "epoch": 0.8793288879328888, "grad_norm": 0.0, "learning_rate": 7.542210886351087e-07, "loss": 0.7811, "step": 4088 }, { "epoch": 0.8795439879543988, "grad_norm": 0.0, "learning_rate": 7.515688086185824e-07, "loss": 0.7833, "step": 4089 }, { "epoch": 0.8797590879759088, "grad_norm": 0.0, "learning_rate": 7.489210181987228e-07, "loss": 0.8512, "step": 4090 }, { "epoch": 0.8799741879974188, "grad_norm": 0.0, "learning_rate": 7.46277718660885e-07, "loss": 0.7635, "step": 4091 }, { "epoch": 0.8801892880189288, "grad_norm": 0.0, "learning_rate": 7.436389112882414e-07, "loss": 0.8277, "step": 4092 }, { "epoch": 0.8804043880404389, "grad_norm": 0.0, "learning_rate": 7.410045973617896e-07, "loss": 0.8393, "step": 4093 }, { "epoch": 0.8806194880619488, "grad_norm": 0.0, "learning_rate": 7.38374778160339e-07, "loss": 0.8259, "step": 4094 }, { "epoch": 0.8808345880834588, "grad_norm": 0.0, "learning_rate": 7.357494549605181e-07, "loss": 0.8248, "step": 4095 }, { "epoch": 0.8810496881049689, "grad_norm": 0.0, "learning_rate": 7.331286290367779e-07, "loss": 0.8801, "step": 4096 }, { "epoch": 0.8812647881264788, "grad_norm": 0.0, "learning_rate": 7.305123016613825e-07, "loss": 0.761, "step": 4097 }, { "epoch": 0.8814798881479888, "grad_norm": 0.0, "learning_rate": 7.279004741044104e-07, "loss": 0.7518, "step": 4098 }, { "epoch": 0.8816949881694989, "grad_norm": 0.0, "learning_rate": 7.25293147633761e-07, "loss": 0.886, "step": 4099 }, { "epoch": 0.8819100881910088, "grad_norm": 0.0, "learning_rate": 7.226903235151439e-07, "loss": 0.768, "step": 4100 }, { "epoch": 0.8821251882125188, "grad_norm": 0.0, "learning_rate": 7.20092003012084e-07, "loss": 0.8768, "step": 4101 }, { "epoch": 0.8823402882340288, "grad_norm": 0.0, "learning_rate": 7.174981873859232e-07, "loss": 0.7819, "step": 4102 }, { "epoch": 0.8825553882555388, "grad_norm": 0.0, "learning_rate": 7.149088778958125e-07, "loss": 0.8711, "step": 4103 }, { "epoch": 0.8827704882770488, "grad_norm": 0.0, "learning_rate": 7.123240757987182e-07, "loss": 0.8119, "step": 4104 }, { "epoch": 0.8829855882985588, "grad_norm": 0.0, "learning_rate": 7.097437823494147e-07, "loss": 0.7867, "step": 4105 }, { "epoch": 0.8832006883200688, "grad_norm": 0.0, "learning_rate": 7.07167998800491e-07, "loss": 0.8603, "step": 4106 }, { "epoch": 0.8834157883415789, "grad_norm": 0.0, "learning_rate": 7.045967264023502e-07, "loss": 0.8052, "step": 4107 }, { "epoch": 0.8836308883630888, "grad_norm": 0.0, "learning_rate": 7.020299664031971e-07, "loss": 0.7536, "step": 4108 }, { "epoch": 0.8838459883845988, "grad_norm": 0.0, "learning_rate": 6.994677200490508e-07, "loss": 0.7889, "step": 4109 }, { "epoch": 0.8840610884061089, "grad_norm": 0.0, "learning_rate": 6.969099885837405e-07, "loss": 0.7652, "step": 4110 }, { "epoch": 0.8842761884276188, "grad_norm": 0.0, "learning_rate": 6.943567732488999e-07, "loss": 0.8275, "step": 4111 }, { "epoch": 0.8844912884491288, "grad_norm": 0.0, "learning_rate": 6.91808075283975e-07, "loss": 0.8425, "step": 4112 }, { "epoch": 0.8847063884706389, "grad_norm": 0.0, "learning_rate": 6.892638959262121e-07, "loss": 0.8343, "step": 4113 }, { "epoch": 0.8849214884921488, "grad_norm": 0.0, "learning_rate": 6.867242364106719e-07, "loss": 0.7309, "step": 4114 }, { "epoch": 0.8851365885136588, "grad_norm": 0.0, "learning_rate": 6.84189097970217e-07, "loss": 0.8466, "step": 4115 }, { "epoch": 0.8853516885351689, "grad_norm": 0.0, "learning_rate": 6.816584818355132e-07, "loss": 0.8423, "step": 4116 }, { "epoch": 0.8855667885566788, "grad_norm": 0.0, "learning_rate": 6.791323892350354e-07, "loss": 0.8956, "step": 4117 }, { "epoch": 0.8857818885781888, "grad_norm": 0.0, "learning_rate": 6.766108213950584e-07, "loss": 0.7747, "step": 4118 }, { "epoch": 0.8859969885996989, "grad_norm": 0.0, "learning_rate": 6.740937795396641e-07, "loss": 0.7725, "step": 4119 }, { "epoch": 0.8862120886212088, "grad_norm": 0.0, "learning_rate": 6.715812648907361e-07, "loss": 0.8863, "step": 4120 }, { "epoch": 0.8864271886427189, "grad_norm": 0.0, "learning_rate": 6.690732786679588e-07, "loss": 0.7911, "step": 4121 }, { "epoch": 0.8866422886642289, "grad_norm": 0.0, "learning_rate": 6.665698220888206e-07, "loss": 0.8432, "step": 4122 }, { "epoch": 0.8868573886857388, "grad_norm": 0.0, "learning_rate": 6.640708963686082e-07, "loss": 0.8159, "step": 4123 }, { "epoch": 0.8870724887072489, "grad_norm": 0.0, "learning_rate": 6.615765027204101e-07, "loss": 0.8106, "step": 4124 }, { "epoch": 0.8872875887287589, "grad_norm": 0.0, "learning_rate": 6.590866423551179e-07, "loss": 0.7319, "step": 4125 }, { "epoch": 0.8875026887502688, "grad_norm": 0.0, "learning_rate": 6.566013164814167e-07, "loss": 0.7563, "step": 4126 }, { "epoch": 0.8877177887717789, "grad_norm": 0.0, "learning_rate": 6.541205263057936e-07, "loss": 0.8665, "step": 4127 }, { "epoch": 0.8879328887932889, "grad_norm": 0.0, "learning_rate": 6.516442730325378e-07, "loss": 0.7753, "step": 4128 }, { "epoch": 0.8881479888147988, "grad_norm": 0.0, "learning_rate": 6.491725578637254e-07, "loss": 0.7733, "step": 4129 }, { "epoch": 0.8883630888363089, "grad_norm": 0.0, "learning_rate": 6.467053819992419e-07, "loss": 0.8197, "step": 4130 }, { "epoch": 0.8885781888578189, "grad_norm": 0.0, "learning_rate": 6.442427466367595e-07, "loss": 0.7809, "step": 4131 }, { "epoch": 0.8887932888793288, "grad_norm": 0.0, "learning_rate": 6.417846529717509e-07, "loss": 0.8343, "step": 4132 }, { "epoch": 0.8890083889008389, "grad_norm": 0.0, "learning_rate": 6.393311021974858e-07, "loss": 0.834, "step": 4133 }, { "epoch": 0.8892234889223489, "grad_norm": 0.0, "learning_rate": 6.368820955050214e-07, "loss": 0.8609, "step": 4134 }, { "epoch": 0.8894385889438589, "grad_norm": 0.0, "learning_rate": 6.344376340832181e-07, "loss": 0.8688, "step": 4135 }, { "epoch": 0.8896536889653689, "grad_norm": 0.0, "learning_rate": 6.319977191187232e-07, "loss": 0.7604, "step": 4136 }, { "epoch": 0.8898687889868789, "grad_norm": 0.0, "learning_rate": 6.295623517959793e-07, "loss": 0.8473, "step": 4137 }, { "epoch": 0.8900838890083889, "grad_norm": 0.0, "learning_rate": 6.271315332972227e-07, "loss": 0.8738, "step": 4138 }, { "epoch": 0.8902989890298989, "grad_norm": 0.0, "learning_rate": 6.247052648024765e-07, "loss": 0.8354, "step": 4139 }, { "epoch": 0.8905140890514089, "grad_norm": 0.0, "learning_rate": 6.222835474895594e-07, "loss": 0.7946, "step": 4140 }, { "epoch": 0.8907291890729189, "grad_norm": 0.0, "learning_rate": 6.198663825340845e-07, "loss": 0.8237, "step": 4141 }, { "epoch": 0.8909442890944289, "grad_norm": 0.0, "learning_rate": 6.174537711094442e-07, "loss": 0.7563, "step": 4142 }, { "epoch": 0.891159389115939, "grad_norm": 0.0, "learning_rate": 6.150457143868305e-07, "loss": 0.7942, "step": 4143 }, { "epoch": 0.8913744891374489, "grad_norm": 0.0, "learning_rate": 6.126422135352184e-07, "loss": 0.7924, "step": 4144 }, { "epoch": 0.8915895891589589, "grad_norm": 0.0, "learning_rate": 6.102432697213734e-07, "loss": 0.7968, "step": 4145 }, { "epoch": 0.891804689180469, "grad_norm": 0.0, "learning_rate": 6.078488841098518e-07, "loss": 0.8032, "step": 4146 }, { "epoch": 0.8920197892019789, "grad_norm": 0.0, "learning_rate": 6.054590578629893e-07, "loss": 0.746, "step": 4147 }, { "epoch": 0.8922348892234889, "grad_norm": 0.0, "learning_rate": 6.030737921409169e-07, "loss": 0.8373, "step": 4148 }, { "epoch": 0.892449989244999, "grad_norm": 0.0, "learning_rate": 6.006930881015449e-07, "loss": 0.8345, "step": 4149 }, { "epoch": 0.8926650892665089, "grad_norm": 0.0, "learning_rate": 5.983169469005745e-07, "loss": 0.8288, "step": 4150 }, { "epoch": 0.8928801892880189, "grad_norm": 0.0, "learning_rate": 5.959453696914874e-07, "loss": 0.8162, "step": 4151 }, { "epoch": 0.893095289309529, "grad_norm": 0.0, "learning_rate": 5.93578357625555e-07, "loss": 0.7956, "step": 4152 }, { "epoch": 0.8933103893310389, "grad_norm": 0.0, "learning_rate": 5.912159118518246e-07, "loss": 0.8879, "step": 4153 }, { "epoch": 0.8935254893525489, "grad_norm": 0.0, "learning_rate": 5.888580335171368e-07, "loss": 0.7797, "step": 4154 }, { "epoch": 0.893740589374059, "grad_norm": 0.0, "learning_rate": 5.865047237661059e-07, "loss": 0.8232, "step": 4155 }, { "epoch": 0.8939556893955689, "grad_norm": 0.0, "learning_rate": 5.841559837411348e-07, "loss": 0.7893, "step": 4156 }, { "epoch": 0.894170789417079, "grad_norm": 0.0, "learning_rate": 5.818118145824036e-07, "loss": 0.7299, "step": 4157 }, { "epoch": 0.894385889438589, "grad_norm": 0.0, "learning_rate": 5.794722174278766e-07, "loss": 0.8014, "step": 4158 }, { "epoch": 0.8946009894600989, "grad_norm": 0.0, "learning_rate": 5.771371934132986e-07, "loss": 0.8195, "step": 4159 }, { "epoch": 0.894816089481609, "grad_norm": 0.0, "learning_rate": 5.748067436721893e-07, "loss": 0.8043, "step": 4160 }, { "epoch": 0.895031189503119, "grad_norm": 0.0, "learning_rate": 5.724808693358574e-07, "loss": 0.899, "step": 4161 }, { "epoch": 0.8952462895246289, "grad_norm": 0.0, "learning_rate": 5.701595715333807e-07, "loss": 0.7971, "step": 4162 }, { "epoch": 0.895461389546139, "grad_norm": 0.0, "learning_rate": 5.678428513916212e-07, "loss": 0.8225, "step": 4163 }, { "epoch": 0.895676489567649, "grad_norm": 0.0, "learning_rate": 5.655307100352181e-07, "loss": 0.8432, "step": 4164 }, { "epoch": 0.8958915895891589, "grad_norm": 0.0, "learning_rate": 5.632231485865857e-07, "loss": 0.8206, "step": 4165 }, { "epoch": 0.896106689610669, "grad_norm": 0.0, "learning_rate": 5.609201681659149e-07, "loss": 0.7885, "step": 4166 }, { "epoch": 0.896321789632179, "grad_norm": 0.0, "learning_rate": 5.586217698911766e-07, "loss": 0.7987, "step": 4167 }, { "epoch": 0.8965368896536889, "grad_norm": 0.0, "learning_rate": 5.56327954878112e-07, "loss": 0.8203, "step": 4168 }, { "epoch": 0.896751989675199, "grad_norm": 0.0, "learning_rate": 5.540387242402434e-07, "loss": 0.822, "step": 4169 }, { "epoch": 0.896967089696709, "grad_norm": 0.0, "learning_rate": 5.517540790888609e-07, "loss": 0.7397, "step": 4170 }, { "epoch": 0.897182189718219, "grad_norm": 0.0, "learning_rate": 5.49474020533034e-07, "loss": 0.7822, "step": 4171 }, { "epoch": 0.897397289739729, "grad_norm": 0.0, "learning_rate": 5.471985496796051e-07, "loss": 0.8293, "step": 4172 }, { "epoch": 0.897612389761239, "grad_norm": 0.0, "learning_rate": 5.449276676331849e-07, "loss": 0.7993, "step": 4173 }, { "epoch": 0.897827489782749, "grad_norm": 0.0, "learning_rate": 5.42661375496164e-07, "loss": 0.7558, "step": 4174 }, { "epoch": 0.898042589804259, "grad_norm": 0.0, "learning_rate": 5.403996743686957e-07, "loss": 0.8414, "step": 4175 }, { "epoch": 0.898257689825769, "grad_norm": 0.0, "learning_rate": 5.381425653487115e-07, "loss": 0.7719, "step": 4176 }, { "epoch": 0.898472789847279, "grad_norm": 0.0, "learning_rate": 5.358900495319142e-07, "loss": 0.863, "step": 4177 }, { "epoch": 0.898687889868789, "grad_norm": 0.0, "learning_rate": 5.33642128011771e-07, "loss": 0.7549, "step": 4178 }, { "epoch": 0.898902989890299, "grad_norm": 0.0, "learning_rate": 5.313988018795246e-07, "loss": 0.766, "step": 4179 }, { "epoch": 0.899118089911809, "grad_norm": 0.0, "learning_rate": 5.291600722241841e-07, "loss": 0.6876, "step": 4180 }, { "epoch": 0.899333189933319, "grad_norm": 0.0, "learning_rate": 5.269259401325256e-07, "loss": 0.8227, "step": 4181 }, { "epoch": 0.899548289954829, "grad_norm": 0.0, "learning_rate": 5.246964066890981e-07, "loss": 0.8183, "step": 4182 }, { "epoch": 0.899763389976339, "grad_norm": 0.0, "learning_rate": 5.224714729762137e-07, "loss": 0.8426, "step": 4183 }, { "epoch": 0.899978489997849, "grad_norm": 0.0, "learning_rate": 5.20251140073953e-07, "loss": 0.8602, "step": 4184 }, { "epoch": 0.900193590019359, "grad_norm": 0.0, "learning_rate": 5.180354090601669e-07, "loss": 0.7665, "step": 4185 }, { "epoch": 0.900408690040869, "grad_norm": 0.0, "learning_rate": 5.158242810104652e-07, "loss": 0.83, "step": 4186 }, { "epoch": 0.900623790062379, "grad_norm": 0.0, "learning_rate": 5.136177569982293e-07, "loss": 0.7985, "step": 4187 }, { "epoch": 0.900838890083889, "grad_norm": 0.0, "learning_rate": 5.114158380946022e-07, "loss": 0.8612, "step": 4188 }, { "epoch": 0.901053990105399, "grad_norm": 0.0, "learning_rate": 5.092185253684923e-07, "loss": 0.8068, "step": 4189 }, { "epoch": 0.901269090126909, "grad_norm": 0.0, "learning_rate": 5.070258198865741e-07, "loss": 0.7839, "step": 4190 }, { "epoch": 0.901484190148419, "grad_norm": 0.0, "learning_rate": 5.048377227132816e-07, "loss": 0.8212, "step": 4191 }, { "epoch": 0.901699290169929, "grad_norm": 0.0, "learning_rate": 5.026542349108155e-07, "loss": 0.7825, "step": 4192 }, { "epoch": 0.901914390191439, "grad_norm": 0.0, "learning_rate": 5.004753575391341e-07, "loss": 0.8149, "step": 4193 }, { "epoch": 0.902129490212949, "grad_norm": 0.0, "learning_rate": 4.98301091655965e-07, "loss": 0.8179, "step": 4194 }, { "epoch": 0.902344590234459, "grad_norm": 0.0, "learning_rate": 4.961314383167903e-07, "loss": 0.849, "step": 4195 }, { "epoch": 0.902559690255969, "grad_norm": 0.0, "learning_rate": 4.939663985748544e-07, "loss": 0.8117, "step": 4196 }, { "epoch": 0.902774790277479, "grad_norm": 0.0, "learning_rate": 4.918059734811642e-07, "loss": 0.8658, "step": 4197 }, { "epoch": 0.902989890298989, "grad_norm": 0.0, "learning_rate": 4.896501640844864e-07, "loss": 0.8302, "step": 4198 }, { "epoch": 0.9032049903204991, "grad_norm": 0.0, "learning_rate": 4.874989714313449e-07, "loss": 0.8427, "step": 4199 }, { "epoch": 0.903420090342009, "grad_norm": 0.0, "learning_rate": 4.853523965660245e-07, "loss": 0.8094, "step": 4200 }, { "epoch": 0.903635190363519, "grad_norm": 0.0, "learning_rate": 4.832104405305659e-07, "loss": 0.8133, "step": 4201 }, { "epoch": 0.9038502903850291, "grad_norm": 0.0, "learning_rate": 4.810731043647699e-07, "loss": 0.8374, "step": 4202 }, { "epoch": 0.904065390406539, "grad_norm": 0.0, "learning_rate": 4.78940389106195e-07, "loss": 0.8354, "step": 4203 }, { "epoch": 0.904280490428049, "grad_norm": 0.0, "learning_rate": 4.768122957901522e-07, "loss": 0.7873, "step": 4204 }, { "epoch": 0.9044955904495591, "grad_norm": 0.0, "learning_rate": 4.7468882544971615e-07, "loss": 0.772, "step": 4205 }, { "epoch": 0.904710690471069, "grad_norm": 0.0, "learning_rate": 4.72569979115709e-07, "loss": 0.8248, "step": 4206 }, { "epoch": 0.904925790492579, "grad_norm": 0.0, "learning_rate": 4.704557578167146e-07, "loss": 0.8531, "step": 4207 }, { "epoch": 0.9051408905140891, "grad_norm": 0.0, "learning_rate": 4.683461625790697e-07, "loss": 0.8226, "step": 4208 }, { "epoch": 0.905355990535599, "grad_norm": 0.0, "learning_rate": 4.662411944268641e-07, "loss": 0.8305, "step": 4209 }, { "epoch": 0.905571090557109, "grad_norm": 0.0, "learning_rate": 4.6414085438194034e-07, "loss": 0.7503, "step": 4210 }, { "epoch": 0.9057861905786191, "grad_norm": 0.0, "learning_rate": 4.620451434638995e-07, "loss": 0.7532, "step": 4211 }, { "epoch": 0.906001290600129, "grad_norm": 0.0, "learning_rate": 4.599540626900889e-07, "loss": 0.8496, "step": 4212 }, { "epoch": 0.9062163906216391, "grad_norm": 0.0, "learning_rate": 4.5786761307561546e-07, "loss": 0.846, "step": 4213 }, { "epoch": 0.9064314906431491, "grad_norm": 0.0, "learning_rate": 4.5578579563333006e-07, "loss": 0.8604, "step": 4214 }, { "epoch": 0.906646590664659, "grad_norm": 0.0, "learning_rate": 4.5370861137383984e-07, "loss": 0.7918, "step": 4215 }, { "epoch": 0.9068616906861691, "grad_norm": 0.0, "learning_rate": 4.5163606130550376e-07, "loss": 0.8504, "step": 4216 }, { "epoch": 0.9070767907076791, "grad_norm": 0.0, "learning_rate": 4.495681464344259e-07, "loss": 0.8171, "step": 4217 }, { "epoch": 0.907291890729189, "grad_norm": 0.0, "learning_rate": 4.4750486776446664e-07, "loss": 0.7802, "step": 4218 }, { "epoch": 0.9075069907506991, "grad_norm": 0.0, "learning_rate": 4.4544622629722924e-07, "loss": 0.7425, "step": 4219 }, { "epoch": 0.9077220907722091, "grad_norm": 0.0, "learning_rate": 4.4339222303207086e-07, "loss": 0.8488, "step": 4220 }, { "epoch": 0.907937190793719, "grad_norm": 0.0, "learning_rate": 4.413428589660962e-07, "loss": 0.8511, "step": 4221 }, { "epoch": 0.9081522908152291, "grad_norm": 0.0, "learning_rate": 4.39298135094155e-07, "loss": 0.8597, "step": 4222 }, { "epoch": 0.9083673908367391, "grad_norm": 0.0, "learning_rate": 4.372580524088488e-07, "loss": 0.8315, "step": 4223 }, { "epoch": 0.908582490858249, "grad_norm": 0.0, "learning_rate": 4.352226119005232e-07, "loss": 0.8143, "step": 4224 }, { "epoch": 0.9087975908797591, "grad_norm": 0.0, "learning_rate": 4.331918145572678e-07, "loss": 0.8062, "step": 4225 }, { "epoch": 0.9090126909012691, "grad_norm": 0.0, "learning_rate": 4.311656613649251e-07, "loss": 0.8176, "step": 4226 }, { "epoch": 0.9092277909227791, "grad_norm": 0.0, "learning_rate": 4.2914415330707726e-07, "loss": 0.7844, "step": 4227 }, { "epoch": 0.9094428909442891, "grad_norm": 0.0, "learning_rate": 4.2712729136505257e-07, "loss": 0.771, "step": 4228 }, { "epoch": 0.9096579909657991, "grad_norm": 0.0, "learning_rate": 4.2511507651792906e-07, "loss": 0.8222, "step": 4229 }, { "epoch": 0.9098730909873091, "grad_norm": 0.0, "learning_rate": 4.2310750974251967e-07, "loss": 0.7852, "step": 4230 }, { "epoch": 0.9100881910088191, "grad_norm": 0.0, "learning_rate": 4.211045920133894e-07, "loss": 0.8249, "step": 4231 }, { "epoch": 0.9103032910303291, "grad_norm": 0.0, "learning_rate": 4.191063243028415e-07, "loss": 0.8198, "step": 4232 }, { "epoch": 0.9105183910518391, "grad_norm": 0.0, "learning_rate": 4.171127075809223e-07, "loss": 0.8217, "step": 4233 }, { "epoch": 0.9107334910733491, "grad_norm": 0.0, "learning_rate": 4.151237428154231e-07, "loss": 0.7882, "step": 4234 }, { "epoch": 0.9109485910948592, "grad_norm": 0.0, "learning_rate": 4.13139430971875e-07, "loss": 0.8204, "step": 4235 }, { "epoch": 0.9111636911163691, "grad_norm": 0.0, "learning_rate": 4.111597730135497e-07, "loss": 0.836, "step": 4236 }, { "epoch": 0.9113787911378791, "grad_norm": 0.0, "learning_rate": 4.091847699014606e-07, "loss": 0.8045, "step": 4237 }, { "epoch": 0.9115938911593892, "grad_norm": 0.0, "learning_rate": 4.072144225943631e-07, "loss": 0.7969, "step": 4238 }, { "epoch": 0.9118089911808991, "grad_norm": 0.0, "learning_rate": 4.052487320487486e-07, "loss": 0.8162, "step": 4239 }, { "epoch": 0.9120240912024091, "grad_norm": 0.0, "learning_rate": 4.0328769921884946e-07, "loss": 0.8347, "step": 4240 }, { "epoch": 0.9122391912239192, "grad_norm": 0.0, "learning_rate": 4.0133132505663977e-07, "loss": 0.7668, "step": 4241 }, { "epoch": 0.9124542912454291, "grad_norm": 0.0, "learning_rate": 3.993796105118297e-07, "loss": 0.7102, "step": 4242 }, { "epoch": 0.9126693912669391, "grad_norm": 0.0, "learning_rate": 3.9743255653186595e-07, "loss": 0.8539, "step": 4243 }, { "epoch": 0.9128844912884492, "grad_norm": 0.0, "learning_rate": 3.9549016406193686e-07, "loss": 0.7692, "step": 4244 }, { "epoch": 0.9130995913099591, "grad_norm": 0.0, "learning_rate": 3.935524340449615e-07, "loss": 0.7315, "step": 4245 }, { "epoch": 0.9133146913314691, "grad_norm": 0.0, "learning_rate": 3.916193674216029e-07, "loss": 0.7986, "step": 4246 }, { "epoch": 0.9135297913529792, "grad_norm": 0.0, "learning_rate": 3.8969096513025826e-07, "loss": 0.7818, "step": 4247 }, { "epoch": 0.9137448913744891, "grad_norm": 0.0, "learning_rate": 3.8776722810705436e-07, "loss": 0.8394, "step": 4248 }, { "epoch": 0.9139599913959991, "grad_norm": 0.0, "learning_rate": 3.85848157285863e-07, "loss": 0.7887, "step": 4249 }, { "epoch": 0.9141750914175092, "grad_norm": 0.0, "learning_rate": 3.839337535982812e-07, "loss": 0.847, "step": 4250 }, { "epoch": 0.9143901914390191, "grad_norm": 0.0, "learning_rate": 3.8202401797364896e-07, "loss": 0.8032, "step": 4251 }, { "epoch": 0.9146052914605292, "grad_norm": 0.0, "learning_rate": 3.801189513390335e-07, "loss": 0.8157, "step": 4252 }, { "epoch": 0.9148203914820392, "grad_norm": 0.0, "learning_rate": 3.7821855461924075e-07, "loss": 0.8074, "step": 4253 }, { "epoch": 0.9150354915035491, "grad_norm": 0.0, "learning_rate": 3.763228287368037e-07, "loss": 0.8222, "step": 4254 }, { "epoch": 0.9152505915250592, "grad_norm": 0.0, "learning_rate": 3.744317746119952e-07, "loss": 0.7964, "step": 4255 }, { "epoch": 0.9154656915465692, "grad_norm": 0.0, "learning_rate": 3.7254539316281424e-07, "loss": 0.7682, "step": 4256 }, { "epoch": 0.9156807915680791, "grad_norm": 0.0, "learning_rate": 3.7066368530499384e-07, "loss": 0.8052, "step": 4257 }, { "epoch": 0.9158958915895892, "grad_norm": 0.0, "learning_rate": 3.687866519519978e-07, "loss": 0.7966, "step": 4258 }, { "epoch": 0.9161109916110992, "grad_norm": 0.0, "learning_rate": 3.6691429401502055e-07, "loss": 0.724, "step": 4259 }, { "epoch": 0.9163260916326091, "grad_norm": 0.0, "learning_rate": 3.650466124029894e-07, "loss": 0.8302, "step": 4260 }, { "epoch": 0.9165411916541192, "grad_norm": 0.0, "learning_rate": 3.6318360802255703e-07, "loss": 0.7997, "step": 4261 }, { "epoch": 0.9167562916756292, "grad_norm": 0.0, "learning_rate": 3.61325281778111e-07, "loss": 0.8212, "step": 4262 }, { "epoch": 0.9169713916971391, "grad_norm": 0.0, "learning_rate": 3.594716345717608e-07, "loss": 0.8085, "step": 4263 }, { "epoch": 0.9171864917186492, "grad_norm": 0.0, "learning_rate": 3.576226673033512e-07, "loss": 0.7907, "step": 4264 }, { "epoch": 0.9174015917401592, "grad_norm": 0.0, "learning_rate": 3.557783808704529e-07, "loss": 0.7865, "step": 4265 }, { "epoch": 0.9176166917616692, "grad_norm": 0.0, "learning_rate": 3.5393877616836327e-07, "loss": 0.7946, "step": 4266 }, { "epoch": 0.9178317917831792, "grad_norm": 0.0, "learning_rate": 3.521038540901056e-07, "loss": 0.8091, "step": 4267 }, { "epoch": 0.9180468918046892, "grad_norm": 0.0, "learning_rate": 3.502736155264352e-07, "loss": 0.7733, "step": 4268 }, { "epoch": 0.9182619918261992, "grad_norm": 0.0, "learning_rate": 3.484480613658281e-07, "loss": 0.7767, "step": 4269 }, { "epoch": 0.9184770918477092, "grad_norm": 0.0, "learning_rate": 3.4662719249449196e-07, "loss": 0.802, "step": 4270 }, { "epoch": 0.9186921918692191, "grad_norm": 0.0, "learning_rate": 3.4481100979635304e-07, "loss": 0.7932, "step": 4271 }, { "epoch": 0.9189072918907292, "grad_norm": 0.0, "learning_rate": 3.429995141530695e-07, "loss": 0.865, "step": 4272 }, { "epoch": 0.9191223919122392, "grad_norm": 0.0, "learning_rate": 3.4119270644402015e-07, "loss": 0.8355, "step": 4273 }, { "epoch": 0.9193374919337491, "grad_norm": 0.0, "learning_rate": 3.393905875463088e-07, "loss": 0.8175, "step": 4274 }, { "epoch": 0.9195525919552592, "grad_norm": 0.0, "learning_rate": 3.375931583347647e-07, "loss": 0.7882, "step": 4275 }, { "epoch": 0.9197676919767692, "grad_norm": 0.0, "learning_rate": 3.3580041968193867e-07, "loss": 0.7872, "step": 4276 }, { "epoch": 0.9199827919982791, "grad_norm": 0.0, "learning_rate": 3.340123724581046e-07, "loss": 0.8211, "step": 4277 }, { "epoch": 0.9201978920197892, "grad_norm": 0.0, "learning_rate": 3.3222901753126037e-07, "loss": 0.8118, "step": 4278 }, { "epoch": 0.9204129920412992, "grad_norm": 0.0, "learning_rate": 3.3045035576712456e-07, "loss": 0.8198, "step": 4279 }, { "epoch": 0.9206280920628092, "grad_norm": 0.0, "learning_rate": 3.286763880291399e-07, "loss": 0.7968, "step": 4280 }, { "epoch": 0.9208431920843192, "grad_norm": 0.0, "learning_rate": 3.269071151784664e-07, "loss": 0.7719, "step": 4281 }, { "epoch": 0.9210582921058292, "grad_norm": 0.0, "learning_rate": 3.25142538073987e-07, "loss": 0.8361, "step": 4282 }, { "epoch": 0.9212733921273392, "grad_norm": 0.0, "learning_rate": 3.2338265757230647e-07, "loss": 0.8345, "step": 4283 }, { "epoch": 0.9214884921488492, "grad_norm": 0.0, "learning_rate": 3.216274745277481e-07, "loss": 0.811, "step": 4284 }, { "epoch": 0.9217035921703592, "grad_norm": 0.0, "learning_rate": 3.198769897923548e-07, "loss": 0.7996, "step": 4285 }, { "epoch": 0.9219186921918692, "grad_norm": 0.0, "learning_rate": 3.1813120421589126e-07, "loss": 0.805, "step": 4286 }, { "epoch": 0.9221337922133792, "grad_norm": 0.0, "learning_rate": 3.1639011864583625e-07, "loss": 0.7861, "step": 4287 }, { "epoch": 0.9223488922348893, "grad_norm": 0.0, "learning_rate": 3.146537339273914e-07, "loss": 0.7956, "step": 4288 }, { "epoch": 0.9225639922563992, "grad_norm": 0.0, "learning_rate": 3.129220509034725e-07, "loss": 0.8571, "step": 4289 }, { "epoch": 0.9227790922779092, "grad_norm": 0.0, "learning_rate": 3.1119507041471595e-07, "loss": 0.7926, "step": 4290 }, { "epoch": 0.9229941922994193, "grad_norm": 0.0, "learning_rate": 3.0947279329947564e-07, "loss": 0.7925, "step": 4291 }, { "epoch": 0.9232092923209292, "grad_norm": 0.0, "learning_rate": 3.0775522039381724e-07, "loss": 0.8116, "step": 4292 }, { "epoch": 0.9234243923424392, "grad_norm": 0.0, "learning_rate": 3.0604235253153145e-07, "loss": 0.8038, "step": 4293 }, { "epoch": 0.9236394923639493, "grad_norm": 0.0, "learning_rate": 3.043341905441155e-07, "loss": 0.8493, "step": 4294 }, { "epoch": 0.9238545923854592, "grad_norm": 0.0, "learning_rate": 3.0263073526079046e-07, "loss": 0.8447, "step": 4295 }, { "epoch": 0.9240696924069692, "grad_norm": 0.0, "learning_rate": 3.0093198750848483e-07, "loss": 0.8189, "step": 4296 }, { "epoch": 0.9242847924284793, "grad_norm": 0.0, "learning_rate": 2.9923794811184794e-07, "loss": 0.7559, "step": 4297 }, { "epoch": 0.9244998924499892, "grad_norm": 0.0, "learning_rate": 2.9754861789324074e-07, "loss": 0.8318, "step": 4298 }, { "epoch": 0.9247149924714992, "grad_norm": 0.0, "learning_rate": 2.9586399767273956e-07, "loss": 0.8647, "step": 4299 }, { "epoch": 0.9249300924930093, "grad_norm": 0.0, "learning_rate": 2.9418408826813125e-07, "loss": 0.8733, "step": 4300 }, { "epoch": 0.9251451925145192, "grad_norm": 0.0, "learning_rate": 2.9250889049492136e-07, "loss": 0.8184, "step": 4301 }, { "epoch": 0.9253602925360292, "grad_norm": 0.0, "learning_rate": 2.9083840516632155e-07, "loss": 0.79, "step": 4302 }, { "epoch": 0.9255753925575393, "grad_norm": 0.0, "learning_rate": 2.891726330932598e-07, "loss": 0.8054, "step": 4303 }, { "epoch": 0.9257904925790492, "grad_norm": 0.0, "learning_rate": 2.875115750843771e-07, "loss": 0.7541, "step": 4304 }, { "epoch": 0.9260055926005593, "grad_norm": 0.0, "learning_rate": 2.8585523194602283e-07, "loss": 0.7893, "step": 4305 }, { "epoch": 0.9262206926220693, "grad_norm": 0.0, "learning_rate": 2.842036044822605e-07, "loss": 0.7887, "step": 4306 }, { "epoch": 0.9264357926435792, "grad_norm": 0.0, "learning_rate": 2.825566934948598e-07, "loss": 0.7705, "step": 4307 }, { "epoch": 0.9266508926650893, "grad_norm": 0.0, "learning_rate": 2.8091449978330685e-07, "loss": 0.8365, "step": 4308 }, { "epoch": 0.9268659926865993, "grad_norm": 0.0, "learning_rate": 2.792770241447962e-07, "loss": 0.7824, "step": 4309 }, { "epoch": 0.9270810927081092, "grad_norm": 0.0, "learning_rate": 2.776442673742274e-07, "loss": 0.8426, "step": 4310 }, { "epoch": 0.9272961927296193, "grad_norm": 0.0, "learning_rate": 2.7601623026421445e-07, "loss": 0.8674, "step": 4311 }, { "epoch": 0.9275112927511293, "grad_norm": 0.0, "learning_rate": 2.7439291360507824e-07, "loss": 0.7953, "step": 4312 }, { "epoch": 0.9277263927726392, "grad_norm": 0.0, "learning_rate": 2.727743181848463e-07, "loss": 0.7899, "step": 4313 }, { "epoch": 0.9279414927941493, "grad_norm": 0.0, "learning_rate": 2.711604447892602e-07, "loss": 0.8423, "step": 4314 }, { "epoch": 0.9281565928156593, "grad_norm": 0.0, "learning_rate": 2.6955129420176193e-07, "loss": 0.8094, "step": 4315 }, { "epoch": 0.9283716928371692, "grad_norm": 0.0, "learning_rate": 2.6794686720350573e-07, "loss": 0.8859, "step": 4316 }, { "epoch": 0.9285867928586793, "grad_norm": 0.0, "learning_rate": 2.6634716457335177e-07, "loss": 0.8587, "step": 4317 }, { "epoch": 0.9288018928801893, "grad_norm": 0.0, "learning_rate": 2.6475218708786445e-07, "loss": 0.8471, "step": 4318 }, { "epoch": 0.9290169929016993, "grad_norm": 0.0, "learning_rate": 2.631619355213189e-07, "loss": 0.8288, "step": 4319 }, { "epoch": 0.9292320929232093, "grad_norm": 0.0, "learning_rate": 2.615764106456897e-07, "loss": 0.766, "step": 4320 }, { "epoch": 0.9294471929447193, "grad_norm": 0.0, "learning_rate": 2.5999561323066356e-07, "loss": 0.7892, "step": 4321 }, { "epoch": 0.9296622929662293, "grad_norm": 0.0, "learning_rate": 2.584195440436299e-07, "loss": 0.8235, "step": 4322 }, { "epoch": 0.9298773929877393, "grad_norm": 0.0, "learning_rate": 2.568482038496789e-07, "loss": 0.7996, "step": 4323 }, { "epoch": 0.9300924930092493, "grad_norm": 0.0, "learning_rate": 2.5528159341161264e-07, "loss": 0.789, "step": 4324 }, { "epoch": 0.9303075930307593, "grad_norm": 0.0, "learning_rate": 2.537197134899294e-07, "loss": 0.7742, "step": 4325 }, { "epoch": 0.9305226930522693, "grad_norm": 0.0, "learning_rate": 2.521625648428361e-07, "loss": 0.7958, "step": 4326 }, { "epoch": 0.9307377930737794, "grad_norm": 0.0, "learning_rate": 2.506101482262424e-07, "loss": 0.797, "step": 4327 }, { "epoch": 0.9309528930952893, "grad_norm": 0.0, "learning_rate": 2.490624643937567e-07, "loss": 0.7892, "step": 4328 }, { "epoch": 0.9311679931167993, "grad_norm": 0.0, "learning_rate": 2.475195140966957e-07, "loss": 0.7658, "step": 4329 }, { "epoch": 0.9313830931383094, "grad_norm": 0.0, "learning_rate": 2.459812980840759e-07, "loss": 0.7597, "step": 4330 }, { "epoch": 0.9315981931598193, "grad_norm": 0.0, "learning_rate": 2.4444781710261324e-07, "loss": 0.8205, "step": 4331 }, { "epoch": 0.9318132931813293, "grad_norm": 0.0, "learning_rate": 2.429190718967289e-07, "loss": 0.83, "step": 4332 }, { "epoch": 0.9320283932028394, "grad_norm": 0.0, "learning_rate": 2.4139506320854154e-07, "loss": 0.8226, "step": 4333 }, { "epoch": 0.9322434932243493, "grad_norm": 0.0, "learning_rate": 2.398757917778727e-07, "loss": 0.8127, "step": 4334 }, { "epoch": 0.9324585932458593, "grad_norm": 0.0, "learning_rate": 2.3836125834224566e-07, "loss": 0.7189, "step": 4335 }, { "epoch": 0.9326736932673694, "grad_norm": 0.0, "learning_rate": 2.3685146363688018e-07, "loss": 0.8453, "step": 4336 }, { "epoch": 0.9328887932888793, "grad_norm": 0.0, "learning_rate": 2.3534640839469768e-07, "loss": 0.7494, "step": 4337 }, { "epoch": 0.9331038933103893, "grad_norm": 0.0, "learning_rate": 2.3384609334631713e-07, "loss": 0.7766, "step": 4338 }, { "epoch": 0.9333189933318994, "grad_norm": 0.0, "learning_rate": 2.3235051922005926e-07, "loss": 0.7919, "step": 4339 }, { "epoch": 0.9335340933534093, "grad_norm": 0.0, "learning_rate": 2.3085968674194214e-07, "loss": 0.8438, "step": 4340 }, { "epoch": 0.9337491933749194, "grad_norm": 0.0, "learning_rate": 2.2937359663567916e-07, "loss": 0.6956, "step": 4341 }, { "epoch": 0.9339642933964294, "grad_norm": 0.0, "learning_rate": 2.278922496226854e-07, "loss": 0.8082, "step": 4342 }, { "epoch": 0.9341793934179393, "grad_norm": 0.0, "learning_rate": 2.2641564642207347e-07, "loss": 0.7913, "step": 4343 }, { "epoch": 0.9343944934394494, "grad_norm": 0.0, "learning_rate": 2.2494378775064997e-07, "loss": 0.7968, "step": 4344 }, { "epoch": 0.9346095934609594, "grad_norm": 0.0, "learning_rate": 2.2347667432292109e-07, "loss": 0.8354, "step": 4345 }, { "epoch": 0.9348246934824693, "grad_norm": 0.0, "learning_rate": 2.2201430685108715e-07, "loss": 0.7958, "step": 4346 }, { "epoch": 0.9350397935039794, "grad_norm": 0.0, "learning_rate": 2.20556686045047e-07, "loss": 0.7768, "step": 4347 }, { "epoch": 0.9352548935254894, "grad_norm": 0.0, "learning_rate": 2.1910381261239456e-07, "loss": 0.777, "step": 4348 }, { "epoch": 0.9354699935469993, "grad_norm": 0.0, "learning_rate": 2.1765568725841679e-07, "loss": 0.825, "step": 4349 }, { "epoch": 0.9356850935685094, "grad_norm": 0.0, "learning_rate": 2.1621231068610028e-07, "loss": 0.7826, "step": 4350 }, { "epoch": 0.9359001935900194, "grad_norm": 0.0, "learning_rate": 2.1477368359612117e-07, "loss": 0.8391, "step": 4351 }, { "epoch": 0.9361152936115293, "grad_norm": 0.0, "learning_rate": 2.1333980668685416e-07, "loss": 0.8354, "step": 4352 }, { "epoch": 0.9363303936330394, "grad_norm": 0.0, "learning_rate": 2.1191068065436472e-07, "loss": 0.8087, "step": 4353 }, { "epoch": 0.9365454936545493, "grad_norm": 0.0, "learning_rate": 2.1048630619241563e-07, "loss": 0.8986, "step": 4354 }, { "epoch": 0.9367605936760593, "grad_norm": 0.0, "learning_rate": 2.0906668399246045e-07, "loss": 0.844, "step": 4355 }, { "epoch": 0.9369756936975694, "grad_norm": 0.0, "learning_rate": 2.0765181474364572e-07, "loss": 0.7914, "step": 4356 }, { "epoch": 0.9371907937190793, "grad_norm": 0.0, "learning_rate": 2.06241699132812e-07, "loss": 0.8216, "step": 4357 }, { "epoch": 0.9374058937405894, "grad_norm": 0.0, "learning_rate": 2.0483633784449285e-07, "loss": 0.7929, "step": 4358 }, { "epoch": 0.9376209937620994, "grad_norm": 0.0, "learning_rate": 2.0343573156091035e-07, "loss": 0.7977, "step": 4359 }, { "epoch": 0.9378360937836093, "grad_norm": 0.0, "learning_rate": 2.020398809619817e-07, "loss": 0.8777, "step": 4360 }, { "epoch": 0.9380511938051194, "grad_norm": 0.0, "learning_rate": 2.0064878672531485e-07, "loss": 0.7604, "step": 4361 }, { "epoch": 0.9382662938266294, "grad_norm": 0.0, "learning_rate": 1.9926244952620743e-07, "loss": 0.8351, "step": 4362 }, { "epoch": 0.9384813938481393, "grad_norm": 0.0, "learning_rate": 1.978808700376489e-07, "loss": 0.7879, "step": 4363 }, { "epoch": 0.9386964938696494, "grad_norm": 0.0, "learning_rate": 1.9650404893031938e-07, "loss": 0.7998, "step": 4364 }, { "epoch": 0.9389115938911594, "grad_norm": 0.0, "learning_rate": 1.9513198687258873e-07, "loss": 0.8151, "step": 4365 }, { "epoch": 0.9391266939126693, "grad_norm": 0.0, "learning_rate": 1.9376468453051633e-07, "loss": 0.8546, "step": 4366 }, { "epoch": 0.9393417939341794, "grad_norm": 0.0, "learning_rate": 1.9240214256785128e-07, "loss": 0.8536, "step": 4367 }, { "epoch": 0.9395568939556894, "grad_norm": 0.0, "learning_rate": 1.9104436164602892e-07, "loss": 0.7866, "step": 4368 }, { "epoch": 0.9397719939771993, "grad_norm": 0.0, "learning_rate": 1.8969134242418086e-07, "loss": 0.8532, "step": 4369 }, { "epoch": 0.9399870939987094, "grad_norm": 0.0, "learning_rate": 1.883430855591173e-07, "loss": 0.8375, "step": 4370 }, { "epoch": 0.9402021940202194, "grad_norm": 0.0, "learning_rate": 1.8699959170534577e-07, "loss": 0.8233, "step": 4371 }, { "epoch": 0.9404172940417294, "grad_norm": 0.0, "learning_rate": 1.8566086151505568e-07, "loss": 0.8074, "step": 4372 }, { "epoch": 0.9406323940632394, "grad_norm": 0.0, "learning_rate": 1.84326895638125e-07, "loss": 0.7656, "step": 4373 }, { "epoch": 0.9408474940847494, "grad_norm": 0.0, "learning_rate": 1.8299769472212125e-07, "loss": 0.7247, "step": 4374 }, { "epoch": 0.9410625941062594, "grad_norm": 0.0, "learning_rate": 1.8167325941229496e-07, "loss": 0.7518, "step": 4375 }, { "epoch": 0.9412776941277694, "grad_norm": 0.0, "learning_rate": 1.803535903515885e-07, "loss": 0.8295, "step": 4376 }, { "epoch": 0.9414927941492794, "grad_norm": 0.0, "learning_rate": 1.790386881806261e-07, "loss": 0.8591, "step": 4377 }, { "epoch": 0.9417078941707894, "grad_norm": 0.0, "learning_rate": 1.7772855353771823e-07, "loss": 0.8226, "step": 4378 }, { "epoch": 0.9419229941922994, "grad_norm": 0.0, "learning_rate": 1.7642318705886284e-07, "loss": 0.7837, "step": 4379 }, { "epoch": 0.9421380942138095, "grad_norm": 0.0, "learning_rate": 1.7512258937774307e-07, "loss": 0.8499, "step": 4380 }, { "epoch": 0.9423531942353194, "grad_norm": 0.0, "learning_rate": 1.7382676112572605e-07, "loss": 0.8081, "step": 4381 }, { "epoch": 0.9425682942568294, "grad_norm": 0.0, "learning_rate": 1.7253570293186418e-07, "loss": 0.8097, "step": 4382 }, { "epoch": 0.9427833942783395, "grad_norm": 0.0, "learning_rate": 1.7124941542289276e-07, "loss": 0.8137, "step": 4383 }, { "epoch": 0.9429984942998494, "grad_norm": 0.0, "learning_rate": 1.6996789922323454e-07, "loss": 0.7784, "step": 4384 }, { "epoch": 0.9432135943213594, "grad_norm": 0.0, "learning_rate": 1.68691154954993e-07, "loss": 0.7903, "step": 4385 }, { "epoch": 0.9434286943428695, "grad_norm": 0.0, "learning_rate": 1.6741918323795458e-07, "loss": 0.744, "step": 4386 }, { "epoch": 0.9436437943643794, "grad_norm": 0.0, "learning_rate": 1.6615198468959426e-07, "loss": 0.8748, "step": 4387 }, { "epoch": 0.9438588943858894, "grad_norm": 0.0, "learning_rate": 1.6488955992506218e-07, "loss": 0.8597, "step": 4388 }, { "epoch": 0.9440739944073995, "grad_norm": 0.0, "learning_rate": 1.6363190955719922e-07, "loss": 0.8796, "step": 4389 }, { "epoch": 0.9442890944289094, "grad_norm": 0.0, "learning_rate": 1.6237903419651923e-07, "loss": 0.8066, "step": 4390 }, { "epoch": 0.9445041944504194, "grad_norm": 0.0, "learning_rate": 1.6113093445122685e-07, "loss": 0.7905, "step": 4391 }, { "epoch": 0.9447192944719295, "grad_norm": 0.0, "learning_rate": 1.5988761092720517e-07, "loss": 0.837, "step": 4392 }, { "epoch": 0.9449343944934394, "grad_norm": 0.0, "learning_rate": 1.5864906422801584e-07, "loss": 0.7734, "step": 4393 }, { "epoch": 0.9451494945149495, "grad_norm": 0.0, "learning_rate": 1.574152949549057e-07, "loss": 0.7933, "step": 4394 }, { "epoch": 0.9453645945364595, "grad_norm": 0.0, "learning_rate": 1.561863037068001e-07, "loss": 0.7818, "step": 4395 }, { "epoch": 0.9455796945579694, "grad_norm": 0.0, "learning_rate": 1.5496209108030735e-07, "loss": 0.8053, "step": 4396 }, { "epoch": 0.9457947945794795, "grad_norm": 0.0, "learning_rate": 1.5374265766971097e-07, "loss": 0.8403, "step": 4397 }, { "epoch": 0.9460098946009895, "grad_norm": 0.0, "learning_rate": 1.5252800406698076e-07, "loss": 0.7811, "step": 4398 }, { "epoch": 0.9462249946224994, "grad_norm": 0.0, "learning_rate": 1.5131813086176063e-07, "loss": 0.8032, "step": 4399 }, { "epoch": 0.9464400946440095, "grad_norm": 0.0, "learning_rate": 1.5011303864137962e-07, "loss": 0.8645, "step": 4400 }, { "epoch": 0.9466551946655195, "grad_norm": 0.0, "learning_rate": 1.4891272799083979e-07, "loss": 0.8273, "step": 4401 }, { "epoch": 0.9468702946870294, "grad_norm": 0.0, "learning_rate": 1.4771719949282725e-07, "loss": 0.7975, "step": 4402 }, { "epoch": 0.9470853947085395, "grad_norm": 0.0, "learning_rate": 1.4652645372770226e-07, "loss": 0.7821, "step": 4403 }, { "epoch": 0.9473004947300495, "grad_norm": 0.0, "learning_rate": 1.4534049127350568e-07, "loss": 0.8101, "step": 4404 }, { "epoch": 0.9475155947515594, "grad_norm": 0.0, "learning_rate": 1.4415931270595708e-07, "loss": 0.7644, "step": 4405 }, { "epoch": 0.9477306947730695, "grad_norm": 0.0, "learning_rate": 1.4298291859845215e-07, "loss": 0.8539, "step": 4406 }, { "epoch": 0.9479457947945795, "grad_norm": 0.0, "learning_rate": 1.4181130952206524e-07, "loss": 0.8091, "step": 4407 }, { "epoch": 0.9481608948160895, "grad_norm": 0.0, "learning_rate": 1.406444860455447e-07, "loss": 0.7821, "step": 4408 }, { "epoch": 0.9483759948375995, "grad_norm": 0.0, "learning_rate": 1.394824487353208e-07, "loss": 0.7887, "step": 4409 }, { "epoch": 0.9485910948591095, "grad_norm": 0.0, "learning_rate": 1.3832519815549671e-07, "loss": 0.8039, "step": 4410 }, { "epoch": 0.9488061948806195, "grad_norm": 0.0, "learning_rate": 1.371727348678531e-07, "loss": 0.822, "step": 4411 }, { "epoch": 0.9490212949021295, "grad_norm": 0.0, "learning_rate": 1.3602505943184575e-07, "loss": 0.8456, "step": 4412 }, { "epoch": 0.9492363949236395, "grad_norm": 0.0, "learning_rate": 1.3488217240460788e-07, "loss": 0.7968, "step": 4413 }, { "epoch": 0.9494514949451495, "grad_norm": 0.0, "learning_rate": 1.3374407434094682e-07, "loss": 0.8276, "step": 4414 }, { "epoch": 0.9496665949666595, "grad_norm": 0.0, "learning_rate": 1.326107657933473e-07, "loss": 0.8872, "step": 4415 }, { "epoch": 0.9498816949881695, "grad_norm": 0.0, "learning_rate": 1.3148224731196367e-07, "loss": 0.8576, "step": 4416 }, { "epoch": 0.9500967950096795, "grad_norm": 0.0, "learning_rate": 1.3035851944463106e-07, "loss": 0.7301, "step": 4417 }, { "epoch": 0.9503118950311895, "grad_norm": 0.0, "learning_rate": 1.2923958273685755e-07, "loss": 0.8582, "step": 4418 }, { "epoch": 0.9505269950526996, "grad_norm": 0.0, "learning_rate": 1.2812543773182195e-07, "loss": 0.797, "step": 4419 }, { "epoch": 0.9507420950742095, "grad_norm": 0.0, "learning_rate": 1.2701608497038055e-07, "loss": 0.8463, "step": 4420 }, { "epoch": 0.9509571950957195, "grad_norm": 0.0, "learning_rate": 1.2591152499106253e-07, "loss": 0.7557, "step": 4421 }, { "epoch": 0.9511722951172296, "grad_norm": 0.0, "learning_rate": 1.2481175833006898e-07, "loss": 0.7777, "step": 4422 }, { "epoch": 0.9513873951387395, "grad_norm": 0.0, "learning_rate": 1.237167855212773e-07, "loss": 0.7364, "step": 4423 }, { "epoch": 0.9516024951602495, "grad_norm": 0.0, "learning_rate": 1.2262660709623232e-07, "loss": 0.7885, "step": 4424 }, { "epoch": 0.9518175951817596, "grad_norm": 0.0, "learning_rate": 1.2154122358415842e-07, "loss": 0.7781, "step": 4425 }, { "epoch": 0.9520326952032695, "grad_norm": 0.0, "learning_rate": 1.2046063551194643e-07, "loss": 0.7535, "step": 4426 }, { "epoch": 0.9522477952247795, "grad_norm": 0.0, "learning_rate": 1.1938484340416113e-07, "loss": 0.8175, "step": 4427 }, { "epoch": 0.9524628952462896, "grad_norm": 0.0, "learning_rate": 1.1831384778304033e-07, "loss": 0.7991, "step": 4428 }, { "epoch": 0.9526779952677995, "grad_norm": 0.0, "learning_rate": 1.1724764916849263e-07, "loss": 0.8475, "step": 4429 }, { "epoch": 0.9528930952893095, "grad_norm": 0.0, "learning_rate": 1.1618624807809841e-07, "loss": 0.7924, "step": 4430 }, { "epoch": 0.9531081953108196, "grad_norm": 0.0, "learning_rate": 1.1512964502710778e-07, "loss": 0.7432, "step": 4431 }, { "epoch": 0.9533232953323295, "grad_norm": 0.0, "learning_rate": 1.140778405284415e-07, "loss": 0.7831, "step": 4432 }, { "epoch": 0.9535383953538396, "grad_norm": 0.0, "learning_rate": 1.1303083509269452e-07, "loss": 0.8146, "step": 4433 }, { "epoch": 0.9537534953753496, "grad_norm": 0.0, "learning_rate": 1.1198862922812693e-07, "loss": 0.7749, "step": 4434 }, { "epoch": 0.9539685953968595, "grad_norm": 0.0, "learning_rate": 1.1095122344067289e-07, "loss": 0.8212, "step": 4435 }, { "epoch": 0.9541836954183696, "grad_norm": 0.0, "learning_rate": 1.0991861823393513e-07, "loss": 0.825, "step": 4436 }, { "epoch": 0.9543987954398796, "grad_norm": 0.0, "learning_rate": 1.088908141091849e-07, "loss": 0.833, "step": 4437 }, { "epoch": 0.9546138954613895, "grad_norm": 0.0, "learning_rate": 1.0786781156536418e-07, "loss": 0.7561, "step": 4438 }, { "epoch": 0.9548289954828996, "grad_norm": 0.0, "learning_rate": 1.0684961109908354e-07, "loss": 0.8169, "step": 4439 }, { "epoch": 0.9550440955044095, "grad_norm": 0.0, "learning_rate": 1.0583621320462201e-07, "loss": 0.7723, "step": 4440 }, { "epoch": 0.9552591955259195, "grad_norm": 0.0, "learning_rate": 1.048276183739283e-07, "loss": 0.8317, "step": 4441 }, { "epoch": 0.9554742955474296, "grad_norm": 0.0, "learning_rate": 1.0382382709661853e-07, "loss": 0.8565, "step": 4442 }, { "epoch": 0.9556893955689395, "grad_norm": 0.0, "learning_rate": 1.0282483985997627e-07, "loss": 0.7884, "step": 4443 }, { "epoch": 0.9559044955904495, "grad_norm": 0.0, "learning_rate": 1.018306571489569e-07, "loss": 0.824, "step": 4444 }, { "epoch": 0.9561195956119596, "grad_norm": 0.0, "learning_rate": 1.008412794461766e-07, "loss": 0.7593, "step": 4445 }, { "epoch": 0.9563346956334695, "grad_norm": 0.0, "learning_rate": 9.985670723192675e-08, "loss": 0.7813, "step": 4446 }, { "epoch": 0.9565497956549796, "grad_norm": 0.0, "learning_rate": 9.887694098415946e-08, "loss": 0.7934, "step": 4447 }, { "epoch": 0.9567648956764896, "grad_norm": 0.0, "learning_rate": 9.790198117849758e-08, "loss": 0.8128, "step": 4448 }, { "epoch": 0.9569799956979995, "grad_norm": 0.0, "learning_rate": 9.693182828823034e-08, "loss": 0.7936, "step": 4449 }, { "epoch": 0.9571950957195096, "grad_norm": 0.0, "learning_rate": 9.596648278431209e-08, "loss": 0.8732, "step": 4450 }, { "epoch": 0.9574101957410196, "grad_norm": 0.0, "learning_rate": 9.500594513536354e-08, "loss": 0.7803, "step": 4451 }, { "epoch": 0.9576252957625295, "grad_norm": 0.0, "learning_rate": 9.405021580767171e-08, "loss": 0.774, "step": 4452 }, { "epoch": 0.9578403957840396, "grad_norm": 0.0, "learning_rate": 9.309929526519212e-08, "loss": 0.8055, "step": 4453 }, { "epoch": 0.9580554958055496, "grad_norm": 0.0, "learning_rate": 9.215318396954109e-08, "loss": 0.8656, "step": 4454 }, { "epoch": 0.9582705958270595, "grad_norm": 0.0, "learning_rate": 9.121188238000456e-08, "loss": 0.8178, "step": 4455 }, { "epoch": 0.9584856958485696, "grad_norm": 0.0, "learning_rate": 9.027539095353144e-08, "loss": 0.7503, "step": 4456 }, { "epoch": 0.9587007958700796, "grad_norm": 0.0, "learning_rate": 8.934371014473475e-08, "loss": 0.7969, "step": 4457 }, { "epoch": 0.9589158958915895, "grad_norm": 0.0, "learning_rate": 8.841684040589494e-08, "loss": 0.8282, "step": 4458 }, { "epoch": 0.9591309959130996, "grad_norm": 0.0, "learning_rate": 8.749478218695429e-08, "loss": 0.7865, "step": 4459 }, { "epoch": 0.9593460959346096, "grad_norm": 0.0, "learning_rate": 8.657753593552142e-08, "loss": 0.7417, "step": 4460 }, { "epoch": 0.9595611959561196, "grad_norm": 0.0, "learning_rate": 8.566510209686796e-08, "loss": 0.7899, "step": 4461 }, { "epoch": 0.9597762959776296, "grad_norm": 0.0, "learning_rate": 8.475748111393068e-08, "loss": 0.794, "step": 4462 }, { "epoch": 0.9599913959991396, "grad_norm": 0.0, "learning_rate": 8.385467342730602e-08, "loss": 0.7789, "step": 4463 }, { "epoch": 0.9602064960206496, "grad_norm": 0.0, "learning_rate": 8.29566794752601e-08, "loss": 0.8641, "step": 4464 }, { "epoch": 0.9604215960421596, "grad_norm": 0.0, "learning_rate": 8.20634996937153e-08, "loss": 0.8061, "step": 4465 }, { "epoch": 0.9606366960636696, "grad_norm": 0.0, "learning_rate": 8.117513451626258e-08, "loss": 0.7789, "step": 4466 }, { "epoch": 0.9608517960851796, "grad_norm": 0.0, "learning_rate": 8.029158437415474e-08, "loss": 0.8158, "step": 4467 }, { "epoch": 0.9610668961066896, "grad_norm": 0.0, "learning_rate": 7.941284969630314e-08, "loss": 0.7708, "step": 4468 }, { "epoch": 0.9612819961281996, "grad_norm": 0.0, "learning_rate": 7.853893090928654e-08, "loss": 0.7141, "step": 4469 }, { "epoch": 0.9614970961497096, "grad_norm": 0.0, "learning_rate": 7.766982843734339e-08, "loss": 0.8325, "step": 4470 }, { "epoch": 0.9617121961712196, "grad_norm": 0.0, "learning_rate": 7.680554270237283e-08, "loss": 0.8071, "step": 4471 }, { "epoch": 0.9619272961927297, "grad_norm": 0.0, "learning_rate": 7.594607412393928e-08, "loss": 0.8739, "step": 4472 }, { "epoch": 0.9621423962142396, "grad_norm": 0.0, "learning_rate": 7.509142311926565e-08, "loss": 0.8425, "step": 4473 }, { "epoch": 0.9623574962357496, "grad_norm": 0.0, "learning_rate": 7.424159010323784e-08, "loss": 0.8335, "step": 4474 }, { "epoch": 0.9625725962572597, "grad_norm": 0.0, "learning_rate": 7.33965754884014e-08, "loss": 0.8169, "step": 4475 }, { "epoch": 0.9627876962787696, "grad_norm": 0.0, "learning_rate": 7.255637968496487e-08, "loss": 0.745, "step": 4476 }, { "epoch": 0.9630027963002796, "grad_norm": 0.0, "learning_rate": 7.172100310079533e-08, "loss": 0.8217, "step": 4477 }, { "epoch": 0.9632178963217897, "grad_norm": 0.0, "learning_rate": 7.08904461414217e-08, "loss": 0.7923, "step": 4478 }, { "epoch": 0.9634329963432996, "grad_norm": 0.0, "learning_rate": 7.006470921003261e-08, "loss": 0.854, "step": 4479 }, { "epoch": 0.9636480963648096, "grad_norm": 0.0, "learning_rate": 6.92437927074796e-08, "loss": 0.7779, "step": 4480 }, { "epoch": 0.9638631963863197, "grad_norm": 0.0, "learning_rate": 6.842769703226836e-08, "loss": 0.8496, "step": 4481 }, { "epoch": 0.9640782964078296, "grad_norm": 0.0, "learning_rate": 6.761642258056977e-08, "loss": 0.8233, "step": 4482 }, { "epoch": 0.9642933964293396, "grad_norm": 0.0, "learning_rate": 6.680996974621212e-08, "loss": 0.8487, "step": 4483 }, { "epoch": 0.9645084964508497, "grad_norm": 0.0, "learning_rate": 6.600833892068336e-08, "loss": 0.8127, "step": 4484 }, { "epoch": 0.9647235964723596, "grad_norm": 0.0, "learning_rate": 6.521153049312889e-08, "loss": 0.6648, "step": 4485 }, { "epoch": 0.9649386964938697, "grad_norm": 0.0, "learning_rate": 6.441954485035707e-08, "loss": 0.7624, "step": 4486 }, { "epoch": 0.9651537965153797, "grad_norm": 0.0, "learning_rate": 6.363238237683034e-08, "loss": 0.7468, "step": 4487 }, { "epoch": 0.9653688965368896, "grad_norm": 0.0, "learning_rate": 6.285004345467305e-08, "loss": 0.8262, "step": 4488 }, { "epoch": 0.9655839965583997, "grad_norm": 0.0, "learning_rate": 6.207252846366807e-08, "loss": 0.7949, "step": 4489 }, { "epoch": 0.9657990965799097, "grad_norm": 0.0, "learning_rate": 6.129983778125348e-08, "loss": 0.8098, "step": 4490 }, { "epoch": 0.9660141966014196, "grad_norm": 0.0, "learning_rate": 6.053197178252923e-08, "loss": 0.779, "step": 4491 }, { "epoch": 0.9662292966229297, "grad_norm": 0.0, "learning_rate": 5.976893084025048e-08, "loss": 0.7781, "step": 4492 }, { "epoch": 0.9664443966444397, "grad_norm": 0.0, "learning_rate": 5.901071532483204e-08, "loss": 0.8476, "step": 4493 }, { "epoch": 0.9666594966659496, "grad_norm": 0.0, "learning_rate": 5.825732560434283e-08, "loss": 0.7867, "step": 4494 }, { "epoch": 0.9668745966874597, "grad_norm": 0.0, "learning_rate": 5.7508762044514724e-08, "loss": 0.8179, "step": 4495 }, { "epoch": 0.9670896967089697, "grad_norm": 0.0, "learning_rate": 5.676502500873038e-08, "loss": 0.8339, "step": 4496 }, { "epoch": 0.9673047967304796, "grad_norm": 0.0, "learning_rate": 5.602611485803433e-08, "loss": 0.8987, "step": 4497 }, { "epoch": 0.9675198967519897, "grad_norm": 0.0, "learning_rate": 5.529203195112631e-08, "loss": 0.7883, "step": 4498 }, { "epoch": 0.9677349967734997, "grad_norm": 0.0, "learning_rate": 5.456277664436127e-08, "loss": 0.894, "step": 4499 }, { "epoch": 0.9679500967950097, "grad_norm": 0.0, "learning_rate": 5.38383492917538e-08, "loss": 0.7084, "step": 4500 }, { "epoch": 0.9681651968165197, "grad_norm": 0.0, "learning_rate": 5.311875024497037e-08, "loss": 0.7535, "step": 4501 }, { "epoch": 0.9683802968380297, "grad_norm": 0.0, "learning_rate": 5.240397985333823e-08, "loss": 0.839, "step": 4502 }, { "epoch": 0.9685953968595397, "grad_norm": 0.0, "learning_rate": 5.1694038463838695e-08, "loss": 0.7117, "step": 4503 }, { "epoch": 0.9688104968810497, "grad_norm": 0.0, "learning_rate": 5.0988926421106086e-08, "loss": 0.8232, "step": 4504 }, { "epoch": 0.9690255969025597, "grad_norm": 0.0, "learning_rate": 5.0288644067435496e-08, "loss": 0.8127, "step": 4505 }, { "epoch": 0.9692406969240697, "grad_norm": 0.0, "learning_rate": 4.959319174277388e-08, "loss": 0.8261, "step": 4506 }, { "epoch": 0.9694557969455797, "grad_norm": 0.0, "learning_rate": 4.890256978472452e-08, "loss": 0.8089, "step": 4507 }, { "epoch": 0.9696708969670897, "grad_norm": 0.0, "learning_rate": 4.8216778528545895e-08, "loss": 0.8538, "step": 4508 }, { "epoch": 0.9698859969885997, "grad_norm": 0.0, "learning_rate": 4.753581830715171e-08, "loss": 0.7969, "step": 4509 }, { "epoch": 0.9701010970101097, "grad_norm": 0.0, "learning_rate": 4.685968945110864e-08, "loss": 0.79, "step": 4510 }, { "epoch": 0.9703161970316198, "grad_norm": 0.0, "learning_rate": 4.618839228864192e-08, "loss": 0.7731, "step": 4511 }, { "epoch": 0.9705312970531297, "grad_norm": 0.0, "learning_rate": 4.552192714562642e-08, "loss": 0.8501, "step": 4512 }, { "epoch": 0.9707463970746397, "grad_norm": 0.0, "learning_rate": 4.486029434559558e-08, "loss": 0.8021, "step": 4513 }, { "epoch": 0.9709614970961498, "grad_norm": 0.0, "learning_rate": 4.420349420973358e-08, "loss": 0.8187, "step": 4514 }, { "epoch": 0.9711765971176597, "grad_norm": 0.0, "learning_rate": 4.3551527056880926e-08, "loss": 0.7364, "step": 4515 }, { "epoch": 0.9713916971391697, "grad_norm": 0.0, "learning_rate": 4.290439320353224e-08, "loss": 0.8003, "step": 4516 }, { "epoch": 0.9716067971606798, "grad_norm": 0.0, "learning_rate": 4.226209296383288e-08, "loss": 0.7776, "step": 4517 }, { "epoch": 0.9718218971821897, "grad_norm": 0.0, "learning_rate": 4.1624626649584555e-08, "loss": 0.7657, "step": 4518 }, { "epoch": 0.9720369972036997, "grad_norm": 0.0, "learning_rate": 4.099199457024194e-08, "loss": 0.8124, "step": 4519 }, { "epoch": 0.9722520972252098, "grad_norm": 0.0, "learning_rate": 4.036419703291272e-08, "loss": 0.794, "step": 4520 }, { "epoch": 0.9724671972467197, "grad_norm": 0.0, "learning_rate": 3.974123434235644e-08, "loss": 0.7758, "step": 4521 }, { "epoch": 0.9726822972682297, "grad_norm": 0.0, "learning_rate": 3.912310680098785e-08, "loss": 0.8238, "step": 4522 }, { "epoch": 0.9728973972897397, "grad_norm": 0.0, "learning_rate": 3.850981470887361e-08, "loss": 0.8327, "step": 4523 }, { "epoch": 0.9731124973112497, "grad_norm": 0.0, "learning_rate": 3.79013583637311e-08, "loss": 0.8015, "step": 4524 }, { "epoch": 0.9733275973327598, "grad_norm": 0.0, "learning_rate": 3.729773806093295e-08, "loss": 0.7736, "step": 4525 }, { "epoch": 0.9735426973542697, "grad_norm": 0.0, "learning_rate": 3.669895409350366e-08, "loss": 0.7459, "step": 4526 }, { "epoch": 0.9737577973757797, "grad_norm": 0.0, "learning_rate": 3.610500675211848e-08, "loss": 0.8383, "step": 4527 }, { "epoch": 0.9739728973972898, "grad_norm": 0.0, "learning_rate": 3.551589632510455e-08, "loss": 0.8388, "step": 4528 }, { "epoch": 0.9741879974187997, "grad_norm": 0.0, "learning_rate": 3.493162309844533e-08, "loss": 0.7762, "step": 4529 }, { "epoch": 0.9744030974403097, "grad_norm": 0.0, "learning_rate": 3.435218735576951e-08, "loss": 0.7883, "step": 4530 }, { "epoch": 0.9746181974618198, "grad_norm": 0.0, "learning_rate": 3.3777589378363177e-08, "loss": 0.7619, "step": 4531 }, { "epoch": 0.9748332974833297, "grad_norm": 0.0, "learning_rate": 3.3207829445159876e-08, "loss": 0.7375, "step": 4532 }, { "epoch": 0.9750483975048397, "grad_norm": 0.0, "learning_rate": 3.2642907832747263e-08, "loss": 0.8142, "step": 4533 }, { "epoch": 0.9752634975263498, "grad_norm": 0.0, "learning_rate": 3.2082824815362624e-08, "loss": 0.8737, "step": 4534 }, { "epoch": 0.9754785975478597, "grad_norm": 0.0, "learning_rate": 3.152758066489514e-08, "loss": 0.8554, "step": 4535 }, { "epoch": 0.9756936975693697, "grad_norm": 0.0, "learning_rate": 3.097717565088476e-08, "loss": 0.8545, "step": 4536 }, { "epoch": 0.9759087975908798, "grad_norm": 0.0, "learning_rate": 3.0431610040522194e-08, "loss": 0.7435, "step": 4537 }, { "epoch": 0.9761238976123897, "grad_norm": 0.0, "learning_rate": 2.989088409865004e-08, "loss": 0.8471, "step": 4538 }, { "epoch": 0.9763389976338998, "grad_norm": 0.0, "learning_rate": 2.9354998087759436e-08, "loss": 0.8589, "step": 4539 }, { "epoch": 0.9765540976554098, "grad_norm": 0.0, "learning_rate": 2.8823952267994503e-08, "loss": 0.7563, "step": 4540 }, { "epoch": 0.9767691976769197, "grad_norm": 0.0, "learning_rate": 2.8297746897146816e-08, "loss": 0.8363, "step": 4541 }, { "epoch": 0.9769842976984298, "grad_norm": 0.0, "learning_rate": 2.7776382230659816e-08, "loss": 0.8268, "step": 4542 }, { "epoch": 0.9771993977199398, "grad_norm": 0.0, "learning_rate": 2.7259858521627713e-08, "loss": 0.8139, "step": 4543 }, { "epoch": 0.9774144977414497, "grad_norm": 0.0, "learning_rate": 2.674817602079327e-08, "loss": 0.8172, "step": 4544 }, { "epoch": 0.9776295977629598, "grad_norm": 0.0, "learning_rate": 2.6241334976550016e-08, "loss": 0.8174, "step": 4545 }, { "epoch": 0.9778446977844698, "grad_norm": 0.0, "learning_rate": 2.5739335634941132e-08, "loss": 0.8742, "step": 4546 }, { "epoch": 0.9780597978059797, "grad_norm": 0.0, "learning_rate": 2.5242178239658356e-08, "loss": 0.8882, "step": 4547 }, { "epoch": 0.9782748978274898, "grad_norm": 0.0, "learning_rate": 2.4749863032045297e-08, "loss": 0.8035, "step": 4548 }, { "epoch": 0.9784899978489998, "grad_norm": 0.0, "learning_rate": 2.4262390251091894e-08, "loss": 0.8578, "step": 4549 }, { "epoch": 0.9787050978705097, "grad_norm": 0.0, "learning_rate": 2.3779760133441076e-08, "loss": 0.7764, "step": 4550 }, { "epoch": 0.9789201978920198, "grad_norm": 0.0, "learning_rate": 2.330197291338099e-08, "loss": 0.8403, "step": 4551 }, { "epoch": 0.9791352979135298, "grad_norm": 0.0, "learning_rate": 2.282902882285054e-08, "loss": 0.7085, "step": 4552 }, { "epoch": 0.9793503979350398, "grad_norm": 0.0, "learning_rate": 2.2360928091439415e-08, "loss": 0.8266, "step": 4553 }, { "epoch": 0.9795654979565498, "grad_norm": 0.0, "learning_rate": 2.189767094638251e-08, "loss": 0.8903, "step": 4554 }, { "epoch": 0.9797805979780598, "grad_norm": 0.0, "learning_rate": 2.143925761256549e-08, "loss": 0.7984, "step": 4555 }, { "epoch": 0.9799956979995698, "grad_norm": 0.0, "learning_rate": 2.0985688312524787e-08, "loss": 0.7936, "step": 4556 }, { "epoch": 0.9802107980210798, "grad_norm": 0.0, "learning_rate": 2.0536963266438725e-08, "loss": 0.7951, "step": 4557 }, { "epoch": 0.9804258980425898, "grad_norm": 0.0, "learning_rate": 2.009308269214194e-08, "loss": 0.8012, "step": 4558 }, { "epoch": 0.9806409980640998, "grad_norm": 0.0, "learning_rate": 1.965404680511207e-08, "loss": 0.8782, "step": 4559 }, { "epoch": 0.9808560980856098, "grad_norm": 0.0, "learning_rate": 1.9219855818477517e-08, "loss": 0.7616, "step": 4560 }, { "epoch": 0.9810711981071198, "grad_norm": 0.0, "learning_rate": 1.8790509943011903e-08, "loss": 0.865, "step": 4561 }, { "epoch": 0.9812862981286298, "grad_norm": 0.0, "learning_rate": 1.8366009387140726e-08, "loss": 0.743, "step": 4562 }, { "epoch": 0.9815013981501398, "grad_norm": 0.0, "learning_rate": 1.7946354356933594e-08, "loss": 0.8056, "step": 4563 }, { "epoch": 0.9817164981716499, "grad_norm": 0.0, "learning_rate": 1.7531545056110875e-08, "loss": 0.7515, "step": 4564 }, { "epoch": 0.9819315981931598, "grad_norm": 0.0, "learning_rate": 1.712158168603928e-08, "loss": 0.7716, "step": 4565 }, { "epoch": 0.9821466982146698, "grad_norm": 0.0, "learning_rate": 1.671646444573183e-08, "loss": 0.8555, "step": 4566 }, { "epoch": 0.9823617982361799, "grad_norm": 0.0, "learning_rate": 1.63161935318501e-08, "loss": 0.8278, "step": 4567 }, { "epoch": 0.9825768982576898, "grad_norm": 0.0, "learning_rate": 1.592076913870644e-08, "loss": 0.8405, "step": 4568 }, { "epoch": 0.9827919982791998, "grad_norm": 0.0, "learning_rate": 1.5530191458252854e-08, "loss": 0.8082, "step": 4569 }, { "epoch": 0.9830070983007099, "grad_norm": 0.0, "learning_rate": 1.514446068009656e-08, "loss": 0.7409, "step": 4570 }, { "epoch": 0.9832221983222198, "grad_norm": 0.0, "learning_rate": 1.4763576991487782e-08, "loss": 0.8378, "step": 4571 }, { "epoch": 0.9834372983437298, "grad_norm": 0.0, "learning_rate": 1.4387540577321946e-08, "loss": 0.7721, "step": 4572 }, { "epoch": 0.9836523983652399, "grad_norm": 0.0, "learning_rate": 1.4016351620145252e-08, "loss": 0.7708, "step": 4573 }, { "epoch": 0.9838674983867498, "grad_norm": 0.0, "learning_rate": 1.3650010300150229e-08, "loss": 0.8197, "step": 4574 }, { "epoch": 0.9840825984082598, "grad_norm": 0.0, "learning_rate": 1.3288516795173511e-08, "loss": 0.722, "step": 4575 }, { "epoch": 0.9842976984297699, "grad_norm": 0.0, "learning_rate": 1.2931871280700281e-08, "loss": 0.8083, "step": 4576 }, { "epoch": 0.9845127984512798, "grad_norm": 0.0, "learning_rate": 1.2580073929863157e-08, "loss": 0.736, "step": 4577 }, { "epoch": 0.9847278984727899, "grad_norm": 0.0, "learning_rate": 1.2233124913438865e-08, "loss": 0.812, "step": 4578 }, { "epoch": 0.9849429984942999, "grad_norm": 0.0, "learning_rate": 1.1891024399851569e-08, "loss": 0.8172, "step": 4579 }, { "epoch": 0.9851580985158098, "grad_norm": 0.0, "learning_rate": 1.1553772555172871e-08, "loss": 0.8418, "step": 4580 }, { "epoch": 0.9853731985373199, "grad_norm": 0.0, "learning_rate": 1.1221369543119587e-08, "loss": 0.8536, "step": 4581 }, { "epoch": 0.9855882985588299, "grad_norm": 0.0, "learning_rate": 1.0893815525054863e-08, "loss": 0.8517, "step": 4582 }, { "epoch": 0.9858033985803398, "grad_norm": 0.0, "learning_rate": 1.057111065998706e-08, "loss": 0.7984, "step": 4583 }, { "epoch": 0.9860184986018499, "grad_norm": 0.0, "learning_rate": 1.0253255104571979e-08, "loss": 0.7738, "step": 4584 }, { "epoch": 0.9862335986233599, "grad_norm": 0.0, "learning_rate": 9.940249013111747e-09, "loss": 0.8217, "step": 4585 }, { "epoch": 0.9864486986448698, "grad_norm": 0.0, "learning_rate": 9.632092537551485e-09, "loss": 0.7469, "step": 4586 }, { "epoch": 0.9866637986663799, "grad_norm": 0.0, "learning_rate": 9.328785827483756e-09, "loss": 0.8047, "step": 4587 }, { "epoch": 0.9868788986878899, "grad_norm": 0.0, "learning_rate": 9.030329030148554e-09, "loss": 0.8359, "step": 4588 }, { "epoch": 0.9870939987093998, "grad_norm": 0.0, "learning_rate": 8.736722290429988e-09, "loss": 0.7484, "step": 4589 }, { "epoch": 0.9873090987309099, "grad_norm": 0.0, "learning_rate": 8.447965750857378e-09, "loss": 0.836, "step": 4590 }, { "epoch": 0.9875241987524199, "grad_norm": 0.0, "learning_rate": 8.164059551605264e-09, "loss": 0.7724, "step": 4591 }, { "epoch": 0.9877392987739299, "grad_norm": 0.0, "learning_rate": 7.885003830494508e-09, "loss": 0.8716, "step": 4592 }, { "epoch": 0.9879543987954399, "grad_norm": 0.0, "learning_rate": 7.610798722991197e-09, "loss": 0.7755, "step": 4593 }, { "epoch": 0.9881694988169499, "grad_norm": 0.0, "learning_rate": 7.3414443622066286e-09, "loss": 0.8367, "step": 4594 }, { "epoch": 0.9883845988384599, "grad_norm": 0.0, "learning_rate": 7.07694087889621e-09, "loss": 0.8298, "step": 4595 }, { "epoch": 0.9885996988599699, "grad_norm": 0.0, "learning_rate": 6.8172884014638955e-09, "loss": 0.8246, "step": 4596 }, { "epoch": 0.9888147988814799, "grad_norm": 0.0, "learning_rate": 6.562487055955524e-09, "loss": 0.7173, "step": 4597 }, { "epoch": 0.9890298989029899, "grad_norm": 0.0, "learning_rate": 6.312536966062155e-09, "loss": 0.8021, "step": 4598 }, { "epoch": 0.9892449989244999, "grad_norm": 0.0, "learning_rate": 6.067438253121171e-09, "loss": 0.8216, "step": 4599 }, { "epoch": 0.98946009894601, "grad_norm": 0.0, "learning_rate": 5.827191036114066e-09, "loss": 0.8065, "step": 4600 }, { "epoch": 0.9896751989675199, "grad_norm": 0.0, "learning_rate": 5.591795431667546e-09, "loss": 0.8293, "step": 4601 }, { "epoch": 0.9898902989890299, "grad_norm": 0.0, "learning_rate": 5.3612515540524264e-09, "loss": 0.8094, "step": 4602 }, { "epoch": 0.99010539901054, "grad_norm": 0.0, "learning_rate": 5.135559515186961e-09, "loss": 0.8434, "step": 4603 }, { "epoch": 0.9903204990320499, "grad_norm": 0.0, "learning_rate": 4.9147194246290666e-09, "loss": 0.8218, "step": 4604 }, { "epoch": 0.9905355990535599, "grad_norm": 0.0, "learning_rate": 4.69873138958632e-09, "loss": 0.8026, "step": 4605 }, { "epoch": 0.99075069907507, "grad_norm": 0.0, "learning_rate": 4.487595514909293e-09, "loss": 0.8501, "step": 4606 }, { "epoch": 0.9909657990965799, "grad_norm": 0.0, "learning_rate": 4.281311903090446e-09, "loss": 0.8883, "step": 4607 }, { "epoch": 0.9911808991180899, "grad_norm": 0.0, "learning_rate": 4.079880654270785e-09, "loss": 0.8517, "step": 4608 }, { "epoch": 0.9913959991395999, "grad_norm": 0.0, "learning_rate": 3.883301866232092e-09, "loss": 0.8193, "step": 4609 }, { "epoch": 0.9916110991611099, "grad_norm": 0.0, "learning_rate": 3.6915756344046983e-09, "loss": 0.8327, "step": 4610 }, { "epoch": 0.9918261991826199, "grad_norm": 0.0, "learning_rate": 3.5047020518597096e-09, "loss": 0.8376, "step": 4611 }, { "epoch": 0.9920412992041299, "grad_norm": 0.0, "learning_rate": 3.32268120931456e-09, "loss": 0.8305, "step": 4612 }, { "epoch": 0.9922563992256399, "grad_norm": 0.0, "learning_rate": 3.1455131951285688e-09, "loss": 0.8133, "step": 4613 }, { "epoch": 0.99247149924715, "grad_norm": 0.0, "learning_rate": 2.9731980953084936e-09, "loss": 0.8692, "step": 4614 }, { "epoch": 0.9926865992686599, "grad_norm": 0.0, "learning_rate": 2.8057359935029784e-09, "loss": 0.8527, "step": 4615 }, { "epoch": 0.9929016992901699, "grad_norm": 0.0, "learning_rate": 2.6431269710058827e-09, "loss": 0.8769, "step": 4616 }, { "epoch": 0.99311679931168, "grad_norm": 0.0, "learning_rate": 2.4853711067551746e-09, "loss": 0.8188, "step": 4617 }, { "epoch": 0.9933318993331899, "grad_norm": 0.0, "learning_rate": 2.332468477330707e-09, "loss": 0.8034, "step": 4618 }, { "epoch": 0.9935469993546999, "grad_norm": 0.0, "learning_rate": 2.184419156959772e-09, "loss": 0.7694, "step": 4619 }, { "epoch": 0.99376209937621, "grad_norm": 0.0, "learning_rate": 2.0412232175126555e-09, "loss": 0.8007, "step": 4620 }, { "epoch": 0.9939771993977199, "grad_norm": 0.0, "learning_rate": 1.9028807285015326e-09, "loss": 0.7558, "step": 4621 }, { "epoch": 0.9941922994192299, "grad_norm": 0.0, "learning_rate": 1.7693917570837938e-09, "loss": 0.8414, "step": 4622 }, { "epoch": 0.99440739944074, "grad_norm": 0.0, "learning_rate": 1.6407563680620465e-09, "loss": 0.8665, "step": 4623 }, { "epoch": 0.9946224994622499, "grad_norm": 0.0, "learning_rate": 1.5169746238807847e-09, "loss": 0.8333, "step": 4624 }, { "epoch": 0.9948375994837599, "grad_norm": 0.0, "learning_rate": 1.3980465846297198e-09, "loss": 0.788, "step": 4625 }, { "epoch": 0.99505269950527, "grad_norm": 0.0, "learning_rate": 1.2839723080404486e-09, "loss": 0.846, "step": 4626 }, { "epoch": 0.9952677995267799, "grad_norm": 0.0, "learning_rate": 1.1747518494920063e-09, "loss": 0.7818, "step": 4627 }, { "epoch": 0.99548289954829, "grad_norm": 0.0, "learning_rate": 1.070385262003093e-09, "loss": 0.8021, "step": 4628 }, { "epoch": 0.9956979995698, "grad_norm": 0.0, "learning_rate": 9.708725962376265e-10, "loss": 0.8078, "step": 4629 }, { "epoch": 0.9959130995913099, "grad_norm": 0.0, "learning_rate": 8.76213900504741e-10, "loss": 0.8436, "step": 4630 }, { "epoch": 0.99612819961282, "grad_norm": 0.0, "learning_rate": 7.864092207554575e-10, "loss": 0.7927, "step": 4631 }, { "epoch": 0.99634329963433, "grad_norm": 0.0, "learning_rate": 7.014586005837931e-10, "loss": 0.8431, "step": 4632 }, { "epoch": 0.9965583996558399, "grad_norm": 0.0, "learning_rate": 6.213620812300924e-10, "loss": 0.8312, "step": 4633 }, { "epoch": 0.99677349967735, "grad_norm": 0.0, "learning_rate": 5.461197015765863e-10, "loss": 0.8907, "step": 4634 }, { "epoch": 0.99698859969886, "grad_norm": 0.0, "learning_rate": 4.757314981485017e-10, "loss": 0.7517, "step": 4635 }, { "epoch": 0.9972036997203699, "grad_norm": 0.0, "learning_rate": 4.10197505115173e-10, "loss": 0.7974, "step": 4636 }, { "epoch": 0.99741879974188, "grad_norm": 0.0, "learning_rate": 3.4951775429004073e-10, "loss": 0.8225, "step": 4637 }, { "epoch": 0.99763389976339, "grad_norm": 0.0, "learning_rate": 2.9369227513065255e-10, "loss": 0.8476, "step": 4638 }, { "epoch": 0.9978489997848999, "grad_norm": 0.0, "learning_rate": 2.4272109473644225e-10, "loss": 0.8227, "step": 4639 }, { "epoch": 0.99806409980641, "grad_norm": 0.0, "learning_rate": 1.9660423784984007e-10, "loss": 0.7886, "step": 4640 }, { "epoch": 0.99827919982792, "grad_norm": 0.0, "learning_rate": 1.553417268607138e-10, "loss": 0.8299, "step": 4641 }, { "epoch": 0.99849429984943, "grad_norm": 0.0, "learning_rate": 1.189335817963766e-10, "loss": 0.8425, "step": 4642 }, { "epoch": 0.99870939987094, "grad_norm": 0.0, "learning_rate": 8.737982033379944e-11, "loss": 0.8007, "step": 4643 }, { "epoch": 0.99892449989245, "grad_norm": 0.0, "learning_rate": 6.068045778961917e-11, "loss": 0.8463, "step": 4644 }, { "epoch": 0.99913959991396, "grad_norm": 0.0, "learning_rate": 3.88355071245794e-11, "loss": 0.7867, "step": 4645 }, { "epoch": 0.99935469993547, "grad_norm": 0.0, "learning_rate": 2.184497894353044e-11, "loss": 0.8235, "step": 4646 }, { "epoch": 0.99956979995698, "grad_norm": 0.0, "learning_rate": 9.708881494319145e-12, "loss": 0.8366, "step": 4647 }, { "epoch": 0.99978489997849, "grad_norm": 0.0, "learning_rate": 2.4272206677888876e-12, "loss": 0.814, "step": 4648 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 0.7926, "step": 4649 }, { "epoch": 1.0, "step": 4649, "total_flos": 2.7743596673706754e+19, "train_loss": 0.8857983078308376, "train_runtime": 80782.8926, "train_samples_per_second": 18.413, "train_steps_per_second": 0.058 } ], "logging_steps": 1.0, "max_steps": 4649, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.7743596673706754e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }