{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 0, "global_step": 1759, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005685048322910744, "grad_norm": 0.4140625, "learning_rate": 9.99431495167709e-06, "loss": 1.908, "step": 1 }, { "epoch": 0.0011370096645821489, "grad_norm": 0.365234375, "learning_rate": 9.98862990335418e-06, "loss": 1.7944, "step": 2 }, { "epoch": 0.0017055144968732233, "grad_norm": 0.375, "learning_rate": 9.982944855031269e-06, "loss": 1.79, "step": 3 }, { "epoch": 0.0022740193291642978, "grad_norm": 0.3671875, "learning_rate": 9.977259806708358e-06, "loss": 1.7287, "step": 4 }, { "epoch": 0.0028425241614553724, "grad_norm": 0.34375, "learning_rate": 9.971574758385447e-06, "loss": 1.6088, "step": 5 }, { "epoch": 0.0034110289937464467, "grad_norm": 0.3203125, "learning_rate": 9.965889710062536e-06, "loss": 1.7391, "step": 6 }, { "epoch": 0.003979533826037522, "grad_norm": 0.3203125, "learning_rate": 9.960204661739626e-06, "loss": 1.7726, "step": 7 }, { "epoch": 0.0045480386583285955, "grad_norm": 0.3125, "learning_rate": 9.954519613416715e-06, "loss": 1.7287, "step": 8 }, { "epoch": 0.00511654349061967, "grad_norm": 0.349609375, "learning_rate": 9.948834565093804e-06, "loss": 1.7471, "step": 9 }, { "epoch": 0.005685048322910745, "grad_norm": 0.265625, "learning_rate": 9.943149516770893e-06, "loss": 1.712, "step": 10 }, { "epoch": 0.0062535531552018195, "grad_norm": 0.427734375, "learning_rate": 9.937464468447983e-06, "loss": 1.7438, "step": 11 }, { "epoch": 0.006822057987492893, "grad_norm": 0.259765625, "learning_rate": 9.931779420125072e-06, "loss": 1.7106, "step": 12 }, { "epoch": 0.007390562819783968, "grad_norm": 0.255859375, "learning_rate": 9.926094371802161e-06, "loss": 1.7682, "step": 13 }, { "epoch": 0.007959067652075043, "grad_norm": 0.2255859375, "learning_rate": 9.92040932347925e-06, "loss": 1.6526, "step": 14 }, { "epoch": 0.008527572484366117, "grad_norm": 0.2138671875, "learning_rate": 9.91472427515634e-06, "loss": 1.6204, "step": 15 }, { "epoch": 0.009096077316657191, "grad_norm": 0.224609375, "learning_rate": 9.909039226833429e-06, "loss": 1.6491, "step": 16 }, { "epoch": 0.009664582148948267, "grad_norm": 0.2333984375, "learning_rate": 9.903354178510518e-06, "loss": 1.6663, "step": 17 }, { "epoch": 0.01023308698123934, "grad_norm": 0.2373046875, "learning_rate": 9.897669130187607e-06, "loss": 1.6826, "step": 18 }, { "epoch": 0.010801591813530414, "grad_norm": 0.2177734375, "learning_rate": 9.891984081864697e-06, "loss": 1.6878, "step": 19 }, { "epoch": 0.01137009664582149, "grad_norm": 0.1943359375, "learning_rate": 9.886299033541786e-06, "loss": 1.6662, "step": 20 }, { "epoch": 0.011938601478112564, "grad_norm": 0.2236328125, "learning_rate": 9.880613985218875e-06, "loss": 1.6619, "step": 21 }, { "epoch": 0.012507106310403639, "grad_norm": 0.203125, "learning_rate": 9.874928936895964e-06, "loss": 1.6872, "step": 22 }, { "epoch": 0.013075611142694713, "grad_norm": 0.1923828125, "learning_rate": 9.869243888573054e-06, "loss": 1.6179, "step": 23 }, { "epoch": 0.013644115974985787, "grad_norm": 0.1884765625, "learning_rate": 9.863558840250143e-06, "loss": 1.6031, "step": 24 }, { "epoch": 0.014212620807276862, "grad_norm": 0.1865234375, "learning_rate": 9.857873791927232e-06, "loss": 1.509, "step": 25 }, { "epoch": 0.014781125639567936, "grad_norm": 0.173828125, "learning_rate": 9.852188743604321e-06, "loss": 1.5882, "step": 26 }, { "epoch": 0.015349630471859011, "grad_norm": 0.16015625, "learning_rate": 9.84650369528141e-06, "loss": 1.5365, "step": 27 }, { "epoch": 0.015918135304150087, "grad_norm": 0.234375, "learning_rate": 9.8408186469585e-06, "loss": 1.5988, "step": 28 }, { "epoch": 0.01648664013644116, "grad_norm": 0.16015625, "learning_rate": 9.835133598635589e-06, "loss": 1.5518, "step": 29 }, { "epoch": 0.017055144968732235, "grad_norm": 0.1708984375, "learning_rate": 9.829448550312678e-06, "loss": 1.6394, "step": 30 }, { "epoch": 0.01762364980102331, "grad_norm": 0.1611328125, "learning_rate": 9.823763501989768e-06, "loss": 1.6186, "step": 31 }, { "epoch": 0.018192154633314382, "grad_norm": 0.1787109375, "learning_rate": 9.818078453666857e-06, "loss": 1.4954, "step": 32 }, { "epoch": 0.018760659465605456, "grad_norm": 0.154296875, "learning_rate": 9.812393405343946e-06, "loss": 1.5453, "step": 33 }, { "epoch": 0.019329164297896533, "grad_norm": 0.1669921875, "learning_rate": 9.806708357021035e-06, "loss": 1.5699, "step": 34 }, { "epoch": 0.019897669130187607, "grad_norm": 0.2099609375, "learning_rate": 9.801023308698125e-06, "loss": 1.5711, "step": 35 }, { "epoch": 0.02046617396247868, "grad_norm": 0.59765625, "learning_rate": 9.795338260375214e-06, "loss": 1.5505, "step": 36 }, { "epoch": 0.021034678794769755, "grad_norm": 0.1474609375, "learning_rate": 9.789653212052303e-06, "loss": 1.5716, "step": 37 }, { "epoch": 0.02160318362706083, "grad_norm": 0.1533203125, "learning_rate": 9.783968163729392e-06, "loss": 1.5693, "step": 38 }, { "epoch": 0.022171688459351906, "grad_norm": 0.150390625, "learning_rate": 9.778283115406482e-06, "loss": 1.5446, "step": 39 }, { "epoch": 0.02274019329164298, "grad_norm": 0.1552734375, "learning_rate": 9.772598067083571e-06, "loss": 1.5746, "step": 40 }, { "epoch": 0.023308698123934053, "grad_norm": 0.1533203125, "learning_rate": 9.76691301876066e-06, "loss": 1.5173, "step": 41 }, { "epoch": 0.023877202956225127, "grad_norm": 0.1552734375, "learning_rate": 9.76122797043775e-06, "loss": 1.4743, "step": 42 }, { "epoch": 0.0244457077885162, "grad_norm": 0.1630859375, "learning_rate": 9.755542922114839e-06, "loss": 1.4843, "step": 43 }, { "epoch": 0.025014212620807278, "grad_norm": 0.185546875, "learning_rate": 9.749857873791928e-06, "loss": 1.4388, "step": 44 }, { "epoch": 0.025582717453098352, "grad_norm": 0.146484375, "learning_rate": 9.744172825469017e-06, "loss": 1.4874, "step": 45 }, { "epoch": 0.026151222285389426, "grad_norm": 0.1591796875, "learning_rate": 9.738487777146106e-06, "loss": 1.5313, "step": 46 }, { "epoch": 0.0267197271176805, "grad_norm": 0.16796875, "learning_rate": 9.732802728823196e-06, "loss": 1.5409, "step": 47 }, { "epoch": 0.027288231949971573, "grad_norm": 0.1435546875, "learning_rate": 9.727117680500285e-06, "loss": 1.4281, "step": 48 }, { "epoch": 0.02785673678226265, "grad_norm": 0.1455078125, "learning_rate": 9.721432632177374e-06, "loss": 1.4478, "step": 49 }, { "epoch": 0.028425241614553724, "grad_norm": 0.1513671875, "learning_rate": 9.715747583854463e-06, "loss": 1.4467, "step": 50 }, { "epoch": 0.028993746446844798, "grad_norm": 0.142578125, "learning_rate": 9.710062535531553e-06, "loss": 1.4835, "step": 51 }, { "epoch": 0.029562251279135872, "grad_norm": 0.138671875, "learning_rate": 9.704377487208642e-06, "loss": 1.4582, "step": 52 }, { "epoch": 0.030130756111426946, "grad_norm": 0.1396484375, "learning_rate": 9.698692438885731e-06, "loss": 1.4054, "step": 53 }, { "epoch": 0.030699260943718023, "grad_norm": 0.1484375, "learning_rate": 9.69300739056282e-06, "loss": 1.4876, "step": 54 }, { "epoch": 0.03126776577600909, "grad_norm": 0.1298828125, "learning_rate": 9.68732234223991e-06, "loss": 1.4013, "step": 55 }, { "epoch": 0.031836270608300174, "grad_norm": 0.150390625, "learning_rate": 9.681637293916999e-06, "loss": 1.4623, "step": 56 }, { "epoch": 0.03240477544059125, "grad_norm": 0.12890625, "learning_rate": 9.675952245594088e-06, "loss": 1.4563, "step": 57 }, { "epoch": 0.03297328027288232, "grad_norm": 0.142578125, "learning_rate": 9.670267197271177e-06, "loss": 1.387, "step": 58 }, { "epoch": 0.033541785105173395, "grad_norm": 0.1435546875, "learning_rate": 9.664582148948267e-06, "loss": 1.4125, "step": 59 }, { "epoch": 0.03411028993746447, "grad_norm": 0.14453125, "learning_rate": 9.658897100625356e-06, "loss": 1.3759, "step": 60 }, { "epoch": 0.03467879476975554, "grad_norm": 0.1484375, "learning_rate": 9.653212052302445e-06, "loss": 1.4997, "step": 61 }, { "epoch": 0.03524729960204662, "grad_norm": 0.1357421875, "learning_rate": 9.647527003979534e-06, "loss": 1.4087, "step": 62 }, { "epoch": 0.03581580443433769, "grad_norm": 0.138671875, "learning_rate": 9.641841955656624e-06, "loss": 1.452, "step": 63 }, { "epoch": 0.036384309266628764, "grad_norm": 0.142578125, "learning_rate": 9.636156907333713e-06, "loss": 1.3812, "step": 64 }, { "epoch": 0.03695281409891984, "grad_norm": 0.1435546875, "learning_rate": 9.630471859010802e-06, "loss": 1.4884, "step": 65 }, { "epoch": 0.03752131893121091, "grad_norm": 0.1376953125, "learning_rate": 9.624786810687892e-06, "loss": 1.4126, "step": 66 }, { "epoch": 0.03808982376350199, "grad_norm": 0.1552734375, "learning_rate": 9.61910176236498e-06, "loss": 1.4255, "step": 67 }, { "epoch": 0.038658328595793066, "grad_norm": 0.154296875, "learning_rate": 9.61341671404207e-06, "loss": 1.407, "step": 68 }, { "epoch": 0.03922683342808414, "grad_norm": 0.1435546875, "learning_rate": 9.60773166571916e-06, "loss": 1.5128, "step": 69 }, { "epoch": 0.039795338260375214, "grad_norm": 0.150390625, "learning_rate": 9.602046617396249e-06, "loss": 1.3882, "step": 70 }, { "epoch": 0.04036384309266629, "grad_norm": 0.1513671875, "learning_rate": 9.596361569073338e-06, "loss": 1.4377, "step": 71 }, { "epoch": 0.04093234792495736, "grad_norm": 0.1328125, "learning_rate": 9.590676520750427e-06, "loss": 1.4056, "step": 72 }, { "epoch": 0.041500852757248435, "grad_norm": 0.142578125, "learning_rate": 9.584991472427516e-06, "loss": 1.3935, "step": 73 }, { "epoch": 0.04206935758953951, "grad_norm": 0.142578125, "learning_rate": 9.579306424104606e-06, "loss": 1.4346, "step": 74 }, { "epoch": 0.04263786242183058, "grad_norm": 0.1767578125, "learning_rate": 9.573621375781695e-06, "loss": 1.4639, "step": 75 }, { "epoch": 0.04320636725412166, "grad_norm": 0.1435546875, "learning_rate": 9.567936327458784e-06, "loss": 1.4476, "step": 76 }, { "epoch": 0.04377487208641274, "grad_norm": 0.1484375, "learning_rate": 9.562251279135873e-06, "loss": 1.3755, "step": 77 }, { "epoch": 0.04434337691870381, "grad_norm": 0.146484375, "learning_rate": 9.556566230812963e-06, "loss": 1.4707, "step": 78 }, { "epoch": 0.044911881750994885, "grad_norm": 0.1513671875, "learning_rate": 9.550881182490052e-06, "loss": 1.2923, "step": 79 }, { "epoch": 0.04548038658328596, "grad_norm": 0.1396484375, "learning_rate": 9.545196134167141e-06, "loss": 1.3858, "step": 80 }, { "epoch": 0.04604889141557703, "grad_norm": 0.1533203125, "learning_rate": 9.53951108584423e-06, "loss": 1.3831, "step": 81 }, { "epoch": 0.046617396247868106, "grad_norm": 0.1728515625, "learning_rate": 9.53382603752132e-06, "loss": 1.3624, "step": 82 }, { "epoch": 0.04718590108015918, "grad_norm": 0.1357421875, "learning_rate": 9.528140989198409e-06, "loss": 1.4257, "step": 83 }, { "epoch": 0.047754405912450254, "grad_norm": 0.140625, "learning_rate": 9.522455940875498e-06, "loss": 1.3493, "step": 84 }, { "epoch": 0.04832291074474133, "grad_norm": 0.1435546875, "learning_rate": 9.516770892552587e-06, "loss": 1.3676, "step": 85 }, { "epoch": 0.0488914155770324, "grad_norm": 0.154296875, "learning_rate": 9.511085844229677e-06, "loss": 1.4692, "step": 86 }, { "epoch": 0.04945992040932348, "grad_norm": 0.1474609375, "learning_rate": 9.505400795906766e-06, "loss": 1.3807, "step": 87 }, { "epoch": 0.050028425241614556, "grad_norm": 0.142578125, "learning_rate": 9.499715747583855e-06, "loss": 1.4413, "step": 88 }, { "epoch": 0.05059693007390563, "grad_norm": 0.15234375, "learning_rate": 9.494030699260944e-06, "loss": 1.3698, "step": 89 }, { "epoch": 0.051165434906196704, "grad_norm": 0.154296875, "learning_rate": 9.488345650938034e-06, "loss": 1.3273, "step": 90 }, { "epoch": 0.05173393973848778, "grad_norm": 0.1484375, "learning_rate": 9.482660602615123e-06, "loss": 1.343, "step": 91 }, { "epoch": 0.05230244457077885, "grad_norm": 0.1435546875, "learning_rate": 9.476975554292212e-06, "loss": 1.4135, "step": 92 }, { "epoch": 0.052870949403069925, "grad_norm": 0.1474609375, "learning_rate": 9.471290505969301e-06, "loss": 1.371, "step": 93 }, { "epoch": 0.053439454235361, "grad_norm": 0.1474609375, "learning_rate": 9.46560545764639e-06, "loss": 1.326, "step": 94 }, { "epoch": 0.05400795906765207, "grad_norm": 0.1650390625, "learning_rate": 9.45992040932348e-06, "loss": 1.368, "step": 95 }, { "epoch": 0.054576463899943146, "grad_norm": 0.1435546875, "learning_rate": 9.454235361000569e-06, "loss": 1.3467, "step": 96 }, { "epoch": 0.05514496873223423, "grad_norm": 0.1533203125, "learning_rate": 9.448550312677658e-06, "loss": 1.3327, "step": 97 }, { "epoch": 0.0557134735645253, "grad_norm": 0.1513671875, "learning_rate": 9.442865264354748e-06, "loss": 1.2613, "step": 98 }, { "epoch": 0.056281978396816375, "grad_norm": 0.205078125, "learning_rate": 9.437180216031837e-06, "loss": 1.2793, "step": 99 }, { "epoch": 0.05685048322910745, "grad_norm": 0.1728515625, "learning_rate": 9.431495167708926e-06, "loss": 1.3421, "step": 100 }, { "epoch": 0.05741898806139852, "grad_norm": 0.1484375, "learning_rate": 9.425810119386015e-06, "loss": 1.4158, "step": 101 }, { "epoch": 0.057987492893689596, "grad_norm": 0.1474609375, "learning_rate": 9.420125071063105e-06, "loss": 1.3447, "step": 102 }, { "epoch": 0.05855599772598067, "grad_norm": 0.1455078125, "learning_rate": 9.414440022740194e-06, "loss": 1.3325, "step": 103 }, { "epoch": 0.059124502558271744, "grad_norm": 0.1513671875, "learning_rate": 9.408754974417283e-06, "loss": 1.3252, "step": 104 }, { "epoch": 0.05969300739056282, "grad_norm": 0.1455078125, "learning_rate": 9.403069926094372e-06, "loss": 1.3853, "step": 105 }, { "epoch": 0.06026151222285389, "grad_norm": 0.1591796875, "learning_rate": 9.397384877771462e-06, "loss": 1.3572, "step": 106 }, { "epoch": 0.06083001705514497, "grad_norm": 0.1982421875, "learning_rate": 9.391699829448551e-06, "loss": 1.3149, "step": 107 }, { "epoch": 0.061398521887436046, "grad_norm": 0.1552734375, "learning_rate": 9.38601478112564e-06, "loss": 1.2879, "step": 108 }, { "epoch": 0.06196702671972712, "grad_norm": 0.1455078125, "learning_rate": 9.38032973280273e-06, "loss": 1.2961, "step": 109 }, { "epoch": 0.06253553155201819, "grad_norm": 0.15625, "learning_rate": 9.374644684479819e-06, "loss": 1.2622, "step": 110 }, { "epoch": 0.06310403638430927, "grad_norm": 0.177734375, "learning_rate": 9.368959636156908e-06, "loss": 1.322, "step": 111 }, { "epoch": 0.06367254121660035, "grad_norm": 0.1513671875, "learning_rate": 9.363274587833997e-06, "loss": 1.3425, "step": 112 }, { "epoch": 0.06424104604889141, "grad_norm": 0.150390625, "learning_rate": 9.357589539511086e-06, "loss": 1.2912, "step": 113 }, { "epoch": 0.0648095508811825, "grad_norm": 0.1533203125, "learning_rate": 9.351904491188176e-06, "loss": 1.3344, "step": 114 }, { "epoch": 0.06537805571347356, "grad_norm": 0.1552734375, "learning_rate": 9.346219442865265e-06, "loss": 1.2359, "step": 115 }, { "epoch": 0.06594656054576464, "grad_norm": 0.1591796875, "learning_rate": 9.340534394542354e-06, "loss": 1.3571, "step": 116 }, { "epoch": 0.06651506537805571, "grad_norm": 0.169921875, "learning_rate": 9.334849346219443e-06, "loss": 1.353, "step": 117 }, { "epoch": 0.06708357021034679, "grad_norm": 0.162109375, "learning_rate": 9.329164297896533e-06, "loss": 1.3088, "step": 118 }, { "epoch": 0.06765207504263786, "grad_norm": 0.2890625, "learning_rate": 9.323479249573622e-06, "loss": 1.3015, "step": 119 }, { "epoch": 0.06822057987492894, "grad_norm": 0.1572265625, "learning_rate": 9.317794201250711e-06, "loss": 1.3116, "step": 120 }, { "epoch": 0.06878908470722, "grad_norm": 0.158203125, "learning_rate": 9.3121091529278e-06, "loss": 1.3272, "step": 121 }, { "epoch": 0.06935758953951109, "grad_norm": 0.294921875, "learning_rate": 9.30642410460489e-06, "loss": 1.2605, "step": 122 }, { "epoch": 0.06992609437180217, "grad_norm": 0.1953125, "learning_rate": 9.300739056281979e-06, "loss": 1.3638, "step": 123 }, { "epoch": 0.07049459920409323, "grad_norm": 0.15625, "learning_rate": 9.295054007959068e-06, "loss": 1.266, "step": 124 }, { "epoch": 0.07106310403638431, "grad_norm": 0.1591796875, "learning_rate": 9.289368959636157e-06, "loss": 1.3665, "step": 125 }, { "epoch": 0.07163160886867538, "grad_norm": 0.1591796875, "learning_rate": 9.283683911313247e-06, "loss": 1.2837, "step": 126 }, { "epoch": 0.07220011370096646, "grad_norm": 0.1591796875, "learning_rate": 9.277998862990336e-06, "loss": 1.2456, "step": 127 }, { "epoch": 0.07276861853325753, "grad_norm": 0.162109375, "learning_rate": 9.272313814667425e-06, "loss": 1.3348, "step": 128 }, { "epoch": 0.07333712336554861, "grad_norm": 0.1513671875, "learning_rate": 9.266628766344514e-06, "loss": 1.3128, "step": 129 }, { "epoch": 0.07390562819783968, "grad_norm": 0.150390625, "learning_rate": 9.260943718021604e-06, "loss": 1.2128, "step": 130 }, { "epoch": 0.07447413303013076, "grad_norm": 0.1552734375, "learning_rate": 9.255258669698693e-06, "loss": 1.2092, "step": 131 }, { "epoch": 0.07504263786242182, "grad_norm": 0.16015625, "learning_rate": 9.249573621375782e-06, "loss": 1.3114, "step": 132 }, { "epoch": 0.0756111426947129, "grad_norm": 0.171875, "learning_rate": 9.243888573052871e-06, "loss": 1.3447, "step": 133 }, { "epoch": 0.07617964752700399, "grad_norm": 0.173828125, "learning_rate": 9.23820352472996e-06, "loss": 1.2719, "step": 134 }, { "epoch": 0.07674815235929505, "grad_norm": 0.1787109375, "learning_rate": 9.23251847640705e-06, "loss": 1.3097, "step": 135 }, { "epoch": 0.07731665719158613, "grad_norm": 0.1630859375, "learning_rate": 9.22683342808414e-06, "loss": 1.2172, "step": 136 }, { "epoch": 0.0778851620238772, "grad_norm": 0.1572265625, "learning_rate": 9.221148379761228e-06, "loss": 1.2825, "step": 137 }, { "epoch": 0.07845366685616828, "grad_norm": 0.1689453125, "learning_rate": 9.215463331438318e-06, "loss": 1.2999, "step": 138 }, { "epoch": 0.07902217168845935, "grad_norm": 0.154296875, "learning_rate": 9.209778283115407e-06, "loss": 1.2604, "step": 139 }, { "epoch": 0.07959067652075043, "grad_norm": 0.16015625, "learning_rate": 9.204093234792496e-06, "loss": 1.2412, "step": 140 }, { "epoch": 0.0801591813530415, "grad_norm": 0.1640625, "learning_rate": 9.198408186469585e-06, "loss": 1.2803, "step": 141 }, { "epoch": 0.08072768618533258, "grad_norm": 0.1572265625, "learning_rate": 9.192723138146675e-06, "loss": 1.2806, "step": 142 }, { "epoch": 0.08129619101762366, "grad_norm": 0.158203125, "learning_rate": 9.187038089823764e-06, "loss": 1.2885, "step": 143 }, { "epoch": 0.08186469584991472, "grad_norm": 0.1650390625, "learning_rate": 9.181353041500853e-06, "loss": 1.2193, "step": 144 }, { "epoch": 0.0824332006822058, "grad_norm": 0.166015625, "learning_rate": 9.175667993177942e-06, "loss": 1.2306, "step": 145 }, { "epoch": 0.08300170551449687, "grad_norm": 0.154296875, "learning_rate": 9.169982944855032e-06, "loss": 1.3094, "step": 146 }, { "epoch": 0.08357021034678795, "grad_norm": 0.1689453125, "learning_rate": 9.164297896532121e-06, "loss": 1.2548, "step": 147 }, { "epoch": 0.08413871517907902, "grad_norm": 0.1728515625, "learning_rate": 9.15861284820921e-06, "loss": 1.3524, "step": 148 }, { "epoch": 0.0847072200113701, "grad_norm": 0.15625, "learning_rate": 9.1529277998863e-06, "loss": 1.2764, "step": 149 }, { "epoch": 0.08527572484366117, "grad_norm": 0.169921875, "learning_rate": 9.147242751563389e-06, "loss": 1.2598, "step": 150 }, { "epoch": 0.08584422967595225, "grad_norm": 0.162109375, "learning_rate": 9.141557703240478e-06, "loss": 1.2244, "step": 151 }, { "epoch": 0.08641273450824331, "grad_norm": 0.1630859375, "learning_rate": 9.135872654917567e-06, "loss": 1.2015, "step": 152 }, { "epoch": 0.0869812393405344, "grad_norm": 0.236328125, "learning_rate": 9.130187606594657e-06, "loss": 1.2399, "step": 153 }, { "epoch": 0.08754974417282547, "grad_norm": 0.2490234375, "learning_rate": 9.124502558271746e-06, "loss": 1.2567, "step": 154 }, { "epoch": 0.08811824900511654, "grad_norm": 0.2236328125, "learning_rate": 9.118817509948835e-06, "loss": 1.2292, "step": 155 }, { "epoch": 0.08868675383740762, "grad_norm": 0.2412109375, "learning_rate": 9.113132461625924e-06, "loss": 1.1582, "step": 156 }, { "epoch": 0.08925525866969869, "grad_norm": 0.244140625, "learning_rate": 9.107447413303014e-06, "loss": 1.2584, "step": 157 }, { "epoch": 0.08982376350198977, "grad_norm": 0.1787109375, "learning_rate": 9.101762364980103e-06, "loss": 1.3279, "step": 158 }, { "epoch": 0.09039226833428084, "grad_norm": 0.189453125, "learning_rate": 9.096077316657192e-06, "loss": 1.3173, "step": 159 }, { "epoch": 0.09096077316657192, "grad_norm": 0.1982421875, "learning_rate": 9.090392268334281e-06, "loss": 1.2544, "step": 160 }, { "epoch": 0.09152927799886298, "grad_norm": 0.232421875, "learning_rate": 9.08470722001137e-06, "loss": 1.2337, "step": 161 }, { "epoch": 0.09209778283115407, "grad_norm": 0.1572265625, "learning_rate": 9.07902217168846e-06, "loss": 1.2094, "step": 162 }, { "epoch": 0.09266628766344515, "grad_norm": 0.1806640625, "learning_rate": 9.073337123365549e-06, "loss": 1.3012, "step": 163 }, { "epoch": 0.09323479249573621, "grad_norm": 0.1728515625, "learning_rate": 9.067652075042638e-06, "loss": 1.2278, "step": 164 }, { "epoch": 0.0938032973280273, "grad_norm": 0.1787109375, "learning_rate": 9.061967026719728e-06, "loss": 1.1696, "step": 165 }, { "epoch": 0.09437180216031836, "grad_norm": 0.3515625, "learning_rate": 9.056281978396817e-06, "loss": 1.241, "step": 166 }, { "epoch": 0.09494030699260944, "grad_norm": 0.1826171875, "learning_rate": 9.050596930073906e-06, "loss": 1.2421, "step": 167 }, { "epoch": 0.09550881182490051, "grad_norm": 0.1826171875, "learning_rate": 9.044911881750995e-06, "loss": 1.2404, "step": 168 }, { "epoch": 0.09607731665719159, "grad_norm": 0.203125, "learning_rate": 9.039226833428085e-06, "loss": 1.2755, "step": 169 }, { "epoch": 0.09664582148948266, "grad_norm": 0.1826171875, "learning_rate": 9.033541785105174e-06, "loss": 1.2282, "step": 170 }, { "epoch": 0.09721432632177374, "grad_norm": 0.17578125, "learning_rate": 9.027856736782263e-06, "loss": 1.1773, "step": 171 }, { "epoch": 0.0977828311540648, "grad_norm": 0.197265625, "learning_rate": 9.022171688459352e-06, "loss": 1.233, "step": 172 }, { "epoch": 0.09835133598635588, "grad_norm": 0.1728515625, "learning_rate": 9.016486640136442e-06, "loss": 1.2483, "step": 173 }, { "epoch": 0.09891984081864696, "grad_norm": 0.173828125, "learning_rate": 9.01080159181353e-06, "loss": 1.2405, "step": 174 }, { "epoch": 0.09948834565093803, "grad_norm": 0.1953125, "learning_rate": 9.00511654349062e-06, "loss": 1.2512, "step": 175 }, { "epoch": 0.10005685048322911, "grad_norm": 0.166015625, "learning_rate": 8.99943149516771e-06, "loss": 1.2846, "step": 176 }, { "epoch": 0.10062535531552018, "grad_norm": 0.1865234375, "learning_rate": 8.993746446844799e-06, "loss": 1.1417, "step": 177 }, { "epoch": 0.10119386014781126, "grad_norm": 0.1796875, "learning_rate": 8.988061398521888e-06, "loss": 1.1475, "step": 178 }, { "epoch": 0.10176236498010233, "grad_norm": 0.21484375, "learning_rate": 8.982376350198977e-06, "loss": 1.3025, "step": 179 }, { "epoch": 0.10233086981239341, "grad_norm": 0.18359375, "learning_rate": 8.976691301876066e-06, "loss": 1.2933, "step": 180 }, { "epoch": 0.10289937464468447, "grad_norm": 0.1708984375, "learning_rate": 8.971006253553156e-06, "loss": 1.2164, "step": 181 }, { "epoch": 0.10346787947697555, "grad_norm": 0.1875, "learning_rate": 8.965321205230245e-06, "loss": 1.2365, "step": 182 }, { "epoch": 0.10403638430926662, "grad_norm": 0.1845703125, "learning_rate": 8.959636156907334e-06, "loss": 1.2234, "step": 183 }, { "epoch": 0.1046048891415577, "grad_norm": 0.1806640625, "learning_rate": 8.953951108584423e-06, "loss": 1.2725, "step": 184 }, { "epoch": 0.10517339397384878, "grad_norm": 0.1845703125, "learning_rate": 8.948266060261514e-06, "loss": 1.2689, "step": 185 }, { "epoch": 0.10574189880613985, "grad_norm": 0.19921875, "learning_rate": 8.942581011938602e-06, "loss": 1.2309, "step": 186 }, { "epoch": 0.10631040363843093, "grad_norm": 0.16796875, "learning_rate": 8.936895963615691e-06, "loss": 1.2432, "step": 187 }, { "epoch": 0.106878908470722, "grad_norm": 0.19921875, "learning_rate": 8.93121091529278e-06, "loss": 1.212, "step": 188 }, { "epoch": 0.10744741330301308, "grad_norm": 0.19140625, "learning_rate": 8.92552586696987e-06, "loss": 1.2493, "step": 189 }, { "epoch": 0.10801591813530415, "grad_norm": 0.2177734375, "learning_rate": 8.919840818646959e-06, "loss": 1.2384, "step": 190 }, { "epoch": 0.10858442296759523, "grad_norm": 0.234375, "learning_rate": 8.914155770324048e-06, "loss": 1.2348, "step": 191 }, { "epoch": 0.10915292779988629, "grad_norm": 0.181640625, "learning_rate": 8.908470722001137e-06, "loss": 1.2438, "step": 192 }, { "epoch": 0.10972143263217737, "grad_norm": 0.2197265625, "learning_rate": 8.902785673678227e-06, "loss": 1.1909, "step": 193 }, { "epoch": 0.11028993746446845, "grad_norm": 0.17578125, "learning_rate": 8.897100625355316e-06, "loss": 1.206, "step": 194 }, { "epoch": 0.11085844229675952, "grad_norm": 0.169921875, "learning_rate": 8.891415577032405e-06, "loss": 1.1958, "step": 195 }, { "epoch": 0.1114269471290506, "grad_norm": 0.17578125, "learning_rate": 8.885730528709496e-06, "loss": 1.2261, "step": 196 }, { "epoch": 0.11199545196134167, "grad_norm": 0.1708984375, "learning_rate": 8.880045480386584e-06, "loss": 1.204, "step": 197 }, { "epoch": 0.11256395679363275, "grad_norm": 0.19921875, "learning_rate": 8.874360432063673e-06, "loss": 1.2257, "step": 198 }, { "epoch": 0.11313246162592382, "grad_norm": 0.185546875, "learning_rate": 8.868675383740762e-06, "loss": 1.1711, "step": 199 }, { "epoch": 0.1137009664582149, "grad_norm": 0.2138671875, "learning_rate": 8.862990335417851e-06, "loss": 1.2515, "step": 200 }, { "epoch": 0.11426947129050596, "grad_norm": 0.1767578125, "learning_rate": 8.85730528709494e-06, "loss": 1.2117, "step": 201 }, { "epoch": 0.11483797612279704, "grad_norm": 0.19140625, "learning_rate": 8.85162023877203e-06, "loss": 1.232, "step": 202 }, { "epoch": 0.11540648095508811, "grad_norm": 0.2080078125, "learning_rate": 8.845935190449119e-06, "loss": 1.2283, "step": 203 }, { "epoch": 0.11597498578737919, "grad_norm": 0.1953125, "learning_rate": 8.840250142126208e-06, "loss": 1.1347, "step": 204 }, { "epoch": 0.11654349061967027, "grad_norm": 0.18359375, "learning_rate": 8.834565093803298e-06, "loss": 1.2001, "step": 205 }, { "epoch": 0.11711199545196134, "grad_norm": 0.177734375, "learning_rate": 8.828880045480387e-06, "loss": 1.2736, "step": 206 }, { "epoch": 0.11768050028425242, "grad_norm": 0.1884765625, "learning_rate": 8.823194997157478e-06, "loss": 1.2131, "step": 207 }, { "epoch": 0.11824900511654349, "grad_norm": 0.1708984375, "learning_rate": 8.817509948834565e-06, "loss": 1.2543, "step": 208 }, { "epoch": 0.11881750994883457, "grad_norm": 0.1953125, "learning_rate": 8.811824900511655e-06, "loss": 1.2027, "step": 209 }, { "epoch": 0.11938601478112564, "grad_norm": 0.1962890625, "learning_rate": 8.806139852188744e-06, "loss": 1.1066, "step": 210 }, { "epoch": 0.11995451961341672, "grad_norm": 0.1865234375, "learning_rate": 8.800454803865833e-06, "loss": 1.2455, "step": 211 }, { "epoch": 0.12052302444570778, "grad_norm": 0.2041015625, "learning_rate": 8.794769755542922e-06, "loss": 1.229, "step": 212 }, { "epoch": 0.12109152927799886, "grad_norm": 0.1767578125, "learning_rate": 8.789084707220012e-06, "loss": 1.232, "step": 213 }, { "epoch": 0.12166003411028994, "grad_norm": 0.1845703125, "learning_rate": 8.783399658897101e-06, "loss": 1.2403, "step": 214 }, { "epoch": 0.12222853894258101, "grad_norm": 0.1962890625, "learning_rate": 8.77771461057419e-06, "loss": 1.2721, "step": 215 }, { "epoch": 0.12279704377487209, "grad_norm": 0.1806640625, "learning_rate": 8.77202956225128e-06, "loss": 1.2892, "step": 216 }, { "epoch": 0.12336554860716316, "grad_norm": 0.193359375, "learning_rate": 8.766344513928369e-06, "loss": 1.1582, "step": 217 }, { "epoch": 0.12393405343945424, "grad_norm": 0.181640625, "learning_rate": 8.76065946560546e-06, "loss": 1.2173, "step": 218 }, { "epoch": 0.1245025582717453, "grad_norm": 0.2080078125, "learning_rate": 8.754974417282547e-06, "loss": 1.1121, "step": 219 }, { "epoch": 0.12507106310403637, "grad_norm": 0.1826171875, "learning_rate": 8.749289368959636e-06, "loss": 1.1975, "step": 220 }, { "epoch": 0.12563956793632747, "grad_norm": 0.2099609375, "learning_rate": 8.743604320636726e-06, "loss": 1.2249, "step": 221 }, { "epoch": 0.12620807276861853, "grad_norm": 0.2177734375, "learning_rate": 8.737919272313815e-06, "loss": 1.1874, "step": 222 }, { "epoch": 0.1267765776009096, "grad_norm": 0.1953125, "learning_rate": 8.732234223990904e-06, "loss": 1.2593, "step": 223 }, { "epoch": 0.1273450824332007, "grad_norm": 0.1845703125, "learning_rate": 8.726549175667995e-06, "loss": 1.2031, "step": 224 }, { "epoch": 0.12791358726549176, "grad_norm": 0.220703125, "learning_rate": 8.720864127345083e-06, "loss": 1.2882, "step": 225 }, { "epoch": 0.12848209209778283, "grad_norm": 0.17578125, "learning_rate": 8.715179079022172e-06, "loss": 1.2214, "step": 226 }, { "epoch": 0.1290505969300739, "grad_norm": 0.19921875, "learning_rate": 8.709494030699261e-06, "loss": 1.1666, "step": 227 }, { "epoch": 0.129619101762365, "grad_norm": 0.1884765625, "learning_rate": 8.70380898237635e-06, "loss": 1.1651, "step": 228 }, { "epoch": 0.13018760659465606, "grad_norm": 0.189453125, "learning_rate": 8.698123934053441e-06, "loss": 1.1696, "step": 229 }, { "epoch": 0.13075611142694712, "grad_norm": 0.201171875, "learning_rate": 8.692438885730529e-06, "loss": 1.1969, "step": 230 }, { "epoch": 0.1313246162592382, "grad_norm": 0.189453125, "learning_rate": 8.686753837407618e-06, "loss": 1.1119, "step": 231 }, { "epoch": 0.1318931210915293, "grad_norm": 0.1806640625, "learning_rate": 8.681068789084707e-06, "loss": 1.1749, "step": 232 }, { "epoch": 0.13246162592382035, "grad_norm": 0.1796875, "learning_rate": 8.675383740761797e-06, "loss": 1.2314, "step": 233 }, { "epoch": 0.13303013075611142, "grad_norm": 0.1884765625, "learning_rate": 8.669698692438886e-06, "loss": 1.1838, "step": 234 }, { "epoch": 0.13359863558840251, "grad_norm": 0.2158203125, "learning_rate": 8.664013644115977e-06, "loss": 1.1981, "step": 235 }, { "epoch": 0.13416714042069358, "grad_norm": 0.193359375, "learning_rate": 8.658328595793064e-06, "loss": 1.2723, "step": 236 }, { "epoch": 0.13473564525298465, "grad_norm": 0.1806640625, "learning_rate": 8.652643547470154e-06, "loss": 1.2209, "step": 237 }, { "epoch": 0.13530415008527572, "grad_norm": 0.1904296875, "learning_rate": 8.646958499147243e-06, "loss": 1.2632, "step": 238 }, { "epoch": 0.1358726549175668, "grad_norm": 0.1982421875, "learning_rate": 8.641273450824332e-06, "loss": 1.2079, "step": 239 }, { "epoch": 0.13644115974985788, "grad_norm": 0.208984375, "learning_rate": 8.635588402501423e-06, "loss": 1.1105, "step": 240 }, { "epoch": 0.13700966458214894, "grad_norm": 0.17578125, "learning_rate": 8.62990335417851e-06, "loss": 1.2125, "step": 241 }, { "epoch": 0.13757816941444, "grad_norm": 0.19140625, "learning_rate": 8.6242183058556e-06, "loss": 1.1694, "step": 242 }, { "epoch": 0.1381466742467311, "grad_norm": 0.1904296875, "learning_rate": 8.61853325753269e-06, "loss": 1.2188, "step": 243 }, { "epoch": 0.13871517907902217, "grad_norm": 0.2216796875, "learning_rate": 8.612848209209779e-06, "loss": 1.2661, "step": 244 }, { "epoch": 0.13928368391131324, "grad_norm": 0.2060546875, "learning_rate": 8.607163160886868e-06, "loss": 1.2173, "step": 245 }, { "epoch": 0.13985218874360433, "grad_norm": 0.1904296875, "learning_rate": 8.601478112563959e-06, "loss": 1.1444, "step": 246 }, { "epoch": 0.1404206935758954, "grad_norm": 0.18359375, "learning_rate": 8.595793064241046e-06, "loss": 1.1896, "step": 247 }, { "epoch": 0.14098919840818647, "grad_norm": 0.1865234375, "learning_rate": 8.590108015918136e-06, "loss": 1.1635, "step": 248 }, { "epoch": 0.14155770324047753, "grad_norm": 0.197265625, "learning_rate": 8.584422967595225e-06, "loss": 1.1368, "step": 249 }, { "epoch": 0.14212620807276863, "grad_norm": 0.1796875, "learning_rate": 8.578737919272314e-06, "loss": 1.1558, "step": 250 }, { "epoch": 0.1426947129050597, "grad_norm": 0.236328125, "learning_rate": 8.573052870949405e-06, "loss": 1.202, "step": 251 }, { "epoch": 0.14326321773735076, "grad_norm": 0.19140625, "learning_rate": 8.567367822626493e-06, "loss": 1.0826, "step": 252 }, { "epoch": 0.14383172256964183, "grad_norm": 0.193359375, "learning_rate": 8.561682774303582e-06, "loss": 1.2301, "step": 253 }, { "epoch": 0.14440022740193292, "grad_norm": 0.1923828125, "learning_rate": 8.555997725980671e-06, "loss": 1.1695, "step": 254 }, { "epoch": 0.144968732234224, "grad_norm": 0.263671875, "learning_rate": 8.55031267765776e-06, "loss": 1.2415, "step": 255 }, { "epoch": 0.14553723706651506, "grad_norm": 0.2021484375, "learning_rate": 8.54462762933485e-06, "loss": 1.1923, "step": 256 }, { "epoch": 0.14610574189880615, "grad_norm": 0.1923828125, "learning_rate": 8.53894258101194e-06, "loss": 1.1787, "step": 257 }, { "epoch": 0.14667424673109722, "grad_norm": 0.298828125, "learning_rate": 8.533257532689028e-06, "loss": 1.2031, "step": 258 }, { "epoch": 0.14724275156338829, "grad_norm": 0.197265625, "learning_rate": 8.527572484366117e-06, "loss": 1.1383, "step": 259 }, { "epoch": 0.14781125639567935, "grad_norm": 0.22265625, "learning_rate": 8.521887436043207e-06, "loss": 1.265, "step": 260 }, { "epoch": 0.14837976122797045, "grad_norm": 0.2099609375, "learning_rate": 8.516202387720296e-06, "loss": 1.167, "step": 261 }, { "epoch": 0.1489482660602615, "grad_norm": 0.1904296875, "learning_rate": 8.510517339397387e-06, "loss": 1.1795, "step": 262 }, { "epoch": 0.14951677089255258, "grad_norm": 0.197265625, "learning_rate": 8.504832291074474e-06, "loss": 1.0647, "step": 263 }, { "epoch": 0.15008527572484365, "grad_norm": 0.185546875, "learning_rate": 8.499147242751564e-06, "loss": 1.1345, "step": 264 }, { "epoch": 0.15065378055713474, "grad_norm": 0.1904296875, "learning_rate": 8.493462194428653e-06, "loss": 1.1979, "step": 265 }, { "epoch": 0.1512222853894258, "grad_norm": 0.185546875, "learning_rate": 8.487777146105742e-06, "loss": 1.1458, "step": 266 }, { "epoch": 0.15179079022171688, "grad_norm": 0.19921875, "learning_rate": 8.482092097782831e-06, "loss": 1.2136, "step": 267 }, { "epoch": 0.15235929505400797, "grad_norm": 0.1904296875, "learning_rate": 8.476407049459922e-06, "loss": 1.1795, "step": 268 }, { "epoch": 0.15292779988629904, "grad_norm": 0.1904296875, "learning_rate": 8.47072200113701e-06, "loss": 1.2106, "step": 269 }, { "epoch": 0.1534963047185901, "grad_norm": 0.1787109375, "learning_rate": 8.465036952814099e-06, "loss": 1.1582, "step": 270 }, { "epoch": 0.15406480955088117, "grad_norm": 0.181640625, "learning_rate": 8.459351904491188e-06, "loss": 1.231, "step": 271 }, { "epoch": 0.15463331438317227, "grad_norm": 0.1865234375, "learning_rate": 8.453666856168278e-06, "loss": 1.1488, "step": 272 }, { "epoch": 0.15520181921546333, "grad_norm": 0.2119140625, "learning_rate": 8.447981807845369e-06, "loss": 1.1066, "step": 273 }, { "epoch": 0.1557703240477544, "grad_norm": 0.197265625, "learning_rate": 8.442296759522458e-06, "loss": 1.2579, "step": 274 }, { "epoch": 0.1563388288800455, "grad_norm": 0.189453125, "learning_rate": 8.436611711199545e-06, "loss": 1.2476, "step": 275 }, { "epoch": 0.15690733371233656, "grad_norm": 0.1953125, "learning_rate": 8.430926662876635e-06, "loss": 1.1802, "step": 276 }, { "epoch": 0.15747583854462763, "grad_norm": 0.189453125, "learning_rate": 8.425241614553724e-06, "loss": 1.1924, "step": 277 }, { "epoch": 0.1580443433769187, "grad_norm": 0.1953125, "learning_rate": 8.419556566230813e-06, "loss": 1.1614, "step": 278 }, { "epoch": 0.1586128482092098, "grad_norm": 0.197265625, "learning_rate": 8.413871517907904e-06, "loss": 1.1457, "step": 279 }, { "epoch": 0.15918135304150086, "grad_norm": 0.1875, "learning_rate": 8.408186469584992e-06, "loss": 1.2021, "step": 280 }, { "epoch": 0.15974985787379192, "grad_norm": 0.2080078125, "learning_rate": 8.402501421262081e-06, "loss": 1.1019, "step": 281 }, { "epoch": 0.160318362706083, "grad_norm": 0.1923828125, "learning_rate": 8.39681637293917e-06, "loss": 1.1078, "step": 282 }, { "epoch": 0.16088686753837408, "grad_norm": 0.2099609375, "learning_rate": 8.39113132461626e-06, "loss": 1.122, "step": 283 }, { "epoch": 0.16145537237066515, "grad_norm": 0.19140625, "learning_rate": 8.38544627629335e-06, "loss": 1.1752, "step": 284 }, { "epoch": 0.16202387720295622, "grad_norm": 0.1962890625, "learning_rate": 8.37976122797044e-06, "loss": 1.1995, "step": 285 }, { "epoch": 0.1625923820352473, "grad_norm": 0.1982421875, "learning_rate": 8.374076179647527e-06, "loss": 1.1957, "step": 286 }, { "epoch": 0.16316088686753838, "grad_norm": 0.1962890625, "learning_rate": 8.368391131324616e-06, "loss": 1.1969, "step": 287 }, { "epoch": 0.16372939169982945, "grad_norm": 0.2041015625, "learning_rate": 8.362706083001706e-06, "loss": 1.1444, "step": 288 }, { "epoch": 0.1642978965321205, "grad_norm": 0.26953125, "learning_rate": 8.357021034678795e-06, "loss": 1.1647, "step": 289 }, { "epoch": 0.1648664013644116, "grad_norm": 0.1923828125, "learning_rate": 8.351335986355886e-06, "loss": 1.1527, "step": 290 }, { "epoch": 0.16543490619670267, "grad_norm": 0.2138671875, "learning_rate": 8.345650938032973e-06, "loss": 1.2655, "step": 291 }, { "epoch": 0.16600341102899374, "grad_norm": 0.203125, "learning_rate": 8.339965889710063e-06, "loss": 1.0771, "step": 292 }, { "epoch": 0.1665719158612848, "grad_norm": 0.1953125, "learning_rate": 8.334280841387152e-06, "loss": 1.194, "step": 293 }, { "epoch": 0.1671404206935759, "grad_norm": 0.197265625, "learning_rate": 8.328595793064241e-06, "loss": 1.1719, "step": 294 }, { "epoch": 0.16770892552586697, "grad_norm": 0.20703125, "learning_rate": 8.32291074474133e-06, "loss": 1.2175, "step": 295 }, { "epoch": 0.16827743035815804, "grad_norm": 0.19921875, "learning_rate": 8.317225696418421e-06, "loss": 1.1713, "step": 296 }, { "epoch": 0.16884593519044913, "grad_norm": 0.19921875, "learning_rate": 8.311540648095509e-06, "loss": 1.1617, "step": 297 }, { "epoch": 0.1694144400227402, "grad_norm": 0.19140625, "learning_rate": 8.305855599772598e-06, "loss": 1.1641, "step": 298 }, { "epoch": 0.16998294485503126, "grad_norm": 0.220703125, "learning_rate": 8.300170551449687e-06, "loss": 1.1713, "step": 299 }, { "epoch": 0.17055144968732233, "grad_norm": 0.1953125, "learning_rate": 8.294485503126777e-06, "loss": 1.1351, "step": 300 }, { "epoch": 0.17111995451961343, "grad_norm": 0.2041015625, "learning_rate": 8.288800454803868e-06, "loss": 1.1612, "step": 301 }, { "epoch": 0.1716884593519045, "grad_norm": 0.1962890625, "learning_rate": 8.283115406480955e-06, "loss": 1.1471, "step": 302 }, { "epoch": 0.17225696418419556, "grad_norm": 0.3671875, "learning_rate": 8.277430358158044e-06, "loss": 1.0846, "step": 303 }, { "epoch": 0.17282546901648663, "grad_norm": 0.19140625, "learning_rate": 8.271745309835134e-06, "loss": 1.1015, "step": 304 }, { "epoch": 0.17339397384877772, "grad_norm": 0.205078125, "learning_rate": 8.266060261512223e-06, "loss": 1.2308, "step": 305 }, { "epoch": 0.1739624786810688, "grad_norm": 0.19921875, "learning_rate": 8.260375213189312e-06, "loss": 1.2625, "step": 306 }, { "epoch": 0.17453098351335986, "grad_norm": 0.2001953125, "learning_rate": 8.254690164866403e-06, "loss": 1.1546, "step": 307 }, { "epoch": 0.17509948834565095, "grad_norm": 0.1923828125, "learning_rate": 8.24900511654349e-06, "loss": 1.1471, "step": 308 }, { "epoch": 0.17566799317794202, "grad_norm": 0.19140625, "learning_rate": 8.24332006822058e-06, "loss": 1.1519, "step": 309 }, { "epoch": 0.17623649801023308, "grad_norm": 0.2373046875, "learning_rate": 8.23763501989767e-06, "loss": 1.1644, "step": 310 }, { "epoch": 0.17680500284252415, "grad_norm": 0.2060546875, "learning_rate": 8.231949971574758e-06, "loss": 1.068, "step": 311 }, { "epoch": 0.17737350767481525, "grad_norm": 0.234375, "learning_rate": 8.22626492325185e-06, "loss": 1.1453, "step": 312 }, { "epoch": 0.1779420125071063, "grad_norm": 0.203125, "learning_rate": 8.220579874928939e-06, "loss": 1.142, "step": 313 }, { "epoch": 0.17851051733939738, "grad_norm": 0.1982421875, "learning_rate": 8.214894826606026e-06, "loss": 1.1987, "step": 314 }, { "epoch": 0.17907902217168845, "grad_norm": 0.1875, "learning_rate": 8.209209778283115e-06, "loss": 1.0933, "step": 315 }, { "epoch": 0.17964752700397954, "grad_norm": 0.2021484375, "learning_rate": 8.203524729960205e-06, "loss": 1.1985, "step": 316 }, { "epoch": 0.1802160318362706, "grad_norm": 0.1943359375, "learning_rate": 8.197839681637294e-06, "loss": 1.2162, "step": 317 }, { "epoch": 0.18078453666856167, "grad_norm": 0.201171875, "learning_rate": 8.192154633314385e-06, "loss": 1.1504, "step": 318 }, { "epoch": 0.18135304150085277, "grad_norm": 0.2314453125, "learning_rate": 8.186469584991472e-06, "loss": 1.1696, "step": 319 }, { "epoch": 0.18192154633314384, "grad_norm": 0.201171875, "learning_rate": 8.180784536668562e-06, "loss": 1.1086, "step": 320 }, { "epoch": 0.1824900511654349, "grad_norm": 0.2099609375, "learning_rate": 8.175099488345651e-06, "loss": 1.1751, "step": 321 }, { "epoch": 0.18305855599772597, "grad_norm": 0.2177734375, "learning_rate": 8.16941444002274e-06, "loss": 1.2029, "step": 322 }, { "epoch": 0.18362706083001706, "grad_norm": 0.314453125, "learning_rate": 8.163729391699831e-06, "loss": 1.0997, "step": 323 }, { "epoch": 0.18419556566230813, "grad_norm": 0.212890625, "learning_rate": 8.15804434337692e-06, "loss": 1.1088, "step": 324 }, { "epoch": 0.1847640704945992, "grad_norm": 0.2001953125, "learning_rate": 8.152359295054008e-06, "loss": 1.2044, "step": 325 }, { "epoch": 0.1853325753268903, "grad_norm": 0.2099609375, "learning_rate": 8.146674246731097e-06, "loss": 1.1171, "step": 326 }, { "epoch": 0.18590108015918136, "grad_norm": 0.2216796875, "learning_rate": 8.140989198408187e-06, "loss": 1.1866, "step": 327 }, { "epoch": 0.18646958499147243, "grad_norm": 0.201171875, "learning_rate": 8.135304150085276e-06, "loss": 1.1369, "step": 328 }, { "epoch": 0.1870380898237635, "grad_norm": 0.203125, "learning_rate": 8.129619101762367e-06, "loss": 1.1478, "step": 329 }, { "epoch": 0.1876065946560546, "grad_norm": 0.1962890625, "learning_rate": 8.123934053439454e-06, "loss": 1.1685, "step": 330 }, { "epoch": 0.18817509948834565, "grad_norm": 0.203125, "learning_rate": 8.118249005116544e-06, "loss": 1.1606, "step": 331 }, { "epoch": 0.18874360432063672, "grad_norm": 0.2373046875, "learning_rate": 8.112563956793633e-06, "loss": 1.1789, "step": 332 }, { "epoch": 0.1893121091529278, "grad_norm": 0.2001953125, "learning_rate": 8.106878908470722e-06, "loss": 1.1376, "step": 333 }, { "epoch": 0.18988061398521888, "grad_norm": 0.208984375, "learning_rate": 8.101193860147813e-06, "loss": 1.1722, "step": 334 }, { "epoch": 0.19044911881750995, "grad_norm": 0.203125, "learning_rate": 8.095508811824902e-06, "loss": 1.198, "step": 335 }, { "epoch": 0.19101762364980102, "grad_norm": 0.34375, "learning_rate": 8.08982376350199e-06, "loss": 1.1087, "step": 336 }, { "epoch": 0.1915861284820921, "grad_norm": 0.19921875, "learning_rate": 8.084138715179079e-06, "loss": 1.1206, "step": 337 }, { "epoch": 0.19215463331438318, "grad_norm": 0.197265625, "learning_rate": 8.078453666856168e-06, "loss": 1.1732, "step": 338 }, { "epoch": 0.19272313814667424, "grad_norm": 0.2177734375, "learning_rate": 8.072768618533258e-06, "loss": 1.0844, "step": 339 }, { "epoch": 0.1932916429789653, "grad_norm": 0.19921875, "learning_rate": 8.067083570210348e-06, "loss": 1.1673, "step": 340 }, { "epoch": 0.1938601478112564, "grad_norm": 0.2099609375, "learning_rate": 8.061398521887436e-06, "loss": 1.1298, "step": 341 }, { "epoch": 0.19442865264354747, "grad_norm": 0.2099609375, "learning_rate": 8.055713473564525e-06, "loss": 1.1266, "step": 342 }, { "epoch": 0.19499715747583854, "grad_norm": 0.2119140625, "learning_rate": 8.050028425241615e-06, "loss": 1.1302, "step": 343 }, { "epoch": 0.1955656623081296, "grad_norm": 0.2080078125, "learning_rate": 8.044343376918704e-06, "loss": 1.1003, "step": 344 }, { "epoch": 0.1961341671404207, "grad_norm": 0.2119140625, "learning_rate": 8.038658328595795e-06, "loss": 1.1355, "step": 345 }, { "epoch": 0.19670267197271177, "grad_norm": 0.2109375, "learning_rate": 8.032973280272884e-06, "loss": 1.1738, "step": 346 }, { "epoch": 0.19727117680500283, "grad_norm": 0.208984375, "learning_rate": 8.027288231949972e-06, "loss": 1.07, "step": 347 }, { "epoch": 0.19783968163729393, "grad_norm": 0.2099609375, "learning_rate": 8.02160318362706e-06, "loss": 1.1886, "step": 348 }, { "epoch": 0.198408186469585, "grad_norm": 0.2138671875, "learning_rate": 8.01591813530415e-06, "loss": 1.1239, "step": 349 }, { "epoch": 0.19897669130187606, "grad_norm": 0.1943359375, "learning_rate": 8.01023308698124e-06, "loss": 1.1297, "step": 350 }, { "epoch": 0.19954519613416713, "grad_norm": 0.1904296875, "learning_rate": 8.00454803865833e-06, "loss": 1.1422, "step": 351 }, { "epoch": 0.20011370096645822, "grad_norm": 0.203125, "learning_rate": 7.99886299033542e-06, "loss": 1.1244, "step": 352 }, { "epoch": 0.2006822057987493, "grad_norm": 0.2021484375, "learning_rate": 7.993177942012507e-06, "loss": 1.1232, "step": 353 }, { "epoch": 0.20125071063104036, "grad_norm": 0.2099609375, "learning_rate": 7.987492893689596e-06, "loss": 1.1205, "step": 354 }, { "epoch": 0.20181921546333143, "grad_norm": 0.205078125, "learning_rate": 7.981807845366686e-06, "loss": 1.1445, "step": 355 }, { "epoch": 0.20238772029562252, "grad_norm": 0.2041015625, "learning_rate": 7.976122797043777e-06, "loss": 1.1423, "step": 356 }, { "epoch": 0.2029562251279136, "grad_norm": 0.2080078125, "learning_rate": 7.970437748720866e-06, "loss": 1.1572, "step": 357 }, { "epoch": 0.20352472996020465, "grad_norm": 0.2109375, "learning_rate": 7.964752700397953e-06, "loss": 1.1187, "step": 358 }, { "epoch": 0.20409323479249575, "grad_norm": 0.2041015625, "learning_rate": 7.959067652075043e-06, "loss": 1.0997, "step": 359 }, { "epoch": 0.20466173962478681, "grad_norm": 0.2138671875, "learning_rate": 7.953382603752132e-06, "loss": 1.187, "step": 360 }, { "epoch": 0.20523024445707788, "grad_norm": 0.2021484375, "learning_rate": 7.947697555429221e-06, "loss": 1.1388, "step": 361 }, { "epoch": 0.20579874928936895, "grad_norm": 0.2099609375, "learning_rate": 7.942012507106312e-06, "loss": 1.2185, "step": 362 }, { "epoch": 0.20636725412166004, "grad_norm": 0.216796875, "learning_rate": 7.936327458783401e-06, "loss": 1.1465, "step": 363 }, { "epoch": 0.2069357589539511, "grad_norm": 0.208984375, "learning_rate": 7.930642410460489e-06, "loss": 1.1644, "step": 364 }, { "epoch": 0.20750426378624218, "grad_norm": 0.2236328125, "learning_rate": 7.924957362137578e-06, "loss": 1.0893, "step": 365 }, { "epoch": 0.20807276861853324, "grad_norm": 0.208984375, "learning_rate": 7.919272313814667e-06, "loss": 1.1428, "step": 366 }, { "epoch": 0.20864127345082434, "grad_norm": 0.2216796875, "learning_rate": 7.913587265491758e-06, "loss": 1.1753, "step": 367 }, { "epoch": 0.2092097782831154, "grad_norm": 0.208984375, "learning_rate": 7.907902217168848e-06, "loss": 1.193, "step": 368 }, { "epoch": 0.20977828311540647, "grad_norm": 0.205078125, "learning_rate": 7.902217168845935e-06, "loss": 1.1924, "step": 369 }, { "epoch": 0.21034678794769757, "grad_norm": 0.2177734375, "learning_rate": 7.896532120523024e-06, "loss": 1.2169, "step": 370 }, { "epoch": 0.21091529277998863, "grad_norm": 0.2099609375, "learning_rate": 7.890847072200114e-06, "loss": 1.1536, "step": 371 }, { "epoch": 0.2114837976122797, "grad_norm": 0.20703125, "learning_rate": 7.885162023877203e-06, "loss": 1.1942, "step": 372 }, { "epoch": 0.21205230244457077, "grad_norm": 0.25, "learning_rate": 7.879476975554294e-06, "loss": 1.1571, "step": 373 }, { "epoch": 0.21262080727686186, "grad_norm": 0.2353515625, "learning_rate": 7.873791927231383e-06, "loss": 1.096, "step": 374 }, { "epoch": 0.21318931210915293, "grad_norm": 0.216796875, "learning_rate": 7.86810687890847e-06, "loss": 1.178, "step": 375 }, { "epoch": 0.213757816941444, "grad_norm": 0.1953125, "learning_rate": 7.86242183058556e-06, "loss": 1.0977, "step": 376 }, { "epoch": 0.2143263217737351, "grad_norm": 0.2578125, "learning_rate": 7.856736782262649e-06, "loss": 1.1555, "step": 377 }, { "epoch": 0.21489482660602616, "grad_norm": 0.2021484375, "learning_rate": 7.85105173393974e-06, "loss": 1.1943, "step": 378 }, { "epoch": 0.21546333143831722, "grad_norm": 0.34375, "learning_rate": 7.84536668561683e-06, "loss": 1.1843, "step": 379 }, { "epoch": 0.2160318362706083, "grad_norm": 0.2099609375, "learning_rate": 7.839681637293917e-06, "loss": 1.0407, "step": 380 }, { "epoch": 0.21660034110289939, "grad_norm": 0.2021484375, "learning_rate": 7.833996588971006e-06, "loss": 1.0839, "step": 381 }, { "epoch": 0.21716884593519045, "grad_norm": 0.2001953125, "learning_rate": 7.828311540648095e-06, "loss": 1.1381, "step": 382 }, { "epoch": 0.21773735076748152, "grad_norm": 0.37890625, "learning_rate": 7.822626492325185e-06, "loss": 1.1167, "step": 383 }, { "epoch": 0.21830585559977259, "grad_norm": 0.201171875, "learning_rate": 7.816941444002276e-06, "loss": 1.0949, "step": 384 }, { "epoch": 0.21887436043206368, "grad_norm": 0.2041015625, "learning_rate": 7.811256395679365e-06, "loss": 1.0399, "step": 385 }, { "epoch": 0.21944286526435475, "grad_norm": 0.220703125, "learning_rate": 7.805571347356452e-06, "loss": 1.1692, "step": 386 }, { "epoch": 0.22001137009664581, "grad_norm": 0.224609375, "learning_rate": 7.799886299033542e-06, "loss": 1.2567, "step": 387 }, { "epoch": 0.2205798749289369, "grad_norm": 0.20703125, "learning_rate": 7.794201250710631e-06, "loss": 1.0486, "step": 388 }, { "epoch": 0.22114837976122798, "grad_norm": 0.208984375, "learning_rate": 7.788516202387722e-06, "loss": 1.1408, "step": 389 }, { "epoch": 0.22171688459351904, "grad_norm": 0.19921875, "learning_rate": 7.782831154064811e-06, "loss": 1.2003, "step": 390 }, { "epoch": 0.2222853894258101, "grad_norm": 0.208984375, "learning_rate": 7.7771461057419e-06, "loss": 1.1439, "step": 391 }, { "epoch": 0.2228538942581012, "grad_norm": 0.2099609375, "learning_rate": 7.771461057418988e-06, "loss": 1.2188, "step": 392 }, { "epoch": 0.22342239909039227, "grad_norm": 0.2158203125, "learning_rate": 7.765776009096077e-06, "loss": 1.1432, "step": 393 }, { "epoch": 0.22399090392268334, "grad_norm": 0.212890625, "learning_rate": 7.760090960773166e-06, "loss": 1.0917, "step": 394 }, { "epoch": 0.2245594087549744, "grad_norm": 0.2021484375, "learning_rate": 7.754405912450257e-06, "loss": 1.1582, "step": 395 }, { "epoch": 0.2251279135872655, "grad_norm": 0.2421875, "learning_rate": 7.748720864127347e-06, "loss": 1.1191, "step": 396 }, { "epoch": 0.22569641841955657, "grad_norm": 0.2373046875, "learning_rate": 7.743035815804434e-06, "loss": 1.0793, "step": 397 }, { "epoch": 0.22626492325184763, "grad_norm": 0.236328125, "learning_rate": 7.737350767481523e-06, "loss": 1.1396, "step": 398 }, { "epoch": 0.22683342808413873, "grad_norm": 0.224609375, "learning_rate": 7.731665719158613e-06, "loss": 1.0812, "step": 399 }, { "epoch": 0.2274019329164298, "grad_norm": 0.2138671875, "learning_rate": 7.725980670835704e-06, "loss": 1.1481, "step": 400 }, { "epoch": 0.22797043774872086, "grad_norm": 0.2158203125, "learning_rate": 7.720295622512793e-06, "loss": 1.149, "step": 401 }, { "epoch": 0.22853894258101193, "grad_norm": 0.208984375, "learning_rate": 7.714610574189882e-06, "loss": 1.133, "step": 402 }, { "epoch": 0.22910744741330302, "grad_norm": 0.2001953125, "learning_rate": 7.70892552586697e-06, "loss": 1.0832, "step": 403 }, { "epoch": 0.2296759522455941, "grad_norm": 0.212890625, "learning_rate": 7.703240477544059e-06, "loss": 1.0798, "step": 404 }, { "epoch": 0.23024445707788516, "grad_norm": 0.2158203125, "learning_rate": 7.697555429221148e-06, "loss": 1.2215, "step": 405 }, { "epoch": 0.23081296191017622, "grad_norm": 0.212890625, "learning_rate": 7.69187038089824e-06, "loss": 1.1342, "step": 406 }, { "epoch": 0.23138146674246732, "grad_norm": 0.21875, "learning_rate": 7.686185332575328e-06, "loss": 1.1093, "step": 407 }, { "epoch": 0.23194997157475838, "grad_norm": 0.2177734375, "learning_rate": 7.680500284252416e-06, "loss": 1.1095, "step": 408 }, { "epoch": 0.23251847640704945, "grad_norm": 0.2275390625, "learning_rate": 7.674815235929505e-06, "loss": 1.1655, "step": 409 }, { "epoch": 0.23308698123934055, "grad_norm": 0.201171875, "learning_rate": 7.669130187606594e-06, "loss": 1.1204, "step": 410 }, { "epoch": 0.2336554860716316, "grad_norm": 0.2734375, "learning_rate": 7.663445139283685e-06, "loss": 1.2089, "step": 411 }, { "epoch": 0.23422399090392268, "grad_norm": 0.20703125, "learning_rate": 7.657760090960775e-06, "loss": 1.1484, "step": 412 }, { "epoch": 0.23479249573621375, "grad_norm": 0.21484375, "learning_rate": 7.652075042637864e-06, "loss": 1.128, "step": 413 }, { "epoch": 0.23536100056850484, "grad_norm": 0.2119140625, "learning_rate": 7.646389994314952e-06, "loss": 1.1925, "step": 414 }, { "epoch": 0.2359295054007959, "grad_norm": 0.2109375, "learning_rate": 7.64070494599204e-06, "loss": 1.027, "step": 415 }, { "epoch": 0.23649801023308697, "grad_norm": 0.212890625, "learning_rate": 7.63501989766913e-06, "loss": 1.1515, "step": 416 }, { "epoch": 0.23706651506537807, "grad_norm": 0.2294921875, "learning_rate": 7.62933484934622e-06, "loss": 1.1105, "step": 417 }, { "epoch": 0.23763501989766914, "grad_norm": 0.2236328125, "learning_rate": 7.623649801023309e-06, "loss": 1.0647, "step": 418 }, { "epoch": 0.2382035247299602, "grad_norm": 0.220703125, "learning_rate": 7.617964752700399e-06, "loss": 1.1194, "step": 419 }, { "epoch": 0.23877202956225127, "grad_norm": 0.271484375, "learning_rate": 7.612279704377488e-06, "loss": 1.0689, "step": 420 }, { "epoch": 0.23934053439454236, "grad_norm": 0.2158203125, "learning_rate": 7.606594656054576e-06, "loss": 1.0929, "step": 421 }, { "epoch": 0.23990903922683343, "grad_norm": 0.2333984375, "learning_rate": 7.600909607731667e-06, "loss": 1.1521, "step": 422 }, { "epoch": 0.2404775440591245, "grad_norm": 0.2080078125, "learning_rate": 7.595224559408756e-06, "loss": 1.1146, "step": 423 }, { "epoch": 0.24104604889141557, "grad_norm": 0.2119140625, "learning_rate": 7.589539511085845e-06, "loss": 1.1578, "step": 424 }, { "epoch": 0.24161455372370666, "grad_norm": 0.208984375, "learning_rate": 7.583854462762934e-06, "loss": 1.1418, "step": 425 }, { "epoch": 0.24218305855599773, "grad_norm": 0.2080078125, "learning_rate": 7.578169414440023e-06, "loss": 1.082, "step": 426 }, { "epoch": 0.2427515633882888, "grad_norm": 0.2294921875, "learning_rate": 7.572484366117112e-06, "loss": 1.2185, "step": 427 }, { "epoch": 0.2433200682205799, "grad_norm": 0.2353515625, "learning_rate": 7.566799317794202e-06, "loss": 1.0544, "step": 428 }, { "epoch": 0.24388857305287096, "grad_norm": 0.21875, "learning_rate": 7.561114269471291e-06, "loss": 1.218, "step": 429 }, { "epoch": 0.24445707788516202, "grad_norm": 0.2119140625, "learning_rate": 7.55542922114838e-06, "loss": 1.1542, "step": 430 }, { "epoch": 0.2450255827174531, "grad_norm": 0.2119140625, "learning_rate": 7.54974417282547e-06, "loss": 1.0956, "step": 431 }, { "epoch": 0.24559408754974418, "grad_norm": 0.2158203125, "learning_rate": 7.544059124502559e-06, "loss": 1.0839, "step": 432 }, { "epoch": 0.24616259238203525, "grad_norm": 0.2119140625, "learning_rate": 7.538374076179649e-06, "loss": 1.0808, "step": 433 }, { "epoch": 0.24673109721432632, "grad_norm": 0.1982421875, "learning_rate": 7.532689027856737e-06, "loss": 1.119, "step": 434 }, { "epoch": 0.24729960204661738, "grad_norm": 0.2060546875, "learning_rate": 7.527003979533827e-06, "loss": 1.0927, "step": 435 }, { "epoch": 0.24786810687890848, "grad_norm": 0.2236328125, "learning_rate": 7.521318931210916e-06, "loss": 1.1131, "step": 436 }, { "epoch": 0.24843661171119955, "grad_norm": 0.2197265625, "learning_rate": 7.515633882888005e-06, "loss": 1.0975, "step": 437 }, { "epoch": 0.2490051165434906, "grad_norm": 0.220703125, "learning_rate": 7.5099488345650936e-06, "loss": 1.1046, "step": 438 }, { "epoch": 0.2495736213757817, "grad_norm": 0.2138671875, "learning_rate": 7.5042637862421845e-06, "loss": 1.1449, "step": 439 }, { "epoch": 0.25014212620807275, "grad_norm": 0.212890625, "learning_rate": 7.498578737919273e-06, "loss": 1.0995, "step": 440 }, { "epoch": 0.25071063104036384, "grad_norm": 0.205078125, "learning_rate": 7.492893689596362e-06, "loss": 1.0675, "step": 441 }, { "epoch": 0.25127913587265494, "grad_norm": 0.208984375, "learning_rate": 7.4872086412734514e-06, "loss": 1.1099, "step": 442 }, { "epoch": 0.251847640704946, "grad_norm": 0.2060546875, "learning_rate": 7.481523592950541e-06, "loss": 1.1243, "step": 443 }, { "epoch": 0.25241614553723707, "grad_norm": 0.2080078125, "learning_rate": 7.475838544627631e-06, "loss": 1.1101, "step": 444 }, { "epoch": 0.25298465036952816, "grad_norm": 0.228515625, "learning_rate": 7.470153496304719e-06, "loss": 1.1222, "step": 445 }, { "epoch": 0.2535531552018192, "grad_norm": 0.29296875, "learning_rate": 7.4644684479818084e-06, "loss": 1.1847, "step": 446 }, { "epoch": 0.2541216600341103, "grad_norm": 0.2119140625, "learning_rate": 7.458783399658898e-06, "loss": 1.1004, "step": 447 }, { "epoch": 0.2546901648664014, "grad_norm": 0.2197265625, "learning_rate": 7.453098351335987e-06, "loss": 1.111, "step": 448 }, { "epoch": 0.25525866969869243, "grad_norm": 0.2431640625, "learning_rate": 7.447413303013075e-06, "loss": 1.107, "step": 449 }, { "epoch": 0.2558271745309835, "grad_norm": 0.248046875, "learning_rate": 7.441728254690166e-06, "loss": 1.2364, "step": 450 }, { "epoch": 0.25639567936327456, "grad_norm": 0.208984375, "learning_rate": 7.436043206367255e-06, "loss": 1.1298, "step": 451 }, { "epoch": 0.25696418419556566, "grad_norm": 0.2197265625, "learning_rate": 7.430358158044344e-06, "loss": 1.023, "step": 452 }, { "epoch": 0.25753268902785675, "grad_norm": 0.234375, "learning_rate": 7.424673109721433e-06, "loss": 1.0834, "step": 453 }, { "epoch": 0.2581011938601478, "grad_norm": 0.2099609375, "learning_rate": 7.4189880613985225e-06, "loss": 1.149, "step": 454 }, { "epoch": 0.2586696986924389, "grad_norm": 0.205078125, "learning_rate": 7.4133030130756126e-06, "loss": 1.166, "step": 455 }, { "epoch": 0.25923820352473, "grad_norm": 0.2138671875, "learning_rate": 7.407617964752701e-06, "loss": 1.1256, "step": 456 }, { "epoch": 0.259806708357021, "grad_norm": 0.2001953125, "learning_rate": 7.40193291642979e-06, "loss": 1.0899, "step": 457 }, { "epoch": 0.2603752131893121, "grad_norm": 0.244140625, "learning_rate": 7.3962478681068795e-06, "loss": 1.1287, "step": 458 }, { "epoch": 0.2609437180216032, "grad_norm": 0.2421875, "learning_rate": 7.390562819783969e-06, "loss": 1.1587, "step": 459 }, { "epoch": 0.26151222285389425, "grad_norm": 0.2177734375, "learning_rate": 7.384877771461057e-06, "loss": 1.1118, "step": 460 }, { "epoch": 0.26208072768618534, "grad_norm": 0.22265625, "learning_rate": 7.379192723138148e-06, "loss": 1.0721, "step": 461 }, { "epoch": 0.2626492325184764, "grad_norm": 0.2197265625, "learning_rate": 7.3735076748152365e-06, "loss": 1.1308, "step": 462 }, { "epoch": 0.2632177373507675, "grad_norm": 0.2177734375, "learning_rate": 7.367822626492326e-06, "loss": 1.1149, "step": 463 }, { "epoch": 0.2637862421830586, "grad_norm": 0.271484375, "learning_rate": 7.362137578169415e-06, "loss": 1.0702, "step": 464 }, { "epoch": 0.2643547470153496, "grad_norm": 0.2216796875, "learning_rate": 7.356452529846504e-06, "loss": 1.1509, "step": 465 }, { "epoch": 0.2649232518476407, "grad_norm": 0.2099609375, "learning_rate": 7.350767481523594e-06, "loss": 1.1456, "step": 466 }, { "epoch": 0.2654917566799318, "grad_norm": 0.37109375, "learning_rate": 7.345082433200683e-06, "loss": 1.1031, "step": 467 }, { "epoch": 0.26606026151222284, "grad_norm": 0.224609375, "learning_rate": 7.339397384877772e-06, "loss": 1.1674, "step": 468 }, { "epoch": 0.26662876634451393, "grad_norm": 0.208984375, "learning_rate": 7.333712336554861e-06, "loss": 1.1629, "step": 469 }, { "epoch": 0.26719727117680503, "grad_norm": 0.216796875, "learning_rate": 7.3280272882319505e-06, "loss": 1.1706, "step": 470 }, { "epoch": 0.26776577600909607, "grad_norm": 0.283203125, "learning_rate": 7.32234223990904e-06, "loss": 1.1089, "step": 471 }, { "epoch": 0.26833428084138716, "grad_norm": 0.244140625, "learning_rate": 7.31665719158613e-06, "loss": 1.1472, "step": 472 }, { "epoch": 0.2689027856736782, "grad_norm": 0.22265625, "learning_rate": 7.310972143263218e-06, "loss": 1.1711, "step": 473 }, { "epoch": 0.2694712905059693, "grad_norm": 0.298828125, "learning_rate": 7.3052870949403075e-06, "loss": 1.0745, "step": 474 }, { "epoch": 0.2700397953382604, "grad_norm": 0.2109375, "learning_rate": 7.299602046617397e-06, "loss": 1.1278, "step": 475 }, { "epoch": 0.27060830017055143, "grad_norm": 0.2373046875, "learning_rate": 7.293916998294486e-06, "loss": 1.1025, "step": 476 }, { "epoch": 0.2711768050028425, "grad_norm": 0.22265625, "learning_rate": 7.288231949971576e-06, "loss": 1.1447, "step": 477 }, { "epoch": 0.2717453098351336, "grad_norm": 0.216796875, "learning_rate": 7.2825469016486645e-06, "loss": 1.107, "step": 478 }, { "epoch": 0.27231381466742466, "grad_norm": 0.2099609375, "learning_rate": 7.276861853325754e-06, "loss": 1.1354, "step": 479 }, { "epoch": 0.27288231949971575, "grad_norm": 0.265625, "learning_rate": 7.271176805002843e-06, "loss": 1.1092, "step": 480 }, { "epoch": 0.27345082433200685, "grad_norm": 0.216796875, "learning_rate": 7.265491756679932e-06, "loss": 1.1204, "step": 481 }, { "epoch": 0.2740193291642979, "grad_norm": 0.306640625, "learning_rate": 7.2598067083570215e-06, "loss": 1.1307, "step": 482 }, { "epoch": 0.274587833996589, "grad_norm": 0.2236328125, "learning_rate": 7.254121660034112e-06, "loss": 1.0877, "step": 483 }, { "epoch": 0.27515633882888, "grad_norm": 0.2119140625, "learning_rate": 7.2484366117112e-06, "loss": 1.1715, "step": 484 }, { "epoch": 0.2757248436611711, "grad_norm": 0.2119140625, "learning_rate": 7.242751563388289e-06, "loss": 1.0879, "step": 485 }, { "epoch": 0.2762933484934622, "grad_norm": 0.216796875, "learning_rate": 7.2370665150653786e-06, "loss": 1.1368, "step": 486 }, { "epoch": 0.27686185332575325, "grad_norm": 0.2119140625, "learning_rate": 7.231381466742468e-06, "loss": 1.0826, "step": 487 }, { "epoch": 0.27743035815804434, "grad_norm": 0.2314453125, "learning_rate": 7.225696418419558e-06, "loss": 1.1637, "step": 488 }, { "epoch": 0.27799886299033544, "grad_norm": 0.283203125, "learning_rate": 7.220011370096647e-06, "loss": 1.1416, "step": 489 }, { "epoch": 0.2785673678226265, "grad_norm": 0.216796875, "learning_rate": 7.2143263217737356e-06, "loss": 1.0634, "step": 490 }, { "epoch": 0.27913587265491757, "grad_norm": 0.224609375, "learning_rate": 7.208641273450825e-06, "loss": 1.0746, "step": 491 }, { "epoch": 0.27970437748720867, "grad_norm": 0.2119140625, "learning_rate": 7.202956225127914e-06, "loss": 1.1288, "step": 492 }, { "epoch": 0.2802728823194997, "grad_norm": 0.2109375, "learning_rate": 7.197271176805003e-06, "loss": 1.1281, "step": 493 }, { "epoch": 0.2808413871517908, "grad_norm": 0.208984375, "learning_rate": 7.1915861284820934e-06, "loss": 1.0536, "step": 494 }, { "epoch": 0.28140989198408184, "grad_norm": 0.2099609375, "learning_rate": 7.185901080159182e-06, "loss": 1.1439, "step": 495 }, { "epoch": 0.28197839681637293, "grad_norm": 0.2158203125, "learning_rate": 7.180216031836271e-06, "loss": 1.0977, "step": 496 }, { "epoch": 0.28254690164866403, "grad_norm": 0.220703125, "learning_rate": 7.17453098351336e-06, "loss": 1.1858, "step": 497 }, { "epoch": 0.28311540648095507, "grad_norm": 0.228515625, "learning_rate": 7.16884593519045e-06, "loss": 1.1309, "step": 498 }, { "epoch": 0.28368391131324616, "grad_norm": 0.244140625, "learning_rate": 7.163160886867538e-06, "loss": 1.0854, "step": 499 }, { "epoch": 0.28425241614553726, "grad_norm": 0.2197265625, "learning_rate": 7.157475838544629e-06, "loss": 1.1058, "step": 500 }, { "epoch": 0.2848209209778283, "grad_norm": 0.23046875, "learning_rate": 7.151790790221717e-06, "loss": 1.1219, "step": 501 }, { "epoch": 0.2853894258101194, "grad_norm": 0.2275390625, "learning_rate": 7.146105741898807e-06, "loss": 1.1093, "step": 502 }, { "epoch": 0.2859579306424105, "grad_norm": 0.2197265625, "learning_rate": 7.140420693575896e-06, "loss": 1.0696, "step": 503 }, { "epoch": 0.2865264354747015, "grad_norm": 0.2236328125, "learning_rate": 7.134735645252985e-06, "loss": 1.1791, "step": 504 }, { "epoch": 0.2870949403069926, "grad_norm": 0.2138671875, "learning_rate": 7.129050596930075e-06, "loss": 1.0811, "step": 505 }, { "epoch": 0.28766344513928366, "grad_norm": 0.220703125, "learning_rate": 7.123365548607164e-06, "loss": 1.1304, "step": 506 }, { "epoch": 0.28823194997157475, "grad_norm": 0.2197265625, "learning_rate": 7.117680500284253e-06, "loss": 1.1408, "step": 507 }, { "epoch": 0.28880045480386585, "grad_norm": 0.2158203125, "learning_rate": 7.111995451961342e-06, "loss": 1.1733, "step": 508 }, { "epoch": 0.2893689596361569, "grad_norm": 0.21875, "learning_rate": 7.106310403638431e-06, "loss": 1.1068, "step": 509 }, { "epoch": 0.289937464468448, "grad_norm": 0.220703125, "learning_rate": 7.100625355315521e-06, "loss": 1.1177, "step": 510 }, { "epoch": 0.2905059693007391, "grad_norm": 0.21484375, "learning_rate": 7.094940306992611e-06, "loss": 1.0476, "step": 511 }, { "epoch": 0.2910744741330301, "grad_norm": 0.2216796875, "learning_rate": 7.089255258669699e-06, "loss": 1.0884, "step": 512 }, { "epoch": 0.2916429789653212, "grad_norm": 0.2265625, "learning_rate": 7.083570210346788e-06, "loss": 1.0742, "step": 513 }, { "epoch": 0.2922114837976123, "grad_norm": 0.220703125, "learning_rate": 7.077885162023878e-06, "loss": 1.1325, "step": 514 }, { "epoch": 0.29277998862990334, "grad_norm": 0.2158203125, "learning_rate": 7.072200113700967e-06, "loss": 1.104, "step": 515 }, { "epoch": 0.29334849346219444, "grad_norm": 0.2216796875, "learning_rate": 7.066515065378057e-06, "loss": 1.1313, "step": 516 }, { "epoch": 0.2939169982944855, "grad_norm": 0.208984375, "learning_rate": 7.060830017055145e-06, "loss": 1.1029, "step": 517 }, { "epoch": 0.29448550312677657, "grad_norm": 0.216796875, "learning_rate": 7.055144968732235e-06, "loss": 1.173, "step": 518 }, { "epoch": 0.29505400795906767, "grad_norm": 0.23828125, "learning_rate": 7.049459920409324e-06, "loss": 1.1265, "step": 519 }, { "epoch": 0.2956225127913587, "grad_norm": 0.228515625, "learning_rate": 7.043774872086413e-06, "loss": 1.1496, "step": 520 }, { "epoch": 0.2961910176236498, "grad_norm": 0.2578125, "learning_rate": 7.038089823763502e-06, "loss": 1.1321, "step": 521 }, { "epoch": 0.2967595224559409, "grad_norm": 0.265625, "learning_rate": 7.0324047754405925e-06, "loss": 1.1708, "step": 522 }, { "epoch": 0.29732802728823193, "grad_norm": 0.23828125, "learning_rate": 7.026719727117681e-06, "loss": 1.0465, "step": 523 }, { "epoch": 0.297896532120523, "grad_norm": 0.2177734375, "learning_rate": 7.02103467879477e-06, "loss": 1.0828, "step": 524 }, { "epoch": 0.2984650369528141, "grad_norm": 0.2216796875, "learning_rate": 7.015349630471859e-06, "loss": 1.1382, "step": 525 }, { "epoch": 0.29903354178510516, "grad_norm": 0.2294921875, "learning_rate": 7.009664582148949e-06, "loss": 1.0805, "step": 526 }, { "epoch": 0.29960204661739626, "grad_norm": 0.23046875, "learning_rate": 7.003979533826039e-06, "loss": 1.0985, "step": 527 }, { "epoch": 0.3001705514496873, "grad_norm": 0.2275390625, "learning_rate": 6.998294485503128e-06, "loss": 1.1077, "step": 528 }, { "epoch": 0.3007390562819784, "grad_norm": 0.2353515625, "learning_rate": 6.9926094371802164e-06, "loss": 1.14, "step": 529 }, { "epoch": 0.3013075611142695, "grad_norm": 0.224609375, "learning_rate": 6.986924388857306e-06, "loss": 1.1161, "step": 530 }, { "epoch": 0.3018760659465605, "grad_norm": 0.240234375, "learning_rate": 6.981239340534395e-06, "loss": 1.05, "step": 531 }, { "epoch": 0.3024445707788516, "grad_norm": 0.2255859375, "learning_rate": 6.975554292211484e-06, "loss": 1.128, "step": 532 }, { "epoch": 0.3030130756111427, "grad_norm": 0.2353515625, "learning_rate": 6.969869243888574e-06, "loss": 1.204, "step": 533 }, { "epoch": 0.30358158044343375, "grad_norm": 0.23046875, "learning_rate": 6.964184195565663e-06, "loss": 1.1354, "step": 534 }, { "epoch": 0.30415008527572485, "grad_norm": 0.267578125, "learning_rate": 6.958499147242752e-06, "loss": 1.1027, "step": 535 }, { "epoch": 0.30471859010801594, "grad_norm": 0.2158203125, "learning_rate": 6.952814098919841e-06, "loss": 1.0946, "step": 536 }, { "epoch": 0.305287094940307, "grad_norm": 0.2373046875, "learning_rate": 6.9471290505969305e-06, "loss": 1.0781, "step": 537 }, { "epoch": 0.3058555997725981, "grad_norm": 0.21875, "learning_rate": 6.9414440022740206e-06, "loss": 1.1938, "step": 538 }, { "epoch": 0.30642410460488917, "grad_norm": 0.2314453125, "learning_rate": 6.93575895395111e-06, "loss": 1.0309, "step": 539 }, { "epoch": 0.3069926094371802, "grad_norm": 0.2119140625, "learning_rate": 6.930073905628198e-06, "loss": 1.1307, "step": 540 }, { "epoch": 0.3075611142694713, "grad_norm": 0.234375, "learning_rate": 6.9243888573052875e-06, "loss": 1.1286, "step": 541 }, { "epoch": 0.30812961910176234, "grad_norm": 0.2255859375, "learning_rate": 6.918703808982377e-06, "loss": 1.059, "step": 542 }, { "epoch": 0.30869812393405344, "grad_norm": 0.2314453125, "learning_rate": 6.913018760659466e-06, "loss": 1.1868, "step": 543 }, { "epoch": 0.30926662876634453, "grad_norm": 0.2890625, "learning_rate": 6.907333712336556e-06, "loss": 1.105, "step": 544 }, { "epoch": 0.30983513359863557, "grad_norm": 0.2158203125, "learning_rate": 6.9016486640136445e-06, "loss": 1.1077, "step": 545 }, { "epoch": 0.31040363843092666, "grad_norm": 0.21484375, "learning_rate": 6.895963615690734e-06, "loss": 1.1621, "step": 546 }, { "epoch": 0.31097214326321776, "grad_norm": 0.216796875, "learning_rate": 6.890278567367823e-06, "loss": 1.1266, "step": 547 }, { "epoch": 0.3115406480955088, "grad_norm": 0.2158203125, "learning_rate": 6.884593519044912e-06, "loss": 1.1881, "step": 548 }, { "epoch": 0.3121091529277999, "grad_norm": 0.216796875, "learning_rate": 6.878908470722002e-06, "loss": 1.1163, "step": 549 }, { "epoch": 0.312677657760091, "grad_norm": 0.2265625, "learning_rate": 6.873223422399092e-06, "loss": 1.1578, "step": 550 }, { "epoch": 0.313246162592382, "grad_norm": 0.35546875, "learning_rate": 6.86753837407618e-06, "loss": 1.1147, "step": 551 }, { "epoch": 0.3138146674246731, "grad_norm": 0.2275390625, "learning_rate": 6.861853325753269e-06, "loss": 1.0557, "step": 552 }, { "epoch": 0.31438317225696416, "grad_norm": 0.228515625, "learning_rate": 6.8561682774303585e-06, "loss": 1.1514, "step": 553 }, { "epoch": 0.31495167708925526, "grad_norm": 0.22265625, "learning_rate": 6.850483229107448e-06, "loss": 1.0513, "step": 554 }, { "epoch": 0.31552018192154635, "grad_norm": 0.2314453125, "learning_rate": 6.844798180784538e-06, "loss": 1.1182, "step": 555 }, { "epoch": 0.3160886867538374, "grad_norm": 0.2294921875, "learning_rate": 6.839113132461626e-06, "loss": 1.087, "step": 556 }, { "epoch": 0.3166571915861285, "grad_norm": 0.2373046875, "learning_rate": 6.8334280841387155e-06, "loss": 1.152, "step": 557 }, { "epoch": 0.3172256964184196, "grad_norm": 0.232421875, "learning_rate": 6.827743035815805e-06, "loss": 1.0821, "step": 558 }, { "epoch": 0.3177942012507106, "grad_norm": 0.294921875, "learning_rate": 6.822057987492894e-06, "loss": 1.1429, "step": 559 }, { "epoch": 0.3183627060830017, "grad_norm": 0.244140625, "learning_rate": 6.816372939169984e-06, "loss": 1.0829, "step": 560 }, { "epoch": 0.3189312109152928, "grad_norm": 0.2412109375, "learning_rate": 6.810687890847073e-06, "loss": 1.052, "step": 561 }, { "epoch": 0.31949971574758385, "grad_norm": 0.2216796875, "learning_rate": 6.805002842524162e-06, "loss": 1.0661, "step": 562 }, { "epoch": 0.32006822057987494, "grad_norm": 0.2275390625, "learning_rate": 6.799317794201251e-06, "loss": 1.0865, "step": 563 }, { "epoch": 0.320636725412166, "grad_norm": 0.2109375, "learning_rate": 6.79363274587834e-06, "loss": 1.111, "step": 564 }, { "epoch": 0.3212052302444571, "grad_norm": 0.2236328125, "learning_rate": 6.7879476975554295e-06, "loss": 1.0947, "step": 565 }, { "epoch": 0.32177373507674817, "grad_norm": 0.2333984375, "learning_rate": 6.78226264923252e-06, "loss": 1.0984, "step": 566 }, { "epoch": 0.3223422399090392, "grad_norm": 0.2236328125, "learning_rate": 6.776577600909609e-06, "loss": 1.1371, "step": 567 }, { "epoch": 0.3229107447413303, "grad_norm": 0.2412109375, "learning_rate": 6.770892552586697e-06, "loss": 1.1053, "step": 568 }, { "epoch": 0.3234792495736214, "grad_norm": 0.2197265625, "learning_rate": 6.7652075042637865e-06, "loss": 1.0954, "step": 569 }, { "epoch": 0.32404775440591244, "grad_norm": 0.2294921875, "learning_rate": 6.759522455940876e-06, "loss": 1.0896, "step": 570 }, { "epoch": 0.32461625923820353, "grad_norm": 0.2119140625, "learning_rate": 6.753837407617966e-06, "loss": 1.067, "step": 571 }, { "epoch": 0.3251847640704946, "grad_norm": 0.2333984375, "learning_rate": 6.748152359295055e-06, "loss": 1.0648, "step": 572 }, { "epoch": 0.32575326890278566, "grad_norm": 0.2314453125, "learning_rate": 6.7424673109721436e-06, "loss": 1.1034, "step": 573 }, { "epoch": 0.32632177373507676, "grad_norm": 0.220703125, "learning_rate": 6.736782262649233e-06, "loss": 1.1768, "step": 574 }, { "epoch": 0.3268902785673678, "grad_norm": 0.2333984375, "learning_rate": 6.731097214326322e-06, "loss": 1.0923, "step": 575 }, { "epoch": 0.3274587833996589, "grad_norm": 0.224609375, "learning_rate": 6.725412166003411e-06, "loss": 1.2017, "step": 576 }, { "epoch": 0.32802728823195, "grad_norm": 0.21484375, "learning_rate": 6.719727117680501e-06, "loss": 1.1276, "step": 577 }, { "epoch": 0.328595793064241, "grad_norm": 0.232421875, "learning_rate": 6.714042069357591e-06, "loss": 1.1592, "step": 578 }, { "epoch": 0.3291642978965321, "grad_norm": 0.22265625, "learning_rate": 6.708357021034679e-06, "loss": 1.0625, "step": 579 }, { "epoch": 0.3297328027288232, "grad_norm": 0.236328125, "learning_rate": 6.702671972711768e-06, "loss": 1.0928, "step": 580 }, { "epoch": 0.33030130756111425, "grad_norm": 0.431640625, "learning_rate": 6.696986924388858e-06, "loss": 1.1381, "step": 581 }, { "epoch": 0.33086981239340535, "grad_norm": 0.21484375, "learning_rate": 6.691301876065948e-06, "loss": 1.1218, "step": 582 }, { "epoch": 0.33143831722569644, "grad_norm": 0.216796875, "learning_rate": 6.685616827743037e-06, "loss": 1.191, "step": 583 }, { "epoch": 0.3320068220579875, "grad_norm": 0.2236328125, "learning_rate": 6.679931779420125e-06, "loss": 1.1707, "step": 584 }, { "epoch": 0.3325753268902786, "grad_norm": 0.220703125, "learning_rate": 6.674246731097215e-06, "loss": 1.1147, "step": 585 }, { "epoch": 0.3331438317225696, "grad_norm": 0.2236328125, "learning_rate": 6.668561682774304e-06, "loss": 1.0718, "step": 586 }, { "epoch": 0.3337123365548607, "grad_norm": 0.220703125, "learning_rate": 6.662876634451393e-06, "loss": 1.1012, "step": 587 }, { "epoch": 0.3342808413871518, "grad_norm": 0.2138671875, "learning_rate": 6.657191586128483e-06, "loss": 1.1303, "step": 588 }, { "epoch": 0.33484934621944284, "grad_norm": 0.259765625, "learning_rate": 6.6515065378055725e-06, "loss": 1.0902, "step": 589 }, { "epoch": 0.33541785105173394, "grad_norm": 0.2177734375, "learning_rate": 6.645821489482661e-06, "loss": 1.0581, "step": 590 }, { "epoch": 0.33598635588402503, "grad_norm": 0.2333984375, "learning_rate": 6.64013644115975e-06, "loss": 1.0748, "step": 591 }, { "epoch": 0.3365548607163161, "grad_norm": 0.2158203125, "learning_rate": 6.634451392836839e-06, "loss": 1.091, "step": 592 }, { "epoch": 0.33712336554860717, "grad_norm": 0.224609375, "learning_rate": 6.6287663445139295e-06, "loss": 1.1355, "step": 593 }, { "epoch": 0.33769187038089826, "grad_norm": 0.23046875, "learning_rate": 6.623081296191019e-06, "loss": 1.0849, "step": 594 }, { "epoch": 0.3382603752131893, "grad_norm": 0.2421875, "learning_rate": 6.617396247868107e-06, "loss": 1.1165, "step": 595 }, { "epoch": 0.3388288800454804, "grad_norm": 0.2294921875, "learning_rate": 6.611711199545196e-06, "loss": 1.1427, "step": 596 }, { "epoch": 0.33939738487777144, "grad_norm": 0.2412109375, "learning_rate": 6.606026151222286e-06, "loss": 1.1559, "step": 597 }, { "epoch": 0.33996588971006253, "grad_norm": 0.2197265625, "learning_rate": 6.600341102899375e-06, "loss": 1.098, "step": 598 }, { "epoch": 0.3405343945423536, "grad_norm": 0.2197265625, "learning_rate": 6.594656054576465e-06, "loss": 1.1276, "step": 599 }, { "epoch": 0.34110289937464466, "grad_norm": 0.2314453125, "learning_rate": 6.588971006253554e-06, "loss": 1.03, "step": 600 }, { "epoch": 0.34167140420693576, "grad_norm": 0.251953125, "learning_rate": 6.583285957930643e-06, "loss": 1.1247, "step": 601 }, { "epoch": 0.34223990903922685, "grad_norm": 0.232421875, "learning_rate": 6.577600909607732e-06, "loss": 1.1662, "step": 602 }, { "epoch": 0.3428084138715179, "grad_norm": 0.2373046875, "learning_rate": 6.571915861284821e-06, "loss": 1.0399, "step": 603 }, { "epoch": 0.343376918703809, "grad_norm": 0.2275390625, "learning_rate": 6.566230812961911e-06, "loss": 1.0758, "step": 604 }, { "epoch": 0.3439454235361001, "grad_norm": 0.21484375, "learning_rate": 6.5605457646390005e-06, "loss": 1.0467, "step": 605 }, { "epoch": 0.3445139283683911, "grad_norm": 0.2197265625, "learning_rate": 6.55486071631609e-06, "loss": 1.0796, "step": 606 }, { "epoch": 0.3450824332006822, "grad_norm": 0.2236328125, "learning_rate": 6.549175667993178e-06, "loss": 1.0402, "step": 607 }, { "epoch": 0.34565093803297325, "grad_norm": 0.23828125, "learning_rate": 6.543490619670267e-06, "loss": 1.1163, "step": 608 }, { "epoch": 0.34621944286526435, "grad_norm": 0.22265625, "learning_rate": 6.537805571347357e-06, "loss": 1.0999, "step": 609 }, { "epoch": 0.34678794769755544, "grad_norm": 0.34765625, "learning_rate": 6.532120523024447e-06, "loss": 1.1704, "step": 610 }, { "epoch": 0.3473564525298465, "grad_norm": 0.2333984375, "learning_rate": 6.526435474701536e-06, "loss": 1.1954, "step": 611 }, { "epoch": 0.3479249573621376, "grad_norm": 0.224609375, "learning_rate": 6.520750426378624e-06, "loss": 1.1541, "step": 612 }, { "epoch": 0.34849346219442867, "grad_norm": 0.21484375, "learning_rate": 6.515065378055714e-06, "loss": 1.1508, "step": 613 }, { "epoch": 0.3490619670267197, "grad_norm": 0.2265625, "learning_rate": 6.509380329732803e-06, "loss": 1.0732, "step": 614 }, { "epoch": 0.3496304718590108, "grad_norm": 0.236328125, "learning_rate": 6.503695281409893e-06, "loss": 1.033, "step": 615 }, { "epoch": 0.3501989766913019, "grad_norm": 0.2392578125, "learning_rate": 6.498010233086982e-06, "loss": 1.1519, "step": 616 }, { "epoch": 0.35076748152359294, "grad_norm": 0.228515625, "learning_rate": 6.4923251847640715e-06, "loss": 1.1355, "step": 617 }, { "epoch": 0.35133598635588403, "grad_norm": 0.2177734375, "learning_rate": 6.48664013644116e-06, "loss": 1.0783, "step": 618 }, { "epoch": 0.3519044911881751, "grad_norm": 0.2333984375, "learning_rate": 6.480955088118249e-06, "loss": 1.1574, "step": 619 }, { "epoch": 0.35247299602046617, "grad_norm": 0.228515625, "learning_rate": 6.4752700397953384e-06, "loss": 1.063, "step": 620 }, { "epoch": 0.35304150085275726, "grad_norm": 0.2197265625, "learning_rate": 6.4695849914724285e-06, "loss": 1.0674, "step": 621 }, { "epoch": 0.3536100056850483, "grad_norm": 0.23046875, "learning_rate": 6.463899943149518e-06, "loss": 1.0593, "step": 622 }, { "epoch": 0.3541785105173394, "grad_norm": 0.2412109375, "learning_rate": 6.458214894826606e-06, "loss": 1.0881, "step": 623 }, { "epoch": 0.3547470153496305, "grad_norm": 0.2265625, "learning_rate": 6.4525298465036955e-06, "loss": 1.0741, "step": 624 }, { "epoch": 0.35531552018192153, "grad_norm": 0.216796875, "learning_rate": 6.446844798180785e-06, "loss": 0.9801, "step": 625 }, { "epoch": 0.3558840250142126, "grad_norm": 0.236328125, "learning_rate": 6.441159749857875e-06, "loss": 1.1376, "step": 626 }, { "epoch": 0.3564525298465037, "grad_norm": 0.267578125, "learning_rate": 6.435474701534964e-06, "loss": 1.1309, "step": 627 }, { "epoch": 0.35702103467879476, "grad_norm": 0.2294921875, "learning_rate": 6.429789653212053e-06, "loss": 1.0535, "step": 628 }, { "epoch": 0.35758953951108585, "grad_norm": 0.220703125, "learning_rate": 6.424104604889142e-06, "loss": 1.1213, "step": 629 }, { "epoch": 0.3581580443433769, "grad_norm": 0.22265625, "learning_rate": 6.418419556566231e-06, "loss": 1.0661, "step": 630 }, { "epoch": 0.358726549175668, "grad_norm": 0.283203125, "learning_rate": 6.41273450824332e-06, "loss": 1.1388, "step": 631 }, { "epoch": 0.3592950540079591, "grad_norm": 0.234375, "learning_rate": 6.40704945992041e-06, "loss": 1.1756, "step": 632 }, { "epoch": 0.3598635588402501, "grad_norm": 0.220703125, "learning_rate": 6.4013644115975e-06, "loss": 1.0707, "step": 633 }, { "epoch": 0.3604320636725412, "grad_norm": 0.2255859375, "learning_rate": 6.395679363274588e-06, "loss": 1.0211, "step": 634 }, { "epoch": 0.3610005685048323, "grad_norm": 0.2314453125, "learning_rate": 6.389994314951677e-06, "loss": 1.0265, "step": 635 }, { "epoch": 0.36156907333712335, "grad_norm": 0.2265625, "learning_rate": 6.3843092666287665e-06, "loss": 1.0598, "step": 636 }, { "epoch": 0.36213757816941444, "grad_norm": 0.2412109375, "learning_rate": 6.378624218305857e-06, "loss": 1.0755, "step": 637 }, { "epoch": 0.36270608300170554, "grad_norm": 0.232421875, "learning_rate": 6.372939169982946e-06, "loss": 1.1002, "step": 638 }, { "epoch": 0.3632745878339966, "grad_norm": 0.234375, "learning_rate": 6.367254121660035e-06, "loss": 1.0487, "step": 639 }, { "epoch": 0.36384309266628767, "grad_norm": 0.2412109375, "learning_rate": 6.3615690733371235e-06, "loss": 1.1121, "step": 640 }, { "epoch": 0.36441159749857877, "grad_norm": 0.3359375, "learning_rate": 6.355884025014213e-06, "loss": 1.0855, "step": 641 }, { "epoch": 0.3649801023308698, "grad_norm": 0.23828125, "learning_rate": 6.350198976691302e-06, "loss": 1.1012, "step": 642 }, { "epoch": 0.3655486071631609, "grad_norm": 0.2333984375, "learning_rate": 6.344513928368392e-06, "loss": 1.0729, "step": 643 }, { "epoch": 0.36611711199545194, "grad_norm": 0.234375, "learning_rate": 6.338828880045481e-06, "loss": 1.1046, "step": 644 }, { "epoch": 0.36668561682774303, "grad_norm": 0.248046875, "learning_rate": 6.333143831722571e-06, "loss": 1.0248, "step": 645 }, { "epoch": 0.3672541216600341, "grad_norm": 0.25, "learning_rate": 6.327458783399659e-06, "loss": 1.1227, "step": 646 }, { "epoch": 0.36782262649232517, "grad_norm": 0.24609375, "learning_rate": 6.321773735076748e-06, "loss": 1.1181, "step": 647 }, { "epoch": 0.36839113132461626, "grad_norm": 0.22265625, "learning_rate": 6.316088686753838e-06, "loss": 1.1041, "step": 648 }, { "epoch": 0.36895963615690736, "grad_norm": 0.2353515625, "learning_rate": 6.310403638430928e-06, "loss": 0.9795, "step": 649 }, { "epoch": 0.3695281409891984, "grad_norm": 0.2197265625, "learning_rate": 6.304718590108017e-06, "loss": 1.0298, "step": 650 }, { "epoch": 0.3700966458214895, "grad_norm": 0.23828125, "learning_rate": 6.299033541785105e-06, "loss": 1.1293, "step": 651 }, { "epoch": 0.3706651506537806, "grad_norm": 0.29296875, "learning_rate": 6.2933484934621945e-06, "loss": 1.1538, "step": 652 }, { "epoch": 0.3712336554860716, "grad_norm": 0.28515625, "learning_rate": 6.287663445139284e-06, "loss": 1.0943, "step": 653 }, { "epoch": 0.3718021603183627, "grad_norm": 0.236328125, "learning_rate": 6.281978396816374e-06, "loss": 1.0766, "step": 654 }, { "epoch": 0.37237066515065376, "grad_norm": 0.2275390625, "learning_rate": 6.276293348493463e-06, "loss": 1.0232, "step": 655 }, { "epoch": 0.37293916998294485, "grad_norm": 0.21484375, "learning_rate": 6.270608300170552e-06, "loss": 1.0853, "step": 656 }, { "epoch": 0.37350767481523595, "grad_norm": 0.2255859375, "learning_rate": 6.264923251847641e-06, "loss": 1.1089, "step": 657 }, { "epoch": 0.374076179647527, "grad_norm": 0.2333984375, "learning_rate": 6.25923820352473e-06, "loss": 1.05, "step": 658 }, { "epoch": 0.3746446844798181, "grad_norm": 0.2177734375, "learning_rate": 6.25355315520182e-06, "loss": 1.1287, "step": 659 }, { "epoch": 0.3752131893121092, "grad_norm": 0.2353515625, "learning_rate": 6.247868106878909e-06, "loss": 1.0565, "step": 660 }, { "epoch": 0.3757816941444002, "grad_norm": 0.2314453125, "learning_rate": 6.242183058555999e-06, "loss": 1.1237, "step": 661 }, { "epoch": 0.3763501989766913, "grad_norm": 0.240234375, "learning_rate": 6.236498010233087e-06, "loss": 1.1189, "step": 662 }, { "epoch": 0.3769187038089824, "grad_norm": 0.228515625, "learning_rate": 6.230812961910176e-06, "loss": 1.0793, "step": 663 }, { "epoch": 0.37748720864127344, "grad_norm": 0.2265625, "learning_rate": 6.2251279135872656e-06, "loss": 1.0785, "step": 664 }, { "epoch": 0.37805571347356454, "grad_norm": 0.2333984375, "learning_rate": 6.219442865264356e-06, "loss": 1.0965, "step": 665 }, { "epoch": 0.3786242183058556, "grad_norm": 0.220703125, "learning_rate": 6.213757816941445e-06, "loss": 1.062, "step": 666 }, { "epoch": 0.37919272313814667, "grad_norm": 0.236328125, "learning_rate": 6.208072768618534e-06, "loss": 1.1091, "step": 667 }, { "epoch": 0.37976122797043776, "grad_norm": 0.22265625, "learning_rate": 6.202387720295623e-06, "loss": 1.1464, "step": 668 }, { "epoch": 0.3803297328027288, "grad_norm": 0.2353515625, "learning_rate": 6.196702671972712e-06, "loss": 1.0689, "step": 669 }, { "epoch": 0.3808982376350199, "grad_norm": 0.283203125, "learning_rate": 6.191017623649802e-06, "loss": 1.1, "step": 670 }, { "epoch": 0.381466742467311, "grad_norm": 0.2431640625, "learning_rate": 6.185332575326891e-06, "loss": 1.0502, "step": 671 }, { "epoch": 0.38203524729960203, "grad_norm": 0.34375, "learning_rate": 6.1796475270039804e-06, "loss": 1.0808, "step": 672 }, { "epoch": 0.3826037521318931, "grad_norm": 0.2353515625, "learning_rate": 6.173962478681069e-06, "loss": 1.0786, "step": 673 }, { "epoch": 0.3831722569641842, "grad_norm": 0.2470703125, "learning_rate": 6.168277430358158e-06, "loss": 1.0606, "step": 674 }, { "epoch": 0.38374076179647526, "grad_norm": 0.234375, "learning_rate": 6.162592382035247e-06, "loss": 1.0859, "step": 675 }, { "epoch": 0.38430926662876636, "grad_norm": 0.244140625, "learning_rate": 6.1569073337123375e-06, "loss": 1.1471, "step": 676 }, { "epoch": 0.3848777714610574, "grad_norm": 0.2353515625, "learning_rate": 6.151222285389427e-06, "loss": 1.0965, "step": 677 }, { "epoch": 0.3854462762933485, "grad_norm": 0.2314453125, "learning_rate": 6.145537237066516e-06, "loss": 1.104, "step": 678 }, { "epoch": 0.3860147811256396, "grad_norm": 0.267578125, "learning_rate": 6.139852188743604e-06, "loss": 1.1523, "step": 679 }, { "epoch": 0.3865832859579306, "grad_norm": 0.2490234375, "learning_rate": 6.134167140420694e-06, "loss": 1.0549, "step": 680 }, { "epoch": 0.3871517907902217, "grad_norm": 0.2373046875, "learning_rate": 6.128482092097784e-06, "loss": 1.1288, "step": 681 }, { "epoch": 0.3877202956225128, "grad_norm": 0.22265625, "learning_rate": 6.122797043774873e-06, "loss": 1.1435, "step": 682 }, { "epoch": 0.38828880045480385, "grad_norm": 0.2353515625, "learning_rate": 6.117111995451962e-06, "loss": 1.0975, "step": 683 }, { "epoch": 0.38885730528709495, "grad_norm": 0.23828125, "learning_rate": 6.111426947129051e-06, "loss": 1.0183, "step": 684 }, { "epoch": 0.38942581011938604, "grad_norm": 0.2412109375, "learning_rate": 6.10574189880614e-06, "loss": 1.0384, "step": 685 }, { "epoch": 0.3899943149516771, "grad_norm": 0.251953125, "learning_rate": 6.100056850483229e-06, "loss": 1.0586, "step": 686 }, { "epoch": 0.3905628197839682, "grad_norm": 0.2314453125, "learning_rate": 6.094371802160319e-06, "loss": 1.0952, "step": 687 }, { "epoch": 0.3911313246162592, "grad_norm": 0.2412109375, "learning_rate": 6.0886867538374085e-06, "loss": 1.0493, "step": 688 }, { "epoch": 0.3916998294485503, "grad_norm": 0.2353515625, "learning_rate": 6.083001705514498e-06, "loss": 1.0618, "step": 689 }, { "epoch": 0.3922683342808414, "grad_norm": 0.232421875, "learning_rate": 6.077316657191586e-06, "loss": 1.0441, "step": 690 }, { "epoch": 0.39283683911313244, "grad_norm": 0.2294921875, "learning_rate": 6.071631608868675e-06, "loss": 1.1149, "step": 691 }, { "epoch": 0.39340534394542354, "grad_norm": 0.2314453125, "learning_rate": 6.065946560545765e-06, "loss": 1.1097, "step": 692 }, { "epoch": 0.39397384877771463, "grad_norm": 0.220703125, "learning_rate": 6.060261512222855e-06, "loss": 0.9976, "step": 693 }, { "epoch": 0.39454235361000567, "grad_norm": 0.2333984375, "learning_rate": 6.054576463899944e-06, "loss": 1.1257, "step": 694 }, { "epoch": 0.39511085844229676, "grad_norm": 0.26953125, "learning_rate": 6.048891415577033e-06, "loss": 1.1024, "step": 695 }, { "epoch": 0.39567936327458786, "grad_norm": 0.228515625, "learning_rate": 6.043206367254122e-06, "loss": 1.1152, "step": 696 }, { "epoch": 0.3962478681068789, "grad_norm": 0.2314453125, "learning_rate": 6.037521318931211e-06, "loss": 1.1064, "step": 697 }, { "epoch": 0.39681637293917, "grad_norm": 0.2353515625, "learning_rate": 6.031836270608301e-06, "loss": 1.0834, "step": 698 }, { "epoch": 0.39738487777146103, "grad_norm": 0.271484375, "learning_rate": 6.02615122228539e-06, "loss": 1.0854, "step": 699 }, { "epoch": 0.3979533826037521, "grad_norm": 0.27734375, "learning_rate": 6.0204661739624795e-06, "loss": 1.1025, "step": 700 }, { "epoch": 0.3985218874360432, "grad_norm": 0.2734375, "learning_rate": 6.014781125639568e-06, "loss": 1.1938, "step": 701 }, { "epoch": 0.39909039226833426, "grad_norm": 0.236328125, "learning_rate": 6.009096077316657e-06, "loss": 1.0712, "step": 702 }, { "epoch": 0.39965889710062535, "grad_norm": 0.2392578125, "learning_rate": 6.0034110289937464e-06, "loss": 1.0831, "step": 703 }, { "epoch": 0.40022740193291645, "grad_norm": 0.248046875, "learning_rate": 5.9977259806708365e-06, "loss": 1.1409, "step": 704 }, { "epoch": 0.4007959067652075, "grad_norm": 0.2353515625, "learning_rate": 5.992040932347926e-06, "loss": 1.0605, "step": 705 }, { "epoch": 0.4013644115974986, "grad_norm": 0.2275390625, "learning_rate": 5.986355884025015e-06, "loss": 1.1285, "step": 706 }, { "epoch": 0.4019329164297897, "grad_norm": 0.265625, "learning_rate": 5.9806708357021034e-06, "loss": 1.1196, "step": 707 }, { "epoch": 0.4025014212620807, "grad_norm": 0.2421875, "learning_rate": 5.974985787379193e-06, "loss": 1.1072, "step": 708 }, { "epoch": 0.4030699260943718, "grad_norm": 0.232421875, "learning_rate": 5.969300739056283e-06, "loss": 1.1151, "step": 709 }, { "epoch": 0.40363843092666285, "grad_norm": 4.96875, "learning_rate": 5.963615690733372e-06, "loss": 1.0464, "step": 710 }, { "epoch": 0.40420693575895394, "grad_norm": 0.28515625, "learning_rate": 5.957930642410461e-06, "loss": 1.1116, "step": 711 }, { "epoch": 0.40477544059124504, "grad_norm": 0.265625, "learning_rate": 5.95224559408755e-06, "loss": 1.1081, "step": 712 }, { "epoch": 0.4053439454235361, "grad_norm": 0.232421875, "learning_rate": 5.946560545764639e-06, "loss": 1.001, "step": 713 }, { "epoch": 0.4059124502558272, "grad_norm": 0.234375, "learning_rate": 5.940875497441728e-06, "loss": 1.0497, "step": 714 }, { "epoch": 0.40648095508811827, "grad_norm": 0.2255859375, "learning_rate": 5.935190449118818e-06, "loss": 1.1287, "step": 715 }, { "epoch": 0.4070494599204093, "grad_norm": 0.248046875, "learning_rate": 5.9295054007959076e-06, "loss": 1.1259, "step": 716 }, { "epoch": 0.4076179647527004, "grad_norm": 0.2451171875, "learning_rate": 5.923820352472997e-06, "loss": 1.0481, "step": 717 }, { "epoch": 0.4081864695849915, "grad_norm": 0.2373046875, "learning_rate": 5.918135304150085e-06, "loss": 1.1, "step": 718 }, { "epoch": 0.40875497441728254, "grad_norm": 0.248046875, "learning_rate": 5.9124502558271745e-06, "loss": 1.0421, "step": 719 }, { "epoch": 0.40932347924957363, "grad_norm": 0.236328125, "learning_rate": 5.906765207504265e-06, "loss": 1.1492, "step": 720 }, { "epoch": 0.40989198408186467, "grad_norm": 0.302734375, "learning_rate": 5.901080159181354e-06, "loss": 0.9986, "step": 721 }, { "epoch": 0.41046048891415576, "grad_norm": 0.2197265625, "learning_rate": 5.895395110858443e-06, "loss": 1.1111, "step": 722 }, { "epoch": 0.41102899374644686, "grad_norm": 0.2421875, "learning_rate": 5.8897100625355315e-06, "loss": 1.1654, "step": 723 }, { "epoch": 0.4115974985787379, "grad_norm": 0.248046875, "learning_rate": 5.884025014212621e-06, "loss": 1.1639, "step": 724 }, { "epoch": 0.412166003411029, "grad_norm": 0.2451171875, "learning_rate": 5.87833996588971e-06, "loss": 1.0514, "step": 725 }, { "epoch": 0.4127345082433201, "grad_norm": 0.2314453125, "learning_rate": 5.8726549175668e-06, "loss": 1.1116, "step": 726 }, { "epoch": 0.4133030130756111, "grad_norm": 0.2353515625, "learning_rate": 5.866969869243889e-06, "loss": 1.088, "step": 727 }, { "epoch": 0.4138715179079022, "grad_norm": 0.2294921875, "learning_rate": 5.861284820920979e-06, "loss": 1.0943, "step": 728 }, { "epoch": 0.4144400227401933, "grad_norm": 0.228515625, "learning_rate": 5.855599772598067e-06, "loss": 1.1635, "step": 729 }, { "epoch": 0.41500852757248435, "grad_norm": 0.2314453125, "learning_rate": 5.849914724275156e-06, "loss": 1.0727, "step": 730 }, { "epoch": 0.41557703240477545, "grad_norm": 0.2314453125, "learning_rate": 5.844229675952246e-06, "loss": 1.1469, "step": 731 }, { "epoch": 0.4161455372370665, "grad_norm": 0.265625, "learning_rate": 5.838544627629336e-06, "loss": 1.0535, "step": 732 }, { "epoch": 0.4167140420693576, "grad_norm": 0.2216796875, "learning_rate": 5.832859579306425e-06, "loss": 1.0679, "step": 733 }, { "epoch": 0.4172825469016487, "grad_norm": 0.2333984375, "learning_rate": 5.827174530983514e-06, "loss": 1.084, "step": 734 }, { "epoch": 0.4178510517339397, "grad_norm": 0.2421875, "learning_rate": 5.8214894826606025e-06, "loss": 1.1059, "step": 735 }, { "epoch": 0.4184195565662308, "grad_norm": 0.24609375, "learning_rate": 5.815804434337692e-06, "loss": 1.0625, "step": 736 }, { "epoch": 0.4189880613985219, "grad_norm": 0.2421875, "learning_rate": 5.810119386014782e-06, "loss": 1.0533, "step": 737 }, { "epoch": 0.41955656623081294, "grad_norm": 0.2294921875, "learning_rate": 5.804434337691871e-06, "loss": 1.1052, "step": 738 }, { "epoch": 0.42012507106310404, "grad_norm": 0.2373046875, "learning_rate": 5.79874928936896e-06, "loss": 1.0414, "step": 739 }, { "epoch": 0.42069357589539513, "grad_norm": 0.2373046875, "learning_rate": 5.793064241046049e-06, "loss": 1.115, "step": 740 }, { "epoch": 0.4212620807276862, "grad_norm": 0.2255859375, "learning_rate": 5.787379192723138e-06, "loss": 1.0502, "step": 741 }, { "epoch": 0.42183058555997727, "grad_norm": 0.2421875, "learning_rate": 5.781694144400228e-06, "loss": 1.088, "step": 742 }, { "epoch": 0.42239909039226836, "grad_norm": 0.2412109375, "learning_rate": 5.776009096077317e-06, "loss": 1.1163, "step": 743 }, { "epoch": 0.4229675952245594, "grad_norm": 0.26171875, "learning_rate": 5.770324047754407e-06, "loss": 1.0875, "step": 744 }, { "epoch": 0.4235361000568505, "grad_norm": 0.2490234375, "learning_rate": 5.764638999431496e-06, "loss": 1.1327, "step": 745 }, { "epoch": 0.42410460488914153, "grad_norm": 0.23046875, "learning_rate": 5.758953951108584e-06, "loss": 1.048, "step": 746 }, { "epoch": 0.42467310972143263, "grad_norm": 0.228515625, "learning_rate": 5.7532689027856736e-06, "loss": 1.057, "step": 747 }, { "epoch": 0.4252416145537237, "grad_norm": 0.2255859375, "learning_rate": 5.747583854462764e-06, "loss": 1.1313, "step": 748 }, { "epoch": 0.42581011938601476, "grad_norm": 0.263671875, "learning_rate": 5.741898806139853e-06, "loss": 1.1583, "step": 749 }, { "epoch": 0.42637862421830586, "grad_norm": 0.2294921875, "learning_rate": 5.736213757816942e-06, "loss": 1.0858, "step": 750 }, { "epoch": 0.42694712905059695, "grad_norm": 0.2890625, "learning_rate": 5.7305287094940306e-06, "loss": 1.0946, "step": 751 }, { "epoch": 0.427515633882888, "grad_norm": 0.2490234375, "learning_rate": 5.72484366117112e-06, "loss": 1.0751, "step": 752 }, { "epoch": 0.4280841387151791, "grad_norm": 0.2236328125, "learning_rate": 5.71915861284821e-06, "loss": 1.1162, "step": 753 }, { "epoch": 0.4286526435474702, "grad_norm": 0.240234375, "learning_rate": 5.713473564525299e-06, "loss": 1.0653, "step": 754 }, { "epoch": 0.4292211483797612, "grad_norm": 0.2451171875, "learning_rate": 5.7077885162023884e-06, "loss": 1.1621, "step": 755 }, { "epoch": 0.4297896532120523, "grad_norm": 0.287109375, "learning_rate": 5.702103467879478e-06, "loss": 1.0691, "step": 756 }, { "epoch": 0.43035815804434335, "grad_norm": 0.2294921875, "learning_rate": 5.696418419556566e-06, "loss": 1.1316, "step": 757 }, { "epoch": 0.43092666287663445, "grad_norm": 0.251953125, "learning_rate": 5.690733371233655e-06, "loss": 1.0668, "step": 758 }, { "epoch": 0.43149516770892554, "grad_norm": 0.234375, "learning_rate": 5.6850483229107454e-06, "loss": 1.1438, "step": 759 }, { "epoch": 0.4320636725412166, "grad_norm": 0.2353515625, "learning_rate": 5.679363274587835e-06, "loss": 1.0494, "step": 760 }, { "epoch": 0.4326321773735077, "grad_norm": 0.224609375, "learning_rate": 5.673678226264924e-06, "loss": 1.0927, "step": 761 }, { "epoch": 0.43320068220579877, "grad_norm": 0.2451171875, "learning_rate": 5.667993177942012e-06, "loss": 1.0069, "step": 762 }, { "epoch": 0.4337691870380898, "grad_norm": 0.2265625, "learning_rate": 5.662308129619102e-06, "loss": 1.1173, "step": 763 }, { "epoch": 0.4343376918703809, "grad_norm": 0.236328125, "learning_rate": 5.656623081296192e-06, "loss": 1.1004, "step": 764 }, { "epoch": 0.434906196702672, "grad_norm": 0.23828125, "learning_rate": 5.650938032973281e-06, "loss": 1.0685, "step": 765 }, { "epoch": 0.43547470153496304, "grad_norm": 0.2294921875, "learning_rate": 5.64525298465037e-06, "loss": 1.0752, "step": 766 }, { "epoch": 0.43604320636725413, "grad_norm": 0.2373046875, "learning_rate": 5.6395679363274595e-06, "loss": 1.1224, "step": 767 }, { "epoch": 0.43661171119954517, "grad_norm": 0.2451171875, "learning_rate": 5.633882888004548e-06, "loss": 1.1353, "step": 768 }, { "epoch": 0.43718021603183627, "grad_norm": 0.2314453125, "learning_rate": 5.628197839681637e-06, "loss": 1.1016, "step": 769 }, { "epoch": 0.43774872086412736, "grad_norm": 0.255859375, "learning_rate": 5.622512791358727e-06, "loss": 1.0625, "step": 770 }, { "epoch": 0.4383172256964184, "grad_norm": 0.2314453125, "learning_rate": 5.6168277430358165e-06, "loss": 1.1211, "step": 771 }, { "epoch": 0.4388857305287095, "grad_norm": 0.2373046875, "learning_rate": 5.611142694712906e-06, "loss": 1.1191, "step": 772 }, { "epoch": 0.4394542353610006, "grad_norm": 0.2265625, "learning_rate": 5.605457646389995e-06, "loss": 1.0703, "step": 773 }, { "epoch": 0.44002274019329163, "grad_norm": 0.24609375, "learning_rate": 5.599772598067083e-06, "loss": 1.2009, "step": 774 }, { "epoch": 0.4405912450255827, "grad_norm": 0.234375, "learning_rate": 5.5940875497441735e-06, "loss": 1.1007, "step": 775 }, { "epoch": 0.4411597498578738, "grad_norm": 0.232421875, "learning_rate": 5.588402501421263e-06, "loss": 1.0483, "step": 776 }, { "epoch": 0.44172825469016486, "grad_norm": 0.2333984375, "learning_rate": 5.582717453098352e-06, "loss": 1.0586, "step": 777 }, { "epoch": 0.44229675952245595, "grad_norm": 0.2275390625, "learning_rate": 5.577032404775441e-06, "loss": 1.0978, "step": 778 }, { "epoch": 0.442865264354747, "grad_norm": 0.2421875, "learning_rate": 5.57134735645253e-06, "loss": 1.1303, "step": 779 }, { "epoch": 0.4434337691870381, "grad_norm": 0.2314453125, "learning_rate": 5.565662308129619e-06, "loss": 1.1195, "step": 780 }, { "epoch": 0.4440022740193292, "grad_norm": 0.24609375, "learning_rate": 5.559977259806709e-06, "loss": 1.0158, "step": 781 }, { "epoch": 0.4445707788516202, "grad_norm": 0.2412109375, "learning_rate": 5.554292211483798e-06, "loss": 1.1063, "step": 782 }, { "epoch": 0.4451392836839113, "grad_norm": 0.2421875, "learning_rate": 5.5486071631608875e-06, "loss": 1.0424, "step": 783 }, { "epoch": 0.4457077885162024, "grad_norm": 0.236328125, "learning_rate": 5.542922114837977e-06, "loss": 1.0206, "step": 784 }, { "epoch": 0.44627629334849345, "grad_norm": 0.2578125, "learning_rate": 5.537237066515065e-06, "loss": 1.1367, "step": 785 }, { "epoch": 0.44684479818078454, "grad_norm": 0.2373046875, "learning_rate": 5.531552018192155e-06, "loss": 1.0499, "step": 786 }, { "epoch": 0.44741330301307564, "grad_norm": 0.2373046875, "learning_rate": 5.5258669698692445e-06, "loss": 1.135, "step": 787 }, { "epoch": 0.4479818078453667, "grad_norm": 0.244140625, "learning_rate": 5.520181921546334e-06, "loss": 1.0927, "step": 788 }, { "epoch": 0.44855031267765777, "grad_norm": 0.2275390625, "learning_rate": 5.514496873223423e-06, "loss": 1.1196, "step": 789 }, { "epoch": 0.4491188175099488, "grad_norm": 0.2431640625, "learning_rate": 5.5088118249005114e-06, "loss": 1.0738, "step": 790 }, { "epoch": 0.4496873223422399, "grad_norm": 0.232421875, "learning_rate": 5.503126776577601e-06, "loss": 1.164, "step": 791 }, { "epoch": 0.450255827174531, "grad_norm": 0.236328125, "learning_rate": 5.497441728254691e-06, "loss": 1.0898, "step": 792 }, { "epoch": 0.45082433200682204, "grad_norm": 0.2431640625, "learning_rate": 5.49175667993178e-06, "loss": 1.0249, "step": 793 }, { "epoch": 0.45139283683911313, "grad_norm": 0.2412109375, "learning_rate": 5.486071631608869e-06, "loss": 1.1554, "step": 794 }, { "epoch": 0.4519613416714042, "grad_norm": 0.234375, "learning_rate": 5.4803865832859585e-06, "loss": 1.1155, "step": 795 }, { "epoch": 0.45252984650369527, "grad_norm": 0.2353515625, "learning_rate": 5.474701534963047e-06, "loss": 1.0828, "step": 796 }, { "epoch": 0.45309835133598636, "grad_norm": 0.240234375, "learning_rate": 5.469016486640137e-06, "loss": 1.1175, "step": 797 }, { "epoch": 0.45366685616827745, "grad_norm": 0.2451171875, "learning_rate": 5.463331438317226e-06, "loss": 1.0304, "step": 798 }, { "epoch": 0.4542353610005685, "grad_norm": 0.271484375, "learning_rate": 5.4576463899943156e-06, "loss": 1.0697, "step": 799 }, { "epoch": 0.4548038658328596, "grad_norm": 0.23046875, "learning_rate": 5.451961341671405e-06, "loss": 1.0951, "step": 800 }, { "epoch": 0.4553723706651506, "grad_norm": 0.2392578125, "learning_rate": 5.446276293348493e-06, "loss": 1.0394, "step": 801 }, { "epoch": 0.4559408754974417, "grad_norm": 0.24609375, "learning_rate": 5.4405912450255825e-06, "loss": 1.0355, "step": 802 }, { "epoch": 0.4565093803297328, "grad_norm": 0.232421875, "learning_rate": 5.4349061967026726e-06, "loss": 1.054, "step": 803 }, { "epoch": 0.45707788516202386, "grad_norm": 0.2333984375, "learning_rate": 5.429221148379762e-06, "loss": 1.0151, "step": 804 }, { "epoch": 0.45764638999431495, "grad_norm": 0.2314453125, "learning_rate": 5.423536100056851e-06, "loss": 1.0832, "step": 805 }, { "epoch": 0.45821489482660605, "grad_norm": 0.23828125, "learning_rate": 5.41785105173394e-06, "loss": 1.0324, "step": 806 }, { "epoch": 0.4587833996588971, "grad_norm": 0.2470703125, "learning_rate": 5.412166003411029e-06, "loss": 1.0338, "step": 807 }, { "epoch": 0.4593519044911882, "grad_norm": 0.2373046875, "learning_rate": 5.406480955088119e-06, "loss": 1.0798, "step": 808 }, { "epoch": 0.4599204093234793, "grad_norm": 0.2412109375, "learning_rate": 5.400795906765208e-06, "loss": 1.1484, "step": 809 }, { "epoch": 0.4604889141557703, "grad_norm": 0.23046875, "learning_rate": 5.395110858442297e-06, "loss": 1.1284, "step": 810 }, { "epoch": 0.4610574189880614, "grad_norm": 0.240234375, "learning_rate": 5.389425810119387e-06, "loss": 1.1036, "step": 811 }, { "epoch": 0.46162592382035245, "grad_norm": 0.2373046875, "learning_rate": 5.383740761796476e-06, "loss": 1.0986, "step": 812 }, { "epoch": 0.46219442865264354, "grad_norm": 0.234375, "learning_rate": 5.378055713473564e-06, "loss": 1.0709, "step": 813 }, { "epoch": 0.46276293348493464, "grad_norm": 0.2412109375, "learning_rate": 5.372370665150654e-06, "loss": 1.0916, "step": 814 }, { "epoch": 0.4633314383172257, "grad_norm": 0.2470703125, "learning_rate": 5.366685616827744e-06, "loss": 1.0833, "step": 815 }, { "epoch": 0.46389994314951677, "grad_norm": 0.23828125, "learning_rate": 5.361000568504833e-06, "loss": 1.0497, "step": 816 }, { "epoch": 0.46446844798180786, "grad_norm": 0.228515625, "learning_rate": 5.355315520181922e-06, "loss": 1.112, "step": 817 }, { "epoch": 0.4650369528140989, "grad_norm": 0.2333984375, "learning_rate": 5.3496304718590105e-06, "loss": 1.0848, "step": 818 }, { "epoch": 0.46560545764639, "grad_norm": 0.2392578125, "learning_rate": 5.343945423536101e-06, "loss": 1.0671, "step": 819 }, { "epoch": 0.4661739624786811, "grad_norm": 0.2451171875, "learning_rate": 5.33826037521319e-06, "loss": 1.0708, "step": 820 }, { "epoch": 0.46674246731097213, "grad_norm": 0.2392578125, "learning_rate": 5.332575326890279e-06, "loss": 1.0696, "step": 821 }, { "epoch": 0.4673109721432632, "grad_norm": 0.2421875, "learning_rate": 5.326890278567368e-06, "loss": 1.0896, "step": 822 }, { "epoch": 0.46787947697555426, "grad_norm": 0.2421875, "learning_rate": 5.321205230244458e-06, "loss": 1.1364, "step": 823 }, { "epoch": 0.46844798180784536, "grad_norm": 0.25, "learning_rate": 5.315520181921546e-06, "loss": 1.0105, "step": 824 }, { "epoch": 0.46901648664013645, "grad_norm": 0.2353515625, "learning_rate": 5.309835133598636e-06, "loss": 1.0443, "step": 825 }, { "epoch": 0.4695849914724275, "grad_norm": 0.24609375, "learning_rate": 5.304150085275725e-06, "loss": 1.0612, "step": 826 }, { "epoch": 0.4701534963047186, "grad_norm": 0.23828125, "learning_rate": 5.298465036952815e-06, "loss": 1.1147, "step": 827 }, { "epoch": 0.4707220011370097, "grad_norm": 0.23046875, "learning_rate": 5.292779988629904e-06, "loss": 1.1323, "step": 828 }, { "epoch": 0.4712905059693007, "grad_norm": 0.23828125, "learning_rate": 5.287094940306992e-06, "loss": 1.0799, "step": 829 }, { "epoch": 0.4718590108015918, "grad_norm": 0.23828125, "learning_rate": 5.281409891984083e-06, "loss": 1.0665, "step": 830 }, { "epoch": 0.4724275156338829, "grad_norm": 0.23828125, "learning_rate": 5.275724843661172e-06, "loss": 1.1297, "step": 831 }, { "epoch": 0.47299602046617395, "grad_norm": 0.25, "learning_rate": 5.270039795338261e-06, "loss": 1.0634, "step": 832 }, { "epoch": 0.47356452529846504, "grad_norm": 0.2333984375, "learning_rate": 5.26435474701535e-06, "loss": 1.0847, "step": 833 }, { "epoch": 0.47413303013075614, "grad_norm": 0.236328125, "learning_rate": 5.258669698692439e-06, "loss": 1.0803, "step": 834 }, { "epoch": 0.4747015349630472, "grad_norm": 0.23046875, "learning_rate": 5.252984650369528e-06, "loss": 1.0999, "step": 835 }, { "epoch": 0.4752700397953383, "grad_norm": 0.2353515625, "learning_rate": 5.247299602046618e-06, "loss": 1.0695, "step": 836 }, { "epoch": 0.4758385446276293, "grad_norm": 0.2431640625, "learning_rate": 5.241614553723707e-06, "loss": 1.1538, "step": 837 }, { "epoch": 0.4764070494599204, "grad_norm": 0.228515625, "learning_rate": 5.235929505400796e-06, "loss": 1.1202, "step": 838 }, { "epoch": 0.4769755542922115, "grad_norm": 0.2314453125, "learning_rate": 5.230244457077886e-06, "loss": 1.0204, "step": 839 }, { "epoch": 0.47754405912450254, "grad_norm": 0.240234375, "learning_rate": 5.224559408754974e-06, "loss": 1.017, "step": 840 }, { "epoch": 0.47811256395679363, "grad_norm": 0.2451171875, "learning_rate": 5.218874360432065e-06, "loss": 1.0521, "step": 841 }, { "epoch": 0.47868106878908473, "grad_norm": 0.2373046875, "learning_rate": 5.2131893121091534e-06, "loss": 1.0944, "step": 842 }, { "epoch": 0.47924957362137577, "grad_norm": 0.2392578125, "learning_rate": 5.207504263786243e-06, "loss": 1.2008, "step": 843 }, { "epoch": 0.47981807845366686, "grad_norm": 0.23828125, "learning_rate": 5.201819215463332e-06, "loss": 1.1037, "step": 844 }, { "epoch": 0.48038658328595796, "grad_norm": 0.234375, "learning_rate": 5.196134167140421e-06, "loss": 1.0529, "step": 845 }, { "epoch": 0.480955088118249, "grad_norm": 0.23828125, "learning_rate": 5.19044911881751e-06, "loss": 1.0967, "step": 846 }, { "epoch": 0.4815235929505401, "grad_norm": 0.234375, "learning_rate": 5.1847640704946e-06, "loss": 1.0276, "step": 847 }, { "epoch": 0.48209209778283113, "grad_norm": 0.2333984375, "learning_rate": 5.179079022171689e-06, "loss": 1.0684, "step": 848 }, { "epoch": 0.4826606026151222, "grad_norm": 0.2333984375, "learning_rate": 5.173393973848778e-06, "loss": 1.0358, "step": 849 }, { "epoch": 0.4832291074474133, "grad_norm": 0.23828125, "learning_rate": 5.1677089255258674e-06, "loss": 1.0832, "step": 850 }, { "epoch": 0.48379761227970436, "grad_norm": 0.2412109375, "learning_rate": 5.162023877202957e-06, "loss": 1.0389, "step": 851 }, { "epoch": 0.48436611711199545, "grad_norm": 0.2412109375, "learning_rate": 5.156338828880047e-06, "loss": 1.0202, "step": 852 }, { "epoch": 0.48493462194428655, "grad_norm": 0.2353515625, "learning_rate": 5.150653780557135e-06, "loss": 1.0253, "step": 853 }, { "epoch": 0.4855031267765776, "grad_norm": 0.23046875, "learning_rate": 5.1449687322342245e-06, "loss": 1.1075, "step": 854 }, { "epoch": 0.4860716316088687, "grad_norm": 0.2451171875, "learning_rate": 5.139283683911314e-06, "loss": 1.1327, "step": 855 }, { "epoch": 0.4866401364411598, "grad_norm": 0.251953125, "learning_rate": 5.133598635588403e-06, "loss": 1.0573, "step": 856 }, { "epoch": 0.4872086412734508, "grad_norm": 0.228515625, "learning_rate": 5.127913587265491e-06, "loss": 1.1322, "step": 857 }, { "epoch": 0.4877771461057419, "grad_norm": 0.2421875, "learning_rate": 5.1222285389425815e-06, "loss": 1.1632, "step": 858 }, { "epoch": 0.48834565093803295, "grad_norm": 0.26171875, "learning_rate": 5.116543490619671e-06, "loss": 1.1041, "step": 859 }, { "epoch": 0.48891415577032404, "grad_norm": 0.2431640625, "learning_rate": 5.11085844229676e-06, "loss": 1.1225, "step": 860 }, { "epoch": 0.48948266060261514, "grad_norm": 0.25, "learning_rate": 5.105173393973849e-06, "loss": 1.0818, "step": 861 }, { "epoch": 0.4900511654349062, "grad_norm": 0.244140625, "learning_rate": 5.0994883456509385e-06, "loss": 1.1603, "step": 862 }, { "epoch": 0.49061967026719727, "grad_norm": 0.2470703125, "learning_rate": 5.093803297328029e-06, "loss": 1.0842, "step": 863 }, { "epoch": 0.49118817509948837, "grad_norm": 0.23828125, "learning_rate": 5.088118249005117e-06, "loss": 1.091, "step": 864 }, { "epoch": 0.4917566799317794, "grad_norm": 0.2451171875, "learning_rate": 5.082433200682206e-06, "loss": 1.0413, "step": 865 }, { "epoch": 0.4923251847640705, "grad_norm": 0.2431640625, "learning_rate": 5.0767481523592955e-06, "loss": 1.1707, "step": 866 }, { "epoch": 0.4928936895963616, "grad_norm": 0.24609375, "learning_rate": 5.071063104036385e-06, "loss": 1.1393, "step": 867 }, { "epoch": 0.49346219442865263, "grad_norm": 0.232421875, "learning_rate": 5.065378055713473e-06, "loss": 1.0508, "step": 868 }, { "epoch": 0.49403069926094373, "grad_norm": 0.2412109375, "learning_rate": 5.059693007390564e-06, "loss": 1.0609, "step": 869 }, { "epoch": 0.49459920409323477, "grad_norm": 0.306640625, "learning_rate": 5.0540079590676525e-06, "loss": 1.0546, "step": 870 }, { "epoch": 0.49516770892552586, "grad_norm": 0.2421875, "learning_rate": 5.048322910744742e-06, "loss": 1.0695, "step": 871 }, { "epoch": 0.49573621375781696, "grad_norm": 0.2265625, "learning_rate": 5.042637862421831e-06, "loss": 1.0704, "step": 872 }, { "epoch": 0.496304718590108, "grad_norm": 0.255859375, "learning_rate": 5.03695281409892e-06, "loss": 1.1214, "step": 873 }, { "epoch": 0.4968732234223991, "grad_norm": 0.24609375, "learning_rate": 5.03126776577601e-06, "loss": 1.1142, "step": 874 }, { "epoch": 0.4974417282546902, "grad_norm": 0.2353515625, "learning_rate": 5.025582717453099e-06, "loss": 1.0529, "step": 875 }, { "epoch": 0.4980102330869812, "grad_norm": 0.25, "learning_rate": 5.019897669130188e-06, "loss": 1.1074, "step": 876 }, { "epoch": 0.4985787379192723, "grad_norm": 0.2294921875, "learning_rate": 5.014212620807277e-06, "loss": 1.0767, "step": 877 }, { "epoch": 0.4991472427515634, "grad_norm": 0.2392578125, "learning_rate": 5.0085275724843665e-06, "loss": 1.0467, "step": 878 }, { "epoch": 0.49971574758385445, "grad_norm": 0.2490234375, "learning_rate": 5.002842524161455e-06, "loss": 0.991, "step": 879 }, { "epoch": 0.5002842524161455, "grad_norm": 0.265625, "learning_rate": 4.997157475838545e-06, "loss": 1.0824, "step": 880 }, { "epoch": 0.5008527572484366, "grad_norm": 0.24609375, "learning_rate": 4.991472427515634e-06, "loss": 1.1334, "step": 881 }, { "epoch": 0.5014212620807277, "grad_norm": 0.2353515625, "learning_rate": 4.9857873791927235e-06, "loss": 1.0544, "step": 882 }, { "epoch": 0.5019897669130188, "grad_norm": 0.2373046875, "learning_rate": 4.980102330869813e-06, "loss": 1.0695, "step": 883 }, { "epoch": 0.5025582717453099, "grad_norm": 0.2314453125, "learning_rate": 4.974417282546902e-06, "loss": 1.1192, "step": 884 }, { "epoch": 0.503126776577601, "grad_norm": 0.2265625, "learning_rate": 4.968732234223991e-06, "loss": 1.07, "step": 885 }, { "epoch": 0.503695281409892, "grad_norm": 0.236328125, "learning_rate": 4.9630471859010806e-06, "loss": 1.0405, "step": 886 }, { "epoch": 0.504263786242183, "grad_norm": 0.251953125, "learning_rate": 4.95736213757817e-06, "loss": 1.0313, "step": 887 }, { "epoch": 0.5048322910744741, "grad_norm": 0.2392578125, "learning_rate": 4.951677089255259e-06, "loss": 1.0783, "step": 888 }, { "epoch": 0.5054007959067652, "grad_norm": 0.25, "learning_rate": 4.945992040932348e-06, "loss": 1.1491, "step": 889 }, { "epoch": 0.5059693007390563, "grad_norm": 0.2392578125, "learning_rate": 4.9403069926094376e-06, "loss": 1.0894, "step": 890 }, { "epoch": 0.5065378055713473, "grad_norm": 0.48046875, "learning_rate": 4.934621944286527e-06, "loss": 1.0806, "step": 891 }, { "epoch": 0.5071063104036384, "grad_norm": 0.23828125, "learning_rate": 4.928936895963616e-06, "loss": 1.1264, "step": 892 }, { "epoch": 0.5076748152359295, "grad_norm": 0.2412109375, "learning_rate": 4.923251847640705e-06, "loss": 1.1653, "step": 893 }, { "epoch": 0.5082433200682206, "grad_norm": 0.2353515625, "learning_rate": 4.9175667993177946e-06, "loss": 1.1296, "step": 894 }, { "epoch": 0.5088118249005117, "grad_norm": 0.2333984375, "learning_rate": 4.911881750994884e-06, "loss": 1.0691, "step": 895 }, { "epoch": 0.5093803297328028, "grad_norm": 0.2470703125, "learning_rate": 4.906196702671973e-06, "loss": 1.0733, "step": 896 }, { "epoch": 0.5099488345650938, "grad_norm": 0.244140625, "learning_rate": 4.900511654349062e-06, "loss": 1.086, "step": 897 }, { "epoch": 0.5105173393973849, "grad_norm": 0.244140625, "learning_rate": 4.894826606026152e-06, "loss": 1.0067, "step": 898 }, { "epoch": 0.511085844229676, "grad_norm": 0.2275390625, "learning_rate": 4.889141557703241e-06, "loss": 1.116, "step": 899 }, { "epoch": 0.511654349061967, "grad_norm": 0.2392578125, "learning_rate": 4.88345650938033e-06, "loss": 1.0153, "step": 900 }, { "epoch": 0.5122228538942581, "grad_norm": 0.236328125, "learning_rate": 4.877771461057419e-06, "loss": 1.0647, "step": 901 }, { "epoch": 0.5127913587265491, "grad_norm": 0.25390625, "learning_rate": 4.872086412734509e-06, "loss": 1.1427, "step": 902 }, { "epoch": 0.5133598635588402, "grad_norm": 0.25390625, "learning_rate": 4.866401364411598e-06, "loss": 1.107, "step": 903 }, { "epoch": 0.5139283683911313, "grad_norm": 0.232421875, "learning_rate": 4.860716316088687e-06, "loss": 1.0691, "step": 904 }, { "epoch": 0.5144968732234224, "grad_norm": 0.232421875, "learning_rate": 4.855031267765776e-06, "loss": 1.0665, "step": 905 }, { "epoch": 0.5150653780557135, "grad_norm": 0.263671875, "learning_rate": 4.849346219442866e-06, "loss": 1.1015, "step": 906 }, { "epoch": 0.5156338828880046, "grad_norm": 0.279296875, "learning_rate": 4.843661171119955e-06, "loss": 1.0275, "step": 907 }, { "epoch": 0.5162023877202956, "grad_norm": 0.24609375, "learning_rate": 4.837976122797044e-06, "loss": 1.014, "step": 908 }, { "epoch": 0.5167708925525867, "grad_norm": 0.2373046875, "learning_rate": 4.832291074474133e-06, "loss": 1.1322, "step": 909 }, { "epoch": 0.5173393973848778, "grad_norm": 0.2451171875, "learning_rate": 4.826606026151223e-06, "loss": 1.0095, "step": 910 }, { "epoch": 0.5179079022171689, "grad_norm": 0.234375, "learning_rate": 4.820920977828312e-06, "loss": 1.024, "step": 911 }, { "epoch": 0.51847640704946, "grad_norm": 0.2451171875, "learning_rate": 4.815235929505401e-06, "loss": 1.0925, "step": 912 }, { "epoch": 0.519044911881751, "grad_norm": 0.2470703125, "learning_rate": 4.80955088118249e-06, "loss": 1.0588, "step": 913 }, { "epoch": 0.519613416714042, "grad_norm": 0.259765625, "learning_rate": 4.80386583285958e-06, "loss": 1.1041, "step": 914 }, { "epoch": 0.5201819215463331, "grad_norm": 0.2451171875, "learning_rate": 4.798180784536669e-06, "loss": 1.0787, "step": 915 }, { "epoch": 0.5207504263786242, "grad_norm": 0.2451171875, "learning_rate": 4.792495736213758e-06, "loss": 1.1601, "step": 916 }, { "epoch": 0.5213189312109153, "grad_norm": 0.251953125, "learning_rate": 4.786810687890847e-06, "loss": 1.0978, "step": 917 }, { "epoch": 0.5218874360432064, "grad_norm": 0.240234375, "learning_rate": 4.781125639567937e-06, "loss": 1.1109, "step": 918 }, { "epoch": 0.5224559408754974, "grad_norm": 0.236328125, "learning_rate": 4.775440591245026e-06, "loss": 1.0523, "step": 919 }, { "epoch": 0.5230244457077885, "grad_norm": 0.23046875, "learning_rate": 4.769755542922115e-06, "loss": 1.0041, "step": 920 }, { "epoch": 0.5235929505400796, "grad_norm": 0.248046875, "learning_rate": 4.764070494599204e-06, "loss": 1.0618, "step": 921 }, { "epoch": 0.5241614553723707, "grad_norm": 0.23828125, "learning_rate": 4.758385446276294e-06, "loss": 1.0712, "step": 922 }, { "epoch": 0.5247299602046618, "grad_norm": 0.240234375, "learning_rate": 4.752700397953383e-06, "loss": 1.0655, "step": 923 }, { "epoch": 0.5252984650369528, "grad_norm": 0.2490234375, "learning_rate": 4.747015349630472e-06, "loss": 1.0999, "step": 924 }, { "epoch": 0.5258669698692439, "grad_norm": 0.234375, "learning_rate": 4.741330301307561e-06, "loss": 1.1079, "step": 925 }, { "epoch": 0.526435474701535, "grad_norm": 0.255859375, "learning_rate": 4.735645252984651e-06, "loss": 1.0675, "step": 926 }, { "epoch": 0.527003979533826, "grad_norm": 0.248046875, "learning_rate": 4.72996020466174e-06, "loss": 1.0629, "step": 927 }, { "epoch": 0.5275724843661171, "grad_norm": 0.2421875, "learning_rate": 4.724275156338829e-06, "loss": 1.0248, "step": 928 }, { "epoch": 0.5281409891984082, "grad_norm": 0.2421875, "learning_rate": 4.7185901080159184e-06, "loss": 1.1263, "step": 929 }, { "epoch": 0.5287094940306992, "grad_norm": 0.232421875, "learning_rate": 4.712905059693008e-06, "loss": 1.0787, "step": 930 }, { "epoch": 0.5292779988629903, "grad_norm": 0.2490234375, "learning_rate": 4.707220011370097e-06, "loss": 1.1277, "step": 931 }, { "epoch": 0.5298465036952814, "grad_norm": 0.6171875, "learning_rate": 4.701534963047186e-06, "loss": 1.1027, "step": 932 }, { "epoch": 0.5304150085275725, "grad_norm": 0.244140625, "learning_rate": 4.6958499147242754e-06, "loss": 1.1158, "step": 933 }, { "epoch": 0.5309835133598636, "grad_norm": 0.25, "learning_rate": 4.690164866401365e-06, "loss": 1.0227, "step": 934 }, { "epoch": 0.5315520181921546, "grad_norm": 0.2431640625, "learning_rate": 4.684479818078454e-06, "loss": 1.1386, "step": 935 }, { "epoch": 0.5321205230244457, "grad_norm": 0.2451171875, "learning_rate": 4.678794769755543e-06, "loss": 1.0347, "step": 936 }, { "epoch": 0.5326890278567368, "grad_norm": 0.2421875, "learning_rate": 4.6731097214326324e-06, "loss": 1.1248, "step": 937 }, { "epoch": 0.5332575326890279, "grad_norm": 0.240234375, "learning_rate": 4.667424673109722e-06, "loss": 1.116, "step": 938 }, { "epoch": 0.533826037521319, "grad_norm": 0.2470703125, "learning_rate": 4.661739624786811e-06, "loss": 1.0835, "step": 939 }, { "epoch": 0.5343945423536101, "grad_norm": 0.2470703125, "learning_rate": 4.6560545764639e-06, "loss": 1.098, "step": 940 }, { "epoch": 0.534963047185901, "grad_norm": 0.234375, "learning_rate": 4.6503695281409895e-06, "loss": 1.0889, "step": 941 }, { "epoch": 0.5355315520181921, "grad_norm": 0.2373046875, "learning_rate": 4.644684479818079e-06, "loss": 1.1402, "step": 942 }, { "epoch": 0.5361000568504832, "grad_norm": 0.23828125, "learning_rate": 4.638999431495168e-06, "loss": 1.1682, "step": 943 }, { "epoch": 0.5366685616827743, "grad_norm": 0.248046875, "learning_rate": 4.633314383172257e-06, "loss": 1.0001, "step": 944 }, { "epoch": 0.5372370665150654, "grad_norm": 0.2412109375, "learning_rate": 4.6276293348493465e-06, "loss": 1.122, "step": 945 }, { "epoch": 0.5378055713473564, "grad_norm": 0.2451171875, "learning_rate": 4.621944286526436e-06, "loss": 1.0835, "step": 946 }, { "epoch": 0.5383740761796475, "grad_norm": 0.2353515625, "learning_rate": 4.616259238203525e-06, "loss": 1.0335, "step": 947 }, { "epoch": 0.5389425810119386, "grad_norm": 0.326171875, "learning_rate": 4.610574189880614e-06, "loss": 1.0421, "step": 948 }, { "epoch": 0.5395110858442297, "grad_norm": 0.251953125, "learning_rate": 4.6048891415577035e-06, "loss": 1.1291, "step": 949 }, { "epoch": 0.5400795906765208, "grad_norm": 0.2333984375, "learning_rate": 4.599204093234793e-06, "loss": 1.1754, "step": 950 }, { "epoch": 0.5406480955088119, "grad_norm": 0.244140625, "learning_rate": 4.593519044911882e-06, "loss": 1.059, "step": 951 }, { "epoch": 0.5412166003411029, "grad_norm": 0.244140625, "learning_rate": 4.587833996588971e-06, "loss": 1.078, "step": 952 }, { "epoch": 0.541785105173394, "grad_norm": 0.2451171875, "learning_rate": 4.5821489482660605e-06, "loss": 1.1194, "step": 953 }, { "epoch": 0.542353610005685, "grad_norm": 0.2431640625, "learning_rate": 4.57646389994315e-06, "loss": 1.0921, "step": 954 }, { "epoch": 0.5429221148379761, "grad_norm": 0.2373046875, "learning_rate": 4.570778851620239e-06, "loss": 1.0684, "step": 955 }, { "epoch": 0.5434906196702672, "grad_norm": 0.251953125, "learning_rate": 4.565093803297328e-06, "loss": 1.0499, "step": 956 }, { "epoch": 0.5440591245025582, "grad_norm": 0.259765625, "learning_rate": 4.5594087549744175e-06, "loss": 0.943, "step": 957 }, { "epoch": 0.5446276293348493, "grad_norm": 0.26171875, "learning_rate": 4.553723706651507e-06, "loss": 1.0833, "step": 958 }, { "epoch": 0.5451961341671404, "grad_norm": 0.263671875, "learning_rate": 4.548038658328596e-06, "loss": 1.1057, "step": 959 }, { "epoch": 0.5457646389994315, "grad_norm": 0.236328125, "learning_rate": 4.542353610005685e-06, "loss": 1.1592, "step": 960 }, { "epoch": 0.5463331438317226, "grad_norm": 0.234375, "learning_rate": 4.5366685616827745e-06, "loss": 1.0333, "step": 961 }, { "epoch": 0.5469016486640137, "grad_norm": 0.2421875, "learning_rate": 4.530983513359864e-06, "loss": 1.0861, "step": 962 }, { "epoch": 0.5474701534963047, "grad_norm": 0.2421875, "learning_rate": 4.525298465036953e-06, "loss": 1.0672, "step": 963 }, { "epoch": 0.5480386583285958, "grad_norm": 0.23828125, "learning_rate": 4.519613416714042e-06, "loss": 1.0911, "step": 964 }, { "epoch": 0.5486071631608869, "grad_norm": 0.2431640625, "learning_rate": 4.5139283683911315e-06, "loss": 1.0339, "step": 965 }, { "epoch": 0.549175667993178, "grad_norm": 0.25390625, "learning_rate": 4.508243320068221e-06, "loss": 1.015, "step": 966 }, { "epoch": 0.5497441728254691, "grad_norm": 0.244140625, "learning_rate": 4.50255827174531e-06, "loss": 1.1203, "step": 967 }, { "epoch": 0.55031267765776, "grad_norm": 0.2314453125, "learning_rate": 4.496873223422399e-06, "loss": 1.0926, "step": 968 }, { "epoch": 0.5508811824900511, "grad_norm": 0.2490234375, "learning_rate": 4.4911881750994885e-06, "loss": 1.121, "step": 969 }, { "epoch": 0.5514496873223422, "grad_norm": 0.271484375, "learning_rate": 4.485503126776578e-06, "loss": 1.0474, "step": 970 }, { "epoch": 0.5520181921546333, "grad_norm": 0.2421875, "learning_rate": 4.479818078453667e-06, "loss": 1.0298, "step": 971 }, { "epoch": 0.5525866969869244, "grad_norm": 0.2373046875, "learning_rate": 4.474133030130757e-06, "loss": 1.0892, "step": 972 }, { "epoch": 0.5531552018192155, "grad_norm": 0.306640625, "learning_rate": 4.4684479818078456e-06, "loss": 1.0382, "step": 973 }, { "epoch": 0.5537237066515065, "grad_norm": 0.234375, "learning_rate": 4.462762933484935e-06, "loss": 1.0364, "step": 974 }, { "epoch": 0.5542922114837976, "grad_norm": 0.2490234375, "learning_rate": 4.457077885162024e-06, "loss": 1.0341, "step": 975 }, { "epoch": 0.5548607163160887, "grad_norm": 0.2373046875, "learning_rate": 4.451392836839113e-06, "loss": 1.1429, "step": 976 }, { "epoch": 0.5554292211483798, "grad_norm": 0.244140625, "learning_rate": 4.4457077885162026e-06, "loss": 1.0341, "step": 977 }, { "epoch": 0.5559977259806709, "grad_norm": 0.2451171875, "learning_rate": 4.440022740193292e-06, "loss": 1.1188, "step": 978 }, { "epoch": 0.5565662308129619, "grad_norm": 0.248046875, "learning_rate": 4.434337691870381e-06, "loss": 1.1151, "step": 979 }, { "epoch": 0.557134735645253, "grad_norm": 0.2431640625, "learning_rate": 4.42865264354747e-06, "loss": 1.0575, "step": 980 }, { "epoch": 0.557703240477544, "grad_norm": 0.255859375, "learning_rate": 4.4229675952245596e-06, "loss": 1.1349, "step": 981 }, { "epoch": 0.5582717453098351, "grad_norm": 0.26953125, "learning_rate": 4.417282546901649e-06, "loss": 1.0689, "step": 982 }, { "epoch": 0.5588402501421262, "grad_norm": 0.236328125, "learning_rate": 4.411597498578739e-06, "loss": 1.0889, "step": 983 }, { "epoch": 0.5594087549744173, "grad_norm": 0.23828125, "learning_rate": 4.405912450255827e-06, "loss": 1.0753, "step": 984 }, { "epoch": 0.5599772598067083, "grad_norm": 0.2392578125, "learning_rate": 4.400227401932917e-06, "loss": 1.1096, "step": 985 }, { "epoch": 0.5605457646389994, "grad_norm": 0.244140625, "learning_rate": 4.394542353610006e-06, "loss": 1.0931, "step": 986 }, { "epoch": 0.5611142694712905, "grad_norm": 0.25, "learning_rate": 4.388857305287095e-06, "loss": 0.9937, "step": 987 }, { "epoch": 0.5616827743035816, "grad_norm": 0.244140625, "learning_rate": 4.383172256964184e-06, "loss": 1.0421, "step": 988 }, { "epoch": 0.5622512791358727, "grad_norm": 0.2451171875, "learning_rate": 4.377487208641274e-06, "loss": 1.1381, "step": 989 }, { "epoch": 0.5628197839681637, "grad_norm": 0.28125, "learning_rate": 4.371802160318363e-06, "loss": 1.1007, "step": 990 }, { "epoch": 0.5633882888004548, "grad_norm": 0.228515625, "learning_rate": 4.366117111995452e-06, "loss": 1.0265, "step": 991 }, { "epoch": 0.5639567936327459, "grad_norm": 0.248046875, "learning_rate": 4.360432063672541e-06, "loss": 1.1142, "step": 992 }, { "epoch": 0.564525298465037, "grad_norm": 0.2490234375, "learning_rate": 4.354747015349631e-06, "loss": 1.0534, "step": 993 }, { "epoch": 0.5650938032973281, "grad_norm": 0.2373046875, "learning_rate": 4.349061967026721e-06, "loss": 1.0579, "step": 994 }, { "epoch": 0.5656623081296192, "grad_norm": 0.2412109375, "learning_rate": 4.343376918703809e-06, "loss": 1.0537, "step": 995 }, { "epoch": 0.5662308129619101, "grad_norm": 0.267578125, "learning_rate": 4.337691870380898e-06, "loss": 1.1215, "step": 996 }, { "epoch": 0.5667993177942012, "grad_norm": 0.236328125, "learning_rate": 4.3320068220579885e-06, "loss": 0.9887, "step": 997 }, { "epoch": 0.5673678226264923, "grad_norm": 0.251953125, "learning_rate": 4.326321773735077e-06, "loss": 1.1396, "step": 998 }, { "epoch": 0.5679363274587834, "grad_norm": 0.259765625, "learning_rate": 4.320636725412166e-06, "loss": 1.0536, "step": 999 }, { "epoch": 0.5685048322910745, "grad_norm": 0.2392578125, "learning_rate": 4.314951677089255e-06, "loss": 1.1476, "step": 1000 }, { "epoch": 0.5690733371233655, "grad_norm": 0.2294921875, "learning_rate": 4.309266628766345e-06, "loss": 1.1125, "step": 1001 }, { "epoch": 0.5696418419556566, "grad_norm": 0.232421875, "learning_rate": 4.303581580443434e-06, "loss": 1.0696, "step": 1002 }, { "epoch": 0.5702103467879477, "grad_norm": 0.2412109375, "learning_rate": 4.297896532120523e-06, "loss": 1.1256, "step": 1003 }, { "epoch": 0.5707788516202388, "grad_norm": 0.240234375, "learning_rate": 4.292211483797612e-06, "loss": 1.0976, "step": 1004 }, { "epoch": 0.5713473564525299, "grad_norm": 0.259765625, "learning_rate": 4.2865264354747025e-06, "loss": 1.0604, "step": 1005 }, { "epoch": 0.571915861284821, "grad_norm": 0.265625, "learning_rate": 4.280841387151791e-06, "loss": 1.0675, "step": 1006 }, { "epoch": 0.572484366117112, "grad_norm": 0.251953125, "learning_rate": 4.27515633882888e-06, "loss": 1.0855, "step": 1007 }, { "epoch": 0.573052870949403, "grad_norm": 0.2470703125, "learning_rate": 4.26947129050597e-06, "loss": 1.1308, "step": 1008 }, { "epoch": 0.5736213757816941, "grad_norm": 0.251953125, "learning_rate": 4.263786242183059e-06, "loss": 1.1076, "step": 1009 }, { "epoch": 0.5741898806139852, "grad_norm": 0.248046875, "learning_rate": 4.258101193860148e-06, "loss": 1.0536, "step": 1010 }, { "epoch": 0.5747583854462763, "grad_norm": 0.25, "learning_rate": 4.252416145537237e-06, "loss": 1.0854, "step": 1011 }, { "epoch": 0.5753268902785673, "grad_norm": 0.265625, "learning_rate": 4.246731097214326e-06, "loss": 0.9825, "step": 1012 }, { "epoch": 0.5758953951108584, "grad_norm": 0.2421875, "learning_rate": 4.241046048891416e-06, "loss": 1.0979, "step": 1013 }, { "epoch": 0.5764638999431495, "grad_norm": 0.263671875, "learning_rate": 4.235361000568505e-06, "loss": 1.0574, "step": 1014 }, { "epoch": 0.5770324047754406, "grad_norm": 0.2373046875, "learning_rate": 4.229675952245594e-06, "loss": 1.0365, "step": 1015 }, { "epoch": 0.5776009096077317, "grad_norm": 0.244140625, "learning_rate": 4.223990903922684e-06, "loss": 1.1248, "step": 1016 }, { "epoch": 0.5781694144400228, "grad_norm": 0.25390625, "learning_rate": 4.218305855599773e-06, "loss": 1.0437, "step": 1017 }, { "epoch": 0.5787379192723138, "grad_norm": 0.23828125, "learning_rate": 4.212620807276862e-06, "loss": 1.0536, "step": 1018 }, { "epoch": 0.5793064241046049, "grad_norm": 0.248046875, "learning_rate": 4.206935758953952e-06, "loss": 1.0932, "step": 1019 }, { "epoch": 0.579874928936896, "grad_norm": 0.2333984375, "learning_rate": 4.2012507106310404e-06, "loss": 1.0583, "step": 1020 }, { "epoch": 0.5804434337691871, "grad_norm": 0.2490234375, "learning_rate": 4.19556566230813e-06, "loss": 1.1593, "step": 1021 }, { "epoch": 0.5810119386014782, "grad_norm": 0.24609375, "learning_rate": 4.18988061398522e-06, "loss": 1.0544, "step": 1022 }, { "epoch": 0.5815804434337691, "grad_norm": 0.255859375, "learning_rate": 4.184195565662308e-06, "loss": 1.0957, "step": 1023 }, { "epoch": 0.5821489482660602, "grad_norm": 0.2421875, "learning_rate": 4.1785105173393974e-06, "loss": 0.9881, "step": 1024 }, { "epoch": 0.5827174530983513, "grad_norm": 0.2314453125, "learning_rate": 4.172825469016487e-06, "loss": 1.0114, "step": 1025 }, { "epoch": 0.5832859579306424, "grad_norm": 0.25, "learning_rate": 4.167140420693576e-06, "loss": 1.1096, "step": 1026 }, { "epoch": 0.5838544627629335, "grad_norm": 0.255859375, "learning_rate": 4.161455372370665e-06, "loss": 1.0735, "step": 1027 }, { "epoch": 0.5844229675952246, "grad_norm": 0.2490234375, "learning_rate": 4.1557703240477545e-06, "loss": 1.0283, "step": 1028 }, { "epoch": 0.5849914724275156, "grad_norm": 0.2373046875, "learning_rate": 4.150085275724844e-06, "loss": 1.0951, "step": 1029 }, { "epoch": 0.5855599772598067, "grad_norm": 0.23828125, "learning_rate": 4.144400227401934e-06, "loss": 1.0738, "step": 1030 }, { "epoch": 0.5861284820920978, "grad_norm": 0.2392578125, "learning_rate": 4.138715179079022e-06, "loss": 1.0382, "step": 1031 }, { "epoch": 0.5866969869243889, "grad_norm": 0.248046875, "learning_rate": 4.1330301307561115e-06, "loss": 1.1723, "step": 1032 }, { "epoch": 0.58726549175668, "grad_norm": 0.2578125, "learning_rate": 4.1273450824332016e-06, "loss": 1.1324, "step": 1033 }, { "epoch": 0.587833996588971, "grad_norm": 0.2373046875, "learning_rate": 4.12166003411029e-06, "loss": 1.0903, "step": 1034 }, { "epoch": 0.588402501421262, "grad_norm": 0.349609375, "learning_rate": 4.115974985787379e-06, "loss": 1.0795, "step": 1035 }, { "epoch": 0.5889710062535531, "grad_norm": 0.2392578125, "learning_rate": 4.110289937464469e-06, "loss": 1.1134, "step": 1036 }, { "epoch": 0.5895395110858442, "grad_norm": 0.2490234375, "learning_rate": 4.104604889141558e-06, "loss": 1.0185, "step": 1037 }, { "epoch": 0.5901080159181353, "grad_norm": 0.328125, "learning_rate": 4.098919840818647e-06, "loss": 1.0511, "step": 1038 }, { "epoch": 0.5906765207504264, "grad_norm": 0.248046875, "learning_rate": 4.093234792495736e-06, "loss": 1.0957, "step": 1039 }, { "epoch": 0.5912450255827174, "grad_norm": 0.259765625, "learning_rate": 4.0875497441728255e-06, "loss": 1.0724, "step": 1040 }, { "epoch": 0.5918135304150085, "grad_norm": 0.25, "learning_rate": 4.081864695849916e-06, "loss": 1.074, "step": 1041 }, { "epoch": 0.5923820352472996, "grad_norm": 0.2431640625, "learning_rate": 4.076179647527004e-06, "loss": 1.094, "step": 1042 }, { "epoch": 0.5929505400795907, "grad_norm": 0.2412109375, "learning_rate": 4.070494599204093e-06, "loss": 0.9802, "step": 1043 }, { "epoch": 0.5935190449118818, "grad_norm": 0.2470703125, "learning_rate": 4.064809550881183e-06, "loss": 1.1207, "step": 1044 }, { "epoch": 0.5940875497441728, "grad_norm": 0.265625, "learning_rate": 4.059124502558272e-06, "loss": 1.0489, "step": 1045 }, { "epoch": 0.5946560545764639, "grad_norm": 0.2578125, "learning_rate": 4.053439454235361e-06, "loss": 1.0111, "step": 1046 }, { "epoch": 0.595224559408755, "grad_norm": 0.2421875, "learning_rate": 4.047754405912451e-06, "loss": 1.1201, "step": 1047 }, { "epoch": 0.595793064241046, "grad_norm": 0.2412109375, "learning_rate": 4.0420693575895395e-06, "loss": 1.0893, "step": 1048 }, { "epoch": 0.5963615690733371, "grad_norm": 0.255859375, "learning_rate": 4.036384309266629e-06, "loss": 1.1487, "step": 1049 }, { "epoch": 0.5969300739056282, "grad_norm": 0.2353515625, "learning_rate": 4.030699260943718e-06, "loss": 1.0702, "step": 1050 }, { "epoch": 0.5974985787379192, "grad_norm": 0.23828125, "learning_rate": 4.025014212620807e-06, "loss": 1.0795, "step": 1051 }, { "epoch": 0.5980670835702103, "grad_norm": 0.25, "learning_rate": 4.019329164297897e-06, "loss": 1.0923, "step": 1052 }, { "epoch": 0.5986355884025014, "grad_norm": 0.26171875, "learning_rate": 4.013644115974986e-06, "loss": 1.0313, "step": 1053 }, { "epoch": 0.5992040932347925, "grad_norm": 0.2451171875, "learning_rate": 4.007959067652075e-06, "loss": 1.0795, "step": 1054 }, { "epoch": 0.5997725980670836, "grad_norm": 0.341796875, "learning_rate": 4.002274019329165e-06, "loss": 1.0597, "step": 1055 }, { "epoch": 0.6003411028993746, "grad_norm": 0.251953125, "learning_rate": 3.9965889710062535e-06, "loss": 1.0297, "step": 1056 }, { "epoch": 0.6009096077316657, "grad_norm": 0.2353515625, "learning_rate": 3.990903922683343e-06, "loss": 1.1187, "step": 1057 }, { "epoch": 0.6014781125639568, "grad_norm": 0.25, "learning_rate": 3.985218874360433e-06, "loss": 1.0814, "step": 1058 }, { "epoch": 0.6020466173962479, "grad_norm": 0.2373046875, "learning_rate": 3.979533826037521e-06, "loss": 1.1546, "step": 1059 }, { "epoch": 0.602615122228539, "grad_norm": 0.23828125, "learning_rate": 3.9738487777146106e-06, "loss": 1.0671, "step": 1060 }, { "epoch": 0.6031836270608301, "grad_norm": 0.2451171875, "learning_rate": 3.968163729391701e-06, "loss": 1.0533, "step": 1061 }, { "epoch": 0.603752131893121, "grad_norm": 0.314453125, "learning_rate": 3.962478681068789e-06, "loss": 0.9948, "step": 1062 }, { "epoch": 0.6043206367254121, "grad_norm": 0.2412109375, "learning_rate": 3.956793632745879e-06, "loss": 1.0669, "step": 1063 }, { "epoch": 0.6048891415577032, "grad_norm": 0.240234375, "learning_rate": 3.9511085844229676e-06, "loss": 1.1072, "step": 1064 }, { "epoch": 0.6054576463899943, "grad_norm": 0.2412109375, "learning_rate": 3.945423536100057e-06, "loss": 1.0862, "step": 1065 }, { "epoch": 0.6060261512222854, "grad_norm": 0.2392578125, "learning_rate": 3.939738487777147e-06, "loss": 1.139, "step": 1066 }, { "epoch": 0.6065946560545764, "grad_norm": 0.25390625, "learning_rate": 3.934053439454235e-06, "loss": 1.1288, "step": 1067 }, { "epoch": 0.6071631608868675, "grad_norm": 0.234375, "learning_rate": 3.9283683911313246e-06, "loss": 1.0848, "step": 1068 }, { "epoch": 0.6077316657191586, "grad_norm": 0.2412109375, "learning_rate": 3.922683342808415e-06, "loss": 1.0459, "step": 1069 }, { "epoch": 0.6083001705514497, "grad_norm": 0.2392578125, "learning_rate": 3.916998294485503e-06, "loss": 1.0543, "step": 1070 }, { "epoch": 0.6088686753837408, "grad_norm": 0.244140625, "learning_rate": 3.911313246162592e-06, "loss": 0.9961, "step": 1071 }, { "epoch": 0.6094371802160319, "grad_norm": 0.248046875, "learning_rate": 3.9056281978396824e-06, "loss": 1.0547, "step": 1072 }, { "epoch": 0.6100056850483229, "grad_norm": 0.2431640625, "learning_rate": 3.899943149516771e-06, "loss": 1.0742, "step": 1073 }, { "epoch": 0.610574189880614, "grad_norm": 0.25390625, "learning_rate": 3.894258101193861e-06, "loss": 1.0297, "step": 1074 }, { "epoch": 0.611142694712905, "grad_norm": 0.244140625, "learning_rate": 3.88857305287095e-06, "loss": 1.0704, "step": 1075 }, { "epoch": 0.6117111995451961, "grad_norm": 0.248046875, "learning_rate": 3.882888004548039e-06, "loss": 1.0677, "step": 1076 }, { "epoch": 0.6122797043774872, "grad_norm": 0.2421875, "learning_rate": 3.877202956225129e-06, "loss": 1.1179, "step": 1077 }, { "epoch": 0.6128482092097783, "grad_norm": 0.2470703125, "learning_rate": 3.871517907902217e-06, "loss": 1.1176, "step": 1078 }, { "epoch": 0.6134167140420693, "grad_norm": 0.2265625, "learning_rate": 3.865832859579306e-06, "loss": 1.1356, "step": 1079 }, { "epoch": 0.6139852188743604, "grad_norm": 0.265625, "learning_rate": 3.8601478112563965e-06, "loss": 0.9787, "step": 1080 }, { "epoch": 0.6145537237066515, "grad_norm": 0.265625, "learning_rate": 3.854462762933485e-06, "loss": 1.071, "step": 1081 }, { "epoch": 0.6151222285389426, "grad_norm": 0.25390625, "learning_rate": 3.848777714610574e-06, "loss": 1.0812, "step": 1082 }, { "epoch": 0.6156907333712337, "grad_norm": 0.2373046875, "learning_rate": 3.843092666287664e-06, "loss": 1.0795, "step": 1083 }, { "epoch": 0.6162592382035247, "grad_norm": 0.2470703125, "learning_rate": 3.837407617964753e-06, "loss": 1.0684, "step": 1084 }, { "epoch": 0.6168277430358158, "grad_norm": 0.259765625, "learning_rate": 3.831722569641843e-06, "loss": 1.0963, "step": 1085 }, { "epoch": 0.6173962478681069, "grad_norm": 0.2392578125, "learning_rate": 3.826037521318932e-06, "loss": 1.0228, "step": 1086 }, { "epoch": 0.617964752700398, "grad_norm": 0.2470703125, "learning_rate": 3.82035247299602e-06, "loss": 1.0563, "step": 1087 }, { "epoch": 0.6185332575326891, "grad_norm": 0.2578125, "learning_rate": 3.81466742467311e-06, "loss": 1.0611, "step": 1088 }, { "epoch": 0.6191017623649802, "grad_norm": 0.25390625, "learning_rate": 3.8089823763501993e-06, "loss": 1.15, "step": 1089 }, { "epoch": 0.6196702671972711, "grad_norm": 0.2431640625, "learning_rate": 3.803297328027288e-06, "loss": 1.1381, "step": 1090 }, { "epoch": 0.6202387720295622, "grad_norm": 0.25, "learning_rate": 3.797612279704378e-06, "loss": 1.0245, "step": 1091 }, { "epoch": 0.6208072768618533, "grad_norm": 0.2578125, "learning_rate": 3.791927231381467e-06, "loss": 1.0672, "step": 1092 }, { "epoch": 0.6213757816941444, "grad_norm": 0.2470703125, "learning_rate": 3.786242183058556e-06, "loss": 1.0413, "step": 1093 }, { "epoch": 0.6219442865264355, "grad_norm": 0.26171875, "learning_rate": 3.7805571347356456e-06, "loss": 1.0468, "step": 1094 }, { "epoch": 0.6225127913587265, "grad_norm": 0.251953125, "learning_rate": 3.774872086412735e-06, "loss": 1.0179, "step": 1095 }, { "epoch": 0.6230812961910176, "grad_norm": 0.236328125, "learning_rate": 3.7691870380898245e-06, "loss": 1.0592, "step": 1096 }, { "epoch": 0.6236498010233087, "grad_norm": 0.234375, "learning_rate": 3.7635019897669133e-06, "loss": 1.1009, "step": 1097 }, { "epoch": 0.6242183058555998, "grad_norm": 0.2470703125, "learning_rate": 3.7578169414440026e-06, "loss": 1.0789, "step": 1098 }, { "epoch": 0.6247868106878909, "grad_norm": 0.240234375, "learning_rate": 3.7521318931210923e-06, "loss": 1.0743, "step": 1099 }, { "epoch": 0.625355315520182, "grad_norm": 0.255859375, "learning_rate": 3.746446844798181e-06, "loss": 1.043, "step": 1100 }, { "epoch": 0.625923820352473, "grad_norm": 0.24609375, "learning_rate": 3.7407617964752703e-06, "loss": 1.0813, "step": 1101 }, { "epoch": 0.626492325184764, "grad_norm": 0.431640625, "learning_rate": 3.7350767481523596e-06, "loss": 1.0837, "step": 1102 }, { "epoch": 0.6270608300170551, "grad_norm": 0.2333984375, "learning_rate": 3.729391699829449e-06, "loss": 1.0419, "step": 1103 }, { "epoch": 0.6276293348493462, "grad_norm": 0.2578125, "learning_rate": 3.7237066515065377e-06, "loss": 1.1174, "step": 1104 }, { "epoch": 0.6281978396816373, "grad_norm": 0.25, "learning_rate": 3.7180216031836274e-06, "loss": 1.1324, "step": 1105 }, { "epoch": 0.6287663445139283, "grad_norm": 0.251953125, "learning_rate": 3.7123365548607166e-06, "loss": 1.102, "step": 1106 }, { "epoch": 0.6293348493462194, "grad_norm": 0.26171875, "learning_rate": 3.7066515065378063e-06, "loss": 1.1071, "step": 1107 }, { "epoch": 0.6299033541785105, "grad_norm": 0.240234375, "learning_rate": 3.700966458214895e-06, "loss": 1.0341, "step": 1108 }, { "epoch": 0.6304718590108016, "grad_norm": 0.240234375, "learning_rate": 3.6952814098919844e-06, "loss": 1.1205, "step": 1109 }, { "epoch": 0.6310403638430927, "grad_norm": 0.25390625, "learning_rate": 3.689596361569074e-06, "loss": 1.0833, "step": 1110 }, { "epoch": 0.6316088686753838, "grad_norm": 0.2451171875, "learning_rate": 3.683911313246163e-06, "loss": 1.0993, "step": 1111 }, { "epoch": 0.6321773735076748, "grad_norm": 0.26171875, "learning_rate": 3.678226264923252e-06, "loss": 1.0986, "step": 1112 }, { "epoch": 0.6327458783399659, "grad_norm": 0.2470703125, "learning_rate": 3.6725412166003414e-06, "loss": 1.1076, "step": 1113 }, { "epoch": 0.633314383172257, "grad_norm": 0.255859375, "learning_rate": 3.6668561682774306e-06, "loss": 0.9729, "step": 1114 }, { "epoch": 0.6338828880045481, "grad_norm": 0.240234375, "learning_rate": 3.66117111995452e-06, "loss": 1.1297, "step": 1115 }, { "epoch": 0.6344513928368392, "grad_norm": 0.267578125, "learning_rate": 3.655486071631609e-06, "loss": 1.0246, "step": 1116 }, { "epoch": 0.6350198976691301, "grad_norm": 0.2490234375, "learning_rate": 3.6498010233086984e-06, "loss": 1.0804, "step": 1117 }, { "epoch": 0.6355884025014212, "grad_norm": 0.24609375, "learning_rate": 3.644115974985788e-06, "loss": 1.0818, "step": 1118 }, { "epoch": 0.6361569073337123, "grad_norm": 0.2470703125, "learning_rate": 3.638430926662877e-06, "loss": 1.1666, "step": 1119 }, { "epoch": 0.6367254121660034, "grad_norm": 0.23828125, "learning_rate": 3.632745878339966e-06, "loss": 1.1203, "step": 1120 }, { "epoch": 0.6372939169982945, "grad_norm": 0.25390625, "learning_rate": 3.627060830017056e-06, "loss": 1.064, "step": 1121 }, { "epoch": 0.6378624218305856, "grad_norm": 0.240234375, "learning_rate": 3.6213757816941447e-06, "loss": 1.1056, "step": 1122 }, { "epoch": 0.6384309266628766, "grad_norm": 0.25, "learning_rate": 3.615690733371234e-06, "loss": 1.1603, "step": 1123 }, { "epoch": 0.6389994314951677, "grad_norm": 0.2451171875, "learning_rate": 3.6100056850483236e-06, "loss": 1.0744, "step": 1124 }, { "epoch": 0.6395679363274588, "grad_norm": 0.23828125, "learning_rate": 3.6043206367254124e-06, "loss": 1.0741, "step": 1125 }, { "epoch": 0.6401364411597499, "grad_norm": 0.25390625, "learning_rate": 3.5986355884025017e-06, "loss": 1.0674, "step": 1126 }, { "epoch": 0.640704945992041, "grad_norm": 0.251953125, "learning_rate": 3.592950540079591e-06, "loss": 1.0454, "step": 1127 }, { "epoch": 0.641273450824332, "grad_norm": 0.2333984375, "learning_rate": 3.58726549175668e-06, "loss": 1.1022, "step": 1128 }, { "epoch": 0.641841955656623, "grad_norm": 0.2412109375, "learning_rate": 3.581580443433769e-06, "loss": 1.15, "step": 1129 }, { "epoch": 0.6424104604889141, "grad_norm": 0.2412109375, "learning_rate": 3.5758953951108587e-06, "loss": 1.1186, "step": 1130 }, { "epoch": 0.6429789653212052, "grad_norm": 0.3125, "learning_rate": 3.570210346787948e-06, "loss": 1.0473, "step": 1131 }, { "epoch": 0.6435474701534963, "grad_norm": 0.24609375, "learning_rate": 3.5645252984650376e-06, "loss": 1.1357, "step": 1132 }, { "epoch": 0.6441159749857874, "grad_norm": 0.2470703125, "learning_rate": 3.5588402501421264e-06, "loss": 1.0481, "step": 1133 }, { "epoch": 0.6446844798180784, "grad_norm": 0.240234375, "learning_rate": 3.5531552018192157e-06, "loss": 1.0754, "step": 1134 }, { "epoch": 0.6452529846503695, "grad_norm": 0.26171875, "learning_rate": 3.5474701534963054e-06, "loss": 1.0752, "step": 1135 }, { "epoch": 0.6458214894826606, "grad_norm": 0.2421875, "learning_rate": 3.541785105173394e-06, "loss": 1.0756, "step": 1136 }, { "epoch": 0.6463899943149517, "grad_norm": 0.2373046875, "learning_rate": 3.5361000568504834e-06, "loss": 1.0396, "step": 1137 }, { "epoch": 0.6469584991472428, "grad_norm": 0.234375, "learning_rate": 3.5304150085275727e-06, "loss": 1.1133, "step": 1138 }, { "epoch": 0.6475270039795338, "grad_norm": 0.24609375, "learning_rate": 3.524729960204662e-06, "loss": 1.0655, "step": 1139 }, { "epoch": 0.6480955088118249, "grad_norm": 0.236328125, "learning_rate": 3.519044911881751e-06, "loss": 1.0964, "step": 1140 }, { "epoch": 0.648664013644116, "grad_norm": 0.279296875, "learning_rate": 3.5133598635588405e-06, "loss": 1.0927, "step": 1141 }, { "epoch": 0.6492325184764071, "grad_norm": 0.244140625, "learning_rate": 3.5076748152359297e-06, "loss": 1.0739, "step": 1142 }, { "epoch": 0.6498010233086982, "grad_norm": 0.25, "learning_rate": 3.5019897669130194e-06, "loss": 1.0863, "step": 1143 }, { "epoch": 0.6503695281409893, "grad_norm": 0.2490234375, "learning_rate": 3.4963047185901082e-06, "loss": 1.027, "step": 1144 }, { "epoch": 0.6509380329732802, "grad_norm": 0.24609375, "learning_rate": 3.4906196702671975e-06, "loss": 1.0578, "step": 1145 }, { "epoch": 0.6515065378055713, "grad_norm": 0.244140625, "learning_rate": 3.484934621944287e-06, "loss": 1.0546, "step": 1146 }, { "epoch": 0.6520750426378624, "grad_norm": 0.2451171875, "learning_rate": 3.479249573621376e-06, "loss": 1.0907, "step": 1147 }, { "epoch": 0.6526435474701535, "grad_norm": 0.2421875, "learning_rate": 3.4735645252984652e-06, "loss": 1.1428, "step": 1148 }, { "epoch": 0.6532120523024446, "grad_norm": 0.2490234375, "learning_rate": 3.467879476975555e-06, "loss": 0.9906, "step": 1149 }, { "epoch": 0.6537805571347356, "grad_norm": 0.255859375, "learning_rate": 3.4621944286526437e-06, "loss": 1.0663, "step": 1150 }, { "epoch": 0.6543490619670267, "grad_norm": 0.244140625, "learning_rate": 3.456509380329733e-06, "loss": 1.1098, "step": 1151 }, { "epoch": 0.6549175667993178, "grad_norm": 0.236328125, "learning_rate": 3.4508243320068222e-06, "loss": 1.0292, "step": 1152 }, { "epoch": 0.6554860716316089, "grad_norm": 0.24609375, "learning_rate": 3.4451392836839115e-06, "loss": 1.0538, "step": 1153 }, { "epoch": 0.6560545764639, "grad_norm": 0.244140625, "learning_rate": 3.439454235361001e-06, "loss": 1.0592, "step": 1154 }, { "epoch": 0.6566230812961911, "grad_norm": 0.2451171875, "learning_rate": 3.43376918703809e-06, "loss": 1.0942, "step": 1155 }, { "epoch": 0.657191586128482, "grad_norm": 0.2412109375, "learning_rate": 3.4280841387151793e-06, "loss": 1.0031, "step": 1156 }, { "epoch": 0.6577600909607731, "grad_norm": 0.2578125, "learning_rate": 3.422399090392269e-06, "loss": 1.0632, "step": 1157 }, { "epoch": 0.6583285957930642, "grad_norm": 0.248046875, "learning_rate": 3.4167140420693578e-06, "loss": 1.0874, "step": 1158 }, { "epoch": 0.6588971006253553, "grad_norm": 0.25, "learning_rate": 3.411028993746447e-06, "loss": 1.0432, "step": 1159 }, { "epoch": 0.6594656054576464, "grad_norm": 0.2392578125, "learning_rate": 3.4053439454235367e-06, "loss": 1.0041, "step": 1160 }, { "epoch": 0.6600341102899374, "grad_norm": 0.2431640625, "learning_rate": 3.3996588971006255e-06, "loss": 1.0876, "step": 1161 }, { "epoch": 0.6606026151222285, "grad_norm": 0.251953125, "learning_rate": 3.3939738487777148e-06, "loss": 1.0232, "step": 1162 }, { "epoch": 0.6611711199545196, "grad_norm": 0.287109375, "learning_rate": 3.3882888004548044e-06, "loss": 1.1047, "step": 1163 }, { "epoch": 0.6617396247868107, "grad_norm": 0.2431640625, "learning_rate": 3.3826037521318933e-06, "loss": 1.061, "step": 1164 }, { "epoch": 0.6623081296191018, "grad_norm": 0.265625, "learning_rate": 3.376918703808983e-06, "loss": 1.046, "step": 1165 }, { "epoch": 0.6628766344513929, "grad_norm": 0.2490234375, "learning_rate": 3.3712336554860718e-06, "loss": 1.0766, "step": 1166 }, { "epoch": 0.6634451392836839, "grad_norm": 0.2431640625, "learning_rate": 3.365548607163161e-06, "loss": 1.079, "step": 1167 }, { "epoch": 0.664013644115975, "grad_norm": 0.236328125, "learning_rate": 3.3598635588402507e-06, "loss": 1.0139, "step": 1168 }, { "epoch": 0.6645821489482661, "grad_norm": 0.248046875, "learning_rate": 3.3541785105173395e-06, "loss": 1.0987, "step": 1169 }, { "epoch": 0.6651506537805572, "grad_norm": 0.2578125, "learning_rate": 3.348493462194429e-06, "loss": 1.0642, "step": 1170 }, { "epoch": 0.6657191586128482, "grad_norm": 0.24609375, "learning_rate": 3.3428084138715185e-06, "loss": 1.1116, "step": 1171 }, { "epoch": 0.6662876634451392, "grad_norm": 0.255859375, "learning_rate": 3.3371233655486073e-06, "loss": 1.0733, "step": 1172 }, { "epoch": 0.6668561682774303, "grad_norm": 0.248046875, "learning_rate": 3.3314383172256965e-06, "loss": 1.0799, "step": 1173 }, { "epoch": 0.6674246731097214, "grad_norm": 0.248046875, "learning_rate": 3.3257532689027862e-06, "loss": 1.0987, "step": 1174 }, { "epoch": 0.6679931779420125, "grad_norm": 0.2421875, "learning_rate": 3.320068220579875e-06, "loss": 0.9762, "step": 1175 }, { "epoch": 0.6685616827743036, "grad_norm": 0.236328125, "learning_rate": 3.3143831722569647e-06, "loss": 1.1378, "step": 1176 }, { "epoch": 0.6691301876065947, "grad_norm": 0.2373046875, "learning_rate": 3.3086981239340536e-06, "loss": 1.0735, "step": 1177 }, { "epoch": 0.6696986924388857, "grad_norm": 0.376953125, "learning_rate": 3.303013075611143e-06, "loss": 1.0922, "step": 1178 }, { "epoch": 0.6702671972711768, "grad_norm": 0.255859375, "learning_rate": 3.2973280272882325e-06, "loss": 1.0883, "step": 1179 }, { "epoch": 0.6708357021034679, "grad_norm": 0.23828125, "learning_rate": 3.2916429789653213e-06, "loss": 1.072, "step": 1180 }, { "epoch": 0.671404206935759, "grad_norm": 0.25390625, "learning_rate": 3.2859579306424106e-06, "loss": 1.0508, "step": 1181 }, { "epoch": 0.6719727117680501, "grad_norm": 0.248046875, "learning_rate": 3.2802728823195002e-06, "loss": 1.0678, "step": 1182 }, { "epoch": 0.672541216600341, "grad_norm": 0.306640625, "learning_rate": 3.274587833996589e-06, "loss": 1.0804, "step": 1183 }, { "epoch": 0.6731097214326321, "grad_norm": 0.23828125, "learning_rate": 3.2689027856736783e-06, "loss": 1.1294, "step": 1184 }, { "epoch": 0.6736782262649232, "grad_norm": 0.236328125, "learning_rate": 3.263217737350768e-06, "loss": 1.0708, "step": 1185 }, { "epoch": 0.6742467310972143, "grad_norm": 0.25, "learning_rate": 3.257532689027857e-06, "loss": 1.0569, "step": 1186 }, { "epoch": 0.6748152359295054, "grad_norm": 0.2490234375, "learning_rate": 3.2518476407049465e-06, "loss": 1.1119, "step": 1187 }, { "epoch": 0.6753837407617965, "grad_norm": 0.2490234375, "learning_rate": 3.2461625923820358e-06, "loss": 1.0132, "step": 1188 }, { "epoch": 0.6759522455940875, "grad_norm": 0.310546875, "learning_rate": 3.2404775440591246e-06, "loss": 1.0547, "step": 1189 }, { "epoch": 0.6765207504263786, "grad_norm": 0.248046875, "learning_rate": 3.2347924957362143e-06, "loss": 1.0136, "step": 1190 }, { "epoch": 0.6770892552586697, "grad_norm": 0.244140625, "learning_rate": 3.229107447413303e-06, "loss": 1.077, "step": 1191 }, { "epoch": 0.6776577600909608, "grad_norm": 0.24609375, "learning_rate": 3.2234223990903924e-06, "loss": 1.0727, "step": 1192 }, { "epoch": 0.6782262649232519, "grad_norm": 0.2431640625, "learning_rate": 3.217737350767482e-06, "loss": 1.047, "step": 1193 }, { "epoch": 0.6787947697555429, "grad_norm": 0.251953125, "learning_rate": 3.212052302444571e-06, "loss": 1.1038, "step": 1194 }, { "epoch": 0.679363274587834, "grad_norm": 0.2431640625, "learning_rate": 3.20636725412166e-06, "loss": 1.0422, "step": 1195 }, { "epoch": 0.6799317794201251, "grad_norm": 0.2421875, "learning_rate": 3.20068220579875e-06, "loss": 1.0519, "step": 1196 }, { "epoch": 0.6805002842524162, "grad_norm": 0.2412109375, "learning_rate": 3.1949971574758386e-06, "loss": 1.0441, "step": 1197 }, { "epoch": 0.6810687890847072, "grad_norm": 0.248046875, "learning_rate": 3.1893121091529283e-06, "loss": 1.0382, "step": 1198 }, { "epoch": 0.6816372939169983, "grad_norm": 0.2490234375, "learning_rate": 3.1836270608300175e-06, "loss": 1.0843, "step": 1199 }, { "epoch": 0.6822057987492893, "grad_norm": 0.25, "learning_rate": 3.1779420125071064e-06, "loss": 1.0649, "step": 1200 }, { "epoch": 0.6827743035815804, "grad_norm": 0.3046875, "learning_rate": 3.172256964184196e-06, "loss": 1.1431, "step": 1201 }, { "epoch": 0.6833428084138715, "grad_norm": 0.2470703125, "learning_rate": 3.1665719158612853e-06, "loss": 1.1554, "step": 1202 }, { "epoch": 0.6839113132461626, "grad_norm": 0.2431640625, "learning_rate": 3.160886867538374e-06, "loss": 1.0166, "step": 1203 }, { "epoch": 0.6844798180784537, "grad_norm": 0.2392578125, "learning_rate": 3.155201819215464e-06, "loss": 1.0695, "step": 1204 }, { "epoch": 0.6850483229107447, "grad_norm": 0.255859375, "learning_rate": 3.1495167708925526e-06, "loss": 1.0552, "step": 1205 }, { "epoch": 0.6856168277430358, "grad_norm": 0.244140625, "learning_rate": 3.143831722569642e-06, "loss": 1.1499, "step": 1206 }, { "epoch": 0.6861853325753269, "grad_norm": 0.25390625, "learning_rate": 3.1381466742467316e-06, "loss": 1.0669, "step": 1207 }, { "epoch": 0.686753837407618, "grad_norm": 0.275390625, "learning_rate": 3.1324616259238204e-06, "loss": 1.1408, "step": 1208 }, { "epoch": 0.6873223422399091, "grad_norm": 0.25390625, "learning_rate": 3.12677657760091e-06, "loss": 1.0954, "step": 1209 }, { "epoch": 0.6878908470722002, "grad_norm": 0.2373046875, "learning_rate": 3.1210915292779993e-06, "loss": 1.0373, "step": 1210 }, { "epoch": 0.6884593519044911, "grad_norm": 0.23828125, "learning_rate": 3.115406480955088e-06, "loss": 1.0553, "step": 1211 }, { "epoch": 0.6890278567367822, "grad_norm": 0.2490234375, "learning_rate": 3.109721432632178e-06, "loss": 1.0856, "step": 1212 }, { "epoch": 0.6895963615690733, "grad_norm": 0.251953125, "learning_rate": 3.104036384309267e-06, "loss": 1.0697, "step": 1213 }, { "epoch": 0.6901648664013644, "grad_norm": 0.251953125, "learning_rate": 3.098351335986356e-06, "loss": 1.1176, "step": 1214 }, { "epoch": 0.6907333712336555, "grad_norm": 0.2578125, "learning_rate": 3.0926662876634456e-06, "loss": 1.0131, "step": 1215 }, { "epoch": 0.6913018760659465, "grad_norm": 0.326171875, "learning_rate": 3.0869812393405344e-06, "loss": 1.0174, "step": 1216 }, { "epoch": 0.6918703808982376, "grad_norm": 0.25390625, "learning_rate": 3.0812961910176237e-06, "loss": 1.0534, "step": 1217 }, { "epoch": 0.6924388857305287, "grad_norm": 0.2490234375, "learning_rate": 3.0756111426947134e-06, "loss": 1.0712, "step": 1218 }, { "epoch": 0.6930073905628198, "grad_norm": 0.251953125, "learning_rate": 3.069926094371802e-06, "loss": 1.0209, "step": 1219 }, { "epoch": 0.6935758953951109, "grad_norm": 0.2451171875, "learning_rate": 3.064241046048892e-06, "loss": 1.0528, "step": 1220 }, { "epoch": 0.694144400227402, "grad_norm": 0.2392578125, "learning_rate": 3.058555997725981e-06, "loss": 1.0407, "step": 1221 }, { "epoch": 0.694712905059693, "grad_norm": 0.333984375, "learning_rate": 3.05287094940307e-06, "loss": 1.1313, "step": 1222 }, { "epoch": 0.6952814098919841, "grad_norm": 0.28125, "learning_rate": 3.0471859010801596e-06, "loss": 1.0944, "step": 1223 }, { "epoch": 0.6958499147242752, "grad_norm": 0.28515625, "learning_rate": 3.041500852757249e-06, "loss": 1.1464, "step": 1224 }, { "epoch": 0.6964184195565662, "grad_norm": 0.265625, "learning_rate": 3.0358158044343377e-06, "loss": 1.0857, "step": 1225 }, { "epoch": 0.6969869243888573, "grad_norm": 0.248046875, "learning_rate": 3.0301307561114274e-06, "loss": 1.0346, "step": 1226 }, { "epoch": 0.6975554292211483, "grad_norm": 0.255859375, "learning_rate": 3.0244457077885166e-06, "loss": 0.9902, "step": 1227 }, { "epoch": 0.6981239340534394, "grad_norm": 0.255859375, "learning_rate": 3.0187606594656055e-06, "loss": 1.1096, "step": 1228 }, { "epoch": 0.6986924388857305, "grad_norm": 0.251953125, "learning_rate": 3.013075611142695e-06, "loss": 1.1221, "step": 1229 }, { "epoch": 0.6992609437180216, "grad_norm": 0.23046875, "learning_rate": 3.007390562819784e-06, "loss": 1.1267, "step": 1230 }, { "epoch": 0.6998294485503127, "grad_norm": 0.2451171875, "learning_rate": 3.0017055144968732e-06, "loss": 1.1816, "step": 1231 }, { "epoch": 0.7003979533826038, "grad_norm": 0.2490234375, "learning_rate": 2.996020466173963e-06, "loss": 1.0227, "step": 1232 }, { "epoch": 0.7009664582148948, "grad_norm": 0.25390625, "learning_rate": 2.9903354178510517e-06, "loss": 1.0878, "step": 1233 }, { "epoch": 0.7015349630471859, "grad_norm": 0.2470703125, "learning_rate": 2.9846503695281414e-06, "loss": 1.0054, "step": 1234 }, { "epoch": 0.702103467879477, "grad_norm": 0.240234375, "learning_rate": 2.9789653212052307e-06, "loss": 1.0346, "step": 1235 }, { "epoch": 0.7026719727117681, "grad_norm": 0.24609375, "learning_rate": 2.9732802728823195e-06, "loss": 1.0655, "step": 1236 }, { "epoch": 0.7032404775440592, "grad_norm": 0.259765625, "learning_rate": 2.967595224559409e-06, "loss": 1.0324, "step": 1237 }, { "epoch": 0.7038089823763501, "grad_norm": 0.28125, "learning_rate": 2.9619101762364984e-06, "loss": 1.0658, "step": 1238 }, { "epoch": 0.7043774872086412, "grad_norm": 0.27734375, "learning_rate": 2.9562251279135872e-06, "loss": 1.0922, "step": 1239 }, { "epoch": 0.7049459920409323, "grad_norm": 0.255859375, "learning_rate": 2.950540079590677e-06, "loss": 1.0464, "step": 1240 }, { "epoch": 0.7055144968732234, "grad_norm": 0.24609375, "learning_rate": 2.9448550312677657e-06, "loss": 1.0621, "step": 1241 }, { "epoch": 0.7060830017055145, "grad_norm": 0.2412109375, "learning_rate": 2.939169982944855e-06, "loss": 1.0513, "step": 1242 }, { "epoch": 0.7066515065378056, "grad_norm": 0.2373046875, "learning_rate": 2.9334849346219447e-06, "loss": 1.0723, "step": 1243 }, { "epoch": 0.7072200113700966, "grad_norm": 0.259765625, "learning_rate": 2.9277998862990335e-06, "loss": 1.0745, "step": 1244 }, { "epoch": 0.7077885162023877, "grad_norm": 0.244140625, "learning_rate": 2.922114837976123e-06, "loss": 1.0702, "step": 1245 }, { "epoch": 0.7083570210346788, "grad_norm": 0.2470703125, "learning_rate": 2.9164297896532124e-06, "loss": 1.1323, "step": 1246 }, { "epoch": 0.7089255258669699, "grad_norm": 0.2373046875, "learning_rate": 2.9107447413303013e-06, "loss": 1.1398, "step": 1247 }, { "epoch": 0.709494030699261, "grad_norm": 0.2392578125, "learning_rate": 2.905059693007391e-06, "loss": 1.0253, "step": 1248 }, { "epoch": 0.710062535531552, "grad_norm": 0.2412109375, "learning_rate": 2.89937464468448e-06, "loss": 1.1136, "step": 1249 }, { "epoch": 0.7106310403638431, "grad_norm": 0.2421875, "learning_rate": 2.893689596361569e-06, "loss": 1.1541, "step": 1250 }, { "epoch": 0.7111995451961342, "grad_norm": 0.25, "learning_rate": 2.8880045480386587e-06, "loss": 1.1109, "step": 1251 }, { "epoch": 0.7117680500284252, "grad_norm": 0.2490234375, "learning_rate": 2.882319499715748e-06, "loss": 1.0202, "step": 1252 }, { "epoch": 0.7123365548607163, "grad_norm": 0.2490234375, "learning_rate": 2.8766344513928368e-06, "loss": 1.0819, "step": 1253 }, { "epoch": 0.7129050596930074, "grad_norm": 0.291015625, "learning_rate": 2.8709494030699265e-06, "loss": 1.0025, "step": 1254 }, { "epoch": 0.7134735645252984, "grad_norm": 0.25, "learning_rate": 2.8652643547470153e-06, "loss": 0.9829, "step": 1255 }, { "epoch": 0.7140420693575895, "grad_norm": 0.251953125, "learning_rate": 2.859579306424105e-06, "loss": 1.0473, "step": 1256 }, { "epoch": 0.7146105741898806, "grad_norm": 0.248046875, "learning_rate": 2.8538942581011942e-06, "loss": 1.0845, "step": 1257 }, { "epoch": 0.7151790790221717, "grad_norm": 0.267578125, "learning_rate": 2.848209209778283e-06, "loss": 1.158, "step": 1258 }, { "epoch": 0.7157475838544628, "grad_norm": 0.263671875, "learning_rate": 2.8425241614553727e-06, "loss": 1.133, "step": 1259 }, { "epoch": 0.7163160886867538, "grad_norm": 0.2421875, "learning_rate": 2.836839113132462e-06, "loss": 1.1153, "step": 1260 }, { "epoch": 0.7168845935190449, "grad_norm": 0.23828125, "learning_rate": 2.831154064809551e-06, "loss": 1.0465, "step": 1261 }, { "epoch": 0.717453098351336, "grad_norm": 0.318359375, "learning_rate": 2.8254690164866405e-06, "loss": 1.0826, "step": 1262 }, { "epoch": 0.7180216031836271, "grad_norm": 0.248046875, "learning_rate": 2.8197839681637297e-06, "loss": 1.0907, "step": 1263 }, { "epoch": 0.7185901080159182, "grad_norm": 0.2421875, "learning_rate": 2.8140989198408186e-06, "loss": 1.0584, "step": 1264 }, { "epoch": 0.7191586128482093, "grad_norm": 0.2353515625, "learning_rate": 2.8084138715179082e-06, "loss": 1.0774, "step": 1265 }, { "epoch": 0.7197271176805002, "grad_norm": 0.251953125, "learning_rate": 2.8027288231949975e-06, "loss": 0.9978, "step": 1266 }, { "epoch": 0.7202956225127913, "grad_norm": 0.2431640625, "learning_rate": 2.7970437748720867e-06, "loss": 1.0776, "step": 1267 }, { "epoch": 0.7208641273450824, "grad_norm": 0.2578125, "learning_rate": 2.791358726549176e-06, "loss": 1.1309, "step": 1268 }, { "epoch": 0.7214326321773735, "grad_norm": 0.251953125, "learning_rate": 2.785673678226265e-06, "loss": 1.1064, "step": 1269 }, { "epoch": 0.7220011370096646, "grad_norm": 0.251953125, "learning_rate": 2.7799886299033545e-06, "loss": 1.0966, "step": 1270 }, { "epoch": 0.7225696418419557, "grad_norm": 0.2392578125, "learning_rate": 2.7743035815804438e-06, "loss": 1.0853, "step": 1271 }, { "epoch": 0.7231381466742467, "grad_norm": 0.2470703125, "learning_rate": 2.7686185332575326e-06, "loss": 1.1181, "step": 1272 }, { "epoch": 0.7237066515065378, "grad_norm": 0.298828125, "learning_rate": 2.7629334849346223e-06, "loss": 1.0796, "step": 1273 }, { "epoch": 0.7242751563388289, "grad_norm": 0.248046875, "learning_rate": 2.7572484366117115e-06, "loss": 1.0349, "step": 1274 }, { "epoch": 0.72484366117112, "grad_norm": 0.251953125, "learning_rate": 2.7515633882888003e-06, "loss": 1.0613, "step": 1275 }, { "epoch": 0.7254121660034111, "grad_norm": 0.248046875, "learning_rate": 2.74587833996589e-06, "loss": 1.0678, "step": 1276 }, { "epoch": 0.7259806708357021, "grad_norm": 0.263671875, "learning_rate": 2.7401932916429793e-06, "loss": 1.0725, "step": 1277 }, { "epoch": 0.7265491756679932, "grad_norm": 0.30078125, "learning_rate": 2.7345082433200685e-06, "loss": 1.1199, "step": 1278 }, { "epoch": 0.7271176805002842, "grad_norm": 0.240234375, "learning_rate": 2.7288231949971578e-06, "loss": 1.1007, "step": 1279 }, { "epoch": 0.7276861853325753, "grad_norm": 0.2490234375, "learning_rate": 2.7231381466742466e-06, "loss": 1.0257, "step": 1280 }, { "epoch": 0.7282546901648664, "grad_norm": 0.31640625, "learning_rate": 2.7174530983513363e-06, "loss": 1.1159, "step": 1281 }, { "epoch": 0.7288231949971575, "grad_norm": 0.3125, "learning_rate": 2.7117680500284255e-06, "loss": 1.0114, "step": 1282 }, { "epoch": 0.7293916998294485, "grad_norm": 0.25, "learning_rate": 2.7060830017055144e-06, "loss": 1.0717, "step": 1283 }, { "epoch": 0.7299602046617396, "grad_norm": 0.25390625, "learning_rate": 2.700397953382604e-06, "loss": 1.0801, "step": 1284 }, { "epoch": 0.7305287094940307, "grad_norm": 0.2490234375, "learning_rate": 2.6947129050596933e-06, "loss": 1.166, "step": 1285 }, { "epoch": 0.7310972143263218, "grad_norm": 0.27734375, "learning_rate": 2.689027856736782e-06, "loss": 1.0952, "step": 1286 }, { "epoch": 0.7316657191586129, "grad_norm": 0.255859375, "learning_rate": 2.683342808413872e-06, "loss": 1.1625, "step": 1287 }, { "epoch": 0.7322342239909039, "grad_norm": 0.240234375, "learning_rate": 2.677657760090961e-06, "loss": 1.0279, "step": 1288 }, { "epoch": 0.732802728823195, "grad_norm": 0.2412109375, "learning_rate": 2.6719727117680503e-06, "loss": 1.1334, "step": 1289 }, { "epoch": 0.7333712336554861, "grad_norm": 0.23828125, "learning_rate": 2.6662876634451396e-06, "loss": 1.0823, "step": 1290 }, { "epoch": 0.7339397384877772, "grad_norm": 0.25, "learning_rate": 2.660602615122229e-06, "loss": 1.0841, "step": 1291 }, { "epoch": 0.7345082433200683, "grad_norm": 0.23828125, "learning_rate": 2.654917566799318e-06, "loss": 1.053, "step": 1292 }, { "epoch": 0.7350767481523593, "grad_norm": 0.251953125, "learning_rate": 2.6492325184764073e-06, "loss": 1.0486, "step": 1293 }, { "epoch": 0.7356452529846503, "grad_norm": 0.23828125, "learning_rate": 2.643547470153496e-06, "loss": 1.1235, "step": 1294 }, { "epoch": 0.7362137578169414, "grad_norm": 0.2412109375, "learning_rate": 2.637862421830586e-06, "loss": 1.1248, "step": 1295 }, { "epoch": 0.7367822626492325, "grad_norm": 0.267578125, "learning_rate": 2.632177373507675e-06, "loss": 1.0144, "step": 1296 }, { "epoch": 0.7373507674815236, "grad_norm": 0.2431640625, "learning_rate": 2.626492325184764e-06, "loss": 1.1128, "step": 1297 }, { "epoch": 0.7379192723138147, "grad_norm": 0.251953125, "learning_rate": 2.6208072768618536e-06, "loss": 1.0676, "step": 1298 }, { "epoch": 0.7384877771461057, "grad_norm": 0.259765625, "learning_rate": 2.615122228538943e-06, "loss": 1.0592, "step": 1299 }, { "epoch": 0.7390562819783968, "grad_norm": 0.2421875, "learning_rate": 2.6094371802160325e-06, "loss": 1.055, "step": 1300 }, { "epoch": 0.7396247868106879, "grad_norm": 0.255859375, "learning_rate": 2.6037521318931213e-06, "loss": 1.147, "step": 1301 }, { "epoch": 0.740193291642979, "grad_norm": 0.26171875, "learning_rate": 2.5980670835702106e-06, "loss": 1.0178, "step": 1302 }, { "epoch": 0.7407617964752701, "grad_norm": 0.248046875, "learning_rate": 2.5923820352473e-06, "loss": 1.0559, "step": 1303 }, { "epoch": 0.7413303013075612, "grad_norm": 0.2412109375, "learning_rate": 2.586696986924389e-06, "loss": 1.079, "step": 1304 }, { "epoch": 0.7418988061398522, "grad_norm": 0.33984375, "learning_rate": 2.5810119386014784e-06, "loss": 1.1143, "step": 1305 }, { "epoch": 0.7424673109721432, "grad_norm": 0.251953125, "learning_rate": 2.5753268902785676e-06, "loss": 1.0231, "step": 1306 }, { "epoch": 0.7430358158044343, "grad_norm": 0.25390625, "learning_rate": 2.569641841955657e-06, "loss": 1.0559, "step": 1307 }, { "epoch": 0.7436043206367254, "grad_norm": 0.271484375, "learning_rate": 2.5639567936327457e-06, "loss": 1.109, "step": 1308 }, { "epoch": 0.7441728254690165, "grad_norm": 0.2392578125, "learning_rate": 2.5582717453098354e-06, "loss": 1.1557, "step": 1309 }, { "epoch": 0.7447413303013075, "grad_norm": 0.244140625, "learning_rate": 2.5525866969869246e-06, "loss": 1.0812, "step": 1310 }, { "epoch": 0.7453098351335986, "grad_norm": 0.349609375, "learning_rate": 2.5469016486640143e-06, "loss": 1.1149, "step": 1311 }, { "epoch": 0.7458783399658897, "grad_norm": 0.3125, "learning_rate": 2.541216600341103e-06, "loss": 0.999, "step": 1312 }, { "epoch": 0.7464468447981808, "grad_norm": 0.255859375, "learning_rate": 2.5355315520181924e-06, "loss": 1.0871, "step": 1313 }, { "epoch": 0.7470153496304719, "grad_norm": 0.2421875, "learning_rate": 2.529846503695282e-06, "loss": 1.091, "step": 1314 }, { "epoch": 0.747583854462763, "grad_norm": 0.25, "learning_rate": 2.524161455372371e-06, "loss": 1.078, "step": 1315 }, { "epoch": 0.748152359295054, "grad_norm": 0.26171875, "learning_rate": 2.51847640704946e-06, "loss": 1.0212, "step": 1316 }, { "epoch": 0.7487208641273451, "grad_norm": 0.248046875, "learning_rate": 2.5127913587265494e-06, "loss": 1.0834, "step": 1317 }, { "epoch": 0.7492893689596362, "grad_norm": 0.24609375, "learning_rate": 2.5071063104036386e-06, "loss": 1.0613, "step": 1318 }, { "epoch": 0.7498578737919273, "grad_norm": 0.26171875, "learning_rate": 2.5014212620807275e-06, "loss": 1.0922, "step": 1319 }, { "epoch": 0.7504263786242183, "grad_norm": 0.2421875, "learning_rate": 2.495736213757817e-06, "loss": 1.0308, "step": 1320 }, { "epoch": 0.7509948834565093, "grad_norm": 0.255859375, "learning_rate": 2.4900511654349064e-06, "loss": 1.0225, "step": 1321 }, { "epoch": 0.7515633882888004, "grad_norm": 0.234375, "learning_rate": 2.4843661171119956e-06, "loss": 1.0624, "step": 1322 }, { "epoch": 0.7521318931210915, "grad_norm": 0.2431640625, "learning_rate": 2.478681068789085e-06, "loss": 1.116, "step": 1323 }, { "epoch": 0.7527003979533826, "grad_norm": 0.244140625, "learning_rate": 2.472996020466174e-06, "loss": 1.0858, "step": 1324 }, { "epoch": 0.7532689027856737, "grad_norm": 0.275390625, "learning_rate": 2.4673109721432634e-06, "loss": 1.1082, "step": 1325 }, { "epoch": 0.7538374076179648, "grad_norm": 0.287109375, "learning_rate": 2.4616259238203527e-06, "loss": 1.1172, "step": 1326 }, { "epoch": 0.7544059124502558, "grad_norm": 0.2578125, "learning_rate": 2.455940875497442e-06, "loss": 1.0447, "step": 1327 }, { "epoch": 0.7549744172825469, "grad_norm": 0.267578125, "learning_rate": 2.450255827174531e-06, "loss": 1.0899, "step": 1328 }, { "epoch": 0.755542922114838, "grad_norm": 0.25, "learning_rate": 2.4445707788516204e-06, "loss": 1.0471, "step": 1329 }, { "epoch": 0.7561114269471291, "grad_norm": 0.236328125, "learning_rate": 2.4388857305287097e-06, "loss": 1.0292, "step": 1330 }, { "epoch": 0.7566799317794202, "grad_norm": 0.251953125, "learning_rate": 2.433200682205799e-06, "loss": 1.0241, "step": 1331 }, { "epoch": 0.7572484366117112, "grad_norm": 0.25390625, "learning_rate": 2.427515633882888e-06, "loss": 0.9876, "step": 1332 }, { "epoch": 0.7578169414440022, "grad_norm": 0.2490234375, "learning_rate": 2.4218305855599774e-06, "loss": 1.0951, "step": 1333 }, { "epoch": 0.7583854462762933, "grad_norm": 0.296875, "learning_rate": 2.4161455372370667e-06, "loss": 1.1191, "step": 1334 }, { "epoch": 0.7589539511085844, "grad_norm": 0.2451171875, "learning_rate": 2.410460488914156e-06, "loss": 1.0525, "step": 1335 }, { "epoch": 0.7595224559408755, "grad_norm": 0.2421875, "learning_rate": 2.404775440591245e-06, "loss": 1.0256, "step": 1336 }, { "epoch": 0.7600909607731666, "grad_norm": 0.2490234375, "learning_rate": 2.3990903922683344e-06, "loss": 1.1162, "step": 1337 }, { "epoch": 0.7606594656054576, "grad_norm": 0.279296875, "learning_rate": 2.3934053439454237e-06, "loss": 1.0458, "step": 1338 }, { "epoch": 0.7612279704377487, "grad_norm": 0.251953125, "learning_rate": 2.387720295622513e-06, "loss": 1.0769, "step": 1339 }, { "epoch": 0.7617964752700398, "grad_norm": 0.25, "learning_rate": 2.382035247299602e-06, "loss": 1.0329, "step": 1340 }, { "epoch": 0.7623649801023309, "grad_norm": 0.2392578125, "learning_rate": 2.3763501989766915e-06, "loss": 1.0976, "step": 1341 }, { "epoch": 0.762933484934622, "grad_norm": 0.244140625, "learning_rate": 2.3706651506537807e-06, "loss": 1.0853, "step": 1342 }, { "epoch": 0.763501989766913, "grad_norm": 0.271484375, "learning_rate": 2.36498010233087e-06, "loss": 1.0876, "step": 1343 }, { "epoch": 0.7640704945992041, "grad_norm": 0.2412109375, "learning_rate": 2.3592950540079592e-06, "loss": 1.1004, "step": 1344 }, { "epoch": 0.7646389994314952, "grad_norm": 0.2470703125, "learning_rate": 2.3536100056850485e-06, "loss": 1.0274, "step": 1345 }, { "epoch": 0.7652075042637863, "grad_norm": 0.248046875, "learning_rate": 2.3479249573621377e-06, "loss": 1.0605, "step": 1346 }, { "epoch": 0.7657760090960773, "grad_norm": 0.2470703125, "learning_rate": 2.342239909039227e-06, "loss": 1.0591, "step": 1347 }, { "epoch": 0.7663445139283684, "grad_norm": 0.263671875, "learning_rate": 2.3365548607163162e-06, "loss": 1.0611, "step": 1348 }, { "epoch": 0.7669130187606594, "grad_norm": 0.25390625, "learning_rate": 2.3308698123934055e-06, "loss": 1.1235, "step": 1349 }, { "epoch": 0.7674815235929505, "grad_norm": 0.2431640625, "learning_rate": 2.3251847640704947e-06, "loss": 1.0416, "step": 1350 }, { "epoch": 0.7680500284252416, "grad_norm": 0.2333984375, "learning_rate": 2.319499715747584e-06, "loss": 1.0921, "step": 1351 }, { "epoch": 0.7686185332575327, "grad_norm": 0.267578125, "learning_rate": 2.3138146674246732e-06, "loss": 1.1456, "step": 1352 }, { "epoch": 0.7691870380898238, "grad_norm": 0.25390625, "learning_rate": 2.3081296191017625e-06, "loss": 1.108, "step": 1353 }, { "epoch": 0.7697555429221148, "grad_norm": 0.2421875, "learning_rate": 2.3024445707788517e-06, "loss": 1.0853, "step": 1354 }, { "epoch": 0.7703240477544059, "grad_norm": 0.25, "learning_rate": 2.296759522455941e-06, "loss": 1.1412, "step": 1355 }, { "epoch": 0.770892552586697, "grad_norm": 0.25, "learning_rate": 2.2910744741330302e-06, "loss": 1.0555, "step": 1356 }, { "epoch": 0.7714610574189881, "grad_norm": 0.25, "learning_rate": 2.2853894258101195e-06, "loss": 1.1109, "step": 1357 }, { "epoch": 0.7720295622512792, "grad_norm": 0.2373046875, "learning_rate": 2.2797043774872088e-06, "loss": 1.0528, "step": 1358 }, { "epoch": 0.7725980670835703, "grad_norm": 0.26953125, "learning_rate": 2.274019329164298e-06, "loss": 1.1055, "step": 1359 }, { "epoch": 0.7731665719158612, "grad_norm": 0.24609375, "learning_rate": 2.2683342808413873e-06, "loss": 1.127, "step": 1360 }, { "epoch": 0.7737350767481523, "grad_norm": 0.25390625, "learning_rate": 2.2626492325184765e-06, "loss": 1.0089, "step": 1361 }, { "epoch": 0.7743035815804434, "grad_norm": 0.251953125, "learning_rate": 2.2569641841955658e-06, "loss": 1.0826, "step": 1362 }, { "epoch": 0.7748720864127345, "grad_norm": 0.248046875, "learning_rate": 2.251279135872655e-06, "loss": 1.0177, "step": 1363 }, { "epoch": 0.7754405912450256, "grad_norm": 0.244140625, "learning_rate": 2.2455940875497443e-06, "loss": 1.0023, "step": 1364 }, { "epoch": 0.7760090960773166, "grad_norm": 0.2392578125, "learning_rate": 2.2399090392268335e-06, "loss": 1.0682, "step": 1365 }, { "epoch": 0.7765776009096077, "grad_norm": 0.25390625, "learning_rate": 2.2342239909039228e-06, "loss": 1.088, "step": 1366 }, { "epoch": 0.7771461057418988, "grad_norm": 0.267578125, "learning_rate": 2.228538942581012e-06, "loss": 1.0783, "step": 1367 }, { "epoch": 0.7777146105741899, "grad_norm": 0.26953125, "learning_rate": 2.2228538942581013e-06, "loss": 1.0154, "step": 1368 }, { "epoch": 0.778283115406481, "grad_norm": 0.279296875, "learning_rate": 2.2171688459351905e-06, "loss": 1.0743, "step": 1369 }, { "epoch": 0.7788516202387721, "grad_norm": 0.2578125, "learning_rate": 2.2114837976122798e-06, "loss": 1.0116, "step": 1370 }, { "epoch": 0.7794201250710631, "grad_norm": 0.24609375, "learning_rate": 2.2057987492893695e-06, "loss": 1.0901, "step": 1371 }, { "epoch": 0.7799886299033542, "grad_norm": 0.244140625, "learning_rate": 2.2001137009664583e-06, "loss": 1.0869, "step": 1372 }, { "epoch": 0.7805571347356453, "grad_norm": 0.240234375, "learning_rate": 2.1944286526435475e-06, "loss": 1.0867, "step": 1373 }, { "epoch": 0.7811256395679363, "grad_norm": 0.2421875, "learning_rate": 2.188743604320637e-06, "loss": 1.0927, "step": 1374 }, { "epoch": 0.7816941444002274, "grad_norm": 0.2421875, "learning_rate": 2.183058555997726e-06, "loss": 0.9865, "step": 1375 }, { "epoch": 0.7822626492325184, "grad_norm": 0.255859375, "learning_rate": 2.1773735076748153e-06, "loss": 1.1224, "step": 1376 }, { "epoch": 0.7828311540648095, "grad_norm": 0.2412109375, "learning_rate": 2.1716884593519046e-06, "loss": 1.0655, "step": 1377 }, { "epoch": 0.7833996588971006, "grad_norm": 0.341796875, "learning_rate": 2.1660034110289942e-06, "loss": 1.0291, "step": 1378 }, { "epoch": 0.7839681637293917, "grad_norm": 0.361328125, "learning_rate": 2.160318362706083e-06, "loss": 1.0201, "step": 1379 }, { "epoch": 0.7845366685616828, "grad_norm": 0.25, "learning_rate": 2.1546333143831723e-06, "loss": 1.0986, "step": 1380 }, { "epoch": 0.7851051733939739, "grad_norm": 0.2421875, "learning_rate": 2.1489482660602616e-06, "loss": 1.0419, "step": 1381 }, { "epoch": 0.7856736782262649, "grad_norm": 0.25, "learning_rate": 2.1432632177373512e-06, "loss": 1.0537, "step": 1382 }, { "epoch": 0.786242183058556, "grad_norm": 0.404296875, "learning_rate": 2.13757816941444e-06, "loss": 0.9841, "step": 1383 }, { "epoch": 0.7868106878908471, "grad_norm": 0.2470703125, "learning_rate": 2.1318931210915293e-06, "loss": 1.0753, "step": 1384 }, { "epoch": 0.7873791927231382, "grad_norm": 0.25390625, "learning_rate": 2.1262080727686186e-06, "loss": 1.1392, "step": 1385 }, { "epoch": 0.7879476975554293, "grad_norm": 0.240234375, "learning_rate": 2.120523024445708e-06, "loss": 1.162, "step": 1386 }, { "epoch": 0.7885162023877202, "grad_norm": 0.248046875, "learning_rate": 2.114837976122797e-06, "loss": 1.0924, "step": 1387 }, { "epoch": 0.7890847072200113, "grad_norm": 0.259765625, "learning_rate": 2.1091529277998863e-06, "loss": 0.9883, "step": 1388 }, { "epoch": 0.7896532120523024, "grad_norm": 0.25390625, "learning_rate": 2.103467879476976e-06, "loss": 1.0945, "step": 1389 }, { "epoch": 0.7902217168845935, "grad_norm": 0.240234375, "learning_rate": 2.097782831154065e-06, "loss": 1.0637, "step": 1390 }, { "epoch": 0.7907902217168846, "grad_norm": 0.248046875, "learning_rate": 2.092097782831154e-06, "loss": 1.0201, "step": 1391 }, { "epoch": 0.7913587265491757, "grad_norm": 0.2490234375, "learning_rate": 2.0864127345082434e-06, "loss": 1.0591, "step": 1392 }, { "epoch": 0.7919272313814667, "grad_norm": 0.2451171875, "learning_rate": 2.0807276861853326e-06, "loss": 1.1237, "step": 1393 }, { "epoch": 0.7924957362137578, "grad_norm": 0.26171875, "learning_rate": 2.075042637862422e-06, "loss": 1.1483, "step": 1394 }, { "epoch": 0.7930642410460489, "grad_norm": 0.251953125, "learning_rate": 2.069357589539511e-06, "loss": 1.1257, "step": 1395 }, { "epoch": 0.79363274587834, "grad_norm": 0.271484375, "learning_rate": 2.0636725412166008e-06, "loss": 1.0482, "step": 1396 }, { "epoch": 0.7942012507106311, "grad_norm": 0.2490234375, "learning_rate": 2.0579874928936896e-06, "loss": 1.0755, "step": 1397 }, { "epoch": 0.7947697555429221, "grad_norm": 0.265625, "learning_rate": 2.052302444570779e-06, "loss": 1.1344, "step": 1398 }, { "epoch": 0.7953382603752132, "grad_norm": 0.259765625, "learning_rate": 2.046617396247868e-06, "loss": 1.0665, "step": 1399 }, { "epoch": 0.7959067652075043, "grad_norm": 0.25390625, "learning_rate": 2.040932347924958e-06, "loss": 1.0778, "step": 1400 }, { "epoch": 0.7964752700397953, "grad_norm": 0.244140625, "learning_rate": 2.0352472996020466e-06, "loss": 1.1382, "step": 1401 }, { "epoch": 0.7970437748720864, "grad_norm": 0.2490234375, "learning_rate": 2.029562251279136e-06, "loss": 1.1511, "step": 1402 }, { "epoch": 0.7976122797043775, "grad_norm": 0.248046875, "learning_rate": 2.0238772029562256e-06, "loss": 1.119, "step": 1403 }, { "epoch": 0.7981807845366685, "grad_norm": 0.248046875, "learning_rate": 2.0181921546333144e-06, "loss": 1.1192, "step": 1404 }, { "epoch": 0.7987492893689596, "grad_norm": 0.2431640625, "learning_rate": 2.0125071063104036e-06, "loss": 1.0983, "step": 1405 }, { "epoch": 0.7993177942012507, "grad_norm": 0.251953125, "learning_rate": 2.006822057987493e-06, "loss": 1.0299, "step": 1406 }, { "epoch": 0.7998862990335418, "grad_norm": 0.2470703125, "learning_rate": 2.0011370096645826e-06, "loss": 1.077, "step": 1407 }, { "epoch": 0.8004548038658329, "grad_norm": 0.2451171875, "learning_rate": 1.9954519613416714e-06, "loss": 1.0981, "step": 1408 }, { "epoch": 0.8010233086981239, "grad_norm": 0.2431640625, "learning_rate": 1.9897669130187606e-06, "loss": 1.0435, "step": 1409 }, { "epoch": 0.801591813530415, "grad_norm": 0.26171875, "learning_rate": 1.9840818646958503e-06, "loss": 1.0767, "step": 1410 }, { "epoch": 0.8021603183627061, "grad_norm": 0.24609375, "learning_rate": 1.9783968163729396e-06, "loss": 1.1362, "step": 1411 }, { "epoch": 0.8027288231949972, "grad_norm": 0.244140625, "learning_rate": 1.9727117680500284e-06, "loss": 1.025, "step": 1412 }, { "epoch": 0.8032973280272883, "grad_norm": 0.251953125, "learning_rate": 1.9670267197271177e-06, "loss": 1.0997, "step": 1413 }, { "epoch": 0.8038658328595794, "grad_norm": 0.251953125, "learning_rate": 1.9613416714042073e-06, "loss": 1.0256, "step": 1414 }, { "epoch": 0.8044343376918703, "grad_norm": 0.2578125, "learning_rate": 1.955656623081296e-06, "loss": 1.083, "step": 1415 }, { "epoch": 0.8050028425241614, "grad_norm": 0.255859375, "learning_rate": 1.9499715747583854e-06, "loss": 1.0468, "step": 1416 }, { "epoch": 0.8055713473564525, "grad_norm": 0.251953125, "learning_rate": 1.944286526435475e-06, "loss": 1.0402, "step": 1417 }, { "epoch": 0.8061398521887436, "grad_norm": 0.2490234375, "learning_rate": 1.9386014781125643e-06, "loss": 1.1251, "step": 1418 }, { "epoch": 0.8067083570210347, "grad_norm": 0.251953125, "learning_rate": 1.932916429789653e-06, "loss": 1.0963, "step": 1419 }, { "epoch": 0.8072768618533257, "grad_norm": 0.25, "learning_rate": 1.9272313814667424e-06, "loss": 0.9875, "step": 1420 }, { "epoch": 0.8078453666856168, "grad_norm": 0.2412109375, "learning_rate": 1.921546333143832e-06, "loss": 1.0864, "step": 1421 }, { "epoch": 0.8084138715179079, "grad_norm": 0.2451171875, "learning_rate": 1.9158612848209214e-06, "loss": 1.1494, "step": 1422 }, { "epoch": 0.808982376350199, "grad_norm": 0.2490234375, "learning_rate": 1.91017623649801e-06, "loss": 1.0445, "step": 1423 }, { "epoch": 0.8095508811824901, "grad_norm": 0.2490234375, "learning_rate": 1.9044911881750997e-06, "loss": 1.1448, "step": 1424 }, { "epoch": 0.8101193860147812, "grad_norm": 0.251953125, "learning_rate": 1.898806139852189e-06, "loss": 0.9997, "step": 1425 }, { "epoch": 0.8106878908470722, "grad_norm": 0.236328125, "learning_rate": 1.893121091529278e-06, "loss": 1.0862, "step": 1426 }, { "epoch": 0.8112563956793633, "grad_norm": 0.263671875, "learning_rate": 1.8874360432063674e-06, "loss": 1.1609, "step": 1427 }, { "epoch": 0.8118249005116543, "grad_norm": 0.2421875, "learning_rate": 1.8817509948834567e-06, "loss": 1.0642, "step": 1428 }, { "epoch": 0.8123934053439454, "grad_norm": 0.240234375, "learning_rate": 1.8760659465605461e-06, "loss": 1.0402, "step": 1429 }, { "epoch": 0.8129619101762365, "grad_norm": 0.248046875, "learning_rate": 1.8703808982376352e-06, "loss": 1.1381, "step": 1430 }, { "epoch": 0.8135304150085275, "grad_norm": 0.248046875, "learning_rate": 1.8646958499147244e-06, "loss": 1.0734, "step": 1431 }, { "epoch": 0.8140989198408186, "grad_norm": 0.267578125, "learning_rate": 1.8590108015918137e-06, "loss": 1.1054, "step": 1432 }, { "epoch": 0.8146674246731097, "grad_norm": 0.24609375, "learning_rate": 1.8533257532689031e-06, "loss": 1.0203, "step": 1433 }, { "epoch": 0.8152359295054008, "grad_norm": 0.248046875, "learning_rate": 1.8476407049459922e-06, "loss": 1.0867, "step": 1434 }, { "epoch": 0.8158044343376919, "grad_norm": 0.267578125, "learning_rate": 1.8419556566230814e-06, "loss": 1.126, "step": 1435 }, { "epoch": 0.816372939169983, "grad_norm": 0.2412109375, "learning_rate": 1.8362706083001707e-06, "loss": 1.1708, "step": 1436 }, { "epoch": 0.816941444002274, "grad_norm": 0.248046875, "learning_rate": 1.83058555997726e-06, "loss": 1.0675, "step": 1437 }, { "epoch": 0.8175099488345651, "grad_norm": 0.25, "learning_rate": 1.8249005116543492e-06, "loss": 1.0941, "step": 1438 }, { "epoch": 0.8180784536668562, "grad_norm": 0.255859375, "learning_rate": 1.8192154633314384e-06, "loss": 1.0377, "step": 1439 }, { "epoch": 0.8186469584991473, "grad_norm": 0.240234375, "learning_rate": 1.813530415008528e-06, "loss": 1.0801, "step": 1440 }, { "epoch": 0.8192154633314384, "grad_norm": 0.28125, "learning_rate": 1.807845366685617e-06, "loss": 1.0745, "step": 1441 }, { "epoch": 0.8197839681637293, "grad_norm": 0.2578125, "learning_rate": 1.8021603183627062e-06, "loss": 1.0508, "step": 1442 }, { "epoch": 0.8203524729960204, "grad_norm": 0.2451171875, "learning_rate": 1.7964752700397955e-06, "loss": 1.0508, "step": 1443 }, { "epoch": 0.8209209778283115, "grad_norm": 0.236328125, "learning_rate": 1.7907902217168845e-06, "loss": 1.0595, "step": 1444 }, { "epoch": 0.8214894826606026, "grad_norm": 0.2431640625, "learning_rate": 1.785105173393974e-06, "loss": 1.0588, "step": 1445 }, { "epoch": 0.8220579874928937, "grad_norm": 0.2490234375, "learning_rate": 1.7794201250710632e-06, "loss": 1.0692, "step": 1446 }, { "epoch": 0.8226264923251848, "grad_norm": 0.2421875, "learning_rate": 1.7737350767481527e-06, "loss": 1.0129, "step": 1447 }, { "epoch": 0.8231949971574758, "grad_norm": 0.251953125, "learning_rate": 1.7680500284252417e-06, "loss": 1.075, "step": 1448 }, { "epoch": 0.8237635019897669, "grad_norm": 0.25390625, "learning_rate": 1.762364980102331e-06, "loss": 1.0324, "step": 1449 }, { "epoch": 0.824332006822058, "grad_norm": 0.2431640625, "learning_rate": 1.7566799317794202e-06, "loss": 1.0643, "step": 1450 }, { "epoch": 0.8249005116543491, "grad_norm": 0.236328125, "learning_rate": 1.7509948834565097e-06, "loss": 1.0561, "step": 1451 }, { "epoch": 0.8254690164866402, "grad_norm": 0.2451171875, "learning_rate": 1.7453098351335987e-06, "loss": 1.0574, "step": 1452 }, { "epoch": 0.8260375213189312, "grad_norm": 0.2412109375, "learning_rate": 1.739624786810688e-06, "loss": 1.0839, "step": 1453 }, { "epoch": 0.8266060261512223, "grad_norm": 0.25, "learning_rate": 1.7339397384877775e-06, "loss": 1.0554, "step": 1454 }, { "epoch": 0.8271745309835133, "grad_norm": 0.296875, "learning_rate": 1.7282546901648665e-06, "loss": 1.101, "step": 1455 }, { "epoch": 0.8277430358158044, "grad_norm": 0.25, "learning_rate": 1.7225696418419557e-06, "loss": 1.0352, "step": 1456 }, { "epoch": 0.8283115406480955, "grad_norm": 0.2578125, "learning_rate": 1.716884593519045e-06, "loss": 1.0883, "step": 1457 }, { "epoch": 0.8288800454803866, "grad_norm": 0.25390625, "learning_rate": 1.7111995451961345e-06, "loss": 1.0712, "step": 1458 }, { "epoch": 0.8294485503126776, "grad_norm": 0.240234375, "learning_rate": 1.7055144968732235e-06, "loss": 1.0188, "step": 1459 }, { "epoch": 0.8300170551449687, "grad_norm": 0.271484375, "learning_rate": 1.6998294485503128e-06, "loss": 1.0638, "step": 1460 }, { "epoch": 0.8305855599772598, "grad_norm": 0.24609375, "learning_rate": 1.6941444002274022e-06, "loss": 1.1421, "step": 1461 }, { "epoch": 0.8311540648095509, "grad_norm": 0.248046875, "learning_rate": 1.6884593519044915e-06, "loss": 0.9846, "step": 1462 }, { "epoch": 0.831722569641842, "grad_norm": 0.251953125, "learning_rate": 1.6827743035815805e-06, "loss": 1.0462, "step": 1463 }, { "epoch": 0.832291074474133, "grad_norm": 0.2578125, "learning_rate": 1.6770892552586698e-06, "loss": 1.103, "step": 1464 }, { "epoch": 0.8328595793064241, "grad_norm": 0.2470703125, "learning_rate": 1.6714042069357592e-06, "loss": 1.042, "step": 1465 }, { "epoch": 0.8334280841387152, "grad_norm": 0.2734375, "learning_rate": 1.6657191586128483e-06, "loss": 1.0934, "step": 1466 }, { "epoch": 0.8339965889710063, "grad_norm": 0.251953125, "learning_rate": 1.6600341102899375e-06, "loss": 1.1545, "step": 1467 }, { "epoch": 0.8345650938032974, "grad_norm": 0.2412109375, "learning_rate": 1.6543490619670268e-06, "loss": 1.0998, "step": 1468 }, { "epoch": 0.8351335986355884, "grad_norm": 0.248046875, "learning_rate": 1.6486640136441162e-06, "loss": 1.065, "step": 1469 }, { "epoch": 0.8357021034678794, "grad_norm": 0.2451171875, "learning_rate": 1.6429789653212053e-06, "loss": 1.1379, "step": 1470 }, { "epoch": 0.8362706083001705, "grad_norm": 0.2470703125, "learning_rate": 1.6372939169982945e-06, "loss": 1.095, "step": 1471 }, { "epoch": 0.8368391131324616, "grad_norm": 0.27734375, "learning_rate": 1.631608868675384e-06, "loss": 1.0597, "step": 1472 }, { "epoch": 0.8374076179647527, "grad_norm": 0.25, "learning_rate": 1.6259238203524733e-06, "loss": 1.0733, "step": 1473 }, { "epoch": 0.8379761227970438, "grad_norm": 0.263671875, "learning_rate": 1.6202387720295623e-06, "loss": 1.0406, "step": 1474 }, { "epoch": 0.8385446276293349, "grad_norm": 0.24609375, "learning_rate": 1.6145537237066516e-06, "loss": 0.9839, "step": 1475 }, { "epoch": 0.8391131324616259, "grad_norm": 0.25, "learning_rate": 1.608868675383741e-06, "loss": 1.0771, "step": 1476 }, { "epoch": 0.839681637293917, "grad_norm": 0.2890625, "learning_rate": 1.60318362706083e-06, "loss": 1.0442, "step": 1477 }, { "epoch": 0.8402501421262081, "grad_norm": 0.244140625, "learning_rate": 1.5974985787379193e-06, "loss": 1.0805, "step": 1478 }, { "epoch": 0.8408186469584992, "grad_norm": 0.25, "learning_rate": 1.5918135304150088e-06, "loss": 1.1118, "step": 1479 }, { "epoch": 0.8413871517907903, "grad_norm": 0.2431640625, "learning_rate": 1.586128482092098e-06, "loss": 1.0872, "step": 1480 }, { "epoch": 0.8419556566230812, "grad_norm": 0.25, "learning_rate": 1.580443433769187e-06, "loss": 1.0853, "step": 1481 }, { "epoch": 0.8425241614553723, "grad_norm": 0.25390625, "learning_rate": 1.5747583854462763e-06, "loss": 1.0516, "step": 1482 }, { "epoch": 0.8430926662876634, "grad_norm": 0.2470703125, "learning_rate": 1.5690733371233658e-06, "loss": 1.1216, "step": 1483 }, { "epoch": 0.8436611711199545, "grad_norm": 0.251953125, "learning_rate": 1.563388288800455e-06, "loss": 1.067, "step": 1484 }, { "epoch": 0.8442296759522456, "grad_norm": 0.251953125, "learning_rate": 1.557703240477544e-06, "loss": 1.0573, "step": 1485 }, { "epoch": 0.8447981807845367, "grad_norm": 0.2470703125, "learning_rate": 1.5520181921546335e-06, "loss": 1.1193, "step": 1486 }, { "epoch": 0.8453666856168277, "grad_norm": 0.2373046875, "learning_rate": 1.5463331438317228e-06, "loss": 1.0588, "step": 1487 }, { "epoch": 0.8459351904491188, "grad_norm": 0.283203125, "learning_rate": 1.5406480955088118e-06, "loss": 1.0328, "step": 1488 }, { "epoch": 0.8465036952814099, "grad_norm": 0.2734375, "learning_rate": 1.534963047185901e-06, "loss": 1.0762, "step": 1489 }, { "epoch": 0.847072200113701, "grad_norm": 0.2451171875, "learning_rate": 1.5292779988629906e-06, "loss": 1.042, "step": 1490 }, { "epoch": 0.8476407049459921, "grad_norm": 0.2353515625, "learning_rate": 1.5235929505400798e-06, "loss": 1.0728, "step": 1491 }, { "epoch": 0.8482092097782831, "grad_norm": 0.234375, "learning_rate": 1.5179079022171688e-06, "loss": 1.1138, "step": 1492 }, { "epoch": 0.8487777146105742, "grad_norm": 0.2451171875, "learning_rate": 1.5122228538942583e-06, "loss": 1.0696, "step": 1493 }, { "epoch": 0.8493462194428653, "grad_norm": 0.2421875, "learning_rate": 1.5065378055713476e-06, "loss": 1.0518, "step": 1494 }, { "epoch": 0.8499147242751564, "grad_norm": 0.244140625, "learning_rate": 1.5008527572484366e-06, "loss": 1.0555, "step": 1495 }, { "epoch": 0.8504832291074474, "grad_norm": 0.244140625, "learning_rate": 1.4951677089255259e-06, "loss": 1.0433, "step": 1496 }, { "epoch": 0.8510517339397385, "grad_norm": 0.267578125, "learning_rate": 1.4894826606026153e-06, "loss": 1.1475, "step": 1497 }, { "epoch": 0.8516202387720295, "grad_norm": 0.2470703125, "learning_rate": 1.4837976122797046e-06, "loss": 1.0324, "step": 1498 }, { "epoch": 0.8521887436043206, "grad_norm": 0.244140625, "learning_rate": 1.4781125639567936e-06, "loss": 1.0664, "step": 1499 }, { "epoch": 0.8527572484366117, "grad_norm": 0.25, "learning_rate": 1.4724275156338829e-06, "loss": 0.9972, "step": 1500 }, { "epoch": 0.8533257532689028, "grad_norm": 0.2490234375, "learning_rate": 1.4667424673109723e-06, "loss": 1.1142, "step": 1501 }, { "epoch": 0.8538942581011939, "grad_norm": 0.25, "learning_rate": 1.4610574189880616e-06, "loss": 1.2051, "step": 1502 }, { "epoch": 0.8544627629334849, "grad_norm": 0.2431640625, "learning_rate": 1.4553723706651506e-06, "loss": 1.0648, "step": 1503 }, { "epoch": 0.855031267765776, "grad_norm": 0.255859375, "learning_rate": 1.44968732234224e-06, "loss": 1.1197, "step": 1504 }, { "epoch": 0.8555997725980671, "grad_norm": 0.25390625, "learning_rate": 1.4440022740193293e-06, "loss": 0.9973, "step": 1505 }, { "epoch": 0.8561682774303582, "grad_norm": 0.248046875, "learning_rate": 1.4383172256964184e-06, "loss": 1.149, "step": 1506 }, { "epoch": 0.8567367822626493, "grad_norm": 0.2470703125, "learning_rate": 1.4326321773735076e-06, "loss": 1.106, "step": 1507 }, { "epoch": 0.8573052870949404, "grad_norm": 0.2431640625, "learning_rate": 1.4269471290505971e-06, "loss": 1.1342, "step": 1508 }, { "epoch": 0.8578737919272313, "grad_norm": 0.23828125, "learning_rate": 1.4212620807276864e-06, "loss": 1.0687, "step": 1509 }, { "epoch": 0.8584422967595224, "grad_norm": 0.25, "learning_rate": 1.4155770324047754e-06, "loss": 1.07, "step": 1510 }, { "epoch": 0.8590108015918135, "grad_norm": 0.24609375, "learning_rate": 1.4098919840818649e-06, "loss": 1.0928, "step": 1511 }, { "epoch": 0.8595793064241046, "grad_norm": 0.2392578125, "learning_rate": 1.4042069357589541e-06, "loss": 1.0635, "step": 1512 }, { "epoch": 0.8601478112563957, "grad_norm": 0.279296875, "learning_rate": 1.3985218874360434e-06, "loss": 1.0697, "step": 1513 }, { "epoch": 0.8607163160886867, "grad_norm": 0.240234375, "learning_rate": 1.3928368391131324e-06, "loss": 1.0769, "step": 1514 }, { "epoch": 0.8612848209209778, "grad_norm": 0.2734375, "learning_rate": 1.3871517907902219e-06, "loss": 1.0724, "step": 1515 }, { "epoch": 0.8618533257532689, "grad_norm": 0.2451171875, "learning_rate": 1.3814667424673111e-06, "loss": 1.0667, "step": 1516 }, { "epoch": 0.86242183058556, "grad_norm": 0.255859375, "learning_rate": 1.3757816941444002e-06, "loss": 1.0498, "step": 1517 }, { "epoch": 0.8629903354178511, "grad_norm": 0.255859375, "learning_rate": 1.3700966458214896e-06, "loss": 1.0924, "step": 1518 }, { "epoch": 0.8635588402501422, "grad_norm": 0.240234375, "learning_rate": 1.3644115974985789e-06, "loss": 1.0528, "step": 1519 }, { "epoch": 0.8641273450824332, "grad_norm": 0.251953125, "learning_rate": 1.3587265491756681e-06, "loss": 1.0413, "step": 1520 }, { "epoch": 0.8646958499147243, "grad_norm": 0.2578125, "learning_rate": 1.3530415008527572e-06, "loss": 1.0968, "step": 1521 }, { "epoch": 0.8652643547470154, "grad_norm": 0.2490234375, "learning_rate": 1.3473564525298466e-06, "loss": 1.0546, "step": 1522 }, { "epoch": 0.8658328595793064, "grad_norm": 0.392578125, "learning_rate": 1.341671404206936e-06, "loss": 1.0544, "step": 1523 }, { "epoch": 0.8664013644115975, "grad_norm": 0.2333984375, "learning_rate": 1.3359863558840252e-06, "loss": 1.0498, "step": 1524 }, { "epoch": 0.8669698692438885, "grad_norm": 0.244140625, "learning_rate": 1.3303013075611144e-06, "loss": 1.0322, "step": 1525 }, { "epoch": 0.8675383740761796, "grad_norm": 0.2470703125, "learning_rate": 1.3246162592382037e-06, "loss": 1.0691, "step": 1526 }, { "epoch": 0.8681068789084707, "grad_norm": 0.255859375, "learning_rate": 1.318931210915293e-06, "loss": 1.0063, "step": 1527 }, { "epoch": 0.8686753837407618, "grad_norm": 0.240234375, "learning_rate": 1.313246162592382e-06, "loss": 1.0665, "step": 1528 }, { "epoch": 0.8692438885730529, "grad_norm": 0.30859375, "learning_rate": 1.3075611142694714e-06, "loss": 1.0923, "step": 1529 }, { "epoch": 0.869812393405344, "grad_norm": 0.244140625, "learning_rate": 1.3018760659465607e-06, "loss": 1.0448, "step": 1530 }, { "epoch": 0.870380898237635, "grad_norm": 0.259765625, "learning_rate": 1.29619101762365e-06, "loss": 1.0482, "step": 1531 }, { "epoch": 0.8709494030699261, "grad_norm": 0.244140625, "learning_rate": 1.2905059693007392e-06, "loss": 1.0662, "step": 1532 }, { "epoch": 0.8715179079022172, "grad_norm": 0.25390625, "learning_rate": 1.2848209209778284e-06, "loss": 1.0578, "step": 1533 }, { "epoch": 0.8720864127345083, "grad_norm": 0.259765625, "learning_rate": 1.2791358726549177e-06, "loss": 1.1213, "step": 1534 }, { "epoch": 0.8726549175667994, "grad_norm": 0.43359375, "learning_rate": 1.2734508243320071e-06, "loss": 1.0541, "step": 1535 }, { "epoch": 0.8732234223990903, "grad_norm": 0.2490234375, "learning_rate": 1.2677657760090962e-06, "loss": 1.1014, "step": 1536 }, { "epoch": 0.8737919272313814, "grad_norm": 0.251953125, "learning_rate": 1.2620807276861854e-06, "loss": 1.0683, "step": 1537 }, { "epoch": 0.8743604320636725, "grad_norm": 0.236328125, "learning_rate": 1.2563956793632747e-06, "loss": 1.0051, "step": 1538 }, { "epoch": 0.8749289368959636, "grad_norm": 0.2451171875, "learning_rate": 1.2507106310403637e-06, "loss": 1.0418, "step": 1539 }, { "epoch": 0.8754974417282547, "grad_norm": 0.26171875, "learning_rate": 1.2450255827174532e-06, "loss": 0.9655, "step": 1540 }, { "epoch": 0.8760659465605458, "grad_norm": 0.25, "learning_rate": 1.2393405343945425e-06, "loss": 1.0845, "step": 1541 }, { "epoch": 0.8766344513928368, "grad_norm": 0.23828125, "learning_rate": 1.2336554860716317e-06, "loss": 1.1162, "step": 1542 }, { "epoch": 0.8772029562251279, "grad_norm": 0.33203125, "learning_rate": 1.227970437748721e-06, "loss": 1.0949, "step": 1543 }, { "epoch": 0.877771461057419, "grad_norm": 0.251953125, "learning_rate": 1.2222853894258102e-06, "loss": 1.0508, "step": 1544 }, { "epoch": 0.8783399658897101, "grad_norm": 0.2578125, "learning_rate": 1.2166003411028995e-06, "loss": 1.0349, "step": 1545 }, { "epoch": 0.8789084707220012, "grad_norm": 0.271484375, "learning_rate": 1.2109152927799887e-06, "loss": 1.1457, "step": 1546 }, { "epoch": 0.8794769755542922, "grad_norm": 0.244140625, "learning_rate": 1.205230244457078e-06, "loss": 1.0769, "step": 1547 }, { "epoch": 0.8800454803865833, "grad_norm": 0.248046875, "learning_rate": 1.1995451961341672e-06, "loss": 1.0972, "step": 1548 }, { "epoch": 0.8806139852188744, "grad_norm": 0.25, "learning_rate": 1.1938601478112565e-06, "loss": 1.1038, "step": 1549 }, { "epoch": 0.8811824900511654, "grad_norm": 0.26171875, "learning_rate": 1.1881750994883457e-06, "loss": 1.0772, "step": 1550 }, { "epoch": 0.8817509948834565, "grad_norm": 0.2392578125, "learning_rate": 1.182490051165435e-06, "loss": 1.059, "step": 1551 }, { "epoch": 0.8823194997157476, "grad_norm": 0.24609375, "learning_rate": 1.1768050028425242e-06, "loss": 1.054, "step": 1552 }, { "epoch": 0.8828880045480386, "grad_norm": 0.2470703125, "learning_rate": 1.1711199545196135e-06, "loss": 1.0096, "step": 1553 }, { "epoch": 0.8834565093803297, "grad_norm": 0.251953125, "learning_rate": 1.1654349061967027e-06, "loss": 1.0053, "step": 1554 }, { "epoch": 0.8840250142126208, "grad_norm": 0.2451171875, "learning_rate": 1.159749857873792e-06, "loss": 1.1256, "step": 1555 }, { "epoch": 0.8845935190449119, "grad_norm": 0.251953125, "learning_rate": 1.1540648095508812e-06, "loss": 1.0382, "step": 1556 }, { "epoch": 0.885162023877203, "grad_norm": 0.2470703125, "learning_rate": 1.1483797612279705e-06, "loss": 1.07, "step": 1557 }, { "epoch": 0.885730528709494, "grad_norm": 0.263671875, "learning_rate": 1.1426947129050598e-06, "loss": 1.0376, "step": 1558 }, { "epoch": 0.8862990335417851, "grad_norm": 0.2734375, "learning_rate": 1.137009664582149e-06, "loss": 1.0251, "step": 1559 }, { "epoch": 0.8868675383740762, "grad_norm": 0.236328125, "learning_rate": 1.1313246162592383e-06, "loss": 1.0331, "step": 1560 }, { "epoch": 0.8874360432063673, "grad_norm": 0.2421875, "learning_rate": 1.1256395679363275e-06, "loss": 1.09, "step": 1561 }, { "epoch": 0.8880045480386584, "grad_norm": 0.255859375, "learning_rate": 1.1199545196134168e-06, "loss": 1.0704, "step": 1562 }, { "epoch": 0.8885730528709495, "grad_norm": 0.248046875, "learning_rate": 1.114269471290506e-06, "loss": 1.0958, "step": 1563 }, { "epoch": 0.8891415577032404, "grad_norm": 0.251953125, "learning_rate": 1.1085844229675953e-06, "loss": 1.072, "step": 1564 }, { "epoch": 0.8897100625355315, "grad_norm": 0.2392578125, "learning_rate": 1.1028993746446847e-06, "loss": 1.1287, "step": 1565 }, { "epoch": 0.8902785673678226, "grad_norm": 0.25390625, "learning_rate": 1.0972143263217738e-06, "loss": 1.039, "step": 1566 }, { "epoch": 0.8908470722001137, "grad_norm": 0.259765625, "learning_rate": 1.091529277998863e-06, "loss": 1.1269, "step": 1567 }, { "epoch": 0.8914155770324048, "grad_norm": 0.2421875, "learning_rate": 1.0858442296759523e-06, "loss": 1.1131, "step": 1568 }, { "epoch": 0.8919840818646958, "grad_norm": 0.2431640625, "learning_rate": 1.0801591813530415e-06, "loss": 1.1078, "step": 1569 }, { "epoch": 0.8925525866969869, "grad_norm": 0.25, "learning_rate": 1.0744741330301308e-06, "loss": 1.0927, "step": 1570 }, { "epoch": 0.893121091529278, "grad_norm": 0.255859375, "learning_rate": 1.06878908470722e-06, "loss": 1.1055, "step": 1571 }, { "epoch": 0.8936895963615691, "grad_norm": 0.251953125, "learning_rate": 1.0631040363843093e-06, "loss": 1.0647, "step": 1572 }, { "epoch": 0.8942581011938602, "grad_norm": 0.25390625, "learning_rate": 1.0574189880613985e-06, "loss": 1.0074, "step": 1573 }, { "epoch": 0.8948266060261513, "grad_norm": 0.25, "learning_rate": 1.051733939738488e-06, "loss": 1.1115, "step": 1574 }, { "epoch": 0.8953951108584423, "grad_norm": 0.251953125, "learning_rate": 1.046048891415577e-06, "loss": 1.1077, "step": 1575 }, { "epoch": 0.8959636156907334, "grad_norm": 0.2412109375, "learning_rate": 1.0403638430926663e-06, "loss": 1.1228, "step": 1576 }, { "epoch": 0.8965321205230244, "grad_norm": 0.2421875, "learning_rate": 1.0346787947697556e-06, "loss": 1.1277, "step": 1577 }, { "epoch": 0.8971006253553155, "grad_norm": 0.2412109375, "learning_rate": 1.0289937464468448e-06, "loss": 1.0711, "step": 1578 }, { "epoch": 0.8976691301876066, "grad_norm": 0.234375, "learning_rate": 1.023308698123934e-06, "loss": 1.1173, "step": 1579 }, { "epoch": 0.8982376350198976, "grad_norm": 0.2412109375, "learning_rate": 1.0176236498010233e-06, "loss": 1.0653, "step": 1580 }, { "epoch": 0.8988061398521887, "grad_norm": 0.2431640625, "learning_rate": 1.0119386014781128e-06, "loss": 1.056, "step": 1581 }, { "epoch": 0.8993746446844798, "grad_norm": 0.2470703125, "learning_rate": 1.0062535531552018e-06, "loss": 1.1488, "step": 1582 }, { "epoch": 0.8999431495167709, "grad_norm": 0.2490234375, "learning_rate": 1.0005685048322913e-06, "loss": 1.0756, "step": 1583 }, { "epoch": 0.900511654349062, "grad_norm": 0.27734375, "learning_rate": 9.948834565093803e-07, "loss": 1.122, "step": 1584 }, { "epoch": 0.9010801591813531, "grad_norm": 0.2490234375, "learning_rate": 9.891984081864698e-07, "loss": 1.0868, "step": 1585 }, { "epoch": 0.9016486640136441, "grad_norm": 0.25390625, "learning_rate": 9.835133598635588e-07, "loss": 1.0558, "step": 1586 }, { "epoch": 0.9022171688459352, "grad_norm": 0.2431640625, "learning_rate": 9.77828311540648e-07, "loss": 1.1154, "step": 1587 }, { "epoch": 0.9027856736782263, "grad_norm": 0.251953125, "learning_rate": 9.721432632177375e-07, "loss": 1.0394, "step": 1588 }, { "epoch": 0.9033541785105174, "grad_norm": 0.251953125, "learning_rate": 9.664582148948266e-07, "loss": 1.0665, "step": 1589 }, { "epoch": 0.9039226833428085, "grad_norm": 0.248046875, "learning_rate": 9.60773166571916e-07, "loss": 1.0411, "step": 1590 }, { "epoch": 0.9044911881750994, "grad_norm": 0.26171875, "learning_rate": 9.55088118249005e-07, "loss": 1.0644, "step": 1591 }, { "epoch": 0.9050596930073905, "grad_norm": 0.2734375, "learning_rate": 9.494030699260945e-07, "loss": 1.0801, "step": 1592 }, { "epoch": 0.9056281978396816, "grad_norm": 0.2470703125, "learning_rate": 9.437180216031837e-07, "loss": 1.0751, "step": 1593 }, { "epoch": 0.9061967026719727, "grad_norm": 0.2578125, "learning_rate": 9.380329732802731e-07, "loss": 1.1169, "step": 1594 }, { "epoch": 0.9067652075042638, "grad_norm": 0.2431640625, "learning_rate": 9.323479249573622e-07, "loss": 1.0638, "step": 1595 }, { "epoch": 0.9073337123365549, "grad_norm": 0.25, "learning_rate": 9.266628766344516e-07, "loss": 1.092, "step": 1596 }, { "epoch": 0.9079022171688459, "grad_norm": 0.255859375, "learning_rate": 9.209778283115407e-07, "loss": 1.0436, "step": 1597 }, { "epoch": 0.908470722001137, "grad_norm": 0.25, "learning_rate": 9.1529277998863e-07, "loss": 1.069, "step": 1598 }, { "epoch": 0.9090392268334281, "grad_norm": 0.2470703125, "learning_rate": 9.096077316657192e-07, "loss": 1.0838, "step": 1599 }, { "epoch": 0.9096077316657192, "grad_norm": 0.2412109375, "learning_rate": 9.039226833428085e-07, "loss": 1.0823, "step": 1600 }, { "epoch": 0.9101762364980103, "grad_norm": 0.251953125, "learning_rate": 8.982376350198977e-07, "loss": 1.0913, "step": 1601 }, { "epoch": 0.9107447413303013, "grad_norm": 0.28125, "learning_rate": 8.92552586696987e-07, "loss": 1.0003, "step": 1602 }, { "epoch": 0.9113132461625923, "grad_norm": 0.251953125, "learning_rate": 8.868675383740763e-07, "loss": 1.1033, "step": 1603 }, { "epoch": 0.9118817509948834, "grad_norm": 0.5546875, "learning_rate": 8.811824900511655e-07, "loss": 0.9948, "step": 1604 }, { "epoch": 0.9124502558271745, "grad_norm": 0.283203125, "learning_rate": 8.754974417282548e-07, "loss": 1.073, "step": 1605 }, { "epoch": 0.9130187606594656, "grad_norm": 0.259765625, "learning_rate": 8.69812393405344e-07, "loss": 1.1479, "step": 1606 }, { "epoch": 0.9135872654917567, "grad_norm": 0.240234375, "learning_rate": 8.641273450824332e-07, "loss": 1.0619, "step": 1607 }, { "epoch": 0.9141557703240477, "grad_norm": 0.25390625, "learning_rate": 8.584422967595225e-07, "loss": 1.0398, "step": 1608 }, { "epoch": 0.9147242751563388, "grad_norm": 0.2470703125, "learning_rate": 8.527572484366118e-07, "loss": 1.0538, "step": 1609 }, { "epoch": 0.9152927799886299, "grad_norm": 0.26953125, "learning_rate": 8.470722001137011e-07, "loss": 1.0778, "step": 1610 }, { "epoch": 0.915861284820921, "grad_norm": 0.263671875, "learning_rate": 8.413871517907903e-07, "loss": 1.0741, "step": 1611 }, { "epoch": 0.9164297896532121, "grad_norm": 0.2451171875, "learning_rate": 8.357021034678796e-07, "loss": 1.0578, "step": 1612 }, { "epoch": 0.9169982944855031, "grad_norm": 0.23828125, "learning_rate": 8.300170551449688e-07, "loss": 1.0578, "step": 1613 }, { "epoch": 0.9175667993177942, "grad_norm": 0.2421875, "learning_rate": 8.243320068220581e-07, "loss": 1.0712, "step": 1614 }, { "epoch": 0.9181353041500853, "grad_norm": 0.25, "learning_rate": 8.186469584991473e-07, "loss": 1.0694, "step": 1615 }, { "epoch": 0.9187038089823764, "grad_norm": 0.2373046875, "learning_rate": 8.129619101762366e-07, "loss": 1.1087, "step": 1616 }, { "epoch": 0.9192723138146675, "grad_norm": 0.2451171875, "learning_rate": 8.072768618533258e-07, "loss": 1.0687, "step": 1617 }, { "epoch": 0.9198408186469585, "grad_norm": 0.248046875, "learning_rate": 8.01591813530415e-07, "loss": 1.0962, "step": 1618 }, { "epoch": 0.9204093234792495, "grad_norm": 0.23828125, "learning_rate": 7.959067652075044e-07, "loss": 1.0903, "step": 1619 }, { "epoch": 0.9209778283115406, "grad_norm": 0.255859375, "learning_rate": 7.902217168845935e-07, "loss": 1.1918, "step": 1620 }, { "epoch": 0.9215463331438317, "grad_norm": 0.2421875, "learning_rate": 7.845366685616829e-07, "loss": 1.0169, "step": 1621 }, { "epoch": 0.9221148379761228, "grad_norm": 0.2421875, "learning_rate": 7.78851620238772e-07, "loss": 1.1132, "step": 1622 }, { "epoch": 0.9226833428084139, "grad_norm": 0.2373046875, "learning_rate": 7.731665719158614e-07, "loss": 1.0704, "step": 1623 }, { "epoch": 0.9232518476407049, "grad_norm": 0.2431640625, "learning_rate": 7.674815235929505e-07, "loss": 1.0678, "step": 1624 }, { "epoch": 0.923820352472996, "grad_norm": 0.2734375, "learning_rate": 7.617964752700399e-07, "loss": 1.0855, "step": 1625 }, { "epoch": 0.9243888573052871, "grad_norm": 0.3203125, "learning_rate": 7.561114269471292e-07, "loss": 1.0818, "step": 1626 }, { "epoch": 0.9249573621375782, "grad_norm": 0.271484375, "learning_rate": 7.504263786242183e-07, "loss": 1.049, "step": 1627 }, { "epoch": 0.9255258669698693, "grad_norm": 0.380859375, "learning_rate": 7.447413303013077e-07, "loss": 1.1809, "step": 1628 }, { "epoch": 0.9260943718021604, "grad_norm": 0.240234375, "learning_rate": 7.390562819783968e-07, "loss": 1.0113, "step": 1629 }, { "epoch": 0.9266628766344513, "grad_norm": 0.255859375, "learning_rate": 7.333712336554862e-07, "loss": 1.0194, "step": 1630 }, { "epoch": 0.9272313814667424, "grad_norm": 0.251953125, "learning_rate": 7.276861853325753e-07, "loss": 1.0968, "step": 1631 }, { "epoch": 0.9277998862990335, "grad_norm": 0.26171875, "learning_rate": 7.220011370096647e-07, "loss": 1.0711, "step": 1632 }, { "epoch": 0.9283683911313246, "grad_norm": 0.25, "learning_rate": 7.163160886867538e-07, "loss": 1.1428, "step": 1633 }, { "epoch": 0.9289368959636157, "grad_norm": 0.25390625, "learning_rate": 7.106310403638432e-07, "loss": 1.0883, "step": 1634 }, { "epoch": 0.9295054007959067, "grad_norm": 0.25, "learning_rate": 7.049459920409324e-07, "loss": 1.0516, "step": 1635 }, { "epoch": 0.9300739056281978, "grad_norm": 0.267578125, "learning_rate": 6.992609437180217e-07, "loss": 1.1221, "step": 1636 }, { "epoch": 0.9306424104604889, "grad_norm": 0.251953125, "learning_rate": 6.935758953951109e-07, "loss": 1.0222, "step": 1637 }, { "epoch": 0.93121091529278, "grad_norm": 0.2451171875, "learning_rate": 6.878908470722001e-07, "loss": 1.1209, "step": 1638 }, { "epoch": 0.9317794201250711, "grad_norm": 0.2431640625, "learning_rate": 6.822057987492894e-07, "loss": 1.0674, "step": 1639 }, { "epoch": 0.9323479249573622, "grad_norm": 0.2421875, "learning_rate": 6.765207504263786e-07, "loss": 1.0886, "step": 1640 }, { "epoch": 0.9329164297896532, "grad_norm": 0.259765625, "learning_rate": 6.70835702103468e-07, "loss": 1.0966, "step": 1641 }, { "epoch": 0.9334849346219443, "grad_norm": 0.248046875, "learning_rate": 6.651506537805572e-07, "loss": 1.0347, "step": 1642 }, { "epoch": 0.9340534394542354, "grad_norm": 0.259765625, "learning_rate": 6.594656054576465e-07, "loss": 1.067, "step": 1643 }, { "epoch": 0.9346219442865265, "grad_norm": 0.2373046875, "learning_rate": 6.537805571347357e-07, "loss": 1.0647, "step": 1644 }, { "epoch": 0.9351904491188175, "grad_norm": 0.248046875, "learning_rate": 6.48095508811825e-07, "loss": 1.0968, "step": 1645 }, { "epoch": 0.9357589539511085, "grad_norm": 0.25, "learning_rate": 6.424104604889142e-07, "loss": 1.0477, "step": 1646 }, { "epoch": 0.9363274587833996, "grad_norm": 0.25, "learning_rate": 6.367254121660036e-07, "loss": 1.0628, "step": 1647 }, { "epoch": 0.9368959636156907, "grad_norm": 0.24609375, "learning_rate": 6.310403638430927e-07, "loss": 1.0684, "step": 1648 }, { "epoch": 0.9374644684479818, "grad_norm": 0.2470703125, "learning_rate": 6.253553155201819e-07, "loss": 1.0605, "step": 1649 }, { "epoch": 0.9380329732802729, "grad_norm": 0.2578125, "learning_rate": 6.196702671972712e-07, "loss": 1.052, "step": 1650 }, { "epoch": 0.938601478112564, "grad_norm": 0.25, "learning_rate": 6.139852188743605e-07, "loss": 1.0851, "step": 1651 }, { "epoch": 0.939169982944855, "grad_norm": 0.2490234375, "learning_rate": 6.083001705514497e-07, "loss": 1.0593, "step": 1652 }, { "epoch": 0.9397384877771461, "grad_norm": 0.267578125, "learning_rate": 6.02615122228539e-07, "loss": 1.0835, "step": 1653 }, { "epoch": 0.9403069926094372, "grad_norm": 0.248046875, "learning_rate": 5.969300739056282e-07, "loss": 1.0856, "step": 1654 }, { "epoch": 0.9408754974417283, "grad_norm": 0.251953125, "learning_rate": 5.912450255827175e-07, "loss": 1.1246, "step": 1655 }, { "epoch": 0.9414440022740194, "grad_norm": 0.298828125, "learning_rate": 5.855599772598067e-07, "loss": 1.0285, "step": 1656 }, { "epoch": 0.9420125071063103, "grad_norm": 0.24609375, "learning_rate": 5.79874928936896e-07, "loss": 1.1381, "step": 1657 }, { "epoch": 0.9425810119386014, "grad_norm": 0.267578125, "learning_rate": 5.741898806139852e-07, "loss": 1.1311, "step": 1658 }, { "epoch": 0.9431495167708925, "grad_norm": 0.2373046875, "learning_rate": 5.685048322910745e-07, "loss": 1.0949, "step": 1659 }, { "epoch": 0.9437180216031836, "grad_norm": 0.25390625, "learning_rate": 5.628197839681638e-07, "loss": 1.1399, "step": 1660 }, { "epoch": 0.9442865264354747, "grad_norm": 0.244140625, "learning_rate": 5.57134735645253e-07, "loss": 1.1481, "step": 1661 }, { "epoch": 0.9448550312677658, "grad_norm": 0.25390625, "learning_rate": 5.514496873223424e-07, "loss": 1.1405, "step": 1662 }, { "epoch": 0.9454235361000568, "grad_norm": 0.244140625, "learning_rate": 5.457646389994315e-07, "loss": 1.0536, "step": 1663 }, { "epoch": 0.9459920409323479, "grad_norm": 0.240234375, "learning_rate": 5.400795906765208e-07, "loss": 1.1801, "step": 1664 }, { "epoch": 0.946560545764639, "grad_norm": 0.2470703125, "learning_rate": 5.3439454235361e-07, "loss": 1.0744, "step": 1665 }, { "epoch": 0.9471290505969301, "grad_norm": 0.2490234375, "learning_rate": 5.287094940306993e-07, "loss": 1.0023, "step": 1666 }, { "epoch": 0.9476975554292212, "grad_norm": 0.255859375, "learning_rate": 5.230244457077885e-07, "loss": 0.9895, "step": 1667 }, { "epoch": 0.9482660602615123, "grad_norm": 0.259765625, "learning_rate": 5.173393973848778e-07, "loss": 1.1264, "step": 1668 }, { "epoch": 0.9488345650938033, "grad_norm": 0.2490234375, "learning_rate": 5.11654349061967e-07, "loss": 1.1107, "step": 1669 }, { "epoch": 0.9494030699260944, "grad_norm": 0.96875, "learning_rate": 5.059693007390564e-07, "loss": 1.0379, "step": 1670 }, { "epoch": 0.9499715747583855, "grad_norm": 0.25, "learning_rate": 5.002842524161456e-07, "loss": 1.0002, "step": 1671 }, { "epoch": 0.9505400795906765, "grad_norm": 0.2578125, "learning_rate": 4.945992040932349e-07, "loss": 1.0849, "step": 1672 }, { "epoch": 0.9511085844229676, "grad_norm": 0.2431640625, "learning_rate": 4.88914155770324e-07, "loss": 1.1335, "step": 1673 }, { "epoch": 0.9516770892552586, "grad_norm": 0.259765625, "learning_rate": 4.832291074474133e-07, "loss": 1.1563, "step": 1674 }, { "epoch": 0.9522455940875497, "grad_norm": 0.25, "learning_rate": 4.775440591245025e-07, "loss": 1.021, "step": 1675 }, { "epoch": 0.9528140989198408, "grad_norm": 0.369140625, "learning_rate": 4.7185901080159185e-07, "loss": 1.0038, "step": 1676 }, { "epoch": 0.9533826037521319, "grad_norm": 0.25, "learning_rate": 4.661739624786811e-07, "loss": 0.9966, "step": 1677 }, { "epoch": 0.953951108584423, "grad_norm": 0.2333984375, "learning_rate": 4.6048891415577036e-07, "loss": 1.1127, "step": 1678 }, { "epoch": 0.9545196134167141, "grad_norm": 0.24609375, "learning_rate": 4.548038658328596e-07, "loss": 1.0377, "step": 1679 }, { "epoch": 0.9550881182490051, "grad_norm": 0.279296875, "learning_rate": 4.4911881750994886e-07, "loss": 1.1638, "step": 1680 }, { "epoch": 0.9556566230812962, "grad_norm": 0.255859375, "learning_rate": 4.4343376918703817e-07, "loss": 0.9992, "step": 1681 }, { "epoch": 0.9562251279135873, "grad_norm": 0.255859375, "learning_rate": 4.377487208641274e-07, "loss": 1.0561, "step": 1682 }, { "epoch": 0.9567936327458784, "grad_norm": 0.259765625, "learning_rate": 4.320636725412166e-07, "loss": 1.0646, "step": 1683 }, { "epoch": 0.9573621375781695, "grad_norm": 0.244140625, "learning_rate": 4.263786242183059e-07, "loss": 1.0638, "step": 1684 }, { "epoch": 0.9579306424104604, "grad_norm": 0.2333984375, "learning_rate": 4.2069357589539513e-07, "loss": 1.0492, "step": 1685 }, { "epoch": 0.9584991472427515, "grad_norm": 0.267578125, "learning_rate": 4.150085275724844e-07, "loss": 1.0404, "step": 1686 }, { "epoch": 0.9590676520750426, "grad_norm": 0.287109375, "learning_rate": 4.0932347924957363e-07, "loss": 1.083, "step": 1687 }, { "epoch": 0.9596361569073337, "grad_norm": 0.2392578125, "learning_rate": 4.036384309266629e-07, "loss": 1.1342, "step": 1688 }, { "epoch": 0.9602046617396248, "grad_norm": 0.296875, "learning_rate": 3.979533826037522e-07, "loss": 1.0808, "step": 1689 }, { "epoch": 0.9607731665719159, "grad_norm": 0.248046875, "learning_rate": 3.9226833428084145e-07, "loss": 1.0469, "step": 1690 }, { "epoch": 0.9613416714042069, "grad_norm": 0.2451171875, "learning_rate": 3.865832859579307e-07, "loss": 1.0651, "step": 1691 }, { "epoch": 0.961910176236498, "grad_norm": 0.240234375, "learning_rate": 3.8089823763501995e-07, "loss": 1.0705, "step": 1692 }, { "epoch": 0.9624786810687891, "grad_norm": 0.25390625, "learning_rate": 3.7521318931210915e-07, "loss": 1.08, "step": 1693 }, { "epoch": 0.9630471859010802, "grad_norm": 0.380859375, "learning_rate": 3.695281409891984e-07, "loss": 1.0504, "step": 1694 }, { "epoch": 0.9636156907333713, "grad_norm": 0.236328125, "learning_rate": 3.6384309266628766e-07, "loss": 1.0815, "step": 1695 }, { "epoch": 0.9641841955656623, "grad_norm": 0.25390625, "learning_rate": 3.581580443433769e-07, "loss": 1.0705, "step": 1696 }, { "epoch": 0.9647527003979534, "grad_norm": 0.25390625, "learning_rate": 3.524729960204662e-07, "loss": 1.1327, "step": 1697 }, { "epoch": 0.9653212052302445, "grad_norm": 0.25390625, "learning_rate": 3.4678794769755547e-07, "loss": 1.0119, "step": 1698 }, { "epoch": 0.9658897100625355, "grad_norm": 0.2470703125, "learning_rate": 3.411028993746447e-07, "loss": 1.0659, "step": 1699 }, { "epoch": 0.9664582148948266, "grad_norm": 0.255859375, "learning_rate": 3.35417851051734e-07, "loss": 1.0708, "step": 1700 }, { "epoch": 0.9670267197271177, "grad_norm": 0.2578125, "learning_rate": 3.2973280272882323e-07, "loss": 1.0961, "step": 1701 }, { "epoch": 0.9675952245594087, "grad_norm": 0.25, "learning_rate": 3.240477544059125e-07, "loss": 1.0487, "step": 1702 }, { "epoch": 0.9681637293916998, "grad_norm": 0.267578125, "learning_rate": 3.183627060830018e-07, "loss": 1.096, "step": 1703 }, { "epoch": 0.9687322342239909, "grad_norm": 0.25390625, "learning_rate": 3.1267765776009093e-07, "loss": 1.0803, "step": 1704 }, { "epoch": 0.969300739056282, "grad_norm": 0.251953125, "learning_rate": 3.0699260943718024e-07, "loss": 1.0948, "step": 1705 }, { "epoch": 0.9698692438885731, "grad_norm": 0.2373046875, "learning_rate": 3.013075611142695e-07, "loss": 1.0903, "step": 1706 }, { "epoch": 0.9704377487208641, "grad_norm": 0.255859375, "learning_rate": 2.9562251279135875e-07, "loss": 1.0973, "step": 1707 }, { "epoch": 0.9710062535531552, "grad_norm": 0.2412109375, "learning_rate": 2.89937464468448e-07, "loss": 1.0925, "step": 1708 }, { "epoch": 0.9715747583854463, "grad_norm": 0.26171875, "learning_rate": 2.8425241614553725e-07, "loss": 1.057, "step": 1709 }, { "epoch": 0.9721432632177374, "grad_norm": 0.251953125, "learning_rate": 2.785673678226265e-07, "loss": 1.0918, "step": 1710 }, { "epoch": 0.9727117680500285, "grad_norm": 0.251953125, "learning_rate": 2.7288231949971576e-07, "loss": 1.0676, "step": 1711 }, { "epoch": 0.9732802728823196, "grad_norm": 0.2490234375, "learning_rate": 2.67197271176805e-07, "loss": 1.1316, "step": 1712 }, { "epoch": 0.9738487777146105, "grad_norm": 0.2392578125, "learning_rate": 2.6151222285389426e-07, "loss": 1.1022, "step": 1713 }, { "epoch": 0.9744172825469016, "grad_norm": 0.248046875, "learning_rate": 2.558271745309835e-07, "loss": 1.1001, "step": 1714 }, { "epoch": 0.9749857873791927, "grad_norm": 0.248046875, "learning_rate": 2.501421262080728e-07, "loss": 1.0884, "step": 1715 }, { "epoch": 0.9755542922114838, "grad_norm": 0.265625, "learning_rate": 2.44457077885162e-07, "loss": 1.1353, "step": 1716 }, { "epoch": 0.9761227970437749, "grad_norm": 0.255859375, "learning_rate": 2.387720295622513e-07, "loss": 1.0429, "step": 1717 }, { "epoch": 0.9766913018760659, "grad_norm": 0.2890625, "learning_rate": 2.3308698123934055e-07, "loss": 1.1118, "step": 1718 }, { "epoch": 0.977259806708357, "grad_norm": 0.2470703125, "learning_rate": 2.274019329164298e-07, "loss": 1.0896, "step": 1719 }, { "epoch": 0.9778283115406481, "grad_norm": 0.248046875, "learning_rate": 2.2171688459351909e-07, "loss": 1.1255, "step": 1720 }, { "epoch": 0.9783968163729392, "grad_norm": 0.25390625, "learning_rate": 2.160318362706083e-07, "loss": 1.0243, "step": 1721 }, { "epoch": 0.9789653212052303, "grad_norm": 0.248046875, "learning_rate": 2.1034678794769756e-07, "loss": 1.0861, "step": 1722 }, { "epoch": 0.9795338260375214, "grad_norm": 0.279296875, "learning_rate": 2.0466173962478682e-07, "loss": 1.1392, "step": 1723 }, { "epoch": 0.9801023308698124, "grad_norm": 0.248046875, "learning_rate": 1.989766913018761e-07, "loss": 1.1313, "step": 1724 }, { "epoch": 0.9806708357021034, "grad_norm": 0.251953125, "learning_rate": 1.9329164297896535e-07, "loss": 1.0845, "step": 1725 }, { "epoch": 0.9812393405343945, "grad_norm": 0.2490234375, "learning_rate": 1.8760659465605458e-07, "loss": 1.0585, "step": 1726 }, { "epoch": 0.9818078453666856, "grad_norm": 0.265625, "learning_rate": 1.8192154633314383e-07, "loss": 1.0937, "step": 1727 }, { "epoch": 0.9823763501989767, "grad_norm": 0.251953125, "learning_rate": 1.762364980102331e-07, "loss": 1.0952, "step": 1728 }, { "epoch": 0.9829448550312677, "grad_norm": 0.25390625, "learning_rate": 1.7055144968732236e-07, "loss": 1.0393, "step": 1729 }, { "epoch": 0.9835133598635588, "grad_norm": 0.259765625, "learning_rate": 1.6486640136441161e-07, "loss": 1.1354, "step": 1730 }, { "epoch": 0.9840818646958499, "grad_norm": 0.2490234375, "learning_rate": 1.591813530415009e-07, "loss": 1.0669, "step": 1731 }, { "epoch": 0.984650369528141, "grad_norm": 0.25, "learning_rate": 1.5349630471859012e-07, "loss": 1.0779, "step": 1732 }, { "epoch": 0.9852188743604321, "grad_norm": 0.25, "learning_rate": 1.4781125639567937e-07, "loss": 1.078, "step": 1733 }, { "epoch": 0.9857873791927232, "grad_norm": 0.2421875, "learning_rate": 1.4212620807276863e-07, "loss": 1.0819, "step": 1734 }, { "epoch": 0.9863558840250142, "grad_norm": 0.2451171875, "learning_rate": 1.3644115974985788e-07, "loss": 0.9768, "step": 1735 }, { "epoch": 0.9869243888573053, "grad_norm": 0.25, "learning_rate": 1.3075611142694713e-07, "loss": 1.0684, "step": 1736 }, { "epoch": 0.9874928936895964, "grad_norm": 0.251953125, "learning_rate": 1.250710631040364e-07, "loss": 1.1027, "step": 1737 }, { "epoch": 0.9880613985218875, "grad_norm": 0.2451171875, "learning_rate": 1.1938601478112564e-07, "loss": 1.0801, "step": 1738 }, { "epoch": 0.9886299033541786, "grad_norm": 0.2490234375, "learning_rate": 1.137009664582149e-07, "loss": 1.1181, "step": 1739 }, { "epoch": 0.9891984081864695, "grad_norm": 0.234375, "learning_rate": 1.0801591813530416e-07, "loss": 1.0206, "step": 1740 }, { "epoch": 0.9897669130187606, "grad_norm": 0.2431640625, "learning_rate": 1.0233086981239341e-07, "loss": 1.0657, "step": 1741 }, { "epoch": 0.9903354178510517, "grad_norm": 0.25, "learning_rate": 9.664582148948267e-08, "loss": 1.0526, "step": 1742 }, { "epoch": 0.9909039226833428, "grad_norm": 0.2470703125, "learning_rate": 9.096077316657191e-08, "loss": 1.0686, "step": 1743 }, { "epoch": 0.9914724275156339, "grad_norm": 0.25, "learning_rate": 8.527572484366118e-08, "loss": 1.1151, "step": 1744 }, { "epoch": 0.992040932347925, "grad_norm": 0.240234375, "learning_rate": 7.959067652075045e-08, "loss": 1.1506, "step": 1745 }, { "epoch": 0.992609437180216, "grad_norm": 0.25, "learning_rate": 7.390562819783969e-08, "loss": 1.0644, "step": 1746 }, { "epoch": 0.9931779420125071, "grad_norm": 0.240234375, "learning_rate": 6.822057987492894e-08, "loss": 1.0743, "step": 1747 }, { "epoch": 0.9937464468447982, "grad_norm": 0.2470703125, "learning_rate": 6.25355315520182e-08, "loss": 1.003, "step": 1748 }, { "epoch": 0.9943149516770893, "grad_norm": 0.244140625, "learning_rate": 5.685048322910745e-08, "loss": 1.1203, "step": 1749 }, { "epoch": 0.9948834565093804, "grad_norm": 0.2470703125, "learning_rate": 5.1165434906196704e-08, "loss": 1.0009, "step": 1750 }, { "epoch": 0.9954519613416714, "grad_norm": 0.24609375, "learning_rate": 4.548038658328596e-08, "loss": 1.09, "step": 1751 }, { "epoch": 0.9960204661739624, "grad_norm": 0.25390625, "learning_rate": 3.9795338260375223e-08, "loss": 1.1157, "step": 1752 }, { "epoch": 0.9965889710062535, "grad_norm": 0.2451171875, "learning_rate": 3.411028993746447e-08, "loss": 1.1547, "step": 1753 }, { "epoch": 0.9971574758385446, "grad_norm": 0.2431640625, "learning_rate": 2.8425241614553726e-08, "loss": 1.0319, "step": 1754 }, { "epoch": 0.9977259806708357, "grad_norm": 0.2578125, "learning_rate": 2.274019329164298e-08, "loss": 1.0665, "step": 1755 }, { "epoch": 0.9982944855031268, "grad_norm": 0.24609375, "learning_rate": 1.7055144968732235e-08, "loss": 1.0305, "step": 1756 }, { "epoch": 0.9988629903354178, "grad_norm": 0.255859375, "learning_rate": 1.137009664582149e-08, "loss": 1.0681, "step": 1757 }, { "epoch": 0.9994314951677089, "grad_norm": 0.255859375, "learning_rate": 5.685048322910745e-09, "loss": 0.9648, "step": 1758 }, { "epoch": 1.0, "grad_norm": 0.28125, "learning_rate": 0.0, "loss": 1.0561, "step": 1759 } ], "logging_steps": 1.0, "max_steps": 1759, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.753088139426005e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }