{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 1300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0015384615384615385,
      "grad_norm": 60.89158644985447,
      "learning_rate": 2e-05,
      "loss": 1.2107,
      "step": 1
    },
    {
      "epoch": 0.003076923076923077,
      "grad_norm": 31.499919740759644,
      "learning_rate": 1.9999992699997636e-05,
      "loss": 0.7837,
      "step": 2
    },
    {
      "epoch": 0.004615384615384616,
      "grad_norm": 6.103692995379824,
      "learning_rate": 1.999997080000119e-05,
      "loss": 0.3218,
      "step": 3
    },
    {
      "epoch": 0.006153846153846154,
      "grad_norm": 2.6468306725602764,
      "learning_rate": 1.9999934300042646e-05,
      "loss": 0.2287,
      "step": 4
    },
    {
      "epoch": 0.007692307692307693,
      "grad_norm": 3.4867739541054443,
      "learning_rate": 1.9999883200175286e-05,
      "loss": 0.2423,
      "step": 5
    },
    {
      "epoch": 0.009230769230769232,
      "grad_norm": 2.641228316079499,
      "learning_rate": 1.9999817500473724e-05,
      "loss": 0.2253,
      "step": 6
    },
    {
      "epoch": 0.010769230769230769,
      "grad_norm": 1.205202032246029,
      "learning_rate": 1.9999737201033877e-05,
      "loss": 0.1668,
      "step": 7
    },
    {
      "epoch": 0.012307692307692308,
      "grad_norm": 1.9244405042319512,
      "learning_rate": 1.999964230197298e-05,
      "loss": 0.1847,
      "step": 8
    },
    {
      "epoch": 0.013846153846153847,
      "grad_norm": 1.4464951947932236,
      "learning_rate": 1.999953280342959e-05,
      "loss": 0.1606,
      "step": 9
    },
    {
      "epoch": 0.015384615384615385,
      "grad_norm": 1.4255296128290331,
      "learning_rate": 1.999940870556357e-05,
      "loss": 0.1471,
      "step": 10
    },
    {
      "epoch": 0.016923076923076923,
      "grad_norm": 1.3096222246733347,
      "learning_rate": 1.9999270008556108e-05,
      "loss": 0.1463,
      "step": 11
    },
    {
      "epoch": 0.018461538461538463,
      "grad_norm": 1.018551951164187,
      "learning_rate": 1.99991167126097e-05,
      "loss": 0.1405,
      "step": 12
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1849805458454175,
      "learning_rate": 1.9998948817948157e-05,
      "loss": 0.1381,
      "step": 13
    },
    {
      "epoch": 0.021538461538461538,
      "grad_norm": 1.01325599101221,
      "learning_rate": 1.9998766324816606e-05,
      "loss": 0.1453,
      "step": 14
    },
    {
      "epoch": 0.023076923076923078,
      "grad_norm": 1.0760344920602294,
      "learning_rate": 1.999856923348149e-05,
      "loss": 0.119,
      "step": 15
    },
    {
      "epoch": 0.024615384615384615,
      "grad_norm": 1.2012958039069457,
      "learning_rate": 1.9998357544230558e-05,
      "loss": 0.1507,
      "step": 16
    },
    {
      "epoch": 0.026153846153846153,
      "grad_norm": 0.8734600178036379,
      "learning_rate": 1.9998131257372878e-05,
      "loss": 0.1334,
      "step": 17
    },
    {
      "epoch": 0.027692307692307693,
      "grad_norm": 0.7321244232592351,
      "learning_rate": 1.9997890373238827e-05,
      "loss": 0.1259,
      "step": 18
    },
    {
      "epoch": 0.02923076923076923,
      "grad_norm": 0.8736141545923927,
      "learning_rate": 1.99976348921801e-05,
      "loss": 0.1328,
      "step": 19
    },
    {
      "epoch": 0.03076923076923077,
      "grad_norm": 0.7433251995484863,
      "learning_rate": 1.9997364814569696e-05,
      "loss": 0.1155,
      "step": 20
    },
    {
      "epoch": 0.03230769230769231,
      "grad_norm": 0.7421987879217785,
      "learning_rate": 1.9997080140801932e-05,
      "loss": 0.1321,
      "step": 21
    },
    {
      "epoch": 0.033846153846153845,
      "grad_norm": 0.7417777234873172,
      "learning_rate": 1.9996780871292428e-05,
      "loss": 0.1082,
      "step": 22
    },
    {
      "epoch": 0.03538461538461538,
      "grad_norm": 1.2517479737195332,
      "learning_rate": 1.999646700647812e-05,
      "loss": 0.1219,
      "step": 23
    },
    {
      "epoch": 0.036923076923076927,
      "grad_norm": 0.9094077693848758,
      "learning_rate": 1.9996138546817248e-05,
      "loss": 0.1314,
      "step": 24
    },
    {
      "epoch": 0.038461538461538464,
      "grad_norm": 0.7797984417608248,
      "learning_rate": 1.9995795492789368e-05,
      "loss": 0.1331,
      "step": 25
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.7976816314521917,
      "learning_rate": 1.9995437844895337e-05,
      "loss": 0.114,
      "step": 26
    },
    {
      "epoch": 0.04153846153846154,
      "grad_norm": 0.8888193123533839,
      "learning_rate": 1.9995065603657317e-05,
      "loss": 0.1399,
      "step": 27
    },
    {
      "epoch": 0.043076923076923075,
      "grad_norm": 0.8169350017845466,
      "learning_rate": 1.9994678769618784e-05,
      "loss": 0.1148,
      "step": 28
    },
    {
      "epoch": 0.04461538461538461,
      "grad_norm": 0.6804970735056192,
      "learning_rate": 1.999427734334452e-05,
      "loss": 0.1223,
      "step": 29
    },
    {
      "epoch": 0.046153846153846156,
      "grad_norm": 0.7358036673244082,
      "learning_rate": 1.99938613254206e-05,
      "loss": 0.1281,
      "step": 30
    },
    {
      "epoch": 0.047692307692307694,
      "grad_norm": 0.726915700239593,
      "learning_rate": 1.9993430716454415e-05,
      "loss": 0.123,
      "step": 31
    },
    {
      "epoch": 0.04923076923076923,
      "grad_norm": 0.8370086699996789,
      "learning_rate": 1.9992985517074653e-05,
      "loss": 0.1121,
      "step": 32
    },
    {
      "epoch": 0.05076923076923077,
      "grad_norm": 0.870233844578574,
      "learning_rate": 1.9992525727931303e-05,
      "loss": 0.1242,
      "step": 33
    },
    {
      "epoch": 0.052307692307692305,
      "grad_norm": 0.6572319680505859,
      "learning_rate": 1.9992051349695662e-05,
      "loss": 0.115,
      "step": 34
    },
    {
      "epoch": 0.05384615384615385,
      "grad_norm": 0.8992858978866627,
      "learning_rate": 1.9991562383060316e-05,
      "loss": 0.1173,
      "step": 35
    },
    {
      "epoch": 0.055384615384615386,
      "grad_norm": 0.6238895374463513,
      "learning_rate": 1.9991058828739164e-05,
      "loss": 0.1188,
      "step": 36
    },
    {
      "epoch": 0.05692307692307692,
      "grad_norm": 0.5905879008819883,
      "learning_rate": 1.9990540687467394e-05,
      "loss": 0.1162,
      "step": 37
    },
    {
      "epoch": 0.05846153846153846,
      "grad_norm": 0.7475485922157608,
      "learning_rate": 1.9990007960001487e-05,
      "loss": 0.1341,
      "step": 38
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.7062185012543412,
      "learning_rate": 1.9989460647119232e-05,
      "loss": 0.1276,
      "step": 39
    },
    {
      "epoch": 0.06153846153846154,
      "grad_norm": 0.6940436820752224,
      "learning_rate": 1.9988898749619702e-05,
      "loss": 0.1173,
      "step": 40
    },
    {
      "epoch": 0.06307692307692307,
      "grad_norm": 0.6162083864347133,
      "learning_rate": 1.998832226832327e-05,
      "loss": 0.1148,
      "step": 41
    },
    {
      "epoch": 0.06461538461538462,
      "grad_norm": 0.5010819273602969,
      "learning_rate": 1.9987731204071596e-05,
      "loss": 0.0971,
      "step": 42
    },
    {
      "epoch": 0.06615384615384616,
      "grad_norm": 0.6715601368175806,
      "learning_rate": 1.9987125557727633e-05,
      "loss": 0.1279,
      "step": 43
    },
    {
      "epoch": 0.06769230769230769,
      "grad_norm": 0.6360471415422658,
      "learning_rate": 1.998650533017563e-05,
      "loss": 0.1106,
      "step": 44
    },
    {
      "epoch": 0.06923076923076923,
      "grad_norm": 0.6293809806468533,
      "learning_rate": 1.9985870522321118e-05,
      "loss": 0.1367,
      "step": 45
    },
    {
      "epoch": 0.07076923076923076,
      "grad_norm": 0.6592095131341471,
      "learning_rate": 1.9985221135090917e-05,
      "loss": 0.1149,
      "step": 46
    },
    {
      "epoch": 0.07230769230769231,
      "grad_norm": 0.7078516914096971,
      "learning_rate": 1.9984557169433126e-05,
      "loss": 0.1115,
      "step": 47
    },
    {
      "epoch": 0.07384615384615385,
      "grad_norm": 0.5877725742836197,
      "learning_rate": 1.9983878626317147e-05,
      "loss": 0.1236,
      "step": 48
    },
    {
      "epoch": 0.07538461538461538,
      "grad_norm": 0.5884104828878249,
      "learning_rate": 1.9983185506733643e-05,
      "loss": 0.12,
      "step": 49
    },
    {
      "epoch": 0.07692307692307693,
      "grad_norm": 0.549670934870528,
      "learning_rate": 1.9982477811694578e-05,
      "loss": 0.1104,
      "step": 50
    },
    {
      "epoch": 0.07846153846153846,
      "grad_norm": 0.8220615421808383,
      "learning_rate": 1.9981755542233175e-05,
      "loss": 0.1219,
      "step": 51
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.6798315000266852,
      "learning_rate": 1.998101869940396e-05,
      "loss": 0.1182,
      "step": 52
    },
    {
      "epoch": 0.08153846153846153,
      "grad_norm": 0.5366855615637781,
      "learning_rate": 1.9980267284282718e-05,
      "loss": 0.1017,
      "step": 53
    },
    {
      "epoch": 0.08307692307692308,
      "grad_norm": 0.7397417525631895,
      "learning_rate": 1.9979501297966516e-05,
      "loss": 0.114,
      "step": 54
    },
    {
      "epoch": 0.08461538461538462,
      "grad_norm": 0.7086921001674501,
      "learning_rate": 1.9978720741573693e-05,
      "loss": 0.1179,
      "step": 55
    },
    {
      "epoch": 0.08615384615384615,
      "grad_norm": 0.7075396570589779,
      "learning_rate": 1.9977925616243865e-05,
      "loss": 0.1143,
      "step": 56
    },
    {
      "epoch": 0.0876923076923077,
      "grad_norm": 0.5477986445198524,
      "learning_rate": 1.9977115923137912e-05,
      "loss": 0.1115,
      "step": 57
    },
    {
      "epoch": 0.08923076923076922,
      "grad_norm": 0.5949900627411874,
      "learning_rate": 1.997629166343799e-05,
      "loss": 0.1056,
      "step": 58
    },
    {
      "epoch": 0.09076923076923077,
      "grad_norm": 0.60111264237703,
      "learning_rate": 1.9975452838347513e-05,
      "loss": 0.1097,
      "step": 59
    },
    {
      "epoch": 0.09230769230769231,
      "grad_norm": 0.6683497211941376,
      "learning_rate": 1.997459944909117e-05,
      "loss": 0.1164,
      "step": 60
    },
    {
      "epoch": 0.09384615384615384,
      "grad_norm": 0.7704030456569986,
      "learning_rate": 1.9973731496914914e-05,
      "loss": 0.1164,
      "step": 61
    },
    {
      "epoch": 0.09538461538461539,
      "grad_norm": 0.707210267374331,
      "learning_rate": 1.9972848983085945e-05,
      "loss": 0.1192,
      "step": 62
    },
    {
      "epoch": 0.09692307692307692,
      "grad_norm": 0.6016618867618871,
      "learning_rate": 1.9971951908892743e-05,
      "loss": 0.111,
      "step": 63
    },
    {
      "epoch": 0.09846153846153846,
      "grad_norm": 0.662240507889871,
      "learning_rate": 1.997104027564503e-05,
      "loss": 0.1163,
      "step": 64
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5798279422985586,
      "learning_rate": 1.9970114084673796e-05,
      "loss": 0.1102,
      "step": 65
    },
    {
      "epoch": 0.10153846153846154,
      "grad_norm": 0.5611805453593508,
      "learning_rate": 1.9969173337331283e-05,
      "loss": 0.1104,
      "step": 66
    },
    {
      "epoch": 0.10307692307692308,
      "grad_norm": 0.7514312817329308,
      "learning_rate": 1.996821803499097e-05,
      "loss": 0.1239,
      "step": 67
    },
    {
      "epoch": 0.10461538461538461,
      "grad_norm": 0.5863699065118245,
      "learning_rate": 1.9967248179047612e-05,
      "loss": 0.1132,
      "step": 68
    },
    {
      "epoch": 0.10615384615384615,
      "grad_norm": 0.5464510643573779,
      "learning_rate": 1.9966263770917192e-05,
      "loss": 0.1211,
      "step": 69
    },
    {
      "epoch": 0.1076923076923077,
      "grad_norm": 0.566891174856317,
      "learning_rate": 1.996526481203695e-05,
      "loss": 0.1077,
      "step": 70
    },
    {
      "epoch": 0.10923076923076923,
      "grad_norm": 0.6246113449127777,
      "learning_rate": 1.9964251303865362e-05,
      "loss": 0.1074,
      "step": 71
    },
    {
      "epoch": 0.11076923076923077,
      "grad_norm": 0.6001393125416704,
      "learning_rate": 1.9963223247882154e-05,
      "loss": 0.1086,
      "step": 72
    },
    {
      "epoch": 0.1123076923076923,
      "grad_norm": 0.5021027088495942,
      "learning_rate": 1.996218064558829e-05,
      "loss": 0.0983,
      "step": 73
    },
    {
      "epoch": 0.11384615384615385,
      "grad_norm": 0.5707671002037145,
      "learning_rate": 1.9961123498505965e-05,
      "loss": 0.1076,
      "step": 74
    },
    {
      "epoch": 0.11538461538461539,
      "grad_norm": 0.6742358069569517,
      "learning_rate": 1.9960051808178616e-05,
      "loss": 0.1148,
      "step": 75
    },
    {
      "epoch": 0.11692307692307692,
      "grad_norm": 0.48574494522940415,
      "learning_rate": 1.995896557617091e-05,
      "loss": 0.1104,
      "step": 76
    },
    {
      "epoch": 0.11846153846153847,
      "grad_norm": 0.5943862604998493,
      "learning_rate": 1.9957864804068752e-05,
      "loss": 0.1076,
      "step": 77
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.6097303023134425,
      "learning_rate": 1.9956749493479263e-05,
      "loss": 0.1115,
      "step": 78
    },
    {
      "epoch": 0.12153846153846154,
      "grad_norm": 0.6195963924681878,
      "learning_rate": 1.99556196460308e-05,
      "loss": 0.1212,
      "step": 79
    },
    {
      "epoch": 0.12307692307692308,
      "grad_norm": 0.5930349140433203,
      "learning_rate": 1.9954475263372943e-05,
      "loss": 0.1074,
      "step": 80
    },
    {
      "epoch": 0.12461538461538461,
      "grad_norm": 0.5556371192720652,
      "learning_rate": 1.995331634717649e-05,
      "loss": 0.1064,
      "step": 81
    },
    {
      "epoch": 0.12615384615384614,
      "grad_norm": 0.6530827388442122,
      "learning_rate": 1.9952142899133452e-05,
      "loss": 0.1121,
      "step": 82
    },
    {
      "epoch": 0.1276923076923077,
      "grad_norm": 0.8859927355734215,
      "learning_rate": 1.9950954920957074e-05,
      "loss": 0.1152,
      "step": 83
    },
    {
      "epoch": 0.12923076923076923,
      "grad_norm": 0.5935612351835159,
      "learning_rate": 1.99497524143818e-05,
      "loss": 0.1179,
      "step": 84
    },
    {
      "epoch": 0.13076923076923078,
      "grad_norm": 0.4991766795445498,
      "learning_rate": 1.994853538116329e-05,
      "loss": 0.1149,
      "step": 85
    },
    {
      "epoch": 0.13230769230769232,
      "grad_norm": 0.5555671407736567,
      "learning_rate": 1.9947303823078418e-05,
      "loss": 0.1012,
      "step": 86
    },
    {
      "epoch": 0.13384615384615384,
      "grad_norm": 0.8909613249571459,
      "learning_rate": 1.994605774192525e-05,
      "loss": 0.0979,
      "step": 87
    },
    {
      "epoch": 0.13538461538461538,
      "grad_norm": 0.5310150106650096,
      "learning_rate": 1.9944797139523068e-05,
      "loss": 0.1088,
      "step": 88
    },
    {
      "epoch": 0.13692307692307693,
      "grad_norm": 0.749673935234374,
      "learning_rate": 1.994352201771236e-05,
      "loss": 0.1034,
      "step": 89
    },
    {
      "epoch": 0.13846153846153847,
      "grad_norm": 0.5378188928120664,
      "learning_rate": 1.99422323783548e-05,
      "loss": 0.1057,
      "step": 90
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.511920881530378,
      "learning_rate": 1.9940928223333254e-05,
      "loss": 0.1121,
      "step": 91
    },
    {
      "epoch": 0.14153846153846153,
      "grad_norm": 0.5822824054135747,
      "learning_rate": 1.99396095545518e-05,
      "loss": 0.1148,
      "step": 92
    },
    {
      "epoch": 0.14307692307692307,
      "grad_norm": 0.6672810329410515,
      "learning_rate": 1.9938276373935688e-05,
      "loss": 0.1158,
      "step": 93
    },
    {
      "epoch": 0.14461538461538462,
      "grad_norm": 0.4952193512759965,
      "learning_rate": 1.9936928683431368e-05,
      "loss": 0.1076,
      "step": 94
    },
    {
      "epoch": 0.14615384615384616,
      "grad_norm": 0.625543647549158,
      "learning_rate": 1.9935566485006464e-05,
      "loss": 0.1125,
      "step": 95
    },
    {
      "epoch": 0.1476923076923077,
      "grad_norm": 0.6035686191037584,
      "learning_rate": 1.993418978064979e-05,
      "loss": 0.109,
      "step": 96
    },
    {
      "epoch": 0.14923076923076922,
      "grad_norm": 0.5236314046610593,
      "learning_rate": 1.993279857237133e-05,
      "loss": 0.1152,
      "step": 97
    },
    {
      "epoch": 0.15076923076923077,
      "grad_norm": 0.6169915505799958,
      "learning_rate": 1.9931392862202255e-05,
      "loss": 0.1187,
      "step": 98
    },
    {
      "epoch": 0.1523076923076923,
      "grad_norm": 0.5079224872644268,
      "learning_rate": 1.99299726521949e-05,
      "loss": 0.1066,
      "step": 99
    },
    {
      "epoch": 0.15384615384615385,
      "grad_norm": 0.6279665388236755,
      "learning_rate": 1.992853794442277e-05,
      "loss": 0.1077,
      "step": 100
    },
| { | |
| "epoch": 0.15538461538461537, | |
| "grad_norm": 0.5259964466933738, | |
| "learning_rate": 1.992708874098054e-05, | |
| "loss": 0.1107, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.15692307692307692, | |
| "grad_norm": 0.5221587453615723, | |
| "learning_rate": 1.9925625043984052e-05, | |
| "loss": 0.103, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.15846153846153846, | |
| "grad_norm": 0.5299978643337, | |
| "learning_rate": 1.9924146855570298e-05, | |
| "loss": 0.1083, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.5822846737863101, | |
| "learning_rate": 1.9922654177897436e-05, | |
| "loss": 0.1069, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.16153846153846155, | |
| "grad_norm": 0.5319364893865596, | |
| "learning_rate": 1.9921147013144782e-05, | |
| "loss": 0.0997, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.16307692307692306, | |
| "grad_norm": 0.6977409875795292, | |
| "learning_rate": 1.9919625363512788e-05, | |
| "loss": 0.118, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.1646153846153846, | |
| "grad_norm": 0.48032168376624, | |
| "learning_rate": 1.9918089231223066e-05, | |
| "loss": 0.1048, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.16615384615384615, | |
| "grad_norm": 0.5258925153786688, | |
| "learning_rate": 1.991653861851837e-05, | |
| "loss": 0.1125, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.1676923076923077, | |
| "grad_norm": 0.7141043684744723, | |
| "learning_rate": 1.99149735276626e-05, | |
| "loss": 0.1181, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.16923076923076924, | |
| "grad_norm": 0.5038196623101785, | |
| "learning_rate": 1.9913393960940785e-05, | |
| "loss": 0.0971, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.17076923076923076, | |
| "grad_norm": 0.48472163979718835, | |
| "learning_rate": 1.9911799920659093e-05, | |
| "loss": 0.1039, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.1723076923076923, | |
| "grad_norm": 0.669381566002829, | |
| "learning_rate": 1.9910191409144825e-05, | |
| "loss": 0.1073, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.17384615384615384, | |
| "grad_norm": 0.57115456078115, | |
| "learning_rate": 1.9908568428746408e-05, | |
| "loss": 0.0954, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.1753846153846154, | |
| "grad_norm": 0.5395647646350689, | |
| "learning_rate": 1.9906930981833392e-05, | |
| "loss": 0.1088, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.17692307692307693, | |
| "grad_norm": 0.5912868508214638, | |
| "learning_rate": 1.9905279070796454e-05, | |
| "loss": 0.1035, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.17846153846153845, | |
| "grad_norm": 0.56400423327266, | |
| "learning_rate": 1.9903612698047387e-05, | |
| "loss": 0.1223, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 0.6509772627591756, | |
| "learning_rate": 1.9901931866019087e-05, | |
| "loss": 0.1165, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.18153846153846154, | |
| "grad_norm": 0.4920607096781014, | |
| "learning_rate": 1.990023657716558e-05, | |
| "loss": 0.1036, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.18307692307692308, | |
| "grad_norm": 1.003150053450431, | |
| "learning_rate": 1.989852683396198e-05, | |
| "loss": 0.109, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.18461538461538463, | |
| "grad_norm": 0.5598317796088923, | |
| "learning_rate": 1.9896802638904512e-05, | |
| "loss": 0.1099, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.18615384615384614, | |
| "grad_norm": 1.5739160303036723, | |
| "learning_rate": 1.9895063994510512e-05, | |
| "loss": 0.1149, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.18769230769230769, | |
| "grad_norm": 0.6575202377156634, | |
| "learning_rate": 1.9893310903318394e-05, | |
| "loss": 0.1115, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.18923076923076923, | |
| "grad_norm": 0.6474947347376268, | |
| "learning_rate": 1.9891543367887675e-05, | |
| "loss": 0.1105, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.19076923076923077, | |
| "grad_norm": 0.6273901725857466, | |
| "learning_rate": 1.9889761390798952e-05, | |
| "loss": 0.1137, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.19230769230769232, | |
| "grad_norm": 0.5821482969182745, | |
| "learning_rate": 1.988796497465392e-05, | |
| "loss": 0.0932, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.19384615384615383, | |
| "grad_norm": 0.6213235073882689, | |
| "learning_rate": 1.9886154122075344e-05, | |
| "loss": 0.1111, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.19538461538461538, | |
| "grad_norm": 0.6063602698529751, | |
| "learning_rate": 1.988432883570707e-05, | |
| "loss": 0.1102, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.19692307692307692, | |
| "grad_norm": 0.5820333042475602, | |
| "learning_rate": 1.9882489118214013e-05, | |
| "loss": 0.0904, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.19846153846153847, | |
| "grad_norm": 0.5911684289948976, | |
| "learning_rate": 1.9880634972282168e-05, | |
| "loss": 0.1168, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.6156260168090836, | |
| "learning_rate": 1.987876640061858e-05, | |
| "loss": 0.1037, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.20153846153846153, | |
| "grad_norm": 0.6339706412660758, | |
| "learning_rate": 1.9876883405951378e-05, | |
| "loss": 0.1023, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.20307692307692307, | |
| "grad_norm": 0.5925980576093635, | |
| "learning_rate": 1.9874985991029725e-05, | |
| "loss": 0.1058, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.20461538461538462, | |
| "grad_norm": 0.44542298631854954, | |
| "learning_rate": 1.987307415862385e-05, | |
| "loss": 0.0947, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.20615384615384616, | |
| "grad_norm": 0.6583404135837835, | |
| "learning_rate": 1.987114791152503e-05, | |
| "loss": 0.1079, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.2076923076923077, | |
| "grad_norm": 0.6888868393971377, | |
| "learning_rate": 1.9869207252545582e-05, | |
| "loss": 0.1046, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.20923076923076922, | |
| "grad_norm": 0.5864137998381262, | |
| "learning_rate": 1.9867252184518878e-05, | |
| "loss": 0.1128, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.21076923076923076, | |
| "grad_norm": 0.6371765472826588, | |
| "learning_rate": 1.986528271029931e-05, | |
| "loss": 0.1108, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.2123076923076923, | |
| "grad_norm": 0.6990532523375615, | |
| "learning_rate": 1.9863298832762317e-05, | |
| "loss": 0.1121, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.21384615384615385, | |
| "grad_norm": 0.5777908223862595, | |
| "learning_rate": 1.9861300554804357e-05, | |
| "loss": 0.1041, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.2153846153846154, | |
| "grad_norm": 0.5437019574342667, | |
| "learning_rate": 1.985928787934292e-05, | |
| "loss": 0.1041, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.2169230769230769, | |
| "grad_norm": 0.5652145969922582, | |
| "learning_rate": 1.985726080931651e-05, | |
| "loss": 0.0977, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.21846153846153846, | |
| "grad_norm": 0.6831475109006377, | |
| "learning_rate": 1.9855219347684654e-05, | |
| "loss": 0.1136, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 0.5819561900456608, | |
| "learning_rate": 1.9853163497427885e-05, | |
| "loss": 0.1022, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.22153846153846155, | |
| "grad_norm": 0.5243548380003794, | |
| "learning_rate": 1.985109326154774e-05, | |
| "loss": 0.1068, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.2230769230769231, | |
| "grad_norm": 0.6756018205387149, | |
| "learning_rate": 1.9849008643066774e-05, | |
| "loss": 0.1116, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.2246153846153846, | |
| "grad_norm": 0.5271647520035011, | |
| "learning_rate": 1.9846909645028524e-05, | |
| "loss": 0.1004, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.22615384615384615, | |
| "grad_norm": 0.49376009484771377, | |
| "learning_rate": 1.984479627049753e-05, | |
| "loss": 0.0986, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.2276923076923077, | |
| "grad_norm": 0.5009074295953662, | |
| "learning_rate": 1.9842668522559326e-05, | |
| "loss": 0.1083, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.22923076923076924, | |
| "grad_norm": 0.5120110822425591, | |
| "learning_rate": 1.9840526404320415e-05, | |
| "loss": 0.1035, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.23076923076923078, | |
| "grad_norm": 0.4393813675110799, | |
| "learning_rate": 1.9838369918908295e-05, | |
| "loss": 0.1116, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2323076923076923, | |
| "grad_norm": 0.6224825279714632, | |
| "learning_rate": 1.983619906947144e-05, | |
| "loss": 0.1067, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.23384615384615384, | |
| "grad_norm": 0.5038143439477663, | |
| "learning_rate": 1.9834013859179284e-05, | |
| "loss": 0.1104, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.2353846153846154, | |
| "grad_norm": 0.600725559118982, | |
| "learning_rate": 1.9831814291222233e-05, | |
| "loss": 0.1114, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.23692307692307693, | |
| "grad_norm": 0.561148124391653, | |
| "learning_rate": 1.982960036881167e-05, | |
| "loss": 0.1129, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.23846153846153847, | |
| "grad_norm": 0.4254379063861859, | |
| "learning_rate": 1.982737209517991e-05, | |
| "loss": 0.1018, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.5302552159073871, | |
| "learning_rate": 1.982512947358024e-05, | |
| "loss": 0.1103, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.24153846153846154, | |
| "grad_norm": 0.4768982697935648, | |
| "learning_rate": 1.982287250728689e-05, | |
| "loss": 0.0947, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.24307692307692308, | |
| "grad_norm": 0.4859749472919824, | |
| "learning_rate": 1.9820601199595027e-05, | |
| "loss": 0.1049, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.24461538461538462, | |
| "grad_norm": 0.49517524710561983, | |
| "learning_rate": 1.981831555382076e-05, | |
| "loss": 0.104, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.24615384615384617, | |
| "grad_norm": 0.462632720362608, | |
| "learning_rate": 1.981601557330114e-05, | |
| "loss": 0.101, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.24769230769230768, | |
| "grad_norm": 0.46154906358075987, | |
| "learning_rate": 1.9813701261394136e-05, | |
| "loss": 0.0971, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.24923076923076923, | |
| "grad_norm": 0.47520236421542805, | |
| "learning_rate": 1.9811372621478643e-05, | |
| "loss": 0.0887, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.25076923076923074, | |
| "grad_norm": 0.4386838659657528, | |
| "learning_rate": 1.980902965695448e-05, | |
| "loss": 0.1054, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.2523076923076923, | |
| "grad_norm": 0.5157191022451466, | |
| "learning_rate": 1.9806672371242372e-05, | |
| "loss": 0.1031, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.25384615384615383, | |
| "grad_norm": 0.49794944103500205, | |
| "learning_rate": 1.9804300767783958e-05, | |
| "loss": 0.1058, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.2553846153846154, | |
| "grad_norm": 0.5125213345791613, | |
| "learning_rate": 1.9801914850041787e-05, | |
| "loss": 0.1012, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.2569230769230769, | |
| "grad_norm": 0.5380352018124325, | |
| "learning_rate": 1.979951462149929e-05, | |
| "loss": 0.1096, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.25846153846153846, | |
| "grad_norm": 0.5283064485685516, | |
| "learning_rate": 1.979710008566081e-05, | |
| "loss": 0.0985, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 0.5187932463326723, | |
| "learning_rate": 1.979467124605156e-05, | |
| "loss": 0.0908, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.26153846153846155, | |
| "grad_norm": 0.5436736115984068, | |
| "learning_rate": 1.979222810621766e-05, | |
| "loss": 0.1033, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.2630769230769231, | |
| "grad_norm": 0.4474415033552116, | |
| "learning_rate": 1.9789770669726088e-05, | |
| "loss": 0.1053, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.26461538461538464, | |
| "grad_norm": 0.544303710798621, | |
| "learning_rate": 1.97872989401647e-05, | |
| "loss": 0.1159, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.26615384615384613, | |
| "grad_norm": 0.6201173307395067, | |
| "learning_rate": 1.9784812921142232e-05, | |
| "loss": 0.098, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.2676923076923077, | |
| "grad_norm": 0.447026396377833, | |
| "learning_rate": 1.9782312616288262e-05, | |
| "loss": 0.106, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.2692307692307692, | |
| "grad_norm": 0.5016997942574009, | |
| "learning_rate": 1.977979802925324e-05, | |
| "loss": 0.1059, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.27076923076923076, | |
| "grad_norm": 0.5883579853747641, | |
| "learning_rate": 1.977726916370847e-05, | |
| "loss": 0.1049, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.2723076923076923, | |
| "grad_norm": 0.43307778470945424, | |
| "learning_rate": 1.977472602334609e-05, | |
| "loss": 0.0917, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.27384615384615385, | |
| "grad_norm": 0.4108595764973751, | |
| "learning_rate": 1.977216861187909e-05, | |
| "loss": 0.0892, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.2753846153846154, | |
| "grad_norm": 0.5320809885623358, | |
| "learning_rate": 1.976959693304129e-05, | |
| "loss": 0.0994, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.27692307692307694, | |
| "grad_norm": 0.46480787256802003, | |
| "learning_rate": 1.9767010990587342e-05, | |
| "loss": 0.1013, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2784615384615385, | |
| "grad_norm": 0.545604832179799, | |
| "learning_rate": 1.9764410788292724e-05, | |
| "loss": 0.0989, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 0.43112552357630096, | |
| "learning_rate": 1.976179632995373e-05, | |
| "loss": 0.1045, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.2815384615384615, | |
| "grad_norm": 0.6658366935016972, | |
| "learning_rate": 1.9759167619387474e-05, | |
| "loss": 0.1233, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.28307692307692306, | |
| "grad_norm": 0.46835535422995606, | |
| "learning_rate": 1.9756524660431876e-05, | |
| "loss": 0.0956, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.2846153846153846, | |
| "grad_norm": 0.4607296339259721, | |
| "learning_rate": 1.9753867456945653e-05, | |
| "loss": 0.1161, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.28615384615384615, | |
| "grad_norm": 0.41625713094163824, | |
| "learning_rate": 1.9751196012808328e-05, | |
| "loss": 0.0896, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.2876923076923077, | |
| "grad_norm": 0.46438501415946737, | |
| "learning_rate": 1.9748510331920204e-05, | |
| "loss": 0.1016, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.28923076923076924, | |
| "grad_norm": 0.5188858027802394, | |
| "learning_rate": 1.9745810418202383e-05, | |
| "loss": 0.1199, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.2907692307692308, | |
| "grad_norm": 0.4563595166293062, | |
| "learning_rate": 1.9743096275596735e-05, | |
| "loss": 0.1028, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.2923076923076923, | |
| "grad_norm": 0.48359296387625755, | |
| "learning_rate": 1.9740367908065914e-05, | |
| "loss": 0.0925, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.29384615384615387, | |
| "grad_norm": 0.5276131684607785, | |
| "learning_rate": 1.9737625319593338e-05, | |
| "loss": 0.1083, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.2953846153846154, | |
| "grad_norm": 0.38771222370278047, | |
| "learning_rate": 1.973486851418318e-05, | |
| "loss": 0.0904, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.2969230769230769, | |
| "grad_norm": 0.5429417039030707, | |
| "learning_rate": 1.9732097495860388e-05, | |
| "loss": 0.0963, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.29846153846153844, | |
| "grad_norm": 0.5257696459782908, | |
| "learning_rate": 1.9729312268670642e-05, | |
| "loss": 0.0976, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 0.4550685781931343, | |
| "learning_rate": 1.972651283668038e-05, | |
| "loss": 0.0992, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.30153846153846153, | |
| "grad_norm": 0.49086953834293273, | |
| "learning_rate": 1.9723699203976768e-05, | |
| "loss": 0.0994, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.3030769230769231, | |
| "grad_norm": 0.5264210943330065, | |
| "learning_rate": 1.9720871374667714e-05, | |
| "loss": 0.1021, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.3046153846153846, | |
| "grad_norm": 0.507327301576836, | |
| "learning_rate": 1.9718029352881857e-05, | |
| "loss": 0.0972, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.30615384615384617, | |
| "grad_norm": 0.4732432615412584, | |
| "learning_rate": 1.971517314276854e-05, | |
| "loss": 0.0918, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 0.6042723204316705, | |
| "learning_rate": 1.9712302748497838e-05, | |
| "loss": 0.1064, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.30923076923076925, | |
| "grad_norm": 0.46540271774907827, | |
| "learning_rate": 1.9709418174260523e-05, | |
| "loss": 0.0906, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.31076923076923074, | |
| "grad_norm": 0.5214921994645452, | |
| "learning_rate": 1.9706519424268077e-05, | |
| "loss": 0.093, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.3123076923076923, | |
| "grad_norm": 0.4249410321063521, | |
| "learning_rate": 1.9703606502752674e-05, | |
| "loss": 0.1011, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.31384615384615383, | |
| "grad_norm": 0.5120931365008091, | |
| "learning_rate": 1.970067941396719e-05, | |
| "loss": 0.1093, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.3153846153846154, | |
| "grad_norm": 0.46019038450225697, | |
| "learning_rate": 1.9697738162185163e-05, | |
| "loss": 0.1094, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.3169230769230769, | |
| "grad_norm": 0.43539468765926526, | |
| "learning_rate": 1.969478275170083e-05, | |
| "loss": 0.0867, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.31846153846153846, | |
| "grad_norm": 0.499590646424274, | |
| "learning_rate": 1.969181318682909e-05, | |
| "loss": 0.0908, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.540818973654555, | |
| "learning_rate": 1.9688829471905507e-05, | |
| "loss": 0.1023, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.32153846153846155, | |
| "grad_norm": 0.4730876526778854, | |
| "learning_rate": 1.9685831611286312e-05, | |
| "loss": 0.1026, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.3230769230769231, | |
| "grad_norm": 0.49776463757579537, | |
| "learning_rate": 1.968281960934838e-05, | |
| "loss": 0.0996, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.32461538461538464, | |
| "grad_norm": 0.4311409270544766, | |
| "learning_rate": 1.967979347048923e-05, | |
| "loss": 0.099, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.3261538461538461, | |
| "grad_norm": 0.437076731735488, | |
| "learning_rate": 1.9676753199127033e-05, | |
| "loss": 0.0947, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.32769230769230767, | |
| "grad_norm": 0.4670077669146603, | |
| "learning_rate": 1.9673698799700582e-05, | |
| "loss": 0.0933, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.3292307692307692, | |
| "grad_norm": 0.4919569174836587, | |
| "learning_rate": 1.9670630276669305e-05, | |
| "loss": 0.1001, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.33076923076923076, | |
| "grad_norm": 0.47210284604663544, | |
| "learning_rate": 1.9667547634513248e-05, | |
| "loss": 0.1009, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.3323076923076923, | |
| "grad_norm": 0.37651995607019445, | |
| "learning_rate": 1.9664450877733065e-05, | |
| "loss": 0.0898, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.33384615384615385, | |
| "grad_norm": 0.534190751602756, | |
| "learning_rate": 1.9661340010850025e-05, | |
| "loss": 0.1104, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.3353846153846154, | |
| "grad_norm": 0.4951807767134038, | |
| "learning_rate": 1.9658215038405997e-05, | |
| "loss": 0.1068, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.33692307692307694, | |
| "grad_norm": 0.5728927862246131, | |
| "learning_rate": 1.9655075964963443e-05, | |
| "loss": 0.1055, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.3384615384615385, | |
| "grad_norm": 0.47984244005432625, | |
| "learning_rate": 1.9651922795105404e-05, | |
| "loss": 0.1021, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 0.5413827050893782, | |
| "learning_rate": 1.9648755533435517e-05, | |
| "loss": 0.1075, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.3415384615384615, | |
| "grad_norm": 0.4794837943323609, | |
| "learning_rate": 1.9645574184577982e-05, | |
| "loss": 0.1042, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.34307692307692306, | |
| "grad_norm": 0.47022722324174265, | |
| "learning_rate": 1.9642378753177573e-05, | |
| "loss": 0.0931, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.3446153846153846, | |
| "grad_norm": 0.43452426712372355, | |
| "learning_rate": 1.963916924389962e-05, | |
| "loss": 0.0918, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.34615384615384615, | |
| "grad_norm": 0.4951463295701747, | |
| "learning_rate": 1.9635945661430006e-05, | |
| "loss": 0.0958, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.3476923076923077, | |
| "grad_norm": 0.5231519816188233, | |
| "learning_rate": 1.9632708010475166e-05, | |
| "loss": 0.0985, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.34923076923076923, | |
| "grad_norm": 0.4125636678302975, | |
| "learning_rate": 1.9629456295762067e-05, | |
| "loss": 0.0922, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.3507692307692308, | |
| "grad_norm": 0.45904306816999607, | |
| "learning_rate": 1.962619052203822e-05, | |
| "loss": 0.0992, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.3523076923076923, | |
| "grad_norm": 0.4970133845701183, | |
| "learning_rate": 1.9622910694071654e-05, | |
| "loss": 0.0932, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.35384615384615387, | |
| "grad_norm": 0.37664569358821015, | |
| "learning_rate": 1.961961681665092e-05, | |
| "loss": 0.0906, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.3553846153846154, | |
| "grad_norm": 0.4876745281374299, | |
| "learning_rate": 1.9616308894585078e-05, | |
| "loss": 0.0994, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.3569230769230769, | |
| "grad_norm": 0.4680368478168109, | |
| "learning_rate": 1.9612986932703698e-05, | |
| "loss": 0.1076, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.35846153846153844, | |
| "grad_norm": 0.4857848795913984, | |
| "learning_rate": 1.9609650935856847e-05, | |
| "loss": 0.1032, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 0.7291002862027431, | |
| "learning_rate": 1.9606300908915076e-05, | |
| "loss": 0.1071, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.36153846153846153, | |
| "grad_norm": 0.6588220126790021, | |
| "learning_rate": 1.9602936856769432e-05, | |
| "loss": 0.0943, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.3630769230769231, | |
| "grad_norm": 0.5848274657760204, | |
| "learning_rate": 1.959955878433143e-05, | |
| "loss": 0.1132, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.3646153846153846, | |
| "grad_norm": 0.49893024052580587, | |
| "learning_rate": 1.9596166696533062e-05, | |
| "loss": 0.1029, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.36615384615384616, | |
| "grad_norm": 0.5662642891333689, | |
| "learning_rate": 1.959276059832677e-05, | |
| "loss": 0.1079, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.3676923076923077, | |
| "grad_norm": 0.5411680818094676, | |
| "learning_rate": 1.9589340494685464e-05, | |
| "loss": 0.0942, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.36923076923076925, | |
| "grad_norm": 0.49716800756716995, | |
| "learning_rate": 1.95859063906025e-05, | |
| "loss": 0.1028, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.3707692307692308, | |
| "grad_norm": 0.5605318150208776, | |
| "learning_rate": 1.9582458291091664e-05, | |
| "loss": 0.0976, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.3723076923076923, | |
| "grad_norm": 0.6030924939164457, | |
| "learning_rate": 1.9578996201187187e-05, | |
| "loss": 0.1022, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.37384615384615383, | |
| "grad_norm": 0.4416558734944546, | |
| "learning_rate": 1.957552012594372e-05, | |
| "loss": 0.0903, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.37538461538461537, | |
| "grad_norm": 0.5329784217079914, | |
| "learning_rate": 1.957203007043634e-05, | |
| "loss": 0.1164, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.3769230769230769, | |
| "grad_norm": 0.4644145144804026, | |
| "learning_rate": 1.956852603976052e-05, | |
| "loss": 0.0995, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.37846153846153846, | |
| "grad_norm": 0.43308288576075765, | |
| "learning_rate": 1.9565008039032158e-05, | |
| "loss": 0.0866, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 0.556662892440361, | |
| "learning_rate": 1.9561476073387527e-05, | |
| "loss": 0.0977, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.38153846153846155, | |
| "grad_norm": 0.5153442689897639, | |
| "learning_rate": 1.9557930147983303e-05, | |
| "loss": 0.0979, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.3830769230769231, | |
| "grad_norm": 0.4731621488928602, | |
| "learning_rate": 1.9554370267996537e-05, | |
| "loss": 0.1052, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.38461538461538464, | |
| "grad_norm": 0.4461201241494072, | |
| "learning_rate": 1.9550796438624656e-05, | |
| "loss": 0.0947, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.3861538461538462, | |
| "grad_norm": 0.47389146156943096, | |
| "learning_rate": 1.954720866508546e-05, | |
| "loss": 0.0995, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.38769230769230767, | |
| "grad_norm": 0.54566585639894, | |
| "learning_rate": 1.9543606952617088e-05, | |
| "loss": 0.1016, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.3892307692307692, | |
| "grad_norm": 0.582991146196512, | |
| "learning_rate": 1.9539991306478046e-05, | |
| "loss": 0.1188, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.39076923076923076, | |
| "grad_norm": 0.4578903975000862, | |
| "learning_rate": 1.9536361731947182e-05, | |
| "loss": 0.0928, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.3923076923076923, | |
| "grad_norm": 0.590776299920465, | |
| "learning_rate": 1.953271823432367e-05, | |
| "loss": 0.1132, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.39384615384615385, | |
| "grad_norm": 0.6253009080591825, | |
| "learning_rate": 1.9529060818927032e-05, | |
| "loss": 0.0979, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.3953846153846154, | |
| "grad_norm": 0.531397871124118, | |
| "learning_rate": 1.952538949109708e-05, | |
| "loss": 0.0959, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.39692307692307693, | |
| "grad_norm": 0.46705935199941867, | |
| "learning_rate": 1.9521704256193962e-05, | |
| "loss": 0.104, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.3984615384615385, | |
| "grad_norm": 0.5705361981014778, | |
| "learning_rate": 1.9518005119598124e-05, | |
| "loss": 0.1083, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.5695832340146562, | |
| "learning_rate": 1.9514292086710307e-05, | |
| "loss": 0.0992, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.4015384615384615, | |
| "grad_norm": 0.4464529106040974, | |
| "learning_rate": 1.9510565162951538e-05, | |
| "loss": 0.0987, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.40307692307692305, | |
| "grad_norm": 0.43620342439803284, | |
| "learning_rate": 1.9506824353763127e-05, | |
| "loss": 0.103, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.4046153846153846, | |
| "grad_norm": 0.48936436282657725, | |
| "learning_rate": 1.9503069664606663e-05, | |
| "loss": 0.1125, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.40615384615384614, | |
| "grad_norm": 0.5935399529172605, | |
| "learning_rate": 1.9499301100963987e-05, | |
| "loss": 0.1065, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.4076923076923077, | |
| "grad_norm": 0.5489632101646817, | |
| "learning_rate": 1.9495518668337204e-05, | |
| "loss": 0.0874, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.40923076923076923, | |
| "grad_norm": 0.4872571558922733, | |
| "learning_rate": 1.949172237224867e-05, | |
| "loss": 0.1067, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.4107692307692308, | |
| "grad_norm": 0.495735492589365, | |
| "learning_rate": 1.9487912218240983e-05, | |
| "loss": 0.1025, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.4123076923076923, | |
| "grad_norm": 0.520835006964255, | |
| "learning_rate": 1.9484088211876963e-05, | |
| "loss": 0.0926, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.41384615384615386, | |
| "grad_norm": 0.4979596342187618, | |
| "learning_rate": 1.9480250358739667e-05, | |
| "loss": 0.1001, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.4153846153846154, | |
| "grad_norm": 0.5398277717258808, | |
| "learning_rate": 1.9476398664432356e-05, | |
| "loss": 0.1038, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.4169230769230769, | |
| "grad_norm": 0.4620486050085766, | |
| "learning_rate": 1.947253313457851e-05, | |
| "loss": 0.0909, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.41846153846153844, | |
| "grad_norm": 0.5088637188704777, | |
| "learning_rate": 1.9468653774821803e-05, | |
| "loss": 0.1038, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 0.5312467309003265, | |
| "learning_rate": 1.94647605908261e-05, | |
| "loss": 0.1003, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.42153846153846153, | |
| "grad_norm": 0.5353118330387302, | |
| "learning_rate": 1.9460853588275454e-05, | |
| "loss": 0.0915, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.4230769230769231, | |
| "grad_norm": 0.5883396050759481, | |
| "learning_rate": 1.9456932772874092e-05, | |
| "loss": 0.0964, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.4246153846153846, | |
| "grad_norm": 0.3782557300574429, | |
| "learning_rate": 1.9452998150346403e-05, | |
| "loss": 0.0941, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.42615384615384616, | |
| "grad_norm": 0.5038727416567336, | |
| "learning_rate": 1.944904972643694e-05, | |
| "loss": 0.0966, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.4276923076923077, | |
| "grad_norm": 0.6044080637491093, | |
| "learning_rate": 1.9445087506910403e-05, | |
| "loss": 0.0945, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.42923076923076925, | |
| "grad_norm": 0.4283489498603955, | |
| "learning_rate": 1.944111149755164e-05, | |
| "loss": 0.0923, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.4307692307692308, | |
| "grad_norm": 0.4755286664345765, | |
| "learning_rate": 1.9437121704165612e-05, | |
| "loss": 0.0981, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.4323076923076923, | |
| "grad_norm": 0.4034836375835124, | |
| "learning_rate": 1.9433118132577432e-05, | |
| "loss": 0.0956, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.4338461538461538, | |
| "grad_norm": 0.5243724118676012, | |
| "learning_rate": 1.9429100788632313e-05, | |
| "loss": 0.0968, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.43538461538461537, | |
| "grad_norm": 0.4909950952163068, | |
| "learning_rate": 1.9425069678195577e-05, | |
| "loss": 0.0836, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.4369230769230769, | |
| "grad_norm": 0.6197661444062966, | |
| "learning_rate": 1.9421024807152652e-05, | |
| "loss": 0.1062, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.43846153846153846, | |
| "grad_norm": 0.4641188532073514, | |
| "learning_rate": 1.9416966181409047e-05, | |
| "loss": 0.0907, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 0.581115105716189, | |
| "learning_rate": 1.9412893806890358e-05, | |
| "loss": 0.1136, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.44153846153846155, | |
| "grad_norm": 0.5532740369160736, | |
| "learning_rate": 1.9408807689542257e-05, | |
| "loss": 0.095, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.4430769230769231, | |
| "grad_norm": 0.610460188491936, | |
| "learning_rate": 1.9404707835330475e-05, | |
| "loss": 0.102, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.44461538461538463, | |
| "grad_norm": 0.48489275085594646, | |
| "learning_rate": 1.94005942502408e-05, | |
| "loss": 0.1086, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.4461538461538462, | |
| "grad_norm": 0.4350745012942025, | |
| "learning_rate": 1.9396466940279067e-05, | |
| "loss": 0.1027, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.44769230769230767, | |
| "grad_norm": 0.4950020730573976, | |
| "learning_rate": 1.9392325911471154e-05, | |
| "loss": 0.1055, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.4492307692307692, | |
| "grad_norm": 0.4788608653907359, | |
| "learning_rate": 1.9388171169862967e-05, | |
| "loss": 0.1056, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.45076923076923076, | |
| "grad_norm": 0.451391245157289, | |
| "learning_rate": 1.9384002721520423e-05, | |
| "loss": 0.1034, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.4523076923076923, | |
| "grad_norm": 0.46706566593370247, | |
| "learning_rate": 1.9379820572529463e-05, | |
| "loss": 0.1032, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.45384615384615384, | |
| "grad_norm": 0.5208831601136724, | |
| "learning_rate": 1.937562472899603e-05, | |
| "loss": 0.1048, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.4553846153846154, | |
| "grad_norm": 0.4746886544547485, | |
| "learning_rate": 1.9371415197046054e-05, | |
| "loss": 0.0962, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.45692307692307693, | |
| "grad_norm": 0.5099090103562837, | |
| "learning_rate": 1.936719198282545e-05, | |
| "loss": 0.0975, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.4584615384615385, | |
| "grad_norm": 0.5371114216801979, | |
| "learning_rate": 1.936295509250012e-05, | |
| "loss": 0.0995, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 0.4583000967152501, | |
| "learning_rate": 1.935870453225592e-05, | |
| "loss": 0.0867, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.46153846153846156, | |
| "grad_norm": 0.4824466157164947, | |
| "learning_rate": 1.9354440308298676e-05, | |
| "loss": 0.1027, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.46307692307692305, | |
| "grad_norm": 0.3731332197286788, | |
| "learning_rate": 1.9350162426854152e-05, | |
| "loss": 0.0937, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.4646153846153846, | |
| "grad_norm": 0.5904943219369095, | |
| "learning_rate": 1.9345870894168056e-05, | |
| "loss": 0.1018, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.46615384615384614, | |
| "grad_norm": 0.4937106647701358, | |
| "learning_rate": 1.934156571650603e-05, | |
| "loss": 0.099, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.4676923076923077, | |
| "grad_norm": 0.3616397861267969, | |
| "learning_rate": 1.9337246900153637e-05, | |
| "loss": 0.0899, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.46923076923076923, | |
| "grad_norm": 0.4337547379197528, | |
| "learning_rate": 1.933291445141635e-05, | |
| "loss": 0.1094, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.4707692307692308, | |
| "grad_norm": 0.47132758397572894, | |
| "learning_rate": 1.932856837661954e-05, | |
| "loss": 0.0932, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.4723076923076923, | |
| "grad_norm": 0.5249116456685259, | |
| "learning_rate": 1.9324208682108493e-05, | |
| "loss": 0.0955, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.47384615384615386, | |
| "grad_norm": 0.4099876886395493, | |
| "learning_rate": 1.931983537424835e-05, | |
| "loss": 0.0925, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.4753846153846154, | |
| "grad_norm": 0.4571620412260989, | |
| "learning_rate": 1.931544845942415e-05, | |
| "loss": 0.0974, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.47692307692307695, | |
| "grad_norm": 0.5347160775026656, | |
| "learning_rate": 1.9311047944040792e-05, | |
| "loss": 0.1021, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.47846153846153844, | |
| "grad_norm": 0.45731698473766913, | |
| "learning_rate": 1.9306633834523022e-05, | |
| "loss": 0.0995, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.4463396489272035, | |
| "learning_rate": 1.930220613731545e-05, | |
| "loss": 0.0936, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.4815384615384615, | |
| "grad_norm": 0.449418485015138, | |
| "learning_rate": 1.9297764858882516e-05, | |
| "loss": 0.0883, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.48307692307692307, | |
| "grad_norm": 0.5562810223949127, | |
| "learning_rate": 1.9293310005708485e-05, | |
| "loss": 0.1089, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.4846153846153846, | |
| "grad_norm": 0.40867491037166837, | |
| "learning_rate": 1.9288841584297445e-05, | |
| "loss": 0.087, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.48615384615384616, | |
| "grad_norm": 0.6068014941268159, | |
| "learning_rate": 1.9284359601173295e-05, | |
| "loss": 0.1002, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.4876923076923077, | |
| "grad_norm": 0.5485840263407294, | |
| "learning_rate": 1.927986406287973e-05, | |
| "loss": 0.1071, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.48923076923076925, | |
| "grad_norm": 0.4326599983402119, | |
| "learning_rate": 1.9275354975980245e-05, | |
| "loss": 0.0972, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.4907692307692308, | |
| "grad_norm": 0.48038555828580165, | |
| "learning_rate": 1.92708323470581e-05, | |
| "loss": 0.0962, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.49230769230769234, | |
| "grad_norm": 0.5096604190301004, | |
| "learning_rate": 1.926629618271634e-05, | |
| "loss": 0.1089, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.4938461538461538, | |
| "grad_norm": 0.5074786000084778, | |
| "learning_rate": 1.9261746489577767e-05, | |
| "loss": 0.0953, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.49538461538461537, | |
| "grad_norm": 0.3551898568112085, | |
| "learning_rate": 1.9257183274284934e-05, | |
| "loss": 0.0812, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.4969230769230769, | |
| "grad_norm": 0.4154869588023741, | |
| "learning_rate": 1.925260654350014e-05, | |
| "loss": 0.0878, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.49846153846153846, | |
| "grad_norm": 0.46068163561485903, | |
| "learning_rate": 1.924801630390541e-05, | |
| "loss": 0.0994, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.49072829349035685, | |
| "learning_rate": 1.92434125622025e-05, | |
| "loss": 0.1022, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.5015384615384615, | |
| "grad_norm": 0.43272682508224936, | |
| "learning_rate": 1.9238795325112867e-05, | |
| "loss": 0.1084, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.5030769230769231, | |
| "grad_norm": 0.4497283552516036, | |
| "learning_rate": 1.9234164599377692e-05, | |
| "loss": 0.0967, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.5046153846153846, | |
| "grad_norm": 0.4492106413880127, | |
| "learning_rate": 1.9229520391757828e-05, | |
| "loss": 0.1007, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.5061538461538462, | |
| "grad_norm": 0.4890385196278267, | |
| "learning_rate": 1.9224862709033823e-05, | |
| "loss": 0.09, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.5076923076923077, | |
| "grad_norm": 0.4508644606981008, | |
| "learning_rate": 1.9220191558005897e-05, | |
| "loss": 0.0969, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.5092307692307693, | |
| "grad_norm": 0.5761631636154984, | |
| "learning_rate": 1.9215506945493933e-05, | |
| "loss": 0.1051, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.5107692307692308, | |
| "grad_norm": 0.4285970063917521, | |
| "learning_rate": 1.921080887833746e-05, | |
| "loss": 0.0952, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.5123076923076924, | |
| "grad_norm": 0.46226415222748873, | |
| "learning_rate": 1.9206097363395668e-05, | |
| "loss": 0.099, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.5138461538461538, | |
| "grad_norm": 0.44278457207986294, | |
| "learning_rate": 1.9201372407547367e-05, | |
| "loss": 0.1025, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.5153846153846153, | |
| "grad_norm": 0.5672748490771847, | |
| "learning_rate": 1.9196634017690993e-05, | |
| "loss": 0.1013, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.5169230769230769, | |
| "grad_norm": 0.4566472958118853, | |
| "learning_rate": 1.9191882200744602e-05, | |
| "loss": 0.1158, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.5184615384615384, | |
| "grad_norm": 0.5248322529451815, | |
| "learning_rate": 1.9187116963645845e-05, | |
| "loss": 0.0957, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 0.44219344780447695, | |
| "learning_rate": 1.918233831335197e-05, | |
| "loss": 0.0975, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.5215384615384615, | |
| "grad_norm": 0.378145742243331, | |
| "learning_rate": 1.9177546256839814e-05, | |
| "loss": 0.0855, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.5230769230769231, | |
| "grad_norm": 0.4543314495704193, | |
| "learning_rate": 1.9172740801105777e-05, | |
| "loss": 0.101, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.5246153846153846, | |
| "grad_norm": 0.5388495866373283, | |
| "learning_rate": 1.9167921953165827e-05, | |
| "loss": 0.1077, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.5261538461538462, | |
| "grad_norm": 0.4931391711463696, | |
| "learning_rate": 1.9163089720055484e-05, | |
| "loss": 0.0937, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.5276923076923077, | |
| "grad_norm": 0.5226419648010249, | |
| "learning_rate": 1.9158244108829815e-05, | |
| "loss": 0.0978, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.5292307692307693, | |
| "grad_norm": 0.514073044628901, | |
| "learning_rate": 1.915338512656341e-05, | |
| "loss": 0.1116, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.5307692307692308, | |
| "grad_norm": 0.536463160406073, | |
| "learning_rate": 1.9148512780350384e-05, | |
| "loss": 0.09, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.5323076923076923, | |
| "grad_norm": 0.40472290941801015, | |
| "learning_rate": 1.914362707730437e-05, | |
| "loss": 0.096, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.5338461538461539, | |
| "grad_norm": 0.4419558495038164, | |
| "learning_rate": 1.9138728024558494e-05, | |
| "loss": 0.1043, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.5353846153846153, | |
| "grad_norm": 0.45250393937417616, | |
| "learning_rate": 1.913381562926538e-05, | |
| "loss": 0.1044, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.536923076923077, | |
| "grad_norm": 0.4970429188190532, | |
| "learning_rate": 1.9128889898597117e-05, | |
| "loss": 0.1002, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.5384615384615384, | |
| "grad_norm": 0.5382027157131241, | |
| "learning_rate": 1.912395083974528e-05, | |
| "loss": 0.0992, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 0.4173235180068536, | |
| "learning_rate": 1.91189984599209e-05, | |
| "loss": 0.0873, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.5415384615384615, | |
| "grad_norm": 0.42760297701096356, | |
| "learning_rate": 1.9114032766354453e-05, | |
| "loss": 0.096, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.5430769230769231, | |
| "grad_norm": 0.524358521462744, | |
| "learning_rate": 1.910905376629585e-05, | |
| "loss": 0.0959, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.5446153846153846, | |
| "grad_norm": 0.47250449647238696, | |
| "learning_rate": 1.910406146701444e-05, | |
| "loss": 0.0849, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.5461538461538461, | |
| "grad_norm": 0.44398596927938827, | |
| "learning_rate": 1.9099055875798974e-05, | |
| "loss": 0.1024, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.5476923076923077, | |
| "grad_norm": 0.46499606497240575, | |
| "learning_rate": 1.9094036999957623e-05, | |
| "loss": 0.0925, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.5492307692307692, | |
| "grad_norm": 0.3886634015877156, | |
| "learning_rate": 1.9089004846817947e-05, | |
| "loss": 0.0807, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.5507692307692308, | |
| "grad_norm": 0.4118047391602366, | |
| "learning_rate": 1.908395942372689e-05, | |
| "loss": 0.0988, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.5523076923076923, | |
| "grad_norm": 0.434600405160762, | |
| "learning_rate": 1.9078900738050776e-05, | |
| "loss": 0.1069, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.5538461538461539, | |
| "grad_norm": 0.4148807319841889, | |
| "learning_rate": 1.9073828797175284e-05, | |
| "loss": 0.1028, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.5553846153846154, | |
| "grad_norm": 0.42028520435641853, | |
| "learning_rate": 1.9068743608505454e-05, | |
| "loss": 0.1056, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.556923076923077, | |
| "grad_norm": 0.43510420212696854, | |
| "learning_rate": 1.9063645179465663e-05, | |
| "loss": 0.1009, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.5584615384615385, | |
| "grad_norm": 0.4936074681803371, | |
| "learning_rate": 1.905853351749962e-05, | |
| "loss": 0.1, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 0.3632144105495107, | |
| "learning_rate": 1.9053408630070352e-05, | |
| "loss": 0.0916, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.5615384615384615, | |
| "grad_norm": 0.3536771592078361, | |
| "learning_rate": 1.9048270524660197e-05, | |
| "loss": 0.0913, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.563076923076923, | |
| "grad_norm": 0.483138657389253, | |
| "learning_rate": 1.904311920877079e-05, | |
| "loss": 0.0992, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.5646153846153846, | |
| "grad_norm": 0.4057661988297129, | |
| "learning_rate": 1.903795468992306e-05, | |
| "loss": 0.1027, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.5661538461538461, | |
| "grad_norm": 0.40666719198061757, | |
| "learning_rate": 1.9032776975657207e-05, | |
| "loss": 0.0981, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.5676923076923077, | |
| "grad_norm": 0.44070081776133335, | |
| "learning_rate": 1.902758607353269e-05, | |
| "loss": 0.1104, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.5692307692307692, | |
| "grad_norm": 0.5047821437081584, | |
| "learning_rate": 1.9022381991128235e-05, | |
| "loss": 0.0963, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.5707692307692308, | |
| "grad_norm": 0.42260385596144445, | |
| "learning_rate": 1.9017164736041795e-05, | |
| "loss": 0.1029, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.5723076923076923, | |
| "grad_norm": 0.4244718881899699, | |
| "learning_rate": 1.9011934315890576e-05, | |
| "loss": 0.0966, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.5738461538461539, | |
| "grad_norm": 0.4953235016748773, | |
| "learning_rate": 1.9006690738310988e-05, | |
| "loss": 0.1037, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.5753846153846154, | |
| "grad_norm": 0.3770254505903871, | |
| "learning_rate": 1.900143401095866e-05, | |
| "loss": 0.086, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.5769230769230769, | |
| "grad_norm": 0.41771963874965723, | |
| "learning_rate": 1.8996164141508412e-05, | |
| "loss": 0.0858, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.5784615384615385, | |
| "grad_norm": 0.4061141404696647, | |
| "learning_rate": 1.899088113765426e-05, | |
| "loss": 0.1021, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 0.45946326931814974, | |
| "learning_rate": 1.898558500710939e-05, | |
| "loss": 0.0979, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.5815384615384616, | |
| "grad_norm": 0.4143960427519077, | |
| "learning_rate": 1.8980275757606157e-05, | |
| "loss": 0.0965, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.583076923076923, | |
| "grad_norm": 0.40587432220084, | |
| "learning_rate": 1.8974953396896066e-05, | |
| "loss": 0.0953, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.5846153846153846, | |
| "grad_norm": 0.4036824003812063, | |
| "learning_rate": 1.8969617932749766e-05, | |
| "loss": 0.0947, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5861538461538461, | |
| "grad_norm": 0.431070678494207, | |
| "learning_rate": 1.896426937295704e-05, | |
| "loss": 0.103, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.5876923076923077, | |
| "grad_norm": 0.3955536485889764, | |
| "learning_rate": 1.8958907725326783e-05, | |
| "loss": 0.0928, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.5892307692307692, | |
| "grad_norm": 0.4357741837489556, | |
| "learning_rate": 1.8953532997687008e-05, | |
| "loss": 0.0997, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.5907692307692308, | |
| "grad_norm": 0.39754441486925746, | |
| "learning_rate": 1.8948145197884815e-05, | |
| "loss": 0.0859, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.5923076923076923, | |
| "grad_norm": 0.43549014378990397, | |
| "learning_rate": 1.89427443337864e-05, | |
| "loss": 0.0934, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.5938461538461538, | |
| "grad_norm": 0.42300645280983395, | |
| "learning_rate": 1.893733041327702e-05, | |
| "loss": 0.0951, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.5953846153846154, | |
| "grad_norm": 0.41282699722561766, | |
| "learning_rate": 1.8931903444261007e-05, | |
| "loss": 0.0941, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.5969230769230769, | |
| "grad_norm": 0.4480032982295742, | |
| "learning_rate": 1.8926463434661738e-05, | |
| "loss": 0.0905, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.5984615384615385, | |
| "grad_norm": 0.44297004392731376, | |
| "learning_rate": 1.8921010392421628e-05, | |
| "loss": 0.1016, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 0.5433110545398998, | |
| "learning_rate": 1.8915544325502123e-05, | |
| "loss": 0.0935, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.6015384615384616, | |
| "grad_norm": 0.40571632988210427, | |
| "learning_rate": 1.891006524188368e-05, | |
| "loss": 0.0937, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.6030769230769231, | |
| "grad_norm": 0.43732620714659554, | |
| "learning_rate": 1.8904573149565766e-05, | |
| "loss": 0.0947, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.6046153846153847, | |
| "grad_norm": 0.4600862732120384, | |
| "learning_rate": 1.889906805656684e-05, | |
| "loss": 0.095, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.6061538461538462, | |
| "grad_norm": 0.44446566484906674, | |
| "learning_rate": 1.8893549970924335e-05, | |
| "loss": 0.1005, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.6076923076923076, | |
| "grad_norm": 0.4372133849372649, | |
| "learning_rate": 1.888801890069467e-05, | |
| "loss": 0.0926, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.6092307692307692, | |
| "grad_norm": 0.4496197566055402, | |
| "learning_rate": 1.8882474853953193e-05, | |
| "loss": 0.0889, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.6107692307692307, | |
| "grad_norm": 0.43381954647371906, | |
| "learning_rate": 1.8876917838794226e-05, | |
| "loss": 0.0892, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.6123076923076923, | |
| "grad_norm": 0.5656036739453132, | |
| "learning_rate": 1.8871347863331015e-05, | |
| "loss": 0.087, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.6138461538461538, | |
| "grad_norm": 0.42869081240488516, | |
| "learning_rate": 1.886576493569572e-05, | |
| "loss": 0.0999, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 0.9645884517575197, | |
| "learning_rate": 1.8860169064039422e-05, | |
| "loss": 0.0877, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6169230769230769, | |
| "grad_norm": 0.3909376825178073, | |
| "learning_rate": 1.8854560256532098e-05, | |
| "loss": 0.09, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.6184615384615385, | |
| "grad_norm": 0.41607837043434165, | |
| "learning_rate": 1.884893852136261e-05, | |
| "loss": 0.0962, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 0.4118435975798478, | |
| "learning_rate": 1.884330386673869e-05, | |
| "loss": 0.0914, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.6215384615384615, | |
| "grad_norm": 0.41468438143981523, | |
| "learning_rate": 1.8837656300886937e-05, | |
| "loss": 0.0932, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.6230769230769231, | |
| "grad_norm": 0.42739734031323684, | |
| "learning_rate": 1.8831995832052802e-05, | |
| "loss": 0.1006, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.6246153846153846, | |
| "grad_norm": 0.40676045777564296, | |
| "learning_rate": 1.8826322468500567e-05, | |
| "loss": 0.0911, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.6261538461538462, | |
| "grad_norm": 0.4400751065379498, | |
| "learning_rate": 1.8820636218513354e-05, | |
| "loss": 0.099, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.6276923076923077, | |
| "grad_norm": 0.42437410068559034, | |
| "learning_rate": 1.8814937090393082e-05, | |
| "loss": 0.0968, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.6292307692307693, | |
| "grad_norm": 0.3919701002838343, | |
| "learning_rate": 1.8809225092460488e-05, | |
| "loss": 0.0936, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.6307692307692307, | |
| "grad_norm": 0.5028949538298951, | |
| "learning_rate": 1.880350023305509e-05, | |
| "loss": 0.0948, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.6323076923076923, | |
| "grad_norm": 0.4075578032238594, | |
| "learning_rate": 1.8797762520535178e-05, | |
| "loss": 0.0981, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.6338461538461538, | |
| "grad_norm": 0.34153544828671295, | |
| "learning_rate": 1.8792011963277827e-05, | |
| "loss": 0.0887, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.6353846153846154, | |
| "grad_norm": 0.49338561568395417, | |
| "learning_rate": 1.8786248569678847e-05, | |
| "loss": 0.1008, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.6369230769230769, | |
| "grad_norm": 0.3911057155983341, | |
| "learning_rate": 1.8780472348152792e-05, | |
| "loss": 0.0957, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.6384615384615384, | |
| "grad_norm": 0.4863828952857313, | |
| "learning_rate": 1.8774683307132956e-05, | |
| "loss": 0.0962, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.4959721632631627, | |
| "learning_rate": 1.876888145507133e-05, | |
| "loss": 0.0974, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.6415384615384615, | |
| "grad_norm": 0.44751359386684436, | |
| "learning_rate": 1.8763066800438638e-05, | |
| "loss": 0.1072, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.6430769230769231, | |
| "grad_norm": 0.4168309844234487, | |
| "learning_rate": 1.8757239351724262e-05, | |
| "loss": 0.0986, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.6446153846153846, | |
| "grad_norm": 0.48992479490511887, | |
| "learning_rate": 1.8751399117436292e-05, | |
| "loss": 0.1142, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.6461538461538462, | |
| "grad_norm": 0.5250869322124576, | |
| "learning_rate": 1.8745546106101466e-05, | |
| "loss": 0.0966, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.6476923076923077, | |
| "grad_norm": 0.41428994308159467, | |
| "learning_rate": 1.873968032626518e-05, | |
| "loss": 0.0964, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.6492307692307693, | |
| "grad_norm": 0.40863286499944806, | |
| "learning_rate": 1.8733801786491487e-05, | |
| "loss": 0.0982, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.6507692307692308, | |
| "grad_norm": 0.40530569813116685, | |
| "learning_rate": 1.8727910495363043e-05, | |
| "loss": 0.0893, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.6523076923076923, | |
| "grad_norm": 0.3994309755298727, | |
| "learning_rate": 1.872200646148115e-05, | |
| "loss": 0.0917, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.6538461538461539, | |
| "grad_norm": 0.41310852897533573, | |
| "learning_rate": 1.8716089693465696e-05, | |
| "loss": 0.0869, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.6553846153846153, | |
| "grad_norm": 0.4477653763330846, | |
| "learning_rate": 1.8710160199955158e-05, | |
| "loss": 0.1001, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.6569230769230769, | |
| "grad_norm": 0.4749180294295519, | |
| "learning_rate": 1.8704217989606606e-05, | |
| "loss": 0.0989, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.6584615384615384, | |
| "grad_norm": 0.4374196609501067, | |
| "learning_rate": 1.869826307109567e-05, | |
| "loss": 0.0955, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 0.39438024250010495, | |
| "learning_rate": 1.869229545311653e-05, | |
| "loss": 0.0939, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.6615384615384615, | |
| "grad_norm": 0.4203742463904514, | |
| "learning_rate": 1.8686315144381914e-05, | |
| "loss": 0.0964, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.6630769230769231, | |
| "grad_norm": 0.4761695840193704, | |
| "learning_rate": 1.8680322153623077e-05, | |
| "loss": 0.1037, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.6646153846153846, | |
| "grad_norm": 0.3677131847676501, | |
| "learning_rate": 1.8674316489589782e-05, | |
| "loss": 0.09, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.6661538461538462, | |
| "grad_norm": 0.4356656851311741, | |
| "learning_rate": 1.8668298161050308e-05, | |
| "loss": 0.1005, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.6676923076923077, | |
| "grad_norm": 0.5173595615830835, | |
| "learning_rate": 1.8662267176791418e-05, | |
| "loss": 0.104, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.6692307692307692, | |
| "grad_norm": 0.565640191679134, | |
| "learning_rate": 1.8656223545618345e-05, | |
| "loss": 0.1059, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.6707692307692308, | |
| "grad_norm": 0.5304970770470983, | |
| "learning_rate": 1.8650167276354802e-05, | |
| "loss": 0.0969, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.6723076923076923, | |
| "grad_norm": 0.3582385963482857, | |
| "learning_rate": 1.8644098377842934e-05, | |
| "loss": 0.0901, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.6738461538461539, | |
| "grad_norm": 0.4778942975736773, | |
| "learning_rate": 1.863801685894335e-05, | |
| "loss": 0.0997, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.6753846153846154, | |
| "grad_norm": 0.6029016365712476, | |
| "learning_rate": 1.8631922728535054e-05, | |
| "loss": 0.0938, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.676923076923077, | |
| "grad_norm": 0.6376765327998692, | |
| "learning_rate": 1.8625815995515493e-05, | |
| "loss": 0.1069, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.6784615384615384, | |
| "grad_norm": 0.4435527425755955, | |
| "learning_rate": 1.8619696668800494e-05, | |
| "loss": 0.0934, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 0.5412282043268205, | |
| "learning_rate": 1.8613564757324276e-05, | |
| "loss": 0.099, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.6815384615384615, | |
| "grad_norm": 0.5181149152675278, | |
| "learning_rate": 1.860742027003944e-05, | |
| "loss": 0.1045, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.683076923076923, | |
| "grad_norm": 0.40925583148990097, | |
| "learning_rate": 1.8601263215916928e-05, | |
| "loss": 0.0893, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.6846153846153846, | |
| "grad_norm": 0.42471412248636775, | |
| "learning_rate": 1.8595093603946053e-05, | |
| "loss": 0.0986, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.6861538461538461, | |
| "grad_norm": 0.35148548657598855, | |
| "learning_rate": 1.858891144313445e-05, | |
| "loss": 0.0911, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.6876923076923077, | |
| "grad_norm": 0.40331605360650485, | |
| "learning_rate": 1.8582716742508066e-05, | |
| "loss": 0.0967, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.6892307692307692, | |
| "grad_norm": 0.44814576432850384, | |
| "learning_rate": 1.8576509511111182e-05, | |
| "loss": 0.0917, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.6907692307692308, | |
| "grad_norm": 0.4489452721920411, | |
| "learning_rate": 1.8570289758006346e-05, | |
| "loss": 0.0989, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.6923076923076923, | |
| "grad_norm": 0.3822084964419905, | |
| "learning_rate": 1.8564057492274407e-05, | |
| "loss": 0.0923, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.6938461538461539, | |
| "grad_norm": 0.47514026675128, | |
| "learning_rate": 1.8557812723014476e-05, | |
| "loss": 0.1076, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.6953846153846154, | |
| "grad_norm": 0.5418245481869945, | |
| "learning_rate": 1.8551555459343918e-05, | |
| "loss": 0.1116, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.696923076923077, | |
| "grad_norm": 0.5023521612267582, | |
| "learning_rate": 1.8545285710398343e-05, | |
| "loss": 0.1078, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.6984615384615385, | |
| "grad_norm": 0.5662436210170848, | |
| "learning_rate": 1.8539003485331584e-05, | |
| "loss": 0.0951, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 0.42546907920042626, | |
| "learning_rate": 1.853270879331569e-05, | |
| "loss": 0.0942, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.7015384615384616, | |
| "grad_norm": 0.41449390666943065, | |
| "learning_rate": 1.8526401643540924e-05, | |
| "loss": 0.0994, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.703076923076923, | |
| "grad_norm": 0.48839987100466653, | |
| "learning_rate": 1.852008204521572e-05, | |
| "loss": 0.1008, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.7046153846153846, | |
| "grad_norm": 0.43738519055543357, | |
| "learning_rate": 1.8513750007566696e-05, | |
| "loss": 0.0954, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.7061538461538461, | |
| "grad_norm": 0.41775219787174245, | |
| "learning_rate": 1.850740553983863e-05, | |
| "loss": 0.0875, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.7076923076923077, | |
| "grad_norm": 0.7358518540594707, | |
| "learning_rate": 1.8501048651294447e-05, | |
| "loss": 0.0933, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.7092307692307692, | |
| "grad_norm": 0.4241607505141305, | |
| "learning_rate": 1.8494679351215212e-05, | |
| "loss": 0.1001, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.7107692307692308, | |
| "grad_norm": 0.4683808796780716, | |
| "learning_rate": 1.84882976489001e-05, | |
| "loss": 0.0853, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.7123076923076923, | |
| "grad_norm": 0.44346576858606745, | |
| "learning_rate": 1.8481903553666405e-05, | |
| "loss": 0.0995, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.7138461538461538, | |
| "grad_norm": 0.45180799988411885, | |
| "learning_rate": 1.84754970748495e-05, | |
| "loss": 0.0848, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.7153846153846154, | |
| "grad_norm": 0.4173368978655163, | |
| "learning_rate": 1.846907822180286e-05, | |
| "loss": 0.0954, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.7169230769230769, | |
| "grad_norm": 0.42068772817462785, | |
| "learning_rate": 1.8462647003898005e-05, | |
| "loss": 0.0951, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.7184615384615385, | |
| "grad_norm": 0.49661800554688595, | |
| "learning_rate": 1.845620343052452e-05, | |
| "loss": 0.0949, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.48646267743412663, | |
| "learning_rate": 1.844974751109002e-05, | |
| "loss": 0.0964, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.7215384615384616, | |
| "grad_norm": 0.4106987772360316, | |
| "learning_rate": 1.8443279255020153e-05, | |
| "loss": 0.0974, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.7230769230769231, | |
| "grad_norm": 0.3921781146014997, | |
| "learning_rate": 1.843679867175858e-05, | |
| "loss": 0.0848, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.7246153846153847, | |
| "grad_norm": 0.44000195303506684, | |
| "learning_rate": 1.8430305770766947e-05, | |
| "loss": 0.1023, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.7261538461538461, | |
| "grad_norm": 0.5186352660566815, | |
| "learning_rate": 1.84238005615249e-05, | |
| "loss": 0.0943, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.7276923076923076, | |
| "grad_norm": 0.4684858223587338, | |
| "learning_rate": 1.8417283053530047e-05, | |
| "loss": 0.1027, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.7292307692307692, | |
| "grad_norm": 0.39874288545350156, | |
| "learning_rate": 1.8410753256297948e-05, | |
| "loss": 0.1023, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.7307692307692307, | |
| "grad_norm": 0.3687327920324425, | |
| "learning_rate": 1.8404211179362116e-05, | |
| "loss": 0.102, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.7323076923076923, | |
| "grad_norm": 0.37907564191738674, | |
| "learning_rate": 1.8397656832273982e-05, | |
| "loss": 0.0902, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.7338461538461538, | |
| "grad_norm": 0.5844119747750028, | |
| "learning_rate": 1.8391090224602895e-05, | |
| "loss": 0.1024, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.7353846153846154, | |
| "grad_norm": 0.4973700871932812, | |
| "learning_rate": 1.8384511365936112e-05, | |
| "loss": 0.0986, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.7369230769230769, | |
| "grad_norm": 0.4220278832402647, | |
| "learning_rate": 1.837792026587876e-05, | |
| "loss": 0.0947, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.7384615384615385, | |
| "grad_norm": 0.48741372675693995, | |
| "learning_rate": 1.837131693405386e-05, | |
| "loss": 0.1039, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 0.5095101140536407, | |
| "learning_rate": 1.8364701380102267e-05, | |
| "loss": 0.1014, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.7415384615384616, | |
| "grad_norm": 0.3686730605957879, | |
| "learning_rate": 1.8358073613682705e-05, | |
| "loss": 0.0879, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.7430769230769231, | |
| "grad_norm": 0.4018358216960023, | |
| "learning_rate": 1.8351433644471708e-05, | |
| "loss": 0.0951, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.7446153846153846, | |
| "grad_norm": 0.42272315942515715, | |
| "learning_rate": 1.8344781482163635e-05, | |
| "loss": 0.1006, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.7461538461538462, | |
| "grad_norm": 0.36114348383132194, | |
| "learning_rate": 1.8338117136470648e-05, | |
| "loss": 0.0876, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.7476923076923077, | |
| "grad_norm": 0.44109486857223745, | |
| "learning_rate": 1.8331440617122694e-05, | |
| "loss": 0.1014, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.7492307692307693, | |
| "grad_norm": 0.4735198416376007, | |
| "learning_rate": 1.8324751933867496e-05, | |
| "loss": 0.101, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.7507692307692307, | |
| "grad_norm": 0.5212542112337067, | |
| "learning_rate": 1.831805109647053e-05, | |
| "loss": 0.0965, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.7523076923076923, | |
| "grad_norm": 0.3867194456131576, | |
| "learning_rate": 1.831133811471503e-05, | |
| "loss": 0.0923, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.7538461538461538, | |
| "grad_norm": 0.4232158553279915, | |
| "learning_rate": 1.8304612998401947e-05, | |
| "loss": 0.086, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.7553846153846154, | |
| "grad_norm": 0.4882561711422476, | |
| "learning_rate": 1.829787575734995e-05, | |
| "loss": 0.084, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.7569230769230769, | |
| "grad_norm": 0.49255164116648453, | |
| "learning_rate": 1.8291126401395425e-05, | |
| "loss": 0.1011, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.7584615384615384, | |
| "grad_norm": 0.39462697940845287, | |
| "learning_rate": 1.8284364940392426e-05, | |
| "loss": 0.0981, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 0.42965433064236386, | |
| "learning_rate": 1.8277591384212693e-05, | |
| "loss": 0.1049, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.7615384615384615, | |
| "grad_norm": 0.42153666897549313, | |
| "learning_rate": 1.827080574274562e-05, | |
| "loss": 0.1015, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.7630769230769231, | |
| "grad_norm": 0.5244873385591531, | |
| "learning_rate": 1.8264008025898248e-05, | |
| "loss": 0.1021, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.7646153846153846, | |
| "grad_norm": 0.48573346836145637, | |
| "learning_rate": 1.825719824359524e-05, | |
| "loss": 0.1012, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.7661538461538462, | |
| "grad_norm": 0.4201750596778976, | |
| "learning_rate": 1.8250376405778897e-05, | |
| "loss": 0.0914, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.7676923076923077, | |
| "grad_norm": 0.3689138791311251, | |
| "learning_rate": 1.824354252240909e-05, | |
| "loss": 0.0971, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 0.446354377870851, | |
| "learning_rate": 1.8236696603463297e-05, | |
| "loss": 0.1104, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7707692307692308, | |
| "grad_norm": 0.5302794302913245, | |
| "learning_rate": 1.8229838658936566e-05, | |
| "loss": 0.0913, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.7723076923076924, | |
| "grad_norm": 0.6151683521942346, | |
| "learning_rate": 1.8222968698841495e-05, | |
| "loss": 0.0842, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.7738461538461539, | |
| "grad_norm": 0.4018248722446391, | |
| "learning_rate": 1.821608673320823e-05, | |
| "loss": 0.0896, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.7753846153846153, | |
| "grad_norm": 0.36582381181570395, | |
| "learning_rate": 1.8209192772084446e-05, | |
| "loss": 0.0899, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.7769230769230769, | |
| "grad_norm": 0.42007534297254007, | |
| "learning_rate": 1.820228682553533e-05, | |
| "loss": 0.1012, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.7784615384615384, | |
| "grad_norm": 0.3706728601304087, | |
| "learning_rate": 1.8195368903643565e-05, | |
| "loss": 0.0937, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 0.3386897127268892, | |
| "learning_rate": 1.818843901650932e-05, | |
| "loss": 0.0864, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.7815384615384615, | |
| "grad_norm": 0.4604244232600737, | |
| "learning_rate": 1.8181497174250236e-05, | |
| "loss": 0.0886, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.7830769230769231, | |
| "grad_norm": 0.46651975178638583, | |
| "learning_rate": 1.8174543387001403e-05, | |
| "loss": 0.0942, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.7846153846153846, | |
| "grad_norm": 0.37252137712706535, | |
| "learning_rate": 1.8167577664915354e-05, | |
| "loss": 0.098, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.7861538461538462, | |
| "grad_norm": 0.4549144644337013, | |
| "learning_rate": 1.816060001816205e-05, | |
| "loss": 0.1043, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.7876923076923077, | |
| "grad_norm": 0.49452287761067387, | |
| "learning_rate": 1.8153610456928853e-05, | |
| "loss": 0.0976, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.7892307692307692, | |
| "grad_norm": 0.5123177646707533, | |
| "learning_rate": 1.8146608991420533e-05, | |
| "loss": 0.0974, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.7907692307692308, | |
| "grad_norm": 0.39618956334101985, | |
| "learning_rate": 1.8139595631859228e-05, | |
| "loss": 0.0891, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.7923076923076923, | |
| "grad_norm": 0.3864877721629785, | |
| "learning_rate": 1.8132570388484442e-05, | |
| "loss": 0.0849, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.7938461538461539, | |
| "grad_norm": 0.48621924311976095, | |
| "learning_rate": 1.8125533271553045e-05, | |
| "loss": 0.1014, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.7953846153846154, | |
| "grad_norm": 0.46058902465123464, | |
| "learning_rate": 1.811848429133922e-05, | |
| "loss": 0.1114, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.796923076923077, | |
| "grad_norm": 0.40700166379805536, | |
| "learning_rate": 1.811142345813449e-05, | |
| "loss": 0.0903, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.7984615384615384, | |
| "grad_norm": 0.38144448483836707, | |
| "learning_rate": 1.810435078224767e-05, | |
| "loss": 0.0999, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.4087914237321906, | |
| "learning_rate": 1.809726627400487e-05, | |
| "loss": 0.0905, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.8015384615384615, | |
| "grad_norm": 0.454530116045963, | |
| "learning_rate": 1.8090169943749477e-05, | |
| "loss": 0.0981, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.803076923076923, | |
| "grad_norm": 0.387856092368395, | |
| "learning_rate": 1.8083061801842133e-05, | |
| "loss": 0.0914, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.8046153846153846, | |
| "grad_norm": 0.4175456777206446, | |
| "learning_rate": 1.8075941858660737e-05, | |
| "loss": 0.0996, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.8061538461538461, | |
| "grad_norm": 0.7864656011521015, | |
| "learning_rate": 1.8068810124600403e-05, | |
| "loss": 0.0899, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.8076923076923077, | |
| "grad_norm": 0.48921911883769525, | |
| "learning_rate": 1.8061666610073465e-05, | |
| "loss": 0.0979, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.8092307692307692, | |
| "grad_norm": 0.48154421315499524, | |
| "learning_rate": 1.805451132550946e-05, | |
| "loss": 0.1026, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.8107692307692308, | |
| "grad_norm": 0.45822965917426073, | |
| "learning_rate": 1.8047344281355112e-05, | |
| "loss": 0.1056, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.8123076923076923, | |
| "grad_norm": 0.4699180830359611, | |
| "learning_rate": 1.8040165488074294e-05, | |
| "loss": 0.0978, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.8138461538461539, | |
| "grad_norm": 0.4386694887627527, | |
| "learning_rate": 1.8032974956148064e-05, | |
| "loss": 0.0852, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.8153846153846154, | |
| "grad_norm": 0.3982089506558907, | |
| "learning_rate": 1.8025772696074593e-05, | |
| "loss": 0.0847, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.816923076923077, | |
| "grad_norm": 0.4451526496740161, | |
| "learning_rate": 1.8018558718369187e-05, | |
| "loss": 0.0949, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.8184615384615385, | |
| "grad_norm": 0.42797389669933644, | |
| "learning_rate": 1.8011333033564255e-05, | |
| "loss": 0.096, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 0.5816907168348651, | |
| "learning_rate": 1.8004095652209304e-05, | |
| "loss": 0.0979, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.8215384615384616, | |
| "grad_norm": 0.4209426200782058, | |
| "learning_rate": 1.799684658487091e-05, | |
| "loss": 0.0912, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.823076923076923, | |
| "grad_norm": 0.4459278908996159, | |
| "learning_rate": 1.7989585842132713e-05, | |
| "loss": 0.0932, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.8246153846153846, | |
| "grad_norm": 0.3996692934376165, | |
| "learning_rate": 1.7982313434595405e-05, | |
| "loss": 0.0846, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.8261538461538461, | |
| "grad_norm": 0.46838655788136374, | |
| "learning_rate": 1.7975029372876706e-05, | |
| "loss": 0.0931, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.8276923076923077, | |
| "grad_norm": 0.44272759560569114, | |
| "learning_rate": 1.7967733667611346e-05, | |
| "loss": 0.0991, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.8292307692307692, | |
| "grad_norm": 0.6005247882009117, | |
| "learning_rate": 1.7960426329451062e-05, | |
| "loss": 0.0947, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.8307692307692308, | |
| "grad_norm": 0.43973649591876807, | |
| "learning_rate": 1.7953107369064563e-05, | |
| "loss": 0.0946, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.8323076923076923, | |
| "grad_norm": 0.36356195409947895, | |
| "learning_rate": 1.7945776797137544e-05, | |
| "loss": 0.0829, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.8338461538461538, | |
| "grad_norm": 0.4199497642090339, | |
| "learning_rate": 1.7938434624372638e-05, | |
| "loss": 0.0925, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.8353846153846154, | |
| "grad_norm": 0.5142377507204998, | |
| "learning_rate": 1.7931080861489425e-05, | |
| "loss": 0.0973, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.8369230769230769, | |
| "grad_norm": 0.5226363888586346, | |
| "learning_rate": 1.7923715519224397e-05, | |
| "loss": 0.1065, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.8384615384615385, | |
| "grad_norm": 0.4729813679434869, | |
| "learning_rate": 1.791633860833096e-05, | |
| "loss": 0.0895, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 0.3606695909284096, | |
| "learning_rate": 1.7908950139579406e-05, | |
| "loss": 0.0832, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.8415384615384616, | |
| "grad_norm": 0.570980416724036, | |
| "learning_rate": 1.7901550123756906e-05, | |
| "loss": 0.1002, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.8430769230769231, | |
| "grad_norm": 0.5594313800490134, | |
| "learning_rate": 1.7894138571667482e-05, | |
| "loss": 0.1008, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.8446153846153847, | |
| "grad_norm": 0.4315817673384466, | |
| "learning_rate": 1.7886715494132008e-05, | |
| "loss": 0.0888, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.8461538461538461, | |
| "grad_norm": 0.3916992320727455, | |
| "learning_rate": 1.787928090198818e-05, | |
| "loss": 0.0858, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.8476923076923077, | |
| "grad_norm": 0.5103149586075123, | |
| "learning_rate": 1.7871834806090502e-05, | |
| "loss": 0.1078, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.8492307692307692, | |
| "grad_norm": 0.5179913026426116, | |
| "learning_rate": 1.7864377217310282e-05, | |
| "loss": 0.0941, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.8507692307692307, | |
| "grad_norm": 0.5462964124972205, | |
| "learning_rate": 1.7856908146535602e-05, | |
| "loss": 0.0844, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.8523076923076923, | |
| "grad_norm": 0.40593162957333356, | |
| "learning_rate": 1.784942760467131e-05, | |
| "loss": 0.0943, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.8538461538461538, | |
| "grad_norm": 0.38375960879766213, | |
| "learning_rate": 1.7841935602638997e-05, | |
| "loss": 0.0804, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.8553846153846154, | |
| "grad_norm": 0.38136544658194893, | |
| "learning_rate": 1.7834432151376992e-05, | |
| "loss": 0.0945, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.8569230769230769, | |
| "grad_norm": 0.4194877597337682, | |
| "learning_rate": 1.7826917261840337e-05, | |
| "loss": 0.092, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.8584615384615385, | |
| "grad_norm": 0.4923272058939714, | |
| "learning_rate": 1.7819390945000775e-05, | |
| "loss": 0.0986, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 0.4875318696461418, | |
| "learning_rate": 1.781185321184673e-05, | |
| "loss": 0.1017, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.8615384615384616, | |
| "grad_norm": 0.39052926422478096, | |
| "learning_rate": 1.7804304073383298e-05, | |
| "loss": 0.1091, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.8630769230769231, | |
| "grad_norm": 0.38763635529234175, | |
| "learning_rate": 1.7796743540632226e-05, | |
| "loss": 0.0825, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.8646153846153846, | |
| "grad_norm": 0.3905770386460791, | |
| "learning_rate": 1.778917162463189e-05, | |
| "loss": 0.0875, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.8661538461538462, | |
| "grad_norm": 0.5703432509432513, | |
| "learning_rate": 1.77815883364373e-05, | |
| "loss": 0.1001, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.8676923076923077, | |
| "grad_norm": 0.4597959384906591, | |
| "learning_rate": 1.777399368712005e-05, | |
| "loss": 0.0882, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.8692307692307693, | |
| "grad_norm": 0.45607761156419957, | |
| "learning_rate": 1.7766387687768338e-05, | |
| "loss": 0.0991, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.8707692307692307, | |
| "grad_norm": 0.39506065545533703, | |
| "learning_rate": 1.7758770349486924e-05, | |
| "loss": 0.0943, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.8723076923076923, | |
| "grad_norm": 0.4913700102929278, | |
| "learning_rate": 1.7751141683397128e-05, | |
| "loss": 0.1026, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.8738461538461538, | |
| "grad_norm": 0.4481916097965894, | |
| "learning_rate": 1.7743501700636804e-05, | |
| "loss": 0.1035, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.8753846153846154, | |
| "grad_norm": 0.40743252425944065, | |
| "learning_rate": 1.7735850412360332e-05, | |
| "loss": 0.0915, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.8769230769230769, | |
| "grad_norm": 0.36201263572487186, | |
| "learning_rate": 1.7728187829738596e-05, | |
| "loss": 0.096, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.8784615384615385, | |
| "grad_norm": 0.41877789233929563, | |
| "learning_rate": 1.772051396395897e-05, | |
| "loss": 0.0921, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.43388638709693894, | |
| "learning_rate": 1.7712828826225303e-05, | |
| "loss": 0.1016, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.8815384615384615, | |
| "grad_norm": 0.4621448068884329, | |
| "learning_rate": 1.7705132427757895e-05, | |
| "loss": 0.0957, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.8830769230769231, | |
| "grad_norm": 0.384208019183389, | |
| "learning_rate": 1.7697424779793497e-05, | |
| "loss": 0.0812, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.8846153846153846, | |
| "grad_norm": 0.35252484159436787, | |
| "learning_rate": 1.7689705893585273e-05, | |
| "loss": 0.0918, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.8861538461538462, | |
| "grad_norm": 0.40143829209617715, | |
| "learning_rate": 1.7681975780402807e-05, | |
| "loss": 0.0843, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.8876923076923077, | |
| "grad_norm": 0.4307373370195823, | |
| "learning_rate": 1.7674234451532065e-05, | |
| "loss": 0.0961, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.8892307692307693, | |
| "grad_norm": 0.46711054218809045, | |
| "learning_rate": 1.766648191827539e-05, | |
| "loss": 0.0918, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.8907692307692308, | |
| "grad_norm": 0.43239124932710876, | |
| "learning_rate": 1.7658718191951483e-05, | |
| "loss": 0.0865, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.8923076923076924, | |
| "grad_norm": 0.36077221637299095, | |
| "learning_rate": 1.7650943283895393e-05, | |
| "loss": 0.0791, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.8938461538461538, | |
| "grad_norm": 0.452062987622938, | |
| "learning_rate": 1.7643157205458483e-05, | |
| "loss": 0.0977, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.8953846153846153, | |
| "grad_norm": 0.3729118607027113, | |
| "learning_rate": 1.7635359968008438e-05, | |
| "loss": 0.0964, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.8969230769230769, | |
| "grad_norm": 0.31864024958361536, | |
| "learning_rate": 1.7627551582929223e-05, | |
| "loss": 0.0706, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.8984615384615384, | |
| "grad_norm": 0.4897692560311474, | |
| "learning_rate": 1.761973206162109e-05, | |
| "loss": 0.095, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 0.43634370121716065, | |
| "learning_rate": 1.7611901415500536e-05, | |
| "loss": 0.1012, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.9015384615384615, | |
| "grad_norm": 0.4303303314789484, | |
| "learning_rate": 1.7604059656000313e-05, | |
| "loss": 0.0991, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.9030769230769231, | |
| "grad_norm": 0.3965278873786633, | |
| "learning_rate": 1.759620679456939e-05, | |
| "loss": 0.0889, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.9046153846153846, | |
| "grad_norm": 0.3471384513881275, | |
| "learning_rate": 1.758834284267295e-05, | |
| "loss": 0.0901, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.9061538461538462, | |
| "grad_norm": 0.35615891292502644, | |
| "learning_rate": 1.7580467811792374e-05, | |
| "loss": 0.084, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.9076923076923077, | |
| "grad_norm": 0.43993009413917056, | |
| "learning_rate": 1.7572581713425195e-05, | |
| "loss": 0.0986, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.9092307692307692, | |
| "grad_norm": 0.41369164253274765, | |
| "learning_rate": 1.7564684559085138e-05, | |
| "loss": 0.0888, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.9107692307692308, | |
| "grad_norm": 0.3683031999775181, | |
| "learning_rate": 1.7556776360302038e-05, | |
| "loss": 0.0912, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.9123076923076923, | |
| "grad_norm": 0.4537934276100818, | |
| "learning_rate": 1.7548857128621878e-05, | |
| "loss": 0.0921, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.9138461538461539, | |
| "grad_norm": 0.43435742031494934, | |
| "learning_rate": 1.7540926875606734e-05, | |
| "loss": 0.0869, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.9153846153846154, | |
| "grad_norm": 0.38341724227303553, | |
| "learning_rate": 1.753298561283478e-05, | |
| "loss": 0.0903, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.916923076923077, | |
| "grad_norm": 0.41746460815399494, | |
| "learning_rate": 1.7525033351900268e-05, | |
| "loss": 0.1006, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.9184615384615384, | |
| "grad_norm": 0.3777485553214645, | |
| "learning_rate": 1.7517070104413497e-05, | |
| "loss": 0.0947, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 0.41830568154844494, | |
| "learning_rate": 1.7509095882000823e-05, | |
| "loss": 0.1039, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.9215384615384615, | |
| "grad_norm": 0.36147731651312526, | |
| "learning_rate": 1.7501110696304598e-05, | |
| "loss": 0.0875, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 0.3427586374547824, | |
| "learning_rate": 1.7493114558983207e-05, | |
| "loss": 0.0816, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.9246153846153846, | |
| "grad_norm": 0.3939804203050907, | |
| "learning_rate": 1.7485107481711014e-05, | |
| "loss": 0.0946, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.9261538461538461, | |
| "grad_norm": 0.381608432957127, | |
| "learning_rate": 1.7477089476178354e-05, | |
| "loss": 0.0929, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.9276923076923077, | |
| "grad_norm": 0.4292122557448878, | |
| "learning_rate": 1.7469060554091518e-05, | |
| "loss": 0.0965, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.9292307692307692, | |
| "grad_norm": 0.3762116025408573, | |
| "learning_rate": 1.7461020727172736e-05, | |
| "loss": 0.0938, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.9307692307692308, | |
| "grad_norm": 0.3455952506447102, | |
| "learning_rate": 1.745297000716016e-05, | |
| "loss": 0.0741, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.9323076923076923, | |
| "grad_norm": 0.38631458311410155, | |
| "learning_rate": 1.7444908405807845e-05, | |
| "loss": 0.0865, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.9338461538461539, | |
| "grad_norm": 0.37275711289343844, | |
| "learning_rate": 1.7436835934885735e-05, | |
| "loss": 0.0886, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.9353846153846154, | |
| "grad_norm": 0.4401140100719851, | |
| "learning_rate": 1.742875260617964e-05, | |
| "loss": 0.0976, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.936923076923077, | |
| "grad_norm": 0.3921855404257059, | |
| "learning_rate": 1.7420658431491224e-05, | |
| "loss": 0.0928, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.9384615384615385, | |
| "grad_norm": 0.4113951217410347, | |
| "learning_rate": 1.741255342263798e-05, | |
| "loss": 0.0885, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 0.3262775097841378, | |
| "learning_rate": 1.7404437591453237e-05, | |
| "loss": 0.0873, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.9415384615384615, | |
| "grad_norm": 0.42350913769649273, | |
| "learning_rate": 1.73963109497861e-05, | |
| "loss": 0.0958, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.943076923076923, | |
| "grad_norm": 0.36988134430557396, | |
| "learning_rate": 1.7388173509501475e-05, | |
| "loss": 0.0928, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.9446153846153846, | |
| "grad_norm": 0.39410658459241166, | |
| "learning_rate": 1.7380025282480028e-05, | |
| "loss": 0.0868, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.9461538461538461, | |
| "grad_norm": 0.41180225877084126, | |
| "learning_rate": 1.7371866280618176e-05, | |
| "loss": 0.0911, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.9476923076923077, | |
| "grad_norm": 0.35890123689534553, | |
| "learning_rate": 1.7363696515828062e-05, | |
| "loss": 0.0899, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.9492307692307692, | |
| "grad_norm": 0.2987459340450158, | |
| "learning_rate": 1.7355516000037555e-05, | |
| "loss": 0.0769, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.9507692307692308, | |
| "grad_norm": 0.43301969400641127, | |
| "learning_rate": 1.73473247451902e-05, | |
| "loss": 0.0892, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.9523076923076923, | |
| "grad_norm": 0.379899779524929, | |
| "learning_rate": 1.733912276324524e-05, | |
| "loss": 0.0751, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.9538461538461539, | |
| "grad_norm": 0.35284361586327184, | |
| "learning_rate": 1.7330910066177574e-05, | |
| "loss": 0.0941, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.9553846153846154, | |
| "grad_norm": 0.4459109096619344, | |
| "learning_rate": 1.7322686665977738e-05, | |
| "loss": 0.086, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.9569230769230769, | |
| "grad_norm": 0.4302144299712948, | |
| "learning_rate": 1.7314452574651902e-05, | |
| "loss": 0.0865, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.9584615384615385, | |
| "grad_norm": 0.4053685427552524, | |
| "learning_rate": 1.7306207804221845e-05, | |
| "loss": 0.092, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.46831862180021, | |
| "learning_rate": 1.7297952366724935e-05, | |
| "loss": 0.1046, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.9615384615384616, | |
| "grad_norm": 0.4538044661350598, | |
| "learning_rate": 1.7289686274214116e-05, | |
| "loss": 0.1076, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.963076923076923, | |
| "grad_norm": 0.48389404664226576, | |
| "learning_rate": 1.7281409538757886e-05, | |
| "loss": 0.0923, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.9646153846153847, | |
| "grad_norm": 0.3865282157362884, | |
| "learning_rate": 1.727312217244028e-05, | |
| "loss": 0.0881, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.9661538461538461, | |
| "grad_norm": 0.41052873518942035, | |
| "learning_rate": 1.726482418736086e-05, | |
| "loss": 0.0982, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.9676923076923077, | |
| "grad_norm": 0.4633845480863643, | |
| "learning_rate": 1.7256515595634688e-05, | |
| "loss": 0.0963, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.9692307692307692, | |
| "grad_norm": 0.49146329005893413, | |
| "learning_rate": 1.7248196409392312e-05, | |
| "loss": 0.0924, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.9707692307692307, | |
| "grad_norm": 0.35983268721595085, | |
| "learning_rate": 1.7239866640779745e-05, | |
| "loss": 0.0884, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.9723076923076923, | |
| "grad_norm": 0.490761583695885, | |
| "learning_rate": 1.7231526301958454e-05, | |
| "loss": 0.0973, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.9738461538461538, | |
| "grad_norm": 0.3594410933659223, | |
| "learning_rate": 1.722317540510534e-05, | |
| "loss": 0.0929, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.9753846153846154, | |
| "grad_norm": 0.32824532953007957, | |
| "learning_rate": 1.7214813962412715e-05, | |
| "loss": 0.0886, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.9769230769230769, | |
| "grad_norm": 0.3557861935512496, | |
| "learning_rate": 1.720644198608829e-05, | |
| "loss": 0.0885, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.9784615384615385, | |
| "grad_norm": 0.45266556902976496, | |
| "learning_rate": 1.7198059488355153e-05, | |
| "loss": 0.0933, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 0.40557942924254287, | |
| "learning_rate": 1.7189666481451755e-05, | |
| "loss": 0.0951, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.9815384615384616, | |
| "grad_norm": 0.35736570345844687, | |
| "learning_rate": 1.718126297763189e-05, | |
| "loss": 0.0888, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.9830769230769231, | |
| "grad_norm": 0.4631511259456779, | |
| "learning_rate": 1.717284898916468e-05, | |
| "loss": 0.091, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.9846153846153847, | |
| "grad_norm": 0.43267790292943314, | |
| "learning_rate": 1.7164424528334548e-05, | |
| "loss": 0.0961, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.9861538461538462, | |
| "grad_norm": 0.46968347009435635, | |
| "learning_rate": 1.715598960744121e-05, | |
| "loss": 0.0887, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.9876923076923076, | |
| "grad_norm": 0.447356115085785, | |
| "learning_rate": 1.7147544238799664e-05, | |
| "loss": 0.0908, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.9892307692307692, | |
| "grad_norm": 0.32693771260343074, | |
| "learning_rate": 1.7139088434740142e-05, | |
| "loss": 0.0872, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.9907692307692307, | |
| "grad_norm": 0.422322021584036, | |
| "learning_rate": 1.7130622207608126e-05, | |
| "loss": 0.0975, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.9923076923076923, | |
| "grad_norm": 0.34862552621490467, | |
| "learning_rate": 1.712214556976431e-05, | |
| "loss": 0.0944, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.9938461538461538, | |
| "grad_norm": 0.3800495940989224, | |
| "learning_rate": 1.7113658533584594e-05, | |
| "loss": 0.0793, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.9953846153846154, | |
| "grad_norm": 0.5118689124842419, | |
| "learning_rate": 1.7105161111460046e-05, | |
| "loss": 0.0949, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.9969230769230769, | |
| "grad_norm": 0.412157009432569, | |
| "learning_rate": 1.7096653315796915e-05, | |
| "loss": 0.1011, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.9984615384615385, | |
| "grad_norm": 0.3780510510068702, | |
| "learning_rate": 1.7088135159016584e-05, | |
| "loss": 0.0982, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.41945932684457304, | |
| "learning_rate": 1.7079606653555563e-05, | |
| "loss": 0.0953, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.0015384615384615, | |
| "grad_norm": 0.43622049064952106, | |
| "learning_rate": 1.7071067811865477e-05, | |
| "loss": 0.0749, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.003076923076923, | |
| "grad_norm": 0.38132209199435835, | |
| "learning_rate": 1.706251864641304e-05, | |
| "loss": 0.0726, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.0046153846153847, | |
| "grad_norm": 0.30052022007987506, | |
| "learning_rate": 1.7053959169680033e-05, | |
| "loss": 0.0715, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.0061538461538462, | |
| "grad_norm": 0.3185644660405221, | |
| "learning_rate": 1.7045389394163297e-05, | |
| "loss": 0.0688, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.0076923076923077, | |
| "grad_norm": 0.35208492988636686, | |
| "learning_rate": 1.7036809332374713e-05, | |
| "loss": 0.0689, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.0092307692307692, | |
| "grad_norm": 0.44091806848570947, | |
| "learning_rate": 1.7028218996841173e-05, | |
| "loss": 0.0709, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.0107692307692309, | |
| "grad_norm": 0.37173547317449596, | |
| "learning_rate": 1.7019618400104572e-05, | |
| "loss": 0.0663, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.0123076923076924, | |
| "grad_norm": 0.3752761957439137, | |
| "learning_rate": 1.7011007554721778e-05, | |
| "loss": 0.0696, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.0138461538461538, | |
| "grad_norm": 0.3413943290234514, | |
| "learning_rate": 1.700238647326464e-05, | |
| "loss": 0.0675, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.0153846153846153, | |
| "grad_norm": 0.37509528395315783, | |
| "learning_rate": 1.6993755168319934e-05, | |
| "loss": 0.0718, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.0169230769230768, | |
| "grad_norm": 0.39194820102366207, | |
| "learning_rate": 1.6985113652489374e-05, | |
| "loss": 0.062, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.0184615384615385, | |
| "grad_norm": 0.296028112394497, | |
| "learning_rate": 1.697646193838957e-05, | |
| "loss": 0.0658, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 0.3737919319488901, | |
| "learning_rate": 1.6967800038652035e-05, | |
| "loss": 0.0615, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.0215384615384615, | |
| "grad_norm": 0.36005445736837366, | |
| "learning_rate": 1.6959127965923144e-05, | |
| "loss": 0.0691, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.023076923076923, | |
| "grad_norm": 0.3514084769551997, | |
| "learning_rate": 1.695044573286413e-05, | |
| "loss": 0.0724, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.0246153846153847, | |
| "grad_norm": 0.37783560046213566, | |
| "learning_rate": 1.6941753352151057e-05, | |
| "loss": 0.063, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.0261538461538462, | |
| "grad_norm": 0.3639679221324666, | |
| "learning_rate": 1.69330508364748e-05, | |
| "loss": 0.0682, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.0276923076923077, | |
| "grad_norm": 0.42252833886280206, | |
| "learning_rate": 1.692433819854104e-05, | |
| "loss": 0.0694, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.0292307692307692, | |
| "grad_norm": 0.44692488489777815, | |
| "learning_rate": 1.6915615451070234e-05, | |
| "loss": 0.0705, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.0307692307692307, | |
| "grad_norm": 0.3678986564514892, | |
| "learning_rate": 1.6906882606797595e-05, | |
| "loss": 0.0657, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.0323076923076924, | |
| "grad_norm": 0.43913282105160406, | |
| "learning_rate": 1.689813967847308e-05, | |
| "loss": 0.071, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.0338461538461539, | |
| "grad_norm": 0.32313030736032045, | |
| "learning_rate": 1.6889386678861365e-05, | |
| "loss": 0.0665, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.0353846153846153, | |
| "grad_norm": 0.3913357186511293, | |
| "learning_rate": 1.6880623620741843e-05, | |
| "loss": 0.0677, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.0369230769230768, | |
| "grad_norm": 0.3502824876065054, | |
| "learning_rate": 1.6871850516908575e-05, | |
| "loss": 0.0651, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.0384615384615385, | |
| "grad_norm": 0.5615618864576786, | |
| "learning_rate": 1.68630673801703e-05, | |
| "loss": 0.0669, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 0.45343396226547117, | |
| "learning_rate": 1.68542742233504e-05, | |
| "loss": 0.0817, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.0415384615384615, | |
| "grad_norm": 0.40223580906946954, | |
| "learning_rate": 1.684547105928689e-05, | |
| "loss": 0.0691, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.043076923076923, | |
| "grad_norm": 0.39525415008396314, | |
| "learning_rate": 1.683665790083239e-05, | |
| "loss": 0.0709, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.0446153846153847, | |
| "grad_norm": 0.29528931632742, | |
| "learning_rate": 1.682783476085412e-05, | |
| "loss": 0.0562, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.0461538461538462, | |
| "grad_norm": 0.4540404912733847, | |
| "learning_rate": 1.6819001652233867e-05, | |
| "loss": 0.0668, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.0476923076923077, | |
| "grad_norm": 0.4901824401666557, | |
| "learning_rate": 1.6810158587867973e-05, | |
| "loss": 0.0735, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.0492307692307692, | |
| "grad_norm": 0.35657036998216646, | |
| "learning_rate": 1.6801305580667318e-05, | |
| "loss": 0.0659, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.0507692307692307, | |
| "grad_norm": 0.34213142704883004, | |
| "learning_rate": 1.679244264355729e-05, | |
| "loss": 0.0631, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.0523076923076924, | |
| "grad_norm": 0.39311143967753803, | |
| "learning_rate": 1.6783569789477795e-05, | |
| "loss": 0.0652, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.0538461538461539, | |
| "grad_norm": 0.3501379606305712, | |
| "learning_rate": 1.677468703138319e-05, | |
| "loss": 0.0699, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.0553846153846154, | |
| "grad_norm": 0.359427195373609, | |
| "learning_rate": 1.6765794382242315e-05, | |
| "loss": 0.0657, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.0569230769230769, | |
| "grad_norm": 0.3900165610051957, | |
| "learning_rate": 1.6756891855038436e-05, | |
| "loss": 0.0648, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.0584615384615386, | |
| "grad_norm": 0.3768191856115309, | |
| "learning_rate": 1.6747979462769253e-05, | |
| "loss": 0.0661, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 0.3788900739237935, | |
| "learning_rate": 1.673905721844686e-05, | |
| "loss": 0.0689, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.0615384615384615, | |
| "grad_norm": 0.38294691852219787, | |
| "learning_rate": 1.6730125135097736e-05, | |
| "loss": 0.0733, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.063076923076923, | |
| "grad_norm": 0.35955788229196967, | |
| "learning_rate": 1.6721183225762726e-05, | |
| "loss": 0.0653, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.0646153846153845, | |
| "grad_norm": 0.4314633145507115, | |
| "learning_rate": 1.6712231503497028e-05, | |
| "loss": 0.0693, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.0661538461538462, | |
| "grad_norm": 0.3819072156771492, | |
| "learning_rate": 1.670326998137016e-05, | |
| "loss": 0.0686, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.0676923076923077, | |
| "grad_norm": 0.32895823745294955, | |
| "learning_rate": 1.669429867246594e-05, | |
| "loss": 0.0696, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.0692307692307692, | |
| "grad_norm": 0.4310560863117649, | |
| "learning_rate": 1.668531758988249e-05, | |
| "loss": 0.0715, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.0707692307692307, | |
| "grad_norm": 0.49530944777177144, | |
| "learning_rate": 1.6676326746732197e-05, | |
| "loss": 0.0684, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.0723076923076924, | |
| "grad_norm": 0.3399836957762591, | |
| "learning_rate": 1.666732615614169e-05, | |
| "loss": 0.0772, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.073846153846154, | |
| "grad_norm": 0.3717915125573905, | |
| "learning_rate": 1.665831583125184e-05, | |
| "loss": 0.0735, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.0753846153846154, | |
| "grad_norm": 0.4045082908654806, | |
| "learning_rate": 1.6649295785217722e-05, | |
| "loss": 0.0712, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.0769230769230769, | |
| "grad_norm": 0.38922594233503904, | |
| "learning_rate": 1.664026603120861e-05, | |
| "loss": 0.0709, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.0784615384615384, | |
| "grad_norm": 0.5013858686476608, | |
| "learning_rate": 1.6631226582407954e-05, | |
| "loss": 0.0764, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 0.3717981315840534, | |
| "learning_rate": 1.6622177452013347e-05, | |
| "loss": 0.0705, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.0815384615384616, | |
| "grad_norm": 0.38788483529072765, | |
| "learning_rate": 1.661311865323652e-05, | |
| "loss": 0.0719, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.083076923076923, | |
| "grad_norm": 1.053330169816148, | |
| "learning_rate": 1.660405019930333e-05, | |
| "loss": 0.0735, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.0846153846153845, | |
| "grad_norm": 0.6972695498767989, | |
| "learning_rate": 1.6594972103453727e-05, | |
| "loss": 0.0718, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.0861538461538462, | |
| "grad_norm": 0.37342568285958827, | |
| "learning_rate": 1.6585884378941727e-05, | |
| "loss": 0.0753, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.0876923076923077, | |
| "grad_norm": 0.4267000057052577, | |
| "learning_rate": 1.6576787039035417e-05, | |
| "loss": 0.0649, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.0892307692307692, | |
| "grad_norm": 0.36310651208043226, | |
| "learning_rate": 1.6567680097016917e-05, | |
| "loss": 0.0686, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.0907692307692307, | |
| "grad_norm": 0.42763138085861285, | |
| "learning_rate": 1.6558563566182365e-05, | |
| "loss": 0.0657, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.0923076923076924, | |
| "grad_norm": 0.4251635596635269, | |
| "learning_rate": 1.65494374598419e-05, | |
| "loss": 0.077, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.093846153846154, | |
| "grad_norm": 0.6823950567290125, | |
| "learning_rate": 1.6540301791319647e-05, | |
| "loss": 0.0642, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.0953846153846154, | |
| "grad_norm": 0.30086732326600235, | |
| "learning_rate": 1.6531156573953677e-05, | |
| "loss": 0.0635, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.096923076923077, | |
| "grad_norm": 0.3272124590429498, | |
| "learning_rate": 1.652200182109602e-05, | |
| "loss": 0.0711, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.0984615384615384, | |
| "grad_norm": 0.3783056835805405, | |
| "learning_rate": 1.6512837546112617e-05, | |
| "loss": 0.0702, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 0.34618660310859284, | |
| "learning_rate": 1.6503663762383312e-05, | |
| "loss": 0.066, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.1015384615384616, | |
| "grad_norm": 0.3509817308378501, | |
| "learning_rate": 1.6494480483301836e-05, | |
| "loss": 0.0681, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.103076923076923, | |
| "grad_norm": 0.435238684258864, | |
| "learning_rate": 1.6485287722275783e-05, | |
| "loss": 0.0654, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.1046153846153846, | |
| "grad_norm": 0.4858221064894468, | |
| "learning_rate": 1.6476085492726582e-05, | |
| "loss": 0.0621, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.106153846153846, | |
| "grad_norm": 0.5547851396481795, | |
| "learning_rate": 1.6466873808089496e-05, | |
| "loss": 0.065, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.1076923076923078, | |
| "grad_norm": 0.3551740998729382, | |
| "learning_rate": 1.645765268181359e-05, | |
| "loss": 0.0695, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.1092307692307692, | |
| "grad_norm": 0.5426959849480466, | |
| "learning_rate": 1.6448422127361707e-05, | |
| "loss": 0.0679, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.1107692307692307, | |
| "grad_norm": 0.4014572952029352, | |
| "learning_rate": 1.6439182158210468e-05, | |
| "loss": 0.0682, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.1123076923076922, | |
| "grad_norm": 0.37264714876106975, | |
| "learning_rate": 1.642993278785023e-05, | |
| "loss": 0.0622, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.113846153846154, | |
| "grad_norm": 0.4085985468005736, | |
| "learning_rate": 1.642067402978508e-05, | |
| "loss": 0.0708, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.1153846153846154, | |
| "grad_norm": 0.3741294371749341, | |
| "learning_rate": 1.64114058975328e-05, | |
| "loss": 0.0631, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.116923076923077, | |
| "grad_norm": 0.45876871469434627, | |
| "learning_rate": 1.640212840462488e-05, | |
| "loss": 0.0663, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.1184615384615384, | |
| "grad_norm": 0.3422472176749258, | |
| "learning_rate": 1.639284156460646e-05, | |
| "loss": 0.0613, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 0.39030560028876443, | |
| "learning_rate": 1.6383545391036327e-05, | |
| "loss": 0.0659, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.1215384615384616, | |
| "grad_norm": 0.32978927481973175, | |
| "learning_rate": 1.63742398974869e-05, | |
| "loss": 0.0583, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.123076923076923, | |
| "grad_norm": 0.3669713631154274, | |
| "learning_rate": 1.63649250975442e-05, | |
| "loss": 0.0723, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.1246153846153846, | |
| "grad_norm": 0.35663226905770323, | |
| "learning_rate": 1.6355601004807856e-05, | |
| "loss": 0.0687, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.126153846153846, | |
| "grad_norm": 0.39492116903405033, | |
| "learning_rate": 1.6346267632891027e-05, | |
| "loss": 0.0705, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.1276923076923078, | |
| "grad_norm": 0.36069535113212814, | |
| "learning_rate": 1.6336924995420453e-05, | |
| "loss": 0.0634, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.1292307692307693, | |
| "grad_norm": 0.4776466189937346, | |
| "learning_rate": 1.6327573106036384e-05, | |
| "loss": 0.0772, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.1307692307692307, | |
| "grad_norm": 0.3872373026437133, | |
| "learning_rate": 1.6318211978392588e-05, | |
| "loss": 0.0729, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.1323076923076922, | |
| "grad_norm": 0.3782947210886841, | |
| "learning_rate": 1.630884162615631e-05, | |
| "loss": 0.0698, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.1338461538461537, | |
| "grad_norm": 0.38313732578124937, | |
| "learning_rate": 1.6299462063008272e-05, | |
| "loss": 0.0659, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.1353846153846154, | |
| "grad_norm": 0.32705882716559326, | |
| "learning_rate": 1.6290073302642637e-05, | |
| "loss": 0.0649, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.136923076923077, | |
| "grad_norm": 0.3322452475653334, | |
| "learning_rate": 1.6280675358767005e-05, | |
| "loss": 0.0675, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.1384615384615384, | |
| "grad_norm": 0.3711248581770431, | |
| "learning_rate": 1.6271268245102377e-05, | |
| "loss": 0.0737, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.1400000000000001, | |
| "grad_norm": 0.39494242888459563, | |
| "learning_rate": 1.626185197538314e-05, | |
| "loss": 0.0706, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.1415384615384616, | |
| "grad_norm": 0.3116998586738848, | |
| "learning_rate": 1.6252426563357054e-05, | |
| "loss": 0.0641, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.143076923076923, | |
| "grad_norm": 0.42963979037379485, | |
| "learning_rate": 1.6242992022785225e-05, | |
| "loss": 0.0683, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.1446153846153846, | |
| "grad_norm": 0.36479367771942905, | |
| "learning_rate": 1.623354836744209e-05, | |
| "loss": 0.0713, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.146153846153846, | |
| "grad_norm": 0.6468927692309728, | |
| "learning_rate": 1.6224095611115385e-05, | |
| "loss": 0.0652, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.1476923076923078, | |
| "grad_norm": 0.6063113124725473, | |
| "learning_rate": 1.6214633767606142e-05, | |
| "loss": 0.0746, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.1492307692307693, | |
| "grad_norm": 0.4304453405042257, | |
| "learning_rate": 1.620516285072866e-05, | |
| "loss": 0.0698, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.1507692307692308, | |
| "grad_norm": 0.317379420605374, | |
| "learning_rate": 1.6195682874310473e-05, | |
| "loss": 0.0647, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.1523076923076923, | |
| "grad_norm": 0.39131831324165883, | |
| "learning_rate": 1.6186193852192356e-05, | |
| "loss": 0.0755, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.1538461538461537, | |
| "grad_norm": 0.35798460402065585, | |
| "learning_rate": 1.617669579822829e-05, | |
| "loss": 0.0746, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.1553846153846155, | |
| "grad_norm": 0.3502631169305045, | |
| "learning_rate": 1.6167188726285433e-05, | |
| "loss": 0.0697, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.156923076923077, | |
| "grad_norm": 0.40987521557285755, | |
| "learning_rate": 1.6157672650244113e-05, | |
| "loss": 0.0798, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.1584615384615384, | |
| "grad_norm": 0.352033824405812, | |
| "learning_rate": 1.6148147583997813e-05, | |
| "loss": 0.0678, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 0.4345817539730366, | |
| "learning_rate": 1.6138613541453127e-05, | |
| "loss": 0.0728, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.1615384615384616, | |
| "grad_norm": 0.35074866147767675, | |
| "learning_rate": 1.6129070536529767e-05, | |
| "loss": 0.0687, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.1630769230769231, | |
| "grad_norm": 0.36887909746623887, | |
| "learning_rate": 1.611951858316052e-05, | |
| "loss": 0.072, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.1646153846153846, | |
| "grad_norm": 0.2983907917380261, | |
| "learning_rate": 1.6109957695291246e-05, | |
| "loss": 0.0685, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.166153846153846, | |
| "grad_norm": 0.3934269192468973, | |
| "learning_rate": 1.610038788688084e-05, | |
| "loss": 0.0677, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.1676923076923078, | |
| "grad_norm": 0.37318389733402674, | |
| "learning_rate": 1.6090809171901237e-05, | |
| "loss": 0.0718, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.1692307692307693, | |
| "grad_norm": 0.38645652176143314, | |
| "learning_rate": 1.6081221564337356e-05, | |
| "loss": 0.0687, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.1707692307692308, | |
| "grad_norm": 0.38329410093067623, | |
| "learning_rate": 1.6071625078187113e-05, | |
| "loss": 0.0666, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.1723076923076923, | |
| "grad_norm": 0.30301582024440277, | |
| "learning_rate": 1.6062019727461384e-05, | |
| "loss": 0.0671, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.1738461538461538, | |
| "grad_norm": 0.32424463849184526, | |
| "learning_rate": 1.605240552618398e-05, | |
| "loss": 0.0715, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 1.1753846153846155, | |
| "grad_norm": 0.35922202384394764, | |
| "learning_rate": 1.6042782488391644e-05, | |
| "loss": 0.0732, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.176923076923077, | |
| "grad_norm": 0.3647659403794086, | |
| "learning_rate": 1.603315062813401e-05, | |
| "loss": 0.0707, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.1784615384615384, | |
| "grad_norm": 0.34982427128139343, | |
| "learning_rate": 1.6023509959473608e-05, | |
| "loss": 0.0725, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 0.427718797984176, | |
| "learning_rate": 1.601386049648581e-05, | |
| "loss": 0.0737, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 1.1815384615384614, | |
| "grad_norm": 0.3924968355298374, | |
| "learning_rate": 1.6004202253258844e-05, | |
| "loss": 0.0649, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.1830769230769231, | |
| "grad_norm": 0.30260539928386654, | |
| "learning_rate": 1.5994535243893742e-05, | |
| "loss": 0.0695, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 1.1846153846153846, | |
| "grad_norm": 0.35596304966031656, | |
| "learning_rate": 1.5984859482504347e-05, | |
| "loss": 0.067, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.1861538461538461, | |
| "grad_norm": 0.3129839541191524, | |
| "learning_rate": 1.5975174983217273e-05, | |
| "loss": 0.069, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.1876923076923076, | |
| "grad_norm": 0.3674547487152307, | |
| "learning_rate": 1.5965481760171897e-05, | |
| "loss": 0.0631, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.1892307692307693, | |
| "grad_norm": 0.3335296888041543, | |
| "learning_rate": 1.5955779827520327e-05, | |
| "loss": 0.0641, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.1907692307692308, | |
| "grad_norm": 0.39407159089071714, | |
| "learning_rate": 1.5946069199427387e-05, | |
| "loss": 0.071, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.1923076923076923, | |
| "grad_norm": 0.37085530721330084, | |
| "learning_rate": 1.5936349890070602e-05, | |
| "loss": 0.0733, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.1938461538461538, | |
| "grad_norm": 0.3065421660271998, | |
| "learning_rate": 1.592662191364017e-05, | |
| "loss": 0.0646, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.1953846153846155, | |
| "grad_norm": 0.4157113450091342, | |
| "learning_rate": 1.5916885284338937e-05, | |
| "loss": 0.0719, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.196923076923077, | |
| "grad_norm": 0.37188603187355035, | |
| "learning_rate": 1.5907140016382385e-05, | |
| "loss": 0.0682, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.1984615384615385, | |
| "grad_norm": 0.34699200375559375, | |
| "learning_rate": 1.5897386123998613e-05, | |
| "loss": 0.0745, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.31961705854482836, | |
| "learning_rate": 1.588762362142831e-05, | |
| "loss": 0.0647, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.2015384615384614, | |
| "grad_norm": 0.35665468708944836, | |
| "learning_rate": 1.5877852522924733e-05, | |
| "loss": 0.0657, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.2030769230769232, | |
| "grad_norm": 0.3379980896855412, | |
| "learning_rate": 1.586807284275369e-05, | |
| "loss": 0.0631, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.2046153846153846, | |
| "grad_norm": 0.398246969459009, | |
| "learning_rate": 1.5858284595193514e-05, | |
| "loss": 0.0732, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.2061538461538461, | |
| "grad_norm": 0.34921906662522734, | |
| "learning_rate": 1.584848779453506e-05, | |
| "loss": 0.0627, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.2076923076923076, | |
| "grad_norm": 0.37101620781077305, | |
| "learning_rate": 1.5838682455081657e-05, | |
| "loss": 0.0621, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.209230769230769, | |
| "grad_norm": 0.341332526505076, | |
| "learning_rate": 1.5828868591149104e-05, | |
| "loss": 0.063, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.2107692307692308, | |
| "grad_norm": 0.3041915924214054, | |
| "learning_rate": 1.581904621706565e-05, | |
| "loss": 0.0678, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 1.2123076923076923, | |
| "grad_norm": 0.36665714928540116, | |
| "learning_rate": 1.580921534717196e-05, | |
| "loss": 0.0743, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.2138461538461538, | |
| "grad_norm": 0.3377775940167995, | |
| "learning_rate": 1.5799375995821116e-05, | |
| "loss": 0.064, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 1.2153846153846155, | |
| "grad_norm": 0.35353779842042976, | |
| "learning_rate": 1.5789528177378574e-05, | |
| "loss": 0.079, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.216923076923077, | |
| "grad_norm": 0.3232778165659884, | |
| "learning_rate": 1.577967190622215e-05, | |
| "loss": 0.066, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 1.2184615384615385, | |
| "grad_norm": 0.3577677354201108, | |
| "learning_rate": 1.5769807196742008e-05, | |
| "loss": 0.0653, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "grad_norm": 0.31783041296834197, | |
| "learning_rate": 1.5759934063340627e-05, | |
| "loss": 0.0706, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 1.2215384615384615, | |
| "grad_norm": 0.40205477740857387, | |
| "learning_rate": 1.575005252043279e-05, | |
| "loss": 0.0725, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.2230769230769232, | |
| "grad_norm": 0.3925627479595986, | |
| "learning_rate": 1.5740162582445545e-05, | |
| "loss": 0.07, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.2246153846153847, | |
| "grad_norm": 0.3311986658898681, | |
| "learning_rate": 1.5730264263818212e-05, | |
| "loss": 0.0762, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.2261538461538461, | |
| "grad_norm": 0.35012400117324377, | |
| "learning_rate": 1.5720357579002346e-05, | |
| "loss": 0.0621, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 1.2276923076923076, | |
| "grad_norm": 0.351604049632638, | |
| "learning_rate": 1.5710442542461705e-05, | |
| "loss": 0.0638, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.2292307692307691, | |
| "grad_norm": 0.356583134009754, | |
| "learning_rate": 1.5700519168672248e-05, | |
| "loss": 0.0713, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 1.2307692307692308, | |
| "grad_norm": 0.3285074874868023, | |
| "learning_rate": 1.5690587472122104e-05, | |
| "loss": 0.0643, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.2323076923076923, | |
| "grad_norm": 0.3450768181316114, | |
| "learning_rate": 1.568064746731156e-05, | |
| "loss": 0.0633, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 1.2338461538461538, | |
| "grad_norm": 0.3539947251115169, | |
| "learning_rate": 1.5670699168753022e-05, | |
| "loss": 0.0662, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.2353846153846153, | |
| "grad_norm": 0.32288987773337546, | |
| "learning_rate": 1.5660742590971014e-05, | |
| "loss": 0.0679, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 1.236923076923077, | |
| "grad_norm": 0.5408106149836094, | |
| "learning_rate": 1.5650777748502144e-05, | |
| "loss": 0.0739, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.2384615384615385, | |
| "grad_norm": 0.3333851374522989, | |
| "learning_rate": 1.5640804655895086e-05, | |
| "loss": 0.0666, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 0.3514156125809535, | |
| "learning_rate": 1.5630823327710558e-05, | |
| "loss": 0.0609, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.2415384615384615, | |
| "grad_norm": 0.4355584652770312, | |
| "learning_rate": 1.5620833778521306e-05, | |
| "loss": 0.0698, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 1.2430769230769232, | |
| "grad_norm": 0.3863399662303979, | |
| "learning_rate": 1.561083602291208e-05, | |
| "loss": 0.0747, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.2446153846153847, | |
| "grad_norm": 0.4272070354943623, | |
| "learning_rate": 1.5600830075479604e-05, | |
| "loss": 0.0625, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 1.2461538461538462, | |
| "grad_norm": 0.34034969408428656, | |
| "learning_rate": 1.559081595083256e-05, | |
| "loss": 0.0611, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.2476923076923077, | |
| "grad_norm": 0.42893862551923334, | |
| "learning_rate": 1.5580793663591583e-05, | |
| "loss": 0.0669, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 1.2492307692307691, | |
| "grad_norm": 0.37930257242842513, | |
| "learning_rate": 1.557076322838922e-05, | |
| "loss": 0.0699, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.2507692307692309, | |
| "grad_norm": 0.33145455304026866, | |
| "learning_rate": 1.5560724659869905e-05, | |
| "loss": 0.0681, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 1.2523076923076923, | |
| "grad_norm": 0.4059667858998905, | |
| "learning_rate": 1.555067797268995e-05, | |
| "loss": 0.068, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.2538461538461538, | |
| "grad_norm": 0.3467956370682722, | |
| "learning_rate": 1.5540623181517532e-05, | |
| "loss": 0.0659, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.2553846153846153, | |
| "grad_norm": 0.5088686209429053, | |
| "learning_rate": 1.5530560301032644e-05, | |
| "loss": 0.0795, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.2569230769230768, | |
| "grad_norm": 0.3799873270111144, | |
| "learning_rate": 1.5520489345927095e-05, | |
| "loss": 0.0691, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 1.2584615384615385, | |
| "grad_norm": 0.41391636034513485, | |
| "learning_rate": 1.551041033090449e-05, | |
| "loss": 0.0721, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 0.40772591130685176, | |
| "learning_rate": 1.5500323270680194e-05, | |
| "loss": 0.0681, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 1.2615384615384615, | |
| "grad_norm": 0.3349423110799659, | |
| "learning_rate": 1.549022817998132e-05, | |
| "loss": 0.0623, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.2630769230769232, | |
| "grad_norm": 0.5203080319006015, | |
| "learning_rate": 1.5480125073546705e-05, | |
| "loss": 0.0659, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 1.2646153846153847, | |
| "grad_norm": 0.42351756936892165, | |
| "learning_rate": 1.5470013966126886e-05, | |
| "loss": 0.0702, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.2661538461538462, | |
| "grad_norm": 0.3483945108051493, | |
| "learning_rate": 1.5459894872484083e-05, | |
| "loss": 0.0636, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 1.2676923076923077, | |
| "grad_norm": 0.37839615755712913, | |
| "learning_rate": 1.5449767807392184e-05, | |
| "loss": 0.0676, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.2692307692307692, | |
| "grad_norm": 0.4465419577943629, | |
| "learning_rate": 1.5439632785636707e-05, | |
| "loss": 0.0631, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.2707692307692309, | |
| "grad_norm": 0.39462290024075386, | |
| "learning_rate": 1.542948982201479e-05, | |
| "loss": 0.0634, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.2723076923076924, | |
| "grad_norm": 0.3797179172041511, | |
| "learning_rate": 1.5419338931335155e-05, | |
| "loss": 0.0699, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 1.2738461538461539, | |
| "grad_norm": 0.3416577637686262, | |
| "learning_rate": 1.5409180128418123e-05, | |
| "loss": 0.0636, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.2753846153846153, | |
| "grad_norm": 0.4613004722112054, | |
| "learning_rate": 1.539901342809554e-05, | |
| "loss": 0.0684, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 1.2769230769230768, | |
| "grad_norm": 0.42702108586314186, | |
| "learning_rate": 1.5388838845210798e-05, | |
| "loss": 0.0792, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.2784615384615385, | |
| "grad_norm": 0.3530542594699597, | |
| "learning_rate": 1.5378656394618788e-05, | |
| "loss": 0.0647, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 0.43241017204879106, | |
| "learning_rate": 1.5368466091185893e-05, | |
| "loss": 0.0781, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.2815384615384615, | |
| "grad_norm": 0.5132567987415939, | |
| "learning_rate": 1.5358267949789968e-05, | |
| "loss": 0.0728, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 1.283076923076923, | |
| "grad_norm": 0.36683064622627476, | |
| "learning_rate": 1.5348061985320298e-05, | |
| "loss": 0.0725, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.2846153846153845, | |
| "grad_norm": 0.33492481470642155, | |
| "learning_rate": 1.53378482126776e-05, | |
| "loss": 0.0644, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.2861538461538462, | |
| "grad_norm": 0.3210179795426996, | |
| "learning_rate": 1.5327626646773975e-05, | |
| "loss": 0.0694, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.2876923076923077, | |
| "grad_norm": 0.3454605703445639, | |
| "learning_rate": 1.5317397302532933e-05, | |
| "loss": 0.063, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 1.2892307692307692, | |
| "grad_norm": 0.36111805478759756, | |
| "learning_rate": 1.530716019488931e-05, | |
| "loss": 0.0729, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.290769230769231, | |
| "grad_norm": 0.4150741361759226, | |
| "learning_rate": 1.529691533878929e-05, | |
| "loss": 0.0692, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 1.2923076923076924, | |
| "grad_norm": 0.44151832030522525, | |
| "learning_rate": 1.528666274919037e-05, | |
| "loss": 0.0668, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.2938461538461539, | |
| "grad_norm": 0.39281808968582055, | |
| "learning_rate": 1.527640244106133e-05, | |
| "loss": 0.0614, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 1.2953846153846154, | |
| "grad_norm": 0.3355371699856118, | |
| "learning_rate": 1.526613442938223e-05, | |
| "loss": 0.066, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.2969230769230768, | |
| "grad_norm": 0.3343752965517015, | |
| "learning_rate": 1.5255858729144368e-05, | |
| "loss": 0.0618, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 1.2984615384615386, | |
| "grad_norm": 0.4219851770072625, | |
| "learning_rate": 1.5245575355350273e-05, | |
| "loss": 0.0706, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 0.47236267011925603, | |
| "learning_rate": 1.5235284323013674e-05, | |
| "loss": 0.0768, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.3015384615384615, | |
| "grad_norm": 0.3160781099772521, | |
| "learning_rate": 1.5224985647159489e-05, | |
| "loss": 0.0677, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.303076923076923, | |
| "grad_norm": 0.4211840324694599, | |
| "learning_rate": 1.5214679342823786e-05, | |
| "loss": 0.0703, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 1.3046153846153845, | |
| "grad_norm": 0.4382171224080807, | |
| "learning_rate": 1.5204365425053773e-05, | |
| "loss": 0.0728, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.3061538461538462, | |
| "grad_norm": 0.3730723749029246, | |
| "learning_rate": 1.5194043908907774e-05, | |
| "loss": 0.0644, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 1.3076923076923077, | |
| "grad_norm": 0.43669340037201293, | |
| "learning_rate": 1.518371480945521e-05, | |
| "loss": 0.0725, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.3092307692307692, | |
| "grad_norm": 0.29956287148615507, | |
| "learning_rate": 1.5173378141776569e-05, | |
| "loss": 0.0595, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 1.3107692307692307, | |
| "grad_norm": 0.4010201519686623, | |
| "learning_rate": 1.5163033920963393e-05, | |
| "loss": 0.0715, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.3123076923076922, | |
| "grad_norm": 0.3284401287146176, | |
| "learning_rate": 1.515268216211825e-05, | |
| "loss": 0.0649, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 1.3138461538461539, | |
| "grad_norm": 0.36834718137576217, | |
| "learning_rate": 1.5142322880354706e-05, | |
| "loss": 0.0761, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.3153846153846154, | |
| "grad_norm": 0.4038185340311018, | |
| "learning_rate": 1.5131956090797326e-05, | |
| "loss": 0.0713, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.3169230769230769, | |
| "grad_norm": 0.3485378538805922, | |
| "learning_rate": 1.5121581808581623e-05, | |
| "loss": 0.0721, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.3184615384615386, | |
| "grad_norm": 0.324099392540781, | |
| "learning_rate": 1.5111200048854055e-05, | |
| "loss": 0.0704, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 0.3958228511004757, | |
| "learning_rate": 1.5100810826771997e-05, | |
| "loss": 0.0665, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.3215384615384616, | |
| "grad_norm": 0.4382455185601089, | |
| "learning_rate": 1.5090414157503715e-05, | |
| "loss": 0.076, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 1.323076923076923, | |
| "grad_norm": 0.3260556756343216, | |
| "learning_rate": 1.5080010056228353e-05, | |
| "loss": 0.0671, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.3246153846153845, | |
| "grad_norm": 0.3038142480938741, | |
| "learning_rate": 1.5069598538135905e-05, | |
| "loss": 0.0681, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 1.3261538461538462, | |
| "grad_norm": 0.3817393937929787, | |
| "learning_rate": 1.505917961842719e-05, | |
| "loss": 0.07, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.3276923076923077, | |
| "grad_norm": 0.43653178960329847, | |
| "learning_rate": 1.504875331231384e-05, | |
| "loss": 0.0682, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 1.3292307692307692, | |
| "grad_norm": 0.4191601359367802, | |
| "learning_rate": 1.5038319635018264e-05, | |
| "loss": 0.0709, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.3307692307692307, | |
| "grad_norm": 0.40482174127743453, | |
| "learning_rate": 1.5027878601773633e-05, | |
| "loss": 0.0681, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.3323076923076922, | |
| "grad_norm": 0.39987356115193523, | |
| "learning_rate": 1.5017430227823867e-05, | |
| "loss": 0.0653, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.333846153846154, | |
| "grad_norm": 0.36706667375847013, | |
| "learning_rate": 1.5006974528423585e-05, | |
| "loss": 0.0631, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 1.3353846153846154, | |
| "grad_norm": 0.3710865070928732, | |
| "learning_rate": 1.4996511518838129e-05, | |
| "loss": 0.0608, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.3369230769230769, | |
| "grad_norm": 0.5238263977692204, | |
| "learning_rate": 1.4986041214343487e-05, | |
| "loss": 0.0739, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 1.3384615384615386, | |
| "grad_norm": 0.38666104678955204, | |
| "learning_rate": 1.4975563630226311e-05, | |
| "loss": 0.0515, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 0.31886781420723, | |
| "learning_rate": 1.4965078781783882e-05, | |
| "loss": 0.0649, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.3415384615384616, | |
| "grad_norm": 0.32582039756760706, | |
| "learning_rate": 1.4954586684324077e-05, | |
| "loss": 0.0682, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.343076923076923, | |
| "grad_norm": 0.4323139162984884, | |
| "learning_rate": 1.494408735316537e-05, | |
| "loss": 0.0722, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 1.3446153846153845, | |
| "grad_norm": 0.4522628240441855, | |
| "learning_rate": 1.4933580803636787e-05, | |
| "loss": 0.0641, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.3461538461538463, | |
| "grad_norm": 0.38114193229980947, | |
| "learning_rate": 1.4923067051077893e-05, | |
| "loss": 0.0724, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.3476923076923077, | |
| "grad_norm": 0.4247141871033444, | |
| "learning_rate": 1.4912546110838775e-05, | |
| "loss": 0.0713, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.3492307692307692, | |
| "grad_norm": 0.4058780791287694, | |
| "learning_rate": 1.490201799828001e-05, | |
| "loss": 0.0741, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 1.3507692307692307, | |
| "grad_norm": 0.43398667126598395, | |
| "learning_rate": 1.4891482728772645e-05, | |
| "loss": 0.0694, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.3523076923076922, | |
| "grad_norm": 0.4177065232740013, | |
| "learning_rate": 1.4880940317698182e-05, | |
| "loss": 0.0731, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.353846153846154, | |
| "grad_norm": 0.3284355368343791, | |
| "learning_rate": 1.4870390780448545e-05, | |
| "loss": 0.0747, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.3553846153846154, | |
| "grad_norm": 0.44954781903955254, | |
| "learning_rate": 1.485983413242606e-05, | |
| "loss": 0.0628, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 1.356923076923077, | |
| "grad_norm": 0.5129727422741225, | |
| "learning_rate": 1.4849270389043444e-05, | |
| "loss": 0.0687, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.3584615384615384, | |
| "grad_norm": 0.4467443171166444, | |
| "learning_rate": 1.4838699565723764e-05, | |
| "loss": 0.0652, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 1.3599999999999999, | |
| "grad_norm": 0.4496929006760978, | |
| "learning_rate": 1.4828121677900427e-05, | |
| "loss": 0.0744, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.3615384615384616, | |
| "grad_norm": 0.3414293713875311, | |
| "learning_rate": 1.4817536741017153e-05, | |
| "loss": 0.0728, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.363076923076923, | |
| "grad_norm": 0.316120367911126, | |
| "learning_rate": 1.4806944770527958e-05, | |
| "loss": 0.0666, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.3646153846153846, | |
| "grad_norm": 0.40460300362699714, | |
| "learning_rate": 1.479634578189712e-05, | |
| "loss": 0.0727, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 1.3661538461538463, | |
| "grad_norm": 0.45659447717946505, | |
| "learning_rate": 1.4785739790599174e-05, | |
| "loss": 0.0782, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.3676923076923078, | |
| "grad_norm": 0.40368455430062866, | |
| "learning_rate": 1.4775126812118865e-05, | |
| "loss": 0.0704, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 1.3692307692307693, | |
| "grad_norm": 0.35547903664705943, | |
| "learning_rate": 1.4764506861951151e-05, | |
| "loss": 0.0774, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.3707692307692307, | |
| "grad_norm": 0.3259048929714102, | |
| "learning_rate": 1.4753879955601162e-05, | |
| "loss": 0.0568, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 1.3723076923076922, | |
| "grad_norm": 0.33398368713521703, | |
| "learning_rate": 1.474324610858419e-05, | |
| "loss": 0.0696, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.373846153846154, | |
| "grad_norm": 0.3975414141870634, | |
| "learning_rate": 1.4732605336425651e-05, | |
| "loss": 0.0653, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 1.3753846153846154, | |
| "grad_norm": 0.3695667545172178, | |
| "learning_rate": 1.472195765466108e-05, | |
| "loss": 0.0657, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.376923076923077, | |
| "grad_norm": 0.3070295149212851, | |
| "learning_rate": 1.4711303078836098e-05, | |
| "loss": 0.0607, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.3784615384615384, | |
| "grad_norm": 0.43373009062750495, | |
| "learning_rate": 1.4700641624506392e-05, | |
| "loss": 0.077, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 0.3207986109640768, | |
| "learning_rate": 1.4689973307237687e-05, | |
| "loss": 0.0663, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 1.3815384615384616, | |
| "grad_norm": 0.33029490975452497, | |
| "learning_rate": 1.4679298142605735e-05, | |
| "loss": 0.0661, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.383076923076923, | |
| "grad_norm": 0.39797201272606875, | |
| "learning_rate": 1.466861614619628e-05, | |
| "loss": 0.0683, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 1.3846153846153846, | |
| "grad_norm": 0.3569032293188448, | |
| "learning_rate": 1.465792733360504e-05, | |
| "loss": 0.0712, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.3861538461538463, | |
| "grad_norm": 0.33179043133017183, | |
| "learning_rate": 1.4647231720437687e-05, | |
| "loss": 0.076, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 1.3876923076923076, | |
| "grad_norm": 0.3185134355526598, | |
| "learning_rate": 1.4636529322309825e-05, | |
| "loss": 0.0716, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.3892307692307693, | |
| "grad_norm": 0.38927875322344885, | |
| "learning_rate": 1.4625820154846953e-05, | |
| "loss": 0.0607, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 1.3907692307692308, | |
| "grad_norm": 0.3592258244516518, | |
| "learning_rate": 1.4615104233684467e-05, | |
| "loss": 0.0636, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.3923076923076922, | |
| "grad_norm": 0.440027348689472, | |
| "learning_rate": 1.4604381574467616e-05, | |
| "loss": 0.0812, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.393846153846154, | |
| "grad_norm": 0.31797170088925414, | |
| "learning_rate": 1.4593652192851487e-05, | |
| "loss": 0.0624, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.3953846153846154, | |
| "grad_norm": 0.44417520359272944, | |
| "learning_rate": 1.4582916104500977e-05, | |
| "loss": 0.0734, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 1.396923076923077, | |
| "grad_norm": 0.34291352445561796, | |
| "learning_rate": 1.457217332509079e-05, | |
| "loss": 0.0684, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.3984615384615384, | |
| "grad_norm": 0.3078336428229946, | |
| "learning_rate": 1.4561423870305383e-05, | |
| "loss": 0.0594, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 0.45773936133687876, | |
| "learning_rate": 1.4550667755838965e-05, | |
| "loss": 0.0735, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.4015384615384616, | |
| "grad_norm": 0.3093789239621972, | |
| "learning_rate": 1.4539904997395468e-05, | |
| "loss": 0.0665, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 1.403076923076923, | |
| "grad_norm": 0.33556510387782423, | |
| "learning_rate": 1.4529135610688529e-05, | |
| "loss": 0.067, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.4046153846153846, | |
| "grad_norm": 0.3821254219317531, | |
| "learning_rate": 1.4518359611441452e-05, | |
| "loss": 0.0795, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 1.406153846153846, | |
| "grad_norm": 0.3968625825470183, | |
| "learning_rate": 1.4507577015387204e-05, | |
| "loss": 0.0694, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.4076923076923076, | |
| "grad_norm": 0.40920318095123365, | |
| "learning_rate": 1.4496787838268378e-05, | |
| "loss": 0.0751, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.4092307692307693, | |
| "grad_norm": 0.333518429842856, | |
| "learning_rate": 1.4485992095837178e-05, | |
| "loss": 0.0678, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.4107692307692308, | |
| "grad_norm": 0.4180262330660976, | |
| "learning_rate": 1.4475189803855399e-05, | |
| "loss": 0.0669, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 1.4123076923076923, | |
| "grad_norm": 0.3270671609911376, | |
| "learning_rate": 1.4464380978094386e-05, | |
| "loss": 0.0628, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.413846153846154, | |
| "grad_norm": 0.3321568177478066, | |
| "learning_rate": 1.445356563433503e-05, | |
| "loss": 0.0675, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 1.4153846153846155, | |
| "grad_norm": 0.4278414076070176, | |
| "learning_rate": 1.4442743788367741e-05, | |
| "loss": 0.0756, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.416923076923077, | |
| "grad_norm": 0.3573839526102382, | |
| "learning_rate": 1.4431915455992416e-05, | |
| "loss": 0.0743, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 1.4184615384615384, | |
| "grad_norm": 0.3965761887911091, | |
| "learning_rate": 1.4421080653018426e-05, | |
| "loss": 0.0686, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "grad_norm": 0.3277382293498253, | |
| "learning_rate": 1.4410239395264594e-05, | |
| "loss": 0.0593, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 1.4215384615384616, | |
| "grad_norm": 0.413916370460171, | |
| "learning_rate": 1.4399391698559153e-05, | |
| "loss": 0.057, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.4230769230769231, | |
| "grad_norm": 0.40814754638707723, | |
| "learning_rate": 1.438853757873975e-05, | |
| "loss": 0.0673, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.4246153846153846, | |
| "grad_norm": 0.37821667270068476, | |
| "learning_rate": 1.4377677051653404e-05, | |
| "loss": 0.0703, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.426153846153846, | |
| "grad_norm": 0.5102274833966777, | |
| "learning_rate": 1.4366810133156495e-05, | |
| "loss": 0.076, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 1.4276923076923076, | |
| "grad_norm": 0.44547602217095333, | |
| "learning_rate": 1.4355936839114718e-05, | |
| "loss": 0.0747, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.4292307692307693, | |
| "grad_norm": 0.3911040675941954, | |
| "learning_rate": 1.43450571854031e-05, | |
| "loss": 0.0715, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 1.4307692307692308, | |
| "grad_norm": 0.3796479456399161, | |
| "learning_rate": 1.4334171187905928e-05, | |
| "loss": 0.0732, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.4323076923076923, | |
| "grad_norm": 0.33940059068204437, | |
| "learning_rate": 1.4323278862516774e-05, | |
| "loss": 0.0702, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 1.4338461538461538, | |
| "grad_norm": 0.3509046699774803, | |
| "learning_rate": 1.431238022513843e-05, | |
| "loss": 0.0657, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.4353846153846153, | |
| "grad_norm": 0.36668044712623865, | |
| "learning_rate": 1.430147529168292e-05, | |
| "loss": 0.0625, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 1.436923076923077, | |
| "grad_norm": 0.4281432928035457, | |
| "learning_rate": 1.4290564078071445e-05, | |
| "loss": 0.0813, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.4384615384615385, | |
| "grad_norm": 0.33710258376419955, | |
| "learning_rate": 1.4279646600234388e-05, | |
| "loss": 0.068, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 0.32354838701141914, | |
| "learning_rate": 1.4268722874111265e-05, | |
| "loss": 0.0648, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.4415384615384617, | |
| "grad_norm": 0.3021595123301058, | |
| "learning_rate": 1.4257792915650728e-05, | |
| "loss": 0.0653, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 1.4430769230769231, | |
| "grad_norm": 0.44802198690971745, | |
| "learning_rate": 1.4246856740810517e-05, | |
| "loss": 0.0768, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.4446153846153846, | |
| "grad_norm": 0.3593940216420963, | |
| "learning_rate": 1.4235914365557455e-05, | |
| "loss": 0.0638, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 1.4461538461538461, | |
| "grad_norm": 0.34159209475332464, | |
| "learning_rate": 1.4224965805867413e-05, | |
| "loss": 0.0643, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.4476923076923076, | |
| "grad_norm": 0.35984267550149907, | |
| "learning_rate": 1.4214011077725293e-05, | |
| "loss": 0.0723, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 1.4492307692307693, | |
| "grad_norm": 0.39317799185440994, | |
| "learning_rate": 1.4203050197125005e-05, | |
| "loss": 0.0643, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.4507692307692308, | |
| "grad_norm": 0.336857468826351, | |
| "learning_rate": 1.4192083180069441e-05, | |
| "loss": 0.0618, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 1.4523076923076923, | |
| "grad_norm": 0.35186803870389044, | |
| "learning_rate": 1.4181110042570447e-05, | |
| "loss": 0.0683, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.4538461538461538, | |
| "grad_norm": 0.29942739165233795, | |
| "learning_rate": 1.4170130800648814e-05, | |
| "loss": 0.0652, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 1.4553846153846153, | |
| "grad_norm": 0.34891798063795276, | |
| "learning_rate": 1.4159145470334237e-05, | |
| "loss": 0.0696, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.456923076923077, | |
| "grad_norm": 0.4077733821140671, | |
| "learning_rate": 1.4148154067665305e-05, | |
| "loss": 0.0699, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 1.4584615384615385, | |
| "grad_norm": 0.40284125050375413, | |
| "learning_rate": 1.4137156608689469e-05, | |
| "loss": 0.0742, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 0.42566126627046824, | |
| "learning_rate": 1.4126153109463025e-05, | |
| "loss": 0.0642, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 1.4615384615384617, | |
| "grad_norm": 0.32133228937421, | |
| "learning_rate": 1.411514358605109e-05, | |
| "loss": 0.068, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.463076923076923, | |
| "grad_norm": 0.36895430822571307, | |
| "learning_rate": 1.410412805452757e-05, | |
| "loss": 0.0667, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 1.4646153846153847, | |
| "grad_norm": 0.3166854845654635, | |
| "learning_rate": 1.4093106530975146e-05, | |
| "loss": 0.061, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.4661538461538461, | |
| "grad_norm": 0.5003043211716166, | |
| "learning_rate": 1.4082079031485253e-05, | |
| "loss": 0.0656, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 1.4676923076923076, | |
| "grad_norm": 0.39875601093346025, | |
| "learning_rate": 1.4071045572158038e-05, | |
| "loss": 0.0681, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.4692307692307693, | |
| "grad_norm": 0.3993223750486499, | |
| "learning_rate": 1.4060006169102363e-05, | |
| "loss": 0.0692, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 1.4707692307692308, | |
| "grad_norm": 0.3477397845990775, | |
| "learning_rate": 1.4048960838435755e-05, | |
| "loss": 0.0702, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.4723076923076923, | |
| "grad_norm": 0.4126048706500608, | |
| "learning_rate": 1.403790959628441e-05, | |
| "loss": 0.0703, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 1.4738461538461538, | |
| "grad_norm": 0.3012393164802371, | |
| "learning_rate": 1.4026852458783141e-05, | |
| "loss": 0.0631, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.4753846153846153, | |
| "grad_norm": 0.41825312268062614, | |
| "learning_rate": 1.4015789442075376e-05, | |
| "loss": 0.0639, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 1.476923076923077, | |
| "grad_norm": 0.35965621239571177, | |
| "learning_rate": 1.4004720562313125e-05, | |
| "loss": 0.0631, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.4784615384615385, | |
| "grad_norm": 0.329531529153342, | |
| "learning_rate": 1.3993645835656955e-05, | |
| "loss": 0.0794, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 0.34849163011002293, | |
| "learning_rate": 1.3982565278275976e-05, | |
| "loss": 0.067, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.4815384615384615, | |
| "grad_norm": 0.3857507607852618, | |
| "learning_rate": 1.3971478906347806e-05, | |
| "loss": 0.0786, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 1.483076923076923, | |
| "grad_norm": 0.3354179087450714, | |
| "learning_rate": 1.3960386736058552e-05, | |
| "loss": 0.0653, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.4846153846153847, | |
| "grad_norm": 0.4517425643119976, | |
| "learning_rate": 1.394928878360279e-05, | |
| "loss": 0.0696, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 1.4861538461538462, | |
| "grad_norm": 0.3726999175701167, | |
| "learning_rate": 1.3938185065183534e-05, | |
| "loss": 0.0703, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.4876923076923076, | |
| "grad_norm": 0.32753752193184116, | |
| "learning_rate": 1.3927075597012215e-05, | |
| "loss": 0.0688, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 1.4892307692307694, | |
| "grad_norm": 0.3625776900702585, | |
| "learning_rate": 1.391596039530867e-05, | |
| "loss": 0.0699, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.4907692307692308, | |
| "grad_norm": 0.4221103719027582, | |
| "learning_rate": 1.3904839476301091e-05, | |
| "loss": 0.0745, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 1.4923076923076923, | |
| "grad_norm": 0.3300247767704618, | |
| "learning_rate": 1.3893712856226028e-05, | |
| "loss": 0.063, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.4938461538461538, | |
| "grad_norm": 0.35465084506366884, | |
| "learning_rate": 1.388258055132835e-05, | |
| "loss": 0.0628, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 1.4953846153846153, | |
| "grad_norm": 0.3956402484871412, | |
| "learning_rate": 1.3871442577861234e-05, | |
| "loss": 0.0718, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.496923076923077, | |
| "grad_norm": 0.32259127869403026, | |
| "learning_rate": 1.3860298952086118e-05, | |
| "loss": 0.0673, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 1.4984615384615385, | |
| "grad_norm": 0.30587983895963383, | |
| "learning_rate": 1.3849149690272704e-05, | |
| "loss": 0.0642, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 0.3268162425630392, | |
| "learning_rate": 1.383799480869892e-05, | |
| "loss": 0.0655, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 1.5015384615384615, | |
| "grad_norm": 0.35344360561508237, | |
| "learning_rate": 1.3826834323650899e-05, | |
| "loss": 0.0717, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.503076923076923, | |
| "grad_norm": 0.4240517174731372, | |
| "learning_rate": 1.3815668251422953e-05, | |
| "loss": 0.0767, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 1.5046153846153847, | |
| "grad_norm": 0.3890020659351386, | |
| "learning_rate": 1.3804496608317557e-05, | |
| "loss": 0.0707, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.5061538461538462, | |
| "grad_norm": 0.34522079737200434, | |
| "learning_rate": 1.3793319410645307e-05, | |
| "loss": 0.0704, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 1.5076923076923077, | |
| "grad_norm": 0.35470349339672885, | |
| "learning_rate": 1.3782136674724924e-05, | |
| "loss": 0.0681, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.5092307692307694, | |
| "grad_norm": 0.37327575501099824, | |
| "learning_rate": 1.3770948416883205e-05, | |
| "loss": 0.0609, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 1.5107692307692306, | |
| "grad_norm": 0.3727506860019033, | |
| "learning_rate": 1.3759754653455013e-05, | |
| "loss": 0.0624, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.5123076923076924, | |
| "grad_norm": 0.3169662482724166, | |
| "learning_rate": 1.3748555400783245e-05, | |
| "loss": 0.0679, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 1.5138461538461538, | |
| "grad_norm": 0.318203785112587, | |
| "learning_rate": 1.3737350675218819e-05, | |
| "loss": 0.0721, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.5153846153846153, | |
| "grad_norm": 0.3459767757333529, | |
| "learning_rate": 1.3726140493120639e-05, | |
| "loss": 0.0692, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 1.516923076923077, | |
| "grad_norm": 0.35213169704344977, | |
| "learning_rate": 1.3714924870855573e-05, | |
| "loss": 0.059, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.5184615384615383, | |
| "grad_norm": 0.444276142733395, | |
| "learning_rate": 1.3703703824798438e-05, | |
| "loss": 0.0684, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 0.35207361668294573, | |
| "learning_rate": 1.3692477371331965e-05, | |
| "loss": 0.0744, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.5215384615384615, | |
| "grad_norm": 0.35999767828776713, | |
| "learning_rate": 1.3681245526846782e-05, | |
| "loss": 0.0713, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 1.523076923076923, | |
| "grad_norm": 0.39013944310312315, | |
| "learning_rate": 1.3670008307741388e-05, | |
| "loss": 0.0725, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.5246153846153847, | |
| "grad_norm": 0.38236299835311816, | |
| "learning_rate": 1.3658765730422126e-05, | |
| "loss": 0.0679, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 1.5261538461538462, | |
| "grad_norm": 0.3391508447409303, | |
| "learning_rate": 1.3647517811303164e-05, | |
| "loss": 0.0674, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.5276923076923077, | |
| "grad_norm": 0.6412950373687982, | |
| "learning_rate": 1.3636264566806473e-05, | |
| "loss": 0.0741, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 1.5292307692307694, | |
| "grad_norm": 0.3350577163257067, | |
| "learning_rate": 1.362500601336179e-05, | |
| "loss": 0.0614, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.5307692307692307, | |
| "grad_norm": 0.3626403986182635, | |
| "learning_rate": 1.3613742167406614e-05, | |
| "loss": 0.0691, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 1.5323076923076924, | |
| "grad_norm": 0.3470732304493993, | |
| "learning_rate": 1.3602473045386165e-05, | |
| "loss": 0.0742, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.5338461538461539, | |
| "grad_norm": 0.33260189078997204, | |
| "learning_rate": 1.3591198663753358e-05, | |
| "loss": 0.0686, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 1.5353846153846153, | |
| "grad_norm": 0.28916096300010014, | |
| "learning_rate": 1.3579919038968805e-05, | |
| "loss": 0.0638, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.536923076923077, | |
| "grad_norm": 0.43372086950966615, | |
| "learning_rate": 1.3568634187500762e-05, | |
| "loss": 0.07, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 1.5384615384615383, | |
| "grad_norm": 0.40435446045850537, | |
| "learning_rate": 1.3557344125825113e-05, | |
| "loss": 0.0686, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 0.3600272169695901, | |
| "learning_rate": 1.3546048870425356e-05, | |
| "loss": 0.075, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 1.5415384615384615, | |
| "grad_norm": 0.31816083037803755, | |
| "learning_rate": 1.3534748437792573e-05, | |
| "loss": 0.0737, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.543076923076923, | |
| "grad_norm": 0.33273148854011647, | |
| "learning_rate": 1.3523442844425393e-05, | |
| "loss": 0.0665, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 1.5446153846153847, | |
| "grad_norm": 0.37169937500283534, | |
| "learning_rate": 1.3512132106829996e-05, | |
| "loss": 0.0654, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.546153846153846, | |
| "grad_norm": 0.329535717539556, | |
| "learning_rate": 1.3500816241520059e-05, | |
| "loss": 0.0648, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 1.5476923076923077, | |
| "grad_norm": 0.25023308868192906, | |
| "learning_rate": 1.3489495265016753e-05, | |
| "loss": 0.0558, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.5492307692307692, | |
| "grad_norm": 0.3555280320936877, | |
| "learning_rate": 1.3478169193848705e-05, | |
| "loss": 0.0695, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 1.5507692307692307, | |
| "grad_norm": 0.3533955661877233, | |
| "learning_rate": 1.346683804455199e-05, | |
| "loss": 0.0651, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.5523076923076924, | |
| "grad_norm": 0.3802507308411157, | |
| "learning_rate": 1.3455501833670089e-05, | |
| "loss": 0.0737, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 1.5538461538461539, | |
| "grad_norm": 0.3088088179326214, | |
| "learning_rate": 1.3444160577753872e-05, | |
| "loss": 0.0681, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.5553846153846154, | |
| "grad_norm": 0.2929009087213979, | |
| "learning_rate": 1.3432814293361585e-05, | |
| "loss": 0.062, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 1.556923076923077, | |
| "grad_norm": 0.37467092752746656, | |
| "learning_rate": 1.34214629970588e-05, | |
| "loss": 0.0709, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.5584615384615383, | |
| "grad_norm": 0.33157137643925366, | |
| "learning_rate": 1.3410106705418424e-05, | |
| "loss": 0.0655, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 0.36425079800863963, | |
| "learning_rate": 1.3398745435020642e-05, | |
| "loss": 0.0717, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.5615384615384615, | |
| "grad_norm": 0.3794215971913442, | |
| "learning_rate": 1.3387379202452917e-05, | |
| "loss": 0.07, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 1.563076923076923, | |
| "grad_norm": 0.35292364976240903, | |
| "learning_rate": 1.337600802430995e-05, | |
| "loss": 0.0737, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.5646153846153847, | |
| "grad_norm": 0.36309299670186634, | |
| "learning_rate": 1.3364631917193671e-05, | |
| "loss": 0.0678, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 1.566153846153846, | |
| "grad_norm": 0.43174462874417424, | |
| "learning_rate": 1.33532508977132e-05, | |
| "loss": 0.0664, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.5676923076923077, | |
| "grad_norm": 0.2980059236096669, | |
| "learning_rate": 1.3341864982484828e-05, | |
| "loss": 0.0592, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 1.5692307692307692, | |
| "grad_norm": 0.3505262612950089, | |
| "learning_rate": 1.3330474188132004e-05, | |
| "loss": 0.0735, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.5707692307692307, | |
| "grad_norm": 0.3640243702743533, | |
| "learning_rate": 1.3319078531285286e-05, | |
| "loss": 0.0684, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 1.5723076923076924, | |
| "grad_norm": 0.37347682099255064, | |
| "learning_rate": 1.3307678028582342e-05, | |
| "loss": 0.0631, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.573846153846154, | |
| "grad_norm": 0.30683658621574744, | |
| "learning_rate": 1.329627269666791e-05, | |
| "loss": 0.064, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 1.5753846153846154, | |
| "grad_norm": 0.38325694612265304, | |
| "learning_rate": 1.328486255219378e-05, | |
| "loss": 0.0748, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.5769230769230769, | |
| "grad_norm": 0.34834422809752846, | |
| "learning_rate": 1.3273447611818768e-05, | |
| "loss": 0.0706, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 1.5784615384615384, | |
| "grad_norm": 0.33431857255788205, | |
| "learning_rate": 1.3262027892208696e-05, | |
| "loss": 0.0607, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 0.36935448940068555, | |
| "learning_rate": 1.3250603410036356e-05, | |
| "loss": 0.0657, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 1.5815384615384616, | |
| "grad_norm": 0.5029604354345404, | |
| "learning_rate": 1.3239174181981496e-05, | |
| "loss": 0.0709, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.583076923076923, | |
| "grad_norm": 0.34010742851810977, | |
| "learning_rate": 1.3227740224730799e-05, | |
| "loss": 0.0639, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 1.5846153846153848, | |
| "grad_norm": 0.35326235063290296, | |
| "learning_rate": 1.3216301554977844e-05, | |
| "loss": 0.07, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.586153846153846, | |
| "grad_norm": 0.371088178596748, | |
| "learning_rate": 1.3204858189423097e-05, | |
| "loss": 0.0689, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 1.5876923076923077, | |
| "grad_norm": 0.3525413313256349, | |
| "learning_rate": 1.3193410144773876e-05, | |
| "loss": 0.0646, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.5892307692307692, | |
| "grad_norm": 0.47439467360396914, | |
| "learning_rate": 1.3181957437744333e-05, | |
| "loss": 0.0665, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 1.5907692307692307, | |
| "grad_norm": 0.3389914679073154, | |
| "learning_rate": 1.3170500085055424e-05, | |
| "loss": 0.0663, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.5923076923076924, | |
| "grad_norm": 0.34343403261475547, | |
| "learning_rate": 1.3159038103434889e-05, | |
| "loss": 0.0693, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 1.5938461538461537, | |
| "grad_norm": 0.30611366359410186, | |
| "learning_rate": 1.314757150961723e-05, | |
| "loss": 0.0676, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.5953846153846154, | |
| "grad_norm": 0.44257355506663903, | |
| "learning_rate": 1.3136100320343674e-05, | |
| "loss": 0.0701, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 1.596923076923077, | |
| "grad_norm": 0.32909363170626144, | |
| "learning_rate": 1.3124624552362166e-05, | |
| "loss": 0.0627, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.5984615384615384, | |
| "grad_norm": 0.38341925530098436, | |
| "learning_rate": 1.3113144222427334e-05, | |
| "loss": 0.0718, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.3390749420830194, | |
| "learning_rate": 1.3101659347300462e-05, | |
| "loss": 0.0613, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.6015384615384616, | |
| "grad_norm": 0.3462273945212573, | |
| "learning_rate": 1.3090169943749475e-05, | |
| "loss": 0.0735, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 1.603076923076923, | |
| "grad_norm": 0.3695043310551725, | |
| "learning_rate": 1.3078676028548908e-05, | |
| "loss": 0.0694, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.6046153846153848, | |
| "grad_norm": 0.3530125489309211, | |
| "learning_rate": 1.3067177618479883e-05, | |
| "loss": 0.0631, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 1.606153846153846, | |
| "grad_norm": 0.32964686006837474, | |
| "learning_rate": 1.305567473033008e-05, | |
| "loss": 0.0679, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.6076923076923078, | |
| "grad_norm": 0.3713682256507068, | |
| "learning_rate": 1.3044167380893726e-05, | |
| "loss": 0.0701, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 1.6092307692307692, | |
| "grad_norm": 0.3762742954199106, | |
| "learning_rate": 1.3032655586971552e-05, | |
| "loss": 0.0628, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.6107692307692307, | |
| "grad_norm": 0.3265101319762403, | |
| "learning_rate": 1.3021139365370787e-05, | |
| "loss": 0.0656, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 1.6123076923076924, | |
| "grad_norm": 0.28711481275165773, | |
| "learning_rate": 1.300961873290512e-05, | |
| "loss": 0.068, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.6138461538461537, | |
| "grad_norm": 0.4010476086338245, | |
| "learning_rate": 1.2998093706394676e-05, | |
| "loss": 0.0717, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 1.6153846153846154, | |
| "grad_norm": 0.3145419470926991, | |
| "learning_rate": 1.2986564302666e-05, | |
| "loss": 0.0694, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.616923076923077, | |
| "grad_norm": 0.3926012570647249, | |
| "learning_rate": 1.297503053855203e-05, | |
| "loss": 0.0713, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 1.6184615384615384, | |
| "grad_norm": 0.3304899518184055, | |
| "learning_rate": 1.2963492430892066e-05, | |
| "loss": 0.0665, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 0.3249058555050507, | |
| "learning_rate": 1.295194999653175e-05, | |
| "loss": 0.0716, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 1.6215384615384614, | |
| "grad_norm": 0.34563682161792825, | |
| "learning_rate": 1.294040325232304e-05, | |
| "loss": 0.0689, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.623076923076923, | |
| "grad_norm": 0.3623157841595741, | |
| "learning_rate": 1.292885221512419e-05, | |
| "loss": 0.0686, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 1.6246153846153846, | |
| "grad_norm": 0.4032343951986735, | |
| "learning_rate": 1.291729690179972e-05, | |
| "loss": 0.0709, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.626153846153846, | |
| "grad_norm": 0.42548798300770907, | |
| "learning_rate": 1.2905737329220394e-05, | |
| "loss": 0.0769, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 1.6276923076923078, | |
| "grad_norm": 0.38063725968362255, | |
| "learning_rate": 1.2894173514263191e-05, | |
| "loss": 0.0718, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.6292307692307693, | |
| "grad_norm": 0.4260166630050922, | |
| "learning_rate": 1.2882605473811282e-05, | |
| "loss": 0.0755, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 1.6307692307692307, | |
| "grad_norm": 0.3183113830256884, | |
| "learning_rate": 1.2871033224754022e-05, | |
| "loss": 0.0659, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.6323076923076925, | |
| "grad_norm": 0.33423253446578904, | |
| "learning_rate": 1.2859456783986892e-05, | |
| "loss": 0.0675, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 1.6338461538461537, | |
| "grad_norm": 0.38425643174539403, | |
| "learning_rate": 1.2847876168411506e-05, | |
| "loss": 0.0668, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.6353846153846154, | |
| "grad_norm": 0.39834995039582705, | |
| "learning_rate": 1.2836291394935568e-05, | |
| "loss": 0.0702, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 1.636923076923077, | |
| "grad_norm": 0.3592062233324518, | |
| "learning_rate": 1.2824702480472846e-05, | |
| "loss": 0.0698, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.6384615384615384, | |
| "grad_norm": 0.30141364654191644, | |
| "learning_rate": 1.2813109441943166e-05, | |
| "loss": 0.0608, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 1.6400000000000001, | |
| "grad_norm": 0.31650470078418513, | |
| "learning_rate": 1.280151229627237e-05, | |
| "loss": 0.0727, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.6415384615384614, | |
| "grad_norm": 0.3073328562892288, | |
| "learning_rate": 1.2789911060392295e-05, | |
| "loss": 0.0597, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 1.643076923076923, | |
| "grad_norm": 0.352934869488259, | |
| "learning_rate": 1.2778305751240749e-05, | |
| "loss": 0.0646, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.6446153846153846, | |
| "grad_norm": 0.3477210583426811, | |
| "learning_rate": 1.2766696385761494e-05, | |
| "loss": 0.0694, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 1.646153846153846, | |
| "grad_norm": 0.31714834966112554, | |
| "learning_rate": 1.2755082980904206e-05, | |
| "loss": 0.0693, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.6476923076923078, | |
| "grad_norm": 0.3328830298996729, | |
| "learning_rate": 1.274346555362446e-05, | |
| "loss": 0.0651, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 1.6492307692307693, | |
| "grad_norm": 0.4081142662510038, | |
| "learning_rate": 1.2731844120883705e-05, | |
| "loss": 0.0677, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.6507692307692308, | |
| "grad_norm": 0.3634578478803434, | |
| "learning_rate": 1.2720218699649243e-05, | |
| "loss": 0.0674, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 1.6523076923076923, | |
| "grad_norm": 0.38192663091543777, | |
| "learning_rate": 1.270858930689419e-05, | |
| "loss": 0.0701, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.6538461538461537, | |
| "grad_norm": 0.3172214274621381, | |
| "learning_rate": 1.269695595959747e-05, | |
| "loss": 0.0704, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 1.6553846153846155, | |
| "grad_norm": 0.38108705084014816, | |
| "learning_rate": 1.2685318674743769e-05, | |
| "loss": 0.0773, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.656923076923077, | |
| "grad_norm": 0.32308540109864475, | |
| "learning_rate": 1.2673677469323532e-05, | |
| "loss": 0.0648, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 1.6584615384615384, | |
| "grad_norm": 0.397581930467522, | |
| "learning_rate": 1.2662032360332926e-05, | |
| "loss": 0.0639, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.6600000000000001, | |
| "grad_norm": 0.428597873391097, | |
| "learning_rate": 1.2650383364773812e-05, | |
| "loss": 0.0649, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 1.6615384615384614, | |
| "grad_norm": 0.3627584918043698, | |
| "learning_rate": 1.2638730499653731e-05, | |
| "loss": 0.0685, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.6630769230769231, | |
| "grad_norm": 0.39434710297076264, | |
| "learning_rate": 1.262707378198587e-05, | |
| "loss": 0.0623, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 1.6646153846153846, | |
| "grad_norm": 0.4398747205110735, | |
| "learning_rate": 1.2615413228789044e-05, | |
| "loss": 0.0731, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.666153846153846, | |
| "grad_norm": 0.42581951735488976, | |
| "learning_rate": 1.2603748857087668e-05, | |
| "loss": 0.0722, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 1.6676923076923078, | |
| "grad_norm": 0.33276187980893557, | |
| "learning_rate": 1.2592080683911726e-05, | |
| "loss": 0.0696, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.669230769230769, | |
| "grad_norm": 0.41489629589539934, | |
| "learning_rate": 1.258040872629676e-05, | |
| "loss": 0.0719, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 1.6707692307692308, | |
| "grad_norm": 0.3446331527576127, | |
| "learning_rate": 1.2568733001283828e-05, | |
| "loss": 0.0559, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.6723076923076923, | |
| "grad_norm": 0.35834532855166323, | |
| "learning_rate": 1.2557053525919503e-05, | |
| "loss": 0.0746, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 1.6738461538461538, | |
| "grad_norm": 0.4154468285899175, | |
| "learning_rate": 1.2545370317255817e-05, | |
| "loss": 0.0685, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.6753846153846155, | |
| "grad_norm": 0.3870928793180616, | |
| "learning_rate": 1.2533683392350264e-05, | |
| "loss": 0.081, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 1.676923076923077, | |
| "grad_norm": 0.32370828199921, | |
| "learning_rate": 1.252199276826576e-05, | |
| "loss": 0.0535, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.6784615384615384, | |
| "grad_norm": 0.36882979778488023, | |
| "learning_rate": 1.2510298462070619e-05, | |
| "loss": 0.0671, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 1.6800000000000002, | |
| "grad_norm": 0.32092949735914467, | |
| "learning_rate": 1.2498600490838535e-05, | |
| "loss": 0.0626, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.6815384615384614, | |
| "grad_norm": 0.5727815255595513, | |
| "learning_rate": 1.2486898871648552e-05, | |
| "loss": 0.0734, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 1.6830769230769231, | |
| "grad_norm": 0.3458495040870996, | |
| "learning_rate": 1.2475193621585036e-05, | |
| "loss": 0.0653, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.6846153846153846, | |
| "grad_norm": 0.35967546735381495, | |
| "learning_rate": 1.2463484757737663e-05, | |
| "loss": 0.0705, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 1.6861538461538461, | |
| "grad_norm": 0.38237562063590214, | |
| "learning_rate": 1.2451772297201376e-05, | |
| "loss": 0.0661, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.6876923076923078, | |
| "grad_norm": 0.47686706186257616, | |
| "learning_rate": 1.2440056257076376e-05, | |
| "loss": 0.0613, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 1.689230769230769, | |
| "grad_norm": 0.46719251208554496, | |
| "learning_rate": 1.2428336654468085e-05, | |
| "loss": 0.0662, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.6907692307692308, | |
| "grad_norm": 0.3686979455788657, | |
| "learning_rate": 1.241661350648713e-05, | |
| "loss": 0.0735, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 1.6923076923076923, | |
| "grad_norm": 0.33821254997323613, | |
| "learning_rate": 1.240488683024931e-05, | |
| "loss": 0.0671, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.6938461538461538, | |
| "grad_norm": 0.314525906392349, | |
| "learning_rate": 1.2393156642875579e-05, | |
| "loss": 0.0593, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 1.6953846153846155, | |
| "grad_norm": 0.37318354708691176, | |
| "learning_rate": 1.2381422961492018e-05, | |
| "loss": 0.0612, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.696923076923077, | |
| "grad_norm": 0.38841429558968976, | |
| "learning_rate": 1.2369685803229802e-05, | |
| "loss": 0.0618, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 1.6984615384615385, | |
| "grad_norm": 0.4236775137197504, | |
| "learning_rate": 1.2357945185225194e-05, | |
| "loss": 0.0657, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 0.33764294486901314, | |
| "learning_rate": 1.2346201124619502e-05, | |
| "loss": 0.0649, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 1.7015384615384614, | |
| "grad_norm": 0.4418410618698052, | |
| "learning_rate": 1.2334453638559057e-05, | |
| "loss": 0.0687, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.7030769230769232, | |
| "grad_norm": 0.3121204113437058, | |
| "learning_rate": 1.2322702744195192e-05, | |
| "loss": 0.0611, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 1.7046153846153846, | |
| "grad_norm": 0.3445242526989668, | |
| "learning_rate": 1.231094845868422e-05, | |
| "loss": 0.0702, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.7061538461538461, | |
| "grad_norm": 0.3181762559945284, | |
| "learning_rate": 1.2299190799187405e-05, | |
| "loss": 0.0617, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 1.7076923076923078, | |
| "grad_norm": 0.35189126228058387, | |
| "learning_rate": 1.2287429782870936e-05, | |
| "loss": 0.0654, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.709230769230769, | |
| "grad_norm": 0.3893933792288036, | |
| "learning_rate": 1.22756654269059e-05, | |
| "loss": 0.0731, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 1.7107692307692308, | |
| "grad_norm": 0.36384662807264734, | |
| "learning_rate": 1.2263897748468265e-05, | |
| "loss": 0.0748, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.7123076923076923, | |
| "grad_norm": 0.35150416540561524, | |
| "learning_rate": 1.2252126764738845e-05, | |
| "loss": 0.063, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 1.7138461538461538, | |
| "grad_norm": 0.37272242307664083, | |
| "learning_rate": 1.2240352492903282e-05, | |
| "loss": 0.0658, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.7153846153846155, | |
| "grad_norm": 0.34210913074854066, | |
| "learning_rate": 1.222857495015202e-05, | |
| "loss": 0.0631, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 1.7169230769230768, | |
| "grad_norm": 0.3261822986248889, | |
| "learning_rate": 1.2216794153680274e-05, | |
| "loss": 0.0682, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.7184615384615385, | |
| "grad_norm": 0.38673754458534254, | |
| "learning_rate": 1.2205010120688012e-05, | |
| "loss": 0.0744, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 0.322372777644759, | |
| "learning_rate": 1.2193222868379933e-05, | |
| "loss": 0.0695, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.7215384615384615, | |
| "grad_norm": 0.3627083236569345, | |
| "learning_rate": 1.2181432413965428e-05, | |
| "loss": 0.0742, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 1.7230769230769232, | |
| "grad_norm": 0.3603766452102839, | |
| "learning_rate": 1.2169638774658566e-05, | |
| "loss": 0.0701, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.7246153846153847, | |
| "grad_norm": 0.400801215452547, | |
| "learning_rate": 1.2157841967678064e-05, | |
| "loss": 0.073, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 1.7261538461538461, | |
| "grad_norm": 0.35897450361326094, | |
| "learning_rate": 1.2146042010247268e-05, | |
| "loss": 0.0713, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.7276923076923076, | |
| "grad_norm": 0.32908439200162665, | |
| "learning_rate": 1.2134238919594122e-05, | |
| "loss": 0.073, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 1.7292307692307691, | |
| "grad_norm": 0.3195562766482599, | |
| "learning_rate": 1.2122432712951142e-05, | |
| "loss": 0.0639, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.7307692307692308, | |
| "grad_norm": 0.3845258420163186, | |
| "learning_rate": 1.2110623407555398e-05, | |
| "loss": 0.061, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 1.7323076923076923, | |
| "grad_norm": 0.3101783646667082, | |
| "learning_rate": 1.2098811020648475e-05, | |
| "loss": 0.0612, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.7338461538461538, | |
| "grad_norm": 0.40958171970575463, | |
| "learning_rate": 1.2086995569476474e-05, | |
| "loss": 0.0602, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 1.7353846153846155, | |
| "grad_norm": 0.36131350860502126, | |
| "learning_rate": 1.2075177071289952e-05, | |
| "loss": 0.0716, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.7369230769230768, | |
| "grad_norm": 0.3245396638728491, | |
| "learning_rate": 1.2063355543343925e-05, | |
| "loss": 0.0664, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 1.7384615384615385, | |
| "grad_norm": 0.34948238997533076, | |
| "learning_rate": 1.2051531002897823e-05, | |
| "loss": 0.0743, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "grad_norm": 0.33482788469371083, | |
| "learning_rate": 1.2039703467215489e-05, | |
| "loss": 0.07, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 1.7415384615384615, | |
| "grad_norm": 0.3106255436550464, | |
| "learning_rate": 1.2027872953565125e-05, | |
| "loss": 0.0656, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.7430769230769232, | |
| "grad_norm": 0.3230656932594039, | |
| "learning_rate": 1.2016039479219293e-05, | |
| "loss": 0.0673, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 1.7446153846153845, | |
| "grad_norm": 0.41382601752001624, | |
| "learning_rate": 1.2004203061454864e-05, | |
| "loss": 0.0665, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.7461538461538462, | |
| "grad_norm": 0.3742034275244198, | |
| "learning_rate": 1.1992363717553015e-05, | |
| "loss": 0.0752, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 1.7476923076923077, | |
| "grad_norm": 0.36413647568781654, | |
| "learning_rate": 1.1980521464799197e-05, | |
| "loss": 0.071, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.7492307692307691, | |
| "grad_norm": 0.373128673273757, | |
| "learning_rate": 1.1968676320483103e-05, | |
| "loss": 0.0702, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 1.7507692307692309, | |
| "grad_norm": 0.3372236802921592, | |
| "learning_rate": 1.1956828301898648e-05, | |
| "loss": 0.0678, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.7523076923076923, | |
| "grad_norm": 0.40138229132959674, | |
| "learning_rate": 1.194497742634395e-05, | |
| "loss": 0.0717, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 1.7538461538461538, | |
| "grad_norm": 0.33981746405629404, | |
| "learning_rate": 1.1933123711121284e-05, | |
| "loss": 0.0707, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.7553846153846155, | |
| "grad_norm": 0.31343455180354113, | |
| "learning_rate": 1.1921267173537085e-05, | |
| "loss": 0.0649, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 1.7569230769230768, | |
| "grad_norm": 0.40773047383140626, | |
| "learning_rate": 1.1909407830901905e-05, | |
| "loss": 0.0643, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.7584615384615385, | |
| "grad_norm": 0.3528139994471679, | |
| "learning_rate": 1.1897545700530387e-05, | |
| "loss": 0.0611, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 0.4034945403026721, | |
| "learning_rate": 1.1885680799741249e-05, | |
| "loss": 0.0743, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.7615384615384615, | |
| "grad_norm": 0.41781100877516925, | |
| "learning_rate": 1.187381314585725e-05, | |
| "loss": 0.0678, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 1.7630769230769232, | |
| "grad_norm": 0.36789269013488135, | |
| "learning_rate": 1.186194275620517e-05, | |
| "loss": 0.0662, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.7646153846153845, | |
| "grad_norm": 0.4280148153142728, | |
| "learning_rate": 1.1850069648115785e-05, | |
| "loss": 0.0703, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 1.7661538461538462, | |
| "grad_norm": 0.34082568199061963, | |
| "learning_rate": 1.1838193838923835e-05, | |
| "loss": 0.0664, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.7676923076923077, | |
| "grad_norm": 0.40801522956984687, | |
| "learning_rate": 1.1826315345968014e-05, | |
| "loss": 0.0684, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 1.7692307692307692, | |
| "grad_norm": 0.5130512133374202, | |
| "learning_rate": 1.1814434186590922e-05, | |
| "loss": 0.0673, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.7707692307692309, | |
| "grad_norm": 0.41911565805686907, | |
| "learning_rate": 1.180255037813906e-05, | |
| "loss": 0.0721, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 1.7723076923076924, | |
| "grad_norm": 0.4585867938819892, | |
| "learning_rate": 1.1790663937962789e-05, | |
| "loss": 0.07, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.7738461538461539, | |
| "grad_norm": 0.4179899966673112, | |
| "learning_rate": 1.1778774883416325e-05, | |
| "loss": 0.0742, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 1.7753846153846153, | |
| "grad_norm": 0.38592221008164235, | |
| "learning_rate": 1.1766883231857686e-05, | |
| "loss": 0.0744, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.7769230769230768, | |
| "grad_norm": 0.35229801433260116, | |
| "learning_rate": 1.1754989000648693e-05, | |
| "loss": 0.0726, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 1.7784615384615385, | |
| "grad_norm": 0.340673871886494, | |
| "learning_rate": 1.1743092207154929e-05, | |
| "loss": 0.0636, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 0.3495561290874491, | |
| "learning_rate": 1.1731192868745716e-05, | |
| "loss": 0.056, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 1.7815384615384615, | |
| "grad_norm": 0.3734216760946435, | |
| "learning_rate": 1.1719291002794096e-05, | |
| "loss": 0.0681, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.7830769230769232, | |
| "grad_norm": 0.3417807928208906, | |
| "learning_rate": 1.1707386626676798e-05, | |
| "loss": 0.0729, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 1.7846153846153845, | |
| "grad_norm": 0.3437047768651414, | |
| "learning_rate": 1.1695479757774217e-05, | |
| "loss": 0.0694, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.7861538461538462, | |
| "grad_norm": 0.47420240578398315, | |
| "learning_rate": 1.1683570413470384e-05, | |
| "loss": 0.0676, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 1.7876923076923077, | |
| "grad_norm": 0.3250761492618577, | |
| "learning_rate": 1.1671658611152954e-05, | |
| "loss": 0.0733, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.7892307692307692, | |
| "grad_norm": 0.38754162428676897, | |
| "learning_rate": 1.1659744368213159e-05, | |
| "loss": 0.0621, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 1.790769230769231, | |
| "grad_norm": 0.33675515675870277, | |
| "learning_rate": 1.1647827702045802e-05, | |
| "loss": 0.0672, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.7923076923076922, | |
| "grad_norm": 0.44334916416488984, | |
| "learning_rate": 1.163590863004922e-05, | |
| "loss": 0.0767, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 1.7938461538461539, | |
| "grad_norm": 0.36810606943536545, | |
| "learning_rate": 1.1623987169625261e-05, | |
| "loss": 0.0757, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.7953846153846154, | |
| "grad_norm": 0.3878325616113022, | |
| "learning_rate": 1.1612063338179269e-05, | |
| "loss": 0.0674, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 1.7969230769230768, | |
| "grad_norm": 0.36763716766560983, | |
| "learning_rate": 1.1600137153120039e-05, | |
| "loss": 0.0694, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.7984615384615386, | |
| "grad_norm": 0.294038432326372, | |
| "learning_rate": 1.1588208631859808e-05, | |
| "loss": 0.0624, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.3066013994606816, | |
| "learning_rate": 1.1576277791814219e-05, | |
| "loss": 0.064, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.8015384615384615, | |
| "grad_norm": 0.3791728156018938, | |
| "learning_rate": 1.156434465040231e-05, | |
| "loss": 0.0685, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 1.803076923076923, | |
| "grad_norm": 0.34991093402366064, | |
| "learning_rate": 1.1552409225046472e-05, | |
| "loss": 0.0702, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.8046153846153845, | |
| "grad_norm": 0.35156981872263665, | |
| "learning_rate": 1.154047153317243e-05, | |
| "loss": 0.0648, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 1.8061538461538462, | |
| "grad_norm": 0.47105508744307845, | |
| "learning_rate": 1.152853159220922e-05, | |
| "loss": 0.0662, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.8076923076923077, | |
| "grad_norm": 0.34193953754457573, | |
| "learning_rate": 1.1516589419589159e-05, | |
| "loss": 0.0662, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 1.8092307692307692, | |
| "grad_norm": 0.3080144424855269, | |
| "learning_rate": 1.1504645032747832e-05, | |
| "loss": 0.0718, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.810769230769231, | |
| "grad_norm": 0.3166715432520699, | |
| "learning_rate": 1.1492698449124042e-05, | |
| "loss": 0.0632, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 1.8123076923076922, | |
| "grad_norm": 0.3116243301245059, | |
| "learning_rate": 1.148074968615981e-05, | |
| "loss": 0.0668, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.8138461538461539, | |
| "grad_norm": 0.3923187036355073, | |
| "learning_rate": 1.1468798761300335e-05, | |
| "loss": 0.0721, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 1.8153846153846154, | |
| "grad_norm": 0.3293677766442383, | |
| "learning_rate": 1.1456845691993975e-05, | |
| "loss": 0.0636, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.8169230769230769, | |
| "grad_norm": 0.33075818730038914, | |
| "learning_rate": 1.1444890495692214e-05, | |
| "loss": 0.0688, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 1.8184615384615386, | |
| "grad_norm": 0.3270293740539091, | |
| "learning_rate": 1.1432933189849647e-05, | |
| "loss": 0.0675, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.8199999999999998, | |
| "grad_norm": 0.2923335748923596, | |
| "learning_rate": 1.1420973791923941e-05, | |
| "loss": 0.0701, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 1.8215384615384616, | |
| "grad_norm": 0.3093982567613493, | |
| "learning_rate": 1.1409012319375828e-05, | |
| "loss": 0.0659, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.823076923076923, | |
| "grad_norm": 0.3799881014599721, | |
| "learning_rate": 1.1397048789669061e-05, | |
| "loss": 0.0714, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 1.8246153846153845, | |
| "grad_norm": 0.36261575151573644, | |
| "learning_rate": 1.13850832202704e-05, | |
| "loss": 0.0693, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.8261538461538462, | |
| "grad_norm": 0.3132085768992873, | |
| "learning_rate": 1.1373115628649582e-05, | |
| "loss": 0.0682, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 1.8276923076923077, | |
| "grad_norm": 0.3461207920147732, | |
| "learning_rate": 1.1361146032279295e-05, | |
| "loss": 0.0711, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.8292307692307692, | |
| "grad_norm": 0.3150515227081572, | |
| "learning_rate": 1.1349174448635158e-05, | |
| "loss": 0.0592, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 1.830769230769231, | |
| "grad_norm": 0.33512808523002696, | |
| "learning_rate": 1.1337200895195688e-05, | |
| "loss": 0.0697, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.8323076923076922, | |
| "grad_norm": 0.35007766877677027, | |
| "learning_rate": 1.1325225389442278e-05, | |
| "loss": 0.0667, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 1.833846153846154, | |
| "grad_norm": 0.34216015472816874, | |
| "learning_rate": 1.1313247948859168e-05, | |
| "loss": 0.0769, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.8353846153846154, | |
| "grad_norm": 0.34775267785403957, | |
| "learning_rate": 1.1301268590933434e-05, | |
| "loss": 0.0663, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 1.8369230769230769, | |
| "grad_norm": 0.33480692927199907, | |
| "learning_rate": 1.1289287333154941e-05, | |
| "loss": 0.0762, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.8384615384615386, | |
| "grad_norm": 0.32637056156167593, | |
| "learning_rate": 1.1277304193016332e-05, | |
| "loss": 0.0713, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 1.8399999999999999, | |
| "grad_norm": 0.36829791104284954, | |
| "learning_rate": 1.1265319188012995e-05, | |
| "loss": 0.0737, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.8415384615384616, | |
| "grad_norm": 0.36062035876290466, | |
| "learning_rate": 1.1253332335643043e-05, | |
| "loss": 0.0689, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 1.843076923076923, | |
| "grad_norm": 0.3711995102972711, | |
| "learning_rate": 1.124134365340729e-05, | |
| "loss": 0.0642, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.8446153846153845, | |
| "grad_norm": 0.30208317420183395, | |
| "learning_rate": 1.1229353158809216e-05, | |
| "loss": 0.0643, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 1.8461538461538463, | |
| "grad_norm": 0.33621334713281376, | |
| "learning_rate": 1.1217360869354948e-05, | |
| "loss": 0.0716, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.8476923076923077, | |
| "grad_norm": 0.4109500586936904, | |
| "learning_rate": 1.1205366802553231e-05, | |
| "loss": 0.0627, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 1.8492307692307692, | |
| "grad_norm": 0.3355086005987176, | |
| "learning_rate": 1.1193370975915414e-05, | |
| "loss": 0.0651, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.8507692307692307, | |
| "grad_norm": 0.33311821347747733, | |
| "learning_rate": 1.118137340695541e-05, | |
| "loss": 0.0707, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 1.8523076923076922, | |
| "grad_norm": 0.3783217276792658, | |
| "learning_rate": 1.1169374113189669e-05, | |
| "loss": 0.0688, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.853846153846154, | |
| "grad_norm": 0.43132614741663744, | |
| "learning_rate": 1.1157373112137171e-05, | |
| "loss": 0.0799, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 1.8553846153846154, | |
| "grad_norm": 0.2922605690189237, | |
| "learning_rate": 1.1145370421319377e-05, | |
| "loss": 0.0592, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.856923076923077, | |
| "grad_norm": 0.3223022612523878, | |
| "learning_rate": 1.1133366058260232e-05, | |
| "loss": 0.0647, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 1.8584615384615386, | |
| "grad_norm": 0.3018670306666712, | |
| "learning_rate": 1.11213600404861e-05, | |
| "loss": 0.0685, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.8599999999999999, | |
| "grad_norm": 0.3889446684181748, | |
| "learning_rate": 1.1109352385525782e-05, | |
| "loss": 0.0766, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 1.8615384615384616, | |
| "grad_norm": 0.4477268822990713, | |
| "learning_rate": 1.1097343110910452e-05, | |
| "loss": 0.0645, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.863076923076923, | |
| "grad_norm": 0.33752078106966593, | |
| "learning_rate": 1.1085332234173664e-05, | |
| "loss": 0.0724, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 1.8646153846153846, | |
| "grad_norm": 0.33949138455564243, | |
| "learning_rate": 1.1073319772851299e-05, | |
| "loss": 0.0635, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.8661538461538463, | |
| "grad_norm": 0.339715631924927, | |
| "learning_rate": 1.106130574448156e-05, | |
| "loss": 0.0708, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 1.8676923076923075, | |
| "grad_norm": 0.38187248950026703, | |
| "learning_rate": 1.1049290166604928e-05, | |
| "loss": 0.0648, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.8692307692307693, | |
| "grad_norm": 0.4755273814617505, | |
| "learning_rate": 1.1037273056764157e-05, | |
| "loss": 0.0676, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 1.8707692307692307, | |
| "grad_norm": 0.35341205325615144, | |
| "learning_rate": 1.1025254432504234e-05, | |
| "loss": 0.0742, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.8723076923076922, | |
| "grad_norm": 0.3254062177799031, | |
| "learning_rate": 1.1013234311372353e-05, | |
| "loss": 0.0649, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 1.873846153846154, | |
| "grad_norm": 0.31246579893201815, | |
| "learning_rate": 1.1001212710917897e-05, | |
| "loss": 0.0649, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.8753846153846154, | |
| "grad_norm": 0.33928603890752973, | |
| "learning_rate": 1.0989189648692408e-05, | |
| "loss": 0.07, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 1.876923076923077, | |
| "grad_norm": 0.3450403297222314, | |
| "learning_rate": 1.0977165142249566e-05, | |
| "loss": 0.067, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.8784615384615386, | |
| "grad_norm": 0.3423853230449218, | |
| "learning_rate": 1.0965139209145153e-05, | |
| "loss": 0.0702, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 0.32174442781609414, | |
| "learning_rate": 1.0953111866937038e-05, | |
| "loss": 0.0701, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.8815384615384616, | |
| "grad_norm": 0.3676532027177272, | |
| "learning_rate": 1.0941083133185146e-05, | |
| "loss": 0.0705, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 1.883076923076923, | |
| "grad_norm": 0.41997872234324835, | |
| "learning_rate": 1.0929053025451432e-05, | |
| "loss": 0.0659, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.8846153846153846, | |
| "grad_norm": 0.4110637997348029, | |
| "learning_rate": 1.0917021561299864e-05, | |
| "loss": 0.0729, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 1.8861538461538463, | |
| "grad_norm": 0.38396879751089963, | |
| "learning_rate": 1.090498875829638e-05, | |
| "loss": 0.0786, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.8876923076923076, | |
| "grad_norm": 0.3530760565665137, | |
| "learning_rate": 1.089295463400888e-05, | |
| "loss": 0.0736, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 1.8892307692307693, | |
| "grad_norm": 0.3828535267508954, | |
| "learning_rate": 1.0880919206007193e-05, | |
| "loss": 0.0756, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.8907692307692308, | |
| "grad_norm": 0.4620093651236917, | |
| "learning_rate": 1.0868882491863048e-05, | |
| "loss": 0.0672, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 1.8923076923076922, | |
| "grad_norm": 0.34394347660813923, | |
| "learning_rate": 1.0856844509150056e-05, | |
| "loss": 0.0628, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.893846153846154, | |
| "grad_norm": 0.34240923368033127, | |
| "learning_rate": 1.0844805275443673e-05, | |
| "loss": 0.0663, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 1.8953846153846152, | |
| "grad_norm": 0.4216830435498585, | |
| "learning_rate": 1.0832764808321186e-05, | |
| "loss": 0.0731, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.896923076923077, | |
| "grad_norm": 0.34817819208217915, | |
| "learning_rate": 1.0820723125361685e-05, | |
| "loss": 0.0696, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 1.8984615384615384, | |
| "grad_norm": 0.3361061093793128, | |
| "learning_rate": 1.0808680244146035e-05, | |
| "loss": 0.0608, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 0.3127731515392646, | |
| "learning_rate": 1.0796636182256846e-05, | |
| "loss": 0.0654, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 1.9015384615384616, | |
| "grad_norm": 0.34974984864646785, | |
| "learning_rate": 1.0784590957278452e-05, | |
| "loss": 0.0696, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.903076923076923, | |
| "grad_norm": 0.3241892888461775, | |
| "learning_rate": 1.077254458679689e-05, | |
| "loss": 0.0615, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 1.9046153846153846, | |
| "grad_norm": 0.37471636624302423, | |
| "learning_rate": 1.0760497088399863e-05, | |
| "loss": 0.0657, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.9061538461538463, | |
| "grad_norm": 0.4476088794896166, | |
| "learning_rate": 1.074844847967673e-05, | |
| "loss": 0.0696, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 1.9076923076923076, | |
| "grad_norm": 0.35752628031586536, | |
| "learning_rate": 1.0736398778218458e-05, | |
| "loss": 0.0662, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.9092307692307693, | |
| "grad_norm": 0.40745054566528704, | |
| "learning_rate": 1.0724348001617626e-05, | |
| "loss": 0.0688, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 1.9107692307692308, | |
| "grad_norm": 0.39673775592008276, | |
| "learning_rate": 1.0712296167468366e-05, | |
| "loss": 0.0636, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.9123076923076923, | |
| "grad_norm": 0.36255910467507374, | |
| "learning_rate": 1.0700243293366365e-05, | |
| "loss": 0.0606, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 1.913846153846154, | |
| "grad_norm": 0.5136376481460118, | |
| "learning_rate": 1.0688189396908826e-05, | |
| "loss": 0.0645, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.9153846153846152, | |
| "grad_norm": 0.3099567930694205, | |
| "learning_rate": 1.0676134495694439e-05, | |
| "loss": 0.0644, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 1.916923076923077, | |
| "grad_norm": 0.37000890844695966, | |
| "learning_rate": 1.0664078607323367e-05, | |
| "loss": 0.0687, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.9184615384615384, | |
| "grad_norm": 0.34210135022540517, | |
| "learning_rate": 1.0652021749397216e-05, | |
| "loss": 0.0699, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 0.3609562310788875, | |
| "learning_rate": 1.0639963939519005e-05, | |
| "loss": 0.0738, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.9215384615384616, | |
| "grad_norm": 0.3700975852964543, | |
| "learning_rate": 1.0627905195293135e-05, | |
| "loss": 0.0664, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 1.9230769230769231, | |
| "grad_norm": 0.3859166868285145, | |
| "learning_rate": 1.0615845534325384e-05, | |
| "loss": 0.0713, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.9246153846153846, | |
| "grad_norm": 0.4115264368388931, | |
| "learning_rate": 1.0603784974222862e-05, | |
| "loss": 0.0705, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 1.926153846153846, | |
| "grad_norm": 0.3145083279396702, | |
| "learning_rate": 1.0591723532593992e-05, | |
| "loss": 0.0604, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.9276923076923076, | |
| "grad_norm": 0.3197633328292509, | |
| "learning_rate": 1.0579661227048484e-05, | |
| "loss": 0.0604, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 1.9292307692307693, | |
| "grad_norm": 0.3622577046638927, | |
| "learning_rate": 1.056759807519731e-05, | |
| "loss": 0.0716, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.9307692307692308, | |
| "grad_norm": 0.3497451203917846, | |
| "learning_rate": 1.0555534094652675e-05, | |
| "loss": 0.0673, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 1.9323076923076923, | |
| "grad_norm": 0.3320313053856611, | |
| "learning_rate": 1.0543469303028002e-05, | |
| "loss": 0.0635, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.933846153846154, | |
| "grad_norm": 0.3233116362666333, | |
| "learning_rate": 1.0531403717937888e-05, | |
| "loss": 0.062, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 1.9353846153846153, | |
| "grad_norm": 0.3685802421725291, | |
| "learning_rate": 1.0519337356998094e-05, | |
| "loss": 0.0692, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.936923076923077, | |
| "grad_norm": 0.336210753592891, | |
| "learning_rate": 1.0507270237825513e-05, | |
| "loss": 0.0675, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 1.9384615384615385, | |
| "grad_norm": 0.3289919747166384, | |
| "learning_rate": 1.0495202378038144e-05, | |
| "loss": 0.0657, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 0.33651073591370484, | |
| "learning_rate": 1.0483133795255072e-05, | |
| "loss": 0.0717, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 1.9415384615384617, | |
| "grad_norm": 0.3648322054454426, | |
| "learning_rate": 1.0471064507096427e-05, | |
| "loss": 0.0793, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.943076923076923, | |
| "grad_norm": 0.33381701009336806, | |
| "learning_rate": 1.045899453118338e-05, | |
| "loss": 0.0617, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 1.9446153846153846, | |
| "grad_norm": 0.3641612409279995, | |
| "learning_rate": 1.0446923885138101e-05, | |
| "loss": 0.0597, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.9461538461538461, | |
| "grad_norm": 0.34585249190313433, | |
| "learning_rate": 1.0434852586583737e-05, | |
| "loss": 0.0674, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 1.9476923076923076, | |
| "grad_norm": 0.3289388167113336, | |
| "learning_rate": 1.0422780653144392e-05, | |
| "loss": 0.0735, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.9492307692307693, | |
| "grad_norm": 0.3887566915602357, | |
| "learning_rate": 1.0410708102445091e-05, | |
| "loss": 0.0766, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 1.9507692307692308, | |
| "grad_norm": 0.30555149229418166, | |
| "learning_rate": 1.0398634952111766e-05, | |
| "loss": 0.0718, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.9523076923076923, | |
| "grad_norm": 0.32317250467343567, | |
| "learning_rate": 1.0386561219771222e-05, | |
| "loss": 0.0652, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 1.953846153846154, | |
| "grad_norm": 0.3006439298211834, | |
| "learning_rate": 1.0374486923051117e-05, | |
| "loss": 0.0623, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.9553846153846153, | |
| "grad_norm": 0.33386303419709507, | |
| "learning_rate": 1.0362412079579925e-05, | |
| "loss": 0.0701, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 1.956923076923077, | |
| "grad_norm": 0.33099092128522206, | |
| "learning_rate": 1.0350336706986925e-05, | |
| "loss": 0.068, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.9584615384615385, | |
| "grad_norm": 0.3262986897907234, | |
| "learning_rate": 1.0338260822902166e-05, | |
| "loss": 0.0614, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 0.34485599175028897, | |
| "learning_rate": 1.0326184444956449e-05, | |
| "loss": 0.0658, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.9615384615384617, | |
| "grad_norm": 0.31792277168038097, | |
| "learning_rate": 1.0314107590781284e-05, | |
| "loss": 0.0574, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 1.963076923076923, | |
| "grad_norm": 0.3208911821070251, | |
| "learning_rate": 1.030203027800889e-05, | |
| "loss": 0.0633, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.9646153846153847, | |
| "grad_norm": 0.33055045597617766, | |
| "learning_rate": 1.0289952524272147e-05, | |
| "loss": 0.0706, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 1.9661538461538461, | |
| "grad_norm": 0.34088210301412697, | |
| "learning_rate": 1.027787434720458e-05, | |
| "loss": 0.0739, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.9676923076923076, | |
| "grad_norm": 0.32073888049730687, | |
| "learning_rate": 1.0265795764440335e-05, | |
| "loss": 0.0651, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 1.9692307692307693, | |
| "grad_norm": 0.3076473967656385, | |
| "learning_rate": 1.025371679361415e-05, | |
| "loss": 0.0683, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.9707692307692306, | |
| "grad_norm": 0.3621459788069735, | |
| "learning_rate": 1.0241637452361323e-05, | |
| "loss": 0.0677, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 1.9723076923076923, | |
| "grad_norm": 0.3573542495717642, | |
| "learning_rate": 1.0229557758317703e-05, | |
| "loss": 0.0679, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.9738461538461538, | |
| "grad_norm": 0.3483850527238064, | |
| "learning_rate": 1.0217477729119648e-05, | |
| "loss": 0.074, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 1.9753846153846153, | |
| "grad_norm": 0.3063790927358412, | |
| "learning_rate": 1.0205397382404006e-05, | |
| "loss": 0.0652, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.976923076923077, | |
| "grad_norm": 0.34619566294720616, | |
| "learning_rate": 1.0193316735808085e-05, | |
| "loss": 0.0676, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 1.9784615384615385, | |
| "grad_norm": 0.46164644987132686, | |
| "learning_rate": 1.018123580696964e-05, | |
| "loss": 0.0711, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 0.33352482622772156, | |
| "learning_rate": 1.0169154613526831e-05, | |
| "loss": 0.0696, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 1.9815384615384617, | |
| "grad_norm": 0.3101921292214142, | |
| "learning_rate": 1.0157073173118207e-05, | |
| "loss": 0.0647, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.983076923076923, | |
| "grad_norm": 0.31015657112545686, | |
| "learning_rate": 1.0144991503382676e-05, | |
| "loss": 0.0614, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 1.9846153846153847, | |
| "grad_norm": 0.3503219066829283, | |
| "learning_rate": 1.0132909621959482e-05, | |
| "loss": 0.077, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.9861538461538462, | |
| "grad_norm": 0.2992678727460164, | |
| "learning_rate": 1.0120827546488175e-05, | |
| "loss": 0.065, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 1.9876923076923076, | |
| "grad_norm": 0.3232588558361219, | |
| "learning_rate": 1.0108745294608595e-05, | |
| "loss": 0.0596, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.9892307692307694, | |
| "grad_norm": 0.3219005317029137, | |
| "learning_rate": 1.0096662883960833e-05, | |
| "loss": 0.0693, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 1.9907692307692306, | |
| "grad_norm": 0.36479544578271617, | |
| "learning_rate": 1.0084580332185214e-05, | |
| "loss": 0.071, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.9923076923076923, | |
| "grad_norm": 0.36208947868270336, | |
| "learning_rate": 1.0072497656922266e-05, | |
| "loss": 0.0697, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 1.9938461538461538, | |
| "grad_norm": 0.374662600377983, | |
| "learning_rate": 1.0060414875812709e-05, | |
| "loss": 0.0724, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.9953846153846153, | |
| "grad_norm": 0.4368778771016829, | |
| "learning_rate": 1.0048332006497406e-05, | |
| "loss": 0.0777, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 1.996923076923077, | |
| "grad_norm": 0.40482597277564725, | |
| "learning_rate": 1.003624906661735e-05, | |
| "loss": 0.0672, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.9984615384615385, | |
| "grad_norm": 0.3944268530980909, | |
| "learning_rate": 1.0024166073813634e-05, | |
| "loss": 0.0619, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.3312157049217859, | |
| "learning_rate": 1.0012083045727445e-05, | |
| "loss": 0.0633, | |
| "step": 1300 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 2600, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 263636178075648.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |