| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.993178717598909, |
| "eval_steps": 500, |
| "global_step": 915, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005457025920873124, |
| "grad_norm": 8.40548507464911, |
| "learning_rate": 4.347826086956522e-07, |
| "loss": 1.4358, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.010914051841746248, |
| "grad_norm": 8.27768117898077, |
| "learning_rate": 8.695652173913044e-07, |
| "loss": 1.4226, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.01637107776261937, |
| "grad_norm": 8.34817859423518, |
| "learning_rate": 1.3043478260869566e-06, |
| "loss": 1.4277, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.021828103683492497, |
| "grad_norm": 8.241415349829516, |
| "learning_rate": 1.7391304347826088e-06, |
| "loss": 1.4422, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.027285129604365622, |
| "grad_norm": 7.847219720997312, |
| "learning_rate": 2.173913043478261e-06, |
| "loss": 1.4021, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.03274215552523874, |
| "grad_norm": 6.490709031031551, |
| "learning_rate": 2.6086956521739132e-06, |
| "loss": 1.385, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.03819918144611187, |
| "grad_norm": 5.970054048772479, |
| "learning_rate": 3.043478260869566e-06, |
| "loss": 1.3497, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.04365620736698499, |
| "grad_norm": 3.386624409374888, |
| "learning_rate": 3.4782608695652175e-06, |
| "loss": 1.3161, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.04911323328785812, |
| "grad_norm": 2.764488361103231, |
| "learning_rate": 3.91304347826087e-06, |
| "loss": 1.3038, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.054570259208731244, |
| "grad_norm": 4.202620684729429, |
| "learning_rate": 4.347826086956522e-06, |
| "loss": 1.302, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06002728512960437, |
| "grad_norm": 6.086560136767305, |
| "learning_rate": 4.782608695652174e-06, |
| "loss": 1.3128, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.06548431105047749, |
| "grad_norm": 6.073370326241115, |
| "learning_rate": 5.2173913043478265e-06, |
| "loss": 1.3088, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.07094133697135062, |
| "grad_norm": 5.6470857345830945, |
| "learning_rate": 5.652173913043479e-06, |
| "loss": 1.2941, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.07639836289222374, |
| "grad_norm": 4.138470108037422, |
| "learning_rate": 6.086956521739132e-06, |
| "loss": 1.2619, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.08185538881309687, |
| "grad_norm": 3.7821082022725294, |
| "learning_rate": 6.521739130434783e-06, |
| "loss": 1.2451, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.08731241473396999, |
| "grad_norm": 2.805651263629899, |
| "learning_rate": 6.956521739130435e-06, |
| "loss": 1.229, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0927694406548431, |
| "grad_norm": 1.760678215208644, |
| "learning_rate": 7.391304347826087e-06, |
| "loss": 1.1933, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.09822646657571624, |
| "grad_norm": 1.471427773413438, |
| "learning_rate": 7.82608695652174e-06, |
| "loss": 1.2011, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.10368349249658936, |
| "grad_norm": 1.6385745213912601, |
| "learning_rate": 8.260869565217392e-06, |
| "loss": 1.1761, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.10914051841746249, |
| "grad_norm": 1.8107444245592996, |
| "learning_rate": 8.695652173913044e-06, |
| "loss": 1.1718, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1145975443383356, |
| "grad_norm": 1.5828548709190915, |
| "learning_rate": 9.130434782608697e-06, |
| "loss": 1.1686, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.12005457025920874, |
| "grad_norm": 1.0833660495626252, |
| "learning_rate": 9.565217391304349e-06, |
| "loss": 1.1517, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.12551159618008187, |
| "grad_norm": 1.0067497148092934, |
| "learning_rate": 1e-05, |
| "loss": 1.1596, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.13096862210095497, |
| "grad_norm": 1.1511016542466355, |
| "learning_rate": 1.0434782608695653e-05, |
| "loss": 1.1185, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.1364256480218281, |
| "grad_norm": 1.1129912962754631, |
| "learning_rate": 1.0869565217391305e-05, |
| "loss": 1.1044, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.14188267394270124, |
| "grad_norm": 0.982424004614232, |
| "learning_rate": 1.1304347826086957e-05, |
| "loss": 1.1154, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.14733969986357434, |
| "grad_norm": 0.8935088021503061, |
| "learning_rate": 1.1739130434782611e-05, |
| "loss": 1.0855, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.15279672578444747, |
| "grad_norm": 0.9536160463503343, |
| "learning_rate": 1.2173913043478263e-05, |
| "loss": 1.0844, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.1582537517053206, |
| "grad_norm": 0.7998848521277707, |
| "learning_rate": 1.2608695652173915e-05, |
| "loss": 1.0866, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.16371077762619374, |
| "grad_norm": 0.7673685776489387, |
| "learning_rate": 1.3043478260869566e-05, |
| "loss": 1.1062, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.16916780354706684, |
| "grad_norm": 0.9634656988616612, |
| "learning_rate": 1.3478260869565218e-05, |
| "loss": 1.0713, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.17462482946793997, |
| "grad_norm": 0.7656758207809929, |
| "learning_rate": 1.391304347826087e-05, |
| "loss": 1.0804, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.1800818553888131, |
| "grad_norm": 0.8925856020905516, |
| "learning_rate": 1.4347826086956522e-05, |
| "loss": 1.0791, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.1855388813096862, |
| "grad_norm": 0.9604475518815286, |
| "learning_rate": 1.4782608695652174e-05, |
| "loss": 1.0743, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.19099590723055934, |
| "grad_norm": 0.6673627257965253, |
| "learning_rate": 1.5217391304347828e-05, |
| "loss": 1.0827, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.19645293315143247, |
| "grad_norm": 0.6466823830941191, |
| "learning_rate": 1.565217391304348e-05, |
| "loss": 1.074, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.2019099590723056, |
| "grad_norm": 0.8141432611114343, |
| "learning_rate": 1.6086956521739132e-05, |
| "loss": 1.0935, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.2073669849931787, |
| "grad_norm": 0.7303516492961905, |
| "learning_rate": 1.6521739130434785e-05, |
| "loss": 1.0733, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.21282401091405184, |
| "grad_norm": 0.6688305682070583, |
| "learning_rate": 1.6956521739130437e-05, |
| "loss": 1.0424, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.21828103683492497, |
| "grad_norm": 0.6750314584251758, |
| "learning_rate": 1.739130434782609e-05, |
| "loss": 1.039, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.22373806275579808, |
| "grad_norm": 0.6414610193667182, |
| "learning_rate": 1.782608695652174e-05, |
| "loss": 1.0673, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.2291950886766712, |
| "grad_norm": 0.6004774591873056, |
| "learning_rate": 1.8260869565217393e-05, |
| "loss": 1.0472, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.23465211459754434, |
| "grad_norm": 1.001192284108013, |
| "learning_rate": 1.8695652173913045e-05, |
| "loss": 1.0668, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.24010914051841747, |
| "grad_norm": 1.2347279701133878, |
| "learning_rate": 1.9130434782608697e-05, |
| "loss": 1.0515, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.24556616643929058, |
| "grad_norm": 0.7545298666968991, |
| "learning_rate": 1.956521739130435e-05, |
| "loss": 1.0118, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.25102319236016374, |
| "grad_norm": 1.7910029989682172, |
| "learning_rate": 2e-05, |
| "loss": 1.0403, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.25648021828103684, |
| "grad_norm": 0.6646975296811077, |
| "learning_rate": 2.0434782608695657e-05, |
| "loss": 1.0404, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.26193724420190995, |
| "grad_norm": 1.8544620284122977, |
| "learning_rate": 2.0869565217391306e-05, |
| "loss": 1.0452, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.2673942701227831, |
| "grad_norm": 0.9159008525844143, |
| "learning_rate": 2.1304347826086958e-05, |
| "loss": 1.0352, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.2728512960436562, |
| "grad_norm": 1.8651550264621868, |
| "learning_rate": 2.173913043478261e-05, |
| "loss": 1.0478, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2783083219645293, |
| "grad_norm": 1.2948016317604922, |
| "learning_rate": 2.2173913043478262e-05, |
| "loss": 1.0309, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.2837653478854025, |
| "grad_norm": 1.5798033957703332, |
| "learning_rate": 2.2608695652173914e-05, |
| "loss": 1.0342, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.2892223738062756, |
| "grad_norm": 1.6098370993374367, |
| "learning_rate": 2.3043478260869567e-05, |
| "loss": 1.0329, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.2946793997271487, |
| "grad_norm": 1.2453365308049091, |
| "learning_rate": 2.3478260869565222e-05, |
| "loss": 1.0392, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.30013642564802184, |
| "grad_norm": 1.5844016545843662, |
| "learning_rate": 2.391304347826087e-05, |
| "loss": 1.0125, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.30559345156889495, |
| "grad_norm": 1.4627626883931912, |
| "learning_rate": 2.4347826086956526e-05, |
| "loss": 1.0499, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.31105047748976805, |
| "grad_norm": 1.4143561739785215, |
| "learning_rate": 2.4782608695652175e-05, |
| "loss": 1.0281, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.3165075034106412, |
| "grad_norm": 1.4070850348598627, |
| "learning_rate": 2.521739130434783e-05, |
| "loss": 1.0195, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.3219645293315143, |
| "grad_norm": 1.078315144035433, |
| "learning_rate": 2.565217391304348e-05, |
| "loss": 1.0216, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.3274215552523875, |
| "grad_norm": 1.1643154896766894, |
| "learning_rate": 2.608695652173913e-05, |
| "loss": 1.0097, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3328785811732606, |
| "grad_norm": 1.2461170132940949, |
| "learning_rate": 2.6521739130434784e-05, |
| "loss": 1.0209, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.3383356070941337, |
| "grad_norm": 2.008669853014052, |
| "learning_rate": 2.6956521739130436e-05, |
| "loss": 1.0392, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.34379263301500684, |
| "grad_norm": 1.1644314565358613, |
| "learning_rate": 2.739130434782609e-05, |
| "loss": 1.0117, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.34924965893587995, |
| "grad_norm": 1.799658123734067, |
| "learning_rate": 2.782608695652174e-05, |
| "loss": 1.0083, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.35470668485675305, |
| "grad_norm": 1.3978781458275338, |
| "learning_rate": 2.8260869565217396e-05, |
| "loss": 1.0082, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.3601637107776262, |
| "grad_norm": 2.0511544765413583, |
| "learning_rate": 2.8695652173913044e-05, |
| "loss": 1.0248, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.3656207366984993, |
| "grad_norm": 1.2539457961483822, |
| "learning_rate": 2.91304347826087e-05, |
| "loss": 1.021, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.3710777626193724, |
| "grad_norm": 2.2336673036604777, |
| "learning_rate": 2.956521739130435e-05, |
| "loss": 0.9919, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.3765347885402456, |
| "grad_norm": 1.9545303539529588, |
| "learning_rate": 3.0000000000000004e-05, |
| "loss": 1.0026, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.3819918144611187, |
| "grad_norm": 1.715545568872597, |
| "learning_rate": 3.0434782608695656e-05, |
| "loss": 1.015, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.3874488403819918, |
| "grad_norm": 1.9877889103452786, |
| "learning_rate": 3.086956521739131e-05, |
| "loss": 1.0161, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.39290586630286495, |
| "grad_norm": 1.403829736078723, |
| "learning_rate": 3.130434782608696e-05, |
| "loss": 1.0094, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.39836289222373805, |
| "grad_norm": 1.2911183724867115, |
| "learning_rate": 3.173913043478261e-05, |
| "loss": 1.0188, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.4038199181446112, |
| "grad_norm": 1.725635724089668, |
| "learning_rate": 3.2173913043478265e-05, |
| "loss": 1.0233, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.4092769440654843, |
| "grad_norm": 1.5524172183602378, |
| "learning_rate": 3.260869565217392e-05, |
| "loss": 1.0153, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.4147339699863574, |
| "grad_norm": 1.6937536176639882, |
| "learning_rate": 3.304347826086957e-05, |
| "loss": 1.0087, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.4201909959072306, |
| "grad_norm": 1.031762197478065, |
| "learning_rate": 3.347826086956522e-05, |
| "loss": 0.996, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.4256480218281037, |
| "grad_norm": 1.2214301629403135, |
| "learning_rate": 3.391304347826087e-05, |
| "loss": 0.9952, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.4311050477489768, |
| "grad_norm": 1.5276490268514877, |
| "learning_rate": 3.4347826086956526e-05, |
| "loss": 1.0261, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.43656207366984995, |
| "grad_norm": 1.5857383204051316, |
| "learning_rate": 3.478260869565218e-05, |
| "loss": 1.0023, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.44201909959072305, |
| "grad_norm": 1.7303601564744104, |
| "learning_rate": 3.521739130434783e-05, |
| "loss": 0.9994, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.44747612551159616, |
| "grad_norm": 1.0635597436256417, |
| "learning_rate": 3.565217391304348e-05, |
| "loss": 1.0098, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.4529331514324693, |
| "grad_norm": 3.079665829601424, |
| "learning_rate": 3.6086956521739134e-05, |
| "loss": 1.0082, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.4583901773533424, |
| "grad_norm": 1.8491839328742012, |
| "learning_rate": 3.6521739130434786e-05, |
| "loss": 0.9895, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.4638472032742155, |
| "grad_norm": 3.00445118120071, |
| "learning_rate": 3.695652173913044e-05, |
| "loss": 1.0142, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.4693042291950887, |
| "grad_norm": 2.9140568598663514, |
| "learning_rate": 3.739130434782609e-05, |
| "loss": 1.0107, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.4747612551159618, |
| "grad_norm": 2.411767008345035, |
| "learning_rate": 3.782608695652174e-05, |
| "loss": 1.017, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.48021828103683495, |
| "grad_norm": 2.6524907076537505, |
| "learning_rate": 3.8260869565217395e-05, |
| "loss": 1.0203, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.48567530695770805, |
| "grad_norm": 2.133767717181375, |
| "learning_rate": 3.869565217391305e-05, |
| "loss": 0.9971, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.49113233287858116, |
| "grad_norm": 2.281609780939416, |
| "learning_rate": 3.91304347826087e-05, |
| "loss": 1.0142, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.4965893587994543, |
| "grad_norm": 1.6150193464799012, |
| "learning_rate": 3.956521739130435e-05, |
| "loss": 0.9959, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.5020463847203275, |
| "grad_norm": 2.1774944953837663, |
| "learning_rate": 4e-05, |
| "loss": 1.0178, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.5075034106412005, |
| "grad_norm": 1.8522257265922395, |
| "learning_rate": 3.9999854286581316e-05, |
| "loss": 0.9939, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.5129604365620737, |
| "grad_norm": 2.0976106058434145, |
| "learning_rate": 3.999941714844849e-05, |
| "loss": 0.9989, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.5184174624829468, |
| "grad_norm": 2.033288450566452, |
| "learning_rate": 3.999868859197122e-05, |
| "loss": 1.014, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.5238744884038199, |
| "grad_norm": 1.64201358795121, |
| "learning_rate": 3.999766862776556e-05, |
| "loss": 0.9962, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.529331514324693, |
| "grad_norm": 1.9801453443085917, |
| "learning_rate": 3.999635727069373e-05, |
| "loss": 0.9898, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.5347885402455662, |
| "grad_norm": 1.6558664282997086, |
| "learning_rate": 3.9994754539863984e-05, |
| "loss": 0.9937, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.5402455661664393, |
| "grad_norm": 1.702235161068369, |
| "learning_rate": 3.999286045863026e-05, |
| "loss": 0.9922, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.5457025920873124, |
| "grad_norm": 1.6798920660304613, |
| "learning_rate": 3.999067505459185e-05, |
| "loss": 0.9922, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5511596180081856, |
| "grad_norm": 1.14927571951454, |
| "learning_rate": 3.998819835959304e-05, |
| "loss": 0.959, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.5566166439290586, |
| "grad_norm": 1.994460504643915, |
| "learning_rate": 3.998543040972259e-05, |
| "loss": 0.9896, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.5620736698499318, |
| "grad_norm": 2.225475662267806, |
| "learning_rate": 3.998237124531324e-05, |
| "loss": 0.9838, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.567530695770805, |
| "grad_norm": 0.9844824291875355, |
| "learning_rate": 3.9979020910941135e-05, |
| "loss": 0.9851, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.572987721691678, |
| "grad_norm": 2.4932772876759817, |
| "learning_rate": 3.9975379455425126e-05, |
| "loss": 0.9843, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.5784447476125512, |
| "grad_norm": 1.8029107577553645, |
| "learning_rate": 3.9971446931826116e-05, |
| "loss": 0.9991, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.5839017735334243, |
| "grad_norm": 2.290841965389965, |
| "learning_rate": 3.996722339744625e-05, |
| "loss": 1.0061, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.5893587994542974, |
| "grad_norm": 1.8729223351387532, |
| "learning_rate": 3.9962708913828086e-05, |
| "loss": 0.9968, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.5948158253751705, |
| "grad_norm": 2.1962460148515826, |
| "learning_rate": 3.995790354675372e-05, |
| "loss": 1.0082, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.6002728512960437, |
| "grad_norm": 1.9722134818162933, |
| "learning_rate": 3.995280736624378e-05, |
| "loss": 0.9975, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6057298772169167, |
| "grad_norm": 2.0118864615891394, |
| "learning_rate": 3.994742044655647e-05, |
| "loss": 0.9889, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.6111869031377899, |
| "grad_norm": 2.090834428592416, |
| "learning_rate": 3.994174286618643e-05, |
| "loss": 1.0247, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.616643929058663, |
| "grad_norm": 1.5840918067308427, |
| "learning_rate": 3.993577470786363e-05, |
| "loss": 0.9859, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.6221009549795361, |
| "grad_norm": 1.4515746682829112, |
| "learning_rate": 3.9929516058552143e-05, |
| "loss": 0.9761, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.6275579809004093, |
| "grad_norm": 1.504559871894639, |
| "learning_rate": 3.992296700944889e-05, |
| "loss": 0.975, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.6330150068212824, |
| "grad_norm": 1.5927280628411824, |
| "learning_rate": 3.99161276559823e-05, |
| "loss": 0.9979, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.6384720327421555, |
| "grad_norm": 1.60127479724257, |
| "learning_rate": 3.990899809781093e-05, |
| "loss": 0.9743, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.6439290586630286, |
| "grad_norm": 1.991280239304608, |
| "learning_rate": 3.990157843882202e-05, |
| "loss": 0.981, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.6493860845839018, |
| "grad_norm": 1.1853516129644286, |
| "learning_rate": 3.989386878712994e-05, |
| "loss": 0.9767, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.654843110504775, |
| "grad_norm": 1.3174203496686017, |
| "learning_rate": 3.9885869255074674e-05, |
| "loss": 0.9904, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.660300136425648, |
| "grad_norm": 1.5305788456446745, |
| "learning_rate": 3.987757995922014e-05, |
| "loss": 0.9925, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.6657571623465212, |
| "grad_norm": 1.881540433498657, |
| "learning_rate": 3.9869001020352484e-05, |
| "loss": 1.0114, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.6712141882673943, |
| "grad_norm": 1.1981839977942124, |
| "learning_rate": 3.9860132563478384e-05, |
| "loss": 0.9883, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.6766712141882674, |
| "grad_norm": 2.0059502915759264, |
| "learning_rate": 3.985097471782313e-05, |
| "loss": 0.9939, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.6821282401091405, |
| "grad_norm": 1.4590850776551136, |
| "learning_rate": 3.984152761682884e-05, |
| "loss": 0.9831, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.6875852660300137, |
| "grad_norm": 1.1239926129461735, |
| "learning_rate": 3.983179139815245e-05, |
| "loss": 1.0005, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.6930422919508867, |
| "grad_norm": 2.1543984625500836, |
| "learning_rate": 3.982176620366372e-05, |
| "loss": 0.9639, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.6984993178717599, |
| "grad_norm": 1.8429479992055005, |
| "learning_rate": 3.98114521794432e-05, |
| "loss": 0.9941, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.703956343792633, |
| "grad_norm": 1.0868587312281466, |
| "learning_rate": 3.9800849475780054e-05, |
| "loss": 1.0049, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.7094133697135061, |
| "grad_norm": 2.118679896726006, |
| "learning_rate": 3.97899582471699e-05, |
| "loss": 0.9724, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.7148703956343793, |
| "grad_norm": 1.1508635163751133, |
| "learning_rate": 3.977877865231256e-05, |
| "loss": 0.9917, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.7203274215552524, |
| "grad_norm": 2.0430027109665905, |
| "learning_rate": 3.976731085410974e-05, |
| "loss": 0.9862, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.7257844474761255, |
| "grad_norm": 1.4515209852628121, |
| "learning_rate": 3.975555501966263e-05, |
| "loss": 0.9895, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.7312414733969986, |
| "grad_norm": 1.6287273596496654, |
| "learning_rate": 3.974351132026952e-05, |
| "loss": 0.9763, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.7366984993178718, |
| "grad_norm": 1.5473296478105147, |
| "learning_rate": 3.973117993142327e-05, |
| "loss": 0.9817, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.7421555252387448, |
| "grad_norm": 1.403531094420758, |
| "learning_rate": 3.9718561032808774e-05, |
| "loss": 0.9928, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.747612551159618, |
| "grad_norm": 1.2777940256720086, |
| "learning_rate": 3.97056548083003e-05, |
| "loss": 0.9654, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.7530695770804912, |
| "grad_norm": 2.2605652171854183, |
| "learning_rate": 3.9692461445958876e-05, |
| "loss": 0.98, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.7585266030013642, |
| "grad_norm": 1.5555447148375732, |
| "learning_rate": 3.967898113802948e-05, |
| "loss": 0.973, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.7639836289222374, |
| "grad_norm": 1.5712062419592667, |
| "learning_rate": 3.9665214080938294e-05, |
| "loss": 0.9837, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.7694406548431105, |
| "grad_norm": 1.87211562183804, |
| "learning_rate": 3.9651160475289805e-05, |
| "loss": 1.0069, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.7748976807639836, |
| "grad_norm": 1.684924662180551, |
| "learning_rate": 3.963682052586392e-05, |
| "loss": 0.9854, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.7803547066848567, |
| "grad_norm": 1.1116249825439455, |
| "learning_rate": 3.962219444161294e-05, |
| "loss": 0.9808, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.7858117326057299, |
| "grad_norm": 1.794929259692561, |
| "learning_rate": 3.960728243565853e-05, |
| "loss": 0.9826, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.791268758526603, |
| "grad_norm": 1.4024768691530294, |
| "learning_rate": 3.959208472528863e-05, |
| "loss": 0.97, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.7967257844474761, |
| "grad_norm": 1.5359858110261895, |
| "learning_rate": 3.957660153195428e-05, |
| "loss": 1.0029, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.8021828103683493, |
| "grad_norm": 1.8187808557656198, |
| "learning_rate": 3.956083308126638e-05, |
| "loss": 0.9576, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.8076398362892224, |
| "grad_norm": 1.4399907834108585, |
| "learning_rate": 3.954477960299241e-05, |
| "loss": 0.9612, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.8130968622100955, |
| "grad_norm": 1.1465593393044229, |
| "learning_rate": 3.95284413310531e-05, |
| "loss": 0.9936, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.8185538881309686, |
| "grad_norm": 1.458918663058527, |
| "learning_rate": 3.9511818503518985e-05, |
| "loss": 0.9813, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8240109140518418, |
| "grad_norm": 2.6076181813742476, |
| "learning_rate": 3.949491136260698e-05, |
| "loss": 0.9798, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.8294679399727148, |
| "grad_norm": 1.0393193084437864, |
| "learning_rate": 3.9477720154676806e-05, |
| "loss": 0.9722, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.834924965893588, |
| "grad_norm": 3.9060717284201085, |
| "learning_rate": 3.9460245130227435e-05, |
| "loss": 0.9727, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.8403819918144612, |
| "grad_norm": 3.3082527760716767, |
| "learning_rate": 3.9442486543893424e-05, |
| "loss": 0.9794, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.8458390177353342, |
| "grad_norm": 2.4057404986106485, |
| "learning_rate": 3.94244446544412e-05, |
| "loss": 0.9837, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.8512960436562074, |
| "grad_norm": 1.8476216743035543, |
| "learning_rate": 3.94061197247653e-05, |
| "loss": 0.978, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.8567530695770805, |
| "grad_norm": 2.400979620356147, |
| "learning_rate": 3.9387512021884555e-05, |
| "loss": 0.981, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.8622100954979536, |
| "grad_norm": 2.1733630004298643, |
| "learning_rate": 3.936862181693815e-05, |
| "loss": 0.9776, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.8676671214188267, |
| "grad_norm": 1.8102603434505127, |
| "learning_rate": 3.934944938518172e-05, |
| "loss": 0.9937, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.8731241473396999, |
| "grad_norm": 2.0904632664136913, |
| "learning_rate": 3.932999500598333e-05, |
| "loss": 0.9577, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.878581173260573, |
| "grad_norm": 1.72487012815194, |
| "learning_rate": 3.931025896281939e-05, |
| "loss": 0.9885, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.8840381991814461, |
| "grad_norm": 1.859132027046651, |
| "learning_rate": 3.929024154327052e-05, |
| "loss": 0.9768, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.8894952251023193, |
| "grad_norm": 2.043990751240127, |
| "learning_rate": 3.926994303901739e-05, |
| "loss": 0.988, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.8949522510231923, |
| "grad_norm": 1.2949639926877792, |
| "learning_rate": 3.9249363745836453e-05, |
| "loss": 0.9803, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.9004092769440655, |
| "grad_norm": 1.95004872308144, |
| "learning_rate": 3.922850396359562e-05, |
| "loss": 0.9681, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.9058663028649386, |
| "grad_norm": 1.5438513810678176, |
| "learning_rate": 3.92073639962499e-05, |
| "loss": 0.9832, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.9113233287858117, |
| "grad_norm": 0.8915095612184046, |
| "learning_rate": 3.9185944151837006e-05, |
| "loss": 0.9933, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.9167803547066848, |
| "grad_norm": 1.7381086459322714, |
| "learning_rate": 3.9164244742472795e-05, |
| "loss": 0.9923, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.922237380627558, |
| "grad_norm": 1.5006202521018344, |
| "learning_rate": 3.914226608434678e-05, |
| "loss": 0.9803, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.927694406548431, |
| "grad_norm": 1.7809759035226784, |
| "learning_rate": 3.912000849771751e-05, |
| "loss": 0.9845, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9331514324693042, |
| "grad_norm": 1.416880011606568, |
| "learning_rate": 3.909747230690789e-05, |
| "loss": 0.9813, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.9386084583901774, |
| "grad_norm": 1.2752605112134887, |
| "learning_rate": 3.907465784030045e-05, |
| "loss": 0.979, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.9440654843110505, |
| "grad_norm": 1.8931991472592369, |
| "learning_rate": 3.90515654303326e-05, |
| "loss": 0.9651, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.9495225102319236, |
| "grad_norm": 1.0457088342185985, |
| "learning_rate": 3.902819541349171e-05, |
| "loss": 0.9575, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.9549795361527967, |
| "grad_norm": 1.9658747343963177, |
| "learning_rate": 3.900454813031032e-05, |
| "loss": 0.9709, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.9604365620736699, |
| "grad_norm": 1.5573294008142207, |
| "learning_rate": 3.898062392536106e-05, |
| "loss": 0.9852, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.965893587994543, |
| "grad_norm": 1.7467537921928091, |
| "learning_rate": 3.895642314725169e-05, |
| "loss": 0.9671, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.9713506139154161, |
| "grad_norm": 1.6127230465883864, |
| "learning_rate": 3.893194614862005e-05, |
| "loss": 0.969, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.9768076398362893, |
| "grad_norm": 1.6603200328670693, |
| "learning_rate": 3.890719328612882e-05, |
| "loss": 0.9795, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.9822646657571623, |
| "grad_norm": 1.6320378665613324, |
| "learning_rate": 3.888216492046045e-05, |
| "loss": 0.9553, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.9877216916780355, |
| "grad_norm": 0.928699164443798, |
| "learning_rate": 3.88568614163118e-05, |
| "loss": 0.9844, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.9931787175989086, |
| "grad_norm": 1.2989789969103307, |
| "learning_rate": 3.883128314238888e-05, |
| "loss": 0.9633, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.9986357435197817, |
| "grad_norm": 1.5050415954099332, |
| "learning_rate": 3.880543047140146e-05, |
| "loss": 0.9832, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.004092769440655, |
| "grad_norm": 3.1493232961865725, |
| "learning_rate": 3.877930378005766e-05, |
| "loss": 1.6761, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.009549795361528, |
| "grad_norm": 1.045095816055446, |
| "learning_rate": 3.8752903449058414e-05, |
| "loss": 0.9363, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.015006821282401, |
| "grad_norm": 1.7070253819059258, |
| "learning_rate": 3.872622986309198e-05, |
| "loss": 0.9788, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.0204638472032743, |
| "grad_norm": 1.5326319060129026, |
| "learning_rate": 3.8699283410828304e-05, |
| "loss": 0.9738, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.0259208731241474, |
| "grad_norm": 1.1087556695241214, |
| "learning_rate": 3.867206448491335e-05, |
| "loss": 0.974, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.0313778990450204, |
| "grad_norm": 1.4845940458146507, |
| "learning_rate": 3.8644573481963386e-05, |
| "loss": 0.9676, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.0368349249658937, |
| "grad_norm": 1.4362719095357956, |
| "learning_rate": 3.861681080255922e-05, |
| "loss": 0.9382, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.0422919508867667, |
| "grad_norm": 1.4674385107699772, |
| "learning_rate": 3.858877685124034e-05, |
| "loss": 0.94, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.0477489768076398, |
| "grad_norm": 1.084446006406934, |
| "learning_rate": 3.8560472036499044e-05, |
| "loss": 0.9548, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.053206002728513, |
| "grad_norm": 1.7424024173389683, |
| "learning_rate": 3.8531896770774454e-05, |
| "loss": 0.966, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.058663028649386, |
| "grad_norm": 1.7927777941962322, |
| "learning_rate": 3.8503051470446544e-05, |
| "loss": 0.9371, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.0641200545702592, |
| "grad_norm": 0.8728719723252784, |
| "learning_rate": 3.847393655583004e-05, |
| "loss": 0.9778, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.0695770804911324, |
| "grad_norm": 1.5459212237514233, |
| "learning_rate": 3.844455245116832e-05, |
| "loss": 0.9714, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.0750341064120055, |
| "grad_norm": 1.723318009783005, |
| "learning_rate": 3.8414899584627223e-05, |
| "loss": 0.9483, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.0804911323328785, |
| "grad_norm": 1.6105441502277638, |
| "learning_rate": 3.838497838828879e-05, |
| "loss": 0.9529, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.0859481582537518, |
| "grad_norm": 1.235861043156412, |
| "learning_rate": 3.835478929814502e-05, |
| "loss": 0.9714, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.0914051841746248, |
| "grad_norm": 1.5553009472910362, |
| "learning_rate": 3.8324332754091447e-05, |
| "loss": 0.9499, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.096862210095498, |
| "grad_norm": 1.9631947357338404, |
| "learning_rate": 3.82936091999208e-05, |
| "loss": 0.9481, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.1023192360163712, |
| "grad_norm": 0.771286766088072, |
| "learning_rate": 3.826261908331649e-05, |
| "loss": 0.9528, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.1077762619372442, |
| "grad_norm": 1.8335561541725196, |
| "learning_rate": 3.8231362855846105e-05, |
| "loss": 0.9498, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.1132332878581173, |
| "grad_norm": 1.8424106742867963, |
| "learning_rate": 3.8199840972954806e-05, |
| "loss": 0.9476, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.1186903137789905, |
| "grad_norm": 0.7950788375956499, |
| "learning_rate": 3.816805389395873e-05, |
| "loss": 0.9422, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.1241473396998636, |
| "grad_norm": 1.6715342615720261, |
| "learning_rate": 3.813600208203828e-05, |
| "loss": 0.9652, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.1296043656207366, |
| "grad_norm": 1.0978850847460873, |
| "learning_rate": 3.810368600423136e-05, |
| "loss": 0.9578, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.13506139154161, |
| "grad_norm": 2.252408921193313, |
| "learning_rate": 3.8071106131426586e-05, |
| "loss": 0.9667, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.140518417462483, |
| "grad_norm": 1.1643241254847931, |
| "learning_rate": 3.803826293835642e-05, |
| "loss": 0.9514, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.145975443383356, |
| "grad_norm": 1.9506655247258313, |
| "learning_rate": 3.8005156903590265e-05, |
| "loss": 0.9436, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.1514324693042293, |
| "grad_norm": 1.6736581284768521, |
| "learning_rate": 3.797178850952747e-05, |
| "loss": 0.9563, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.1568894952251023, |
| "grad_norm": 1.698498967382254, |
| "learning_rate": 3.79381582423903e-05, |
| "loss": 0.96, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.1623465211459754, |
| "grad_norm": 1.4463473539957177, |
| "learning_rate": 3.790426659221689e-05, |
| "loss": 0.9583, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.1678035470668486, |
| "grad_norm": 1.996119225700199, |
| "learning_rate": 3.7870114052854056e-05, |
| "loss": 0.9686, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.1732605729877217, |
| "grad_norm": 1.2453858458138212, |
| "learning_rate": 3.783570112195013e-05, |
| "loss": 0.9476, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.1787175989085947, |
| "grad_norm": 1.9429791252993835, |
| "learning_rate": 3.780102830094768e-05, |
| "loss": 0.9633, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.184174624829468, |
| "grad_norm": 1.7144005781733527, |
| "learning_rate": 3.7766096095076236e-05, |
| "loss": 0.9452, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.189631650750341, |
| "grad_norm": 1.2919220781788054, |
| "learning_rate": 3.7730905013344925e-05, |
| "loss": 0.9505, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.195088676671214, |
| "grad_norm": 1.7283120463695893, |
| "learning_rate": 3.7695455568535015e-05, |
| "loss": 0.9583, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.2005457025920874, |
| "grad_norm": 1.2984823063070836, |
| "learning_rate": 3.76597482771925e-05, |
| "loss": 0.925, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.2060027285129604, |
| "grad_norm": 1.2101553255952835, |
| "learning_rate": 3.7623783659620515e-05, |
| "loss": 0.9671, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.2114597544338335, |
| "grad_norm": 1.9193420409227075, |
| "learning_rate": 3.7587562239871804e-05, |
| "loss": 0.9713, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.2169167803547067, |
| "grad_norm": 1.145139436855805, |
| "learning_rate": 3.755108454574107e-05, |
| "loss": 0.9688, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.2223738062755798, |
| "grad_norm": 2.3369999491203814, |
| "learning_rate": 3.751435110875724e-05, |
| "loss": 0.966, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.2278308321964528, |
| "grad_norm": 1.6283559400501786, |
| "learning_rate": 3.7477362464175794e-05, |
| "loss": 0.9629, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.233287858117326, |
| "grad_norm": 2.1896432971296447, |
| "learning_rate": 3.7440119150970924e-05, |
| "loss": 0.967, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.2387448840381992, |
| "grad_norm": 1.4314027126167852, |
| "learning_rate": 3.7402621711827675e-05, |
| "loss": 0.9391, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.2442019099590724, |
| "grad_norm": 2.448680005865948, |
| "learning_rate": 3.7364870693134044e-05, |
| "loss": 0.9791, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.2496589358799455, |
| "grad_norm": 1.988787930308905, |
| "learning_rate": 3.732686664497304e-05, |
| "loss": 0.9678, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.2551159618008185, |
| "grad_norm": 2.063824899367631, |
| "learning_rate": 3.7288610121114634e-05, |
| "loss": 0.9617, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.2605729877216918, |
| "grad_norm": 1.7243515110002714, |
| "learning_rate": 3.725010167900772e-05, |
| "loss": 0.9533, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.2660300136425648, |
| "grad_norm": 1.8647332677788166, |
| "learning_rate": 3.721134187977197e-05, |
| "loss": 0.9563, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.271487039563438, |
| "grad_norm": 1.636320006353433, |
| "learning_rate": 3.7172331288189667e-05, |
| "loss": 0.9568, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.2769440654843112, |
| "grad_norm": 1.7187722452357803, |
| "learning_rate": 3.713307047269748e-05, |
| "loss": 0.9538, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.2824010914051842, |
| "grad_norm": 1.5589845753526528, |
| "learning_rate": 3.7093560005378175e-05, |
| "loss": 0.9426, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.2878581173260573, |
| "grad_norm": 1.8373924763108647, |
| "learning_rate": 3.705380046195228e-05, |
| "loss": 0.9244, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.2933151432469305, |
| "grad_norm": 1.3882254378197982, |
| "learning_rate": 3.701379242176969e-05, |
| "loss": 0.9498, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.2987721691678036, |
| "grad_norm": 1.7021142374331253, |
| "learning_rate": 3.697353646780124e-05, |
| "loss": 0.9434, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.3042291950886766, |
| "grad_norm": 1.3543258636289206, |
| "learning_rate": 3.693303318663019e-05, |
| "loss": 0.9543, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.30968622100955, |
| "grad_norm": 1.6810213439521031, |
| "learning_rate": 3.689228316844371e-05, |
| "loss": 0.9462, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.315143246930423, |
| "grad_norm": 1.3377038870303093, |
| "learning_rate": 3.685128700702423e-05, |
| "loss": 0.9576, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.320600272851296, |
| "grad_norm": 1.5727626762086575, |
| "learning_rate": 3.681004529974085e-05, |
| "loss": 0.9583, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.3260572987721693, |
| "grad_norm": 1.2786793127927039, |
| "learning_rate": 3.676855864754057e-05, |
| "loss": 0.9357, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.3315143246930423, |
| "grad_norm": 1.648396462433026, |
| "learning_rate": 3.67268276549396e-05, |
| "loss": 0.9735, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.3369713506139154, |
| "grad_norm": 1.2216794004695668, |
| "learning_rate": 3.668485293001448e-05, |
| "loss": 0.9741, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.3424283765347886, |
| "grad_norm": 1.5971696430835944, |
| "learning_rate": 3.664263508439329e-05, |
| "loss": 0.9484, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.3478854024556617, |
| "grad_norm": 1.3024833094157782, |
| "learning_rate": 3.660017473324669e-05, |
| "loss": 0.9406, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.3533424283765347, |
| "grad_norm": 1.5316788751229022, |
| "learning_rate": 3.655747249527897e-05, |
| "loss": 0.9601, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.358799454297408, |
| "grad_norm": 1.5547319797496317, |
| "learning_rate": 3.6514528992719044e-05, |
| "loss": 0.9474, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.364256480218281, |
| "grad_norm": 1.206667830823351, |
| "learning_rate": 3.6471344851311356e-05, |
| "loss": 0.9502, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.369713506139154, |
| "grad_norm": 1.2600525155706597, |
| "learning_rate": 3.64279207003068e-05, |
| "loss": 0.9452, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.3751705320600274, |
| "grad_norm": 1.3484101306757132, |
| "learning_rate": 3.638425717245353e-05, |
| "loss": 0.9502, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.3806275579809004, |
| "grad_norm": 1.2235801669480915, |
| "learning_rate": 3.634035490398774e-05, |
| "loss": 0.9384, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.3860845839017735, |
| "grad_norm": 1.5485793543675035, |
| "learning_rate": 3.629621453462438e-05, |
| "loss": 0.959, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.3915416098226467, |
| "grad_norm": 1.4002101413586943, |
| "learning_rate": 3.625183670754787e-05, |
| "loss": 0.9472, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.3969986357435198, |
| "grad_norm": 0.9434127178746972, |
| "learning_rate": 3.6207222069402696e-05, |
| "loss": 0.9455, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.4024556616643928, |
| "grad_norm": 0.9858801112297753, |
| "learning_rate": 3.6162371270284004e-05, |
| "loss": 0.9436, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.407912687585266, |
| "grad_norm": 1.3469345939907027, |
| "learning_rate": 3.611728496372813e-05, |
| "loss": 0.9368, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.4133697135061392, |
| "grad_norm": 1.8149253369471827, |
| "learning_rate": 3.6071963806703054e-05, |
| "loss": 0.9427, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.4188267394270122, |
| "grad_norm": 0.7473132379864426, |
| "learning_rate": 3.6026408459598844e-05, |
| "loss": 0.9638, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.4242837653478855, |
| "grad_norm": 1.6128737568835454, |
| "learning_rate": 3.598061958621804e-05, |
| "loss": 0.9557, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.4297407912687585, |
| "grad_norm": 1.4020351576310623, |
| "learning_rate": 3.593459785376597e-05, |
| "loss": 0.9421, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.4351978171896316, |
| "grad_norm": 1.2945719219835932, |
| "learning_rate": 3.5888343932841035e-05, |
| "loss": 0.9532, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.4406548431105048, |
| "grad_norm": 1.2851599043172512, |
| "learning_rate": 3.584185849742492e-05, |
| "loss": 0.9307, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.446111869031378, |
| "grad_norm": 1.2427656903613609, |
| "learning_rate": 3.579514222487281e-05, |
| "loss": 0.9538, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.451568894952251, |
| "grad_norm": 1.2877332071545373, |
| "learning_rate": 3.5748195795903474e-05, |
| "loss": 0.9339, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.4570259208731242, |
| "grad_norm": 1.198006739181478, |
| "learning_rate": 3.5701019894589376e-05, |
| "loss": 0.9512, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.4624829467939973, |
| "grad_norm": 1.5795004337836194, |
| "learning_rate": 3.565361520834671e-05, |
| "loss": 0.9448, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.4679399727148703, |
| "grad_norm": 1.1556792865151078, |
| "learning_rate": 3.5605982427925356e-05, |
| "loss": 0.9332, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.4733969986357436, |
| "grad_norm": 0.5930547881100959, |
| "learning_rate": 3.555812224739884e-05, |
| "loss": 0.9613, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.4788540245566166, |
| "grad_norm": 1.4579608488740115, |
| "learning_rate": 3.5510035364154236e-05, |
| "loss": 0.957, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.4843110504774897, |
| "grad_norm": 0.9399997272018373, |
| "learning_rate": 3.5461722478881935e-05, |
| "loss": 0.9362, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.489768076398363, |
| "grad_norm": 1.181780640902133, |
| "learning_rate": 3.541318429556552e-05, |
| "loss": 0.9304, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.495225102319236, |
| "grad_norm": 2.438002638433228, |
| "learning_rate": 3.5364421521471443e-05, |
| "loss": 0.9539, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.500682128240109, |
| "grad_norm": 0.9264166142215685, |
| "learning_rate": 3.531543486713877e-05, |
| "loss": 0.9592, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.5061391541609823, |
| "grad_norm": 4.380791625672203, |
| "learning_rate": 3.5266225046368765e-05, |
| "loss": 0.9625, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.5115961800818554, |
| "grad_norm": 4.119745847530299, |
| "learning_rate": 3.521679277621457e-05, |
| "loss": 0.9811, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.5170532060027284, |
| "grad_norm": 1.3888384210153164, |
| "learning_rate": 3.5167138776970686e-05, |
| "loss": 0.9344, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.5225102319236017, |
| "grad_norm": 3.242363274569884, |
| "learning_rate": 3.5117263772162515e-05, |
| "loss": 0.9699, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.5279672578444747, |
| "grad_norm": 2.131900747816542, |
| "learning_rate": 3.5067168488535794e-05, |
| "loss": 0.9899, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.5334242837653478, |
| "grad_norm": 3.1589070088722515, |
| "learning_rate": 3.501685365604604e-05, |
| "loss": 0.9546, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.538881309686221, |
| "grad_norm": 2.6438273574397404, |
| "learning_rate": 3.496632000784787e-05, |
| "loss": 0.9694, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.544338335607094, |
| "grad_norm": 2.0669427502395594, |
| "learning_rate": 3.4915568280284335e-05, |
| "loss": 0.9452, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.5497953615279672, |
| "grad_norm": 2.1718089915480014, |
| "learning_rate": 3.4864599212876234e-05, |
| "loss": 0.9454, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.5552523874488404, |
| "grad_norm": 2.0439265869282193, |
| "learning_rate": 3.481341354831125e-05, |
| "loss": 0.9266, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.5607094133697135, |
| "grad_norm": 1.7375290887295285, |
| "learning_rate": 3.476201203243322e-05, |
| "loss": 0.9461, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.5661664392905865, |
| "grad_norm": 1.7370946125028597, |
| "learning_rate": 3.4710395414231195e-05, |
| "loss": 0.9657, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.5716234652114598, |
| "grad_norm": 1.403531131584409, |
| "learning_rate": 3.465856444582856e-05, |
| "loss": 0.9495, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.5770804911323328, |
| "grad_norm": 1.4819115235994536, |
| "learning_rate": 3.460651988247208e-05, |
| "loss": 0.9617, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.5825375170532059, |
| "grad_norm": 1.761856728208756, |
| "learning_rate": 3.4554262482520875e-05, |
| "loss": 0.921, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.5879945429740792, |
| "grad_norm": 1.0191878209582437, |
| "learning_rate": 3.4501793007435394e-05, |
| "loss": 0.9447, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.5934515688949522, |
| "grad_norm": 2.274348027783054, |
| "learning_rate": 3.444911222176629e-05, |
| "loss": 0.9497, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.5989085948158253, |
| "grad_norm": 1.5339383301336882, |
| "learning_rate": 3.43962208931433e-05, |
| "loss": 0.9669, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.6043656207366985, |
| "grad_norm": 2.550276251211631, |
| "learning_rate": 3.434311979226406e-05, |
| "loss": 0.956, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.6098226466575716, |
| "grad_norm": 1.7875909094899942, |
| "learning_rate": 3.428980969288287e-05, |
| "loss": 0.9495, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.6152796725784446, |
| "grad_norm": 2.823228050378481, |
| "learning_rate": 3.42362913717994e-05, |
| "loss": 0.9362, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.620736698499318, |
| "grad_norm": 2.4678216750780857, |
| "learning_rate": 3.41825656088474e-05, |
| "loss": 0.9386, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.626193724420191, |
| "grad_norm": 1.9114157924579258, |
| "learning_rate": 3.4128633186883346e-05, |
| "loss": 0.9576, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.631650750341064, |
| "grad_norm": 1.8379349077219813, |
| "learning_rate": 3.407449489177499e-05, |
| "loss": 0.9479, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.6371077762619373, |
| "grad_norm": 1.852909901213652, |
| "learning_rate": 3.4020151512389924e-05, |
| "loss": 0.9279, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.6425648021828103, |
| "grad_norm": 1.3420457124335345, |
| "learning_rate": 3.396560384058413e-05, |
| "loss": 0.9298, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.6480218281036834, |
| "grad_norm": 2.1617773929000172, |
| "learning_rate": 3.391085267119037e-05, |
| "loss": 0.9225, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.6534788540245566, |
| "grad_norm": 1.316355967958462, |
| "learning_rate": 3.3855898802006644e-05, |
| "loss": 0.9342, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.65893587994543, |
| "grad_norm": 2.453815979459407, |
| "learning_rate": 3.380074303378458e-05, |
| "loss": 0.9394, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.6643929058663027, |
| "grad_norm": 1.748815933891966, |
| "learning_rate": 3.374538617021773e-05, |
| "loss": 0.9315, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.669849931787176, |
| "grad_norm": 2.5597232277901973, |
| "learning_rate": 3.3689829017929875e-05, |
| "loss": 0.9573, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.6753069577080493, |
| "grad_norm": 2.368134432470627, |
| "learning_rate": 3.363407238646327e-05, |
| "loss": 0.9494, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.680763983628922, |
| "grad_norm": 1.724634315811694, |
| "learning_rate": 3.357811708826686e-05, |
| "loss": 0.9407, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.6862210095497954, |
| "grad_norm": 1.8226179705374004, |
| "learning_rate": 3.352196393868442e-05, |
| "loss": 0.9495, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.6916780354706686, |
| "grad_norm": 1.6945951192803632, |
| "learning_rate": 3.34656137559427e-05, |
| "loss": 0.9402, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.6971350613915415, |
| "grad_norm": 1.402641679011377, |
| "learning_rate": 3.3409067361139464e-05, |
| "loss": 0.9191, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.7025920873124147, |
| "grad_norm": 1.3467589645615918, |
| "learning_rate": 3.3352325578231565e-05, |
| "loss": 0.9636, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.708049113233288, |
| "grad_norm": 1.25752862289665, |
| "learning_rate": 3.329538923402293e-05, |
| "loss": 0.9554, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.7135061391541608, |
| "grad_norm": 0.986547181961436, |
| "learning_rate": 3.323825915815248e-05, |
| "loss": 0.9305, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.718963165075034, |
| "grad_norm": 1.4979513167093783, |
| "learning_rate": 3.31809361830821e-05, |
| "loss": 0.9567, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.7244201909959074, |
| "grad_norm": 0.7937925026119881, |
| "learning_rate": 3.312342114408444e-05, |
| "loss": 0.9458, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.7298772169167802, |
| "grad_norm": 1.8876612539551143, |
| "learning_rate": 3.30657148792308e-05, |
| "loss": 0.9649, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.7353342428376535, |
| "grad_norm": 1.226595551778844, |
| "learning_rate": 3.3007818229378896e-05, |
| "loss": 0.9643, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.7407912687585267, |
| "grad_norm": 2.213786521631912, |
| "learning_rate": 3.29497320381606e-05, |
| "loss": 0.9584, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.7462482946793996, |
| "grad_norm": 2.1570819482352235, |
| "learning_rate": 3.2891457151969675e-05, |
| "loss": 0.9531, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.7517053206002728, |
| "grad_norm": 1.4381280543608101, |
| "learning_rate": 3.2832994419949393e-05, |
| "loss": 0.9421, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.7571623465211461, |
| "grad_norm": 1.737184951842976, |
| "learning_rate": 3.277434469398022e-05, |
| "loss": 0.9416, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.762619372442019, |
| "grad_norm": 1.632657953412784, |
| "learning_rate": 3.2715508828667366e-05, |
| "loss": 0.9321, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.7680763983628922, |
| "grad_norm": 1.488744578094212, |
| "learning_rate": 3.265648768132834e-05, |
| "loss": 0.9365, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.7735334242837655, |
| "grad_norm": 1.6336003571844502, |
| "learning_rate": 3.2597282111980444e-05, |
| "loss": 0.9515, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.7789904502046383, |
| "grad_norm": 1.4154118064331849, |
| "learning_rate": 3.253789298332828e-05, |
| "loss": 0.9641, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.7844474761255116, |
| "grad_norm": 1.5366612747550772, |
| "learning_rate": 3.2478321160751134e-05, |
| "loss": 0.9456, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.7899045020463848, |
| "grad_norm": 1.3450928156923279, |
| "learning_rate": 3.241856751229041e-05, |
| "loss": 0.9486, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.795361527967258, |
| "grad_norm": 1.2765561802175178, |
| "learning_rate": 3.2358632908636955e-05, |
| "loss": 0.9567, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.800818553888131, |
| "grad_norm": 1.162610958798, |
| "learning_rate": 3.229851822311834e-05, |
| "loss": 0.9288, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.8062755798090042, |
| "grad_norm": 1.1625836925107373, |
| "learning_rate": 3.223822433168623e-05, |
| "loss": 0.9263, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.8117326057298773, |
| "grad_norm": 0.8071251992329053, |
| "learning_rate": 3.217775211290351e-05, |
| "loss": 0.9482, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.8171896316507503, |
| "grad_norm": 0.89790755928994, |
| "learning_rate": 3.211710244793156e-05, |
| "loss": 0.9173, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.8226466575716236, |
| "grad_norm": 0.8356390239967052, |
| "learning_rate": 3.205627622051738e-05, |
| "loss": 0.9504, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.8281036834924966, |
| "grad_norm": 0.6998885337784212, |
| "learning_rate": 3.199527431698073e-05, |
| "loss": 0.9459, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.8335607094133697, |
| "grad_norm": 0.8727569735519537, |
| "learning_rate": 3.19340976262012e-05, |
| "loss": 0.9435, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.839017735334243, |
| "grad_norm": 0.6362860972023866, |
| "learning_rate": 3.187274703960526e-05, |
| "loss": 0.9406, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.844474761255116, |
| "grad_norm": 0.8761738610839735, |
| "learning_rate": 3.181122345115329e-05, |
| "loss": 0.9353, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.849931787175989, |
| "grad_norm": 0.7208261657101167, |
| "learning_rate": 3.174952775732651e-05, |
| "loss": 0.9368, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.8553888130968623, |
| "grad_norm": 0.8342099154714143, |
| "learning_rate": 3.1687660857114e-05, |
| "loss": 0.9515, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.8608458390177354, |
| "grad_norm": 0.7588834066746923, |
| "learning_rate": 3.1625623651999485e-05, |
| "loss": 0.946, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.8663028649386084, |
| "grad_norm": 0.7261790084313842, |
| "learning_rate": 3.1563417045948295e-05, |
| "loss": 0.9332, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.8717598908594817, |
| "grad_norm": 0.5170313983982283, |
| "learning_rate": 3.150104194539417e-05, |
| "loss": 0.9305, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.8772169167803547, |
| "grad_norm": 0.7727261576998418, |
| "learning_rate": 3.1438499259226e-05, |
| "loss": 0.9437, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.8826739427012278, |
| "grad_norm": 1.0590324797396327, |
| "learning_rate": 3.137578989877466e-05, |
| "loss": 0.9496, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.888130968622101, |
| "grad_norm": 0.7511992016971163, |
| "learning_rate": 3.131291477779968e-05, |
| "loss": 0.9556, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.893587994542974, |
| "grad_norm": 1.081487500255035, |
| "learning_rate": 3.124987481247594e-05, |
| "loss": 0.9479, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.8990450204638472, |
| "grad_norm": 1.4968005117001788, |
| "learning_rate": 3.118667092138033e-05, |
| "loss": 0.9214, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.9045020463847204, |
| "grad_norm": 0.6464116981961434, |
| "learning_rate": 3.112330402547834e-05, |
| "loss": 0.9599, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.9099590723055935, |
| "grad_norm": 1.1571751705071633, |
| "learning_rate": 3.10597750481107e-05, |
| "loss": 0.9438, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.9154160982264665, |
| "grad_norm": 1.383173192553895, |
| "learning_rate": 3.099608491497983e-05, |
| "loss": 0.9369, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.9208731241473398, |
| "grad_norm": 1.0142077195831358, |
| "learning_rate": 3.093223455413645e-05, |
| "loss": 0.9181, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.9263301500682128, |
| "grad_norm": 1.146163334987763, |
| "learning_rate": 3.0868224895965996e-05, |
| "loss": 0.9396, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.931787175989086, |
| "grad_norm": 0.6987837846263671, |
| "learning_rate": 3.080405687317507e-05, |
| "loss": 0.9303, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.9372442019099592, |
| "grad_norm": 1.3380093833598752, |
| "learning_rate": 3.073973142077788e-05, |
| "loss": 0.9462, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.9427012278308322, |
| "grad_norm": 0.6049244168030435, |
| "learning_rate": 3.067524947608258e-05, |
| "loss": 0.9187, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.9481582537517053, |
| "grad_norm": 0.8098504286256158, |
| "learning_rate": 3.061061197867763e-05, |
| "loss": 0.9162, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.9536152796725785, |
| "grad_norm": 0.7357777980477844, |
| "learning_rate": 3.05458198704181e-05, |
| "loss": 0.9344, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.9590723055934516, |
| "grad_norm": 0.5713529931575109, |
| "learning_rate": 3.0480874095411946e-05, |
| "loss": 0.9515, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.9645293315143246, |
| "grad_norm": 0.8373330331353604, |
| "learning_rate": 3.0415775600006267e-05, |
| "loss": 0.9546, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.969986357435198, |
| "grad_norm": 0.6868147137493235, |
| "learning_rate": 3.035052533277349e-05, |
| "loss": 0.907, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.975443383356071, |
| "grad_norm": 0.47372940490854243, |
| "learning_rate": 3.0285124244497576e-05, |
| "loss": 0.9246, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.980900409276944, |
| "grad_norm": 0.6977343075907223, |
| "learning_rate": 3.0219573288160128e-05, |
| "loss": 0.9562, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.9863574351978173, |
| "grad_norm": 0.6563089786155916, |
| "learning_rate": 3.0153873418926543e-05, |
| "loss": 0.9344, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.9918144611186903, |
| "grad_norm": 0.7033335661318982, |
| "learning_rate": 3.0088025594132086e-05, |
| "loss": 0.9479, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.9972714870395634, |
| "grad_norm": 1.1633808323873716, |
| "learning_rate": 3.0022030773267908e-05, |
| "loss": 0.935, |
| "step": 366 |
| }, |
| { |
| "epoch": 2.0027285129604366, |
| "grad_norm": 2.256649667221531, |
| "learning_rate": 2.9955889917967114e-05, |
| "loss": 1.6487, |
| "step": 367 |
| }, |
| { |
| "epoch": 2.00818553888131, |
| "grad_norm": 0.9257803693615221, |
| "learning_rate": 2.9889603991990718e-05, |
| "loss": 0.9194, |
| "step": 368 |
| }, |
| { |
| "epoch": 2.0136425648021827, |
| "grad_norm": 0.8374064842179173, |
| "learning_rate": 2.9823173961213614e-05, |
| "loss": 0.936, |
| "step": 369 |
| }, |
| { |
| "epoch": 2.019099590723056, |
| "grad_norm": 0.6888393857507884, |
| "learning_rate": 2.9756600793610477e-05, |
| "loss": 0.9069, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.0245566166439293, |
| "grad_norm": 0.6078836940762362, |
| "learning_rate": 2.9689885459241705e-05, |
| "loss": 0.9181, |
| "step": 371 |
| }, |
| { |
| "epoch": 2.030013642564802, |
| "grad_norm": 0.6540715623371649, |
| "learning_rate": 2.9623028930239234e-05, |
| "loss": 0.9365, |
| "step": 372 |
| }, |
| { |
| "epoch": 2.0354706684856754, |
| "grad_norm": 0.6022481328295576, |
| "learning_rate": 2.955603218079241e-05, |
| "loss": 0.923, |
| "step": 373 |
| }, |
| { |
| "epoch": 2.0409276944065486, |
| "grad_norm": 0.7165752848226464, |
| "learning_rate": 2.9488896187133767e-05, |
| "loss": 0.9181, |
| "step": 374 |
| }, |
| { |
| "epoch": 2.0463847203274215, |
| "grad_norm": 0.8352826439816641, |
| "learning_rate": 2.942162192752483e-05, |
| "loss": 0.9236, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.0518417462482947, |
| "grad_norm": 1.124128627018019, |
| "learning_rate": 2.935421038224182e-05, |
| "loss": 0.919, |
| "step": 376 |
| }, |
| { |
| "epoch": 2.057298772169168, |
| "grad_norm": 1.0339665065551706, |
| "learning_rate": 2.9286662533561423e-05, |
| "loss": 0.9367, |
| "step": 377 |
| }, |
| { |
| "epoch": 2.062755798090041, |
| "grad_norm": 1.2298783039098067, |
| "learning_rate": 2.9218979365746426e-05, |
| "loss": 0.9456, |
| "step": 378 |
| }, |
| { |
| "epoch": 2.068212824010914, |
| "grad_norm": 0.8183361526417724, |
| "learning_rate": 2.9151161865031414e-05, |
| "loss": 0.9444, |
| "step": 379 |
| }, |
| { |
| "epoch": 2.0736698499317874, |
| "grad_norm": 0.484619834541414, |
| "learning_rate": 2.908321101960837e-05, |
| "loss": 0.9085, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.07912687585266, |
| "grad_norm": 0.3810542728807868, |
| "learning_rate": 2.9015127819612292e-05, |
| "loss": 0.8991, |
| "step": 381 |
| }, |
| { |
| "epoch": 2.0845839017735335, |
| "grad_norm": 0.4925827663184475, |
| "learning_rate": 2.894691325710677e-05, |
| "loss": 0.9218, |
| "step": 382 |
| }, |
| { |
| "epoch": 2.0900409276944067, |
| "grad_norm": 0.7465936328564935, |
| "learning_rate": 2.8878568326069494e-05, |
| "loss": 0.93, |
| "step": 383 |
| }, |
| { |
| "epoch": 2.0954979536152796, |
| "grad_norm": 1.0199914288512335, |
| "learning_rate": 2.8810094022377842e-05, |
| "loss": 0.9388, |
| "step": 384 |
| }, |
| { |
| "epoch": 2.100954979536153, |
| "grad_norm": 1.4039532764332685, |
| "learning_rate": 2.8741491343794296e-05, |
| "loss": 0.9205, |
| "step": 385 |
| }, |
| { |
| "epoch": 2.106412005457026, |
| "grad_norm": 0.6570765199675046, |
| "learning_rate": 2.867276128995193e-05, |
| "loss": 0.9472, |
| "step": 386 |
| }, |
| { |
| "epoch": 2.111869031377899, |
| "grad_norm": 0.47805545813863976, |
| "learning_rate": 2.860390486233987e-05, |
| "loss": 0.9213, |
| "step": 387 |
| }, |
| { |
| "epoch": 2.117326057298772, |
| "grad_norm": 0.9100198379548127, |
| "learning_rate": 2.8534923064288652e-05, |
| "loss": 0.9185, |
| "step": 388 |
| }, |
| { |
| "epoch": 2.1227830832196455, |
| "grad_norm": 1.359999448910369, |
| "learning_rate": 2.8465816900955635e-05, |
| "loss": 0.9103, |
| "step": 389 |
| }, |
| { |
| "epoch": 2.1282401091405183, |
| "grad_norm": 0.7267473662850902, |
| "learning_rate": 2.8396587379310366e-05, |
| "loss": 0.9263, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.1336971350613916, |
| "grad_norm": 0.6852106225837414, |
| "learning_rate": 2.8327235508119854e-05, |
| "loss": 0.9056, |
| "step": 391 |
| }, |
| { |
| "epoch": 2.139154160982265, |
| "grad_norm": 0.6935707651834161, |
| "learning_rate": 2.8257762297933927e-05, |
| "loss": 0.9279, |
| "step": 392 |
| }, |
| { |
| "epoch": 2.1446111869031377, |
| "grad_norm": 0.8762210438590792, |
| "learning_rate": 2.81881687610705e-05, |
| "loss": 0.9069, |
| "step": 393 |
| }, |
| { |
| "epoch": 2.150068212824011, |
| "grad_norm": 1.1906568951863223, |
| "learning_rate": 2.8118455911600767e-05, |
| "loss": 0.929, |
| "step": 394 |
| }, |
| { |
| "epoch": 2.155525238744884, |
| "grad_norm": 0.980254177026494, |
| "learning_rate": 2.8048624765334502e-05, |
| "loss": 0.9323, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.160982264665757, |
| "grad_norm": 1.0373134164423028, |
| "learning_rate": 2.7978676339805208e-05, |
| "loss": 0.9208, |
| "step": 396 |
| }, |
| { |
| "epoch": 2.1664392905866303, |
| "grad_norm": 1.0207154812500114, |
| "learning_rate": 2.79086116542553e-05, |
| "loss": 0.9096, |
| "step": 397 |
| }, |
| { |
| "epoch": 2.1718963165075036, |
| "grad_norm": 1.1988463269854843, |
| "learning_rate": 2.783843172962128e-05, |
| "loss": 0.9402, |
| "step": 398 |
| }, |
| { |
| "epoch": 2.1773533424283764, |
| "grad_norm": 0.7969790707530212, |
| "learning_rate": 2.7768137588518807e-05, |
| "loss": 0.908, |
| "step": 399 |
| }, |
| { |
| "epoch": 2.1828103683492497, |
| "grad_norm": 0.4748645421435369, |
| "learning_rate": 2.769773025522785e-05, |
| "loss": 0.914, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.188267394270123, |
| "grad_norm": 0.45121822491331515, |
| "learning_rate": 2.7627210755677733e-05, |
| "loss": 0.9307, |
| "step": 401 |
| }, |
| { |
| "epoch": 2.193724420190996, |
| "grad_norm": 0.8118676469523863, |
| "learning_rate": 2.7556580117432185e-05, |
| "loss": 0.9102, |
| "step": 402 |
| }, |
| { |
| "epoch": 2.199181446111869, |
| "grad_norm": 1.1207703065447276, |
| "learning_rate": 2.7485839369674384e-05, |
| "loss": 0.9231, |
| "step": 403 |
| }, |
| { |
| "epoch": 2.2046384720327423, |
| "grad_norm": 0.9740106870010401, |
| "learning_rate": 2.7414989543191964e-05, |
| "loss": 0.9087, |
| "step": 404 |
| }, |
| { |
| "epoch": 2.210095497953615, |
| "grad_norm": 0.9634686443072049, |
| "learning_rate": 2.734403167036195e-05, |
| "loss": 0.9082, |
| "step": 405 |
| }, |
| { |
| "epoch": 2.2155525238744884, |
| "grad_norm": 0.9832162277660468, |
| "learning_rate": 2.727296678513577e-05, |
| "loss": 0.9241, |
| "step": 406 |
| }, |
| { |
| "epoch": 2.2210095497953617, |
| "grad_norm": 1.0746452821377297, |
| "learning_rate": 2.720179592302417e-05, |
| "loss": 0.9407, |
| "step": 407 |
| }, |
| { |
| "epoch": 2.2264665757162345, |
| "grad_norm": 0.8835118585227068, |
| "learning_rate": 2.71305201210821e-05, |
| "loss": 0.906, |
| "step": 408 |
| }, |
| { |
| "epoch": 2.231923601637108, |
| "grad_norm": 0.806040386235616, |
| "learning_rate": 2.7059140417893645e-05, |
| "loss": 0.9142, |
| "step": 409 |
| }, |
| { |
| "epoch": 2.237380627557981, |
| "grad_norm": 0.7956258201623788, |
| "learning_rate": 2.6987657853556864e-05, |
| "loss": 0.8814, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.242837653478854, |
| "grad_norm": 0.7155012234587093, |
| "learning_rate": 2.6916073469668633e-05, |
| "loss": 0.9408, |
| "step": 411 |
| }, |
| { |
| "epoch": 2.248294679399727, |
| "grad_norm": 0.745980798963711, |
| "learning_rate": 2.6844388309309494e-05, |
| "loss": 0.9334, |
| "step": 412 |
| }, |
| { |
| "epoch": 2.2537517053206004, |
| "grad_norm": 0.8718383779341066, |
| "learning_rate": 2.6772603417028408e-05, |
| "loss": 0.9244, |
| "step": 413 |
| }, |
| { |
| "epoch": 2.2592087312414733, |
| "grad_norm": 0.8697224939003284, |
| "learning_rate": 2.6700719838827595e-05, |
| "loss": 0.9132, |
| "step": 414 |
| }, |
| { |
| "epoch": 2.2646657571623465, |
| "grad_norm": 0.7800957792385944, |
| "learning_rate": 2.662873862214724e-05, |
| "loss": 0.9253, |
| "step": 415 |
| }, |
| { |
| "epoch": 2.27012278308322, |
| "grad_norm": 0.8009973379664055, |
| "learning_rate": 2.655666081585027e-05, |
| "loss": 0.9, |
| "step": 416 |
| }, |
| { |
| "epoch": 2.2755798090040926, |
| "grad_norm": 0.8649005822972493, |
| "learning_rate": 2.6484487470207035e-05, |
| "loss": 0.9204, |
| "step": 417 |
| }, |
| { |
| "epoch": 2.281036834924966, |
| "grad_norm": 0.8818657424466958, |
| "learning_rate": 2.641221963688002e-05, |
| "loss": 0.9155, |
| "step": 418 |
| }, |
| { |
| "epoch": 2.286493860845839, |
| "grad_norm": 0.5647385759805507, |
| "learning_rate": 2.633985836890854e-05, |
| "loss": 0.9206, |
| "step": 419 |
| }, |
| { |
| "epoch": 2.291950886766712, |
| "grad_norm": 0.5034679857244327, |
| "learning_rate": 2.6267404720693375e-05, |
| "loss": 0.9204, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.2974079126875853, |
| "grad_norm": 0.710256150433762, |
| "learning_rate": 2.6194859747981385e-05, |
| "loss": 0.9191, |
| "step": 421 |
| }, |
| { |
| "epoch": 2.3028649386084585, |
| "grad_norm": 0.5706543763177601, |
| "learning_rate": 2.6122224507850182e-05, |
| "loss": 0.9185, |
| "step": 422 |
| }, |
| { |
| "epoch": 2.3083219645293314, |
| "grad_norm": 0.6833880125599795, |
| "learning_rate": 2.604950005869268e-05, |
| "loss": 0.9213, |
| "step": 423 |
| }, |
| { |
| "epoch": 2.3137789904502046, |
| "grad_norm": 0.8483843690019908, |
| "learning_rate": 2.5976687460201683e-05, |
| "loss": 0.9126, |
| "step": 424 |
| }, |
| { |
| "epoch": 2.319236016371078, |
| "grad_norm": 0.8129051361925009, |
| "learning_rate": 2.5903787773354463e-05, |
| "loss": 0.9188, |
| "step": 425 |
| }, |
| { |
| "epoch": 2.3246930422919507, |
| "grad_norm": 0.5996381128568273, |
| "learning_rate": 2.583080206039728e-05, |
| "loss": 0.9096, |
| "step": 426 |
| }, |
| { |
| "epoch": 2.330150068212824, |
| "grad_norm": 0.41863958371356735, |
| "learning_rate": 2.57577313848299e-05, |
| "loss": 0.9432, |
| "step": 427 |
| }, |
| { |
| "epoch": 2.3356070941336973, |
| "grad_norm": 0.34060059093315503, |
| "learning_rate": 2.5684576811390125e-05, |
| "loss": 0.9137, |
| "step": 428 |
| }, |
| { |
| "epoch": 2.34106412005457, |
| "grad_norm": 0.5069480306429284, |
| "learning_rate": 2.5611339406038257e-05, |
| "loss": 0.9124, |
| "step": 429 |
| }, |
| { |
| "epoch": 2.3465211459754434, |
| "grad_norm": 0.5427881229277935, |
| "learning_rate": 2.5538020235941552e-05, |
| "loss": 0.9166, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.3519781718963166, |
| "grad_norm": 0.543245106400598, |
| "learning_rate": 2.5464620369458724e-05, |
| "loss": 0.9197, |
| "step": 431 |
| }, |
| { |
| "epoch": 2.3574351978171895, |
| "grad_norm": 0.5487542346479996, |
| "learning_rate": 2.5391140876124305e-05, |
| "loss": 0.9203, |
| "step": 432 |
| }, |
| { |
| "epoch": 2.3628922237380627, |
| "grad_norm": 0.504474417772234, |
| "learning_rate": 2.531758282663311e-05, |
| "loss": 0.9139, |
| "step": 433 |
| }, |
| { |
| "epoch": 2.368349249658936, |
| "grad_norm": 0.3570671212002871, |
| "learning_rate": 2.524394729282464e-05, |
| "loss": 0.9227, |
| "step": 434 |
| }, |
| { |
| "epoch": 2.373806275579809, |
| "grad_norm": 0.33080967390463245, |
| "learning_rate": 2.5170235347667425e-05, |
| "loss": 0.9298, |
| "step": 435 |
| }, |
| { |
| "epoch": 2.379263301500682, |
| "grad_norm": 0.2629370339698779, |
| "learning_rate": 2.5096448065243415e-05, |
| "loss": 0.9222, |
| "step": 436 |
| }, |
| { |
| "epoch": 2.3847203274215554, |
| "grad_norm": 0.32467107495565267, |
| "learning_rate": 2.5022586520732334e-05, |
| "loss": 0.9092, |
| "step": 437 |
| }, |
| { |
| "epoch": 2.390177353342428, |
| "grad_norm": 0.27556269692287366, |
| "learning_rate": 2.494865179039599e-05, |
| "loss": 0.8993, |
| "step": 438 |
| }, |
| { |
| "epoch": 2.3956343792633015, |
| "grad_norm": 0.279539516282507, |
| "learning_rate": 2.4874644951562618e-05, |
| "loss": 0.9019, |
| "step": 439 |
| }, |
| { |
| "epoch": 2.4010914051841747, |
| "grad_norm": 0.33354360728490134, |
| "learning_rate": 2.4800567082611165e-05, |
| "loss": 0.9152, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.4065484311050476, |
| "grad_norm": 0.33169175944263035, |
| "learning_rate": 2.4726419262955595e-05, |
| "loss": 0.9091, |
| "step": 441 |
| }, |
| { |
| "epoch": 2.412005457025921, |
| "grad_norm": 0.3587055937970976, |
| "learning_rate": 2.465220257302913e-05, |
| "loss": 0.9202, |
| "step": 442 |
| }, |
| { |
| "epoch": 2.417462482946794, |
| "grad_norm": 0.40441219606068757, |
| "learning_rate": 2.4577918094268523e-05, |
| "loss": 0.9226, |
| "step": 443 |
| }, |
| { |
| "epoch": 2.422919508867667, |
| "grad_norm": 0.4865996215311924, |
| "learning_rate": 2.4503566909098318e-05, |
| "loss": 0.9093, |
| "step": 444 |
| }, |
| { |
| "epoch": 2.42837653478854, |
| "grad_norm": 0.38008820904475854, |
| "learning_rate": 2.4429150100915054e-05, |
| "loss": 0.9322, |
| "step": 445 |
| }, |
| { |
| "epoch": 2.4338335607094135, |
| "grad_norm": 0.41170329827458135, |
| "learning_rate": 2.435466875407148e-05, |
| "loss": 0.9324, |
| "step": 446 |
| }, |
| { |
| "epoch": 2.4392905866302863, |
| "grad_norm": 0.3622800817675993, |
| "learning_rate": 2.4280123953860767e-05, |
| "loss": 0.9001, |
| "step": 447 |
| }, |
| { |
| "epoch": 2.4447476125511596, |
| "grad_norm": 0.2682950261173189, |
| "learning_rate": 2.4205516786500684e-05, |
| "loss": 0.9314, |
| "step": 448 |
| }, |
| { |
| "epoch": 2.450204638472033, |
| "grad_norm": 0.2805378098796358, |
| "learning_rate": 2.4130848339117766e-05, |
| "loss": 0.9341, |
| "step": 449 |
| }, |
| { |
| "epoch": 2.4556616643929057, |
| "grad_norm": 0.26782126481321455, |
| "learning_rate": 2.4056119699731495e-05, |
| "loss": 0.9077, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.461118690313779, |
| "grad_norm": 0.37285051812558306, |
| "learning_rate": 2.3981331957238414e-05, |
| "loss": 0.9235, |
| "step": 451 |
| }, |
| { |
| "epoch": 2.466575716234652, |
| "grad_norm": 0.3129713500376212, |
| "learning_rate": 2.3906486201396287e-05, |
| "loss": 0.9213, |
| "step": 452 |
| }, |
| { |
| "epoch": 2.472032742155525, |
| "grad_norm": 0.36665287480858777, |
| "learning_rate": 2.3831583522808224e-05, |
| "loss": 0.917, |
| "step": 453 |
| }, |
| { |
| "epoch": 2.4774897680763983, |
| "grad_norm": 0.3443704371520464, |
| "learning_rate": 2.375662501290675e-05, |
| "loss": 0.9189, |
| "step": 454 |
| }, |
| { |
| "epoch": 2.4829467939972716, |
| "grad_norm": 0.31197899443616667, |
| "learning_rate": 2.368161176393793e-05, |
| "loss": 0.9127, |
| "step": 455 |
| }, |
| { |
| "epoch": 2.488403819918145, |
| "grad_norm": 0.35012014939390956, |
| "learning_rate": 2.360654486894548e-05, |
| "loss": 0.9113, |
| "step": 456 |
| }, |
| { |
| "epoch": 2.4938608458390177, |
| "grad_norm": 0.35258642719846595, |
| "learning_rate": 2.3531425421754782e-05, |
| "loss": 0.9137, |
| "step": 457 |
| }, |
| { |
| "epoch": 2.499317871759891, |
| "grad_norm": 0.4818508820401416, |
| "learning_rate": 2.3456254516956973e-05, |
| "loss": 0.9322, |
| "step": 458 |
| }, |
| { |
| "epoch": 2.504774897680764, |
| "grad_norm": 0.41831055845919374, |
| "learning_rate": 2.3381033249893007e-05, |
| "loss": 0.9358, |
| "step": 459 |
| }, |
| { |
| "epoch": 2.510231923601637, |
| "grad_norm": 0.46003166070829415, |
| "learning_rate": 2.3305762716637696e-05, |
| "loss": 0.9134, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.5156889495225103, |
| "grad_norm": 0.34405667621405894, |
| "learning_rate": 2.32304440139837e-05, |
| "loss": 0.914, |
| "step": 461 |
| }, |
| { |
| "epoch": 2.5211459754433836, |
| "grad_norm": 0.30837605247167627, |
| "learning_rate": 2.315507823942559e-05, |
| "loss": 0.8906, |
| "step": 462 |
| }, |
| { |
| "epoch": 2.5266030013642564, |
| "grad_norm": 0.35159469224889583, |
| "learning_rate": 2.3079666491143827e-05, |
| "loss": 0.9291, |
| "step": 463 |
| }, |
| { |
| "epoch": 2.5320600272851297, |
| "grad_norm": 0.3797916060475412, |
| "learning_rate": 2.3004209867988783e-05, |
| "loss": 0.9087, |
| "step": 464 |
| }, |
| { |
| "epoch": 2.5375170532060025, |
| "grad_norm": 0.40916286067612617, |
| "learning_rate": 2.2928709469464705e-05, |
| "loss": 0.9158, |
| "step": 465 |
| }, |
| { |
| "epoch": 2.542974079126876, |
| "grad_norm": 0.28077601639148303, |
| "learning_rate": 2.2853166395713715e-05, |
| "loss": 0.908, |
| "step": 466 |
| }, |
| { |
| "epoch": 2.548431105047749, |
| "grad_norm": 0.30535476691189556, |
| "learning_rate": 2.2777581747499767e-05, |
| "loss": 0.9288, |
| "step": 467 |
| }, |
| { |
| "epoch": 2.5538881309686223, |
| "grad_norm": 0.2741959984551279, |
| "learning_rate": 2.2701956626192603e-05, |
| "loss": 0.9123, |
| "step": 468 |
| }, |
| { |
| "epoch": 2.559345156889495, |
| "grad_norm": 0.29160243799401836, |
| "learning_rate": 2.262629213375173e-05, |
| "loss": 0.9153, |
| "step": 469 |
| }, |
| { |
| "epoch": 2.5648021828103684, |
| "grad_norm": 0.31211888825075323, |
| "learning_rate": 2.255058937271032e-05, |
| "loss": 0.9019, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.5702592087312413, |
| "grad_norm": 0.24605091808209184, |
| "learning_rate": 2.2474849446159193e-05, |
| "loss": 0.9041, |
| "step": 471 |
| }, |
| { |
| "epoch": 2.5757162346521145, |
| "grad_norm": 0.296940058046894, |
| "learning_rate": 2.2399073457730723e-05, |
| "loss": 0.8933, |
| "step": 472 |
| }, |
| { |
| "epoch": 2.581173260572988, |
| "grad_norm": 0.39017704428903854, |
| "learning_rate": 2.2323262511582726e-05, |
| "loss": 0.9219, |
| "step": 473 |
| }, |
| { |
| "epoch": 2.586630286493861, |
| "grad_norm": 0.26845683489444067, |
| "learning_rate": 2.2247417712382423e-05, |
| "loss": 0.9072, |
| "step": 474 |
| }, |
| { |
| "epoch": 2.592087312414734, |
| "grad_norm": 0.29710964002091833, |
| "learning_rate": 2.217154016529031e-05, |
| "loss": 0.9254, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.597544338335607, |
| "grad_norm": 0.2773002611218211, |
| "learning_rate": 2.2095630975944068e-05, |
| "loss": 0.9196, |
| "step": 476 |
| }, |
| { |
| "epoch": 2.60300136425648, |
| "grad_norm": 0.27685282385866905, |
| "learning_rate": 2.2019691250442442e-05, |
| "loss": 0.9048, |
| "step": 477 |
| }, |
| { |
| "epoch": 2.6084583901773533, |
| "grad_norm": 0.35014690047193237, |
| "learning_rate": 2.1943722095329138e-05, |
| "loss": 0.9113, |
| "step": 478 |
| }, |
| { |
| "epoch": 2.6139154160982265, |
| "grad_norm": 0.2596786590850847, |
| "learning_rate": 2.1867724617576685e-05, |
| "loss": 0.9161, |
| "step": 479 |
| }, |
| { |
| "epoch": 2.6193724420191, |
| "grad_norm": 0.3426543130719377, |
| "learning_rate": 2.1791699924570313e-05, |
| "loss": 0.8926, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.6248294679399726, |
| "grad_norm": 0.3078282469487072, |
| "learning_rate": 2.1715649124091814e-05, |
| "loss": 0.9183, |
| "step": 481 |
| }, |
| { |
| "epoch": 2.630286493860846, |
| "grad_norm": 0.22901258390983542, |
| "learning_rate": 2.16395733243034e-05, |
| "loss": 0.9344, |
| "step": 482 |
| }, |
| { |
| "epoch": 2.6357435197817187, |
| "grad_norm": 0.371108470895669, |
| "learning_rate": 2.156347363373156e-05, |
| "loss": 0.9192, |
| "step": 483 |
| }, |
| { |
| "epoch": 2.641200545702592, |
| "grad_norm": 0.3675376564769477, |
| "learning_rate": 2.14873511612509e-05, |
| "loss": 0.914, |
| "step": 484 |
| }, |
| { |
| "epoch": 2.6466575716234653, |
| "grad_norm": 0.47791366315200284, |
| "learning_rate": 2.141120701606799e-05, |
| "loss": 0.9078, |
| "step": 485 |
| }, |
| { |
| "epoch": 2.6521145975443385, |
| "grad_norm": 0.4222978650582422, |
| "learning_rate": 2.1335042307705206e-05, |
| "loss": 0.9099, |
| "step": 486 |
| }, |
| { |
| "epoch": 2.6575716234652114, |
| "grad_norm": 0.3556115683063452, |
| "learning_rate": 2.125885814598454e-05, |
| "loss": 0.9064, |
| "step": 487 |
| }, |
| { |
| "epoch": 2.6630286493860846, |
| "grad_norm": 0.356222691019892, |
| "learning_rate": 2.1182655641011468e-05, |
| "loss": 0.9109, |
| "step": 488 |
| }, |
| { |
| "epoch": 2.6684856753069575, |
| "grad_norm": 0.2950967727936582, |
| "learning_rate": 2.1106435903158734e-05, |
| "loss": 0.907, |
| "step": 489 |
| }, |
| { |
| "epoch": 2.6739427012278307, |
| "grad_norm": 0.2589049008249365, |
| "learning_rate": 2.10302000430502e-05, |
| "loss": 0.9167, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.679399727148704, |
| "grad_norm": 0.2679428400644797, |
| "learning_rate": 2.0953949171544646e-05, |
| "loss": 0.9029, |
| "step": 491 |
| }, |
| { |
| "epoch": 2.6848567530695773, |
| "grad_norm": 0.30000226534532, |
| "learning_rate": 2.0877684399719596e-05, |
| "loss": 0.902, |
| "step": 492 |
| }, |
| { |
| "epoch": 2.69031377899045, |
| "grad_norm": 0.31357462517216056, |
| "learning_rate": 2.0801406838855095e-05, |
| "loss": 0.9151, |
| "step": 493 |
| }, |
| { |
| "epoch": 2.6957708049113234, |
| "grad_norm": 0.2692910544239183, |
| "learning_rate": 2.0725117600417572e-05, |
| "loss": 0.9218, |
| "step": 494 |
| }, |
| { |
| "epoch": 2.701227830832196, |
| "grad_norm": 0.30151763927530156, |
| "learning_rate": 2.0648817796043598e-05, |
| "loss": 0.9198, |
| "step": 495 |
| }, |
| { |
| "epoch": 2.7066848567530695, |
| "grad_norm": 0.2758793028048215, |
| "learning_rate": 2.0572508537523705e-05, |
| "loss": 0.8979, |
| "step": 496 |
| }, |
| { |
| "epoch": 2.7121418826739427, |
| "grad_norm": 0.2812105414991479, |
| "learning_rate": 2.0496190936786196e-05, |
| "loss": 0.9131, |
| "step": 497 |
| }, |
| { |
| "epoch": 2.717598908594816, |
| "grad_norm": 0.2963610249601614, |
| "learning_rate": 2.041986610588091e-05, |
| "loss": 0.9377, |
| "step": 498 |
| }, |
| { |
| "epoch": 2.723055934515689, |
| "grad_norm": 0.3097919911404899, |
| "learning_rate": 2.0343535156963057e-05, |
| "loss": 0.9262, |
| "step": 499 |
| }, |
| { |
| "epoch": 2.728512960436562, |
| "grad_norm": 0.34847730033316476, |
| "learning_rate": 2.026719920227699e-05, |
| "loss": 0.8998, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.733969986357435, |
| "grad_norm": 0.30531935495612433, |
| "learning_rate": 2.0190859354139994e-05, |
| "loss": 0.9269, |
| "step": 501 |
| }, |
| { |
| "epoch": 2.739427012278308, |
| "grad_norm": 0.2669945648424582, |
| "learning_rate": 2.0114516724926103e-05, |
| "loss": 0.9455, |
| "step": 502 |
| }, |
| { |
| "epoch": 2.7448840381991815, |
| "grad_norm": 0.2785334692894501, |
| "learning_rate": 2.0038172427049862e-05, |
| "loss": 0.912, |
| "step": 503 |
| }, |
| { |
| "epoch": 2.7503410641200547, |
| "grad_norm": 0.3445461005907961, |
| "learning_rate": 1.9961827572950138e-05, |
| "loss": 0.9163, |
| "step": 504 |
| }, |
| { |
| "epoch": 2.7557980900409276, |
| "grad_norm": 0.39296279811877044, |
| "learning_rate": 1.98854832750739e-05, |
| "loss": 0.9369, |
| "step": 505 |
| }, |
| { |
| "epoch": 2.761255115961801, |
| "grad_norm": 0.39702351389810686, |
| "learning_rate": 1.9809140645860013e-05, |
| "loss": 0.891, |
| "step": 506 |
| }, |
| { |
| "epoch": 2.7667121418826737, |
| "grad_norm": 0.2512865215587987, |
| "learning_rate": 1.9732800797723018e-05, |
| "loss": 0.9115, |
| "step": 507 |
| }, |
| { |
| "epoch": 2.772169167803547, |
| "grad_norm": 0.2820633130771331, |
| "learning_rate": 1.965646484303695e-05, |
| "loss": 0.9212, |
| "step": 508 |
| }, |
| { |
| "epoch": 2.77762619372442, |
| "grad_norm": 0.32145777353057775, |
| "learning_rate": 1.9580133894119098e-05, |
| "loss": 0.9207, |
| "step": 509 |
| }, |
| { |
| "epoch": 2.7830832196452935, |
| "grad_norm": 0.33762112618327617, |
| "learning_rate": 1.9503809063213807e-05, |
| "loss": 0.8845, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.7885402455661663, |
| "grad_norm": 0.24634508212661455, |
| "learning_rate": 1.9427491462476295e-05, |
| "loss": 0.9156, |
| "step": 511 |
| }, |
| { |
| "epoch": 2.7939972714870396, |
| "grad_norm": 0.3457860742517539, |
| "learning_rate": 1.9351182203956405e-05, |
| "loss": 0.9106, |
| "step": 512 |
| }, |
| { |
| "epoch": 2.799454297407913, |
| "grad_norm": 0.3810319883859794, |
| "learning_rate": 1.927488239958243e-05, |
| "loss": 0.8924, |
| "step": 513 |
| }, |
| { |
| "epoch": 2.8049113233287857, |
| "grad_norm": 0.37285981835585597, |
| "learning_rate": 1.919859316114491e-05, |
| "loss": 0.906, |
| "step": 514 |
| }, |
| { |
| "epoch": 2.810368349249659, |
| "grad_norm": 0.24108156149639062, |
| "learning_rate": 1.9122315600280418e-05, |
| "loss": 0.9175, |
| "step": 515 |
| }, |
| { |
| "epoch": 2.815825375170532, |
| "grad_norm": 0.3943374958725155, |
| "learning_rate": 1.904605082845536e-05, |
| "loss": 0.9078, |
| "step": 516 |
| }, |
| { |
| "epoch": 2.821282401091405, |
| "grad_norm": 0.3145717053046707, |
| "learning_rate": 1.89697999569498e-05, |
| "loss": 0.9135, |
| "step": 517 |
| }, |
| { |
| "epoch": 2.8267394270122783, |
| "grad_norm": 0.22533549622277005, |
| "learning_rate": 1.8893564096841273e-05, |
| "loss": 0.909, |
| "step": 518 |
| }, |
| { |
| "epoch": 2.8321964529331516, |
| "grad_norm": 0.23525731404627342, |
| "learning_rate": 1.881734435898854e-05, |
| "loss": 0.9299, |
| "step": 519 |
| }, |
| { |
| "epoch": 2.8376534788540244, |
| "grad_norm": 0.2512060708918993, |
| "learning_rate": 1.8741141854015468e-05, |
| "loss": 0.8893, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.8431105047748977, |
| "grad_norm": 0.19994216173059465, |
| "learning_rate": 1.8664957692294808e-05, |
| "loss": 0.9221, |
| "step": 521 |
| }, |
| { |
| "epoch": 2.848567530695771, |
| "grad_norm": 0.20556264949760783, |
| "learning_rate": 1.858879298393202e-05, |
| "loss": 0.9316, |
| "step": 522 |
| }, |
| { |
| "epoch": 2.854024556616644, |
| "grad_norm": 0.20256542941627978, |
| "learning_rate": 1.8512648838749105e-05, |
| "loss": 0.9093, |
| "step": 523 |
| }, |
| { |
| "epoch": 2.859481582537517, |
| "grad_norm": 0.22020875876934895, |
| "learning_rate": 1.8436526366268444e-05, |
| "loss": 0.9049, |
| "step": 524 |
| }, |
| { |
| "epoch": 2.8649386084583903, |
| "grad_norm": 0.20768534379511697, |
| "learning_rate": 1.8360426675696606e-05, |
| "loss": 0.9144, |
| "step": 525 |
| }, |
| { |
| "epoch": 2.870395634379263, |
| "grad_norm": 0.28896251352128466, |
| "learning_rate": 1.828435087590819e-05, |
| "loss": 0.9145, |
| "step": 526 |
| }, |
| { |
| "epoch": 2.8758526603001364, |
| "grad_norm": 0.3131376106100284, |
| "learning_rate": 1.8208300075429693e-05, |
| "loss": 0.9308, |
| "step": 527 |
| }, |
| { |
| "epoch": 2.8813096862210097, |
| "grad_norm": 0.24876481284966392, |
| "learning_rate": 1.8132275382423325e-05, |
| "loss": 0.9115, |
| "step": 528 |
| }, |
| { |
| "epoch": 2.8867667121418825, |
| "grad_norm": 0.2530867014542135, |
| "learning_rate": 1.8056277904670865e-05, |
| "loss": 0.8851, |
| "step": 529 |
| }, |
| { |
| "epoch": 2.892223738062756, |
| "grad_norm": 0.2592890449900578, |
| "learning_rate": 1.798030874955756e-05, |
| "loss": 0.9058, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.897680763983629, |
| "grad_norm": 0.22039748569474332, |
| "learning_rate": 1.7904369024055942e-05, |
| "loss": 0.9176, |
| "step": 531 |
| }, |
| { |
| "epoch": 2.903137789904502, |
| "grad_norm": 0.2209833356939442, |
| "learning_rate": 1.7828459834709694e-05, |
| "loss": 0.917, |
| "step": 532 |
| }, |
| { |
| "epoch": 2.908594815825375, |
| "grad_norm": 0.23766546854501655, |
| "learning_rate": 1.7752582287617583e-05, |
| "loss": 0.8989, |
| "step": 533 |
| }, |
| { |
| "epoch": 2.9140518417462484, |
| "grad_norm": 0.2376537458371181, |
| "learning_rate": 1.767673748841728e-05, |
| "loss": 0.8946, |
| "step": 534 |
| }, |
| { |
| "epoch": 2.9195088676671213, |
| "grad_norm": 0.262071528071461, |
| "learning_rate": 1.7600926542269277e-05, |
| "loss": 0.9231, |
| "step": 535 |
| }, |
| { |
| "epoch": 2.9249658935879945, |
| "grad_norm": 0.29376545282596106, |
| "learning_rate": 1.7525150553840806e-05, |
| "loss": 0.8938, |
| "step": 536 |
| }, |
| { |
| "epoch": 2.930422919508868, |
| "grad_norm": 0.3134884408737219, |
| "learning_rate": 1.7449410627289687e-05, |
| "loss": 0.9168, |
| "step": 537 |
| }, |
| { |
| "epoch": 2.9358799454297406, |
| "grad_norm": 0.2712354478643755, |
| "learning_rate": 1.7373707866248278e-05, |
| "loss": 0.933, |
| "step": 538 |
| }, |
| { |
| "epoch": 2.941336971350614, |
| "grad_norm": 0.24553201691764942, |
| "learning_rate": 1.7298043373807404e-05, |
| "loss": 0.9159, |
| "step": 539 |
| }, |
| { |
| "epoch": 2.946793997271487, |
| "grad_norm": 0.3030078675065205, |
| "learning_rate": 1.7222418252500243e-05, |
| "loss": 0.9062, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.9522510231923604, |
| "grad_norm": 0.23890406347684276, |
| "learning_rate": 1.7146833604286295e-05, |
| "loss": 0.8945, |
| "step": 541 |
| }, |
| { |
| "epoch": 2.9577080491132333, |
| "grad_norm": 0.2670091183635565, |
| "learning_rate": 1.7071290530535298e-05, |
| "loss": 0.909, |
| "step": 542 |
| }, |
| { |
| "epoch": 2.9631650750341065, |
| "grad_norm": 0.23126297362235826, |
| "learning_rate": 1.6995790132011223e-05, |
| "loss": 0.9143, |
| "step": 543 |
| }, |
| { |
| "epoch": 2.9686221009549794, |
| "grad_norm": 0.31050871509494943, |
| "learning_rate": 1.6920333508856176e-05, |
| "loss": 0.8994, |
| "step": 544 |
| }, |
| { |
| "epoch": 2.9740791268758526, |
| "grad_norm": 0.22661046923902323, |
| "learning_rate": 1.6844921760574417e-05, |
| "loss": 0.9294, |
| "step": 545 |
| }, |
| { |
| "epoch": 2.979536152796726, |
| "grad_norm": 0.3118001086032258, |
| "learning_rate": 1.676955598601631e-05, |
| "loss": 0.9041, |
| "step": 546 |
| }, |
| { |
| "epoch": 2.984993178717599, |
| "grad_norm": 0.23665950368215852, |
| "learning_rate": 1.6694237283362314e-05, |
| "loss": 0.9038, |
| "step": 547 |
| }, |
| { |
| "epoch": 2.990450204638472, |
| "grad_norm": 0.24492951232429386, |
| "learning_rate": 1.6618966750106996e-05, |
| "loss": 0.916, |
| "step": 548 |
| }, |
| { |
| "epoch": 2.9959072305593453, |
| "grad_norm": 0.25300337782976023, |
| "learning_rate": 1.6543745483043037e-05, |
| "loss": 0.9083, |
| "step": 549 |
| }, |
| { |
| "epoch": 3.001364256480218, |
| "grad_norm": 0.584103118759897, |
| "learning_rate": 1.6468574578245225e-05, |
| "loss": 1.6082, |
| "step": 550 |
| }, |
| { |
| "epoch": 3.0068212824010914, |
| "grad_norm": 0.671101312579536, |
| "learning_rate": 1.639345513105452e-05, |
| "loss": 0.8859, |
| "step": 551 |
| }, |
| { |
| "epoch": 3.0122783083219646, |
| "grad_norm": 0.3787017346934449, |
| "learning_rate": 1.6318388236062072e-05, |
| "loss": 0.8951, |
| "step": 552 |
| }, |
| { |
| "epoch": 3.0177353342428375, |
| "grad_norm": 0.42606514302989157, |
| "learning_rate": 1.624337498709326e-05, |
| "loss": 0.8877, |
| "step": 553 |
| }, |
| { |
| "epoch": 3.0231923601637107, |
| "grad_norm": 0.35542176787821733, |
| "learning_rate": 1.616841647719178e-05, |
| "loss": 0.8895, |
| "step": 554 |
| }, |
| { |
| "epoch": 3.028649386084584, |
| "grad_norm": 0.35418748629561114, |
| "learning_rate": 1.6093513798603713e-05, |
| "loss": 0.8968, |
| "step": 555 |
| }, |
| { |
| "epoch": 3.034106412005457, |
| "grad_norm": 0.4142394476010708, |
| "learning_rate": 1.6018668042761593e-05, |
| "loss": 0.8855, |
| "step": 556 |
| }, |
| { |
| "epoch": 3.03956343792633, |
| "grad_norm": 0.26285840734342447, |
| "learning_rate": 1.594388030026851e-05, |
| "loss": 0.8685, |
| "step": 557 |
| }, |
| { |
| "epoch": 3.0450204638472034, |
| "grad_norm": 0.3399484818274934, |
| "learning_rate": 1.586915166088224e-05, |
| "loss": 0.908, |
| "step": 558 |
| }, |
| { |
| "epoch": 3.050477489768076, |
| "grad_norm": 0.3389204352265327, |
| "learning_rate": 1.5794483213499326e-05, |
| "loss": 0.8911, |
| "step": 559 |
| }, |
| { |
| "epoch": 3.0559345156889495, |
| "grad_norm": 0.33188066961256374, |
| "learning_rate": 1.5719876046139243e-05, |
| "loss": 0.9147, |
| "step": 560 |
| }, |
| { |
| "epoch": 3.0613915416098227, |
| "grad_norm": 0.3377610682449399, |
| "learning_rate": 1.564533124592852e-05, |
| "loss": 0.8949, |
| "step": 561 |
| }, |
| { |
| "epoch": 3.0668485675306956, |
| "grad_norm": 0.2957318174966501, |
| "learning_rate": 1.557084989908495e-05, |
| "loss": 0.8986, |
| "step": 562 |
| }, |
| { |
| "epoch": 3.072305593451569, |
| "grad_norm": 0.430673617485615, |
| "learning_rate": 1.5496433090901685e-05, |
| "loss": 0.8949, |
| "step": 563 |
| }, |
| { |
| "epoch": 3.077762619372442, |
| "grad_norm": 0.2554433088355423, |
| "learning_rate": 1.5422081905731484e-05, |
| "loss": 0.8882, |
| "step": 564 |
| }, |
| { |
| "epoch": 3.083219645293315, |
| "grad_norm": 0.32618011312611783, |
| "learning_rate": 1.534779742697088e-05, |
| "loss": 0.9174, |
| "step": 565 |
| }, |
| { |
| "epoch": 3.088676671214188, |
| "grad_norm": 0.31352014509777587, |
| "learning_rate": 1.5273580737044416e-05, |
| "loss": 0.8918, |
| "step": 566 |
| }, |
| { |
| "epoch": 3.0941336971350615, |
| "grad_norm": 0.2557790089027306, |
| "learning_rate": 1.5199432917388835e-05, |
| "loss": 0.9007, |
| "step": 567 |
| }, |
| { |
| "epoch": 3.0995907230559343, |
| "grad_norm": 0.27540644472124487, |
| "learning_rate": 1.5125355048437389e-05, |
| "loss": 0.884, |
| "step": 568 |
| }, |
| { |
| "epoch": 3.1050477489768076, |
| "grad_norm": 0.34235171994492863, |
| "learning_rate": 1.5051348209604016e-05, |
| "loss": 0.8686, |
| "step": 569 |
| }, |
| { |
| "epoch": 3.110504774897681, |
| "grad_norm": 0.25008950788915946, |
| "learning_rate": 1.4977413479267675e-05, |
| "loss": 0.9026, |
| "step": 570 |
| }, |
| { |
| "epoch": 3.1159618008185537, |
| "grad_norm": 0.3964129492366135, |
| "learning_rate": 1.4903551934756592e-05, |
| "loss": 0.8992, |
| "step": 571 |
| }, |
| { |
| "epoch": 3.121418826739427, |
| "grad_norm": 0.40686134783523276, |
| "learning_rate": 1.4829764652332585e-05, |
| "loss": 0.9209, |
| "step": 572 |
| }, |
| { |
| "epoch": 3.1268758526603, |
| "grad_norm": 0.2670447009105334, |
| "learning_rate": 1.4756052707175361e-05, |
| "loss": 0.9153, |
| "step": 573 |
| }, |
| { |
| "epoch": 3.132332878581173, |
| "grad_norm": 0.4402126815582449, |
| "learning_rate": 1.4682417173366892e-05, |
| "loss": 0.907, |
| "step": 574 |
| }, |
| { |
| "epoch": 3.1377899045020463, |
| "grad_norm": 0.23815050001596294, |
| "learning_rate": 1.4608859123875703e-05, |
| "loss": 0.9038, |
| "step": 575 |
| }, |
| { |
| "epoch": 3.1432469304229196, |
| "grad_norm": 0.3030117101013267, |
| "learning_rate": 1.4535379630541284e-05, |
| "loss": 0.9065, |
| "step": 576 |
| }, |
| { |
| "epoch": 3.148703956343793, |
| "grad_norm": 0.37381110214711166, |
| "learning_rate": 1.4461979764058454e-05, |
| "loss": 0.9096, |
| "step": 577 |
| }, |
| { |
| "epoch": 3.1541609822646657, |
| "grad_norm": 0.24485627188888226, |
| "learning_rate": 1.4388660593961756e-05, |
| "loss": 0.8858, |
| "step": 578 |
| }, |
| { |
| "epoch": 3.159618008185539, |
| "grad_norm": 0.25282112926237954, |
| "learning_rate": 1.4315423188609878e-05, |
| "loss": 0.8905, |
| "step": 579 |
| }, |
| { |
| "epoch": 3.1650750341064118, |
| "grad_norm": 0.24907017187679334, |
| "learning_rate": 1.4242268615170106e-05, |
| "loss": 0.9068, |
| "step": 580 |
| }, |
| { |
| "epoch": 3.170532060027285, |
| "grad_norm": 0.2129960819490356, |
| "learning_rate": 1.4169197939602723e-05, |
| "loss": 0.8912, |
| "step": 581 |
| }, |
| { |
| "epoch": 3.1759890859481583, |
| "grad_norm": 0.24279078285844446, |
| "learning_rate": 1.409621222664554e-05, |
| "loss": 0.8838, |
| "step": 582 |
| }, |
| { |
| "epoch": 3.1814461118690316, |
| "grad_norm": 0.23381673434042413, |
| "learning_rate": 1.4023312539798322e-05, |
| "loss": 0.8896, |
| "step": 583 |
| }, |
| { |
| "epoch": 3.1869031377899044, |
| "grad_norm": 0.22227554143448716, |
| "learning_rate": 1.3950499941307332e-05, |
| "loss": 0.8826, |
| "step": 584 |
| }, |
| { |
| "epoch": 3.1923601637107777, |
| "grad_norm": 0.22806009027283225, |
| "learning_rate": 1.3877775492149828e-05, |
| "loss": 0.899, |
| "step": 585 |
| }, |
| { |
| "epoch": 3.197817189631651, |
| "grad_norm": 0.25047196400087585, |
| "learning_rate": 1.3805140252018618e-05, |
| "loss": 0.8954, |
| "step": 586 |
| }, |
| { |
| "epoch": 3.203274215552524, |
| "grad_norm": 0.2118062936691214, |
| "learning_rate": 1.373259527930663e-05, |
| "loss": 0.8966, |
| "step": 587 |
| }, |
| { |
| "epoch": 3.208731241473397, |
| "grad_norm": 0.2730005282503477, |
| "learning_rate": 1.366014163109146e-05, |
| "loss": 0.8795, |
| "step": 588 |
| }, |
| { |
| "epoch": 3.2141882673942703, |
| "grad_norm": 0.29613230912460564, |
| "learning_rate": 1.3587780363119986e-05, |
| "loss": 0.8796, |
| "step": 589 |
| }, |
| { |
| "epoch": 3.219645293315143, |
| "grad_norm": 0.23990776796738883, |
| "learning_rate": 1.3515512529792978e-05, |
| "loss": 0.9071, |
| "step": 590 |
| }, |
| { |
| "epoch": 3.2251023192360164, |
| "grad_norm": 0.2538388076227864, |
| "learning_rate": 1.3443339184149739e-05, |
| "loss": 0.9036, |
| "step": 591 |
| }, |
| { |
| "epoch": 3.2305593451568897, |
| "grad_norm": 0.24743496996389577, |
| "learning_rate": 1.337126137785276e-05, |
| "loss": 0.8861, |
| "step": 592 |
| }, |
| { |
| "epoch": 3.2360163710777625, |
| "grad_norm": 0.20121450134982874, |
| "learning_rate": 1.329928016117241e-05, |
| "loss": 0.8939, |
| "step": 593 |
| }, |
| { |
| "epoch": 3.241473396998636, |
| "grad_norm": 0.2869931420078408, |
| "learning_rate": 1.3227396582971594e-05, |
| "loss": 0.8906, |
| "step": 594 |
| }, |
| { |
| "epoch": 3.246930422919509, |
| "grad_norm": 0.1908364191371087, |
| "learning_rate": 1.3155611690690515e-05, |
| "loss": 0.886, |
| "step": 595 |
| }, |
| { |
| "epoch": 3.252387448840382, |
| "grad_norm": 0.3472699144561854, |
| "learning_rate": 1.3083926530331372e-05, |
| "loss": 0.9158, |
| "step": 596 |
| }, |
| { |
| "epoch": 3.257844474761255, |
| "grad_norm": 0.22549962507966057, |
| "learning_rate": 1.3012342146443144e-05, |
| "loss": 0.8764, |
| "step": 597 |
| }, |
| { |
| "epoch": 3.2633015006821284, |
| "grad_norm": 0.26789532061692434, |
| "learning_rate": 1.2940859582106357e-05, |
| "loss": 0.8841, |
| "step": 598 |
| }, |
| { |
| "epoch": 3.2687585266030013, |
| "grad_norm": 0.2522357843484046, |
| "learning_rate": 1.2869479878917904e-05, |
| "loss": 0.8819, |
| "step": 599 |
| }, |
| { |
| "epoch": 3.2742155525238745, |
| "grad_norm": 0.21493911054710754, |
| "learning_rate": 1.2798204076975835e-05, |
| "loss": 0.92, |
| "step": 600 |
| }, |
| { |
| "epoch": 3.279672578444748, |
| "grad_norm": 0.2945646091669156, |
| "learning_rate": 1.2727033214864233e-05, |
| "loss": 0.8838, |
| "step": 601 |
| }, |
| { |
| "epoch": 3.2851296043656206, |
| "grad_norm": 0.2829300287180026, |
| "learning_rate": 1.265596832963806e-05, |
| "loss": 0.8755, |
| "step": 602 |
| }, |
| { |
| "epoch": 3.290586630286494, |
| "grad_norm": 0.2536303900570064, |
| "learning_rate": 1.2585010456808046e-05, |
| "loss": 0.8904, |
| "step": 603 |
| }, |
| { |
| "epoch": 3.296043656207367, |
| "grad_norm": 0.3585519781803995, |
| "learning_rate": 1.2514160630325617e-05, |
| "loss": 0.8922, |
| "step": 604 |
| }, |
| { |
| "epoch": 3.30150068212824, |
| "grad_norm": 0.2792945795336993, |
| "learning_rate": 1.2443419882567821e-05, |
| "loss": 0.8771, |
| "step": 605 |
| }, |
| { |
| "epoch": 3.3069577080491133, |
| "grad_norm": 0.35260384633142106, |
| "learning_rate": 1.2372789244322272e-05, |
| "loss": 0.901, |
| "step": 606 |
| }, |
| { |
| "epoch": 3.3124147339699865, |
| "grad_norm": 0.31364366488160306, |
| "learning_rate": 1.2302269744772155e-05, |
| "loss": 0.8818, |
| "step": 607 |
| }, |
| { |
| "epoch": 3.3178717598908594, |
| "grad_norm": 0.23743622737062894, |
| "learning_rate": 1.22318624114812e-05, |
| "loss": 0.9072, |
| "step": 608 |
| }, |
| { |
| "epoch": 3.3233287858117326, |
| "grad_norm": 0.3642214485244677, |
| "learning_rate": 1.216156827037873e-05, |
| "loss": 0.8833, |
| "step": 609 |
| }, |
| { |
| "epoch": 3.328785811732606, |
| "grad_norm": 0.2925427624739931, |
| "learning_rate": 1.2091388345744703e-05, |
| "loss": 0.911, |
| "step": 610 |
| }, |
| { |
| "epoch": 3.3342428376534787, |
| "grad_norm": 0.2377203948239386, |
| "learning_rate": 1.2021323660194798e-05, |
| "loss": 0.8965, |
| "step": 611 |
| }, |
| { |
| "epoch": 3.339699863574352, |
| "grad_norm": 0.2706687731608815, |
| "learning_rate": 1.1951375234665501e-05, |
| "loss": 0.9036, |
| "step": 612 |
| }, |
| { |
| "epoch": 3.3451568894952253, |
| "grad_norm": 0.2679343617436159, |
| "learning_rate": 1.1881544088399237e-05, |
| "loss": 0.8939, |
| "step": 613 |
| }, |
| { |
| "epoch": 3.350613915416098, |
| "grad_norm": 0.22617857543228842, |
| "learning_rate": 1.1811831238929508e-05, |
| "loss": 0.9021, |
| "step": 614 |
| }, |
| { |
| "epoch": 3.3560709413369714, |
| "grad_norm": 0.2904617911241792, |
| "learning_rate": 1.1742237702066074e-05, |
| "loss": 0.8863, |
| "step": 615 |
| }, |
| { |
| "epoch": 3.3615279672578446, |
| "grad_norm": 0.22733511585309843, |
| "learning_rate": 1.1672764491880153e-05, |
| "loss": 0.9143, |
| "step": 616 |
| }, |
| { |
| "epoch": 3.3669849931787175, |
| "grad_norm": 0.256013923198982, |
| "learning_rate": 1.1603412620689637e-05, |
| "loss": 0.899, |
| "step": 617 |
| }, |
| { |
| "epoch": 3.3724420190995907, |
| "grad_norm": 0.25205210893149643, |
| "learning_rate": 1.1534183099044363e-05, |
| "loss": 0.8853, |
| "step": 618 |
| }, |
| { |
| "epoch": 3.377899045020464, |
| "grad_norm": 0.23143271683735414, |
| "learning_rate": 1.1465076935711355e-05, |
| "loss": 0.8947, |
| "step": 619 |
| }, |
| { |
| "epoch": 3.383356070941337, |
| "grad_norm": 0.22370756793978866, |
| "learning_rate": 1.1396095137660134e-05, |
| "loss": 0.8785, |
| "step": 620 |
| }, |
| { |
| "epoch": 3.38881309686221, |
| "grad_norm": 0.21290283764682943, |
| "learning_rate": 1.1327238710048075e-05, |
| "loss": 0.9032, |
| "step": 621 |
| }, |
| { |
| "epoch": 3.3942701227830834, |
| "grad_norm": 0.2685069204258351, |
| "learning_rate": 1.1258508656205715e-05, |
| "loss": 0.8941, |
| "step": 622 |
| }, |
| { |
| "epoch": 3.399727148703956, |
| "grad_norm": 0.20912948755324795, |
| "learning_rate": 1.118990597762216e-05, |
| "loss": 0.8913, |
| "step": 623 |
| }, |
| { |
| "epoch": 3.4051841746248295, |
| "grad_norm": 0.24827347077451523, |
| "learning_rate": 1.1121431673930509e-05, |
| "loss": 0.883, |
| "step": 624 |
| }, |
| { |
| "epoch": 3.4106412005457027, |
| "grad_norm": 0.22274674891516377, |
| "learning_rate": 1.1053086742893244e-05, |
| "loss": 0.9017, |
| "step": 625 |
| }, |
| { |
| "epoch": 3.4160982264665756, |
| "grad_norm": 0.23575151807168895, |
| "learning_rate": 1.0984872180387715e-05, |
| "loss": 0.8988, |
| "step": 626 |
| }, |
| { |
| "epoch": 3.421555252387449, |
| "grad_norm": 0.21353314466163129, |
| "learning_rate": 1.0916788980391633e-05, |
| "loss": 0.9098, |
| "step": 627 |
| }, |
| { |
| "epoch": 3.427012278308322, |
| "grad_norm": 0.22040517357317185, |
| "learning_rate": 1.0848838134968589e-05, |
| "loss": 0.884, |
| "step": 628 |
| }, |
| { |
| "epoch": 3.432469304229195, |
| "grad_norm": 0.22910802159215685, |
| "learning_rate": 1.0781020634253579e-05, |
| "loss": 0.8833, |
| "step": 629 |
| }, |
| { |
| "epoch": 3.437926330150068, |
| "grad_norm": 0.21849412085599912, |
| "learning_rate": 1.0713337466438578e-05, |
| "loss": 0.8839, |
| "step": 630 |
| }, |
| { |
| "epoch": 3.4433833560709415, |
| "grad_norm": 0.21965410678288466, |
| "learning_rate": 1.0645789617758181e-05, |
| "loss": 0.9005, |
| "step": 631 |
| }, |
| { |
| "epoch": 3.4488403819918143, |
| "grad_norm": 0.204035562242123, |
| "learning_rate": 1.057837807247518e-05, |
| "loss": 0.892, |
| "step": 632 |
| }, |
| { |
| "epoch": 3.4542974079126876, |
| "grad_norm": 0.16983377384281073, |
| "learning_rate": 1.0511103812866238e-05, |
| "loss": 0.8812, |
| "step": 633 |
| }, |
| { |
| "epoch": 3.459754433833561, |
| "grad_norm": 0.2042102923266645, |
| "learning_rate": 1.0443967819207602e-05, |
| "loss": 0.88, |
| "step": 634 |
| }, |
| { |
| "epoch": 3.4652114597544337, |
| "grad_norm": 0.18518985041839892, |
| "learning_rate": 1.0376971069760774e-05, |
| "loss": 0.9172, |
| "step": 635 |
| }, |
| { |
| "epoch": 3.470668485675307, |
| "grad_norm": 0.19653140995159937, |
| "learning_rate": 1.0310114540758298e-05, |
| "loss": 0.895, |
| "step": 636 |
| }, |
| { |
| "epoch": 3.47612551159618, |
| "grad_norm": 0.22830479434165665, |
| "learning_rate": 1.0243399206389527e-05, |
| "loss": 0.9044, |
| "step": 637 |
| }, |
| { |
| "epoch": 3.481582537517053, |
| "grad_norm": 0.19206764620071587, |
| "learning_rate": 1.0176826038786394e-05, |
| "loss": 0.8818, |
| "step": 638 |
| }, |
| { |
| "epoch": 3.4870395634379263, |
| "grad_norm": 0.21389623128712906, |
| "learning_rate": 1.011039600800928e-05, |
| "loss": 0.8956, |
| "step": 639 |
| }, |
| { |
| "epoch": 3.4924965893587996, |
| "grad_norm": 0.21993143291851755, |
| "learning_rate": 1.004411008203289e-05, |
| "loss": 0.8927, |
| "step": 640 |
| }, |
| { |
| "epoch": 3.4979536152796724, |
| "grad_norm": 0.1894006892821513, |
| "learning_rate": 9.977969226732099e-06, |
| "loss": 0.8771, |
| "step": 641 |
| }, |
| { |
| "epoch": 3.5034106412005457, |
| "grad_norm": 0.19959640202420684, |
| "learning_rate": 9.911974405867917e-06, |
| "loss": 0.8912, |
| "step": 642 |
| }, |
| { |
| "epoch": 3.508867667121419, |
| "grad_norm": 0.14759174219062646, |
| "learning_rate": 9.846126581073457e-06, |
| "loss": 0.8992, |
| "step": 643 |
| }, |
| { |
| "epoch": 3.5143246930422922, |
| "grad_norm": 0.20035668476318763, |
| "learning_rate": 9.780426711839877e-06, |
| "loss": 0.9006, |
| "step": 644 |
| }, |
| { |
| "epoch": 3.519781718963165, |
| "grad_norm": 0.16797091670116737, |
| "learning_rate": 9.714875755502429e-06, |
| "loss": 0.8873, |
| "step": 645 |
| }, |
| { |
| "epoch": 3.5252387448840383, |
| "grad_norm": 0.189909496119316, |
| "learning_rate": 9.649474667226513e-06, |
| "loss": 0.9186, |
| "step": 646 |
| }, |
| { |
| "epoch": 3.530695770804911, |
| "grad_norm": 0.1662855707845877, |
| "learning_rate": 9.58422439999374e-06, |
| "loss": 0.9061, |
| "step": 647 |
| }, |
| { |
| "epoch": 3.5361527967257844, |
| "grad_norm": 0.1877435970889167, |
| "learning_rate": 9.519125904588059e-06, |
| "loss": 0.9124, |
| "step": 648 |
| }, |
| { |
| "epoch": 3.5416098226466577, |
| "grad_norm": 0.18966972578830213, |
| "learning_rate": 9.45418012958191e-06, |
| "loss": 0.9002, |
| "step": 649 |
| }, |
| { |
| "epoch": 3.547066848567531, |
| "grad_norm": 0.18521500133290328, |
| "learning_rate": 9.389388021322381e-06, |
| "loss": 0.8921, |
| "step": 650 |
| }, |
| { |
| "epoch": 3.552523874488404, |
| "grad_norm": 0.20655179032846327, |
| "learning_rate": 9.32475052391742e-06, |
| "loss": 0.8975, |
| "step": 651 |
| }, |
| { |
| "epoch": 3.557980900409277, |
| "grad_norm": 0.1819692294620117, |
| "learning_rate": 9.26026857922212e-06, |
| "loss": 0.9082, |
| "step": 652 |
| }, |
| { |
| "epoch": 3.56343792633015, |
| "grad_norm": 0.18675168504713038, |
| "learning_rate": 9.19594312682493e-06, |
| "loss": 0.9045, |
| "step": 653 |
| }, |
| { |
| "epoch": 3.568894952251023, |
| "grad_norm": 0.16349611233292402, |
| "learning_rate": 9.131775104034009e-06, |
| "loss": 0.8907, |
| "step": 654 |
| }, |
| { |
| "epoch": 3.5743519781718964, |
| "grad_norm": 0.17657868890026518, |
| "learning_rate": 9.067765445863545e-06, |
| "loss": 0.8777, |
| "step": 655 |
| }, |
| { |
| "epoch": 3.5798090040927697, |
| "grad_norm": 0.1520862113066698, |
| "learning_rate": 9.00391508502017e-06, |
| "loss": 0.8761, |
| "step": 656 |
| }, |
| { |
| "epoch": 3.5852660300136425, |
| "grad_norm": 0.16877815138189672, |
| "learning_rate": 8.940224951889304e-06, |
| "loss": 0.869, |
| "step": 657 |
| }, |
| { |
| "epoch": 3.590723055934516, |
| "grad_norm": 0.16925000281087574, |
| "learning_rate": 8.876695974521659e-06, |
| "loss": 0.9011, |
| "step": 658 |
| }, |
| { |
| "epoch": 3.5961800818553886, |
| "grad_norm": 0.16759697258423073, |
| "learning_rate": 8.813329078619679e-06, |
| "loss": 0.9045, |
| "step": 659 |
| }, |
| { |
| "epoch": 3.601637107776262, |
| "grad_norm": 0.1896922083229097, |
| "learning_rate": 8.750125187524068e-06, |
| "loss": 0.86, |
| "step": 660 |
| }, |
| { |
| "epoch": 3.607094133697135, |
| "grad_norm": 0.17884520359215278, |
| "learning_rate": 8.687085222200323e-06, |
| "loss": 0.9095, |
| "step": 661 |
| }, |
| { |
| "epoch": 3.6125511596180084, |
| "grad_norm": 0.176877762158684, |
| "learning_rate": 8.624210101225343e-06, |
| "loss": 0.8985, |
| "step": 662 |
| }, |
| { |
| "epoch": 3.6180081855388813, |
| "grad_norm": 0.2002369650449839, |
| "learning_rate": 8.561500740774008e-06, |
| "loss": 0.8929, |
| "step": 663 |
| }, |
| { |
| "epoch": 3.6234652114597545, |
| "grad_norm": 0.17592875629565122, |
| "learning_rate": 8.498958054605837e-06, |
| "loss": 0.8778, |
| "step": 664 |
| }, |
| { |
| "epoch": 3.6289222373806274, |
| "grad_norm": 0.21757591177018767, |
| "learning_rate": 8.436582954051707e-06, |
| "loss": 0.9046, |
| "step": 665 |
| }, |
| { |
| "epoch": 3.6343792633015006, |
| "grad_norm": 0.16964570321715836, |
| "learning_rate": 8.374376348000523e-06, |
| "loss": 0.8766, |
| "step": 666 |
| }, |
| { |
| "epoch": 3.639836289222374, |
| "grad_norm": 0.20816910485872794, |
| "learning_rate": 8.312339142886003e-06, |
| "loss": 0.8948, |
| "step": 667 |
| }, |
| { |
| "epoch": 3.645293315143247, |
| "grad_norm": 0.21318859663355175, |
| "learning_rate": 8.250472242673486e-06, |
| "loss": 0.9035, |
| "step": 668 |
| }, |
| { |
| "epoch": 3.65075034106412, |
| "grad_norm": 0.17223582052559827, |
| "learning_rate": 8.188776548846717e-06, |
| "loss": 0.8914, |
| "step": 669 |
| }, |
| { |
| "epoch": 3.6562073669849933, |
| "grad_norm": 0.20492759686497783, |
| "learning_rate": 8.127252960394744e-06, |
| "loss": 0.8871, |
| "step": 670 |
| }, |
| { |
| "epoch": 3.661664392905866, |
| "grad_norm": 0.17660213480793235, |
| "learning_rate": 8.065902373798808e-06, |
| "loss": 0.8658, |
| "step": 671 |
| }, |
| { |
| "epoch": 3.6671214188267394, |
| "grad_norm": 0.18013543727863568, |
| "learning_rate": 8.004725683019276e-06, |
| "loss": 0.9016, |
| "step": 672 |
| }, |
| { |
| "epoch": 3.6725784447476126, |
| "grad_norm": 0.1844280666804985, |
| "learning_rate": 7.943723779482628e-06, |
| "loss": 0.9034, |
| "step": 673 |
| }, |
| { |
| "epoch": 3.678035470668486, |
| "grad_norm": 0.14933482527632957, |
| "learning_rate": 7.882897552068447e-06, |
| "loss": 0.9044, |
| "step": 674 |
| }, |
| { |
| "epoch": 3.6834924965893587, |
| "grad_norm": 0.180577120421336, |
| "learning_rate": 7.822247887096499e-06, |
| "loss": 0.8987, |
| "step": 675 |
| }, |
| { |
| "epoch": 3.688949522510232, |
| "grad_norm": 0.18976867015358279, |
| "learning_rate": 7.761775668313775e-06, |
| "loss": 0.9055, |
| "step": 676 |
| }, |
| { |
| "epoch": 3.694406548431105, |
| "grad_norm": 0.14380655448071636, |
| "learning_rate": 7.70148177688166e-06, |
| "loss": 0.8819, |
| "step": 677 |
| }, |
| { |
| "epoch": 3.699863574351978, |
| "grad_norm": 0.1605511243289739, |
| "learning_rate": 7.641367091363056e-06, |
| "loss": 0.8765, |
| "step": 678 |
| }, |
| { |
| "epoch": 3.7053206002728514, |
| "grad_norm": 0.16966229691015783, |
| "learning_rate": 7.581432487709595e-06, |
| "loss": 0.8956, |
| "step": 679 |
| }, |
| { |
| "epoch": 3.7107776261937246, |
| "grad_norm": 0.15825612639259118, |
| "learning_rate": 7.521678839248867e-06, |
| "loss": 0.8757, |
| "step": 680 |
| }, |
| { |
| "epoch": 3.7162346521145975, |
| "grad_norm": 0.15905765650755102, |
| "learning_rate": 7.462107016671727e-06, |
| "loss": 0.9021, |
| "step": 681 |
| }, |
| { |
| "epoch": 3.7216916780354707, |
| "grad_norm": 0.1678589543544254, |
| "learning_rate": 7.402717888019561e-06, |
| "loss": 0.9037, |
| "step": 682 |
| }, |
| { |
| "epoch": 3.7271487039563436, |
| "grad_norm": 0.16250907925377683, |
| "learning_rate": 7.343512318671668e-06, |
| "loss": 0.8996, |
| "step": 683 |
| }, |
| { |
| "epoch": 3.732605729877217, |
| "grad_norm": 0.1796362073897607, |
| "learning_rate": 7.284491171332637e-06, |
| "loss": 0.9044, |
| "step": 684 |
| }, |
| { |
| "epoch": 3.73806275579809, |
| "grad_norm": 0.15668011051829173, |
| "learning_rate": 7.225655306019783e-06, |
| "loss": 0.888, |
| "step": 685 |
| }, |
| { |
| "epoch": 3.7435197817189634, |
| "grad_norm": 0.1668930240876366, |
| "learning_rate": 7.167005580050608e-06, |
| "loss": 0.9017, |
| "step": 686 |
| }, |
| { |
| "epoch": 3.748976807639836, |
| "grad_norm": 0.18870659107182658, |
| "learning_rate": 7.108542848030333e-06, |
| "loss": 0.8767, |
| "step": 687 |
| }, |
| { |
| "epoch": 3.7544338335607095, |
| "grad_norm": 0.15696986217820777, |
| "learning_rate": 7.050267961839407e-06, |
| "loss": 0.8909, |
| "step": 688 |
| }, |
| { |
| "epoch": 3.7598908594815823, |
| "grad_norm": 0.18431028719776638, |
| "learning_rate": 6.992181770621109e-06, |
| "loss": 0.8868, |
| "step": 689 |
| }, |
| { |
| "epoch": 3.7653478854024556, |
| "grad_norm": 0.16154837397895874, |
| "learning_rate": 6.934285120769206e-06, |
| "loss": 0.8994, |
| "step": 690 |
| }, |
| { |
| "epoch": 3.770804911323329, |
| "grad_norm": 0.1608522865427035, |
| "learning_rate": 6.87657885591557e-06, |
| "loss": 0.9054, |
| "step": 691 |
| }, |
| { |
| "epoch": 3.776261937244202, |
| "grad_norm": 0.17546410153871858, |
| "learning_rate": 6.819063816917904e-06, |
| "loss": 0.8771, |
| "step": 692 |
| }, |
| { |
| "epoch": 3.781718963165075, |
| "grad_norm": 0.17779343503619688, |
| "learning_rate": 6.761740841847517e-06, |
| "loss": 0.8828, |
| "step": 693 |
| }, |
| { |
| "epoch": 3.787175989085948, |
| "grad_norm": 0.1620894791729856, |
| "learning_rate": 6.704610765977073e-06, |
| "loss": 0.8896, |
| "step": 694 |
| }, |
| { |
| "epoch": 3.792633015006821, |
| "grad_norm": 0.16551990476440234, |
| "learning_rate": 6.647674421768435e-06, |
| "loss": 0.8885, |
| "step": 695 |
| }, |
| { |
| "epoch": 3.7980900409276943, |
| "grad_norm": 0.17247511398164073, |
| "learning_rate": 6.590932638860543e-06, |
| "loss": 0.9229, |
| "step": 696 |
| }, |
| { |
| "epoch": 3.8035470668485676, |
| "grad_norm": 0.17229017961388754, |
| "learning_rate": 6.5343862440573095e-06, |
| "loss": 0.8809, |
| "step": 697 |
| }, |
| { |
| "epoch": 3.809004092769441, |
| "grad_norm": 0.15732362181652573, |
| "learning_rate": 6.478036061315587e-06, |
| "loss": 0.903, |
| "step": 698 |
| }, |
| { |
| "epoch": 3.8144611186903137, |
| "grad_norm": 0.14793725507686076, |
| "learning_rate": 6.421882911733146e-06, |
| "loss": 0.9084, |
| "step": 699 |
| }, |
| { |
| "epoch": 3.819918144611187, |
| "grad_norm": 0.18160474710129887, |
| "learning_rate": 6.365927613536737e-06, |
| "loss": 0.8833, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.8253751705320598, |
| "grad_norm": 0.16205271433369595, |
| "learning_rate": 6.310170982070132e-06, |
| "loss": 0.903, |
| "step": 701 |
| }, |
| { |
| "epoch": 3.830832196452933, |
| "grad_norm": 0.1755196814184644, |
| "learning_rate": 6.254613829782274e-06, |
| "loss": 0.8866, |
| "step": 702 |
| }, |
| { |
| "epoch": 3.8362892223738063, |
| "grad_norm": 0.16947891319556294, |
| "learning_rate": 6.199256966215423e-06, |
| "loss": 0.9072, |
| "step": 703 |
| }, |
| { |
| "epoch": 3.8417462482946796, |
| "grad_norm": 0.1598029992685231, |
| "learning_rate": 6.1441011979933615e-06, |
| "loss": 0.8965, |
| "step": 704 |
| }, |
| { |
| "epoch": 3.8472032742155524, |
| "grad_norm": 0.17633255200544773, |
| "learning_rate": 6.089147328809637e-06, |
| "loss": 0.9213, |
| "step": 705 |
| }, |
| { |
| "epoch": 3.8526603001364257, |
| "grad_norm": 0.14858434315925467, |
| "learning_rate": 6.034396159415874e-06, |
| "loss": 0.9057, |
| "step": 706 |
| }, |
| { |
| "epoch": 3.8581173260572985, |
| "grad_norm": 0.1359593564440916, |
| "learning_rate": 5.979848487610078e-06, |
| "loss": 0.9002, |
| "step": 707 |
| }, |
| { |
| "epoch": 3.863574351978172, |
| "grad_norm": 0.1546596886497959, |
| "learning_rate": 5.92550510822502e-06, |
| "loss": 0.881, |
| "step": 708 |
| }, |
| { |
| "epoch": 3.869031377899045, |
| "grad_norm": 0.1553240834204749, |
| "learning_rate": 5.871366813116661e-06, |
| "loss": 0.9015, |
| "step": 709 |
| }, |
| { |
| "epoch": 3.8744884038199183, |
| "grad_norm": 0.14118959880699977, |
| "learning_rate": 5.817434391152605e-06, |
| "loss": 0.8907, |
| "step": 710 |
| }, |
| { |
| "epoch": 3.879945429740791, |
| "grad_norm": 0.14059851937404533, |
| "learning_rate": 5.763708628200609e-06, |
| "loss": 0.8891, |
| "step": 711 |
| }, |
| { |
| "epoch": 3.8854024556616644, |
| "grad_norm": 0.15427945771110663, |
| "learning_rate": 5.710190307117138e-06, |
| "loss": 0.8951, |
| "step": 712 |
| }, |
| { |
| "epoch": 3.8908594815825372, |
| "grad_norm": 0.1445538887040146, |
| "learning_rate": 5.656880207735938e-06, |
| "loss": 0.8877, |
| "step": 713 |
| }, |
| { |
| "epoch": 3.8963165075034105, |
| "grad_norm": 0.15649585838748734, |
| "learning_rate": 5.603779106856699e-06, |
| "loss": 0.9074, |
| "step": 714 |
| }, |
| { |
| "epoch": 3.901773533424284, |
| "grad_norm": 0.13648774675224182, |
| "learning_rate": 5.550887778233713e-06, |
| "loss": 0.8941, |
| "step": 715 |
| }, |
| { |
| "epoch": 3.907230559345157, |
| "grad_norm": 0.15565409065858304, |
| "learning_rate": 5.498206992564612e-06, |
| "loss": 0.9173, |
| "step": 716 |
| }, |
| { |
| "epoch": 3.91268758526603, |
| "grad_norm": 0.13922969052192785, |
| "learning_rate": 5.4457375174791325e-06, |
| "loss": 0.8893, |
| "step": 717 |
| }, |
| { |
| "epoch": 3.918144611186903, |
| "grad_norm": 0.15294676839534935, |
| "learning_rate": 5.3934801175279276e-06, |
| "loss": 0.9154, |
| "step": 718 |
| }, |
| { |
| "epoch": 3.923601637107776, |
| "grad_norm": 0.15092879808147422, |
| "learning_rate": 5.341435554171448e-06, |
| "loss": 0.8827, |
| "step": 719 |
| }, |
| { |
| "epoch": 3.9290586630286493, |
| "grad_norm": 0.14825666022997366, |
| "learning_rate": 5.289604585768813e-06, |
| "loss": 0.8848, |
| "step": 720 |
| }, |
| { |
| "epoch": 3.9345156889495225, |
| "grad_norm": 0.1606715610763504, |
| "learning_rate": 5.237987967566787e-06, |
| "loss": 0.8772, |
| "step": 721 |
| }, |
| { |
| "epoch": 3.939972714870396, |
| "grad_norm": 0.16522816411905664, |
| "learning_rate": 5.1865864516887535e-06, |
| "loss": 0.8976, |
| "step": 722 |
| }, |
| { |
| "epoch": 3.9454297407912686, |
| "grad_norm": 0.15958019587002623, |
| "learning_rate": 5.1354007871237765e-06, |
| "loss": 0.906, |
| "step": 723 |
| }, |
| { |
| "epoch": 3.950886766712142, |
| "grad_norm": 0.150449287740693, |
| "learning_rate": 5.084431719715668e-06, |
| "loss": 0.8925, |
| "step": 724 |
| }, |
| { |
| "epoch": 3.956343792633015, |
| "grad_norm": 0.1654448721490872, |
| "learning_rate": 5.033679992152143e-06, |
| "loss": 0.8949, |
| "step": 725 |
| }, |
| { |
| "epoch": 3.961800818553888, |
| "grad_norm": 0.15862344300369557, |
| "learning_rate": 4.983146343953964e-06, |
| "loss": 0.8802, |
| "step": 726 |
| }, |
| { |
| "epoch": 3.9672578444747613, |
| "grad_norm": 0.13976420034767134, |
| "learning_rate": 4.932831511464206e-06, |
| "loss": 0.887, |
| "step": 727 |
| }, |
| { |
| "epoch": 3.9727148703956345, |
| "grad_norm": 0.18682370943191948, |
| "learning_rate": 4.88273622783749e-06, |
| "loss": 0.8953, |
| "step": 728 |
| }, |
| { |
| "epoch": 3.9781718963165074, |
| "grad_norm": 0.142893917159586, |
| "learning_rate": 4.83286122302932e-06, |
| "loss": 0.8823, |
| "step": 729 |
| }, |
| { |
| "epoch": 3.9836289222373806, |
| "grad_norm": 0.1501981132875881, |
| "learning_rate": 4.783207223785431e-06, |
| "loss": 0.8964, |
| "step": 730 |
| }, |
| { |
| "epoch": 3.989085948158254, |
| "grad_norm": 0.15657458729040308, |
| "learning_rate": 4.733774953631238e-06, |
| "loss": 0.8979, |
| "step": 731 |
| }, |
| { |
| "epoch": 3.9945429740791267, |
| "grad_norm": 0.13982230103959686, |
| "learning_rate": 4.68456513286124e-06, |
| "loss": 0.8923, |
| "step": 732 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.27858828514063777, |
| "learning_rate": 4.6355784785285615e-06, |
| "loss": 1.5566, |
| "step": 733 |
| }, |
| { |
| "epoch": 4.005457025920873, |
| "grad_norm": 0.17089905607735426, |
| "learning_rate": 4.586815704434488e-06, |
| "loss": 0.887, |
| "step": 734 |
| }, |
| { |
| "epoch": 4.0109140518417465, |
| "grad_norm": 0.14705477825085042, |
| "learning_rate": 4.538277521118071e-06, |
| "loss": 0.8841, |
| "step": 735 |
| }, |
| { |
| "epoch": 4.01637107776262, |
| "grad_norm": 0.1636792606316968, |
| "learning_rate": 4.489964635845769e-06, |
| "loss": 0.8899, |
| "step": 736 |
| }, |
| { |
| "epoch": 4.021828103683492, |
| "grad_norm": 0.15198479944975976, |
| "learning_rate": 4.44187775260116e-06, |
| "loss": 0.8881, |
| "step": 737 |
| }, |
| { |
| "epoch": 4.0272851296043655, |
| "grad_norm": 0.13291652839803894, |
| "learning_rate": 4.3940175720746494e-06, |
| "loss": 0.8696, |
| "step": 738 |
| }, |
| { |
| "epoch": 4.032742155525239, |
| "grad_norm": 0.1490103061507369, |
| "learning_rate": 4.346384791653298e-06, |
| "loss": 0.8984, |
| "step": 739 |
| }, |
| { |
| "epoch": 4.038199181446112, |
| "grad_norm": 0.17175563522601708, |
| "learning_rate": 4.2989801054106305e-06, |
| "loss": 0.8665, |
| "step": 740 |
| }, |
| { |
| "epoch": 4.043656207366985, |
| "grad_norm": 0.1499319026668514, |
| "learning_rate": 4.251804204096535e-06, |
| "loss": 0.8779, |
| "step": 741 |
| }, |
| { |
| "epoch": 4.0491132332878585, |
| "grad_norm": 0.16227165418614628, |
| "learning_rate": 4.204857775127198e-06, |
| "loss": 0.8755, |
| "step": 742 |
| }, |
| { |
| "epoch": 4.054570259208731, |
| "grad_norm": 0.1581981145043867, |
| "learning_rate": 4.1581415025750795e-06, |
| "loss": 0.8895, |
| "step": 743 |
| }, |
| { |
| "epoch": 4.060027285129604, |
| "grad_norm": 0.15513935379525345, |
| "learning_rate": 4.111656067158971e-06, |
| "loss": 0.8974, |
| "step": 744 |
| }, |
| { |
| "epoch": 4.0654843110504775, |
| "grad_norm": 0.14535697871945671, |
| "learning_rate": 4.065402146234034e-06, |
| "loss": 0.8485, |
| "step": 745 |
| }, |
| { |
| "epoch": 4.070941336971351, |
| "grad_norm": 0.1297532212724062, |
| "learning_rate": 4.019380413781968e-06, |
| "loss": 0.885, |
| "step": 746 |
| }, |
| { |
| "epoch": 4.076398362892224, |
| "grad_norm": 0.1488778393601588, |
| "learning_rate": 3.973591540401165e-06, |
| "loss": 0.9015, |
| "step": 747 |
| }, |
| { |
| "epoch": 4.081855388813097, |
| "grad_norm": 0.13978030494695767, |
| "learning_rate": 3.928036193296958e-06, |
| "loss": 0.8887, |
| "step": 748 |
| }, |
| { |
| "epoch": 4.08731241473397, |
| "grad_norm": 0.14411923483228978, |
| "learning_rate": 3.882715036271874e-06, |
| "loss": 0.8734, |
| "step": 749 |
| }, |
| { |
| "epoch": 4.092769440654843, |
| "grad_norm": 0.139081525574305, |
| "learning_rate": 3.837628729715994e-06, |
| "loss": 0.8781, |
| "step": 750 |
| }, |
| { |
| "epoch": 4.098226466575716, |
| "grad_norm": 0.14858634817778646, |
| "learning_rate": 3.7927779305973066e-06, |
| "loss": 0.8708, |
| "step": 751 |
| }, |
| { |
| "epoch": 4.1036834924965895, |
| "grad_norm": 0.1364174899816674, |
| "learning_rate": 3.7481632924521383e-06, |
| "loss": 0.8741, |
| "step": 752 |
| }, |
| { |
| "epoch": 4.109140518417463, |
| "grad_norm": 0.13932957227692389, |
| "learning_rate": 3.7037854653756287e-06, |
| "loss": 0.8921, |
| "step": 753 |
| }, |
| { |
| "epoch": 4.114597544338336, |
| "grad_norm": 0.14304788451278092, |
| "learning_rate": 3.65964509601227e-06, |
| "loss": 0.8765, |
| "step": 754 |
| }, |
| { |
| "epoch": 4.120054570259208, |
| "grad_norm": 0.1629674318472855, |
| "learning_rate": 3.6157428275464713e-06, |
| "loss": 0.8865, |
| "step": 755 |
| }, |
| { |
| "epoch": 4.125511596180082, |
| "grad_norm": 0.1363245824129024, |
| "learning_rate": 3.572079299693201e-06, |
| "loss": 0.9084, |
| "step": 756 |
| }, |
| { |
| "epoch": 4.130968622100955, |
| "grad_norm": 0.1508822391111529, |
| "learning_rate": 3.528655148688649e-06, |
| "loss": 0.8851, |
| "step": 757 |
| }, |
| { |
| "epoch": 4.136425648021828, |
| "grad_norm": 0.14303462818362056, |
| "learning_rate": 3.485471007280965e-06, |
| "loss": 0.8758, |
| "step": 758 |
| }, |
| { |
| "epoch": 4.1418826739427015, |
| "grad_norm": 0.14526482585748274, |
| "learning_rate": 3.4425275047210337e-06, |
| "loss": 0.8888, |
| "step": 759 |
| }, |
| { |
| "epoch": 4.147339699863575, |
| "grad_norm": 0.13337908689426514, |
| "learning_rate": 3.399825266753316e-06, |
| "loss": 0.8996, |
| "step": 760 |
| }, |
| { |
| "epoch": 4.152796725784447, |
| "grad_norm": 0.12506301079113333, |
| "learning_rate": 3.357364915606711e-06, |
| "loss": 0.8817, |
| "step": 761 |
| }, |
| { |
| "epoch": 4.15825375170532, |
| "grad_norm": 0.13808109230175336, |
| "learning_rate": 3.3151470699855226e-06, |
| "loss": 0.8784, |
| "step": 762 |
| }, |
| { |
| "epoch": 4.163710777626194, |
| "grad_norm": 0.1322332471365764, |
| "learning_rate": 3.2731723450604047e-06, |
| "loss": 0.8905, |
| "step": 763 |
| }, |
| { |
| "epoch": 4.169167803547067, |
| "grad_norm": 0.13389925768742733, |
| "learning_rate": 3.23144135245943e-06, |
| "loss": 0.8952, |
| "step": 764 |
| }, |
| { |
| "epoch": 4.17462482946794, |
| "grad_norm": 0.13683886240243162, |
| "learning_rate": 3.1899547002591548e-06, |
| "loss": 0.8755, |
| "step": 765 |
| }, |
| { |
| "epoch": 4.1800818553888135, |
| "grad_norm": 0.12558097802450152, |
| "learning_rate": 3.148712992975773e-06, |
| "loss": 0.8579, |
| "step": 766 |
| }, |
| { |
| "epoch": 4.185538881309686, |
| "grad_norm": 0.13782990446140714, |
| "learning_rate": 3.107716831556298e-06, |
| "loss": 0.8929, |
| "step": 767 |
| }, |
| { |
| "epoch": 4.190995907230559, |
| "grad_norm": 0.13743942655956906, |
| "learning_rate": 3.0669668133698114e-06, |
| "loss": 0.8627, |
| "step": 768 |
| }, |
| { |
| "epoch": 4.196452933151432, |
| "grad_norm": 0.12989089669107465, |
| "learning_rate": 3.026463532198767e-06, |
| "loss": 0.8799, |
| "step": 769 |
| }, |
| { |
| "epoch": 4.201909959072306, |
| "grad_norm": 0.15662525453225684, |
| "learning_rate": 2.9862075782303155e-06, |
| "loss": 0.8731, |
| "step": 770 |
| }, |
| { |
| "epoch": 4.207366984993179, |
| "grad_norm": 0.1350418186415897, |
| "learning_rate": 2.946199538047727e-06, |
| "loss": 0.8602, |
| "step": 771 |
| }, |
| { |
| "epoch": 4.212824010914052, |
| "grad_norm": 0.12498595424209477, |
| "learning_rate": 2.9064399946218304e-06, |
| "loss": 0.868, |
| "step": 772 |
| }, |
| { |
| "epoch": 4.218281036834925, |
| "grad_norm": 0.21098257229096243, |
| "learning_rate": 2.866929527302522e-06, |
| "loss": 0.8883, |
| "step": 773 |
| }, |
| { |
| "epoch": 4.223738062755798, |
| "grad_norm": 0.133123941008207, |
| "learning_rate": 2.8276687118103384e-06, |
| "loss": 0.8878, |
| "step": 774 |
| }, |
| { |
| "epoch": 4.229195088676671, |
| "grad_norm": 0.1418691768230737, |
| "learning_rate": 2.7886581202280338e-06, |
| "loss": 0.8978, |
| "step": 775 |
| }, |
| { |
| "epoch": 4.234652114597544, |
| "grad_norm": 0.14622777292208364, |
| "learning_rate": 2.749898320992286e-06, |
| "loss": 0.8855, |
| "step": 776 |
| }, |
| { |
| "epoch": 4.240109140518418, |
| "grad_norm": 0.13868949813718004, |
| "learning_rate": 2.711389878885371e-06, |
| "loss": 0.8782, |
| "step": 777 |
| }, |
| { |
| "epoch": 4.245566166439291, |
| "grad_norm": 0.12620162322262743, |
| "learning_rate": 2.673133355026969e-06, |
| "loss": 0.8742, |
| "step": 778 |
| }, |
| { |
| "epoch": 4.251023192360163, |
| "grad_norm": 0.1271015484185532, |
| "learning_rate": 2.6351293068659643e-06, |
| "loss": 0.8748, |
| "step": 779 |
| }, |
| { |
| "epoch": 4.256480218281037, |
| "grad_norm": 0.18196702435356202, |
| "learning_rate": 2.597378288172332e-06, |
| "loss": 0.8851, |
| "step": 780 |
| }, |
| { |
| "epoch": 4.26193724420191, |
| "grad_norm": 0.16872955546686272, |
| "learning_rate": 2.559880849029079e-06, |
| "loss": 0.8802, |
| "step": 781 |
| }, |
| { |
| "epoch": 4.267394270122783, |
| "grad_norm": 0.13072466845715314, |
| "learning_rate": 2.5226375358242085e-06, |
| "loss": 0.8877, |
| "step": 782 |
| }, |
| { |
| "epoch": 4.272851296043656, |
| "grad_norm": 0.14754876950071485, |
| "learning_rate": 2.485648891242767e-06, |
| "loss": 0.8904, |
| "step": 783 |
| }, |
| { |
| "epoch": 4.27830832196453, |
| "grad_norm": 0.1590434780138768, |
| "learning_rate": 2.448915454258942e-06, |
| "loss": 0.9032, |
| "step": 784 |
| }, |
| { |
| "epoch": 4.283765347885402, |
| "grad_norm": 0.14151048099974572, |
| "learning_rate": 2.412437760128199e-06, |
| "loss": 0.8918, |
| "step": 785 |
| }, |
| { |
| "epoch": 4.289222373806275, |
| "grad_norm": 0.13620855260975054, |
| "learning_rate": 2.376216340379489e-06, |
| "loss": 0.8845, |
| "step": 786 |
| }, |
| { |
| "epoch": 4.294679399727149, |
| "grad_norm": 0.1901936677421411, |
| "learning_rate": 2.3402517228075073e-06, |
| "loss": 0.8851, |
| "step": 787 |
| }, |
| { |
| "epoch": 4.300136425648022, |
| "grad_norm": 0.19671986170174766, |
| "learning_rate": 2.3045444314649856e-06, |
| "loss": 0.8678, |
| "step": 788 |
| }, |
| { |
| "epoch": 4.305593451568895, |
| "grad_norm": 0.14899305081412742, |
| "learning_rate": 2.2690949866550803e-06, |
| "loss": 0.8893, |
| "step": 789 |
| }, |
| { |
| "epoch": 4.311050477489768, |
| "grad_norm": 0.16143293820225038, |
| "learning_rate": 2.2339039049237687e-06, |
| "loss": 0.9024, |
| "step": 790 |
| }, |
| { |
| "epoch": 4.316507503410641, |
| "grad_norm": 0.17932782006553405, |
| "learning_rate": 2.19897169905233e-06, |
| "loss": 0.8929, |
| "step": 791 |
| }, |
| { |
| "epoch": 4.321964529331514, |
| "grad_norm": 0.14806731199839362, |
| "learning_rate": 2.164298878049882e-06, |
| "loss": 0.8662, |
| "step": 792 |
| }, |
| { |
| "epoch": 4.327421555252387, |
| "grad_norm": 0.1289649779602983, |
| "learning_rate": 2.1298859471459443e-06, |
| "loss": 0.8813, |
| "step": 793 |
| }, |
| { |
| "epoch": 4.332878581173261, |
| "grad_norm": 0.19801253886238948, |
| "learning_rate": 2.0957334077831115e-06, |
| "loss": 0.9005, |
| "step": 794 |
| }, |
| { |
| "epoch": 4.338335607094134, |
| "grad_norm": 0.19694630717701755, |
| "learning_rate": 2.0618417576097016e-06, |
| "loss": 0.9052, |
| "step": 795 |
| }, |
| { |
| "epoch": 4.343792633015007, |
| "grad_norm": 0.13211759110481675, |
| "learning_rate": 2.028211490472538e-06, |
| "loss": 0.8727, |
| "step": 796 |
| }, |
| { |
| "epoch": 4.34924965893588, |
| "grad_norm": 0.16942182286893248, |
| "learning_rate": 1.99484309640974e-06, |
| "loss": 0.8939, |
| "step": 797 |
| }, |
| { |
| "epoch": 4.354706684856753, |
| "grad_norm": 0.15525627631122169, |
| "learning_rate": 1.9617370616435827e-06, |
| "loss": 0.8769, |
| "step": 798 |
| }, |
| { |
| "epoch": 4.360163710777626, |
| "grad_norm": 0.14568495391925143, |
| "learning_rate": 1.9288938685734206e-06, |
| "loss": 0.8801, |
| "step": 799 |
| }, |
| { |
| "epoch": 4.365620736698499, |
| "grad_norm": 0.1482638689148959, |
| "learning_rate": 1.8963139957686439e-06, |
| "loss": 0.8865, |
| "step": 800 |
| }, |
| { |
| "epoch": 4.371077762619373, |
| "grad_norm": 0.1698206452526069, |
| "learning_rate": 1.863997917961724e-06, |
| "loss": 0.8756, |
| "step": 801 |
| }, |
| { |
| "epoch": 4.376534788540246, |
| "grad_norm": 0.15989049182819062, |
| "learning_rate": 1.8319461060412735e-06, |
| "loss": 0.8827, |
| "step": 802 |
| }, |
| { |
| "epoch": 4.381991814461118, |
| "grad_norm": 0.12598713099232536, |
| "learning_rate": 1.8001590270452007e-06, |
| "loss": 0.8955, |
| "step": 803 |
| }, |
| { |
| "epoch": 4.387448840381992, |
| "grad_norm": 0.13029377486709406, |
| "learning_rate": 1.7686371441539041e-06, |
| "loss": 0.8964, |
| "step": 804 |
| }, |
| { |
| "epoch": 4.392905866302865, |
| "grad_norm": 0.15126612488881352, |
| "learning_rate": 1.7373809166835131e-06, |
| "loss": 0.8838, |
| "step": 805 |
| }, |
| { |
| "epoch": 4.398362892223738, |
| "grad_norm": 0.13445739913334448, |
| "learning_rate": 1.7063908000791984e-06, |
| "loss": 0.8958, |
| "step": 806 |
| }, |
| { |
| "epoch": 4.403819918144611, |
| "grad_norm": 0.16664103966071625, |
| "learning_rate": 1.6756672459085565e-06, |
| "loss": 0.8826, |
| "step": 807 |
| }, |
| { |
| "epoch": 4.409276944065485, |
| "grad_norm": 0.1762539743129894, |
| "learning_rate": 1.645210701854989e-06, |
| "loss": 0.8785, |
| "step": 808 |
| }, |
| { |
| "epoch": 4.414733969986357, |
| "grad_norm": 0.15934966334590775, |
| "learning_rate": 1.615021611711216e-06, |
| "loss": 0.8854, |
| "step": 809 |
| }, |
| { |
| "epoch": 4.42019099590723, |
| "grad_norm": 0.124968014984558, |
| "learning_rate": 1.5851004153727845e-06, |
| "loss": 0.8788, |
| "step": 810 |
| }, |
| { |
| "epoch": 4.425648021828104, |
| "grad_norm": 0.1385331750299138, |
| "learning_rate": 1.5554475488316812e-06, |
| "loss": 0.8916, |
| "step": 811 |
| }, |
| { |
| "epoch": 4.431105047748977, |
| "grad_norm": 0.13707927403446576, |
| "learning_rate": 1.5260634441699585e-06, |
| "loss": 0.8742, |
| "step": 812 |
| }, |
| { |
| "epoch": 4.43656207366985, |
| "grad_norm": 0.12627498054063097, |
| "learning_rate": 1.496948529553457e-06, |
| "loss": 0.887, |
| "step": 813 |
| }, |
| { |
| "epoch": 4.442019099590723, |
| "grad_norm": 0.1486808085420501, |
| "learning_rate": 1.468103229225546e-06, |
| "loss": 0.8808, |
| "step": 814 |
| }, |
| { |
| "epoch": 4.447476125511596, |
| "grad_norm": 0.14605688992873062, |
| "learning_rate": 1.4395279635009595e-06, |
| "loss": 0.8708, |
| "step": 815 |
| }, |
| { |
| "epoch": 4.452933151432469, |
| "grad_norm": 0.13906895719296147, |
| "learning_rate": 1.4112231487596618e-06, |
| "loss": 0.8649, |
| "step": 816 |
| }, |
| { |
| "epoch": 4.458390177353342, |
| "grad_norm": 0.11788044087197277, |
| "learning_rate": 1.3831891974407862e-06, |
| "loss": 0.8783, |
| "step": 817 |
| }, |
| { |
| "epoch": 4.463847203274216, |
| "grad_norm": 0.11778162515868901, |
| "learning_rate": 1.3554265180366177e-06, |
| "loss": 0.91, |
| "step": 818 |
| }, |
| { |
| "epoch": 4.469304229195089, |
| "grad_norm": 0.14670430911084376, |
| "learning_rate": 1.3279355150866536e-06, |
| "loss": 0.8694, |
| "step": 819 |
| }, |
| { |
| "epoch": 4.474761255115962, |
| "grad_norm": 0.12231810737886735, |
| "learning_rate": 1.3007165891716978e-06, |
| "loss": 0.8519, |
| "step": 820 |
| }, |
| { |
| "epoch": 4.480218281036835, |
| "grad_norm": 0.1271770535078628, |
| "learning_rate": 1.2737701369080213e-06, |
| "loss": 0.9097, |
| "step": 821 |
| }, |
| { |
| "epoch": 4.485675306957708, |
| "grad_norm": 0.1385648556146423, |
| "learning_rate": 1.2470965509415911e-06, |
| "loss": 0.8968, |
| "step": 822 |
| }, |
| { |
| "epoch": 4.491132332878581, |
| "grad_norm": 0.152826890949677, |
| "learning_rate": 1.2206962199423478e-06, |
| "loss": 0.8831, |
| "step": 823 |
| }, |
| { |
| "epoch": 4.496589358799454, |
| "grad_norm": 0.12805457871619716, |
| "learning_rate": 1.1945695285985437e-06, |
| "loss": 0.9114, |
| "step": 824 |
| }, |
| { |
| "epoch": 4.502046384720328, |
| "grad_norm": 0.1200667730156898, |
| "learning_rate": 1.1687168576111251e-06, |
| "loss": 0.897, |
| "step": 825 |
| }, |
| { |
| "epoch": 4.507503410641201, |
| "grad_norm": 0.14528129682089547, |
| "learning_rate": 1.1431385836882058e-06, |
| "loss": 0.8645, |
| "step": 826 |
| }, |
| { |
| "epoch": 4.512960436562073, |
| "grad_norm": 0.11909725664621254, |
| "learning_rate": 1.1178350795395553e-06, |
| "loss": 0.875, |
| "step": 827 |
| }, |
| { |
| "epoch": 4.5184174624829465, |
| "grad_norm": 0.140122744341455, |
| "learning_rate": 1.0928067138711817e-06, |
| "loss": 0.8825, |
| "step": 828 |
| }, |
| { |
| "epoch": 4.52387448840382, |
| "grad_norm": 0.15162334185887835, |
| "learning_rate": 1.06805385137996e-06, |
| "loss": 0.8794, |
| "step": 829 |
| }, |
| { |
| "epoch": 4.529331514324693, |
| "grad_norm": 0.14150098143714812, |
| "learning_rate": 1.0435768527483114e-06, |
| "loss": 0.8937, |
| "step": 830 |
| }, |
| { |
| "epoch": 4.534788540245566, |
| "grad_norm": 0.1260117766174468, |
| "learning_rate": 1.019376074638949e-06, |
| "loss": 0.8815, |
| "step": 831 |
| }, |
| { |
| "epoch": 4.54024556616644, |
| "grad_norm": 0.12409915203852431, |
| "learning_rate": 9.954518696896854e-07, |
| "loss": 0.8834, |
| "step": 832 |
| }, |
| { |
| "epoch": 4.545702592087313, |
| "grad_norm": 0.136871178123947, |
| "learning_rate": 9.718045865082914e-07, |
| "loss": 0.8793, |
| "step": 833 |
| }, |
| { |
| "epoch": 4.551159618008185, |
| "grad_norm": 0.14107625789727535, |
| "learning_rate": 9.484345696674135e-07, |
| "loss": 0.9022, |
| "step": 834 |
| }, |
| { |
| "epoch": 4.5566166439290585, |
| "grad_norm": 0.15144418956800026, |
| "learning_rate": 9.253421596995538e-07, |
| "loss": 0.8668, |
| "step": 835 |
| }, |
| { |
| "epoch": 4.562073669849932, |
| "grad_norm": 0.14652192158608265, |
| "learning_rate": 9.025276930921168e-07, |
| "loss": 0.8952, |
| "step": 836 |
| }, |
| { |
| "epoch": 4.567530695770805, |
| "grad_norm": 0.1414596872944082, |
| "learning_rate": 8.799915022824912e-07, |
| "loss": 0.89, |
| "step": 837 |
| }, |
| { |
| "epoch": 4.572987721691678, |
| "grad_norm": 0.11762183453991368, |
| "learning_rate": 8.577339156532228e-07, |
| "loss": 0.8891, |
| "step": 838 |
| }, |
| { |
| "epoch": 4.578444747612551, |
| "grad_norm": 0.11696302332812643, |
| "learning_rate": 8.35755257527211e-07, |
| "loss": 0.8865, |
| "step": 839 |
| }, |
| { |
| "epoch": 4.583901773533424, |
| "grad_norm": 0.14096367257161824, |
| "learning_rate": 8.140558481629978e-07, |
| "loss": 0.883, |
| "step": 840 |
| }, |
| { |
| "epoch": 4.589358799454297, |
| "grad_norm": 0.13025076800848065, |
| "learning_rate": 7.92636003750098e-07, |
| "loss": 0.861, |
| "step": 841 |
| }, |
| { |
| "epoch": 4.5948158253751705, |
| "grad_norm": 0.13357496841366284, |
| "learning_rate": 7.714960364043844e-07, |
| "loss": 0.8917, |
| "step": 842 |
| }, |
| { |
| "epoch": 4.600272851296044, |
| "grad_norm": 0.12340312706447396, |
| "learning_rate": 7.506362541635482e-07, |
| "loss": 0.8899, |
| "step": 843 |
| }, |
| { |
| "epoch": 4.605729877216917, |
| "grad_norm": 0.14740826905136978, |
| "learning_rate": 7.300569609826103e-07, |
| "loss": 0.9164, |
| "step": 844 |
| }, |
| { |
| "epoch": 4.61118690313779, |
| "grad_norm": 0.12349556901637186, |
| "learning_rate": 7.097584567294858e-07, |
| "loss": 0.9002, |
| "step": 845 |
| }, |
| { |
| "epoch": 4.616643929058663, |
| "grad_norm": 0.12502508254095465, |
| "learning_rate": 6.897410371806202e-07, |
| "loss": 0.8966, |
| "step": 846 |
| }, |
| { |
| "epoch": 4.622100954979536, |
| "grad_norm": 0.1211951289522415, |
| "learning_rate": 6.70004994016673e-07, |
| "loss": 0.8834, |
| "step": 847 |
| }, |
| { |
| "epoch": 4.627557980900409, |
| "grad_norm": 0.12281440993768762, |
| "learning_rate": 6.505506148182816e-07, |
| "loss": 0.8871, |
| "step": 848 |
| }, |
| { |
| "epoch": 4.6330150068212825, |
| "grad_norm": 0.12502962964548078, |
| "learning_rate": 6.313781830618549e-07, |
| "loss": 0.8767, |
| "step": 849 |
| }, |
| { |
| "epoch": 4.638472032742156, |
| "grad_norm": 0.12943087930152467, |
| "learning_rate": 6.124879781154458e-07, |
| "loss": 0.875, |
| "step": 850 |
| }, |
| { |
| "epoch": 4.643929058663028, |
| "grad_norm": 0.14031219133585143, |
| "learning_rate": 5.938802752346972e-07, |
| "loss": 0.8927, |
| "step": 851 |
| }, |
| { |
| "epoch": 4.6493860845839015, |
| "grad_norm": 0.12542619019610873, |
| "learning_rate": 5.755553455588025e-07, |
| "loss": 0.8876, |
| "step": 852 |
| }, |
| { |
| "epoch": 4.654843110504775, |
| "grad_norm": 0.12638050340038925, |
| "learning_rate": 5.575134561065798e-07, |
| "loss": 0.8665, |
| "step": 853 |
| }, |
| { |
| "epoch": 4.660300136425648, |
| "grad_norm": 0.12333899862571804, |
| "learning_rate": 5.397548697725686e-07, |
| "loss": 0.8903, |
| "step": 854 |
| }, |
| { |
| "epoch": 4.665757162346521, |
| "grad_norm": 0.1346635192449993, |
| "learning_rate": 5.22279845323197e-07, |
| "loss": 0.8725, |
| "step": 855 |
| }, |
| { |
| "epoch": 4.6712141882673945, |
| "grad_norm": 0.13647426558512074, |
| "learning_rate": 5.050886373930231e-07, |
| "loss": 0.8875, |
| "step": 856 |
| }, |
| { |
| "epoch": 4.676671214188268, |
| "grad_norm": 0.11566165671981071, |
| "learning_rate": 4.881814964810172e-07, |
| "loss": 0.8749, |
| "step": 857 |
| }, |
| { |
| "epoch": 4.68212824010914, |
| "grad_norm": 0.12363326959711636, |
| "learning_rate": 4.715586689469054e-07, |
| "loss": 0.8769, |
| "step": 858 |
| }, |
| { |
| "epoch": 4.6875852660300135, |
| "grad_norm": 0.11764168862039581, |
| "learning_rate": 4.552203970075941e-07, |
| "loss": 0.8918, |
| "step": 859 |
| }, |
| { |
| "epoch": 4.693042291950887, |
| "grad_norm": 0.11426043356029422, |
| "learning_rate": 4.391669187336267e-07, |
| "loss": 0.89, |
| "step": 860 |
| }, |
| { |
| "epoch": 4.69849931787176, |
| "grad_norm": 0.11108719415747546, |
| "learning_rate": 4.2339846804572596e-07, |
| "loss": 0.8804, |
| "step": 861 |
| }, |
| { |
| "epoch": 4.703956343792633, |
| "grad_norm": 0.12913366570368975, |
| "learning_rate": 4.079152747113746e-07, |
| "loss": 0.8803, |
| "step": 862 |
| }, |
| { |
| "epoch": 4.709413369713506, |
| "grad_norm": 0.11303274704940805, |
| "learning_rate": 3.9271756434147825e-07, |
| "loss": 0.8707, |
| "step": 863 |
| }, |
| { |
| "epoch": 4.714870395634379, |
| "grad_norm": 0.12668292386761498, |
| "learning_rate": 3.778055583870677e-07, |
| "loss": 0.8615, |
| "step": 864 |
| }, |
| { |
| "epoch": 4.720327421555252, |
| "grad_norm": 0.12160210451896335, |
| "learning_rate": 3.631794741360839e-07, |
| "loss": 0.8749, |
| "step": 865 |
| }, |
| { |
| "epoch": 4.7257844474761255, |
| "grad_norm": 0.12255220583052599, |
| "learning_rate": 3.4883952471019833e-07, |
| "loss": 0.8656, |
| "step": 866 |
| }, |
| { |
| "epoch": 4.731241473396999, |
| "grad_norm": 0.13097406090149366, |
| "learning_rate": 3.347859190617153e-07, |
| "loss": 0.9104, |
| "step": 867 |
| }, |
| { |
| "epoch": 4.736698499317872, |
| "grad_norm": 0.13879798939381358, |
| "learning_rate": 3.210188619705257e-07, |
| "loss": 0.8932, |
| "step": 868 |
| }, |
| { |
| "epoch": 4.742155525238745, |
| "grad_norm": 0.10869652399193062, |
| "learning_rate": 3.0753855404112907e-07, |
| "loss": 0.8617, |
| "step": 869 |
| }, |
| { |
| "epoch": 4.747612551159618, |
| "grad_norm": 0.14405688232051542, |
| "learning_rate": 2.943451916997009e-07, |
| "loss": 0.8849, |
| "step": 870 |
| }, |
| { |
| "epoch": 4.753069577080491, |
| "grad_norm": 0.10950574849744894, |
| "learning_rate": 2.814389671912321e-07, |
| "loss": 0.8894, |
| "step": 871 |
| }, |
| { |
| "epoch": 4.758526603001364, |
| "grad_norm": 0.12549196376105284, |
| "learning_rate": 2.6882006857672946e-07, |
| "loss": 0.8666, |
| "step": 872 |
| }, |
| { |
| "epoch": 4.7639836289222375, |
| "grad_norm": 0.12049040986742628, |
| "learning_rate": 2.564886797304844e-07, |
| "loss": 0.8925, |
| "step": 873 |
| }, |
| { |
| "epoch": 4.769440654843111, |
| "grad_norm": 0.13628451786758633, |
| "learning_rate": 2.444449803373772e-07, |
| "loss": 0.8736, |
| "step": 874 |
| }, |
| { |
| "epoch": 4.774897680763983, |
| "grad_norm": 0.11819888541785435, |
| "learning_rate": 2.3268914589026582e-07, |
| "loss": 0.876, |
| "step": 875 |
| }, |
| { |
| "epoch": 4.780354706684856, |
| "grad_norm": 0.13249771798161303, |
| "learning_rate": 2.212213476874392e-07, |
| "loss": 0.8721, |
| "step": 876 |
| }, |
| { |
| "epoch": 4.78581173260573, |
| "grad_norm": 0.16019402654757406, |
| "learning_rate": 2.100417528301013e-07, |
| "loss": 0.8574, |
| "step": 877 |
| }, |
| { |
| "epoch": 4.791268758526603, |
| "grad_norm": 0.12709978273924896, |
| "learning_rate": 1.9915052421995095e-07, |
| "loss": 0.8788, |
| "step": 878 |
| }, |
| { |
| "epoch": 4.796725784447476, |
| "grad_norm": 0.1269380086439239, |
| "learning_rate": 1.8854782055680588e-07, |
| "loss": 0.8856, |
| "step": 879 |
| }, |
| { |
| "epoch": 4.8021828103683495, |
| "grad_norm": 0.12816104723044472, |
| "learning_rate": 1.7823379633628236e-07, |
| "loss": 0.8682, |
| "step": 880 |
| }, |
| { |
| "epoch": 4.807639836289223, |
| "grad_norm": 0.11897350073492513, |
| "learning_rate": 1.6820860184755705e-07, |
| "loss": 0.8893, |
| "step": 881 |
| }, |
| { |
| "epoch": 4.813096862210095, |
| "grad_norm": 0.1676441632472798, |
| "learning_rate": 1.584723831711621e-07, |
| "loss": 0.8827, |
| "step": 882 |
| }, |
| { |
| "epoch": 4.818553888130968, |
| "grad_norm": 0.11260862448875701, |
| "learning_rate": 1.4902528217687339e-07, |
| "loss": 0.8668, |
| "step": 883 |
| }, |
| { |
| "epoch": 4.824010914051842, |
| "grad_norm": 0.11073774182348436, |
| "learning_rate": 1.398674365216235e-07, |
| "loss": 0.8985, |
| "step": 884 |
| }, |
| { |
| "epoch": 4.829467939972715, |
| "grad_norm": 0.11457097434689421, |
| "learning_rate": 1.309989796475164e-07, |
| "loss": 0.8671, |
| "step": 885 |
| }, |
| { |
| "epoch": 4.834924965893588, |
| "grad_norm": 0.11278616103798808, |
| "learning_rate": 1.22420040779867e-07, |
| "loss": 0.8627, |
| "step": 886 |
| }, |
| { |
| "epoch": 4.8403819918144615, |
| "grad_norm": 0.11658568907087213, |
| "learning_rate": 1.1413074492532927e-07, |
| "loss": 0.8698, |
| "step": 887 |
| }, |
| { |
| "epoch": 4.845839017735334, |
| "grad_norm": 0.12477104191258748, |
| "learning_rate": 1.06131212870062e-07, |
| "loss": 0.8972, |
| "step": 888 |
| }, |
| { |
| "epoch": 4.851296043656207, |
| "grad_norm": 0.11569142614675672, |
| "learning_rate": 9.842156117798817e-08, |
| "loss": 0.8808, |
| "step": 889 |
| }, |
| { |
| "epoch": 4.85675306957708, |
| "grad_norm": 0.15639110653049954, |
| "learning_rate": 9.10019021890718e-08, |
| "loss": 0.8757, |
| "step": 890 |
| }, |
| { |
| "epoch": 4.862210095497954, |
| "grad_norm": 0.1414742721261049, |
| "learning_rate": 8.387234401770361e-08, |
| "loss": 0.884, |
| "step": 891 |
| }, |
| { |
| "epoch": 4.867667121418827, |
| "grad_norm": 0.11635396600358816, |
| "learning_rate": 7.703299055111357e-08, |
| "loss": 0.9047, |
| "step": 892 |
| }, |
| { |
| "epoch": 4.8731241473397, |
| "grad_norm": 0.1168433074137953, |
| "learning_rate": 7.048394144785863e-08, |
| "loss": 0.8669, |
| "step": 893 |
| }, |
| { |
| "epoch": 4.878581173260573, |
| "grad_norm": 0.11125431182410457, |
| "learning_rate": 6.422529213637063e-08, |
| "loss": 0.8713, |
| "step": 894 |
| }, |
| { |
| "epoch": 4.884038199181446, |
| "grad_norm": 0.11391519788296704, |
| "learning_rate": 5.8257133813570675e-08, |
| "loss": 0.8851, |
| "step": 895 |
| }, |
| { |
| "epoch": 4.889495225102319, |
| "grad_norm": 0.1050397723513658, |
| "learning_rate": 5.257955344353471e-08, |
| "loss": 0.8742, |
| "step": 896 |
| }, |
| { |
| "epoch": 4.894952251023192, |
| "grad_norm": 0.110886347004846, |
| "learning_rate": 4.71926337562234e-08, |
| "loss": 0.8835, |
| "step": 897 |
| }, |
| { |
| "epoch": 4.900409276944066, |
| "grad_norm": 0.13022649928545438, |
| "learning_rate": 4.2096453246287526e-08, |
| "loss": 0.8798, |
| "step": 898 |
| }, |
| { |
| "epoch": 4.905866302864939, |
| "grad_norm": 0.11447299895739564, |
| "learning_rate": 3.729108617191557e-08, |
| "loss": 0.8915, |
| "step": 899 |
| }, |
| { |
| "epoch": 4.911323328785811, |
| "grad_norm": 0.1226591978951474, |
| "learning_rate": 3.277660255375237e-08, |
| "loss": 0.9051, |
| "step": 900 |
| }, |
| { |
| "epoch": 4.916780354706685, |
| "grad_norm": 0.11938229502321866, |
| "learning_rate": 2.855306817388659e-08, |
| "loss": 0.8961, |
| "step": 901 |
| }, |
| { |
| "epoch": 4.922237380627558, |
| "grad_norm": 0.13559091937945114, |
| "learning_rate": 2.462054457487595e-08, |
| "loss": 0.8778, |
| "step": 902 |
| }, |
| { |
| "epoch": 4.927694406548431, |
| "grad_norm": 0.12486180567731954, |
| "learning_rate": 2.097908905887014e-08, |
| "loss": 0.8877, |
| "step": 903 |
| }, |
| { |
| "epoch": 4.933151432469304, |
| "grad_norm": 0.12698382896355306, |
| "learning_rate": 1.7628754686760397e-08, |
| "loss": 0.8837, |
| "step": 904 |
| }, |
| { |
| "epoch": 4.938608458390178, |
| "grad_norm": 0.10840982827247776, |
| "learning_rate": 1.4569590277413447e-08, |
| "loss": 0.8738, |
| "step": 905 |
| }, |
| { |
| "epoch": 4.94406548431105, |
| "grad_norm": 0.11341918799763352, |
| "learning_rate": 1.1801640406963188e-08, |
| "loss": 0.8731, |
| "step": 906 |
| }, |
| { |
| "epoch": 4.949522510231923, |
| "grad_norm": 0.11594299163597076, |
| "learning_rate": 9.32494540815121e-09, |
| "loss": 0.8704, |
| "step": 907 |
| }, |
| { |
| "epoch": 4.954979536152797, |
| "grad_norm": 0.10946767908645595, |
| "learning_rate": 7.13954136974504e-09, |
| "loss": 0.8916, |
| "step": 908 |
| }, |
| { |
| "epoch": 4.96043656207367, |
| "grad_norm": 0.1131154133991995, |
| "learning_rate": 5.245460136018565e-09, |
| "loss": 0.8931, |
| "step": 909 |
| }, |
| { |
| "epoch": 4.965893587994543, |
| "grad_norm": 0.11676272543288499, |
| "learning_rate": 3.6427293062724077e-09, |
| "loss": 0.8906, |
| "step": 910 |
| }, |
| { |
| "epoch": 4.971350613915416, |
| "grad_norm": 0.11538578325055797, |
| "learning_rate": 2.3313722344497914e-09, |
| "loss": 0.8779, |
| "step": 911 |
| }, |
| { |
| "epoch": 4.97680763983629, |
| "grad_norm": 0.1264648783936699, |
| "learning_rate": 1.3114080287790488e-09, |
| "loss": 0.8652, |
| "step": 912 |
| }, |
| { |
| "epoch": 4.982264665757162, |
| "grad_norm": 0.11491863538487673, |
| "learning_rate": 5.828515515116096e-10, |
| "loss": 0.8722, |
| "step": 913 |
| }, |
| { |
| "epoch": 4.987721691678035, |
| "grad_norm": 0.1086827877742504, |
| "learning_rate": 1.457134186866327e-10, |
| "loss": 0.9013, |
| "step": 914 |
| }, |
| { |
| "epoch": 4.993178717598909, |
| "grad_norm": 0.11831005135790107, |
| "learning_rate": 0.0, |
| "loss": 0.8894, |
| "step": 915 |
| }, |
| { |
| "epoch": 4.993178717598909, |
| "step": 915, |
| "total_flos": 1.883960626772548e+19, |
| "train_loss": 0.9399711781512192, |
| "train_runtime": 49360.7108, |
| "train_samples_per_second": 9.497, |
| "train_steps_per_second": 0.019 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 915, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.883960626772548e+19, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
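
The state above is a standard Hugging Face Trainer `trainer_state.json`: `log_history` holds one record per logged step (loss, learning rate, gradient norm), followed by a final summary record (`train_loss`, `train_runtime`, throughput). As a minimal sketch of how these records could be inspected, assuming the JSON shown above is saved locally as `trainer_state.json` and that matplotlib is available (both the filename and the plotting library are illustrative assumptions, not part of the original run), the per-step entries can be separated from the summary record and plotted:

```python
import json

import matplotlib.pyplot as plt

# Assumption: the trainer state shown above was saved as-is to this path.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step records; the trailing summary entry reports
# "train_loss" / "train_runtime" instead of "loss" and "learning_rate".
records = [e for e in state["log_history"] if "loss" in e and "learning_rate" in e]

steps = [e["step"] for e in records]
loss = [e["loss"] for e in records]
lr = [e["learning_rate"] for e in records]

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax_loss.plot(steps, loss)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lr)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("global step")
fig.tight_layout()
plt.show()
```

With the values logged here, the loss curve should fall from roughly 1.44 at step 1 to about 0.89 near step 915, and the learning-rate panel should show the warmup followed by a decay to 0 at the final step, matching the summary `train_loss` of about 0.94 averaged over the run.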