| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 1212, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0008250825082508251, |
| "grad_norm": 1.7177605628967285, |
| "learning_rate": 5.0000000000000004e-08, |
| "loss": 1.0661, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0016501650165016502, |
| "grad_norm": 1.5985264778137207, |
| "learning_rate": 1.0000000000000001e-07, |
| "loss": 1.0534, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0024752475247524753, |
| "grad_norm": 1.708420753479004, |
| "learning_rate": 1.5000000000000002e-07, |
| "loss": 1.0598, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0033003300330033004, |
| "grad_norm": 1.608269453048706, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 1.0497, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.004125412541254125, |
| "grad_norm": 1.68818998336792, |
| "learning_rate": 2.5000000000000004e-07, |
| "loss": 1.0521, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0049504950495049506, |
| "grad_norm": 1.5812345743179321, |
| "learning_rate": 3.0000000000000004e-07, |
| "loss": 1.0328, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.005775577557755775, |
| "grad_norm": 1.5700342655181885, |
| "learning_rate": 3.5000000000000004e-07, |
| "loss": 1.0412, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.006600660066006601, |
| "grad_norm": 1.6265473365783691, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 1.0552, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.007425742574257425, |
| "grad_norm": 1.623134970664978, |
| "learning_rate": 4.5000000000000003e-07, |
| "loss": 1.0526, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.00825082508250825, |
| "grad_norm": 1.6078376770019531, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 1.04, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.009075907590759076, |
| "grad_norm": 1.6052888631820679, |
| "learning_rate": 5.5e-07, |
| "loss": 1.0722, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.009900990099009901, |
| "grad_norm": 1.5547384023666382, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 1.0436, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.010726072607260726, |
| "grad_norm": 1.5156539678573608, |
| "learning_rate": 6.5e-07, |
| "loss": 1.0181, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.01155115511551155, |
| "grad_norm": 1.4860498905181885, |
| "learning_rate": 7.000000000000001e-07, |
| "loss": 1.0169, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.012376237623762377, |
| "grad_norm": 1.4493576288223267, |
| "learning_rate": 7.5e-07, |
| "loss": 1.0193, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.013201320132013201, |
| "grad_norm": 1.4495404958724976, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 1.0234, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.014026402640264026, |
| "grad_norm": 1.4204658269882202, |
| "learning_rate": 8.500000000000001e-07, |
| "loss": 1.0524, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.01485148514851485, |
| "grad_norm": 1.24644136428833, |
| "learning_rate": 9.000000000000001e-07, |
| "loss": 1.0206, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.015676567656765675, |
| "grad_norm": 1.1867165565490723, |
| "learning_rate": 9.500000000000001e-07, |
| "loss": 1.0333, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0165016501650165, |
| "grad_norm": 1.0698730945587158, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.0293, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.017326732673267328, |
| "grad_norm": 0.9976843595504761, |
| "learning_rate": 1.0500000000000001e-06, |
| "loss": 1.012, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.018151815181518153, |
| "grad_norm": 0.9542626738548279, |
| "learning_rate": 1.1e-06, |
| "loss": 0.9987, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.018976897689768978, |
| "grad_norm": 0.9308854341506958, |
| "learning_rate": 1.1500000000000002e-06, |
| "loss": 1.0111, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.019801980198019802, |
| "grad_norm": 0.8705531358718872, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 0.9736, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.020627062706270627, |
| "grad_norm": 0.8885819315910339, |
| "learning_rate": 1.25e-06, |
| "loss": 0.9616, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.02145214521452145, |
| "grad_norm": 0.8245412111282349, |
| "learning_rate": 1.3e-06, |
| "loss": 0.9697, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.022277227722772276, |
| "grad_norm": 0.7995723485946655, |
| "learning_rate": 1.3500000000000002e-06, |
| "loss": 0.9678, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0231023102310231, |
| "grad_norm": 0.7252822518348694, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 0.9638, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.02392739273927393, |
| "grad_norm": 0.6858912706375122, |
| "learning_rate": 1.45e-06, |
| "loss": 0.9485, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.024752475247524754, |
| "grad_norm": 0.6455612778663635, |
| "learning_rate": 1.5e-06, |
| "loss": 0.9397, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.02557755775577558, |
| "grad_norm": 0.6329395771026611, |
| "learning_rate": 1.5500000000000002e-06, |
| "loss": 0.9771, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.026402640264026403, |
| "grad_norm": 0.6316455602645874, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 0.9789, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.027227722772277228, |
| "grad_norm": 0.6126256585121155, |
| "learning_rate": 1.6500000000000003e-06, |
| "loss": 0.9506, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.028052805280528052, |
| "grad_norm": 0.5972760319709778, |
| "learning_rate": 1.7000000000000002e-06, |
| "loss": 0.964, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.028877887788778877, |
| "grad_norm": 0.5646793246269226, |
| "learning_rate": 1.75e-06, |
| "loss": 0.9211, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0297029702970297, |
| "grad_norm": 0.5675886869430542, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 0.946, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.03052805280528053, |
| "grad_norm": 0.540591299533844, |
| "learning_rate": 1.85e-06, |
| "loss": 0.9402, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.03135313531353135, |
| "grad_norm": 0.5284631848335266, |
| "learning_rate": 1.9000000000000002e-06, |
| "loss": 0.918, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.03217821782178218, |
| "grad_norm": 0.5251491665840149, |
| "learning_rate": 1.9500000000000004e-06, |
| "loss": 0.9419, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.033003300330033, |
| "grad_norm": 0.5078873038291931, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.9256, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03382838283828383, |
| "grad_norm": 0.5054848194122314, |
| "learning_rate": 2.05e-06, |
| "loss": 0.9344, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.034653465346534656, |
| "grad_norm": 0.49411484599113464, |
| "learning_rate": 2.1000000000000002e-06, |
| "loss": 0.9143, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.03547854785478548, |
| "grad_norm": 0.4762454330921173, |
| "learning_rate": 2.15e-06, |
| "loss": 0.9235, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.036303630363036306, |
| "grad_norm": 0.46506863832473755, |
| "learning_rate": 2.2e-06, |
| "loss": 0.9018, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.03712871287128713, |
| "grad_norm": 0.4539809226989746, |
| "learning_rate": 2.25e-06, |
| "loss": 0.8876, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.037953795379537955, |
| "grad_norm": 0.4611811935901642, |
| "learning_rate": 2.3000000000000004e-06, |
| "loss": 0.9106, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.038778877887788776, |
| "grad_norm": 0.4345838129520416, |
| "learning_rate": 2.35e-06, |
| "loss": 0.8899, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.039603960396039604, |
| "grad_norm": 0.4400959014892578, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 0.8978, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.040429042904290426, |
| "grad_norm": 0.42290085554122925, |
| "learning_rate": 2.4500000000000003e-06, |
| "loss": 0.8991, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.041254125412541254, |
| "grad_norm": 0.4143967032432556, |
| "learning_rate": 2.5e-06, |
| "loss": 0.869, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04207920792079208, |
| "grad_norm": 0.39597901701927185, |
| "learning_rate": 2.55e-06, |
| "loss": 0.8457, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.0429042904290429, |
| "grad_norm": 0.3814972937107086, |
| "learning_rate": 2.6e-06, |
| "loss": 0.8418, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.04372937293729373, |
| "grad_norm": 0.398303359746933, |
| "learning_rate": 2.6500000000000005e-06, |
| "loss": 0.8684, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.04455445544554455, |
| "grad_norm": 0.3740525543689728, |
| "learning_rate": 2.7000000000000004e-06, |
| "loss": 0.8673, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.04537953795379538, |
| "grad_norm": 0.4020557701587677, |
| "learning_rate": 2.7500000000000004e-06, |
| "loss": 0.8882, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0462046204620462, |
| "grad_norm": 0.38221463561058044, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 0.8663, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.04702970297029703, |
| "grad_norm": 0.3905200958251953, |
| "learning_rate": 2.85e-06, |
| "loss": 0.8683, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.04785478547854786, |
| "grad_norm": 0.3818514347076416, |
| "learning_rate": 2.9e-06, |
| "loss": 0.8721, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.04867986798679868, |
| "grad_norm": 0.35962340235710144, |
| "learning_rate": 2.95e-06, |
| "loss": 0.8523, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.04950495049504951, |
| "grad_norm": 0.3732520341873169, |
| "learning_rate": 3e-06, |
| "loss": 0.8376, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.05033003300330033, |
| "grad_norm": 0.3615162670612335, |
| "learning_rate": 3.05e-06, |
| "loss": 0.8523, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.05115511551155116, |
| "grad_norm": 0.37727200984954834, |
| "learning_rate": 3.1000000000000004e-06, |
| "loss": 0.8296, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.05198019801980198, |
| "grad_norm": 0.35481664538383484, |
| "learning_rate": 3.1500000000000003e-06, |
| "loss": 0.8513, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.052805280528052806, |
| "grad_norm": 0.35886090993881226, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 0.8584, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.05363036303630363, |
| "grad_norm": 0.3518712818622589, |
| "learning_rate": 3.2500000000000002e-06, |
| "loss": 0.8583, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.054455445544554455, |
| "grad_norm": 0.33794984221458435, |
| "learning_rate": 3.3000000000000006e-06, |
| "loss": 0.8414, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.05528052805280528, |
| "grad_norm": 0.336191862821579, |
| "learning_rate": 3.3500000000000005e-06, |
| "loss": 0.8674, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.056105610561056105, |
| "grad_norm": 0.3314290940761566, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 0.8182, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.05693069306930693, |
| "grad_norm": 0.3418997526168823, |
| "learning_rate": 3.45e-06, |
| "loss": 0.8405, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.057755775577557754, |
| "grad_norm": 0.3234967589378357, |
| "learning_rate": 3.5e-06, |
| "loss": 0.8417, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.05858085808580858, |
| "grad_norm": 0.33048129081726074, |
| "learning_rate": 3.5500000000000003e-06, |
| "loss": 0.8213, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.0594059405940594, |
| "grad_norm": 0.3219156563282013, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 0.8266, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.06023102310231023, |
| "grad_norm": 0.31983497738838196, |
| "learning_rate": 3.65e-06, |
| "loss": 0.8298, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.06105610561056106, |
| "grad_norm": 0.32737359404563904, |
| "learning_rate": 3.7e-06, |
| "loss": 0.8048, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.06188118811881188, |
| "grad_norm": 0.325057715177536, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 0.8299, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.0627062706270627, |
| "grad_norm": 0.32514944672584534, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 0.8279, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.06353135313531354, |
| "grad_norm": 0.33182644844055176, |
| "learning_rate": 3.85e-06, |
| "loss": 0.8295, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.06435643564356436, |
| "grad_norm": 0.3327374756336212, |
| "learning_rate": 3.900000000000001e-06, |
| "loss": 0.8365, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.06518151815181518, |
| "grad_norm": 0.31190282106399536, |
| "learning_rate": 3.95e-06, |
| "loss": 0.8437, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.066006600660066, |
| "grad_norm": 0.3261486887931824, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.8258, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.06683168316831684, |
| "grad_norm": 0.3433217704296112, |
| "learning_rate": 4.05e-06, |
| "loss": 0.8379, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.06765676567656766, |
| "grad_norm": 0.32538896799087524, |
| "learning_rate": 4.1e-06, |
| "loss": 0.8037, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.06848184818481848, |
| "grad_norm": 0.3292044401168823, |
| "learning_rate": 4.15e-06, |
| "loss": 0.8385, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.06930693069306931, |
| "grad_norm": 0.32659634947776794, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 0.8648, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.07013201320132013, |
| "grad_norm": 0.32226109504699707, |
| "learning_rate": 4.25e-06, |
| "loss": 0.8544, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.07095709570957096, |
| "grad_norm": 0.3303010165691376, |
| "learning_rate": 4.3e-06, |
| "loss": 0.8162, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.07178217821782178, |
| "grad_norm": 0.3263317346572876, |
| "learning_rate": 4.350000000000001e-06, |
| "loss": 0.8326, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.07260726072607261, |
| "grad_norm": 0.31490302085876465, |
| "learning_rate": 4.4e-06, |
| "loss": 0.821, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.07343234323432343, |
| "grad_norm": 0.32537841796875, |
| "learning_rate": 4.450000000000001e-06, |
| "loss": 0.8262, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.07425742574257425, |
| "grad_norm": 0.32452619075775146, |
| "learning_rate": 4.5e-06, |
| "loss": 0.7995, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.07508250825082509, |
| "grad_norm": 0.3285425901412964, |
| "learning_rate": 4.5500000000000005e-06, |
| "loss": 0.8243, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.07590759075907591, |
| "grad_norm": 0.32374563813209534, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 0.8089, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.07673267326732673, |
| "grad_norm": 0.32995370030403137, |
| "learning_rate": 4.65e-06, |
| "loss": 0.8281, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.07755775577557755, |
| "grad_norm": 0.31566327810287476, |
| "learning_rate": 4.7e-06, |
| "loss": 0.8248, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.07838283828382839, |
| "grad_norm": 0.32131826877593994, |
| "learning_rate": 4.75e-06, |
| "loss": 0.8183, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.07920792079207921, |
| "grad_norm": 0.3297450840473175, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 0.8159, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.08003300330033003, |
| "grad_norm": 0.33907413482666016, |
| "learning_rate": 4.85e-06, |
| "loss": 0.8167, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.08085808580858085, |
| "grad_norm": 0.33130621910095215, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 0.8293, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.08168316831683169, |
| "grad_norm": 0.3269996643066406, |
| "learning_rate": 4.95e-06, |
| "loss": 0.7956, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.08250825082508251, |
| "grad_norm": 0.31901946663856506, |
| "learning_rate": 5e-06, |
| "loss": 0.7986, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08333333333333333, |
| "grad_norm": 0.3291037082672119, |
| "learning_rate": 4.999999760155817e-06, |
| "loss": 0.8103, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.08415841584158416, |
| "grad_norm": 0.32120469212532043, |
| "learning_rate": 4.999999040623315e-06, |
| "loss": 0.8099, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.08498349834983498, |
| "grad_norm": 0.3390505909919739, |
| "learning_rate": 4.999997841402631e-06, |
| "loss": 0.8195, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.0858085808580858, |
| "grad_norm": 0.33092647790908813, |
| "learning_rate": 4.9999961624939945e-06, |
| "loss": 0.816, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.08663366336633663, |
| "grad_norm": 0.32288476824760437, |
| "learning_rate": 4.999994003897729e-06, |
| "loss": 0.8034, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.08745874587458746, |
| "grad_norm": 0.33893638849258423, |
| "learning_rate": 4.999991365614248e-06, |
| "loss": 0.8255, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.08828382838283828, |
| "grad_norm": 0.3212301731109619, |
| "learning_rate": 4.999988247644058e-06, |
| "loss": 0.8053, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.0891089108910891, |
| "grad_norm": 0.3236828148365021, |
| "learning_rate": 4.999984649987758e-06, |
| "loss": 0.7893, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.08993399339933994, |
| "grad_norm": 0.3349727988243103, |
| "learning_rate": 4.999980572646038e-06, |
| "loss": 0.8239, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.09075907590759076, |
| "grad_norm": 0.31924861669540405, |
| "learning_rate": 4.999976015619679e-06, |
| "loss": 0.792, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.09158415841584158, |
| "grad_norm": 0.3242986798286438, |
| "learning_rate": 4.999970978909556e-06, |
| "loss": 0.7889, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.0924092409240924, |
| "grad_norm": 0.32954731583595276, |
| "learning_rate": 4.999965462516636e-06, |
| "loss": 0.7983, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.09323432343234324, |
| "grad_norm": 0.43535739183425903, |
| "learning_rate": 4.999959466441976e-06, |
| "loss": 0.7884, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.09405940594059406, |
| "grad_norm": 0.3296893537044525, |
| "learning_rate": 4.999952990686729e-06, |
| "loss": 0.8129, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.09488448844884488, |
| "grad_norm": 0.33009904623031616, |
| "learning_rate": 4.999946035252136e-06, |
| "loss": 0.8134, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.09570957095709572, |
| "grad_norm": 0.33187994360923767, |
| "learning_rate": 4.999938600139531e-06, |
| "loss": 0.7787, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.09653465346534654, |
| "grad_norm": 0.33838731050491333, |
| "learning_rate": 4.999930685350342e-06, |
| "loss": 0.8065, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.09735973597359736, |
| "grad_norm": 0.3391963541507721, |
| "learning_rate": 4.999922290886087e-06, |
| "loss": 0.7982, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.09818481848184818, |
| "grad_norm": 0.33598625659942627, |
| "learning_rate": 4.999913416748376e-06, |
| "loss": 0.8136, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.09900990099009901, |
| "grad_norm": 0.33401018381118774, |
| "learning_rate": 4.999904062938913e-06, |
| "loss": 0.7953, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.09983498349834984, |
| "grad_norm": 0.3376487195491791, |
| "learning_rate": 4.999894229459492e-06, |
| "loss": 0.8085, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.10066006600660066, |
| "grad_norm": 0.33256658911705017, |
| "learning_rate": 4.999883916312e-06, |
| "loss": 0.8121, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.10148514851485149, |
| "grad_norm": 0.3282712399959564, |
| "learning_rate": 4.999873123498416e-06, |
| "loss": 0.8101, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.10231023102310231, |
| "grad_norm": 0.3370251953601837, |
| "learning_rate": 4.999861851020811e-06, |
| "loss": 0.8091, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.10313531353135313, |
| "grad_norm": 0.34541329741477966, |
| "learning_rate": 4.999850098881347e-06, |
| "loss": 0.8045, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.10396039603960396, |
| "grad_norm": 0.3403375446796417, |
| "learning_rate": 4.99983786708228e-06, |
| "loss": 0.8307, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.10478547854785479, |
| "grad_norm": 0.3415558338165283, |
| "learning_rate": 4.9998251556259555e-06, |
| "loss": 0.8012, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.10561056105610561, |
| "grad_norm": 0.33992183208465576, |
| "learning_rate": 4.9998119645148145e-06, |
| "loss": 0.8036, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.10643564356435643, |
| "grad_norm": 0.3449820280075073, |
| "learning_rate": 4.999798293751387e-06, |
| "loss": 0.7627, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.10726072607260725, |
| "grad_norm": 0.3471784293651581, |
| "learning_rate": 4.999784143338296e-06, |
| "loss": 0.7936, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.10808580858085809, |
| "grad_norm": 0.37417036294937134, |
| "learning_rate": 4.999769513278258e-06, |
| "loss": 0.767, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.10891089108910891, |
| "grad_norm": 0.3380601108074188, |
| "learning_rate": 4.999754403574077e-06, |
| "loss": 0.7926, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.10973597359735973, |
| "grad_norm": 0.36881914734840393, |
| "learning_rate": 4.999738814228655e-06, |
| "loss": 0.7982, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.11056105610561057, |
| "grad_norm": 0.3481132388114929, |
| "learning_rate": 4.999722745244983e-06, |
| "loss": 0.8036, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.11138613861386139, |
| "grad_norm": 0.3401270806789398, |
| "learning_rate": 4.999706196626143e-06, |
| "loss": 0.7879, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.11221122112211221, |
| "grad_norm": 0.3421446979045868, |
| "learning_rate": 4.99968916837531e-06, |
| "loss": 0.7994, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.11303630363036303, |
| "grad_norm": 0.3525069057941437, |
| "learning_rate": 4.999671660495754e-06, |
| "loss": 0.799, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.11386138613861387, |
| "grad_norm": 0.33705243468284607, |
| "learning_rate": 4.999653672990831e-06, |
| "loss": 0.797, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.11468646864686469, |
| "grad_norm": 0.34886419773101807, |
| "learning_rate": 4.999635205863994e-06, |
| "loss": 0.7911, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.11551155115511551, |
| "grad_norm": 0.3489723205566406, |
| "learning_rate": 4.999616259118787e-06, |
| "loss": 0.785, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.11633663366336634, |
| "grad_norm": 0.3380016088485718, |
| "learning_rate": 4.999596832758844e-06, |
| "loss": 0.7946, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.11716171617161716, |
| "grad_norm": 0.3427796959877014, |
| "learning_rate": 4.999576926787893e-06, |
| "loss": 0.8117, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.11798679867986799, |
| "grad_norm": 0.35554543137550354, |
| "learning_rate": 4.9995565412097535e-06, |
| "loss": 0.7546, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.1188118811881188, |
| "grad_norm": 0.3470841646194458, |
| "learning_rate": 4.999535676028338e-06, |
| "loss": 0.7796, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.11963696369636964, |
| "grad_norm": 0.3392399251461029, |
| "learning_rate": 4.999514331247648e-06, |
| "loss": 0.7639, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.12046204620462046, |
| "grad_norm": 0.33807244896888733, |
| "learning_rate": 4.999492506871781e-06, |
| "loss": 0.7789, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.12128712871287128, |
| "grad_norm": 0.3525167405605316, |
| "learning_rate": 4.999470202904923e-06, |
| "loss": 0.7929, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.12211221122112212, |
| "grad_norm": 0.35303160548210144, |
| "learning_rate": 4.9994474193513545e-06, |
| "loss": 0.8019, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.12293729372937294, |
| "grad_norm": 0.3498503267765045, |
| "learning_rate": 4.999424156215446e-06, |
| "loss": 0.7801, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.12376237623762376, |
| "grad_norm": 0.348417192697525, |
| "learning_rate": 4.9994004135016625e-06, |
| "loss": 0.7744, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.12458745874587458, |
| "grad_norm": 0.35064205527305603, |
| "learning_rate": 4.999376191214559e-06, |
| "loss": 0.756, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.1254125412541254, |
| "grad_norm": 0.3553641736507416, |
| "learning_rate": 4.999351489358783e-06, |
| "loss": 0.7864, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.12623762376237624, |
| "grad_norm": 0.35194501280784607, |
| "learning_rate": 4.999326307939076e-06, |
| "loss": 0.7616, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.12706270627062707, |
| "grad_norm": 0.34095126390457153, |
| "learning_rate": 4.999300646960267e-06, |
| "loss": 0.7951, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.12788778877887788, |
| "grad_norm": 0.33836010098457336, |
| "learning_rate": 4.999274506427281e-06, |
| "loss": 0.8008, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.12871287128712872, |
| "grad_norm": 0.343062162399292, |
| "learning_rate": 4.9992478863451335e-06, |
| "loss": 0.7906, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.12953795379537955, |
| "grad_norm": 0.3474920988082886, |
| "learning_rate": 4.999220786718932e-06, |
| "loss": 0.7654, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.13036303630363036, |
| "grad_norm": 0.36109790205955505, |
| "learning_rate": 4.9991932075538765e-06, |
| "loss": 0.7615, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.1311881188118812, |
| "grad_norm": 0.3488277792930603, |
| "learning_rate": 4.99916514885526e-06, |
| "loss": 0.7799, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.132013201320132, |
| "grad_norm": 0.36712321639060974, |
| "learning_rate": 4.9991366106284635e-06, |
| "loss": 0.8018, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.13283828382838284, |
| "grad_norm": 0.34038668870925903, |
| "learning_rate": 4.999107592878964e-06, |
| "loss": 0.7605, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.13366336633663367, |
| "grad_norm": 0.35164085030555725, |
| "learning_rate": 4.999078095612332e-06, |
| "loss": 0.7894, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.13448844884488448, |
| "grad_norm": 0.35475778579711914, |
| "learning_rate": 4.9990481188342234e-06, |
| "loss": 0.7915, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.1353135313531353, |
| "grad_norm": 0.3551616072654724, |
| "learning_rate": 4.999017662550392e-06, |
| "loss": 0.7929, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.13613861386138615, |
| "grad_norm": 0.34733644127845764, |
| "learning_rate": 4.99898672676668e-06, |
| "loss": 0.7892, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.13696369636963696, |
| "grad_norm": 0.35961049795150757, |
| "learning_rate": 4.998955311489025e-06, |
| "loss": 0.7567, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.1377887788778878, |
| "grad_norm": 0.3517046570777893, |
| "learning_rate": 4.998923416723456e-06, |
| "loss": 0.755, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.13861386138613863, |
| "grad_norm": 0.35234466195106506, |
| "learning_rate": 4.998891042476089e-06, |
| "loss": 0.7761, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.13943894389438943, |
| "grad_norm": 0.3564783036708832, |
| "learning_rate": 4.9988581887531386e-06, |
| "loss": 0.7763, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.14026402640264027, |
| "grad_norm": 0.3533042371273041, |
| "learning_rate": 4.998824855560907e-06, |
| "loss": 0.7615, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.14108910891089108, |
| "grad_norm": 0.3700357973575592, |
| "learning_rate": 4.998791042905791e-06, |
| "loss": 0.7607, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.1419141914191419, |
| "grad_norm": 0.35690873861312866, |
| "learning_rate": 4.99875675079428e-06, |
| "loss": 0.7713, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.14273927392739275, |
| "grad_norm": 0.36469247937202454, |
| "learning_rate": 4.9987219792329505e-06, |
| "loss": 0.7696, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.14356435643564355, |
| "grad_norm": 0.37469345331192017, |
| "learning_rate": 4.998686728228476e-06, |
| "loss": 0.7739, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.1443894389438944, |
| "grad_norm": 0.36179906129837036, |
| "learning_rate": 4.9986509977876205e-06, |
| "loss": 0.7708, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.14521452145214522, |
| "grad_norm": 0.37290266156196594, |
| "learning_rate": 4.9986147879172395e-06, |
| "loss": 0.7663, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.14603960396039603, |
| "grad_norm": 0.36196717619895935, |
| "learning_rate": 4.998578098624282e-06, |
| "loss": 0.7517, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.14686468646864687, |
| "grad_norm": 0.38069623708724976, |
| "learning_rate": 4.998540929915784e-06, |
| "loss": 0.7966, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.1476897689768977, |
| "grad_norm": 0.3567394018173218, |
| "learning_rate": 4.998503281798882e-06, |
| "loss": 0.7781, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.1485148514851485, |
| "grad_norm": 0.3728371560573578, |
| "learning_rate": 4.998465154280796e-06, |
| "loss": 0.7537, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.14933993399339934, |
| "grad_norm": 0.3626164197921753, |
| "learning_rate": 4.998426547368844e-06, |
| "loss": 0.7811, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.15016501650165018, |
| "grad_norm": 0.3611028492450714, |
| "learning_rate": 4.998387461070433e-06, |
| "loss": 0.7635, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.15099009900990099, |
| "grad_norm": 0.37311699986457825, |
| "learning_rate": 4.998347895393063e-06, |
| "loss": 0.7513, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.15181518151815182, |
| "grad_norm": 0.3596293032169342, |
| "learning_rate": 4.998307850344325e-06, |
| "loss": 0.7806, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.15264026402640263, |
| "grad_norm": 0.3608078956604004, |
| "learning_rate": 4.998267325931903e-06, |
| "loss": 0.7766, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.15346534653465346, |
| "grad_norm": 0.3593875467777252, |
| "learning_rate": 4.998226322163573e-06, |
| "loss": 0.7557, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.1542904290429043, |
| "grad_norm": 0.3720031976699829, |
| "learning_rate": 4.998184839047202e-06, |
| "loss": 0.7779, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.1551155115511551, |
| "grad_norm": 0.36318108439445496, |
| "learning_rate": 4.998142876590749e-06, |
| "loss": 0.7707, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.15594059405940594, |
| "grad_norm": 0.3631824851036072, |
| "learning_rate": 4.998100434802267e-06, |
| "loss": 0.7808, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.15676567656765678, |
| "grad_norm": 0.37768399715423584, |
| "learning_rate": 4.9980575136899e-06, |
| "loss": 0.7674, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.15759075907590758, |
| "grad_norm": 0.3613969385623932, |
| "learning_rate": 4.998014113261882e-06, |
| "loss": 0.7673, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.15841584158415842, |
| "grad_norm": 0.3773675262928009, |
| "learning_rate": 4.99797023352654e-06, |
| "loss": 0.7719, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.15924092409240925, |
| "grad_norm": 0.36111781001091003, |
| "learning_rate": 4.997925874492295e-06, |
| "loss": 0.7558, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.16006600660066006, |
| "grad_norm": 0.3750019967556, |
| "learning_rate": 4.997881036167659e-06, |
| "loss": 0.778, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.1608910891089109, |
| "grad_norm": 0.37131670117378235, |
| "learning_rate": 4.997835718561232e-06, |
| "loss": 0.7325, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.1617161716171617, |
| "grad_norm": 0.37602895498275757, |
| "learning_rate": 4.9977899216817124e-06, |
| "loss": 0.7573, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.16254125412541254, |
| "grad_norm": 0.37271130084991455, |
| "learning_rate": 4.9977436455378865e-06, |
| "loss": 0.8003, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.16336633663366337, |
| "grad_norm": 0.37385690212249756, |
| "learning_rate": 4.997696890138635e-06, |
| "loss": 0.757, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.16419141914191418, |
| "grad_norm": 0.3819064199924469, |
| "learning_rate": 4.997649655492925e-06, |
| "loss": 0.7425, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.16501650165016502, |
| "grad_norm": 0.3783811032772064, |
| "learning_rate": 4.997601941609824e-06, |
| "loss": 0.7637, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.16584158415841585, |
| "grad_norm": 0.353215754032135, |
| "learning_rate": 4.997553748498486e-06, |
| "loss": 0.7579, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.16666666666666666, |
| "grad_norm": 0.36633336544036865, |
| "learning_rate": 4.9975050761681574e-06, |
| "loss": 0.7514, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.1674917491749175, |
| "grad_norm": 0.36640000343322754, |
| "learning_rate": 4.997455924628176e-06, |
| "loss": 0.7634, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.16831683168316833, |
| "grad_norm": 0.36854660511016846, |
| "learning_rate": 4.997406293887976e-06, |
| "loss": 0.7585, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.16914191419141913, |
| "grad_norm": 0.36816444993019104, |
| "learning_rate": 4.9973561839570775e-06, |
| "loss": 0.7268, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.16996699669966997, |
| "grad_norm": 0.37374815344810486, |
| "learning_rate": 4.997305594845097e-06, |
| "loss": 0.7654, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.1707920792079208, |
| "grad_norm": 0.37258732318878174, |
| "learning_rate": 4.997254526561739e-06, |
| "loss": 0.7441, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.1716171617161716, |
| "grad_norm": 0.3762890696525574, |
| "learning_rate": 4.997202979116805e-06, |
| "loss": 0.7766, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.17244224422442245, |
| "grad_norm": 0.3801199197769165, |
| "learning_rate": 4.997150952520185e-06, |
| "loss": 0.7552, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.17326732673267325, |
| "grad_norm": 0.38364118337631226, |
| "learning_rate": 4.997098446781861e-06, |
| "loss": 0.76, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1740924092409241, |
| "grad_norm": 0.36506178975105286, |
| "learning_rate": 4.997045461911907e-06, |
| "loss": 0.755, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.17491749174917492, |
| "grad_norm": 0.3866812586784363, |
| "learning_rate": 4.996991997920491e-06, |
| "loss": 0.757, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.17574257425742573, |
| "grad_norm": 0.37725409865379333, |
| "learning_rate": 4.99693805481787e-06, |
| "loss": 0.7598, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.17656765676567657, |
| "grad_norm": 0.3766557276248932, |
| "learning_rate": 4.996883632614396e-06, |
| "loss": 0.7658, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.1773927392739274, |
| "grad_norm": 0.3847043812274933, |
| "learning_rate": 4.99682873132051e-06, |
| "loss": 0.7284, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.1782178217821782, |
| "grad_norm": 0.3873996138572693, |
| "learning_rate": 4.996773350946747e-06, |
| "loss": 0.7336, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.17904290429042904, |
| "grad_norm": 0.3642526865005493, |
| "learning_rate": 4.9967174915037305e-06, |
| "loss": 0.7498, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.17986798679867988, |
| "grad_norm": 0.3840898871421814, |
| "learning_rate": 4.996661153002183e-06, |
| "loss": 0.7708, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.1806930693069307, |
| "grad_norm": 0.3865383267402649, |
| "learning_rate": 4.996604335452911e-06, |
| "loss": 0.7667, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.18151815181518152, |
| "grad_norm": 0.367243230342865, |
| "learning_rate": 4.996547038866817e-06, |
| "loss": 0.7565, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.18234323432343233, |
| "grad_norm": 0.3867991864681244, |
| "learning_rate": 4.996489263254897e-06, |
| "loss": 0.7634, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.18316831683168316, |
| "grad_norm": 0.3795239329338074, |
| "learning_rate": 4.996431008628234e-06, |
| "loss": 0.7624, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.183993399339934, |
| "grad_norm": 0.3892177641391754, |
| "learning_rate": 4.996372274998007e-06, |
| "loss": 0.7672, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.1848184818481848, |
| "grad_norm": 0.3779730498790741, |
| "learning_rate": 4.9963130623754855e-06, |
| "loss": 0.7367, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.18564356435643564, |
| "grad_norm": 0.36853930354118347, |
| "learning_rate": 4.99625337077203e-06, |
| "loss": 0.7609, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.18646864686468648, |
| "grad_norm": 0.37936636805534363, |
| "learning_rate": 4.996193200199094e-06, |
| "loss": 0.7647, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.18729372937293728, |
| "grad_norm": 0.3741963505744934, |
| "learning_rate": 4.996132550668224e-06, |
| "loss": 0.7413, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.18811881188118812, |
| "grad_norm": 0.3922561705112457, |
| "learning_rate": 4.996071422191057e-06, |
| "loss": 0.7381, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.18894389438943895, |
| "grad_norm": 0.388773113489151, |
| "learning_rate": 4.996009814779321e-06, |
| "loss": 0.7642, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.18976897689768976, |
| "grad_norm": 0.37353891134262085, |
| "learning_rate": 4.995947728444837e-06, |
| "loss": 0.7481, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1905940594059406, |
| "grad_norm": 0.3833358585834503, |
| "learning_rate": 4.995885163199519e-06, |
| "loss": 0.7479, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.19141914191419143, |
| "grad_norm": 0.3960364758968353, |
| "learning_rate": 4.9958221190553705e-06, |
| "loss": 0.7751, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.19224422442244224, |
| "grad_norm": 0.3736567795276642, |
| "learning_rate": 4.995758596024488e-06, |
| "loss": 0.7623, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.19306930693069307, |
| "grad_norm": 0.37923943996429443, |
| "learning_rate": 4.9956945941190614e-06, |
| "loss": 0.765, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.19389438943894388, |
| "grad_norm": 0.37835410237312317, |
| "learning_rate": 4.99563011335137e-06, |
| "loss": 0.7569, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.19471947194719472, |
| "grad_norm": 0.37825924158096313, |
| "learning_rate": 4.9955651537337865e-06, |
| "loss": 0.7551, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.19554455445544555, |
| "grad_norm": 0.38854920864105225, |
| "learning_rate": 4.995499715278774e-06, |
| "loss": 0.7527, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.19636963696369636, |
| "grad_norm": 0.38994744420051575, |
| "learning_rate": 4.995433797998891e-06, |
| "loss": 0.7664, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.1971947194719472, |
| "grad_norm": 0.3996884524822235, |
| "learning_rate": 4.995367401906783e-06, |
| "loss": 0.7932, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.19801980198019803, |
| "grad_norm": 0.3820636570453644, |
| "learning_rate": 4.99530052701519e-06, |
| "loss": 0.7575, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.19884488448844884, |
| "grad_norm": 0.3814580738544464, |
| "learning_rate": 4.9952331733369455e-06, |
| "loss": 0.7476, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.19966996699669967, |
| "grad_norm": 0.4007291793823242, |
| "learning_rate": 4.995165340884971e-06, |
| "loss": 0.7671, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.2004950495049505, |
| "grad_norm": 0.38576772809028625, |
| "learning_rate": 4.995097029672282e-06, |
| "loss": 0.7705, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.20132013201320131, |
| "grad_norm": 0.3890525698661804, |
| "learning_rate": 4.995028239711987e-06, |
| "loss": 0.739, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.20214521452145215, |
| "grad_norm": 0.3844315707683563, |
| "learning_rate": 4.994958971017285e-06, |
| "loss": 0.7464, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.20297029702970298, |
| "grad_norm": 0.3911251425743103, |
| "learning_rate": 4.994889223601466e-06, |
| "loss": 0.7628, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.2037953795379538, |
| "grad_norm": 0.40388089418411255, |
| "learning_rate": 4.994818997477912e-06, |
| "loss": 0.7437, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.20462046204620463, |
| "grad_norm": 0.3855500817298889, |
| "learning_rate": 4.994748292660101e-06, |
| "loss": 0.7687, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.20544554455445543, |
| "grad_norm": 0.3879557251930237, |
| "learning_rate": 4.994677109161597e-06, |
| "loss": 0.7712, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.20627062706270627, |
| "grad_norm": 0.40615251660346985, |
| "learning_rate": 4.9946054469960574e-06, |
| "loss": 0.7495, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.2070957095709571, |
| "grad_norm": 0.3867475986480713, |
| "learning_rate": 4.9945333061772346e-06, |
| "loss": 0.7467, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.2079207920792079, |
| "grad_norm": 0.37825512886047363, |
| "learning_rate": 4.99446068671897e-06, |
| "loss": 0.7413, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.20874587458745875, |
| "grad_norm": 0.3996836543083191, |
| "learning_rate": 4.994387588635197e-06, |
| "loss": 0.7635, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.20957095709570958, |
| "grad_norm": 0.38707849383354187, |
| "learning_rate": 4.994314011939941e-06, |
| "loss": 0.7418, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.2103960396039604, |
| "grad_norm": 0.3831859827041626, |
| "learning_rate": 4.994239956647321e-06, |
| "loss": 0.7593, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.21122112211221122, |
| "grad_norm": 0.3887327015399933, |
| "learning_rate": 4.994165422771545e-06, |
| "loss": 0.777, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.21204620462046206, |
| "grad_norm": 0.39478468894958496, |
| "learning_rate": 4.994090410326916e-06, |
| "loss": 0.7535, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.21287128712871287, |
| "grad_norm": 0.3960399627685547, |
| "learning_rate": 4.994014919327824e-06, |
| "loss": 0.7519, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.2136963696369637, |
| "grad_norm": 0.40766602754592896, |
| "learning_rate": 4.9939389497887565e-06, |
| "loss": 0.7339, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.2145214521452145, |
| "grad_norm": 0.397185742855072, |
| "learning_rate": 4.993862501724289e-06, |
| "loss": 0.7823, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.21534653465346534, |
| "grad_norm": 0.41641128063201904, |
| "learning_rate": 4.993785575149092e-06, |
| "loss": 0.7289, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.21617161716171618, |
| "grad_norm": 0.39782091975212097, |
| "learning_rate": 4.993708170077922e-06, |
| "loss": 0.7472, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.21699669966996699, |
| "grad_norm": 0.39305055141448975, |
| "learning_rate": 4.993630286525634e-06, |
| "loss": 0.7659, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.21782178217821782, |
| "grad_norm": 0.39734795689582825, |
| "learning_rate": 4.993551924507172e-06, |
| "loss": 0.7543, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.21864686468646866, |
| "grad_norm": 0.43676817417144775, |
| "learning_rate": 4.99347308403757e-06, |
| "loss": 0.75, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.21947194719471946, |
| "grad_norm": 0.3790392279624939, |
| "learning_rate": 4.993393765131956e-06, |
| "loss": 0.7481, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.2202970297029703, |
| "grad_norm": 0.3992585241794586, |
| "learning_rate": 4.993313967805551e-06, |
| "loss": 0.7467, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.22112211221122113, |
| "grad_norm": 0.40704596042633057, |
| "learning_rate": 4.9932336920736645e-06, |
| "loss": 0.761, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.22194719471947194, |
| "grad_norm": 0.394362211227417, |
| "learning_rate": 4.9931529379517006e-06, |
| "loss": 0.7424, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.22277227722772278, |
| "grad_norm": 0.4009395241737366, |
| "learning_rate": 4.993071705455152e-06, |
| "loss": 0.7471, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2235973597359736, |
| "grad_norm": 0.4041494131088257, |
| "learning_rate": 4.992989994599607e-06, |
| "loss": 0.7309, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.22442244224422442, |
| "grad_norm": 0.39666467905044556, |
| "learning_rate": 4.992907805400744e-06, |
| "loss": 0.7448, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.22524752475247525, |
| "grad_norm": 0.39431697130203247, |
| "learning_rate": 4.992825137874332e-06, |
| "loss": 0.7679, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.22607260726072606, |
| "grad_norm": 0.39886102080345154, |
| "learning_rate": 4.992741992036234e-06, |
| "loss": 0.7564, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.2268976897689769, |
| "grad_norm": 0.38774392008781433, |
| "learning_rate": 4.992658367902402e-06, |
| "loss": 0.7545, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.22772277227722773, |
| "grad_norm": 0.40633663535118103, |
| "learning_rate": 4.992574265488883e-06, |
| "loss": 0.7531, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.22854785478547854, |
| "grad_norm": 0.4006880819797516, |
| "learning_rate": 4.9924896848118145e-06, |
| "loss": 0.7205, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.22937293729372937, |
| "grad_norm": 0.40613582730293274, |
| "learning_rate": 4.992404625887423e-06, |
| "loss": 0.756, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.2301980198019802, |
| "grad_norm": 0.41099947690963745, |
| "learning_rate": 4.9923190887320315e-06, |
| "loss": 0.7728, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.23102310231023102, |
| "grad_norm": 0.41351890563964844, |
| "learning_rate": 4.992233073362052e-06, |
| "loss": 0.7472, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.23184818481848185, |
| "grad_norm": 0.3932720124721527, |
| "learning_rate": 4.992146579793988e-06, |
| "loss": 0.7465, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.23267326732673269, |
| "grad_norm": 0.4054405987262726, |
| "learning_rate": 4.992059608044436e-06, |
| "loss": 0.7425, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.2334983498349835, |
| "grad_norm": 0.4057449400424957, |
| "learning_rate": 4.991972158130084e-06, |
| "loss": 0.7665, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.23432343234323433, |
| "grad_norm": 0.4057076573371887, |
| "learning_rate": 4.99188423006771e-06, |
| "loss": 0.7388, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.23514851485148514, |
| "grad_norm": 0.4172205328941345, |
| "learning_rate": 4.991795823874188e-06, |
| "loss": 0.7421, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.23597359735973597, |
| "grad_norm": 0.42152348160743713, |
| "learning_rate": 4.9917069395664786e-06, |
| "loss": 0.7247, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.2367986798679868, |
| "grad_norm": 0.3970670998096466, |
| "learning_rate": 4.991617577161638e-06, |
| "loss": 0.736, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.2376237623762376, |
| "grad_norm": 0.41120800375938416, |
| "learning_rate": 4.991527736676811e-06, |
| "loss": 0.7373, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.23844884488448845, |
| "grad_norm": 0.4015986919403076, |
| "learning_rate": 4.991437418129237e-06, |
| "loss": 0.7249, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.23927392739273928, |
| "grad_norm": 0.4287201762199402, |
| "learning_rate": 4.991346621536245e-06, |
| "loss": 0.7792, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.2400990099009901, |
| "grad_norm": 0.40472856163978577, |
| "learning_rate": 4.991255346915258e-06, |
| "loss": 0.7365, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.24092409240924093, |
| "grad_norm": 0.4001949727535248, |
| "learning_rate": 4.991163594283789e-06, |
| "loss": 0.7265, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.24174917491749176, |
| "grad_norm": 0.3954242467880249, |
| "learning_rate": 4.991071363659442e-06, |
| "loss": 0.7257, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.24257425742574257, |
| "grad_norm": 0.3959173560142517, |
| "learning_rate": 4.990978655059914e-06, |
| "loss": 0.749, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.2433993399339934, |
| "grad_norm": 0.4046264588832855, |
| "learning_rate": 4.990885468502995e-06, |
| "loss": 0.739, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.24422442244224424, |
| "grad_norm": 0.3978911340236664, |
| "learning_rate": 4.990791804006563e-06, |
| "loss": 0.7098, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.24504950495049505, |
| "grad_norm": 0.3915862441062927, |
| "learning_rate": 4.9906976615885916e-06, |
| "loss": 0.7323, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.24587458745874588, |
| "grad_norm": 0.39746829867362976, |
| "learning_rate": 4.990603041267144e-06, |
| "loss": 0.741, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.2466996699669967, |
| "grad_norm": 0.4023366868495941, |
| "learning_rate": 4.990507943060374e-06, |
| "loss": 0.7712, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.24752475247524752, |
| "grad_norm": 0.4111311137676239, |
| "learning_rate": 4.9904123669865315e-06, |
| "loss": 0.7451, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.24834983498349836, |
| "grad_norm": 0.4031809866428375, |
| "learning_rate": 4.990316313063953e-06, |
| "loss": 0.732, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.24917491749174916, |
| "grad_norm": 0.4134192168712616, |
| "learning_rate": 4.99021978131107e-06, |
| "loss": 0.7404, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.39537039399147034, |
| "learning_rate": 4.990122771746403e-06, |
| "loss": 0.7175, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.2508250825082508, |
| "grad_norm": 0.42326098680496216, |
| "learning_rate": 4.990025284388567e-06, |
| "loss": 0.7663, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.25165016501650167, |
| "grad_norm": 0.42979490756988525, |
| "learning_rate": 4.989927319256269e-06, |
| "loss": 0.7227, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.2524752475247525, |
| "grad_norm": 0.420631468296051, |
| "learning_rate": 4.989828876368303e-06, |
| "loss": 0.7506, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.2533003300330033, |
| "grad_norm": 0.4130260944366455, |
| "learning_rate": 4.989729955743559e-06, |
| "loss": 0.7324, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.25412541254125415, |
| "grad_norm": 0.39602625370025635, |
| "learning_rate": 4.989630557401018e-06, |
| "loss": 0.7482, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.25495049504950495, |
| "grad_norm": 0.41538646817207336, |
| "learning_rate": 4.989530681359751e-06, |
| "loss": 0.7193, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.25577557755775576, |
| "grad_norm": 0.40971875190734863, |
| "learning_rate": 4.989430327638923e-06, |
| "loss": 0.7497, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.2566006600660066, |
| "grad_norm": 0.4176967442035675, |
| "learning_rate": 4.989329496257789e-06, |
| "loss": 0.7371, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.25742574257425743, |
| "grad_norm": 0.4094579517841339, |
| "learning_rate": 4.989228187235695e-06, |
| "loss": 0.7436, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.25825082508250824, |
| "grad_norm": 0.4166909158229828, |
| "learning_rate": 4.9891264005920805e-06, |
| "loss": 0.7224, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.2590759075907591, |
| "grad_norm": 0.4074993431568146, |
| "learning_rate": 4.989024136346477e-06, |
| "loss": 0.7467, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.2599009900990099, |
| "grad_norm": 0.4112595319747925, |
| "learning_rate": 4.988921394518504e-06, |
| "loss": 0.761, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.2607260726072607, |
| "grad_norm": 0.4261530339717865, |
| "learning_rate": 4.9888181751278765e-06, |
| "loss": 0.7578, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.2615511551155115, |
| "grad_norm": 0.4100978672504425, |
| "learning_rate": 4.9887144781944e-06, |
| "loss": 0.7663, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.2623762376237624, |
| "grad_norm": 0.42287471890449524, |
| "learning_rate": 4.988610303737972e-06, |
| "loss": 0.7569, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.2632013201320132, |
| "grad_norm": 0.41293367743492126, |
| "learning_rate": 4.98850565177858e-06, |
| "loss": 0.7413, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.264026402640264, |
| "grad_norm": 0.40984824299812317, |
| "learning_rate": 4.988400522336304e-06, |
| "loss": 0.7402, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.26485148514851486, |
| "grad_norm": 0.41604360938072205, |
| "learning_rate": 4.9882949154313156e-06, |
| "loss": 0.7368, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.26567656765676567, |
| "grad_norm": 0.4161511957645416, |
| "learning_rate": 4.988188831083879e-06, |
| "loss": 0.7365, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.2665016501650165, |
| "grad_norm": 0.4176185131072998, |
| "learning_rate": 4.988082269314348e-06, |
| "loss": 0.7454, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.26732673267326734, |
| "grad_norm": 0.40819647908210754, |
| "learning_rate": 4.987975230143171e-06, |
| "loss": 0.7155, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.26815181518151815, |
| "grad_norm": 0.41623732447624207, |
| "learning_rate": 4.9878677135908845e-06, |
| "loss": 0.7423, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.26897689768976896, |
| "grad_norm": 0.4013398587703705, |
| "learning_rate": 4.987759719678119e-06, |
| "loss": 0.7312, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.2698019801980198, |
| "grad_norm": 0.41542699933052063, |
| "learning_rate": 4.987651248425596e-06, |
| "loss": 0.7202, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.2706270627062706, |
| "grad_norm": 0.4313049018383026, |
| "learning_rate": 4.987542299854128e-06, |
| "loss": 0.753, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.27145214521452143, |
| "grad_norm": 0.41977638006210327, |
| "learning_rate": 4.98743287398462e-06, |
| "loss": 0.7394, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.2722772277227723, |
| "grad_norm": 0.4171437919139862, |
| "learning_rate": 4.987322970838068e-06, |
| "loss": 0.7304, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.2731023102310231, |
| "grad_norm": 0.4311671853065491, |
| "learning_rate": 4.987212590435559e-06, |
| "loss": 0.732, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.2739273927392739, |
| "grad_norm": 0.4147193729877472, |
| "learning_rate": 4.987101732798273e-06, |
| "loss": 0.7396, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.2747524752475248, |
| "grad_norm": 0.4111470878124237, |
| "learning_rate": 4.986990397947481e-06, |
| "loss": 0.7449, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.2755775577557756, |
| "grad_norm": 0.42444244027137756, |
| "learning_rate": 4.986878585904546e-06, |
| "loss": 0.7163, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.2764026402640264, |
| "grad_norm": 0.41067928075790405, |
| "learning_rate": 4.986766296690919e-06, |
| "loss": 0.7316, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.27722772277227725, |
| "grad_norm": 0.42234039306640625, |
| "learning_rate": 4.986653530328149e-06, |
| "loss": 0.7039, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.27805280528052806, |
| "grad_norm": 0.4271155595779419, |
| "learning_rate": 4.986540286837871e-06, |
| "loss": 0.7461, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.27887788778877887, |
| "grad_norm": 0.41107016801834106, |
| "learning_rate": 4.9864265662418155e-06, |
| "loss": 0.7162, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.27970297029702973, |
| "grad_norm": 0.4205438792705536, |
| "learning_rate": 4.9863123685618005e-06, |
| "loss": 0.7067, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.28052805280528054, |
| "grad_norm": 0.408677339553833, |
| "learning_rate": 4.986197693819739e-06, |
| "loss": 0.7201, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.28135313531353134, |
| "grad_norm": 0.44216471910476685, |
| "learning_rate": 4.9860825420376345e-06, |
| "loss": 0.7355, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.28217821782178215, |
| "grad_norm": 0.4323996305465698, |
| "learning_rate": 4.985966913237581e-06, |
| "loss": 0.6978, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.283003300330033, |
| "grad_norm": 0.43284088373184204, |
| "learning_rate": 4.985850807441764e-06, |
| "loss": 0.7424, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.2838283828382838, |
| "grad_norm": 0.4161660075187683, |
| "learning_rate": 4.985734224672464e-06, |
| "loss": 0.746, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.28465346534653463, |
| "grad_norm": 0.40742814540863037, |
| "learning_rate": 4.985617164952048e-06, |
| "loss": 0.7378, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.2854785478547855, |
| "grad_norm": 0.4200558662414551, |
| "learning_rate": 4.985499628302978e-06, |
| "loss": 0.7339, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.2863036303630363, |
| "grad_norm": 0.42096641659736633, |
| "learning_rate": 4.985381614747807e-06, |
| "loss": 0.7441, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.2871287128712871, |
| "grad_norm": 0.42450881004333496, |
| "learning_rate": 4.9852631243091755e-06, |
| "loss": 0.7372, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.28795379537953797, |
| "grad_norm": 0.4473407566547394, |
| "learning_rate": 4.985144157009824e-06, |
| "loss": 0.7134, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.2887788778877888, |
| "grad_norm": 0.4309209883213043, |
| "learning_rate": 4.985024712872575e-06, |
| "loss": 0.7484, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2896039603960396, |
| "grad_norm": 0.43831008672714233, |
| "learning_rate": 4.984904791920349e-06, |
| "loss": 0.7577, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.29042904290429045, |
| "grad_norm": 0.4128909111022949, |
| "learning_rate": 4.984784394176155e-06, |
| "loss": 0.742, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.29125412541254125, |
| "grad_norm": 0.4246348440647125, |
| "learning_rate": 4.984663519663097e-06, |
| "loss": 0.7224, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.29207920792079206, |
| "grad_norm": 0.4162266254425049, |
| "learning_rate": 4.984542168404364e-06, |
| "loss": 0.7193, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.2929042904290429, |
| "grad_norm": 0.42818185687065125, |
| "learning_rate": 4.984420340423242e-06, |
| "loss": 0.7171, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.29372937293729373, |
| "grad_norm": 0.43851590156555176, |
| "learning_rate": 4.984298035743107e-06, |
| "loss": 0.734, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.29455445544554454, |
| "grad_norm": 0.4138251841068268, |
| "learning_rate": 4.984175254387426e-06, |
| "loss": 0.7258, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.2953795379537954, |
| "grad_norm": 0.42890465259552, |
| "learning_rate": 4.984051996379758e-06, |
| "loss": 0.725, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.2962046204620462, |
| "grad_norm": 0.43275824189186096, |
| "learning_rate": 4.983928261743753e-06, |
| "loss": 0.7337, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.297029702970297, |
| "grad_norm": 0.44491758942604065, |
| "learning_rate": 4.9838040505031525e-06, |
| "loss": 0.7519, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.2978547854785479, |
| "grad_norm": 0.4430970251560211, |
| "learning_rate": 4.983679362681789e-06, |
| "loss": 0.7417, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.2986798679867987, |
| "grad_norm": 0.43559929728507996, |
| "learning_rate": 4.9835541983035886e-06, |
| "loss": 0.7444, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.2995049504950495, |
| "grad_norm": 0.43662071228027344, |
| "learning_rate": 4.9834285573925665e-06, |
| "loss": 0.7648, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.30033003300330036, |
| "grad_norm": 0.43434590101242065, |
| "learning_rate": 4.9833024399728295e-06, |
| "loss": 0.7179, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.30115511551155116, |
| "grad_norm": 0.41816094517707825, |
| "learning_rate": 4.9831758460685765e-06, |
| "loss": 0.7328, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.30198019801980197, |
| "grad_norm": 0.4434167146682739, |
| "learning_rate": 4.983048775704098e-06, |
| "loss": 0.7213, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.3028052805280528, |
| "grad_norm": 0.4452936351299286, |
| "learning_rate": 4.982921228903776e-06, |
| "loss": 0.7552, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.30363036303630364, |
| "grad_norm": 0.4163966774940491, |
| "learning_rate": 4.982793205692083e-06, |
| "loss": 0.7167, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.30445544554455445, |
| "grad_norm": 0.42152345180511475, |
| "learning_rate": 4.982664706093585e-06, |
| "loss": 0.7176, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.30528052805280526, |
| "grad_norm": 0.44663283228874207, |
| "learning_rate": 4.982535730132936e-06, |
| "loss": 0.7474, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.3061056105610561, |
| "grad_norm": 0.42691031098365784, |
| "learning_rate": 4.982406277834884e-06, |
| "loss": 0.7148, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.3069306930693069, |
| "grad_norm": 0.437341570854187, |
| "learning_rate": 4.9822763492242674e-06, |
| "loss": 0.7374, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.30775577557755773, |
| "grad_norm": 0.42526715993881226, |
| "learning_rate": 4.982145944326018e-06, |
| "loss": 0.7161, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.3085808580858086, |
| "grad_norm": 0.4362766444683075, |
| "learning_rate": 4.9820150631651545e-06, |
| "loss": 0.7356, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.3094059405940594, |
| "grad_norm": 0.42550137639045715, |
| "learning_rate": 4.981883705766792e-06, |
| "loss": 0.7474, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.3102310231023102, |
| "grad_norm": 0.45245617628097534, |
| "learning_rate": 4.981751872156134e-06, |
| "loss": 0.6949, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.3110561056105611, |
| "grad_norm": 0.433254599571228, |
| "learning_rate": 4.981619562358475e-06, |
| "loss": 0.7263, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.3118811881188119, |
| "grad_norm": 0.4480639398097992, |
| "learning_rate": 4.981486776399204e-06, |
| "loss": 0.7289, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.3127062706270627, |
| "grad_norm": 0.44228753447532654, |
| "learning_rate": 4.9813535143037985e-06, |
| "loss": 0.7316, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.31353135313531355, |
| "grad_norm": 0.4304775893688202, |
| "learning_rate": 4.981219776097828e-06, |
| "loss": 0.7453, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.31435643564356436, |
| "grad_norm": 0.42373213171958923, |
| "learning_rate": 4.981085561806953e-06, |
| "loss": 0.7361, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.31518151815181517, |
| "grad_norm": 0.4551517963409424, |
| "learning_rate": 4.980950871456927e-06, |
| "loss": 0.7465, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.31600660066006603, |
| "grad_norm": 0.4127417206764221, |
| "learning_rate": 4.980815705073594e-06, |
| "loss": 0.7183, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.31683168316831684, |
| "grad_norm": 0.42258021235466003, |
| "learning_rate": 4.9806800626828885e-06, |
| "loss": 0.7249, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.31765676567656764, |
| "grad_norm": 0.44680890440940857, |
| "learning_rate": 4.980543944310836e-06, |
| "loss": 0.7355, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.3184818481848185, |
| "grad_norm": 0.45091512799263, |
| "learning_rate": 4.980407349983556e-06, |
| "loss": 0.7173, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.3193069306930693, |
| "grad_norm": 0.43471530079841614, |
| "learning_rate": 4.980270279727256e-06, |
| "loss": 0.7381, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.3201320132013201, |
| "grad_norm": 0.42503878474235535, |
| "learning_rate": 4.980132733568237e-06, |
| "loss": 0.7378, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.320957095709571, |
| "grad_norm": 0.44271692633628845, |
| "learning_rate": 4.979994711532892e-06, |
| "loss": 0.7208, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.3217821782178218, |
| "grad_norm": 0.4279749393463135, |
| "learning_rate": 4.979856213647702e-06, |
| "loss": 0.7319, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.3226072607260726, |
| "grad_norm": 0.4179086983203888, |
| "learning_rate": 4.979717239939242e-06, |
| "loss": 0.7349, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.3234323432343234, |
| "grad_norm": 0.4250434935092926, |
| "learning_rate": 4.979577790434179e-06, |
| "loss": 0.713, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.32425742574257427, |
| "grad_norm": 0.43326812982559204, |
| "learning_rate": 4.979437865159268e-06, |
| "loss": 0.7149, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.3250825082508251, |
| "grad_norm": 0.44481202960014343, |
| "learning_rate": 4.979297464141358e-06, |
| "loss": 0.7421, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.3259075907590759, |
| "grad_norm": 0.436131089925766, |
| "learning_rate": 4.979156587407388e-06, |
| "loss": 0.7519, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.32673267326732675, |
| "grad_norm": 0.42413946986198425, |
| "learning_rate": 4.9790152349843904e-06, |
| "loss": 0.7376, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.32755775577557755, |
| "grad_norm": 0.4464262127876282, |
| "learning_rate": 4.978873406899485e-06, |
| "loss": 0.7322, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.32838283828382836, |
| "grad_norm": 0.43812641501426697, |
| "learning_rate": 4.978731103179887e-06, |
| "loss": 0.7336, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.3292079207920792, |
| "grad_norm": 0.4382777810096741, |
| "learning_rate": 4.9785883238529e-06, |
| "loss": 0.7248, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.33003300330033003, |
| "grad_norm": 0.43636494874954224, |
| "learning_rate": 4.978445068945918e-06, |
| "loss": 0.7357, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.33085808580858084, |
| "grad_norm": 0.4354000687599182, |
| "learning_rate": 4.978301338486432e-06, |
| "loss": 0.7376, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.3316831683168317, |
| "grad_norm": 0.44714194536209106, |
| "learning_rate": 4.978157132502019e-06, |
| "loss": 0.7002, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.3325082508250825, |
| "grad_norm": 0.43645936250686646, |
| "learning_rate": 4.978012451020347e-06, |
| "loss": 0.7332, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.4704788625240326, |
| "learning_rate": 4.977867294069178e-06, |
| "loss": 0.7192, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.3341584158415842, |
| "grad_norm": 0.4357188642024994, |
| "learning_rate": 4.977721661676364e-06, |
| "loss": 0.721, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.334983498349835, |
| "grad_norm": 0.4423423111438751, |
| "learning_rate": 4.977575553869848e-06, |
| "loss": 0.7302, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.3358085808580858, |
| "grad_norm": 0.453336626291275, |
| "learning_rate": 4.977428970677664e-06, |
| "loss": 0.7213, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.33663366336633666, |
| "grad_norm": 0.45698311924934387, |
| "learning_rate": 4.9772819121279395e-06, |
| "loss": 0.7483, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.33745874587458746, |
| "grad_norm": 0.4569329619407654, |
| "learning_rate": 4.97713437824889e-06, |
| "loss": 0.7171, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.33828382838283827, |
| "grad_norm": 0.4383382201194763, |
| "learning_rate": 4.976986369068823e-06, |
| "loss": 0.7348, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.33910891089108913, |
| "grad_norm": 0.4543409049510956, |
| "learning_rate": 4.9768378846161395e-06, |
| "loss": 0.7117, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.33993399339933994, |
| "grad_norm": 0.43206459283828735, |
| "learning_rate": 4.976688924919328e-06, |
| "loss": 0.7225, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.34075907590759075, |
| "grad_norm": 0.4427265226840973, |
| "learning_rate": 4.976539490006972e-06, |
| "loss": 0.7373, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.3415841584158416, |
| "grad_norm": 0.44355079531669617, |
| "learning_rate": 4.976389579907745e-06, |
| "loss": 0.7211, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.3424092409240924, |
| "grad_norm": 0.4588415324687958, |
| "learning_rate": 4.976239194650407e-06, |
| "loss": 0.7244, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.3432343234323432, |
| "grad_norm": 0.4601019620895386, |
| "learning_rate": 4.976088334263818e-06, |
| "loss": 0.7356, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.34405940594059403, |
| "grad_norm": 0.4477859139442444, |
| "learning_rate": 4.975936998776922e-06, |
| "loss": 0.7194, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.3448844884488449, |
| "grad_norm": 0.4489600360393524, |
| "learning_rate": 4.975785188218757e-06, |
| "loss": 0.7178, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.3457095709570957, |
| "grad_norm": 0.45358383655548096, |
| "learning_rate": 4.975632902618451e-06, |
| "loss": 0.7521, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.3465346534653465, |
| "grad_norm": 0.43763983249664307, |
| "learning_rate": 4.975480142005225e-06, |
| "loss": 0.7251, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.34735973597359737, |
| "grad_norm": 0.4395206570625305, |
| "learning_rate": 4.975326906408389e-06, |
| "loss": 0.738, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.3481848184818482, |
| "grad_norm": 0.45151060819625854, |
| "learning_rate": 4.975173195857346e-06, |
| "loss": 0.7455, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.349009900990099, |
| "grad_norm": 0.45000430941581726, |
| "learning_rate": 4.975019010381589e-06, |
| "loss": 0.7263, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.34983498349834985, |
| "grad_norm": 0.45460546016693115, |
| "learning_rate": 4.9748643500107015e-06, |
| "loss": 0.7155, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.35066006600660066, |
| "grad_norm": 0.4484122097492218, |
| "learning_rate": 4.974709214774361e-06, |
| "loss": 0.7278, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.35148514851485146, |
| "grad_norm": 0.4488012492656708, |
| "learning_rate": 4.974553604702332e-06, |
| "loss": 0.725, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.3523102310231023, |
| "grad_norm": 0.45438095927238464, |
| "learning_rate": 4.974397519824474e-06, |
| "loss": 0.7087, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.35313531353135313, |
| "grad_norm": 0.4460967481136322, |
| "learning_rate": 4.974240960170734e-06, |
| "loss": 0.7219, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.35396039603960394, |
| "grad_norm": 0.4628778100013733, |
| "learning_rate": 4.974083925771154e-06, |
| "loss": 0.7032, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.3547854785478548, |
| "grad_norm": 0.4375389516353607, |
| "learning_rate": 4.973926416655863e-06, |
| "loss": 0.6971, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.3556105610561056, |
| "grad_norm": 0.47083836793899536, |
| "learning_rate": 4.9737684328550835e-06, |
| "loss": 0.7262, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.3564356435643564, |
| "grad_norm": 0.4437631666660309, |
| "learning_rate": 4.9736099743991305e-06, |
| "loss": 0.7158, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.3572607260726073, |
| "grad_norm": 0.44406312704086304, |
| "learning_rate": 4.973451041318407e-06, |
| "loss": 0.7324, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.3580858085808581, |
| "grad_norm": 0.4787375032901764, |
| "learning_rate": 4.973291633643408e-06, |
| "loss": 0.7336, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.3589108910891089, |
| "grad_norm": 0.45829135179519653, |
| "learning_rate": 4.9731317514047195e-06, |
| "loss": 0.7057, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.35973597359735976, |
| "grad_norm": 0.4471598267555237, |
| "learning_rate": 4.972971394633021e-06, |
| "loss": 0.7004, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.36056105610561057, |
| "grad_norm": 0.45264288783073425, |
| "learning_rate": 4.972810563359079e-06, |
| "loss": 0.7318, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.3613861386138614, |
| "grad_norm": 0.4582453668117523, |
| "learning_rate": 4.972649257613754e-06, |
| "loss": 0.7188, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.36221122112211224, |
| "grad_norm": 0.4455133080482483, |
| "learning_rate": 4.972487477427996e-06, |
| "loss": 0.7228, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.36303630363036304, |
| "grad_norm": 0.47273391485214233, |
| "learning_rate": 4.972325222832848e-06, |
| "loss": 0.728, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.36386138613861385, |
| "grad_norm": 0.4556146562099457, |
| "learning_rate": 4.97216249385944e-06, |
| "loss": 0.7391, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.36468646864686466, |
| "grad_norm": 0.43739357590675354, |
| "learning_rate": 4.971999290538999e-06, |
| "loss": 0.7249, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.3655115511551155, |
| "grad_norm": 0.4616910517215729, |
| "learning_rate": 4.971835612902838e-06, |
| "loss": 0.7307, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.36633663366336633, |
| "grad_norm": 0.45879244804382324, |
| "learning_rate": 4.971671460982362e-06, |
| "loss": 0.7089, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.36716171617161714, |
| "grad_norm": 0.4455641806125641, |
| "learning_rate": 4.971506834809069e-06, |
| "loss": 0.6983, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.367986798679868, |
| "grad_norm": 0.4431065320968628, |
| "learning_rate": 4.971341734414546e-06, |
| "loss": 0.7155, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.3688118811881188, |
| "grad_norm": 0.4592761993408203, |
| "learning_rate": 4.971176159830471e-06, |
| "loss": 0.7128, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.3696369636963696, |
| "grad_norm": 0.4585060179233551, |
| "learning_rate": 4.971010111088615e-06, |
| "loss": 0.721, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.3704620462046205, |
| "grad_norm": 0.4845431447029114, |
| "learning_rate": 4.970843588220839e-06, |
| "loss": 0.7197, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.3712871287128713, |
| "grad_norm": 0.452482670545578, |
| "learning_rate": 4.970676591259094e-06, |
| "loss": 0.7445, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.3721122112211221, |
| "grad_norm": 0.4393105208873749, |
| "learning_rate": 4.970509120235422e-06, |
| "loss": 0.7188, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.37293729372937295, |
| "grad_norm": 0.4629392921924591, |
| "learning_rate": 4.970341175181957e-06, |
| "loss": 0.7369, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.37376237623762376, |
| "grad_norm": 0.4740729033946991, |
| "learning_rate": 4.970172756130922e-06, |
| "loss": 0.7187, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.37458745874587457, |
| "grad_norm": 0.45829933881759644, |
| "learning_rate": 4.970003863114636e-06, |
| "loss": 0.7142, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.37541254125412543, |
| "grad_norm": 0.46147412061691284, |
| "learning_rate": 4.969834496165502e-06, |
| "loss": 0.7088, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.37623762376237624, |
| "grad_norm": 0.4474492371082306, |
| "learning_rate": 4.96966465531602e-06, |
| "loss": 0.7322, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.37706270627062705, |
| "grad_norm": 0.4511535167694092, |
| "learning_rate": 4.969494340598776e-06, |
| "loss": 0.7258, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.3778877887788779, |
| "grad_norm": 0.46486011147499084, |
| "learning_rate": 4.96932355204645e-06, |
| "loss": 0.698, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.3787128712871287, |
| "grad_norm": 0.45214101672172546, |
| "learning_rate": 4.969152289691813e-06, |
| "loss": 0.7265, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.3795379537953795, |
| "grad_norm": 0.475598007440567, |
| "learning_rate": 4.968980553567726e-06, |
| "loss": 0.7156, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.3803630363036304, |
| "grad_norm": 0.4582163691520691, |
| "learning_rate": 4.968808343707139e-06, |
| "loss": 0.7333, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.3811881188118812, |
| "grad_norm": 0.4644453227519989, |
| "learning_rate": 4.968635660143096e-06, |
| "loss": 0.7415, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.382013201320132, |
| "grad_norm": 0.45985257625579834, |
| "learning_rate": 4.968462502908732e-06, |
| "loss": 0.7594, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.38283828382838286, |
| "grad_norm": 0.455812931060791, |
| "learning_rate": 4.968288872037269e-06, |
| "loss": 0.7357, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.38366336633663367, |
| "grad_norm": 0.47641173005104065, |
| "learning_rate": 4.968114767562026e-06, |
| "loss": 0.7339, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.3844884488448845, |
| "grad_norm": 0.4478452503681183, |
| "learning_rate": 4.967940189516405e-06, |
| "loss": 0.6818, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.38531353135313534, |
| "grad_norm": 0.4571129381656647, |
| "learning_rate": 4.9677651379339065e-06, |
| "loss": 0.6977, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.38613861386138615, |
| "grad_norm": 0.46259671449661255, |
| "learning_rate": 4.967589612848117e-06, |
| "loss": 0.7378, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.38696369636963696, |
| "grad_norm": 0.4669695496559143, |
| "learning_rate": 4.9674136142927165e-06, |
| "loss": 0.6989, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.38778877887788776, |
| "grad_norm": 0.45127734541893005, |
| "learning_rate": 4.967237142301474e-06, |
| "loss": 0.7299, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3886138613861386, |
| "grad_norm": 0.4583680033683777, |
| "learning_rate": 4.967060196908251e-06, |
| "loss": 0.7245, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.38943894389438943, |
| "grad_norm": 0.46106797456741333, |
| "learning_rate": 4.966882778146997e-06, |
| "loss": 0.718, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.39026402640264024, |
| "grad_norm": 0.4659496247768402, |
| "learning_rate": 4.9667048860517575e-06, |
| "loss": 0.7357, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.3910891089108911, |
| "grad_norm": 0.4740099012851715, |
| "learning_rate": 4.966526520656663e-06, |
| "loss": 0.7338, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.3919141914191419, |
| "grad_norm": 0.46527743339538574, |
| "learning_rate": 4.966347681995938e-06, |
| "loss": 0.6972, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.3927392739273927, |
| "grad_norm": 0.4390571713447571, |
| "learning_rate": 4.966168370103897e-06, |
| "loss": 0.711, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.3935643564356436, |
| "grad_norm": 0.46139904856681824, |
| "learning_rate": 4.965988585014946e-06, |
| "loss": 0.7396, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.3943894389438944, |
| "grad_norm": 0.4442991614341736, |
| "learning_rate": 4.9658083267635814e-06, |
| "loss": 0.7077, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.3952145214521452, |
| "grad_norm": 0.4543309509754181, |
| "learning_rate": 4.965627595384391e-06, |
| "loss": 0.7017, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.39603960396039606, |
| "grad_norm": 0.45788052678108215, |
| "learning_rate": 4.965446390912051e-06, |
| "loss": 0.7096, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.39686468646864687, |
| "grad_norm": 0.4616749584674835, |
| "learning_rate": 4.965264713381331e-06, |
| "loss": 0.7261, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.3976897689768977, |
| "grad_norm": 0.4634535014629364, |
| "learning_rate": 4.965082562827091e-06, |
| "loss": 0.7193, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.39851485148514854, |
| "grad_norm": 0.4743461608886719, |
| "learning_rate": 4.96489993928428e-06, |
| "loss": 0.7076, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.39933993399339934, |
| "grad_norm": 0.4673152565956116, |
| "learning_rate": 4.964716842787939e-06, |
| "loss": 0.7153, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.40016501650165015, |
| "grad_norm": 0.456820547580719, |
| "learning_rate": 4.964533273373201e-06, |
| "loss": 0.7348, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.400990099009901, |
| "grad_norm": 0.4581908881664276, |
| "learning_rate": 4.964349231075287e-06, |
| "loss": 0.7185, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.4018151815181518, |
| "grad_norm": 0.4570706784725189, |
| "learning_rate": 4.964164715929512e-06, |
| "loss": 0.7033, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.40264026402640263, |
| "grad_norm": 0.4756205677986145, |
| "learning_rate": 4.9639797279712775e-06, |
| "loss": 0.7375, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.4034653465346535, |
| "grad_norm": 0.4577876925468445, |
| "learning_rate": 4.96379426723608e-06, |
| "loss": 0.736, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.4042904290429043, |
| "grad_norm": 0.4596739411354065, |
| "learning_rate": 4.963608333759505e-06, |
| "loss": 0.7389, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.4051155115511551, |
| "grad_norm": 0.4691055715084076, |
| "learning_rate": 4.963421927577227e-06, |
| "loss": 0.7018, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.40594059405940597, |
| "grad_norm": 0.4546271860599518, |
| "learning_rate": 4.963235048725014e-06, |
| "loss": 0.7242, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.4067656765676568, |
| "grad_norm": 0.465183287858963, |
| "learning_rate": 4.963047697238722e-06, |
| "loss": 0.7073, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.4075907590759076, |
| "grad_norm": 0.456122487783432, |
| "learning_rate": 4.962859873154301e-06, |
| "loss": 0.7329, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.4084158415841584, |
| "grad_norm": 0.4514836370944977, |
| "learning_rate": 4.962671576507788e-06, |
| "loss": 0.7001, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.40924092409240925, |
| "grad_norm": 0.45486271381378174, |
| "learning_rate": 4.9624828073353144e-06, |
| "loss": 0.742, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.41006600660066006, |
| "grad_norm": 0.48726174235343933, |
| "learning_rate": 4.962293565673099e-06, |
| "loss": 0.7257, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.41089108910891087, |
| "grad_norm": 0.4707367420196533, |
| "learning_rate": 4.9621038515574535e-06, |
| "loss": 0.7586, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.41171617161716173, |
| "grad_norm": 0.4668041467666626, |
| "learning_rate": 4.961913665024778e-06, |
| "loss": 0.7141, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.41254125412541254, |
| "grad_norm": 0.47152554988861084, |
| "learning_rate": 4.961723006111566e-06, |
| "loss": 0.7227, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.41336633663366334, |
| "grad_norm": 0.449969083070755, |
| "learning_rate": 4.9615318748544e-06, |
| "loss": 0.7256, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.4141914191419142, |
| "grad_norm": 0.45015445351600647, |
| "learning_rate": 4.9613402712899516e-06, |
| "loss": 0.7176, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.415016501650165, |
| "grad_norm": 0.4499993920326233, |
| "learning_rate": 4.961148195454988e-06, |
| "loss": 0.7228, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.4158415841584158, |
| "grad_norm": 0.4471442401409149, |
| "learning_rate": 4.960955647386361e-06, |
| "loss": 0.7019, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.4166666666666667, |
| "grad_norm": 0.46286097168922424, |
| "learning_rate": 4.9607626271210165e-06, |
| "loss": 0.6826, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.4174917491749175, |
| "grad_norm": 0.46081873774528503, |
| "learning_rate": 4.960569134695991e-06, |
| "loss": 0.7256, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.4183168316831683, |
| "grad_norm": 0.46333253383636475, |
| "learning_rate": 4.9603751701484115e-06, |
| "loss": 0.7328, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.41914191419141916, |
| "grad_norm": 0.45679524540901184, |
| "learning_rate": 4.960180733515494e-06, |
| "loss": 0.7332, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.41996699669966997, |
| "grad_norm": 0.45151451230049133, |
| "learning_rate": 4.959985824834546e-06, |
| "loss": 0.7418, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.4207920792079208, |
| "grad_norm": 0.47151753306388855, |
| "learning_rate": 4.9597904441429664e-06, |
| "loss": 0.7294, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.42161716171617164, |
| "grad_norm": 0.4531096816062927, |
| "learning_rate": 4.959594591478243e-06, |
| "loss": 0.707, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.42244224422442245, |
| "grad_norm": 0.4700416028499603, |
| "learning_rate": 4.959398266877955e-06, |
| "loss": 0.728, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.42326732673267325, |
| "grad_norm": 0.4657609760761261, |
| "learning_rate": 4.959201470379774e-06, |
| "loss": 0.7175, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.4240924092409241, |
| "grad_norm": 0.4651918113231659, |
| "learning_rate": 4.959004202021459e-06, |
| "loss": 0.7181, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.4249174917491749, |
| "grad_norm": 0.451061487197876, |
| "learning_rate": 4.95880646184086e-06, |
| "loss": 0.7576, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.42574257425742573, |
| "grad_norm": 0.4648871421813965, |
| "learning_rate": 4.958608249875921e-06, |
| "loss": 0.7155, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.4265676567656766, |
| "grad_norm": 0.45435768365859985, |
| "learning_rate": 4.9584095661646725e-06, |
| "loss": 0.7244, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.4273927392739274, |
| "grad_norm": 0.4550236463546753, |
| "learning_rate": 4.958210410745237e-06, |
| "loss": 0.729, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.4282178217821782, |
| "grad_norm": 0.4895179271697998, |
| "learning_rate": 4.958010783655827e-06, |
| "loss": 0.7091, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.429042904290429, |
| "grad_norm": 0.4642874002456665, |
| "learning_rate": 4.957810684934747e-06, |
| "loss": 0.7122, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.4298679867986799, |
| "grad_norm": 0.4746834933757782, |
| "learning_rate": 4.9576101146203905e-06, |
| "loss": 0.6977, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.4306930693069307, |
| "grad_norm": 0.4938143491744995, |
| "learning_rate": 4.957409072751243e-06, |
| "loss": 0.7213, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.4315181518151815, |
| "grad_norm": 0.4942130744457245, |
| "learning_rate": 4.957207559365877e-06, |
| "loss": 0.6968, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.43234323432343236, |
| "grad_norm": 0.47856637835502625, |
| "learning_rate": 4.957005574502961e-06, |
| "loss": 0.7075, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.43316831683168316, |
| "grad_norm": 0.4804016351699829, |
| "learning_rate": 4.9568031182012485e-06, |
| "loss": 0.7298, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.43399339933993397, |
| "grad_norm": 0.46422865986824036, |
| "learning_rate": 4.956600190499588e-06, |
| "loss": 0.696, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.43481848184818483, |
| "grad_norm": 0.4641623795032501, |
| "learning_rate": 4.956396791436915e-06, |
| "loss": 0.7217, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.43564356435643564, |
| "grad_norm": 0.4901314377784729, |
| "learning_rate": 4.956192921052256e-06, |
| "loss": 0.7075, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.43646864686468645, |
| "grad_norm": 0.46495771408081055, |
| "learning_rate": 4.955988579384731e-06, |
| "loss": 0.7046, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.4372937293729373, |
| "grad_norm": 0.4624289870262146, |
| "learning_rate": 4.955783766473546e-06, |
| "loss": 0.7527, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.4381188118811881, |
| "grad_norm": 0.4648906886577606, |
| "learning_rate": 4.955578482358e-06, |
| "loss": 0.7188, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.4389438943894389, |
| "grad_norm": 0.46676790714263916, |
| "learning_rate": 4.955372727077483e-06, |
| "loss": 0.6907, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.4397689768976898, |
| "grad_norm": 0.46131378412246704, |
| "learning_rate": 4.955166500671474e-06, |
| "loss": 0.7272, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.4405940594059406, |
| "grad_norm": 0.4766186773777008, |
| "learning_rate": 4.954959803179542e-06, |
| "loss": 0.6933, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.4414191419141914, |
| "grad_norm": 0.47350916266441345, |
| "learning_rate": 4.954752634641347e-06, |
| "loss": 0.7001, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.44224422442244227, |
| "grad_norm": 0.4756217300891876, |
| "learning_rate": 4.954544995096641e-06, |
| "loss": 0.7256, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.4430693069306931, |
| "grad_norm": 0.48165464401245117, |
| "learning_rate": 4.954336884585264e-06, |
| "loss": 0.7212, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.4438943894389439, |
| "grad_norm": 0.4626830816268921, |
| "learning_rate": 4.954128303147146e-06, |
| "loss": 0.7183, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.44471947194719474, |
| "grad_norm": 0.47849732637405396, |
| "learning_rate": 4.953919250822312e-06, |
| "loss": 0.7089, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.44554455445544555, |
| "grad_norm": 0.46145811676979065, |
| "learning_rate": 4.95370972765087e-06, |
| "loss": 0.7144, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.44636963696369636, |
| "grad_norm": 0.4692698121070862, |
| "learning_rate": 4.953499733673026e-06, |
| "loss": 0.7307, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.4471947194719472, |
| "grad_norm": 0.4780563414096832, |
| "learning_rate": 4.95328926892907e-06, |
| "loss": 0.6988, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.44801980198019803, |
| "grad_norm": 0.45988762378692627, |
| "learning_rate": 4.953078333459386e-06, |
| "loss": 0.7342, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.44884488448844884, |
| "grad_norm": 0.47155487537384033, |
| "learning_rate": 4.952866927304447e-06, |
| "loss": 0.6989, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.44966996699669964, |
| "grad_norm": 0.48000568151474, |
| "learning_rate": 4.952655050504817e-06, |
| "loss": 0.7206, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.4504950495049505, |
| "grad_norm": 0.47779932618141174, |
| "learning_rate": 4.95244270310115e-06, |
| "loss": 0.7062, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.4513201320132013, |
| "grad_norm": 0.48263299465179443, |
| "learning_rate": 4.95222988513419e-06, |
| "loss": 0.6971, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.4521452145214521, |
| "grad_norm": 0.4724646210670471, |
| "learning_rate": 4.9520165966447715e-06, |
| "loss": 0.7213, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.452970297029703, |
| "grad_norm": 0.4655725657939911, |
| "learning_rate": 4.9518028376738196e-06, |
| "loss": 0.7234, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.4537953795379538, |
| "grad_norm": 0.4778476357460022, |
| "learning_rate": 4.9515886082623485e-06, |
| "loss": 0.7148, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.4546204620462046, |
| "grad_norm": 0.47832944989204407, |
| "learning_rate": 4.951373908451465e-06, |
| "loss": 0.705, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.45544554455445546, |
| "grad_norm": 0.4635158181190491, |
| "learning_rate": 4.951158738282364e-06, |
| "loss": 0.7112, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.45627062706270627, |
| "grad_norm": 0.48013851046562195, |
| "learning_rate": 4.95094309779633e-06, |
| "loss": 0.7216, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.4570957095709571, |
| "grad_norm": 0.4669191241264343, |
| "learning_rate": 4.950726987034741e-06, |
| "loss": 0.7045, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.45792079207920794, |
| "grad_norm": 0.4785819947719574, |
| "learning_rate": 4.950510406039063e-06, |
| "loss": 0.6867, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.45874587458745875, |
| "grad_norm": 0.46676164865493774, |
| "learning_rate": 4.9502933548508515e-06, |
| "loss": 0.7278, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.45957095709570955, |
| "grad_norm": 0.4710772931575775, |
| "learning_rate": 4.950075833511755e-06, |
| "loss": 0.7155, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.4603960396039604, |
| "grad_norm": 0.48126041889190674, |
| "learning_rate": 4.949857842063509e-06, |
| "loss": 0.7033, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.4612211221122112, |
| "grad_norm": 0.4683317542076111, |
| "learning_rate": 4.949639380547941e-06, |
| "loss": 0.7294, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.46204620462046203, |
| "grad_norm": 0.47730982303619385, |
| "learning_rate": 4.949420449006968e-06, |
| "loss": 0.7065, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.4628712871287129, |
| "grad_norm": 0.4867958426475525, |
| "learning_rate": 4.949201047482599e-06, |
| "loss": 0.72, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.4636963696369637, |
| "grad_norm": 0.4763893485069275, |
| "learning_rate": 4.94898117601693e-06, |
| "loss": 0.7158, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.4645214521452145, |
| "grad_norm": 0.49231722950935364, |
| "learning_rate": 4.94876083465215e-06, |
| "loss": 0.729, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.46534653465346537, |
| "grad_norm": 0.4658971130847931, |
| "learning_rate": 4.948540023430538e-06, |
| "loss": 0.6907, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.4661716171617162, |
| "grad_norm": 0.4774307906627655, |
| "learning_rate": 4.948318742394459e-06, |
| "loss": 0.6925, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.466996699669967, |
| "grad_norm": 0.47116512060165405, |
| "learning_rate": 4.948096991586375e-06, |
| "loss": 0.7229, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.46782178217821785, |
| "grad_norm": 0.47676825523376465, |
| "learning_rate": 4.947874771048833e-06, |
| "loss": 0.7248, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.46864686468646866, |
| "grad_norm": 0.4748082756996155, |
| "learning_rate": 4.94765208082447e-06, |
| "loss": 0.7335, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.46947194719471946, |
| "grad_norm": 0.4675842821598053, |
| "learning_rate": 4.9474289209560174e-06, |
| "loss": 0.7169, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.47029702970297027, |
| "grad_norm": 0.46060022711753845, |
| "learning_rate": 4.947205291486293e-06, |
| "loss": 0.7246, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.47112211221122113, |
| "grad_norm": 0.4750185012817383, |
| "learning_rate": 4.9469811924582065e-06, |
| "loss": 0.6989, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.47194719471947194, |
| "grad_norm": 0.46624764800071716, |
| "learning_rate": 4.9467566239147555e-06, |
| "loss": 0.7053, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.47277227722772275, |
| "grad_norm": 0.4750285744667053, |
| "learning_rate": 4.94653158589903e-06, |
| "loss": 0.7189, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.4735973597359736, |
| "grad_norm": 0.48672083020210266, |
| "learning_rate": 4.946306078454209e-06, |
| "loss": 0.7255, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.4744224422442244, |
| "grad_norm": 0.4581200182437897, |
| "learning_rate": 4.9460801016235625e-06, |
| "loss": 0.6919, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.4752475247524752, |
| "grad_norm": 0.4761126637458801, |
| "learning_rate": 4.945853655450449e-06, |
| "loss": 0.7125, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.4760726072607261, |
| "grad_norm": 0.4857783913612366, |
| "learning_rate": 4.945626739978319e-06, |
| "loss": 0.7098, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.4768976897689769, |
| "grad_norm": 0.5091535449028015, |
| "learning_rate": 4.94539935525071e-06, |
| "loss": 0.723, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.4777227722772277, |
| "grad_norm": 0.5000525712966919, |
| "learning_rate": 4.9451715013112545e-06, |
| "loss": 0.7165, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.47854785478547857, |
| "grad_norm": 0.4900214970111847, |
| "learning_rate": 4.9449431782036695e-06, |
| "loss": 0.7106, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.4793729372937294, |
| "grad_norm": 0.4770774841308594, |
| "learning_rate": 4.9447143859717664e-06, |
| "loss": 0.7395, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.4801980198019802, |
| "grad_norm": 0.49591994285583496, |
| "learning_rate": 4.944485124659443e-06, |
| "loss": 0.6861, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.48102310231023104, |
| "grad_norm": 0.4733443856239319, |
| "learning_rate": 4.944255394310689e-06, |
| "loss": 0.7321, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.48184818481848185, |
| "grad_norm": 0.4814870357513428, |
| "learning_rate": 4.944025194969586e-06, |
| "loss": 0.7042, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.48267326732673266, |
| "grad_norm": 0.4886230528354645, |
| "learning_rate": 4.943794526680302e-06, |
| "loss": 0.7151, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.4834983498349835, |
| "grad_norm": 0.47293350100517273, |
| "learning_rate": 4.943563389487097e-06, |
| "loss": 0.7109, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.48432343234323433, |
| "grad_norm": 0.4692576229572296, |
| "learning_rate": 4.94333178343432e-06, |
| "loss": 0.6982, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.48514851485148514, |
| "grad_norm": 0.479910671710968, |
| "learning_rate": 4.9430997085664105e-06, |
| "loss": 0.728, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.485973597359736, |
| "grad_norm": 0.48567378520965576, |
| "learning_rate": 4.942867164927899e-06, |
| "loss": 0.6874, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.4867986798679868, |
| "grad_norm": 0.4664131700992584, |
| "learning_rate": 4.942634152563405e-06, |
| "loss": 0.7371, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.4876237623762376, |
| "grad_norm": 0.4846361577510834, |
| "learning_rate": 4.942400671517635e-06, |
| "loss": 0.7066, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.4884488448844885, |
| "grad_norm": 0.48338061571121216, |
| "learning_rate": 4.942166721835392e-06, |
| "loss": 0.7282, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.4892739273927393, |
| "grad_norm": 0.4792849123477936, |
| "learning_rate": 4.941932303561563e-06, |
| "loss": 0.7033, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.4900990099009901, |
| "grad_norm": 0.47274795174598694, |
| "learning_rate": 4.941697416741128e-06, |
| "loss": 0.7016, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.4909240924092409, |
| "grad_norm": 0.4659174680709839, |
| "learning_rate": 4.9414620614191555e-06, |
| "loss": 0.7162, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.49174917491749176, |
| "grad_norm": 0.486172080039978, |
| "learning_rate": 4.941226237640804e-06, |
| "loss": 0.6966, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.49257425742574257, |
| "grad_norm": 0.4819536507129669, |
| "learning_rate": 4.940989945451323e-06, |
| "loss": 0.7138, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.4933993399339934, |
| "grad_norm": 0.4761784076690674, |
| "learning_rate": 4.940753184896051e-06, |
| "loss": 0.6963, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.49422442244224424, |
| "grad_norm": 0.48324984312057495, |
| "learning_rate": 4.940515956020416e-06, |
| "loss": 0.7029, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.49504950495049505, |
| "grad_norm": 0.49788162112236023, |
| "learning_rate": 4.940278258869937e-06, |
| "loss": 0.7065, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.49587458745874585, |
| "grad_norm": 0.5038782954216003, |
| "learning_rate": 4.940040093490223e-06, |
| "loss": 0.6954, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.4966996699669967, |
| "grad_norm": 0.4979074001312256, |
| "learning_rate": 4.939801459926969e-06, |
| "loss": 0.7374, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.4975247524752475, |
| "grad_norm": 0.468302458524704, |
| "learning_rate": 4.9395623582259665e-06, |
| "loss": 0.7201, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.49834983498349833, |
| "grad_norm": 0.484279990196228, |
| "learning_rate": 4.939322788433091e-06, |
| "loss": 0.7053, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.4991749174917492, |
| "grad_norm": 0.4879089295864105, |
| "learning_rate": 4.939082750594311e-06, |
| "loss": 0.721, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.48636701703071594, |
| "learning_rate": 4.938842244755683e-06, |
| "loss": 0.694, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.5008250825082509, |
| "grad_norm": 0.48307308554649353, |
| "learning_rate": 4.938601270963355e-06, |
| "loss": 0.7183, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.5016501650165016, |
| "grad_norm": 0.476773738861084, |
| "learning_rate": 4.938359829263564e-06, |
| "loss": 0.7125, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.5024752475247525, |
| "grad_norm": 0.4792875051498413, |
| "learning_rate": 4.938117919702636e-06, |
| "loss": 0.71, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.5033003300330033, |
| "grad_norm": 0.486017644405365, |
| "learning_rate": 4.937875542326989e-06, |
| "loss": 0.7208, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.5041254125412541, |
| "grad_norm": 0.49490052461624146, |
| "learning_rate": 4.937632697183126e-06, |
| "loss": 0.7107, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.504950495049505, |
| "grad_norm": 0.4896621108055115, |
| "learning_rate": 4.937389384317647e-06, |
| "loss": 0.731, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.5057755775577558, |
| "grad_norm": 0.4702865779399872, |
| "learning_rate": 4.937145603777234e-06, |
| "loss": 0.6861, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.5066006600660066, |
| "grad_norm": 0.49433633685112, |
| "learning_rate": 4.936901355608665e-06, |
| "loss": 0.7039, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.5074257425742574, |
| "grad_norm": 0.46913057565689087, |
| "learning_rate": 4.936656639858805e-06, |
| "loss": 0.7173, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.5082508250825083, |
| "grad_norm": 0.4838907718658447, |
| "learning_rate": 4.936411456574608e-06, |
| "loss": 0.7291, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.509075907590759, |
| "grad_norm": 0.48521754145622253, |
| "learning_rate": 4.936165805803119e-06, |
| "loss": 0.7169, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.5099009900990099, |
| "grad_norm": 0.48856601119041443, |
| "learning_rate": 4.9359196875914725e-06, |
| "loss": 0.7279, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.5107260726072608, |
| "grad_norm": 0.4963640868663788, |
| "learning_rate": 4.935673101986892e-06, |
| "loss": 0.737, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.5115511551155115, |
| "grad_norm": 0.48461318016052246, |
| "learning_rate": 4.935426049036692e-06, |
| "loss": 0.7006, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.5123762376237624, |
| "grad_norm": 0.477611780166626, |
| "learning_rate": 4.935178528788275e-06, |
| "loss": 0.6816, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.5132013201320133, |
| "grad_norm": 0.4781244397163391, |
| "learning_rate": 4.934930541289134e-06, |
| "loss": 0.6946, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.514026402640264, |
| "grad_norm": 0.47109952569007874, |
| "learning_rate": 4.934682086586853e-06, |
| "loss": 0.6967, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.5148514851485149, |
| "grad_norm": 0.4875166416168213, |
| "learning_rate": 4.934433164729103e-06, |
| "loss": 0.7215, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.5156765676567657, |
| "grad_norm": 0.4789406955242157, |
| "learning_rate": 4.934183775763647e-06, |
| "loss": 0.7005, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.5165016501650165, |
| "grad_norm": 0.4995609223842621, |
| "learning_rate": 4.933933919738336e-06, |
| "loss": 0.7326, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.5173267326732673, |
| "grad_norm": 0.4869863986968994, |
| "learning_rate": 4.933683596701111e-06, |
| "loss": 0.7073, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.5181518151815182, |
| "grad_norm": 0.4834015369415283, |
| "learning_rate": 4.933432806700004e-06, |
| "loss": 0.7098, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.518976897689769, |
| "grad_norm": 0.483574241399765, |
| "learning_rate": 4.933181549783132e-06, |
| "loss": 0.6863, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.5198019801980198, |
| "grad_norm": 0.48267966508865356, |
| "learning_rate": 4.93292982599871e-06, |
| "loss": 0.7008, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.5206270627062707, |
| "grad_norm": 0.471221387386322, |
| "learning_rate": 4.932677635395035e-06, |
| "loss": 0.7033, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.5214521452145214, |
| "grad_norm": 0.4946180284023285, |
| "learning_rate": 4.932424978020495e-06, |
| "loss": 0.7133, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.5222772277227723, |
| "grad_norm": 0.48916375637054443, |
| "learning_rate": 4.93217185392357e-06, |
| "loss": 0.7147, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.523102310231023, |
| "grad_norm": 0.49226388335227966, |
| "learning_rate": 4.931918263152829e-06, |
| "loss": 0.7093, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.5239273927392739, |
| "grad_norm": 0.5069959759712219, |
| "learning_rate": 4.931664205756928e-06, |
| "loss": 0.7247, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.5247524752475248, |
| "grad_norm": 0.5008878707885742, |
| "learning_rate": 4.9314096817846166e-06, |
| "loss": 0.7134, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.5255775577557755, |
| "grad_norm": 0.4857085645198822, |
| "learning_rate": 4.9311546912847305e-06, |
| "loss": 0.7214, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.5264026402640264, |
| "grad_norm": 0.4922746419906616, |
| "learning_rate": 4.930899234306196e-06, |
| "loss": 0.7348, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.5272277227722773, |
| "grad_norm": 0.522466242313385, |
| "learning_rate": 4.930643310898028e-06, |
| "loss": 0.7035, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.528052805280528, |
| "grad_norm": 0.4872569441795349, |
| "learning_rate": 4.930386921109334e-06, |
| "loss": 0.7033, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.5288778877887789, |
| "grad_norm": 0.5073298215866089, |
| "learning_rate": 4.930130064989308e-06, |
| "loss": 0.6994, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.5297029702970297, |
| "grad_norm": 0.48736149072647095, |
| "learning_rate": 4.929872742587233e-06, |
| "loss": 0.7089, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.5305280528052805, |
| "grad_norm": 0.4810430109500885, |
| "learning_rate": 4.929614953952485e-06, |
| "loss": 0.7034, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.5313531353135313, |
| "grad_norm": 0.5020985007286072, |
| "learning_rate": 4.929356699134526e-06, |
| "loss": 0.7086, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.5321782178217822, |
| "grad_norm": 0.48785045742988586, |
| "learning_rate": 4.929097978182909e-06, |
| "loss": 0.6918, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.533003300330033, |
| "grad_norm": 0.5045816898345947, |
| "learning_rate": 4.928838791147277e-06, |
| "loss": 0.7361, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.5338283828382838, |
| "grad_norm": 0.5134187936782837, |
| "learning_rate": 4.9285791380773596e-06, |
| "loss": 0.6857, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.5346534653465347, |
| "grad_norm": 0.4964018762111664, |
| "learning_rate": 4.9283190190229795e-06, |
| "loss": 0.7001, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.5354785478547854, |
| "grad_norm": 0.5150806307792664, |
| "learning_rate": 4.928058434034047e-06, |
| "loss": 0.7254, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.5363036303630363, |
| "grad_norm": 0.5018999576568604, |
| "learning_rate": 4.927797383160561e-06, |
| "loss": 0.7308, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.5371287128712872, |
| "grad_norm": 0.47672152519226074, |
| "learning_rate": 4.927535866452612e-06, |
| "loss": 0.7032, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.5379537953795379, |
| "grad_norm": 0.5027835965156555, |
| "learning_rate": 4.927273883960378e-06, |
| "loss": 0.7258, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.5387788778877888, |
| "grad_norm": 0.5115182399749756, |
| "learning_rate": 4.9270114357341265e-06, |
| "loss": 0.7054, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.5396039603960396, |
| "grad_norm": 0.49437984824180603, |
| "learning_rate": 4.926748521824215e-06, |
| "loss": 0.698, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.5404290429042904, |
| "grad_norm": 0.49635687470436096, |
| "learning_rate": 4.926485142281091e-06, |
| "loss": 0.6807, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.5412541254125413, |
| "grad_norm": 0.4810151755809784, |
| "learning_rate": 4.92622129715529e-06, |
| "loss": 0.6833, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.5420792079207921, |
| "grad_norm": 0.48570844531059265, |
| "learning_rate": 4.9259569864974374e-06, |
| "loss": 0.7319, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.5429042904290429, |
| "grad_norm": 0.48881736397743225, |
| "learning_rate": 4.925692210358248e-06, |
| "loss": 0.6801, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.5437293729372937, |
| "grad_norm": 0.4965687096118927, |
| "learning_rate": 4.925426968788525e-06, |
| "loss": 0.6843, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.5445544554455446, |
| "grad_norm": 0.5146209597587585, |
| "learning_rate": 4.925161261839163e-06, |
| "loss": 0.71, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.5453795379537953, |
| "grad_norm": 0.5105807781219482, |
| "learning_rate": 4.924895089561144e-06, |
| "loss": 0.7021, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.5462046204620462, |
| "grad_norm": 0.5018863081932068, |
| "learning_rate": 4.92462845200554e-06, |
| "loss": 0.678, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.5470297029702971, |
| "grad_norm": 0.4923059940338135, |
| "learning_rate": 4.924361349223512e-06, |
| "loss": 0.6943, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.5478547854785478, |
| "grad_norm": 0.5007607936859131, |
| "learning_rate": 4.92409378126631e-06, |
| "loss": 0.6811, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.5486798679867987, |
| "grad_norm": 0.481728732585907, |
| "learning_rate": 4.923825748185275e-06, |
| "loss": 0.7166, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.5495049504950495, |
| "grad_norm": 0.4846872091293335, |
| "learning_rate": 4.923557250031834e-06, |
| "loss": 0.6815, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.5503300330033003, |
| "grad_norm": 0.4947971999645233, |
| "learning_rate": 4.923288286857508e-06, |
| "loss": 0.7153, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.5511551155115512, |
| "grad_norm": 0.5153589248657227, |
| "learning_rate": 4.923018858713902e-06, |
| "loss": 0.7191, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.551980198019802, |
| "grad_norm": 0.515349805355072, |
| "learning_rate": 4.922748965652713e-06, |
| "loss": 0.7223, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.5528052805280528, |
| "grad_norm": 0.4987703263759613, |
| "learning_rate": 4.922478607725728e-06, |
| "loss": 0.6917, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.5536303630363036, |
| "grad_norm": 0.4923804700374603, |
| "learning_rate": 4.92220778498482e-06, |
| "loss": 0.7272, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.5544554455445545, |
| "grad_norm": 0.495511531829834, |
| "learning_rate": 4.921936497481956e-06, |
| "loss": 0.7183, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.5552805280528053, |
| "grad_norm": 0.5124722719192505, |
| "learning_rate": 4.921664745269187e-06, |
| "loss": 0.7023, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.5561056105610561, |
| "grad_norm": 0.49752897024154663, |
| "learning_rate": 4.921392528398656e-06, |
| "loss": 0.7228, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.556930693069307, |
| "grad_norm": 0.5025122761726379, |
| "learning_rate": 4.9211198469225955e-06, |
| "loss": 0.697, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.5577557755775577, |
| "grad_norm": 0.5009769201278687, |
| "learning_rate": 4.920846700893326e-06, |
| "loss": 0.7079, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.5585808580858086, |
| "grad_norm": 0.5044685006141663, |
| "learning_rate": 4.920573090363257e-06, |
| "loss": 0.7345, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.5594059405940595, |
| "grad_norm": 0.4943746328353882, |
| "learning_rate": 4.920299015384888e-06, |
| "loss": 0.692, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.5602310231023102, |
| "grad_norm": 0.4920106828212738, |
| "learning_rate": 4.920024476010808e-06, |
| "loss": 0.6911, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.5610561056105611, |
| "grad_norm": 0.48839354515075684, |
| "learning_rate": 4.919749472293693e-06, |
| "loss": 0.709, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.5618811881188119, |
| "grad_norm": 0.4939538538455963, |
| "learning_rate": 4.91947400428631e-06, |
| "loss": 0.7341, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.5627062706270627, |
| "grad_norm": 0.5017114877700806, |
| "learning_rate": 4.919198072041515e-06, |
| "loss": 0.7234, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.5635313531353136, |
| "grad_norm": 0.5011210441589355, |
| "learning_rate": 4.918921675612252e-06, |
| "loss": 0.7105, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.5643564356435643, |
| "grad_norm": 0.5012781620025635, |
| "learning_rate": 4.918644815051554e-06, |
| "loss": 0.7018, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.5651815181518152, |
| "grad_norm": 0.5037400126457214, |
| "learning_rate": 4.9183674904125455e-06, |
| "loss": 0.6873, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.566006600660066, |
| "grad_norm": 0.4981100857257843, |
| "learning_rate": 4.918089701748436e-06, |
| "loss": 0.7274, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.5668316831683168, |
| "grad_norm": 0.5186753273010254, |
| "learning_rate": 4.917811449112529e-06, |
| "loss": 0.719, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.5676567656765676, |
| "grad_norm": 0.4970262944698334, |
| "learning_rate": 4.917532732558212e-06, |
| "loss": 0.6961, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.5684818481848185, |
| "grad_norm": 0.5173365473747253, |
| "learning_rate": 4.9172535521389655e-06, |
| "loss": 0.743, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.5693069306930693, |
| "grad_norm": 0.5220822691917419, |
| "learning_rate": 4.9169739079083564e-06, |
| "loss": 0.6913, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.5701320132013201, |
| "grad_norm": 0.5165892839431763, |
| "learning_rate": 4.916693799920041e-06, |
| "loss": 0.7194, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.570957095709571, |
| "grad_norm": 0.5441265106201172, |
| "learning_rate": 4.9164132282277665e-06, |
| "loss": 0.7358, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.5717821782178217, |
| "grad_norm": 0.5081223845481873, |
| "learning_rate": 4.916132192885366e-06, |
| "loss": 0.7314, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.5726072607260726, |
| "grad_norm": 0.48418229818344116, |
| "learning_rate": 4.915850693946766e-06, |
| "loss": 0.685, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.5734323432343235, |
| "grad_norm": 0.5044229626655579, |
| "learning_rate": 4.915568731465977e-06, |
| "loss": 0.6891, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.5742574257425742, |
| "grad_norm": 0.5088455677032471, |
| "learning_rate": 4.9152863054971e-06, |
| "loss": 0.7153, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.5750825082508251, |
| "grad_norm": 0.5048734545707703, |
| "learning_rate": 4.915003416094327e-06, |
| "loss": 0.7097, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.5759075907590759, |
| "grad_norm": 0.5223882794380188, |
| "learning_rate": 4.914720063311939e-06, |
| "loss": 0.6743, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.5767326732673267, |
| "grad_norm": 0.49376392364501953, |
| "learning_rate": 4.914436247204301e-06, |
| "loss": 0.7214, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.5775577557755776, |
| "grad_norm": 0.49322131276130676, |
| "learning_rate": 4.914151967825872e-06, |
| "loss": 0.7095, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.5783828382838284, |
| "grad_norm": 0.5010339021682739, |
| "learning_rate": 4.913867225231197e-06, |
| "loss": 0.7048, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.5792079207920792, |
| "grad_norm": 0.5402106642723083, |
| "learning_rate": 4.913582019474914e-06, |
| "loss": 0.7228, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.58003300330033, |
| "grad_norm": 0.5094380378723145, |
| "learning_rate": 4.913296350611745e-06, |
| "loss": 0.7018, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.5808580858085809, |
| "grad_norm": 0.48760926723480225, |
| "learning_rate": 4.913010218696502e-06, |
| "loss": 0.6736, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.5816831683168316, |
| "grad_norm": 0.5021440386772156, |
| "learning_rate": 4.9127236237840885e-06, |
| "loss": 0.7187, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.5825082508250825, |
| "grad_norm": 0.5223121047019958, |
| "learning_rate": 4.9124365659294935e-06, |
| "loss": 0.702, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.5833333333333334, |
| "grad_norm": 0.5153369307518005, |
| "learning_rate": 4.912149045187797e-06, |
| "loss": 0.6944, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.5841584158415841, |
| "grad_norm": 0.5183912515640259, |
| "learning_rate": 4.911861061614168e-06, |
| "loss": 0.6992, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.584983498349835, |
| "grad_norm": 0.5021354556083679, |
| "learning_rate": 4.911572615263862e-06, |
| "loss": 0.6783, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.5858085808580858, |
| "grad_norm": 0.49843043088912964, |
| "learning_rate": 4.9112837061922255e-06, |
| "loss": 0.6867, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.5866336633663366, |
| "grad_norm": 0.5146470665931702, |
| "learning_rate": 4.9109943344546924e-06, |
| "loss": 0.6862, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.5874587458745875, |
| "grad_norm": 0.49383649230003357, |
| "learning_rate": 4.910704500106786e-06, |
| "loss": 0.7119, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.5882838283828383, |
| "grad_norm": 0.497063010931015, |
| "learning_rate": 4.91041420320412e-06, |
| "loss": 0.7091, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.5891089108910891, |
| "grad_norm": 0.4999721944332123, |
| "learning_rate": 4.910123443802394e-06, |
| "loss": 0.7166, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.5899339933993399, |
| "grad_norm": 0.49842125177383423, |
| "learning_rate": 4.909832221957397e-06, |
| "loss": 0.7051, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.5907590759075908, |
| "grad_norm": 0.5021651387214661, |
| "learning_rate": 4.909540537725007e-06, |
| "loss": 0.7108, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.5915841584158416, |
| "grad_norm": 0.5190720558166504, |
| "learning_rate": 4.909248391161193e-06, |
| "loss": 0.6969, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.5924092409240924, |
| "grad_norm": 0.5224063992500305, |
| "learning_rate": 4.9089557823220096e-06, |
| "loss": 0.7128, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.5932343234323433, |
| "grad_norm": 0.5178462266921997, |
| "learning_rate": 4.908662711263601e-06, |
| "loss": 0.7036, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.594059405940594, |
| "grad_norm": 0.5114635825157166, |
| "learning_rate": 4.9083691780422e-06, |
| "loss": 0.7143, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.5948844884488449, |
| "grad_norm": 0.5062857866287231, |
| "learning_rate": 4.90807518271413e-06, |
| "loss": 0.6911, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.5957095709570958, |
| "grad_norm": 0.5059993267059326, |
| "learning_rate": 4.9077807253358e-06, |
| "loss": 0.6971, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.5965346534653465, |
| "grad_norm": 0.5015937685966492, |
| "learning_rate": 4.9074858059637084e-06, |
| "loss": 0.699, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.5973597359735974, |
| "grad_norm": 0.5120682716369629, |
| "learning_rate": 4.907190424654446e-06, |
| "loss": 0.6871, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.5981848184818482, |
| "grad_norm": 0.5022520422935486, |
| "learning_rate": 4.906894581464687e-06, |
| "loss": 0.6961, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.599009900990099, |
| "grad_norm": 0.481477290391922, |
| "learning_rate": 4.906598276451194e-06, |
| "loss": 0.6732, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.5998349834983498, |
| "grad_norm": 0.5167291164398193, |
| "learning_rate": 4.906301509670826e-06, |
| "loss": 0.713, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.6006600660066007, |
| "grad_norm": 0.5114957690238953, |
| "learning_rate": 4.906004281180521e-06, |
| "loss": 0.7362, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.6014851485148515, |
| "grad_norm": 0.504904568195343, |
| "learning_rate": 4.905706591037313e-06, |
| "loss": 0.7097, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.6023102310231023, |
| "grad_norm": 0.5066644549369812, |
| "learning_rate": 4.9054084392983185e-06, |
| "loss": 0.7078, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.6031353135313532, |
| "grad_norm": 0.5130860209465027, |
| "learning_rate": 4.905109826020746e-06, |
| "loss": 0.7124, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.6039603960396039, |
| "grad_norm": 0.5110951662063599, |
| "learning_rate": 4.904810751261894e-06, |
| "loss": 0.6983, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.6047854785478548, |
| "grad_norm": 0.5051254630088806, |
| "learning_rate": 4.904511215079147e-06, |
| "loss": 0.7374, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.6056105610561056, |
| "grad_norm": 0.4949505031108856, |
| "learning_rate": 4.904211217529976e-06, |
| "loss": 0.707, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.6064356435643564, |
| "grad_norm": 0.5078654885292053, |
| "learning_rate": 4.903910758671946e-06, |
| "loss": 0.7112, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.6072607260726073, |
| "grad_norm": 0.5021851062774658, |
| "learning_rate": 4.903609838562706e-06, |
| "loss": 0.6755, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.608085808580858, |
| "grad_norm": 0.5080071687698364, |
| "learning_rate": 4.9033084572599966e-06, |
| "loss": 0.7008, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.6089108910891089, |
| "grad_norm": 0.5117108225822449, |
| "learning_rate": 4.903006614821645e-06, |
| "loss": 0.6919, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.6097359735973598, |
| "grad_norm": 0.5092887282371521, |
| "learning_rate": 4.902704311305566e-06, |
| "loss": 0.6827, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.6105610561056105, |
| "grad_norm": 0.5054975152015686, |
| "learning_rate": 4.902401546769766e-06, |
| "loss": 0.6716, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.6113861386138614, |
| "grad_norm": 0.4999292492866516, |
| "learning_rate": 4.9020983212723365e-06, |
| "loss": 0.6656, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.6122112211221122, |
| "grad_norm": 0.5277410745620728, |
| "learning_rate": 4.90179463487146e-06, |
| "loss": 0.702, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.613036303630363, |
| "grad_norm": 0.5241521596908569, |
| "learning_rate": 4.901490487625406e-06, |
| "loss": 0.6835, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.6138613861386139, |
| "grad_norm": 0.5295579433441162, |
| "learning_rate": 4.901185879592534e-06, |
| "loss": 0.6853, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.6146864686468647, |
| "grad_norm": 0.49168312549591064, |
| "learning_rate": 4.900880810831289e-06, |
| "loss": 0.7004, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.6155115511551155, |
| "grad_norm": 0.496028333902359, |
| "learning_rate": 4.9005752814002076e-06, |
| "loss": 0.7039, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.6163366336633663, |
| "grad_norm": 0.5242146849632263, |
| "learning_rate": 4.900269291357912e-06, |
| "loss": 0.7277, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.6171617161716172, |
| "grad_norm": 0.5047608613967896, |
| "learning_rate": 4.899962840763115e-06, |
| "loss": 0.7096, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.6179867986798679, |
| "grad_norm": 0.51358562707901, |
| "learning_rate": 4.899655929674617e-06, |
| "loss": 0.6923, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.6188118811881188, |
| "grad_norm": 0.5006871819496155, |
| "learning_rate": 4.899348558151306e-06, |
| "loss": 0.7124, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.6196369636963697, |
| "grad_norm": 0.5047678351402283, |
| "learning_rate": 4.89904072625216e-06, |
| "loss": 0.7228, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.6204620462046204, |
| "grad_norm": 0.5035737156867981, |
| "learning_rate": 4.8987324340362445e-06, |
| "loss": 0.6694, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.6212871287128713, |
| "grad_norm": 0.5005902647972107, |
| "learning_rate": 4.898423681562711e-06, |
| "loss": 0.7096, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.6221122112211221, |
| "grad_norm": 0.5169086456298828, |
| "learning_rate": 4.8981144688908035e-06, |
| "loss": 0.677, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.6229372937293729, |
| "grad_norm": 0.5119456052780151, |
| "learning_rate": 4.897804796079852e-06, |
| "loss": 0.7085, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.6237623762376238, |
| "grad_norm": 0.5147557854652405, |
| "learning_rate": 4.897494663189275e-06, |
| "loss": 0.7082, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.6245874587458746, |
| "grad_norm": 0.5276811718940735, |
| "learning_rate": 4.897184070278579e-06, |
| "loss": 0.6951, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.6254125412541254, |
| "grad_norm": 0.5133523344993591, |
| "learning_rate": 4.89687301740736e-06, |
| "loss": 0.7086, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.6262376237623762, |
| "grad_norm": 0.5005844831466675, |
| "learning_rate": 4.8965615046353e-06, |
| "loss": 0.6754, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.6270627062706271, |
| "grad_norm": 0.529815137386322, |
| "learning_rate": 4.8962495320221714e-06, |
| "loss": 0.6916, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.6278877887788779, |
| "grad_norm": 0.49290475249290466, |
| "learning_rate": 4.895937099627834e-06, |
| "loss": 0.6809, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.6287128712871287, |
| "grad_norm": 0.5003436207771301, |
| "learning_rate": 4.895624207512237e-06, |
| "loss": 0.7153, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.6295379537953796, |
| "grad_norm": 0.5121909379959106, |
| "learning_rate": 4.895310855735415e-06, |
| "loss": 0.6851, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.6303630363036303, |
| "grad_norm": 0.5076280832290649, |
| "learning_rate": 4.894997044357492e-06, |
| "loss": 0.6824, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.6311881188118812, |
| "grad_norm": 0.49783045053482056, |
| "learning_rate": 4.894682773438683e-06, |
| "loss": 0.6987, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.6320132013201321, |
| "grad_norm": 0.5217746496200562, |
| "learning_rate": 4.894368043039286e-06, |
| "loss": 0.6937, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.6328382838283828, |
| "grad_norm": 0.529611349105835, |
| "learning_rate": 4.894052853219693e-06, |
| "loss": 0.7008, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.6336633663366337, |
| "grad_norm": 0.5516501069068909, |
| "learning_rate": 4.893737204040378e-06, |
| "loss": 0.6885, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.6344884488448845, |
| "grad_norm": 0.5219238996505737, |
| "learning_rate": 4.89342109556191e-06, |
| "loss": 0.7011, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.6353135313531353, |
| "grad_norm": 0.5126083493232727, |
| "learning_rate": 4.89310452784494e-06, |
| "loss": 0.7126, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.6361386138613861, |
| "grad_norm": 0.49892565608024597, |
| "learning_rate": 4.892787500950209e-06, |
| "loss": 0.6844, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.636963696369637, |
| "grad_norm": 0.5378970503807068, |
| "learning_rate": 4.892470014938548e-06, |
| "loss": 0.7181, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.6377887788778878, |
| "grad_norm": 0.5072008371353149, |
| "learning_rate": 4.892152069870874e-06, |
| "loss": 0.7237, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.6386138613861386, |
| "grad_norm": 0.5335053205490112, |
| "learning_rate": 4.891833665808195e-06, |
| "loss": 0.7044, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.6394389438943895, |
| "grad_norm": 0.503537118434906, |
| "learning_rate": 4.891514802811601e-06, |
| "loss": 0.6794, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.6402640264026402, |
| "grad_norm": 0.520697295665741, |
| "learning_rate": 4.891195480942277e-06, |
| "loss": 0.699, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.6410891089108911, |
| "grad_norm": 0.5216426849365234, |
| "learning_rate": 4.890875700261492e-06, |
| "loss": 0.7019, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.641914191419142, |
| "grad_norm": 0.5011724829673767, |
| "learning_rate": 4.890555460830604e-06, |
| "loss": 0.7019, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.6427392739273927, |
| "grad_norm": 0.507106363773346, |
| "learning_rate": 4.890234762711059e-06, |
| "loss": 0.6956, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.6435643564356436, |
| "grad_norm": 0.5091033577919006, |
| "learning_rate": 4.889913605964391e-06, |
| "loss": 0.6891, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.6443894389438944, |
| "grad_norm": 0.516913115978241, |
| "learning_rate": 4.889591990652222e-06, |
| "loss": 0.692, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.6452145214521452, |
| "grad_norm": 0.5138155817985535, |
| "learning_rate": 4.8892699168362626e-06, |
| "loss": 0.7113, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.6460396039603961, |
| "grad_norm": 0.5041294693946838, |
| "learning_rate": 4.88894738457831e-06, |
| "loss": 0.6814, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.6468646864686468, |
| "grad_norm": 0.5104554891586304, |
| "learning_rate": 4.888624393940251e-06, |
| "loss": 0.6742, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.6476897689768977, |
| "grad_norm": 0.5303329229354858, |
| "learning_rate": 4.888300944984059e-06, |
| "loss": 0.6837, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.6485148514851485, |
| "grad_norm": 0.5213814377784729, |
| "learning_rate": 4.887977037771797e-06, |
| "loss": 0.6708, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.6493399339933993, |
| "grad_norm": 0.5052602291107178, |
| "learning_rate": 4.887652672365613e-06, |
| "loss": 0.7113, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.6501650165016502, |
| "grad_norm": 0.5011994242668152, |
| "learning_rate": 4.887327848827746e-06, |
| "loss": 0.6981, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.650990099009901, |
| "grad_norm": 0.5150925517082214, |
| "learning_rate": 4.887002567220521e-06, |
| "loss": 0.7011, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.6518151815181518, |
| "grad_norm": 0.5412828326225281, |
| "learning_rate": 4.886676827606352e-06, |
| "loss": 0.6775, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.6526402640264026, |
| "grad_norm": 0.49732375144958496, |
| "learning_rate": 4.886350630047741e-06, |
| "loss": 0.6911, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.6534653465346535, |
| "grad_norm": 0.531814455986023, |
| "learning_rate": 4.886023974607275e-06, |
| "loss": 0.6921, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.6542904290429042, |
| "grad_norm": 0.5216978192329407, |
| "learning_rate": 4.885696861347633e-06, |
| "loss": 0.7006, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.6551155115511551, |
| "grad_norm": 0.5057194232940674, |
| "learning_rate": 4.8853692903315796e-06, |
| "loss": 0.688, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.655940594059406, |
| "grad_norm": 0.5147150754928589, |
| "learning_rate": 4.885041261621967e-06, |
| "loss": 0.7002, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.6567656765676567, |
| "grad_norm": 0.5100370645523071, |
| "learning_rate": 4.884712775281737e-06, |
| "loss": 0.6832, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.6575907590759076, |
| "grad_norm": 0.5073105096817017, |
| "learning_rate": 4.884383831373918e-06, |
| "loss": 0.7098, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.6584158415841584, |
| "grad_norm": 0.529285728931427, |
| "learning_rate": 4.884054429961625e-06, |
| "loss": 0.6986, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.6592409240924092, |
| "grad_norm": 0.5070759654045105, |
| "learning_rate": 4.8837245711080626e-06, |
| "loss": 0.7007, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.6600660066006601, |
| "grad_norm": 0.5227763652801514, |
| "learning_rate": 4.883394254876523e-06, |
| "loss": 0.6642, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.6608910891089109, |
| "grad_norm": 0.5032903552055359, |
| "learning_rate": 4.883063481330384e-06, |
| "loss": 0.7151, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.6617161716171617, |
| "grad_norm": 0.5112184882164001, |
| "learning_rate": 4.8827322505331155e-06, |
| "loss": 0.6944, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.6625412541254125, |
| "grad_norm": 0.49932217597961426, |
| "learning_rate": 4.882400562548271e-06, |
| "loss": 0.7018, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.6633663366336634, |
| "grad_norm": 0.5168468952178955, |
| "learning_rate": 4.8820684174394935e-06, |
| "loss": 0.7097, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.6641914191419142, |
| "grad_norm": 0.5187894105911255, |
| "learning_rate": 4.881735815270513e-06, |
| "loss": 0.7187, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.665016501650165, |
| "grad_norm": 0.5122649073600769, |
| "learning_rate": 4.881402756105149e-06, |
| "loss": 0.6636, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.6658415841584159, |
| "grad_norm": 0.5010491013526917, |
| "learning_rate": 4.8810692400073065e-06, |
| "loss": 0.696, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.5105947852134705, |
| "learning_rate": 4.880735267040978e-06, |
| "loss": 0.6849, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.6674917491749175, |
| "grad_norm": 0.5258045196533203, |
| "learning_rate": 4.880400837270246e-06, |
| "loss": 0.6923, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.6683168316831684, |
| "grad_norm": 0.5446014404296875, |
| "learning_rate": 4.88006595075928e-06, |
| "loss": 0.7248, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.6691419141914191, |
| "grad_norm": 0.5149955749511719, |
| "learning_rate": 4.879730607572334e-06, |
| "loss": 0.7088, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.66996699669967, |
| "grad_norm": 0.5132419466972351, |
| "learning_rate": 4.879394807773755e-06, |
| "loss": 0.7184, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.6707920792079208, |
| "grad_norm": 0.5261136293411255, |
| "learning_rate": 4.879058551427972e-06, |
| "loss": 0.7055, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.6716171617161716, |
| "grad_norm": 0.5141221880912781, |
| "learning_rate": 4.878721838599506e-06, |
| "loss": 0.6954, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.6724422442244224, |
| "grad_norm": 0.5086462497711182, |
| "learning_rate": 4.878384669352964e-06, |
| "loss": 0.6901, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.6732673267326733, |
| "grad_norm": 0.5050605535507202, |
| "learning_rate": 4.878047043753039e-06, |
| "loss": 0.7118, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.6740924092409241, |
| "grad_norm": 0.52677321434021, |
| "learning_rate": 4.8777089618645146e-06, |
| "loss": 0.678, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.6749174917491749, |
| "grad_norm": 0.5086498260498047, |
| "learning_rate": 4.877370423752259e-06, |
| "loss": 0.6868, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.6757425742574258, |
| "grad_norm": 0.5587587952613831, |
| "learning_rate": 4.87703142948123e-06, |
| "loss": 0.7176, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.6765676567656765, |
| "grad_norm": 0.5186298489570618, |
| "learning_rate": 4.876691979116474e-06, |
| "loss": 0.7037, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.6773927392739274, |
| "grad_norm": 0.5174757242202759, |
| "learning_rate": 4.87635207272312e-06, |
| "loss": 0.7068, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.6782178217821783, |
| "grad_norm": 0.5191114544868469, |
| "learning_rate": 4.876011710366389e-06, |
| "loss": 0.6974, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.679042904290429, |
| "grad_norm": 0.506994903087616, |
| "learning_rate": 4.875670892111589e-06, |
| "loss": 0.697, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.6798679867986799, |
| "grad_norm": 0.5108266472816467, |
| "learning_rate": 4.875329618024113e-06, |
| "loss": 0.693, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.6806930693069307, |
| "grad_norm": 0.5139864087104797, |
| "learning_rate": 4.874987888169445e-06, |
| "loss": 0.6934, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.6815181518151815, |
| "grad_norm": 0.5133042931556702, |
| "learning_rate": 4.874645702613152e-06, |
| "loss": 0.7016, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.6823432343234324, |
| "grad_norm": 0.5109182000160217, |
| "learning_rate": 4.874303061420893e-06, |
| "loss": 0.71, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.6831683168316832, |
| "grad_norm": 0.49460044503211975, |
| "learning_rate": 4.8739599646584126e-06, |
| "loss": 0.7166, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.683993399339934, |
| "grad_norm": 0.507247269153595, |
| "learning_rate": 4.873616412391541e-06, |
| "loss": 0.6971, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.6848184818481848, |
| "grad_norm": 0.5000693798065186, |
| "learning_rate": 4.873272404686199e-06, |
| "loss": 0.6985, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.6856435643564357, |
| "grad_norm": 0.5239745378494263, |
| "learning_rate": 4.872927941608392e-06, |
| "loss": 0.7111, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.6864686468646864, |
| "grad_norm": 0.5349238514900208, |
| "learning_rate": 4.872583023224215e-06, |
| "loss": 0.6929, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.6872937293729373, |
| "grad_norm": 0.5302279591560364, |
| "learning_rate": 4.872237649599848e-06, |
| "loss": 0.6902, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.6881188118811881, |
| "grad_norm": 0.4991590082645416, |
| "learning_rate": 4.871891820801561e-06, |
| "loss": 0.7048, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.6889438943894389, |
| "grad_norm": 0.5134434700012207, |
| "learning_rate": 4.871545536895709e-06, |
| "loss": 0.7008, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.6897689768976898, |
| "grad_norm": 0.5120382905006409, |
| "learning_rate": 4.871198797948736e-06, |
| "loss": 0.6798, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.6905940594059405, |
| "grad_norm": 0.5111243724822998, |
| "learning_rate": 4.870851604027173e-06, |
| "loss": 0.7356, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.6914191419141914, |
| "grad_norm": 0.5056900978088379, |
| "learning_rate": 4.870503955197638e-06, |
| "loss": 0.6962, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.6922442244224423, |
| "grad_norm": 0.5044076442718506, |
| "learning_rate": 4.870155851526834e-06, |
| "loss": 0.6862, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.693069306930693, |
| "grad_norm": 0.5099409222602844, |
| "learning_rate": 4.869807293081555e-06, |
| "loss": 0.7154, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.6938943894389439, |
| "grad_norm": 0.5157263278961182, |
| "learning_rate": 4.869458279928682e-06, |
| "loss": 0.6567, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.6947194719471947, |
| "grad_norm": 0.5214593410491943, |
| "learning_rate": 4.869108812135181e-06, |
| "loss": 0.7135, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.6955445544554455, |
| "grad_norm": 0.5076237916946411, |
| "learning_rate": 4.868758889768106e-06, |
| "loss": 0.703, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.6963696369636964, |
| "grad_norm": 0.5151360630989075, |
| "learning_rate": 4.868408512894599e-06, |
| "loss": 0.6772, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.6971947194719472, |
| "grad_norm": 0.5417853593826294, |
| "learning_rate": 4.868057681581888e-06, |
| "loss": 0.6825, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.698019801980198, |
| "grad_norm": 0.5326299667358398, |
| "learning_rate": 4.8677063958972895e-06, |
| "loss": 0.6777, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.6988448844884488, |
| "grad_norm": 0.5056005716323853, |
| "learning_rate": 4.867354655908206e-06, |
| "loss": 0.737, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.6996699669966997, |
| "grad_norm": 0.5118707418441772, |
| "learning_rate": 4.867002461682129e-06, |
| "loss": 0.6844, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.7004950495049505, |
| "grad_norm": 0.5164377093315125, |
| "learning_rate": 4.866649813286634e-06, |
| "loss": 0.6765, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.7013201320132013, |
| "grad_norm": 0.5244525074958801, |
| "learning_rate": 4.866296710789387e-06, |
| "loss": 0.6837, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.7021452145214522, |
| "grad_norm": 0.5185418128967285, |
| "learning_rate": 4.865943154258138e-06, |
| "loss": 0.7103, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.7029702970297029, |
| "grad_norm": 0.5295969843864441, |
| "learning_rate": 4.8655891437607285e-06, |
| "loss": 0.7153, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.7037953795379538, |
| "grad_norm": 0.5205761194229126, |
| "learning_rate": 4.865234679365082e-06, |
| "loss": 0.69, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.7046204620462047, |
| "grad_norm": 0.5231923460960388, |
| "learning_rate": 4.864879761139212e-06, |
| "loss": 0.7192, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.7054455445544554, |
| "grad_norm": 0.5015358328819275, |
| "learning_rate": 4.864524389151219e-06, |
| "loss": 0.7005, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.7062706270627063, |
| "grad_norm": 0.538537323474884, |
| "learning_rate": 4.8641685634692905e-06, |
| "loss": 0.7083, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.7070957095709571, |
| "grad_norm": 0.5275318622589111, |
| "learning_rate": 4.8638122841616994e-06, |
| "loss": 0.6984, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.7079207920792079, |
| "grad_norm": 0.5181031823158264, |
| "learning_rate": 4.863455551296808e-06, |
| "loss": 0.6884, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.7087458745874587, |
| "grad_norm": 0.5242864489555359, |
| "learning_rate": 4.863098364943065e-06, |
| "loss": 0.6954, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.7095709570957096, |
| "grad_norm": 0.5044652819633484, |
| "learning_rate": 4.862740725169004e-06, |
| "loss": 0.6902, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.7103960396039604, |
| "grad_norm": 0.5207858085632324, |
| "learning_rate": 4.8623826320432486e-06, |
| "loss": 0.6509, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.7112211221122112, |
| "grad_norm": 0.5226578712463379, |
| "learning_rate": 4.8620240856345075e-06, |
| "loss": 0.695, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.7120462046204621, |
| "grad_norm": 0.5265222191810608, |
| "learning_rate": 4.8616650860115766e-06, |
| "loss": 0.6764, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.7128712871287128, |
| "grad_norm": 0.5239368677139282, |
| "learning_rate": 4.86130563324334e-06, |
| "loss": 0.6903, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.7136963696369637, |
| "grad_norm": 0.5724884271621704, |
| "learning_rate": 4.860945727398767e-06, |
| "loss": 0.6824, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.7145214521452146, |
| "grad_norm": 0.5248749256134033, |
| "learning_rate": 4.860585368546915e-06, |
| "loss": 0.6789, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.7153465346534653, |
| "grad_norm": 0.5303323864936829, |
| "learning_rate": 4.8602245567569275e-06, |
| "loss": 0.6902, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.7161716171617162, |
| "grad_norm": 0.5075418949127197, |
| "learning_rate": 4.859863292098036e-06, |
| "loss": 0.6688, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.716996699669967, |
| "grad_norm": 0.5122492909431458, |
| "learning_rate": 4.859501574639558e-06, |
| "loss": 0.6973, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.7178217821782178, |
| "grad_norm": 0.5190475583076477, |
| "learning_rate": 4.8591394044508985e-06, |
| "loss": 0.6814, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.7186468646864687, |
| "grad_norm": 0.5296965837478638, |
| "learning_rate": 4.858776781601549e-06, |
| "loss": 0.7114, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.7194719471947195, |
| "grad_norm": 0.5140368938446045, |
| "learning_rate": 4.858413706161087e-06, |
| "loss": 0.6939, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.7202970297029703, |
| "grad_norm": 0.5177188515663147, |
| "learning_rate": 4.858050178199179e-06, |
| "loss": 0.6884, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.7211221122112211, |
| "grad_norm": 0.5166962146759033, |
| "learning_rate": 4.857686197785576e-06, |
| "loss": 0.6941, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.721947194719472, |
| "grad_norm": 0.5186828970909119, |
| "learning_rate": 4.857321764990118e-06, |
| "loss": 0.6668, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.7227722772277227, |
| "grad_norm": 0.5125436186790466, |
| "learning_rate": 4.85695687988273e-06, |
| "loss": 0.6797, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.7235973597359736, |
| "grad_norm": 0.52864670753479, |
| "learning_rate": 4.8565915425334235e-06, |
| "loss": 0.6886, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.7244224422442245, |
| "grad_norm": 0.5181275606155396, |
| "learning_rate": 4.856225753012299e-06, |
| "loss": 0.6788, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.7252475247524752, |
| "grad_norm": 0.5550081133842468, |
| "learning_rate": 4.8558595113895426e-06, |
| "loss": 0.7227, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.7260726072607261, |
| "grad_norm": 0.5321540832519531, |
| "learning_rate": 4.855492817735425e-06, |
| "loss": 0.6892, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.726897689768977, |
| "grad_norm": 0.527124285697937, |
| "learning_rate": 4.8551256721203094e-06, |
| "loss": 0.6916, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.7277227722772277, |
| "grad_norm": 0.5251095294952393, |
| "learning_rate": 4.854758074614639e-06, |
| "loss": 0.719, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.7285478547854786, |
| "grad_norm": 0.5159496068954468, |
| "learning_rate": 4.854390025288948e-06, |
| "loss": 0.7029, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.7293729372937293, |
| "grad_norm": 0.5572521090507507, |
| "learning_rate": 4.854021524213855e-06, |
| "loss": 0.6813, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.7301980198019802, |
| "grad_norm": 0.5227091908454895, |
| "learning_rate": 4.853652571460067e-06, |
| "loss": 0.6545, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.731023102310231, |
| "grad_norm": 0.5276998281478882, |
| "learning_rate": 4.853283167098376e-06, |
| "loss": 0.6736, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.7318481848184818, |
| "grad_norm": 0.5453947186470032, |
| "learning_rate": 4.852913311199663e-06, |
| "loss": 0.7141, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.7326732673267327, |
| "grad_norm": 0.5115426778793335, |
| "learning_rate": 4.852543003834894e-06, |
| "loss": 0.6854, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.7334983498349835, |
| "grad_norm": 0.530997633934021, |
| "learning_rate": 4.852172245075121e-06, |
| "loss": 0.6976, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.7343234323432343, |
| "grad_norm": 0.5112131834030151, |
| "learning_rate": 4.851801034991484e-06, |
| "loss": 0.7007, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.7351485148514851, |
| "grad_norm": 0.509002685546875, |
| "learning_rate": 4.851429373655208e-06, |
| "loss": 0.6736, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.735973597359736, |
| "grad_norm": 0.532152533531189, |
| "learning_rate": 4.851057261137608e-06, |
| "loss": 0.6868, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.7367986798679867, |
| "grad_norm": 0.5529546141624451, |
| "learning_rate": 4.850684697510082e-06, |
| "loss": 0.7098, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.7376237623762376, |
| "grad_norm": 0.5317026972770691, |
| "learning_rate": 4.850311682844115e-06, |
| "loss": 0.6951, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.7384488448844885, |
| "grad_norm": 0.5340789556503296, |
| "learning_rate": 4.84993821721128e-06, |
| "loss": 0.7039, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.7392739273927392, |
| "grad_norm": 0.5217326283454895, |
| "learning_rate": 4.849564300683235e-06, |
| "loss": 0.6869, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.7400990099009901, |
| "grad_norm": 0.5279721021652222, |
| "learning_rate": 4.849189933331727e-06, |
| "loss": 0.7007, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.740924092409241, |
| "grad_norm": 0.5272139310836792, |
| "learning_rate": 4.848815115228587e-06, |
| "loss": 0.6999, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.7417491749174917, |
| "grad_norm": 0.5041061043739319, |
| "learning_rate": 4.848439846445732e-06, |
| "loss": 0.6941, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.7425742574257426, |
| "grad_norm": 0.5143195986747742, |
| "learning_rate": 4.84806412705517e-06, |
| "loss": 0.6728, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.7433993399339934, |
| "grad_norm": 0.5403009653091431, |
| "learning_rate": 4.84768795712899e-06, |
| "loss": 0.6737, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.7442244224422442, |
| "grad_norm": 0.5111673474311829, |
| "learning_rate": 4.84731133673937e-06, |
| "loss": 0.6987, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.745049504950495, |
| "grad_norm": 0.5134066343307495, |
| "learning_rate": 4.846934265958575e-06, |
| "loss": 0.7023, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.7458745874587459, |
| "grad_norm": 0.5278534889221191, |
| "learning_rate": 4.846556744858953e-06, |
| "loss": 0.6627, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.7466996699669967, |
| "grad_norm": 0.5452783107757568, |
| "learning_rate": 4.846178773512945e-06, |
| "loss": 0.7039, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.7475247524752475, |
| "grad_norm": 0.5415001511573792, |
| "learning_rate": 4.845800351993072e-06, |
| "loss": 0.7216, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.7483498349834984, |
| "grad_norm": 0.5375738143920898, |
| "learning_rate": 4.845421480371943e-06, |
| "loss": 0.6907, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.7491749174917491, |
| "grad_norm": 0.5274370312690735, |
| "learning_rate": 4.8450421587222565e-06, |
| "loss": 0.7073, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.5074566006660461, |
| "learning_rate": 4.844662387116793e-06, |
| "loss": 0.7051, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.7508250825082509, |
| "grad_norm": 0.5361227989196777, |
| "learning_rate": 4.844282165628422e-06, |
| "loss": 0.6943, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.7516501650165016, |
| "grad_norm": 0.5223832130432129, |
| "learning_rate": 4.843901494330099e-06, |
| "loss": 0.6883, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.7524752475247525, |
| "grad_norm": 0.5316516757011414, |
| "learning_rate": 4.8435203732948644e-06, |
| "loss": 0.6857, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.7533003300330033, |
| "grad_norm": 0.5260957479476929, |
| "learning_rate": 4.843138802595847e-06, |
| "loss": 0.7223, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.7541254125412541, |
| "grad_norm": 0.5405908226966858, |
| "learning_rate": 4.842756782306261e-06, |
| "loss": 0.7023, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.754950495049505, |
| "grad_norm": 0.5285031795501709, |
| "learning_rate": 4.842374312499405e-06, |
| "loss": 0.6847, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.7557755775577558, |
| "grad_norm": 0.5381676554679871, |
| "learning_rate": 4.841991393248667e-06, |
| "loss": 0.6667, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.7566006600660066, |
| "grad_norm": 0.5372406840324402, |
| "learning_rate": 4.841608024627519e-06, |
| "loss": 0.7108, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.7574257425742574, |
| "grad_norm": 0.5164246559143066, |
| "learning_rate": 4.841224206709521e-06, |
| "loss": 0.6913, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.7582508250825083, |
| "grad_norm": 0.5240136384963989, |
| "learning_rate": 4.840839939568317e-06, |
| "loss": 0.6998, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.759075907590759, |
| "grad_norm": 0.5204344987869263, |
| "learning_rate": 4.840455223277639e-06, |
| "loss": 0.6873, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.7599009900990099, |
| "grad_norm": 0.5269798636436462, |
| "learning_rate": 4.8400700579113055e-06, |
| "loss": 0.6638, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.7607260726072608, |
| "grad_norm": 0.5244765877723694, |
| "learning_rate": 4.839684443543218e-06, |
| "loss": 0.6875, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.7615511551155115, |
| "grad_norm": 0.5180036425590515, |
| "learning_rate": 4.839298380247368e-06, |
| "loss": 0.6912, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.7623762376237624, |
| "grad_norm": 0.5370272994041443, |
| "learning_rate": 4.838911868097832e-06, |
| "loss": 0.6948, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.7632013201320133, |
| "grad_norm": 0.5375697016716003, |
| "learning_rate": 4.83852490716877e-06, |
| "loss": 0.6939, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.764026402640264, |
| "grad_norm": 0.5340301394462585, |
| "learning_rate": 4.838137497534433e-06, |
| "loss": 0.6722, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.7648514851485149, |
| "grad_norm": 0.5227602124214172, |
| "learning_rate": 4.837749639269153e-06, |
| "loss": 0.6788, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.7656765676567657, |
| "grad_norm": 0.5335901975631714, |
| "learning_rate": 4.8373613324473515e-06, |
| "loss": 0.6758, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.7665016501650165, |
| "grad_norm": 0.519488513469696, |
| "learning_rate": 4.836972577143535e-06, |
| "loss": 0.6944, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.7673267326732673, |
| "grad_norm": 0.5218436121940613, |
| "learning_rate": 4.836583373432296e-06, |
| "loss": 0.6949, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.7681518151815182, |
| "grad_norm": 0.5284325480461121, |
| "learning_rate": 4.836193721388313e-06, |
| "loss": 0.6852, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.768976897689769, |
| "grad_norm": 0.5303220152854919, |
| "learning_rate": 4.83580362108635e-06, |
| "loss": 0.6743, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.7698019801980198, |
| "grad_norm": 0.5296517610549927, |
| "learning_rate": 4.835413072601259e-06, |
| "loss": 0.6946, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.7706270627062707, |
| "grad_norm": 0.5141308307647705, |
| "learning_rate": 4.835022076007976e-06, |
| "loss": 0.6888, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.7714521452145214, |
| "grad_norm": 0.5189119577407837, |
| "learning_rate": 4.834630631381524e-06, |
| "loss": 0.6854, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.7722772277227723, |
| "grad_norm": 0.5247241854667664, |
| "learning_rate": 4.8342387387970105e-06, |
| "loss": 0.7161, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.773102310231023, |
| "grad_norm": 0.5307490229606628, |
| "learning_rate": 4.83384639832963e-06, |
| "loss": 0.6736, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.7739273927392739, |
| "grad_norm": 0.5190678834915161, |
| "learning_rate": 4.833453610054665e-06, |
| "loss": 0.6782, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.7747524752475248, |
| "grad_norm": 0.5487401485443115, |
| "learning_rate": 4.833060374047479e-06, |
| "loss": 0.7075, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.7755775577557755, |
| "grad_norm": 0.5287332534790039, |
| "learning_rate": 4.832666690383526e-06, |
| "loss": 0.6936, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.7764026402640264, |
| "grad_norm": 0.5404759049415588, |
| "learning_rate": 4.832272559138345e-06, |
| "loss": 0.6818, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.7772277227722773, |
| "grad_norm": 0.5293354988098145, |
| "learning_rate": 4.831877980387558e-06, |
| "loss": 0.6965, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.778052805280528, |
| "grad_norm": 0.5324503183364868, |
| "learning_rate": 4.831482954206877e-06, |
| "loss": 0.7106, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.7788778877887789, |
| "grad_norm": 0.5342603325843811, |
| "learning_rate": 4.831087480672095e-06, |
| "loss": 0.6779, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.7797029702970297, |
| "grad_norm": 0.5596241354942322, |
| "learning_rate": 4.830691559859098e-06, |
| "loss": 0.6838, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.7805280528052805, |
| "grad_norm": 0.5506138801574707, |
| "learning_rate": 4.830295191843848e-06, |
| "loss": 0.6828, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.7813531353135313, |
| "grad_norm": 0.532753586769104, |
| "learning_rate": 4.829898376702403e-06, |
| "loss": 0.6913, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.7821782178217822, |
| "grad_norm": 0.5353578329086304, |
| "learning_rate": 4.8295011145108995e-06, |
| "loss": 0.7041, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.783003300330033, |
| "grad_norm": 0.5337108373641968, |
| "learning_rate": 4.829103405345563e-06, |
| "loss": 0.6887, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.7838283828382838, |
| "grad_norm": 0.5403887033462524, |
| "learning_rate": 4.828705249282704e-06, |
| "loss": 0.6977, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.7846534653465347, |
| "grad_norm": 0.518827497959137, |
| "learning_rate": 4.8283066463987185e-06, |
| "loss": 0.6974, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.7854785478547854, |
| "grad_norm": 0.5307276248931885, |
| "learning_rate": 4.827907596770089e-06, |
| "loss": 0.6946, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.7863036303630363, |
| "grad_norm": 0.5107436180114746, |
| "learning_rate": 4.827508100473384e-06, |
| "loss": 0.6929, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.7871287128712872, |
| "grad_norm": 0.5285372138023376, |
| "learning_rate": 4.8271081575852555e-06, |
| "loss": 0.7141, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.7879537953795379, |
| "grad_norm": 0.5414252281188965, |
| "learning_rate": 4.8267077681824425e-06, |
| "loss": 0.6888, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.7887788778877888, |
| "grad_norm": 0.5231629014015198, |
| "learning_rate": 4.826306932341772e-06, |
| "loss": 0.6945, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.7896039603960396, |
| "grad_norm": 0.5293689370155334, |
| "learning_rate": 4.825905650140153e-06, |
| "loss": 0.6726, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.7904290429042904, |
| "grad_norm": 0.5288978815078735, |
| "learning_rate": 4.825503921654582e-06, |
| "loss": 0.6847, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.7912541254125413, |
| "grad_norm": 0.5331034064292908, |
| "learning_rate": 4.8251017469621404e-06, |
| "loss": 0.7272, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.7920792079207921, |
| "grad_norm": 0.5253425240516663, |
| "learning_rate": 4.824699126139995e-06, |
| "loss": 0.6815, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.7929042904290429, |
| "grad_norm": 0.521532416343689, |
| "learning_rate": 4.824296059265402e-06, |
| "loss": 0.6879, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.7937293729372937, |
| "grad_norm": 0.5396357774734497, |
| "learning_rate": 4.823892546415696e-06, |
| "loss": 0.6855, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.7945544554455446, |
| "grad_norm": 0.5377248525619507, |
| "learning_rate": 4.823488587668303e-06, |
| "loss": 0.6673, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.7953795379537953, |
| "grad_norm": 0.5296643376350403, |
| "learning_rate": 4.823084183100732e-06, |
| "loss": 0.7033, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.7962046204620462, |
| "grad_norm": 0.5253770351409912, |
| "learning_rate": 4.822679332790581e-06, |
| "loss": 0.671, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.7970297029702971, |
| "grad_norm": 0.5348721742630005, |
| "learning_rate": 4.8222740368155265e-06, |
| "loss": 0.7095, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.7978547854785478, |
| "grad_norm": 0.5385889410972595, |
| "learning_rate": 4.821868295253338e-06, |
| "loss": 0.6846, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.7986798679867987, |
| "grad_norm": 0.5366014838218689, |
| "learning_rate": 4.821462108181866e-06, |
| "loss": 0.6879, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.7995049504950495, |
| "grad_norm": 0.5168699026107788, |
| "learning_rate": 4.821055475679048e-06, |
| "loss": 0.6778, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.8003300330033003, |
| "grad_norm": 0.5198276042938232, |
| "learning_rate": 4.820648397822907e-06, |
| "loss": 0.6992, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.8011551155115512, |
| "grad_norm": 0.5328154563903809, |
| "learning_rate": 4.8202408746915514e-06, |
| "loss": 0.6701, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.801980198019802, |
| "grad_norm": 0.5303798317909241, |
| "learning_rate": 4.819832906363174e-06, |
| "loss": 0.6732, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.8028052805280528, |
| "grad_norm": 0.5164825916290283, |
| "learning_rate": 4.8194244929160546e-06, |
| "loss": 0.7238, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.8036303630363036, |
| "grad_norm": 0.5376397371292114, |
| "learning_rate": 4.819015634428557e-06, |
| "loss": 0.7045, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.8044554455445545, |
| "grad_norm": 0.5353125929832458, |
| "learning_rate": 4.818606330979132e-06, |
| "loss": 0.683, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.8052805280528053, |
| "grad_norm": 0.53989177942276, |
| "learning_rate": 4.818196582646313e-06, |
| "loss": 0.6934, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.8061056105610561, |
| "grad_norm": 0.5275681018829346, |
| "learning_rate": 4.817786389508723e-06, |
| "loss": 0.7117, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.806930693069307, |
| "grad_norm": 0.5146031379699707, |
| "learning_rate": 4.817375751645066e-06, |
| "loss": 0.697, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.8077557755775577, |
| "grad_norm": 0.5338720083236694, |
| "learning_rate": 4.8169646691341356e-06, |
| "loss": 0.6825, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.8085808580858086, |
| "grad_norm": 0.5359170436859131, |
| "learning_rate": 4.816553142054806e-06, |
| "loss": 0.6914, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.8094059405940595, |
| "grad_norm": 0.5431671142578125, |
| "learning_rate": 4.81614117048604e-06, |
| "loss": 0.6957, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.8102310231023102, |
| "grad_norm": 0.5225863456726074, |
| "learning_rate": 4.815728754506884e-06, |
| "loss": 0.7052, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.8110561056105611, |
| "grad_norm": 0.5262391567230225, |
| "learning_rate": 4.815315894196473e-06, |
| "loss": 0.6849, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.8118811881188119, |
| "grad_norm": 0.5163729190826416, |
| "learning_rate": 4.814902589634022e-06, |
| "loss": 0.7071, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.8127062706270627, |
| "grad_norm": 0.5278923511505127, |
| "learning_rate": 4.814488840898835e-06, |
| "loss": 0.6958, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.8135313531353136, |
| "grad_norm": 0.5270055532455444, |
| "learning_rate": 4.8140746480703e-06, |
| "loss": 0.657, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.8143564356435643, |
| "grad_norm": 0.5140476226806641, |
| "learning_rate": 4.813660011227891e-06, |
| "loss": 0.6889, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.8151815181518152, |
| "grad_norm": 0.5366138219833374, |
| "learning_rate": 4.813244930451165e-06, |
| "loss": 0.7055, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.816006600660066, |
| "grad_norm": 0.5412192940711975, |
| "learning_rate": 4.812829405819768e-06, |
| "loss": 0.6629, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.8168316831683168, |
| "grad_norm": 0.5427959561347961, |
| "learning_rate": 4.812413437413428e-06, |
| "loss": 0.7067, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.8176567656765676, |
| "grad_norm": 0.5499939322471619, |
| "learning_rate": 4.811997025311958e-06, |
| "loss": 0.6934, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.8184818481848185, |
| "grad_norm": 0.5278768539428711, |
| "learning_rate": 4.8115801695952585e-06, |
| "loss": 0.6473, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.8193069306930693, |
| "grad_norm": 0.5308309197425842, |
| "learning_rate": 4.8111628703433134e-06, |
| "loss": 0.6929, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.8201320132013201, |
| "grad_norm": 0.5305526852607727, |
| "learning_rate": 4.810745127636192e-06, |
| "loss": 0.6882, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.820957095709571, |
| "grad_norm": 0.5366460084915161, |
| "learning_rate": 4.81032694155405e-06, |
| "loss": 0.6863, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.8217821782178217, |
| "grad_norm": 0.550890326499939, |
| "learning_rate": 4.809908312177125e-06, |
| "loss": 0.6772, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.8226072607260726, |
| "grad_norm": 0.5378448963165283, |
| "learning_rate": 4.809489239585743e-06, |
| "loss": 0.689, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.8234323432343235, |
| "grad_norm": 0.5434515476226807, |
| "learning_rate": 4.8090697238603125e-06, |
| "loss": 0.6868, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.8242574257425742, |
| "grad_norm": 0.5296987295150757, |
| "learning_rate": 4.80864976508133e-06, |
| "loss": 0.684, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.8250825082508251, |
| "grad_norm": 0.5145077705383301, |
| "learning_rate": 4.8082293633293746e-06, |
| "loss": 0.6633, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.8259075907590759, |
| "grad_norm": 0.5234752297401428, |
| "learning_rate": 4.80780851868511e-06, |
| "loss": 0.7047, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.8267326732673267, |
| "grad_norm": 0.5214663743972778, |
| "learning_rate": 4.807387231229287e-06, |
| "loss": 0.711, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.8275577557755776, |
| "grad_norm": 0.5485701560974121, |
| "learning_rate": 4.80696550104274e-06, |
| "loss": 0.6849, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.8283828382838284, |
| "grad_norm": 0.5370991826057434, |
| "learning_rate": 4.806543328206388e-06, |
| "loss": 0.6935, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.8292079207920792, |
| "grad_norm": 0.5193862318992615, |
| "learning_rate": 4.806120712801237e-06, |
| "loss": 0.6995, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.83003300330033, |
| "grad_norm": 0.5568088889122009, |
| "learning_rate": 4.805697654908375e-06, |
| "loss": 0.7021, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.8308580858085809, |
| "grad_norm": 0.5302184820175171, |
| "learning_rate": 4.805274154608977e-06, |
| "loss": 0.659, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.8316831683168316, |
| "grad_norm": 0.5468152165412903, |
| "learning_rate": 4.8048502119843025e-06, |
| "loss": 0.7086, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.8325082508250825, |
| "grad_norm": 0.5607840418815613, |
| "learning_rate": 4.804425827115695e-06, |
| "loss": 0.6795, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 0.5256121754646301, |
| "learning_rate": 4.804001000084585e-06, |
| "loss": 0.6816, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.8341584158415841, |
| "grad_norm": 0.5150307416915894, |
| "learning_rate": 4.803575730972484e-06, |
| "loss": 0.6607, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.834983498349835, |
| "grad_norm": 0.5352784395217896, |
| "learning_rate": 4.803150019860993e-06, |
| "loss": 0.6728, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.8358085808580858, |
| "grad_norm": 0.5387719869613647, |
| "learning_rate": 4.802723866831793e-06, |
| "loss": 0.6761, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.8366336633663366, |
| "grad_norm": 0.5325175523757935, |
| "learning_rate": 4.802297271966654e-06, |
| "loss": 0.6962, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.8374587458745875, |
| "grad_norm": 0.5446581840515137, |
| "learning_rate": 4.801870235347429e-06, |
| "loss": 0.6853, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.8382838283828383, |
| "grad_norm": 0.5339834094047546, |
| "learning_rate": 4.801442757056055e-06, |
| "loss": 0.6919, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.8391089108910891, |
| "grad_norm": 0.5403542518615723, |
| "learning_rate": 4.8010148371745555e-06, |
| "loss": 0.6876, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.8399339933993399, |
| "grad_norm": 0.5399143695831299, |
| "learning_rate": 4.8005864757850365e-06, |
| "loss": 0.6815, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.8407590759075908, |
| "grad_norm": 0.5371220707893372, |
| "learning_rate": 4.800157672969692e-06, |
| "loss": 0.6983, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.8415841584158416, |
| "grad_norm": 0.5120360851287842, |
| "learning_rate": 4.799728428810796e-06, |
| "loss": 0.6885, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.8424092409240924, |
| "grad_norm": 0.5353764295578003, |
| "learning_rate": 4.799298743390713e-06, |
| "loss": 0.663, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.8432343234323433, |
| "grad_norm": 0.5427811145782471, |
| "learning_rate": 4.798868616791886e-06, |
| "loss": 0.6836, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.844059405940594, |
| "grad_norm": 0.5221230387687683, |
| "learning_rate": 4.798438049096847e-06, |
| "loss": 0.7194, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.8448844884488449, |
| "grad_norm": 0.524575412273407, |
| "learning_rate": 4.798007040388212e-06, |
| "loss": 0.6602, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.8457095709570958, |
| "grad_norm": 0.5420461893081665, |
| "learning_rate": 4.79757559074868e-06, |
| "loss": 0.6931, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.8465346534653465, |
| "grad_norm": 0.5208996534347534, |
| "learning_rate": 4.797143700261035e-06, |
| "loss": 0.6835, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.8473597359735974, |
| "grad_norm": 0.5422525405883789, |
| "learning_rate": 4.796711369008149e-06, |
| "loss": 0.6942, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.8481848184818482, |
| "grad_norm": 0.5329200029373169, |
| "learning_rate": 4.796278597072972e-06, |
| "loss": 0.6494, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.849009900990099, |
| "grad_norm": 0.540357232093811, |
| "learning_rate": 4.795845384538545e-06, |
| "loss": 0.6849, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.8498349834983498, |
| "grad_norm": 0.5337477922439575, |
| "learning_rate": 4.7954117314879886e-06, |
| "loss": 0.6903, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.8506600660066007, |
| "grad_norm": 0.5205554962158203, |
| "learning_rate": 4.794977638004512e-06, |
| "loss": 0.6593, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.8514851485148515, |
| "grad_norm": 0.5238302946090698, |
| "learning_rate": 4.7945431041714065e-06, |
| "loss": 0.7004, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.8523102310231023, |
| "grad_norm": 0.5418237447738647, |
| "learning_rate": 4.794108130072048e-06, |
| "loss": 0.6659, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.8531353135313532, |
| "grad_norm": 0.5398669838905334, |
| "learning_rate": 4.793672715789899e-06, |
| "loss": 0.6813, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.8539603960396039, |
| "grad_norm": 0.5366935133934021, |
| "learning_rate": 4.793236861408501e-06, |
| "loss": 0.6994, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.8547854785478548, |
| "grad_norm": 0.5426564812660217, |
| "learning_rate": 4.792800567011488e-06, |
| "loss": 0.6837, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.8556105610561056, |
| "grad_norm": 0.5445976257324219, |
| "learning_rate": 4.792363832682571e-06, |
| "loss": 0.6925, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.8564356435643564, |
| "grad_norm": 0.5414583086967468, |
| "learning_rate": 4.79192665850555e-06, |
| "loss": 0.7103, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.8572607260726073, |
| "grad_norm": 0.5352795720100403, |
| "learning_rate": 4.791489044564307e-06, |
| "loss": 0.6754, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.858085808580858, |
| "grad_norm": 0.5245605707168579, |
| "learning_rate": 4.791050990942811e-06, |
| "loss": 0.7076, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.8589108910891089, |
| "grad_norm": 0.5312214493751526, |
| "learning_rate": 4.790612497725112e-06, |
| "loss": 0.6879, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.8597359735973598, |
| "grad_norm": 0.5388454794883728, |
| "learning_rate": 4.790173564995347e-06, |
| "loss": 0.6727, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.8605610561056105, |
| "grad_norm": 0.5453711748123169, |
| "learning_rate": 4.789734192837736e-06, |
| "loss": 0.703, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.8613861386138614, |
| "grad_norm": 0.5142508149147034, |
| "learning_rate": 4.789294381336585e-06, |
| "loss": 0.7039, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.8622112211221122, |
| "grad_norm": 0.5323778390884399, |
| "learning_rate": 4.78885413057628e-06, |
| "loss": 0.6765, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.863036303630363, |
| "grad_norm": 0.5433087348937988, |
| "learning_rate": 4.788413440641297e-06, |
| "loss": 0.671, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.8638613861386139, |
| "grad_norm": 0.5310676693916321, |
| "learning_rate": 4.787972311616193e-06, |
| "loss": 0.7094, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.8646864686468647, |
| "grad_norm": 0.517675518989563, |
| "learning_rate": 4.787530743585609e-06, |
| "loss": 0.6508, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.8655115511551155, |
| "grad_norm": 0.5481309294700623, |
| "learning_rate": 4.787088736634271e-06, |
| "loss": 0.6996, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.8663366336633663, |
| "grad_norm": 0.5559577345848083, |
| "learning_rate": 4.78664629084699e-06, |
| "loss": 0.7064, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.8671617161716172, |
| "grad_norm": 0.5332601070404053, |
| "learning_rate": 4.7862034063086595e-06, |
| "loss": 0.6724, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.8679867986798679, |
| "grad_norm": 0.5553277134895325, |
| "learning_rate": 4.78576008310426e-06, |
| "loss": 0.6721, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.8688118811881188, |
| "grad_norm": 0.5259321928024292, |
| "learning_rate": 4.785316321318851e-06, |
| "loss": 0.6826, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.8696369636963697, |
| "grad_norm": 0.5209128856658936, |
| "learning_rate": 4.7848721210375825e-06, |
| "loss": 0.7025, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.8704620462046204, |
| "grad_norm": 0.5183115601539612, |
| "learning_rate": 4.784427482345685e-06, |
| "loss": 0.6801, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.8712871287128713, |
| "grad_norm": 0.5196675658226013, |
| "learning_rate": 4.7839824053284725e-06, |
| "loss": 0.6795, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.8721122112211221, |
| "grad_norm": 0.5218325853347778, |
| "learning_rate": 4.783536890071345e-06, |
| "loss": 0.6981, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.8729372937293729, |
| "grad_norm": 0.5484752655029297, |
| "learning_rate": 4.783090936659786e-06, |
| "loss": 0.7011, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.8737623762376238, |
| "grad_norm": 0.5428327918052673, |
| "learning_rate": 4.782644545179363e-06, |
| "loss": 0.6846, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.8745874587458746, |
| "grad_norm": 0.5338976979255676, |
| "learning_rate": 4.782197715715728e-06, |
| "loss": 0.6863, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.8754125412541254, |
| "grad_norm": 0.518388032913208, |
| "learning_rate": 4.781750448354615e-06, |
| "loss": 0.6712, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.8762376237623762, |
| "grad_norm": 0.5285707116127014, |
| "learning_rate": 4.781302743181845e-06, |
| "loss": 0.6974, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.8770627062706271, |
| "grad_norm": 0.5459383130073547, |
| "learning_rate": 4.780854600283321e-06, |
| "loss": 0.6724, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.8778877887788779, |
| "grad_norm": 0.5176935791969299, |
| "learning_rate": 4.780406019745031e-06, |
| "loss": 0.6815, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.8787128712871287, |
| "grad_norm": 0.5563409924507141, |
| "learning_rate": 4.779957001653045e-06, |
| "loss": 0.7005, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.8795379537953796, |
| "grad_norm": 0.564302384853363, |
| "learning_rate": 4.7795075460935215e-06, |
| "loss": 0.678, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.8803630363036303, |
| "grad_norm": 0.5702047348022461, |
| "learning_rate": 4.7790576531526965e-06, |
| "loss": 0.6726, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.8811881188118812, |
| "grad_norm": 0.5401086211204529, |
| "learning_rate": 4.778607322916896e-06, |
| "loss": 0.6825, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.8820132013201321, |
| "grad_norm": 0.5245928764343262, |
| "learning_rate": 4.778156555472526e-06, |
| "loss": 0.6922, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.8828382838283828, |
| "grad_norm": 0.537380039691925, |
| "learning_rate": 4.777705350906079e-06, |
| "loss": 0.6899, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.8836633663366337, |
| "grad_norm": 0.534204363822937, |
| "learning_rate": 4.777253709304128e-06, |
| "loss": 0.6639, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.8844884488448845, |
| "grad_norm": 0.5462052822113037, |
| "learning_rate": 4.776801630753332e-06, |
| "loss": 0.6976, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.8853135313531353, |
| "grad_norm": 0.5318037867546082, |
| "learning_rate": 4.776349115340436e-06, |
| "loss": 0.6753, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.8861386138613861, |
| "grad_norm": 0.5356978178024292, |
| "learning_rate": 4.775896163152265e-06, |
| "loss": 0.7035, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.886963696369637, |
| "grad_norm": 0.529208242893219, |
| "learning_rate": 4.77544277427573e-06, |
| "loss": 0.6842, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.8877887788778878, |
| "grad_norm": 0.5417758226394653, |
| "learning_rate": 4.774988948797824e-06, |
| "loss": 0.6752, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.8886138613861386, |
| "grad_norm": 0.513358473777771, |
| "learning_rate": 4.774534686805625e-06, |
| "loss": 0.7093, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.8894389438943895, |
| "grad_norm": 0.5584703087806702, |
| "learning_rate": 4.7740799883862966e-06, |
| "loss": 0.6931, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.8902640264026402, |
| "grad_norm": 0.5548607707023621, |
| "learning_rate": 4.773624853627083e-06, |
| "loss": 0.6807, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.8910891089108911, |
| "grad_norm": 0.5403873920440674, |
| "learning_rate": 4.7731692826153115e-06, |
| "loss": 0.6467, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.891914191419142, |
| "grad_norm": 0.5343154668807983, |
| "learning_rate": 4.772713275438397e-06, |
| "loss": 0.6882, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.8927392739273927, |
| "grad_norm": 0.5312783718109131, |
| "learning_rate": 4.772256832183837e-06, |
| "loss": 0.699, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.8935643564356436, |
| "grad_norm": 0.5227769017219543, |
| "learning_rate": 4.77179995293921e-06, |
| "loss": 0.6801, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.8943894389438944, |
| "grad_norm": 0.5363834500312805, |
| "learning_rate": 4.77134263779218e-06, |
| "loss": 0.6731, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.8952145214521452, |
| "grad_norm": 0.5440719127655029, |
| "learning_rate": 4.7708848868304946e-06, |
| "loss": 0.6865, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.8960396039603961, |
| "grad_norm": 0.5516568422317505, |
| "learning_rate": 4.7704267001419856e-06, |
| "loss": 0.6729, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.8968646864686468, |
| "grad_norm": 0.538250207901001, |
| "learning_rate": 4.769968077814567e-06, |
| "loss": 0.6934, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.8976897689768977, |
| "grad_norm": 0.5338899493217468, |
| "learning_rate": 4.769509019936237e-06, |
| "loss": 0.6719, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.8985148514851485, |
| "grad_norm": 0.5299314856529236, |
| "learning_rate": 4.769049526595079e-06, |
| "loss": 0.6693, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.8993399339933993, |
| "grad_norm": 0.5240779519081116, |
| "learning_rate": 4.7685895978792564e-06, |
| "loss": 0.6822, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.9001650165016502, |
| "grad_norm": 0.5221059322357178, |
| "learning_rate": 4.768129233877019e-06, |
| "loss": 0.6918, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.900990099009901, |
| "grad_norm": 0.5516021251678467, |
| "learning_rate": 4.7676684346766994e-06, |
| "loss": 0.6783, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.9018151815181518, |
| "grad_norm": 0.5486177802085876, |
| "learning_rate": 4.767207200366713e-06, |
| "loss": 0.6908, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.9026402640264026, |
| "grad_norm": 0.5327460765838623, |
| "learning_rate": 4.7667455310355615e-06, |
| "loss": 0.6942, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.9034653465346535, |
| "grad_norm": 0.5445359349250793, |
| "learning_rate": 4.766283426771825e-06, |
| "loss": 0.6819, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.9042904290429042, |
| "grad_norm": 0.5357023477554321, |
| "learning_rate": 4.765820887664172e-06, |
| "loss": 0.7024, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.9051155115511551, |
| "grad_norm": 0.5279941558837891, |
| "learning_rate": 4.76535791380135e-06, |
| "loss": 0.6669, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.905940594059406, |
| "grad_norm": 0.5274310111999512, |
| "learning_rate": 4.7648945052721955e-06, |
| "loss": 0.6944, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.9067656765676567, |
| "grad_norm": 0.5562543272972107, |
| "learning_rate": 4.764430662165623e-06, |
| "loss": 0.713, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.9075907590759076, |
| "grad_norm": 0.566295862197876, |
| "learning_rate": 4.763966384570633e-06, |
| "loss": 0.6813, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.9084158415841584, |
| "grad_norm": 0.5282127857208252, |
| "learning_rate": 4.763501672576308e-06, |
| "loss": 0.6782, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.9092409240924092, |
| "grad_norm": 0.5187849998474121, |
| "learning_rate": 4.763036526271817e-06, |
| "loss": 0.6759, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.9100660066006601, |
| "grad_norm": 0.5376170873641968, |
| "learning_rate": 4.762570945746408e-06, |
| "loss": 0.6775, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.9108910891089109, |
| "grad_norm": 0.5497514605522156, |
| "learning_rate": 4.762104931089415e-06, |
| "loss": 0.7128, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.9117161716171617, |
| "grad_norm": 0.5497869253158569, |
| "learning_rate": 4.761638482390256e-06, |
| "loss": 0.6651, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.9125412541254125, |
| "grad_norm": 0.5260938405990601, |
| "learning_rate": 4.761171599738429e-06, |
| "loss": 0.674, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.9133663366336634, |
| "grad_norm": 0.5394425988197327, |
| "learning_rate": 4.760704283223518e-06, |
| "loss": 0.6801, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.9141914191419142, |
| "grad_norm": 0.5538128018379211, |
| "learning_rate": 4.760236532935191e-06, |
| "loss": 0.7046, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.915016501650165, |
| "grad_norm": 0.5472164154052734, |
| "learning_rate": 4.759768348963196e-06, |
| "loss": 0.6729, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.9158415841584159, |
| "grad_norm": 0.5471000075340271, |
| "learning_rate": 4.759299731397366e-06, |
| "loss": 0.663, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.9166666666666666, |
| "grad_norm": 0.5478131771087646, |
| "learning_rate": 4.758830680327618e-06, |
| "loss": 0.6781, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.9174917491749175, |
| "grad_norm": 0.5534459352493286, |
| "learning_rate": 4.7583611958439514e-06, |
| "loss": 0.6947, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.9183168316831684, |
| "grad_norm": 0.5353650450706482, |
| "learning_rate": 4.7578912780364475e-06, |
| "loss": 0.703, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.9191419141914191, |
| "grad_norm": 0.542109489440918, |
| "learning_rate": 4.757420926995273e-06, |
| "loss": 0.6543, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.91996699669967, |
| "grad_norm": 0.5209757685661316, |
| "learning_rate": 4.756950142810677e-06, |
| "loss": 0.6426, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.9207920792079208, |
| "grad_norm": 0.528536319732666, |
| "learning_rate": 4.75647892557299e-06, |
| "loss": 0.6917, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.9216171617161716, |
| "grad_norm": 0.5359807014465332, |
| "learning_rate": 4.756007275372627e-06, |
| "loss": 0.6741, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.9224422442244224, |
| "grad_norm": 0.5328729748725891, |
| "learning_rate": 4.755535192300088e-06, |
| "loss": 0.6721, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.9232673267326733, |
| "grad_norm": 0.538567841053009, |
| "learning_rate": 4.755062676445952e-06, |
| "loss": 0.6773, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.9240924092409241, |
| "grad_norm": 0.5369272828102112, |
| "learning_rate": 4.754589727900885e-06, |
| "loss": 0.6917, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.9249174917491749, |
| "grad_norm": 0.5393624305725098, |
| "learning_rate": 4.754116346755632e-06, |
| "loss": 0.7045, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.9257425742574258, |
| "grad_norm": 0.5320336222648621, |
| "learning_rate": 4.753642533101025e-06, |
| "loss": 0.6799, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.9265676567656765, |
| "grad_norm": 0.5338489413261414, |
| "learning_rate": 4.753168287027977e-06, |
| "loss": 0.6868, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.9273927392739274, |
| "grad_norm": 0.539506196975708, |
| "learning_rate": 4.752693608627484e-06, |
| "loss": 0.6798, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.9282178217821783, |
| "grad_norm": 0.5377005338668823, |
| "learning_rate": 4.7522184979906225e-06, |
| "loss": 0.6777, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.929042904290429, |
| "grad_norm": 0.5573501586914062, |
| "learning_rate": 4.751742955208558e-06, |
| "loss": 0.6861, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.9298679867986799, |
| "grad_norm": 0.5515655875205994, |
| "learning_rate": 4.751266980372534e-06, |
| "loss": 0.6908, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.9306930693069307, |
| "grad_norm": 0.5649194717407227, |
| "learning_rate": 4.750790573573879e-06, |
| "loss": 0.67, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.9315181518151815, |
| "grad_norm": 0.5307170748710632, |
| "learning_rate": 4.750313734904003e-06, |
| "loss": 0.6783, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.9323432343234324, |
| "grad_norm": 0.5290861129760742, |
| "learning_rate": 4.7498364644544e-06, |
| "loss": 0.6725, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.9331683168316832, |
| "grad_norm": 0.5453407764434814, |
| "learning_rate": 4.749358762316646e-06, |
| "loss": 0.7064, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.933993399339934, |
| "grad_norm": 0.5274932980537415, |
| "learning_rate": 4.7488806285824e-06, |
| "loss": 0.6802, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.9348184818481848, |
| "grad_norm": 0.527488648891449, |
| "learning_rate": 4.7484020633434055e-06, |
| "loss": 0.6571, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.9356435643564357, |
| "grad_norm": 0.5450428128242493, |
| "learning_rate": 4.747923066691487e-06, |
| "loss": 0.6708, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.9364686468646864, |
| "grad_norm": 0.5410321354866028, |
| "learning_rate": 4.74744363871855e-06, |
| "loss": 0.6869, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.9372937293729373, |
| "grad_norm": 0.5409252643585205, |
| "learning_rate": 4.746963779516587e-06, |
| "loss": 0.6935, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.9381188118811881, |
| "grad_norm": 0.5387977361679077, |
| "learning_rate": 4.746483489177671e-06, |
| "loss": 0.6679, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.9389438943894389, |
| "grad_norm": 0.5344523787498474, |
| "learning_rate": 4.746002767793957e-06, |
| "loss": 0.6861, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.9397689768976898, |
| "grad_norm": 0.5451862812042236, |
| "learning_rate": 4.745521615457685e-06, |
| "loss": 0.6688, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.9405940594059405, |
| "grad_norm": 0.5420905947685242, |
| "learning_rate": 4.745040032261175e-06, |
| "loss": 0.7016, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.9414191419141914, |
| "grad_norm": 0.5483806729316711, |
| "learning_rate": 4.744558018296831e-06, |
| "loss": 0.7091, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.9422442244224423, |
| "grad_norm": 0.5476734638214111, |
| "learning_rate": 4.74407557365714e-06, |
| "loss": 0.6928, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.943069306930693, |
| "grad_norm": 0.539954662322998, |
| "learning_rate": 4.743592698434671e-06, |
| "loss": 0.6617, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.9438943894389439, |
| "grad_norm": 0.5607476234436035, |
| "learning_rate": 4.7431093927220775e-06, |
| "loss": 0.6649, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.9447194719471947, |
| "grad_norm": 0.5419979691505432, |
| "learning_rate": 4.742625656612091e-06, |
| "loss": 0.6745, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.9455445544554455, |
| "grad_norm": 0.5784286260604858, |
| "learning_rate": 4.74214149019753e-06, |
| "loss": 0.6946, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.9463696369636964, |
| "grad_norm": 0.5449656248092651, |
| "learning_rate": 4.741656893571295e-06, |
| "loss": 0.685, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.9471947194719472, |
| "grad_norm": 0.5477748513221741, |
| "learning_rate": 4.741171866826366e-06, |
| "loss": 0.6714, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.948019801980198, |
| "grad_norm": 0.5583825707435608, |
| "learning_rate": 4.74068641005581e-06, |
| "loss": 0.6839, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.9488448844884488, |
| "grad_norm": 0.5398768186569214, |
| "learning_rate": 4.7402005233527725e-06, |
| "loss": 0.6763, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.9496699669966997, |
| "grad_norm": 0.5726080536842346, |
| "learning_rate": 4.739714206810484e-06, |
| "loss": 0.6851, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.9504950495049505, |
| "grad_norm": 0.5650553703308105, |
| "learning_rate": 4.739227460522256e-06, |
| "loss": 0.6828, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.9513201320132013, |
| "grad_norm": 0.558864414691925, |
| "learning_rate": 4.738740284581484e-06, |
| "loss": 0.6811, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.9521452145214522, |
| "grad_norm": 0.5432051420211792, |
| "learning_rate": 4.738252679081644e-06, |
| "loss": 0.6819, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.9529702970297029, |
| "grad_norm": 0.5568391680717468, |
| "learning_rate": 4.7377646441162975e-06, |
| "loss": 0.7298, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.9537953795379538, |
| "grad_norm": 0.569330632686615, |
| "learning_rate": 4.7372761797790836e-06, |
| "loss": 0.6487, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.9546204620462047, |
| "grad_norm": 0.5513132810592651, |
| "learning_rate": 4.736787286163728e-06, |
| "loss": 0.6921, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.9554455445544554, |
| "grad_norm": 0.561714768409729, |
| "learning_rate": 4.736297963364038e-06, |
| "loss": 0.6669, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.9562706270627063, |
| "grad_norm": 0.5341464877128601, |
| "learning_rate": 4.735808211473901e-06, |
| "loss": 0.6884, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.9570957095709571, |
| "grad_norm": 0.5575276613235474, |
| "learning_rate": 4.73531803058729e-06, |
| "loss": 0.6735, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.9579207920792079, |
| "grad_norm": 0.5622995495796204, |
| "learning_rate": 4.734827420798257e-06, |
| "loss": 0.6815, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.9587458745874587, |
| "grad_norm": 0.5288589596748352, |
| "learning_rate": 4.734336382200939e-06, |
| "loss": 0.6947, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.9595709570957096, |
| "grad_norm": 0.5552820563316345, |
| "learning_rate": 4.733844914889554e-06, |
| "loss": 0.6736, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.9603960396039604, |
| "grad_norm": 0.5323270559310913, |
| "learning_rate": 4.7333530189584024e-06, |
| "loss": 0.6516, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.9612211221122112, |
| "grad_norm": 0.5653538703918457, |
| "learning_rate": 4.732860694501867e-06, |
| "loss": 0.6808, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.9620462046204621, |
| "grad_norm": 0.5389164090156555, |
| "learning_rate": 4.732367941614412e-06, |
| "loss": 0.683, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.9628712871287128, |
| "grad_norm": 0.5325968861579895, |
| "learning_rate": 4.731874760390586e-06, |
| "loss": 0.6952, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.9636963696369637, |
| "grad_norm": 0.5450141429901123, |
| "learning_rate": 4.7313811509250165e-06, |
| "loss": 0.6852, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.9645214521452146, |
| "grad_norm": 0.5620688796043396, |
| "learning_rate": 4.730887113312417e-06, |
| "loss": 0.6882, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.9653465346534653, |
| "grad_norm": 0.5411251783370972, |
| "learning_rate": 4.730392647647579e-06, |
| "loss": 0.6699, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.9661716171617162, |
| "grad_norm": 0.557712197303772, |
| "learning_rate": 4.72989775402538e-06, |
| "loss": 0.6922, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.966996699669967, |
| "grad_norm": 0.5456312298774719, |
| "learning_rate": 4.729402432540776e-06, |
| "loss": 0.6858, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.9678217821782178, |
| "grad_norm": 0.5637298822402954, |
| "learning_rate": 4.72890668328881e-06, |
| "loss": 0.7151, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.9686468646864687, |
| "grad_norm": 0.5458623170852661, |
| "learning_rate": 4.728410506364601e-06, |
| "loss": 0.6895, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.9694719471947195, |
| "grad_norm": 0.5525950193405151, |
| "learning_rate": 4.727913901863355e-06, |
| "loss": 0.6662, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.9702970297029703, |
| "grad_norm": 0.5622225999832153, |
| "learning_rate": 4.727416869880357e-06, |
| "loss": 0.6842, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.9711221122112211, |
| "grad_norm": 0.5422683358192444, |
| "learning_rate": 4.726919410510976e-06, |
| "loss": 0.668, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.971947194719472, |
| "grad_norm": 0.5314889550209045, |
| "learning_rate": 4.726421523850662e-06, |
| "loss": 0.7056, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.9727722772277227, |
| "grad_norm": 0.5366458892822266, |
| "learning_rate": 4.725923209994947e-06, |
| "loss": 0.6655, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.9735973597359736, |
| "grad_norm": 0.5452573895454407, |
| "learning_rate": 4.725424469039445e-06, |
| "loss": 0.6691, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.9744224422442245, |
| "grad_norm": 0.5398051142692566, |
| "learning_rate": 4.724925301079852e-06, |
| "loss": 0.6985, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.9752475247524752, |
| "grad_norm": 0.5489977598190308, |
| "learning_rate": 4.724425706211947e-06, |
| "loss": 0.7002, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.9760726072607261, |
| "grad_norm": 0.5429955720901489, |
| "learning_rate": 4.72392568453159e-06, |
| "loss": 0.6927, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.976897689768977, |
| "grad_norm": 0.5515938401222229, |
| "learning_rate": 4.7234252361347215e-06, |
| "loss": 0.678, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.9777227722772277, |
| "grad_norm": 0.5300772190093994, |
| "learning_rate": 4.722924361117365e-06, |
| "loss": 0.6836, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.9785478547854786, |
| "grad_norm": 0.5720294713973999, |
| "learning_rate": 4.722423059575627e-06, |
| "loss": 0.6688, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.9793729372937293, |
| "grad_norm": 0.5381259918212891, |
| "learning_rate": 4.7219213316056955e-06, |
| "loss": 0.7059, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.9801980198019802, |
| "grad_norm": 0.545156717300415, |
| "learning_rate": 4.721419177303839e-06, |
| "loss": 0.6747, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.981023102310231, |
| "grad_norm": 0.55919349193573, |
| "learning_rate": 4.720916596766409e-06, |
| "loss": 0.6804, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.9818481848184818, |
| "grad_norm": 0.5549430251121521, |
| "learning_rate": 4.7204135900898364e-06, |
| "loss": 0.6678, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.9826732673267327, |
| "grad_norm": 0.5881220698356628, |
| "learning_rate": 4.719910157370638e-06, |
| "loss": 0.66, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.9834983498349835, |
| "grad_norm": 0.5495649576187134, |
| "learning_rate": 4.71940629870541e-06, |
| "loss": 0.6778, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.9843234323432343, |
| "grad_norm": 0.5426628589630127, |
| "learning_rate": 4.7189020141908295e-06, |
| "loss": 0.6767, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.9851485148514851, |
| "grad_norm": 0.5726175904273987, |
| "learning_rate": 4.718397303923656e-06, |
| "loss": 0.7013, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.985973597359736, |
| "grad_norm": 0.5587770342826843, |
| "learning_rate": 4.7178921680007316e-06, |
| "loss": 0.701, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.9867986798679867, |
| "grad_norm": 0.5477480292320251, |
| "learning_rate": 4.71738660651898e-06, |
| "loss": 0.6908, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.9876237623762376, |
| "grad_norm": 0.5668710470199585, |
| "learning_rate": 4.7168806195754045e-06, |
| "loss": 0.6655, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.9884488448844885, |
| "grad_norm": 0.5600260496139526, |
| "learning_rate": 4.716374207267094e-06, |
| "loss": 0.6763, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.9892739273927392, |
| "grad_norm": 0.5428857803344727, |
| "learning_rate": 4.715867369691214e-06, |
| "loss": 0.6983, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.9900990099009901, |
| "grad_norm": 0.5704364776611328, |
| "learning_rate": 4.715360106945015e-06, |
| "loss": 0.6835, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.990924092409241, |
| "grad_norm": 0.5516597032546997, |
| "learning_rate": 4.714852419125828e-06, |
| "loss": 0.6907, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.9917491749174917, |
| "grad_norm": 0.5384538769721985, |
| "learning_rate": 4.7143443063310665e-06, |
| "loss": 0.6746, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.9925742574257426, |
| "grad_norm": 0.5800072550773621, |
| "learning_rate": 4.713835768658224e-06, |
| "loss": 0.6915, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.9933993399339934, |
| "grad_norm": 0.5570341348648071, |
| "learning_rate": 4.713326806204877e-06, |
| "loss": 0.7025, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.9942244224422442, |
| "grad_norm": 0.5560991168022156, |
| "learning_rate": 4.712817419068682e-06, |
| "loss": 0.6893, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.995049504950495, |
| "grad_norm": 0.5329129695892334, |
| "learning_rate": 4.712307607347379e-06, |
| "loss": 0.704, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.9958745874587459, |
| "grad_norm": 0.5526347160339355, |
| "learning_rate": 4.7117973711387874e-06, |
| "loss": 0.7007, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.9966996699669967, |
| "grad_norm": 0.5711978077888489, |
| "learning_rate": 4.71128671054081e-06, |
| "loss": 0.6582, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.9975247524752475, |
| "grad_norm": 0.5508738160133362, |
| "learning_rate": 4.710775625651429e-06, |
| "loss": 0.6575, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.9983498349834984, |
| "grad_norm": 0.538335919380188, |
| "learning_rate": 4.710264116568709e-06, |
| "loss": 0.6864, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.9991749174917491, |
| "grad_norm": 0.5674409866333008, |
| "learning_rate": 4.709752183390796e-06, |
| "loss": 0.6696, |
| "step": 1211 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.5489563941955566, |
| "learning_rate": 4.709239826215918e-06, |
| "loss": 0.6512, |
| "step": 1212 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 7272, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 1212, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.715053320858239e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
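The block above is the complete checkpoint state at epoch 1.0 (global_step 1212 of max_steps 7272). As an illustration only, and not part of the saved file, a minimal Python sketch for loading a state file of this shape and summarizing the logged training loss follows; the filename trainer_state.json is an assumption, and only fields visible in the log above (log_history, loss, step, global_step, epoch) are used.

```python
# Illustration only (assumption: the JSON above is saved as "trainer_state.json").
# Loads the checkpoint state and prints a short summary of the logged training loss.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only log entries that actually carry a training loss value.
entries = [e for e in state["log_history"] if "loss" in e]
losses = [e["loss"] for e in entries]
steps = [e["step"] for e in entries]

print(f"global_step={state['global_step']}  epoch={state['epoch']}  logged entries={len(entries)}")
print(f"first loss {losses[0]:.4f} (step {steps[0]})  ->  last loss {losses[-1]:.4f} (step {steps[-1]})")
print(f"min loss {min(losses):.4f} at step {steps[losses.index(min(losses))]}")
```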