| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 255, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011764705882352941, |
| "grad_norm": 3.6131326491907996, |
| "learning_rate": 9.090909090909091e-07, |
| "loss": 0.7929452061653137, |
| "num_input_tokens_seen": 0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.023529411764705882, |
| "grad_norm": 3.9380904690801763, |
| "learning_rate": 1.8181818181818183e-06, |
| "loss": 0.8463644981384277, |
| "num_input_tokens_seen": 0, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.03529411764705882, |
| "grad_norm": 3.5075707953425375, |
| "learning_rate": 2.7272727272727272e-06, |
| "loss": 0.7615697383880615, |
| "num_input_tokens_seen": 0, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.047058823529411764, |
| "grad_norm": 3.568991966465913, |
| "learning_rate": 3.6363636363636366e-06, |
| "loss": 0.7781215906143188, |
| "num_input_tokens_seen": 0, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.058823529411764705, |
| "grad_norm": 3.6216549878210613, |
| "learning_rate": 4.5454545454545455e-06, |
| "loss": 0.8030836582183838, |
| "num_input_tokens_seen": 0, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.07058823529411765, |
| "grad_norm": 3.0788368926498566, |
| "learning_rate": 5.4545454545454545e-06, |
| "loss": 0.7026812434196472, |
| "num_input_tokens_seen": 0, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.08235294117647059, |
| "grad_norm": 3.1470952265114955, |
| "learning_rate": 6.363636363636364e-06, |
| "loss": 0.7064052224159241, |
| "num_input_tokens_seen": 0, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.09411764705882353, |
| "grad_norm": 2.1537924186137105, |
| "learning_rate": 7.272727272727273e-06, |
| "loss": 0.5964475274085999, |
| "num_input_tokens_seen": 0, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.10588235294117647, |
| "grad_norm": 2.1368649029511326, |
| "learning_rate": 8.181818181818183e-06, |
| "loss": 0.6179602146148682, |
| "num_input_tokens_seen": 0, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.11764705882352941, |
| "grad_norm": 1.981711432431228, |
| "learning_rate": 9.090909090909091e-06, |
| "loss": 0.5555359125137329, |
| "num_input_tokens_seen": 0, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.12941176470588237, |
| "grad_norm": 1.559928530743653, |
| "learning_rate": 1e-05, |
| "loss": 0.5162748694419861, |
| "num_input_tokens_seen": 0, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.1411764705882353, |
| "grad_norm": 1.4191567801273215, |
| "learning_rate": 1.0909090909090909e-05, |
| "loss": 0.4931896924972534, |
| "num_input_tokens_seen": 0, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.15294117647058825, |
| "grad_norm": 1.2550232555964334, |
| "learning_rate": 1.181818181818182e-05, |
| "loss": 0.5178971886634827, |
| "num_input_tokens_seen": 0, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.16470588235294117, |
| "grad_norm": 1.5353572118512469, |
| "learning_rate": 1.2727272727272728e-05, |
| "loss": 0.4872450530529022, |
| "num_input_tokens_seen": 0, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.17647058823529413, |
| "grad_norm": 1.3077407975869617, |
| "learning_rate": 1.3636363636363637e-05, |
| "loss": 0.4353964030742645, |
| "num_input_tokens_seen": 0, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.18823529411764706, |
| "grad_norm": 1.2379322286899677, |
| "learning_rate": 1.4545454545454546e-05, |
| "loss": 0.4468710124492645, |
| "num_input_tokens_seen": 0, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.9822538115083431, |
| "learning_rate": 1.5454545454545454e-05, |
| "loss": 0.4612148106098175, |
| "num_input_tokens_seen": 0, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.21176470588235294, |
| "grad_norm": 1.019398009956526, |
| "learning_rate": 1.6363636363636366e-05, |
| "loss": 0.4416177570819855, |
| "num_input_tokens_seen": 0, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.2235294117647059, |
| "grad_norm": 0.6811851291872145, |
| "learning_rate": 1.7272727272727274e-05, |
| "loss": 0.4400823712348938, |
| "num_input_tokens_seen": 0, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 0.5706281411245361, |
| "learning_rate": 1.8181818181818182e-05, |
| "loss": 0.407284140586853, |
| "num_input_tokens_seen": 0, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.24705882352941178, |
| "grad_norm": 0.5144726641840578, |
| "learning_rate": 1.9090909090909094e-05, |
| "loss": 0.40131646394729614, |
| "num_input_tokens_seen": 0, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.25882352941176473, |
| "grad_norm": 0.5271151443718087, |
| "learning_rate": 2e-05, |
| "loss": 0.3690889775753021, |
| "num_input_tokens_seen": 0, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.27058823529411763, |
| "grad_norm": 0.46867880268180473, |
| "learning_rate": 1.999969615124717e-05, |
| "loss": 0.38862237334251404, |
| "num_input_tokens_seen": 0, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.2823529411764706, |
| "grad_norm": 0.44956811034445643, |
| "learning_rate": 1.9998784623453477e-05, |
| "loss": 0.38012465834617615, |
| "num_input_tokens_seen": 0, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.29411764705882354, |
| "grad_norm": 0.3973152538462978, |
| "learning_rate": 1.9997265472012247e-05, |
| "loss": 0.3671787679195404, |
| "num_input_tokens_seen": 0, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.3058823529411765, |
| "grad_norm": 0.44092842775695007, |
| "learning_rate": 1.999513878924193e-05, |
| "loss": 0.40712660551071167, |
| "num_input_tokens_seen": 0, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.3176470588235294, |
| "grad_norm": 0.48371948306233614, |
| "learning_rate": 1.9992404704380513e-05, |
| "loss": 0.37348443269729614, |
| "num_input_tokens_seen": 0, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.32941176470588235, |
| "grad_norm": 0.3835238779084052, |
| "learning_rate": 1.9989063383577644e-05, |
| "loss": 0.36719316244125366, |
| "num_input_tokens_seen": 0, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.3411764705882353, |
| "grad_norm": 0.4114683262445794, |
| "learning_rate": 1.9985115029884556e-05, |
| "loss": 0.3744957447052002, |
| "num_input_tokens_seen": 0, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 0.3926833772366512, |
| "learning_rate": 1.9980559883241723e-05, |
| "loss": 0.36837196350097656, |
| "num_input_tokens_seen": 0, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.36470588235294116, |
| "grad_norm": 0.34510721456392296, |
| "learning_rate": 1.9975398220464268e-05, |
| "loss": 0.35771483182907104, |
| "num_input_tokens_seen": 0, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.3764705882352941, |
| "grad_norm": 0.2906065104717026, |
| "learning_rate": 1.996963035522515e-05, |
| "loss": 0.3718343675136566, |
| "num_input_tokens_seen": 0, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.38823529411764707, |
| "grad_norm": 0.28680989428590703, |
| "learning_rate": 1.99632566380361e-05, |
| "loss": 0.35502949357032776, |
| "num_input_tokens_seen": 0, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.36479584489715183, |
| "learning_rate": 1.995627745622632e-05, |
| "loss": 0.3561074733734131, |
| "num_input_tokens_seen": 0, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.4117647058823529, |
| "grad_norm": 0.3233890688969203, |
| "learning_rate": 1.994869323391895e-05, |
| "loss": 0.36625605821609497, |
| "num_input_tokens_seen": 0, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.4235294117647059, |
| "grad_norm": 0.3014081868844546, |
| "learning_rate": 1.9940504432005293e-05, |
| "loss": 0.32261648774147034, |
| "num_input_tokens_seen": 0, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.43529411764705883, |
| "grad_norm": 0.29988977041893033, |
| "learning_rate": 1.993171154811679e-05, |
| "loss": 0.36725232005119324, |
| "num_input_tokens_seen": 0, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.4470588235294118, |
| "grad_norm": 0.2843556547007244, |
| "learning_rate": 1.992231511659481e-05, |
| "loss": 0.3372398316860199, |
| "num_input_tokens_seen": 0, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.4588235294117647, |
| "grad_norm": 0.25146289166175934, |
| "learning_rate": 1.9912315708458144e-05, |
| "loss": 0.35911282896995544, |
| "num_input_tokens_seen": 0, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 0.25109365889274177, |
| "learning_rate": 1.9901713931368333e-05, |
| "loss": 0.35379254817962646, |
| "num_input_tokens_seen": 0, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.4823529411764706, |
| "grad_norm": 0.25470949144259586, |
| "learning_rate": 1.989051042959273e-05, |
| "loss": 0.34498846530914307, |
| "num_input_tokens_seen": 0, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.49411764705882355, |
| "grad_norm": 0.2603263992351511, |
| "learning_rate": 1.9878705883965342e-05, |
| "loss": 0.346971720457077, |
| "num_input_tokens_seen": 0, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.5058823529411764, |
| "grad_norm": 0.2837708452421679, |
| "learning_rate": 1.986630101184546e-05, |
| "loss": 0.3518391251564026, |
| "num_input_tokens_seen": 0, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.5176470588235295, |
| "grad_norm": 0.2690921713644025, |
| "learning_rate": 1.9853296567074075e-05, |
| "loss": 0.3417142331600189, |
| "num_input_tokens_seen": 0, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.5294117647058824, |
| "grad_norm": 0.27838893675816295, |
| "learning_rate": 1.983969333992804e-05, |
| "loss": 0.33975788950920105, |
| "num_input_tokens_seen": 0, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.5411764705882353, |
| "grad_norm": 0.2500924041093808, |
| "learning_rate": 1.982549215707209e-05, |
| "loss": 0.3427805006504059, |
| "num_input_tokens_seen": 0, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.5529411764705883, |
| "grad_norm": 0.2515785105886048, |
| "learning_rate": 1.9810693881508548e-05, |
| "loss": 0.34949395060539246, |
| "num_input_tokens_seen": 0, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.5647058823529412, |
| "grad_norm": 0.29781103489548, |
| "learning_rate": 1.9795299412524948e-05, |
| "loss": 0.34314972162246704, |
| "num_input_tokens_seen": 0, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.5764705882352941, |
| "grad_norm": 0.2938887318454496, |
| "learning_rate": 1.9779309685639317e-05, |
| "loss": 0.3414318263530731, |
| "num_input_tokens_seen": 0, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 0.2895540362722232, |
| "learning_rate": 1.9762725672543372e-05, |
| "loss": 0.3192686140537262, |
| "num_input_tokens_seen": 0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.23066275708054598, |
| "learning_rate": 1.9745548381043454e-05, |
| "loss": 0.3385634422302246, |
| "num_input_tokens_seen": 0, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.611764705882353, |
| "grad_norm": 0.280246896213228, |
| "learning_rate": 1.9727778854999283e-05, |
| "loss": 0.35149312019348145, |
| "num_input_tokens_seen": 0, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.6235294117647059, |
| "grad_norm": 0.27445702417204193, |
| "learning_rate": 1.9709418174260523e-05, |
| "loss": 0.3358836770057678, |
| "num_input_tokens_seen": 0, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.6352941176470588, |
| "grad_norm": 0.27123307051788814, |
| "learning_rate": 1.969046745460116e-05, |
| "loss": 0.35038888454437256, |
| "num_input_tokens_seen": 0, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.6470588235294118, |
| "grad_norm": 0.3022409414703534, |
| "learning_rate": 1.9670927847651707e-05, |
| "loss": 0.3619537651538849, |
| "num_input_tokens_seen": 0, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.6588235294117647, |
| "grad_norm": 0.28043980426354626, |
| "learning_rate": 1.9650800540829204e-05, |
| "loss": 0.334235817193985, |
| "num_input_tokens_seen": 0, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.6705882352941176, |
| "grad_norm": 0.2608407829948446, |
| "learning_rate": 1.963008675726506e-05, |
| "loss": 0.3367481827735901, |
| "num_input_tokens_seen": 0, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.6823529411764706, |
| "grad_norm": 0.28536414145460753, |
| "learning_rate": 1.9608787755730746e-05, |
| "loss": 0.3296854496002197, |
| "num_input_tokens_seen": 0, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.6941176470588235, |
| "grad_norm": 0.27980621624734936, |
| "learning_rate": 1.958690483056126e-05, |
| "loss": 0.32561179995536804, |
| "num_input_tokens_seen": 0, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 0.2424680758848498, |
| "learning_rate": 1.9564439311576515e-05, |
| "loss": 0.33346784114837646, |
| "num_input_tokens_seen": 0, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.7176470588235294, |
| "grad_norm": 0.24779814083785615, |
| "learning_rate": 1.954139256400049e-05, |
| "loss": 0.34621721506118774, |
| "num_input_tokens_seen": 0, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.7294117647058823, |
| "grad_norm": 0.28660266256207545, |
| "learning_rate": 1.951776598837829e-05, |
| "loss": 0.31782716512680054, |
| "num_input_tokens_seen": 0, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.7411764705882353, |
| "grad_norm": 0.2628734068408129, |
| "learning_rate": 1.9493561020491024e-05, |
| "loss": 0.3253316283226013, |
| "num_input_tokens_seen": 0, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.7529411764705882, |
| "grad_norm": 0.253172171843236, |
| "learning_rate": 1.9468779131268553e-05, |
| "loss": 0.32543760538101196, |
| "num_input_tokens_seen": 0, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.7647058823529411, |
| "grad_norm": 0.28753705660744233, |
| "learning_rate": 1.9443421826700096e-05, |
| "loss": 0.32660526037216187, |
| "num_input_tokens_seen": 0, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.7764705882352941, |
| "grad_norm": 0.2550674488664895, |
| "learning_rate": 1.9417490647742738e-05, |
| "loss": 0.29762235283851624, |
| "num_input_tokens_seen": 0, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.788235294117647, |
| "grad_norm": 0.2387936654216341, |
| "learning_rate": 1.9390987170227746e-05, |
| "loss": 0.34908509254455566, |
| "num_input_tokens_seen": 0, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.28736279751275334, |
| "learning_rate": 1.9363913004764847e-05, |
| "loss": 0.3289881944656372, |
| "num_input_tokens_seen": 0, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.8117647058823529, |
| "grad_norm": 0.2857068101908039, |
| "learning_rate": 1.9336269796644314e-05, |
| "loss": 0.316879540681839, |
| "num_input_tokens_seen": 0, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "grad_norm": 0.262248761213064, |
| "learning_rate": 1.9308059225737015e-05, |
| "loss": 0.3085065484046936, |
| "num_input_tokens_seen": 0, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.8352941176470589, |
| "grad_norm": 0.27771726352284015, |
| "learning_rate": 1.9279283006392304e-05, |
| "loss": 0.3186359405517578, |
| "num_input_tokens_seen": 0, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.8470588235294118, |
| "grad_norm": 0.31802345881089383, |
| "learning_rate": 1.924994288733386e-05, |
| "loss": 0.329565167427063, |
| "num_input_tokens_seen": 0, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.8588235294117647, |
| "grad_norm": 0.28385852442224846, |
| "learning_rate": 1.9220040651553388e-05, |
| "loss": 0.3364284634590149, |
| "num_input_tokens_seen": 0, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.8705882352941177, |
| "grad_norm": 0.2533928699463178, |
| "learning_rate": 1.918957811620231e-05, |
| "loss": 0.3229159712791443, |
| "num_input_tokens_seen": 0, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.8823529411764706, |
| "grad_norm": 0.27551639833029534, |
| "learning_rate": 1.915855713248129e-05, |
| "loss": 0.317361056804657, |
| "num_input_tokens_seen": 0, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.8941176470588236, |
| "grad_norm": 0.26990146657663827, |
| "learning_rate": 1.912697958552778e-05, |
| "loss": 0.31780922412872314, |
| "num_input_tokens_seen": 0, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.9058823529411765, |
| "grad_norm": 0.3150863510764379, |
| "learning_rate": 1.9094847394301427e-05, |
| "loss": 0.33098268508911133, |
| "num_input_tokens_seen": 0, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.9176470588235294, |
| "grad_norm": 0.3234901727951674, |
| "learning_rate": 1.906216251146748e-05, |
| "loss": 0.341233491897583, |
| "num_input_tokens_seen": 0, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.9294117647058824, |
| "grad_norm": 0.26331326581875253, |
| "learning_rate": 1.902892692327811e-05, |
| "loss": 0.33283838629722595, |
| "num_input_tokens_seen": 0, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 0.31786869502135223, |
| "learning_rate": 1.899514264945173e-05, |
| "loss": 0.3331839442253113, |
| "num_input_tokens_seen": 0, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.9529411764705882, |
| "grad_norm": 0.29539690375673217, |
| "learning_rate": 1.8960811743050227e-05, |
| "loss": 0.33531326055526733, |
| "num_input_tokens_seen": 0, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.9647058823529412, |
| "grad_norm": 0.30472122382886785, |
| "learning_rate": 1.8925936290354224e-05, |
| "loss": 0.3103257417678833, |
| "num_input_tokens_seen": 0, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.9764705882352941, |
| "grad_norm": 0.2530167563030317, |
| "learning_rate": 1.8890518410736275e-05, |
| "loss": 0.32245466113090515, |
| "num_input_tokens_seen": 0, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.9882352941176471, |
| "grad_norm": 0.28897856083778817, |
| "learning_rate": 1.8854560256532098e-05, |
| "loss": 0.3198079764842987, |
| "num_input_tokens_seen": 0, |
| "step": 84 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.26450715598842334, |
| "learning_rate": 1.8818064012909755e-05, |
| "loss": 0.3213130235671997, |
| "num_input_tokens_seen": 0, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.011764705882353, |
| "grad_norm": 0.2626207427201876, |
| "learning_rate": 1.878103189773686e-05, |
| "loss": 0.2763475179672241, |
| "num_input_tokens_seen": 0, |
| "step": 86 |
| }, |
| { |
| "epoch": 1.0235294117647058, |
| "grad_norm": 0.2629994874410627, |
| "learning_rate": 1.8743466161445823e-05, |
| "loss": 0.2665697932243347, |
| "num_input_tokens_seen": 0, |
| "step": 87 |
| }, |
| { |
| "epoch": 1.035294117647059, |
| "grad_norm": 0.29251013301945034, |
| "learning_rate": 1.8705369086897063e-05, |
| "loss": 0.2806475758552551, |
| "num_input_tokens_seen": 0, |
| "step": 88 |
| }, |
| { |
| "epoch": 1.0470588235294118, |
| "grad_norm": 0.2852660256104482, |
| "learning_rate": 1.86667429892403e-05, |
| "loss": 0.2648066282272339, |
| "num_input_tokens_seen": 0, |
| "step": 89 |
| }, |
| { |
| "epoch": 1.0588235294117647, |
| "grad_norm": 0.2762797160323279, |
| "learning_rate": 1.862759021577385e-05, |
| "loss": 0.2734478712081909, |
| "num_input_tokens_seen": 0, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.0705882352941176, |
| "grad_norm": 0.3020506174745607, |
| "learning_rate": 1.8587913145801998e-05, |
| "loss": 0.2635505795478821, |
| "num_input_tokens_seen": 0, |
| "step": 91 |
| }, |
| { |
| "epoch": 1.0823529411764705, |
| "grad_norm": 0.28351991002826543, |
| "learning_rate": 1.8547714190490385e-05, |
| "loss": 0.2799134850502014, |
| "num_input_tokens_seen": 0, |
| "step": 92 |
| }, |
| { |
| "epoch": 1.0941176470588236, |
| "grad_norm": 0.2740110743262188, |
| "learning_rate": 1.8506995792719498e-05, |
| "loss": 0.2726055979728699, |
| "num_input_tokens_seen": 0, |
| "step": 93 |
| }, |
| { |
| "epoch": 1.1058823529411765, |
| "grad_norm": 0.30269062031162386, |
| "learning_rate": 1.8465760426936212e-05, |
| "loss": 0.2837594747543335, |
| "num_input_tokens_seen": 0, |
| "step": 94 |
| }, |
| { |
| "epoch": 1.1176470588235294, |
| "grad_norm": 0.30315593582121325, |
| "learning_rate": 1.8424010599003424e-05, |
| "loss": 0.2731676995754242, |
| "num_input_tokens_seen": 0, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.1294117647058823, |
| "grad_norm": 0.26149596901353317, |
| "learning_rate": 1.838174884604776e-05, |
| "loss": 0.2705945372581482, |
| "num_input_tokens_seen": 0, |
| "step": 96 |
| }, |
| { |
| "epoch": 1.1411764705882352, |
| "grad_norm": 0.3026007790348899, |
| "learning_rate": 1.8338977736305408e-05, |
| "loss": 0.2789444923400879, |
| "num_input_tokens_seen": 0, |
| "step": 97 |
| }, |
| { |
| "epoch": 1.1529411764705881, |
| "grad_norm": 0.3349505482436329, |
| "learning_rate": 1.8295699868966038e-05, |
| "loss": 0.2682260572910309, |
| "num_input_tokens_seen": 0, |
| "step": 98 |
| }, |
| { |
| "epoch": 1.1647058823529413, |
| "grad_norm": 0.2891949628165266, |
| "learning_rate": 1.8251917874014854e-05, |
| "loss": 0.28042054176330566, |
| "num_input_tokens_seen": 0, |
| "step": 99 |
| }, |
| { |
| "epoch": 1.1764705882352942, |
| "grad_norm": 0.2941393562882544, |
| "learning_rate": 1.8207634412072765e-05, |
| "loss": 0.25862598419189453, |
| "num_input_tokens_seen": 0, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.188235294117647, |
| "grad_norm": 0.32722595542360156, |
| "learning_rate": 1.8162852174234712e-05, |
| "loss": 0.2712678909301758, |
| "num_input_tokens_seen": 0, |
| "step": 101 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.2755950618714099, |
| "learning_rate": 1.8117573881906114e-05, |
| "loss": 0.26205819845199585, |
| "num_input_tokens_seen": 0, |
| "step": 102 |
| }, |
| { |
| "epoch": 1.2117647058823529, |
| "grad_norm": 0.2571435526644292, |
| "learning_rate": 1.8071802286637505e-05, |
| "loss": 0.2622745633125305, |
| "num_input_tokens_seen": 0, |
| "step": 103 |
| }, |
| { |
| "epoch": 1.223529411764706, |
| "grad_norm": 0.2591172743832164, |
| "learning_rate": 1.8025540169957315e-05, |
| "loss": 0.25631460547447205, |
| "num_input_tokens_seen": 0, |
| "step": 104 |
| }, |
| { |
| "epoch": 1.2352941176470589, |
| "grad_norm": 0.2817321177900711, |
| "learning_rate": 1.7978790343202826e-05, |
| "loss": 0.2782523036003113, |
| "num_input_tokens_seen": 0, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.2470588235294118, |
| "grad_norm": 0.33225523735776513, |
| "learning_rate": 1.7931555647349358e-05, |
| "loss": 0.2600249946117401, |
| "num_input_tokens_seen": 0, |
| "step": 106 |
| }, |
| { |
| "epoch": 1.2588235294117647, |
| "grad_norm": 0.2786742348476795, |
| "learning_rate": 1.7883838952837595e-05, |
| "loss": 0.25568312406539917, |
| "num_input_tokens_seen": 0, |
| "step": 107 |
| }, |
| { |
| "epoch": 1.2705882352941176, |
| "grad_norm": 0.2771371487960206, |
| "learning_rate": 1.7835643159399156e-05, |
| "loss": 0.2384142279624939, |
| "num_input_tokens_seen": 0, |
| "step": 108 |
| }, |
| { |
| "epoch": 1.2823529411764705, |
| "grad_norm": 0.31328815588599274, |
| "learning_rate": 1.778697119588039e-05, |
| "loss": 0.2667343318462372, |
| "num_input_tokens_seen": 0, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.2941176470588236, |
| "grad_norm": 0.2690014796691674, |
| "learning_rate": 1.7737826020064377e-05, |
| "loss": 0.2558494210243225, |
| "num_input_tokens_seen": 0, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.3058823529411765, |
| "grad_norm": 0.3038451633685586, |
| "learning_rate": 1.76882106184912e-05, |
| "loss": 0.25802576541900635, |
| "num_input_tokens_seen": 0, |
| "step": 111 |
| }, |
| { |
| "epoch": 1.3176470588235294, |
| "grad_norm": 0.26349039262552754, |
| "learning_rate": 1.7638128006276422e-05, |
| "loss": 0.26081448793411255, |
| "num_input_tokens_seen": 0, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.3294117647058823, |
| "grad_norm": 0.27581161125402026, |
| "learning_rate": 1.758758122692791e-05, |
| "loss": 0.27647483348846436, |
| "num_input_tokens_seen": 0, |
| "step": 113 |
| }, |
| { |
| "epoch": 1.3411764705882354, |
| "grad_norm": 0.3235486769428178, |
| "learning_rate": 1.753657335216083e-05, |
| "loss": 0.2677750587463379, |
| "num_input_tokens_seen": 0, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.3529411764705883, |
| "grad_norm": 0.2809145367414571, |
| "learning_rate": 1.7485107481711014e-05, |
| "loss": 0.2682688236236572, |
| "num_input_tokens_seen": 0, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.3647058823529412, |
| "grad_norm": 0.2619951939456424, |
| "learning_rate": 1.743318674314656e-05, |
| "loss": 0.25316929817199707, |
| "num_input_tokens_seen": 0, |
| "step": 116 |
| }, |
| { |
| "epoch": 1.3764705882352941, |
| "grad_norm": 0.27411080913366315, |
| "learning_rate": 1.7380814291677818e-05, |
| "loss": 0.2697577476501465, |
| "num_input_tokens_seen": 0, |
| "step": 117 |
| }, |
| { |
| "epoch": 1.388235294117647, |
| "grad_norm": 0.3338822677438316, |
| "learning_rate": 1.7327993309965583e-05, |
| "loss": 0.2708876132965088, |
| "num_input_tokens_seen": 0, |
| "step": 118 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.31962282276030907, |
| "learning_rate": 1.7274727007927747e-05, |
| "loss": 0.27048563957214355, |
| "num_input_tokens_seen": 0, |
| "step": 119 |
| }, |
| { |
| "epoch": 1.4117647058823528, |
| "grad_norm": 0.285342846378909, |
| "learning_rate": 1.7221018622544197e-05, |
| "loss": 0.2710177004337311, |
| "num_input_tokens_seen": 0, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.423529411764706, |
| "grad_norm": 0.308814170391406, |
| "learning_rate": 1.7166871417660116e-05, |
| "loss": 0.2526181936264038, |
| "num_input_tokens_seen": 0, |
| "step": 121 |
| }, |
| { |
| "epoch": 1.4352941176470588, |
| "grad_norm": 0.27775597890631276, |
| "learning_rate": 1.7112288683787637e-05, |
| "loss": 0.26763850450515747, |
| "num_input_tokens_seen": 0, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.4470588235294117, |
| "grad_norm": 0.2958185178060128, |
| "learning_rate": 1.7057273737905887e-05, |
| "loss": 0.268245667219162, |
| "num_input_tokens_seen": 0, |
| "step": 123 |
| }, |
| { |
| "epoch": 1.4588235294117646, |
| "grad_norm": 0.2483775556217329, |
| "learning_rate": 1.70018299232594e-05, |
| "loss": 0.25788575410842896, |
| "num_input_tokens_seen": 0, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.4705882352941178, |
| "grad_norm": 0.2811097779442606, |
| "learning_rate": 1.6945960609154966e-05, |
| "loss": 0.26732224225997925, |
| "num_input_tokens_seen": 0, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.4823529411764707, |
| "grad_norm": 0.2934299916938348, |
| "learning_rate": 1.688966919075687e-05, |
| "loss": 0.26281166076660156, |
| "num_input_tokens_seen": 0, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.4941176470588236, |
| "grad_norm": 0.2368134963295287, |
| "learning_rate": 1.6832959088880557e-05, |
| "loss": 0.25862863659858704, |
| "num_input_tokens_seen": 0, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.5058823529411764, |
| "grad_norm": 0.2708304514650526, |
| "learning_rate": 1.677583374978478e-05, |
| "loss": 0.2421874701976776, |
| "num_input_tokens_seen": 0, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.5176470588235293, |
| "grad_norm": 0.2767120423486198, |
| "learning_rate": 1.6718296644962146e-05, |
| "loss": 0.2624642550945282, |
| "num_input_tokens_seen": 0, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.5294117647058822, |
| "grad_norm": 0.2813174470652987, |
| "learning_rate": 1.6660351270928164e-05, |
| "loss": 0.24937519431114197, |
| "num_input_tokens_seen": 0, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.5411764705882351, |
| "grad_norm": 0.3009488397968105, |
| "learning_rate": 1.660200114900876e-05, |
| "loss": 0.2704227566719055, |
| "num_input_tokens_seen": 0, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.5529411764705883, |
| "grad_norm": 0.3141059797795813, |
| "learning_rate": 1.6543249825126285e-05, |
| "loss": 0.26932939887046814, |
| "num_input_tokens_seen": 0, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.5647058823529412, |
| "grad_norm": 0.25053717473426707, |
| "learning_rate": 1.6484100869584044e-05, |
| "loss": 0.2592698633670807, |
| "num_input_tokens_seen": 0, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.576470588235294, |
| "grad_norm": 0.25700597213890997, |
| "learning_rate": 1.6424557876849308e-05, |
| "loss": 0.27053964138031006, |
| "num_input_tokens_seen": 0, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.5882352941176472, |
| "grad_norm": 0.30182930329649144, |
| "learning_rate": 1.636462446533489e-05, |
| "loss": 0.25989019870758057, |
| "num_input_tokens_seen": 0, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.26390881674937633, |
| "learning_rate": 1.6304304277179267e-05, |
| "loss": 0.2570236027240753, |
| "num_input_tokens_seen": 0, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.611764705882353, |
| "grad_norm": 0.2652947312714827, |
| "learning_rate": 1.6243600978025215e-05, |
| "loss": 0.2678568363189697, |
| "num_input_tokens_seen": 0, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.6235294117647059, |
| "grad_norm": 0.2575940385752971, |
| "learning_rate": 1.6182518256797095e-05, |
| "loss": 0.2600210905075073, |
| "num_input_tokens_seen": 0, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.6352941176470588, |
| "grad_norm": 0.2610590842320019, |
| "learning_rate": 1.612105982547663e-05, |
| "loss": 0.26671087741851807, |
| "num_input_tokens_seen": 0, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.6470588235294117, |
| "grad_norm": 0.25464302295329627, |
| "learning_rate": 1.605922941887737e-05, |
| "loss": 0.2668280005455017, |
| "num_input_tokens_seen": 0, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.6588235294117646, |
| "grad_norm": 0.26069231826980477, |
| "learning_rate": 1.599703079441769e-05, |
| "loss": 0.2653328478336334, |
| "num_input_tokens_seen": 0, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.6705882352941175, |
| "grad_norm": 0.27072482250492486, |
| "learning_rate": 1.5934467731892497e-05, |
| "loss": 0.2632245719432831, |
| "num_input_tokens_seen": 0, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.6823529411764706, |
| "grad_norm": 0.24138888757547514, |
| "learning_rate": 1.5871544033243488e-05, |
| "loss": 0.26093634963035583, |
| "num_input_tokens_seen": 0, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.6941176470588235, |
| "grad_norm": 0.25857892670146815, |
| "learning_rate": 1.5808263522328137e-05, |
| "loss": 0.2518957853317261, |
| "num_input_tokens_seen": 0, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.7058823529411766, |
| "grad_norm": 0.25322801625227936, |
| "learning_rate": 1.5744630044687307e-05, |
| "loss": 0.25198179483413696, |
| "num_input_tokens_seen": 0, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.7176470588235295, |
| "grad_norm": 0.23398219863607192, |
| "learning_rate": 1.568064746731156e-05, |
| "loss": 0.25039592385292053, |
| "num_input_tokens_seen": 0, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.7294117647058824, |
| "grad_norm": 0.22752785226042835, |
| "learning_rate": 1.561631967840617e-05, |
| "loss": 0.25004899501800537, |
| "num_input_tokens_seen": 0, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.7411764705882353, |
| "grad_norm": 0.26867363858385673, |
| "learning_rate": 1.5551650587154815e-05, |
| "loss": 0.2628065347671509, |
| "num_input_tokens_seen": 0, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.7529411764705882, |
| "grad_norm": 0.2572214700469002, |
| "learning_rate": 1.5486644123482047e-05, |
| "loss": 0.2694377899169922, |
| "num_input_tokens_seen": 0, |
| "step": 149 |
| }, |
| { |
| "epoch": 1.7647058823529411, |
| "grad_norm": 0.2649069012394484, |
| "learning_rate": 1.542130423781444e-05, |
| "loss": 0.2698570787906647, |
| "num_input_tokens_seen": 0, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.776470588235294, |
| "grad_norm": 0.3129557276746984, |
| "learning_rate": 1.5355634900840558e-05, |
| "loss": 0.2620123624801636, |
| "num_input_tokens_seen": 0, |
| "step": 151 |
| }, |
| { |
| "epoch": 1.788235294117647, |
| "grad_norm": 0.219414643912218, |
| "learning_rate": 1.5289640103269626e-05, |
| "loss": 0.24250832200050354, |
| "num_input_tokens_seen": 0, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.2787522458312503, |
| "learning_rate": 1.5223323855589027e-05, |
| "loss": 0.2599625885486603, |
| "num_input_tokens_seen": 0, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.811764705882353, |
| "grad_norm": 0.24624844789559322, |
| "learning_rate": 1.5156690187820596e-05, |
| "loss": 0.2539859712123871, |
| "num_input_tokens_seen": 0, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.8235294117647058, |
| "grad_norm": 0.28786535612403885, |
| "learning_rate": 1.50897431492757e-05, |
| "loss": 0.251323938369751, |
| "num_input_tokens_seen": 0, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.835294117647059, |
| "grad_norm": 0.2861446800798861, |
| "learning_rate": 1.5022486808309171e-05, |
| "loss": 0.2852325439453125, |
| "num_input_tokens_seen": 0, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.8470588235294119, |
| "grad_norm": 0.30835997118524755, |
| "learning_rate": 1.4954925252072077e-05, |
| "loss": 0.2626144289970398, |
| "num_input_tokens_seen": 0, |
| "step": 157 |
| }, |
| { |
| "epoch": 1.8588235294117648, |
| "grad_norm": 0.27166093756727683, |
| "learning_rate": 1.4887062586263334e-05, |
| "loss": 0.26250118017196655, |
| "num_input_tokens_seen": 0, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.8705882352941177, |
| "grad_norm": 0.30818985316404857, |
| "learning_rate": 1.4818902934880222e-05, |
| "loss": 0.27699387073516846, |
| "num_input_tokens_seen": 0, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.8823529411764706, |
| "grad_norm": 0.30205479197808555, |
| "learning_rate": 1.4750450439967751e-05, |
| "loss": 0.272649347782135, |
| "num_input_tokens_seen": 0, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.8941176470588235, |
| "grad_norm": 0.29949042144033816, |
| "learning_rate": 1.4681709261366963e-05, |
| "loss": 0.2485789656639099, |
| "num_input_tokens_seen": 0, |
| "step": 161 |
| }, |
| { |
| "epoch": 1.9058823529411764, |
| "grad_norm": 0.267903631477539, |
| "learning_rate": 1.4612683576462135e-05, |
| "loss": 0.2616223096847534, |
| "num_input_tokens_seen": 0, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.9176470588235293, |
| "grad_norm": 0.27260315220708237, |
| "learning_rate": 1.4543377579926915e-05, |
| "loss": 0.27286335825920105, |
| "num_input_tokens_seen": 0, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.9294117647058824, |
| "grad_norm": 0.28592302424298965, |
| "learning_rate": 1.4473795483469442e-05, |
| "loss": 0.24860531091690063, |
| "num_input_tokens_seen": 0, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.9411764705882353, |
| "grad_norm": 0.27067444548694936, |
| "learning_rate": 1.4403941515576344e-05, |
| "loss": 0.2611614167690277, |
| "num_input_tokens_seen": 0, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.9529411764705882, |
| "grad_norm": 0.26432408877050523, |
| "learning_rate": 1.4333819921255836e-05, |
| "loss": 0.26266223192214966, |
| "num_input_tokens_seen": 0, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.9647058823529413, |
| "grad_norm": 0.32069387585361836, |
| "learning_rate": 1.4263434961779709e-05, |
| "loss": 0.24890068173408508, |
| "num_input_tokens_seen": 0, |
| "step": 167 |
| }, |
| { |
| "epoch": 1.9764705882352942, |
| "grad_norm": 0.28968277975368684, |
| "learning_rate": 1.41927909144244e-05, |
| "loss": 0.2612011432647705, |
| "num_input_tokens_seen": 0, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.988235294117647, |
| "grad_norm": 0.2593706365289158, |
| "learning_rate": 1.412189207221104e-05, |
| "loss": 0.24890106916427612, |
| "num_input_tokens_seen": 0, |
| "step": 169 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.25908450639554936, |
| "learning_rate": 1.4050742743644588e-05, |
| "loss": 0.25550538301467896, |
| "num_input_tokens_seen": 0, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.011764705882353, |
| "grad_norm": 0.32606044201267254, |
| "learning_rate": 1.3979347252451994e-05, |
| "loss": 0.20405685901641846, |
| "num_input_tokens_seen": 0, |
| "step": 171 |
| }, |
| { |
| "epoch": 2.023529411764706, |
| "grad_norm": 0.31532835367496725, |
| "learning_rate": 1.3907709937319451e-05, |
| "loss": 0.2080579251050949, |
| "num_input_tokens_seen": 0, |
| "step": 172 |
| }, |
| { |
| "epoch": 2.0352941176470587, |
| "grad_norm": 0.23106550000023307, |
| "learning_rate": 1.3835835151628728e-05, |
| "loss": 0.1862945556640625, |
| "num_input_tokens_seen": 0, |
| "step": 173 |
| }, |
| { |
| "epoch": 2.0470588235294116, |
| "grad_norm": 0.2399759682184491, |
| "learning_rate": 1.3763727263192626e-05, |
| "loss": 0.18684154748916626, |
| "num_input_tokens_seen": 0, |
| "step": 174 |
| }, |
| { |
| "epoch": 2.0588235294117645, |
| "grad_norm": 0.24298409208730917, |
| "learning_rate": 1.3691390653989536e-05, |
| "loss": 0.19205346703529358, |
| "num_input_tokens_seen": 0, |
| "step": 175 |
| }, |
| { |
| "epoch": 2.070588235294118, |
| "grad_norm": 0.2606890104298591, |
| "learning_rate": 1.3618829719897158e-05, |
| "loss": 0.19722914695739746, |
| "num_input_tokens_seen": 0, |
| "step": 176 |
| }, |
| { |
| "epoch": 2.0823529411764707, |
| "grad_norm": 0.2634006155067239, |
| "learning_rate": 1.3546048870425356e-05, |
| "loss": 0.18658706545829773, |
| "num_input_tokens_seen": 0, |
| "step": 177 |
| }, |
| { |
| "epoch": 2.0941176470588236, |
| "grad_norm": 0.2474551016529151, |
| "learning_rate": 1.3473052528448203e-05, |
| "loss": 0.18761307001113892, |
| "num_input_tokens_seen": 0, |
| "step": 178 |
| }, |
| { |
| "epoch": 2.1058823529411765, |
| "grad_norm": 0.2773501459528279, |
| "learning_rate": 1.3399845129935191e-05, |
| "loss": 0.2006130963563919, |
| "num_input_tokens_seen": 0, |
| "step": 179 |
| }, |
| { |
| "epoch": 2.1176470588235294, |
| "grad_norm": 0.24768518968840073, |
| "learning_rate": 1.3326431123681667e-05, |
| "loss": 0.1869545876979828, |
| "num_input_tokens_seen": 0, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.1294117647058823, |
| "grad_norm": 0.24087563849344726, |
| "learning_rate": 1.3252814971038477e-05, |
| "loss": 0.19419728219509125, |
| "num_input_tokens_seen": 0, |
| "step": 181 |
| }, |
| { |
| "epoch": 2.1411764705882352, |
| "grad_norm": 0.24859116981429222, |
| "learning_rate": 1.3179001145640856e-05, |
| "loss": 0.1937357634305954, |
| "num_input_tokens_seen": 0, |
| "step": 182 |
| }, |
| { |
| "epoch": 2.152941176470588, |
| "grad_norm": 0.2513377458414818, |
| "learning_rate": 1.3104994133136563e-05, |
| "loss": 0.18806332349777222, |
| "num_input_tokens_seen": 0, |
| "step": 183 |
| }, |
| { |
| "epoch": 2.164705882352941, |
| "grad_norm": 0.24195612774749747, |
| "learning_rate": 1.3030798430913289e-05, |
| "loss": 0.19312450289726257, |
| "num_input_tokens_seen": 0, |
| "step": 184 |
| }, |
| { |
| "epoch": 2.176470588235294, |
| "grad_norm": 0.2598954308224352, |
| "learning_rate": 1.295641854782535e-05, |
| "loss": 0.19178995490074158, |
| "num_input_tokens_seen": 0, |
| "step": 185 |
| }, |
| { |
| "epoch": 2.1882352941176473, |
| "grad_norm": 0.2738424910649441, |
| "learning_rate": 1.2881859003919688e-05, |
| "loss": 0.19293949007987976, |
| "num_input_tokens_seen": 0, |
| "step": 186 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 0.24146821641260552, |
| "learning_rate": 1.2807124330161188e-05, |
| "loss": 0.18528440594673157, |
| "num_input_tokens_seen": 0, |
| "step": 187 |
| }, |
| { |
| "epoch": 2.211764705882353, |
| "grad_norm": 0.257111381442425, |
| "learning_rate": 1.2732219068157335e-05, |
| "loss": 0.18848256766796112, |
| "num_input_tokens_seen": 0, |
| "step": 188 |
| }, |
| { |
| "epoch": 2.223529411764706, |
| "grad_norm": 0.2526409622347608, |
| "learning_rate": 1.2657147769882215e-05, |
| "loss": 0.18127834796905518, |
| "num_input_tokens_seen": 0, |
| "step": 189 |
| }, |
| { |
| "epoch": 2.235294117647059, |
| "grad_norm": 0.23701529976763616, |
| "learning_rate": 1.2581914997399899e-05, |
| "loss": 0.18892061710357666, |
| "num_input_tokens_seen": 0, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.2470588235294118, |
| "grad_norm": 0.24297086363023263, |
| "learning_rate": 1.2506525322587207e-05, |
| "loss": 0.19873817265033722, |
| "num_input_tokens_seen": 0, |
| "step": 191 |
| }, |
| { |
| "epoch": 2.2588235294117647, |
| "grad_norm": 0.2537032696104157, |
| "learning_rate": 1.2430983326855873e-05, |
| "loss": 0.1893860250711441, |
| "num_input_tokens_seen": 0, |
| "step": 192 |
| }, |
| { |
| "epoch": 2.2705882352941176, |
| "grad_norm": 0.23876942589975814, |
| "learning_rate": 1.2355293600874132e-05, |
| "loss": 0.18759432435035706, |
| "num_input_tokens_seen": 0, |
| "step": 193 |
| }, |
| { |
| "epoch": 2.2823529411764705, |
| "grad_norm": 0.2435388542806445, |
| "learning_rate": 1.2279460744287755e-05, |
| "loss": 0.18849223852157593, |
| "num_input_tokens_seen": 0, |
| "step": 194 |
| }, |
| { |
| "epoch": 2.2941176470588234, |
| "grad_norm": 0.2647343889775541, |
| "learning_rate": 1.220348936544052e-05, |
| "loss": 0.18661049008369446, |
| "num_input_tokens_seen": 0, |
| "step": 195 |
| }, |
| { |
| "epoch": 2.3058823529411763, |
| "grad_norm": 0.25540155279573523, |
| "learning_rate": 1.2127384081094167e-05, |
| "loss": 0.18517085909843445, |
| "num_input_tokens_seen": 0, |
| "step": 196 |
| }, |
| { |
| "epoch": 2.317647058823529, |
| "grad_norm": 0.24552318557540526, |
| "learning_rate": 1.205114951614785e-05, |
| "loss": 0.17878204584121704, |
| "num_input_tokens_seen": 0, |
| "step": 197 |
| }, |
| { |
| "epoch": 2.3294117647058825, |
| "grad_norm": 0.2258935926658077, |
| "learning_rate": 1.197479030335706e-05, |
| "loss": 0.18578067421913147, |
| "num_input_tokens_seen": 0, |
| "step": 198 |
| }, |
| { |
| "epoch": 2.3411764705882354, |
| "grad_norm": 0.22583777859137, |
| "learning_rate": 1.1898311083052113e-05, |
| "loss": 0.19397635757923126, |
| "num_input_tokens_seen": 0, |
| "step": 199 |
| }, |
| { |
| "epoch": 2.3529411764705883, |
| "grad_norm": 0.23201542489820412, |
| "learning_rate": 1.1821716502856154e-05, |
| "loss": 0.18146567046642303, |
| "num_input_tokens_seen": 0, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.364705882352941, |
| "grad_norm": 0.28552700838642453, |
| "learning_rate": 1.1745011217402709e-05, |
| "loss": 0.19469541311264038, |
| "num_input_tokens_seen": 0, |
| "step": 201 |
| }, |
| { |
| "epoch": 2.376470588235294, |
| "grad_norm": 0.24910488131854605, |
| "learning_rate": 1.1668199888052844e-05, |
| "loss": 0.18924464285373688, |
| "num_input_tokens_seen": 0, |
| "step": 202 |
| }, |
| { |
| "epoch": 2.388235294117647, |
| "grad_norm": 0.24952841695443162, |
| "learning_rate": 1.159128718261189e-05, |
| "loss": 0.18815085291862488, |
| "num_input_tokens_seen": 0, |
| "step": 203 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.22629712220582293, |
| "learning_rate": 1.1514277775045768e-05, |
| "loss": 0.18509158492088318, |
| "num_input_tokens_seen": 0, |
| "step": 204 |
| }, |
| { |
| "epoch": 2.411764705882353, |
| "grad_norm": 0.2258797767600323, |
| "learning_rate": 1.1437176345196967e-05, |
| "loss": 0.17601992189884186, |
| "num_input_tokens_seen": 0, |
| "step": 205 |
| }, |
| { |
| "epoch": 2.4235294117647057, |
| "grad_norm": 0.2994549469629298, |
| "learning_rate": 1.135998757850015e-05, |
| "loss": 0.19033361971378326, |
| "num_input_tokens_seen": 0, |
| "step": 206 |
| }, |
| { |
| "epoch": 2.435294117647059, |
| "grad_norm": 0.28669793445051134, |
| "learning_rate": 1.128271616569741e-05, |
| "loss": 0.19659247994422913, |
| "num_input_tokens_seen": 0, |
| "step": 207 |
| }, |
| { |
| "epoch": 2.447058823529412, |
| "grad_norm": 0.24321969874326846, |
| "learning_rate": 1.1205366802553231e-05, |
| "loss": 0.189006507396698, |
| "num_input_tokens_seen": 0, |
| "step": 208 |
| }, |
| { |
| "epoch": 2.458823529411765, |
| "grad_norm": 0.23277687621799142, |
| "learning_rate": 1.1127944189569122e-05, |
| "loss": 0.18315881490707397, |
| "num_input_tokens_seen": 0, |
| "step": 209 |
| }, |
| { |
| "epoch": 2.4705882352941178, |
| "grad_norm": 0.24644185758060683, |
| "learning_rate": 1.1050453031697958e-05, |
| "loss": 0.18082918226718903, |
| "num_input_tokens_seen": 0, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.4823529411764707, |
| "grad_norm": 0.27537652887423003, |
| "learning_rate": 1.0972898038058077e-05, |
| "loss": 0.18804597854614258, |
| "num_input_tokens_seen": 0, |
| "step": 211 |
| }, |
| { |
| "epoch": 2.4941176470588236, |
| "grad_norm": 0.22999355280888956, |
| "learning_rate": 1.0895283921647098e-05, |
| "loss": 0.18512041866779327, |
| "num_input_tokens_seen": 0, |
| "step": 212 |
| }, |
| { |
| "epoch": 2.5058823529411764, |
| "grad_norm": 0.24328460263907906, |
| "learning_rate": 1.0817615399055513e-05, |
| "loss": 0.18306857347488403, |
| "num_input_tokens_seen": 0, |
| "step": 213 |
| }, |
| { |
| "epoch": 2.5176470588235293, |
| "grad_norm": 0.24353741537161722, |
| "learning_rate": 1.0739897190180066e-05, |
| "loss": 0.18730933964252472, |
| "num_input_tokens_seen": 0, |
| "step": 214 |
| }, |
| { |
| "epoch": 2.5294117647058822, |
| "grad_norm": 0.24168464720218039, |
| "learning_rate": 1.0662134017936924e-05, |
| "loss": 0.1890895515680313, |
| "num_input_tokens_seen": 0, |
| "step": 215 |
| }, |
| { |
| "epoch": 2.541176470588235, |
| "grad_norm": 0.2464118373551017, |
| "learning_rate": 1.0584330607974673e-05, |
| "loss": 0.1896791309118271, |
| "num_input_tokens_seen": 0, |
| "step": 216 |
| }, |
| { |
| "epoch": 2.552941176470588, |
| "grad_norm": 0.2272154213017855, |
| "learning_rate": 1.0506491688387128e-05, |
| "loss": 0.187567800283432, |
| "num_input_tokens_seen": 0, |
| "step": 217 |
| }, |
| { |
| "epoch": 2.564705882352941, |
| "grad_norm": 0.23687499350154168, |
| "learning_rate": 1.0428621989426016e-05, |
| "loss": 0.19160117208957672, |
| "num_input_tokens_seen": 0, |
| "step": 218 |
| }, |
| { |
| "epoch": 2.576470588235294, |
| "grad_norm": 0.23229299934050784, |
| "learning_rate": 1.0350726243213519e-05, |
| "loss": 0.18402451276779175, |
| "num_input_tokens_seen": 0, |
| "step": 219 |
| }, |
| { |
| "epoch": 2.588235294117647, |
| "grad_norm": 0.2515873476821987, |
| "learning_rate": 1.0272809183454701e-05, |
| "loss": 0.18722085654735565, |
| "num_input_tokens_seen": 0, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.22755973648814593, |
| "learning_rate": 1.0194875545149854e-05, |
| "loss": 0.18111610412597656, |
| "num_input_tokens_seen": 0, |
| "step": 221 |
| }, |
| { |
| "epoch": 2.611764705882353, |
| "grad_norm": 0.23007823552128587, |
| "learning_rate": 1.0116930064306736e-05, |
| "loss": 0.19812649488449097, |
| "num_input_tokens_seen": 0, |
| "step": 222 |
| }, |
| { |
| "epoch": 2.623529411764706, |
| "grad_norm": 0.22528651243150996, |
| "learning_rate": 1.0038977477652779e-05, |
| "loss": 0.18580538034439087, |
| "num_input_tokens_seen": 0, |
| "step": 223 |
| }, |
| { |
| "epoch": 2.635294117647059, |
| "grad_norm": 0.23539863556511334, |
| "learning_rate": 9.961022522347226e-06, |
| "loss": 0.18501965701580048, |
| "num_input_tokens_seen": 0, |
| "step": 224 |
| }, |
| { |
| "epoch": 2.6470588235294117, |
| "grad_norm": 0.22782974012346754, |
| "learning_rate": 9.883069935693267e-06, |
| "loss": 0.18402716517448425, |
| "num_input_tokens_seen": 0, |
| "step": 225 |
| }, |
| { |
| "epoch": 2.6588235294117646, |
| "grad_norm": 0.24487953714591462, |
| "learning_rate": 9.80512445485015e-06, |
| "loss": 0.18938913941383362, |
| "num_input_tokens_seen": 0, |
| "step": 226 |
| }, |
| { |
| "epoch": 2.6705882352941175, |
| "grad_norm": 0.24462792166495934, |
| "learning_rate": 9.7271908165453e-06, |
| "loss": 0.19719335436820984, |
| "num_input_tokens_seen": 0, |
| "step": 227 |
| }, |
| { |
| "epoch": 2.682352941176471, |
| "grad_norm": 0.2366728459616901, |
| "learning_rate": 9.649273756786486e-06, |
| "loss": 0.185680091381073, |
| "num_input_tokens_seen": 0, |
| "step": 228 |
| }, |
| { |
| "epoch": 2.6941176470588237, |
| "grad_norm": 0.2303882056729561, |
| "learning_rate": 9.57137801057399e-06, |
| "loss": 0.19624218344688416, |
| "num_input_tokens_seen": 0, |
| "step": 229 |
| }, |
| { |
| "epoch": 2.7058823529411766, |
| "grad_norm": 0.22987803077687444, |
| "learning_rate": 9.493508311612874e-06, |
| "loss": 0.17861570417881012, |
| "num_input_tokens_seen": 0, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.7176470588235295, |
| "grad_norm": 0.26388168681073687, |
| "learning_rate": 9.415669392025329e-06, |
| "loss": 0.18734458088874817, |
| "num_input_tokens_seen": 0, |
| "step": 231 |
| }, |
| { |
| "epoch": 2.7294117647058824, |
| "grad_norm": 0.22186631357859773, |
| "learning_rate": 9.337865982063076e-06, |
| "loss": 0.1946583092212677, |
| "num_input_tokens_seen": 0, |
| "step": 232 |
| }, |
| { |
| "epoch": 2.7411764705882353, |
| "grad_norm": 0.2311568846601055, |
| "learning_rate": 9.260102809819939e-06, |
| "loss": 0.18761436641216278, |
| "num_input_tokens_seen": 0, |
| "step": 233 |
| }, |
| { |
| "epoch": 2.7529411764705882, |
| "grad_norm": 0.22628859572679205, |
| "learning_rate": 9.182384600944494e-06, |
| "loss": 0.18877655267715454, |
| "num_input_tokens_seen": 0, |
| "step": 234 |
| }, |
| { |
| "epoch": 2.764705882352941, |
| "grad_norm": 0.24528368812451035, |
| "learning_rate": 9.104716078352906e-06, |
| "loss": 0.18831658363342285, |
| "num_input_tokens_seen": 0, |
| "step": 235 |
| }, |
| { |
| "epoch": 2.776470588235294, |
| "grad_norm": 0.22998847186224078, |
| "learning_rate": 9.027101961941925e-06, |
| "loss": 0.18712544441223145, |
| "num_input_tokens_seen": 0, |
| "step": 236 |
| }, |
| { |
| "epoch": 2.788235294117647, |
| "grad_norm": 0.22929072663885758, |
| "learning_rate": 8.949546968302042e-06, |
| "loss": 0.20112478733062744, |
| "num_input_tokens_seen": 0, |
| "step": 237 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.23007743920004314, |
| "learning_rate": 8.872055810430881e-06, |
| "loss": 0.18601751327514648, |
| "num_input_tokens_seen": 0, |
| "step": 238 |
| }, |
| { |
| "epoch": 2.8117647058823527, |
| "grad_norm": 0.23659776591024959, |
| "learning_rate": 8.79463319744677e-06, |
| "loss": 0.1808547079563141, |
| "num_input_tokens_seen": 0, |
| "step": 239 |
| }, |
| { |
| "epoch": 2.8235294117647056, |
| "grad_norm": 0.23509031587485976, |
| "learning_rate": 8.717283834302593e-06, |
| "loss": 0.18669113516807556, |
| "num_input_tokens_seen": 0, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.835294117647059, |
| "grad_norm": 0.23472960086401704, |
| "learning_rate": 8.640012421499856e-06, |
| "loss": 0.19292673468589783, |
| "num_input_tokens_seen": 0, |
| "step": 241 |
| }, |
| { |
| "epoch": 2.847058823529412, |
| "grad_norm": 0.25429765516678327, |
| "learning_rate": 8.562823654803035e-06, |
| "loss": 0.18981140851974487, |
| "num_input_tokens_seen": 0, |
| "step": 242 |
| }, |
| { |
| "epoch": 2.8588235294117648, |
| "grad_norm": 0.23658961623110553, |
| "learning_rate": 8.485722224954237e-06, |
| "loss": 0.1999085545539856, |
| "num_input_tokens_seen": 0, |
| "step": 243 |
| }, |
| { |
| "epoch": 2.8705882352941177, |
| "grad_norm": 0.23199472766369356, |
| "learning_rate": 8.408712817388113e-06, |
| "loss": 0.1827118992805481, |
| "num_input_tokens_seen": 0, |
| "step": 244 |
| }, |
| { |
| "epoch": 2.8823529411764706, |
| "grad_norm": 0.25077191290374024, |
| "learning_rate": 8.331800111947158e-06, |
| "loss": 0.1857125163078308, |
| "num_input_tokens_seen": 0, |
| "step": 245 |
| }, |
| { |
| "epoch": 2.8941176470588235, |
| "grad_norm": 0.23140729045512615, |
| "learning_rate": 8.254988782597295e-06, |
| "loss": 0.18820548057556152, |
| "num_input_tokens_seen": 0, |
| "step": 246 |
| }, |
| { |
| "epoch": 2.9058823529411764, |
| "grad_norm": 0.22239003021686357, |
| "learning_rate": 8.178283497143851e-06, |
| "loss": 0.19076308608055115, |
| "num_input_tokens_seen": 0, |
| "step": 247 |
| }, |
| { |
| "epoch": 2.9176470588235293, |
| "grad_norm": 0.21803105840925516, |
| "learning_rate": 8.10168891694789e-06, |
| "loss": 0.18549099564552307, |
| "num_input_tokens_seen": 0, |
| "step": 248 |
| }, |
| { |
| "epoch": 2.9294117647058826, |
| "grad_norm": 0.23828217230216947, |
| "learning_rate": 8.025209696642942e-06, |
| "loss": 0.1965373158454895, |
| "num_input_tokens_seen": 0, |
| "step": 249 |
| }, |
| { |
| "epoch": 2.9411764705882355, |
| "grad_norm": 0.2390427784115555, |
| "learning_rate": 7.948850483852153e-06, |
| "loss": 0.18414372205734253, |
| "num_input_tokens_seen": 0, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.9529411764705884, |
| "grad_norm": 0.24685443360941575, |
| "learning_rate": 7.872615918905833e-06, |
| "loss": 0.19256475567817688, |
| "num_input_tokens_seen": 0, |
| "step": 251 |
| }, |
| { |
| "epoch": 2.9647058823529413, |
| "grad_norm": 0.23889907605167213, |
| "learning_rate": 7.796510634559487e-06, |
| "loss": 0.19201350212097168, |
| "num_input_tokens_seen": 0, |
| "step": 252 |
| }, |
| { |
| "epoch": 2.976470588235294, |
| "grad_norm": 0.24349828471939475, |
| "learning_rate": 7.720539255712252e-06, |
| "loss": 0.18964079022407532, |
| "num_input_tokens_seen": 0, |
| "step": 253 |
| }, |
| { |
| "epoch": 2.988235294117647, |
| "grad_norm": 0.25208339650704936, |
| "learning_rate": 7.644706399125871e-06, |
| "loss": 0.19716620445251465, |
| "num_input_tokens_seen": 0, |
| "step": 254 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.22219530818543262, |
| "learning_rate": 7.569016673144132e-06, |
| "loss": 0.19001775979995728, |
| "num_input_tokens_seen": 0, |
| "step": 255 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 425, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 267247958228992.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|