{ "best_metric": 1.3536509342731968, "best_model_checkpoint": "train/20241110-Compress:128x-Lr:5e-5-Llama3-8B-instruct-GPT2-Large-RAG-no-ft_token-onlySquad-everymem/checkpoint-2000", "epoch": 2.9482218536944904, "eval_steps": 250, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0014741109268472453, "grad_norm": 60.07453256489496, "learning_rate": 5.000000000000001e-07, "loss": 5.3128, "step": 1 }, { "epoch": 0.0029482218536944905, "grad_norm": 8.733331116343598, "learning_rate": 1.0000000000000002e-06, "loss": 5.4938, "step": 2 }, { "epoch": 0.004422332780541736, "grad_norm": 8.08199632212124, "learning_rate": 1.5e-06, "loss": 5.3039, "step": 3 }, { "epoch": 0.005896443707388981, "grad_norm": 11.216266835432938, "learning_rate": 2.0000000000000003e-06, "loss": 5.6106, "step": 4 }, { "epoch": 0.0073705546342362266, "grad_norm": 7.955290074361608, "learning_rate": 2.5e-06, "loss": 5.5726, "step": 5 }, { "epoch": 0.008844665561083471, "grad_norm": 9.106922375657431, "learning_rate": 3e-06, "loss": 5.5885, "step": 6 }, { "epoch": 0.010318776487930717, "grad_norm": 8.042001140800268, "learning_rate": 3.5000000000000004e-06, "loss": 5.6774, "step": 7 }, { "epoch": 0.011792887414777962, "grad_norm": 10.604193207133617, "learning_rate": 4.000000000000001e-06, "loss": 5.4744, "step": 8 }, { "epoch": 0.013266998341625208, "grad_norm": 8.53183752698761, "learning_rate": 4.5e-06, "loss": 5.4754, "step": 9 }, { "epoch": 0.014741109268472453, "grad_norm": 7.677238500744552, "learning_rate": 5e-06, "loss": 5.3791, "step": 10 }, { "epoch": 0.016215220195319697, "grad_norm": 8.643226273365615, "learning_rate": 5.500000000000001e-06, "loss": 5.3982, "step": 11 }, { "epoch": 0.017689331122166942, "grad_norm": 9.731075603316468, "learning_rate": 6e-06, "loss": 5.4731, "step": 12 }, { "epoch": 0.019163442049014188, "grad_norm": 9.248172476377807, "learning_rate": 6.5000000000000004e-06, "loss": 5.1765, "step": 13 }, { "epoch": 0.020637552975861433, "grad_norm": 9.322102147045022, "learning_rate": 7.000000000000001e-06, "loss": 4.9715, "step": 14 }, { "epoch": 0.02211166390270868, "grad_norm": 10.020342925213049, "learning_rate": 7.5e-06, "loss": 4.6458, "step": 15 }, { "epoch": 0.023585774829555924, "grad_norm": 8.733915968623275, "learning_rate": 8.000000000000001e-06, "loss": 4.6077, "step": 16 }, { "epoch": 0.02505988575640317, "grad_norm": 8.52083905382283, "learning_rate": 8.500000000000002e-06, "loss": 4.2914, "step": 17 }, { "epoch": 0.026533996683250415, "grad_norm": 7.458635320534619, "learning_rate": 9e-06, "loss": 3.6652, "step": 18 }, { "epoch": 0.02800810761009766, "grad_norm": 9.672252444296076, "learning_rate": 9.5e-06, "loss": 3.711, "step": 19 }, { "epoch": 0.029482218536944906, "grad_norm": 7.66675653266759, "learning_rate": 1e-05, "loss": 3.5079, "step": 20 }, { "epoch": 0.03095632946379215, "grad_norm": 10.44581574259858, "learning_rate": 1.05e-05, "loss": 3.3664, "step": 21 }, { "epoch": 0.032430440390639394, "grad_norm": 6.918333228152697, "learning_rate": 1.1000000000000001e-05, "loss": 3.2715, "step": 22 }, { "epoch": 0.03390455131748664, "grad_norm": 8.039144591569439, "learning_rate": 1.1500000000000002e-05, "loss": 3.2561, "step": 23 }, { "epoch": 0.035378662244333885, "grad_norm": 11.24719033653777, "learning_rate": 1.2e-05, "loss": 3.0512, "step": 24 }, { "epoch": 0.03685277317118113, "grad_norm": 5.266655433417465, "learning_rate": 1.25e-05, "loss": 2.6325, "step": 25 }, { "epoch": 0.038326884098028376, "grad_norm": 6.049276500979496, "learning_rate": 1.3000000000000001e-05, "loss": 2.5456, "step": 26 }, { "epoch": 0.03980099502487562, "grad_norm": 5.106762325961276, "learning_rate": 1.3500000000000001e-05, "loss": 2.4749, "step": 27 }, { "epoch": 0.04127510595172287, "grad_norm": 4.638951060677336, "learning_rate": 1.4000000000000001e-05, "loss": 2.391, "step": 28 }, { "epoch": 0.04274921687857011, "grad_norm": 6.824087944152775, "learning_rate": 1.45e-05, "loss": 2.2701, "step": 29 }, { "epoch": 0.04422332780541736, "grad_norm": 4.199459022922485, "learning_rate": 1.5e-05, "loss": 2.0462, "step": 30 }, { "epoch": 0.0456974387322646, "grad_norm": 4.396155515799207, "learning_rate": 1.55e-05, "loss": 2.0697, "step": 31 }, { "epoch": 0.04717154965911185, "grad_norm": 4.299862242865135, "learning_rate": 1.6000000000000003e-05, "loss": 2.1916, "step": 32 }, { "epoch": 0.048645660585959094, "grad_norm": 3.7825349248167552, "learning_rate": 1.65e-05, "loss": 1.9529, "step": 33 }, { "epoch": 0.05011977151280634, "grad_norm": 3.7040902340487194, "learning_rate": 1.7000000000000003e-05, "loss": 2.0785, "step": 34 }, { "epoch": 0.051593882439653585, "grad_norm": 3.460638097897077, "learning_rate": 1.75e-05, "loss": 2.1621, "step": 35 }, { "epoch": 0.05306799336650083, "grad_norm": 2.8348314214300725, "learning_rate": 1.8e-05, "loss": 1.9602, "step": 36 }, { "epoch": 0.054542104293348076, "grad_norm": 3.025563677769368, "learning_rate": 1.85e-05, "loss": 1.8633, "step": 37 }, { "epoch": 0.05601621522019532, "grad_norm": 2.9487561399906106, "learning_rate": 1.9e-05, "loss": 1.8529, "step": 38 }, { "epoch": 0.05749032614704257, "grad_norm": 2.7692086660081747, "learning_rate": 1.9500000000000003e-05, "loss": 1.8249, "step": 39 }, { "epoch": 0.05896443707388981, "grad_norm": 2.4644171440822245, "learning_rate": 2e-05, "loss": 1.6347, "step": 40 }, { "epoch": 0.06043854800073706, "grad_norm": 2.820971467445497, "learning_rate": 2.05e-05, "loss": 1.7595, "step": 41 }, { "epoch": 0.0619126589275843, "grad_norm": 2.5783210448662888, "learning_rate": 2.1e-05, "loss": 1.6692, "step": 42 }, { "epoch": 0.06338676985443155, "grad_norm": 2.671578374091079, "learning_rate": 2.15e-05, "loss": 1.72, "step": 43 }, { "epoch": 0.06486088078127879, "grad_norm": 2.7470052191341385, "learning_rate": 2.2000000000000003e-05, "loss": 1.663, "step": 44 }, { "epoch": 0.06633499170812604, "grad_norm": 2.6093861471155053, "learning_rate": 2.25e-05, "loss": 1.6486, "step": 45 }, { "epoch": 0.06780910263497328, "grad_norm": 2.445351109794031, "learning_rate": 2.3000000000000003e-05, "loss": 1.6445, "step": 46 }, { "epoch": 0.06928321356182053, "grad_norm": 2.4889447871054267, "learning_rate": 2.35e-05, "loss": 1.6753, "step": 47 }, { "epoch": 0.07075732448866777, "grad_norm": 2.5561578001822136, "learning_rate": 2.4e-05, "loss": 1.5483, "step": 48 }, { "epoch": 0.07223143541551502, "grad_norm": 2.9474363081685837, "learning_rate": 2.45e-05, "loss": 1.7294, "step": 49 }, { "epoch": 0.07370554634236226, "grad_norm": 3.469094611568158, "learning_rate": 2.5e-05, "loss": 1.7549, "step": 50 }, { "epoch": 0.07517965726920951, "grad_norm": 2.3765105412990017, "learning_rate": 2.5500000000000003e-05, "loss": 1.6607, "step": 51 }, { "epoch": 0.07665376819605675, "grad_norm": 2.2024043642859237, "learning_rate": 2.6000000000000002e-05, "loss": 1.6378, "step": 52 }, { "epoch": 0.078127879122904, "grad_norm": 2.2045094353933994, "learning_rate": 2.6500000000000004e-05, "loss": 1.5672, "step": 53 }, { "epoch": 0.07960199004975124, "grad_norm": 2.3485353572556207, "learning_rate": 2.7000000000000002e-05, "loss": 1.6867, "step": 54 }, { "epoch": 0.0810761009765985, "grad_norm": 2.0958025536321485, "learning_rate": 2.7500000000000004e-05, "loss": 1.5966, "step": 55 }, { "epoch": 0.08255021190344573, "grad_norm": 2.2140841787888146, "learning_rate": 2.8000000000000003e-05, "loss": 1.6477, "step": 56 }, { "epoch": 0.08402432283029299, "grad_norm": 2.190250738064768, "learning_rate": 2.8499999999999998e-05, "loss": 1.7013, "step": 57 }, { "epoch": 0.08549843375714022, "grad_norm": 2.2469060606527256, "learning_rate": 2.9e-05, "loss": 1.5045, "step": 58 }, { "epoch": 0.08697254468398748, "grad_norm": 2.3748468048479636, "learning_rate": 2.95e-05, "loss": 1.5645, "step": 59 }, { "epoch": 0.08844665561083472, "grad_norm": 2.1135562851727565, "learning_rate": 3e-05, "loss": 1.414, "step": 60 }, { "epoch": 0.08992076653768195, "grad_norm": 2.4495184561382, "learning_rate": 3.05e-05, "loss": 1.6775, "step": 61 }, { "epoch": 0.0913948774645292, "grad_norm": 2.2952538822341926, "learning_rate": 3.1e-05, "loss": 1.6133, "step": 62 }, { "epoch": 0.09286898839137644, "grad_norm": 2.3339189975153145, "learning_rate": 3.15e-05, "loss": 1.4888, "step": 63 }, { "epoch": 0.0943430993182237, "grad_norm": 2.4837805078684165, "learning_rate": 3.2000000000000005e-05, "loss": 1.3822, "step": 64 }, { "epoch": 0.09581721024507094, "grad_norm": 2.125424735313146, "learning_rate": 3.2500000000000004e-05, "loss": 1.5128, "step": 65 }, { "epoch": 0.09729132117191819, "grad_norm": 2.1939160667228603, "learning_rate": 3.3e-05, "loss": 1.6228, "step": 66 }, { "epoch": 0.09876543209876543, "grad_norm": 2.2599410744808663, "learning_rate": 3.35e-05, "loss": 1.4886, "step": 67 }, { "epoch": 0.10023954302561268, "grad_norm": 2.3504282094528537, "learning_rate": 3.4000000000000007e-05, "loss": 1.4913, "step": 68 }, { "epoch": 0.10171365395245992, "grad_norm": 2.220646974093592, "learning_rate": 3.45e-05, "loss": 1.4719, "step": 69 }, { "epoch": 0.10318776487930717, "grad_norm": 2.219596808652634, "learning_rate": 3.5e-05, "loss": 1.6242, "step": 70 }, { "epoch": 0.10466187580615441, "grad_norm": 2.521804990115732, "learning_rate": 3.55e-05, "loss": 1.574, "step": 71 }, { "epoch": 0.10613598673300166, "grad_norm": 2.418983850337695, "learning_rate": 3.6e-05, "loss": 1.5851, "step": 72 }, { "epoch": 0.1076100976598489, "grad_norm": 2.3829440721551327, "learning_rate": 3.65e-05, "loss": 1.5656, "step": 73 }, { "epoch": 0.10908420858669615, "grad_norm": 2.083327471406955, "learning_rate": 3.7e-05, "loss": 1.4853, "step": 74 }, { "epoch": 0.11055831951354339, "grad_norm": 2.3810321097421787, "learning_rate": 3.7500000000000003e-05, "loss": 1.5504, "step": 75 }, { "epoch": 0.11203243044039064, "grad_norm": 2.1333391706068925, "learning_rate": 3.8e-05, "loss": 1.4577, "step": 76 }, { "epoch": 0.11350654136723788, "grad_norm": 2.3322361228318202, "learning_rate": 3.85e-05, "loss": 1.4335, "step": 77 }, { "epoch": 0.11498065229408513, "grad_norm": 2.317312645442672, "learning_rate": 3.9000000000000006e-05, "loss": 1.3808, "step": 78 }, { "epoch": 0.11645476322093237, "grad_norm": 2.285064330249301, "learning_rate": 3.9500000000000005e-05, "loss": 1.624, "step": 79 }, { "epoch": 0.11792887414777962, "grad_norm": 2.172801746268828, "learning_rate": 4e-05, "loss": 1.5205, "step": 80 }, { "epoch": 0.11940298507462686, "grad_norm": 2.4825932093214043, "learning_rate": 4.05e-05, "loss": 1.6809, "step": 81 }, { "epoch": 0.12087709600147412, "grad_norm": 2.1861811340548196, "learning_rate": 4.1e-05, "loss": 1.5905, "step": 82 }, { "epoch": 0.12235120692832135, "grad_norm": 2.19343502902696, "learning_rate": 4.15e-05, "loss": 1.6213, "step": 83 }, { "epoch": 0.1238253178551686, "grad_norm": 2.1723747501769344, "learning_rate": 4.2e-05, "loss": 1.4623, "step": 84 }, { "epoch": 0.12529942878201586, "grad_norm": 2.4383566891465773, "learning_rate": 4.25e-05, "loss": 1.4901, "step": 85 }, { "epoch": 0.1267735397088631, "grad_norm": 2.3552544592809173, "learning_rate": 4.3e-05, "loss": 1.4424, "step": 86 }, { "epoch": 0.12824765063571034, "grad_norm": 2.313199736432175, "learning_rate": 4.35e-05, "loss": 1.4244, "step": 87 }, { "epoch": 0.12972176156255757, "grad_norm": 2.232278974470001, "learning_rate": 4.4000000000000006e-05, "loss": 1.3362, "step": 88 }, { "epoch": 0.13119587248940484, "grad_norm": 2.2980216142121, "learning_rate": 4.4500000000000004e-05, "loss": 1.446, "step": 89 }, { "epoch": 0.13266998341625208, "grad_norm": 2.370243730848777, "learning_rate": 4.5e-05, "loss": 1.3717, "step": 90 }, { "epoch": 0.13414409434309932, "grad_norm": 2.492164922629904, "learning_rate": 4.55e-05, "loss": 1.1968, "step": 91 }, { "epoch": 0.13561820526994656, "grad_norm": 2.2965884905808354, "learning_rate": 4.600000000000001e-05, "loss": 1.5956, "step": 92 }, { "epoch": 0.1370923161967938, "grad_norm": 2.1611898477632527, "learning_rate": 4.6500000000000005e-05, "loss": 1.6309, "step": 93 }, { "epoch": 0.13856642712364106, "grad_norm": 2.4363494505139305, "learning_rate": 4.7e-05, "loss": 1.3905, "step": 94 }, { "epoch": 0.1400405380504883, "grad_norm": 2.4511373449074316, "learning_rate": 4.75e-05, "loss": 1.403, "step": 95 }, { "epoch": 0.14151464897733554, "grad_norm": 2.4144022187138723, "learning_rate": 4.8e-05, "loss": 1.4598, "step": 96 }, { "epoch": 0.14298875990418278, "grad_norm": 2.3264944011490507, "learning_rate": 4.85e-05, "loss": 1.3477, "step": 97 }, { "epoch": 0.14446287083103004, "grad_norm": 2.2386647906080652, "learning_rate": 4.9e-05, "loss": 1.4037, "step": 98 }, { "epoch": 0.14593698175787728, "grad_norm": 2.2837059700766367, "learning_rate": 4.9500000000000004e-05, "loss": 1.5816, "step": 99 }, { "epoch": 0.14741109268472452, "grad_norm": 2.0542020623160435, "learning_rate": 5e-05, "loss": 1.4499, "step": 100 }, { "epoch": 0.14888520361157176, "grad_norm": 2.2869258973159696, "learning_rate": 4.999999216450553e-05, "loss": 1.4949, "step": 101 }, { "epoch": 0.15035931453841903, "grad_norm": 2.4569463427114426, "learning_rate": 4.9999968658027006e-05, "loss": 1.4299, "step": 102 }, { "epoch": 0.15183342546526626, "grad_norm": 2.2649475137048203, "learning_rate": 4.999992948057919e-05, "loss": 1.3784, "step": 103 }, { "epoch": 0.1533075363921135, "grad_norm": 2.3440270832306833, "learning_rate": 4.999987463218663e-05, "loss": 1.4544, "step": 104 }, { "epoch": 0.15478164731896074, "grad_norm": 2.3017407215569876, "learning_rate": 4.9999804112883694e-05, "loss": 1.4348, "step": 105 }, { "epoch": 0.156255758245808, "grad_norm": 2.2729379426244196, "learning_rate": 4.99997179227146e-05, "loss": 1.4182, "step": 106 }, { "epoch": 0.15772986917265525, "grad_norm": 2.3402770696752766, "learning_rate": 4.999961606173337e-05, "loss": 1.4748, "step": 107 }, { "epoch": 0.15920398009950248, "grad_norm": 2.0200906417830087, "learning_rate": 4.9999498530003866e-05, "loss": 1.3034, "step": 108 }, { "epoch": 0.16067809102634972, "grad_norm": 2.2056140231569823, "learning_rate": 4.999936532759974e-05, "loss": 1.5566, "step": 109 }, { "epoch": 0.162152201953197, "grad_norm": 1.9644309609452897, "learning_rate": 4.9999216454604505e-05, "loss": 1.3413, "step": 110 }, { "epoch": 0.16362631288004423, "grad_norm": 2.194795377921606, "learning_rate": 4.9999051911111484e-05, "loss": 1.3687, "step": 111 }, { "epoch": 0.16510042380689147, "grad_norm": 2.243023334451526, "learning_rate": 4.99988716972238e-05, "loss": 1.3149, "step": 112 }, { "epoch": 0.1665745347337387, "grad_norm": 1.8637104299887242, "learning_rate": 4.999867581305444e-05, "loss": 1.3861, "step": 113 }, { "epoch": 0.16804864566058597, "grad_norm": 2.11830374370151, "learning_rate": 4.9998464258726174e-05, "loss": 1.5428, "step": 114 }, { "epoch": 0.1695227565874332, "grad_norm": 2.1675916189082027, "learning_rate": 4.999823703437162e-05, "loss": 1.4209, "step": 115 }, { "epoch": 0.17099686751428045, "grad_norm": 2.158804444651952, "learning_rate": 4.999799414013322e-05, "loss": 1.5244, "step": 116 }, { "epoch": 0.1724709784411277, "grad_norm": 2.10007557505902, "learning_rate": 4.9997735576163215e-05, "loss": 1.2787, "step": 117 }, { "epoch": 0.17394508936797495, "grad_norm": 2.2262712254655623, "learning_rate": 4.9997461342623686e-05, "loss": 1.3344, "step": 118 }, { "epoch": 0.1754192002948222, "grad_norm": 2.4157571615803617, "learning_rate": 4.999717143968654e-05, "loss": 1.2447, "step": 119 }, { "epoch": 0.17689331122166943, "grad_norm": 2.576413968202097, "learning_rate": 4.9996865867533496e-05, "loss": 1.3676, "step": 120 }, { "epoch": 0.17836742214851667, "grad_norm": 2.1417070981178585, "learning_rate": 4.99965446263561e-05, "loss": 1.3313, "step": 121 }, { "epoch": 0.1798415330753639, "grad_norm": 2.1695232145320675, "learning_rate": 4.9996207716355726e-05, "loss": 1.4916, "step": 122 }, { "epoch": 0.18131564400221117, "grad_norm": 2.057817287338122, "learning_rate": 4.999585513774354e-05, "loss": 1.3747, "step": 123 }, { "epoch": 0.1827897549290584, "grad_norm": 2.030849945580629, "learning_rate": 4.9995486890740573e-05, "loss": 1.3014, "step": 124 }, { "epoch": 0.18426386585590565, "grad_norm": 2.1873825041687285, "learning_rate": 4.9995102975577655e-05, "loss": 1.3355, "step": 125 }, { "epoch": 0.1857379767827529, "grad_norm": 2.1007903428340198, "learning_rate": 4.999470339249543e-05, "loss": 1.2883, "step": 126 }, { "epoch": 0.18721208770960016, "grad_norm": 2.0257121964766123, "learning_rate": 4.9994288141744374e-05, "loss": 1.2599, "step": 127 }, { "epoch": 0.1886861986364474, "grad_norm": 2.263108743789892, "learning_rate": 4.999385722358479e-05, "loss": 1.5406, "step": 128 }, { "epoch": 0.19016030956329463, "grad_norm": 1.9354255399579277, "learning_rate": 4.999341063828679e-05, "loss": 1.356, "step": 129 }, { "epoch": 0.19163442049014187, "grad_norm": 2.096777250510134, "learning_rate": 4.9992948386130315e-05, "loss": 1.4541, "step": 130 }, { "epoch": 0.19310853141698914, "grad_norm": 2.0980935662045264, "learning_rate": 4.9992470467405104e-05, "loss": 1.4141, "step": 131 }, { "epoch": 0.19458264234383638, "grad_norm": 2.0678307971833743, "learning_rate": 4.999197688241076e-05, "loss": 1.527, "step": 132 }, { "epoch": 0.19605675327068361, "grad_norm": 2.1781128148267594, "learning_rate": 4.999146763145668e-05, "loss": 1.3192, "step": 133 }, { "epoch": 0.19753086419753085, "grad_norm": 2.112986545850289, "learning_rate": 4.9990942714862066e-05, "loss": 1.3348, "step": 134 }, { "epoch": 0.19900497512437812, "grad_norm": 2.0761106666880003, "learning_rate": 4.999040213295597e-05, "loss": 1.261, "step": 135 }, { "epoch": 0.20047908605122536, "grad_norm": 2.0671243555598866, "learning_rate": 4.9989845886077246e-05, "loss": 1.4277, "step": 136 }, { "epoch": 0.2019531969780726, "grad_norm": 2.060321466492083, "learning_rate": 4.9989273974574566e-05, "loss": 1.1909, "step": 137 }, { "epoch": 0.20342730790491984, "grad_norm": 2.3633685135296627, "learning_rate": 4.998868639880644e-05, "loss": 1.514, "step": 138 }, { "epoch": 0.2049014188317671, "grad_norm": 2.6710520586819735, "learning_rate": 4.998808315914117e-05, "loss": 1.4533, "step": 139 }, { "epoch": 0.20637552975861434, "grad_norm": 2.639175829986413, "learning_rate": 4.9987464255956894e-05, "loss": 1.2372, "step": 140 }, { "epoch": 0.20784964068546158, "grad_norm": 2.019685189672108, "learning_rate": 4.9986829689641574e-05, "loss": 1.2871, "step": 141 }, { "epoch": 0.20932375161230882, "grad_norm": 2.124995736510544, "learning_rate": 4.998617946059297e-05, "loss": 1.4551, "step": 142 }, { "epoch": 0.21079786253915608, "grad_norm": 2.5968191533332483, "learning_rate": 4.998551356921868e-05, "loss": 1.5249, "step": 143 }, { "epoch": 0.21227197346600332, "grad_norm": 2.4765086845062374, "learning_rate": 4.99848320159361e-05, "loss": 1.3807, "step": 144 }, { "epoch": 0.21374608439285056, "grad_norm": 2.2909221711411485, "learning_rate": 4.9984134801172464e-05, "loss": 1.3572, "step": 145 }, { "epoch": 0.2152201953196978, "grad_norm": 2.1637558702623942, "learning_rate": 4.998342192536482e-05, "loss": 1.2176, "step": 146 }, { "epoch": 0.21669430624654507, "grad_norm": 2.1417890293576667, "learning_rate": 4.998269338896e-05, "loss": 1.3897, "step": 147 }, { "epoch": 0.2181684171733923, "grad_norm": 2.2458002313310574, "learning_rate": 4.998194919241471e-05, "loss": 1.336, "step": 148 }, { "epoch": 0.21964252810023954, "grad_norm": 2.0876164341051844, "learning_rate": 4.9981189336195425e-05, "loss": 1.2021, "step": 149 }, { "epoch": 0.22111663902708678, "grad_norm": 2.3041501210764332, "learning_rate": 4.998041382077846e-05, "loss": 1.4176, "step": 150 }, { "epoch": 0.22259074995393405, "grad_norm": 1.9738966865945022, "learning_rate": 4.9979622646649935e-05, "loss": 1.26, "step": 151 }, { "epoch": 0.22406486088078129, "grad_norm": 2.2744536865242067, "learning_rate": 4.997881581430579e-05, "loss": 1.4, "step": 152 }, { "epoch": 0.22553897180762852, "grad_norm": 2.057234892886687, "learning_rate": 4.997799332425178e-05, "loss": 1.4368, "step": 153 }, { "epoch": 0.22701308273447576, "grad_norm": 1.870734512789051, "learning_rate": 4.997715517700347e-05, "loss": 1.2676, "step": 154 }, { "epoch": 0.228487193661323, "grad_norm": 1.9809288679212573, "learning_rate": 4.9976301373086254e-05, "loss": 1.2366, "step": 155 }, { "epoch": 0.22996130458817027, "grad_norm": 2.27817966306643, "learning_rate": 4.997543191303532e-05, "loss": 1.4254, "step": 156 }, { "epoch": 0.2314354155150175, "grad_norm": 2.2603263192352316, "learning_rate": 4.9974546797395685e-05, "loss": 1.4432, "step": 157 }, { "epoch": 0.23290952644186474, "grad_norm": 2.1279717030257177, "learning_rate": 4.9973646026722166e-05, "loss": 1.1968, "step": 158 }, { "epoch": 0.23438363736871198, "grad_norm": 2.159393783023919, "learning_rate": 4.997272960157942e-05, "loss": 1.258, "step": 159 }, { "epoch": 0.23585774829555925, "grad_norm": 2.0580971859078585, "learning_rate": 4.997179752254188e-05, "loss": 1.259, "step": 160 }, { "epoch": 0.2373318592224065, "grad_norm": 2.2874756608512397, "learning_rate": 4.997084979019382e-05, "loss": 1.3561, "step": 161 }, { "epoch": 0.23880597014925373, "grad_norm": 2.1581319805864245, "learning_rate": 4.996988640512931e-05, "loss": 1.344, "step": 162 }, { "epoch": 0.24028008107610097, "grad_norm": 2.114902628828346, "learning_rate": 4.9968907367952245e-05, "loss": 1.3797, "step": 163 }, { "epoch": 0.24175419200294823, "grad_norm": 1.9930572406379172, "learning_rate": 4.9967912679276316e-05, "loss": 1.2952, "step": 164 }, { "epoch": 0.24322830292979547, "grad_norm": 2.2824373682460206, "learning_rate": 4.996690233972505e-05, "loss": 1.3217, "step": 165 }, { "epoch": 0.2447024138566427, "grad_norm": 2.2326896019675755, "learning_rate": 4.996587634993175e-05, "loss": 1.3398, "step": 166 }, { "epoch": 0.24617652478348995, "grad_norm": 2.3153740833158944, "learning_rate": 4.996483471053955e-05, "loss": 1.1899, "step": 167 }, { "epoch": 0.2476506357103372, "grad_norm": 2.1275641734466646, "learning_rate": 4.996377742220139e-05, "loss": 1.2928, "step": 168 }, { "epoch": 0.24912474663718445, "grad_norm": 2.1284514055034474, "learning_rate": 4.9962704485580034e-05, "loss": 1.2846, "step": 169 }, { "epoch": 0.2505988575640317, "grad_norm": 2.032220696546676, "learning_rate": 4.996161590134802e-05, "loss": 1.1947, "step": 170 }, { "epoch": 0.25207296849087896, "grad_norm": 2.078988261649795, "learning_rate": 4.996051167018773e-05, "loss": 1.3022, "step": 171 }, { "epoch": 0.2535470794177262, "grad_norm": 1.9245332771371737, "learning_rate": 4.995939179279134e-05, "loss": 1.2621, "step": 172 }, { "epoch": 0.25502119034457343, "grad_norm": 2.2310349903107127, "learning_rate": 4.9958256269860826e-05, "loss": 1.4542, "step": 173 }, { "epoch": 0.2564953012714207, "grad_norm": 2.5456973670659044, "learning_rate": 4.995710510210798e-05, "loss": 1.5041, "step": 174 }, { "epoch": 0.2579694121982679, "grad_norm": 2.0450927896130877, "learning_rate": 4.9955938290254404e-05, "loss": 1.3539, "step": 175 }, { "epoch": 0.25944352312511515, "grad_norm": 1.9694790776873259, "learning_rate": 4.99547558350315e-05, "loss": 1.3855, "step": 176 }, { "epoch": 0.2609176340519624, "grad_norm": 2.150462247260363, "learning_rate": 4.9953557737180477e-05, "loss": 1.2111, "step": 177 }, { "epoch": 0.2623917449788097, "grad_norm": 2.157549480343808, "learning_rate": 4.9952343997452355e-05, "loss": 1.121, "step": 178 }, { "epoch": 0.2638658559056569, "grad_norm": 2.0538273740694, "learning_rate": 4.995111461660794e-05, "loss": 1.2719, "step": 179 }, { "epoch": 0.26533996683250416, "grad_norm": 2.0655051792498154, "learning_rate": 4.9949869595417876e-05, "loss": 1.3966, "step": 180 }, { "epoch": 0.2668140777593514, "grad_norm": 2.112112910453105, "learning_rate": 4.994860893466258e-05, "loss": 1.2133, "step": 181 }, { "epoch": 0.26828818868619864, "grad_norm": 2.041606383179758, "learning_rate": 4.994733263513228e-05, "loss": 1.3579, "step": 182 }, { "epoch": 0.2697622996130459, "grad_norm": 2.2264266123212977, "learning_rate": 4.994604069762702e-05, "loss": 1.3151, "step": 183 }, { "epoch": 0.2712364105398931, "grad_norm": 2.0875805730745745, "learning_rate": 4.994473312295663e-05, "loss": 1.4472, "step": 184 }, { "epoch": 0.27271052146674035, "grad_norm": 2.0647329323078547, "learning_rate": 4.994340991194076e-05, "loss": 1.2512, "step": 185 }, { "epoch": 0.2741846323935876, "grad_norm": 2.0908871336677417, "learning_rate": 4.994207106540884e-05, "loss": 1.4345, "step": 186 }, { "epoch": 0.2756587433204349, "grad_norm": 2.2470357229926883, "learning_rate": 4.994071658420012e-05, "loss": 1.4348, "step": 187 }, { "epoch": 0.2771328542472821, "grad_norm": 2.187489754789066, "learning_rate": 4.993934646916364e-05, "loss": 1.4358, "step": 188 }, { "epoch": 0.27860696517412936, "grad_norm": 2.052108596672067, "learning_rate": 4.993796072115824e-05, "loss": 1.2834, "step": 189 }, { "epoch": 0.2800810761009766, "grad_norm": 1.8352796309282002, "learning_rate": 4.993655934105256e-05, "loss": 1.2275, "step": 190 }, { "epoch": 0.28155518702782384, "grad_norm": 2.152859773065615, "learning_rate": 4.993514232972504e-05, "loss": 1.3193, "step": 191 }, { "epoch": 0.2830292979546711, "grad_norm": 2.0014521572123236, "learning_rate": 4.9933709688063935e-05, "loss": 1.2589, "step": 192 }, { "epoch": 0.2845034088815183, "grad_norm": 2.2173567144813147, "learning_rate": 4.993226141696726e-05, "loss": 1.263, "step": 193 }, { "epoch": 0.28597751980836555, "grad_norm": 2.2802622498721807, "learning_rate": 4.9930797517342853e-05, "loss": 1.363, "step": 194 }, { "epoch": 0.28745163073521285, "grad_norm": 2.408918808078341, "learning_rate": 4.992931799010836e-05, "loss": 1.2691, "step": 195 }, { "epoch": 0.2889257416620601, "grad_norm": 2.158329168583253, "learning_rate": 4.992782283619118e-05, "loss": 1.2367, "step": 196 }, { "epoch": 0.2903998525889073, "grad_norm": 2.266705170005193, "learning_rate": 4.992631205652857e-05, "loss": 1.435, "step": 197 }, { "epoch": 0.29187396351575456, "grad_norm": 2.163361473703773, "learning_rate": 4.992478565206752e-05, "loss": 1.2911, "step": 198 }, { "epoch": 0.2933480744426018, "grad_norm": 2.4103515150210675, "learning_rate": 4.992324362376484e-05, "loss": 1.4612, "step": 199 }, { "epoch": 0.29482218536944904, "grad_norm": 2.32427732249594, "learning_rate": 4.992168597258715e-05, "loss": 1.3205, "step": 200 }, { "epoch": 0.2962962962962963, "grad_norm": 2.057321347050703, "learning_rate": 4.992011269951083e-05, "loss": 1.2807, "step": 201 }, { "epoch": 0.2977704072231435, "grad_norm": 2.0120031655743613, "learning_rate": 4.991852380552209e-05, "loss": 1.1971, "step": 202 }, { "epoch": 0.2992445181499908, "grad_norm": 2.0449170947816024, "learning_rate": 4.99169192916169e-05, "loss": 1.2771, "step": 203 }, { "epoch": 0.30071862907683805, "grad_norm": 1.772968381433036, "learning_rate": 4.991529915880103e-05, "loss": 1.155, "step": 204 }, { "epoch": 0.3021927400036853, "grad_norm": 1.8942414052203644, "learning_rate": 4.991366340809005e-05, "loss": 1.4558, "step": 205 }, { "epoch": 0.30366685093053253, "grad_norm": 1.9430482855003286, "learning_rate": 4.99120120405093e-05, "loss": 1.4347, "step": 206 }, { "epoch": 0.30514096185737977, "grad_norm": 1.904407361077687, "learning_rate": 4.9910345057093936e-05, "loss": 1.2035, "step": 207 }, { "epoch": 0.306615072784227, "grad_norm": 2.117338618554566, "learning_rate": 4.990866245888889e-05, "loss": 1.3922, "step": 208 }, { "epoch": 0.30808918371107424, "grad_norm": 2.2109265007041987, "learning_rate": 4.9906964246948874e-05, "loss": 1.272, "step": 209 }, { "epoch": 0.3095632946379215, "grad_norm": 2.2221450620077046, "learning_rate": 4.99052504223384e-05, "loss": 1.3045, "step": 210 }, { "epoch": 0.3110374055647688, "grad_norm": 2.2814985166031136, "learning_rate": 4.990352098613176e-05, "loss": 1.3479, "step": 211 }, { "epoch": 0.312511516491616, "grad_norm": 1.8637887195681744, "learning_rate": 4.9901775939413026e-05, "loss": 1.2338, "step": 212 }, { "epoch": 0.31398562741846325, "grad_norm": 2.2469759858719462, "learning_rate": 4.990001528327607e-05, "loss": 1.2643, "step": 213 }, { "epoch": 0.3154597383453105, "grad_norm": 1.866628076233919, "learning_rate": 4.989823901882454e-05, "loss": 1.1215, "step": 214 }, { "epoch": 0.31693384927215773, "grad_norm": 2.098325484091713, "learning_rate": 4.989644714717187e-05, "loss": 1.2449, "step": 215 }, { "epoch": 0.31840796019900497, "grad_norm": 2.19805595595384, "learning_rate": 4.989463966944127e-05, "loss": 1.3506, "step": 216 }, { "epoch": 0.3198820711258522, "grad_norm": 1.9409355959301762, "learning_rate": 4.989281658676573e-05, "loss": 1.3007, "step": 217 }, { "epoch": 0.32135618205269945, "grad_norm": 2.224686752728598, "learning_rate": 4.989097790028806e-05, "loss": 1.3699, "step": 218 }, { "epoch": 0.3228302929795467, "grad_norm": 2.1150039730530703, "learning_rate": 4.98891236111608e-05, "loss": 1.2784, "step": 219 }, { "epoch": 0.324304403906394, "grad_norm": 2.0687462982421208, "learning_rate": 4.988725372054629e-05, "loss": 1.265, "step": 220 }, { "epoch": 0.3257785148332412, "grad_norm": 2.017591478242658, "learning_rate": 4.988536822961666e-05, "loss": 1.3475, "step": 221 }, { "epoch": 0.32725262576008846, "grad_norm": 2.2960578347142704, "learning_rate": 4.988346713955381e-05, "loss": 1.3781, "step": 222 }, { "epoch": 0.3287267366869357, "grad_norm": 2.348079801870228, "learning_rate": 4.9881550451549405e-05, "loss": 1.3377, "step": 223 }, { "epoch": 0.33020084761378293, "grad_norm": 2.3804706008848853, "learning_rate": 4.987961816680492e-05, "loss": 1.4397, "step": 224 }, { "epoch": 0.33167495854063017, "grad_norm": 2.21766926408657, "learning_rate": 4.9877670286531585e-05, "loss": 1.4468, "step": 225 }, { "epoch": 0.3331490694674774, "grad_norm": 2.067784907719143, "learning_rate": 4.98757068119504e-05, "loss": 1.3678, "step": 226 }, { "epoch": 0.33462318039432465, "grad_norm": 1.8092801663808136, "learning_rate": 4.9873727744292144e-05, "loss": 1.3744, "step": 227 }, { "epoch": 0.33609729132117194, "grad_norm": 2.336259698495588, "learning_rate": 4.987173308479738e-05, "loss": 1.4336, "step": 228 }, { "epoch": 0.3375714022480192, "grad_norm": 2.1077638477679996, "learning_rate": 4.9869722834716446e-05, "loss": 1.2226, "step": 229 }, { "epoch": 0.3390455131748664, "grad_norm": 2.168399142596623, "learning_rate": 4.9867696995309445e-05, "loss": 1.1254, "step": 230 }, { "epoch": 0.34051962410171366, "grad_norm": 2.3340461754697257, "learning_rate": 4.986565556784625e-05, "loss": 1.382, "step": 231 }, { "epoch": 0.3419937350285609, "grad_norm": 2.285808346294586, "learning_rate": 4.98635985536065e-05, "loss": 1.4417, "step": 232 }, { "epoch": 0.34346784595540814, "grad_norm": 2.2190865274881673, "learning_rate": 4.986152595387963e-05, "loss": 1.2052, "step": 233 }, { "epoch": 0.3449419568822554, "grad_norm": 2.2158955224606554, "learning_rate": 4.9859437769964815e-05, "loss": 1.3346, "step": 234 }, { "epoch": 0.3464160678091026, "grad_norm": 2.346915688396923, "learning_rate": 4.985733400317101e-05, "loss": 1.4133, "step": 235 }, { "epoch": 0.3478901787359499, "grad_norm": 2.213782754372508, "learning_rate": 4.985521465481695e-05, "loss": 1.2508, "step": 236 }, { "epoch": 0.34936428966279715, "grad_norm": 2.0687721136011854, "learning_rate": 4.985307972623112e-05, "loss": 1.2997, "step": 237 }, { "epoch": 0.3508384005896444, "grad_norm": 2.0865933533770247, "learning_rate": 4.985092921875178e-05, "loss": 1.282, "step": 238 }, { "epoch": 0.3523125115164916, "grad_norm": 2.007893126535043, "learning_rate": 4.984876313372695e-05, "loss": 1.2666, "step": 239 }, { "epoch": 0.35378662244333886, "grad_norm": 2.146802578388938, "learning_rate": 4.984658147251442e-05, "loss": 1.275, "step": 240 }, { "epoch": 0.3552607333701861, "grad_norm": 2.2235913178564166, "learning_rate": 4.984438423648174e-05, "loss": 1.2332, "step": 241 }, { "epoch": 0.35673484429703334, "grad_norm": 2.001170650245758, "learning_rate": 4.9842171427006225e-05, "loss": 1.26, "step": 242 }, { "epoch": 0.3582089552238806, "grad_norm": 1.9479047057520653, "learning_rate": 4.983994304547495e-05, "loss": 1.2985, "step": 243 }, { "epoch": 0.3596830661507278, "grad_norm": 2.2888523044357836, "learning_rate": 4.9837699093284765e-05, "loss": 1.2783, "step": 244 }, { "epoch": 0.3611571770775751, "grad_norm": 2.2766630486961796, "learning_rate": 4.983543957184224e-05, "loss": 1.4208, "step": 245 }, { "epoch": 0.36263128800442235, "grad_norm": 2.174700977334657, "learning_rate": 4.983316448256377e-05, "loss": 1.0316, "step": 246 }, { "epoch": 0.3641053989312696, "grad_norm": 2.216858070637988, "learning_rate": 4.983087382687544e-05, "loss": 1.4312, "step": 247 }, { "epoch": 0.3655795098581168, "grad_norm": 2.141129594347063, "learning_rate": 4.982856760621313e-05, "loss": 1.1785, "step": 248 }, { "epoch": 0.36705362078496406, "grad_norm": 2.0701406731466854, "learning_rate": 4.9826245822022474e-05, "loss": 1.2943, "step": 249 }, { "epoch": 0.3685277317118113, "grad_norm": 2.2628542037975055, "learning_rate": 4.9823908475758875e-05, "loss": 1.4268, "step": 250 }, { "epoch": 0.3685277317118113, "eval_bleu": 0.060988405140425576, "eval_bleu_1gram": 0.363389690884114, "eval_bleu_2gram": 0.13692189309069136, "eval_bleu_3gram": 0.0594690762203493, "eval_bleu_4gram": 0.027518294018401463, "eval_rag_val_loss": 1.3507157791686315, "eval_rouge1": 0.35418166324377137, "eval_rouge2": 0.12902688038756455, "eval_rougeL": 0.33440322022015795, "step": 250 }, { "epoch": 0.37000184263865854, "grad_norm": 2.160960153059647, "learning_rate": 4.982155556888745e-05, "loss": 1.3588, "step": 251 }, { "epoch": 0.3714759535655058, "grad_norm": 2.1119092623191196, "learning_rate": 4.981918710288309e-05, "loss": 1.232, "step": 252 }, { "epoch": 0.3729500644923531, "grad_norm": 2.141363865663582, "learning_rate": 4.981680307923047e-05, "loss": 1.457, "step": 253 }, { "epoch": 0.3744241754192003, "grad_norm": 2.2004860297767017, "learning_rate": 4.981440349942397e-05, "loss": 1.3541, "step": 254 }, { "epoch": 0.37589828634604755, "grad_norm": 1.9201909572293434, "learning_rate": 4.981198836496775e-05, "loss": 1.3238, "step": 255 }, { "epoch": 0.3773723972728948, "grad_norm": 2.068922043760165, "learning_rate": 4.9809557677375704e-05, "loss": 1.3368, "step": 256 }, { "epoch": 0.378846508199742, "grad_norm": 2.336106106085852, "learning_rate": 4.98071114381715e-05, "loss": 1.2895, "step": 257 }, { "epoch": 0.38032061912658927, "grad_norm": 2.239195099529316, "learning_rate": 4.980464964888852e-05, "loss": 1.2853, "step": 258 }, { "epoch": 0.3817947300534365, "grad_norm": 2.1713101317974006, "learning_rate": 4.980217231106991e-05, "loss": 1.3764, "step": 259 }, { "epoch": 0.38326884098028374, "grad_norm": 2.0592504391314894, "learning_rate": 4.979967942626858e-05, "loss": 1.3408, "step": 260 }, { "epoch": 0.38474295190713104, "grad_norm": 2.280983478183606, "learning_rate": 4.979717099604715e-05, "loss": 1.2861, "step": 261 }, { "epoch": 0.3862170628339783, "grad_norm": 1.9774594891397812, "learning_rate": 4.979464702197801e-05, "loss": 1.3697, "step": 262 }, { "epoch": 0.3876911737608255, "grad_norm": 2.538139668534761, "learning_rate": 4.9792107505643304e-05, "loss": 1.277, "step": 263 }, { "epoch": 0.38916528468767275, "grad_norm": 2.4832269667134517, "learning_rate": 4.9789552448634874e-05, "loss": 1.3344, "step": 264 }, { "epoch": 0.39063939561452, "grad_norm": 2.301922189817643, "learning_rate": 4.9786981852554346e-05, "loss": 1.3224, "step": 265 }, { "epoch": 0.39211350654136723, "grad_norm": 2.441097832081594, "learning_rate": 4.978439571901307e-05, "loss": 1.5166, "step": 266 }, { "epoch": 0.39358761746821447, "grad_norm": 2.14435291764784, "learning_rate": 4.9781794049632135e-05, "loss": 1.3127, "step": 267 }, { "epoch": 0.3950617283950617, "grad_norm": 2.0063687488434048, "learning_rate": 4.9779176846042366e-05, "loss": 1.1991, "step": 268 }, { "epoch": 0.396535839321909, "grad_norm": 2.3006759645371244, "learning_rate": 4.977654410988434e-05, "loss": 1.256, "step": 269 }, { "epoch": 0.39800995024875624, "grad_norm": 1.979106487229267, "learning_rate": 4.977389584280835e-05, "loss": 1.3147, "step": 270 }, { "epoch": 0.3994840611756035, "grad_norm": 2.047964956102956, "learning_rate": 4.9771232046474444e-05, "loss": 1.1918, "step": 271 }, { "epoch": 0.4009581721024507, "grad_norm": 2.0621394217907683, "learning_rate": 4.976855272255239e-05, "loss": 1.3617, "step": 272 }, { "epoch": 0.40243228302929795, "grad_norm": 2.244945783406488, "learning_rate": 4.976585787272168e-05, "loss": 1.4755, "step": 273 }, { "epoch": 0.4039063939561452, "grad_norm": 2.2459057538511904, "learning_rate": 4.976314749867158e-05, "loss": 1.3753, "step": 274 }, { "epoch": 0.40538050488299243, "grad_norm": 2.0997648243690223, "learning_rate": 4.976042160210104e-05, "loss": 1.3604, "step": 275 }, { "epoch": 0.40685461580983967, "grad_norm": 2.080661115301407, "learning_rate": 4.975768018471877e-05, "loss": 1.3336, "step": 276 }, { "epoch": 0.4083287267366869, "grad_norm": 2.257720206199161, "learning_rate": 4.9754923248243195e-05, "loss": 1.38, "step": 277 }, { "epoch": 0.4098028376635342, "grad_norm": 1.9824879602400989, "learning_rate": 4.975215079440247e-05, "loss": 1.5691, "step": 278 }, { "epoch": 0.41127694859038144, "grad_norm": 2.0472463023499032, "learning_rate": 4.974936282493448e-05, "loss": 1.3303, "step": 279 }, { "epoch": 0.4127510595172287, "grad_norm": 2.047163731834898, "learning_rate": 4.974655934158684e-05, "loss": 1.3189, "step": 280 }, { "epoch": 0.4142251704440759, "grad_norm": 1.9969161218937135, "learning_rate": 4.974374034611687e-05, "loss": 1.2914, "step": 281 }, { "epoch": 0.41569928137092316, "grad_norm": 1.9382102033824216, "learning_rate": 4.9740905840291646e-05, "loss": 1.2017, "step": 282 }, { "epoch": 0.4171733922977704, "grad_norm": 2.1925655025497393, "learning_rate": 4.9738055825887936e-05, "loss": 1.3672, "step": 283 }, { "epoch": 0.41864750322461763, "grad_norm": 2.031703956935638, "learning_rate": 4.973519030469225e-05, "loss": 1.1802, "step": 284 }, { "epoch": 0.4201216141514649, "grad_norm": 2.2350586598867523, "learning_rate": 4.97323092785008e-05, "loss": 1.2368, "step": 285 }, { "epoch": 0.42159572507831217, "grad_norm": 2.1773911278329563, "learning_rate": 4.972941274911953e-05, "loss": 1.3322, "step": 286 }, { "epoch": 0.4230698360051594, "grad_norm": 2.3439947381985817, "learning_rate": 4.97265007183641e-05, "loss": 1.2351, "step": 287 }, { "epoch": 0.42454394693200664, "grad_norm": 2.1233689275340244, "learning_rate": 4.9723573188059894e-05, "loss": 1.2737, "step": 288 }, { "epoch": 0.4260180578588539, "grad_norm": 2.2729797951253, "learning_rate": 4.972063016004199e-05, "loss": 1.2025, "step": 289 }, { "epoch": 0.4274921687857011, "grad_norm": 2.0295837610764176, "learning_rate": 4.971767163615522e-05, "loss": 1.2462, "step": 290 }, { "epoch": 0.42896627971254836, "grad_norm": 1.8951294259434261, "learning_rate": 4.971469761825407e-05, "loss": 1.4506, "step": 291 }, { "epoch": 0.4304403906393956, "grad_norm": 2.304981484493935, "learning_rate": 4.971170810820279e-05, "loss": 1.3765, "step": 292 }, { "epoch": 0.43191450156624284, "grad_norm": 2.1569419524579687, "learning_rate": 4.970870310787532e-05, "loss": 1.2958, "step": 293 }, { "epoch": 0.43338861249309013, "grad_norm": 2.28782927847875, "learning_rate": 4.970568261915531e-05, "loss": 1.267, "step": 294 }, { "epoch": 0.43486272341993737, "grad_norm": 2.01414305153722, "learning_rate": 4.970264664393614e-05, "loss": 1.1497, "step": 295 }, { "epoch": 0.4363368343467846, "grad_norm": 1.9306957992854965, "learning_rate": 4.9699595184120853e-05, "loss": 1.3978, "step": 296 }, { "epoch": 0.43781094527363185, "grad_norm": 2.09492505251309, "learning_rate": 4.9696528241622244e-05, "loss": 1.4026, "step": 297 }, { "epoch": 0.4392850562004791, "grad_norm": 2.1558122052235147, "learning_rate": 4.9693445818362783e-05, "loss": 1.2173, "step": 298 }, { "epoch": 0.4407591671273263, "grad_norm": 2.0013012229373217, "learning_rate": 4.969034791627466e-05, "loss": 1.2905, "step": 299 }, { "epoch": 0.44223327805417356, "grad_norm": 2.0318809116339716, "learning_rate": 4.9687234537299765e-05, "loss": 1.2261, "step": 300 }, { "epoch": 0.4437073889810208, "grad_norm": 1.899635470707426, "learning_rate": 4.968410568338967e-05, "loss": 1.2103, "step": 301 }, { "epoch": 0.4451814999078681, "grad_norm": 1.944702543053058, "learning_rate": 4.968096135650569e-05, "loss": 1.2691, "step": 302 }, { "epoch": 0.44665561083471533, "grad_norm": 2.072150339556501, "learning_rate": 4.9677801558618795e-05, "loss": 1.3789, "step": 303 }, { "epoch": 0.44812972176156257, "grad_norm": 2.17625632997805, "learning_rate": 4.967462629170969e-05, "loss": 1.32, "step": 304 }, { "epoch": 0.4496038326884098, "grad_norm": 2.1969559243546803, "learning_rate": 4.967143555776873e-05, "loss": 1.3849, "step": 305 }, { "epoch": 0.45107794361525705, "grad_norm": 2.19579660094179, "learning_rate": 4.9668229358796014e-05, "loss": 1.1626, "step": 306 }, { "epoch": 0.4525520545421043, "grad_norm": 2.0683514213949414, "learning_rate": 4.966500769680131e-05, "loss": 1.1795, "step": 307 }, { "epoch": 0.4540261654689515, "grad_norm": 2.234660203777943, "learning_rate": 4.966177057380409e-05, "loss": 1.2999, "step": 308 }, { "epoch": 0.45550027639579876, "grad_norm": 2.2561423912698992, "learning_rate": 4.965851799183349e-05, "loss": 1.2796, "step": 309 }, { "epoch": 0.456974387322646, "grad_norm": 2.1108191173911597, "learning_rate": 4.9655249952928375e-05, "loss": 1.1156, "step": 310 }, { "epoch": 0.4584484982494933, "grad_norm": 2.3380892740113817, "learning_rate": 4.965196645913728e-05, "loss": 1.2801, "step": 311 }, { "epoch": 0.45992260917634054, "grad_norm": 2.262039188224311, "learning_rate": 4.964866751251842e-05, "loss": 1.552, "step": 312 }, { "epoch": 0.4613967201031878, "grad_norm": 2.2624604321808235, "learning_rate": 4.964535311513971e-05, "loss": 1.2801, "step": 313 }, { "epoch": 0.462870831030035, "grad_norm": 2.054629364931785, "learning_rate": 4.9642023269078745e-05, "loss": 1.3185, "step": 314 }, { "epoch": 0.46434494195688225, "grad_norm": 2.207399806754624, "learning_rate": 4.963867797642281e-05, "loss": 1.2118, "step": 315 }, { "epoch": 0.4658190528837295, "grad_norm": 2.2997860269943584, "learning_rate": 4.963531723926885e-05, "loss": 1.3844, "step": 316 }, { "epoch": 0.46729316381057673, "grad_norm": 1.9291530594919164, "learning_rate": 4.963194105972353e-05, "loss": 1.3337, "step": 317 }, { "epoch": 0.46876727473742397, "grad_norm": 2.1767672319922933, "learning_rate": 4.962854943990316e-05, "loss": 1.2581, "step": 318 }, { "epoch": 0.47024138566427126, "grad_norm": 1.8236775218217782, "learning_rate": 4.962514238193375e-05, "loss": 1.0308, "step": 319 }, { "epoch": 0.4717154965911185, "grad_norm": 2.354443379121333, "learning_rate": 4.9621719887950966e-05, "loss": 1.4263, "step": 320 }, { "epoch": 0.47318960751796574, "grad_norm": 2.0796785212551527, "learning_rate": 4.9618281960100164e-05, "loss": 1.0673, "step": 321 }, { "epoch": 0.474663718444813, "grad_norm": 2.1859297429806746, "learning_rate": 4.9614828600536386e-05, "loss": 1.2587, "step": 322 }, { "epoch": 0.4761378293716602, "grad_norm": 2.2660827174362335, "learning_rate": 4.9611359811424324e-05, "loss": 1.3933, "step": 323 }, { "epoch": 0.47761194029850745, "grad_norm": 1.9668334836308698, "learning_rate": 4.960787559493836e-05, "loss": 1.2097, "step": 324 }, { "epoch": 0.4790860512253547, "grad_norm": 2.1433776087165235, "learning_rate": 4.960437595326253e-05, "loss": 1.3049, "step": 325 }, { "epoch": 0.48056016215220193, "grad_norm": 1.9406255307404165, "learning_rate": 4.960086088859055e-05, "loss": 1.1075, "step": 326 }, { "epoch": 0.4820342730790492, "grad_norm": 2.056684096874072, "learning_rate": 4.95973304031258e-05, "loss": 1.2689, "step": 327 }, { "epoch": 0.48350838400589646, "grad_norm": 2.110473459828361, "learning_rate": 4.9593784499081336e-05, "loss": 1.3217, "step": 328 }, { "epoch": 0.4849824949327437, "grad_norm": 2.1025313515882758, "learning_rate": 4.959022317867986e-05, "loss": 1.3446, "step": 329 }, { "epoch": 0.48645660585959094, "grad_norm": 1.8790374519712598, "learning_rate": 4.9586646444153764e-05, "loss": 1.1213, "step": 330 }, { "epoch": 0.4879307167864382, "grad_norm": 2.01392523487953, "learning_rate": 4.958305429774507e-05, "loss": 1.2299, "step": 331 }, { "epoch": 0.4894048277132854, "grad_norm": 2.0683687118016953, "learning_rate": 4.9579446741705485e-05, "loss": 1.3273, "step": 332 }, { "epoch": 0.49087893864013266, "grad_norm": 1.9631798070952575, "learning_rate": 4.957582377829637e-05, "loss": 1.2879, "step": 333 }, { "epoch": 0.4923530495669799, "grad_norm": 1.9760840519160152, "learning_rate": 4.957218540978874e-05, "loss": 1.2564, "step": 334 }, { "epoch": 0.49382716049382713, "grad_norm": 1.8468866983678984, "learning_rate": 4.9568531638463264e-05, "loss": 1.1714, "step": 335 }, { "epoch": 0.4953012714206744, "grad_norm": 1.9925117976702122, "learning_rate": 4.9564862466610284e-05, "loss": 1.3089, "step": 336 }, { "epoch": 0.49677538234752167, "grad_norm": 1.9050058150515576, "learning_rate": 4.9561177896529764e-05, "loss": 1.3064, "step": 337 }, { "epoch": 0.4982494932743689, "grad_norm": 1.9884736992830654, "learning_rate": 4.9557477930531346e-05, "loss": 1.416, "step": 338 }, { "epoch": 0.49972360420121614, "grad_norm": 2.11427247216338, "learning_rate": 4.9553762570934314e-05, "loss": 1.3491, "step": 339 }, { "epoch": 0.5011977151280634, "grad_norm": 2.1606384070358122, "learning_rate": 4.955003182006761e-05, "loss": 1.2685, "step": 340 }, { "epoch": 0.5026718260549107, "grad_norm": 2.2208931471718274, "learning_rate": 4.954628568026981e-05, "loss": 1.0785, "step": 341 }, { "epoch": 0.5041459369817579, "grad_norm": 2.0073281976859434, "learning_rate": 4.954252415388914e-05, "loss": 1.2732, "step": 342 }, { "epoch": 0.5056200479086052, "grad_norm": 1.877148605154568, "learning_rate": 4.953874724328347e-05, "loss": 1.2156, "step": 343 }, { "epoch": 0.5070941588354524, "grad_norm": 1.8281311947970296, "learning_rate": 4.953495495082032e-05, "loss": 1.2077, "step": 344 }, { "epoch": 0.5085682697622996, "grad_norm": 2.1697400254654218, "learning_rate": 4.953114727887686e-05, "loss": 1.2965, "step": 345 }, { "epoch": 0.5100423806891469, "grad_norm": 2.3119034899925635, "learning_rate": 4.952732422983989e-05, "loss": 1.3241, "step": 346 }, { "epoch": 0.5115164916159941, "grad_norm": 2.1524397868419514, "learning_rate": 4.9523485806105826e-05, "loss": 1.313, "step": 347 }, { "epoch": 0.5129906025428413, "grad_norm": 2.031570644013201, "learning_rate": 4.951963201008076e-05, "loss": 1.233, "step": 348 }, { "epoch": 0.5144647134696886, "grad_norm": 1.984724599725456, "learning_rate": 4.9515762844180405e-05, "loss": 1.3872, "step": 349 }, { "epoch": 0.5159388243965358, "grad_norm": 2.15107094018951, "learning_rate": 4.9511878310830106e-05, "loss": 1.2244, "step": 350 }, { "epoch": 0.5174129353233831, "grad_norm": 2.056993010528069, "learning_rate": 4.950797841246484e-05, "loss": 1.2503, "step": 351 }, { "epoch": 0.5188870462502303, "grad_norm": 1.9407666263715788, "learning_rate": 4.950406315152921e-05, "loss": 1.1569, "step": 352 }, { "epoch": 0.5203611571770775, "grad_norm": 2.0450707557208236, "learning_rate": 4.9500132530477475e-05, "loss": 1.2594, "step": 353 }, { "epoch": 0.5218352681039248, "grad_norm": 2.194835461682133, "learning_rate": 4.949618655177348e-05, "loss": 1.4107, "step": 354 }, { "epoch": 0.523309379030772, "grad_norm": 2.07432449559781, "learning_rate": 4.949222521789074e-05, "loss": 1.3165, "step": 355 }, { "epoch": 0.5247834899576194, "grad_norm": 1.8738634479526186, "learning_rate": 4.948824853131236e-05, "loss": 1.2597, "step": 356 }, { "epoch": 0.5262576008844666, "grad_norm": 2.0462843647042095, "learning_rate": 4.948425649453111e-05, "loss": 1.147, "step": 357 }, { "epoch": 0.5277317118113138, "grad_norm": 2.09063545988514, "learning_rate": 4.948024911004933e-05, "loss": 1.2049, "step": 358 }, { "epoch": 0.5292058227381611, "grad_norm": 2.084491128274472, "learning_rate": 4.9476226380379014e-05, "loss": 1.403, "step": 359 }, { "epoch": 0.5306799336650083, "grad_norm": 2.2283108455842378, "learning_rate": 4.947218830804178e-05, "loss": 1.3204, "step": 360 }, { "epoch": 0.5321540445918556, "grad_norm": 2.025646050145778, "learning_rate": 4.946813489556883e-05, "loss": 1.2594, "step": 361 }, { "epoch": 0.5336281555187028, "grad_norm": 1.838089740600478, "learning_rate": 4.946406614550103e-05, "loss": 1.2587, "step": 362 }, { "epoch": 0.53510226644555, "grad_norm": 2.2159673945441662, "learning_rate": 4.945998206038881e-05, "loss": 1.3473, "step": 363 }, { "epoch": 0.5365763773723973, "grad_norm": 2.450213216729124, "learning_rate": 4.945588264279225e-05, "loss": 1.4596, "step": 364 }, { "epoch": 0.5380504882992445, "grad_norm": 2.4155916475705284, "learning_rate": 4.945176789528102e-05, "loss": 1.3494, "step": 365 }, { "epoch": 0.5395245992260918, "grad_norm": 1.9711835443861054, "learning_rate": 4.944763782043441e-05, "loss": 1.2697, "step": 366 }, { "epoch": 0.540998710152939, "grad_norm": 2.1401899132285105, "learning_rate": 4.944349242084131e-05, "loss": 1.2864, "step": 367 }, { "epoch": 0.5424728210797862, "grad_norm": 2.46491319948465, "learning_rate": 4.943933169910023e-05, "loss": 1.377, "step": 368 }, { "epoch": 0.5439469320066335, "grad_norm": 1.9413107036819173, "learning_rate": 4.9435155657819266e-05, "loss": 1.1262, "step": 369 }, { "epoch": 0.5454210429334807, "grad_norm": 2.0282202793775794, "learning_rate": 4.9430964299616136e-05, "loss": 1.2003, "step": 370 }, { "epoch": 0.5468951538603279, "grad_norm": 1.9445308355788467, "learning_rate": 4.942675762711813e-05, "loss": 1.2367, "step": 371 }, { "epoch": 0.5483692647871752, "grad_norm": 2.065632839447704, "learning_rate": 4.942253564296218e-05, "loss": 1.2945, "step": 372 }, { "epoch": 0.5498433757140225, "grad_norm": 2.175782126626843, "learning_rate": 4.9418298349794767e-05, "loss": 1.4343, "step": 373 }, { "epoch": 0.5513174866408698, "grad_norm": 2.0269666846632837, "learning_rate": 4.941404575027202e-05, "loss": 1.2105, "step": 374 }, { "epoch": 0.552791597567717, "grad_norm": 1.9860498764213603, "learning_rate": 4.9409777847059625e-05, "loss": 1.4023, "step": 375 }, { "epoch": 0.5542657084945642, "grad_norm": 2.008988091934329, "learning_rate": 4.940549464283287e-05, "loss": 1.1874, "step": 376 }, { "epoch": 0.5557398194214115, "grad_norm": 1.862189910984415, "learning_rate": 4.940119614027663e-05, "loss": 1.2061, "step": 377 }, { "epoch": 0.5572139303482587, "grad_norm": 2.0094235854852855, "learning_rate": 4.93968823420854e-05, "loss": 1.3476, "step": 378 }, { "epoch": 0.558688041275106, "grad_norm": 2.1016605169986273, "learning_rate": 4.9392553250963215e-05, "loss": 1.2061, "step": 379 }, { "epoch": 0.5601621522019532, "grad_norm": 2.0838546355028553, "learning_rate": 4.9388208869623734e-05, "loss": 1.372, "step": 380 }, { "epoch": 0.5616362631288004, "grad_norm": 2.009855070060756, "learning_rate": 4.938384920079019e-05, "loss": 1.0564, "step": 381 }, { "epoch": 0.5631103740556477, "grad_norm": 1.955387056772603, "learning_rate": 4.937947424719538e-05, "loss": 1.2033, "step": 382 }, { "epoch": 0.5645844849824949, "grad_norm": 2.150539521234464, "learning_rate": 4.937508401158171e-05, "loss": 1.3464, "step": 383 }, { "epoch": 0.5660585959093422, "grad_norm": 2.082205416209983, "learning_rate": 4.937067849670115e-05, "loss": 1.2359, "step": 384 }, { "epoch": 0.5675327068361894, "grad_norm": 2.05006428129317, "learning_rate": 4.936625770531525e-05, "loss": 1.2841, "step": 385 }, { "epoch": 0.5690068177630366, "grad_norm": 1.9641481537149434, "learning_rate": 4.936182164019515e-05, "loss": 1.2608, "step": 386 }, { "epoch": 0.5704809286898839, "grad_norm": 2.0476687690384185, "learning_rate": 4.935737030412153e-05, "loss": 1.1781, "step": 387 }, { "epoch": 0.5719550396167311, "grad_norm": 2.0823949099032624, "learning_rate": 4.935290369988468e-05, "loss": 1.3901, "step": 388 }, { "epoch": 0.5734291505435785, "grad_norm": 1.7481022490356495, "learning_rate": 4.934842183028443e-05, "loss": 1.1215, "step": 389 }, { "epoch": 0.5749032614704257, "grad_norm": 1.9034712503084286, "learning_rate": 4.9343924698130206e-05, "loss": 1.2012, "step": 390 }, { "epoch": 0.5763773723972729, "grad_norm": 2.047799869698074, "learning_rate": 4.9339412306240984e-05, "loss": 1.3761, "step": 391 }, { "epoch": 0.5778514833241202, "grad_norm": 1.8023441177453772, "learning_rate": 4.933488465744531e-05, "loss": 1.2168, "step": 392 }, { "epoch": 0.5793255942509674, "grad_norm": 2.0559262794829736, "learning_rate": 4.933034175458129e-05, "loss": 1.2567, "step": 393 }, { "epoch": 0.5807997051778147, "grad_norm": 2.164753056598148, "learning_rate": 4.9325783600496596e-05, "loss": 1.1754, "step": 394 }, { "epoch": 0.5822738161046619, "grad_norm": 2.029442320265994, "learning_rate": 4.9321210198048465e-05, "loss": 1.1687, "step": 395 }, { "epoch": 0.5837479270315091, "grad_norm": 2.107390197572395, "learning_rate": 4.931662155010367e-05, "loss": 1.2562, "step": 396 }, { "epoch": 0.5852220379583564, "grad_norm": 2.019384736699085, "learning_rate": 4.931201765953858e-05, "loss": 1.1105, "step": 397 }, { "epoch": 0.5866961488852036, "grad_norm": 2.2672364916460555, "learning_rate": 4.9307398529239083e-05, "loss": 1.2842, "step": 398 }, { "epoch": 0.5881702598120508, "grad_norm": 2.0788770189778374, "learning_rate": 4.930276416210063e-05, "loss": 1.2266, "step": 399 }, { "epoch": 0.5896443707388981, "grad_norm": 2.221347846096359, "learning_rate": 4.929811456102824e-05, "loss": 1.3556, "step": 400 }, { "epoch": 0.5911184816657453, "grad_norm": 1.948906393767564, "learning_rate": 4.929344972893646e-05, "loss": 1.3895, "step": 401 }, { "epoch": 0.5925925925925926, "grad_norm": 1.9712978407062367, "learning_rate": 4.928876966874938e-05, "loss": 1.2019, "step": 402 }, { "epoch": 0.5940667035194398, "grad_norm": 1.945725975369612, "learning_rate": 4.9284074383400655e-05, "loss": 1.1657, "step": 403 }, { "epoch": 0.595540814446287, "grad_norm": 1.8959772313879533, "learning_rate": 4.927936387583348e-05, "loss": 1.143, "step": 404 }, { "epoch": 0.5970149253731343, "grad_norm": 2.2246420625959455, "learning_rate": 4.9274638149000585e-05, "loss": 1.243, "step": 405 }, { "epoch": 0.5984890362999816, "grad_norm": 1.8525758698405916, "learning_rate": 4.9269897205864235e-05, "loss": 1.2374, "step": 406 }, { "epoch": 0.5999631472268289, "grad_norm": 1.8355280642815066, "learning_rate": 4.926514104939625e-05, "loss": 1.0791, "step": 407 }, { "epoch": 0.6014372581536761, "grad_norm": 1.8732144436945921, "learning_rate": 4.9260369682577965e-05, "loss": 1.3617, "step": 408 }, { "epoch": 0.6029113690805233, "grad_norm": 1.9717214049960703, "learning_rate": 4.9255583108400285e-05, "loss": 1.3498, "step": 409 }, { "epoch": 0.6043854800073706, "grad_norm": 1.81252288803908, "learning_rate": 4.9250781329863606e-05, "loss": 1.1872, "step": 410 }, { "epoch": 0.6058595909342178, "grad_norm": 2.0171176794897607, "learning_rate": 4.924596434997787e-05, "loss": 1.1937, "step": 411 }, { "epoch": 0.6073337018610651, "grad_norm": 1.9974272989499402, "learning_rate": 4.924113217176256e-05, "loss": 1.296, "step": 412 }, { "epoch": 0.6088078127879123, "grad_norm": 2.013570285310219, "learning_rate": 4.9236284798246666e-05, "loss": 1.2883, "step": 413 }, { "epoch": 0.6102819237147595, "grad_norm": 2.2676354268774634, "learning_rate": 4.923142223246873e-05, "loss": 1.2576, "step": 414 }, { "epoch": 0.6117560346416068, "grad_norm": 2.2390542285071615, "learning_rate": 4.922654447747679e-05, "loss": 1.2484, "step": 415 }, { "epoch": 0.613230145568454, "grad_norm": 2.2585292853145145, "learning_rate": 4.922165153632842e-05, "loss": 1.3442, "step": 416 }, { "epoch": 0.6147042564953012, "grad_norm": 2.037357362774416, "learning_rate": 4.9216743412090694e-05, "loss": 1.245, "step": 417 }, { "epoch": 0.6161783674221485, "grad_norm": 2.12896627638999, "learning_rate": 4.9211820107840234e-05, "loss": 1.1341, "step": 418 }, { "epoch": 0.6176524783489957, "grad_norm": 2.1209822426951446, "learning_rate": 4.920688162666316e-05, "loss": 1.2986, "step": 419 }, { "epoch": 0.619126589275843, "grad_norm": 2.05137000942245, "learning_rate": 4.920192797165511e-05, "loss": 1.1913, "step": 420 }, { "epoch": 0.6206007002026902, "grad_norm": 1.9914100476330134, "learning_rate": 4.919695914592122e-05, "loss": 1.3651, "step": 421 }, { "epoch": 0.6220748111295376, "grad_norm": 2.3821683419458197, "learning_rate": 4.919197515257616e-05, "loss": 1.1148, "step": 422 }, { "epoch": 0.6235489220563848, "grad_norm": 2.1971093694440493, "learning_rate": 4.9186975994744075e-05, "loss": 1.3372, "step": 423 }, { "epoch": 0.625023032983232, "grad_norm": 2.0914250460988812, "learning_rate": 4.918196167555866e-05, "loss": 1.2952, "step": 424 }, { "epoch": 0.6264971439100793, "grad_norm": 1.983962969918925, "learning_rate": 4.9176932198163074e-05, "loss": 1.1064, "step": 425 }, { "epoch": 0.6279712548369265, "grad_norm": 1.9711856005689212, "learning_rate": 4.917188756570999e-05, "loss": 1.265, "step": 426 }, { "epoch": 0.6294453657637737, "grad_norm": 1.73837005731274, "learning_rate": 4.9166827781361594e-05, "loss": 1.1183, "step": 427 }, { "epoch": 0.630919476690621, "grad_norm": 1.9541952024001754, "learning_rate": 4.916175284828955e-05, "loss": 1.2701, "step": 428 }, { "epoch": 0.6323935876174682, "grad_norm": 2.370116483344817, "learning_rate": 4.915666276967501e-05, "loss": 1.3287, "step": 429 }, { "epoch": 0.6338676985443155, "grad_norm": 1.7666012925822072, "learning_rate": 4.9151557548708676e-05, "loss": 1.1203, "step": 430 }, { "epoch": 0.6353418094711627, "grad_norm": 1.9875185839665874, "learning_rate": 4.9146437188590675e-05, "loss": 1.2712, "step": 431 }, { "epoch": 0.6368159203980099, "grad_norm": 2.145819383100576, "learning_rate": 4.914130169253066e-05, "loss": 1.1801, "step": 432 }, { "epoch": 0.6382900313248572, "grad_norm": 2.155605828080233, "learning_rate": 4.913615106374777e-05, "loss": 1.3039, "step": 433 }, { "epoch": 0.6397641422517044, "grad_norm": 2.159786254762208, "learning_rate": 4.91309853054706e-05, "loss": 1.3584, "step": 434 }, { "epoch": 0.6412382531785517, "grad_norm": 2.102967654006211, "learning_rate": 4.912580442093727e-05, "loss": 1.3143, "step": 435 }, { "epoch": 0.6427123641053989, "grad_norm": 2.1292429806444777, "learning_rate": 4.9120608413395366e-05, "loss": 1.2479, "step": 436 }, { "epoch": 0.6441864750322461, "grad_norm": 2.140441329761487, "learning_rate": 4.911539728610194e-05, "loss": 1.2183, "step": 437 }, { "epoch": 0.6456605859590934, "grad_norm": 1.9254192143533253, "learning_rate": 4.9110171042323536e-05, "loss": 1.3119, "step": 438 }, { "epoch": 0.6471346968859407, "grad_norm": 2.0710788675908143, "learning_rate": 4.910492968533618e-05, "loss": 1.2548, "step": 439 }, { "epoch": 0.648608807812788, "grad_norm": 2.25912817402177, "learning_rate": 4.909967321842535e-05, "loss": 1.2045, "step": 440 }, { "epoch": 0.6500829187396352, "grad_norm": 1.9285014596488004, "learning_rate": 4.9094401644886e-05, "loss": 1.0268, "step": 441 }, { "epoch": 0.6515570296664824, "grad_norm": 2.009919363653346, "learning_rate": 4.908911496802257e-05, "loss": 1.3264, "step": 442 }, { "epoch": 0.6530311405933297, "grad_norm": 2.135518463933816, "learning_rate": 4.908381319114898e-05, "loss": 1.2015, "step": 443 }, { "epoch": 0.6545052515201769, "grad_norm": 2.407682908696776, "learning_rate": 4.9078496317588556e-05, "loss": 1.2866, "step": 444 }, { "epoch": 0.6559793624470242, "grad_norm": 2.201244011198754, "learning_rate": 4.907316435067415e-05, "loss": 1.3383, "step": 445 }, { "epoch": 0.6574534733738714, "grad_norm": 1.8702115266675, "learning_rate": 4.906781729374804e-05, "loss": 1.2211, "step": 446 }, { "epoch": 0.6589275843007186, "grad_norm": 2.089769251556199, "learning_rate": 4.906245515016197e-05, "loss": 1.3378, "step": 447 }, { "epoch": 0.6604016952275659, "grad_norm": 1.9409628655381197, "learning_rate": 4.905707792327715e-05, "loss": 1.3955, "step": 448 }, { "epoch": 0.6618758061544131, "grad_norm": 2.0360023658004236, "learning_rate": 4.9051685616464246e-05, "loss": 1.1945, "step": 449 }, { "epoch": 0.6633499170812603, "grad_norm": 1.9885038539781024, "learning_rate": 4.904627823310335e-05, "loss": 1.0618, "step": 450 }, { "epoch": 0.6648240280081076, "grad_norm": 1.6678005732280874, "learning_rate": 4.9040855776584035e-05, "loss": 1.1625, "step": 451 }, { "epoch": 0.6662981389349548, "grad_norm": 1.9374636985085243, "learning_rate": 4.9035418250305314e-05, "loss": 1.2179, "step": 452 }, { "epoch": 0.6677722498618021, "grad_norm": 2.005213379956785, "learning_rate": 4.9029965657675636e-05, "loss": 1.2416, "step": 453 }, { "epoch": 0.6692463607886493, "grad_norm": 1.9783222672630472, "learning_rate": 4.9024498002112906e-05, "loss": 1.1952, "step": 454 }, { "epoch": 0.6707204717154965, "grad_norm": 1.8468994139011359, "learning_rate": 4.901901528704447e-05, "loss": 1.1894, "step": 455 }, { "epoch": 0.6721945826423439, "grad_norm": 2.234376600571706, "learning_rate": 4.90135175159071e-05, "loss": 1.2095, "step": 456 }, { "epoch": 0.6736686935691911, "grad_norm": 2.180512740355362, "learning_rate": 4.900800469214703e-05, "loss": 1.2991, "step": 457 }, { "epoch": 0.6751428044960384, "grad_norm": 2.089047517344716, "learning_rate": 4.900247681921991e-05, "loss": 1.3254, "step": 458 }, { "epoch": 0.6766169154228856, "grad_norm": 2.0382585950464582, "learning_rate": 4.899693390059082e-05, "loss": 1.3636, "step": 459 }, { "epoch": 0.6780910263497328, "grad_norm": 1.875009155251086, "learning_rate": 4.89913759397343e-05, "loss": 1.3337, "step": 460 }, { "epoch": 0.6795651372765801, "grad_norm": 2.1144247011211883, "learning_rate": 4.898580294013428e-05, "loss": 1.2858, "step": 461 }, { "epoch": 0.6810392482034273, "grad_norm": 1.90616282279367, "learning_rate": 4.898021490528415e-05, "loss": 1.2073, "step": 462 }, { "epoch": 0.6825133591302746, "grad_norm": 2.1775424479257137, "learning_rate": 4.89746118386867e-05, "loss": 1.1231, "step": 463 }, { "epoch": 0.6839874700571218, "grad_norm": 1.821908995636708, "learning_rate": 4.8968993743854176e-05, "loss": 1.2769, "step": 464 }, { "epoch": 0.685461580983969, "grad_norm": 2.0738383660968056, "learning_rate": 4.89633606243082e-05, "loss": 1.1314, "step": 465 }, { "epoch": 0.6869356919108163, "grad_norm": 1.7848406957282792, "learning_rate": 4.895771248357983e-05, "loss": 1.2714, "step": 466 }, { "epoch": 0.6884098028376635, "grad_norm": 1.8811139561328127, "learning_rate": 4.895204932520957e-05, "loss": 1.2305, "step": 467 }, { "epoch": 0.6898839137645107, "grad_norm": 1.902865171939238, "learning_rate": 4.8946371152747285e-05, "loss": 1.1978, "step": 468 }, { "epoch": 0.691358024691358, "grad_norm": 2.19455073162271, "learning_rate": 4.8940677969752295e-05, "loss": 1.3188, "step": 469 }, { "epoch": 0.6928321356182052, "grad_norm": 2.2049202604862015, "learning_rate": 4.893496977979331e-05, "loss": 1.3052, "step": 470 }, { "epoch": 0.6943062465450525, "grad_norm": 2.0898325696984816, "learning_rate": 4.892924658644844e-05, "loss": 1.213, "step": 471 }, { "epoch": 0.6957803574718998, "grad_norm": 2.1422562892116788, "learning_rate": 4.892350839330522e-05, "loss": 1.1761, "step": 472 }, { "epoch": 0.697254468398747, "grad_norm": 2.191169821212926, "learning_rate": 4.891775520396057e-05, "loss": 1.1573, "step": 473 }, { "epoch": 0.6987285793255943, "grad_norm": 1.9583943201944165, "learning_rate": 4.8911987022020823e-05, "loss": 1.3179, "step": 474 }, { "epoch": 0.7002026902524415, "grad_norm": 2.215594559690398, "learning_rate": 4.89062038511017e-05, "loss": 1.2166, "step": 475 }, { "epoch": 0.7016768011792888, "grad_norm": 1.9954085695085577, "learning_rate": 4.8900405694828313e-05, "loss": 1.1596, "step": 476 }, { "epoch": 0.703150912106136, "grad_norm": 1.9018829226195624, "learning_rate": 4.8894592556835186e-05, "loss": 1.1444, "step": 477 }, { "epoch": 0.7046250230329832, "grad_norm": 1.8500109904194764, "learning_rate": 4.8888764440766225e-05, "loss": 1.2272, "step": 478 }, { "epoch": 0.7060991339598305, "grad_norm": 1.8737272393267435, "learning_rate": 4.888292135027472e-05, "loss": 1.183, "step": 479 }, { "epoch": 0.7075732448866777, "grad_norm": 1.984382148789873, "learning_rate": 4.887706328902335e-05, "loss": 1.3547, "step": 480 }, { "epoch": 0.709047355813525, "grad_norm": 2.0626195237177365, "learning_rate": 4.8871190260684174e-05, "loss": 1.2089, "step": 481 }, { "epoch": 0.7105214667403722, "grad_norm": 1.888542604805625, "learning_rate": 4.886530226893865e-05, "loss": 1.2145, "step": 482 }, { "epoch": 0.7119955776672194, "grad_norm": 2.0370059110530434, "learning_rate": 4.88593993174776e-05, "loss": 1.3982, "step": 483 }, { "epoch": 0.7134696885940667, "grad_norm": 2.0973353646630897, "learning_rate": 4.885348141000122e-05, "loss": 1.2679, "step": 484 }, { "epoch": 0.7149437995209139, "grad_norm": 2.0808773314265365, "learning_rate": 4.8847548550219105e-05, "loss": 1.3143, "step": 485 }, { "epoch": 0.7164179104477612, "grad_norm": 1.694005717437896, "learning_rate": 4.884160074185019e-05, "loss": 1.148, "step": 486 }, { "epoch": 0.7178920213746084, "grad_norm": 2.0785468110811727, "learning_rate": 4.8835637988622804e-05, "loss": 1.1844, "step": 487 }, { "epoch": 0.7193661323014556, "grad_norm": 1.9692434267808736, "learning_rate": 4.8829660294274636e-05, "loss": 1.2574, "step": 488 }, { "epoch": 0.720840243228303, "grad_norm": 1.9106101528460606, "learning_rate": 4.8823667662552744e-05, "loss": 1.2299, "step": 489 }, { "epoch": 0.7223143541551502, "grad_norm": 2.115233585995063, "learning_rate": 4.881766009721354e-05, "loss": 1.2379, "step": 490 }, { "epoch": 0.7237884650819975, "grad_norm": 2.1014320304673944, "learning_rate": 4.8811637602022806e-05, "loss": 1.338, "step": 491 }, { "epoch": 0.7252625760088447, "grad_norm": 1.946196634765168, "learning_rate": 4.8805600180755685e-05, "loss": 1.3733, "step": 492 }, { "epoch": 0.7267366869356919, "grad_norm": 1.8318412373144093, "learning_rate": 4.8799547837196667e-05, "loss": 1.1944, "step": 493 }, { "epoch": 0.7282107978625392, "grad_norm": 1.999811998114201, "learning_rate": 4.87934805751396e-05, "loss": 1.168, "step": 494 }, { "epoch": 0.7296849087893864, "grad_norm": 2.0644993194926333, "learning_rate": 4.8787398398387684e-05, "loss": 1.3807, "step": 495 }, { "epoch": 0.7311590197162336, "grad_norm": 2.0542475588374205, "learning_rate": 4.878130131075347e-05, "loss": 1.3363, "step": 496 }, { "epoch": 0.7326331306430809, "grad_norm": 1.821939159068415, "learning_rate": 4.877518931605885e-05, "loss": 1.0891, "step": 497 }, { "epoch": 0.7341072415699281, "grad_norm": 2.227487044205755, "learning_rate": 4.8769062418135066e-05, "loss": 1.2584, "step": 498 }, { "epoch": 0.7355813524967754, "grad_norm": 1.9641403243518822, "learning_rate": 4.8762920620822704e-05, "loss": 1.3964, "step": 499 }, { "epoch": 0.7370554634236226, "grad_norm": 1.9716949840177131, "learning_rate": 4.875676392797168e-05, "loss": 1.3206, "step": 500 }, { "epoch": 0.7370554634236226, "eval_bleu": 0.055774073834922035, "eval_bleu_1gram": 0.37808094579880375, "eval_bleu_2gram": 0.1418425954043098, "eval_bleu_3gram": 0.057676944149388065, "eval_bleu_4gram": 0.02660982363073516, "eval_rag_val_loss": 1.285063938748452, "eval_rouge1": 0.36083841954396995, "eval_rouge2": 0.13233125594780745, "eval_rougeL": 0.34319937693866764, "step": 500 }, { "epoch": 0.7385295743504698, "grad_norm": 2.1677674896023906, "learning_rate": 4.875059234344126e-05, "loss": 1.3091, "step": 501 }, { "epoch": 0.7400036852773171, "grad_norm": 1.9605691875458138, "learning_rate": 4.874440587110003e-05, "loss": 1.2964, "step": 502 }, { "epoch": 0.7414777962041643, "grad_norm": 2.2039033853917265, "learning_rate": 4.873820451482592e-05, "loss": 1.3183, "step": 503 }, { "epoch": 0.7429519071310116, "grad_norm": 2.09489705563449, "learning_rate": 4.873198827850618e-05, "loss": 1.3084, "step": 504 }, { "epoch": 0.7444260180578589, "grad_norm": 2.059308212192914, "learning_rate": 4.872575716603739e-05, "loss": 1.2798, "step": 505 }, { "epoch": 0.7459001289847061, "grad_norm": 1.9626106328213022, "learning_rate": 4.871951118132547e-05, "loss": 1.2511, "step": 506 }, { "epoch": 0.7473742399115534, "grad_norm": 2.0680077733939664, "learning_rate": 4.8713250328285654e-05, "loss": 1.4027, "step": 507 }, { "epoch": 0.7488483508384006, "grad_norm": 1.910817599336462, "learning_rate": 4.8706974610842474e-05, "loss": 1.186, "step": 508 }, { "epoch": 0.7503224617652479, "grad_norm": 2.0216965421192046, "learning_rate": 4.87006840329298e-05, "loss": 1.1711, "step": 509 }, { "epoch": 0.7517965726920951, "grad_norm": 1.8937950355538036, "learning_rate": 4.8694378598490826e-05, "loss": 1.1817, "step": 510 }, { "epoch": 0.7532706836189423, "grad_norm": 2.334370779827751, "learning_rate": 4.868805831147805e-05, "loss": 1.3584, "step": 511 }, { "epoch": 0.7547447945457896, "grad_norm": 1.981356329727122, "learning_rate": 4.868172317585326e-05, "loss": 1.3112, "step": 512 }, { "epoch": 0.7562189054726368, "grad_norm": 1.9281924243681243, "learning_rate": 4.867537319558758e-05, "loss": 1.2023, "step": 513 }, { "epoch": 0.757693016399484, "grad_norm": 2.124951754751616, "learning_rate": 4.866900837466144e-05, "loss": 1.2516, "step": 514 }, { "epoch": 0.7591671273263313, "grad_norm": 1.8938717038167021, "learning_rate": 4.8662628717064544e-05, "loss": 1.2533, "step": 515 }, { "epoch": 0.7606412382531785, "grad_norm": 1.9922080244615223, "learning_rate": 4.865623422679593e-05, "loss": 1.3051, "step": 516 }, { "epoch": 0.7621153491800258, "grad_norm": 2.0660913584433596, "learning_rate": 4.8649824907863894e-05, "loss": 1.2343, "step": 517 }, { "epoch": 0.763589460106873, "grad_norm": 2.013637420329931, "learning_rate": 4.864340076428607e-05, "loss": 1.0919, "step": 518 }, { "epoch": 0.7650635710337202, "grad_norm": 2.0847230726434187, "learning_rate": 4.863696180008937e-05, "loss": 1.3213, "step": 519 }, { "epoch": 0.7665376819605675, "grad_norm": 2.075592218477311, "learning_rate": 4.8630508019309976e-05, "loss": 1.3024, "step": 520 }, { "epoch": 0.7680117928874147, "grad_norm": 2.275404560881412, "learning_rate": 4.8624039425993375e-05, "loss": 1.2831, "step": 521 }, { "epoch": 0.7694859038142621, "grad_norm": 2.04531975986017, "learning_rate": 4.861755602419434e-05, "loss": 1.3419, "step": 522 }, { "epoch": 0.7709600147411093, "grad_norm": 1.8090717516155312, "learning_rate": 4.861105781797692e-05, "loss": 1.2487, "step": 523 }, { "epoch": 0.7724341256679566, "grad_norm": 2.025295862318772, "learning_rate": 4.8604544811414465e-05, "loss": 1.2153, "step": 524 }, { "epoch": 0.7739082365948038, "grad_norm": 2.0171148427440513, "learning_rate": 4.859801700858957e-05, "loss": 1.2247, "step": 525 }, { "epoch": 0.775382347521651, "grad_norm": 2.219450181973006, "learning_rate": 4.859147441359412e-05, "loss": 1.2851, "step": 526 }, { "epoch": 0.7768564584484983, "grad_norm": 2.180578890417121, "learning_rate": 4.858491703052927e-05, "loss": 1.1897, "step": 527 }, { "epoch": 0.7783305693753455, "grad_norm": 1.8132534434213397, "learning_rate": 4.8578344863505464e-05, "loss": 1.1746, "step": 528 }, { "epoch": 0.7798046803021927, "grad_norm": 1.9187744685796695, "learning_rate": 4.857175791664238e-05, "loss": 1.1502, "step": 529 }, { "epoch": 0.78127879122904, "grad_norm": 2.0903931082856086, "learning_rate": 4.856515619406898e-05, "loss": 1.3205, "step": 530 }, { "epoch": 0.7827529021558872, "grad_norm": 1.959422525562634, "learning_rate": 4.855853969992349e-05, "loss": 1.2338, "step": 531 }, { "epoch": 0.7842270130827345, "grad_norm": 2.0417601765089075, "learning_rate": 4.8551908438353374e-05, "loss": 1.1853, "step": 532 }, { "epoch": 0.7857011240095817, "grad_norm": 2.0775756109734065, "learning_rate": 4.854526241351539e-05, "loss": 1.3464, "step": 533 }, { "epoch": 0.7871752349364289, "grad_norm": 1.9584754594673106, "learning_rate": 4.853860162957552e-05, "loss": 1.1816, "step": 534 }, { "epoch": 0.7886493458632762, "grad_norm": 1.9574614944665798, "learning_rate": 4.8531926090709016e-05, "loss": 1.1436, "step": 535 }, { "epoch": 0.7901234567901234, "grad_norm": 1.9334624274770835, "learning_rate": 4.8525235801100346e-05, "loss": 1.2473, "step": 536 }, { "epoch": 0.7915975677169707, "grad_norm": 1.8858199417148978, "learning_rate": 4.851853076494327e-05, "loss": 1.0027, "step": 537 }, { "epoch": 0.793071678643818, "grad_norm": 1.9347865885463942, "learning_rate": 4.8511810986440766e-05, "loss": 1.2732, "step": 538 }, { "epoch": 0.7945457895706652, "grad_norm": 1.9681903104100333, "learning_rate": 4.8505076469805054e-05, "loss": 1.3098, "step": 539 }, { "epoch": 0.7960199004975125, "grad_norm": 1.9624206283943824, "learning_rate": 4.849832721925759e-05, "loss": 1.376, "step": 540 }, { "epoch": 0.7974940114243597, "grad_norm": 1.8819495791857475, "learning_rate": 4.849156323902908e-05, "loss": 1.1738, "step": 541 }, { "epoch": 0.798968122351207, "grad_norm": 2.0948502795269244, "learning_rate": 4.848478453335946e-05, "loss": 1.157, "step": 542 }, { "epoch": 0.8004422332780542, "grad_norm": 2.041524635616624, "learning_rate": 4.8477991106497874e-05, "loss": 1.301, "step": 543 }, { "epoch": 0.8019163442049014, "grad_norm": 1.8144047692695384, "learning_rate": 4.847118296270272e-05, "loss": 1.156, "step": 544 }, { "epoch": 0.8033904551317487, "grad_norm": 2.1132726757277536, "learning_rate": 4.8464360106241615e-05, "loss": 1.2778, "step": 545 }, { "epoch": 0.8048645660585959, "grad_norm": 3.0966376497560573, "learning_rate": 4.845752254139139e-05, "loss": 1.1861, "step": 546 }, { "epoch": 0.8063386769854431, "grad_norm": 2.0261325636007146, "learning_rate": 4.845067027243809e-05, "loss": 1.2881, "step": 547 }, { "epoch": 0.8078127879122904, "grad_norm": 1.9175504706155164, "learning_rate": 4.844380330367701e-05, "loss": 1.2062, "step": 548 }, { "epoch": 0.8092868988391376, "grad_norm": 1.9827917796826646, "learning_rate": 4.843692163941264e-05, "loss": 1.3992, "step": 549 }, { "epoch": 0.8107610097659849, "grad_norm": 1.834017784781216, "learning_rate": 4.8430025283958645e-05, "loss": 1.2839, "step": 550 }, { "epoch": 0.8122351206928321, "grad_norm": 2.0331237367050887, "learning_rate": 4.842311424163797e-05, "loss": 1.1902, "step": 551 }, { "epoch": 0.8137092316196793, "grad_norm": 2.0631466632065076, "learning_rate": 4.8416188516782715e-05, "loss": 1.2254, "step": 552 }, { "epoch": 0.8151833425465266, "grad_norm": 2.1253826694378195, "learning_rate": 4.84092481137342e-05, "loss": 1.1905, "step": 553 }, { "epoch": 0.8166574534733738, "grad_norm": 1.9397678638475906, "learning_rate": 4.840229303684294e-05, "loss": 1.0805, "step": 554 }, { "epoch": 0.8181315644002212, "grad_norm": 1.9404558584121527, "learning_rate": 4.8395323290468655e-05, "loss": 1.2385, "step": 555 }, { "epoch": 0.8196056753270684, "grad_norm": 2.0377459605783828, "learning_rate": 4.838833887898026e-05, "loss": 1.2827, "step": 556 }, { "epoch": 0.8210797862539156, "grad_norm": 1.9552230552589236, "learning_rate": 4.838133980675586e-05, "loss": 1.1314, "step": 557 }, { "epoch": 0.8225538971807629, "grad_norm": 2.4270729643200473, "learning_rate": 4.837432607818275e-05, "loss": 1.1526, "step": 558 }, { "epoch": 0.8240280081076101, "grad_norm": 2.1303970017082956, "learning_rate": 4.836729769765741e-05, "loss": 1.2425, "step": 559 }, { "epoch": 0.8255021190344574, "grad_norm": 2.172936598581813, "learning_rate": 4.83602546695855e-05, "loss": 1.2685, "step": 560 }, { "epoch": 0.8269762299613046, "grad_norm": 2.2078528571304323, "learning_rate": 4.835319699838189e-05, "loss": 1.4396, "step": 561 }, { "epoch": 0.8284503408881518, "grad_norm": 2.2831535236071225, "learning_rate": 4.834612468847058e-05, "loss": 1.3671, "step": 562 }, { "epoch": 0.8299244518149991, "grad_norm": 2.4225634180985796, "learning_rate": 4.833903774428481e-05, "loss": 1.1211, "step": 563 }, { "epoch": 0.8313985627418463, "grad_norm": 2.0585197420950254, "learning_rate": 4.833193617026692e-05, "loss": 1.2473, "step": 564 }, { "epoch": 0.8328726736686936, "grad_norm": 2.123752003130493, "learning_rate": 4.8324819970868473e-05, "loss": 1.1063, "step": 565 }, { "epoch": 0.8343467845955408, "grad_norm": 2.06125973488086, "learning_rate": 4.831768915055019e-05, "loss": 1.2642, "step": 566 }, { "epoch": 0.835820895522388, "grad_norm": 2.289950253974949, "learning_rate": 4.831054371378194e-05, "loss": 1.2359, "step": 567 }, { "epoch": 0.8372950064492353, "grad_norm": 2.3426004260835054, "learning_rate": 4.830338366504277e-05, "loss": 1.2169, "step": 568 }, { "epoch": 0.8387691173760825, "grad_norm": 1.9481625801209133, "learning_rate": 4.829620900882089e-05, "loss": 1.0236, "step": 569 }, { "epoch": 0.8402432283029297, "grad_norm": 2.237443963739705, "learning_rate": 4.8289019749613645e-05, "loss": 1.2508, "step": 570 }, { "epoch": 0.8417173392297771, "grad_norm": 2.078363390564736, "learning_rate": 4.8281815891927554e-05, "loss": 1.2404, "step": 571 }, { "epoch": 0.8431914501566243, "grad_norm": 1.909683512285726, "learning_rate": 4.827459744027828e-05, "loss": 1.1803, "step": 572 }, { "epoch": 0.8446655610834716, "grad_norm": 2.0717913258314717, "learning_rate": 4.826736439919063e-05, "loss": 1.1904, "step": 573 }, { "epoch": 0.8461396720103188, "grad_norm": 2.076241693910707, "learning_rate": 4.826011677319857e-05, "loss": 1.1462, "step": 574 }, { "epoch": 0.847613782937166, "grad_norm": 1.976534696240186, "learning_rate": 4.825285456684518e-05, "loss": 1.3073, "step": 575 }, { "epoch": 0.8490878938640133, "grad_norm": 1.7233490880594058, "learning_rate": 4.824557778468272e-05, "loss": 1.1766, "step": 576 }, { "epoch": 0.8505620047908605, "grad_norm": 1.9581380367971337, "learning_rate": 4.823828643127255e-05, "loss": 1.246, "step": 577 }, { "epoch": 0.8520361157177078, "grad_norm": 2.0687864041078607, "learning_rate": 4.823098051118519e-05, "loss": 1.3372, "step": 578 }, { "epoch": 0.853510226644555, "grad_norm": 2.189905859957472, "learning_rate": 4.822366002900027e-05, "loss": 1.3677, "step": 579 }, { "epoch": 0.8549843375714022, "grad_norm": 2.4451510342558116, "learning_rate": 4.821632498930656e-05, "loss": 1.2972, "step": 580 }, { "epoch": 0.8564584484982495, "grad_norm": 2.109782426834654, "learning_rate": 4.820897539670195e-05, "loss": 1.178, "step": 581 }, { "epoch": 0.8579325594250967, "grad_norm": 2.076736330401802, "learning_rate": 4.820161125579347e-05, "loss": 1.3122, "step": 582 }, { "epoch": 0.859406670351944, "grad_norm": 1.9224180989374273, "learning_rate": 4.819423257119723e-05, "loss": 1.3098, "step": 583 }, { "epoch": 0.8608807812787912, "grad_norm": 2.1120258769848705, "learning_rate": 4.818683934753851e-05, "loss": 1.3675, "step": 584 }, { "epoch": 0.8623548922056384, "grad_norm": 2.163586505638915, "learning_rate": 4.817943158945166e-05, "loss": 1.314, "step": 585 }, { "epoch": 0.8638290031324857, "grad_norm": 2.004994593206331, "learning_rate": 4.817200930158015e-05, "loss": 1.1991, "step": 586 }, { "epoch": 0.8653031140593329, "grad_norm": 1.9715819804604295, "learning_rate": 4.816457248857657e-05, "loss": 1.1582, "step": 587 }, { "epoch": 0.8667772249861803, "grad_norm": 1.9883807144317942, "learning_rate": 4.815712115510261e-05, "loss": 1.17, "step": 588 }, { "epoch": 0.8682513359130275, "grad_norm": 2.0368205054953776, "learning_rate": 4.8149655305829066e-05, "loss": 1.219, "step": 589 }, { "epoch": 0.8697254468398747, "grad_norm": 1.8652392961527031, "learning_rate": 4.814217494543581e-05, "loss": 0.9524, "step": 590 }, { "epoch": 0.871199557766722, "grad_norm": 1.9035658152018742, "learning_rate": 4.813468007861185e-05, "loss": 1.1589, "step": 591 }, { "epoch": 0.8726736686935692, "grad_norm": 2.600853992475037, "learning_rate": 4.812717071005525e-05, "loss": 1.3503, "step": 592 }, { "epoch": 0.8741477796204165, "grad_norm": 2.272746607091333, "learning_rate": 4.8119646844473185e-05, "loss": 1.3487, "step": 593 }, { "epoch": 0.8756218905472637, "grad_norm": 2.0525614250925783, "learning_rate": 4.811210848658191e-05, "loss": 1.071, "step": 594 }, { "epoch": 0.8770960014741109, "grad_norm": 1.809496594495644, "learning_rate": 4.8104555641106766e-05, "loss": 1.176, "step": 595 }, { "epoch": 0.8785701124009582, "grad_norm": 2.3619692857248937, "learning_rate": 4.8096988312782174e-05, "loss": 1.386, "step": 596 }, { "epoch": 0.8800442233278054, "grad_norm": 2.037182522244084, "learning_rate": 4.808940650635163e-05, "loss": 1.3055, "step": 597 }, { "epoch": 0.8815183342546526, "grad_norm": 2.15064561598413, "learning_rate": 4.8081810226567725e-05, "loss": 1.3171, "step": 598 }, { "epoch": 0.8829924451814999, "grad_norm": 1.991979371212999, "learning_rate": 4.8074199478192097e-05, "loss": 1.259, "step": 599 }, { "epoch": 0.8844665561083471, "grad_norm": 1.8916465584558655, "learning_rate": 4.8066574265995464e-05, "loss": 1.2733, "step": 600 }, { "epoch": 0.8859406670351944, "grad_norm": 2.0892228104247574, "learning_rate": 4.805893459475761e-05, "loss": 1.1211, "step": 601 }, { "epoch": 0.8874147779620416, "grad_norm": 2.2975196193318133, "learning_rate": 4.805128046926739e-05, "loss": 1.4107, "step": 602 }, { "epoch": 0.8888888888888888, "grad_norm": 2.3377194959428786, "learning_rate": 4.804361189432271e-05, "loss": 1.3729, "step": 603 }, { "epoch": 0.8903629998157362, "grad_norm": 2.0799486003540437, "learning_rate": 4.803592887473053e-05, "loss": 1.4055, "step": 604 }, { "epoch": 0.8918371107425834, "grad_norm": 1.883444715542704, "learning_rate": 4.802823141530687e-05, "loss": 1.1879, "step": 605 }, { "epoch": 0.8933112216694307, "grad_norm": 2.1097459255034057, "learning_rate": 4.8020519520876816e-05, "loss": 1.3586, "step": 606 }, { "epoch": 0.8947853325962779, "grad_norm": 1.725516507317666, "learning_rate": 4.801279319627448e-05, "loss": 1.2184, "step": 607 }, { "epoch": 0.8962594435231251, "grad_norm": 1.9249049547189436, "learning_rate": 4.8005052446343016e-05, "loss": 1.2399, "step": 608 }, { "epoch": 0.8977335544499724, "grad_norm": 1.8938657240555792, "learning_rate": 4.799729727593466e-05, "loss": 1.348, "step": 609 }, { "epoch": 0.8992076653768196, "grad_norm": 1.9734427819743867, "learning_rate": 4.798952768991063e-05, "loss": 1.2651, "step": 610 }, { "epoch": 0.9006817763036669, "grad_norm": 1.9313136982212107, "learning_rate": 4.798174369314123e-05, "loss": 1.1679, "step": 611 }, { "epoch": 0.9021558872305141, "grad_norm": 1.9853136140618186, "learning_rate": 4.7973945290505766e-05, "loss": 1.067, "step": 612 }, { "epoch": 0.9036299981573613, "grad_norm": 2.126086237912291, "learning_rate": 4.796613248689259e-05, "loss": 1.1798, "step": 613 }, { "epoch": 0.9051041090842086, "grad_norm": 2.070994944825535, "learning_rate": 4.795830528719908e-05, "loss": 1.1508, "step": 614 }, { "epoch": 0.9065782200110558, "grad_norm": 2.346249379405375, "learning_rate": 4.795046369633163e-05, "loss": 1.2087, "step": 615 }, { "epoch": 0.908052330937903, "grad_norm": 2.1317907547891726, "learning_rate": 4.7942607719205663e-05, "loss": 1.2089, "step": 616 }, { "epoch": 0.9095264418647503, "grad_norm": 1.9293677802610365, "learning_rate": 4.793473736074561e-05, "loss": 1.1851, "step": 617 }, { "epoch": 0.9110005527915975, "grad_norm": 2.069014232160644, "learning_rate": 4.792685262588492e-05, "loss": 1.3706, "step": 618 }, { "epoch": 0.9124746637184448, "grad_norm": 2.2153408024298753, "learning_rate": 4.791895351956607e-05, "loss": 1.3359, "step": 619 }, { "epoch": 0.913948774645292, "grad_norm": 1.9095202676357212, "learning_rate": 4.791104004674052e-05, "loss": 1.0717, "step": 620 }, { "epoch": 0.9154228855721394, "grad_norm": 13.471162301194251, "learning_rate": 4.7903112212368756e-05, "loss": 1.1935, "step": 621 }, { "epoch": 0.9168969964989866, "grad_norm": 1.9117068996147892, "learning_rate": 4.789517002142026e-05, "loss": 1.0653, "step": 622 }, { "epoch": 0.9183711074258338, "grad_norm": 2.190341411959799, "learning_rate": 4.788721347887349e-05, "loss": 1.1469, "step": 623 }, { "epoch": 0.9198452183526811, "grad_norm": 1.9103813428812797, "learning_rate": 4.7879242589715955e-05, "loss": 1.2228, "step": 624 }, { "epoch": 0.9213193292795283, "grad_norm": 2.120853192649248, "learning_rate": 4.78712573589441e-05, "loss": 1.5102, "step": 625 }, { "epoch": 0.9227934402063755, "grad_norm": 2.043984038691899, "learning_rate": 4.7863257791563384e-05, "loss": 1.3634, "step": 626 }, { "epoch": 0.9242675511332228, "grad_norm": 2.141355181157796, "learning_rate": 4.785524389258827e-05, "loss": 1.2094, "step": 627 }, { "epoch": 0.92574166206007, "grad_norm": 2.11080635391907, "learning_rate": 4.7847215667042165e-05, "loss": 1.3711, "step": 628 }, { "epoch": 0.9272157729869173, "grad_norm": 1.9479170066574807, "learning_rate": 4.78391731199575e-05, "loss": 1.2955, "step": 629 }, { "epoch": 0.9286898839137645, "grad_norm": 1.8953083135774045, "learning_rate": 4.7831116256375644e-05, "loss": 1.2126, "step": 630 }, { "epoch": 0.9301639948406117, "grad_norm": 1.9209758236787056, "learning_rate": 4.782304508134696e-05, "loss": 1.3091, "step": 631 }, { "epoch": 0.931638105767459, "grad_norm": 2.149785288358179, "learning_rate": 4.7814959599930794e-05, "loss": 1.1405, "step": 632 }, { "epoch": 0.9331122166943062, "grad_norm": 1.9780288504183043, "learning_rate": 4.7806859817195425e-05, "loss": 1.3233, "step": 633 }, { "epoch": 0.9345863276211535, "grad_norm": 1.877057409019274, "learning_rate": 4.779874573821814e-05, "loss": 1.2103, "step": 634 }, { "epoch": 0.9360604385480007, "grad_norm": 1.8423641621634335, "learning_rate": 4.779061736808514e-05, "loss": 1.2466, "step": 635 }, { "epoch": 0.9375345494748479, "grad_norm": 1.8419044438944796, "learning_rate": 4.778247471189163e-05, "loss": 1.1547, "step": 636 }, { "epoch": 0.9390086604016952, "grad_norm": 1.7555866984463395, "learning_rate": 4.777431777474174e-05, "loss": 1.1401, "step": 637 }, { "epoch": 0.9404827713285425, "grad_norm": 1.8523676284673225, "learning_rate": 4.776614656174856e-05, "loss": 1.2028, "step": 638 }, { "epoch": 0.9419568822553898, "grad_norm": 1.9608194384046775, "learning_rate": 4.775796107803413e-05, "loss": 1.2168, "step": 639 }, { "epoch": 0.943430993182237, "grad_norm": 2.073442044285478, "learning_rate": 4.7749761328729436e-05, "loss": 1.3276, "step": 640 }, { "epoch": 0.9449051041090842, "grad_norm": 1.9092279530543865, "learning_rate": 4.77415473189744e-05, "loss": 1.3189, "step": 641 }, { "epoch": 0.9463792150359315, "grad_norm": 1.846063100436894, "learning_rate": 4.77333190539179e-05, "loss": 1.1727, "step": 642 }, { "epoch": 0.9478533259627787, "grad_norm": 1.8016767612824964, "learning_rate": 4.772507653871773e-05, "loss": 1.0516, "step": 643 }, { "epoch": 0.949327436889626, "grad_norm": 2.0776773988930164, "learning_rate": 4.7716819778540625e-05, "loss": 1.3486, "step": 644 }, { "epoch": 0.9508015478164732, "grad_norm": 1.9144252511209512, "learning_rate": 4.770854877856225e-05, "loss": 1.2902, "step": 645 }, { "epoch": 0.9522756587433204, "grad_norm": 2.0997841269936215, "learning_rate": 4.7700263543967195e-05, "loss": 1.2422, "step": 646 }, { "epoch": 0.9537497696701677, "grad_norm": 1.8913345266590964, "learning_rate": 4.769196407994898e-05, "loss": 1.1748, "step": 647 }, { "epoch": 0.9552238805970149, "grad_norm": 1.8777613333760153, "learning_rate": 4.768365039171002e-05, "loss": 1.2275, "step": 648 }, { "epoch": 0.9566979915238621, "grad_norm": 2.141268222818146, "learning_rate": 4.7675322484461674e-05, "loss": 1.3079, "step": 649 }, { "epoch": 0.9581721024507094, "grad_norm": 1.7453270237932232, "learning_rate": 4.766698036342421e-05, "loss": 1.1708, "step": 650 }, { "epoch": 0.9596462133775566, "grad_norm": 1.7688942408973285, "learning_rate": 4.765862403382678e-05, "loss": 1.1481, "step": 651 }, { "epoch": 0.9611203243044039, "grad_norm": 1.9145566964052019, "learning_rate": 4.7650253500907494e-05, "loss": 1.2498, "step": 652 }, { "epoch": 0.9625944352312511, "grad_norm": 1.9962693825809683, "learning_rate": 4.76418687699133e-05, "loss": 1.3667, "step": 653 }, { "epoch": 0.9640685461580984, "grad_norm": 2.1657392155197015, "learning_rate": 4.76334698461001e-05, "loss": 1.1556, "step": 654 }, { "epoch": 0.9655426570849457, "grad_norm": 2.1168464864607475, "learning_rate": 4.7625056734732654e-05, "loss": 1.2167, "step": 655 }, { "epoch": 0.9670167680117929, "grad_norm": 2.1468955315242537, "learning_rate": 4.7616629441084655e-05, "loss": 1.2656, "step": 656 }, { "epoch": 0.9684908789386402, "grad_norm": 1.7846384445232284, "learning_rate": 4.760818797043864e-05, "loss": 1.262, "step": 657 }, { "epoch": 0.9699649898654874, "grad_norm": 1.9537546592456716, "learning_rate": 4.759973232808609e-05, "loss": 1.1513, "step": 658 }, { "epoch": 0.9714391007923346, "grad_norm": 1.6830230949480942, "learning_rate": 4.75912625193273e-05, "loss": 1.1101, "step": 659 }, { "epoch": 0.9729132117191819, "grad_norm": 2.125750128796163, "learning_rate": 4.7582778549471494e-05, "loss": 1.1691, "step": 660 }, { "epoch": 0.9743873226460291, "grad_norm": 1.8110385132627622, "learning_rate": 4.7574280423836776e-05, "loss": 1.032, "step": 661 }, { "epoch": 0.9758614335728764, "grad_norm": 1.8613661675103053, "learning_rate": 4.756576814775009e-05, "loss": 1.2318, "step": 662 }, { "epoch": 0.9773355444997236, "grad_norm": 1.8092233703011353, "learning_rate": 4.7557241726547266e-05, "loss": 1.2504, "step": 663 }, { "epoch": 0.9788096554265708, "grad_norm": 1.985076239728845, "learning_rate": 4.7548701165573003e-05, "loss": 1.1856, "step": 664 }, { "epoch": 0.9802837663534181, "grad_norm": 2.0783639641375466, "learning_rate": 4.754014647018088e-05, "loss": 1.281, "step": 665 }, { "epoch": 0.9817578772802653, "grad_norm": 1.9446935933057636, "learning_rate": 4.75315776457333e-05, "loss": 1.0435, "step": 666 }, { "epoch": 0.9832319882071126, "grad_norm": 1.8953543536540938, "learning_rate": 4.752299469760154e-05, "loss": 1.1943, "step": 667 }, { "epoch": 0.9847060991339598, "grad_norm": 1.9803635307817333, "learning_rate": 4.751439763116575e-05, "loss": 1.2471, "step": 668 }, { "epoch": 0.986180210060807, "grad_norm": 1.959071209733459, "learning_rate": 4.750578645181489e-05, "loss": 1.2497, "step": 669 }, { "epoch": 0.9876543209876543, "grad_norm": 1.990320684865664, "learning_rate": 4.74971611649468e-05, "loss": 1.3056, "step": 670 }, { "epoch": 0.9891284319145016, "grad_norm": 2.013947372756586, "learning_rate": 4.748852177596815e-05, "loss": 1.2216, "step": 671 }, { "epoch": 0.9906025428413489, "grad_norm": 1.9298390317409342, "learning_rate": 4.747986829029445e-05, "loss": 1.3089, "step": 672 }, { "epoch": 0.9920766537681961, "grad_norm": 2.048765401543755, "learning_rate": 4.747120071335004e-05, "loss": 1.2695, "step": 673 }, { "epoch": 0.9935507646950433, "grad_norm": 2.040587224216355, "learning_rate": 4.746251905056811e-05, "loss": 1.2202, "step": 674 }, { "epoch": 0.9950248756218906, "grad_norm": 2.08900608851092, "learning_rate": 4.745382330739067e-05, "loss": 1.2773, "step": 675 }, { "epoch": 0.9964989865487378, "grad_norm": 1.8691066950127027, "learning_rate": 4.7445113489268544e-05, "loss": 1.2241, "step": 676 }, { "epoch": 0.997973097475585, "grad_norm": 1.9610680768433717, "learning_rate": 4.74363896016614e-05, "loss": 1.2745, "step": 677 }, { "epoch": 0.9994472084024323, "grad_norm": 1.913787261480544, "learning_rate": 4.742765165003772e-05, "loss": 1.2185, "step": 678 }, { "epoch": 1.0009213193292796, "grad_norm": 1.9030406388249856, "learning_rate": 4.741889963987478e-05, "loss": 1.1003, "step": 679 }, { "epoch": 1.0023954302561269, "grad_norm": 1.7255712917636827, "learning_rate": 4.741013357665871e-05, "loss": 0.8799, "step": 680 }, { "epoch": 1.0038695411829741, "grad_norm": 2.0221458053240133, "learning_rate": 4.7401353465884406e-05, "loss": 1.0527, "step": 681 }, { "epoch": 1.0053436521098214, "grad_norm": 1.71364515121558, "learning_rate": 4.73925593130556e-05, "loss": 0.8397, "step": 682 }, { "epoch": 1.0068177630366686, "grad_norm": 1.8330641317683178, "learning_rate": 4.7383751123684806e-05, "loss": 1.0199, "step": 683 }, { "epoch": 1.0082918739635158, "grad_norm": 1.8175427234775692, "learning_rate": 4.737492890329335e-05, "loss": 0.8541, "step": 684 }, { "epoch": 1.009765984890363, "grad_norm": 1.8448878511081652, "learning_rate": 4.736609265741135e-05, "loss": 0.9082, "step": 685 }, { "epoch": 1.0112400958172103, "grad_norm": 1.8561063614574258, "learning_rate": 4.7357242391577724e-05, "loss": 1.0351, "step": 686 }, { "epoch": 1.0127142067440575, "grad_norm": 2.1164540501938687, "learning_rate": 4.7348378111340145e-05, "loss": 0.8755, "step": 687 }, { "epoch": 1.0141883176709048, "grad_norm": 1.8560148379168484, "learning_rate": 4.733949982225511e-05, "loss": 0.9088, "step": 688 }, { "epoch": 1.015662428597752, "grad_norm": 1.9268731354869293, "learning_rate": 4.7330607529887884e-05, "loss": 0.842, "step": 689 }, { "epoch": 1.0171365395245993, "grad_norm": 1.9495917137353407, "learning_rate": 4.73217012398125e-05, "loss": 0.9127, "step": 690 }, { "epoch": 1.0186106504514465, "grad_norm": 2.0965748420018864, "learning_rate": 4.731278095761178e-05, "loss": 0.9504, "step": 691 }, { "epoch": 1.0200847613782937, "grad_norm": 2.0332367790727597, "learning_rate": 4.73038466888773e-05, "loss": 0.9875, "step": 692 }, { "epoch": 1.021558872305141, "grad_norm": 2.1661996582662124, "learning_rate": 4.729489843920942e-05, "loss": 0.8168, "step": 693 }, { "epoch": 1.0230329832319882, "grad_norm": 1.968743763262846, "learning_rate": 4.728593621421726e-05, "loss": 0.8037, "step": 694 }, { "epoch": 1.0245070941588355, "grad_norm": 2.024344693811203, "learning_rate": 4.727696001951869e-05, "loss": 0.7848, "step": 695 }, { "epoch": 1.0259812050856827, "grad_norm": 1.9778951140695331, "learning_rate": 4.726796986074034e-05, "loss": 0.7949, "step": 696 }, { "epoch": 1.02745531601253, "grad_norm": 2.2535455107628932, "learning_rate": 4.725896574351763e-05, "loss": 0.8676, "step": 697 }, { "epoch": 1.0289294269393772, "grad_norm": 2.073275191645863, "learning_rate": 4.7249947673494645e-05, "loss": 0.8216, "step": 698 }, { "epoch": 1.0304035378662244, "grad_norm": 2.0964909163391288, "learning_rate": 4.72409156563243e-05, "loss": 0.8171, "step": 699 }, { "epoch": 1.0318776487930716, "grad_norm": 2.103901748850725, "learning_rate": 4.7231869697668214e-05, "loss": 0.8849, "step": 700 }, { "epoch": 1.0333517597199189, "grad_norm": 2.0982996914534895, "learning_rate": 4.722280980319675e-05, "loss": 0.9081, "step": 701 }, { "epoch": 1.0348258706467661, "grad_norm": 1.9468427407422475, "learning_rate": 4.7213735978589016e-05, "loss": 0.8634, "step": 702 }, { "epoch": 1.0362999815736134, "grad_norm": 2.1305042114216186, "learning_rate": 4.720464822953284e-05, "loss": 1.0387, "step": 703 }, { "epoch": 1.0377740925004606, "grad_norm": 2.1700491056731215, "learning_rate": 4.719554656172478e-05, "loss": 1.0, "step": 704 }, { "epoch": 1.0392482034273078, "grad_norm": 2.0736518846769973, "learning_rate": 4.7186430980870124e-05, "loss": 0.8973, "step": 705 }, { "epoch": 1.040722314354155, "grad_norm": 2.0509463289214107, "learning_rate": 4.717730149268287e-05, "loss": 0.8504, "step": 706 }, { "epoch": 1.0421964252810023, "grad_norm": 1.9043050130034445, "learning_rate": 4.716815810288575e-05, "loss": 0.9101, "step": 707 }, { "epoch": 1.0436705362078496, "grad_norm": 2.6989031329755617, "learning_rate": 4.7159000817210205e-05, "loss": 1.0448, "step": 708 }, { "epoch": 1.0451446471346968, "grad_norm": 2.1237488597666982, "learning_rate": 4.714982964139639e-05, "loss": 0.8646, "step": 709 }, { "epoch": 1.046618758061544, "grad_norm": 2.1244473580194567, "learning_rate": 4.714064458119314e-05, "loss": 0.8437, "step": 710 }, { "epoch": 1.0480928689883915, "grad_norm": 2.3009020736393864, "learning_rate": 4.713144564235803e-05, "loss": 0.8483, "step": 711 }, { "epoch": 1.0495669799152387, "grad_norm": 1.9265381600357607, "learning_rate": 4.7122232830657315e-05, "loss": 0.8802, "step": 712 }, { "epoch": 1.051041090842086, "grad_norm": 1.9121853712992407, "learning_rate": 4.7113006151865944e-05, "loss": 0.7716, "step": 713 }, { "epoch": 1.0525152017689332, "grad_norm": 2.1986840040095132, "learning_rate": 4.710376561176758e-05, "loss": 0.876, "step": 714 }, { "epoch": 1.0539893126957804, "grad_norm": 1.9374701589932, "learning_rate": 4.7094511216154546e-05, "loss": 0.8389, "step": 715 }, { "epoch": 1.0554634236226277, "grad_norm": 1.8132012424790414, "learning_rate": 4.708524297082786e-05, "loss": 0.8338, "step": 716 }, { "epoch": 1.056937534549475, "grad_norm": 2.1225222559684265, "learning_rate": 4.7075960881597236e-05, "loss": 1.0327, "step": 717 }, { "epoch": 1.0584116454763222, "grad_norm": 2.353622092217103, "learning_rate": 4.706666495428105e-05, "loss": 0.872, "step": 718 }, { "epoch": 1.0598857564031694, "grad_norm": 2.459591355071336, "learning_rate": 4.705735519470636e-05, "loss": 1.0292, "step": 719 }, { "epoch": 1.0613598673300166, "grad_norm": 1.9859983757527173, "learning_rate": 4.7048031608708876e-05, "loss": 0.9116, "step": 720 }, { "epoch": 1.0628339782568639, "grad_norm": 1.9695113390256938, "learning_rate": 4.703869420213301e-05, "loss": 0.7871, "step": 721 }, { "epoch": 1.0643080891837111, "grad_norm": 2.0351891688871375, "learning_rate": 4.702934298083181e-05, "loss": 0.8454, "step": 722 }, { "epoch": 1.0657822001105584, "grad_norm": 2.2767669313655103, "learning_rate": 4.701997795066699e-05, "loss": 1.111, "step": 723 }, { "epoch": 1.0672563110374056, "grad_norm": 2.076375009354976, "learning_rate": 4.701059911750893e-05, "loss": 0.8593, "step": 724 }, { "epoch": 1.0687304219642528, "grad_norm": 1.9598197639078405, "learning_rate": 4.7001206487236644e-05, "loss": 0.8146, "step": 725 }, { "epoch": 1.0702045328911, "grad_norm": 2.2336154527066805, "learning_rate": 4.69918000657378e-05, "loss": 0.8401, "step": 726 }, { "epoch": 1.0716786438179473, "grad_norm": 1.921488032887159, "learning_rate": 4.698237985890873e-05, "loss": 1.0239, "step": 727 }, { "epoch": 1.0731527547447945, "grad_norm": 2.3782026882239524, "learning_rate": 4.697294587265438e-05, "loss": 0.8969, "step": 728 }, { "epoch": 1.0746268656716418, "grad_norm": 1.991252485274915, "learning_rate": 4.696349811288836e-05, "loss": 0.7702, "step": 729 }, { "epoch": 1.076100976598489, "grad_norm": 1.8935526729295626, "learning_rate": 4.695403658553288e-05, "loss": 1.0603, "step": 730 }, { "epoch": 1.0775750875253363, "grad_norm": 1.8050689294180877, "learning_rate": 4.6944561296518816e-05, "loss": 0.8187, "step": 731 }, { "epoch": 1.0790491984521835, "grad_norm": 2.1252048618076516, "learning_rate": 4.693507225178564e-05, "loss": 0.8356, "step": 732 }, { "epoch": 1.0805233093790307, "grad_norm": 1.8966245432479611, "learning_rate": 4.692556945728147e-05, "loss": 0.8438, "step": 733 }, { "epoch": 1.081997420305878, "grad_norm": 2.11700832833448, "learning_rate": 4.691605291896304e-05, "loss": 0.9486, "step": 734 }, { "epoch": 1.0834715312327252, "grad_norm": 2.18044571353521, "learning_rate": 4.690652264279567e-05, "loss": 1.0402, "step": 735 }, { "epoch": 1.0849456421595725, "grad_norm": 1.9223859774504748, "learning_rate": 4.689697863475334e-05, "loss": 0.9198, "step": 736 }, { "epoch": 1.0864197530864197, "grad_norm": 2.0394886492324247, "learning_rate": 4.688742090081859e-05, "loss": 0.9611, "step": 737 }, { "epoch": 1.087893864013267, "grad_norm": 1.8852217838739027, "learning_rate": 4.68778494469826e-05, "loss": 0.8523, "step": 738 }, { "epoch": 1.0893679749401142, "grad_norm": 1.9998773298790626, "learning_rate": 4.686826427924514e-05, "loss": 1.0245, "step": 739 }, { "epoch": 1.0908420858669614, "grad_norm": 2.2449130728359714, "learning_rate": 4.685866540361456e-05, "loss": 0.9075, "step": 740 }, { "epoch": 1.0923161967938086, "grad_norm": 2.0133277754464816, "learning_rate": 4.684905282610781e-05, "loss": 0.861, "step": 741 }, { "epoch": 1.0937903077206559, "grad_norm": 2.0109469516534997, "learning_rate": 4.6839426552750454e-05, "loss": 0.9054, "step": 742 }, { "epoch": 1.0952644186475031, "grad_norm": 2.0807514086041032, "learning_rate": 4.6829786589576604e-05, "loss": 0.8852, "step": 743 }, { "epoch": 1.0967385295743504, "grad_norm": 2.075137751959063, "learning_rate": 4.6820132942628974e-05, "loss": 0.8688, "step": 744 }, { "epoch": 1.0982126405011976, "grad_norm": 2.0282234532422057, "learning_rate": 4.6810465617958856e-05, "loss": 0.8218, "step": 745 }, { "epoch": 1.099686751428045, "grad_norm": 2.155909304032889, "learning_rate": 4.680078462162611e-05, "loss": 0.9871, "step": 746 }, { "epoch": 1.1011608623548923, "grad_norm": 2.3718527972522865, "learning_rate": 4.679108995969917e-05, "loss": 0.9483, "step": 747 }, { "epoch": 1.1026349732817395, "grad_norm": 1.938546667104507, "learning_rate": 4.678138163825503e-05, "loss": 0.9689, "step": 748 }, { "epoch": 1.1041090842085868, "grad_norm": 2.0883755383967175, "learning_rate": 4.677165966337924e-05, "loss": 0.9299, "step": 749 }, { "epoch": 1.105583195135434, "grad_norm": 2.008761289383238, "learning_rate": 4.676192404116594e-05, "loss": 0.8614, "step": 750 }, { "epoch": 1.105583195135434, "eval_bleu": 0.06399562286530244, "eval_bleu_1gram": 0.370668396858067, "eval_bleu_2gram": 0.1429514401479156, "eval_bleu_3gram": 0.06151974793254733, "eval_bleu_4gram": 0.029711019412066747, "eval_rag_val_loss": 1.3046053641585893, "eval_rouge1": 0.36533687514129215, "eval_rouge2": 0.13739864261910306, "eval_rougeL": 0.3462712047942598, "step": 750 }, { "epoch": 1.1070573060622813, "grad_norm": 2.16588166272366, "learning_rate": 4.6752174777717786e-05, "loss": 0.8999, "step": 751 }, { "epoch": 1.1085314169891285, "grad_norm": 1.9939417875357592, "learning_rate": 4.674241187914601e-05, "loss": 0.6986, "step": 752 }, { "epoch": 1.1100055279159757, "grad_norm": 2.110044133054738, "learning_rate": 4.673263535157038e-05, "loss": 0.9342, "step": 753 }, { "epoch": 1.111479638842823, "grad_norm": 2.1552844995751457, "learning_rate": 4.6722845201119214e-05, "loss": 0.9767, "step": 754 }, { "epoch": 1.1129537497696702, "grad_norm": 2.0640128396100628, "learning_rate": 4.671304143392936e-05, "loss": 0.8829, "step": 755 }, { "epoch": 1.1144278606965174, "grad_norm": 2.1248296781610163, "learning_rate": 4.670322405614621e-05, "loss": 0.8458, "step": 756 }, { "epoch": 1.1159019716233647, "grad_norm": 2.223377613539845, "learning_rate": 4.6693393073923686e-05, "loss": 0.9535, "step": 757 }, { "epoch": 1.117376082550212, "grad_norm": 1.8988987240735957, "learning_rate": 4.6683548493424236e-05, "loss": 0.8801, "step": 758 }, { "epoch": 1.1188501934770592, "grad_norm": 1.9693701842021263, "learning_rate": 4.667369032081883e-05, "loss": 0.835, "step": 759 }, { "epoch": 1.1203243044039064, "grad_norm": 2.1275912922189906, "learning_rate": 4.666381856228697e-05, "loss": 0.9267, "step": 760 }, { "epoch": 1.1217984153307536, "grad_norm": 2.1855215662632026, "learning_rate": 4.665393322401664e-05, "loss": 1.1006, "step": 761 }, { "epoch": 1.1232725262576009, "grad_norm": 2.1434642590194044, "learning_rate": 4.6644034312204387e-05, "loss": 0.7724, "step": 762 }, { "epoch": 1.1247466371844481, "grad_norm": 1.986634357443556, "learning_rate": 4.6634121833055235e-05, "loss": 0.8982, "step": 763 }, { "epoch": 1.1262207481112954, "grad_norm": 2.06711455178237, "learning_rate": 4.662419579278271e-05, "loss": 0.913, "step": 764 }, { "epoch": 1.1276948590381426, "grad_norm": 2.0516101143069823, "learning_rate": 4.6614256197608855e-05, "loss": 0.8824, "step": 765 }, { "epoch": 1.1291689699649898, "grad_norm": 2.132642690305373, "learning_rate": 4.660430305376419e-05, "loss": 0.9603, "step": 766 }, { "epoch": 1.130643080891837, "grad_norm": 2.1226711976076453, "learning_rate": 4.659433636748775e-05, "loss": 0.9997, "step": 767 }, { "epoch": 1.1321171918186843, "grad_norm": 1.9603882282503755, "learning_rate": 4.658435614502705e-05, "loss": 0.7845, "step": 768 }, { "epoch": 1.1335913027455315, "grad_norm": 2.2031282465484723, "learning_rate": 4.657436239263808e-05, "loss": 0.8592, "step": 769 }, { "epoch": 1.1350654136723788, "grad_norm": 2.14105487600287, "learning_rate": 4.6564355116585325e-05, "loss": 0.9459, "step": 770 }, { "epoch": 1.136539524599226, "grad_norm": 2.234611338674003, "learning_rate": 4.655433432314174e-05, "loss": 0.7991, "step": 771 }, { "epoch": 1.1380136355260733, "grad_norm": 2.199602164797109, "learning_rate": 4.654430001858874e-05, "loss": 0.9755, "step": 772 }, { "epoch": 1.1394877464529205, "grad_norm": 2.0953930912548753, "learning_rate": 4.653425220921626e-05, "loss": 0.9649, "step": 773 }, { "epoch": 1.1409618573797677, "grad_norm": 2.414257054066097, "learning_rate": 4.6524190901322626e-05, "loss": 0.9341, "step": 774 }, { "epoch": 1.142435968306615, "grad_norm": 2.4428490853714533, "learning_rate": 4.651411610121469e-05, "loss": 0.9433, "step": 775 }, { "epoch": 1.1439100792334624, "grad_norm": 2.344236704836353, "learning_rate": 4.650402781520772e-05, "loss": 0.9488, "step": 776 }, { "epoch": 1.1453841901603097, "grad_norm": 2.0965162763448224, "learning_rate": 4.649392604962546e-05, "loss": 0.9401, "step": 777 }, { "epoch": 1.146858301087157, "grad_norm": 2.2880451949330887, "learning_rate": 4.648381081080009e-05, "loss": 0.9117, "step": 778 }, { "epoch": 1.1483324120140042, "grad_norm": 2.125792411710407, "learning_rate": 4.647368210507225e-05, "loss": 0.9868, "step": 779 }, { "epoch": 1.1498065229408514, "grad_norm": 2.404087730006564, "learning_rate": 4.6463539938791e-05, "loss": 0.9315, "step": 780 }, { "epoch": 1.1512806338676986, "grad_norm": 2.24696548131175, "learning_rate": 4.645338431831388e-05, "loss": 0.8959, "step": 781 }, { "epoch": 1.1527547447945459, "grad_norm": 1.8994627644784556, "learning_rate": 4.6443215250006806e-05, "loss": 0.8136, "step": 782 }, { "epoch": 1.154228855721393, "grad_norm": 2.305150493086377, "learning_rate": 4.643303274024416e-05, "loss": 0.8949, "step": 783 }, { "epoch": 1.1557029666482403, "grad_norm": 2.3418441144109425, "learning_rate": 4.642283679540874e-05, "loss": 0.9366, "step": 784 }, { "epoch": 1.1571770775750876, "grad_norm": 2.282061602303444, "learning_rate": 4.641262742189178e-05, "loss": 0.9895, "step": 785 }, { "epoch": 1.1586511885019348, "grad_norm": 2.003159411725432, "learning_rate": 4.640240462609291e-05, "loss": 0.9784, "step": 786 }, { "epoch": 1.160125299428782, "grad_norm": 2.1751357376011606, "learning_rate": 4.639216841442018e-05, "loss": 0.948, "step": 787 }, { "epoch": 1.1615994103556293, "grad_norm": 2.3391766496732274, "learning_rate": 4.6381918793290055e-05, "loss": 1.12, "step": 788 }, { "epoch": 1.1630735212824765, "grad_norm": 1.9524664417074247, "learning_rate": 4.6371655769127396e-05, "loss": 0.9211, "step": 789 }, { "epoch": 1.1645476322093238, "grad_norm": 2.135361374550138, "learning_rate": 4.63613793483655e-05, "loss": 0.9112, "step": 790 }, { "epoch": 1.166021743136171, "grad_norm": 2.55733005678738, "learning_rate": 4.6351089537446e-05, "loss": 1.0751, "step": 791 }, { "epoch": 1.1674958540630183, "grad_norm": 2.1046426316585505, "learning_rate": 4.6340786342818964e-05, "loss": 0.9468, "step": 792 }, { "epoch": 1.1689699649898655, "grad_norm": 2.113937078732325, "learning_rate": 4.633046977094286e-05, "loss": 0.9944, "step": 793 }, { "epoch": 1.1704440759167127, "grad_norm": 2.3389769717514417, "learning_rate": 4.632013982828451e-05, "loss": 1.0197, "step": 794 }, { "epoch": 1.17191818684356, "grad_norm": 2.141442469972838, "learning_rate": 4.630979652131913e-05, "loss": 0.8802, "step": 795 }, { "epoch": 1.1733922977704072, "grad_norm": 2.240101126023013, "learning_rate": 4.629943985653032e-05, "loss": 1.0432, "step": 796 }, { "epoch": 1.1748664086972544, "grad_norm": 2.00506772295574, "learning_rate": 4.6289069840410036e-05, "loss": 0.9188, "step": 797 }, { "epoch": 1.1763405196241017, "grad_norm": 2.0769965757912043, "learning_rate": 4.627868647945863e-05, "loss": 0.8203, "step": 798 }, { "epoch": 1.177814630550949, "grad_norm": 1.9266860414914588, "learning_rate": 4.62682897801848e-05, "loss": 0.7773, "step": 799 }, { "epoch": 1.1792887414777962, "grad_norm": 2.0973097872049946, "learning_rate": 4.625787974910559e-05, "loss": 0.9134, "step": 800 }, { "epoch": 1.1807628524046434, "grad_norm": 2.105888933424708, "learning_rate": 4.6247456392746444e-05, "loss": 0.8999, "step": 801 }, { "epoch": 1.1822369633314906, "grad_norm": 1.9325553803200959, "learning_rate": 4.623701971764112e-05, "loss": 0.8738, "step": 802 }, { "epoch": 1.1837110742583379, "grad_norm": 1.8377735453334865, "learning_rate": 4.622656973033174e-05, "loss": 0.723, "step": 803 }, { "epoch": 1.1851851851851851, "grad_norm": 2.1948903176747385, "learning_rate": 4.621610643736878e-05, "loss": 0.8949, "step": 804 }, { "epoch": 1.1866592961120324, "grad_norm": 1.9519778735790683, "learning_rate": 4.620562984531103e-05, "loss": 0.9135, "step": 805 }, { "epoch": 1.1881334070388796, "grad_norm": 2.228558846672229, "learning_rate": 4.619513996072564e-05, "loss": 0.9466, "step": 806 }, { "epoch": 1.1896075179657268, "grad_norm": 2.0690832555363894, "learning_rate": 4.618463679018808e-05, "loss": 1.0067, "step": 807 }, { "epoch": 1.191081628892574, "grad_norm": 2.054567050666888, "learning_rate": 4.617412034028217e-05, "loss": 0.9289, "step": 808 }, { "epoch": 1.1925557398194213, "grad_norm": 1.9769870467970003, "learning_rate": 4.616359061760001e-05, "loss": 0.9604, "step": 809 }, { "epoch": 1.1940298507462686, "grad_norm": 2.252451408954459, "learning_rate": 4.6153047628742066e-05, "loss": 0.9012, "step": 810 }, { "epoch": 1.1955039616731158, "grad_norm": 2.1976599038612754, "learning_rate": 4.61424913803171e-05, "loss": 0.9228, "step": 811 }, { "epoch": 1.196978072599963, "grad_norm": 2.4045034237728387, "learning_rate": 4.613192187894218e-05, "loss": 0.9596, "step": 812 }, { "epoch": 1.1984521835268105, "grad_norm": 2.2348114867789652, "learning_rate": 4.612133913124268e-05, "loss": 0.9212, "step": 813 }, { "epoch": 1.1999262944536577, "grad_norm": 2.282197624809855, "learning_rate": 4.61107431438523e-05, "loss": 1.0216, "step": 814 }, { "epoch": 1.201400405380505, "grad_norm": 1.9795536131472213, "learning_rate": 4.610013392341301e-05, "loss": 0.8063, "step": 815 }, { "epoch": 1.2028745163073522, "grad_norm": 2.001636312584393, "learning_rate": 4.608951147657511e-05, "loss": 0.9027, "step": 816 }, { "epoch": 1.2043486272341994, "grad_norm": 2.082576258111479, "learning_rate": 4.607887580999715e-05, "loss": 0.976, "step": 817 }, { "epoch": 1.2058227381610467, "grad_norm": 2.1636387379125472, "learning_rate": 4.6068226930345995e-05, "loss": 0.8607, "step": 818 }, { "epoch": 1.207296849087894, "grad_norm": 2.1789577999151417, "learning_rate": 4.605756484429678e-05, "loss": 0.9533, "step": 819 }, { "epoch": 1.2087709600147412, "grad_norm": 2.2059815197513823, "learning_rate": 4.604688955853293e-05, "loss": 0.864, "step": 820 }, { "epoch": 1.2102450709415884, "grad_norm": 2.0244787181019235, "learning_rate": 4.603620107974612e-05, "loss": 0.8539, "step": 821 }, { "epoch": 1.2117191818684356, "grad_norm": 2.041108957009288, "learning_rate": 4.602549941463633e-05, "loss": 0.9383, "step": 822 }, { "epoch": 1.2131932927952829, "grad_norm": 2.226627924443128, "learning_rate": 4.601478456991178e-05, "loss": 1.0137, "step": 823 }, { "epoch": 1.2146674037221301, "grad_norm": 1.8073388642259651, "learning_rate": 4.6004056552288956e-05, "loss": 0.7434, "step": 824 }, { "epoch": 1.2161415146489774, "grad_norm": 1.924396628406642, "learning_rate": 4.5993315368492603e-05, "loss": 0.7881, "step": 825 }, { "epoch": 1.2176156255758246, "grad_norm": 1.8477121872932871, "learning_rate": 4.5982561025255726e-05, "loss": 0.951, "step": 826 }, { "epoch": 1.2190897365026718, "grad_norm": 2.027322464024514, "learning_rate": 4.5971793529319576e-05, "loss": 0.8818, "step": 827 }, { "epoch": 1.220563847429519, "grad_norm": 1.9426168738987375, "learning_rate": 4.596101288743362e-05, "loss": 0.8802, "step": 828 }, { "epoch": 1.2220379583563663, "grad_norm": 2.1249894534578586, "learning_rate": 4.595021910635563e-05, "loss": 0.9717, "step": 829 }, { "epoch": 1.2235120692832135, "grad_norm": 2.2478085548157307, "learning_rate": 4.5939412192851535e-05, "loss": 0.9689, "step": 830 }, { "epoch": 1.2249861802100608, "grad_norm": 2.157395321283548, "learning_rate": 4.592859215369557e-05, "loss": 0.9168, "step": 831 }, { "epoch": 1.226460291136908, "grad_norm": 2.2376116804669786, "learning_rate": 4.591775899567015e-05, "loss": 1.011, "step": 832 }, { "epoch": 1.2279344020637553, "grad_norm": 2.1632287798490673, "learning_rate": 4.590691272556592e-05, "loss": 0.7803, "step": 833 }, { "epoch": 1.2294085129906025, "grad_norm": 2.150881289460229, "learning_rate": 4.589605335018176e-05, "loss": 0.8649, "step": 834 }, { "epoch": 1.2308826239174497, "grad_norm": 2.05554946978702, "learning_rate": 4.588518087632475e-05, "loss": 0.9742, "step": 835 }, { "epoch": 1.232356734844297, "grad_norm": 2.0700427761357343, "learning_rate": 4.587429531081019e-05, "loss": 0.9154, "step": 836 }, { "epoch": 1.2338308457711442, "grad_norm": 2.0873822984234893, "learning_rate": 4.5863396660461575e-05, "loss": 0.9017, "step": 837 }, { "epoch": 1.2353049566979915, "grad_norm": 2.541127090296368, "learning_rate": 4.585248493211063e-05, "loss": 0.9248, "step": 838 }, { "epoch": 1.2367790676248387, "grad_norm": 2.231036486414092, "learning_rate": 4.5841560132597244e-05, "loss": 0.8194, "step": 839 }, { "epoch": 1.238253178551686, "grad_norm": 2.1110391339641654, "learning_rate": 4.583062226876952e-05, "loss": 1.0548, "step": 840 }, { "epoch": 1.2397272894785332, "grad_norm": 2.00150945445755, "learning_rate": 4.5819671347483725e-05, "loss": 0.7942, "step": 841 }, { "epoch": 1.2412014004053806, "grad_norm": 2.224222231595229, "learning_rate": 4.580870737560435e-05, "loss": 0.9539, "step": 842 }, { "epoch": 1.2426755113322279, "grad_norm": 2.1722286677393634, "learning_rate": 4.579773036000405e-05, "loss": 1.1787, "step": 843 }, { "epoch": 1.244149622259075, "grad_norm": 1.973701789175354, "learning_rate": 4.5786740307563636e-05, "loss": 0.802, "step": 844 }, { "epoch": 1.2456237331859223, "grad_norm": 1.9083870337425375, "learning_rate": 4.577573722517211e-05, "loss": 0.8798, "step": 845 }, { "epoch": 1.2470978441127696, "grad_norm": 1.9777860812156953, "learning_rate": 4.5764721119726653e-05, "loss": 0.8692, "step": 846 }, { "epoch": 1.2485719550396168, "grad_norm": 1.9923988379783895, "learning_rate": 4.575369199813258e-05, "loss": 0.8975, "step": 847 }, { "epoch": 1.250046065966464, "grad_norm": 2.165235291132767, "learning_rate": 4.5742649867303386e-05, "loss": 0.8108, "step": 848 }, { "epoch": 1.2515201768933113, "grad_norm": 2.1734588117397915, "learning_rate": 4.573159473416072e-05, "loss": 0.846, "step": 849 }, { "epoch": 1.2529942878201585, "grad_norm": 2.138760388079887, "learning_rate": 4.572052660563437e-05, "loss": 0.8959, "step": 850 }, { "epoch": 1.2544683987470058, "grad_norm": 2.0865500475843417, "learning_rate": 4.570944548866228e-05, "loss": 0.9034, "step": 851 }, { "epoch": 1.255942509673853, "grad_norm": 2.0803760010819405, "learning_rate": 4.569835139019054e-05, "loss": 0.9335, "step": 852 }, { "epoch": 1.2574166206007003, "grad_norm": 1.9470637761129816, "learning_rate": 4.5687244317173356e-05, "loss": 0.8827, "step": 853 }, { "epoch": 1.2588907315275475, "grad_norm": 2.2024078284038917, "learning_rate": 4.567612427657308e-05, "loss": 1.0242, "step": 854 }, { "epoch": 1.2603648424543947, "grad_norm": 2.1324214367185617, "learning_rate": 4.566499127536021e-05, "loss": 0.9432, "step": 855 }, { "epoch": 1.261838953381242, "grad_norm": 2.4715112623497943, "learning_rate": 4.565384532051335e-05, "loss": 1.0171, "step": 856 }, { "epoch": 1.2633130643080892, "grad_norm": 2.158047797365789, "learning_rate": 4.56426864190192e-05, "loss": 0.9087, "step": 857 }, { "epoch": 1.2647871752349364, "grad_norm": 2.0789789725410457, "learning_rate": 4.563151457787263e-05, "loss": 1.0233, "step": 858 }, { "epoch": 1.2662612861617837, "grad_norm": 2.3485756595501863, "learning_rate": 4.562032980407658e-05, "loss": 0.9734, "step": 859 }, { "epoch": 1.267735397088631, "grad_norm": 1.9388552202193756, "learning_rate": 4.56091321046421e-05, "loss": 0.7635, "step": 860 }, { "epoch": 1.2692095080154782, "grad_norm": 2.336735821823869, "learning_rate": 4.5597921486588366e-05, "loss": 1.0621, "step": 861 }, { "epoch": 1.2706836189423254, "grad_norm": 1.9321627783824913, "learning_rate": 4.558669795694263e-05, "loss": 0.9628, "step": 862 }, { "epoch": 1.2721577298691726, "grad_norm": 1.9115652804551695, "learning_rate": 4.557546152274025e-05, "loss": 0.8609, "step": 863 }, { "epoch": 1.2736318407960199, "grad_norm": 2.200497514650085, "learning_rate": 4.556421219102466e-05, "loss": 0.795, "step": 864 }, { "epoch": 1.2751059517228671, "grad_norm": 2.1082956448932, "learning_rate": 4.555294996884738e-05, "loss": 0.8697, "step": 865 }, { "epoch": 1.2765800626497144, "grad_norm": 1.8975096267837965, "learning_rate": 4.5541674863268035e-05, "loss": 0.9019, "step": 866 }, { "epoch": 1.2780541735765616, "grad_norm": 2.0363454437836563, "learning_rate": 4.553038688135429e-05, "loss": 1.1155, "step": 867 }, { "epoch": 1.2795282845034088, "grad_norm": 2.1483033294397655, "learning_rate": 4.551908603018191e-05, "loss": 0.923, "step": 868 }, { "epoch": 1.281002395430256, "grad_norm": 2.254741018272112, "learning_rate": 4.5507772316834715e-05, "loss": 0.9167, "step": 869 }, { "epoch": 1.2824765063571033, "grad_norm": 2.2703551275946916, "learning_rate": 4.549644574840458e-05, "loss": 1.0144, "step": 870 }, { "epoch": 1.2839506172839505, "grad_norm": 1.7611857282446275, "learning_rate": 4.5485106331991446e-05, "loss": 0.7649, "step": 871 }, { "epoch": 1.2854247282107978, "grad_norm": 1.9795245867553886, "learning_rate": 4.5473754074703324e-05, "loss": 0.9093, "step": 872 }, { "epoch": 1.286898839137645, "grad_norm": 2.195423707147723, "learning_rate": 4.546238898365623e-05, "loss": 0.8621, "step": 873 }, { "epoch": 1.2883729500644923, "grad_norm": 2.3054809239065124, "learning_rate": 4.545101106597428e-05, "loss": 1.06, "step": 874 }, { "epoch": 1.2898470609913395, "grad_norm": 2.135549835766822, "learning_rate": 4.5439620328789593e-05, "loss": 0.9123, "step": 875 }, { "epoch": 1.2913211719181867, "grad_norm": 1.9791291350436715, "learning_rate": 4.5428216779242336e-05, "loss": 0.8985, "step": 876 }, { "epoch": 1.292795282845034, "grad_norm": 1.9188516919155103, "learning_rate": 4.541680042448069e-05, "loss": 0.8559, "step": 877 }, { "epoch": 1.2942693937718812, "grad_norm": 1.907123928170029, "learning_rate": 4.540537127166089e-05, "loss": 0.8925, "step": 878 }, { "epoch": 1.2957435046987285, "grad_norm": 2.5657764050952694, "learning_rate": 4.5393929327947195e-05, "loss": 1.0066, "step": 879 }, { "epoch": 1.2972176156255757, "grad_norm": 2.308992148166687, "learning_rate": 4.538247460051184e-05, "loss": 0.9849, "step": 880 }, { "epoch": 1.298691726552423, "grad_norm": 2.1700782204518894, "learning_rate": 4.537100709653512e-05, "loss": 0.8825, "step": 881 }, { "epoch": 1.3001658374792704, "grad_norm": 2.1858423764498296, "learning_rate": 4.535952682320531e-05, "loss": 0.9954, "step": 882 }, { "epoch": 1.3016399484061176, "grad_norm": 2.1490195855641203, "learning_rate": 4.534803378771871e-05, "loss": 0.8861, "step": 883 }, { "epoch": 1.3031140593329649, "grad_norm": 2.087121063376755, "learning_rate": 4.53365279972796e-05, "loss": 0.9296, "step": 884 }, { "epoch": 1.304588170259812, "grad_norm": 2.190867855960344, "learning_rate": 4.532500945910026e-05, "loss": 1.0472, "step": 885 }, { "epoch": 1.3060622811866593, "grad_norm": 2.260859561781617, "learning_rate": 4.5313478180400995e-05, "loss": 1.0088, "step": 886 }, { "epoch": 1.3075363921135066, "grad_norm": 2.004968551236895, "learning_rate": 4.530193416841003e-05, "loss": 0.9398, "step": 887 }, { "epoch": 1.3090105030403538, "grad_norm": 2.233905022551378, "learning_rate": 4.529037743036362e-05, "loss": 1.0276, "step": 888 }, { "epoch": 1.310484613967201, "grad_norm": 2.3979734288669166, "learning_rate": 4.5278807973506e-05, "loss": 0.9118, "step": 889 }, { "epoch": 1.3119587248940483, "grad_norm": 2.029197477489297, "learning_rate": 4.526722580508934e-05, "loss": 0.8168, "step": 890 }, { "epoch": 1.3134328358208955, "grad_norm": 2.1872518126157243, "learning_rate": 4.525563093237383e-05, "loss": 0.9177, "step": 891 }, { "epoch": 1.3149069467477428, "grad_norm": 2.195383308327591, "learning_rate": 4.524402336262756e-05, "loss": 0.9518, "step": 892 }, { "epoch": 1.31638105767459, "grad_norm": 2.0010755746230036, "learning_rate": 4.523240310312664e-05, "loss": 0.8984, "step": 893 }, { "epoch": 1.3178551686014373, "grad_norm": 2.2745403747731956, "learning_rate": 4.522077016115511e-05, "loss": 0.9583, "step": 894 }, { "epoch": 1.3193292795282845, "grad_norm": 2.1592788431975767, "learning_rate": 4.520912454400494e-05, "loss": 1.0017, "step": 895 }, { "epoch": 1.3208033904551317, "grad_norm": 1.9850286773800931, "learning_rate": 4.519746625897607e-05, "loss": 0.9531, "step": 896 }, { "epoch": 1.322277501381979, "grad_norm": 2.358685019951996, "learning_rate": 4.518579531337638e-05, "loss": 1.0593, "step": 897 }, { "epoch": 1.3237516123088262, "grad_norm": 2.083117003335377, "learning_rate": 4.5174111714521685e-05, "loss": 0.8895, "step": 898 }, { "epoch": 1.3252257232356734, "grad_norm": 1.9343728701775431, "learning_rate": 4.516241546973571e-05, "loss": 0.9486, "step": 899 }, { "epoch": 1.3266998341625207, "grad_norm": 1.919264035534218, "learning_rate": 4.515070658635013e-05, "loss": 0.8823, "step": 900 }, { "epoch": 1.328173945089368, "grad_norm": 2.3104311993906936, "learning_rate": 4.5138985071704546e-05, "loss": 0.942, "step": 901 }, { "epoch": 1.3296480560162152, "grad_norm": 1.847395122374864, "learning_rate": 4.512725093314645e-05, "loss": 0.8007, "step": 902 }, { "epoch": 1.3311221669430624, "grad_norm": 2.3215391468080724, "learning_rate": 4.5115504178031285e-05, "loss": 1.0044, "step": 903 }, { "epoch": 1.3325962778699096, "grad_norm": 2.263011714330166, "learning_rate": 4.5103744813722374e-05, "loss": 0.9485, "step": 904 }, { "epoch": 1.3340703887967569, "grad_norm": 2.2254377770245473, "learning_rate": 4.509197284759094e-05, "loss": 0.9388, "step": 905 }, { "epoch": 1.3355444997236043, "grad_norm": 2.2325438351829026, "learning_rate": 4.508018828701612e-05, "loss": 0.9107, "step": 906 }, { "epoch": 1.3370186106504516, "grad_norm": 2.2093167784275316, "learning_rate": 4.506839113938496e-05, "loss": 0.9843, "step": 907 }, { "epoch": 1.3384927215772988, "grad_norm": 2.0875284935382865, "learning_rate": 4.505658141209237e-05, "loss": 0.9235, "step": 908 }, { "epoch": 1.339966832504146, "grad_norm": 2.4462949101726514, "learning_rate": 4.504475911254115e-05, "loss": 1.0006, "step": 909 }, { "epoch": 1.3414409434309933, "grad_norm": 2.154765627124424, "learning_rate": 4.503292424814198e-05, "loss": 0.8826, "step": 910 }, { "epoch": 1.3429150543578405, "grad_norm": 2.1578336787859103, "learning_rate": 4.502107682631343e-05, "loss": 0.9837, "step": 911 }, { "epoch": 1.3443891652846878, "grad_norm": 2.24550667429518, "learning_rate": 4.500921685448193e-05, "loss": 0.973, "step": 912 }, { "epoch": 1.345863276211535, "grad_norm": 2.159692200523254, "learning_rate": 4.499734434008178e-05, "loss": 1.0253, "step": 913 }, { "epoch": 1.3473373871383822, "grad_norm": 1.9751874038053623, "learning_rate": 4.498545929055515e-05, "loss": 0.8587, "step": 914 }, { "epoch": 1.3488114980652295, "grad_norm": 2.2381534240332406, "learning_rate": 4.497356171335204e-05, "loss": 1.0202, "step": 915 }, { "epoch": 1.3502856089920767, "grad_norm": 2.069962036411257, "learning_rate": 4.496165161593035e-05, "loss": 1.0596, "step": 916 }, { "epoch": 1.351759719918924, "grad_norm": 2.114174137706234, "learning_rate": 4.4949729005755765e-05, "loss": 0.9441, "step": 917 }, { "epoch": 1.3532338308457712, "grad_norm": 2.2200139703396955, "learning_rate": 4.493779389030187e-05, "loss": 0.9941, "step": 918 }, { "epoch": 1.3547079417726184, "grad_norm": 2.22486068203694, "learning_rate": 4.492584627705008e-05, "loss": 0.9372, "step": 919 }, { "epoch": 1.3561820526994657, "grad_norm": 2.289729935659045, "learning_rate": 4.491388617348959e-05, "loss": 1.1114, "step": 920 }, { "epoch": 1.357656163626313, "grad_norm": 2.015448155888284, "learning_rate": 4.490191358711751e-05, "loss": 0.8411, "step": 921 }, { "epoch": 1.3591302745531602, "grad_norm": 2.2714609684480296, "learning_rate": 4.488992852543871e-05, "loss": 0.9354, "step": 922 }, { "epoch": 1.3606043854800074, "grad_norm": 2.1302708723095285, "learning_rate": 4.4877930995965905e-05, "loss": 0.8423, "step": 923 }, { "epoch": 1.3620784964068546, "grad_norm": 2.244455605920835, "learning_rate": 4.486592100621961e-05, "loss": 0.9476, "step": 924 }, { "epoch": 1.3635526073337019, "grad_norm": 2.181748267158982, "learning_rate": 4.4853898563728184e-05, "loss": 0.9649, "step": 925 }, { "epoch": 1.365026718260549, "grad_norm": 2.224689431964621, "learning_rate": 4.484186367602775e-05, "loss": 0.9142, "step": 926 }, { "epoch": 1.3665008291873963, "grad_norm": 1.9482718635153693, "learning_rate": 4.482981635066227e-05, "loss": 0.8489, "step": 927 }, { "epoch": 1.3679749401142436, "grad_norm": 2.0837211120836483, "learning_rate": 4.481775659518346e-05, "loss": 0.8821, "step": 928 }, { "epoch": 1.3694490510410908, "grad_norm": 2.1907596824263997, "learning_rate": 4.480568441715086e-05, "loss": 0.8815, "step": 929 }, { "epoch": 1.370923161967938, "grad_norm": 2.478501773010937, "learning_rate": 4.479359982413181e-05, "loss": 1.0561, "step": 930 }, { "epoch": 1.3723972728947853, "grad_norm": 2.1300624682440246, "learning_rate": 4.478150282370138e-05, "loss": 0.8961, "step": 931 }, { "epoch": 1.3738713838216325, "grad_norm": 2.0765711202220087, "learning_rate": 4.476939342344246e-05, "loss": 0.8984, "step": 932 }, { "epoch": 1.3753454947484798, "grad_norm": 2.0774014009106114, "learning_rate": 4.475727163094572e-05, "loss": 0.9186, "step": 933 }, { "epoch": 1.376819605675327, "grad_norm": 2.0964412189361155, "learning_rate": 4.474513745380955e-05, "loss": 0.893, "step": 934 }, { "epoch": 1.3782937166021743, "grad_norm": 2.1425523086981593, "learning_rate": 4.473299089964015e-05, "loss": 0.9024, "step": 935 }, { "epoch": 1.3797678275290215, "grad_norm": 1.981045370033946, "learning_rate": 4.472083197605146e-05, "loss": 0.8723, "step": 936 }, { "epoch": 1.3812419384558687, "grad_norm": 2.1374220449052346, "learning_rate": 4.470866069066516e-05, "loss": 1.0531, "step": 937 }, { "epoch": 1.382716049382716, "grad_norm": 1.8176718619840355, "learning_rate": 4.4696477051110705e-05, "loss": 0.9017, "step": 938 }, { "epoch": 1.3841901603095632, "grad_norm": 2.2044963491560567, "learning_rate": 4.468428106502528e-05, "loss": 0.8732, "step": 939 }, { "epoch": 1.3856642712364104, "grad_norm": 2.094365271508991, "learning_rate": 4.4672072740053816e-05, "loss": 0.9693, "step": 940 }, { "epoch": 1.3871383821632577, "grad_norm": 2.0825649243111117, "learning_rate": 4.4659852083848975e-05, "loss": 0.8923, "step": 941 }, { "epoch": 1.388612493090105, "grad_norm": 2.15728104860599, "learning_rate": 4.464761910407113e-05, "loss": 0.9632, "step": 942 }, { "epoch": 1.3900866040169522, "grad_norm": 1.9219840414236171, "learning_rate": 4.463537380838841e-05, "loss": 0.8661, "step": 943 }, { "epoch": 1.3915607149437994, "grad_norm": 1.8728675796248748, "learning_rate": 4.462311620447666e-05, "loss": 0.9446, "step": 944 }, { "epoch": 1.3930348258706466, "grad_norm": 2.013795243932427, "learning_rate": 4.461084630001942e-05, "loss": 0.9397, "step": 945 }, { "epoch": 1.3945089367974939, "grad_norm": 1.9315817489368468, "learning_rate": 4.459856410270795e-05, "loss": 0.9513, "step": 946 }, { "epoch": 1.3959830477243411, "grad_norm": 2.0235963265670125, "learning_rate": 4.4586269620241216e-05, "loss": 0.8755, "step": 947 }, { "epoch": 1.3974571586511886, "grad_norm": 2.0377127319866717, "learning_rate": 4.457396286032589e-05, "loss": 0.8572, "step": 948 }, { "epoch": 1.3989312695780358, "grad_norm": 1.9833905038153732, "learning_rate": 4.4561643830676336e-05, "loss": 0.9384, "step": 949 }, { "epoch": 1.400405380504883, "grad_norm": 2.1118260589555407, "learning_rate": 4.454931253901461e-05, "loss": 0.8585, "step": 950 }, { "epoch": 1.4018794914317303, "grad_norm": 2.1045945994512936, "learning_rate": 4.453696899307045e-05, "loss": 1.0185, "step": 951 }, { "epoch": 1.4033536023585775, "grad_norm": 2.608850483515089, "learning_rate": 4.4524613200581284e-05, "loss": 0.9334, "step": 952 }, { "epoch": 1.4048277132854248, "grad_norm": 1.886960790047875, "learning_rate": 4.4512245169292206e-05, "loss": 0.9301, "step": 953 }, { "epoch": 1.406301824212272, "grad_norm": 2.165308184108102, "learning_rate": 4.449986490695599e-05, "loss": 0.9081, "step": 954 }, { "epoch": 1.4077759351391192, "grad_norm": 2.138969728117426, "learning_rate": 4.4487472421333074e-05, "loss": 0.9461, "step": 955 }, { "epoch": 1.4092500460659665, "grad_norm": 2.116184010882281, "learning_rate": 4.447506772019155e-05, "loss": 0.8954, "step": 956 }, { "epoch": 1.4107241569928137, "grad_norm": 2.4236881483113484, "learning_rate": 4.44626508113072e-05, "loss": 1.0826, "step": 957 }, { "epoch": 1.412198267919661, "grad_norm": 2.2888907410429997, "learning_rate": 4.445022170246341e-05, "loss": 1.0129, "step": 958 }, { "epoch": 1.4136723788465082, "grad_norm": 2.2068308511410875, "learning_rate": 4.443778040145124e-05, "loss": 0.8024, "step": 959 }, { "epoch": 1.4151464897733554, "grad_norm": 2.111688997692045, "learning_rate": 4.44253269160694e-05, "loss": 0.8809, "step": 960 }, { "epoch": 1.4166206007002027, "grad_norm": 2.1063159377646548, "learning_rate": 4.441286125412422e-05, "loss": 0.8221, "step": 961 }, { "epoch": 1.41809471162705, "grad_norm": 2.3900196580597126, "learning_rate": 4.440038342342967e-05, "loss": 0.9752, "step": 962 }, { "epoch": 1.4195688225538972, "grad_norm": 2.254498964121215, "learning_rate": 4.4387893431807344e-05, "loss": 1.0107, "step": 963 }, { "epoch": 1.4210429334807444, "grad_norm": 1.9548310420010717, "learning_rate": 4.437539128708647e-05, "loss": 0.8048, "step": 964 }, { "epoch": 1.4225170444075916, "grad_norm": 2.1652717379271733, "learning_rate": 4.4362876997103885e-05, "loss": 0.9293, "step": 965 }, { "epoch": 1.4239911553344389, "grad_norm": 2.162034168675647, "learning_rate": 4.4350350569704045e-05, "loss": 0.9655, "step": 966 }, { "epoch": 1.4254652662612861, "grad_norm": 2.0253198771319583, "learning_rate": 4.4337812012738996e-05, "loss": 0.8926, "step": 967 }, { "epoch": 1.4269393771881334, "grad_norm": 2.2863270947121483, "learning_rate": 4.4325261334068426e-05, "loss": 0.9236, "step": 968 }, { "epoch": 1.4284134881149806, "grad_norm": 2.190140139132774, "learning_rate": 4.431269854155957e-05, "loss": 0.885, "step": 969 }, { "epoch": 1.4298875990418278, "grad_norm": 2.1117702871589845, "learning_rate": 4.4300123643087304e-05, "loss": 0.8968, "step": 970 }, { "epoch": 1.431361709968675, "grad_norm": 2.0768099187525926, "learning_rate": 4.428753664653406e-05, "loss": 1.0252, "step": 971 }, { "epoch": 1.4328358208955223, "grad_norm": 1.9742196412531687, "learning_rate": 4.427493755978987e-05, "loss": 0.9111, "step": 972 }, { "epoch": 1.4343099318223698, "grad_norm": 2.3778599033651537, "learning_rate": 4.426232639075234e-05, "loss": 0.9418, "step": 973 }, { "epoch": 1.435784042749217, "grad_norm": 2.145188305643283, "learning_rate": 4.424970314732664e-05, "loss": 0.8455, "step": 974 }, { "epoch": 1.4372581536760642, "grad_norm": 2.3332571516770395, "learning_rate": 4.423706783742554e-05, "loss": 0.9865, "step": 975 }, { "epoch": 1.4387322646029115, "grad_norm": 2.143856533061526, "learning_rate": 4.422442046896933e-05, "loss": 0.8736, "step": 976 }, { "epoch": 1.4402063755297587, "grad_norm": 1.9226438643619628, "learning_rate": 4.421176104988589e-05, "loss": 0.9054, "step": 977 }, { "epoch": 1.441680486456606, "grad_norm": 2.1425612109063463, "learning_rate": 4.419908958811064e-05, "loss": 0.8925, "step": 978 }, { "epoch": 1.4431545973834532, "grad_norm": 1.9245339584981918, "learning_rate": 4.418640609158656e-05, "loss": 1.0231, "step": 979 }, { "epoch": 1.4446287083103004, "grad_norm": 1.96400430694776, "learning_rate": 4.417371056826417e-05, "loss": 0.971, "step": 980 }, { "epoch": 1.4461028192371477, "grad_norm": 2.1814939608406165, "learning_rate": 4.4161003026101525e-05, "loss": 0.8756, "step": 981 }, { "epoch": 1.447576930163995, "grad_norm": 2.5783821671276668, "learning_rate": 4.41482834730642e-05, "loss": 0.9422, "step": 982 }, { "epoch": 1.4490510410908422, "grad_norm": 1.8931985788790948, "learning_rate": 4.4135551917125334e-05, "loss": 0.8122, "step": 983 }, { "epoch": 1.4505251520176894, "grad_norm": 2.000112291998384, "learning_rate": 4.4122808366265556e-05, "loss": 0.8663, "step": 984 }, { "epoch": 1.4519992629445366, "grad_norm": 2.1373852347634172, "learning_rate": 4.411005282847304e-05, "loss": 1.0739, "step": 985 }, { "epoch": 1.4534733738713839, "grad_norm": 2.0027675553741564, "learning_rate": 4.409728531174345e-05, "loss": 0.8158, "step": 986 }, { "epoch": 1.454947484798231, "grad_norm": 2.335885956364599, "learning_rate": 4.4084505824079975e-05, "loss": 0.9766, "step": 987 }, { "epoch": 1.4564215957250783, "grad_norm": 2.0976646607867924, "learning_rate": 4.40717143734933e-05, "loss": 0.8829, "step": 988 }, { "epoch": 1.4578957066519256, "grad_norm": 1.9928512365133537, "learning_rate": 4.405891096800162e-05, "loss": 0.8751, "step": 989 }, { "epoch": 1.4593698175787728, "grad_norm": 2.0690861362616153, "learning_rate": 4.404609561563062e-05, "loss": 0.7255, "step": 990 }, { "epoch": 1.46084392850562, "grad_norm": 2.126675786484125, "learning_rate": 4.403326832441345e-05, "loss": 0.9419, "step": 991 }, { "epoch": 1.4623180394324673, "grad_norm": 2.5408294632997834, "learning_rate": 4.402042910239078e-05, "loss": 0.8087, "step": 992 }, { "epoch": 1.4637921503593145, "grad_norm": 2.5023094001005926, "learning_rate": 4.400757795761074e-05, "loss": 1.085, "step": 993 }, { "epoch": 1.4652662612861618, "grad_norm": 2.0273296377760275, "learning_rate": 4.399471489812893e-05, "loss": 0.8819, "step": 994 }, { "epoch": 1.466740372213009, "grad_norm": 2.158216160523416, "learning_rate": 4.398183993200843e-05, "loss": 0.8468, "step": 995 }, { "epoch": 1.4682144831398563, "grad_norm": 2.289238307828457, "learning_rate": 4.3968953067319777e-05, "loss": 0.9412, "step": 996 }, { "epoch": 1.4696885940667035, "grad_norm": 2.1959830240938008, "learning_rate": 4.395605431214096e-05, "loss": 1.0248, "step": 997 }, { "epoch": 1.4711627049935507, "grad_norm": 2.2759365752958245, "learning_rate": 4.394314367455744e-05, "loss": 0.8326, "step": 998 }, { "epoch": 1.472636815920398, "grad_norm": 2.192129196076837, "learning_rate": 4.393022116266212e-05, "loss": 0.9279, "step": 999 }, { "epoch": 1.4741109268472452, "grad_norm": 2.267188223504149, "learning_rate": 4.3917286784555325e-05, "loss": 1.0734, "step": 1000 }, { "epoch": 1.4741109268472452, "eval_bleu": 0.06368364494116487, "eval_bleu_1gram": 0.37779041510756206, "eval_bleu_2gram": 0.14664905145244073, "eval_bleu_3gram": 0.06307244873293259, "eval_bleu_4gram": 0.029932547098929712, "eval_rag_val_loss": 1.2889798286781515, "eval_rouge1": 0.3668781001800645, "eval_rouge2": 0.1406197045544848, "eval_rougeL": 0.34767524437344755, "step": 1000 }, { "epoch": 1.4755850377740924, "grad_norm": 2.0601527411120335, "learning_rate": 4.390434054834483e-05, "loss": 0.9469, "step": 1001 }, { "epoch": 1.4770591487009397, "grad_norm": 2.3007049806968136, "learning_rate": 4.389138246214588e-05, "loss": 0.937, "step": 1002 }, { "epoch": 1.478533259627787, "grad_norm": 2.22559900685884, "learning_rate": 4.387841253408109e-05, "loss": 1.0062, "step": 1003 }, { "epoch": 1.4800073705546342, "grad_norm": 1.921038003417512, "learning_rate": 4.386543077228053e-05, "loss": 0.825, "step": 1004 }, { "epoch": 1.4814814814814814, "grad_norm": 2.1476408990149696, "learning_rate": 4.3852437184881687e-05, "loss": 1.0196, "step": 1005 }, { "epoch": 1.4829555924083286, "grad_norm": 1.948744783131466, "learning_rate": 4.383943178002944e-05, "loss": 0.8722, "step": 1006 }, { "epoch": 1.4844297033351759, "grad_norm": 2.099385662136957, "learning_rate": 4.382641456587611e-05, "loss": 0.9071, "step": 1007 }, { "epoch": 1.4859038142620231, "grad_norm": 1.979175393603978, "learning_rate": 4.38133855505814e-05, "loss": 0.9564, "step": 1008 }, { "epoch": 1.4873779251888704, "grad_norm": 1.9461475096895469, "learning_rate": 4.3800344742312396e-05, "loss": 0.9271, "step": 1009 }, { "epoch": 1.4888520361157176, "grad_norm": 1.8615780769632653, "learning_rate": 4.3787292149243605e-05, "loss": 0.9162, "step": 1010 }, { "epoch": 1.4903261470425648, "grad_norm": 2.0304594408844174, "learning_rate": 4.3774227779556906e-05, "loss": 0.7461, "step": 1011 }, { "epoch": 1.491800257969412, "grad_norm": 2.2012782371568504, "learning_rate": 4.376115164144157e-05, "loss": 1.0135, "step": 1012 }, { "epoch": 1.4932743688962593, "grad_norm": 1.9437413745946335, "learning_rate": 4.374806374309421e-05, "loss": 0.7982, "step": 1013 }, { "epoch": 1.4947484798231068, "grad_norm": 1.9917949813411828, "learning_rate": 4.3734964092718885e-05, "loss": 0.9187, "step": 1014 }, { "epoch": 1.496222590749954, "grad_norm": 2.217186927929751, "learning_rate": 4.372185269852693e-05, "loss": 1.0142, "step": 1015 }, { "epoch": 1.4976967016768012, "grad_norm": 1.9093929708231379, "learning_rate": 4.370872956873712e-05, "loss": 0.8837, "step": 1016 }, { "epoch": 1.4991708126036485, "grad_norm": 1.9262625381091374, "learning_rate": 4.369559471157552e-05, "loss": 0.9992, "step": 1017 }, { "epoch": 1.5006449235304957, "grad_norm": 2.354595977887861, "learning_rate": 4.36824481352756e-05, "loss": 1.0728, "step": 1018 }, { "epoch": 1.502119034457343, "grad_norm": 1.9804042226297554, "learning_rate": 4.366928984807815e-05, "loss": 0.9786, "step": 1019 }, { "epoch": 1.5035931453841902, "grad_norm": 1.8136519356000642, "learning_rate": 4.36561198582313e-05, "loss": 0.8765, "step": 1020 }, { "epoch": 1.5050672563110374, "grad_norm": 2.1816096973873718, "learning_rate": 4.364293817399052e-05, "loss": 1.0117, "step": 1021 }, { "epoch": 1.5065413672378847, "grad_norm": 1.9971574610416516, "learning_rate": 4.362974480361862e-05, "loss": 0.8838, "step": 1022 }, { "epoch": 1.508015478164732, "grad_norm": 2.2185796752887303, "learning_rate": 4.361653975538572e-05, "loss": 1.0031, "step": 1023 }, { "epoch": 1.5094895890915792, "grad_norm": 2.151961775599785, "learning_rate": 4.3603323037569265e-05, "loss": 0.9658, "step": 1024 }, { "epoch": 1.5109637000184264, "grad_norm": 1.996138659903301, "learning_rate": 4.359009465845402e-05, "loss": 0.7935, "step": 1025 }, { "epoch": 1.5124378109452736, "grad_norm": 2.040227213903046, "learning_rate": 4.3576854626332055e-05, "loss": 0.8959, "step": 1026 }, { "epoch": 1.5139119218721209, "grad_norm": 1.908128032163378, "learning_rate": 4.356360294950275e-05, "loss": 0.9367, "step": 1027 }, { "epoch": 1.515386032798968, "grad_norm": 2.3312279421501705, "learning_rate": 4.3550339636272775e-05, "loss": 1.0823, "step": 1028 }, { "epoch": 1.5168601437258153, "grad_norm": 1.9833661015258592, "learning_rate": 4.35370646949561e-05, "loss": 0.9458, "step": 1029 }, { "epoch": 1.5183342546526626, "grad_norm": 2.173083840288406, "learning_rate": 4.352377813387398e-05, "loss": 0.8959, "step": 1030 }, { "epoch": 1.5198083655795098, "grad_norm": 2.2517736649614926, "learning_rate": 4.3510479961354964e-05, "loss": 0.9542, "step": 1031 }, { "epoch": 1.521282476506357, "grad_norm": 1.8272063726785033, "learning_rate": 4.349717018573487e-05, "loss": 0.849, "step": 1032 }, { "epoch": 1.5227565874332043, "grad_norm": 2.1489210659634987, "learning_rate": 4.348384881535679e-05, "loss": 0.9796, "step": 1033 }, { "epoch": 1.5242306983600515, "grad_norm": 2.0461937156398045, "learning_rate": 4.347051585857109e-05, "loss": 0.9869, "step": 1034 }, { "epoch": 1.525704809286899, "grad_norm": 1.9851536462159478, "learning_rate": 4.34571713237354e-05, "loss": 0.9416, "step": 1035 }, { "epoch": 1.5271789202137462, "grad_norm": 2.09720599636288, "learning_rate": 4.344381521921458e-05, "loss": 0.9207, "step": 1036 }, { "epoch": 1.5286530311405935, "grad_norm": 1.7177400569480874, "learning_rate": 4.3430447553380785e-05, "loss": 0.7669, "step": 1037 }, { "epoch": 1.5301271420674407, "grad_norm": 2.325987993558694, "learning_rate": 4.34170683346134e-05, "loss": 0.9968, "step": 1038 }, { "epoch": 1.531601252994288, "grad_norm": 2.145151851054095, "learning_rate": 4.3403677571299026e-05, "loss": 1.1038, "step": 1039 }, { "epoch": 1.5330753639211352, "grad_norm": 1.8744116813832954, "learning_rate": 4.339027527183154e-05, "loss": 0.8663, "step": 1040 }, { "epoch": 1.5345494748479824, "grad_norm": 2.0225486423221275, "learning_rate": 4.337686144461204e-05, "loss": 0.9458, "step": 1041 }, { "epoch": 1.5360235857748297, "grad_norm": 2.1847687154959816, "learning_rate": 4.3363436098048825e-05, "loss": 0.9986, "step": 1042 }, { "epoch": 1.537497696701677, "grad_norm": 2.085860804601567, "learning_rate": 4.3349999240557446e-05, "loss": 0.8742, "step": 1043 }, { "epoch": 1.5389718076285241, "grad_norm": 2.3290292916591344, "learning_rate": 4.333655088056065e-05, "loss": 1.0263, "step": 1044 }, { "epoch": 1.5404459185553714, "grad_norm": 2.1062104399726898, "learning_rate": 4.332309102648841e-05, "loss": 1.0566, "step": 1045 }, { "epoch": 1.5419200294822186, "grad_norm": 2.0361475662684896, "learning_rate": 4.330961968677788e-05, "loss": 0.8616, "step": 1046 }, { "epoch": 1.5433941404090659, "grad_norm": 2.2938500031751534, "learning_rate": 4.329613686987344e-05, "loss": 1.0519, "step": 1047 }, { "epoch": 1.544868251335913, "grad_norm": 1.9860135019617478, "learning_rate": 4.328264258422665e-05, "loss": 0.8979, "step": 1048 }, { "epoch": 1.5463423622627603, "grad_norm": 2.0572217969659037, "learning_rate": 4.3269136838296264e-05, "loss": 1.0275, "step": 1049 }, { "epoch": 1.5478164731896076, "grad_norm": 2.57005276280727, "learning_rate": 4.325561964054822e-05, "loss": 1.1249, "step": 1050 }, { "epoch": 1.5492905841164548, "grad_norm": 2.041270262973285, "learning_rate": 4.324209099945563e-05, "loss": 0.8258, "step": 1051 }, { "epoch": 1.550764695043302, "grad_norm": 2.0359742612879352, "learning_rate": 4.322855092349878e-05, "loss": 1.0596, "step": 1052 }, { "epoch": 1.5522388059701493, "grad_norm": 1.984482949992679, "learning_rate": 4.321499942116511e-05, "loss": 0.8099, "step": 1053 }, { "epoch": 1.5537129168969965, "grad_norm": 1.9117644547337156, "learning_rate": 4.320143650094927e-05, "loss": 0.9376, "step": 1054 }, { "epoch": 1.5551870278238438, "grad_norm": 2.3779094342694993, "learning_rate": 4.318786217135301e-05, "loss": 1.1182, "step": 1055 }, { "epoch": 1.556661138750691, "grad_norm": 2.080230793253057, "learning_rate": 4.3174276440885276e-05, "loss": 0.896, "step": 1056 }, { "epoch": 1.5581352496775382, "grad_norm": 1.9658510572414183, "learning_rate": 4.316067931806212e-05, "loss": 0.8402, "step": 1057 }, { "epoch": 1.5596093606043855, "grad_norm": 1.9057944331878567, "learning_rate": 4.3147070811406765e-05, "loss": 0.8667, "step": 1058 }, { "epoch": 1.5610834715312327, "grad_norm": 1.8819146132441154, "learning_rate": 4.313345092944957e-05, "loss": 0.8169, "step": 1059 }, { "epoch": 1.56255758245808, "grad_norm": 1.8565368148190442, "learning_rate": 4.3119819680728e-05, "loss": 0.996, "step": 1060 }, { "epoch": 1.5640316933849272, "grad_norm": 2.069005589672593, "learning_rate": 4.310617707378668e-05, "loss": 0.871, "step": 1061 }, { "epoch": 1.5655058043117744, "grad_norm": 1.9759824602838987, "learning_rate": 4.309252311717732e-05, "loss": 0.8985, "step": 1062 }, { "epoch": 1.5669799152386217, "grad_norm": 2.330225873485221, "learning_rate": 4.307885781945876e-05, "loss": 0.965, "step": 1063 }, { "epoch": 1.568454026165469, "grad_norm": 2.218863363459297, "learning_rate": 4.3065181189196956e-05, "loss": 1.0154, "step": 1064 }, { "epoch": 1.5699281370923162, "grad_norm": 2.2145589009537447, "learning_rate": 4.305149323496497e-05, "loss": 0.9942, "step": 1065 }, { "epoch": 1.5714022480191634, "grad_norm": 2.017970413520414, "learning_rate": 4.303779396534293e-05, "loss": 0.8901, "step": 1066 }, { "epoch": 1.5728763589460106, "grad_norm": 2.1822141817186047, "learning_rate": 4.30240833889181e-05, "loss": 1.0046, "step": 1067 }, { "epoch": 1.5743504698728579, "grad_norm": 2.1257538299867003, "learning_rate": 4.30103615142848e-05, "loss": 0.962, "step": 1068 }, { "epoch": 1.575824580799705, "grad_norm": 2.1851832519434096, "learning_rate": 4.2996628350044454e-05, "loss": 1.0044, "step": 1069 }, { "epoch": 1.5772986917265523, "grad_norm": 2.3237650965487, "learning_rate": 4.298288390480554e-05, "loss": 0.887, "step": 1070 }, { "epoch": 1.5787728026533996, "grad_norm": 2.2247564118165712, "learning_rate": 4.296912818718363e-05, "loss": 0.7967, "step": 1071 }, { "epoch": 1.5802469135802468, "grad_norm": 2.3802606645058124, "learning_rate": 4.295536120580135e-05, "loss": 1.0854, "step": 1072 }, { "epoch": 1.581721024507094, "grad_norm": 2.2150101641212947, "learning_rate": 4.2941582969288384e-05, "loss": 0.9781, "step": 1073 }, { "epoch": 1.5831951354339413, "grad_norm": 2.1850055095681706, "learning_rate": 4.292779348628148e-05, "loss": 1.0478, "step": 1074 }, { "epoch": 1.5846692463607885, "grad_norm": 2.159151971674502, "learning_rate": 4.2913992765424434e-05, "loss": 0.944, "step": 1075 }, { "epoch": 1.5861433572876358, "grad_norm": 1.986238820816951, "learning_rate": 4.2900180815368076e-05, "loss": 0.9184, "step": 1076 }, { "epoch": 1.587617468214483, "grad_norm": 2.2301587425490084, "learning_rate": 4.2886357644770294e-05, "loss": 0.9187, "step": 1077 }, { "epoch": 1.5890915791413303, "grad_norm": 2.298118929176726, "learning_rate": 4.287252326229598e-05, "loss": 0.8891, "step": 1078 }, { "epoch": 1.5905656900681775, "grad_norm": 2.19080919914355, "learning_rate": 4.285867767661709e-05, "loss": 0.9844, "step": 1079 }, { "epoch": 1.5920398009950247, "grad_norm": 1.877275484886218, "learning_rate": 4.284482089641257e-05, "loss": 0.8082, "step": 1080 }, { "epoch": 1.593513911921872, "grad_norm": 2.08240315334625, "learning_rate": 4.283095293036842e-05, "loss": 1.0105, "step": 1081 }, { "epoch": 1.5949880228487192, "grad_norm": 2.0280814473276534, "learning_rate": 4.281707378717761e-05, "loss": 0.9649, "step": 1082 }, { "epoch": 1.5964621337755664, "grad_norm": 2.085261431800012, "learning_rate": 4.280318347554013e-05, "loss": 0.9637, "step": 1083 }, { "epoch": 1.597936244702414, "grad_norm": 1.977121387218641, "learning_rate": 4.2789282004163e-05, "loss": 1.0224, "step": 1084 }, { "epoch": 1.5994103556292611, "grad_norm": 2.1093361179865524, "learning_rate": 4.27753693817602e-05, "loss": 0.8577, "step": 1085 }, { "epoch": 1.6008844665561084, "grad_norm": 1.8681758036745522, "learning_rate": 4.276144561705271e-05, "loss": 0.8207, "step": 1086 }, { "epoch": 1.6023585774829556, "grad_norm": 2.0647183828005837, "learning_rate": 4.27475107187685e-05, "loss": 0.7787, "step": 1087 }, { "epoch": 1.6038326884098029, "grad_norm": 1.9564309789043606, "learning_rate": 4.273356469564251e-05, "loss": 0.8856, "step": 1088 }, { "epoch": 1.60530679933665, "grad_norm": 1.9658450538681504, "learning_rate": 4.271960755641668e-05, "loss": 0.8754, "step": 1089 }, { "epoch": 1.6067809102634973, "grad_norm": 1.941426206060192, "learning_rate": 4.270563930983986e-05, "loss": 0.8615, "step": 1090 }, { "epoch": 1.6082550211903446, "grad_norm": 2.2233985237999927, "learning_rate": 4.269165996466793e-05, "loss": 1.0366, "step": 1091 }, { "epoch": 1.6097291321171918, "grad_norm": 2.0011094115779158, "learning_rate": 4.267766952966369e-05, "loss": 0.8826, "step": 1092 }, { "epoch": 1.611203243044039, "grad_norm": 2.2824127160521352, "learning_rate": 4.266366801359689e-05, "loss": 1.0911, "step": 1093 }, { "epoch": 1.6126773539708863, "grad_norm": 2.0992196222551223, "learning_rate": 4.264965542524424e-05, "loss": 0.78, "step": 1094 }, { "epoch": 1.6141514648977335, "grad_norm": 2.143668246031029, "learning_rate": 4.263563177338938e-05, "loss": 0.9438, "step": 1095 }, { "epoch": 1.6156255758245808, "grad_norm": 2.108508461877109, "learning_rate": 4.262159706682291e-05, "loss": 0.9782, "step": 1096 }, { "epoch": 1.617099686751428, "grad_norm": 2.023102957269874, "learning_rate": 4.2607551314342297e-05, "loss": 0.9522, "step": 1097 }, { "epoch": 1.6185737976782753, "grad_norm": 1.980666291807677, "learning_rate": 4.259349452475202e-05, "loss": 0.8598, "step": 1098 }, { "epoch": 1.6200479086051225, "grad_norm": 2.117421042311227, "learning_rate": 4.25794267068634e-05, "loss": 0.9703, "step": 1099 }, { "epoch": 1.6215220195319697, "grad_norm": 2.0210526599695804, "learning_rate": 4.256534786949472e-05, "loss": 1.0064, "step": 1100 }, { "epoch": 1.6229961304588172, "grad_norm": 2.194316814697495, "learning_rate": 4.255125802147114e-05, "loss": 0.9941, "step": 1101 }, { "epoch": 1.6244702413856644, "grad_norm": 2.1190490421598582, "learning_rate": 4.253715717162474e-05, "loss": 0.8082, "step": 1102 }, { "epoch": 1.6259443523125117, "grad_norm": 2.2342345720458283, "learning_rate": 4.252304532879449e-05, "loss": 0.9677, "step": 1103 }, { "epoch": 1.627418463239359, "grad_norm": 2.323411201784855, "learning_rate": 4.2508922501826244e-05, "loss": 0.9558, "step": 1104 }, { "epoch": 1.6288925741662061, "grad_norm": 2.107839067544761, "learning_rate": 4.249478869957276e-05, "loss": 0.9822, "step": 1105 }, { "epoch": 1.6303666850930534, "grad_norm": 2.231284718817714, "learning_rate": 4.248064393089366e-05, "loss": 1.0091, "step": 1106 }, { "epoch": 1.6318407960199006, "grad_norm": 2.6051188038040785, "learning_rate": 4.246648820465544e-05, "loss": 0.9776, "step": 1107 }, { "epoch": 1.6333149069467479, "grad_norm": 2.210970928896464, "learning_rate": 4.2452321529731475e-05, "loss": 0.8778, "step": 1108 }, { "epoch": 1.634789017873595, "grad_norm": 2.4123958031482653, "learning_rate": 4.2438143915002e-05, "loss": 0.9612, "step": 1109 }, { "epoch": 1.6362631288004423, "grad_norm": 2.142023673773553, "learning_rate": 4.242395536935409e-05, "loss": 1.0033, "step": 1110 }, { "epoch": 1.6377372397272896, "grad_norm": 2.260760537520021, "learning_rate": 4.2409755901681716e-05, "loss": 0.9344, "step": 1111 }, { "epoch": 1.6392113506541368, "grad_norm": 2.095071290289489, "learning_rate": 4.239554552088563e-05, "loss": 1.0281, "step": 1112 }, { "epoch": 1.640685461580984, "grad_norm": 2.066937960486773, "learning_rate": 4.238132423587349e-05, "loss": 0.9908, "step": 1113 }, { "epoch": 1.6421595725078313, "grad_norm": 2.1829013989312607, "learning_rate": 4.236709205555973e-05, "loss": 0.9033, "step": 1114 }, { "epoch": 1.6436336834346785, "grad_norm": 2.055989248329282, "learning_rate": 4.235284898886568e-05, "loss": 1.0429, "step": 1115 }, { "epoch": 1.6451077943615258, "grad_norm": 2.095267358076016, "learning_rate": 4.233859504471943e-05, "loss": 0.913, "step": 1116 }, { "epoch": 1.646581905288373, "grad_norm": 2.088243902452578, "learning_rate": 4.2324330232055924e-05, "loss": 0.8843, "step": 1117 }, { "epoch": 1.6480560162152202, "grad_norm": 1.9821130070347275, "learning_rate": 4.231005455981692e-05, "loss": 0.8616, "step": 1118 }, { "epoch": 1.6495301271420675, "grad_norm": 2.208622343716899, "learning_rate": 4.2295768036950953e-05, "loss": 0.967, "step": 1119 }, { "epoch": 1.6510042380689147, "grad_norm": 1.9738481905226999, "learning_rate": 4.22814706724134e-05, "loss": 0.9397, "step": 1120 }, { "epoch": 1.652478348995762, "grad_norm": 2.1853851994908835, "learning_rate": 4.226716247516641e-05, "loss": 0.9971, "step": 1121 }, { "epoch": 1.6539524599226092, "grad_norm": 2.189722076966168, "learning_rate": 4.2252843454178925e-05, "loss": 1.0716, "step": 1122 }, { "epoch": 1.6554265708494564, "grad_norm": 1.9323732782148078, "learning_rate": 4.223851361842668e-05, "loss": 0.8801, "step": 1123 }, { "epoch": 1.6569006817763037, "grad_norm": 2.000600605428326, "learning_rate": 4.222417297689217e-05, "loss": 0.976, "step": 1124 }, { "epoch": 1.658374792703151, "grad_norm": 2.239454032063363, "learning_rate": 4.2209821538564684e-05, "loss": 0.8883, "step": 1125 }, { "epoch": 1.6598489036299982, "grad_norm": 2.1193180414283925, "learning_rate": 4.219545931244027e-05, "loss": 1.0061, "step": 1126 }, { "epoch": 1.6613230145568454, "grad_norm": 2.0348272661761313, "learning_rate": 4.218108630752174e-05, "loss": 1.1691, "step": 1127 }, { "epoch": 1.6627971254836926, "grad_norm": 2.067179717195584, "learning_rate": 4.2166702532818665e-05, "loss": 0.8621, "step": 1128 }, { "epoch": 1.6642712364105399, "grad_norm": 2.177774663304461, "learning_rate": 4.2152307997347365e-05, "loss": 0.9612, "step": 1129 }, { "epoch": 1.665745347337387, "grad_norm": 2.1122117923762453, "learning_rate": 4.213790271013089e-05, "loss": 0.9539, "step": 1130 }, { "epoch": 1.6672194582642343, "grad_norm": 2.334419701491658, "learning_rate": 4.212348668019906e-05, "loss": 1.0128, "step": 1131 }, { "epoch": 1.6686935691910816, "grad_norm": 1.9277362920100334, "learning_rate": 4.2109059916588414e-05, "loss": 0.7352, "step": 1132 }, { "epoch": 1.6701676801179288, "grad_norm": 2.0057443141901836, "learning_rate": 4.20946224283422e-05, "loss": 0.7963, "step": 1133 }, { "epoch": 1.671641791044776, "grad_norm": 2.1590959868302937, "learning_rate": 4.2080174224510426e-05, "loss": 0.9293, "step": 1134 }, { "epoch": 1.6731159019716233, "grad_norm": 1.9798100145301905, "learning_rate": 4.2065715314149775e-05, "loss": 0.8662, "step": 1135 }, { "epoch": 1.6745900128984705, "grad_norm": 1.9668793040696075, "learning_rate": 4.2051245706323696e-05, "loss": 0.8783, "step": 1136 }, { "epoch": 1.6760641238253178, "grad_norm": 2.1414171966640354, "learning_rate": 4.2036765410102285e-05, "loss": 0.8645, "step": 1137 }, { "epoch": 1.677538234752165, "grad_norm": 1.9412231358955574, "learning_rate": 4.202227443456238e-05, "loss": 0.9271, "step": 1138 }, { "epoch": 1.6790123456790123, "grad_norm": 1.970585104033048, "learning_rate": 4.200777278878749e-05, "loss": 0.9274, "step": 1139 }, { "epoch": 1.6804864566058595, "grad_norm": 1.9667226252703676, "learning_rate": 4.199326048186782e-05, "loss": 0.9698, "step": 1140 }, { "epoch": 1.6819605675327067, "grad_norm": 1.987994939728558, "learning_rate": 4.197873752290027e-05, "loss": 0.939, "step": 1141 }, { "epoch": 1.683434678459554, "grad_norm": 1.8921832764229545, "learning_rate": 4.1964203920988385e-05, "loss": 0.8252, "step": 1142 }, { "epoch": 1.6849087893864012, "grad_norm": 2.207454026500613, "learning_rate": 4.19496596852424e-05, "loss": 0.9802, "step": 1143 }, { "epoch": 1.6863829003132484, "grad_norm": 1.9972349364834399, "learning_rate": 4.1935104824779246e-05, "loss": 0.9472, "step": 1144 }, { "epoch": 1.6878570112400957, "grad_norm": 2.1117333685806092, "learning_rate": 4.192053934872247e-05, "loss": 0.9221, "step": 1145 }, { "epoch": 1.689331122166943, "grad_norm": 2.278870687380538, "learning_rate": 4.1905963266202276e-05, "loss": 1.1567, "step": 1146 }, { "epoch": 1.6908052330937902, "grad_norm": 2.1538293492007474, "learning_rate": 4.189137658635555e-05, "loss": 1.0821, "step": 1147 }, { "epoch": 1.6922793440206374, "grad_norm": 2.352293388869019, "learning_rate": 4.187677931832578e-05, "loss": 0.9302, "step": 1148 }, { "epoch": 1.6937534549474846, "grad_norm": 2.2738762396129153, "learning_rate": 4.1862171471263126e-05, "loss": 1.0232, "step": 1149 }, { "epoch": 1.695227565874332, "grad_norm": 2.0729408710031487, "learning_rate": 4.184755305432436e-05, "loss": 1.0372, "step": 1150 }, { "epoch": 1.6967016768011793, "grad_norm": 2.191680292684263, "learning_rate": 4.1832924076672876e-05, "loss": 0.9847, "step": 1151 }, { "epoch": 1.6981757877280266, "grad_norm": 2.0231165097411727, "learning_rate": 4.181828454747872e-05, "loss": 0.9103, "step": 1152 }, { "epoch": 1.6996498986548738, "grad_norm": 2.1845425913948584, "learning_rate": 4.180363447591849e-05, "loss": 1.1059, "step": 1153 }, { "epoch": 1.701124009581721, "grad_norm": 2.3506635297291654, "learning_rate": 4.178897387117546e-05, "loss": 1.1999, "step": 1154 }, { "epoch": 1.7025981205085683, "grad_norm": 2.243871395471787, "learning_rate": 4.177430274243947e-05, "loss": 1.0269, "step": 1155 }, { "epoch": 1.7040722314354155, "grad_norm": 1.9007704101454101, "learning_rate": 4.175962109890696e-05, "loss": 0.9147, "step": 1156 }, { "epoch": 1.7055463423622628, "grad_norm": 2.3979034326245046, "learning_rate": 4.1744928949780975e-05, "loss": 1.0474, "step": 1157 }, { "epoch": 1.70702045328911, "grad_norm": 2.0569867515749305, "learning_rate": 4.173022630427113e-05, "loss": 0.8985, "step": 1158 }, { "epoch": 1.7084945642159572, "grad_norm": 2.183474733661148, "learning_rate": 4.1715513171593614e-05, "loss": 1.0012, "step": 1159 }, { "epoch": 1.7099686751428045, "grad_norm": 2.2004273042985734, "learning_rate": 4.170078956097121e-05, "loss": 0.9946, "step": 1160 }, { "epoch": 1.7114427860696517, "grad_norm": 2.5034997285721228, "learning_rate": 4.168605548163326e-05, "loss": 1.0172, "step": 1161 }, { "epoch": 1.712916896996499, "grad_norm": 1.885240184750797, "learning_rate": 4.167131094281565e-05, "loss": 0.8562, "step": 1162 }, { "epoch": 1.7143910079233462, "grad_norm": 2.210768622428974, "learning_rate": 4.165655595376088e-05, "loss": 1.0193, "step": 1163 }, { "epoch": 1.7158651188501934, "grad_norm": 2.0292242659501674, "learning_rate": 4.1641790523717935e-05, "loss": 0.87, "step": 1164 }, { "epoch": 1.7173392297770407, "grad_norm": 2.169589919442977, "learning_rate": 4.162701466194237e-05, "loss": 1.0039, "step": 1165 }, { "epoch": 1.718813340703888, "grad_norm": 1.8099962238672815, "learning_rate": 4.161222837769627e-05, "loss": 0.8283, "step": 1166 }, { "epoch": 1.7202874516307354, "grad_norm": 2.128427882214095, "learning_rate": 4.159743168024829e-05, "loss": 1.007, "step": 1167 }, { "epoch": 1.7217615625575826, "grad_norm": 2.0128498934224894, "learning_rate": 4.158262457887356e-05, "loss": 0.8892, "step": 1168 }, { "epoch": 1.7232356734844299, "grad_norm": 2.086685332355967, "learning_rate": 4.156780708285378e-05, "loss": 0.9307, "step": 1169 }, { "epoch": 1.724709784411277, "grad_norm": 1.9799911254144476, "learning_rate": 4.155297920147713e-05, "loss": 0.8492, "step": 1170 }, { "epoch": 1.7261838953381243, "grad_norm": 2.105960484198276, "learning_rate": 4.153814094403831e-05, "loss": 0.9455, "step": 1171 }, { "epoch": 1.7276580062649716, "grad_norm": 1.8313802444958855, "learning_rate": 4.1523292319838524e-05, "loss": 0.7656, "step": 1172 }, { "epoch": 1.7291321171918188, "grad_norm": 2.167336311539327, "learning_rate": 4.150843333818549e-05, "loss": 1.0169, "step": 1173 }, { "epoch": 1.730606228118666, "grad_norm": 1.9909918095311485, "learning_rate": 4.149356400839339e-05, "loss": 0.8362, "step": 1174 }, { "epoch": 1.7320803390455133, "grad_norm": 2.3295349357688915, "learning_rate": 4.1478684339782926e-05, "loss": 1.0576, "step": 1175 }, { "epoch": 1.7335544499723605, "grad_norm": 1.9404863906968939, "learning_rate": 4.1463794341681244e-05, "loss": 0.9372, "step": 1176 }, { "epoch": 1.7350285608992078, "grad_norm": 2.0925317892670154, "learning_rate": 4.1448894023422005e-05, "loss": 0.8852, "step": 1177 }, { "epoch": 1.736502671826055, "grad_norm": 2.245208406524171, "learning_rate": 4.143398339434529e-05, "loss": 1.0159, "step": 1178 }, { "epoch": 1.7379767827529022, "grad_norm": 2.118265138134169, "learning_rate": 4.1419062463797695e-05, "loss": 0.8723, "step": 1179 }, { "epoch": 1.7394508936797495, "grad_norm": 2.125902545220477, "learning_rate": 4.140413124113225e-05, "loss": 0.9934, "step": 1180 }, { "epoch": 1.7409250046065967, "grad_norm": 2.1856554156121404, "learning_rate": 4.138918973570842e-05, "loss": 0.8072, "step": 1181 }, { "epoch": 1.742399115533444, "grad_norm": 2.035823075774443, "learning_rate": 4.1374237956892133e-05, "loss": 0.9483, "step": 1182 }, { "epoch": 1.7438732264602912, "grad_norm": 2.2895027232073724, "learning_rate": 4.135927591405577e-05, "loss": 0.9652, "step": 1183 }, { "epoch": 1.7453473373871384, "grad_norm": 1.8976386002292633, "learning_rate": 4.134430361657813e-05, "loss": 0.8437, "step": 1184 }, { "epoch": 1.7468214483139857, "grad_norm": 2.3538958863575585, "learning_rate": 4.1329321073844415e-05, "loss": 0.9971, "step": 1185 }, { "epoch": 1.748295559240833, "grad_norm": 2.185557020230307, "learning_rate": 4.131432829524631e-05, "loss": 0.8896, "step": 1186 }, { "epoch": 1.7497696701676801, "grad_norm": 2.1036859724939747, "learning_rate": 4.129932529018187e-05, "loss": 0.9972, "step": 1187 }, { "epoch": 1.7512437810945274, "grad_norm": 2.2273320524563496, "learning_rate": 4.128431206805557e-05, "loss": 0.9971, "step": 1188 }, { "epoch": 1.7527178920213746, "grad_norm": 1.9636589102983906, "learning_rate": 4.126928863827827e-05, "loss": 0.9305, "step": 1189 }, { "epoch": 1.7541920029482219, "grad_norm": 2.1567630986622572, "learning_rate": 4.1254255010267285e-05, "loss": 1.0236, "step": 1190 }, { "epoch": 1.755666113875069, "grad_norm": 1.984800698582881, "learning_rate": 4.123921119344627e-05, "loss": 0.9577, "step": 1191 }, { "epoch": 1.7571402248019163, "grad_norm": 2.096734155054922, "learning_rate": 4.122415719724528e-05, "loss": 1.1217, "step": 1192 }, { "epoch": 1.7586143357287636, "grad_norm": 2.0820035377934105, "learning_rate": 4.120909303110078e-05, "loss": 0.9467, "step": 1193 }, { "epoch": 1.7600884466556108, "grad_norm": 1.8772354151867507, "learning_rate": 4.119401870445555e-05, "loss": 0.9255, "step": 1194 }, { "epoch": 1.761562557582458, "grad_norm": 1.8986462764774703, "learning_rate": 4.1178934226758803e-05, "loss": 0.8816, "step": 1195 }, { "epoch": 1.7630366685093053, "grad_norm": 1.9070377910772192, "learning_rate": 4.1163839607466084e-05, "loss": 0.9666, "step": 1196 }, { "epoch": 1.7645107794361525, "grad_norm": 1.9649295371042983, "learning_rate": 4.114873485603927e-05, "loss": 0.9586, "step": 1197 }, { "epoch": 1.7659848903629998, "grad_norm": 1.8841641588066425, "learning_rate": 4.113361998194665e-05, "loss": 0.8304, "step": 1198 }, { "epoch": 1.767459001289847, "grad_norm": 1.9956938759243557, "learning_rate": 4.111849499466281e-05, "loss": 1.1546, "step": 1199 }, { "epoch": 1.7689331122166942, "grad_norm": 1.8528617453179212, "learning_rate": 4.110335990366868e-05, "loss": 0.8902, "step": 1200 }, { "epoch": 1.7704072231435415, "grad_norm": 1.8993693133184644, "learning_rate": 4.108821471845155e-05, "loss": 0.9785, "step": 1201 }, { "epoch": 1.7718813340703887, "grad_norm": 2.212921761868596, "learning_rate": 4.107305944850502e-05, "loss": 1.0636, "step": 1202 }, { "epoch": 1.773355444997236, "grad_norm": 2.037698106554504, "learning_rate": 4.105789410332901e-05, "loss": 0.8605, "step": 1203 }, { "epoch": 1.7748295559240832, "grad_norm": 2.0950890429745326, "learning_rate": 4.104271869242975e-05, "loss": 0.9287, "step": 1204 }, { "epoch": 1.7763036668509304, "grad_norm": 1.9339221405759686, "learning_rate": 4.10275332253198e-05, "loss": 0.8765, "step": 1205 }, { "epoch": 1.7777777777777777, "grad_norm": 1.981509084037232, "learning_rate": 4.1012337711518e-05, "loss": 0.9807, "step": 1206 }, { "epoch": 1.779251888704625, "grad_norm": 2.241759894132981, "learning_rate": 4.099713216054952e-05, "loss": 0.9803, "step": 1207 }, { "epoch": 1.7807259996314722, "grad_norm": 2.0407917976851593, "learning_rate": 4.098191658194578e-05, "loss": 1.0267, "step": 1208 }, { "epoch": 1.7822001105583194, "grad_norm": 2.196688183672831, "learning_rate": 4.096669098524451e-05, "loss": 1.0112, "step": 1209 }, { "epoch": 1.7836742214851666, "grad_norm": 1.9488022231175437, "learning_rate": 4.095145537998972e-05, "loss": 0.7952, "step": 1210 }, { "epoch": 1.7851483324120139, "grad_norm": 2.2964951596864154, "learning_rate": 4.0936209775731686e-05, "loss": 0.9858, "step": 1211 }, { "epoch": 1.786622443338861, "grad_norm": 2.09379873646053, "learning_rate": 4.0920954182026965e-05, "loss": 0.9595, "step": 1212 }, { "epoch": 1.7880965542657083, "grad_norm": 2.095850673575996, "learning_rate": 4.090568860843836e-05, "loss": 0.8826, "step": 1213 }, { "epoch": 1.7895706651925556, "grad_norm": 2.3272362363119203, "learning_rate": 4.089041306453494e-05, "loss": 0.8852, "step": 1214 }, { "epoch": 1.7910447761194028, "grad_norm": 2.03595341685727, "learning_rate": 4.0875127559892015e-05, "loss": 0.9402, "step": 1215 }, { "epoch": 1.79251888704625, "grad_norm": 2.2050843960965962, "learning_rate": 4.085983210409114e-05, "loss": 0.9513, "step": 1216 }, { "epoch": 1.7939929979730975, "grad_norm": 2.102178206884701, "learning_rate": 4.084452670672012e-05, "loss": 1.024, "step": 1217 }, { "epoch": 1.7954671088999448, "grad_norm": 2.0491276350281917, "learning_rate": 4.082921137737299e-05, "loss": 1.0402, "step": 1218 }, { "epoch": 1.796941219826792, "grad_norm": 2.1325143458967215, "learning_rate": 4.081388612564999e-05, "loss": 0.9632, "step": 1219 }, { "epoch": 1.7984153307536392, "grad_norm": 2.3531361127492203, "learning_rate": 4.07985509611576e-05, "loss": 0.9952, "step": 1220 }, { "epoch": 1.7998894416804865, "grad_norm": 2.2676690713418397, "learning_rate": 4.078320589350851e-05, "loss": 0.8939, "step": 1221 }, { "epoch": 1.8013635526073337, "grad_norm": 2.1094455283348084, "learning_rate": 4.076785093232162e-05, "loss": 0.9388, "step": 1222 }, { "epoch": 1.802837663534181, "grad_norm": 2.293297609209925, "learning_rate": 4.0752486087222006e-05, "loss": 0.9984, "step": 1223 }, { "epoch": 1.8043117744610282, "grad_norm": 2.3891620023974833, "learning_rate": 4.073711136784099e-05, "loss": 0.9318, "step": 1224 }, { "epoch": 1.8057858853878754, "grad_norm": 2.2220460292055617, "learning_rate": 4.072172678381603e-05, "loss": 1.0211, "step": 1225 }, { "epoch": 1.8072599963147227, "grad_norm": 1.9689221382574746, "learning_rate": 4.07063323447908e-05, "loss": 0.9718, "step": 1226 }, { "epoch": 1.80873410724157, "grad_norm": 1.9464580426166063, "learning_rate": 4.0690928060415144e-05, "loss": 0.9714, "step": 1227 }, { "epoch": 1.8102082181684171, "grad_norm": 2.1287796962814856, "learning_rate": 4.067551394034508e-05, "loss": 0.9782, "step": 1228 }, { "epoch": 1.8116823290952644, "grad_norm": 2.104662682515094, "learning_rate": 4.066008999424279e-05, "loss": 1.0841, "step": 1229 }, { "epoch": 1.8131564400221116, "grad_norm": 1.8790503940290642, "learning_rate": 4.06446562317766e-05, "loss": 0.8488, "step": 1230 }, { "epoch": 1.8146305509489589, "grad_norm": 1.944342743015306, "learning_rate": 4.062921266262102e-05, "loss": 1.0367, "step": 1231 }, { "epoch": 1.816104661875806, "grad_norm": 1.9426469426686526, "learning_rate": 4.0613759296456675e-05, "loss": 0.9975, "step": 1232 }, { "epoch": 1.8175787728026536, "grad_norm": 1.9853188380196218, "learning_rate": 4.059829614297036e-05, "loss": 0.9575, "step": 1233 }, { "epoch": 1.8190528837295008, "grad_norm": 2.2588101727018297, "learning_rate": 4.058282321185498e-05, "loss": 0.943, "step": 1234 }, { "epoch": 1.820526994656348, "grad_norm": 2.1664224144614224, "learning_rate": 4.0567340512809586e-05, "loss": 1.0848, "step": 1235 }, { "epoch": 1.8220011055831953, "grad_norm": 1.9099007964210601, "learning_rate": 4.0551848055539345e-05, "loss": 0.878, "step": 1236 }, { "epoch": 1.8234752165100425, "grad_norm": 2.2012892846626286, "learning_rate": 4.0536345849755545e-05, "loss": 1.0854, "step": 1237 }, { "epoch": 1.8249493274368898, "grad_norm": 2.041156614261652, "learning_rate": 4.0520833905175576e-05, "loss": 0.984, "step": 1238 }, { "epoch": 1.826423438363737, "grad_norm": 2.1215051072971955, "learning_rate": 4.0505312231522944e-05, "loss": 0.8779, "step": 1239 }, { "epoch": 1.8278975492905842, "grad_norm": 2.2286862601399227, "learning_rate": 4.048978083852724e-05, "loss": 0.9759, "step": 1240 }, { "epoch": 1.8293716602174315, "grad_norm": 2.1318744091524073, "learning_rate": 4.0474239735924166e-05, "loss": 0.89, "step": 1241 }, { "epoch": 1.8308457711442787, "grad_norm": 2.053970037596517, "learning_rate": 4.045868893345549e-05, "loss": 0.985, "step": 1242 }, { "epoch": 1.832319882071126, "grad_norm": 2.1830706848499593, "learning_rate": 4.0443128440869084e-05, "loss": 1.065, "step": 1243 }, { "epoch": 1.8337939929979732, "grad_norm": 1.8896116583347424, "learning_rate": 4.042755826791886e-05, "loss": 0.8172, "step": 1244 }, { "epoch": 1.8352681039248204, "grad_norm": 2.3491985821278467, "learning_rate": 4.041197842436484e-05, "loss": 1.0177, "step": 1245 }, { "epoch": 1.8367422148516677, "grad_norm": 2.084474085979101, "learning_rate": 4.0396388919973074e-05, "loss": 0.8246, "step": 1246 }, { "epoch": 1.838216325778515, "grad_norm": 1.9786311847345015, "learning_rate": 4.038078976451567e-05, "loss": 0.9336, "step": 1247 }, { "epoch": 1.8396904367053621, "grad_norm": 1.9411030154516424, "learning_rate": 4.036518096777082e-05, "loss": 0.8979, "step": 1248 }, { "epoch": 1.8411645476322094, "grad_norm": 2.001895125874785, "learning_rate": 4.0349562539522725e-05, "loss": 0.9214, "step": 1249 }, { "epoch": 1.8426386585590566, "grad_norm": 2.1060647492485467, "learning_rate": 4.033393448956162e-05, "loss": 0.8721, "step": 1250 }, { "epoch": 1.8426386585590566, "eval_bleu": 0.06528889957848946, "eval_bleu_1gram": 0.38291950386907603, "eval_bleu_2gram": 0.14961806404093644, "eval_bleu_3gram": 0.0650686865590738, "eval_bleu_4gram": 0.030582768677076953, "eval_rag_val_loss": 1.2789816753838652, "eval_rouge1": 0.37153906817185656, "eval_rouge2": 0.14307934584181875, "eval_rougeL": 0.35218731899720773, "step": 1250 }, { "epoch": 1.8441127694859039, "grad_norm": 2.086583298286281, "learning_rate": 4.03182968276838e-05, "loss": 0.9549, "step": 1251 }, { "epoch": 1.845586880412751, "grad_norm": 2.2192712091032143, "learning_rate": 4.030264956369157e-05, "loss": 0.8337, "step": 1252 }, { "epoch": 1.8470609913395983, "grad_norm": 1.8679488796452042, "learning_rate": 4.028699270739326e-05, "loss": 0.7859, "step": 1253 }, { "epoch": 1.8485351022664456, "grad_norm": 2.0914497835504053, "learning_rate": 4.027132626860318e-05, "loss": 0.9101, "step": 1254 }, { "epoch": 1.8500092131932928, "grad_norm": 2.4499751965571237, "learning_rate": 4.02556502571417e-05, "loss": 1.0431, "step": 1255 }, { "epoch": 1.85148332412014, "grad_norm": 2.075273665800216, "learning_rate": 4.023996468283515e-05, "loss": 0.8171, "step": 1256 }, { "epoch": 1.8529574350469873, "grad_norm": 2.2002428831061867, "learning_rate": 4.022426955551588e-05, "loss": 0.9325, "step": 1257 }, { "epoch": 1.8544315459738345, "grad_norm": 1.978974691081696, "learning_rate": 4.020856488502221e-05, "loss": 0.9126, "step": 1258 }, { "epoch": 1.8559056569006818, "grad_norm": 2.020835706047422, "learning_rate": 4.019285068119845e-05, "loss": 1.0405, "step": 1259 }, { "epoch": 1.857379767827529, "grad_norm": 2.081605105420067, "learning_rate": 4.017712695389487e-05, "loss": 0.9883, "step": 1260 }, { "epoch": 1.8588538787543762, "grad_norm": 2.0785666548571595, "learning_rate": 4.0161393712967756e-05, "loss": 0.8296, "step": 1261 }, { "epoch": 1.8603279896812235, "grad_norm": 2.210997024651453, "learning_rate": 4.01456509682793e-05, "loss": 0.9254, "step": 1262 }, { "epoch": 1.8618021006080707, "grad_norm": 2.1296588092531032, "learning_rate": 4.012989872969768e-05, "loss": 0.9984, "step": 1263 }, { "epoch": 1.863276211534918, "grad_norm": 2.0118467419841957, "learning_rate": 4.011413700709703e-05, "loss": 0.9274, "step": 1264 }, { "epoch": 1.8647503224617652, "grad_norm": 2.215893262971134, "learning_rate": 4.009836581035742e-05, "loss": 0.9353, "step": 1265 }, { "epoch": 1.8662244333886124, "grad_norm": 2.30497134772178, "learning_rate": 4.008258514936486e-05, "loss": 0.92, "step": 1266 }, { "epoch": 1.8676985443154597, "grad_norm": 2.060121262694115, "learning_rate": 4.006679503401129e-05, "loss": 0.9637, "step": 1267 }, { "epoch": 1.869172655242307, "grad_norm": 2.202456541985775, "learning_rate": 4.0050995474194576e-05, "loss": 1.0534, "step": 1268 }, { "epoch": 1.8706467661691542, "grad_norm": 2.027980697035859, "learning_rate": 4.003518647981852e-05, "loss": 0.989, "step": 1269 }, { "epoch": 1.8721208770960014, "grad_norm": 2.0980148151045532, "learning_rate": 4.0019368060792806e-05, "loss": 0.7965, "step": 1270 }, { "epoch": 1.8735949880228486, "grad_norm": 1.8703525803429502, "learning_rate": 4.000354022703306e-05, "loss": 0.9871, "step": 1271 }, { "epoch": 1.8750690989496959, "grad_norm": 2.0692324718264348, "learning_rate": 3.998770298846079e-05, "loss": 0.8525, "step": 1272 }, { "epoch": 1.876543209876543, "grad_norm": 2.127689454802242, "learning_rate": 3.9971856355003396e-05, "loss": 1.0532, "step": 1273 }, { "epoch": 1.8780173208033903, "grad_norm": 2.0811489733670974, "learning_rate": 3.9956000336594185e-05, "loss": 0.9506, "step": 1274 }, { "epoch": 1.8794914317302376, "grad_norm": 2.319905666044834, "learning_rate": 3.994013494317233e-05, "loss": 1.0595, "step": 1275 }, { "epoch": 1.8809655426570848, "grad_norm": 2.0732705917964163, "learning_rate": 3.9924260184682894e-05, "loss": 0.9686, "step": 1276 }, { "epoch": 1.882439653583932, "grad_norm": 2.1997057848047246, "learning_rate": 3.9908376071076805e-05, "loss": 0.946, "step": 1277 }, { "epoch": 1.8839137645107793, "grad_norm": 1.7922460225535128, "learning_rate": 3.9892482612310836e-05, "loss": 0.8398, "step": 1278 }, { "epoch": 1.8853878754376265, "grad_norm": 2.0100939661333115, "learning_rate": 3.9876579818347654e-05, "loss": 0.919, "step": 1279 }, { "epoch": 1.8868619863644738, "grad_norm": 2.2011124095991015, "learning_rate": 3.986066769915575e-05, "loss": 0.9261, "step": 1280 }, { "epoch": 1.888336097291321, "grad_norm": 1.9822083306514797, "learning_rate": 3.984474626470948e-05, "loss": 1.0185, "step": 1281 }, { "epoch": 1.8898102082181683, "grad_norm": 1.883394713231469, "learning_rate": 3.982881552498902e-05, "loss": 0.9019, "step": 1282 }, { "epoch": 1.8912843191450157, "grad_norm": 1.939562038655746, "learning_rate": 3.981287548998039e-05, "loss": 0.9636, "step": 1283 }, { "epoch": 1.892758430071863, "grad_norm": 2.0338299139695826, "learning_rate": 3.979692616967543e-05, "loss": 0.8708, "step": 1284 }, { "epoch": 1.8942325409987102, "grad_norm": 2.158041058147106, "learning_rate": 3.978096757407182e-05, "loss": 1.0043, "step": 1285 }, { "epoch": 1.8957066519255574, "grad_norm": 2.198532403932254, "learning_rate": 3.976499971317302e-05, "loss": 0.9762, "step": 1286 }, { "epoch": 1.8971807628524047, "grad_norm": 2.02743653542642, "learning_rate": 3.974902259698833e-05, "loss": 1.1047, "step": 1287 }, { "epoch": 1.898654873779252, "grad_norm": 2.031430984650463, "learning_rate": 3.973303623553283e-05, "loss": 0.9479, "step": 1288 }, { "epoch": 1.9001289847060991, "grad_norm": 1.939976862543306, "learning_rate": 3.9717040638827406e-05, "loss": 0.9502, "step": 1289 }, { "epoch": 1.9016030956329464, "grad_norm": 1.8611046575874046, "learning_rate": 3.9701035816898734e-05, "loss": 0.9603, "step": 1290 }, { "epoch": 1.9030772065597936, "grad_norm": 2.069543659753339, "learning_rate": 3.9685021779779264e-05, "loss": 0.985, "step": 1291 }, { "epoch": 1.9045513174866409, "grad_norm": 2.014262130467834, "learning_rate": 3.966899853750724e-05, "loss": 1.0909, "step": 1292 }, { "epoch": 1.906025428413488, "grad_norm": 2.178199725189626, "learning_rate": 3.9652966100126655e-05, "loss": 0.883, "step": 1293 }, { "epoch": 1.9074995393403353, "grad_norm": 2.1052703480848702, "learning_rate": 3.9636924477687265e-05, "loss": 0.9975, "step": 1294 }, { "epoch": 1.9089736502671826, "grad_norm": 2.083701088553833, "learning_rate": 3.9620873680244616e-05, "loss": 1.0168, "step": 1295 }, { "epoch": 1.9104477611940298, "grad_norm": 2.14321364207495, "learning_rate": 3.960481371785997e-05, "loss": 0.938, "step": 1296 }, { "epoch": 1.911921872120877, "grad_norm": 2.1626107204074523, "learning_rate": 3.958874460060035e-05, "loss": 0.9512, "step": 1297 }, { "epoch": 1.9133959830477243, "grad_norm": 2.083653374567243, "learning_rate": 3.95726663385385e-05, "loss": 0.894, "step": 1298 }, { "epoch": 1.9148700939745718, "grad_norm": 2.098139020039451, "learning_rate": 3.955657894175293e-05, "loss": 0.9695, "step": 1299 }, { "epoch": 1.916344204901419, "grad_norm": 2.1254510961706776, "learning_rate": 3.9540482420327845e-05, "loss": 0.9272, "step": 1300 }, { "epoch": 1.9178183158282662, "grad_norm": 2.2841004630673587, "learning_rate": 3.952437678435319e-05, "loss": 0.9237, "step": 1301 }, { "epoch": 1.9192924267551135, "grad_norm": 2.4064678737541643, "learning_rate": 3.950826204392461e-05, "loss": 1.0287, "step": 1302 }, { "epoch": 1.9207665376819607, "grad_norm": 2.2154088182533327, "learning_rate": 3.949213820914347e-05, "loss": 0.9862, "step": 1303 }, { "epoch": 1.922240648608808, "grad_norm": 2.162000424225348, "learning_rate": 3.9476005290116814e-05, "loss": 0.9418, "step": 1304 }, { "epoch": 1.9237147595356552, "grad_norm": 1.8436250805966965, "learning_rate": 3.94598632969574e-05, "loss": 0.8697, "step": 1305 }, { "epoch": 1.9251888704625024, "grad_norm": 1.9469100336685357, "learning_rate": 3.944371223978366e-05, "loss": 0.9004, "step": 1306 }, { "epoch": 1.9266629813893497, "grad_norm": 2.2985027539956815, "learning_rate": 3.942755212871973e-05, "loss": 1.0155, "step": 1307 }, { "epoch": 1.928137092316197, "grad_norm": 2.2331930382156977, "learning_rate": 3.94113829738954e-05, "loss": 0.912, "step": 1308 }, { "epoch": 1.9296112032430441, "grad_norm": 1.9785121303877702, "learning_rate": 3.939520478544614e-05, "loss": 0.9622, "step": 1309 }, { "epoch": 1.9310853141698914, "grad_norm": 1.9146820311346038, "learning_rate": 3.937901757351307e-05, "loss": 0.992, "step": 1310 }, { "epoch": 1.9325594250967386, "grad_norm": 1.9640233657324926, "learning_rate": 3.936282134824297e-05, "loss": 0.8326, "step": 1311 }, { "epoch": 1.9340335360235859, "grad_norm": 2.2084543686812648, "learning_rate": 3.93466161197883e-05, "loss": 1.1163, "step": 1312 }, { "epoch": 1.935507646950433, "grad_norm": 1.892508711196423, "learning_rate": 3.933040189830711e-05, "loss": 0.8804, "step": 1313 }, { "epoch": 1.9369817578772803, "grad_norm": 1.9845043350483782, "learning_rate": 3.931417869396313e-05, "loss": 0.9703, "step": 1314 }, { "epoch": 1.9384558688041276, "grad_norm": 2.158702837368654, "learning_rate": 3.929794651692571e-05, "loss": 0.8794, "step": 1315 }, { "epoch": 1.9399299797309748, "grad_norm": 1.817173358996697, "learning_rate": 3.928170537736981e-05, "loss": 0.8314, "step": 1316 }, { "epoch": 1.941404090657822, "grad_norm": 2.0481997563650567, "learning_rate": 3.9265455285476025e-05, "loss": 0.9657, "step": 1317 }, { "epoch": 1.9428782015846693, "grad_norm": 2.1902531328617574, "learning_rate": 3.9249196251430556e-05, "loss": 1.0561, "step": 1318 }, { "epoch": 1.9443523125115165, "grad_norm": 2.226034536842658, "learning_rate": 3.92329282854252e-05, "loss": 1.0669, "step": 1319 }, { "epoch": 1.9458264234383638, "grad_norm": 2.2033513811827565, "learning_rate": 3.9216651397657364e-05, "loss": 1.0903, "step": 1320 }, { "epoch": 1.947300534365211, "grad_norm": 2.1171588966154777, "learning_rate": 3.9200365598330056e-05, "loss": 0.9809, "step": 1321 }, { "epoch": 1.9487746452920582, "grad_norm": 2.1937208820377707, "learning_rate": 3.9184070897651854e-05, "loss": 1.095, "step": 1322 }, { "epoch": 1.9502487562189055, "grad_norm": 2.2427504626928374, "learning_rate": 3.916776730583691e-05, "loss": 0.9496, "step": 1323 }, { "epoch": 1.9517228671457527, "grad_norm": 2.1770423508091885, "learning_rate": 3.915145483310498e-05, "loss": 1.0122, "step": 1324 }, { "epoch": 1.9531969780726, "grad_norm": 1.9695376077176368, "learning_rate": 3.9135133489681356e-05, "loss": 0.9061, "step": 1325 }, { "epoch": 1.9546710889994472, "grad_norm": 1.9959568641144934, "learning_rate": 3.91188032857969e-05, "loss": 0.895, "step": 1326 }, { "epoch": 1.9561451999262944, "grad_norm": 1.939663140949236, "learning_rate": 3.910246423168803e-05, "loss": 0.9571, "step": 1327 }, { "epoch": 1.9576193108531417, "grad_norm": 2.2508984996971146, "learning_rate": 3.908611633759672e-05, "loss": 0.9875, "step": 1328 }, { "epoch": 1.959093421779989, "grad_norm": 2.151931196991882, "learning_rate": 3.906975961377046e-05, "loss": 1.0392, "step": 1329 }, { "epoch": 1.9605675327068361, "grad_norm": 2.120874664063797, "learning_rate": 3.905339407046231e-05, "loss": 1.1065, "step": 1330 }, { "epoch": 1.9620416436336834, "grad_norm": 2.165331196637501, "learning_rate": 3.9037019717930826e-05, "loss": 1.0694, "step": 1331 }, { "epoch": 1.9635157545605306, "grad_norm": 2.1167407250652257, "learning_rate": 3.902063656644012e-05, "loss": 0.9237, "step": 1332 }, { "epoch": 1.9649898654873779, "grad_norm": 2.1523392083592623, "learning_rate": 3.900424462625977e-05, "loss": 0.9871, "step": 1333 }, { "epoch": 1.966463976414225, "grad_norm": 2.3103300687786756, "learning_rate": 3.898784390766491e-05, "loss": 0.9891, "step": 1334 }, { "epoch": 1.9679380873410723, "grad_norm": 2.112098461565283, "learning_rate": 3.897143442093616e-05, "loss": 0.9266, "step": 1335 }, { "epoch": 1.9694121982679196, "grad_norm": 1.9204234402350115, "learning_rate": 3.895501617635964e-05, "loss": 0.863, "step": 1336 }, { "epoch": 1.9708863091947668, "grad_norm": 1.9170680524182075, "learning_rate": 3.893858918422693e-05, "loss": 0.8693, "step": 1337 }, { "epoch": 1.972360420121614, "grad_norm": 2.024998361092363, "learning_rate": 3.892215345483515e-05, "loss": 0.9048, "step": 1338 }, { "epoch": 1.9738345310484613, "grad_norm": 2.310589800425441, "learning_rate": 3.890570899848685e-05, "loss": 1.1133, "step": 1339 }, { "epoch": 1.9753086419753085, "grad_norm": 2.3476998171199255, "learning_rate": 3.888925582549006e-05, "loss": 1.0502, "step": 1340 }, { "epoch": 1.9767827529021558, "grad_norm": 2.0197108057947775, "learning_rate": 3.887279394615829e-05, "loss": 0.9258, "step": 1341 }, { "epoch": 1.978256863829003, "grad_norm": 1.9745177295822933, "learning_rate": 3.885632337081049e-05, "loss": 0.9706, "step": 1342 }, { "epoch": 1.9797309747558502, "grad_norm": 2.0723108399947665, "learning_rate": 3.8839844109771086e-05, "loss": 0.8527, "step": 1343 }, { "epoch": 1.9812050856826975, "grad_norm": 2.0758245829006383, "learning_rate": 3.8823356173369895e-05, "loss": 0.9908, "step": 1344 }, { "epoch": 1.9826791966095447, "grad_norm": 1.8979805604443654, "learning_rate": 3.8806859571942244e-05, "loss": 0.9915, "step": 1345 }, { "epoch": 1.984153307536392, "grad_norm": 2.2030472978457385, "learning_rate": 3.8790354315828846e-05, "loss": 0.9454, "step": 1346 }, { "epoch": 1.9856274184632392, "grad_norm": 2.1253058269560317, "learning_rate": 3.877384041537584e-05, "loss": 1.1081, "step": 1347 }, { "epoch": 1.9871015293900864, "grad_norm": 2.2339276486175224, "learning_rate": 3.8757317880934786e-05, "loss": 1.0477, "step": 1348 }, { "epoch": 1.988575640316934, "grad_norm": 1.74167485242803, "learning_rate": 3.8740786722862676e-05, "loss": 0.7763, "step": 1349 }, { "epoch": 1.9900497512437811, "grad_norm": 2.336827137434745, "learning_rate": 3.872424695152189e-05, "loss": 1.0492, "step": 1350 }, { "epoch": 1.9915238621706284, "grad_norm": 2.1578144534776866, "learning_rate": 3.870769857728022e-05, "loss": 0.9071, "step": 1351 }, { "epoch": 1.9929979730974756, "grad_norm": 2.204236663206911, "learning_rate": 3.869114161051082e-05, "loss": 0.9344, "step": 1352 }, { "epoch": 1.9944720840243229, "grad_norm": 2.094661115855522, "learning_rate": 3.867457606159226e-05, "loss": 0.8977, "step": 1353 }, { "epoch": 1.99594619495117, "grad_norm": 1.9728678674782243, "learning_rate": 3.86580019409085e-05, "loss": 0.9153, "step": 1354 }, { "epoch": 1.9974203058780173, "grad_norm": 1.851553035663608, "learning_rate": 3.8641419258848835e-05, "loss": 0.8542, "step": 1355 }, { "epoch": 1.9988944168048646, "grad_norm": 2.2227801404056557, "learning_rate": 3.862482802580795e-05, "loss": 0.9748, "step": 1356 }, { "epoch": 2.000368527731712, "grad_norm": 1.9962615000489687, "learning_rate": 3.860822825218588e-05, "loss": 0.8629, "step": 1357 }, { "epoch": 2.0018426386585593, "grad_norm": 1.9020914462676604, "learning_rate": 3.859161994838803e-05, "loss": 0.6634, "step": 1358 }, { "epoch": 2.0033167495854065, "grad_norm": 1.5972108849754263, "learning_rate": 3.8575003124825135e-05, "loss": 0.5798, "step": 1359 }, { "epoch": 2.0047908605122537, "grad_norm": 1.9442328708831498, "learning_rate": 3.855837779191329e-05, "loss": 0.5766, "step": 1360 }, { "epoch": 2.006264971439101, "grad_norm": 1.738806484464985, "learning_rate": 3.8541743960073893e-05, "loss": 0.5748, "step": 1361 }, { "epoch": 2.0077390823659482, "grad_norm": 1.8961826952743852, "learning_rate": 3.8525101639733706e-05, "loss": 0.6489, "step": 1362 }, { "epoch": 2.0092131932927955, "grad_norm": 1.6294958570672902, "learning_rate": 3.850845084132478e-05, "loss": 0.6, "step": 1363 }, { "epoch": 2.0106873042196427, "grad_norm": 1.805394418161016, "learning_rate": 3.84917915752845e-05, "loss": 0.6923, "step": 1364 }, { "epoch": 2.01216141514649, "grad_norm": 1.7940829934918343, "learning_rate": 3.847512385205556e-05, "loss": 0.5275, "step": 1365 }, { "epoch": 2.013635526073337, "grad_norm": 1.7410158742348585, "learning_rate": 3.845844768208593e-05, "loss": 0.5822, "step": 1366 }, { "epoch": 2.0151096370001844, "grad_norm": 1.8702009456194406, "learning_rate": 3.8441763075828904e-05, "loss": 0.4839, "step": 1367 }, { "epoch": 2.0165837479270317, "grad_norm": 2.361165762873421, "learning_rate": 3.842507004374304e-05, "loss": 0.5575, "step": 1368 }, { "epoch": 2.018057858853879, "grad_norm": 2.1407228990429985, "learning_rate": 3.8408368596292224e-05, "loss": 0.6206, "step": 1369 }, { "epoch": 2.019531969780726, "grad_norm": 2.011783813609558, "learning_rate": 3.839165874394555e-05, "loss": 0.5276, "step": 1370 }, { "epoch": 2.0210060807075734, "grad_norm": 2.2065961830929735, "learning_rate": 3.8374940497177434e-05, "loss": 0.6491, "step": 1371 }, { "epoch": 2.0224801916344206, "grad_norm": 2.408256288855844, "learning_rate": 3.835821386646753e-05, "loss": 0.6727, "step": 1372 }, { "epoch": 2.023954302561268, "grad_norm": 2.0404486493821357, "learning_rate": 3.834147886230074e-05, "loss": 0.5946, "step": 1373 }, { "epoch": 2.025428413488115, "grad_norm": 2.312756189125463, "learning_rate": 3.8324735495167246e-05, "loss": 0.7123, "step": 1374 }, { "epoch": 2.0269025244149623, "grad_norm": 2.155247994511694, "learning_rate": 3.8307983775562435e-05, "loss": 0.651, "step": 1375 }, { "epoch": 2.0283766353418096, "grad_norm": 2.064424137428052, "learning_rate": 3.8291223713986955e-05, "loss": 0.491, "step": 1376 }, { "epoch": 2.029850746268657, "grad_norm": 1.9624906065892207, "learning_rate": 3.827445532094669e-05, "loss": 0.6008, "step": 1377 }, { "epoch": 2.031324857195504, "grad_norm": 2.2442812829885765, "learning_rate": 3.8257678606952705e-05, "loss": 0.5519, "step": 1378 }, { "epoch": 2.0327989681223513, "grad_norm": 1.9197771361269074, "learning_rate": 3.824089358252133e-05, "loss": 0.611, "step": 1379 }, { "epoch": 2.0342730790491985, "grad_norm": 2.103909341418194, "learning_rate": 3.822410025817406e-05, "loss": 0.5927, "step": 1380 }, { "epoch": 2.0357471899760458, "grad_norm": 2.2549534419427797, "learning_rate": 3.820729864443764e-05, "loss": 0.4948, "step": 1381 }, { "epoch": 2.037221300902893, "grad_norm": 1.7884269818295455, "learning_rate": 3.819048875184398e-05, "loss": 0.4875, "step": 1382 }, { "epoch": 2.0386954118297402, "grad_norm": 1.9912893028235934, "learning_rate": 3.8173670590930165e-05, "loss": 0.5012, "step": 1383 }, { "epoch": 2.0401695227565875, "grad_norm": 2.2563430597868948, "learning_rate": 3.815684417223851e-05, "loss": 0.6226, "step": 1384 }, { "epoch": 2.0416436336834347, "grad_norm": 2.119789579673196, "learning_rate": 3.814000950631647e-05, "loss": 0.6459, "step": 1385 }, { "epoch": 2.043117744610282, "grad_norm": 1.9855418941983933, "learning_rate": 3.812316660371666e-05, "loss": 0.6076, "step": 1386 }, { "epoch": 2.044591855537129, "grad_norm": 2.2837217344007916, "learning_rate": 3.810631547499692e-05, "loss": 0.6108, "step": 1387 }, { "epoch": 2.0460659664639764, "grad_norm": 1.8055052782867362, "learning_rate": 3.808945613072017e-05, "loss": 0.4794, "step": 1388 }, { "epoch": 2.0475400773908237, "grad_norm": 2.058351332345123, "learning_rate": 3.807258858145453e-05, "loss": 0.589, "step": 1389 }, { "epoch": 2.049014188317671, "grad_norm": 2.087503595406183, "learning_rate": 3.8055712837773225e-05, "loss": 0.5993, "step": 1390 }, { "epoch": 2.050488299244518, "grad_norm": 2.3207485418622293, "learning_rate": 3.803882891025466e-05, "loss": 0.5598, "step": 1391 }, { "epoch": 2.0519624101713654, "grad_norm": 2.0049176554525285, "learning_rate": 3.802193680948236e-05, "loss": 0.5659, "step": 1392 }, { "epoch": 2.0534365210982126, "grad_norm": 2.12817851936162, "learning_rate": 3.800503654604493e-05, "loss": 0.5682, "step": 1393 }, { "epoch": 2.05491063202506, "grad_norm": 1.9818301966862133, "learning_rate": 3.798812813053615e-05, "loss": 0.4631, "step": 1394 }, { "epoch": 2.056384742951907, "grad_norm": 1.8309535128298038, "learning_rate": 3.7971211573554865e-05, "loss": 0.6825, "step": 1395 }, { "epoch": 2.0578588538787543, "grad_norm": 1.9250180355069346, "learning_rate": 3.795428688570505e-05, "loss": 0.5707, "step": 1396 }, { "epoch": 2.0593329648056016, "grad_norm": 2.0655949808268064, "learning_rate": 3.793735407759577e-05, "loss": 0.583, "step": 1397 }, { "epoch": 2.060807075732449, "grad_norm": 2.256928478738768, "learning_rate": 3.792041315984118e-05, "loss": 0.6377, "step": 1398 }, { "epoch": 2.062281186659296, "grad_norm": 2.2745729737314626, "learning_rate": 3.7903464143060506e-05, "loss": 0.6017, "step": 1399 }, { "epoch": 2.0637552975861433, "grad_norm": 2.2006789893624177, "learning_rate": 3.788650703787808e-05, "loss": 0.6729, "step": 1400 }, { "epoch": 2.0652294085129905, "grad_norm": 1.9137425992238308, "learning_rate": 3.7869541854923275e-05, "loss": 0.5983, "step": 1401 }, { "epoch": 2.0667035194398378, "grad_norm": 1.8786165961886592, "learning_rate": 3.785256860483054e-05, "loss": 0.5569, "step": 1402 }, { "epoch": 2.068177630366685, "grad_norm": 2.2976982042826832, "learning_rate": 3.783558729823939e-05, "loss": 0.7199, "step": 1403 }, { "epoch": 2.0696517412935322, "grad_norm": 2.059523081339658, "learning_rate": 3.781859794579436e-05, "loss": 0.5398, "step": 1404 }, { "epoch": 2.0711258522203795, "grad_norm": 2.1654905159107742, "learning_rate": 3.780160055814507e-05, "loss": 0.6677, "step": 1405 }, { "epoch": 2.0725999631472267, "grad_norm": 1.9272679902005418, "learning_rate": 3.778459514594613e-05, "loss": 0.5577, "step": 1406 }, { "epoch": 2.074074074074074, "grad_norm": 2.114825180079281, "learning_rate": 3.776758171985723e-05, "loss": 0.5633, "step": 1407 }, { "epoch": 2.075548185000921, "grad_norm": 1.831013476098729, "learning_rate": 3.775056029054304e-05, "loss": 0.5579, "step": 1408 }, { "epoch": 2.0770222959277684, "grad_norm": 2.167287028584359, "learning_rate": 3.773353086867328e-05, "loss": 0.5525, "step": 1409 }, { "epoch": 2.0784964068546157, "grad_norm": 2.074081847419755, "learning_rate": 3.7716493464922654e-05, "loss": 0.6214, "step": 1410 }, { "epoch": 2.079970517781463, "grad_norm": 1.9336763614770334, "learning_rate": 3.769944808997088e-05, "loss": 0.5521, "step": 1411 }, { "epoch": 2.08144462870831, "grad_norm": 2.2270683918503176, "learning_rate": 3.768239475450269e-05, "loss": 0.5582, "step": 1412 }, { "epoch": 2.0829187396351574, "grad_norm": 2.0497260585205104, "learning_rate": 3.7665333469207766e-05, "loss": 0.5018, "step": 1413 }, { "epoch": 2.0843928505620046, "grad_norm": 1.9966177952325421, "learning_rate": 3.7648264244780804e-05, "loss": 0.5675, "step": 1414 }, { "epoch": 2.085866961488852, "grad_norm": 2.098755844862378, "learning_rate": 3.7631187091921483e-05, "loss": 0.5559, "step": 1415 }, { "epoch": 2.087341072415699, "grad_norm": 2.197087340857371, "learning_rate": 3.761410202133443e-05, "loss": 0.5689, "step": 1416 }, { "epoch": 2.0888151833425463, "grad_norm": 1.88785387726026, "learning_rate": 3.759700904372924e-05, "loss": 0.5074, "step": 1417 }, { "epoch": 2.0902892942693936, "grad_norm": 1.8018770338689392, "learning_rate": 3.757990816982046e-05, "loss": 0.52, "step": 1418 }, { "epoch": 2.091763405196241, "grad_norm": 2.104395775234997, "learning_rate": 3.756279941032761e-05, "loss": 0.5899, "step": 1419 }, { "epoch": 2.093237516123088, "grad_norm": 1.9250200171482656, "learning_rate": 3.754568277597512e-05, "loss": 0.5582, "step": 1420 }, { "epoch": 2.0947116270499353, "grad_norm": 2.027028083113662, "learning_rate": 3.7528558277492395e-05, "loss": 0.5727, "step": 1421 }, { "epoch": 2.096185737976783, "grad_norm": 2.0331296000529755, "learning_rate": 3.751142592561373e-05, "loss": 0.5811, "step": 1422 }, { "epoch": 2.09765984890363, "grad_norm": 2.001699917295029, "learning_rate": 3.749428573107837e-05, "loss": 0.5934, "step": 1423 }, { "epoch": 2.0991339598304775, "grad_norm": 2.185245987512605, "learning_rate": 3.747713770463046e-05, "loss": 0.5257, "step": 1424 }, { "epoch": 2.1006080707573247, "grad_norm": 2.2239079227734697, "learning_rate": 3.7459981857019064e-05, "loss": 0.6278, "step": 1425 }, { "epoch": 2.102082181684172, "grad_norm": 2.270648202144941, "learning_rate": 3.7442818198998156e-05, "loss": 0.6609, "step": 1426 }, { "epoch": 2.103556292611019, "grad_norm": 2.2061413616633003, "learning_rate": 3.7425646741326585e-05, "loss": 0.5897, "step": 1427 }, { "epoch": 2.1050304035378664, "grad_norm": 2.0890265177079517, "learning_rate": 3.74084674947681e-05, "loss": 0.6214, "step": 1428 }, { "epoch": 2.1065045144647137, "grad_norm": 1.9219530291340394, "learning_rate": 3.739128047009134e-05, "loss": 0.5339, "step": 1429 }, { "epoch": 2.107978625391561, "grad_norm": 2.006063805612249, "learning_rate": 3.7374085678069794e-05, "loss": 0.6012, "step": 1430 }, { "epoch": 2.109452736318408, "grad_norm": 1.9386926487323304, "learning_rate": 3.735688312948186e-05, "loss": 0.5959, "step": 1431 }, { "epoch": 2.1109268472452554, "grad_norm": 2.2361244875216246, "learning_rate": 3.733967283511077e-05, "loss": 0.6838, "step": 1432 }, { "epoch": 2.1124009581721026, "grad_norm": 2.24042444329109, "learning_rate": 3.7322454805744605e-05, "loss": 0.6196, "step": 1433 }, { "epoch": 2.11387506909895, "grad_norm": 2.218717655429681, "learning_rate": 3.730522905217632e-05, "loss": 0.5356, "step": 1434 }, { "epoch": 2.115349180025797, "grad_norm": 2.1165005741384455, "learning_rate": 3.728799558520369e-05, "loss": 0.6688, "step": 1435 }, { "epoch": 2.1168232909526443, "grad_norm": 2.5270548777748467, "learning_rate": 3.7270754415629346e-05, "loss": 0.6197, "step": 1436 }, { "epoch": 2.1182974018794916, "grad_norm": 2.3519546333920176, "learning_rate": 3.725350555426072e-05, "loss": 0.5918, "step": 1437 }, { "epoch": 2.119771512806339, "grad_norm": 2.0109809781867423, "learning_rate": 3.7236249011910085e-05, "loss": 0.5309, "step": 1438 }, { "epoch": 2.121245623733186, "grad_norm": 2.224562861330565, "learning_rate": 3.7218984799394534e-05, "loss": 0.6383, "step": 1439 }, { "epoch": 2.1227197346600333, "grad_norm": 2.3853572077330236, "learning_rate": 3.7201712927535954e-05, "loss": 0.6702, "step": 1440 }, { "epoch": 2.1241938455868805, "grad_norm": 2.107555593750616, "learning_rate": 3.7184433407161026e-05, "loss": 0.6309, "step": 1441 }, { "epoch": 2.1256679565137278, "grad_norm": 1.8893273684560714, "learning_rate": 3.716714624910126e-05, "loss": 0.5675, "step": 1442 }, { "epoch": 2.127142067440575, "grad_norm": 2.0651587486670473, "learning_rate": 3.714985146419291e-05, "loss": 0.5531, "step": 1443 }, { "epoch": 2.1286161783674222, "grad_norm": 2.0359403011510615, "learning_rate": 3.713254906327703e-05, "loss": 0.512, "step": 1444 }, { "epoch": 2.1300902892942695, "grad_norm": 1.9590339083889103, "learning_rate": 3.711523905719946e-05, "loss": 0.6451, "step": 1445 }, { "epoch": 2.1315644002211167, "grad_norm": 2.077675792358046, "learning_rate": 3.70979214568108e-05, "loss": 0.6244, "step": 1446 }, { "epoch": 2.133038511147964, "grad_norm": 2.265480931404063, "learning_rate": 3.70805962729664e-05, "loss": 0.6779, "step": 1447 }, { "epoch": 2.134512622074811, "grad_norm": 2.118458946739586, "learning_rate": 3.706326351652636e-05, "loss": 0.5837, "step": 1448 }, { "epoch": 2.1359867330016584, "grad_norm": 2.0620666973526958, "learning_rate": 3.704592319835557e-05, "loss": 0.5178, "step": 1449 }, { "epoch": 2.1374608439285057, "grad_norm": 2.1499584016546027, "learning_rate": 3.702857532932359e-05, "loss": 0.647, "step": 1450 }, { "epoch": 2.138934954855353, "grad_norm": 1.9526370240020705, "learning_rate": 3.7011219920304774e-05, "loss": 0.6135, "step": 1451 }, { "epoch": 2.1404090657822, "grad_norm": 2.101369288755746, "learning_rate": 3.699385698217816e-05, "loss": 0.6957, "step": 1452 }, { "epoch": 2.1418831767090474, "grad_norm": 2.058164026913824, "learning_rate": 3.6976486525827546e-05, "loss": 0.5328, "step": 1453 }, { "epoch": 2.1433572876358946, "grad_norm": 2.0514823950231316, "learning_rate": 3.695910856214141e-05, "loss": 0.6125, "step": 1454 }, { "epoch": 2.144831398562742, "grad_norm": 2.157051932440572, "learning_rate": 3.694172310201295e-05, "loss": 0.6515, "step": 1455 }, { "epoch": 2.146305509489589, "grad_norm": 2.1202299772436453, "learning_rate": 3.692433015634005e-05, "loss": 0.6089, "step": 1456 }, { "epoch": 2.1477796204164363, "grad_norm": 1.9236543683430283, "learning_rate": 3.690692973602532e-05, "loss": 0.6313, "step": 1457 }, { "epoch": 2.1492537313432836, "grad_norm": 2.1964225813266713, "learning_rate": 3.6889521851976005e-05, "loss": 0.6338, "step": 1458 }, { "epoch": 2.150727842270131, "grad_norm": 2.3781880514481055, "learning_rate": 3.6872106515104065e-05, "loss": 0.5034, "step": 1459 }, { "epoch": 2.152201953196978, "grad_norm": 2.083079233568122, "learning_rate": 3.6854683736326125e-05, "loss": 0.5972, "step": 1460 }, { "epoch": 2.1536760641238253, "grad_norm": 2.43712818537755, "learning_rate": 3.683725352656348e-05, "loss": 0.6257, "step": 1461 }, { "epoch": 2.1551501750506725, "grad_norm": 2.4131779207415565, "learning_rate": 3.681981589674206e-05, "loss": 0.5463, "step": 1462 }, { "epoch": 2.1566242859775198, "grad_norm": 2.330592134734503, "learning_rate": 3.6802370857792464e-05, "loss": 0.5802, "step": 1463 }, { "epoch": 2.158098396904367, "grad_norm": 2.431509138481951, "learning_rate": 3.678491842064995e-05, "loss": 0.7072, "step": 1464 }, { "epoch": 2.1595725078312142, "grad_norm": 2.315003148647334, "learning_rate": 3.6767458596254364e-05, "loss": 0.5862, "step": 1465 }, { "epoch": 2.1610466187580615, "grad_norm": 2.198547586099124, "learning_rate": 3.674999139555024e-05, "loss": 0.5326, "step": 1466 }, { "epoch": 2.1625207296849087, "grad_norm": 2.098607137246966, "learning_rate": 3.67325168294867e-05, "loss": 0.5296, "step": 1467 }, { "epoch": 2.163994840611756, "grad_norm": 1.944291363851364, "learning_rate": 3.67150349090175e-05, "loss": 0.6115, "step": 1468 }, { "epoch": 2.165468951538603, "grad_norm": 2.1945411711781233, "learning_rate": 3.669754564510099e-05, "loss": 0.6329, "step": 1469 }, { "epoch": 2.1669430624654504, "grad_norm": 2.1521108954832044, "learning_rate": 3.668004904870014e-05, "loss": 0.565, "step": 1470 }, { "epoch": 2.1684171733922977, "grad_norm": 1.9899254017175203, "learning_rate": 3.666254513078251e-05, "loss": 0.485, "step": 1471 }, { "epoch": 2.169891284319145, "grad_norm": 1.9226339438894875, "learning_rate": 3.664503390232024e-05, "loss": 0.5293, "step": 1472 }, { "epoch": 2.171365395245992, "grad_norm": 2.0101567101950626, "learning_rate": 3.6627515374290065e-05, "loss": 0.5915, "step": 1473 }, { "epoch": 2.1728395061728394, "grad_norm": 1.978065853831648, "learning_rate": 3.66099895576733e-05, "loss": 0.4891, "step": 1474 }, { "epoch": 2.1743136170996866, "grad_norm": 1.976584453234681, "learning_rate": 3.6592456463455804e-05, "loss": 0.5935, "step": 1475 }, { "epoch": 2.175787728026534, "grad_norm": 2.27146621657483, "learning_rate": 3.657491610262802e-05, "loss": 0.6586, "step": 1476 }, { "epoch": 2.177261838953381, "grad_norm": 2.3702518784799524, "learning_rate": 3.655736848618495e-05, "loss": 0.6818, "step": 1477 }, { "epoch": 2.1787359498802283, "grad_norm": 2.029884466256663, "learning_rate": 3.653981362512612e-05, "loss": 0.5436, "step": 1478 }, { "epoch": 2.1802100608070756, "grad_norm": 2.2594318852313835, "learning_rate": 3.652225153045562e-05, "loss": 0.6356, "step": 1479 }, { "epoch": 2.181684171733923, "grad_norm": 1.973983650414223, "learning_rate": 3.650468221318206e-05, "loss": 0.4959, "step": 1480 }, { "epoch": 2.18315828266077, "grad_norm": 2.009260790769453, "learning_rate": 3.648710568431859e-05, "loss": 0.6101, "step": 1481 }, { "epoch": 2.1846323935876173, "grad_norm": 1.9610468009487878, "learning_rate": 3.6469521954882865e-05, "loss": 0.6085, "step": 1482 }, { "epoch": 2.1861065045144645, "grad_norm": 2.2390412376869033, "learning_rate": 3.645193103589707e-05, "loss": 0.6667, "step": 1483 }, { "epoch": 2.1875806154413118, "grad_norm": 2.1359413308790858, "learning_rate": 3.6434332938387875e-05, "loss": 0.6158, "step": 1484 }, { "epoch": 2.189054726368159, "grad_norm": 1.9670005776191644, "learning_rate": 3.6416727673386484e-05, "loss": 0.5916, "step": 1485 }, { "epoch": 2.1905288372950062, "grad_norm": 2.1417542980191486, "learning_rate": 3.639911525192857e-05, "loss": 0.732, "step": 1486 }, { "epoch": 2.1920029482218535, "grad_norm": 1.9961971845907573, "learning_rate": 3.638149568505428e-05, "loss": 0.5897, "step": 1487 }, { "epoch": 2.1934770591487007, "grad_norm": 2.1004231707772343, "learning_rate": 3.636386898380827e-05, "loss": 0.6746, "step": 1488 }, { "epoch": 2.194951170075548, "grad_norm": 2.0894050494197556, "learning_rate": 3.634623515923965e-05, "loss": 0.6284, "step": 1489 }, { "epoch": 2.196425281002395, "grad_norm": 1.9713251136107017, "learning_rate": 3.632859422240199e-05, "loss": 0.5111, "step": 1490 }, { "epoch": 2.1978993919292424, "grad_norm": 1.988224470905424, "learning_rate": 3.631094618435334e-05, "loss": 0.6639, "step": 1491 }, { "epoch": 2.19937350285609, "grad_norm": 2.20157368469646, "learning_rate": 3.629329105615617e-05, "loss": 0.544, "step": 1492 }, { "epoch": 2.2008476137829374, "grad_norm": 1.9362515765110442, "learning_rate": 3.6275628848877445e-05, "loss": 0.5377, "step": 1493 }, { "epoch": 2.2023217247097846, "grad_norm": 2.1891808727642306, "learning_rate": 3.6257959573588505e-05, "loss": 0.6001, "step": 1494 }, { "epoch": 2.203795835636632, "grad_norm": 2.0878090909448432, "learning_rate": 3.624028324136517e-05, "loss": 0.6455, "step": 1495 }, { "epoch": 2.205269946563479, "grad_norm": 2.003683512840515, "learning_rate": 3.622259986328765e-05, "loss": 0.5655, "step": 1496 }, { "epoch": 2.2067440574903263, "grad_norm": 1.9854923616798894, "learning_rate": 3.620490945044059e-05, "loss": 0.5546, "step": 1497 }, { "epoch": 2.2082181684171736, "grad_norm": 2.1095132287969856, "learning_rate": 3.618721201391304e-05, "loss": 0.7687, "step": 1498 }, { "epoch": 2.209692279344021, "grad_norm": 2.185534220668236, "learning_rate": 3.616950756479846e-05, "loss": 0.6225, "step": 1499 }, { "epoch": 2.211166390270868, "grad_norm": 2.056651869812861, "learning_rate": 3.615179611419469e-05, "loss": 0.5415, "step": 1500 }, { "epoch": 2.211166390270868, "eval_bleu": 0.07328609392745158, "eval_bleu_1gram": 0.3760537712274766, "eval_bleu_2gram": 0.1506597861877169, "eval_bleu_3gram": 0.06810676407884586, "eval_bleu_4gram": 0.03396201539050454, "eval_rag_val_loss": 1.3775733505846353, "eval_rouge1": 0.3699492084035536, "eval_rouge2": 0.14635198762283277, "eval_rougeL": 0.35132086618685193, "step": 1500 }, { "epoch": 2.2126405011977153, "grad_norm": 2.498044775275212, "learning_rate": 3.613407767320398e-05, "loss": 0.5655, "step": 1501 }, { "epoch": 2.2141146121245625, "grad_norm": 1.9815123928817404, "learning_rate": 3.6116352252932936e-05, "loss": 0.6173, "step": 1502 }, { "epoch": 2.2155887230514097, "grad_norm": 2.1860319388482856, "learning_rate": 3.609861986449256e-05, "loss": 0.502, "step": 1503 }, { "epoch": 2.217062833978257, "grad_norm": 2.0451133077331645, "learning_rate": 3.6080880518998216e-05, "loss": 0.5582, "step": 1504 }, { "epoch": 2.2185369449051042, "grad_norm": 2.1638882012530067, "learning_rate": 3.606313422756962e-05, "loss": 0.7475, "step": 1505 }, { "epoch": 2.2200110558319515, "grad_norm": 2.0165203382931094, "learning_rate": 3.604538100133086e-05, "loss": 0.6701, "step": 1506 }, { "epoch": 2.2214851667587987, "grad_norm": 2.0469786639571503, "learning_rate": 3.602762085141035e-05, "loss": 0.5434, "step": 1507 }, { "epoch": 2.222959277685646, "grad_norm": 2.3550502727998293, "learning_rate": 3.600985378894086e-05, "loss": 0.6579, "step": 1508 }, { "epoch": 2.224433388612493, "grad_norm": 2.139874180455176, "learning_rate": 3.599207982505949e-05, "loss": 0.5812, "step": 1509 }, { "epoch": 2.2259074995393404, "grad_norm": 2.153859568779428, "learning_rate": 3.597429897090765e-05, "loss": 0.6886, "step": 1510 }, { "epoch": 2.2273816104661877, "grad_norm": 2.104772222522811, "learning_rate": 3.5956511237631106e-05, "loss": 0.5817, "step": 1511 }, { "epoch": 2.228855721393035, "grad_norm": 2.067220891450716, "learning_rate": 3.59387166363799e-05, "loss": 0.6084, "step": 1512 }, { "epoch": 2.230329832319882, "grad_norm": 1.9415066422538692, "learning_rate": 3.592091517830838e-05, "loss": 0.589, "step": 1513 }, { "epoch": 2.2318039432467294, "grad_norm": 2.020629584144704, "learning_rate": 3.5903106874575235e-05, "loss": 0.636, "step": 1514 }, { "epoch": 2.2332780541735766, "grad_norm": 2.143076363203829, "learning_rate": 3.5885291736343375e-05, "loss": 0.6207, "step": 1515 }, { "epoch": 2.234752165100424, "grad_norm": 2.294539217599005, "learning_rate": 3.586746977478006e-05, "loss": 0.6515, "step": 1516 }, { "epoch": 2.236226276027271, "grad_norm": 2.0208286272236666, "learning_rate": 3.58496410010568e-05, "loss": 0.6067, "step": 1517 }, { "epoch": 2.2377003869541183, "grad_norm": 2.245885583933836, "learning_rate": 3.583180542634937e-05, "loss": 0.6533, "step": 1518 }, { "epoch": 2.2391744978809656, "grad_norm": 2.1431025069406098, "learning_rate": 3.5813963061837815e-05, "loss": 0.7149, "step": 1519 }, { "epoch": 2.240648608807813, "grad_norm": 1.9540416550115465, "learning_rate": 3.5796113918706426e-05, "loss": 0.7468, "step": 1520 }, { "epoch": 2.24212271973466, "grad_norm": 1.8062984658045163, "learning_rate": 3.577825800814376e-05, "loss": 0.6133, "step": 1521 }, { "epoch": 2.2435968306615073, "grad_norm": 1.9659465629867174, "learning_rate": 3.576039534134262e-05, "loss": 0.6408, "step": 1522 }, { "epoch": 2.2450709415883545, "grad_norm": 2.129295047639557, "learning_rate": 3.57425259295e-05, "loss": 0.5593, "step": 1523 }, { "epoch": 2.2465450525152018, "grad_norm": 1.9875460541385643, "learning_rate": 3.5724649783817185e-05, "loss": 0.6647, "step": 1524 }, { "epoch": 2.248019163442049, "grad_norm": 2.1215614098357714, "learning_rate": 3.5706766915499646e-05, "loss": 0.5596, "step": 1525 }, { "epoch": 2.2494932743688962, "grad_norm": 2.022676891989229, "learning_rate": 3.568887733575706e-05, "loss": 0.6743, "step": 1526 }, { "epoch": 2.2509673852957435, "grad_norm": 1.9047117436706928, "learning_rate": 3.567098105580333e-05, "loss": 0.5473, "step": 1527 }, { "epoch": 2.2524414962225907, "grad_norm": 2.7524797356714514, "learning_rate": 3.5653078086856546e-05, "loss": 0.6065, "step": 1528 }, { "epoch": 2.253915607149438, "grad_norm": 2.112121376552387, "learning_rate": 3.563516844013901e-05, "loss": 0.7392, "step": 1529 }, { "epoch": 2.255389718076285, "grad_norm": 1.8151237964708888, "learning_rate": 3.561725212687718e-05, "loss": 0.5593, "step": 1530 }, { "epoch": 2.2568638290031324, "grad_norm": 2.2395541048730134, "learning_rate": 3.559932915830172e-05, "loss": 0.7774, "step": 1531 }, { "epoch": 2.2583379399299797, "grad_norm": 2.2036426558291713, "learning_rate": 3.558139954564746e-05, "loss": 0.6017, "step": 1532 }, { "epoch": 2.259812050856827, "grad_norm": 1.9370127649787634, "learning_rate": 3.556346330015338e-05, "loss": 0.5289, "step": 1533 }, { "epoch": 2.261286161783674, "grad_norm": 2.1255032560529514, "learning_rate": 3.554552043306264e-05, "loss": 0.6531, "step": 1534 }, { "epoch": 2.2627602727105214, "grad_norm": 2.239968933503115, "learning_rate": 3.552757095562253e-05, "loss": 0.626, "step": 1535 }, { "epoch": 2.2642343836373686, "grad_norm": 2.0736720052426683, "learning_rate": 3.55096148790845e-05, "loss": 0.6464, "step": 1536 }, { "epoch": 2.265708494564216, "grad_norm": 2.0799756521854005, "learning_rate": 3.5491652214704115e-05, "loss": 0.5696, "step": 1537 }, { "epoch": 2.267182605491063, "grad_norm": 2.0809668133503187, "learning_rate": 3.547368297374109e-05, "loss": 0.6186, "step": 1538 }, { "epoch": 2.2686567164179103, "grad_norm": 2.023681390269657, "learning_rate": 3.545570716745927e-05, "loss": 0.5908, "step": 1539 }, { "epoch": 2.2701308273447576, "grad_norm": 2.2237398157856836, "learning_rate": 3.543772480712658e-05, "loss": 0.6718, "step": 1540 }, { "epoch": 2.271604938271605, "grad_norm": 1.9923868117017933, "learning_rate": 3.5419735904015095e-05, "loss": 0.5969, "step": 1541 }, { "epoch": 2.273079049198452, "grad_norm": 2.1338573313950233, "learning_rate": 3.540174046940096e-05, "loss": 0.6362, "step": 1542 }, { "epoch": 2.2745531601252993, "grad_norm": 2.25632995717401, "learning_rate": 3.538373851456442e-05, "loss": 0.7535, "step": 1543 }, { "epoch": 2.2760272710521465, "grad_norm": 2.3072691859634804, "learning_rate": 3.536573005078981e-05, "loss": 0.6516, "step": 1544 }, { "epoch": 2.2775013819789938, "grad_norm": 2.0622824351839544, "learning_rate": 3.5347715089365576e-05, "loss": 0.587, "step": 1545 }, { "epoch": 2.278975492905841, "grad_norm": 2.2195640467550617, "learning_rate": 3.532969364158417e-05, "loss": 0.6163, "step": 1546 }, { "epoch": 2.2804496038326882, "grad_norm": 2.196348984148115, "learning_rate": 3.5311665718742184e-05, "loss": 0.6869, "step": 1547 }, { "epoch": 2.2819237147595355, "grad_norm": 2.055430462900018, "learning_rate": 3.529363133214021e-05, "loss": 0.6057, "step": 1548 }, { "epoch": 2.2833978256863827, "grad_norm": 2.243903908724015, "learning_rate": 3.527559049308291e-05, "loss": 0.6716, "step": 1549 }, { "epoch": 2.28487193661323, "grad_norm": 2.3063586367113014, "learning_rate": 3.525754321287902e-05, "loss": 0.7659, "step": 1550 }, { "epoch": 2.286346047540077, "grad_norm": 2.103468020439744, "learning_rate": 3.523948950284127e-05, "loss": 0.6187, "step": 1551 }, { "epoch": 2.287820158466925, "grad_norm": 2.4385357147974127, "learning_rate": 3.522142937428645e-05, "loss": 0.5421, "step": 1552 }, { "epoch": 2.289294269393772, "grad_norm": 1.9874623828902853, "learning_rate": 3.5203362838535355e-05, "loss": 0.6634, "step": 1553 }, { "epoch": 2.2907683803206194, "grad_norm": 1.989000890935868, "learning_rate": 3.518528990691281e-05, "loss": 0.6425, "step": 1554 }, { "epoch": 2.2922424912474666, "grad_norm": 2.1727447963586886, "learning_rate": 3.516721059074764e-05, "loss": 0.6132, "step": 1555 }, { "epoch": 2.293716602174314, "grad_norm": 2.2569784452045436, "learning_rate": 3.5149124901372677e-05, "loss": 0.7065, "step": 1556 }, { "epoch": 2.295190713101161, "grad_norm": 2.240084416127154, "learning_rate": 3.513103285012475e-05, "loss": 0.7332, "step": 1557 }, { "epoch": 2.2966648240280083, "grad_norm": 2.1553241013387114, "learning_rate": 3.511293444834466e-05, "loss": 0.7187, "step": 1558 }, { "epoch": 2.2981389349548556, "grad_norm": 1.9443436013668962, "learning_rate": 3.509482970737722e-05, "loss": 0.6451, "step": 1559 }, { "epoch": 2.299613045881703, "grad_norm": 2.1545610309135816, "learning_rate": 3.5076718638571185e-05, "loss": 0.5346, "step": 1560 }, { "epoch": 2.30108715680855, "grad_norm": 2.205655856004502, "learning_rate": 3.505860125327928e-05, "loss": 0.6164, "step": 1561 }, { "epoch": 2.3025612677353973, "grad_norm": 2.239997458797103, "learning_rate": 3.504047756285822e-05, "loss": 0.6144, "step": 1562 }, { "epoch": 2.3040353786622445, "grad_norm": 2.1571512964673643, "learning_rate": 3.5022347578668644e-05, "loss": 0.5079, "step": 1563 }, { "epoch": 2.3055094895890917, "grad_norm": 2.0847413709036258, "learning_rate": 3.5004211312075143e-05, "loss": 0.7491, "step": 1564 }, { "epoch": 2.306983600515939, "grad_norm": 2.1501527909391798, "learning_rate": 3.498606877444625e-05, "loss": 0.576, "step": 1565 }, { "epoch": 2.308457711442786, "grad_norm": 2.2687452205891354, "learning_rate": 3.4967919977154406e-05, "loss": 0.6498, "step": 1566 }, { "epoch": 2.3099318223696335, "grad_norm": 2.1961698656655457, "learning_rate": 3.4949764931576014e-05, "loss": 0.5248, "step": 1567 }, { "epoch": 2.3114059332964807, "grad_norm": 1.896305724037486, "learning_rate": 3.4931603649091374e-05, "loss": 0.5817, "step": 1568 }, { "epoch": 2.312880044223328, "grad_norm": 2.3557428355512355, "learning_rate": 3.4913436141084676e-05, "loss": 0.7334, "step": 1569 }, { "epoch": 2.314354155150175, "grad_norm": 2.1317437816487574, "learning_rate": 3.489526241894406e-05, "loss": 0.6361, "step": 1570 }, { "epoch": 2.3158282660770224, "grad_norm": 2.049282027331808, "learning_rate": 3.487708249406153e-05, "loss": 0.6305, "step": 1571 }, { "epoch": 2.3173023770038697, "grad_norm": 2.3247153897566863, "learning_rate": 3.4858896377832966e-05, "loss": 0.6709, "step": 1572 }, { "epoch": 2.318776487930717, "grad_norm": 1.9183448686868807, "learning_rate": 3.4840704081658155e-05, "loss": 0.5515, "step": 1573 }, { "epoch": 2.320250598857564, "grad_norm": 2.0277654249723707, "learning_rate": 3.482250561694075e-05, "loss": 0.6201, "step": 1574 }, { "epoch": 2.3217247097844114, "grad_norm": 1.9201814586602848, "learning_rate": 3.4804300995088264e-05, "loss": 0.5648, "step": 1575 }, { "epoch": 2.3231988207112586, "grad_norm": 2.3197722657010056, "learning_rate": 3.478609022751207e-05, "loss": 0.5776, "step": 1576 }, { "epoch": 2.324672931638106, "grad_norm": 2.2448754763317806, "learning_rate": 3.4767873325627406e-05, "loss": 0.5442, "step": 1577 }, { "epoch": 2.326147042564953, "grad_norm": 2.150835841740423, "learning_rate": 3.4749650300853343e-05, "loss": 0.6484, "step": 1578 }, { "epoch": 2.3276211534918003, "grad_norm": 2.207053935516707, "learning_rate": 3.473142116461279e-05, "loss": 0.5753, "step": 1579 }, { "epoch": 2.3290952644186476, "grad_norm": 2.1715457584374196, "learning_rate": 3.47131859283325e-05, "loss": 0.5615, "step": 1580 }, { "epoch": 2.330569375345495, "grad_norm": 2.1862161411043055, "learning_rate": 3.469494460344304e-05, "loss": 0.5544, "step": 1581 }, { "epoch": 2.332043486272342, "grad_norm": 2.2016366029699053, "learning_rate": 3.467669720137879e-05, "loss": 0.6268, "step": 1582 }, { "epoch": 2.3335175971991893, "grad_norm": 2.1833261179137455, "learning_rate": 3.465844373357794e-05, "loss": 0.6144, "step": 1583 }, { "epoch": 2.3349917081260365, "grad_norm": 2.2042021586896405, "learning_rate": 3.464018421148249e-05, "loss": 0.5622, "step": 1584 }, { "epoch": 2.3364658190528838, "grad_norm": 2.33099976288325, "learning_rate": 3.462191864653821e-05, "loss": 0.7194, "step": 1585 }, { "epoch": 2.337939929979731, "grad_norm": 2.019235379000701, "learning_rate": 3.460364705019472e-05, "loss": 0.5636, "step": 1586 }, { "epoch": 2.3394140409065782, "grad_norm": 2.139319028442589, "learning_rate": 3.458536943390536e-05, "loss": 0.5846, "step": 1587 }, { "epoch": 2.3408881518334255, "grad_norm": 2.0449850661051934, "learning_rate": 3.456708580912725e-05, "loss": 0.5976, "step": 1588 }, { "epoch": 2.3423622627602727, "grad_norm": 2.2348657882639698, "learning_rate": 3.4548796187321295e-05, "loss": 0.6132, "step": 1589 }, { "epoch": 2.34383637368712, "grad_norm": 2.112077465348989, "learning_rate": 3.453050057995217e-05, "loss": 0.5691, "step": 1590 }, { "epoch": 2.345310484613967, "grad_norm": 2.1257851216127803, "learning_rate": 3.451219899848827e-05, "loss": 0.5755, "step": 1591 }, { "epoch": 2.3467845955408144, "grad_norm": 2.2636970472540767, "learning_rate": 3.449389145440175e-05, "loss": 0.6274, "step": 1592 }, { "epoch": 2.3482587064676617, "grad_norm": 2.257388170752782, "learning_rate": 3.4475577959168505e-05, "loss": 0.6637, "step": 1593 }, { "epoch": 2.349732817394509, "grad_norm": 2.2028515152243515, "learning_rate": 3.445725852426817e-05, "loss": 0.5846, "step": 1594 }, { "epoch": 2.351206928321356, "grad_norm": 2.1858716080873175, "learning_rate": 3.443893316118407e-05, "loss": 0.6611, "step": 1595 }, { "epoch": 2.3526810392482034, "grad_norm": 2.2594135244205997, "learning_rate": 3.4420601881403284e-05, "loss": 0.6987, "step": 1596 }, { "epoch": 2.3541551501750506, "grad_norm": 1.8950840723517064, "learning_rate": 3.440226469641658e-05, "loss": 0.5321, "step": 1597 }, { "epoch": 2.355629261101898, "grad_norm": 1.9044842906573412, "learning_rate": 3.4383921617718427e-05, "loss": 0.511, "step": 1598 }, { "epoch": 2.357103372028745, "grad_norm": 2.07245016040396, "learning_rate": 3.4365572656807e-05, "loss": 0.6332, "step": 1599 }, { "epoch": 2.3585774829555923, "grad_norm": 1.8717452728713149, "learning_rate": 3.4347217825184134e-05, "loss": 0.4783, "step": 1600 }, { "epoch": 2.3600515938824396, "grad_norm": 2.0193064581131175, "learning_rate": 3.432885713435539e-05, "loss": 0.5224, "step": 1601 }, { "epoch": 2.361525704809287, "grad_norm": 1.9798855195521348, "learning_rate": 3.431049059582996e-05, "loss": 0.7776, "step": 1602 }, { "epoch": 2.362999815736134, "grad_norm": 1.8885706309310057, "learning_rate": 3.4292118221120715e-05, "loss": 0.5851, "step": 1603 }, { "epoch": 2.3644739266629813, "grad_norm": 2.048645302295026, "learning_rate": 3.42737400217442e-05, "loss": 0.6062, "step": 1604 }, { "epoch": 2.3659480375898285, "grad_norm": 2.240212770472601, "learning_rate": 3.425535600922059e-05, "loss": 0.631, "step": 1605 }, { "epoch": 2.3674221485166758, "grad_norm": 2.1015867777843735, "learning_rate": 3.423696619507369e-05, "loss": 0.7041, "step": 1606 }, { "epoch": 2.368896259443523, "grad_norm": 2.0363275302203356, "learning_rate": 3.4218570590831e-05, "loss": 0.5921, "step": 1607 }, { "epoch": 2.3703703703703702, "grad_norm": 2.2842191421213065, "learning_rate": 3.4200169208023594e-05, "loss": 0.6188, "step": 1608 }, { "epoch": 2.3718444812972175, "grad_norm": 2.0134414555819973, "learning_rate": 3.418176205818618e-05, "loss": 0.6518, "step": 1609 }, { "epoch": 2.3733185922240647, "grad_norm": 2.3847443300838234, "learning_rate": 3.4163349152857096e-05, "loss": 0.6519, "step": 1610 }, { "epoch": 2.374792703150912, "grad_norm": 2.1176408233114454, "learning_rate": 3.4144930503578286e-05, "loss": 0.6196, "step": 1611 }, { "epoch": 2.376266814077759, "grad_norm": 2.2174447747098323, "learning_rate": 3.412650612189528e-05, "loss": 0.587, "step": 1612 }, { "epoch": 2.3777409250046064, "grad_norm": 2.3932788554537807, "learning_rate": 3.4108076019357204e-05, "loss": 0.6685, "step": 1613 }, { "epoch": 2.3792150359314537, "grad_norm": 2.0806821993674105, "learning_rate": 3.4089640207516786e-05, "loss": 0.6117, "step": 1614 }, { "epoch": 2.380689146858301, "grad_norm": 1.974993056273024, "learning_rate": 3.4071198697930315e-05, "loss": 0.6035, "step": 1615 }, { "epoch": 2.382163257785148, "grad_norm": 2.2686843736473006, "learning_rate": 3.405275150215766e-05, "loss": 0.6301, "step": 1616 }, { "epoch": 2.3836373687119954, "grad_norm": 2.3221559747294496, "learning_rate": 3.403429863176226e-05, "loss": 0.635, "step": 1617 }, { "epoch": 2.3851114796388426, "grad_norm": 1.9138882304674754, "learning_rate": 3.40158400983111e-05, "loss": 0.4695, "step": 1618 }, { "epoch": 2.38658559056569, "grad_norm": 2.1386268367118424, "learning_rate": 3.399737591337471e-05, "loss": 0.6905, "step": 1619 }, { "epoch": 2.388059701492537, "grad_norm": 2.1165609523576605, "learning_rate": 3.397890608852718e-05, "loss": 0.6475, "step": 1620 }, { "epoch": 2.3895338124193843, "grad_norm": 2.3544161391505805, "learning_rate": 3.396043063534613e-05, "loss": 0.5902, "step": 1621 }, { "epoch": 2.3910079233462316, "grad_norm": 2.6073489975477826, "learning_rate": 3.39419495654127e-05, "loss": 0.6563, "step": 1622 }, { "epoch": 2.392482034273079, "grad_norm": 2.1594732767312173, "learning_rate": 3.3923462890311544e-05, "loss": 0.6567, "step": 1623 }, { "epoch": 2.393956145199926, "grad_norm": 2.050201741037959, "learning_rate": 3.3904970621630866e-05, "loss": 0.5623, "step": 1624 }, { "epoch": 2.3954302561267733, "grad_norm": 1.9199545965190692, "learning_rate": 3.388647277096234e-05, "loss": 0.5753, "step": 1625 }, { "epoch": 2.396904367053621, "grad_norm": 2.225925715890711, "learning_rate": 3.386796934990115e-05, "loss": 0.7408, "step": 1626 }, { "epoch": 2.398378477980468, "grad_norm": 2.053161518579216, "learning_rate": 3.3849460370045966e-05, "loss": 0.5472, "step": 1627 }, { "epoch": 2.3998525889073155, "grad_norm": 2.0378831982820036, "learning_rate": 3.3830945842998954e-05, "loss": 0.5904, "step": 1628 }, { "epoch": 2.4013266998341627, "grad_norm": 2.383738653579057, "learning_rate": 3.381242578036576e-05, "loss": 0.6263, "step": 1629 }, { "epoch": 2.40280081076101, "grad_norm": 2.32123164714569, "learning_rate": 3.379390019375548e-05, "loss": 0.6709, "step": 1630 }, { "epoch": 2.404274921687857, "grad_norm": 2.210554002048283, "learning_rate": 3.377536909478069e-05, "loss": 0.6343, "step": 1631 }, { "epoch": 2.4057490326147044, "grad_norm": 1.9938712988998408, "learning_rate": 3.3756832495057414e-05, "loss": 0.5383, "step": 1632 }, { "epoch": 2.4072231435415516, "grad_norm": 2.3269112514527603, "learning_rate": 3.373829040620513e-05, "loss": 0.6258, "step": 1633 }, { "epoch": 2.408697254468399, "grad_norm": 2.239519931575913, "learning_rate": 3.3719742839846743e-05, "loss": 0.5987, "step": 1634 }, { "epoch": 2.410171365395246, "grad_norm": 1.8153468170048173, "learning_rate": 3.370118980760861e-05, "loss": 0.5035, "step": 1635 }, { "epoch": 2.4116454763220934, "grad_norm": 2.1293752171253466, "learning_rate": 3.3682631321120504e-05, "loss": 0.5633, "step": 1636 }, { "epoch": 2.4131195872489406, "grad_norm": 2.2818729124997406, "learning_rate": 3.366406739201562e-05, "loss": 0.7138, "step": 1637 }, { "epoch": 2.414593698175788, "grad_norm": 2.205056067897876, "learning_rate": 3.364549803193057e-05, "loss": 0.6895, "step": 1638 }, { "epoch": 2.416067809102635, "grad_norm": 2.088062589752315, "learning_rate": 3.362692325250534e-05, "loss": 0.612, "step": 1639 }, { "epoch": 2.4175419200294823, "grad_norm": 2.18593639621884, "learning_rate": 3.360834306538336e-05, "loss": 0.7077, "step": 1640 }, { "epoch": 2.4190160309563296, "grad_norm": 2.142158460246331, "learning_rate": 3.3589757482211416e-05, "loss": 0.6836, "step": 1641 }, { "epoch": 2.420490141883177, "grad_norm": 2.1258669935806784, "learning_rate": 3.3571166514639684e-05, "loss": 0.6169, "step": 1642 }, { "epoch": 2.421964252810024, "grad_norm": 2.0015266075757623, "learning_rate": 3.3552570174321724e-05, "loss": 0.5313, "step": 1643 }, { "epoch": 2.4234383637368713, "grad_norm": 2.1737845828621585, "learning_rate": 3.353396847291446e-05, "loss": 0.5489, "step": 1644 }, { "epoch": 2.4249124746637185, "grad_norm": 2.2792978164160975, "learning_rate": 3.3515361422078165e-05, "loss": 0.7155, "step": 1645 }, { "epoch": 2.4263865855905657, "grad_norm": 2.4459790183701258, "learning_rate": 3.3496749033476485e-05, "loss": 0.6578, "step": 1646 }, { "epoch": 2.427860696517413, "grad_norm": 1.895714018983193, "learning_rate": 3.347813131877638e-05, "loss": 0.5251, "step": 1647 }, { "epoch": 2.4293348074442602, "grad_norm": 2.4280636380762544, "learning_rate": 3.34595082896482e-05, "loss": 0.6126, "step": 1648 }, { "epoch": 2.4308089183711075, "grad_norm": 1.832494095692614, "learning_rate": 3.344087995776558e-05, "loss": 0.5688, "step": 1649 }, { "epoch": 2.4322830292979547, "grad_norm": 2.3485949475572943, "learning_rate": 3.34222463348055e-05, "loss": 0.7384, "step": 1650 }, { "epoch": 2.433757140224802, "grad_norm": 2.352445620355233, "learning_rate": 3.340360743244825e-05, "loss": 0.5171, "step": 1651 }, { "epoch": 2.435231251151649, "grad_norm": 2.0465282117295693, "learning_rate": 3.338496326237743e-05, "loss": 0.6108, "step": 1652 }, { "epoch": 2.4367053620784964, "grad_norm": 2.350662008337525, "learning_rate": 3.336631383627995e-05, "loss": 0.7292, "step": 1653 }, { "epoch": 2.4381794730053437, "grad_norm": 2.4889830074320316, "learning_rate": 3.334765916584599e-05, "loss": 0.611, "step": 1654 }, { "epoch": 2.439653583932191, "grad_norm": 2.0540902897974416, "learning_rate": 3.332899926276905e-05, "loss": 0.5989, "step": 1655 }, { "epoch": 2.441127694859038, "grad_norm": 2.3478741790618947, "learning_rate": 3.33103341387459e-05, "loss": 0.6371, "step": 1656 }, { "epoch": 2.4426018057858854, "grad_norm": 2.2325188456356115, "learning_rate": 3.3291663805476566e-05, "loss": 0.6552, "step": 1657 }, { "epoch": 2.4440759167127326, "grad_norm": 1.9876015931081423, "learning_rate": 3.3272988274664364e-05, "loss": 0.5477, "step": 1658 }, { "epoch": 2.44555002763958, "grad_norm": 1.9722560363282582, "learning_rate": 3.325430755801584e-05, "loss": 0.5973, "step": 1659 }, { "epoch": 2.447024138566427, "grad_norm": 2.330023688985876, "learning_rate": 3.323562166724082e-05, "loss": 0.6883, "step": 1660 }, { "epoch": 2.4484982494932743, "grad_norm": 2.235911021419038, "learning_rate": 3.321693061405235e-05, "loss": 0.6653, "step": 1661 }, { "epoch": 2.4499723604201216, "grad_norm": 2.145861159455947, "learning_rate": 3.319823441016673e-05, "loss": 0.6645, "step": 1662 }, { "epoch": 2.451446471346969, "grad_norm": 2.2093845480533165, "learning_rate": 3.317953306730347e-05, "loss": 0.583, "step": 1663 }, { "epoch": 2.452920582273816, "grad_norm": 2.296414179863845, "learning_rate": 3.316082659718532e-05, "loss": 0.6079, "step": 1664 }, { "epoch": 2.4543946932006633, "grad_norm": 2.1006383743019272, "learning_rate": 3.314211501153823e-05, "loss": 0.5959, "step": 1665 }, { "epoch": 2.4558688041275105, "grad_norm": 2.1659680731930924, "learning_rate": 3.312339832209137e-05, "loss": 0.6335, "step": 1666 }, { "epoch": 2.4573429150543578, "grad_norm": 2.2341287117048263, "learning_rate": 3.3104676540577094e-05, "loss": 0.6466, "step": 1667 }, { "epoch": 2.458817025981205, "grad_norm": 2.371887777944933, "learning_rate": 3.308594967873095e-05, "loss": 0.6297, "step": 1668 }, { "epoch": 2.4602911369080522, "grad_norm": 2.1983724425883318, "learning_rate": 3.3067217748291695e-05, "loss": 0.6957, "step": 1669 }, { "epoch": 2.4617652478348995, "grad_norm": 2.0454686116206937, "learning_rate": 3.304848076100122e-05, "loss": 0.7221, "step": 1670 }, { "epoch": 2.4632393587617467, "grad_norm": 2.1932056675466374, "learning_rate": 3.302973872860463e-05, "loss": 0.5938, "step": 1671 }, { "epoch": 2.464713469688594, "grad_norm": 2.131387311254093, "learning_rate": 3.301099166285017e-05, "loss": 0.6065, "step": 1672 }, { "epoch": 2.466187580615441, "grad_norm": 2.176862081817402, "learning_rate": 3.299223957548923e-05, "loss": 0.5861, "step": 1673 }, { "epoch": 2.4676616915422884, "grad_norm": 2.133787274741814, "learning_rate": 3.2973482478276364e-05, "loss": 0.5891, "step": 1674 }, { "epoch": 2.4691358024691357, "grad_norm": 2.364436095998999, "learning_rate": 3.2954720382969263e-05, "loss": 0.6823, "step": 1675 }, { "epoch": 2.470609913395983, "grad_norm": 2.149446007969104, "learning_rate": 3.293595330132876e-05, "loss": 0.6006, "step": 1676 }, { "epoch": 2.47208402432283, "grad_norm": 2.108181311915247, "learning_rate": 3.291718124511879e-05, "loss": 0.6301, "step": 1677 }, { "epoch": 2.4735581352496774, "grad_norm": 2.2954971436566076, "learning_rate": 3.289840422610643e-05, "loss": 0.6337, "step": 1678 }, { "epoch": 2.4750322461765246, "grad_norm": 2.085582117103375, "learning_rate": 3.287962225606185e-05, "loss": 0.6684, "step": 1679 }, { "epoch": 2.476506357103372, "grad_norm": 2.1452233148028053, "learning_rate": 3.286083534675835e-05, "loss": 0.7254, "step": 1680 }, { "epoch": 2.477980468030219, "grad_norm": 2.301936379369666, "learning_rate": 3.284204350997229e-05, "loss": 0.6933, "step": 1681 }, { "epoch": 2.4794545789570663, "grad_norm": 2.3147209431016935, "learning_rate": 3.282324675748314e-05, "loss": 0.5412, "step": 1682 }, { "epoch": 2.4809286898839136, "grad_norm": 2.5030303708595043, "learning_rate": 3.280444510107346e-05, "loss": 0.7227, "step": 1683 }, { "epoch": 2.4824028008107613, "grad_norm": 2.321106848588293, "learning_rate": 3.278563855252885e-05, "loss": 0.6375, "step": 1684 }, { "epoch": 2.4838769117376085, "grad_norm": 2.1382431924462484, "learning_rate": 3.276682712363801e-05, "loss": 0.7572, "step": 1685 }, { "epoch": 2.4853510226644557, "grad_norm": 2.2963836559589135, "learning_rate": 3.274801082619269e-05, "loss": 0.6582, "step": 1686 }, { "epoch": 2.486825133591303, "grad_norm": 2.2310142584891812, "learning_rate": 3.2729189671987695e-05, "loss": 0.5479, "step": 1687 }, { "epoch": 2.48829924451815, "grad_norm": 2.227801276146725, "learning_rate": 3.271036367282085e-05, "loss": 0.6313, "step": 1688 }, { "epoch": 2.4897733554449974, "grad_norm": 2.1455337043627156, "learning_rate": 3.269153284049306e-05, "loss": 0.5449, "step": 1689 }, { "epoch": 2.4912474663718447, "grad_norm": 2.179961265954795, "learning_rate": 3.267269718680822e-05, "loss": 0.6635, "step": 1690 }, { "epoch": 2.492721577298692, "grad_norm": 2.16348644535693, "learning_rate": 3.265385672357327e-05, "loss": 0.6285, "step": 1691 }, { "epoch": 2.494195688225539, "grad_norm": 2.4796276679028737, "learning_rate": 3.2635011462598145e-05, "loss": 0.6517, "step": 1692 }, { "epoch": 2.4956697991523864, "grad_norm": 2.123029917602759, "learning_rate": 3.261616141569581e-05, "loss": 0.5999, "step": 1693 }, { "epoch": 2.4971439100792336, "grad_norm": 2.20900509771447, "learning_rate": 3.2597306594682225e-05, "loss": 0.5977, "step": 1694 }, { "epoch": 2.498618021006081, "grad_norm": 2.2113433573214554, "learning_rate": 3.257844701137633e-05, "loss": 0.6936, "step": 1695 }, { "epoch": 2.500092131932928, "grad_norm": 2.204863383241085, "learning_rate": 3.255958267760006e-05, "loss": 0.66, "step": 1696 }, { "epoch": 2.5015662428597754, "grad_norm": 2.0437008073118554, "learning_rate": 3.254071360517833e-05, "loss": 0.7369, "step": 1697 }, { "epoch": 2.5030403537866226, "grad_norm": 2.125127227564835, "learning_rate": 3.252183980593901e-05, "loss": 0.6225, "step": 1698 }, { "epoch": 2.50451446471347, "grad_norm": 2.0861362548319717, "learning_rate": 3.250296129171295e-05, "loss": 0.6709, "step": 1699 }, { "epoch": 2.505988575640317, "grad_norm": 2.219105544280509, "learning_rate": 3.2484078074333954e-05, "loss": 0.5589, "step": 1700 }, { "epoch": 2.5074626865671643, "grad_norm": 1.9330372635732274, "learning_rate": 3.246519016563876e-05, "loss": 0.5662, "step": 1701 }, { "epoch": 2.5089367974940116, "grad_norm": 2.015812591576387, "learning_rate": 3.244629757746706e-05, "loss": 0.5713, "step": 1702 }, { "epoch": 2.510410908420859, "grad_norm": 2.0624452063477983, "learning_rate": 3.242740032166149e-05, "loss": 0.6604, "step": 1703 }, { "epoch": 2.511885019347706, "grad_norm": 2.1514169459534553, "learning_rate": 3.240849841006758e-05, "loss": 0.6277, "step": 1704 }, { "epoch": 2.5133591302745533, "grad_norm": 2.0478264148134038, "learning_rate": 3.23895918545338e-05, "loss": 0.5294, "step": 1705 }, { "epoch": 2.5148332412014005, "grad_norm": 2.1744205185220333, "learning_rate": 3.237068066691152e-05, "loss": 0.7642, "step": 1706 }, { "epoch": 2.5163073521282477, "grad_norm": 2.0734993069484076, "learning_rate": 3.2351764859055034e-05, "loss": 0.7179, "step": 1707 }, { "epoch": 2.517781463055095, "grad_norm": 2.1217846948118217, "learning_rate": 3.233284444282152e-05, "loss": 0.5872, "step": 1708 }, { "epoch": 2.519255573981942, "grad_norm": 2.0050258669052696, "learning_rate": 3.2313919430071026e-05, "loss": 0.5716, "step": 1709 }, { "epoch": 2.5207296849087895, "grad_norm": 2.2089789784278686, "learning_rate": 3.2294989832666514e-05, "loss": 0.6652, "step": 1710 }, { "epoch": 2.5222037958356367, "grad_norm": 1.9968355297312899, "learning_rate": 3.22760556624738e-05, "loss": 0.5701, "step": 1711 }, { "epoch": 2.523677906762484, "grad_norm": 2.177322252904508, "learning_rate": 3.225711693136156e-05, "loss": 0.664, "step": 1712 }, { "epoch": 2.525152017689331, "grad_norm": 2.0371486992848724, "learning_rate": 3.223817365120136e-05, "loss": 0.4977, "step": 1713 }, { "epoch": 2.5266261286161784, "grad_norm": 2.2191865518414127, "learning_rate": 3.221922583386758e-05, "loss": 0.6661, "step": 1714 }, { "epoch": 2.5281002395430257, "grad_norm": 2.310894073429074, "learning_rate": 3.220027349123748e-05, "loss": 0.7399, "step": 1715 }, { "epoch": 2.529574350469873, "grad_norm": 2.4824243721097785, "learning_rate": 3.2181316635191125e-05, "loss": 0.6522, "step": 1716 }, { "epoch": 2.53104846139672, "grad_norm": 2.3667480262239597, "learning_rate": 3.2162355277611416e-05, "loss": 0.5327, "step": 1717 }, { "epoch": 2.5325225723235674, "grad_norm": 2.13262983385297, "learning_rate": 3.214338943038409e-05, "loss": 0.5837, "step": 1718 }, { "epoch": 2.5339966832504146, "grad_norm": 1.967766410114623, "learning_rate": 3.21244191053977e-05, "loss": 0.6297, "step": 1719 }, { "epoch": 2.535470794177262, "grad_norm": 2.119683290252712, "learning_rate": 3.2105444314543584e-05, "loss": 0.5874, "step": 1720 }, { "epoch": 2.536944905104109, "grad_norm": 2.561317263939865, "learning_rate": 3.208646506971589e-05, "loss": 0.8079, "step": 1721 }, { "epoch": 2.5384190160309563, "grad_norm": 1.987229344394884, "learning_rate": 3.206748138281157e-05, "loss": 0.6618, "step": 1722 }, { "epoch": 2.5398931269578036, "grad_norm": 2.315060718473385, "learning_rate": 3.204849326573034e-05, "loss": 0.6358, "step": 1723 }, { "epoch": 2.541367237884651, "grad_norm": 2.0442336417214646, "learning_rate": 3.20295007303747e-05, "loss": 0.5418, "step": 1724 }, { "epoch": 2.542841348811498, "grad_norm": 2.141108214699678, "learning_rate": 3.201050378864994e-05, "loss": 0.6189, "step": 1725 }, { "epoch": 2.5443154597383453, "grad_norm": 1.9772695852272635, "learning_rate": 3.1991502452464074e-05, "loss": 0.617, "step": 1726 }, { "epoch": 2.5457895706651925, "grad_norm": 2.1443705958935735, "learning_rate": 3.1972496733727906e-05, "loss": 0.733, "step": 1727 }, { "epoch": 2.5472636815920398, "grad_norm": 2.246574655548504, "learning_rate": 3.195348664435497e-05, "loss": 0.6114, "step": 1728 }, { "epoch": 2.548737792518887, "grad_norm": 2.1835841415821697, "learning_rate": 3.193447219626153e-05, "loss": 0.6341, "step": 1729 }, { "epoch": 2.5502119034457342, "grad_norm": 1.9420807125475172, "learning_rate": 3.191545340136661e-05, "loss": 0.586, "step": 1730 }, { "epoch": 2.5516860143725815, "grad_norm": 2.107186771886576, "learning_rate": 3.1896430271591937e-05, "loss": 0.7224, "step": 1731 }, { "epoch": 2.5531601252994287, "grad_norm": 2.0844709977630145, "learning_rate": 3.187740281886195e-05, "loss": 0.6494, "step": 1732 }, { "epoch": 2.554634236226276, "grad_norm": 2.6242587541606985, "learning_rate": 3.185837105510383e-05, "loss": 0.6394, "step": 1733 }, { "epoch": 2.556108347153123, "grad_norm": 2.045571064761024, "learning_rate": 3.183933499224743e-05, "loss": 0.6724, "step": 1734 }, { "epoch": 2.5575824580799704, "grad_norm": 2.127835457664384, "learning_rate": 3.18202946422253e-05, "loss": 0.6387, "step": 1735 }, { "epoch": 2.5590565690068177, "grad_norm": 2.006617090028911, "learning_rate": 3.18012500169727e-05, "loss": 0.6237, "step": 1736 }, { "epoch": 2.560530679933665, "grad_norm": 2.2790332631943073, "learning_rate": 3.178220112842753e-05, "loss": 0.6541, "step": 1737 }, { "epoch": 2.562004790860512, "grad_norm": 1.915510105091466, "learning_rate": 3.176314798853042e-05, "loss": 0.5313, "step": 1738 }, { "epoch": 2.5634789017873594, "grad_norm": 2.0177040910126443, "learning_rate": 3.17440906092246e-05, "loss": 0.6889, "step": 1739 }, { "epoch": 2.5649530127142066, "grad_norm": 2.363903524899127, "learning_rate": 3.1725029002456e-05, "loss": 0.5706, "step": 1740 }, { "epoch": 2.566427123641054, "grad_norm": 2.125533878355245, "learning_rate": 3.17059631801732e-05, "loss": 0.6667, "step": 1741 }, { "epoch": 2.567901234567901, "grad_norm": 2.187823462413169, "learning_rate": 3.168689315432741e-05, "loss": 0.5544, "step": 1742 }, { "epoch": 2.5693753454947483, "grad_norm": 2.2086328147453287, "learning_rate": 3.1667818936872465e-05, "loss": 0.6726, "step": 1743 }, { "epoch": 2.5708494564215956, "grad_norm": 2.2484070119056567, "learning_rate": 3.1648740539764844e-05, "loss": 0.6175, "step": 1744 }, { "epoch": 2.572323567348443, "grad_norm": 1.9966683055423047, "learning_rate": 3.162965797496364e-05, "loss": 0.7062, "step": 1745 }, { "epoch": 2.57379767827529, "grad_norm": 1.7774572985921437, "learning_rate": 3.161057125443056e-05, "loss": 0.4893, "step": 1746 }, { "epoch": 2.5752717892021373, "grad_norm": 2.26223269406243, "learning_rate": 3.1591480390129914e-05, "loss": 0.5384, "step": 1747 }, { "epoch": 2.5767459001289845, "grad_norm": 2.17829254250349, "learning_rate": 3.157238539402862e-05, "loss": 0.6412, "step": 1748 }, { "epoch": 2.5782200110558318, "grad_norm": 2.316938213744556, "learning_rate": 3.155328627809617e-05, "loss": 0.7302, "step": 1749 }, { "epoch": 2.579694121982679, "grad_norm": 2.1628631683750883, "learning_rate": 3.1534183054304645e-05, "loss": 0.7586, "step": 1750 }, { "epoch": 2.579694121982679, "eval_bleu": 0.06834691742112595, "eval_bleu_1gram": 0.3760155472677324, "eval_bleu_2gram": 0.14805930249151447, "eval_bleu_3gram": 0.06592366803450699, "eval_bleu_4gram": 0.03206738394250721, "eval_rag_val_loss": 1.374537369416606, "eval_rouge1": 0.3703336564210789, "eval_rouge2": 0.14348433838370003, "eval_rougeL": 0.35042993299421793, "step": 1750 }, { "epoch": 2.5811682329095262, "grad_norm": 2.351331021394919, "learning_rate": 3.1515075734628705e-05, "loss": 0.5916, "step": 1751 }, { "epoch": 2.5826423438363735, "grad_norm": 2.272995528948876, "learning_rate": 3.149596433104556e-05, "loss": 0.7131, "step": 1752 }, { "epoch": 2.5841164547632207, "grad_norm": 2.5459186654115866, "learning_rate": 3.147684885553502e-05, "loss": 0.6682, "step": 1753 }, { "epoch": 2.585590565690068, "grad_norm": 2.2925341842582045, "learning_rate": 3.145772932007939e-05, "loss": 0.6214, "step": 1754 }, { "epoch": 2.587064676616915, "grad_norm": 2.0887213143753707, "learning_rate": 3.143860573666357e-05, "loss": 0.5229, "step": 1755 }, { "epoch": 2.5885387875437624, "grad_norm": 2.0562140881959157, "learning_rate": 3.1419478117274984e-05, "loss": 0.63, "step": 1756 }, { "epoch": 2.5900128984706097, "grad_norm": 2.4543964954092576, "learning_rate": 3.140034647390357e-05, "loss": 0.8599, "step": 1757 }, { "epoch": 2.591487009397457, "grad_norm": 2.089127975955675, "learning_rate": 3.13812108185418e-05, "loss": 0.5938, "step": 1758 }, { "epoch": 2.592961120324304, "grad_norm": 2.0377202201673104, "learning_rate": 3.136207116318466e-05, "loss": 0.5804, "step": 1759 }, { "epoch": 2.5944352312511514, "grad_norm": 2.464317301830087, "learning_rate": 3.1342927519829644e-05, "loss": 0.7802, "step": 1760 }, { "epoch": 2.5959093421779986, "grad_norm": 1.9995894010589146, "learning_rate": 3.1323779900476744e-05, "loss": 0.6205, "step": 1761 }, { "epoch": 2.597383453104846, "grad_norm": 1.915490750311632, "learning_rate": 3.1304628317128446e-05, "loss": 0.6149, "step": 1762 }, { "epoch": 2.5988575640316935, "grad_norm": 1.9669885778711231, "learning_rate": 3.128547278178972e-05, "loss": 0.6832, "step": 1763 }, { "epoch": 2.600331674958541, "grad_norm": 2.066138670241954, "learning_rate": 3.126631330646802e-05, "loss": 0.5943, "step": 1764 }, { "epoch": 2.601805785885388, "grad_norm": 2.2455487195380215, "learning_rate": 3.124714990317324e-05, "loss": 0.6184, "step": 1765 }, { "epoch": 2.6032798968122353, "grad_norm": 2.1141229388273697, "learning_rate": 3.122798258391779e-05, "loss": 0.7683, "step": 1766 }, { "epoch": 2.6047540077390825, "grad_norm": 2.012576970115287, "learning_rate": 3.120881136071649e-05, "loss": 0.6413, "step": 1767 }, { "epoch": 2.6062281186659297, "grad_norm": 1.9344735658150407, "learning_rate": 3.118963624558662e-05, "loss": 0.5882, "step": 1768 }, { "epoch": 2.607702229592777, "grad_norm": 2.3697645807639645, "learning_rate": 3.11704572505479e-05, "loss": 0.6932, "step": 1769 }, { "epoch": 2.609176340519624, "grad_norm": 2.2324251857409285, "learning_rate": 3.115127438762247e-05, "loss": 0.6862, "step": 1770 }, { "epoch": 2.6106504514464715, "grad_norm": 2.206043015407369, "learning_rate": 3.113208766883494e-05, "loss": 0.6517, "step": 1771 }, { "epoch": 2.6121245623733187, "grad_norm": 2.0860911109169886, "learning_rate": 3.111289710621228e-05, "loss": 0.6182, "step": 1772 }, { "epoch": 2.613598673300166, "grad_norm": 2.3289195503436044, "learning_rate": 3.109370271178389e-05, "loss": 0.7405, "step": 1773 }, { "epoch": 2.615072784227013, "grad_norm": 2.137602516972893, "learning_rate": 3.10745044975816e-05, "loss": 0.5523, "step": 1774 }, { "epoch": 2.6165468951538604, "grad_norm": 2.291534026960173, "learning_rate": 3.1055302475639594e-05, "loss": 0.6874, "step": 1775 }, { "epoch": 2.6180210060807076, "grad_norm": 2.157039110942417, "learning_rate": 3.103609665799445e-05, "loss": 0.6415, "step": 1776 }, { "epoch": 2.619495117007555, "grad_norm": 2.149582325485988, "learning_rate": 3.1016887056685155e-05, "loss": 0.5825, "step": 1777 }, { "epoch": 2.620969227934402, "grad_norm": 2.2496035544493527, "learning_rate": 3.0997673683753024e-05, "loss": 0.6197, "step": 1778 }, { "epoch": 2.6224433388612494, "grad_norm": 2.3443538650784466, "learning_rate": 3.0978456551241786e-05, "loss": 0.594, "step": 1779 }, { "epoch": 2.6239174497880966, "grad_norm": 2.214192397265182, "learning_rate": 3.095923567119748e-05, "loss": 0.6252, "step": 1780 }, { "epoch": 2.625391560714944, "grad_norm": 2.1766395177311573, "learning_rate": 3.094001105566852e-05, "loss": 0.6178, "step": 1781 }, { "epoch": 2.626865671641791, "grad_norm": 2.0264208388981793, "learning_rate": 3.0920782716705654e-05, "loss": 0.5444, "step": 1782 }, { "epoch": 2.6283397825686383, "grad_norm": 2.111572025690276, "learning_rate": 3.0901550666361964e-05, "loss": 0.6844, "step": 1783 }, { "epoch": 2.6298138934954856, "grad_norm": 2.3090109407835646, "learning_rate": 3.088231491669287e-05, "loss": 0.7359, "step": 1784 }, { "epoch": 2.631288004422333, "grad_norm": 2.8556965164376518, "learning_rate": 3.0863075479756084e-05, "loss": 0.6419, "step": 1785 }, { "epoch": 2.63276211534918, "grad_norm": 2.080522688518485, "learning_rate": 3.084383236761166e-05, "loss": 0.5949, "step": 1786 }, { "epoch": 2.6342362262760273, "grad_norm": 2.386793869010169, "learning_rate": 3.0824585592321936e-05, "loss": 0.7346, "step": 1787 }, { "epoch": 2.6357103372028745, "grad_norm": 2.2730986352439913, "learning_rate": 3.080533516595155e-05, "loss": 0.7506, "step": 1788 }, { "epoch": 2.6371844481297217, "grad_norm": 2.0333431316619093, "learning_rate": 3.078608110056745e-05, "loss": 0.6667, "step": 1789 }, { "epoch": 2.638658559056569, "grad_norm": 2.0878575093111627, "learning_rate": 3.076682340823882e-05, "loss": 0.7238, "step": 1790 }, { "epoch": 2.6401326699834162, "grad_norm": 2.0927373230595414, "learning_rate": 3.074756210103715e-05, "loss": 0.6025, "step": 1791 }, { "epoch": 2.6416067809102635, "grad_norm": 1.9436049109766456, "learning_rate": 3.072829719103619e-05, "loss": 0.6372, "step": 1792 }, { "epoch": 2.6430808918371107, "grad_norm": 2.100597060567366, "learning_rate": 3.070902869031196e-05, "loss": 0.5068, "step": 1793 }, { "epoch": 2.644555002763958, "grad_norm": 2.2489376739170273, "learning_rate": 3.0689756610942705e-05, "loss": 0.6062, "step": 1794 }, { "epoch": 2.646029113690805, "grad_norm": 2.1239193805329952, "learning_rate": 3.067048096500893e-05, "loss": 0.5572, "step": 1795 }, { "epoch": 2.6475032246176524, "grad_norm": 2.2832066753741644, "learning_rate": 3.065120176459338e-05, "loss": 0.6937, "step": 1796 }, { "epoch": 2.6489773355444997, "grad_norm": 2.094629928387292, "learning_rate": 3.0631919021781e-05, "loss": 0.6246, "step": 1797 }, { "epoch": 2.650451446471347, "grad_norm": 2.4894818294587577, "learning_rate": 3.0612632748659e-05, "loss": 0.5984, "step": 1798 }, { "epoch": 2.651925557398194, "grad_norm": 2.097066956331137, "learning_rate": 3.0593342957316765e-05, "loss": 0.5822, "step": 1799 }, { "epoch": 2.6533996683250414, "grad_norm": 2.1614885688107086, "learning_rate": 3.05740496598459e-05, "loss": 0.643, "step": 1800 }, { "epoch": 2.6548737792518886, "grad_norm": 2.0367196021976306, "learning_rate": 3.055475286834021e-05, "loss": 0.6448, "step": 1801 }, { "epoch": 2.656347890178736, "grad_norm": 2.3183621516608794, "learning_rate": 3.053545259489569e-05, "loss": 0.6522, "step": 1802 }, { "epoch": 2.657822001105583, "grad_norm": 2.0987113132057917, "learning_rate": 3.051614885161051e-05, "loss": 0.5873, "step": 1803 }, { "epoch": 2.6592961120324303, "grad_norm": 1.816477521913213, "learning_rate": 3.0496841650585022e-05, "loss": 0.6, "step": 1804 }, { "epoch": 2.6607702229592776, "grad_norm": 2.226833286885066, "learning_rate": 3.0477531003921745e-05, "loss": 0.6763, "step": 1805 }, { "epoch": 2.662244333886125, "grad_norm": 2.2620443528108045, "learning_rate": 3.0458216923725356e-05, "loss": 0.6547, "step": 1806 }, { "epoch": 2.663718444812972, "grad_norm": 2.3451348854427136, "learning_rate": 3.043889942210268e-05, "loss": 0.554, "step": 1807 }, { "epoch": 2.6651925557398193, "grad_norm": 2.0777657564034175, "learning_rate": 3.0419578511162695e-05, "loss": 0.6153, "step": 1808 }, { "epoch": 2.6666666666666665, "grad_norm": 2.2980899841114235, "learning_rate": 3.0400254203016503e-05, "loss": 0.6066, "step": 1809 }, { "epoch": 2.6681407775935138, "grad_norm": 2.2688632316811668, "learning_rate": 3.0380926509777364e-05, "loss": 0.7922, "step": 1810 }, { "epoch": 2.669614888520361, "grad_norm": 2.084828971599424, "learning_rate": 3.0361595443560624e-05, "loss": 0.7645, "step": 1811 }, { "epoch": 2.6710889994472087, "grad_norm": 2.193082932997993, "learning_rate": 3.034226101648377e-05, "loss": 0.6528, "step": 1812 }, { "epoch": 2.672563110374056, "grad_norm": 1.9725587122003219, "learning_rate": 3.0322923240666377e-05, "loss": 0.6473, "step": 1813 }, { "epoch": 2.674037221300903, "grad_norm": 1.8804755051875448, "learning_rate": 3.030358212823014e-05, "loss": 0.5707, "step": 1814 }, { "epoch": 2.6755113322277504, "grad_norm": 2.2223437580676433, "learning_rate": 3.0284237691298823e-05, "loss": 0.6387, "step": 1815 }, { "epoch": 2.6769854431545976, "grad_norm": 2.056562489241638, "learning_rate": 3.0264889941998285e-05, "loss": 0.6099, "step": 1816 }, { "epoch": 2.678459554081445, "grad_norm": 2.325708453621988, "learning_rate": 3.0245538892456455e-05, "loss": 0.7121, "step": 1817 }, { "epoch": 2.679933665008292, "grad_norm": 2.1461253538467475, "learning_rate": 3.0226184554803357e-05, "loss": 0.6883, "step": 1818 }, { "epoch": 2.6814077759351393, "grad_norm": 2.278850286121578, "learning_rate": 3.0206826941171035e-05, "loss": 0.603, "step": 1819 }, { "epoch": 2.6828818868619866, "grad_norm": 2.518084159286967, "learning_rate": 3.0187466063693614e-05, "loss": 0.7091, "step": 1820 }, { "epoch": 2.684355997788834, "grad_norm": 2.2068363610025923, "learning_rate": 3.0168101934507266e-05, "loss": 0.6458, "step": 1821 }, { "epoch": 2.685830108715681, "grad_norm": 2.1417910330715513, "learning_rate": 3.0148734565750176e-05, "loss": 0.5986, "step": 1822 }, { "epoch": 2.6873042196425283, "grad_norm": 2.2718288327737994, "learning_rate": 3.012936396956259e-05, "loss": 0.6252, "step": 1823 }, { "epoch": 2.6887783305693755, "grad_norm": 2.4698543734227587, "learning_rate": 3.0109990158086764e-05, "loss": 0.5783, "step": 1824 }, { "epoch": 2.690252441496223, "grad_norm": 2.5179425102379285, "learning_rate": 3.0090613143466956e-05, "loss": 0.6692, "step": 1825 }, { "epoch": 2.69172655242307, "grad_norm": 2.221852886449379, "learning_rate": 3.0071232937849457e-05, "loss": 0.6577, "step": 1826 }, { "epoch": 2.6932006633499173, "grad_norm": 2.051517376254577, "learning_rate": 3.0051849553382555e-05, "loss": 0.6578, "step": 1827 }, { "epoch": 2.6946747742767645, "grad_norm": 2.203927293073226, "learning_rate": 3.0032463002216505e-05, "loss": 0.6043, "step": 1828 }, { "epoch": 2.6961488852036117, "grad_norm": 2.1286096768769376, "learning_rate": 3.001307329650357e-05, "loss": 0.6411, "step": 1829 }, { "epoch": 2.697622996130459, "grad_norm": 2.2325739504195146, "learning_rate": 2.9993680448397988e-05, "loss": 0.6285, "step": 1830 }, { "epoch": 2.699097107057306, "grad_norm": 2.5107649776532757, "learning_rate": 2.997428447005596e-05, "loss": 0.5577, "step": 1831 }, { "epoch": 2.7005712179841534, "grad_norm": 2.4947083737373226, "learning_rate": 2.9954885373635655e-05, "loss": 0.8341, "step": 1832 }, { "epoch": 2.7020453289110007, "grad_norm": 2.2307170455808754, "learning_rate": 2.9935483171297186e-05, "loss": 0.6869, "step": 1833 }, { "epoch": 2.703519439837848, "grad_norm": 2.22047421794613, "learning_rate": 2.991607787520263e-05, "loss": 0.6278, "step": 1834 }, { "epoch": 2.704993550764695, "grad_norm": 2.1273324171108343, "learning_rate": 2.989666949751599e-05, "loss": 0.6481, "step": 1835 }, { "epoch": 2.7064676616915424, "grad_norm": 2.3878088426805335, "learning_rate": 2.9877258050403212e-05, "loss": 0.7144, "step": 1836 }, { "epoch": 2.7079417726183896, "grad_norm": 2.047245603600702, "learning_rate": 2.985784354603215e-05, "loss": 0.5682, "step": 1837 }, { "epoch": 2.709415883545237, "grad_norm": 2.172311094124076, "learning_rate": 2.9838425996572583e-05, "loss": 0.5228, "step": 1838 }, { "epoch": 2.710889994472084, "grad_norm": 2.2467789376087763, "learning_rate": 2.981900541419621e-05, "loss": 0.69, "step": 1839 }, { "epoch": 2.7123641053989314, "grad_norm": 2.352883307011802, "learning_rate": 2.9799581811076605e-05, "loss": 0.6424, "step": 1840 }, { "epoch": 2.7138382163257786, "grad_norm": 2.0657162875550372, "learning_rate": 2.978015519938926e-05, "loss": 0.5996, "step": 1841 }, { "epoch": 2.715312327252626, "grad_norm": 1.9868644057930693, "learning_rate": 2.9760725591311545e-05, "loss": 0.6129, "step": 1842 }, { "epoch": 2.716786438179473, "grad_norm": 2.1828703799591778, "learning_rate": 2.9741292999022707e-05, "loss": 0.6059, "step": 1843 }, { "epoch": 2.7182605491063203, "grad_norm": 2.0278059886322852, "learning_rate": 2.9721857434703858e-05, "loss": 0.57, "step": 1844 }, { "epoch": 2.7197346600331676, "grad_norm": 2.284177599934916, "learning_rate": 2.9702418910537983e-05, "loss": 0.6301, "step": 1845 }, { "epoch": 2.721208770960015, "grad_norm": 2.0647182673278937, "learning_rate": 2.9682977438709914e-05, "loss": 0.5747, "step": 1846 }, { "epoch": 2.722682881886862, "grad_norm": 2.251005371968567, "learning_rate": 2.9663533031406344e-05, "loss": 0.7499, "step": 1847 }, { "epoch": 2.7241569928137093, "grad_norm": 2.1192336664122777, "learning_rate": 2.9644085700815777e-05, "loss": 0.6487, "step": 1848 }, { "epoch": 2.7256311037405565, "grad_norm": 2.2759894765868713, "learning_rate": 2.9624635459128585e-05, "loss": 0.5408, "step": 1849 }, { "epoch": 2.7271052146674037, "grad_norm": 2.42595953293878, "learning_rate": 2.960518231853695e-05, "loss": 0.5811, "step": 1850 }, { "epoch": 2.728579325594251, "grad_norm": 2.149011264826529, "learning_rate": 2.9585726291234872e-05, "loss": 0.6102, "step": 1851 }, { "epoch": 2.730053436521098, "grad_norm": 2.0993469358148698, "learning_rate": 2.9566267389418144e-05, "loss": 0.679, "step": 1852 }, { "epoch": 2.7315275474479455, "grad_norm": 2.0204373656686534, "learning_rate": 2.9546805625284384e-05, "loss": 0.6067, "step": 1853 }, { "epoch": 2.7330016583747927, "grad_norm": 2.144271306749844, "learning_rate": 2.9527341011033e-05, "loss": 0.7244, "step": 1854 }, { "epoch": 2.73447576930164, "grad_norm": 2.198431006058006, "learning_rate": 2.9507873558865175e-05, "loss": 0.6674, "step": 1855 }, { "epoch": 2.735949880228487, "grad_norm": 2.338401286125025, "learning_rate": 2.9488403280983873e-05, "loss": 0.7754, "step": 1856 }, { "epoch": 2.7374239911553344, "grad_norm": 2.0236070480893527, "learning_rate": 2.9468930189593845e-05, "loss": 0.5787, "step": 1857 }, { "epoch": 2.7388981020821817, "grad_norm": 2.3560568611459445, "learning_rate": 2.9449454296901603e-05, "loss": 0.6446, "step": 1858 }, { "epoch": 2.740372213009029, "grad_norm": 2.1287166406030242, "learning_rate": 2.9429975615115383e-05, "loss": 0.7712, "step": 1859 }, { "epoch": 2.741846323935876, "grad_norm": 2.1856679601595186, "learning_rate": 2.9410494156445216e-05, "loss": 0.6307, "step": 1860 }, { "epoch": 2.7433204348627234, "grad_norm": 2.3695596322844774, "learning_rate": 2.9391009933102836e-05, "loss": 0.6962, "step": 1861 }, { "epoch": 2.7447945457895706, "grad_norm": 1.998385314021425, "learning_rate": 2.9371522957301734e-05, "loss": 0.61, "step": 1862 }, { "epoch": 2.746268656716418, "grad_norm": 2.1966018962814062, "learning_rate": 2.935203324125711e-05, "loss": 0.7196, "step": 1863 }, { "epoch": 2.747742767643265, "grad_norm": 2.2202633277124506, "learning_rate": 2.9332540797185892e-05, "loss": 0.7057, "step": 1864 }, { "epoch": 2.7492168785701123, "grad_norm": 2.236821682367304, "learning_rate": 2.9313045637306714e-05, "loss": 0.6931, "step": 1865 }, { "epoch": 2.7506909894969596, "grad_norm": 2.0291652838710745, "learning_rate": 2.9293547773839917e-05, "loss": 0.687, "step": 1866 }, { "epoch": 2.752165100423807, "grad_norm": 2.0119257848183523, "learning_rate": 2.9274047219007534e-05, "loss": 0.5122, "step": 1867 }, { "epoch": 2.753639211350654, "grad_norm": 2.140988284282532, "learning_rate": 2.925454398503328e-05, "loss": 0.6856, "step": 1868 }, { "epoch": 2.7551133222775013, "grad_norm": 1.9543177510346872, "learning_rate": 2.9235038084142557e-05, "loss": 0.5934, "step": 1869 }, { "epoch": 2.7565874332043485, "grad_norm": 2.087036529075102, "learning_rate": 2.921552952856243e-05, "loss": 0.6243, "step": 1870 }, { "epoch": 2.7580615441311958, "grad_norm": 2.0243108919079518, "learning_rate": 2.919601833052163e-05, "loss": 0.6769, "step": 1871 }, { "epoch": 2.759535655058043, "grad_norm": 2.1467537125221847, "learning_rate": 2.9176504502250563e-05, "loss": 0.6329, "step": 1872 }, { "epoch": 2.7610097659848902, "grad_norm": 2.0402081658237226, "learning_rate": 2.9156988055981254e-05, "loss": 0.6104, "step": 1873 }, { "epoch": 2.7624838769117375, "grad_norm": 2.1572734228443653, "learning_rate": 2.9137469003947392e-05, "loss": 0.6399, "step": 1874 }, { "epoch": 2.7639579878385847, "grad_norm": 2.1453462313273604, "learning_rate": 2.9117947358384288e-05, "loss": 0.6426, "step": 1875 }, { "epoch": 2.765432098765432, "grad_norm": 2.0936108443291643, "learning_rate": 2.909842313152888e-05, "loss": 0.6476, "step": 1876 }, { "epoch": 2.766906209692279, "grad_norm": 2.0062521723560205, "learning_rate": 2.9078896335619732e-05, "loss": 0.696, "step": 1877 }, { "epoch": 2.7683803206191264, "grad_norm": 2.442017306342258, "learning_rate": 2.9059366982897007e-05, "loss": 0.6239, "step": 1878 }, { "epoch": 2.7698544315459737, "grad_norm": 2.436861883902541, "learning_rate": 2.9039835085602473e-05, "loss": 0.6545, "step": 1879 }, { "epoch": 2.771328542472821, "grad_norm": 2.325442618620713, "learning_rate": 2.9020300655979503e-05, "loss": 0.8235, "step": 1880 }, { "epoch": 2.772802653399668, "grad_norm": 2.192267970962115, "learning_rate": 2.9000763706273036e-05, "loss": 0.7069, "step": 1881 }, { "epoch": 2.7742767643265154, "grad_norm": 2.30415291486043, "learning_rate": 2.8981224248729628e-05, "loss": 0.6837, "step": 1882 }, { "epoch": 2.7757508752533626, "grad_norm": 2.1039535364162485, "learning_rate": 2.896168229559737e-05, "loss": 0.5937, "step": 1883 }, { "epoch": 2.77722498618021, "grad_norm": 1.938549495833581, "learning_rate": 2.8942137859125928e-05, "loss": 0.6155, "step": 1884 }, { "epoch": 2.778699097107057, "grad_norm": 2.2794382922603225, "learning_rate": 2.8922590951566536e-05, "loss": 0.6417, "step": 1885 }, { "epoch": 2.7801732080339043, "grad_norm": 2.3755253662476505, "learning_rate": 2.8903041585171963e-05, "loss": 0.5813, "step": 1886 }, { "epoch": 2.7816473189607516, "grad_norm": 2.0964515679203615, "learning_rate": 2.8883489772196525e-05, "loss": 0.6338, "step": 1887 }, { "epoch": 2.783121429887599, "grad_norm": 2.145111061190947, "learning_rate": 2.886393552489608e-05, "loss": 0.7253, "step": 1888 }, { "epoch": 2.784595540814446, "grad_norm": 2.2502619272998152, "learning_rate": 2.8844378855527998e-05, "loss": 0.6488, "step": 1889 }, { "epoch": 2.7860696517412933, "grad_norm": 2.0359897188178127, "learning_rate": 2.8824819776351176e-05, "loss": 0.6624, "step": 1890 }, { "epoch": 2.7875437626681405, "grad_norm": 1.882081392411211, "learning_rate": 2.8805258299626015e-05, "loss": 0.5531, "step": 1891 }, { "epoch": 2.7890178735949878, "grad_norm": 2.3170983243979912, "learning_rate": 2.878569443761442e-05, "loss": 0.671, "step": 1892 }, { "epoch": 2.790491984521835, "grad_norm": 2.2077958394560584, "learning_rate": 2.8766128202579797e-05, "loss": 0.6399, "step": 1893 }, { "epoch": 2.7919660954486822, "grad_norm": 2.0946598638128244, "learning_rate": 2.874655960678704e-05, "loss": 0.6702, "step": 1894 }, { "epoch": 2.79344020637553, "grad_norm": 2.1396613632083334, "learning_rate": 2.87269886625025e-05, "loss": 0.5376, "step": 1895 }, { "epoch": 2.794914317302377, "grad_norm": 2.109992494976563, "learning_rate": 2.870741538199405e-05, "loss": 0.6801, "step": 1896 }, { "epoch": 2.7963884282292244, "grad_norm": 2.287254166030884, "learning_rate": 2.8687839777530977e-05, "loss": 0.648, "step": 1897 }, { "epoch": 2.7978625391560716, "grad_norm": 1.9906605572122353, "learning_rate": 2.8668261861384045e-05, "loss": 0.6134, "step": 1898 }, { "epoch": 2.799336650082919, "grad_norm": 2.0784742018929583, "learning_rate": 2.8648681645825472e-05, "loss": 0.5988, "step": 1899 }, { "epoch": 2.800810761009766, "grad_norm": 2.3234376764970825, "learning_rate": 2.8629099143128907e-05, "loss": 0.7517, "step": 1900 }, { "epoch": 2.8022848719366134, "grad_norm": 2.020283129272805, "learning_rate": 2.860951436556944e-05, "loss": 0.5043, "step": 1901 }, { "epoch": 2.8037589828634606, "grad_norm": 2.3611011380253015, "learning_rate": 2.8589927325423576e-05, "loss": 0.688, "step": 1902 }, { "epoch": 2.805233093790308, "grad_norm": 2.1224496908706927, "learning_rate": 2.8570338034969264e-05, "loss": 0.5533, "step": 1903 }, { "epoch": 2.806707204717155, "grad_norm": 2.107883406366202, "learning_rate": 2.855074650648583e-05, "loss": 0.5732, "step": 1904 }, { "epoch": 2.8081813156440023, "grad_norm": 2.21134130881321, "learning_rate": 2.853115275225403e-05, "loss": 0.7403, "step": 1905 }, { "epoch": 2.8096554265708495, "grad_norm": 2.4227570927017874, "learning_rate": 2.8511556784556e-05, "loss": 0.6363, "step": 1906 }, { "epoch": 2.811129537497697, "grad_norm": 2.588630771958228, "learning_rate": 2.8491958615675262e-05, "loss": 0.7016, "step": 1907 }, { "epoch": 2.812603648424544, "grad_norm": 2.1581560638958366, "learning_rate": 2.8472358257896732e-05, "loss": 0.6325, "step": 1908 }, { "epoch": 2.8140777593513913, "grad_norm": 2.1156912721427474, "learning_rate": 2.8452755723506687e-05, "loss": 0.7003, "step": 1909 }, { "epoch": 2.8155518702782385, "grad_norm": 2.0859906479140347, "learning_rate": 2.843315102479276e-05, "loss": 0.5632, "step": 1910 }, { "epoch": 2.8170259812050857, "grad_norm": 2.121160629089224, "learning_rate": 2.841354417404397e-05, "loss": 0.565, "step": 1911 }, { "epoch": 2.818500092131933, "grad_norm": 2.2528888382864745, "learning_rate": 2.8393935183550662e-05, "loss": 0.6583, "step": 1912 }, { "epoch": 2.81997420305878, "grad_norm": 2.2373791752590924, "learning_rate": 2.8374324065604517e-05, "loss": 0.5692, "step": 1913 }, { "epoch": 2.8214483139856275, "grad_norm": 2.140746287230293, "learning_rate": 2.8354710832498576e-05, "loss": 0.6709, "step": 1914 }, { "epoch": 2.8229224249124747, "grad_norm": 2.3280701086754676, "learning_rate": 2.833509549652717e-05, "loss": 0.6899, "step": 1915 }, { "epoch": 2.824396535839322, "grad_norm": 2.1585717348183473, "learning_rate": 2.831547806998598e-05, "loss": 0.67, "step": 1916 }, { "epoch": 2.825870646766169, "grad_norm": 2.23852452525562, "learning_rate": 2.8295858565171983e-05, "loss": 0.7079, "step": 1917 }, { "epoch": 2.8273447576930164, "grad_norm": 2.233521732051239, "learning_rate": 2.8276236994383453e-05, "loss": 0.6636, "step": 1918 }, { "epoch": 2.8288188686198636, "grad_norm": 2.11500431123479, "learning_rate": 2.825661336991998e-05, "loss": 0.6332, "step": 1919 }, { "epoch": 2.830292979546711, "grad_norm": 2.0947033434027817, "learning_rate": 2.8236987704082417e-05, "loss": 0.6508, "step": 1920 }, { "epoch": 2.831767090473558, "grad_norm": 2.0637531519403076, "learning_rate": 2.8217360009172922e-05, "loss": 0.5787, "step": 1921 }, { "epoch": 2.8332412014004054, "grad_norm": 2.258963426852011, "learning_rate": 2.8197730297494896e-05, "loss": 0.5667, "step": 1922 }, { "epoch": 2.8347153123272526, "grad_norm": 2.134300858423282, "learning_rate": 2.8178098581353018e-05, "loss": 0.5688, "step": 1923 }, { "epoch": 2.8361894232541, "grad_norm": 2.1147643013423347, "learning_rate": 2.8158464873053237e-05, "loss": 0.6252, "step": 1924 }, { "epoch": 2.837663534180947, "grad_norm": 2.0524779067900654, "learning_rate": 2.8138829184902727e-05, "loss": 0.7134, "step": 1925 }, { "epoch": 2.8391376451077943, "grad_norm": 2.060921845510402, "learning_rate": 2.811919152920991e-05, "loss": 0.5551, "step": 1926 }, { "epoch": 2.8406117560346416, "grad_norm": 2.0810290245074157, "learning_rate": 2.8099551918284468e-05, "loss": 0.6152, "step": 1927 }, { "epoch": 2.842085866961489, "grad_norm": 2.142094907978542, "learning_rate": 2.8079910364437263e-05, "loss": 0.6795, "step": 1928 }, { "epoch": 2.843559977888336, "grad_norm": 1.9847835810989694, "learning_rate": 2.8060266879980408e-05, "loss": 0.7064, "step": 1929 }, { "epoch": 2.8450340888151833, "grad_norm": 2.1450979460292907, "learning_rate": 2.8040621477227214e-05, "loss": 0.6808, "step": 1930 }, { "epoch": 2.8465081997420305, "grad_norm": 1.8742404670935933, "learning_rate": 2.8020974168492197e-05, "loss": 0.6036, "step": 1931 }, { "epoch": 2.8479823106688777, "grad_norm": 1.9360766566327574, "learning_rate": 2.8001324966091076e-05, "loss": 0.5385, "step": 1932 }, { "epoch": 2.849456421595725, "grad_norm": 2.2388069640579675, "learning_rate": 2.7981673882340726e-05, "loss": 0.698, "step": 1933 }, { "epoch": 2.8509305325225722, "grad_norm": 1.944666621215492, "learning_rate": 2.796202092955924e-05, "loss": 0.5301, "step": 1934 }, { "epoch": 2.8524046434494195, "grad_norm": 2.38002115817242, "learning_rate": 2.7942366120065872e-05, "loss": 0.6177, "step": 1935 }, { "epoch": 2.8538787543762667, "grad_norm": 2.1305207736102147, "learning_rate": 2.792270946618102e-05, "loss": 0.614, "step": 1936 }, { "epoch": 2.855352865303114, "grad_norm": 2.3161634346873843, "learning_rate": 2.790305098022626e-05, "loss": 0.7474, "step": 1937 }, { "epoch": 2.856826976229961, "grad_norm": 2.268813632041212, "learning_rate": 2.78833906745243e-05, "loss": 0.6254, "step": 1938 }, { "epoch": 2.8583010871568084, "grad_norm": 2.206465116825457, "learning_rate": 2.7863728561399016e-05, "loss": 0.6168, "step": 1939 }, { "epoch": 2.8597751980836557, "grad_norm": 2.0168220454907493, "learning_rate": 2.7844064653175378e-05, "loss": 0.5642, "step": 1940 }, { "epoch": 2.861249309010503, "grad_norm": 2.0904155768861212, "learning_rate": 2.7824398962179503e-05, "loss": 0.6233, "step": 1941 }, { "epoch": 2.86272341993735, "grad_norm": 2.0996159611088565, "learning_rate": 2.780473150073864e-05, "loss": 0.603, "step": 1942 }, { "epoch": 2.8641975308641974, "grad_norm": 2.1601858292222134, "learning_rate": 2.7785062281181124e-05, "loss": 0.6024, "step": 1943 }, { "epoch": 2.8656716417910446, "grad_norm": 2.4228191873277796, "learning_rate": 2.7765391315836396e-05, "loss": 0.6945, "step": 1944 }, { "epoch": 2.8671457527178923, "grad_norm": 2.2715344411198357, "learning_rate": 2.7745718617034998e-05, "loss": 0.5343, "step": 1945 }, { "epoch": 2.8686198636447395, "grad_norm": 2.1230747252658113, "learning_rate": 2.7726044197108557e-05, "loss": 0.7374, "step": 1946 }, { "epoch": 2.8700939745715868, "grad_norm": 2.1665644866128724, "learning_rate": 2.7706368068389778e-05, "loss": 0.5082, "step": 1947 }, { "epoch": 2.871568085498434, "grad_norm": 2.2453341319104645, "learning_rate": 2.7686690243212432e-05, "loss": 0.7627, "step": 1948 }, { "epoch": 2.8730421964252812, "grad_norm": 2.182396193544922, "learning_rate": 2.7667010733911354e-05, "loss": 0.6647, "step": 1949 }, { "epoch": 2.8745163073521285, "grad_norm": 2.0718698078625555, "learning_rate": 2.7647329552822455e-05, "loss": 0.6004, "step": 1950 }, { "epoch": 2.8759904182789757, "grad_norm": 2.045594724995805, "learning_rate": 2.762764671228267e-05, "loss": 0.5601, "step": 1951 }, { "epoch": 2.877464529205823, "grad_norm": 2.3424458244841673, "learning_rate": 2.760796222462998e-05, "loss": 0.7296, "step": 1952 }, { "epoch": 2.87893864013267, "grad_norm": 2.109096480454886, "learning_rate": 2.7588276102203398e-05, "loss": 0.5825, "step": 1953 }, { "epoch": 2.8804127510595174, "grad_norm": 2.3112676275064885, "learning_rate": 2.7568588357342973e-05, "loss": 0.6503, "step": 1954 }, { "epoch": 2.8818868619863647, "grad_norm": 2.3554424328701877, "learning_rate": 2.754889900238975e-05, "loss": 0.6564, "step": 1955 }, { "epoch": 2.883360972913212, "grad_norm": 2.306847132677517, "learning_rate": 2.7529208049685807e-05, "loss": 0.6425, "step": 1956 }, { "epoch": 2.884835083840059, "grad_norm": 2.3749034259636344, "learning_rate": 2.7509515511574208e-05, "loss": 0.6401, "step": 1957 }, { "epoch": 2.8863091947669064, "grad_norm": 2.2279339765974098, "learning_rate": 2.748982140039902e-05, "loss": 0.6343, "step": 1958 }, { "epoch": 2.8877833056937536, "grad_norm": 2.0316120705324274, "learning_rate": 2.747012572850528e-05, "loss": 0.5595, "step": 1959 }, { "epoch": 2.889257416620601, "grad_norm": 2.081922918367753, "learning_rate": 2.7450428508239024e-05, "loss": 0.6844, "step": 1960 }, { "epoch": 2.890731527547448, "grad_norm": 2.0413945334587704, "learning_rate": 2.743072975194723e-05, "loss": 0.5594, "step": 1961 }, { "epoch": 2.8922056384742953, "grad_norm": 2.2753139237340605, "learning_rate": 2.741102947197789e-05, "loss": 0.5982, "step": 1962 }, { "epoch": 2.8936797494011426, "grad_norm": 2.2644042837137803, "learning_rate": 2.7391327680679895e-05, "loss": 0.6461, "step": 1963 }, { "epoch": 2.89515386032799, "grad_norm": 2.025450305556051, "learning_rate": 2.7371624390403116e-05, "loss": 0.6558, "step": 1964 }, { "epoch": 2.896627971254837, "grad_norm": 2.1558819885229106, "learning_rate": 2.735191961349835e-05, "loss": 0.6998, "step": 1965 }, { "epoch": 2.8981020821816843, "grad_norm": 2.091146644094903, "learning_rate": 2.7332213362317328e-05, "loss": 0.6212, "step": 1966 }, { "epoch": 2.8995761931085315, "grad_norm": 2.121236497771991, "learning_rate": 2.7312505649212722e-05, "loss": 0.512, "step": 1967 }, { "epoch": 2.901050304035379, "grad_norm": 2.2541621177002233, "learning_rate": 2.7292796486538093e-05, "loss": 0.7145, "step": 1968 }, { "epoch": 2.902524414962226, "grad_norm": 2.08696055951799, "learning_rate": 2.727308588664793e-05, "loss": 0.484, "step": 1969 }, { "epoch": 2.9039985258890733, "grad_norm": 2.5390053141096227, "learning_rate": 2.725337386189761e-05, "loss": 0.6913, "step": 1970 }, { "epoch": 2.9054726368159205, "grad_norm": 2.4722251094367875, "learning_rate": 2.723366042464342e-05, "loss": 0.753, "step": 1971 }, { "epoch": 2.9069467477427677, "grad_norm": 2.198137630787544, "learning_rate": 2.7213945587242508e-05, "loss": 0.7816, "step": 1972 }, { "epoch": 2.908420858669615, "grad_norm": 2.1185166807186473, "learning_rate": 2.7194229362052924e-05, "loss": 0.6491, "step": 1973 }, { "epoch": 2.909894969596462, "grad_norm": 2.5757180740434977, "learning_rate": 2.7174511761433585e-05, "loss": 0.8917, "step": 1974 }, { "epoch": 2.9113690805233094, "grad_norm": 2.2020365955177192, "learning_rate": 2.715479279774425e-05, "loss": 0.7001, "step": 1975 }, { "epoch": 2.9128431914501567, "grad_norm": 2.011873524461758, "learning_rate": 2.7135072483345552e-05, "loss": 0.5666, "step": 1976 }, { "epoch": 2.914317302377004, "grad_norm": 2.135964213317946, "learning_rate": 2.7115350830598958e-05, "loss": 0.5946, "step": 1977 }, { "epoch": 2.915791413303851, "grad_norm": 2.4759331032770406, "learning_rate": 2.709562785186679e-05, "loss": 0.706, "step": 1978 }, { "epoch": 2.9172655242306984, "grad_norm": 2.5921460443254936, "learning_rate": 2.7075903559512178e-05, "loss": 0.6987, "step": 1979 }, { "epoch": 2.9187396351575456, "grad_norm": 2.2051810552261064, "learning_rate": 2.7056177965899097e-05, "loss": 0.6552, "step": 1980 }, { "epoch": 2.920213746084393, "grad_norm": 2.147276519586754, "learning_rate": 2.7036451083392332e-05, "loss": 0.6907, "step": 1981 }, { "epoch": 2.92168785701124, "grad_norm": 1.987548873108532, "learning_rate": 2.701672292435747e-05, "loss": 0.6053, "step": 1982 }, { "epoch": 2.9231619679380874, "grad_norm": 2.113666379460029, "learning_rate": 2.69969935011609e-05, "loss": 0.6527, "step": 1983 }, { "epoch": 2.9246360788649346, "grad_norm": 2.0353928792676936, "learning_rate": 2.6977262826169807e-05, "loss": 0.6108, "step": 1984 }, { "epoch": 2.926110189791782, "grad_norm": 2.421165706996798, "learning_rate": 2.695753091175216e-05, "loss": 0.6197, "step": 1985 }, { "epoch": 2.927584300718629, "grad_norm": 2.1099050350423294, "learning_rate": 2.6937797770276702e-05, "loss": 0.7125, "step": 1986 }, { "epoch": 2.9290584116454763, "grad_norm": 2.536737592443302, "learning_rate": 2.6918063414112942e-05, "loss": 0.5689, "step": 1987 }, { "epoch": 2.9305325225723236, "grad_norm": 2.2658371398527164, "learning_rate": 2.6898327855631155e-05, "loss": 0.7219, "step": 1988 }, { "epoch": 2.932006633499171, "grad_norm": 2.1103320176024014, "learning_rate": 2.6878591107202383e-05, "loss": 0.5938, "step": 1989 }, { "epoch": 2.933480744426018, "grad_norm": 2.0949449687640147, "learning_rate": 2.685885318119839e-05, "loss": 0.6525, "step": 1990 }, { "epoch": 2.9349548553528653, "grad_norm": 2.4738514017118054, "learning_rate": 2.683911408999169e-05, "loss": 0.7414, "step": 1991 }, { "epoch": 2.9364289662797125, "grad_norm": 2.190333901299555, "learning_rate": 2.6819373845955527e-05, "loss": 0.7147, "step": 1992 }, { "epoch": 2.9379030772065597, "grad_norm": 2.42088149827676, "learning_rate": 2.6799632461463862e-05, "loss": 0.6832, "step": 1993 }, { "epoch": 2.939377188133407, "grad_norm": 2.2355290347035175, "learning_rate": 2.6779889948891384e-05, "loss": 0.7355, "step": 1994 }, { "epoch": 2.940851299060254, "grad_norm": 2.355956980650906, "learning_rate": 2.676014632061347e-05, "loss": 0.6064, "step": 1995 }, { "epoch": 2.9423254099871015, "grad_norm": 2.017998414332936, "learning_rate": 2.674040158900622e-05, "loss": 0.6479, "step": 1996 }, { "epoch": 2.9437995209139487, "grad_norm": 2.587578382548776, "learning_rate": 2.6720655766446412e-05, "loss": 0.7481, "step": 1997 }, { "epoch": 2.945273631840796, "grad_norm": 2.3905823149018084, "learning_rate": 2.6700908865311497e-05, "loss": 0.7701, "step": 1998 }, { "epoch": 2.946747742767643, "grad_norm": 1.8651183087252112, "learning_rate": 2.6681160897979623e-05, "loss": 0.5676, "step": 1999 }, { "epoch": 2.9482218536944904, "grad_norm": 1.855751931515428, "learning_rate": 2.6661411876829596e-05, "loss": 0.6221, "step": 2000 }, { "epoch": 2.9482218536944904, "eval_bleu": 0.06964037147652799, "eval_bleu_1gram": 0.3765823141274865, "eval_bleu_2gram": 0.14746436284176845, "eval_bleu_3gram": 0.06427355198308782, "eval_bleu_4gram": 0.03179043857075316, "eval_rag_val_loss": 1.3536509342731968, "eval_rouge1": 0.37103110340577655, "eval_rouge2": 0.14232453983981508, "eval_rougeL": 0.3515595073725018, "step": 2000 } ], "logging_steps": 1, "max_steps": 4068, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": true, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }