| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 435, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.011510791366906475, | |
| "grad_norm": 1.295614242553711, | |
| "learning_rate": 0.0, | |
| "loss": 2.0611, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.02302158273381295, | |
| "grad_norm": 1.3193614482879639, | |
| "learning_rate": 5.000000000000001e-07, | |
| "loss": 2.2646, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.034532374100719423, | |
| "grad_norm": 1.3311564922332764, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 2.302, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0460431654676259, | |
| "grad_norm": 1.3216726779937744, | |
| "learning_rate": 1.5e-06, | |
| "loss": 2.2383, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.05755395683453238, | |
| "grad_norm": 1.3751393556594849, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 2.2098, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.06906474820143885, | |
| "grad_norm": 1.3023695945739746, | |
| "learning_rate": 2.5e-06, | |
| "loss": 2.1331, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.08057553956834532, | |
| "grad_norm": 1.271135926246643, | |
| "learning_rate": 3e-06, | |
| "loss": 2.2544, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.0920863309352518, | |
| "grad_norm": 1.3196772336959839, | |
| "learning_rate": 3.5e-06, | |
| "loss": 2.3242, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.10359712230215827, | |
| "grad_norm": 1.251950740814209, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 2.1899, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.11510791366906475, | |
| "grad_norm": 1.2269312143325806, | |
| "learning_rate": 4.5e-06, | |
| "loss": 2.1637, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.12661870503597122, | |
| "grad_norm": 1.3193552494049072, | |
| "learning_rate": 5e-06, | |
| "loss": 2.2172, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.1381294964028777, | |
| "grad_norm": 1.2646515369415283, | |
| "learning_rate": 4.988235294117647e-06, | |
| "loss": 2.1069, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.14964028776978416, | |
| "grad_norm": 1.301425576210022, | |
| "learning_rate": 4.976470588235294e-06, | |
| "loss": 2.2276, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.16115107913669063, | |
| "grad_norm": 1.1908143758773804, | |
| "learning_rate": 4.964705882352942e-06, | |
| "loss": 2.1751, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.17266187050359713, | |
| "grad_norm": 1.1371599435806274, | |
| "learning_rate": 4.9529411764705885e-06, | |
| "loss": 2.2119, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.1841726618705036, | |
| "grad_norm": 1.1369460821151733, | |
| "learning_rate": 4.941176470588236e-06, | |
| "loss": 2.1352, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.19568345323741007, | |
| "grad_norm": 1.2164900302886963, | |
| "learning_rate": 4.929411764705882e-06, | |
| "loss": 2.1389, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.20719424460431654, | |
| "grad_norm": 1.1954495906829834, | |
| "learning_rate": 4.91764705882353e-06, | |
| "loss": 2.1272, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.218705035971223, | |
| "grad_norm": 1.077544927597046, | |
| "learning_rate": 4.905882352941177e-06, | |
| "loss": 2.2048, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.2302158273381295, | |
| "grad_norm": 1.064244270324707, | |
| "learning_rate": 4.894117647058824e-06, | |
| "loss": 2.1051, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.24172661870503598, | |
| "grad_norm": 1.045543909072876, | |
| "learning_rate": 4.882352941176471e-06, | |
| "loss": 2.1624, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.25323741007194245, | |
| "grad_norm": 1.1219857931137085, | |
| "learning_rate": 4.870588235294118e-06, | |
| "loss": 2.0821, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.2647482014388489, | |
| "grad_norm": 1.0319204330444336, | |
| "learning_rate": 4.858823529411766e-06, | |
| "loss": 2.2232, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.2762589928057554, | |
| "grad_norm": 0.9106553792953491, | |
| "learning_rate": 4.847058823529412e-06, | |
| "loss": 2.0455, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.28776978417266186, | |
| "grad_norm": 0.9222265481948853, | |
| "learning_rate": 4.835294117647059e-06, | |
| "loss": 2.0065, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.2992805755395683, | |
| "grad_norm": 0.9210799336433411, | |
| "learning_rate": 4.823529411764706e-06, | |
| "loss": 2.2051, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.3107913669064748, | |
| "grad_norm": 0.95929354429245, | |
| "learning_rate": 4.811764705882354e-06, | |
| "loss": 2.293, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.32230215827338127, | |
| "grad_norm": 0.9271309971809387, | |
| "learning_rate": 4.800000000000001e-06, | |
| "loss": 2.1646, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.3338129496402878, | |
| "grad_norm": 0.8561931848526001, | |
| "learning_rate": 4.7882352941176475e-06, | |
| "loss": 2.151, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.34532374100719426, | |
| "grad_norm": 0.8737633228302002, | |
| "learning_rate": 4.776470588235294e-06, | |
| "loss": 2.2063, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.35683453237410073, | |
| "grad_norm": 0.8623224496841431, | |
| "learning_rate": 4.764705882352941e-06, | |
| "loss": 2.0332, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.3683453237410072, | |
| "grad_norm": 0.8182441592216492, | |
| "learning_rate": 4.752941176470589e-06, | |
| "loss": 2.1344, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.37985611510791367, | |
| "grad_norm": 0.8933826088905334, | |
| "learning_rate": 4.741176470588236e-06, | |
| "loss": 2.2029, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.39136690647482014, | |
| "grad_norm": 0.870568037033081, | |
| "learning_rate": 4.729411764705883e-06, | |
| "loss": 2.2155, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.4028776978417266, | |
| "grad_norm": 0.8473733067512512, | |
| "learning_rate": 4.717647058823529e-06, | |
| "loss": 2.1522, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.4143884892086331, | |
| "grad_norm": 0.8416939377784729, | |
| "learning_rate": 4.705882352941177e-06, | |
| "loss": 2.1937, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.42589928057553955, | |
| "grad_norm": 0.7861284613609314, | |
| "learning_rate": 4.694117647058824e-06, | |
| "loss": 2.1359, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.437410071942446, | |
| "grad_norm": 0.7433235049247742, | |
| "learning_rate": 4.682352941176471e-06, | |
| "loss": 2.1636, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.4489208633093525, | |
| "grad_norm": 0.7175543308258057, | |
| "learning_rate": 4.670588235294118e-06, | |
| "loss": 1.9667, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.460431654676259, | |
| "grad_norm": 0.7294153571128845, | |
| "learning_rate": 4.658823529411765e-06, | |
| "loss": 2.1362, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.4719424460431655, | |
| "grad_norm": 0.7124460935592651, | |
| "learning_rate": 4.647058823529412e-06, | |
| "loss": 2.0463, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.48345323741007196, | |
| "grad_norm": 0.7458817362785339, | |
| "learning_rate": 4.635294117647059e-06, | |
| "loss": 2.18, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.4949640287769784, | |
| "grad_norm": 0.6650211215019226, | |
| "learning_rate": 4.623529411764706e-06, | |
| "loss": 2.0119, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.5064748201438849, | |
| "grad_norm": 0.7400155067443848, | |
| "learning_rate": 4.611764705882353e-06, | |
| "loss": 2.0537, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.5179856115107914, | |
| "grad_norm": 0.6916301250457764, | |
| "learning_rate": 4.600000000000001e-06, | |
| "loss": 2.0584, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.5294964028776978, | |
| "grad_norm": 0.6942080855369568, | |
| "learning_rate": 4.588235294117647e-06, | |
| "loss": 2.1126, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.5410071942446043, | |
| "grad_norm": 0.694041907787323, | |
| "learning_rate": 4.5764705882352945e-06, | |
| "loss": 2.1958, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.5525179856115108, | |
| "grad_norm": 0.6456537842750549, | |
| "learning_rate": 4.564705882352941e-06, | |
| "loss": 2.212, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.5640287769784172, | |
| "grad_norm": 0.663451075553894, | |
| "learning_rate": 4.552941176470589e-06, | |
| "loss": 2.1401, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.5755395683453237, | |
| "grad_norm": 0.7182263135910034, | |
| "learning_rate": 4.541176470588236e-06, | |
| "loss": 2.137, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.5870503597122302, | |
| "grad_norm": 0.6442857384681702, | |
| "learning_rate": 4.529411764705883e-06, | |
| "loss": 2.0755, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.5985611510791367, | |
| "grad_norm": 0.6306608319282532, | |
| "learning_rate": 4.51764705882353e-06, | |
| "loss": 2.0456, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.6100719424460431, | |
| "grad_norm": 0.628402054309845, | |
| "learning_rate": 4.505882352941176e-06, | |
| "loss": 2.0548, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.6215827338129496, | |
| "grad_norm": 0.6245840191841125, | |
| "learning_rate": 4.494117647058824e-06, | |
| "loss": 2.176, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.6330935251798561, | |
| "grad_norm": 0.5973242521286011, | |
| "learning_rate": 4.482352941176471e-06, | |
| "loss": 2.004, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.6446043165467625, | |
| "grad_norm": 0.6911327242851257, | |
| "learning_rate": 4.4705882352941184e-06, | |
| "loss": 2.0991, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.6561151079136691, | |
| "grad_norm": 0.6195106506347656, | |
| "learning_rate": 4.458823529411765e-06, | |
| "loss": 2.0232, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.6676258992805756, | |
| "grad_norm": 0.5956724882125854, | |
| "learning_rate": 4.447058823529412e-06, | |
| "loss": 2.1384, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.679136690647482, | |
| "grad_norm": 0.6167479753494263, | |
| "learning_rate": 4.435294117647059e-06, | |
| "loss": 2.0524, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.6906474820143885, | |
| "grad_norm": 0.5958898663520813, | |
| "learning_rate": 4.423529411764707e-06, | |
| "loss": 2.0854, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.702158273381295, | |
| "grad_norm": 0.6348150372505188, | |
| "learning_rate": 4.411764705882353e-06, | |
| "loss": 2.1037, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.7136690647482015, | |
| "grad_norm": 0.5777585506439209, | |
| "learning_rate": 4.4e-06, | |
| "loss": 2.0463, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.7251798561151079, | |
| "grad_norm": 0.5744509696960449, | |
| "learning_rate": 4.388235294117648e-06, | |
| "loss": 2.0368, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.7366906474820144, | |
| "grad_norm": 0.6182202696800232, | |
| "learning_rate": 4.376470588235294e-06, | |
| "loss": 2.1305, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.7482014388489209, | |
| "grad_norm": 0.6051247715950012, | |
| "learning_rate": 4.3647058823529415e-06, | |
| "loss": 1.9982, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.7597122302158273, | |
| "grad_norm": 0.585530161857605, | |
| "learning_rate": 4.352941176470588e-06, | |
| "loss": 2.1311, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.7712230215827338, | |
| "grad_norm": 0.5718886256217957, | |
| "learning_rate": 4.341176470588236e-06, | |
| "loss": 2.0899, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.7827338129496403, | |
| "grad_norm": 0.5871637463569641, | |
| "learning_rate": 4.329411764705883e-06, | |
| "loss": 2.1422, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.7942446043165468, | |
| "grad_norm": 0.5348390340805054, | |
| "learning_rate": 4.31764705882353e-06, | |
| "loss": 2.0414, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.8057553956834532, | |
| "grad_norm": 0.5360246896743774, | |
| "learning_rate": 4.305882352941177e-06, | |
| "loss": 2.0697, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.8172661870503597, | |
| "grad_norm": 0.575326144695282, | |
| "learning_rate": 4.294117647058823e-06, | |
| "loss": 2.126, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.8287769784172662, | |
| "grad_norm": 0.5389431118965149, | |
| "learning_rate": 4.282352941176471e-06, | |
| "loss": 1.9699, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.8402877697841726, | |
| "grad_norm": 0.5181302428245544, | |
| "learning_rate": 4.270588235294118e-06, | |
| "loss": 2.0431, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.8517985611510791, | |
| "grad_norm": 0.5258436799049377, | |
| "learning_rate": 4.2588235294117655e-06, | |
| "loss": 2.1398, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.8633093525179856, | |
| "grad_norm": 0.5759520530700684, | |
| "learning_rate": 4.247058823529412e-06, | |
| "loss": 2.1257, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.874820143884892, | |
| "grad_norm": 0.5312909483909607, | |
| "learning_rate": 4.235294117647059e-06, | |
| "loss": 2.0202, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.8863309352517985, | |
| "grad_norm": 0.6128862500190735, | |
| "learning_rate": 4.223529411764706e-06, | |
| "loss": 2.1821, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.897841726618705, | |
| "grad_norm": 0.5947574377059937, | |
| "learning_rate": 4.211764705882354e-06, | |
| "loss": 2.0586, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.9093525179856116, | |
| "grad_norm": 0.5092775225639343, | |
| "learning_rate": 4.2000000000000004e-06, | |
| "loss": 2.0783, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.920863309352518, | |
| "grad_norm": 0.5344525575637817, | |
| "learning_rate": 4.188235294117647e-06, | |
| "loss": 2.1457, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.9323741007194245, | |
| "grad_norm": 0.5249314308166504, | |
| "learning_rate": 4.176470588235295e-06, | |
| "loss": 1.9904, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.943884892086331, | |
| "grad_norm": 0.5562986135482788, | |
| "learning_rate": 4.164705882352941e-06, | |
| "loss": 2.053, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.9553956834532374, | |
| "grad_norm": 0.5227307081222534, | |
| "learning_rate": 4.152941176470589e-06, | |
| "loss": 1.9463, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.9669064748201439, | |
| "grad_norm": 0.5479752421379089, | |
| "learning_rate": 4.141176470588235e-06, | |
| "loss": 2.1546, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.9784172661870504, | |
| "grad_norm": 0.49207690358161926, | |
| "learning_rate": 4.129411764705883e-06, | |
| "loss": 1.9769, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.9899280575539569, | |
| "grad_norm": 0.5519751310348511, | |
| "learning_rate": 4.11764705882353e-06, | |
| "loss": 2.0985, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.5100968480110168, | |
| "learning_rate": 4.105882352941177e-06, | |
| "loss": 2.0412, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 1.0115107913669066, | |
| "grad_norm": 0.5197513103485107, | |
| "learning_rate": 4.094117647058824e-06, | |
| "loss": 2.0366, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 1.023021582733813, | |
| "grad_norm": 0.475099116563797, | |
| "learning_rate": 4.082352941176471e-06, | |
| "loss": 1.9594, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 1.0345323741007195, | |
| "grad_norm": 0.4655166566371918, | |
| "learning_rate": 4.070588235294118e-06, | |
| "loss": 1.988, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.0460431654676259, | |
| "grad_norm": 0.47748327255249023, | |
| "learning_rate": 4.058823529411765e-06, | |
| "loss": 2.0573, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 1.0575539568345325, | |
| "grad_norm": 0.4752672612667084, | |
| "learning_rate": 4.0470588235294125e-06, | |
| "loss": 2.0761, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 1.0690647482014388, | |
| "grad_norm": 0.42830637097358704, | |
| "learning_rate": 4.0352941176470585e-06, | |
| "loss": 1.9, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 1.0805755395683454, | |
| "grad_norm": 0.509665310382843, | |
| "learning_rate": 4.023529411764706e-06, | |
| "loss": 2.1022, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 1.0920863309352518, | |
| "grad_norm": 0.4985044300556183, | |
| "learning_rate": 4.011764705882353e-06, | |
| "loss": 2.0004, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 1.1035971223021583, | |
| "grad_norm": 0.4855203330516815, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 1.9497, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 1.1151079136690647, | |
| "grad_norm": 0.5421211123466492, | |
| "learning_rate": 3.9882352941176475e-06, | |
| "loss": 2.0288, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 1.1266187050359713, | |
| "grad_norm": 0.4415021538734436, | |
| "learning_rate": 3.976470588235294e-06, | |
| "loss": 1.9209, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 1.1381294964028776, | |
| "grad_norm": 0.5151281356811523, | |
| "learning_rate": 3.964705882352942e-06, | |
| "loss": 2.0827, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 1.1496402877697842, | |
| "grad_norm": 0.4920862913131714, | |
| "learning_rate": 3.952941176470588e-06, | |
| "loss": 2.058, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.1611510791366906, | |
| "grad_norm": 0.4780770540237427, | |
| "learning_rate": 3.941176470588236e-06, | |
| "loss": 2.0177, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 1.1726618705035972, | |
| "grad_norm": 0.5100148916244507, | |
| "learning_rate": 3.9294117647058824e-06, | |
| "loss": 2.0284, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 1.1841726618705035, | |
| "grad_norm": 0.49509063363075256, | |
| "learning_rate": 3.91764705882353e-06, | |
| "loss": 1.9646, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 1.19568345323741, | |
| "grad_norm": 0.518322229385376, | |
| "learning_rate": 3.905882352941177e-06, | |
| "loss": 1.9543, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 1.2071942446043165, | |
| "grad_norm": 0.49948441982269287, | |
| "learning_rate": 3.894117647058824e-06, | |
| "loss": 1.9506, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 1.218705035971223, | |
| "grad_norm": 0.4965578317642212, | |
| "learning_rate": 3.882352941176471e-06, | |
| "loss": 1.9751, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 1.2302158273381294, | |
| "grad_norm": 0.4927343428134918, | |
| "learning_rate": 3.870588235294118e-06, | |
| "loss": 2.0053, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 1.241726618705036, | |
| "grad_norm": 0.5349815487861633, | |
| "learning_rate": 3.858823529411765e-06, | |
| "loss": 1.9543, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 1.2532374100719426, | |
| "grad_norm": 0.47634226083755493, | |
| "learning_rate": 3.847058823529412e-06, | |
| "loss": 2.0552, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 1.264748201438849, | |
| "grad_norm": 0.4937780797481537, | |
| "learning_rate": 3.8352941176470596e-06, | |
| "loss": 2.0465, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.2762589928057553, | |
| "grad_norm": 0.49782195687294006, | |
| "learning_rate": 3.8235294117647055e-06, | |
| "loss": 2.0029, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 1.2877697841726619, | |
| "grad_norm": 0.4511786103248596, | |
| "learning_rate": 3.8117647058823532e-06, | |
| "loss": 2.0275, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 1.2992805755395684, | |
| "grad_norm": 0.5700087547302246, | |
| "learning_rate": 3.8000000000000005e-06, | |
| "loss": 2.1454, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 1.3107913669064748, | |
| "grad_norm": 0.46322473883628845, | |
| "learning_rate": 3.7882352941176477e-06, | |
| "loss": 2.1094, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 1.3223021582733812, | |
| "grad_norm": 0.48486411571502686, | |
| "learning_rate": 3.776470588235294e-06, | |
| "loss": 1.9504, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 1.3338129496402877, | |
| "grad_norm": 0.5094816088676453, | |
| "learning_rate": 3.7647058823529414e-06, | |
| "loss": 2.0014, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 1.3453237410071943, | |
| "grad_norm": 0.4983859360218048, | |
| "learning_rate": 3.7529411764705886e-06, | |
| "loss": 2.0876, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 1.3568345323741007, | |
| "grad_norm": 0.46220195293426514, | |
| "learning_rate": 3.741176470588236e-06, | |
| "loss": 1.9317, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 1.3683453237410073, | |
| "grad_norm": 0.5100168585777283, | |
| "learning_rate": 3.7294117647058827e-06, | |
| "loss": 2.0247, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 1.3798561151079136, | |
| "grad_norm": 0.45993947982788086, | |
| "learning_rate": 3.71764705882353e-06, | |
| "loss": 2.1151, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.3913669064748202, | |
| "grad_norm": 0.45173344016075134, | |
| "learning_rate": 3.7058823529411767e-06, | |
| "loss": 1.9784, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 1.4028776978417266, | |
| "grad_norm": 0.5567683577537537, | |
| "learning_rate": 3.6941176470588236e-06, | |
| "loss": 1.9727, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 1.4143884892086331, | |
| "grad_norm": 0.5246084928512573, | |
| "learning_rate": 3.682352941176471e-06, | |
| "loss": 1.9025, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 1.4258992805755395, | |
| "grad_norm": 0.4744240343570709, | |
| "learning_rate": 3.670588235294118e-06, | |
| "loss": 1.9713, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 1.437410071942446, | |
| "grad_norm": 0.4525962769985199, | |
| "learning_rate": 3.6588235294117653e-06, | |
| "loss": 1.9845, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 1.4489208633093524, | |
| "grad_norm": 0.5006890296936035, | |
| "learning_rate": 3.6470588235294117e-06, | |
| "loss": 1.9638, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 1.460431654676259, | |
| "grad_norm": 0.4702132046222687, | |
| "learning_rate": 3.635294117647059e-06, | |
| "loss": 1.9444, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 1.4719424460431654, | |
| "grad_norm": 0.5455424189567566, | |
| "learning_rate": 3.623529411764706e-06, | |
| "loss": 1.9387, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 1.483453237410072, | |
| "grad_norm": 0.6158833503723145, | |
| "learning_rate": 3.6117647058823534e-06, | |
| "loss": 2.0112, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 1.4949640287769785, | |
| "grad_norm": 0.4596414268016815, | |
| "learning_rate": 3.6000000000000003e-06, | |
| "loss": 2.0323, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.506474820143885, | |
| "grad_norm": 0.46785110235214233, | |
| "learning_rate": 3.5882352941176475e-06, | |
| "loss": 2.065, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 1.5179856115107913, | |
| "grad_norm": 0.46331682801246643, | |
| "learning_rate": 3.5764705882352948e-06, | |
| "loss": 1.93, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 1.5294964028776978, | |
| "grad_norm": 0.47538137435913086, | |
| "learning_rate": 3.564705882352941e-06, | |
| "loss": 2.0613, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 1.5410071942446044, | |
| "grad_norm": 0.5323189496994019, | |
| "learning_rate": 3.5529411764705884e-06, | |
| "loss": 2.0175, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 1.5525179856115108, | |
| "grad_norm": 0.4505595266819, | |
| "learning_rate": 3.5411764705882356e-06, | |
| "loss": 1.9124, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 1.5640287769784171, | |
| "grad_norm": 0.4327537417411804, | |
| "learning_rate": 3.529411764705883e-06, | |
| "loss": 1.9633, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 1.5755395683453237, | |
| "grad_norm": 0.497213214635849, | |
| "learning_rate": 3.5176470588235297e-06, | |
| "loss": 1.9673, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 1.5870503597122303, | |
| "grad_norm": 0.4779418706893921, | |
| "learning_rate": 3.5058823529411765e-06, | |
| "loss": 2.0615, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 1.5985611510791367, | |
| "grad_norm": 0.45400741696357727, | |
| "learning_rate": 3.4941176470588238e-06, | |
| "loss": 2.0023, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 1.610071942446043, | |
| "grad_norm": 0.4582999646663666, | |
| "learning_rate": 3.4823529411764706e-06, | |
| "loss": 2.104, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.6215827338129496, | |
| "grad_norm": 0.436869353055954, | |
| "learning_rate": 3.470588235294118e-06, | |
| "loss": 2.0327, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 1.6330935251798562, | |
| "grad_norm": 0.49151161313056946, | |
| "learning_rate": 3.458823529411765e-06, | |
| "loss": 1.8815, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 1.6446043165467625, | |
| "grad_norm": 0.4952029585838318, | |
| "learning_rate": 3.4470588235294123e-06, | |
| "loss": 1.9371, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 1.6561151079136691, | |
| "grad_norm": 0.4617934226989746, | |
| "learning_rate": 3.4352941176470587e-06, | |
| "loss": 2.0492, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 1.6676258992805755, | |
| "grad_norm": 0.45421457290649414, | |
| "learning_rate": 3.423529411764706e-06, | |
| "loss": 1.9799, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.679136690647482, | |
| "grad_norm": 0.45641016960144043, | |
| "learning_rate": 3.4117647058823532e-06, | |
| "loss": 2.1129, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 1.6906474820143886, | |
| "grad_norm": 0.4528232514858246, | |
| "learning_rate": 3.4000000000000005e-06, | |
| "loss": 1.9968, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 1.702158273381295, | |
| "grad_norm": 0.425224632024765, | |
| "learning_rate": 3.3882352941176473e-06, | |
| "loss": 2.0766, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 1.7136690647482014, | |
| "grad_norm": 0.44971901178359985, | |
| "learning_rate": 3.3764705882352946e-06, | |
| "loss": 1.951, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 1.725179856115108, | |
| "grad_norm": 0.5100486278533936, | |
| "learning_rate": 3.364705882352942e-06, | |
| "loss": 2.0126, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.7366906474820145, | |
| "grad_norm": 0.43650567531585693, | |
| "learning_rate": 3.352941176470588e-06, | |
| "loss": 2.0752, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 1.7482014388489209, | |
| "grad_norm": 0.4769532382488251, | |
| "learning_rate": 3.3411764705882354e-06, | |
| "loss": 2.005, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 1.7597122302158272, | |
| "grad_norm": 0.4705875813961029, | |
| "learning_rate": 3.3294117647058827e-06, | |
| "loss": 2.0744, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 1.7712230215827338, | |
| "grad_norm": 0.4628766179084778, | |
| "learning_rate": 3.31764705882353e-06, | |
| "loss": 2.108, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 1.7827338129496404, | |
| "grad_norm": 0.4407738149166107, | |
| "learning_rate": 3.3058823529411763e-06, | |
| "loss": 1.9779, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 1.7942446043165468, | |
| "grad_norm": 0.500913679599762, | |
| "learning_rate": 3.2941176470588236e-06, | |
| "loss": 1.8435, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 1.8057553956834531, | |
| "grad_norm": 0.4094080626964569, | |
| "learning_rate": 3.282352941176471e-06, | |
| "loss": 1.972, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 1.8172661870503597, | |
| "grad_norm": 0.46937379240989685, | |
| "learning_rate": 3.270588235294118e-06, | |
| "loss": 1.8564, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 1.8287769784172663, | |
| "grad_norm": 0.46270328760147095, | |
| "learning_rate": 3.258823529411765e-06, | |
| "loss": 2.0629, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 1.8402877697841726, | |
| "grad_norm": 0.452856183052063, | |
| "learning_rate": 3.247058823529412e-06, | |
| "loss": 2.0334, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.851798561151079, | |
| "grad_norm": 0.4600447714328766, | |
| "learning_rate": 3.2352941176470594e-06, | |
| "loss": 1.9833, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 1.8633093525179856, | |
| "grad_norm": 0.5156863927841187, | |
| "learning_rate": 3.2235294117647058e-06, | |
| "loss": 2.1083, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 1.8748201438848922, | |
| "grad_norm": 0.4730561077594757, | |
| "learning_rate": 3.211764705882353e-06, | |
| "loss": 2.016, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 1.8863309352517985, | |
| "grad_norm": 0.4580685496330261, | |
| "learning_rate": 3.2000000000000003e-06, | |
| "loss": 2.0209, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 1.8978417266187049, | |
| "grad_norm": 0.438728004693985, | |
| "learning_rate": 3.1882352941176475e-06, | |
| "loss": 1.9078, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 1.9093525179856115, | |
| "grad_norm": 0.4413028061389923, | |
| "learning_rate": 3.1764705882352943e-06, | |
| "loss": 1.8775, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 1.920863309352518, | |
| "grad_norm": 0.4299080967903137, | |
| "learning_rate": 3.1647058823529416e-06, | |
| "loss": 2.0034, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 1.9323741007194246, | |
| "grad_norm": 0.47266408801078796, | |
| "learning_rate": 3.1529411764705884e-06, | |
| "loss": 1.8302, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 1.943884892086331, | |
| "grad_norm": 0.4524175226688385, | |
| "learning_rate": 3.1411764705882357e-06, | |
| "loss": 1.967, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 1.9553956834532373, | |
| "grad_norm": 0.39914512634277344, | |
| "learning_rate": 3.1294117647058825e-06, | |
| "loss": 2.0307, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.966906474820144, | |
| "grad_norm": 0.47265124320983887, | |
| "learning_rate": 3.1176470588235297e-06, | |
| "loss": 2.0066, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 1.9784172661870505, | |
| "grad_norm": 0.3866676986217499, | |
| "learning_rate": 3.105882352941177e-06, | |
| "loss": 2.0306, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 1.9899280575539569, | |
| "grad_norm": 0.4875778555870056, | |
| "learning_rate": 3.0941176470588234e-06, | |
| "loss": 1.926, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.467013418674469, | |
| "learning_rate": 3.0823529411764706e-06, | |
| "loss": 2.0708, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 2.0115107913669066, | |
| "grad_norm": 0.4312443435192108, | |
| "learning_rate": 3.070588235294118e-06, | |
| "loss": 1.9997, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 2.023021582733813, | |
| "grad_norm": 0.44231534004211426, | |
| "learning_rate": 3.058823529411765e-06, | |
| "loss": 2.0316, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 2.0345323741007193, | |
| "grad_norm": 0.43418049812316895, | |
| "learning_rate": 3.047058823529412e-06, | |
| "loss": 1.898, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 2.046043165467626, | |
| "grad_norm": 0.42247310280799866, | |
| "learning_rate": 3.035294117647059e-06, | |
| "loss": 1.9524, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 2.0575539568345325, | |
| "grad_norm": 0.47551754117012024, | |
| "learning_rate": 3.0235294117647064e-06, | |
| "loss": 1.9299, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 2.069064748201439, | |
| "grad_norm": 0.39652958512306213, | |
| "learning_rate": 3.011764705882353e-06, | |
| "loss": 2.0617, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.080575539568345, | |
| "grad_norm": 0.41868913173675537, | |
| "learning_rate": 3e-06, | |
| "loss": 1.9897, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 2.0920863309352518, | |
| "grad_norm": 0.39825567603111267, | |
| "learning_rate": 2.9882352941176473e-06, | |
| "loss": 1.9624, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 2.1035971223021583, | |
| "grad_norm": 0.4380688965320587, | |
| "learning_rate": 2.9764705882352946e-06, | |
| "loss": 1.9904, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 2.115107913669065, | |
| "grad_norm": 0.39882150292396545, | |
| "learning_rate": 2.9647058823529414e-06, | |
| "loss": 1.9084, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 2.126618705035971, | |
| "grad_norm": 0.4040652811527252, | |
| "learning_rate": 2.9529411764705882e-06, | |
| "loss": 2.1459, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 2.1381294964028776, | |
| "grad_norm": 0.4166700839996338, | |
| "learning_rate": 2.9411764705882355e-06, | |
| "loss": 2.0026, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 2.149640287769784, | |
| "grad_norm": 0.41756269335746765, | |
| "learning_rate": 2.9294117647058827e-06, | |
| "loss": 2.0675, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 2.161151079136691, | |
| "grad_norm": 0.41332557797431946, | |
| "learning_rate": 2.9176470588235295e-06, | |
| "loss": 1.9391, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 2.172661870503597, | |
| "grad_norm": 0.4298039972782135, | |
| "learning_rate": 2.9058823529411768e-06, | |
| "loss": 1.8671, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 2.1841726618705035, | |
| "grad_norm": 0.45794206857681274, | |
| "learning_rate": 2.894117647058824e-06, | |
| "loss": 1.9466, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.19568345323741, | |
| "grad_norm": 0.3940126597881317, | |
| "learning_rate": 2.8823529411764704e-06, | |
| "loss": 1.9557, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 2.2071942446043167, | |
| "grad_norm": 0.45463988184928894, | |
| "learning_rate": 2.8705882352941177e-06, | |
| "loss": 1.9659, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 2.218705035971223, | |
| "grad_norm": 0.4931933879852295, | |
| "learning_rate": 2.858823529411765e-06, | |
| "loss": 1.9761, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 2.2302158273381294, | |
| "grad_norm": 0.42029982805252075, | |
| "learning_rate": 2.847058823529412e-06, | |
| "loss": 2.0658, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 2.241726618705036, | |
| "grad_norm": 0.4499173164367676, | |
| "learning_rate": 2.835294117647059e-06, | |
| "loss": 2.0426, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 2.2532374100719426, | |
| "grad_norm": 0.42561689019203186, | |
| "learning_rate": 2.8235294117647062e-06, | |
| "loss": 1.9447, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 2.2647482014388487, | |
| "grad_norm": 0.4154174327850342, | |
| "learning_rate": 2.8117647058823535e-06, | |
| "loss": 1.8771, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 2.2762589928057553, | |
| "grad_norm": 0.40316736698150635, | |
| "learning_rate": 2.8000000000000003e-06, | |
| "loss": 2.0181, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 2.287769784172662, | |
| "grad_norm": 0.4346086084842682, | |
| "learning_rate": 2.788235294117647e-06, | |
| "loss": 1.9526, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 2.2992805755395684, | |
| "grad_norm": 0.4031694829463959, | |
| "learning_rate": 2.7764705882352944e-06, | |
| "loss": 2.0371, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.310791366906475, | |
| "grad_norm": 0.46281856298446655, | |
| "learning_rate": 2.7647058823529416e-06, | |
| "loss": 1.9795, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 2.322302158273381, | |
| "grad_norm": 0.41117939352989197, | |
| "learning_rate": 2.7529411764705884e-06, | |
| "loss": 1.93, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 2.3338129496402877, | |
| "grad_norm": 0.4585975110530853, | |
| "learning_rate": 2.7411764705882353e-06, | |
| "loss": 2.0624, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 2.3453237410071943, | |
| "grad_norm": 0.43110424280166626, | |
| "learning_rate": 2.7294117647058825e-06, | |
| "loss": 1.9551, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 2.356834532374101, | |
| "grad_norm": 0.44794800877571106, | |
| "learning_rate": 2.7176470588235297e-06, | |
| "loss": 1.9633, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 2.368345323741007, | |
| "grad_norm": 0.44358718395233154, | |
| "learning_rate": 2.7058823529411766e-06, | |
| "loss": 1.992, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 2.3798561151079136, | |
| "grad_norm": 0.3666572868824005, | |
| "learning_rate": 2.694117647058824e-06, | |
| "loss": 1.9791, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 2.39136690647482, | |
| "grad_norm": 0.4338827431201935, | |
| "learning_rate": 2.682352941176471e-06, | |
| "loss": 1.9804, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 2.402877697841727, | |
| "grad_norm": 0.3984374701976776, | |
| "learning_rate": 2.6705882352941175e-06, | |
| "loss": 2.0183, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 2.414388489208633, | |
| "grad_norm": 0.44970276951789856, | |
| "learning_rate": 2.6588235294117647e-06, | |
| "loss": 1.9441, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.4258992805755395, | |
| "grad_norm": 0.38036495447158813, | |
| "learning_rate": 2.647058823529412e-06, | |
| "loss": 2.0069, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 2.437410071942446, | |
| "grad_norm": 0.40496423840522766, | |
| "learning_rate": 2.635294117647059e-06, | |
| "loss": 2.0429, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 2.4489208633093527, | |
| "grad_norm": 0.4080514907836914, | |
| "learning_rate": 2.623529411764706e-06, | |
| "loss": 1.9265, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 2.460431654676259, | |
| "grad_norm": 0.4119996130466461, | |
| "learning_rate": 2.6117647058823533e-06, | |
| "loss": 2.0675, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 2.4719424460431654, | |
| "grad_norm": 0.4465183615684509, | |
| "learning_rate": 2.6e-06, | |
| "loss": 1.9325, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 2.483453237410072, | |
| "grad_norm": 0.4121825397014618, | |
| "learning_rate": 2.5882352941176473e-06, | |
| "loss": 1.9198, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 2.4949640287769785, | |
| "grad_norm": 0.41370537877082825, | |
| "learning_rate": 2.576470588235294e-06, | |
| "loss": 2.0955, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 2.506474820143885, | |
| "grad_norm": 0.44053828716278076, | |
| "learning_rate": 2.5647058823529414e-06, | |
| "loss": 2.0383, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 2.5179856115107913, | |
| "grad_norm": 0.42168861627578735, | |
| "learning_rate": 2.5529411764705887e-06, | |
| "loss": 1.9718, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 2.529496402877698, | |
| "grad_norm": 0.4006345570087433, | |
| "learning_rate": 2.541176470588235e-06, | |
| "loss": 1.9606, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.5410071942446044, | |
| "grad_norm": 0.40757259726524353, | |
| "learning_rate": 2.5294117647058823e-06, | |
| "loss": 2.0171, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 2.5525179856115106, | |
| "grad_norm": 0.40500617027282715, | |
| "learning_rate": 2.5176470588235295e-06, | |
| "loss": 1.9153, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 2.564028776978417, | |
| "grad_norm": 0.36510592699050903, | |
| "learning_rate": 2.505882352941177e-06, | |
| "loss": 1.8415, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 2.5755395683453237, | |
| "grad_norm": 0.44662347435951233, | |
| "learning_rate": 2.4941176470588236e-06, | |
| "loss": 1.9275, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 2.5870503597122303, | |
| "grad_norm": 0.4446622431278229, | |
| "learning_rate": 2.482352941176471e-06, | |
| "loss": 2.002, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 2.598561151079137, | |
| "grad_norm": 0.46108031272888184, | |
| "learning_rate": 2.470588235294118e-06, | |
| "loss": 1.9739, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 2.610071942446043, | |
| "grad_norm": 0.42075315117836, | |
| "learning_rate": 2.458823529411765e-06, | |
| "loss": 1.93, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 2.6215827338129496, | |
| "grad_norm": 0.4763178527355194, | |
| "learning_rate": 2.447058823529412e-06, | |
| "loss": 1.8973, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 2.633093525179856, | |
| "grad_norm": 0.39183807373046875, | |
| "learning_rate": 2.435294117647059e-06, | |
| "loss": 1.9485, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 2.6446043165467623, | |
| "grad_norm": 0.4360307455062866, | |
| "learning_rate": 2.423529411764706e-06, | |
| "loss": 1.902, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.656115107913669, | |
| "grad_norm": 0.46272391080856323, | |
| "learning_rate": 2.411764705882353e-06, | |
| "loss": 1.9535, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 2.6676258992805755, | |
| "grad_norm": 0.43482983112335205, | |
| "learning_rate": 2.4000000000000003e-06, | |
| "loss": 1.9474, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 2.679136690647482, | |
| "grad_norm": 0.38162457942962646, | |
| "learning_rate": 2.388235294117647e-06, | |
| "loss": 1.935, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 2.6906474820143886, | |
| "grad_norm": 0.4534943401813507, | |
| "learning_rate": 2.3764705882352944e-06, | |
| "loss": 2.0316, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 2.702158273381295, | |
| "grad_norm": 0.3955429792404175, | |
| "learning_rate": 2.3647058823529416e-06, | |
| "loss": 1.9407, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 2.7136690647482014, | |
| "grad_norm": 0.3862835466861725, | |
| "learning_rate": 2.3529411764705885e-06, | |
| "loss": 1.7705, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 2.725179856115108, | |
| "grad_norm": 0.42534133791923523, | |
| "learning_rate": 2.3411764705882357e-06, | |
| "loss": 1.9679, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 2.7366906474820145, | |
| "grad_norm": 0.42100828886032104, | |
| "learning_rate": 2.3294117647058825e-06, | |
| "loss": 1.8997, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 2.7482014388489207, | |
| "grad_norm": 0.4313439726829529, | |
| "learning_rate": 2.3176470588235293e-06, | |
| "loss": 1.865, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 2.7597122302158272, | |
| "grad_norm": 0.42461809515953064, | |
| "learning_rate": 2.3058823529411766e-06, | |
| "loss": 1.8169, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.771223021582734, | |
| "grad_norm": 0.39618465304374695, | |
| "learning_rate": 2.2941176470588234e-06, | |
| "loss": 1.8453, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 2.7827338129496404, | |
| "grad_norm": 0.4130411148071289, | |
| "learning_rate": 2.2823529411764707e-06, | |
| "loss": 1.901, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 2.794244604316547, | |
| "grad_norm": 0.40345144271850586, | |
| "learning_rate": 2.270588235294118e-06, | |
| "loss": 1.9913, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 2.805755395683453, | |
| "grad_norm": 0.4513832926750183, | |
| "learning_rate": 2.258823529411765e-06, | |
| "loss": 1.9226, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 2.8172661870503597, | |
| "grad_norm": 0.4180731475353241, | |
| "learning_rate": 2.247058823529412e-06, | |
| "loss": 1.9364, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 2.8287769784172663, | |
| "grad_norm": 0.40142112970352173, | |
| "learning_rate": 2.2352941176470592e-06, | |
| "loss": 2.0484, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 2.8402877697841724, | |
| "grad_norm": 0.42125147581100464, | |
| "learning_rate": 2.223529411764706e-06, | |
| "loss": 1.8964, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 2.851798561151079, | |
| "grad_norm": 0.4866432249546051, | |
| "learning_rate": 2.2117647058823533e-06, | |
| "loss": 1.8468, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 2.8633093525179856, | |
| "grad_norm": 0.43598270416259766, | |
| "learning_rate": 2.2e-06, | |
| "loss": 1.8239, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 2.874820143884892, | |
| "grad_norm": 0.39764901995658875, | |
| "learning_rate": 2.188235294117647e-06, | |
| "loss": 1.9577, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.8863309352517987, | |
| "grad_norm": 0.43190810084342957, | |
| "learning_rate": 2.176470588235294e-06, | |
| "loss": 1.8573, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 2.897841726618705, | |
| "grad_norm": 0.43591439723968506, | |
| "learning_rate": 2.1647058823529414e-06, | |
| "loss": 1.9726, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 2.9093525179856115, | |
| "grad_norm": 0.37023991346359253, | |
| "learning_rate": 2.1529411764705887e-06, | |
| "loss": 1.9219, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 2.920863309352518, | |
| "grad_norm": 0.4382263422012329, | |
| "learning_rate": 2.1411764705882355e-06, | |
| "loss": 2.0037, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 2.9323741007194246, | |
| "grad_norm": 0.4027315378189087, | |
| "learning_rate": 2.1294117647058827e-06, | |
| "loss": 1.7579, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 2.9438848920863308, | |
| "grad_norm": 0.41298389434814453, | |
| "learning_rate": 2.1176470588235296e-06, | |
| "loss": 1.895, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 2.9553956834532373, | |
| "grad_norm": 0.39726293087005615, | |
| "learning_rate": 2.105882352941177e-06, | |
| "loss": 1.9757, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 2.966906474820144, | |
| "grad_norm": 0.4210617244243622, | |
| "learning_rate": 2.0941176470588236e-06, | |
| "loss": 1.9105, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 2.9784172661870505, | |
| "grad_norm": 0.4142284095287323, | |
| "learning_rate": 2.0823529411764705e-06, | |
| "loss": 1.8864, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 2.989928057553957, | |
| "grad_norm": 0.3998337686061859, | |
| "learning_rate": 2.0705882352941177e-06, | |
| "loss": 1.9273, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.5197608470916748, | |
| "learning_rate": 2.058823529411765e-06, | |
| "loss": 1.9653, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 3.0115107913669066, | |
| "grad_norm": 0.41634294390678406, | |
| "learning_rate": 2.047058823529412e-06, | |
| "loss": 2.0373, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 3.023021582733813, | |
| "grad_norm": 0.45880216360092163, | |
| "learning_rate": 2.035294117647059e-06, | |
| "loss": 2.0428, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 3.0345323741007193, | |
| "grad_norm": 0.4628404974937439, | |
| "learning_rate": 2.0235294117647063e-06, | |
| "loss": 1.9155, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 3.046043165467626, | |
| "grad_norm": 0.434393972158432, | |
| "learning_rate": 2.011764705882353e-06, | |
| "loss": 1.9765, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 3.0575539568345325, | |
| "grad_norm": 0.3955315053462982, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 1.8562, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 3.069064748201439, | |
| "grad_norm": 0.38154056668281555, | |
| "learning_rate": 1.988235294117647e-06, | |
| "loss": 1.9882, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 3.080575539568345, | |
| "grad_norm": 0.39588648080825806, | |
| "learning_rate": 1.976470588235294e-06, | |
| "loss": 1.966, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 3.0920863309352518, | |
| "grad_norm": 0.3876536786556244, | |
| "learning_rate": 1.9647058823529412e-06, | |
| "loss": 1.9866, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 3.1035971223021583, | |
| "grad_norm": 0.4039998948574066, | |
| "learning_rate": 1.9529411764705885e-06, | |
| "loss": 1.8997, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 3.115107913669065, | |
| "grad_norm": 0.4637863039970398, | |
| "learning_rate": 1.9411764705882353e-06, | |
| "loss": 1.8496, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 3.126618705035971, | |
| "grad_norm": 0.37885233759880066, | |
| "learning_rate": 1.9294117647058825e-06, | |
| "loss": 1.9608, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 3.1381294964028776, | |
| "grad_norm": 0.39651158452033997, | |
| "learning_rate": 1.9176470588235298e-06, | |
| "loss": 1.9621, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 3.149640287769784, | |
| "grad_norm": 0.4272053837776184, | |
| "learning_rate": 1.9058823529411766e-06, | |
| "loss": 2.0647, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 3.161151079136691, | |
| "grad_norm": 0.36566904187202454, | |
| "learning_rate": 1.8941176470588239e-06, | |
| "loss": 1.9796, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 3.172661870503597, | |
| "grad_norm": 0.4320291578769684, | |
| "learning_rate": 1.8823529411764707e-06, | |
| "loss": 1.8802, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 3.1841726618705035, | |
| "grad_norm": 0.43051987886428833, | |
| "learning_rate": 1.870588235294118e-06, | |
| "loss": 1.8472, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 3.19568345323741, | |
| "grad_norm": 0.40524744987487793, | |
| "learning_rate": 1.858823529411765e-06, | |
| "loss": 1.978, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 3.2071942446043167, | |
| "grad_norm": 0.40227210521698, | |
| "learning_rate": 1.8470588235294118e-06, | |
| "loss": 1.8278, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 3.218705035971223, | |
| "grad_norm": 0.4132155478000641, | |
| "learning_rate": 1.835294117647059e-06, | |
| "loss": 1.8649, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 3.2302158273381294, | |
| "grad_norm": 0.44230976700782776, | |
| "learning_rate": 1.8235294117647058e-06, | |
| "loss": 2.0472, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 3.241726618705036, | |
| "grad_norm": 0.41526058316230774, | |
| "learning_rate": 1.811764705882353e-06, | |
| "loss": 1.9339, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 3.2532374100719426, | |
| "grad_norm": 0.4362848401069641, | |
| "learning_rate": 1.8000000000000001e-06, | |
| "loss": 2.0415, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 3.2647482014388487, | |
| "grad_norm": 0.36819082498550415, | |
| "learning_rate": 1.7882352941176474e-06, | |
| "loss": 1.9617, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 3.2762589928057553, | |
| "grad_norm": 0.3629878759384155, | |
| "learning_rate": 1.7764705882352942e-06, | |
| "loss": 1.9392, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 3.287769784172662, | |
| "grad_norm": 0.39181169867515564, | |
| "learning_rate": 1.7647058823529414e-06, | |
| "loss": 1.9113, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 3.2992805755395684, | |
| "grad_norm": 0.38480955362319946, | |
| "learning_rate": 1.7529411764705883e-06, | |
| "loss": 1.796, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 3.310791366906475, | |
| "grad_norm": 0.3901033103466034, | |
| "learning_rate": 1.7411764705882353e-06, | |
| "loss": 1.8803, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 3.322302158273381, | |
| "grad_norm": 0.43883225321769714, | |
| "learning_rate": 1.7294117647058825e-06, | |
| "loss": 1.9635, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 3.3338129496402877, | |
| "grad_norm": 0.36902785301208496, | |
| "learning_rate": 1.7176470588235294e-06, | |
| "loss": 1.9661, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 3.3453237410071943, | |
| "grad_norm": 0.39565619826316833, | |
| "learning_rate": 1.7058823529411766e-06, | |
| "loss": 2.0416, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 3.356834532374101, | |
| "grad_norm": 0.4097813367843628, | |
| "learning_rate": 1.6941176470588237e-06, | |
| "loss": 1.9529, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 3.368345323741007, | |
| "grad_norm": 0.43599432706832886, | |
| "learning_rate": 1.682352941176471e-06, | |
| "loss": 2.011, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 3.3798561151079136, | |
| "grad_norm": 0.3928837180137634, | |
| "learning_rate": 1.6705882352941177e-06, | |
| "loss": 1.8357, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 3.39136690647482, | |
| "grad_norm": 0.40635019540786743, | |
| "learning_rate": 1.658823529411765e-06, | |
| "loss": 1.8737, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 3.402877697841727, | |
| "grad_norm": 0.43016231060028076, | |
| "learning_rate": 1.6470588235294118e-06, | |
| "loss": 1.9542, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 3.414388489208633, | |
| "grad_norm": 0.478292852640152, | |
| "learning_rate": 1.635294117647059e-06, | |
| "loss": 1.8912, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 3.4258992805755395, | |
| "grad_norm": 0.40000081062316895, | |
| "learning_rate": 1.623529411764706e-06, | |
| "loss": 1.9357, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 3.437410071942446, | |
| "grad_norm": 0.4033874273300171, | |
| "learning_rate": 1.6117647058823529e-06, | |
| "loss": 1.906, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 3.4489208633093527, | |
| "grad_norm": 0.40947261452674866, | |
| "learning_rate": 1.6000000000000001e-06, | |
| "loss": 1.8173, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 3.460431654676259, | |
| "grad_norm": 0.4071550965309143, | |
| "learning_rate": 1.5882352941176472e-06, | |
| "loss": 1.9455, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 3.4719424460431654, | |
| "grad_norm": 0.4430578052997589, | |
| "learning_rate": 1.5764705882352942e-06, | |
| "loss": 1.8767, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 3.483453237410072, | |
| "grad_norm": 0.41929903626441956, | |
| "learning_rate": 1.5647058823529412e-06, | |
| "loss": 1.9888, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 3.4949640287769785, | |
| "grad_norm": 0.3640955090522766, | |
| "learning_rate": 1.5529411764705885e-06, | |
| "loss": 1.8597, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 3.506474820143885, | |
| "grad_norm": 0.4024275839328766, | |
| "learning_rate": 1.5411764705882353e-06, | |
| "loss": 1.8771, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 3.5179856115107913, | |
| "grad_norm": 0.38399186730384827, | |
| "learning_rate": 1.5294117647058826e-06, | |
| "loss": 1.9956, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 3.529496402877698, | |
| "grad_norm": 0.39859694242477417, | |
| "learning_rate": 1.5176470588235296e-06, | |
| "loss": 1.9475, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 3.5410071942446044, | |
| "grad_norm": 0.4364980459213257, | |
| "learning_rate": 1.5058823529411764e-06, | |
| "loss": 1.9514, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 3.5525179856115106, | |
| "grad_norm": 0.4755602180957794, | |
| "learning_rate": 1.4941176470588237e-06, | |
| "loss": 1.9057, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 3.564028776978417, | |
| "grad_norm": 0.39024367928504944, | |
| "learning_rate": 1.4823529411764707e-06, | |
| "loss": 1.9041, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 3.5755395683453237, | |
| "grad_norm": 0.4508678615093231, | |
| "learning_rate": 1.4705882352941177e-06, | |
| "loss": 2.0103, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 3.5870503597122303, | |
| "grad_norm": 0.4054012894630432, | |
| "learning_rate": 1.4588235294117648e-06, | |
| "loss": 2.0046, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 3.598561151079137, | |
| "grad_norm": 0.40608781576156616, | |
| "learning_rate": 1.447058823529412e-06, | |
| "loss": 1.9209, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 3.610071942446043, | |
| "grad_norm": 0.4027564525604248, | |
| "learning_rate": 1.4352941176470588e-06, | |
| "loss": 1.9453, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 3.6215827338129496, | |
| "grad_norm": 0.37097108364105225, | |
| "learning_rate": 1.423529411764706e-06, | |
| "loss": 1.8778, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 3.633093525179856, | |
| "grad_norm": 0.3786408007144928, | |
| "learning_rate": 1.4117647058823531e-06, | |
| "loss": 2.0226, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 3.6446043165467623, | |
| "grad_norm": 0.5143160223960876, | |
| "learning_rate": 1.4000000000000001e-06, | |
| "loss": 1.8967, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 3.656115107913669, | |
| "grad_norm": 0.4117061197757721, | |
| "learning_rate": 1.3882352941176472e-06, | |
| "loss": 1.9639, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 3.6676258992805755, | |
| "grad_norm": 0.38462352752685547, | |
| "learning_rate": 1.3764705882352942e-06, | |
| "loss": 1.9823, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 3.679136690647482, | |
| "grad_norm": 0.4092719852924347, | |
| "learning_rate": 1.3647058823529413e-06, | |
| "loss": 1.9542, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 3.6906474820143886, | |
| "grad_norm": 0.3653268814086914, | |
| "learning_rate": 1.3529411764705883e-06, | |
| "loss": 1.9841, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 3.702158273381295, | |
| "grad_norm": 0.46952006220817566, | |
| "learning_rate": 1.3411764705882355e-06, | |
| "loss": 1.8227, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 3.7136690647482014, | |
| "grad_norm": 0.3843960464000702, | |
| "learning_rate": 1.3294117647058824e-06, | |
| "loss": 1.9256, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 3.725179856115108, | |
| "grad_norm": 0.4532316327095032, | |
| "learning_rate": 1.3176470588235296e-06, | |
| "loss": 1.8819, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 3.7366906474820145, | |
| "grad_norm": 0.4273243248462677, | |
| "learning_rate": 1.3058823529411766e-06, | |
| "loss": 1.7333, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 3.7482014388489207, | |
| "grad_norm": 0.3750397264957428, | |
| "learning_rate": 1.2941176470588237e-06, | |
| "loss": 1.8715, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 3.7597122302158272, | |
| "grad_norm": 0.37180638313293457, | |
| "learning_rate": 1.2823529411764707e-06, | |
| "loss": 1.8691, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 3.771223021582734, | |
| "grad_norm": 0.45777222514152527, | |
| "learning_rate": 1.2705882352941175e-06, | |
| "loss": 2.0797, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 3.7827338129496404, | |
| "grad_norm": 0.40992122888565063, | |
| "learning_rate": 1.2588235294117648e-06, | |
| "loss": 1.8158, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 3.794244604316547, | |
| "grad_norm": 0.39492303133010864, | |
| "learning_rate": 1.2470588235294118e-06, | |
| "loss": 1.9402, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 3.805755395683453, | |
| "grad_norm": 0.4207254946231842, | |
| "learning_rate": 1.235294117647059e-06, | |
| "loss": 1.9019, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 3.8172661870503597, | |
| "grad_norm": 0.41245564818382263, | |
| "learning_rate": 1.223529411764706e-06, | |
| "loss": 1.9962, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 3.8287769784172663, | |
| "grad_norm": 0.4352625906467438, | |
| "learning_rate": 1.211764705882353e-06, | |
| "loss": 1.9475, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 3.8402877697841724, | |
| "grad_norm": 0.44661152362823486, | |
| "learning_rate": 1.2000000000000002e-06, | |
| "loss": 1.9815, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 3.851798561151079, | |
| "grad_norm": 0.3469794988632202, | |
| "learning_rate": 1.1882352941176472e-06, | |
| "loss": 1.9558, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 3.8633093525179856, | |
| "grad_norm": 0.3915042281150818, | |
| "learning_rate": 1.1764705882352942e-06, | |
| "loss": 1.9432, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 3.874820143884892, | |
| "grad_norm": 0.40242835879325867, | |
| "learning_rate": 1.1647058823529413e-06, | |
| "loss": 2.0347, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 3.8863309352517987, | |
| "grad_norm": 0.4240435063838959, | |
| "learning_rate": 1.1529411764705883e-06, | |
| "loss": 1.8546, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 3.897841726618705, | |
| "grad_norm": 0.3874111771583557, | |
| "learning_rate": 1.1411764705882353e-06, | |
| "loss": 1.8743, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 3.9093525179856115, | |
| "grad_norm": 0.4088236391544342, | |
| "learning_rate": 1.1294117647058826e-06, | |
| "loss": 1.9511, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 3.920863309352518, | |
| "grad_norm": 0.4264145493507385, | |
| "learning_rate": 1.1176470588235296e-06, | |
| "loss": 1.8457, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 3.9323741007194246, | |
| "grad_norm": 0.4625447392463684, | |
| "learning_rate": 1.1058823529411766e-06, | |
| "loss": 2.0004, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 3.9438848920863308, | |
| "grad_norm": 0.3717285692691803, | |
| "learning_rate": 1.0941176470588235e-06, | |
| "loss": 2.018, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 3.9553956834532373, | |
| "grad_norm": 0.38859859108924866, | |
| "learning_rate": 1.0823529411764707e-06, | |
| "loss": 2.0653, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 3.966906474820144, | |
| "grad_norm": 0.4414234161376953, | |
| "learning_rate": 1.0705882352941177e-06, | |
| "loss": 1.9468, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 3.9784172661870505, | |
| "grad_norm": 0.44937196373939514, | |
| "learning_rate": 1.0588235294117648e-06, | |
| "loss": 1.9817, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 3.989928057553957, | |
| "grad_norm": 0.4258635938167572, | |
| "learning_rate": 1.0470588235294118e-06, | |
| "loss": 1.8073, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.43822839856147766, | |
| "learning_rate": 1.0352941176470589e-06, | |
| "loss": 1.8991, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 4.011510791366907, | |
| "grad_norm": 0.46056368947029114, | |
| "learning_rate": 1.023529411764706e-06, | |
| "loss": 1.9822, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 4.023021582733813, | |
| "grad_norm": 0.37518858909606934, | |
| "learning_rate": 1.0117647058823531e-06, | |
| "loss": 1.813, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 4.03453237410072, | |
| "grad_norm": 0.3921293020248413, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 2.0461, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 4.046043165467626, | |
| "grad_norm": 0.4025590419769287, | |
| "learning_rate": 9.88235294117647e-07, | |
| "loss": 1.9639, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 4.057553956834532, | |
| "grad_norm": 0.40453991293907166, | |
| "learning_rate": 9.764705882352942e-07, | |
| "loss": 1.9503, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 4.069064748201439, | |
| "grad_norm": 0.35269466042518616, | |
| "learning_rate": 9.647058823529413e-07, | |
| "loss": 1.9183, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 4.080575539568345, | |
| "grad_norm": 0.4840276837348938, | |
| "learning_rate": 9.529411764705883e-07, | |
| "loss": 1.8452, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 4.092086330935252, | |
| "grad_norm": 0.39707890152931213, | |
| "learning_rate": 9.411764705882353e-07, | |
| "loss": 1.9937, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 4.103597122302158, | |
| "grad_norm": 0.4407122731208801, | |
| "learning_rate": 9.294117647058825e-07, | |
| "loss": 2.022, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 4.115107913669065, | |
| "grad_norm": 0.41454342007637024, | |
| "learning_rate": 9.176470588235295e-07, | |
| "loss": 1.8907, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 4.1266187050359715, | |
| "grad_norm": 0.45531314611434937, | |
| "learning_rate": 9.058823529411765e-07, | |
| "loss": 1.8662, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 4.138129496402878, | |
| "grad_norm": 0.4606649577617645, | |
| "learning_rate": 8.941176470588237e-07, | |
| "loss": 1.8686, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 4.149640287769784, | |
| "grad_norm": 0.3887675106525421, | |
| "learning_rate": 8.823529411764707e-07, | |
| "loss": 1.9282, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 4.16115107913669, | |
| "grad_norm": 0.40791404247283936, | |
| "learning_rate": 8.705882352941177e-07, | |
| "loss": 1.8935, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 4.172661870503597, | |
| "grad_norm": 0.4134286046028137, | |
| "learning_rate": 8.588235294117647e-07, | |
| "loss": 1.9222, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 4.1841726618705035, | |
| "grad_norm": 0.45545920729637146, | |
| "learning_rate": 8.470588235294118e-07, | |
| "loss": 1.7464, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 4.19568345323741, | |
| "grad_norm": 0.3929649889469147, | |
| "learning_rate": 8.352941176470589e-07, | |
| "loss": 1.9524, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 4.207194244604317, | |
| "grad_norm": 0.3917909860610962, | |
| "learning_rate": 8.235294117647059e-07, | |
| "loss": 1.8608, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 4.218705035971223, | |
| "grad_norm": 0.3615923821926117, | |
| "learning_rate": 8.11764705882353e-07, | |
| "loss": 2.0196, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 4.23021582733813, | |
| "grad_norm": 0.42296525835990906, | |
| "learning_rate": 8.000000000000001e-07, | |
| "loss": 2.0074, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 4.2417266187050355, | |
| "grad_norm": 0.38392674922943115, | |
| "learning_rate": 7.882352941176471e-07, | |
| "loss": 1.8838, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 4.253237410071942, | |
| "grad_norm": 0.45736461877822876, | |
| "learning_rate": 7.764705882352942e-07, | |
| "loss": 1.9283, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 4.264748201438849, | |
| "grad_norm": 0.4002780020236969, | |
| "learning_rate": 7.647058823529413e-07, | |
| "loss": 1.9358, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 4.276258992805755, | |
| "grad_norm": 0.4492432475090027, | |
| "learning_rate": 7.529411764705882e-07, | |
| "loss": 1.9168, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 4.287769784172662, | |
| "grad_norm": 0.4120420813560486, | |
| "learning_rate": 7.411764705882353e-07, | |
| "loss": 2.0267, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 4.299280575539568, | |
| "grad_norm": 0.37060075998306274, | |
| "learning_rate": 7.294117647058824e-07, | |
| "loss": 1.7903, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 4.310791366906475, | |
| "grad_norm": 0.41300690174102783, | |
| "learning_rate": 7.176470588235294e-07, | |
| "loss": 1.9742, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 4.322302158273382, | |
| "grad_norm": 0.39079055190086365, | |
| "learning_rate": 7.058823529411766e-07, | |
| "loss": 1.9725, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 4.333812949640288, | |
| "grad_norm": 0.37425902485847473, | |
| "learning_rate": 6.941176470588236e-07, | |
| "loss": 1.9327, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 4.345323741007194, | |
| "grad_norm": 0.39660215377807617, | |
| "learning_rate": 6.823529411764706e-07, | |
| "loss": 1.9264, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 4.3568345323741005, | |
| "grad_norm": 0.3530445098876953, | |
| "learning_rate": 6.705882352941178e-07, | |
| "loss": 2.0301, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 4.368345323741007, | |
| "grad_norm": 0.3451977074146271, | |
| "learning_rate": 6.588235294117648e-07, | |
| "loss": 1.9497, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 4.379856115107914, | |
| "grad_norm": 0.39704129099845886, | |
| "learning_rate": 6.470588235294118e-07, | |
| "loss": 2.0197, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 4.39136690647482, | |
| "grad_norm": 0.4510452151298523, | |
| "learning_rate": 6.352941176470588e-07, | |
| "loss": 2.0232, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 4.402877697841727, | |
| "grad_norm": 0.40996772050857544, | |
| "learning_rate": 6.235294117647059e-07, | |
| "loss": 1.9544, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 4.414388489208633, | |
| "grad_norm": 0.39487242698669434, | |
| "learning_rate": 6.11764705882353e-07, | |
| "loss": 1.906, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 4.42589928057554, | |
| "grad_norm": 0.38208380341529846, | |
| "learning_rate": 6.000000000000001e-07, | |
| "loss": 1.9622, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 4.437410071942446, | |
| "grad_norm": 0.4923550486564636, | |
| "learning_rate": 5.882352941176471e-07, | |
| "loss": 1.8734, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 4.448920863309352, | |
| "grad_norm": 0.3934495151042938, | |
| "learning_rate": 5.764705882352941e-07, | |
| "loss": 1.9016, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 4.460431654676259, | |
| "grad_norm": 0.3597968518733978, | |
| "learning_rate": 5.647058823529413e-07, | |
| "loss": 1.8471, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 4.471942446043165, | |
| "grad_norm": 0.3755582273006439, | |
| "learning_rate": 5.529411764705883e-07, | |
| "loss": 1.9416, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 4.483453237410072, | |
| "grad_norm": 0.41233447194099426, | |
| "learning_rate": 5.411764705882354e-07, | |
| "loss": 1.8175, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 4.4949640287769785, | |
| "grad_norm": 0.43287456035614014, | |
| "learning_rate": 5.294117647058824e-07, | |
| "loss": 2.0551, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 4.506474820143885, | |
| "grad_norm": 0.3722149133682251, | |
| "learning_rate": 5.176470588235294e-07, | |
| "loss": 1.8064, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 4.517985611510792, | |
| "grad_norm": 0.36640405654907227, | |
| "learning_rate": 5.058823529411766e-07, | |
| "loss": 1.8697, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 4.529496402877697, | |
| "grad_norm": 0.42729923129081726, | |
| "learning_rate": 4.941176470588235e-07, | |
| "loss": 1.9544, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 4.541007194244604, | |
| "grad_norm": 0.4456847012042999, | |
| "learning_rate": 4.823529411764706e-07, | |
| "loss": 1.9493, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 4.5525179856115106, | |
| "grad_norm": 0.37357428669929504, | |
| "learning_rate": 4.7058823529411767e-07, | |
| "loss": 1.8166, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 4.564028776978417, | |
| "grad_norm": 0.36978697776794434, | |
| "learning_rate": 4.5882352941176476e-07, | |
| "loss": 1.9415, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 4.575539568345324, | |
| "grad_norm": 0.4100985825061798, | |
| "learning_rate": 4.4705882352941184e-07, | |
| "loss": 1.9396, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 4.58705035971223, | |
| "grad_norm": 0.4530591368675232, | |
| "learning_rate": 4.352941176470588e-07, | |
| "loss": 2.0031, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 4.598561151079137, | |
| "grad_norm": 0.43112170696258545, | |
| "learning_rate": 4.235294117647059e-07, | |
| "loss": 1.8297, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 4.6100719424460435, | |
| "grad_norm": 0.4256054162979126, | |
| "learning_rate": 4.1176470588235295e-07, | |
| "loss": 1.9466, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 4.62158273381295, | |
| "grad_norm": 0.41112977266311646, | |
| "learning_rate": 4.0000000000000003e-07, | |
| "loss": 1.9521, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 4.633093525179856, | |
| "grad_norm": 0.4099273681640625, | |
| "learning_rate": 3.882352941176471e-07, | |
| "loss": 1.8238, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 4.644604316546762, | |
| "grad_norm": 0.3838717043399811, | |
| "learning_rate": 3.764705882352941e-07, | |
| "loss": 2.0297, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 4.656115107913669, | |
| "grad_norm": 0.40768367052078247, | |
| "learning_rate": 3.647058823529412e-07, | |
| "loss": 1.763, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 4.6676258992805755, | |
| "grad_norm": 0.4218824803829193, | |
| "learning_rate": 3.529411764705883e-07, | |
| "loss": 1.9831, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 4.679136690647482, | |
| "grad_norm": 0.4038139581680298, | |
| "learning_rate": 3.411764705882353e-07, | |
| "loss": 1.9038, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 4.690647482014389, | |
| "grad_norm": 0.444604754447937, | |
| "learning_rate": 3.294117647058824e-07, | |
| "loss": 1.8462, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 4.702158273381295, | |
| "grad_norm": 0.398887038230896, | |
| "learning_rate": 3.176470588235294e-07, | |
| "loss": 1.8827, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 4.713669064748202, | |
| "grad_norm": 0.3743375837802887, | |
| "learning_rate": 3.058823529411765e-07, | |
| "loss": 1.9547, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 4.725179856115108, | |
| "grad_norm": 0.36487120389938354, | |
| "learning_rate": 2.9411764705882356e-07, | |
| "loss": 1.9927, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 4.736690647482014, | |
| "grad_norm": 0.38175249099731445, | |
| "learning_rate": 2.8235294117647064e-07, | |
| "loss": 1.8559, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 4.748201438848921, | |
| "grad_norm": 0.45353925228118896, | |
| "learning_rate": 2.705882352941177e-07, | |
| "loss": 1.8788, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 4.759712230215827, | |
| "grad_norm": 0.38203585147857666, | |
| "learning_rate": 2.588235294117647e-07, | |
| "loss": 1.9798, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 4.771223021582734, | |
| "grad_norm": 0.4031854271888733, | |
| "learning_rate": 2.4705882352941175e-07, | |
| "loss": 1.924, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 4.78273381294964, | |
| "grad_norm": 0.39555788040161133, | |
| "learning_rate": 2.3529411764705883e-07, | |
| "loss": 1.9385, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 4.794244604316547, | |
| "grad_norm": 0.36034807562828064, | |
| "learning_rate": 2.2352941176470592e-07, | |
| "loss": 1.8572, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 4.805755395683454, | |
| "grad_norm": 0.416274756193161, | |
| "learning_rate": 2.1176470588235296e-07, | |
| "loss": 1.9249, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 4.817266187050359, | |
| "grad_norm": 0.40581023693084717, | |
| "learning_rate": 2.0000000000000002e-07, | |
| "loss": 2.0491, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 4.828776978417266, | |
| "grad_norm": 0.433010995388031, | |
| "learning_rate": 1.8823529411764705e-07, | |
| "loss": 1.9709, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 4.840287769784172, | |
| "grad_norm": 0.37076178193092346, | |
| "learning_rate": 1.7647058823529414e-07, | |
| "loss": 1.8053, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 4.851798561151079, | |
| "grad_norm": 0.4253133237361908, | |
| "learning_rate": 1.647058823529412e-07, | |
| "loss": 1.8407, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 4.863309352517986, | |
| "grad_norm": 0.3607207238674164, | |
| "learning_rate": 1.5294117647058826e-07, | |
| "loss": 1.9195, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 4.874820143884892, | |
| "grad_norm": 0.39863321185112, | |
| "learning_rate": 1.4117647058823532e-07, | |
| "loss": 1.9297, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 4.886330935251799, | |
| "grad_norm": 0.36778998374938965, | |
| "learning_rate": 1.2941176470588236e-07, | |
| "loss": 1.9024, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 4.897841726618705, | |
| "grad_norm": 0.4625915288925171, | |
| "learning_rate": 1.1764705882352942e-07, | |
| "loss": 1.969, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 4.909352517985612, | |
| "grad_norm": 0.39172741770744324, | |
| "learning_rate": 1.0588235294117648e-07, | |
| "loss": 1.9035, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 4.920863309352518, | |
| "grad_norm": 0.38108983635902405, | |
| "learning_rate": 9.411764705882353e-08, | |
| "loss": 1.921, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 4.932374100719424, | |
| "grad_norm": 0.4520784020423889, | |
| "learning_rate": 8.23529411764706e-08, | |
| "loss": 2.0255, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 4.943884892086331, | |
| "grad_norm": 0.4058828055858612, | |
| "learning_rate": 7.058823529411766e-08, | |
| "loss": 1.9021, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 4.955395683453237, | |
| "grad_norm": 0.38254988193511963, | |
| "learning_rate": 5.882352941176471e-08, | |
| "loss": 1.865, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 4.966906474820144, | |
| "grad_norm": 0.4428682327270508, | |
| "learning_rate": 4.705882352941176e-08, | |
| "loss": 1.9649, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 4.9784172661870505, | |
| "grad_norm": 0.41550108790397644, | |
| "learning_rate": 3.529411764705883e-08, | |
| "loss": 1.9868, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 4.989928057553957, | |
| "grad_norm": 0.430279016494751, | |
| "learning_rate": 2.352941176470588e-08, | |
| "loss": 1.913, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.4352044463157654, | |
| "learning_rate": 1.176470588235294e-08, | |
| "loss": 1.9378, | |
| "step": 435 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 435, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.1776104868752e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |