{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.6326715544937804, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002636131477057418, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 14.8018, "step": 1, "ts_encoder_learning_rate": 0.0 }, { "epoch": 0.005272262954114836, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 14.8252, "step": 2, "ts_encoder_learning_rate": 0.0 }, { "epoch": 0.007908394431172255, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 15.0107, "step": 3, "ts_encoder_learning_rate": 0.0 }, { "epoch": 0.010544525908229673, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 14.9011, "step": 4, "ts_encoder_learning_rate": 0.0 }, { "epoch": 0.01318065738528709, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 14.897, "step": 5, "ts_encoder_learning_rate": 0.0 }, { "epoch": 0.01581678886234451, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 14.9028, "step": 6, "ts_encoder_learning_rate": 0.0 }, { "epoch": 0.018452920339401928, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 14.7914, "step": 7, "ts_encoder_learning_rate": 0.0 }, { "epoch": 0.021089051816459346, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 15.1729, "step": 8, "ts_encoder_learning_rate": 0.0 }, { "epoch": 0.023725183293516763, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 14.7748, "step": 9, "ts_encoder_learning_rate": 0.0 }, { "epoch": 0.02636131477057418, "grad_norm": 1096.9997559030376, "learning_rate": 0.0, "loss": 14.735, "step": 10, "ts_encoder_learning_rate": 5.000000000000001e-07 }, { "epoch": 0.0289974462476316, "grad_norm": 1096.9997559030376, "learning_rate": 5.000000000000001e-07, "loss": 14.8943, "step": 11, "ts_encoder_learning_rate": 5.000000000000001e-07 }, { "epoch": 0.03163357772468902, "grad_norm": 1305.2661541342568, "learning_rate": 5.000000000000001e-07, "loss": 14.5478, "step": 12, "ts_encoder_learning_rate": 1.0000000000000002e-06 }, { "epoch": 0.034269709201746434, "grad_norm": 1279.5606119323502, "learning_rate": 1.0000000000000002e-06, "loss": 14.542, "step": 13, "ts_encoder_learning_rate": 1.5e-06 }, { "epoch": 0.036905840678803856, "grad_norm": 1391.7635545286846, "learning_rate": 1.5e-06, "loss": 12.2412, "step": 14, "ts_encoder_learning_rate": 2.0000000000000003e-06 }, { "epoch": 0.03954197215586128, "grad_norm": 1848.3283721710281, "learning_rate": 2.0000000000000003e-06, "loss": 7.6029, "step": 15, "ts_encoder_learning_rate": 2.5e-06 }, { "epoch": 0.04217810363291869, "grad_norm": 1848.3283721710281, "learning_rate": 2.5e-06, "loss": 4.8804, "step": 16, "ts_encoder_learning_rate": 2.5e-06 }, { "epoch": 0.04481423510997611, "grad_norm": 1759.395829823404, "learning_rate": 2.5e-06, "loss": 4.8283, "step": 17, "ts_encoder_learning_rate": 3e-06 }, { "epoch": 0.04745036658703353, "grad_norm": 1407.6124078931969, "learning_rate": 3e-06, "loss": 3.5724, "step": 18, "ts_encoder_learning_rate": 3.5e-06 }, { "epoch": 0.05008649806409095, "grad_norm": 372.4651747290722, "learning_rate": 3.5e-06, "loss": 1.9155, "step": 19, "ts_encoder_learning_rate": 4.000000000000001e-06 }, { "epoch": 0.05272262954114836, "grad_norm": 938.4571697479417, "learning_rate": 4.000000000000001e-06, "loss": 1.9811, "step": 20, "ts_encoder_learning_rate": 4.5e-06 }, { "epoch": 0.055358761018205783, "grad_norm": 544.7103035134295, "learning_rate": 4.5e-06, "loss": 1.7949, "step": 21, "ts_encoder_learning_rate": 5e-06 }, { "epoch": 0.0579948924952632, "grad_norm": 454.5593620001429, "learning_rate": 5e-06, "loss": 1.9186, "step": 22, "ts_encoder_learning_rate": 5.500000000000001e-06 }, { "epoch": 0.06063102397232062, "grad_norm": 1081.6948838381863, "learning_rate": 5.500000000000001e-06, "loss": 2.2253, "step": 23, "ts_encoder_learning_rate": 6e-06 }, { "epoch": 0.06326715544937804, "grad_norm": 614.5108980638121, "learning_rate": 6e-06, "loss": 2.0748, "step": 24, "ts_encoder_learning_rate": 6.5000000000000004e-06 }, { "epoch": 0.06590328692643546, "grad_norm": 510.8405053914244, "learning_rate": 6.5000000000000004e-06, "loss": 0.985, "step": 25, "ts_encoder_learning_rate": 7e-06 }, { "epoch": 0.06853941840349287, "grad_norm": 187.64105795677528, "learning_rate": 7e-06, "loss": 0.9204, "step": 26, "ts_encoder_learning_rate": 7.500000000000001e-06 }, { "epoch": 0.07117554988055029, "grad_norm": 239.94638266940726, "learning_rate": 7.500000000000001e-06, "loss": 0.7961, "step": 27, "ts_encoder_learning_rate": 8.000000000000001e-06 }, { "epoch": 0.07381168135760771, "grad_norm": 204.37461156136854, "learning_rate": 8.000000000000001e-06, "loss": 0.7757, "step": 28, "ts_encoder_learning_rate": 8.5e-06 }, { "epoch": 0.07644781283466513, "grad_norm": 394.9877265614201, "learning_rate": 8.5e-06, "loss": 0.8967, "step": 29, "ts_encoder_learning_rate": 9e-06 }, { "epoch": 0.07908394431172255, "grad_norm": 115.99747240190717, "learning_rate": 9e-06, "loss": 0.6932, "step": 30, "ts_encoder_learning_rate": 9.5e-06 }, { "epoch": 0.08172007578877996, "grad_norm": 203.24289413853845, "learning_rate": 9.5e-06, "loss": 0.6311, "step": 31, "ts_encoder_learning_rate": 1e-05 }, { "epoch": 0.08435620726583738, "grad_norm": 97.33610135135591, "learning_rate": 1e-05, "loss": 0.5115, "step": 32, "ts_encoder_learning_rate": 9.999974308631955e-06 }, { "epoch": 0.0869923387428948, "grad_norm": 346.00707857529187, "learning_rate": 9.999974308631955e-06, "loss": 0.4777, "step": 33, "ts_encoder_learning_rate": 9.999897234791831e-06 }, { "epoch": 0.08962847021995222, "grad_norm": 29.349977310917716, "learning_rate": 9.999897234791831e-06, "loss": 0.4537, "step": 34, "ts_encoder_learning_rate": 9.999768779271687e-06 }, { "epoch": 0.09226460169700963, "grad_norm": 112.56606641016495, "learning_rate": 9.999768779271687e-06, "loss": 0.8332, "step": 35, "ts_encoder_learning_rate": 9.999588943391597e-06 }, { "epoch": 0.09490073317406705, "grad_norm": 465.069049932412, "learning_rate": 9.999588943391597e-06, "loss": 1.0421, "step": 36, "ts_encoder_learning_rate": 9.999357728999657e-06 }, { "epoch": 0.09753686465112447, "grad_norm": 158.97965025080174, "learning_rate": 9.999357728999657e-06, "loss": 0.5741, "step": 37, "ts_encoder_learning_rate": 9.99907513847195e-06 }, { "epoch": 0.1001729961281819, "grad_norm": 194.59482532664853, "learning_rate": 9.99907513847195e-06, "loss": 0.484, "step": 38, "ts_encoder_learning_rate": 9.998741174712534e-06 }, { "epoch": 0.10280912760523932, "grad_norm": 161.100341083291, "learning_rate": 9.998741174712534e-06, "loss": 0.5337, "step": 39, "ts_encoder_learning_rate": 9.9983558411534e-06 }, { "epoch": 0.10544525908229672, "grad_norm": 27.126218719305797, "learning_rate": 9.9983558411534e-06, "loss": 0.5538, "step": 40, "ts_encoder_learning_rate": 9.997919141754448e-06 }, { "epoch": 0.10808139055935415, "grad_norm": 33.98098658560328, "learning_rate": 9.997919141754448e-06, "loss": 0.7161, "step": 41, "ts_encoder_learning_rate": 9.99743108100344e-06 }, { "epoch": 0.11071752203641157, "grad_norm": 39.621572486406116, "learning_rate": 9.99743108100344e-06, "loss": 0.3756, "step": 42, "ts_encoder_learning_rate": 9.996891663915955e-06 }, { "epoch": 0.11335365351346899, "grad_norm": 14.92159451052064, "learning_rate": 9.996891663915955e-06, "loss": 0.4255, "step": 43, "ts_encoder_learning_rate": 9.99630089603534e-06 }, { "epoch": 0.1159897849905264, "grad_norm": 13.462409209351557, "learning_rate": 9.99630089603534e-06, "loss": 0.4359, "step": 44, "ts_encoder_learning_rate": 9.995658783432645e-06 }, { "epoch": 0.11862591646758382, "grad_norm": 5.901719710197823, "learning_rate": 9.995658783432645e-06, "loss": 0.3448, "step": 45, "ts_encoder_learning_rate": 9.994965332706574e-06 }, { "epoch": 0.12126204794464124, "grad_norm": 32.35793808957399, "learning_rate": 9.994965332706574e-06, "loss": 0.4025, "step": 46, "ts_encoder_learning_rate": 9.994220550983404e-06 }, { "epoch": 0.12389817942169866, "grad_norm": 31.71042211582521, "learning_rate": 9.994220550983404e-06, "loss": 0.5392, "step": 47, "ts_encoder_learning_rate": 9.993424445916923e-06 }, { "epoch": 0.12653431089875608, "grad_norm": 21.433144223039534, "learning_rate": 9.993424445916923e-06, "loss": 0.4416, "step": 48, "ts_encoder_learning_rate": 9.992577025688338e-06 }, { "epoch": 0.1291704423758135, "grad_norm": 9.093244564527424, "learning_rate": 9.992577025688338e-06, "loss": 0.3626, "step": 49, "ts_encoder_learning_rate": 9.991678299006206e-06 }, { "epoch": 0.13180657385287092, "grad_norm": 4.629496592285392, "learning_rate": 9.991678299006206e-06, "loss": 0.3829, "step": 50, "ts_encoder_learning_rate": 9.990728275106332e-06 }, { "epoch": 0.13444270532992833, "grad_norm": 4.275838877635983, "learning_rate": 9.990728275106332e-06, "loss": 0.3262, "step": 51, "ts_encoder_learning_rate": 9.989726963751683e-06 }, { "epoch": 0.13707883680698574, "grad_norm": 4.249381704965988, "learning_rate": 9.989726963751683e-06, "loss": 0.3252, "step": 52, "ts_encoder_learning_rate": 9.98867437523228e-06 }, { "epoch": 0.13971496828404317, "grad_norm": 6.056412755943698, "learning_rate": 9.98867437523228e-06, "loss": 0.276, "step": 53, "ts_encoder_learning_rate": 9.987570520365105e-06 }, { "epoch": 0.14235109976110058, "grad_norm": 5.208273616470221, "learning_rate": 9.987570520365105e-06, "loss": 0.2754, "step": 54, "ts_encoder_learning_rate": 9.986415410493966e-06 }, { "epoch": 0.14498723123815802, "grad_norm": 3.7353303435304244, "learning_rate": 9.986415410493966e-06, "loss": 0.2529, "step": 55, "ts_encoder_learning_rate": 9.98520905748941e-06 }, { "epoch": 0.14762336271521542, "grad_norm": 3.933015208783408, "learning_rate": 9.98520905748941e-06, "loss": 0.2508, "step": 56, "ts_encoder_learning_rate": 9.983951473748579e-06 }, { "epoch": 0.15025949419227283, "grad_norm": 5.206399280832945, "learning_rate": 9.983951473748579e-06, "loss": 0.2651, "step": 57, "ts_encoder_learning_rate": 9.982642672195093e-06 }, { "epoch": 0.15289562566933027, "grad_norm": 4.167850168478869, "learning_rate": 9.982642672195093e-06, "loss": 0.2868, "step": 58, "ts_encoder_learning_rate": 9.98128266627891e-06 }, { "epoch": 0.15553175714638767, "grad_norm": 9.226577568221924, "learning_rate": 9.98128266627891e-06, "loss": 0.2833, "step": 59, "ts_encoder_learning_rate": 9.979871469976197e-06 }, { "epoch": 0.1581678886234451, "grad_norm": 3.7123405016527316, "learning_rate": 9.979871469976197e-06, "loss": 0.2578, "step": 60, "ts_encoder_learning_rate": 9.978409097789178e-06 }, { "epoch": 0.16080402010050251, "grad_norm": 3.049493694179588, "learning_rate": 9.978409097789178e-06, "loss": 0.2509, "step": 61, "ts_encoder_learning_rate": 9.976895564745993e-06 }, { "epoch": 0.16344015157755992, "grad_norm": 2.8746718898314314, "learning_rate": 9.976895564745993e-06, "loss": 0.2367, "step": 62, "ts_encoder_learning_rate": 9.975330886400531e-06 }, { "epoch": 0.16607628305461736, "grad_norm": 2.1229971064439614, "learning_rate": 9.975330886400531e-06, "loss": 0.2456, "step": 63, "ts_encoder_learning_rate": 9.973715078832288e-06 }, { "epoch": 0.16871241453167476, "grad_norm": 1.9770119063232516, "learning_rate": 9.973715078832288e-06, "loss": 0.2251, "step": 64, "ts_encoder_learning_rate": 9.972048158646184e-06 }, { "epoch": 0.17134854600873217, "grad_norm": 2.5667195491487016, "learning_rate": 9.972048158646184e-06, "loss": 0.2445, "step": 65, "ts_encoder_learning_rate": 9.970330142972403e-06 }, { "epoch": 0.1739846774857896, "grad_norm": 2.1113696627510117, "learning_rate": 9.970330142972403e-06, "loss": 0.2574, "step": 66, "ts_encoder_learning_rate": 9.968561049466214e-06 }, { "epoch": 0.17662080896284701, "grad_norm": 1.6243156106778176, "learning_rate": 9.968561049466214e-06, "loss": 0.2303, "step": 67, "ts_encoder_learning_rate": 9.966740896307791e-06 }, { "epoch": 0.17925694043990445, "grad_norm": 1.6849608770961007, "learning_rate": 9.966740896307791e-06, "loss": 0.2243, "step": 68, "ts_encoder_learning_rate": 9.964869702202023e-06 }, { "epoch": 0.18189307191696186, "grad_norm": 3.92330386577651, "learning_rate": 9.964869702202023e-06, "loss": 0.2441, "step": 69, "ts_encoder_learning_rate": 9.962947486378325e-06 }, { "epoch": 0.18452920339401926, "grad_norm": 2.582255962327349, "learning_rate": 9.962947486378325e-06, "loss": 0.2398, "step": 70, "ts_encoder_learning_rate": 9.96097426859044e-06 }, { "epoch": 0.1871653348710767, "grad_norm": 1.7505976231929627, "learning_rate": 9.96097426859044e-06, "loss": 0.2189, "step": 71, "ts_encoder_learning_rate": 9.95895006911623e-06 }, { "epoch": 0.1898014663481341, "grad_norm": 2.1937879121282116, "learning_rate": 9.95895006911623e-06, "loss": 0.2301, "step": 72, "ts_encoder_learning_rate": 9.956874908757482e-06 }, { "epoch": 0.19243759782519154, "grad_norm": 2.1666041664824265, "learning_rate": 9.956874908757482e-06, "loss": 0.2432, "step": 73, "ts_encoder_learning_rate": 9.954748808839675e-06 }, { "epoch": 0.19507372930224895, "grad_norm": 1.7390080947257538, "learning_rate": 9.954748808839675e-06, "loss": 0.2475, "step": 74, "ts_encoder_learning_rate": 9.952571791211776e-06 }, { "epoch": 0.19770986077930636, "grad_norm": 1.8227633742041385, "learning_rate": 9.952571791211776e-06, "loss": 0.205, "step": 75, "ts_encoder_learning_rate": 9.950343878246011e-06 }, { "epoch": 0.2003459922563638, "grad_norm": 2.023155095367864, "learning_rate": 9.950343878246011e-06, "loss": 0.222, "step": 76, "ts_encoder_learning_rate": 9.948065092837631e-06 }, { "epoch": 0.2029821237334212, "grad_norm": 2.5393740081357885, "learning_rate": 9.948065092837631e-06, "loss": 0.2264, "step": 77, "ts_encoder_learning_rate": 9.945735458404681e-06 }, { "epoch": 0.20561825521047863, "grad_norm": 1.9070977453067859, "learning_rate": 9.945735458404681e-06, "loss": 0.1948, "step": 78, "ts_encoder_learning_rate": 9.943354998887763e-06 }, { "epoch": 0.20825438668753604, "grad_norm": 2.228467040055354, "learning_rate": 9.943354998887763e-06, "loss": 0.2352, "step": 79, "ts_encoder_learning_rate": 9.94092373874978e-06 }, { "epoch": 0.21089051816459345, "grad_norm": 2.1754655976070985, "learning_rate": 9.94092373874978e-06, "loss": 0.2051, "step": 80, "ts_encoder_learning_rate": 9.938441702975689e-06 }, { "epoch": 0.21352664964165088, "grad_norm": 1.5948561463002036, "learning_rate": 9.938441702975689e-06, "loss": 0.2104, "step": 81, "ts_encoder_learning_rate": 9.935908917072253e-06 }, { "epoch": 0.2161627811187083, "grad_norm": 2.9184103349665955, "learning_rate": 9.935908917072253e-06, "loss": 0.2214, "step": 82, "ts_encoder_learning_rate": 9.93332540706776e-06 }, { "epoch": 0.21879891259576573, "grad_norm": 2.533165757626651, "learning_rate": 9.93332540706776e-06, "loss": 0.2182, "step": 83, "ts_encoder_learning_rate": 9.930691199511775e-06 }, { "epoch": 0.22143504407282313, "grad_norm": 2.4172123332256072, "learning_rate": 9.930691199511775e-06, "loss": 0.2477, "step": 84, "ts_encoder_learning_rate": 9.928006321474859e-06 }, { "epoch": 0.22407117554988054, "grad_norm": 2.2355360356090865, "learning_rate": 9.928006321474859e-06, "loss": 0.2153, "step": 85, "ts_encoder_learning_rate": 9.925270800548285e-06 }, { "epoch": 0.22670730702693798, "grad_norm": 1.325879276978752, "learning_rate": 9.925270800548285e-06, "loss": 0.2106, "step": 86, "ts_encoder_learning_rate": 9.922484664843763e-06 }, { "epoch": 0.22934343850399538, "grad_norm": 2.105985586302988, "learning_rate": 9.922484664843763e-06, "loss": 0.2033, "step": 87, "ts_encoder_learning_rate": 9.91964794299315e-06 }, { "epoch": 0.2319795699810528, "grad_norm": 1.627701668717812, "learning_rate": 9.91964794299315e-06, "loss": 0.2063, "step": 88, "ts_encoder_learning_rate": 9.916760664148148e-06 }, { "epoch": 0.23461570145811023, "grad_norm": 1.7548289492746183, "learning_rate": 9.916760664148148e-06, "loss": 0.2023, "step": 89, "ts_encoder_learning_rate": 9.91382285798002e-06 }, { "epoch": 0.23725183293516763, "grad_norm": 1.2779758188322887, "learning_rate": 9.91382285798002e-06, "loss": 0.1859, "step": 90, "ts_encoder_learning_rate": 9.910834554679266e-06 }, { "epoch": 0.23988796441222507, "grad_norm": 1.4309994411495965, "learning_rate": 9.910834554679266e-06, "loss": 0.1985, "step": 91, "ts_encoder_learning_rate": 9.907795784955327e-06 }, { "epoch": 0.24252409588928248, "grad_norm": 1.578028044854179, "learning_rate": 9.907795784955327e-06, "loss": 0.197, "step": 92, "ts_encoder_learning_rate": 9.904706580036265e-06 }, { "epoch": 0.24516022736633988, "grad_norm": 1.4796574606576762, "learning_rate": 9.904706580036265e-06, "loss": 0.1819, "step": 93, "ts_encoder_learning_rate": 9.901566971668437e-06 }, { "epoch": 0.24779635884339732, "grad_norm": 1.4288017254755137, "learning_rate": 9.901566971668437e-06, "loss": 0.2007, "step": 94, "ts_encoder_learning_rate": 9.898376992116179e-06 }, { "epoch": 0.2504324903204547, "grad_norm": 2.1391069510229137, "learning_rate": 9.898376992116179e-06, "loss": 0.2147, "step": 95, "ts_encoder_learning_rate": 9.895136674161466e-06 }, { "epoch": 0.25306862179751216, "grad_norm": 1.4103238390166355, "learning_rate": 9.895136674161466e-06, "loss": 0.1678, "step": 96, "ts_encoder_learning_rate": 9.891846051103578e-06 }, { "epoch": 0.2557047532745696, "grad_norm": 1.7272702030863043, "learning_rate": 9.891846051103578e-06, "loss": 0.212, "step": 97, "ts_encoder_learning_rate": 9.888505156758758e-06 }, { "epoch": 0.258340884751627, "grad_norm": 1.465691780286817, "learning_rate": 9.888505156758758e-06, "loss": 0.1886, "step": 98, "ts_encoder_learning_rate": 9.885114025459865e-06 }, { "epoch": 0.2609770162286844, "grad_norm": 1.5753790137474581, "learning_rate": 9.885114025459865e-06, "loss": 0.1846, "step": 99, "ts_encoder_learning_rate": 9.881672692056022e-06 }, { "epoch": 0.26361314770574185, "grad_norm": 6.38876297290493, "learning_rate": 9.881672692056022e-06, "loss": 0.221, "step": 100, "ts_encoder_learning_rate": 9.878181191912251e-06 }, { "epoch": 0.2662492791827992, "grad_norm": 8.212765911278606, "learning_rate": 9.878181191912251e-06, "loss": 0.2088, "step": 101, "ts_encoder_learning_rate": 9.874639560909118e-06 }, { "epoch": 0.26888541065985666, "grad_norm": 97.72702166479309, "learning_rate": 9.874639560909118e-06, "loss": 0.2366, "step": 102, "ts_encoder_learning_rate": 9.871047835442365e-06 }, { "epoch": 0.2715215421369141, "grad_norm": 97.72702166479309, "learning_rate": 9.871047835442365e-06, "loss": 0.2448, "step": 103, "ts_encoder_learning_rate": 9.871047835442365e-06 }, { "epoch": 0.2741576736139715, "grad_norm": 52.034102007317564, "learning_rate": 9.871047835442365e-06, "loss": 0.2336, "step": 104, "ts_encoder_learning_rate": 9.867406052422525e-06 }, { "epoch": 0.2767938050910289, "grad_norm": 52.034102007317564, "learning_rate": 9.867406052422525e-06, "loss": 0.2643, "step": 105, "ts_encoder_learning_rate": 9.867406052422525e-06 }, { "epoch": 0.27942993656808635, "grad_norm": 52.034102007317564, "learning_rate": 9.867406052422525e-06, "loss": 0.2327, "step": 106, "ts_encoder_learning_rate": 9.867406052422525e-06 }, { "epoch": 0.2820660680451437, "grad_norm": 9.876750537498761, "learning_rate": 9.867406052422525e-06, "loss": 0.2537, "step": 107, "ts_encoder_learning_rate": 9.863714249274553e-06 }, { "epoch": 0.28470219952220116, "grad_norm": 10.163299154086943, "learning_rate": 9.863714249274553e-06, "loss": 0.2425, "step": 108, "ts_encoder_learning_rate": 9.85997246393744e-06 }, { "epoch": 0.2873383309992586, "grad_norm": 3.2455067886441973, "learning_rate": 9.85997246393744e-06, "loss": 0.1929, "step": 109, "ts_encoder_learning_rate": 9.85618073486382e-06 }, { "epoch": 0.28997446247631603, "grad_norm": 2.5488848745593913, "learning_rate": 9.85618073486382e-06, "loss": 0.2007, "step": 110, "ts_encoder_learning_rate": 9.852339101019574e-06 }, { "epoch": 0.2926105939533734, "grad_norm": 2.062458273578386, "learning_rate": 9.852339101019574e-06, "loss": 0.2245, "step": 111, "ts_encoder_learning_rate": 9.848447601883436e-06 }, { "epoch": 0.29524672543043085, "grad_norm": 1.8908032493798421, "learning_rate": 9.848447601883436e-06, "loss": 0.2017, "step": 112, "ts_encoder_learning_rate": 9.844506277446577e-06 }, { "epoch": 0.2978828569074883, "grad_norm": 1.8585429810855012, "learning_rate": 9.844506277446577e-06, "loss": 0.17, "step": 113, "ts_encoder_learning_rate": 9.840515168212208e-06 }, { "epoch": 0.30051898838454566, "grad_norm": 1.6115065133773239, "learning_rate": 9.840515168212208e-06, "loss": 0.2084, "step": 114, "ts_encoder_learning_rate": 9.836474315195148e-06 }, { "epoch": 0.3031551198616031, "grad_norm": 3.04800232147694, "learning_rate": 9.836474315195148e-06, "loss": 0.2319, "step": 115, "ts_encoder_learning_rate": 9.832383759921415e-06 }, { "epoch": 0.30579125133866053, "grad_norm": 3.3376308967003245, "learning_rate": 9.832383759921415e-06, "loss": 0.2059, "step": 116, "ts_encoder_learning_rate": 9.828243544427795e-06 }, { "epoch": 0.3084273828157179, "grad_norm": 1.847624347245679, "learning_rate": 9.828243544427795e-06, "loss": 0.1637, "step": 117, "ts_encoder_learning_rate": 9.824053711261405e-06 }, { "epoch": 0.31106351429277534, "grad_norm": 6.259456880309696, "learning_rate": 9.824053711261405e-06, "loss": 0.2051, "step": 118, "ts_encoder_learning_rate": 9.819814303479268e-06 }, { "epoch": 0.3136996457698328, "grad_norm": 1.248955065574141, "learning_rate": 9.819814303479268e-06, "loss": 0.1729, "step": 119, "ts_encoder_learning_rate": 9.815525364647853e-06 }, { "epoch": 0.3163357772468902, "grad_norm": 1.9069742114108161, "learning_rate": 9.815525364647853e-06, "loss": 0.1725, "step": 120, "ts_encoder_learning_rate": 9.811186938842645e-06 }, { "epoch": 0.3189719087239476, "grad_norm": 2.578824804080835, "learning_rate": 9.811186938842645e-06, "loss": 0.2071, "step": 121, "ts_encoder_learning_rate": 9.80679907064768e-06 }, { "epoch": 0.32160804020100503, "grad_norm": 1.4884102177576088, "learning_rate": 9.80679907064768e-06, "loss": 0.1971, "step": 122, "ts_encoder_learning_rate": 9.802361805155097e-06 }, { "epoch": 0.32424417167806246, "grad_norm": 3.950574735875806, "learning_rate": 9.802361805155097e-06, "loss": 0.2118, "step": 123, "ts_encoder_learning_rate": 9.797875187964661e-06 }, { "epoch": 0.32688030315511984, "grad_norm": 2.0658586146827194, "learning_rate": 9.797875187964661e-06, "loss": 0.1905, "step": 124, "ts_encoder_learning_rate": 9.793339265183303e-06 }, { "epoch": 0.3295164346321773, "grad_norm": 1.7249946266097584, "learning_rate": 9.793339265183303e-06, "loss": 0.1573, "step": 125, "ts_encoder_learning_rate": 9.788754083424654e-06 }, { "epoch": 0.3321525661092347, "grad_norm": 2.2599848390315667, "learning_rate": 9.788754083424654e-06, "loss": 0.1879, "step": 126, "ts_encoder_learning_rate": 9.784119689808545e-06 }, { "epoch": 0.3347886975862921, "grad_norm": 1.8125389329195718, "learning_rate": 9.784119689808545e-06, "loss": 0.2048, "step": 127, "ts_encoder_learning_rate": 9.779436131960544e-06 }, { "epoch": 0.33742482906334953, "grad_norm": 2.506219649349012, "learning_rate": 9.779436131960544e-06, "loss": 0.1869, "step": 128, "ts_encoder_learning_rate": 9.774703458011453e-06 }, { "epoch": 0.34006096054040696, "grad_norm": 1.8024855840730984, "learning_rate": 9.774703458011453e-06, "loss": 0.1962, "step": 129, "ts_encoder_learning_rate": 9.76992171659682e-06 }, { "epoch": 0.34269709201746434, "grad_norm": 1.59370844293676, "learning_rate": 9.76992171659682e-06, "loss": 0.1886, "step": 130, "ts_encoder_learning_rate": 9.765090956856437e-06 }, { "epoch": 0.3453332234945218, "grad_norm": 1.6796351392079987, "learning_rate": 9.765090956856437e-06, "loss": 0.1734, "step": 131, "ts_encoder_learning_rate": 9.760211228433834e-06 }, { "epoch": 0.3479693549715792, "grad_norm": 1.8622570561386254, "learning_rate": 9.760211228433834e-06, "loss": 0.2003, "step": 132, "ts_encoder_learning_rate": 9.755282581475769e-06 }, { "epoch": 0.35060548644863665, "grad_norm": 4.261853496235677, "learning_rate": 9.755282581475769e-06, "loss": 0.2152, "step": 133, "ts_encoder_learning_rate": 9.750305066631717e-06 }, { "epoch": 0.35324161792569403, "grad_norm": 2.084316819171521, "learning_rate": 9.750305066631717e-06, "loss": 0.204, "step": 134, "ts_encoder_learning_rate": 9.745278735053345e-06 }, { "epoch": 0.35587774940275146, "grad_norm": 2.1355217159376125, "learning_rate": 9.745278735053345e-06, "loss": 0.1812, "step": 135, "ts_encoder_learning_rate": 9.740203638393984e-06 }, { "epoch": 0.3585138808798089, "grad_norm": 2.1726864130161485, "learning_rate": 9.740203638393984e-06, "loss": 0.1741, "step": 136, "ts_encoder_learning_rate": 9.735079828808107e-06 }, { "epoch": 0.3611500123568663, "grad_norm": 1.82669888695553, "learning_rate": 9.735079828808107e-06, "loss": 0.1772, "step": 137, "ts_encoder_learning_rate": 9.729907358950785e-06 }, { "epoch": 0.3637861438339237, "grad_norm": 2.0888172211110647, "learning_rate": 9.729907358950785e-06, "loss": 0.1747, "step": 138, "ts_encoder_learning_rate": 9.724686281977146e-06 }, { "epoch": 0.36642227531098115, "grad_norm": 1.7793028446193322, "learning_rate": 9.724686281977146e-06, "loss": 0.1645, "step": 139, "ts_encoder_learning_rate": 9.719416651541839e-06 }, { "epoch": 0.36905840678803853, "grad_norm": 3.1339804740693697, "learning_rate": 9.719416651541839e-06, "loss": 0.2205, "step": 140, "ts_encoder_learning_rate": 9.714098521798466e-06 }, { "epoch": 0.37169453826509596, "grad_norm": 1.922012523700954, "learning_rate": 9.714098521798466e-06, "loss": 0.1922, "step": 141, "ts_encoder_learning_rate": 9.708731947399039e-06 }, { "epoch": 0.3743306697421534, "grad_norm": 2.111535338552465, "learning_rate": 9.708731947399039e-06, "loss": 0.2084, "step": 142, "ts_encoder_learning_rate": 9.703316983493414e-06 }, { "epoch": 0.37696680121921083, "grad_norm": 1.6443259615131731, "learning_rate": 9.703316983493414e-06, "loss": 0.1644, "step": 143, "ts_encoder_learning_rate": 9.697853685728721e-06 }, { "epoch": 0.3796029326962682, "grad_norm": 1.0337440158621962, "learning_rate": 9.697853685728721e-06, "loss": 0.175, "step": 144, "ts_encoder_learning_rate": 9.692342110248802e-06 }, { "epoch": 0.38223906417332565, "grad_norm": 2.6059449586383447, "learning_rate": 9.692342110248802e-06, "loss": 0.1749, "step": 145, "ts_encoder_learning_rate": 9.686782313693622e-06 }, { "epoch": 0.3848751956503831, "grad_norm": 1.7541337816760245, "learning_rate": 9.686782313693622e-06, "loss": 0.1814, "step": 146, "ts_encoder_learning_rate": 9.681174353198687e-06 }, { "epoch": 0.38751132712744046, "grad_norm": 1.9857610217097408, "learning_rate": 9.681174353198687e-06, "loss": 0.1817, "step": 147, "ts_encoder_learning_rate": 9.675518286394474e-06 }, { "epoch": 0.3901474586044979, "grad_norm": 2.015451167688765, "learning_rate": 9.675518286394474e-06, "loss": 0.1853, "step": 148, "ts_encoder_learning_rate": 9.669814171405818e-06 }, { "epoch": 0.39278359008155533, "grad_norm": 1.417012874189727, "learning_rate": 9.669814171405818e-06, "loss": 0.2065, "step": 149, "ts_encoder_learning_rate": 9.664062066851325e-06 }, { "epoch": 0.3954197215586127, "grad_norm": 2.8203561728923776, "learning_rate": 9.664062066851325e-06, "loss": 0.1729, "step": 150, "ts_encoder_learning_rate": 9.658262031842772e-06 }, { "epoch": 0.39805585303567015, "grad_norm": 2.068280212416892, "learning_rate": 9.658262031842772e-06, "loss": 0.1885, "step": 151, "ts_encoder_learning_rate": 9.65241412598449e-06 }, { "epoch": 0.4006919845127276, "grad_norm": 2.6419154731801933, "learning_rate": 9.65241412598449e-06, "loss": 0.1845, "step": 152, "ts_encoder_learning_rate": 9.64651840937276e-06 }, { "epoch": 0.40332811598978496, "grad_norm": 3.9825926281731068, "learning_rate": 9.64651840937276e-06, "loss": 0.1829, "step": 153, "ts_encoder_learning_rate": 9.640574942595195e-06 }, { "epoch": 0.4059642474668424, "grad_norm": 2.57569222575164, "learning_rate": 9.640574942595195e-06, "loss": 0.1749, "step": 154, "ts_encoder_learning_rate": 9.63458378673011e-06 }, { "epoch": 0.40860037894389983, "grad_norm": 2.1803645371235696, "learning_rate": 9.63458378673011e-06, "loss": 0.1854, "step": 155, "ts_encoder_learning_rate": 9.6285450033459e-06 }, { "epoch": 0.41123651042095727, "grad_norm": 4.412857746362803, "learning_rate": 9.6285450033459e-06, "loss": 0.1844, "step": 156, "ts_encoder_learning_rate": 9.622458654500408e-06 }, { "epoch": 0.41387264189801465, "grad_norm": 3.9208819464546725, "learning_rate": 9.622458654500408e-06, "loss": 0.1843, "step": 157, "ts_encoder_learning_rate": 9.616324802740287e-06 }, { "epoch": 0.4165087733750721, "grad_norm": 2.423222471726984, "learning_rate": 9.616324802740287e-06, "loss": 0.1552, "step": 158, "ts_encoder_learning_rate": 9.610143511100354e-06 }, { "epoch": 0.4191449048521295, "grad_norm": 1.7446627651403062, "learning_rate": 9.610143511100354e-06, "loss": 0.1536, "step": 159, "ts_encoder_learning_rate": 9.603914843102941e-06 }, { "epoch": 0.4217810363291869, "grad_norm": 3.9104938056757, "learning_rate": 9.603914843102941e-06, "loss": 0.1688, "step": 160, "ts_encoder_learning_rate": 9.597638862757255e-06 }, { "epoch": 0.42441716780624433, "grad_norm": 3.0669664248193835, "learning_rate": 9.597638862757255e-06, "loss": 0.1623, "step": 161, "ts_encoder_learning_rate": 9.591315634558698e-06 }, { "epoch": 0.42705329928330177, "grad_norm": 2.2417095321151446, "learning_rate": 9.591315634558698e-06, "loss": 0.1769, "step": 162, "ts_encoder_learning_rate": 9.584945223488227e-06 }, { "epoch": 0.42968943076035915, "grad_norm": 1.3116635599548383, "learning_rate": 9.584945223488227e-06, "loss": 0.1338, "step": 163, "ts_encoder_learning_rate": 9.57852769501167e-06 }, { "epoch": 0.4323255622374166, "grad_norm": 2.6909253272106, "learning_rate": 9.57852769501167e-06, "loss": 0.1705, "step": 164, "ts_encoder_learning_rate": 9.572063115079063e-06 }, { "epoch": 0.434961693714474, "grad_norm": 2.4035516559911456, "learning_rate": 9.572063115079063e-06, "loss": 0.1507, "step": 165, "ts_encoder_learning_rate": 9.565551550123967e-06 }, { "epoch": 0.43759782519153145, "grad_norm": 1.698615887190314, "learning_rate": 9.565551550123967e-06, "loss": 0.1584, "step": 166, "ts_encoder_learning_rate": 9.558993067062785e-06 }, { "epoch": 0.44023395666858883, "grad_norm": 1.5658851097960265, "learning_rate": 9.558993067062785e-06, "loss": 0.1444, "step": 167, "ts_encoder_learning_rate": 9.552387733294081e-06 }, { "epoch": 0.44287008814564627, "grad_norm": 1.428302430233315, "learning_rate": 9.552387733294081e-06, "loss": 0.1392, "step": 168, "ts_encoder_learning_rate": 9.545735616697875e-06 }, { "epoch": 0.4455062196227037, "grad_norm": 1.8871291912109978, "learning_rate": 9.545735616697875e-06, "loss": 0.163, "step": 169, "ts_encoder_learning_rate": 9.539036785634961e-06 }, { "epoch": 0.4481423510997611, "grad_norm": 1.4930705735785357, "learning_rate": 9.539036785634961e-06, "loss": 0.1189, "step": 170, "ts_encoder_learning_rate": 9.532291308946191e-06 }, { "epoch": 0.4507784825768185, "grad_norm": 2.2958980821132733, "learning_rate": 9.532291308946191e-06, "loss": 0.1475, "step": 171, "ts_encoder_learning_rate": 9.525499255951775e-06 }, { "epoch": 0.45341461405387595, "grad_norm": 2.292110960348305, "learning_rate": 9.525499255951775e-06, "loss": 0.1464, "step": 172, "ts_encoder_learning_rate": 9.518660696450567e-06 }, { "epoch": 0.45605074553093333, "grad_norm": 1.5583683866727895, "learning_rate": 9.518660696450567e-06, "loss": 0.1637, "step": 173, "ts_encoder_learning_rate": 9.511775700719347e-06 }, { "epoch": 0.45868687700799077, "grad_norm": 2.293732030943085, "learning_rate": 9.511775700719347e-06, "loss": 0.1425, "step": 174, "ts_encoder_learning_rate": 9.504844339512096e-06 }, { "epoch": 0.4613230084850482, "grad_norm": 2.378677779357337, "learning_rate": 9.504844339512096e-06, "loss": 0.1706, "step": 175, "ts_encoder_learning_rate": 9.497866684059278e-06 }, { "epoch": 0.4639591399621056, "grad_norm": 1.545023475236903, "learning_rate": 9.497866684059278e-06, "loss": 0.139, "step": 176, "ts_encoder_learning_rate": 9.490842806067095e-06 }, { "epoch": 0.466595271439163, "grad_norm": 3.000973914853472, "learning_rate": 9.490842806067095e-06, "loss": 0.1596, "step": 177, "ts_encoder_learning_rate": 9.483772777716767e-06 }, { "epoch": 0.46923140291622045, "grad_norm": 3.558064259164343, "learning_rate": 9.483772777716767e-06, "loss": 0.1806, "step": 178, "ts_encoder_learning_rate": 9.476656671663766e-06 }, { "epoch": 0.4718675343932779, "grad_norm": 2.3771476346252287, "learning_rate": 9.476656671663766e-06, "loss": 0.1517, "step": 179, "ts_encoder_learning_rate": 9.469494561037097e-06 }, { "epoch": 0.47450366587033527, "grad_norm": 1.3953249248383899, "learning_rate": 9.469494561037097e-06, "loss": 0.1504, "step": 180, "ts_encoder_learning_rate": 9.462286519438531e-06 }, { "epoch": 0.4771397973473927, "grad_norm": 3.0894973811179613, "learning_rate": 9.462286519438531e-06, "loss": 0.1471, "step": 181, "ts_encoder_learning_rate": 9.45503262094184e-06 }, { "epoch": 0.47977592882445014, "grad_norm": 3.447964797634623, "learning_rate": 9.45503262094184e-06, "loss": 0.1729, "step": 182, "ts_encoder_learning_rate": 9.44773294009206e-06 }, { "epoch": 0.4824120603015075, "grad_norm": 1.3569301564256495, "learning_rate": 9.44773294009206e-06, "loss": 0.1377, "step": 183, "ts_encoder_learning_rate": 9.440387551904705e-06 }, { "epoch": 0.48504819177856495, "grad_norm": 1.299399066206849, "learning_rate": 9.440387551904705e-06, "loss": 0.1474, "step": 184, "ts_encoder_learning_rate": 9.432996531865001e-06 }, { "epoch": 0.4876843232556224, "grad_norm": 2.3518650616525925, "learning_rate": 9.432996531865001e-06, "loss": 0.1365, "step": 185, "ts_encoder_learning_rate": 9.425559955927118e-06 }, { "epoch": 0.49032045473267977, "grad_norm": 2.444972697601273, "learning_rate": 9.425559955927118e-06, "loss": 0.1771, "step": 186, "ts_encoder_learning_rate": 9.418077900513377e-06 }, { "epoch": 0.4929565862097372, "grad_norm": 1.7782371185002437, "learning_rate": 9.418077900513377e-06, "loss": 0.1373, "step": 187, "ts_encoder_learning_rate": 9.410550442513475e-06 }, { "epoch": 0.49559271768679464, "grad_norm": 1.6778989547499836, "learning_rate": 9.410550442513475e-06, "loss": 0.1565, "step": 188, "ts_encoder_learning_rate": 9.40297765928369e-06 }, { "epoch": 0.49822884916385207, "grad_norm": 2.155563042258836, "learning_rate": 9.40297765928369e-06, "loss": 0.153, "step": 189, "ts_encoder_learning_rate": 9.395359628646087e-06 }, { "epoch": 0.5008649806409095, "grad_norm": 1.295519636961886, "learning_rate": 9.395359628646087e-06, "loss": 0.129, "step": 190, "ts_encoder_learning_rate": 9.387696428887715e-06 }, { "epoch": 0.5035011121179669, "grad_norm": 1.9350288061910503, "learning_rate": 9.387696428887715e-06, "loss": 0.1397, "step": 191, "ts_encoder_learning_rate": 9.37998813875981e-06 }, { "epoch": 0.5061372435950243, "grad_norm": 1.4020631402932546, "learning_rate": 9.37998813875981e-06, "loss": 0.1496, "step": 192, "ts_encoder_learning_rate": 9.372234837476979e-06 }, { "epoch": 0.5087733750720818, "grad_norm": 1.6083327990489644, "learning_rate": 9.372234837476979e-06, "loss": 0.1435, "step": 193, "ts_encoder_learning_rate": 9.364436604716389e-06 }, { "epoch": 0.5114095065491392, "grad_norm": 1.6706875142270174, "learning_rate": 9.364436604716389e-06, "loss": 0.1465, "step": 194, "ts_encoder_learning_rate": 9.356593520616948e-06 }, { "epoch": 0.5140456380261965, "grad_norm": 1.2317832812802163, "learning_rate": 9.356593520616948e-06, "loss": 0.1242, "step": 195, "ts_encoder_learning_rate": 9.348705665778479e-06 }, { "epoch": 0.516681769503254, "grad_norm": 2.2546259482178415, "learning_rate": 9.348705665778479e-06, "loss": 0.1446, "step": 196, "ts_encoder_learning_rate": 9.340773121260893e-06 }, { "epoch": 0.5193179009803114, "grad_norm": 2.4160528757749202, "learning_rate": 9.340773121260893e-06, "loss": 0.1426, "step": 197, "ts_encoder_learning_rate": 9.33279596858336e-06 }, { "epoch": 0.5219540324573688, "grad_norm": 2.9782825880249475, "learning_rate": 9.33279596858336e-06, "loss": 0.1581, "step": 198, "ts_encoder_learning_rate": 9.324774289723469e-06 }, { "epoch": 0.5245901639344263, "grad_norm": 2.2270014484031537, "learning_rate": 9.324774289723469e-06, "loss": 0.1456, "step": 199, "ts_encoder_learning_rate": 9.316708167116377e-06 }, { "epoch": 0.5272262954114837, "grad_norm": 2.254839404791111, "learning_rate": 9.316708167116377e-06, "loss": 0.1719, "step": 200, "ts_encoder_learning_rate": 9.308597683653976e-06 }, { "epoch": 0.529862426888541, "grad_norm": 2.1147336782126906, "learning_rate": 9.308597683653976e-06, "loss": 0.1412, "step": 201, "ts_encoder_learning_rate": 9.300442922684033e-06 }, { "epoch": 0.5324985583655985, "grad_norm": 3.403863020435584, "learning_rate": 9.300442922684033e-06, "loss": 0.159, "step": 202, "ts_encoder_learning_rate": 9.292243968009332e-06 }, { "epoch": 0.5351346898426559, "grad_norm": 2.1840583753378704, "learning_rate": 9.292243968009332e-06, "loss": 0.153, "step": 203, "ts_encoder_learning_rate": 9.284000903886818e-06 }, { "epoch": 0.5377708213197133, "grad_norm": 2.756745654144269, "learning_rate": 9.284000903886818e-06, "loss": 0.1496, "step": 204, "ts_encoder_learning_rate": 9.275713815026732e-06 }, { "epoch": 0.5404069527967708, "grad_norm": 1.562920226633681, "learning_rate": 9.275713815026732e-06, "loss": 0.1444, "step": 205, "ts_encoder_learning_rate": 9.26738278659173e-06 }, { "epoch": 0.5430430842738282, "grad_norm": 1.8988550139932143, "learning_rate": 9.26738278659173e-06, "loss": 0.1327, "step": 206, "ts_encoder_learning_rate": 9.259007904196023e-06 }, { "epoch": 0.5456792157508856, "grad_norm": 1.804458613750819, "learning_rate": 9.259007904196023e-06, "loss": 0.1502, "step": 207, "ts_encoder_learning_rate": 9.250589253904481e-06 }, { "epoch": 0.548315347227943, "grad_norm": 1.8650637771648768, "learning_rate": 9.250589253904481e-06, "loss": 0.1473, "step": 208, "ts_encoder_learning_rate": 9.242126922231763e-06 }, { "epoch": 0.5509514787050004, "grad_norm": 2.051385105386284, "learning_rate": 9.242126922231763e-06, "loss": 0.1658, "step": 209, "ts_encoder_learning_rate": 9.233620996141421e-06 }, { "epoch": 0.5535876101820578, "grad_norm": 2.3089156973651463, "learning_rate": 9.233620996141421e-06, "loss": 0.145, "step": 210, "ts_encoder_learning_rate": 9.225071563045007e-06 }, { "epoch": 0.5562237416591153, "grad_norm": 2.3126324557088265, "learning_rate": 9.225071563045007e-06, "loss": 0.1503, "step": 211, "ts_encoder_learning_rate": 9.216478710801171e-06 }, { "epoch": 0.5588598731361727, "grad_norm": 2.0335782421296047, "learning_rate": 9.216478710801171e-06, "loss": 0.1254, "step": 212, "ts_encoder_learning_rate": 9.207842527714767e-06 }, { "epoch": 0.5614960046132301, "grad_norm": 2.959817313362635, "learning_rate": 9.207842527714767e-06, "loss": 0.1344, "step": 213, "ts_encoder_learning_rate": 9.199163102535937e-06 }, { "epoch": 0.5641321360902875, "grad_norm": 2.401709370382837, "learning_rate": 9.199163102535937e-06, "loss": 0.1235, "step": 214, "ts_encoder_learning_rate": 9.190440524459203e-06 }, { "epoch": 0.5667682675673449, "grad_norm": 2.4698482564816437, "learning_rate": 9.190440524459203e-06, "loss": 0.1652, "step": 215, "ts_encoder_learning_rate": 9.181674883122554e-06 }, { "epoch": 0.5694043990444023, "grad_norm": 2.174645988827366, "learning_rate": 9.181674883122554e-06, "loss": 0.1191, "step": 216, "ts_encoder_learning_rate": 9.172866268606514e-06 }, { "epoch": 0.5720405305214598, "grad_norm": 1.4935255107278584, "learning_rate": 9.172866268606514e-06, "loss": 0.1348, "step": 217, "ts_encoder_learning_rate": 9.164014771433228e-06 }, { "epoch": 0.5746766619985172, "grad_norm": 2.261714559414658, "learning_rate": 9.164014771433228e-06, "loss": 0.1393, "step": 218, "ts_encoder_learning_rate": 9.15512048256552e-06 }, { "epoch": 0.5773127934755746, "grad_norm": 1.9389125772079525, "learning_rate": 9.15512048256552e-06, "loss": 0.1159, "step": 219, "ts_encoder_learning_rate": 9.146183493405976e-06 }, { "epoch": 0.5799489249526321, "grad_norm": 2.2307885757186376, "learning_rate": 9.146183493405976e-06, "loss": 0.1197, "step": 220, "ts_encoder_learning_rate": 9.137203895795983e-06 }, { "epoch": 0.5825850564296894, "grad_norm": 2.2631154256287784, "learning_rate": 9.137203895795983e-06, "loss": 0.1297, "step": 221, "ts_encoder_learning_rate": 9.128181782014801e-06 }, { "epoch": 0.5852211879067468, "grad_norm": 1.8377593809270902, "learning_rate": 9.128181782014801e-06, "loss": 0.1375, "step": 222, "ts_encoder_learning_rate": 9.119117244778609e-06 }, { "epoch": 0.5878573193838043, "grad_norm": 1.941575989624506, "learning_rate": 9.119117244778609e-06, "loss": 0.1381, "step": 223, "ts_encoder_learning_rate": 9.110010377239552e-06 }, { "epoch": 0.5904934508608617, "grad_norm": 2.0700642084907797, "learning_rate": 9.110010377239552e-06, "loss": 0.1215, "step": 224, "ts_encoder_learning_rate": 9.10086127298478e-06 }, { "epoch": 0.5931295823379191, "grad_norm": 2.175877796189538, "learning_rate": 9.10086127298478e-06, "loss": 0.1274, "step": 225, "ts_encoder_learning_rate": 9.0916700260355e-06 }, { "epoch": 0.5957657138149766, "grad_norm": 2.7088680551627444, "learning_rate": 9.0916700260355e-06, "loss": 0.1546, "step": 226, "ts_encoder_learning_rate": 9.082436730845993e-06 }, { "epoch": 0.5984018452920339, "grad_norm": 2.2127473631725634, "learning_rate": 9.082436730845993e-06, "loss": 0.1456, "step": 227, "ts_encoder_learning_rate": 9.073161482302656e-06 }, { "epoch": 0.6010379767690913, "grad_norm": 2.2852943362812947, "learning_rate": 9.073161482302656e-06, "loss": 0.1133, "step": 228, "ts_encoder_learning_rate": 9.063844375723014e-06 }, { "epoch": 0.6036741082461488, "grad_norm": 2.022226024724427, "learning_rate": 9.063844375723014e-06, "loss": 0.147, "step": 229, "ts_encoder_learning_rate": 9.054485506854756e-06 }, { "epoch": 0.6063102397232062, "grad_norm": 1.4127928730528008, "learning_rate": 9.054485506854756e-06, "loss": 0.1399, "step": 230, "ts_encoder_learning_rate": 9.045084971874738e-06 }, { "epoch": 0.6089463712002636, "grad_norm": 2.3124067231050796, "learning_rate": 9.045084971874738e-06, "loss": 0.166, "step": 231, "ts_encoder_learning_rate": 9.035642867388003e-06 }, { "epoch": 0.6115825026773211, "grad_norm": 2.9417960489910557, "learning_rate": 9.035642867388003e-06, "loss": 0.1336, "step": 232, "ts_encoder_learning_rate": 9.026159290426782e-06 }, { "epoch": 0.6142186341543785, "grad_norm": 2.037375696729251, "learning_rate": 9.026159290426782e-06, "loss": 0.1226, "step": 233, "ts_encoder_learning_rate": 9.016634338449504e-06 }, { "epoch": 0.6168547656314358, "grad_norm": 2.7178140969916478, "learning_rate": 9.016634338449504e-06, "loss": 0.1405, "step": 234, "ts_encoder_learning_rate": 9.007068109339783e-06 }, { "epoch": 0.6194908971084933, "grad_norm": 2.4572769251627804, "learning_rate": 9.007068109339783e-06, "loss": 0.1296, "step": 235, "ts_encoder_learning_rate": 8.997460701405431e-06 }, { "epoch": 0.6221270285855507, "grad_norm": 1.7074800943778181, "learning_rate": 8.997460701405431e-06, "loss": 0.1076, "step": 236, "ts_encoder_learning_rate": 8.987812213377423e-06 }, { "epoch": 0.6247631600626081, "grad_norm": 1.3569057680195367, "learning_rate": 8.987812213377423e-06, "loss": 0.1307, "step": 237, "ts_encoder_learning_rate": 8.978122744408905e-06 }, { "epoch": 0.6273992915396656, "grad_norm": 2.2197611340479333, "learning_rate": 8.978122744408905e-06, "loss": 0.1255, "step": 238, "ts_encoder_learning_rate": 8.968392394074164e-06 }, { "epoch": 0.630035423016723, "grad_norm": 2.80909657385505, "learning_rate": 8.968392394074164e-06, "loss": 0.1311, "step": 239, "ts_encoder_learning_rate": 8.9586212623676e-06 }, { "epoch": 0.6326715544937804, "grad_norm": 1.596591103789994, "learning_rate": 8.9586212623676e-06, "loss": 0.1247, "step": 240, "ts_encoder_learning_rate": 8.948809449702712e-06 }, { "epoch": 0.6353076859708378, "grad_norm": 2.255624101113016, "learning_rate": 8.948809449702712e-06, "loss": 0.1125, "step": 241, "ts_encoder_learning_rate": 8.938957056911057e-06 }, { "epoch": 0.6379438174478952, "grad_norm": 1.4404926531170739, "learning_rate": 8.938957056911057e-06, "loss": 0.1156, "step": 242, "ts_encoder_learning_rate": 8.929064185241214e-06 }, { "epoch": 0.6405799489249526, "grad_norm": 2.0628024401597465, "learning_rate": 8.929064185241214e-06, "loss": 0.137, "step": 243, "ts_encoder_learning_rate": 8.919130936357743e-06 }, { "epoch": 0.6432160804020101, "grad_norm": 1.7372644218298394, "learning_rate": 8.919130936357743e-06, "loss": 0.111, "step": 244, "ts_encoder_learning_rate": 8.90915741234015e-06 }, { "epoch": 0.6458522118790675, "grad_norm": 1.627934422877123, "learning_rate": 8.90915741234015e-06, "loss": 0.1334, "step": 245, "ts_encoder_learning_rate": 8.899143715681822e-06 }, { "epoch": 0.6484883433561249, "grad_norm": 1.8556278497877248, "learning_rate": 8.899143715681822e-06, "loss": 0.1219, "step": 246, "ts_encoder_learning_rate": 8.889089949288986e-06 }, { "epoch": 0.6511244748331823, "grad_norm": 3.1526166628811603, "learning_rate": 8.889089949288986e-06, "loss": 0.0894, "step": 247, "ts_encoder_learning_rate": 8.878996216479651e-06 }, { "epoch": 0.6537606063102397, "grad_norm": 2.213556128570375, "learning_rate": 8.878996216479651e-06, "loss": 0.1277, "step": 248, "ts_encoder_learning_rate": 8.868862620982534e-06 }, { "epoch": 0.6563967377872971, "grad_norm": 4.4965782913660854, "learning_rate": 8.868862620982534e-06, "loss": 0.1258, "step": 249, "ts_encoder_learning_rate": 8.85868926693601e-06 }, { "epoch": 0.6590328692643546, "grad_norm": 2.2567027094465573, "learning_rate": 8.85868926693601e-06, "loss": 0.1313, "step": 250, "ts_encoder_learning_rate": 8.84847625888703e-06 }, { "epoch": 0.661669000741412, "grad_norm": 2.865960904272953, "learning_rate": 8.84847625888703e-06, "loss": 0.1255, "step": 251, "ts_encoder_learning_rate": 8.838223701790057e-06 }, { "epoch": 0.6643051322184694, "grad_norm": 1.698484873506614, "learning_rate": 8.838223701790057e-06, "loss": 0.1277, "step": 252, "ts_encoder_learning_rate": 8.827931701005974e-06 }, { "epoch": 0.6669412636955269, "grad_norm": 1.7108540523901774, "learning_rate": 8.827931701005974e-06, "loss": 0.1238, "step": 253, "ts_encoder_learning_rate": 8.817600362301018e-06 }, { "epoch": 0.6695773951725842, "grad_norm": 2.574763087212977, "learning_rate": 8.817600362301018e-06, "loss": 0.119, "step": 254, "ts_encoder_learning_rate": 8.807229791845673e-06 }, { "epoch": 0.6722135266496416, "grad_norm": 1.76981658030345, "learning_rate": 8.807229791845673e-06, "loss": 0.1162, "step": 255, "ts_encoder_learning_rate": 8.7968200962136e-06 }, { "epoch": 0.6748496581266991, "grad_norm": 2.3402398000656532, "learning_rate": 8.7968200962136e-06, "loss": 0.1082, "step": 256, "ts_encoder_learning_rate": 8.786371382380527e-06 }, { "epoch": 0.6774857896037565, "grad_norm": 3.333892645282909, "learning_rate": 8.786371382380527e-06, "loss": 0.1226, "step": 257, "ts_encoder_learning_rate": 8.775883757723156e-06 }, { "epoch": 0.6801219210808139, "grad_norm": 2.0605502821038626, "learning_rate": 8.775883757723156e-06, "loss": 0.1278, "step": 258, "ts_encoder_learning_rate": 8.765357330018056e-06 }, { "epoch": 0.6827580525578714, "grad_norm": 1.8198127860386084, "learning_rate": 8.765357330018056e-06, "loss": 0.1104, "step": 259, "ts_encoder_learning_rate": 8.754792207440557e-06 }, { "epoch": 0.6853941840349287, "grad_norm": 1.7852327454966495, "learning_rate": 8.754792207440557e-06, "loss": 0.1257, "step": 260, "ts_encoder_learning_rate": 8.74418849856364e-06 }, { "epoch": 0.6880303155119861, "grad_norm": 2.3856839525596043, "learning_rate": 8.74418849856364e-06, "loss": 0.1232, "step": 261, "ts_encoder_learning_rate": 8.733546312356826e-06 }, { "epoch": 0.6906664469890436, "grad_norm": 2.0514309990696, "learning_rate": 8.733546312356826e-06, "loss": 0.106, "step": 262, "ts_encoder_learning_rate": 8.722865758185036e-06 }, { "epoch": 0.693302578466101, "grad_norm": 1.8497678205013666, "learning_rate": 8.722865758185036e-06, "loss": 0.0925, "step": 263, "ts_encoder_learning_rate": 8.712146945807494e-06 }, { "epoch": 0.6959387099431584, "grad_norm": 2.325593177065593, "learning_rate": 8.712146945807494e-06, "loss": 0.1151, "step": 264, "ts_encoder_learning_rate": 8.701389985376578e-06 }, { "epoch": 0.6985748414202159, "grad_norm": 2.4024115509182544, "learning_rate": 8.701389985376578e-06, "loss": 0.1351, "step": 265, "ts_encoder_learning_rate": 8.690594987436705e-06 }, { "epoch": 0.7012109728972733, "grad_norm": 2.404989824928956, "learning_rate": 8.690594987436705e-06, "loss": 0.1183, "step": 266, "ts_encoder_learning_rate": 8.679762062923176e-06 }, { "epoch": 0.7038471043743306, "grad_norm": 2.4255255570984904, "learning_rate": 8.679762062923176e-06, "loss": 0.1175, "step": 267, "ts_encoder_learning_rate": 8.668891323161053e-06 }, { "epoch": 0.7064832358513881, "grad_norm": 2.063468492850095, "learning_rate": 8.668891323161053e-06, "loss": 0.1136, "step": 268, "ts_encoder_learning_rate": 8.657982879864007e-06 }, { "epoch": 0.7091193673284455, "grad_norm": 2.454812018935756, "learning_rate": 8.657982879864007e-06, "loss": 0.1184, "step": 269, "ts_encoder_learning_rate": 8.647036845133171e-06 }, { "epoch": 0.7117554988055029, "grad_norm": 2.389510429853746, "learning_rate": 8.647036845133171e-06, "loss": 0.1109, "step": 270, "ts_encoder_learning_rate": 8.636053331455986e-06 }, { "epoch": 0.7143916302825604, "grad_norm": 2.534713674498363, "learning_rate": 8.636053331455986e-06, "loss": 0.1245, "step": 271, "ts_encoder_learning_rate": 8.625032451705053e-06 }, { "epoch": 0.7170277617596178, "grad_norm": 1.8778540259505276, "learning_rate": 8.625032451705053e-06, "loss": 0.1116, "step": 272, "ts_encoder_learning_rate": 8.613974319136959e-06 }, { "epoch": 0.7196638932366752, "grad_norm": 2.1437242323626053, "learning_rate": 8.613974319136959e-06, "loss": 0.1093, "step": 273, "ts_encoder_learning_rate": 8.602879047391127e-06 }, { "epoch": 0.7223000247137326, "grad_norm": 1.841731554668583, "learning_rate": 8.602879047391127e-06, "loss": 0.1155, "step": 274, "ts_encoder_learning_rate": 8.591746750488639e-06 }, { "epoch": 0.72493615619079, "grad_norm": 1.7263269186243153, "learning_rate": 8.591746750488639e-06, "loss": 0.1106, "step": 275, "ts_encoder_learning_rate": 8.580577542831072e-06 }, { "epoch": 0.7275722876678474, "grad_norm": 2.1861381161848144, "learning_rate": 8.580577542831072e-06, "loss": 0.1069, "step": 276, "ts_encoder_learning_rate": 8.569371539199316e-06 }, { "epoch": 0.7302084191449049, "grad_norm": 1.83254402858276, "learning_rate": 8.569371539199316e-06, "loss": 0.1062, "step": 277, "ts_encoder_learning_rate": 8.558128854752397e-06 }, { "epoch": 0.7328445506219623, "grad_norm": 1.5361832609660626, "learning_rate": 8.558128854752397e-06, "loss": 0.0958, "step": 278, "ts_encoder_learning_rate": 8.54684960502629e-06 }, { "epoch": 0.7354806820990197, "grad_norm": 1.6977272803107797, "learning_rate": 8.54684960502629e-06, "loss": 0.0986, "step": 279, "ts_encoder_learning_rate": 8.535533905932739e-06 }, { "epoch": 0.7381168135760771, "grad_norm": 1.805089674746036, "learning_rate": 8.535533905932739e-06, "loss": 0.0959, "step": 280, "ts_encoder_learning_rate": 8.52418187375806e-06 }, { "epoch": 0.7407529450531345, "grad_norm": 2.4198941080590153, "learning_rate": 8.52418187375806e-06, "loss": 0.1202, "step": 281, "ts_encoder_learning_rate": 8.512793625161947e-06 }, { "epoch": 0.7433890765301919, "grad_norm": 1.9365613559437527, "learning_rate": 8.512793625161947e-06, "loss": 0.1251, "step": 282, "ts_encoder_learning_rate": 8.501369277176275e-06 }, { "epoch": 0.7460252080072494, "grad_norm": 2.6757940840346475, "learning_rate": 8.501369277176275e-06, "loss": 0.1167, "step": 283, "ts_encoder_learning_rate": 8.489908947203897e-06 }, { "epoch": 0.7486613394843068, "grad_norm": 1.3212164606431878, "learning_rate": 8.489908947203897e-06, "loss": 0.0967, "step": 284, "ts_encoder_learning_rate": 8.478412753017433e-06 }, { "epoch": 0.7512974709613642, "grad_norm": 2.3824932932765255, "learning_rate": 8.478412753017433e-06, "loss": 0.1211, "step": 285, "ts_encoder_learning_rate": 8.466880812758064e-06 }, { "epoch": 0.7539336024384217, "grad_norm": 1.8845886753934002, "learning_rate": 8.466880812758064e-06, "loss": 0.1156, "step": 286, "ts_encoder_learning_rate": 8.455313244934324e-06 }, { "epoch": 0.756569733915479, "grad_norm": 1.9404694724173959, "learning_rate": 8.455313244934324e-06, "loss": 0.0977, "step": 287, "ts_encoder_learning_rate": 8.443710168420866e-06 }, { "epoch": 0.7592058653925364, "grad_norm": 2.3462365486027097, "learning_rate": 8.443710168420866e-06, "loss": 0.105, "step": 288, "ts_encoder_learning_rate": 8.432071702457253e-06 }, { "epoch": 0.7618419968695939, "grad_norm": 2.228457755561138, "learning_rate": 8.432071702457253e-06, "loss": 0.1221, "step": 289, "ts_encoder_learning_rate": 8.420397966646732e-06 }, { "epoch": 0.7644781283466513, "grad_norm": 1.5579438707012758, "learning_rate": 8.420397966646732e-06, "loss": 0.0959, "step": 290, "ts_encoder_learning_rate": 8.408689080954997e-06 }, { "epoch": 0.7671142598237087, "grad_norm": 2.5815854965292258, "learning_rate": 8.408689080954997e-06, "loss": 0.1037, "step": 291, "ts_encoder_learning_rate": 8.396945165708971e-06 }, { "epoch": 0.7697503913007662, "grad_norm": 1.9767105091219221, "learning_rate": 8.396945165708971e-06, "loss": 0.1083, "step": 292, "ts_encoder_learning_rate": 8.38516634159555e-06 }, { "epoch": 0.7723865227778235, "grad_norm": 2.164960614575108, "learning_rate": 8.38516634159555e-06, "loss": 0.1092, "step": 293, "ts_encoder_learning_rate": 8.373352729660373e-06 }, { "epoch": 0.7750226542548809, "grad_norm": 2.426995380324648, "learning_rate": 8.373352729660373e-06, "loss": 0.0968, "step": 294, "ts_encoder_learning_rate": 8.361504451306585e-06 }, { "epoch": 0.7776587857319384, "grad_norm": 2.2037799475117765, "learning_rate": 8.361504451306585e-06, "loss": 0.1216, "step": 295, "ts_encoder_learning_rate": 8.349621628293578e-06 }, { "epoch": 0.7802949172089958, "grad_norm": 1.5101855867509804, "learning_rate": 8.349621628293578e-06, "loss": 0.0865, "step": 296, "ts_encoder_learning_rate": 8.337704382735741e-06 }, { "epoch": 0.7829310486860532, "grad_norm": 1.8304482678186873, "learning_rate": 8.337704382735741e-06, "loss": 0.0907, "step": 297, "ts_encoder_learning_rate": 8.325752837101213e-06 }, { "epoch": 0.7855671801631107, "grad_norm": 1.3186981403774303, "learning_rate": 8.325752837101213e-06, "loss": 0.0953, "step": 298, "ts_encoder_learning_rate": 8.313767114210615e-06 }, { "epoch": 0.7882033116401681, "grad_norm": 1.6252129812087586, "learning_rate": 8.313767114210615e-06, "loss": 0.0963, "step": 299, "ts_encoder_learning_rate": 8.301747337235798e-06 }, { "epoch": 0.7908394431172254, "grad_norm": 2.5667790327516604, "learning_rate": 8.301747337235798e-06, "loss": 0.1127, "step": 300, "ts_encoder_learning_rate": 8.289693629698564e-06 }, { "epoch": 0.7934755745942829, "grad_norm": 1.3839390646179972, "learning_rate": 8.289693629698564e-06, "loss": 0.0959, "step": 301, "ts_encoder_learning_rate": 8.27760611546941e-06 }, { "epoch": 0.7961117060713403, "grad_norm": 1.619204129359444, "learning_rate": 8.27760611546941e-06, "loss": 0.0941, "step": 302, "ts_encoder_learning_rate": 8.265484918766243e-06 }, { "epoch": 0.7987478375483977, "grad_norm": 13.33516060264948, "learning_rate": 8.265484918766243e-06, "loss": 0.1133, "step": 303, "ts_encoder_learning_rate": 8.253330164153118e-06 }, { "epoch": 0.8013839690254552, "grad_norm": 2.1482523103471736, "learning_rate": 8.253330164153118e-06, "loss": 0.0951, "step": 304, "ts_encoder_learning_rate": 8.241141976538944e-06 }, { "epoch": 0.8040201005025126, "grad_norm": 1.765409769976939, "learning_rate": 8.241141976538944e-06, "loss": 0.0999, "step": 305, "ts_encoder_learning_rate": 8.228920481176202e-06 }, { "epoch": 0.8066562319795699, "grad_norm": 1.6926228572462632, "learning_rate": 8.228920481176202e-06, "loss": 0.0971, "step": 306, "ts_encoder_learning_rate": 8.216665803659671e-06 }, { "epoch": 0.8092923634566274, "grad_norm": 2.2379314054489203, "learning_rate": 8.216665803659671e-06, "loss": 0.0892, "step": 307, "ts_encoder_learning_rate": 8.204378069925121e-06 }, { "epoch": 0.8119284949336848, "grad_norm": 2.3735432898917552, "learning_rate": 8.204378069925121e-06, "loss": 0.1129, "step": 308, "ts_encoder_learning_rate": 8.192057406248028e-06 }, { "epoch": 0.8145646264107422, "grad_norm": 1.9923807546881467, "learning_rate": 8.192057406248028e-06, "loss": 0.1031, "step": 309, "ts_encoder_learning_rate": 8.179703939242276e-06 }, { "epoch": 0.8172007578877997, "grad_norm": 1.469953484901779, "learning_rate": 8.179703939242276e-06, "loss": 0.0928, "step": 310, "ts_encoder_learning_rate": 8.16731779585885e-06 }, { "epoch": 0.8198368893648571, "grad_norm": 1.6845119035497684, "learning_rate": 8.16731779585885e-06, "loss": 0.1036, "step": 311, "ts_encoder_learning_rate": 8.154899103384536e-06 }, { "epoch": 0.8224730208419145, "grad_norm": 2.997454352710499, "learning_rate": 8.154899103384536e-06, "loss": 0.0915, "step": 312, "ts_encoder_learning_rate": 8.142447989440618e-06 }, { "epoch": 0.8251091523189719, "grad_norm": 1.7182074416814566, "learning_rate": 8.142447989440618e-06, "loss": 0.1003, "step": 313, "ts_encoder_learning_rate": 8.129964581981554e-06 }, { "epoch": 0.8277452837960293, "grad_norm": 1.9353741860227904, "learning_rate": 8.129964581981554e-06, "loss": 0.0898, "step": 314, "ts_encoder_learning_rate": 8.117449009293668e-06 }, { "epoch": 0.8303814152730867, "grad_norm": 1.5652582875371663, "learning_rate": 8.117449009293668e-06, "loss": 0.0935, "step": 315, "ts_encoder_learning_rate": 8.104901399993837e-06 }, { "epoch": 0.8330175467501442, "grad_norm": 1.1621735202700743, "learning_rate": 8.104901399993837e-06, "loss": 0.0996, "step": 316, "ts_encoder_learning_rate": 8.092321883028157e-06 }, { "epoch": 0.8356536782272016, "grad_norm": 1.6764936566754018, "learning_rate": 8.092321883028157e-06, "loss": 0.1089, "step": 317, "ts_encoder_learning_rate": 8.079710587670633e-06 }, { "epoch": 0.838289809704259, "grad_norm": 2.0164848493032808, "learning_rate": 8.079710587670633e-06, "loss": 0.0996, "step": 318, "ts_encoder_learning_rate": 8.067067643521834e-06 }, { "epoch": 0.8409259411813165, "grad_norm": 1.6759859565355775, "learning_rate": 8.067067643521834e-06, "loss": 0.1023, "step": 319, "ts_encoder_learning_rate": 8.054393180507572e-06 }, { "epoch": 0.8435620726583738, "grad_norm": 1.416243499558276, "learning_rate": 8.054393180507572e-06, "loss": 0.093, "step": 320, "ts_encoder_learning_rate": 8.041687328877566e-06 }, { "epoch": 0.8461982041354312, "grad_norm": 2.18872702162099, "learning_rate": 8.041687328877566e-06, "loss": 0.0981, "step": 321, "ts_encoder_learning_rate": 8.0289502192041e-06 }, { "epoch": 0.8488343356124887, "grad_norm": 1.3033768655546698, "learning_rate": 8.0289502192041e-06, "loss": 0.0839, "step": 322, "ts_encoder_learning_rate": 8.016181982380682e-06 }, { "epoch": 0.8514704670895461, "grad_norm": 2.0867250492468266, "learning_rate": 8.016181982380682e-06, "loss": 0.1059, "step": 323, "ts_encoder_learning_rate": 8.003382749620704e-06 }, { "epoch": 0.8541065985666035, "grad_norm": 1.488852214231389, "learning_rate": 8.003382749620704e-06, "loss": 0.0792, "step": 324, "ts_encoder_learning_rate": 7.99055265245608e-06 }, { "epoch": 0.856742730043661, "grad_norm": 1.6343632839095115, "learning_rate": 7.99055265245608e-06, "loss": 0.0969, "step": 325, "ts_encoder_learning_rate": 7.977691822735914e-06 }, { "epoch": 0.8593788615207183, "grad_norm": 1.7752008703715696, "learning_rate": 7.977691822735914e-06, "loss": 0.1092, "step": 326, "ts_encoder_learning_rate": 7.96480039262513e-06 }, { "epoch": 0.8620149929977757, "grad_norm": 1.5781012521986801, "learning_rate": 7.96480039262513e-06, "loss": 0.0912, "step": 327, "ts_encoder_learning_rate": 7.951878494603116e-06 }, { "epoch": 0.8646511244748332, "grad_norm": 1.7441562007191758, "learning_rate": 7.951878494603116e-06, "loss": 0.0888, "step": 328, "ts_encoder_learning_rate": 7.938926261462366e-06 }, { "epoch": 0.8672872559518906, "grad_norm": 1.8345267313412936, "learning_rate": 7.938926261462366e-06, "loss": 0.0835, "step": 329, "ts_encoder_learning_rate": 7.925943826307119e-06 }, { "epoch": 0.869923387428948, "grad_norm": 1.9237226057153634, "learning_rate": 7.925943826307119e-06, "loss": 0.1003, "step": 330, "ts_encoder_learning_rate": 7.912931322551981e-06 }, { "epoch": 0.8725595189060055, "grad_norm": 2.0786928587458235, "learning_rate": 7.912931322551981e-06, "loss": 0.0977, "step": 331, "ts_encoder_learning_rate": 7.89988888392056e-06 }, { "epoch": 0.8751956503830629, "grad_norm": 1.137765624411406, "learning_rate": 7.89988888392056e-06, "loss": 0.0882, "step": 332, "ts_encoder_learning_rate": 7.886816644444099e-06 }, { "epoch": 0.8778317818601202, "grad_norm": 1.5400536066702828, "learning_rate": 7.886816644444099e-06, "loss": 0.0899, "step": 333, "ts_encoder_learning_rate": 7.873714738460075e-06 }, { "epoch": 0.8804679133371777, "grad_norm": 1.9149603930639032, "learning_rate": 7.873714738460075e-06, "loss": 0.086, "step": 334, "ts_encoder_learning_rate": 7.860583300610849e-06 }, { "epoch": 0.8831040448142351, "grad_norm": 1.845652808068975, "learning_rate": 7.860583300610849e-06, "loss": 0.1191, "step": 335, "ts_encoder_learning_rate": 7.84742246584226e-06 }, { "epoch": 0.8857401762912925, "grad_norm": 1.539167450730259, "learning_rate": 7.84742246584226e-06, "loss": 0.1018, "step": 336, "ts_encoder_learning_rate": 7.83423236940225e-06 }, { "epoch": 0.88837630776835, "grad_norm": 2.7054499134952303, "learning_rate": 7.83423236940225e-06, "loss": 0.0787, "step": 337, "ts_encoder_learning_rate": 7.821013146839467e-06 }, { "epoch": 0.8910124392454074, "grad_norm": 2.3960105755739356, "learning_rate": 7.821013146839467e-06, "loss": 0.0998, "step": 338, "ts_encoder_learning_rate": 7.807764934001875e-06 }, { "epoch": 0.8936485707224647, "grad_norm": 1.9114513463572482, "learning_rate": 7.807764934001875e-06, "loss": 0.0955, "step": 339, "ts_encoder_learning_rate": 7.794487867035358e-06 }, { "epoch": 0.8962847021995222, "grad_norm": 1.1940763189233343, "learning_rate": 7.794487867035358e-06, "loss": 0.0978, "step": 340, "ts_encoder_learning_rate": 7.781182082382325e-06 }, { "epoch": 0.8989208336765796, "grad_norm": 1.6611445100384334, "learning_rate": 7.781182082382325e-06, "loss": 0.0945, "step": 341, "ts_encoder_learning_rate": 7.767847716780297e-06 }, { "epoch": 0.901556965153637, "grad_norm": 1.925999851813011, "learning_rate": 7.767847716780297e-06, "loss": 0.1061, "step": 342, "ts_encoder_learning_rate": 7.754484907260513e-06 }, { "epoch": 0.9041930966306945, "grad_norm": 1.3700140058079462, "learning_rate": 7.754484907260513e-06, "loss": 0.0816, "step": 343, "ts_encoder_learning_rate": 7.741093791146517e-06 }, { "epoch": 0.9068292281077519, "grad_norm": 2.1966063055592366, "learning_rate": 7.741093791146517e-06, "loss": 0.1056, "step": 344, "ts_encoder_learning_rate": 7.727674506052744e-06 }, { "epoch": 0.9094653595848093, "grad_norm": 1.7516803825411136, "learning_rate": 7.727674506052744e-06, "loss": 0.0933, "step": 345, "ts_encoder_learning_rate": 7.714227189883112e-06 }, { "epoch": 0.9121014910618667, "grad_norm": 2.2157367065223297, "learning_rate": 7.714227189883112e-06, "loss": 0.0918, "step": 346, "ts_encoder_learning_rate": 7.700751980829601e-06 }, { "epoch": 0.9147376225389241, "grad_norm": 2.20733086616214, "learning_rate": 7.700751980829601e-06, "loss": 0.0975, "step": 347, "ts_encoder_learning_rate": 7.687249017370832e-06 }, { "epoch": 0.9173737540159815, "grad_norm": 2.084116228493792, "learning_rate": 7.687249017370832e-06, "loss": 0.0992, "step": 348, "ts_encoder_learning_rate": 7.673718438270649e-06 }, { "epoch": 0.920009885493039, "grad_norm": 2.084868196282067, "learning_rate": 7.673718438270649e-06, "loss": 0.0891, "step": 349, "ts_encoder_learning_rate": 7.660160382576683e-06 }, { "epoch": 0.9226460169700964, "grad_norm": 1.8080308541934171, "learning_rate": 7.660160382576683e-06, "loss": 0.0891, "step": 350, "ts_encoder_learning_rate": 7.646574989618938e-06 }, { "epoch": 0.9252821484471538, "grad_norm": 2.2542071099863397, "learning_rate": 7.646574989618938e-06, "loss": 0.0866, "step": 351, "ts_encoder_learning_rate": 7.632962399008342e-06 }, { "epoch": 0.9279182799242112, "grad_norm": 2.4095949616333283, "learning_rate": 7.632962399008342e-06, "loss": 0.0945, "step": 352, "ts_encoder_learning_rate": 7.619322750635327e-06 }, { "epoch": 0.9305544114012686, "grad_norm": 2.0875916761878934, "learning_rate": 7.619322750635327e-06, "loss": 0.0865, "step": 353, "ts_encoder_learning_rate": 7.605656184668385e-06 }, { "epoch": 0.933190542878326, "grad_norm": 1.7064657643571555, "learning_rate": 7.605656184668385e-06, "loss": 0.0933, "step": 354, "ts_encoder_learning_rate": 7.591962841552627e-06 }, { "epoch": 0.9358266743553835, "grad_norm": 2.500359725738562, "learning_rate": 7.591962841552627e-06, "loss": 0.0872, "step": 355, "ts_encoder_learning_rate": 7.578242862008336e-06 }, { "epoch": 0.9384628058324409, "grad_norm": 1.384078600663941, "learning_rate": 7.578242862008336e-06, "loss": 0.0702, "step": 356, "ts_encoder_learning_rate": 7.564496387029532e-06 }, { "epoch": 0.9410989373094983, "grad_norm": 1.5245598469180928, "learning_rate": 7.564496387029532e-06, "loss": 0.0801, "step": 357, "ts_encoder_learning_rate": 7.550723557882514e-06 }, { "epoch": 0.9437350687865558, "grad_norm": 2.045161639456311, "learning_rate": 7.550723557882514e-06, "loss": 0.0937, "step": 358, "ts_encoder_learning_rate": 7.536924516104411e-06 }, { "epoch": 0.9463712002636131, "grad_norm": 2.5839255461657027, "learning_rate": 7.536924516104411e-06, "loss": 0.0942, "step": 359, "ts_encoder_learning_rate": 7.52309940350173e-06 }, { "epoch": 0.9490073317406705, "grad_norm": 3.2274496289419212, "learning_rate": 7.52309940350173e-06, "loss": 0.1023, "step": 360, "ts_encoder_learning_rate": 7.509248362148889e-06 }, { "epoch": 0.951643463217728, "grad_norm": 1.4645186215038737, "learning_rate": 7.509248362148889e-06, "loss": 0.0889, "step": 361, "ts_encoder_learning_rate": 7.49537153438677e-06 }, { "epoch": 0.9542795946947854, "grad_norm": 3.4885631800713903, "learning_rate": 7.49537153438677e-06, "loss": 0.1, "step": 362, "ts_encoder_learning_rate": 7.481469062821252e-06 }, { "epoch": 0.9569157261718428, "grad_norm": 1.6689194932402402, "learning_rate": 7.481469062821252e-06, "loss": 0.0909, "step": 363, "ts_encoder_learning_rate": 7.467541090321735e-06 }, { "epoch": 0.9595518576489003, "grad_norm": 1.3923817479195213, "learning_rate": 7.467541090321735e-06, "loss": 0.086, "step": 364, "ts_encoder_learning_rate": 7.453587760019691e-06 }, { "epoch": 0.9621879891259577, "grad_norm": 2.1695652586948606, "learning_rate": 7.453587760019691e-06, "loss": 0.0755, "step": 365, "ts_encoder_learning_rate": 7.439609215307173e-06 }, { "epoch": 0.964824120603015, "grad_norm": 1.9185102579274915, "learning_rate": 7.439609215307173e-06, "loss": 0.0717, "step": 366, "ts_encoder_learning_rate": 7.42560559983536e-06 }, { "epoch": 0.9674602520800725, "grad_norm": 2.425906908342805, "learning_rate": 7.42560559983536e-06, "loss": 0.0958, "step": 367, "ts_encoder_learning_rate": 7.411577057513066e-06 }, { "epoch": 0.9700963835571299, "grad_norm": 1.4397236920539425, "learning_rate": 7.411577057513066e-06, "loss": 0.101, "step": 368, "ts_encoder_learning_rate": 7.39752373250527e-06 }, { "epoch": 0.9727325150341873, "grad_norm": 1.0990134832990999, "learning_rate": 7.39752373250527e-06, "loss": 0.0754, "step": 369, "ts_encoder_learning_rate": 7.383445769231628e-06 }, { "epoch": 0.9753686465112448, "grad_norm": 1.3029567612526982, "learning_rate": 7.383445769231628e-06, "loss": 0.0789, "step": 370, "ts_encoder_learning_rate": 7.369343312364994e-06 }, { "epoch": 0.9780047779883022, "grad_norm": 2.3110295949001545, "learning_rate": 7.369343312364994e-06, "loss": 0.0798, "step": 371, "ts_encoder_learning_rate": 7.355216506829933e-06 }, { "epoch": 0.9806409094653595, "grad_norm": 1.7411270859762809, "learning_rate": 7.355216506829933e-06, "loss": 0.089, "step": 372, "ts_encoder_learning_rate": 7.34106549780123e-06 }, { "epoch": 0.983277040942417, "grad_norm": 1.8926501676131235, "learning_rate": 7.34106549780123e-06, "loss": 0.0985, "step": 373, "ts_encoder_learning_rate": 7.326890430702396e-06 }, { "epoch": 0.9859131724194744, "grad_norm": 1.7001478537324086, "learning_rate": 7.326890430702396e-06, "loss": 0.0798, "step": 374, "ts_encoder_learning_rate": 7.312691451204178e-06 }, { "epoch": 0.9885493038965318, "grad_norm": 1.9333040871086815, "learning_rate": 7.312691451204178e-06, "loss": 0.0794, "step": 375, "ts_encoder_learning_rate": 7.2984687052230585e-06 }, { "epoch": 0.9911854353735893, "grad_norm": 1.6635256130487461, "learning_rate": 7.2984687052230585e-06, "loss": 0.077, "step": 376, "ts_encoder_learning_rate": 7.284222338919758e-06 }, { "epoch": 0.9938215668506467, "grad_norm": 1.7377302475474232, "learning_rate": 7.284222338919758e-06, "loss": 0.0828, "step": 377, "ts_encoder_learning_rate": 7.269952498697734e-06 }, { "epoch": 0.9964576983277041, "grad_norm": 1.8805139125533905, "learning_rate": 7.269952498697734e-06, "loss": 0.0766, "step": 378, "ts_encoder_learning_rate": 7.255659331201673e-06 }, { "epoch": 0.9990938298047615, "grad_norm": 1.3707632091787474, "learning_rate": 7.255659331201673e-06, "loss": 0.0824, "step": 379, "ts_encoder_learning_rate": 7.241342983315985e-06 }, { "epoch": 1.0, "grad_norm": 1.3707632091787474, "learning_rate": 7.241342983315985e-06, "loss": 0.0264, "step": 380, "ts_encoder_learning_rate": 7.227003602163296e-06 }, { "epoch": 1.0026361314770573, "grad_norm": 3.1286777602818665, "learning_rate": 7.227003602163296e-06, "loss": 0.0931, "step": 381, "ts_encoder_learning_rate": 7.212641335102932e-06 }, { "epoch": 1.0052722629541149, "grad_norm": 2.8391713475277, "learning_rate": 7.212641335102932e-06, "loss": 0.0722, "step": 382, "ts_encoder_learning_rate": 7.198256329729412e-06 }, { "epoch": 1.0079083944311722, "grad_norm": 1.256304357468329, "learning_rate": 7.198256329729412e-06, "loss": 0.0769, "step": 383, "ts_encoder_learning_rate": 7.183848733870917e-06 }, { "epoch": 1.0105445259082297, "grad_norm": 3.5250439890455767, "learning_rate": 7.183848733870917e-06, "loss": 0.0893, "step": 384, "ts_encoder_learning_rate": 7.169418695587791e-06 }, { "epoch": 1.013180657385287, "grad_norm": 2.626283878871451, "learning_rate": 7.169418695587791e-06, "loss": 0.0729, "step": 385, "ts_encoder_learning_rate": 7.154966363171003e-06 }, { "epoch": 1.0158167888623446, "grad_norm": 1.760910812924816, "learning_rate": 7.154966363171003e-06, "loss": 0.0879, "step": 386, "ts_encoder_learning_rate": 7.140491885140629e-06 }, { "epoch": 1.018452920339402, "grad_norm": 1.955129103690491, "learning_rate": 7.140491885140629e-06, "loss": 0.068, "step": 387, "ts_encoder_learning_rate": 7.125995410244324e-06 }, { "epoch": 1.0210890518164593, "grad_norm": 1.4906223976736166, "learning_rate": 7.125995410244324e-06, "loss": 0.0671, "step": 388, "ts_encoder_learning_rate": 7.1114770874558e-06 }, { "epoch": 1.0237251832935168, "grad_norm": 1.4849510993800188, "learning_rate": 7.1114770874558e-06, "loss": 0.0751, "step": 389, "ts_encoder_learning_rate": 7.096937065973285e-06 }, { "epoch": 1.0263613147705741, "grad_norm": 1.497911410516406, "learning_rate": 7.096937065973285e-06, "loss": 0.0673, "step": 390, "ts_encoder_learning_rate": 7.082375495217996e-06 }, { "epoch": 1.0289974462476317, "grad_norm": 1.4093983694447139, "learning_rate": 7.082375495217996e-06, "loss": 0.0722, "step": 391, "ts_encoder_learning_rate": 7.067792524832604e-06 }, { "epoch": 1.031633577724689, "grad_norm": 1.3429042920164558, "learning_rate": 7.067792524832604e-06, "loss": 0.0716, "step": 392, "ts_encoder_learning_rate": 7.053188304679691e-06 }, { "epoch": 1.0342697092017463, "grad_norm": 1.606708984123339, "learning_rate": 7.053188304679691e-06, "loss": 0.0745, "step": 393, "ts_encoder_learning_rate": 7.038562984840216e-06 }, { "epoch": 1.0369058406788039, "grad_norm": 1.9904571024970765, "learning_rate": 7.038562984840216e-06, "loss": 0.0677, "step": 394, "ts_encoder_learning_rate": 7.023916715611969e-06 }, { "epoch": 1.0395419721558612, "grad_norm": 1.4505783107760564, "learning_rate": 7.023916715611969e-06, "loss": 0.0647, "step": 395, "ts_encoder_learning_rate": 7.009249647508028e-06 }, { "epoch": 1.0421781036329187, "grad_norm": 1.5229728357909127, "learning_rate": 7.009249647508028e-06, "loss": 0.0652, "step": 396, "ts_encoder_learning_rate": 6.994561931255209e-06 }, { "epoch": 1.044814235109976, "grad_norm": 2.6769507938266277, "learning_rate": 6.994561931255209e-06, "loss": 0.0678, "step": 397, "ts_encoder_learning_rate": 6.9798537177925226e-06 }, { "epoch": 1.0474503665870336, "grad_norm": 2.0169207725312703, "learning_rate": 6.9798537177925226e-06, "loss": 0.074, "step": 398, "ts_encoder_learning_rate": 6.965125158269619e-06 }, { "epoch": 1.050086498064091, "grad_norm": 1.9292523257890648, "learning_rate": 6.965125158269619e-06, "loss": 0.076, "step": 399, "ts_encoder_learning_rate": 6.950376404045235e-06 }, { "epoch": 1.0527226295411483, "grad_norm": 1.841218569643746, "learning_rate": 6.950376404045235e-06, "loss": 0.0815, "step": 400, "ts_encoder_learning_rate": 6.935607606685642e-06 }, { "epoch": 1.0553587610182058, "grad_norm": 1.3685510522725666, "learning_rate": 6.935607606685642e-06, "loss": 0.0649, "step": 401, "ts_encoder_learning_rate": 6.9208189179630805e-06 }, { "epoch": 1.0579948924952631, "grad_norm": 1.2255871348116065, "learning_rate": 6.9208189179630805e-06, "loss": 0.0751, "step": 402, "ts_encoder_learning_rate": 6.906010489854209e-06 }, { "epoch": 1.0606310239723207, "grad_norm": 2.25284044313197, "learning_rate": 6.906010489854209e-06, "loss": 0.0722, "step": 403, "ts_encoder_learning_rate": 6.891182474538539e-06 }, { "epoch": 1.063267155449378, "grad_norm": 1.574007278408115, "learning_rate": 6.891182474538539e-06, "loss": 0.0874, "step": 404, "ts_encoder_learning_rate": 6.876335024396872e-06 }, { "epoch": 1.0659032869264355, "grad_norm": 1.4368386388721925, "learning_rate": 6.876335024396872e-06, "loss": 0.064, "step": 405, "ts_encoder_learning_rate": 6.8614682920097265e-06 }, { "epoch": 1.0685394184034929, "grad_norm": 1.698997766946676, "learning_rate": 6.8614682920097265e-06, "loss": 0.0861, "step": 406, "ts_encoder_learning_rate": 6.846582430155783e-06 }, { "epoch": 1.0711755498805502, "grad_norm": 1.621405485095503, "learning_rate": 6.846582430155783e-06, "loss": 0.0616, "step": 407, "ts_encoder_learning_rate": 6.831677591810302e-06 }, { "epoch": 1.0738116813576077, "grad_norm": 1.7546607458906713, "learning_rate": 6.831677591810302e-06, "loss": 0.0634, "step": 408, "ts_encoder_learning_rate": 6.816753930143558e-06 }, { "epoch": 1.076447812834665, "grad_norm": 1.530962724670228, "learning_rate": 6.816753930143558e-06, "loss": 0.0605, "step": 409, "ts_encoder_learning_rate": 6.801811598519268e-06 }, { "epoch": 1.0790839443117226, "grad_norm": 1.4049855220372447, "learning_rate": 6.801811598519268e-06, "loss": 0.0651, "step": 410, "ts_encoder_learning_rate": 6.786850750493006e-06 }, { "epoch": 1.08172007578878, "grad_norm": 1.2835177716818393, "learning_rate": 6.786850750493006e-06, "loss": 0.0588, "step": 411, "ts_encoder_learning_rate": 6.771871539810633e-06 }, { "epoch": 1.0843562072658375, "grad_norm": 2.177159514710094, "learning_rate": 6.771871539810633e-06, "loss": 0.0714, "step": 412, "ts_encoder_learning_rate": 6.7568741204067145e-06 }, { "epoch": 1.0869923387428948, "grad_norm": 2.0968533031020744, "learning_rate": 6.7568741204067145e-06, "loss": 0.075, "step": 413, "ts_encoder_learning_rate": 6.741858646402941e-06 }, { "epoch": 1.0896284702199521, "grad_norm": 2.0444573124344996, "learning_rate": 6.741858646402941e-06, "loss": 0.0761, "step": 414, "ts_encoder_learning_rate": 6.726825272106539e-06 }, { "epoch": 1.0922646016970097, "grad_norm": 1.8927366457463946, "learning_rate": 6.726825272106539e-06, "loss": 0.0843, "step": 415, "ts_encoder_learning_rate": 6.71177415200869e-06 }, { "epoch": 1.094900733174067, "grad_norm": 1.5743826612232619, "learning_rate": 6.71177415200869e-06, "loss": 0.0669, "step": 416, "ts_encoder_learning_rate": 6.696705440782939e-06 }, { "epoch": 1.0975368646511245, "grad_norm": 1.9849832173789321, "learning_rate": 6.696705440782939e-06, "loss": 0.0606, "step": 417, "ts_encoder_learning_rate": 6.68161929328361e-06 }, { "epoch": 1.1001729961281819, "grad_norm": 1.2161772899273893, "learning_rate": 6.68161929328361e-06, "loss": 0.0598, "step": 418, "ts_encoder_learning_rate": 6.66651586454421e-06 }, { "epoch": 1.1028091276052394, "grad_norm": 1.2393164520057445, "learning_rate": 6.66651586454421e-06, "loss": 0.0692, "step": 419, "ts_encoder_learning_rate": 6.651395309775837e-06 }, { "epoch": 1.1054452590822967, "grad_norm": 1.4520060898019855, "learning_rate": 6.651395309775837e-06, "loss": 0.0692, "step": 420, "ts_encoder_learning_rate": 6.636257784365585e-06 }, { "epoch": 1.108081390559354, "grad_norm": 2.071887988162363, "learning_rate": 6.636257784365585e-06, "loss": 0.0817, "step": 421, "ts_encoder_learning_rate": 6.62110344387495e-06 }, { "epoch": 1.1107175220364116, "grad_norm": 1.4450804845065022, "learning_rate": 6.62110344387495e-06, "loss": 0.0615, "step": 422, "ts_encoder_learning_rate": 6.605932444038229e-06 }, { "epoch": 1.113353653513469, "grad_norm": 4.131996313423228, "learning_rate": 6.605932444038229e-06, "loss": 0.0567, "step": 423, "ts_encoder_learning_rate": 6.5907449407609145e-06 }, { "epoch": 1.1159897849905265, "grad_norm": 1.3469924411982919, "learning_rate": 6.5907449407609145e-06, "loss": 0.0688, "step": 424, "ts_encoder_learning_rate": 6.575541090118105e-06 }, { "epoch": 1.1186259164675838, "grad_norm": 1.676545292506096, "learning_rate": 6.575541090118105e-06, "loss": 0.0707, "step": 425, "ts_encoder_learning_rate": 6.5603210483528864e-06 }, { "epoch": 1.1212620479446413, "grad_norm": 2.1293469373373086, "learning_rate": 6.5603210483528864e-06, "loss": 0.0783, "step": 426, "ts_encoder_learning_rate": 6.545084971874738e-06 }, { "epoch": 1.1238981794216987, "grad_norm": 2.1545361587555893, "learning_rate": 6.545084971874738e-06, "loss": 0.0662, "step": 427, "ts_encoder_learning_rate": 6.529833017257919e-06 }, { "epoch": 1.126534310898756, "grad_norm": 1.5431217078868176, "learning_rate": 6.529833017257919e-06, "loss": 0.0665, "step": 428, "ts_encoder_learning_rate": 6.514565341239861e-06 }, { "epoch": 1.1291704423758135, "grad_norm": 1.6871162084026083, "learning_rate": 6.514565341239861e-06, "loss": 0.0609, "step": 429, "ts_encoder_learning_rate": 6.499282100719558e-06 }, { "epoch": 1.1318065738528709, "grad_norm": 1.3008708089844792, "learning_rate": 6.499282100719558e-06, "loss": 0.0669, "step": 430, "ts_encoder_learning_rate": 6.483983452755953e-06 }, { "epoch": 1.1344427053299284, "grad_norm": 1.31908563414336, "learning_rate": 6.483983452755953e-06, "loss": 0.0681, "step": 431, "ts_encoder_learning_rate": 6.468669554566324e-06 }, { "epoch": 1.1370788368069857, "grad_norm": 2.2211285641938403, "learning_rate": 6.468669554566324e-06, "loss": 0.0732, "step": 432, "ts_encoder_learning_rate": 6.4533405635246696e-06 }, { "epoch": 1.1397149682840433, "grad_norm": 2.0753940060908413, "learning_rate": 6.4533405635246696e-06, "loss": 0.0663, "step": 433, "ts_encoder_learning_rate": 6.437996637160086e-06 }, { "epoch": 1.1423510997611006, "grad_norm": 1.4786983123231945, "learning_rate": 6.437996637160086e-06, "loss": 0.0546, "step": 434, "ts_encoder_learning_rate": 6.4226379331551625e-06 }, { "epoch": 1.144987231238158, "grad_norm": 1.5876247856249708, "learning_rate": 6.4226379331551625e-06, "loss": 0.0735, "step": 435, "ts_encoder_learning_rate": 6.407264609344344e-06 }, { "epoch": 1.1476233627152155, "grad_norm": 1.670327323230943, "learning_rate": 6.407264609344344e-06, "loss": 0.0604, "step": 436, "ts_encoder_learning_rate": 6.3918768237123175e-06 }, { "epoch": 1.1502594941922728, "grad_norm": 1.7007074105089541, "learning_rate": 6.3918768237123175e-06, "loss": 0.0506, "step": 437, "ts_encoder_learning_rate": 6.376474734392388e-06 }, { "epoch": 1.1528956256693303, "grad_norm": 0.8723701532963661, "learning_rate": 6.376474734392388e-06, "loss": 0.051, "step": 438, "ts_encoder_learning_rate": 6.361058499664856e-06 }, { "epoch": 1.1555317571463877, "grad_norm": 1.2073690713191445, "learning_rate": 6.361058499664856e-06, "loss": 0.0658, "step": 439, "ts_encoder_learning_rate": 6.345628277955384e-06 }, { "epoch": 1.1581678886234452, "grad_norm": 4.282100892926092, "learning_rate": 6.345628277955384e-06, "loss": 0.0638, "step": 440, "ts_encoder_learning_rate": 6.330184227833376e-06 }, { "epoch": 1.1608040201005025, "grad_norm": 1.0987411320570284, "learning_rate": 6.330184227833376e-06, "loss": 0.058, "step": 441, "ts_encoder_learning_rate": 6.3147265080103405e-06 }, { "epoch": 1.1634401515775599, "grad_norm": 0.9366573260467199, "learning_rate": 6.3147265080103405e-06, "loss": 0.0473, "step": 442, "ts_encoder_learning_rate": 6.299255277338265e-06 }, { "epoch": 1.1660762830546174, "grad_norm": 6.839529381634898, "learning_rate": 6.299255277338265e-06, "loss": 0.1152, "step": 443, "ts_encoder_learning_rate": 6.283770694807983e-06 }, { "epoch": 1.1687124145316747, "grad_norm": 6.3594435572609305, "learning_rate": 6.283770694807983e-06, "loss": 0.093, "step": 444, "ts_encoder_learning_rate": 6.268272919547537e-06 }, { "epoch": 1.171348546008732, "grad_norm": 40.08684333591222, "learning_rate": 6.268272919547537e-06, "loss": 0.0962, "step": 445, "ts_encoder_learning_rate": 6.252762110820548e-06 }, { "epoch": 1.1739846774857896, "grad_norm": 2.943023237687571, "learning_rate": 6.252762110820548e-06, "loss": 0.0901, "step": 446, "ts_encoder_learning_rate": 6.237238428024573e-06 }, { "epoch": 1.176620808962847, "grad_norm": 2.3992218806525445, "learning_rate": 6.237238428024573e-06, "loss": 0.0862, "step": 447, "ts_encoder_learning_rate": 6.2217020306894705e-06 }, { "epoch": 1.1792569404399045, "grad_norm": 5.0353855751257415, "learning_rate": 6.2217020306894705e-06, "loss": 0.0668, "step": 448, "ts_encoder_learning_rate": 6.2061530784757625e-06 }, { "epoch": 1.1818930719169618, "grad_norm": 1.8328704649956604, "learning_rate": 6.2061530784757625e-06, "loss": 0.0738, "step": 449, "ts_encoder_learning_rate": 6.1905917311729915e-06 }, { "epoch": 1.1845292033940193, "grad_norm": 1.5717279092672578, "learning_rate": 6.1905917311729915e-06, "loss": 0.0603, "step": 450, "ts_encoder_learning_rate": 6.175018148698077e-06 }, { "epoch": 1.1871653348710767, "grad_norm": 2.392989669488609, "learning_rate": 6.175018148698077e-06, "loss": 0.0524, "step": 451, "ts_encoder_learning_rate": 6.1594324910936734e-06 }, { "epoch": 1.189801466348134, "grad_norm": 1.5764592052091633, "learning_rate": 6.1594324910936734e-06, "loss": 0.0621, "step": 452, "ts_encoder_learning_rate": 6.143834918526528e-06 }, { "epoch": 1.1924375978251915, "grad_norm": 1.8238472425314327, "learning_rate": 6.143834918526528e-06, "loss": 0.0622, "step": 453, "ts_encoder_learning_rate": 6.1282255912858315e-06 }, { "epoch": 1.1950737293022489, "grad_norm": 2.8133947110243485, "learning_rate": 6.1282255912858315e-06, "loss": 0.0862, "step": 454, "ts_encoder_learning_rate": 6.112604669781572e-06 }, { "epoch": 1.1977098607793064, "grad_norm": 1.3652304574964094, "learning_rate": 6.112604669781572e-06, "loss": 0.0527, "step": 455, "ts_encoder_learning_rate": 6.096972314542889e-06 }, { "epoch": 1.2003459922563637, "grad_norm": 1.0715660143813739, "learning_rate": 6.096972314542889e-06, "loss": 0.0709, "step": 456, "ts_encoder_learning_rate": 6.0813286862164175e-06 }, { "epoch": 1.2029821237334213, "grad_norm": 1.5558524275638925, "learning_rate": 6.0813286862164175e-06, "loss": 0.0423, "step": 457, "ts_encoder_learning_rate": 6.065673945564643e-06 }, { "epoch": 1.2056182552104786, "grad_norm": 1.6176907363538375, "learning_rate": 6.065673945564643e-06, "loss": 0.0622, "step": 458, "ts_encoder_learning_rate": 6.050008253464247e-06 }, { "epoch": 1.208254386687536, "grad_norm": 0.8564869594899379, "learning_rate": 6.050008253464247e-06, "loss": 0.0455, "step": 459, "ts_encoder_learning_rate": 6.034331770904455e-06 }, { "epoch": 1.2108905181645935, "grad_norm": 1.1035352728404064, "learning_rate": 6.034331770904455e-06, "loss": 0.0563, "step": 460, "ts_encoder_learning_rate": 6.018644658985378e-06 }, { "epoch": 1.2135266496416508, "grad_norm": 1.5616128959288538, "learning_rate": 6.018644658985378e-06, "loss": 0.0539, "step": 461, "ts_encoder_learning_rate": 6.002947078916365e-06 }, { "epoch": 1.2161627811187083, "grad_norm": 1.8418037108772232, "learning_rate": 6.002947078916365e-06, "loss": 0.0659, "step": 462, "ts_encoder_learning_rate": 5.987239192014336e-06 }, { "epoch": 1.2187989125957657, "grad_norm": 1.6148182637272608, "learning_rate": 5.987239192014336e-06, "loss": 0.0486, "step": 463, "ts_encoder_learning_rate": 5.971521159702136e-06 }, { "epoch": 1.2214350440728232, "grad_norm": 1.1194159742020753, "learning_rate": 5.971521159702136e-06, "loss": 0.0517, "step": 464, "ts_encoder_learning_rate": 5.955793143506863e-06 }, { "epoch": 1.2240711755498805, "grad_norm": 2.5603301388490065, "learning_rate": 5.955793143506863e-06, "loss": 0.0792, "step": 465, "ts_encoder_learning_rate": 5.940055305058219e-06 }, { "epoch": 1.2267073070269379, "grad_norm": 2.805962948191509, "learning_rate": 5.940055305058219e-06, "loss": 0.0495, "step": 466, "ts_encoder_learning_rate": 5.9243078060868445e-06 }, { "epoch": 1.2293434385039954, "grad_norm": 1.1698319176717191, "learning_rate": 5.9243078060868445e-06, "loss": 0.0469, "step": 467, "ts_encoder_learning_rate": 5.908550808422656e-06 }, { "epoch": 1.2319795699810527, "grad_norm": 2.0037504801434176, "learning_rate": 5.908550808422656e-06, "loss": 0.0861, "step": 468, "ts_encoder_learning_rate": 5.892784473993184e-06 }, { "epoch": 1.2346157014581103, "grad_norm": 1.723869314498301, "learning_rate": 5.892784473993184e-06, "loss": 0.0593, "step": 469, "ts_encoder_learning_rate": 5.877008964821909e-06 }, { "epoch": 1.2372518329351676, "grad_norm": 1.6055066461843874, "learning_rate": 5.877008964821909e-06, "loss": 0.0626, "step": 470, "ts_encoder_learning_rate": 5.861224443026595e-06 }, { "epoch": 1.2398879644122252, "grad_norm": 1.5661164699031636, "learning_rate": 5.861224443026595e-06, "loss": 0.0602, "step": 471, "ts_encoder_learning_rate": 5.845431070817627e-06 }, { "epoch": 1.2425240958892825, "grad_norm": 1.0282805477784254, "learning_rate": 5.845431070817627e-06, "loss": 0.0524, "step": 472, "ts_encoder_learning_rate": 5.82962901049634e-06 }, { "epoch": 1.2451602273663398, "grad_norm": 1.7894572815574583, "learning_rate": 5.82962901049634e-06, "loss": 0.0571, "step": 473, "ts_encoder_learning_rate": 5.8138184244533516e-06 }, { "epoch": 1.2477963588433973, "grad_norm": 1.06562374064391, "learning_rate": 5.8138184244533516e-06, "loss": 0.057, "step": 474, "ts_encoder_learning_rate": 5.797999475166897e-06 }, { "epoch": 1.2504324903204547, "grad_norm": 2.604409235331405, "learning_rate": 5.797999475166897e-06, "loss": 0.0704, "step": 475, "ts_encoder_learning_rate": 5.782172325201155e-06 }, { "epoch": 1.2530686217975122, "grad_norm": 1.0601741954985433, "learning_rate": 5.782172325201155e-06, "loss": 0.0519, "step": 476, "ts_encoder_learning_rate": 5.766337137204579e-06 }, { "epoch": 1.2557047532745695, "grad_norm": 1.4951262075969727, "learning_rate": 5.766337137204579e-06, "loss": 0.0649, "step": 477, "ts_encoder_learning_rate": 5.7504940739082305e-06 }, { "epoch": 1.258340884751627, "grad_norm": 2.0377946241283964, "learning_rate": 5.7504940739082305e-06, "loss": 0.0709, "step": 478, "ts_encoder_learning_rate": 5.734643298124091e-06 }, { "epoch": 1.2609770162286844, "grad_norm": 1.3627445943335548, "learning_rate": 5.734643298124091e-06, "loss": 0.0597, "step": 479, "ts_encoder_learning_rate": 5.71878497274341e-06 }, { "epoch": 1.2636131477057417, "grad_norm": 1.514945622789671, "learning_rate": 5.71878497274341e-06, "loss": 0.0621, "step": 480, "ts_encoder_learning_rate": 5.702919260735015e-06 }, { "epoch": 1.2662492791827993, "grad_norm": 1.6326473802607846, "learning_rate": 5.702919260735015e-06, "loss": 0.0546, "step": 481, "ts_encoder_learning_rate": 5.6870463251436485e-06 }, { "epoch": 1.2688854106598566, "grad_norm": 1.1610484009982354, "learning_rate": 5.6870463251436485e-06, "loss": 0.0721, "step": 482, "ts_encoder_learning_rate": 5.671166329088278e-06 }, { "epoch": 1.2715215421369142, "grad_norm": 9.497222320275062, "learning_rate": 5.671166329088278e-06, "loss": 0.0622, "step": 483, "ts_encoder_learning_rate": 5.655279435760436e-06 }, { "epoch": 1.2741576736139715, "grad_norm": 2.5943554438763345, "learning_rate": 5.655279435760436e-06, "loss": 0.0611, "step": 484, "ts_encoder_learning_rate": 5.6393858084225305e-06 }, { "epoch": 1.276793805091029, "grad_norm": 1.7043776646620947, "learning_rate": 5.6393858084225305e-06, "loss": 0.0557, "step": 485, "ts_encoder_learning_rate": 5.623485610406174e-06 }, { "epoch": 1.2794299365680863, "grad_norm": 3.8980670638726176, "learning_rate": 5.623485610406174e-06, "loss": 0.0645, "step": 486, "ts_encoder_learning_rate": 5.6075790051105025e-06 }, { "epoch": 1.2820660680451437, "grad_norm": 3.6504547162269523, "learning_rate": 5.6075790051105025e-06, "loss": 0.0708, "step": 487, "ts_encoder_learning_rate": 5.5916661560004945e-06 }, { "epoch": 1.2847021995222012, "grad_norm": 5.021699015115789, "learning_rate": 5.5916661560004945e-06, "loss": 0.0595, "step": 488, "ts_encoder_learning_rate": 5.575747226605298e-06 }, { "epoch": 1.2873383309992585, "grad_norm": 1.8447348562525032, "learning_rate": 5.575747226605298e-06, "loss": 0.0692, "step": 489, "ts_encoder_learning_rate": 5.559822380516539e-06 }, { "epoch": 1.289974462476316, "grad_norm": 1.8812523455034513, "learning_rate": 5.559822380516539e-06, "loss": 0.0539, "step": 490, "ts_encoder_learning_rate": 5.543891781386655e-06 }, { "epoch": 1.2926105939533734, "grad_norm": 1.506328585661642, "learning_rate": 5.543891781386655e-06, "loss": 0.0398, "step": 491, "ts_encoder_learning_rate": 5.527955592927198e-06 }, { "epoch": 1.295246725430431, "grad_norm": 1.8603503730820898, "learning_rate": 5.527955592927198e-06, "loss": 0.0585, "step": 492, "ts_encoder_learning_rate": 5.512013978907157e-06 }, { "epoch": 1.2978828569074883, "grad_norm": 2.4648081055680846, "learning_rate": 5.512013978907157e-06, "loss": 0.058, "step": 493, "ts_encoder_learning_rate": 5.496067103151288e-06 }, { "epoch": 1.3005189883845456, "grad_norm": 1.4041012620060673, "learning_rate": 5.496067103151288e-06, "loss": 0.0443, "step": 494, "ts_encoder_learning_rate": 5.480115129538409e-06 }, { "epoch": 1.3031551198616032, "grad_norm": 1.3893300223548792, "learning_rate": 5.480115129538409e-06, "loss": 0.0433, "step": 495, "ts_encoder_learning_rate": 5.464158221999731e-06 }, { "epoch": 1.3057912513386605, "grad_norm": 1.2015614710865317, "learning_rate": 5.464158221999731e-06, "loss": 0.0483, "step": 496, "ts_encoder_learning_rate": 5.448196544517168e-06 }, { "epoch": 1.3084273828157178, "grad_norm": 1.3699932224551299, "learning_rate": 5.448196544517168e-06, "loss": 0.0476, "step": 497, "ts_encoder_learning_rate": 5.4322302611216515e-06 }, { "epoch": 1.3110635142927753, "grad_norm": 1.3403881800344724, "learning_rate": 5.4322302611216515e-06, "loss": 0.0564, "step": 498, "ts_encoder_learning_rate": 5.4162595358914475e-06 }, { "epoch": 1.313699645769833, "grad_norm": 73.58179677044741, "learning_rate": 5.4162595358914475e-06, "loss": 0.0646, "step": 499, "ts_encoder_learning_rate": 5.4002845329504675e-06 }, { "epoch": 1.3163357772468902, "grad_norm": 0.7574363377553233, "learning_rate": 5.4002845329504675e-06, "loss": 0.0572, "step": 500, "ts_encoder_learning_rate": 5.384305416466584e-06 }, { "epoch": 1.3189719087239475, "grad_norm": 9.085144033066042, "learning_rate": 5.384305416466584e-06, "loss": 0.0455, "step": 501, "ts_encoder_learning_rate": 5.368322350649942e-06 }, { "epoch": 1.321608040201005, "grad_norm": 1.5134736470746724, "learning_rate": 5.368322350649942e-06, "loss": 0.0643, "step": 502, "ts_encoder_learning_rate": 5.35233549975127e-06 }, { "epoch": 1.3242441716780624, "grad_norm": 7.310424680388469, "learning_rate": 5.35233549975127e-06, "loss": 0.061, "step": 503, "ts_encoder_learning_rate": 5.336345028060199e-06 }, { "epoch": 1.3268803031551197, "grad_norm": 1.1323514407618245, "learning_rate": 5.336345028060199e-06, "loss": 0.0549, "step": 504, "ts_encoder_learning_rate": 5.320351099903565e-06 }, { "epoch": 1.3295164346321773, "grad_norm": 1.6279841201077867, "learning_rate": 5.320351099903565e-06, "loss": 0.0613, "step": 505, "ts_encoder_learning_rate": 5.304353879643727e-06 }, { "epoch": 1.3321525661092348, "grad_norm": 1.3523619947920968, "learning_rate": 5.304353879643727e-06, "loss": 0.0472, "step": 506, "ts_encoder_learning_rate": 5.288353531676873e-06 }, { "epoch": 1.3347886975862921, "grad_norm": 1.6311287951575122, "learning_rate": 5.288353531676873e-06, "loss": 0.0449, "step": 507, "ts_encoder_learning_rate": 5.2723502204313346e-06 }, { "epoch": 1.3374248290633495, "grad_norm": 1.254954921395093, "learning_rate": 5.2723502204313346e-06, "loss": 0.0468, "step": 508, "ts_encoder_learning_rate": 5.256344110365896e-06 }, { "epoch": 1.340060960540407, "grad_norm": 1.4319071308303999, "learning_rate": 5.256344110365896e-06, "loss": 0.047, "step": 509, "ts_encoder_learning_rate": 5.240335365968104e-06 }, { "epoch": 1.3426970920174643, "grad_norm": 1.8236696801665577, "learning_rate": 5.240335365968104e-06, "loss": 0.0514, "step": 510, "ts_encoder_learning_rate": 5.224324151752575e-06 }, { "epoch": 1.3453332234945217, "grad_norm": 1.9778928788592056, "learning_rate": 5.224324151752575e-06, "loss": 0.052, "step": 511, "ts_encoder_learning_rate": 5.208310632259308e-06 }, { "epoch": 1.3479693549715792, "grad_norm": 1.417312718751431, "learning_rate": 5.208310632259308e-06, "loss": 0.0477, "step": 512, "ts_encoder_learning_rate": 5.192294972051992e-06 }, { "epoch": 1.3506054864486368, "grad_norm": 2.0176988121139523, "learning_rate": 5.192294972051992e-06, "loss": 0.0439, "step": 513, "ts_encoder_learning_rate": 5.1762773357163175e-06 }, { "epoch": 1.353241617925694, "grad_norm": 1.8355976352392431, "learning_rate": 5.1762773357163175e-06, "loss": 0.0657, "step": 514, "ts_encoder_learning_rate": 5.160257887858278e-06 }, { "epoch": 1.3558777494027514, "grad_norm": 1.6689128377623292, "learning_rate": 5.160257887858278e-06, "loss": 0.0557, "step": 515, "ts_encoder_learning_rate": 5.144236793102485e-06 }, { "epoch": 1.358513880879809, "grad_norm": 1.8005326862964846, "learning_rate": 5.144236793102485e-06, "loss": 0.0538, "step": 516, "ts_encoder_learning_rate": 5.128214216090478e-06 }, { "epoch": 1.3611500123568663, "grad_norm": 1.633570103313834, "learning_rate": 5.128214216090478e-06, "loss": 0.0448, "step": 517, "ts_encoder_learning_rate": 5.112190321479026e-06 }, { "epoch": 1.3637861438339236, "grad_norm": 1.207340276387567, "learning_rate": 5.112190321479026e-06, "loss": 0.0418, "step": 518, "ts_encoder_learning_rate": 5.0961652739384356e-06 }, { "epoch": 1.3664222753109811, "grad_norm": 0.8605880311313963, "learning_rate": 5.0961652739384356e-06, "loss": 0.051, "step": 519, "ts_encoder_learning_rate": 5.080139238150869e-06 }, { "epoch": 1.3690584067880385, "grad_norm": 1.0552622999396024, "learning_rate": 5.080139238150869e-06, "loss": 0.0545, "step": 520, "ts_encoder_learning_rate": 5.064112378808636e-06 }, { "epoch": 1.371694538265096, "grad_norm": 1.7431125427812204, "learning_rate": 5.064112378808636e-06, "loss": 0.0544, "step": 521, "ts_encoder_learning_rate": 5.048084860612516e-06 }, { "epoch": 1.3743306697421533, "grad_norm": 1.4698730704414043, "learning_rate": 5.048084860612516e-06, "loss": 0.0639, "step": 522, "ts_encoder_learning_rate": 5.032056848270056e-06 }, { "epoch": 1.376966801219211, "grad_norm": 1.8450236243661535, "learning_rate": 5.032056848270056e-06, "loss": 0.0467, "step": 523, "ts_encoder_learning_rate": 5.016028506493881e-06 }, { "epoch": 1.3796029326962682, "grad_norm": 2.052864394366136, "learning_rate": 5.016028506493881e-06, "loss": 0.0491, "step": 524, "ts_encoder_learning_rate": 5e-06 }, { "epoch": 1.3822390641733255, "grad_norm": 1.5026027017438155, "learning_rate": 5e-06, "loss": 0.0464, "step": 525, "ts_encoder_learning_rate": 4.9839714935061215e-06 }, { "epoch": 1.384875195650383, "grad_norm": 1.153030381312369, "learning_rate": 4.9839714935061215e-06, "loss": 0.0419, "step": 526, "ts_encoder_learning_rate": 4.967943151729945e-06 }, { "epoch": 1.3875113271274404, "grad_norm": 1.4490249141706013, "learning_rate": 4.967943151729945e-06, "loss": 0.0603, "step": 527, "ts_encoder_learning_rate": 4.951915139387484e-06 }, { "epoch": 1.390147458604498, "grad_norm": 1.4564680397959866, "learning_rate": 4.951915139387484e-06, "loss": 0.0477, "step": 528, "ts_encoder_learning_rate": 4.935887621191364e-06 }, { "epoch": 1.3927835900815553, "grad_norm": 1.3964892258738686, "learning_rate": 4.935887621191364e-06, "loss": 0.0528, "step": 529, "ts_encoder_learning_rate": 4.919860761849132e-06 }, { "epoch": 1.3954197215586128, "grad_norm": 6.281977322231947, "learning_rate": 4.919860761849132e-06, "loss": 0.0548, "step": 530, "ts_encoder_learning_rate": 4.903834726061565e-06 }, { "epoch": 1.3980558530356701, "grad_norm": 1.3837480562779865, "learning_rate": 4.903834726061565e-06, "loss": 0.0455, "step": 531, "ts_encoder_learning_rate": 4.887809678520976e-06 }, { "epoch": 1.4006919845127275, "grad_norm": 1.0021501716084598, "learning_rate": 4.887809678520976e-06, "loss": 0.0494, "step": 532, "ts_encoder_learning_rate": 4.871785783909523e-06 }, { "epoch": 1.403328115989785, "grad_norm": 1.151715988041567, "learning_rate": 4.871785783909523e-06, "loss": 0.0433, "step": 533, "ts_encoder_learning_rate": 4.855763206897516e-06 }, { "epoch": 1.4059642474668423, "grad_norm": 0.858749288851531, "learning_rate": 4.855763206897516e-06, "loss": 0.0362, "step": 534, "ts_encoder_learning_rate": 4.839742112141725e-06 }, { "epoch": 1.4086003789439, "grad_norm": 1.9221750234266837, "learning_rate": 4.839742112141725e-06, "loss": 0.0552, "step": 535, "ts_encoder_learning_rate": 4.823722664283684e-06 }, { "epoch": 1.4112365104209572, "grad_norm": 2.0598775899946347, "learning_rate": 4.823722664283684e-06, "loss": 0.0612, "step": 536, "ts_encoder_learning_rate": 4.807705027948008e-06 }, { "epoch": 1.4138726418980148, "grad_norm": 11.902681310300874, "learning_rate": 4.807705027948008e-06, "loss": 0.0572, "step": 537, "ts_encoder_learning_rate": 4.7916893677406925e-06 }, { "epoch": 1.416508773375072, "grad_norm": 1.4839974850899151, "learning_rate": 4.7916893677406925e-06, "loss": 0.0671, "step": 538, "ts_encoder_learning_rate": 4.775675848247427e-06 }, { "epoch": 1.4191449048521294, "grad_norm": 3.177751373971005, "learning_rate": 4.775675848247427e-06, "loss": 0.0592, "step": 539, "ts_encoder_learning_rate": 4.759664634031897e-06 }, { "epoch": 1.421781036329187, "grad_norm": 2.8690250587273494, "learning_rate": 4.759664634031897e-06, "loss": 0.0562, "step": 540, "ts_encoder_learning_rate": 4.743655889634105e-06 }, { "epoch": 1.4244171678062443, "grad_norm": 2.257175666571135, "learning_rate": 4.743655889634105e-06, "loss": 0.0629, "step": 541, "ts_encoder_learning_rate": 4.727649779568666e-06 }, { "epoch": 1.4270532992833018, "grad_norm": 1.276938190549169, "learning_rate": 4.727649779568666e-06, "loss": 0.0492, "step": 542, "ts_encoder_learning_rate": 4.711646468323129e-06 }, { "epoch": 1.4296894307603591, "grad_norm": 1.2256191207359708, "learning_rate": 4.711646468323129e-06, "loss": 0.0411, "step": 543, "ts_encoder_learning_rate": 4.695646120356275e-06 }, { "epoch": 1.4323255622374167, "grad_norm": 1.607726895519541, "learning_rate": 4.695646120356275e-06, "loss": 0.0394, "step": 544, "ts_encoder_learning_rate": 4.679648900096436e-06 }, { "epoch": 1.434961693714474, "grad_norm": 5.4916831132826704, "learning_rate": 4.679648900096436e-06, "loss": 0.0593, "step": 545, "ts_encoder_learning_rate": 4.663654971939802e-06 }, { "epoch": 1.4375978251915313, "grad_norm": 1.1834633367325678, "learning_rate": 4.663654971939802e-06, "loss": 0.0461, "step": 546, "ts_encoder_learning_rate": 4.64766450024873e-06 }, { "epoch": 1.4402339566685889, "grad_norm": 1.0570788406545149, "learning_rate": 4.64766450024873e-06, "loss": 0.0521, "step": 547, "ts_encoder_learning_rate": 4.6316776493500615e-06 }, { "epoch": 1.4428700881456462, "grad_norm": 1.519805292714776, "learning_rate": 4.6316776493500615e-06, "loss": 0.056, "step": 548, "ts_encoder_learning_rate": 4.615694583533418e-06 }, { "epoch": 1.4455062196227038, "grad_norm": 1.8886920313289481, "learning_rate": 4.615694583533418e-06, "loss": 0.059, "step": 549, "ts_encoder_learning_rate": 4.599715467049534e-06 }, { "epoch": 1.448142351099761, "grad_norm": 1.2634550829904962, "learning_rate": 4.599715467049534e-06, "loss": 0.0433, "step": 550, "ts_encoder_learning_rate": 4.583740464108554e-06 }, { "epoch": 1.4507784825768186, "grad_norm": 1.8049707104769945, "learning_rate": 4.583740464108554e-06, "loss": 0.0627, "step": 551, "ts_encoder_learning_rate": 4.56776973887835e-06 }, { "epoch": 1.453414614053876, "grad_norm": 1.9485167870885407, "learning_rate": 4.56776973887835e-06, "loss": 0.0469, "step": 552, "ts_encoder_learning_rate": 4.551803455482833e-06 }, { "epoch": 1.4560507455309333, "grad_norm": 1.267286074272727, "learning_rate": 4.551803455482833e-06, "loss": 0.0619, "step": 553, "ts_encoder_learning_rate": 4.53584177800027e-06 }, { "epoch": 1.4586868770079908, "grad_norm": 1.3504185426583042, "learning_rate": 4.53584177800027e-06, "loss": 0.0533, "step": 554, "ts_encoder_learning_rate": 4.5198848704615915e-06 }, { "epoch": 1.4613230084850481, "grad_norm": 1.0200226065433153, "learning_rate": 4.5198848704615915e-06, "loss": 0.0516, "step": 555, "ts_encoder_learning_rate": 4.503932896848713e-06 }, { "epoch": 1.4639591399621055, "grad_norm": 1.459926252613376, "learning_rate": 4.503932896848713e-06, "loss": 0.0544, "step": 556, "ts_encoder_learning_rate": 4.487986021092844e-06 }, { "epoch": 1.466595271439163, "grad_norm": 1.044163195089025, "learning_rate": 4.487986021092844e-06, "loss": 0.0464, "step": 557, "ts_encoder_learning_rate": 4.472044407072805e-06 }, { "epoch": 1.4692314029162206, "grad_norm": 1.667493213165678, "learning_rate": 4.472044407072805e-06, "loss": 0.0675, "step": 558, "ts_encoder_learning_rate": 4.456108218613346e-06 }, { "epoch": 1.4718675343932779, "grad_norm": 2.446693967731953, "learning_rate": 4.456108218613346e-06, "loss": 0.0499, "step": 559, "ts_encoder_learning_rate": 4.4401776194834615e-06 }, { "epoch": 1.4745036658703352, "grad_norm": 1.1699696309583159, "learning_rate": 4.4401776194834615e-06, "loss": 0.0542, "step": 560, "ts_encoder_learning_rate": 4.424252773394704e-06 }, { "epoch": 1.4771397973473928, "grad_norm": 1.1962425478415217, "learning_rate": 4.424252773394704e-06, "loss": 0.0462, "step": 561, "ts_encoder_learning_rate": 4.408333843999506e-06 }, { "epoch": 1.47977592882445, "grad_norm": 2.3242298710819758, "learning_rate": 4.408333843999506e-06, "loss": 0.0491, "step": 562, "ts_encoder_learning_rate": 4.392420994889498e-06 }, { "epoch": 1.4824120603015074, "grad_norm": 2.1023818406064643, "learning_rate": 4.392420994889498e-06, "loss": 0.0496, "step": 563, "ts_encoder_learning_rate": 4.376514389593827e-06 }, { "epoch": 1.485048191778565, "grad_norm": 2.4457184111852723, "learning_rate": 4.376514389593827e-06, "loss": 0.0524, "step": 564, "ts_encoder_learning_rate": 4.3606141915774695e-06 }, { "epoch": 1.4876843232556225, "grad_norm": 1.3302734845105864, "learning_rate": 4.3606141915774695e-06, "loss": 0.0504, "step": 565, "ts_encoder_learning_rate": 4.344720564239567e-06 }, { "epoch": 1.4903204547326798, "grad_norm": 1.1082526743380932, "learning_rate": 4.344720564239567e-06, "loss": 0.0481, "step": 566, "ts_encoder_learning_rate": 4.3288336709117246e-06 }, { "epoch": 1.4929565862097371, "grad_norm": 2.427309688292863, "learning_rate": 4.3288336709117246e-06, "loss": 0.0511, "step": 567, "ts_encoder_learning_rate": 4.312953674856355e-06 }, { "epoch": 1.4955927176867947, "grad_norm": 1.7521878257146914, "learning_rate": 4.312953674856355e-06, "loss": 0.0508, "step": 568, "ts_encoder_learning_rate": 4.297080739264987e-06 }, { "epoch": 1.498228849163852, "grad_norm": 1.8035517353788895, "learning_rate": 4.297080739264987e-06, "loss": 0.0468, "step": 569, "ts_encoder_learning_rate": 4.281215027256592e-06 }, { "epoch": 1.5008649806409093, "grad_norm": 1.3680011804510284, "learning_rate": 4.281215027256592e-06, "loss": 0.0615, "step": 570, "ts_encoder_learning_rate": 4.265356701875911e-06 }, { "epoch": 1.5035011121179669, "grad_norm": 1.0277278297390235, "learning_rate": 4.265356701875911e-06, "loss": 0.0392, "step": 571, "ts_encoder_learning_rate": 4.249505926091771e-06 }, { "epoch": 1.5061372435950244, "grad_norm": 1.5675045465594473, "learning_rate": 4.249505926091771e-06, "loss": 0.0511, "step": 572, "ts_encoder_learning_rate": 4.23366286279542e-06 }, { "epoch": 1.5087733750720818, "grad_norm": 2.516079496477877, "learning_rate": 4.23366286279542e-06, "loss": 0.0529, "step": 573, "ts_encoder_learning_rate": 4.217827674798845e-06 }, { "epoch": 1.511409506549139, "grad_norm": 2.7086489547850854, "learning_rate": 4.217827674798845e-06, "loss": 0.0443, "step": 574, "ts_encoder_learning_rate": 4.2020005248331056e-06 }, { "epoch": 1.5140456380261966, "grad_norm": 1.5770057283903636, "learning_rate": 4.2020005248331056e-06, "loss": 0.0534, "step": 575, "ts_encoder_learning_rate": 4.186181575546651e-06 }, { "epoch": 1.516681769503254, "grad_norm": 0.9542576979843979, "learning_rate": 4.186181575546651e-06, "loss": 0.0445, "step": 576, "ts_encoder_learning_rate": 4.170370989503662e-06 }, { "epoch": 1.5193179009803113, "grad_norm": 1.033126540105532, "learning_rate": 4.170370989503662e-06, "loss": 0.0436, "step": 577, "ts_encoder_learning_rate": 4.154568929182374e-06 }, { "epoch": 1.5219540324573688, "grad_norm": 1.3759475573506135, "learning_rate": 4.154568929182374e-06, "loss": 0.0471, "step": 578, "ts_encoder_learning_rate": 4.138775556973406e-06 }, { "epoch": 1.5245901639344264, "grad_norm": 1.715702326116523, "learning_rate": 4.138775556973406e-06, "loss": 0.0553, "step": 579, "ts_encoder_learning_rate": 4.122991035178093e-06 }, { "epoch": 1.5272262954114837, "grad_norm": 1.2802507956632132, "learning_rate": 4.122991035178093e-06, "loss": 0.0646, "step": 580, "ts_encoder_learning_rate": 4.107215526006818e-06 }, { "epoch": 1.529862426888541, "grad_norm": 1.2923645458952004, "learning_rate": 4.107215526006818e-06, "loss": 0.0589, "step": 581, "ts_encoder_learning_rate": 4.091449191577346e-06 }, { "epoch": 1.5324985583655986, "grad_norm": 1.275873998852951, "learning_rate": 4.091449191577346e-06, "loss": 0.051, "step": 582, "ts_encoder_learning_rate": 4.075692193913156e-06 }, { "epoch": 1.5351346898426559, "grad_norm": 1.5143761646518399, "learning_rate": 4.075692193913156e-06, "loss": 0.0422, "step": 583, "ts_encoder_learning_rate": 4.059944694941783e-06 }, { "epoch": 1.5377708213197132, "grad_norm": 1.2547966531700192, "learning_rate": 4.059944694941783e-06, "loss": 0.0645, "step": 584, "ts_encoder_learning_rate": 4.04420685649314e-06 }, { "epoch": 1.5404069527967708, "grad_norm": 1.7483804817058275, "learning_rate": 4.04420685649314e-06, "loss": 0.0446, "step": 585, "ts_encoder_learning_rate": 4.028478840297867e-06 }, { "epoch": 1.5430430842738283, "grad_norm": 1.863160809736345, "learning_rate": 4.028478840297867e-06, "loss": 0.0556, "step": 586, "ts_encoder_learning_rate": 4.012760807985665e-06 }, { "epoch": 1.5456792157508856, "grad_norm": 1.3419436863472527, "learning_rate": 4.012760807985665e-06, "loss": 0.05, "step": 587, "ts_encoder_learning_rate": 3.997052921083637e-06 }, { "epoch": 1.548315347227943, "grad_norm": 1.1909742103770653, "learning_rate": 3.997052921083637e-06, "loss": 0.0445, "step": 588, "ts_encoder_learning_rate": 3.9813553410146225e-06 }, { "epoch": 1.5509514787050005, "grad_norm": 1.0564200056890343, "learning_rate": 3.9813553410146225e-06, "loss": 0.0665, "step": 589, "ts_encoder_learning_rate": 3.965668229095546e-06 }, { "epoch": 1.5535876101820578, "grad_norm": 6.866040081379579, "learning_rate": 3.965668229095546e-06, "loss": 0.054, "step": 590, "ts_encoder_learning_rate": 3.949991746535753e-06 }, { "epoch": 1.5562237416591151, "grad_norm": 1.584656026556296, "learning_rate": 3.949991746535753e-06, "loss": 0.0476, "step": 591, "ts_encoder_learning_rate": 3.934326054435358e-06 }, { "epoch": 1.5588598731361727, "grad_norm": 1.3961799847840088, "learning_rate": 3.934326054435358e-06, "loss": 0.0534, "step": 592, "ts_encoder_learning_rate": 3.918671313783583e-06 }, { "epoch": 1.5614960046132302, "grad_norm": 0.8651238980949738, "learning_rate": 3.918671313783583e-06, "loss": 0.0465, "step": 593, "ts_encoder_learning_rate": 3.903027685457112e-06 }, { "epoch": 1.5641321360902873, "grad_norm": 1.4641882274691738, "learning_rate": 3.903027685457112e-06, "loss": 0.0382, "step": 594, "ts_encoder_learning_rate": 3.887395330218429e-06 }, { "epoch": 1.5667682675673449, "grad_norm": 1.312732235302505, "learning_rate": 3.887395330218429e-06, "loss": 0.0434, "step": 595, "ts_encoder_learning_rate": 3.87177440871417e-06 }, { "epoch": 1.5694043990444024, "grad_norm": 1.082210100085236, "learning_rate": 3.87177440871417e-06, "loss": 0.0455, "step": 596, "ts_encoder_learning_rate": 3.856165081473474e-06 }, { "epoch": 1.5720405305214598, "grad_norm": 1.1636550431612551, "learning_rate": 3.856165081473474e-06, "loss": 0.0521, "step": 597, "ts_encoder_learning_rate": 3.840567508906328e-06 }, { "epoch": 1.574676661998517, "grad_norm": 0.9722535818497965, "learning_rate": 3.840567508906328e-06, "loss": 0.0479, "step": 598, "ts_encoder_learning_rate": 3.824981851301924e-06 }, { "epoch": 1.5773127934755746, "grad_norm": 1.0986855549101457, "learning_rate": 3.824981851301924e-06, "loss": 0.0444, "step": 599, "ts_encoder_learning_rate": 3.809408268827009e-06 }, { "epoch": 1.5799489249526322, "grad_norm": 3.8382139919055662, "learning_rate": 3.809408268827009e-06, "loss": 0.0481, "step": 600, "ts_encoder_learning_rate": 3.7938469215242374e-06 }, { "epoch": 1.5825850564296893, "grad_norm": 0.8061311507375318, "learning_rate": 3.7938469215242374e-06, "loss": 0.0556, "step": 601, "ts_encoder_learning_rate": 3.778297969310529e-06 }, { "epoch": 1.5852211879067468, "grad_norm": 1.1509891000241945, "learning_rate": 3.778297969310529e-06, "loss": 0.0474, "step": 602, "ts_encoder_learning_rate": 3.7627615719754294e-06 }, { "epoch": 1.5878573193838044, "grad_norm": 0.7746498139881461, "learning_rate": 3.7627615719754294e-06, "loss": 0.0495, "step": 603, "ts_encoder_learning_rate": 3.7472378891794537e-06 }, { "epoch": 1.5904934508608617, "grad_norm": 1.1470163275979566, "learning_rate": 3.7472378891794537e-06, "loss": 0.035, "step": 604, "ts_encoder_learning_rate": 3.731727080452464e-06 }, { "epoch": 1.593129582337919, "grad_norm": 1.0156962254575825, "learning_rate": 3.731727080452464e-06, "loss": 0.0511, "step": 605, "ts_encoder_learning_rate": 3.7162293051920185e-06 }, { "epoch": 1.5957657138149766, "grad_norm": 0.6709332984734167, "learning_rate": 3.7162293051920185e-06, "loss": 0.0381, "step": 606, "ts_encoder_learning_rate": 3.7007447226617367e-06 }, { "epoch": 1.5984018452920339, "grad_norm": 1.0740219504440236, "learning_rate": 3.7007447226617367e-06, "loss": 0.0379, "step": 607, "ts_encoder_learning_rate": 3.685273491989661e-06 }, { "epoch": 1.6010379767690912, "grad_norm": 1.0725681809883107, "learning_rate": 3.685273491989661e-06, "loss": 0.0405, "step": 608, "ts_encoder_learning_rate": 3.669815772166625e-06 }, { "epoch": 1.6036741082461488, "grad_norm": 1.0834624903360521, "learning_rate": 3.669815772166625e-06, "loss": 0.0355, "step": 609, "ts_encoder_learning_rate": 3.654371722044616e-06 }, { "epoch": 1.6063102397232063, "grad_norm": 1.3539103412709967, "learning_rate": 3.654371722044616e-06, "loss": 0.0367, "step": 610, "ts_encoder_learning_rate": 3.638941500335145e-06 }, { "epoch": 1.6089463712002636, "grad_norm": 1.73704351662127, "learning_rate": 3.638941500335145e-06, "loss": 0.0357, "step": 611, "ts_encoder_learning_rate": 3.6235252656076138e-06 }, { "epoch": 1.611582502677321, "grad_norm": 1.0546272097596496, "learning_rate": 3.6235252656076138e-06, "loss": 0.0513, "step": 612, "ts_encoder_learning_rate": 3.608123176287685e-06 }, { "epoch": 1.6142186341543785, "grad_norm": 1.1384580684741372, "learning_rate": 3.608123176287685e-06, "loss": 0.0492, "step": 613, "ts_encoder_learning_rate": 3.5927353906556583e-06 }, { "epoch": 1.6168547656314358, "grad_norm": 1.0408680243087836, "learning_rate": 3.5927353906556583e-06, "loss": 0.0493, "step": 614, "ts_encoder_learning_rate": 3.5773620668448384e-06 }, { "epoch": 1.6194908971084931, "grad_norm": 1.2408867536402586, "learning_rate": 3.5773620668448384e-06, "loss": 0.0477, "step": 615, "ts_encoder_learning_rate": 3.562003362839914e-06 }, { "epoch": 1.6221270285855507, "grad_norm": 0.9012275494297888, "learning_rate": 3.562003362839914e-06, "loss": 0.0374, "step": 616, "ts_encoder_learning_rate": 3.5466594364753325e-06 }, { "epoch": 1.6247631600626082, "grad_norm": 1.542557802018261, "learning_rate": 3.5466594364753325e-06, "loss": 0.0563, "step": 617, "ts_encoder_learning_rate": 3.531330445433677e-06 }, { "epoch": 1.6273992915396656, "grad_norm": 1.3600810197945168, "learning_rate": 3.531330445433677e-06, "loss": 0.0508, "step": 618, "ts_encoder_learning_rate": 3.516016547244047e-06 }, { "epoch": 1.6300354230167229, "grad_norm": 1.1139555587082588, "learning_rate": 3.516016547244047e-06, "loss": 0.0383, "step": 619, "ts_encoder_learning_rate": 3.500717899280442e-06 }, { "epoch": 1.6326715544937804, "grad_norm": 1.8530076436307588, "learning_rate": 3.500717899280442e-06, "loss": 0.0444, "step": 620, "ts_encoder_learning_rate": 3.48543465876014e-06 }, { "epoch": 1.6353076859708378, "grad_norm": 0.9664504630825478, "learning_rate": 3.48543465876014e-06, "loss": 0.0564, "step": 621, "ts_encoder_learning_rate": 3.4701669827420827e-06 }, { "epoch": 1.637943817447895, "grad_norm": 2.6957722854149035, "learning_rate": 3.4701669827420827e-06, "loss": 0.0524, "step": 622, "ts_encoder_learning_rate": 3.4549150281252635e-06 }, { "epoch": 1.6405799489249526, "grad_norm": 0.8782042487398507, "learning_rate": 3.4549150281252635e-06, "loss": 0.0345, "step": 623, "ts_encoder_learning_rate": 3.4396789516471152e-06 }, { "epoch": 1.6432160804020102, "grad_norm": 1.455788121211078, "learning_rate": 3.4396789516471152e-06, "loss": 0.0494, "step": 624, "ts_encoder_learning_rate": 3.424458909881897e-06 }, { "epoch": 1.6458522118790675, "grad_norm": 1.1390293600922257, "learning_rate": 3.424458909881897e-06, "loss": 0.0451, "step": 625, "ts_encoder_learning_rate": 3.409255059239086e-06 }, { "epoch": 1.6484883433561248, "grad_norm": 1.0205461298328486, "learning_rate": 3.409255059239086e-06, "loss": 0.0481, "step": 626, "ts_encoder_learning_rate": 3.3940675559617724e-06 }, { "epoch": 1.6511244748331824, "grad_norm": 1.3290866058317974, "learning_rate": 3.3940675559617724e-06, "loss": 0.0379, "step": 627, "ts_encoder_learning_rate": 3.37889655612505e-06 }, { "epoch": 1.6537606063102397, "grad_norm": 1.588969233408268, "learning_rate": 3.37889655612505e-06, "loss": 0.0475, "step": 628, "ts_encoder_learning_rate": 3.363742215634416e-06 }, { "epoch": 1.656396737787297, "grad_norm": 1.5323219144753453, "learning_rate": 3.363742215634416e-06, "loss": 0.0535, "step": 629, "ts_encoder_learning_rate": 3.3486046902241663e-06 }, { "epoch": 1.6590328692643546, "grad_norm": 1.2117312516941228, "learning_rate": 3.3486046902241663e-06, "loss": 0.0456, "step": 630, "ts_encoder_learning_rate": 3.3334841354557923e-06 }, { "epoch": 1.661669000741412, "grad_norm": 1.1542851651812696, "learning_rate": 3.3334841354557923e-06, "loss": 0.0454, "step": 631, "ts_encoder_learning_rate": 3.318380706716392e-06 }, { "epoch": 1.6643051322184694, "grad_norm": 1.7354976924553471, "learning_rate": 3.318380706716392e-06, "loss": 0.0383, "step": 632, "ts_encoder_learning_rate": 3.303294559217063e-06 }, { "epoch": 1.6669412636955268, "grad_norm": 1.839192030247768, "learning_rate": 3.303294559217063e-06, "loss": 0.0452, "step": 633, "ts_encoder_learning_rate": 3.288225847991312e-06 }, { "epoch": 1.6695773951725843, "grad_norm": 1.1075074885202028, "learning_rate": 3.288225847991312e-06, "loss": 0.0381, "step": 634, "ts_encoder_learning_rate": 3.273174727893463e-06 }, { "epoch": 1.6722135266496416, "grad_norm": 1.0570238025860814, "learning_rate": 3.273174727893463e-06, "loss": 0.0494, "step": 635, "ts_encoder_learning_rate": 3.2581413535970597e-06 }, { "epoch": 1.674849658126699, "grad_norm": 1.4464126650801725, "learning_rate": 3.2581413535970597e-06, "loss": 0.0327, "step": 636, "ts_encoder_learning_rate": 3.2431258795932863e-06 }, { "epoch": 1.6774857896037565, "grad_norm": 4.97131370712164, "learning_rate": 3.2431258795932863e-06, "loss": 0.0518, "step": 637, "ts_encoder_learning_rate": 3.228128460189368e-06 }, { "epoch": 1.680121921080814, "grad_norm": 1.3774085333033086, "learning_rate": 3.228128460189368e-06, "loss": 0.0368, "step": 638, "ts_encoder_learning_rate": 3.213149249506997e-06 }, { "epoch": 1.6827580525578714, "grad_norm": 1.5023786656225406, "learning_rate": 3.213149249506997e-06, "loss": 0.0339, "step": 639, "ts_encoder_learning_rate": 3.198188401480734e-06 }, { "epoch": 1.6853941840349287, "grad_norm": 1.3990085291401217, "learning_rate": 3.198188401480734e-06, "loss": 0.0459, "step": 640, "ts_encoder_learning_rate": 3.183246069856443e-06 }, { "epoch": 1.6880303155119862, "grad_norm": 3.826763148327695, "learning_rate": 3.183246069856443e-06, "loss": 0.0384, "step": 641, "ts_encoder_learning_rate": 3.1683224081897e-06 }, { "epoch": 1.6906664469890436, "grad_norm": 3.421422432395587, "learning_rate": 3.1683224081897e-06, "loss": 0.0464, "step": 642, "ts_encoder_learning_rate": 3.1534175698442194e-06 }, { "epoch": 1.6933025784661009, "grad_norm": 3.7657139495021323, "learning_rate": 3.1534175698442194e-06, "loss": 0.0403, "step": 643, "ts_encoder_learning_rate": 3.1385317079902743e-06 }, { "epoch": 1.6959387099431584, "grad_norm": 37.29958018939492, "learning_rate": 3.1385317079902743e-06, "loss": 0.0498, "step": 644, "ts_encoder_learning_rate": 3.12366497560313e-06 }, { "epoch": 1.698574841420216, "grad_norm": 4.349734482528857, "learning_rate": 3.12366497560313e-06, "loss": 0.0505, "step": 645, "ts_encoder_learning_rate": 3.1088175254614616e-06 }, { "epoch": 1.7012109728972733, "grad_norm": 48.489588646859666, "learning_rate": 3.1088175254614616e-06, "loss": 0.0382, "step": 646, "ts_encoder_learning_rate": 3.093989510145792e-06 }, { "epoch": 1.7038471043743306, "grad_norm": 6.78448657127166, "learning_rate": 3.093989510145792e-06, "loss": 0.0508, "step": 647, "ts_encoder_learning_rate": 3.079181082036922e-06 }, { "epoch": 1.7064832358513882, "grad_norm": 3.23674986315901, "learning_rate": 3.079181082036922e-06, "loss": 0.0446, "step": 648, "ts_encoder_learning_rate": 3.0643923933143603e-06 }, { "epoch": 1.7091193673284455, "grad_norm": 1.178341628440116, "learning_rate": 3.0643923933143603e-06, "loss": 0.0516, "step": 649, "ts_encoder_learning_rate": 3.049623595954766e-06 }, { "epoch": 1.7117554988055028, "grad_norm": 8.830888469474749, "learning_rate": 3.049623595954766e-06, "loss": 0.0417, "step": 650, "ts_encoder_learning_rate": 3.0348748417303826e-06 }, { "epoch": 1.7143916302825604, "grad_norm": 1.8437992927036981, "learning_rate": 3.0348748417303826e-06, "loss": 0.035, "step": 651, "ts_encoder_learning_rate": 3.020146282207479e-06 }, { "epoch": 1.717027761759618, "grad_norm": 47.65658053142364, "learning_rate": 3.020146282207479e-06, "loss": 0.0541, "step": 652, "ts_encoder_learning_rate": 3.005438068744792e-06 }, { "epoch": 1.7196638932366752, "grad_norm": 1.1030676770500603, "learning_rate": 3.005438068744792e-06, "loss": 0.0392, "step": 653, "ts_encoder_learning_rate": 2.9907503524919734e-06 }, { "epoch": 1.7223000247137326, "grad_norm": 16.670627222356522, "learning_rate": 2.9907503524919734e-06, "loss": 0.0437, "step": 654, "ts_encoder_learning_rate": 2.976083284388031e-06 }, { "epoch": 1.72493615619079, "grad_norm": 0.9106485328504088, "learning_rate": 2.976083284388031e-06, "loss": 0.049, "step": 655, "ts_encoder_learning_rate": 2.9614370151597837e-06 }, { "epoch": 1.7275722876678474, "grad_norm": 1.3803980461708971, "learning_rate": 2.9614370151597837e-06, "loss": 0.0394, "step": 656, "ts_encoder_learning_rate": 2.9468116953203107e-06 }, { "epoch": 1.7302084191449048, "grad_norm": 0.9915306370393331, "learning_rate": 2.9468116953203107e-06, "loss": 0.0486, "step": 657, "ts_encoder_learning_rate": 2.932207475167398e-06 }, { "epoch": 1.7328445506219623, "grad_norm": 1.6041614843590046, "learning_rate": 2.932207475167398e-06, "loss": 0.0406, "step": 658, "ts_encoder_learning_rate": 2.9176245047820064e-06 }, { "epoch": 1.7354806820990198, "grad_norm": 1.3499944374287252, "learning_rate": 2.9176245047820064e-06, "loss": 0.0403, "step": 659, "ts_encoder_learning_rate": 2.9030629340267165e-06 }, { "epoch": 1.738116813576077, "grad_norm": 0.8297929219673496, "learning_rate": 2.9030629340267165e-06, "loss": 0.0401, "step": 660, "ts_encoder_learning_rate": 2.8885229125442022e-06 }, { "epoch": 1.7407529450531345, "grad_norm": 1.0327297756381613, "learning_rate": 2.8885229125442022e-06, "loss": 0.0347, "step": 661, "ts_encoder_learning_rate": 2.8740045897556766e-06 }, { "epoch": 1.743389076530192, "grad_norm": 1.294243398194849, "learning_rate": 2.8740045897556766e-06, "loss": 0.0382, "step": 662, "ts_encoder_learning_rate": 2.859508114859374e-06 }, { "epoch": 1.7460252080072494, "grad_norm": 9.88848400216231, "learning_rate": 2.859508114859374e-06, "loss": 0.0437, "step": 663, "ts_encoder_learning_rate": 2.845033636828998e-06 }, { "epoch": 1.7486613394843067, "grad_norm": 1.3051859972411644, "learning_rate": 2.845033636828998e-06, "loss": 0.0506, "step": 664, "ts_encoder_learning_rate": 2.83058130441221e-06 }, { "epoch": 1.7512974709613642, "grad_norm": 1.3312709718052185, "learning_rate": 2.83058130441221e-06, "loss": 0.0262, "step": 665, "ts_encoder_learning_rate": 2.8161512661290847e-06 }, { "epoch": 1.7539336024384218, "grad_norm": 1.1582478757709687, "learning_rate": 2.8161512661290847e-06, "loss": 0.0452, "step": 666, "ts_encoder_learning_rate": 2.80174367027059e-06 }, { "epoch": 1.7565697339154789, "grad_norm": 1.2069057310106728, "learning_rate": 2.80174367027059e-06, "loss": 0.0371, "step": 667, "ts_encoder_learning_rate": 2.7873586648970686e-06 }, { "epoch": 1.7592058653925364, "grad_norm": 1.2044293435585265, "learning_rate": 2.7873586648970686e-06, "loss": 0.0443, "step": 668, "ts_encoder_learning_rate": 2.772996397836704e-06 }, { "epoch": 1.761841996869594, "grad_norm": 1.0589558487998179, "learning_rate": 2.772996397836704e-06, "loss": 0.0357, "step": 669, "ts_encoder_learning_rate": 2.7586570166840154e-06 }, { "epoch": 1.7644781283466513, "grad_norm": 1.1371065267074214, "learning_rate": 2.7586570166840154e-06, "loss": 0.0405, "step": 670, "ts_encoder_learning_rate": 2.7443406687983267e-06 }, { "epoch": 1.7671142598237086, "grad_norm": 1.310944403130682, "learning_rate": 2.7443406687983267e-06, "loss": 0.0334, "step": 671, "ts_encoder_learning_rate": 2.7300475013022666e-06 }, { "epoch": 1.7697503913007662, "grad_norm": 1.611743339204791, "learning_rate": 2.7300475013022666e-06, "loss": 0.0486, "step": 672, "ts_encoder_learning_rate": 2.7157776610802416e-06 }, { "epoch": 1.7723865227778235, "grad_norm": 2.1145133691029625, "learning_rate": 2.7157776610802416e-06, "loss": 0.0484, "step": 673, "ts_encoder_learning_rate": 2.7015312947769436e-06 }, { "epoch": 1.7750226542548808, "grad_norm": 1.8011066906635667, "learning_rate": 2.7015312947769436e-06, "loss": 0.0472, "step": 674, "ts_encoder_learning_rate": 2.687308548795825e-06 }, { "epoch": 1.7776587857319384, "grad_norm": 14.318871488368272, "learning_rate": 2.687308548795825e-06, "loss": 0.0397, "step": 675, "ts_encoder_learning_rate": 2.6731095692976073e-06 }, { "epoch": 1.780294917208996, "grad_norm": 0.9235148193782242, "learning_rate": 2.6731095692976073e-06, "loss": 0.03, "step": 676, "ts_encoder_learning_rate": 2.6589345021987725e-06 }, { "epoch": 1.7829310486860532, "grad_norm": 7.592255856926825, "learning_rate": 2.6589345021987725e-06, "loss": 0.0348, "step": 677, "ts_encoder_learning_rate": 2.6447834931700688e-06 }, { "epoch": 1.7855671801631106, "grad_norm": 1.7017607950556997, "learning_rate": 2.6447834931700688e-06, "loss": 0.0429, "step": 678, "ts_encoder_learning_rate": 2.6306566876350072e-06 }, { "epoch": 1.788203311640168, "grad_norm": 1.1703754969173203, "learning_rate": 2.6306566876350072e-06, "loss": 0.0534, "step": 679, "ts_encoder_learning_rate": 2.6165542307683744e-06 }, { "epoch": 1.7908394431172254, "grad_norm": 1.255297983170475, "learning_rate": 2.6165542307683744e-06, "loss": 0.043, "step": 680, "ts_encoder_learning_rate": 2.6024762674947313e-06 }, { "epoch": 1.7934755745942828, "grad_norm": 1.5888017813716921, "learning_rate": 2.6024762674947313e-06, "loss": 0.0343, "step": 681, "ts_encoder_learning_rate": 2.588422942486932e-06 }, { "epoch": 1.7961117060713403, "grad_norm": 1.7718676183781328, "learning_rate": 2.588422942486932e-06, "loss": 0.0486, "step": 682, "ts_encoder_learning_rate": 2.5743944001646394e-06 }, { "epoch": 1.7987478375483978, "grad_norm": 0.9086652386668309, "learning_rate": 2.5743944001646394e-06, "loss": 0.0497, "step": 683, "ts_encoder_learning_rate": 2.5603907846928277e-06 }, { "epoch": 1.8013839690254552, "grad_norm": 1.1341128763629043, "learning_rate": 2.5603907846928277e-06, "loss": 0.0425, "step": 684, "ts_encoder_learning_rate": 2.5464122399803126e-06 }, { "epoch": 1.8040201005025125, "grad_norm": 2.5998127513101315, "learning_rate": 2.5464122399803126e-06, "loss": 0.0574, "step": 685, "ts_encoder_learning_rate": 2.532458909678266e-06 }, { "epoch": 1.80665623197957, "grad_norm": 4.542124494248002, "learning_rate": 2.532458909678266e-06, "loss": 0.0462, "step": 686, "ts_encoder_learning_rate": 2.5185309371787515e-06 }, { "epoch": 1.8092923634566274, "grad_norm": 1.0616553515388587, "learning_rate": 2.5185309371787515e-06, "loss": 0.0446, "step": 687, "ts_encoder_learning_rate": 2.50462846561323e-06 }, { "epoch": 1.8119284949336847, "grad_norm": 0.7064894341667076, "learning_rate": 2.50462846561323e-06, "loss": 0.035, "step": 688, "ts_encoder_learning_rate": 2.4907516378511137e-06 }, { "epoch": 1.8145646264107422, "grad_norm": 0.673676553652552, "learning_rate": 2.4907516378511137e-06, "loss": 0.0456, "step": 689, "ts_encoder_learning_rate": 2.4769005964982718e-06 }, { "epoch": 1.8172007578877998, "grad_norm": 1.0526217413305237, "learning_rate": 2.4769005964982718e-06, "loss": 0.0408, "step": 690, "ts_encoder_learning_rate": 2.46307548389559e-06 }, { "epoch": 1.819836889364857, "grad_norm": 0.8741534711615824, "learning_rate": 2.46307548389559e-06, "loss": 0.0369, "step": 691, "ts_encoder_learning_rate": 2.4492764421174863e-06 }, { "epoch": 1.8224730208419144, "grad_norm": 1.558424208864291, "learning_rate": 2.4492764421174863e-06, "loss": 0.0403, "step": 692, "ts_encoder_learning_rate": 2.43550361297047e-06 }, { "epoch": 1.825109152318972, "grad_norm": 0.8337291977316967, "learning_rate": 2.43550361297047e-06, "loss": 0.0352, "step": 693, "ts_encoder_learning_rate": 2.4217571379916673e-06 }, { "epoch": 1.8277452837960293, "grad_norm": 1.017616972872044, "learning_rate": 2.4217571379916673e-06, "loss": 0.0552, "step": 694, "ts_encoder_learning_rate": 2.408037158447375e-06 }, { "epoch": 1.8303814152730866, "grad_norm": 0.7844845835056601, "learning_rate": 2.408037158447375e-06, "loss": 0.0435, "step": 695, "ts_encoder_learning_rate": 2.394343815331616e-06 }, { "epoch": 1.8330175467501442, "grad_norm": 1.1004307198779026, "learning_rate": 2.394343815331616e-06, "loss": 0.0314, "step": 696, "ts_encoder_learning_rate": 2.3806772493646725e-06 }, { "epoch": 1.8356536782272017, "grad_norm": 1.1688405779568678, "learning_rate": 2.3806772493646725e-06, "loss": 0.0269, "step": 697, "ts_encoder_learning_rate": 2.3670376009916596e-06 }, { "epoch": 1.838289809704259, "grad_norm": 1.0701865886101891, "learning_rate": 2.3670376009916596e-06, "loss": 0.044, "step": 698, "ts_encoder_learning_rate": 2.353425010381063e-06 }, { "epoch": 1.8409259411813164, "grad_norm": 1.360235946458034, "learning_rate": 2.353425010381063e-06, "loss": 0.0384, "step": 699, "ts_encoder_learning_rate": 2.339839617423318e-06 }, { "epoch": 1.843562072658374, "grad_norm": 0.8062724598386658, "learning_rate": 2.339839617423318e-06, "loss": 0.0331, "step": 700, "ts_encoder_learning_rate": 2.3262815617293517e-06 }, { "epoch": 1.8461982041354312, "grad_norm": 1.1455549172176551, "learning_rate": 2.3262815617293517e-06, "loss": 0.0542, "step": 701, "ts_encoder_learning_rate": 2.31275098262917e-06 }, { "epoch": 1.8488343356124886, "grad_norm": 0.703436998992223, "learning_rate": 2.31275098262917e-06, "loss": 0.0313, "step": 702, "ts_encoder_learning_rate": 2.2992480191704003e-06 }, { "epoch": 1.851470467089546, "grad_norm": 0.8764547755682153, "learning_rate": 2.2992480191704003e-06, "loss": 0.0333, "step": 703, "ts_encoder_learning_rate": 2.28577281011689e-06 }, { "epoch": 1.8541065985666036, "grad_norm": 1.23716774475027, "learning_rate": 2.28577281011689e-06, "loss": 0.0301, "step": 704, "ts_encoder_learning_rate": 2.272325493947257e-06 }, { "epoch": 1.856742730043661, "grad_norm": 11.281594612889576, "learning_rate": 2.272325493947257e-06, "loss": 0.0312, "step": 705, "ts_encoder_learning_rate": 2.2589062088534837e-06 }, { "epoch": 1.8593788615207183, "grad_norm": 0.9987575070457472, "learning_rate": 2.2589062088534837e-06, "loss": 0.0417, "step": 706, "ts_encoder_learning_rate": 2.245515092739488e-06 }, { "epoch": 1.8620149929977758, "grad_norm": 1.0116065835025165, "learning_rate": 2.245515092739488e-06, "loss": 0.0496, "step": 707, "ts_encoder_learning_rate": 2.2321522832197036e-06 }, { "epoch": 1.8646511244748332, "grad_norm": 1.3806751773781658, "learning_rate": 2.2321522832197036e-06, "loss": 0.039, "step": 708, "ts_encoder_learning_rate": 2.2188179176176767e-06 }, { "epoch": 1.8672872559518905, "grad_norm": 0.9901586798069452, "learning_rate": 2.2188179176176767e-06, "loss": 0.041, "step": 709, "ts_encoder_learning_rate": 2.2055121329646416e-06 }, { "epoch": 1.869923387428948, "grad_norm": 0.9660208947927406, "learning_rate": 2.2055121329646416e-06, "loss": 0.0263, "step": 710, "ts_encoder_learning_rate": 2.1922350659981262e-06 }, { "epoch": 1.8725595189060056, "grad_norm": 0.9148657839084163, "learning_rate": 2.1922350659981262e-06, "loss": 0.0382, "step": 711, "ts_encoder_learning_rate": 2.178986853160535e-06 }, { "epoch": 1.875195650383063, "grad_norm": 0.9900246938077546, "learning_rate": 2.178986853160535e-06, "loss": 0.0482, "step": 712, "ts_encoder_learning_rate": 2.165767630597752e-06 }, { "epoch": 1.8778317818601202, "grad_norm": 0.995911258816107, "learning_rate": 2.165767630597752e-06, "loss": 0.0383, "step": 713, "ts_encoder_learning_rate": 2.1525775341577404e-06 }, { "epoch": 1.8804679133371778, "grad_norm": 0.9529466866489111, "learning_rate": 2.1525775341577404e-06, "loss": 0.0336, "step": 714, "ts_encoder_learning_rate": 2.139416699389153e-06 }, { "epoch": 1.883104044814235, "grad_norm": 0.6476905341796513, "learning_rate": 2.139416699389153e-06, "loss": 0.0274, "step": 715, "ts_encoder_learning_rate": 2.126285261539926e-06 }, { "epoch": 1.8857401762912924, "grad_norm": 1.1085422585469236, "learning_rate": 2.126285261539926e-06, "loss": 0.0361, "step": 716, "ts_encoder_learning_rate": 2.1131833555559037e-06 }, { "epoch": 1.88837630776835, "grad_norm": 1.0042700649998133, "learning_rate": 2.1131833555559037e-06, "loss": 0.0288, "step": 717, "ts_encoder_learning_rate": 2.1001111160794387e-06 }, { "epoch": 1.8910124392454075, "grad_norm": 0.9521264639042284, "learning_rate": 2.1001111160794387e-06, "loss": 0.0421, "step": 718, "ts_encoder_learning_rate": 2.08706867744802e-06 }, { "epoch": 1.8936485707224646, "grad_norm": 0.931714414769153, "learning_rate": 2.08706867744802e-06, "loss": 0.0366, "step": 719, "ts_encoder_learning_rate": 2.074056173692881e-06 }, { "epoch": 1.8962847021995222, "grad_norm": 0.754100564730907, "learning_rate": 2.074056173692881e-06, "loss": 0.0303, "step": 720, "ts_encoder_learning_rate": 2.061073738537635e-06 }, { "epoch": 1.8989208336765797, "grad_norm": 0.7479604079430805, "learning_rate": 2.061073738537635e-06, "loss": 0.0466, "step": 721, "ts_encoder_learning_rate": 2.0481215053968874e-06 }, { "epoch": 1.901556965153637, "grad_norm": 0.7804885190676818, "learning_rate": 2.0481215053968874e-06, "loss": 0.0295, "step": 722, "ts_encoder_learning_rate": 2.0351996073748713e-06 }, { "epoch": 1.9041930966306944, "grad_norm": 0.9239500456881254, "learning_rate": 2.0351996073748713e-06, "loss": 0.0418, "step": 723, "ts_encoder_learning_rate": 2.0223081772640867e-06 }, { "epoch": 1.906829228107752, "grad_norm": 1.0393746665064, "learning_rate": 2.0223081772640867e-06, "loss": 0.0352, "step": 724, "ts_encoder_learning_rate": 2.00944734754392e-06 }, { "epoch": 1.9094653595848095, "grad_norm": 0.7757057471334936, "learning_rate": 2.00944734754392e-06, "loss": 0.0334, "step": 725, "ts_encoder_learning_rate": 1.9966172503792986e-06 }, { "epoch": 1.9121014910618666, "grad_norm": 0.7982941003952496, "learning_rate": 1.9966172503792986e-06, "loss": 0.0341, "step": 726, "ts_encoder_learning_rate": 1.983818017619318e-06 }, { "epoch": 1.914737622538924, "grad_norm": 1.0301075039556638, "learning_rate": 1.983818017619318e-06, "loss": 0.0407, "step": 727, "ts_encoder_learning_rate": 1.971049780795901e-06 }, { "epoch": 1.9173737540159816, "grad_norm": 0.9027944740938065, "learning_rate": 1.971049780795901e-06, "loss": 0.0325, "step": 728, "ts_encoder_learning_rate": 1.9583126711224342e-06 }, { "epoch": 1.920009885493039, "grad_norm": 1.0093899617917834, "learning_rate": 1.9583126711224342e-06, "loss": 0.0343, "step": 729, "ts_encoder_learning_rate": 1.945606819492429e-06 }, { "epoch": 1.9226460169700963, "grad_norm": 1.6230911664954315, "learning_rate": 1.945606819492429e-06, "loss": 0.0467, "step": 730, "ts_encoder_learning_rate": 1.932932356478168e-06 }, { "epoch": 1.9252821484471538, "grad_norm": 1.169611902284653, "learning_rate": 1.932932356478168e-06, "loss": 0.0346, "step": 731, "ts_encoder_learning_rate": 1.9202894123293677e-06 }, { "epoch": 1.9279182799242112, "grad_norm": 0.8882211427092304, "learning_rate": 1.9202894123293677e-06, "loss": 0.0271, "step": 732, "ts_encoder_learning_rate": 1.9076781169718426e-06 }, { "epoch": 1.9305544114012685, "grad_norm": 0.7788137841169445, "learning_rate": 1.9076781169718426e-06, "loss": 0.0317, "step": 733, "ts_encoder_learning_rate": 1.895098600006164e-06 }, { "epoch": 1.933190542878326, "grad_norm": 0.9177124549747325, "learning_rate": 1.895098600006164e-06, "loss": 0.0294, "step": 734, "ts_encoder_learning_rate": 1.8825509907063328e-06 }, { "epoch": 1.9358266743553836, "grad_norm": 1.1488053411805068, "learning_rate": 1.8825509907063328e-06, "loss": 0.0319, "step": 735, "ts_encoder_learning_rate": 1.8700354180184465e-06 }, { "epoch": 1.938462805832441, "grad_norm": 0.9810496010092534, "learning_rate": 1.8700354180184465e-06, "loss": 0.0348, "step": 736, "ts_encoder_learning_rate": 1.857552010559382e-06 }, { "epoch": 1.9410989373094982, "grad_norm": 0.5266455663863897, "learning_rate": 1.857552010559382e-06, "loss": 0.0166, "step": 737, "ts_encoder_learning_rate": 1.8451008966154622e-06 }, { "epoch": 1.9437350687865558, "grad_norm": 0.6289054316907776, "learning_rate": 1.8451008966154622e-06, "loss": 0.0296, "step": 738, "ts_encoder_learning_rate": 1.8326822041411524e-06 }, { "epoch": 1.946371200263613, "grad_norm": 0.9572882282852405, "learning_rate": 1.8326822041411524e-06, "loss": 0.0445, "step": 739, "ts_encoder_learning_rate": 1.8202960607577246e-06 }, { "epoch": 1.9490073317406704, "grad_norm": 1.314826995873295, "learning_rate": 1.8202960607577246e-06, "loss": 0.0348, "step": 740, "ts_encoder_learning_rate": 1.8079425937519729e-06 }, { "epoch": 1.951643463217728, "grad_norm": 0.996575258797314, "learning_rate": 1.8079425937519729e-06, "loss": 0.0378, "step": 741, "ts_encoder_learning_rate": 1.7956219300748796e-06 }, { "epoch": 1.9542795946947855, "grad_norm": 1.0607844107395987, "learning_rate": 1.7956219300748796e-06, "loss": 0.0329, "step": 742, "ts_encoder_learning_rate": 1.7833341963403312e-06 }, { "epoch": 1.9569157261718428, "grad_norm": 0.9094930783526255, "learning_rate": 1.7833341963403312e-06, "loss": 0.0473, "step": 743, "ts_encoder_learning_rate": 1.771079518823799e-06 }, { "epoch": 1.9595518576489002, "grad_norm": 1.1039356442587034, "learning_rate": 1.771079518823799e-06, "loss": 0.0422, "step": 744, "ts_encoder_learning_rate": 1.7588580234610592e-06 }, { "epoch": 1.9621879891259577, "grad_norm": 0.8174282643803666, "learning_rate": 1.7588580234610592e-06, "loss": 0.0406, "step": 745, "ts_encoder_learning_rate": 1.7466698358468825e-06 }, { "epoch": 1.964824120603015, "grad_norm": 0.7214652835866359, "learning_rate": 1.7466698358468825e-06, "loss": 0.0326, "step": 746, "ts_encoder_learning_rate": 1.7345150812337564e-06 }, { "epoch": 1.9674602520800724, "grad_norm": 1.2667444333402988, "learning_rate": 1.7345150812337564e-06, "loss": 0.0415, "step": 747, "ts_encoder_learning_rate": 1.7223938845305932e-06 }, { "epoch": 1.97009638355713, "grad_norm": 0.9773679348538253, "learning_rate": 1.7223938845305932e-06, "loss": 0.0369, "step": 748, "ts_encoder_learning_rate": 1.7103063703014372e-06 }, { "epoch": 1.9727325150341875, "grad_norm": 1.1325567784328214, "learning_rate": 1.7103063703014372e-06, "loss": 0.0364, "step": 749, "ts_encoder_learning_rate": 1.6982526627642043e-06 }, { "epoch": 1.9753686465112448, "grad_norm": 0.8996954549073614, "learning_rate": 1.6982526627642043e-06, "loss": 0.0329, "step": 750, "ts_encoder_learning_rate": 1.6862328857893856e-06 }, { "epoch": 1.978004777988302, "grad_norm": 0.7139793070917391, "learning_rate": 1.6862328857893856e-06, "loss": 0.036, "step": 751, "ts_encoder_learning_rate": 1.6742471628987894e-06 }, { "epoch": 1.9806409094653596, "grad_norm": 0.7560937228601913, "learning_rate": 1.6742471628987894e-06, "loss": 0.0334, "step": 752, "ts_encoder_learning_rate": 1.6622956172642601e-06 }, { "epoch": 1.983277040942417, "grad_norm": 1.6103163078678424, "learning_rate": 1.6622956172642601e-06, "loss": 0.0371, "step": 753, "ts_encoder_learning_rate": 1.6503783717064247e-06 }, { "epoch": 1.9859131724194743, "grad_norm": 1.6781996659316394, "learning_rate": 1.6503783717064247e-06, "loss": 0.0394, "step": 754, "ts_encoder_learning_rate": 1.6384955486934157e-06 }, { "epoch": 1.9885493038965318, "grad_norm": 1.69153268173132, "learning_rate": 1.6384955486934157e-06, "loss": 0.0311, "step": 755, "ts_encoder_learning_rate": 1.6266472703396286e-06 }, { "epoch": 1.9911854353735894, "grad_norm": 0.7719824022746361, "learning_rate": 1.6266472703396286e-06, "loss": 0.0262, "step": 756, "ts_encoder_learning_rate": 1.6148336584044539e-06 }, { "epoch": 1.9938215668506467, "grad_norm": 1.3733252437657573, "learning_rate": 1.6148336584044539e-06, "loss": 0.0455, "step": 757, "ts_encoder_learning_rate": 1.6030548342910302e-06 }, { "epoch": 1.996457698327704, "grad_norm": 0.9175183384074306, "learning_rate": 1.6030548342910302e-06, "loss": 0.0322, "step": 758, "ts_encoder_learning_rate": 1.5913109190450033e-06 }, { "epoch": 1.9990938298047616, "grad_norm": 1.1099095764607132, "learning_rate": 1.5913109190450033e-06, "loss": 0.0394, "step": 759, "ts_encoder_learning_rate": 1.5796020333532696e-06 }, { "epoch": 2.0, "grad_norm": 1.1099095764607132, "learning_rate": 1.5796020333532696e-06, "loss": 0.0092, "step": 760, "ts_encoder_learning_rate": 1.567928297542749e-06 }, { "epoch": 2.0026361314770575, "grad_norm": 0.9713547183808008, "learning_rate": 1.567928297542749e-06, "loss": 0.023, "step": 761, "ts_encoder_learning_rate": 1.5562898315791354e-06 }, { "epoch": 2.0052722629541146, "grad_norm": 0.7009540897546592, "learning_rate": 1.5562898315791354e-06, "loss": 0.0244, "step": 762, "ts_encoder_learning_rate": 1.544686755065677e-06 }, { "epoch": 2.007908394431172, "grad_norm": 0.6101415210273791, "learning_rate": 1.544686755065677e-06, "loss": 0.0277, "step": 763, "ts_encoder_learning_rate": 1.5331191872419349e-06 }, { "epoch": 2.0105445259082297, "grad_norm": 1.326219222691662, "learning_rate": 1.5331191872419349e-06, "loss": 0.0299, "step": 764, "ts_encoder_learning_rate": 1.5215872469825682e-06 }, { "epoch": 2.0131806573852873, "grad_norm": 1.0899654240221532, "learning_rate": 1.5215872469825682e-06, "loss": 0.0272, "step": 765, "ts_encoder_learning_rate": 1.510091052796105e-06 }, { "epoch": 2.0158167888623444, "grad_norm": 1.087434101218195, "learning_rate": 1.510091052796105e-06, "loss": 0.0369, "step": 766, "ts_encoder_learning_rate": 1.4986307228237268e-06 }, { "epoch": 2.018452920339402, "grad_norm": 0.7445449957284473, "learning_rate": 1.4986307228237268e-06, "loss": 0.0281, "step": 767, "ts_encoder_learning_rate": 1.4872063748380544e-06 }, { "epoch": 2.0210890518164595, "grad_norm": 0.7216822829913329, "learning_rate": 1.4872063748380544e-06, "loss": 0.0268, "step": 768, "ts_encoder_learning_rate": 1.4758181262419425e-06 }, { "epoch": 2.0237251832935166, "grad_norm": 0.6931261426498003, "learning_rate": 1.4758181262419425e-06, "loss": 0.0282, "step": 769, "ts_encoder_learning_rate": 1.4644660940672628e-06 }, { "epoch": 2.026361314770574, "grad_norm": 0.9133884937103188, "learning_rate": 1.4644660940672628e-06, "loss": 0.0322, "step": 770, "ts_encoder_learning_rate": 1.4531503949737107e-06 }, { "epoch": 2.0289974462476317, "grad_norm": 1.0086856147825554, "learning_rate": 1.4531503949737107e-06, "loss": 0.0363, "step": 771, "ts_encoder_learning_rate": 1.4418711452476048e-06 }, { "epoch": 2.031633577724689, "grad_norm": 1.6070882260232144, "learning_rate": 1.4418711452476048e-06, "loss": 0.0249, "step": 772, "ts_encoder_learning_rate": 1.4306284608006837e-06 }, { "epoch": 2.0342697092017463, "grad_norm": 0.7314536247624556, "learning_rate": 1.4306284608006837e-06, "loss": 0.0274, "step": 773, "ts_encoder_learning_rate": 1.4194224571689286e-06 }, { "epoch": 2.036905840678804, "grad_norm": 0.9803513438084768, "learning_rate": 1.4194224571689286e-06, "loss": 0.0262, "step": 774, "ts_encoder_learning_rate": 1.4082532495113627e-06 }, { "epoch": 2.0395419721558614, "grad_norm": 0.8684478482698204, "learning_rate": 1.4082532495113627e-06, "loss": 0.0347, "step": 775, "ts_encoder_learning_rate": 1.3971209526088764e-06 }, { "epoch": 2.0421781036329185, "grad_norm": 1.6463822697507045, "learning_rate": 1.3971209526088764e-06, "loss": 0.0199, "step": 776, "ts_encoder_learning_rate": 1.3860256808630429e-06 }, { "epoch": 2.044814235109976, "grad_norm": 0.6542575533228376, "learning_rate": 1.3860256808630429e-06, "loss": 0.0162, "step": 777, "ts_encoder_learning_rate": 1.3749675482949487e-06 }, { "epoch": 2.0474503665870336, "grad_norm": 0.8414387552707323, "learning_rate": 1.3749675482949487e-06, "loss": 0.0194, "step": 778, "ts_encoder_learning_rate": 1.3639466685440133e-06 }, { "epoch": 2.050086498064091, "grad_norm": 0.8702132467221131, "learning_rate": 1.3639466685440133e-06, "loss": 0.0225, "step": 779, "ts_encoder_learning_rate": 1.3529631548668298e-06 }, { "epoch": 2.0527226295411483, "grad_norm": 0.7835222402353583, "learning_rate": 1.3529631548668298e-06, "loss": 0.0254, "step": 780, "ts_encoder_learning_rate": 1.3420171201359933e-06 }, { "epoch": 2.055358761018206, "grad_norm": 0.8617901557793036, "learning_rate": 1.3420171201359933e-06, "loss": 0.0214, "step": 781, "ts_encoder_learning_rate": 1.331108676838948e-06 }, { "epoch": 2.0579948924952634, "grad_norm": 0.8593905981383287, "learning_rate": 1.331108676838948e-06, "loss": 0.0215, "step": 782, "ts_encoder_learning_rate": 1.3202379370768254e-06 }, { "epoch": 2.0606310239723205, "grad_norm": 0.9813541929977725, "learning_rate": 1.3202379370768254e-06, "loss": 0.0244, "step": 783, "ts_encoder_learning_rate": 1.3094050125632973e-06 }, { "epoch": 2.063267155449378, "grad_norm": 0.973576019815649, "learning_rate": 1.3094050125632973e-06, "loss": 0.0267, "step": 784, "ts_encoder_learning_rate": 1.298610014623423e-06 }, { "epoch": 2.0659032869264355, "grad_norm": 0.6999339833328826, "learning_rate": 1.298610014623423e-06, "loss": 0.025, "step": 785, "ts_encoder_learning_rate": 1.2878530541925077e-06 }, { "epoch": 2.0685394184034926, "grad_norm": 1.0152957492148167, "learning_rate": 1.2878530541925077e-06, "loss": 0.0229, "step": 786, "ts_encoder_learning_rate": 1.2771342418149658e-06 }, { "epoch": 2.07117554988055, "grad_norm": 0.8698513957975991, "learning_rate": 1.2771342418149658e-06, "loss": 0.0265, "step": 787, "ts_encoder_learning_rate": 1.2664536876431755e-06 }, { "epoch": 2.0738116813576077, "grad_norm": 1.52591637899822, "learning_rate": 1.2664536876431755e-06, "loss": 0.0301, "step": 788, "ts_encoder_learning_rate": 1.2558115014363592e-06 }, { "epoch": 2.0764478128346653, "grad_norm": 0.9350945862866684, "learning_rate": 1.2558115014363592e-06, "loss": 0.0302, "step": 789, "ts_encoder_learning_rate": 1.2452077925594435e-06 }, { "epoch": 2.0790839443117224, "grad_norm": 0.8787004897897062, "learning_rate": 1.2452077925594435e-06, "loss": 0.0231, "step": 790, "ts_encoder_learning_rate": 1.234642669981946e-06 }, { "epoch": 2.08172007578878, "grad_norm": 0.6989519656800383, "learning_rate": 1.234642669981946e-06, "loss": 0.0277, "step": 791, "ts_encoder_learning_rate": 1.2241162422768444e-06 }, { "epoch": 2.0843562072658375, "grad_norm": 0.6816146317530842, "learning_rate": 1.2241162422768444e-06, "loss": 0.018, "step": 792, "ts_encoder_learning_rate": 1.2136286176194744e-06 }, { "epoch": 2.0869923387428946, "grad_norm": 0.9698620591378605, "learning_rate": 1.2136286176194744e-06, "loss": 0.0299, "step": 793, "ts_encoder_learning_rate": 1.203179903786401e-06 }, { "epoch": 2.089628470219952, "grad_norm": 1.0709130550444075, "learning_rate": 1.203179903786401e-06, "loss": 0.0267, "step": 794, "ts_encoder_learning_rate": 1.1927702081543279e-06 }, { "epoch": 2.0922646016970097, "grad_norm": 0.8147652241024573, "learning_rate": 1.1927702081543279e-06, "loss": 0.026, "step": 795, "ts_encoder_learning_rate": 1.1823996376989849e-06 }, { "epoch": 2.094900733174067, "grad_norm": 0.9483527494174168, "learning_rate": 1.1823996376989849e-06, "loss": 0.0363, "step": 796, "ts_encoder_learning_rate": 1.1720682989940264e-06 }, { "epoch": 2.0975368646511243, "grad_norm": 1.2061966993790179, "learning_rate": 1.1720682989940264e-06, "loss": 0.0204, "step": 797, "ts_encoder_learning_rate": 1.1617762982099446e-06 }, { "epoch": 2.100172996128182, "grad_norm": 0.6048278244859194, "learning_rate": 1.1617762982099446e-06, "loss": 0.025, "step": 798, "ts_encoder_learning_rate": 1.1515237411129698e-06 }, { "epoch": 2.1028091276052394, "grad_norm": 0.9014144838221269, "learning_rate": 1.1515237411129698e-06, "loss": 0.0231, "step": 799, "ts_encoder_learning_rate": 1.141310733063991e-06 }, { "epoch": 2.1054452590822965, "grad_norm": 0.779763420400092, "learning_rate": 1.141310733063991e-06, "loss": 0.0227, "step": 800, "ts_encoder_learning_rate": 1.1311373790174656e-06 }, { "epoch": 2.108081390559354, "grad_norm": 1.1737340769983258, "learning_rate": 1.1311373790174656e-06, "loss": 0.0204, "step": 801, "ts_encoder_learning_rate": 1.1210037835203508e-06 }, { "epoch": 2.1107175220364116, "grad_norm": 0.9202975123489521, "learning_rate": 1.1210037835203508e-06, "loss": 0.023, "step": 802, "ts_encoder_learning_rate": 1.1109100507110133e-06 }, { "epoch": 2.113353653513469, "grad_norm": 1.612705002454156, "learning_rate": 1.1109100507110133e-06, "loss": 0.0165, "step": 803, "ts_encoder_learning_rate": 1.1008562843181796e-06 }, { "epoch": 2.1159897849905263, "grad_norm": 0.6460378715478632, "learning_rate": 1.1008562843181796e-06, "loss": 0.0315, "step": 804, "ts_encoder_learning_rate": 1.0908425876598512e-06 }, { "epoch": 2.118625916467584, "grad_norm": 0.9987512843535336, "learning_rate": 1.0908425876598512e-06, "loss": 0.0295, "step": 805, "ts_encoder_learning_rate": 1.0808690636422587e-06 }, { "epoch": 2.1212620479446413, "grad_norm": 1.0149200333623514, "learning_rate": 1.0808690636422587e-06, "loss": 0.0211, "step": 806, "ts_encoder_learning_rate": 1.0709358147587883e-06 }, { "epoch": 2.1238981794216985, "grad_norm": 0.9119696321626057, "learning_rate": 1.0709358147587883e-06, "loss": 0.02, "step": 807, "ts_encoder_learning_rate": 1.0610429430889451e-06 }, { "epoch": 2.126534310898756, "grad_norm": 1.0352913040595793, "learning_rate": 1.0610429430889451e-06, "loss": 0.0275, "step": 808, "ts_encoder_learning_rate": 1.0511905502972885e-06 }, { "epoch": 2.1291704423758135, "grad_norm": 1.0733762586021238, "learning_rate": 1.0511905502972885e-06, "loss": 0.0183, "step": 809, "ts_encoder_learning_rate": 1.041378737632402e-06 }, { "epoch": 2.131806573852871, "grad_norm": 1.1892591837270536, "learning_rate": 1.041378737632402e-06, "loss": 0.0264, "step": 810, "ts_encoder_learning_rate": 1.031607605925839e-06 }, { "epoch": 2.134442705329928, "grad_norm": 0.886404626398071, "learning_rate": 1.031607605925839e-06, "loss": 0.0183, "step": 811, "ts_encoder_learning_rate": 1.0218772555910955e-06 }, { "epoch": 2.1370788368069857, "grad_norm": 0.7677446140505582, "learning_rate": 1.0218772555910955e-06, "loss": 0.0174, "step": 812, "ts_encoder_learning_rate": 1.0121877866225783e-06 }, { "epoch": 2.1397149682840433, "grad_norm": 0.9647219206963211, "learning_rate": 1.0121877866225783e-06, "loss": 0.0312, "step": 813, "ts_encoder_learning_rate": 1.0025392985945703e-06 }, { "epoch": 2.1423510997611004, "grad_norm": 1.18116857549236, "learning_rate": 1.0025392985945703e-06, "loss": 0.0328, "step": 814, "ts_encoder_learning_rate": 9.929318906602176e-07 }, { "epoch": 2.144987231238158, "grad_norm": 1.2020647866794596, "learning_rate": 9.929318906602176e-07, "loss": 0.0248, "step": 815, "ts_encoder_learning_rate": 9.833656615504978e-07 }, { "epoch": 2.1476233627152155, "grad_norm": 0.8370574190670566, "learning_rate": 9.833656615504978e-07, "loss": 0.0146, "step": 816, "ts_encoder_learning_rate": 9.738407095732195e-07 }, { "epoch": 2.150259494192273, "grad_norm": 1.0299221864599144, "learning_rate": 9.738407095732195e-07, "loss": 0.0217, "step": 817, "ts_encoder_learning_rate": 9.643571326119982e-07 }, { "epoch": 2.15289562566933, "grad_norm": 0.8018415484592077, "learning_rate": 9.643571326119982e-07, "loss": 0.0139, "step": 818, "ts_encoder_learning_rate": 9.549150281252633e-07 }, { "epoch": 2.1555317571463877, "grad_norm": 0.5028892166090512, "learning_rate": 9.549150281252633e-07, "loss": 0.0197, "step": 819, "ts_encoder_learning_rate": 9.455144931452459e-07 }, { "epoch": 2.158167888623445, "grad_norm": 0.6355920739705047, "learning_rate": 9.455144931452459e-07, "loss": 0.0251, "step": 820, "ts_encoder_learning_rate": 9.361556242769871e-07 }, { "epoch": 2.1608040201005023, "grad_norm": 1.1083085846789678, "learning_rate": 9.361556242769871e-07, "loss": 0.02, "step": 821, "ts_encoder_learning_rate": 9.26838517697346e-07 }, { "epoch": 2.16344015157756, "grad_norm": 0.9876632473531536, "learning_rate": 9.26838517697346e-07, "loss": 0.0351, "step": 822, "ts_encoder_learning_rate": 9.175632691540065e-07 }, { "epoch": 2.1660762830546174, "grad_norm": 2.2693420737855523, "learning_rate": 9.175632691540065e-07, "loss": 0.0276, "step": 823, "ts_encoder_learning_rate": 9.083299739645007e-07 }, { "epoch": 2.168712414531675, "grad_norm": 23.226392431703353, "learning_rate": 9.083299739645007e-07, "loss": 0.0242, "step": 824, "ts_encoder_learning_rate": 8.991387270152202e-07 }, { "epoch": 2.171348546008732, "grad_norm": 0.75772536779788, "learning_rate": 8.991387270152202e-07, "loss": 0.0178, "step": 825, "ts_encoder_learning_rate": 8.899896227604509e-07 }, { "epoch": 2.1739846774857896, "grad_norm": 0.7879892794840004, "learning_rate": 8.899896227604509e-07, "loss": 0.021, "step": 826, "ts_encoder_learning_rate": 8.808827552213917e-07 }, { "epoch": 2.176620808962847, "grad_norm": 0.6285382943818703, "learning_rate": 8.808827552213917e-07, "loss": 0.0275, "step": 827, "ts_encoder_learning_rate": 8.718182179851998e-07 }, { "epoch": 2.1792569404399043, "grad_norm": 0.8438059272532128, "learning_rate": 8.718182179851998e-07, "loss": 0.0222, "step": 828, "ts_encoder_learning_rate": 8.627961042040183e-07 }, { "epoch": 2.181893071916962, "grad_norm": 0.8841129842435451, "learning_rate": 8.627961042040183e-07, "loss": 0.0263, "step": 829, "ts_encoder_learning_rate": 8.538165065940263e-07 }, { "epoch": 2.1845292033940193, "grad_norm": 1.1647962750768701, "learning_rate": 8.538165065940263e-07, "loss": 0.0287, "step": 830, "ts_encoder_learning_rate": 8.448795174344803e-07 }, { "epoch": 2.187165334871077, "grad_norm": 0.6223444639742729, "learning_rate": 8.448795174344803e-07, "loss": 0.0182, "step": 831, "ts_encoder_learning_rate": 8.359852285667752e-07 }, { "epoch": 2.189801466348134, "grad_norm": 0.848108350576564, "learning_rate": 8.359852285667752e-07, "loss": 0.0278, "step": 832, "ts_encoder_learning_rate": 8.271337313934869e-07 }, { "epoch": 2.1924375978251915, "grad_norm": 0.6976201249959373, "learning_rate": 8.271337313934869e-07, "loss": 0.0254, "step": 833, "ts_encoder_learning_rate": 8.183251168774476e-07 }, { "epoch": 2.195073729302249, "grad_norm": 0.839401112904285, "learning_rate": 8.183251168774476e-07, "loss": 0.0146, "step": 834, "ts_encoder_learning_rate": 8.095594755407971e-07 }, { "epoch": 2.197709860779306, "grad_norm": 1.2538696240617628, "learning_rate": 8.095594755407971e-07, "loss": 0.0247, "step": 835, "ts_encoder_learning_rate": 8.008368974640634e-07 }, { "epoch": 2.2003459922563637, "grad_norm": 2.0495039413289633, "learning_rate": 8.008368974640634e-07, "loss": 0.0259, "step": 836, "ts_encoder_learning_rate": 7.921574722852343e-07 }, { "epoch": 2.2029821237334213, "grad_norm": 0.8059355568372082, "learning_rate": 7.921574722852343e-07, "loss": 0.0254, "step": 837, "ts_encoder_learning_rate": 7.835212891988292e-07 }, { "epoch": 2.205618255210479, "grad_norm": 1.1603161094730907, "learning_rate": 7.835212891988292e-07, "loss": 0.0222, "step": 838, "ts_encoder_learning_rate": 7.749284369549954e-07 }, { "epoch": 2.208254386687536, "grad_norm": 0.642346193891705, "learning_rate": 7.749284369549954e-07, "loss": 0.0173, "step": 839, "ts_encoder_learning_rate": 7.663790038585794e-07 }, { "epoch": 2.2108905181645935, "grad_norm": 0.969152855410992, "learning_rate": 7.663790038585794e-07, "loss": 0.0195, "step": 840, "ts_encoder_learning_rate": 7.578730777682386e-07 }, { "epoch": 2.213526649641651, "grad_norm": 0.6006548208059819, "learning_rate": 7.578730777682386e-07, "loss": 0.0187, "step": 841, "ts_encoder_learning_rate": 7.494107460955207e-07 }, { "epoch": 2.216162781118708, "grad_norm": 0.9560113777748233, "learning_rate": 7.494107460955207e-07, "loss": 0.019, "step": 842, "ts_encoder_learning_rate": 7.409920958039795e-07 }, { "epoch": 2.2187989125957657, "grad_norm": 1.0871309619249734, "learning_rate": 7.409920958039795e-07, "loss": 0.0196, "step": 843, "ts_encoder_learning_rate": 7.326172134082704e-07 }, { "epoch": 2.221435044072823, "grad_norm": 0.8419045626414003, "learning_rate": 7.326172134082704e-07, "loss": 0.0326, "step": 844, "ts_encoder_learning_rate": 7.242861849732696e-07 }, { "epoch": 2.2240711755498808, "grad_norm": 1.0419111956667741, "learning_rate": 7.242861849732696e-07, "loss": 0.0314, "step": 845, "ts_encoder_learning_rate": 7.159990961131818e-07 }, { "epoch": 2.226707307026938, "grad_norm": 0.8401412848739376, "learning_rate": 7.159990961131818e-07, "loss": 0.0304, "step": 846, "ts_encoder_learning_rate": 7.077560319906696e-07 }, { "epoch": 2.2293434385039954, "grad_norm": 0.9818544247472701, "learning_rate": 7.077560319906696e-07, "loss": 0.0328, "step": 847, "ts_encoder_learning_rate": 6.995570773159693e-07 }, { "epoch": 2.231979569981053, "grad_norm": 0.9988984189818604, "learning_rate": 6.995570773159693e-07, "loss": 0.036, "step": 848, "ts_encoder_learning_rate": 6.914023163460248e-07 }, { "epoch": 2.23461570145811, "grad_norm": 1.8121563715937015, "learning_rate": 6.914023163460248e-07, "loss": 0.0293, "step": 849, "ts_encoder_learning_rate": 6.832918328836247e-07 }, { "epoch": 2.2372518329351676, "grad_norm": 1.4137491922964698, "learning_rate": 6.832918328836247e-07, "loss": 0.0284, "step": 850, "ts_encoder_learning_rate": 6.752257102765325e-07 }, { "epoch": 2.239887964412225, "grad_norm": 0.7043298270791407, "learning_rate": 6.752257102765325e-07, "loss": 0.0254, "step": 851, "ts_encoder_learning_rate": 6.6720403141664e-07 }, { "epoch": 2.2425240958892827, "grad_norm": 0.9273068808985826, "learning_rate": 6.6720403141664e-07, "loss": 0.0254, "step": 852, "ts_encoder_learning_rate": 6.592268787391077e-07 }, { "epoch": 2.24516022736634, "grad_norm": 0.9385646367897301, "learning_rate": 6.592268787391077e-07, "loss": 0.0298, "step": 853, "ts_encoder_learning_rate": 6.512943342215234e-07 }, { "epoch": 2.2477963588433973, "grad_norm": 0.6600040913818298, "learning_rate": 6.512943342215234e-07, "loss": 0.0237, "step": 854, "ts_encoder_learning_rate": 6.43406479383053e-07 }, { "epoch": 2.250432490320455, "grad_norm": 0.6282127208289683, "learning_rate": 6.43406479383053e-07, "loss": 0.0213, "step": 855, "ts_encoder_learning_rate": 6.355633952836115e-07 }, { "epoch": 2.253068621797512, "grad_norm": 0.6788883483435632, "learning_rate": 6.355633952836115e-07, "loss": 0.021, "step": 856, "ts_encoder_learning_rate": 6.277651625230219e-07 }, { "epoch": 2.2557047532745695, "grad_norm": 0.6668730348823718, "learning_rate": 6.277651625230219e-07, "loss": 0.0236, "step": 857, "ts_encoder_learning_rate": 6.200118612401918e-07 }, { "epoch": 2.258340884751627, "grad_norm": 7.060799735251775, "learning_rate": 6.200118612401918e-07, "loss": 0.0297, "step": 858, "ts_encoder_learning_rate": 6.12303571112286e-07 }, { "epoch": 2.2609770162286846, "grad_norm": 1.046944161496044, "learning_rate": 6.12303571112286e-07, "loss": 0.0273, "step": 859, "ts_encoder_learning_rate": 6.04640371353914e-07 }, { "epoch": 2.2636131477057417, "grad_norm": 1.1099528570927184, "learning_rate": 6.04640371353914e-07, "loss": 0.0252, "step": 860, "ts_encoder_learning_rate": 5.9702234071631e-07 }, { "epoch": 2.2662492791827993, "grad_norm": 0.9773409732921918, "learning_rate": 5.9702234071631e-07, "loss": 0.0294, "step": 861, "ts_encoder_learning_rate": 5.89449557486525e-07 }, { "epoch": 2.268885410659857, "grad_norm": 1.0917243460679, "learning_rate": 5.89449557486525e-07, "loss": 0.0138, "step": 862, "ts_encoder_learning_rate": 5.819220994866237e-07 }, { "epoch": 2.271521542136914, "grad_norm": 0.6888106894141612, "learning_rate": 5.819220994866237e-07, "loss": 0.0144, "step": 863, "ts_encoder_learning_rate": 5.744400440728826e-07 }, { "epoch": 2.2741576736139715, "grad_norm": 0.633492770066237, "learning_rate": 5.744400440728826e-07, "loss": 0.0232, "step": 864, "ts_encoder_learning_rate": 5.670034681349995e-07 }, { "epoch": 2.276793805091029, "grad_norm": 6.0165889346314865, "learning_rate": 5.670034681349995e-07, "loss": 0.025, "step": 865, "ts_encoder_learning_rate": 5.596124480952975e-07 }, { "epoch": 2.2794299365680866, "grad_norm": 1.5182521559787252, "learning_rate": 5.596124480952975e-07, "loss": 0.0268, "step": 866, "ts_encoder_learning_rate": 5.522670599079416e-07 }, { "epoch": 2.2820660680451437, "grad_norm": 0.8001358979720962, "learning_rate": 5.522670599079416e-07, "loss": 0.0213, "step": 867, "ts_encoder_learning_rate": 5.449673790581611e-07 }, { "epoch": 2.284702199522201, "grad_norm": 1.8105317634620746, "learning_rate": 5.449673790581611e-07, "loss": 0.0252, "step": 868, "ts_encoder_learning_rate": 5.377134805614714e-07 }, { "epoch": 2.2873383309992588, "grad_norm": 0.9912234653856702, "learning_rate": 5.377134805614714e-07, "loss": 0.0237, "step": 869, "ts_encoder_learning_rate": 5.305054389629022e-07 }, { "epoch": 2.289974462476316, "grad_norm": 0.7760075581423171, "learning_rate": 5.305054389629022e-07, "loss": 0.0235, "step": 870, "ts_encoder_learning_rate": 5.233433283362349e-07 }, { "epoch": 2.2926105939533734, "grad_norm": 0.6707225852541246, "learning_rate": 5.233433283362349e-07, "loss": 0.0217, "step": 871, "ts_encoder_learning_rate": 5.162272222832349e-07 }, { "epoch": 2.295246725430431, "grad_norm": 0.6919555345400337, "learning_rate": 5.162272222832349e-07, "loss": 0.027, "step": 872, "ts_encoder_learning_rate": 5.091571939329049e-07 }, { "epoch": 2.2978828569074885, "grad_norm": 0.8176259644215725, "learning_rate": 5.091571939329049e-07, "loss": 0.0196, "step": 873, "ts_encoder_learning_rate": 5.021333159407232e-07 }, { "epoch": 2.3005189883845456, "grad_norm": 8.688779032036406, "learning_rate": 5.021333159407232e-07, "loss": 0.0222, "step": 874, "ts_encoder_learning_rate": 4.951556604879049e-07 }, { "epoch": 2.303155119861603, "grad_norm": 0.4934714850429601, "learning_rate": 4.951556604879049e-07, "loss": 0.0225, "step": 875, "ts_encoder_learning_rate": 4.882242992806546e-07 }, { "epoch": 2.3057912513386607, "grad_norm": 0.6932226455966719, "learning_rate": 4.882242992806546e-07, "loss": 0.0279, "step": 876, "ts_encoder_learning_rate": 4.813393035494329e-07 }, { "epoch": 2.308427382815718, "grad_norm": 0.785391963657986, "learning_rate": 4.813393035494329e-07, "loss": 0.0257, "step": 877, "ts_encoder_learning_rate": 4.745007440482252e-07 }, { "epoch": 2.3110635142927753, "grad_norm": 0.7002142776657593, "learning_rate": 4.745007440482252e-07, "loss": 0.0167, "step": 878, "ts_encoder_learning_rate": 4.677086910538092e-07 }, { "epoch": 2.313699645769833, "grad_norm": 0.6570845075498787, "learning_rate": 4.677086910538092e-07, "loss": 0.0204, "step": 879, "ts_encoder_learning_rate": 4.6096321436504e-07 }, { "epoch": 2.3163357772468904, "grad_norm": 0.708907031204872, "learning_rate": 4.6096321436504e-07, "loss": 0.0194, "step": 880, "ts_encoder_learning_rate": 4.542643833021254e-07 }, { "epoch": 2.3189719087239475, "grad_norm": 0.7432937034337077, "learning_rate": 4.542643833021254e-07, "loss": 0.0244, "step": 881, "ts_encoder_learning_rate": 4.4761226670592074e-07 }, { "epoch": 2.321608040201005, "grad_norm": 1.0057609426812313, "learning_rate": 4.4761226670592074e-07, "loss": 0.0243, "step": 882, "ts_encoder_learning_rate": 4.410069329372152e-07 }, { "epoch": 2.3242441716780626, "grad_norm": 0.9748291224257548, "learning_rate": 4.410069329372152e-07, "loss": 0.0261, "step": 883, "ts_encoder_learning_rate": 4.344484498760343e-07 }, { "epoch": 2.3268803031551197, "grad_norm": 0.7468342295758325, "learning_rate": 4.344484498760343e-07, "loss": 0.017, "step": 884, "ts_encoder_learning_rate": 4.279368849209381e-07 }, { "epoch": 2.3295164346321773, "grad_norm": 0.4990301640406677, "learning_rate": 4.279368849209381e-07, "loss": 0.0163, "step": 885, "ts_encoder_learning_rate": 4.214723049883307e-07 }, { "epoch": 2.332152566109235, "grad_norm": 0.8120692295636992, "learning_rate": 4.214723049883307e-07, "loss": 0.0271, "step": 886, "ts_encoder_learning_rate": 4.150547765117746e-07 }, { "epoch": 2.334788697586292, "grad_norm": 0.8689858151247111, "learning_rate": 4.150547765117746e-07, "loss": 0.0172, "step": 887, "ts_encoder_learning_rate": 4.086843654413031e-07 }, { "epoch": 2.3374248290633495, "grad_norm": 0.7557921121418426, "learning_rate": 4.086843654413031e-07, "loss": 0.0148, "step": 888, "ts_encoder_learning_rate": 4.0236113724274716e-07 }, { "epoch": 2.340060960540407, "grad_norm": 0.631642992248285, "learning_rate": 4.0236113724274716e-07, "loss": 0.0256, "step": 889, "ts_encoder_learning_rate": 3.960851568970586e-07 }, { "epoch": 2.342697092017464, "grad_norm": 1.080316244149369, "learning_rate": 3.960851568970586e-07, "loss": 0.0217, "step": 890, "ts_encoder_learning_rate": 3.8985648889964755e-07 }, { "epoch": 2.3453332234945217, "grad_norm": 1.054131562422457, "learning_rate": 3.8985648889964755e-07, "loss": 0.0211, "step": 891, "ts_encoder_learning_rate": 3.83675197259713e-07 }, { "epoch": 2.347969354971579, "grad_norm": 0.8646617497823404, "learning_rate": 3.83675197259713e-07, "loss": 0.019, "step": 892, "ts_encoder_learning_rate": 3.77541345499593e-07 }, { "epoch": 2.3506054864486368, "grad_norm": 0.7244425140139443, "learning_rate": 3.77541345499593e-07, "loss": 0.0184, "step": 893, "ts_encoder_learning_rate": 3.7145499665410147e-07 }, { "epoch": 2.353241617925694, "grad_norm": 0.5515618977177071, "learning_rate": 3.7145499665410147e-07, "loss": 0.0143, "step": 894, "ts_encoder_learning_rate": 3.6541621326989183e-07 }, { "epoch": 2.3558777494027514, "grad_norm": 0.7590154170192799, "learning_rate": 3.6541621326989183e-07, "loss": 0.0241, "step": 895, "ts_encoder_learning_rate": 3.5942505740480583e-07 }, { "epoch": 2.358513880879809, "grad_norm": 0.7373124344034562, "learning_rate": 3.5942505740480583e-07, "loss": 0.0203, "step": 896, "ts_encoder_learning_rate": 3.534815906272404e-07 }, { "epoch": 2.361150012356866, "grad_norm": 0.7737906407252233, "learning_rate": 3.534815906272404e-07, "loss": 0.0122, "step": 897, "ts_encoder_learning_rate": 3.475858740155108e-07 }, { "epoch": 2.3637861438339236, "grad_norm": 0.5846122157036439, "learning_rate": 3.475858740155108e-07, "loss": 0.0213, "step": 898, "ts_encoder_learning_rate": 3.417379681572297e-07 }, { "epoch": 2.366422275310981, "grad_norm": 1.1971330946069132, "learning_rate": 3.417379681572297e-07, "loss": 0.0182, "step": 899, "ts_encoder_learning_rate": 3.359379331486762e-07 }, { "epoch": 2.3690584067880387, "grad_norm": 0.5894377764075366, "learning_rate": 3.359379331486762e-07, "loss": 0.0228, "step": 900, "ts_encoder_learning_rate": 3.301858285941845e-07 }, { "epoch": 2.371694538265096, "grad_norm": 0.6964796421497281, "learning_rate": 3.301858285941845e-07, "loss": 0.019, "step": 901, "ts_encoder_learning_rate": 3.2448171360552837e-07 }, { "epoch": 2.3743306697421533, "grad_norm": 0.8068516225517265, "learning_rate": 3.2448171360552837e-07, "loss": 0.0242, "step": 902, "ts_encoder_learning_rate": 3.18825646801314e-07 }, { "epoch": 2.376966801219211, "grad_norm": 0.6128923162506175, "learning_rate": 3.18825646801314e-07, "loss": 0.0219, "step": 903, "ts_encoder_learning_rate": 3.1321768630638073e-07 }, { "epoch": 2.379602932696268, "grad_norm": 0.9632236483373098, "learning_rate": 3.1321768630638073e-07, "loss": 0.0234, "step": 904, "ts_encoder_learning_rate": 3.076578897511978e-07 }, { "epoch": 2.3822390641733255, "grad_norm": 0.7461189266458699, "learning_rate": 3.076578897511978e-07, "loss": 0.0144, "step": 905, "ts_encoder_learning_rate": 3.0214631427127883e-07 }, { "epoch": 2.384875195650383, "grad_norm": 0.716902757344509, "learning_rate": 3.0214631427127883e-07, "loss": 0.0177, "step": 906, "ts_encoder_learning_rate": 2.966830165065876e-07 }, { "epoch": 2.3875113271274406, "grad_norm": 0.5861169684467917, "learning_rate": 2.966830165065876e-07, "loss": 0.027, "step": 907, "ts_encoder_learning_rate": 2.912680526009626e-07 }, { "epoch": 2.3901474586044977, "grad_norm": 0.834726511672464, "learning_rate": 2.912680526009626e-07, "loss": 0.0231, "step": 908, "ts_encoder_learning_rate": 2.8590147820153513e-07 }, { "epoch": 2.3927835900815553, "grad_norm": 0.6958964808420537, "learning_rate": 2.8590147820153513e-07, "loss": 0.0211, "step": 909, "ts_encoder_learning_rate": 2.8058334845816214e-07 }, { "epoch": 2.395419721558613, "grad_norm": 0.7097540987940018, "learning_rate": 2.8058334845816214e-07, "loss": 0.0169, "step": 910, "ts_encoder_learning_rate": 2.7531371802285436e-07 }, { "epoch": 2.39805585303567, "grad_norm": 0.8778409447989682, "learning_rate": 2.7531371802285436e-07, "loss": 0.026, "step": 911, "ts_encoder_learning_rate": 2.7009264104921606e-07 }, { "epoch": 2.4006919845127275, "grad_norm": 0.7010540368539773, "learning_rate": 2.7009264104921606e-07, "loss": 0.0175, "step": 912, "ts_encoder_learning_rate": 2.6492017119189415e-07 }, { "epoch": 2.403328115989785, "grad_norm": 0.5997922863707355, "learning_rate": 2.6492017119189415e-07, "loss": 0.0222, "step": 913, "ts_encoder_learning_rate": 2.5979636160601673e-07 }, { "epoch": 2.4059642474668426, "grad_norm": 0.667743261533583, "learning_rate": 2.5979636160601673e-07, "loss": 0.0197, "step": 914, "ts_encoder_learning_rate": 2.547212649466568e-07 }, { "epoch": 2.4086003789438997, "grad_norm": 0.8452729220439927, "learning_rate": 2.547212649466568e-07, "loss": 0.0197, "step": 915, "ts_encoder_learning_rate": 2.4969493336828353e-07 }, { "epoch": 2.411236510420957, "grad_norm": 0.726874711910585, "learning_rate": 2.4969493336828353e-07, "loss": 0.024, "step": 916, "ts_encoder_learning_rate": 2.447174185242324e-07 }, { "epoch": 2.4138726418980148, "grad_norm": 0.6390019959390945, "learning_rate": 2.447174185242324e-07, "loss": 0.0109, "step": 917, "ts_encoder_learning_rate": 2.397887715661679e-07 }, { "epoch": 2.416508773375072, "grad_norm": 0.844652867992998, "learning_rate": 2.397887715661679e-07, "loss": 0.0276, "step": 918, "ts_encoder_learning_rate": 2.3490904314356412e-07 }, { "epoch": 2.4191449048521294, "grad_norm": 0.8450103641231302, "learning_rate": 2.3490904314356412e-07, "loss": 0.0209, "step": 919, "ts_encoder_learning_rate": 2.3007828340318117e-07 }, { "epoch": 2.421781036329187, "grad_norm": 0.9914129840085654, "learning_rate": 2.3007828340318117e-07, "loss": 0.0224, "step": 920, "ts_encoder_learning_rate": 2.2529654198854834e-07 }, { "epoch": 2.4244171678062445, "grad_norm": 0.7485782865431817, "learning_rate": 2.2529654198854834e-07, "loss": 0.0231, "step": 921, "ts_encoder_learning_rate": 2.205638680394573e-07 }, { "epoch": 2.4270532992833016, "grad_norm": 0.863071039677985, "learning_rate": 2.205638680394573e-07, "loss": 0.0217, "step": 922, "ts_encoder_learning_rate": 2.1588031019145638e-07 }, { "epoch": 2.429689430760359, "grad_norm": 0.7178423022311772, "learning_rate": 2.1588031019145638e-07, "loss": 0.0245, "step": 923, "ts_encoder_learning_rate": 2.1124591657534776e-07 }, { "epoch": 2.4323255622374167, "grad_norm": 0.8272616881685937, "learning_rate": 2.1124591657534776e-07, "loss": 0.0199, "step": 924, "ts_encoder_learning_rate": 2.0666073481669714e-07 }, { "epoch": 2.434961693714474, "grad_norm": 0.5263023342082227, "learning_rate": 2.0666073481669714e-07, "loss": 0.0223, "step": 925, "ts_encoder_learning_rate": 2.0212481203534083e-07 }, { "epoch": 2.4375978251915313, "grad_norm": 1.1822141493073524, "learning_rate": 2.0212481203534083e-07, "loss": 0.0273, "step": 926, "ts_encoder_learning_rate": 1.9763819484490353e-07 }, { "epoch": 2.440233956668589, "grad_norm": 0.6818080172841297, "learning_rate": 1.9763819484490353e-07, "loss": 0.02, "step": 927, "ts_encoder_learning_rate": 1.932009293523196e-07 }, { "epoch": 2.4428700881456464, "grad_norm": 0.9008331212699131, "learning_rate": 1.932009293523196e-07, "loss": 0.0216, "step": 928, "ts_encoder_learning_rate": 1.8881306115735632e-07 }, { "epoch": 2.4455062196227035, "grad_norm": 0.7779126298027532, "learning_rate": 1.8881306115735632e-07, "loss": 0.0185, "step": 929, "ts_encoder_learning_rate": 1.8447463535214872e-07 }, { "epoch": 2.448142351099761, "grad_norm": 0.9120921557733357, "learning_rate": 1.8447463535214872e-07, "loss": 0.0186, "step": 930, "ts_encoder_learning_rate": 1.801856965207338e-07 }, { "epoch": 2.4507784825768186, "grad_norm": 0.740476134046954, "learning_rate": 1.801856965207338e-07, "loss": 0.02, "step": 931, "ts_encoder_learning_rate": 1.7594628873859488e-07 }, { "epoch": 2.4534146140538757, "grad_norm": 0.720558833321943, "learning_rate": 1.7594628873859488e-07, "loss": 0.0165, "step": 932, "ts_encoder_learning_rate": 1.7175645557220567e-07 }, { "epoch": 2.4560507455309333, "grad_norm": 0.7232369354288679, "learning_rate": 1.7175645557220567e-07, "loss": 0.0258, "step": 933, "ts_encoder_learning_rate": 1.6761624007858524e-07 }, { "epoch": 2.458686877007991, "grad_norm": 0.9619568884736648, "learning_rate": 1.6761624007858524e-07, "loss": 0.0206, "step": 934, "ts_encoder_learning_rate": 1.6352568480485277e-07 }, { "epoch": 2.4613230084850484, "grad_norm": 0.5484139865997792, "learning_rate": 1.6352568480485277e-07, "loss": 0.0174, "step": 935, "ts_encoder_learning_rate": 1.594848317877934e-07 }, { "epoch": 2.4639591399621055, "grad_norm": 1.1119566633908704, "learning_rate": 1.594848317877934e-07, "loss": 0.024, "step": 936, "ts_encoder_learning_rate": 1.5549372255342367e-07 }, { "epoch": 2.466595271439163, "grad_norm": 0.8218791802362867, "learning_rate": 1.5549372255342367e-07, "loss": 0.0193, "step": 937, "ts_encoder_learning_rate": 1.5155239811656562e-07 }, { "epoch": 2.4692314029162206, "grad_norm": 0.6933235377212601, "learning_rate": 1.5155239811656562e-07, "loss": 0.0288, "step": 938, "ts_encoder_learning_rate": 1.4766089898042678e-07 }, { "epoch": 2.4718675343932777, "grad_norm": 0.953369090336964, "learning_rate": 1.4766089898042678e-07, "loss": 0.0214, "step": 939, "ts_encoder_learning_rate": 1.4381926513618139e-07 }, { "epoch": 2.474503665870335, "grad_norm": 0.6976764387240867, "learning_rate": 1.4381926513618139e-07, "loss": 0.0197, "step": 940, "ts_encoder_learning_rate": 1.4002753606256082e-07 }, { "epoch": 2.4771397973473928, "grad_norm": 0.7526896911937908, "learning_rate": 1.4002753606256082e-07, "loss": 0.0205, "step": 941, "ts_encoder_learning_rate": 1.362857507254478e-07 }, { "epoch": 2.4797759288244503, "grad_norm": 0.7732644266125883, "learning_rate": 1.362857507254478e-07, "loss": 0.0196, "step": 942, "ts_encoder_learning_rate": 1.3259394757747678e-07 }, { "epoch": 2.4824120603015074, "grad_norm": 0.7898334854513247, "learning_rate": 1.3259394757747678e-07, "loss": 0.0168, "step": 943, "ts_encoder_learning_rate": 1.2895216455763582e-07 }, { "epoch": 2.485048191778565, "grad_norm": 0.7203527294510174, "learning_rate": 1.2895216455763582e-07, "loss": 0.0215, "step": 944, "ts_encoder_learning_rate": 1.253604390908819e-07 }, { "epoch": 2.4876843232556225, "grad_norm": 0.8318588601172171, "learning_rate": 1.253604390908819e-07, "loss": 0.0233, "step": 945, "ts_encoder_learning_rate": 1.2181880808775026e-07 }, { "epoch": 2.4903204547326796, "grad_norm": 0.7763925821106455, "learning_rate": 1.2181880808775026e-07, "loss": 0.0231, "step": 946, "ts_encoder_learning_rate": 1.1832730794397951e-07 }, { "epoch": 2.492956586209737, "grad_norm": 0.7417703735767751, "learning_rate": 1.1832730794397951e-07, "loss": 0.0202, "step": 947, "ts_encoder_learning_rate": 1.1488597454013539e-07 }, { "epoch": 2.4955927176867947, "grad_norm": 0.7347992893208377, "learning_rate": 1.1488597454013539e-07, "loss": 0.0121, "step": 948, "ts_encoder_learning_rate": 1.1149484324124326e-07 }, { "epoch": 2.4982288491638522, "grad_norm": 0.5208636481653479, "learning_rate": 1.1149484324124326e-07, "loss": 0.0184, "step": 949, "ts_encoder_learning_rate": 1.0815394889642339e-07 }, { "epoch": 2.5008649806409093, "grad_norm": 0.6580687022583558, "learning_rate": 1.0815394889642339e-07, "loss": 0.0253, "step": 950, "ts_encoder_learning_rate": 1.0486332583853565e-07 }, { "epoch": 2.503501112117967, "grad_norm": 0.7791631897968705, "learning_rate": 1.0486332583853565e-07, "loss": 0.0177, "step": 951, "ts_encoder_learning_rate": 1.0162300788382263e-07 }, { "epoch": 2.5061372435950244, "grad_norm": 0.7718676787617951, "learning_rate": 1.0162300788382263e-07, "loss": 0.0242, "step": 952, "ts_encoder_learning_rate": 9.843302833156377e-08 }, { "epoch": 2.5087733750720815, "grad_norm": 0.9526114922481819, "learning_rate": 9.843302833156377e-08, "loss": 0.018, "step": 953, "ts_encoder_learning_rate": 9.529341996373675e-08 }, { "epoch": 2.511409506549139, "grad_norm": 0.6723748361084942, "learning_rate": 9.529341996373675e-08, "loss": 0.0142, "step": 954, "ts_encoder_learning_rate": 9.22042150446728e-08 }, { "epoch": 2.5140456380261966, "grad_norm": 0.640693460278807, "learning_rate": 9.22042150446728e-08, "loss": 0.0248, "step": 955, "ts_encoder_learning_rate": 8.916544532073413e-08 }, { "epoch": 2.516681769503254, "grad_norm": 0.7438871182485605, "learning_rate": 8.916544532073413e-08, "loss": 0.0176, "step": 956, "ts_encoder_learning_rate": 8.617714201998084e-08 }, { "epoch": 2.5193179009803113, "grad_norm": 0.6536893914893551, "learning_rate": 8.617714201998084e-08, "loss": 0.0197, "step": 957, "ts_encoder_learning_rate": 8.323933585185184e-08 }, { "epoch": 2.521954032457369, "grad_norm": 0.7966005611731805, "learning_rate": 8.323933585185184e-08, "loss": 0.0202, "step": 958, "ts_encoder_learning_rate": 8.035205700685167e-08 }, { "epoch": 2.5245901639344264, "grad_norm": 0.7751864100873821, "learning_rate": 8.035205700685167e-08, "loss": 0.0245, "step": 959, "ts_encoder_learning_rate": 7.7515335156238e-08 }, { "epoch": 2.5272262954114835, "grad_norm": 0.6397557109288652, "learning_rate": 7.7515335156238e-08, "loss": 0.0138, "step": 960, "ts_encoder_learning_rate": 7.47291994517163e-08 }, { "epoch": 2.529862426888541, "grad_norm": 0.9115949923033936, "learning_rate": 7.47291994517163e-08, "loss": 0.0278, "step": 961, "ts_encoder_learning_rate": 7.199367852514239e-08 }, { "epoch": 2.5324985583655986, "grad_norm": 0.6977436866064831, "learning_rate": 7.199367852514239e-08, "loss": 0.0169, "step": 962, "ts_encoder_learning_rate": 6.930880048822531e-08 }, { "epoch": 2.535134689842656, "grad_norm": 0.8119048152627732, "learning_rate": 6.930880048822531e-08, "loss": 0.0222, "step": 963, "ts_encoder_learning_rate": 6.667459293224155e-08 }, { "epoch": 2.537770821319713, "grad_norm": 0.6593343852854229, "learning_rate": 6.667459293224155e-08, "loss": 0.0226, "step": 964, "ts_encoder_learning_rate": 6.409108292774912e-08 }, { "epoch": 2.5404069527967708, "grad_norm": 0.7024263781864509, "learning_rate": 6.409108292774912e-08, "loss": 0.0175, "step": 965, "ts_encoder_learning_rate": 6.15582970243117e-08 }, { "epoch": 2.5430430842738283, "grad_norm": 0.5434330332434761, "learning_rate": 6.15582970243117e-08, "loss": 0.0229, "step": 966, "ts_encoder_learning_rate": 5.907626125022159e-08 }, { "epoch": 2.5456792157508854, "grad_norm": 0.9794479292209439, "learning_rate": 5.907626125022159e-08, "loss": 0.0244, "step": 967, "ts_encoder_learning_rate": 5.6645001112237694e-08 }, { "epoch": 2.548315347227943, "grad_norm": 0.9128017914715376, "learning_rate": 5.6645001112237694e-08, "loss": 0.0223, "step": 968, "ts_encoder_learning_rate": 5.426454159531913e-08 }, { "epoch": 2.5509514787050005, "grad_norm": 0.8322790378300886, "learning_rate": 5.426454159531913e-08, "loss": 0.0167, "step": 969, "ts_encoder_learning_rate": 5.1934907162370374e-08 }, { "epoch": 2.553587610182058, "grad_norm": 0.5828552348921294, "learning_rate": 5.1934907162370374e-08, "loss": 0.0233, "step": 970, "ts_encoder_learning_rate": 4.9656121753990924e-08 }, { "epoch": 2.556223741659115, "grad_norm": 0.6297264426484448, "learning_rate": 4.9656121753990924e-08, "loss": 0.0248, "step": 971, "ts_encoder_learning_rate": 4.742820878822496e-08 }, { "epoch": 2.5588598731361727, "grad_norm": 0.6449652253099856, "learning_rate": 4.742820878822496e-08, "loss": 0.018, "step": 972, "ts_encoder_learning_rate": 4.52511911603265e-08 }, { "epoch": 2.5614960046132302, "grad_norm": 0.8662936136802849, "learning_rate": 4.52511911603265e-08, "loss": 0.0207, "step": 973, "ts_encoder_learning_rate": 4.312509124251907e-08 }, { "epoch": 2.5641321360902873, "grad_norm": 0.8396191031005396, "learning_rate": 4.312509124251907e-08, "loss": 0.0229, "step": 974, "ts_encoder_learning_rate": 4.104993088376974e-08 }, { "epoch": 2.566768267567345, "grad_norm": 0.7510456306691026, "learning_rate": 4.104993088376974e-08, "loss": 0.0153, "step": 975, "ts_encoder_learning_rate": 3.902573140956101e-08 }, { "epoch": 2.5694043990444024, "grad_norm": 0.5430731666125107, "learning_rate": 3.902573140956101e-08, "loss": 0.0222, "step": 976, "ts_encoder_learning_rate": 3.705251362167484e-08 }, { "epoch": 2.57204053052146, "grad_norm": 0.750719791150226, "learning_rate": 3.705251362167484e-08, "loss": 0.0133, "step": 977, "ts_encoder_learning_rate": 3.513029779797783e-08 }, { "epoch": 2.574676661998517, "grad_norm": 0.6890160305022875, "learning_rate": 3.513029779797783e-08, "loss": 0.0226, "step": 978, "ts_encoder_learning_rate": 3.325910369220975e-08 }, { "epoch": 2.5773127934755746, "grad_norm": 0.640217064006493, "learning_rate": 3.325910369220975e-08, "loss": 0.0217, "step": 979, "ts_encoder_learning_rate": 3.143895053378698e-08 }, { "epoch": 2.579948924952632, "grad_norm": 0.650608671275551, "learning_rate": 3.143895053378698e-08, "loss": 0.021, "step": 980, "ts_encoder_learning_rate": 2.966985702759828e-08 }, { "epoch": 2.5825850564296893, "grad_norm": 1.4308555266835152, "learning_rate": 2.966985702759828e-08, "loss": 0.0281, "step": 981, "ts_encoder_learning_rate": 2.7951841353817676e-08 }, { "epoch": 2.585221187906747, "grad_norm": 1.0043277890456705, "learning_rate": 2.7951841353817676e-08, "loss": 0.0205, "step": 982, "ts_encoder_learning_rate": 2.6284921167712975e-08 }, { "epoch": 2.5878573193838044, "grad_norm": 0.7699218451759371, "learning_rate": 2.6284921167712975e-08, "loss": 0.0201, "step": 983, "ts_encoder_learning_rate": 2.4669113599469774e-08 }, { "epoch": 2.590493450860862, "grad_norm": 0.6897052459896869, "learning_rate": 2.4669113599469774e-08, "loss": 0.0242, "step": 984, "ts_encoder_learning_rate": 2.3104435254008852e-08 }, { "epoch": 2.593129582337919, "grad_norm": 0.7228194509828196, "learning_rate": 2.3104435254008852e-08, "loss": 0.0285, "step": 985, "ts_encoder_learning_rate": 2.159090221082294e-08 }, { "epoch": 2.5957657138149766, "grad_norm": 0.8174809900239196, "learning_rate": 2.159090221082294e-08, "loss": 0.0167, "step": 986, "ts_encoder_learning_rate": 2.012853002380466e-08 }, { "epoch": 2.5984018452920337, "grad_norm": 0.7653138858818684, "learning_rate": 2.012853002380466e-08, "loss": 0.0125, "step": 987, "ts_encoder_learning_rate": 1.8717333721091634e-08 }, { "epoch": 2.601037976769091, "grad_norm": 0.5150577808719591, "learning_rate": 1.8717333721091634e-08, "loss": 0.0201, "step": 988, "ts_encoder_learning_rate": 1.735732780490884e-08 }, { "epoch": 2.6036741082461488, "grad_norm": 0.7752673495066984, "learning_rate": 1.735732780490884e-08, "loss": 0.0122, "step": 989, "ts_encoder_learning_rate": 1.6048526251421502e-08 }, { "epoch": 2.6063102397232063, "grad_norm": 0.6444134054280553, "learning_rate": 1.6048526251421502e-08, "loss": 0.0202, "step": 990, "ts_encoder_learning_rate": 1.4790942510590767e-08 }, { "epoch": 2.608946371200264, "grad_norm": 0.6604248178517298, "learning_rate": 1.4790942510590767e-08, "loss": 0.0234, "step": 991, "ts_encoder_learning_rate": 1.3584589506034362e-08 }, { "epoch": 2.611582502677321, "grad_norm": 0.8166087014626134, "learning_rate": 1.3584589506034362e-08, "loss": 0.0212, "step": 992, "ts_encoder_learning_rate": 1.2429479634897268e-08 }, { "epoch": 2.6142186341543785, "grad_norm": 0.5979082769485419, "learning_rate": 1.2429479634897268e-08, "loss": 0.018, "step": 993, "ts_encoder_learning_rate": 1.132562476771959e-08 }, { "epoch": 2.6168547656314356, "grad_norm": 0.6136562468042444, "learning_rate": 1.132562476771959e-08, "loss": 0.026, "step": 994, "ts_encoder_learning_rate": 1.0273036248318325e-08 }, { "epoch": 2.619490897108493, "grad_norm": 0.738729241027843, "learning_rate": 1.0273036248318325e-08, "loss": 0.0182, "step": 995, "ts_encoder_learning_rate": 9.27172489366912e-09 }, { "epoch": 2.6221270285855507, "grad_norm": 0.6076766225952003, "learning_rate": 9.27172489366912e-09, "loss": 0.017, "step": 996, "ts_encoder_learning_rate": 8.321700993795812e-09 }, { "epoch": 2.6247631600626082, "grad_norm": 0.8828040407540239, "learning_rate": 8.321700993795812e-09, "loss": 0.0183, "step": 997, "ts_encoder_learning_rate": 7.422974311662723e-09 }, { "epoch": 2.627399291539666, "grad_norm": 0.6622087750202462, "learning_rate": 7.422974311662723e-09, "loss": 0.0174, "step": 998, "ts_encoder_learning_rate": 6.575554083078084e-09 }, { "epoch": 2.630035423016723, "grad_norm": 0.76863578860787, "learning_rate": 6.575554083078084e-09, "loss": 0.0191, "step": 999, "ts_encoder_learning_rate": 5.779449016595773e-09 }, { "epoch": 2.6326715544937804, "grad_norm": 0.633605481547031, "learning_rate": 5.779449016595773e-09, "loss": 0.0188, "step": 1000, "ts_encoder_learning_rate": 5.034667293427053e-09 }, { "epoch": 2.6326715544937804, "step": 1000, "total_flos": 869424341942272.0, "train_loss": 0.32716700187977404, "train_runtime": 47494.4505, "train_samples_per_second": 10.78, "train_steps_per_second": 0.021, "ts_encoder_learning_rate": 5.034667293427053e-09 } ], "logging_steps": 1.0, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 869424341942272.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }