STReasoner-8B-Align / trainer_state.json
Time-HD-Anonymous's picture
Upload folder using huggingface_hub
377e53c verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.6326715544937804,
"eval_steps": 500,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002636131477057418,
"grad_norm": 0.0,
"learning_rate": 0.0,
"loss": 14.8018,
"step": 1,
"ts_encoder_learning_rate": 0.0
},
{
"epoch": 0.005272262954114836,
"grad_norm": 0.0,
"learning_rate": 0.0,
"loss": 14.8252,
"step": 2,
"ts_encoder_learning_rate": 0.0
},
{
"epoch": 0.007908394431172255,
"grad_norm": 0.0,
"learning_rate": 0.0,
"loss": 15.0107,
"step": 3,
"ts_encoder_learning_rate": 0.0
},
{
"epoch": 0.010544525908229673,
"grad_norm": 0.0,
"learning_rate": 0.0,
"loss": 14.9011,
"step": 4,
"ts_encoder_learning_rate": 0.0
},
{
"epoch": 0.01318065738528709,
"grad_norm": 0.0,
"learning_rate": 0.0,
"loss": 14.897,
"step": 5,
"ts_encoder_learning_rate": 0.0
},
{
"epoch": 0.01581678886234451,
"grad_norm": 0.0,
"learning_rate": 0.0,
"loss": 14.9028,
"step": 6,
"ts_encoder_learning_rate": 0.0
},
{
"epoch": 0.018452920339401928,
"grad_norm": 0.0,
"learning_rate": 0.0,
"loss": 14.7914,
"step": 7,
"ts_encoder_learning_rate": 0.0
},
{
"epoch": 0.021089051816459346,
"grad_norm": 0.0,
"learning_rate": 0.0,
"loss": 15.1729,
"step": 8,
"ts_encoder_learning_rate": 0.0
},
{
"epoch": 0.023725183293516763,
"grad_norm": 0.0,
"learning_rate": 0.0,
"loss": 14.7748,
"step": 9,
"ts_encoder_learning_rate": 0.0
},
{
"epoch": 0.02636131477057418,
"grad_norm": 1096.9997559030376,
"learning_rate": 0.0,
"loss": 14.735,
"step": 10,
"ts_encoder_learning_rate": 5.000000000000001e-07
},
{
"epoch": 0.0289974462476316,
"grad_norm": 1096.9997559030376,
"learning_rate": 5.000000000000001e-07,
"loss": 14.8943,
"step": 11,
"ts_encoder_learning_rate": 5.000000000000001e-07
},
{
"epoch": 0.03163357772468902,
"grad_norm": 1305.2661541342568,
"learning_rate": 5.000000000000001e-07,
"loss": 14.5478,
"step": 12,
"ts_encoder_learning_rate": 1.0000000000000002e-06
},
{
"epoch": 0.034269709201746434,
"grad_norm": 1279.5606119323502,
"learning_rate": 1.0000000000000002e-06,
"loss": 14.542,
"step": 13,
"ts_encoder_learning_rate": 1.5e-06
},
{
"epoch": 0.036905840678803856,
"grad_norm": 1391.7635545286846,
"learning_rate": 1.5e-06,
"loss": 12.2412,
"step": 14,
"ts_encoder_learning_rate": 2.0000000000000003e-06
},
{
"epoch": 0.03954197215586128,
"grad_norm": 1848.3283721710281,
"learning_rate": 2.0000000000000003e-06,
"loss": 7.6029,
"step": 15,
"ts_encoder_learning_rate": 2.5e-06
},
{
"epoch": 0.04217810363291869,
"grad_norm": 1848.3283721710281,
"learning_rate": 2.5e-06,
"loss": 4.8804,
"step": 16,
"ts_encoder_learning_rate": 2.5e-06
},
{
"epoch": 0.04481423510997611,
"grad_norm": 1759.395829823404,
"learning_rate": 2.5e-06,
"loss": 4.8283,
"step": 17,
"ts_encoder_learning_rate": 3e-06
},
{
"epoch": 0.04745036658703353,
"grad_norm": 1407.6124078931969,
"learning_rate": 3e-06,
"loss": 3.5724,
"step": 18,
"ts_encoder_learning_rate": 3.5e-06
},
{
"epoch": 0.05008649806409095,
"grad_norm": 372.4651747290722,
"learning_rate": 3.5e-06,
"loss": 1.9155,
"step": 19,
"ts_encoder_learning_rate": 4.000000000000001e-06
},
{
"epoch": 0.05272262954114836,
"grad_norm": 938.4571697479417,
"learning_rate": 4.000000000000001e-06,
"loss": 1.9811,
"step": 20,
"ts_encoder_learning_rate": 4.5e-06
},
{
"epoch": 0.055358761018205783,
"grad_norm": 544.7103035134295,
"learning_rate": 4.5e-06,
"loss": 1.7949,
"step": 21,
"ts_encoder_learning_rate": 5e-06
},
{
"epoch": 0.0579948924952632,
"grad_norm": 454.5593620001429,
"learning_rate": 5e-06,
"loss": 1.9186,
"step": 22,
"ts_encoder_learning_rate": 5.500000000000001e-06
},
{
"epoch": 0.06063102397232062,
"grad_norm": 1081.6948838381863,
"learning_rate": 5.500000000000001e-06,
"loss": 2.2253,
"step": 23,
"ts_encoder_learning_rate": 6e-06
},
{
"epoch": 0.06326715544937804,
"grad_norm": 614.5108980638121,
"learning_rate": 6e-06,
"loss": 2.0748,
"step": 24,
"ts_encoder_learning_rate": 6.5000000000000004e-06
},
{
"epoch": 0.06590328692643546,
"grad_norm": 510.8405053914244,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.985,
"step": 25,
"ts_encoder_learning_rate": 7e-06
},
{
"epoch": 0.06853941840349287,
"grad_norm": 187.64105795677528,
"learning_rate": 7e-06,
"loss": 0.9204,
"step": 26,
"ts_encoder_learning_rate": 7.500000000000001e-06
},
{
"epoch": 0.07117554988055029,
"grad_norm": 239.94638266940726,
"learning_rate": 7.500000000000001e-06,
"loss": 0.7961,
"step": 27,
"ts_encoder_learning_rate": 8.000000000000001e-06
},
{
"epoch": 0.07381168135760771,
"grad_norm": 204.37461156136854,
"learning_rate": 8.000000000000001e-06,
"loss": 0.7757,
"step": 28,
"ts_encoder_learning_rate": 8.5e-06
},
{
"epoch": 0.07644781283466513,
"grad_norm": 394.9877265614201,
"learning_rate": 8.5e-06,
"loss": 0.8967,
"step": 29,
"ts_encoder_learning_rate": 9e-06
},
{
"epoch": 0.07908394431172255,
"grad_norm": 115.99747240190717,
"learning_rate": 9e-06,
"loss": 0.6932,
"step": 30,
"ts_encoder_learning_rate": 9.5e-06
},
{
"epoch": 0.08172007578877996,
"grad_norm": 203.24289413853845,
"learning_rate": 9.5e-06,
"loss": 0.6311,
"step": 31,
"ts_encoder_learning_rate": 1e-05
},
{
"epoch": 0.08435620726583738,
"grad_norm": 97.33610135135591,
"learning_rate": 1e-05,
"loss": 0.5115,
"step": 32,
"ts_encoder_learning_rate": 9.999974308631955e-06
},
{
"epoch": 0.0869923387428948,
"grad_norm": 346.00707857529187,
"learning_rate": 9.999974308631955e-06,
"loss": 0.4777,
"step": 33,
"ts_encoder_learning_rate": 9.999897234791831e-06
},
{
"epoch": 0.08962847021995222,
"grad_norm": 29.349977310917716,
"learning_rate": 9.999897234791831e-06,
"loss": 0.4537,
"step": 34,
"ts_encoder_learning_rate": 9.999768779271687e-06
},
{
"epoch": 0.09226460169700963,
"grad_norm": 112.56606641016495,
"learning_rate": 9.999768779271687e-06,
"loss": 0.8332,
"step": 35,
"ts_encoder_learning_rate": 9.999588943391597e-06
},
{
"epoch": 0.09490073317406705,
"grad_norm": 465.069049932412,
"learning_rate": 9.999588943391597e-06,
"loss": 1.0421,
"step": 36,
"ts_encoder_learning_rate": 9.999357728999657e-06
},
{
"epoch": 0.09753686465112447,
"grad_norm": 158.97965025080174,
"learning_rate": 9.999357728999657e-06,
"loss": 0.5741,
"step": 37,
"ts_encoder_learning_rate": 9.99907513847195e-06
},
{
"epoch": 0.1001729961281819,
"grad_norm": 194.59482532664853,
"learning_rate": 9.99907513847195e-06,
"loss": 0.484,
"step": 38,
"ts_encoder_learning_rate": 9.998741174712534e-06
},
{
"epoch": 0.10280912760523932,
"grad_norm": 161.100341083291,
"learning_rate": 9.998741174712534e-06,
"loss": 0.5337,
"step": 39,
"ts_encoder_learning_rate": 9.9983558411534e-06
},
{
"epoch": 0.10544525908229672,
"grad_norm": 27.126218719305797,
"learning_rate": 9.9983558411534e-06,
"loss": 0.5538,
"step": 40,
"ts_encoder_learning_rate": 9.997919141754448e-06
},
{
"epoch": 0.10808139055935415,
"grad_norm": 33.98098658560328,
"learning_rate": 9.997919141754448e-06,
"loss": 0.7161,
"step": 41,
"ts_encoder_learning_rate": 9.99743108100344e-06
},
{
"epoch": 0.11071752203641157,
"grad_norm": 39.621572486406116,
"learning_rate": 9.99743108100344e-06,
"loss": 0.3756,
"step": 42,
"ts_encoder_learning_rate": 9.996891663915955e-06
},
{
"epoch": 0.11335365351346899,
"grad_norm": 14.92159451052064,
"learning_rate": 9.996891663915955e-06,
"loss": 0.4255,
"step": 43,
"ts_encoder_learning_rate": 9.99630089603534e-06
},
{
"epoch": 0.1159897849905264,
"grad_norm": 13.462409209351557,
"learning_rate": 9.99630089603534e-06,
"loss": 0.4359,
"step": 44,
"ts_encoder_learning_rate": 9.995658783432645e-06
},
{
"epoch": 0.11862591646758382,
"grad_norm": 5.901719710197823,
"learning_rate": 9.995658783432645e-06,
"loss": 0.3448,
"step": 45,
"ts_encoder_learning_rate": 9.994965332706574e-06
},
{
"epoch": 0.12126204794464124,
"grad_norm": 32.35793808957399,
"learning_rate": 9.994965332706574e-06,
"loss": 0.4025,
"step": 46,
"ts_encoder_learning_rate": 9.994220550983404e-06
},
{
"epoch": 0.12389817942169866,
"grad_norm": 31.71042211582521,
"learning_rate": 9.994220550983404e-06,
"loss": 0.5392,
"step": 47,
"ts_encoder_learning_rate": 9.993424445916923e-06
},
{
"epoch": 0.12653431089875608,
"grad_norm": 21.433144223039534,
"learning_rate": 9.993424445916923e-06,
"loss": 0.4416,
"step": 48,
"ts_encoder_learning_rate": 9.992577025688338e-06
},
{
"epoch": 0.1291704423758135,
"grad_norm": 9.093244564527424,
"learning_rate": 9.992577025688338e-06,
"loss": 0.3626,
"step": 49,
"ts_encoder_learning_rate": 9.991678299006206e-06
},
{
"epoch": 0.13180657385287092,
"grad_norm": 4.629496592285392,
"learning_rate": 9.991678299006206e-06,
"loss": 0.3829,
"step": 50,
"ts_encoder_learning_rate": 9.990728275106332e-06
},
{
"epoch": 0.13444270532992833,
"grad_norm": 4.275838877635983,
"learning_rate": 9.990728275106332e-06,
"loss": 0.3262,
"step": 51,
"ts_encoder_learning_rate": 9.989726963751683e-06
},
{
"epoch": 0.13707883680698574,
"grad_norm": 4.249381704965988,
"learning_rate": 9.989726963751683e-06,
"loss": 0.3252,
"step": 52,
"ts_encoder_learning_rate": 9.98867437523228e-06
},
{
"epoch": 0.13971496828404317,
"grad_norm": 6.056412755943698,
"learning_rate": 9.98867437523228e-06,
"loss": 0.276,
"step": 53,
"ts_encoder_learning_rate": 9.987570520365105e-06
},
{
"epoch": 0.14235109976110058,
"grad_norm": 5.208273616470221,
"learning_rate": 9.987570520365105e-06,
"loss": 0.2754,
"step": 54,
"ts_encoder_learning_rate": 9.986415410493966e-06
},
{
"epoch": 0.14498723123815802,
"grad_norm": 3.7353303435304244,
"learning_rate": 9.986415410493966e-06,
"loss": 0.2529,
"step": 55,
"ts_encoder_learning_rate": 9.98520905748941e-06
},
{
"epoch": 0.14762336271521542,
"grad_norm": 3.933015208783408,
"learning_rate": 9.98520905748941e-06,
"loss": 0.2508,
"step": 56,
"ts_encoder_learning_rate": 9.983951473748579e-06
},
{
"epoch": 0.15025949419227283,
"grad_norm": 5.206399280832945,
"learning_rate": 9.983951473748579e-06,
"loss": 0.2651,
"step": 57,
"ts_encoder_learning_rate": 9.982642672195093e-06
},
{
"epoch": 0.15289562566933027,
"grad_norm": 4.167850168478869,
"learning_rate": 9.982642672195093e-06,
"loss": 0.2868,
"step": 58,
"ts_encoder_learning_rate": 9.98128266627891e-06
},
{
"epoch": 0.15553175714638767,
"grad_norm": 9.226577568221924,
"learning_rate": 9.98128266627891e-06,
"loss": 0.2833,
"step": 59,
"ts_encoder_learning_rate": 9.979871469976197e-06
},
{
"epoch": 0.1581678886234451,
"grad_norm": 3.7123405016527316,
"learning_rate": 9.979871469976197e-06,
"loss": 0.2578,
"step": 60,
"ts_encoder_learning_rate": 9.978409097789178e-06
},
{
"epoch": 0.16080402010050251,
"grad_norm": 3.049493694179588,
"learning_rate": 9.978409097789178e-06,
"loss": 0.2509,
"step": 61,
"ts_encoder_learning_rate": 9.976895564745993e-06
},
{
"epoch": 0.16344015157755992,
"grad_norm": 2.8746718898314314,
"learning_rate": 9.976895564745993e-06,
"loss": 0.2367,
"step": 62,
"ts_encoder_learning_rate": 9.975330886400531e-06
},
{
"epoch": 0.16607628305461736,
"grad_norm": 2.1229971064439614,
"learning_rate": 9.975330886400531e-06,
"loss": 0.2456,
"step": 63,
"ts_encoder_learning_rate": 9.973715078832288e-06
},
{
"epoch": 0.16871241453167476,
"grad_norm": 1.9770119063232516,
"learning_rate": 9.973715078832288e-06,
"loss": 0.2251,
"step": 64,
"ts_encoder_learning_rate": 9.972048158646184e-06
},
{
"epoch": 0.17134854600873217,
"grad_norm": 2.5667195491487016,
"learning_rate": 9.972048158646184e-06,
"loss": 0.2445,
"step": 65,
"ts_encoder_learning_rate": 9.970330142972403e-06
},
{
"epoch": 0.1739846774857896,
"grad_norm": 2.1113696627510117,
"learning_rate": 9.970330142972403e-06,
"loss": 0.2574,
"step": 66,
"ts_encoder_learning_rate": 9.968561049466214e-06
},
{
"epoch": 0.17662080896284701,
"grad_norm": 1.6243156106778176,
"learning_rate": 9.968561049466214e-06,
"loss": 0.2303,
"step": 67,
"ts_encoder_learning_rate": 9.966740896307791e-06
},
{
"epoch": 0.17925694043990445,
"grad_norm": 1.6849608770961007,
"learning_rate": 9.966740896307791e-06,
"loss": 0.2243,
"step": 68,
"ts_encoder_learning_rate": 9.964869702202023e-06
},
{
"epoch": 0.18189307191696186,
"grad_norm": 3.92330386577651,
"learning_rate": 9.964869702202023e-06,
"loss": 0.2441,
"step": 69,
"ts_encoder_learning_rate": 9.962947486378325e-06
},
{
"epoch": 0.18452920339401926,
"grad_norm": 2.582255962327349,
"learning_rate": 9.962947486378325e-06,
"loss": 0.2398,
"step": 70,
"ts_encoder_learning_rate": 9.96097426859044e-06
},
{
"epoch": 0.1871653348710767,
"grad_norm": 1.7505976231929627,
"learning_rate": 9.96097426859044e-06,
"loss": 0.2189,
"step": 71,
"ts_encoder_learning_rate": 9.95895006911623e-06
},
{
"epoch": 0.1898014663481341,
"grad_norm": 2.1937879121282116,
"learning_rate": 9.95895006911623e-06,
"loss": 0.2301,
"step": 72,
"ts_encoder_learning_rate": 9.956874908757482e-06
},
{
"epoch": 0.19243759782519154,
"grad_norm": 2.1666041664824265,
"learning_rate": 9.956874908757482e-06,
"loss": 0.2432,
"step": 73,
"ts_encoder_learning_rate": 9.954748808839675e-06
},
{
"epoch": 0.19507372930224895,
"grad_norm": 1.7390080947257538,
"learning_rate": 9.954748808839675e-06,
"loss": 0.2475,
"step": 74,
"ts_encoder_learning_rate": 9.952571791211776e-06
},
{
"epoch": 0.19770986077930636,
"grad_norm": 1.8227633742041385,
"learning_rate": 9.952571791211776e-06,
"loss": 0.205,
"step": 75,
"ts_encoder_learning_rate": 9.950343878246011e-06
},
{
"epoch": 0.2003459922563638,
"grad_norm": 2.023155095367864,
"learning_rate": 9.950343878246011e-06,
"loss": 0.222,
"step": 76,
"ts_encoder_learning_rate": 9.948065092837631e-06
},
{
"epoch": 0.2029821237334212,
"grad_norm": 2.5393740081357885,
"learning_rate": 9.948065092837631e-06,
"loss": 0.2264,
"step": 77,
"ts_encoder_learning_rate": 9.945735458404681e-06
},
{
"epoch": 0.20561825521047863,
"grad_norm": 1.9070977453067859,
"learning_rate": 9.945735458404681e-06,
"loss": 0.1948,
"step": 78,
"ts_encoder_learning_rate": 9.943354998887763e-06
},
{
"epoch": 0.20825438668753604,
"grad_norm": 2.228467040055354,
"learning_rate": 9.943354998887763e-06,
"loss": 0.2352,
"step": 79,
"ts_encoder_learning_rate": 9.94092373874978e-06
},
{
"epoch": 0.21089051816459345,
"grad_norm": 2.1754655976070985,
"learning_rate": 9.94092373874978e-06,
"loss": 0.2051,
"step": 80,
"ts_encoder_learning_rate": 9.938441702975689e-06
},
{
"epoch": 0.21352664964165088,
"grad_norm": 1.5948561463002036,
"learning_rate": 9.938441702975689e-06,
"loss": 0.2104,
"step": 81,
"ts_encoder_learning_rate": 9.935908917072253e-06
},
{
"epoch": 0.2161627811187083,
"grad_norm": 2.9184103349665955,
"learning_rate": 9.935908917072253e-06,
"loss": 0.2214,
"step": 82,
"ts_encoder_learning_rate": 9.93332540706776e-06
},
{
"epoch": 0.21879891259576573,
"grad_norm": 2.533165757626651,
"learning_rate": 9.93332540706776e-06,
"loss": 0.2182,
"step": 83,
"ts_encoder_learning_rate": 9.930691199511775e-06
},
{
"epoch": 0.22143504407282313,
"grad_norm": 2.4172123332256072,
"learning_rate": 9.930691199511775e-06,
"loss": 0.2477,
"step": 84,
"ts_encoder_learning_rate": 9.928006321474859e-06
},
{
"epoch": 0.22407117554988054,
"grad_norm": 2.2355360356090865,
"learning_rate": 9.928006321474859e-06,
"loss": 0.2153,
"step": 85,
"ts_encoder_learning_rate": 9.925270800548285e-06
},
{
"epoch": 0.22670730702693798,
"grad_norm": 1.325879276978752,
"learning_rate": 9.925270800548285e-06,
"loss": 0.2106,
"step": 86,
"ts_encoder_learning_rate": 9.922484664843763e-06
},
{
"epoch": 0.22934343850399538,
"grad_norm": 2.105985586302988,
"learning_rate": 9.922484664843763e-06,
"loss": 0.2033,
"step": 87,
"ts_encoder_learning_rate": 9.91964794299315e-06
},
{
"epoch": 0.2319795699810528,
"grad_norm": 1.627701668717812,
"learning_rate": 9.91964794299315e-06,
"loss": 0.2063,
"step": 88,
"ts_encoder_learning_rate": 9.916760664148148e-06
},
{
"epoch": 0.23461570145811023,
"grad_norm": 1.7548289492746183,
"learning_rate": 9.916760664148148e-06,
"loss": 0.2023,
"step": 89,
"ts_encoder_learning_rate": 9.91382285798002e-06
},
{
"epoch": 0.23725183293516763,
"grad_norm": 1.2779758188322887,
"learning_rate": 9.91382285798002e-06,
"loss": 0.1859,
"step": 90,
"ts_encoder_learning_rate": 9.910834554679266e-06
},
{
"epoch": 0.23988796441222507,
"grad_norm": 1.4309994411495965,
"learning_rate": 9.910834554679266e-06,
"loss": 0.1985,
"step": 91,
"ts_encoder_learning_rate": 9.907795784955327e-06
},
{
"epoch": 0.24252409588928248,
"grad_norm": 1.578028044854179,
"learning_rate": 9.907795784955327e-06,
"loss": 0.197,
"step": 92,
"ts_encoder_learning_rate": 9.904706580036265e-06
},
{
"epoch": 0.24516022736633988,
"grad_norm": 1.4796574606576762,
"learning_rate": 9.904706580036265e-06,
"loss": 0.1819,
"step": 93,
"ts_encoder_learning_rate": 9.901566971668437e-06
},
{
"epoch": 0.24779635884339732,
"grad_norm": 1.4288017254755137,
"learning_rate": 9.901566971668437e-06,
"loss": 0.2007,
"step": 94,
"ts_encoder_learning_rate": 9.898376992116179e-06
},
{
"epoch": 0.2504324903204547,
"grad_norm": 2.1391069510229137,
"learning_rate": 9.898376992116179e-06,
"loss": 0.2147,
"step": 95,
"ts_encoder_learning_rate": 9.895136674161466e-06
},
{
"epoch": 0.25306862179751216,
"grad_norm": 1.4103238390166355,
"learning_rate": 9.895136674161466e-06,
"loss": 0.1678,
"step": 96,
"ts_encoder_learning_rate": 9.891846051103578e-06
},
{
"epoch": 0.2557047532745696,
"grad_norm": 1.7272702030863043,
"learning_rate": 9.891846051103578e-06,
"loss": 0.212,
"step": 97,
"ts_encoder_learning_rate": 9.888505156758758e-06
},
{
"epoch": 0.258340884751627,
"grad_norm": 1.465691780286817,
"learning_rate": 9.888505156758758e-06,
"loss": 0.1886,
"step": 98,
"ts_encoder_learning_rate": 9.885114025459865e-06
},
{
"epoch": 0.2609770162286844,
"grad_norm": 1.5753790137474581,
"learning_rate": 9.885114025459865e-06,
"loss": 0.1846,
"step": 99,
"ts_encoder_learning_rate": 9.881672692056022e-06
},
{
"epoch": 0.26361314770574185,
"grad_norm": 6.38876297290493,
"learning_rate": 9.881672692056022e-06,
"loss": 0.221,
"step": 100,
"ts_encoder_learning_rate": 9.878181191912251e-06
},
{
"epoch": 0.2662492791827992,
"grad_norm": 8.212765911278606,
"learning_rate": 9.878181191912251e-06,
"loss": 0.2088,
"step": 101,
"ts_encoder_learning_rate": 9.874639560909118e-06
},
{
"epoch": 0.26888541065985666,
"grad_norm": 97.72702166479309,
"learning_rate": 9.874639560909118e-06,
"loss": 0.2366,
"step": 102,
"ts_encoder_learning_rate": 9.871047835442365e-06
},
{
"epoch": 0.2715215421369141,
"grad_norm": 97.72702166479309,
"learning_rate": 9.871047835442365e-06,
"loss": 0.2448,
"step": 103,
"ts_encoder_learning_rate": 9.871047835442365e-06
},
{
"epoch": 0.2741576736139715,
"grad_norm": 52.034102007317564,
"learning_rate": 9.871047835442365e-06,
"loss": 0.2336,
"step": 104,
"ts_encoder_learning_rate": 9.867406052422525e-06
},
{
"epoch": 0.2767938050910289,
"grad_norm": 52.034102007317564,
"learning_rate": 9.867406052422525e-06,
"loss": 0.2643,
"step": 105,
"ts_encoder_learning_rate": 9.867406052422525e-06
},
{
"epoch": 0.27942993656808635,
"grad_norm": 52.034102007317564,
"learning_rate": 9.867406052422525e-06,
"loss": 0.2327,
"step": 106,
"ts_encoder_learning_rate": 9.867406052422525e-06
},
{
"epoch": 0.2820660680451437,
"grad_norm": 9.876750537498761,
"learning_rate": 9.867406052422525e-06,
"loss": 0.2537,
"step": 107,
"ts_encoder_learning_rate": 9.863714249274553e-06
},
{
"epoch": 0.28470219952220116,
"grad_norm": 10.163299154086943,
"learning_rate": 9.863714249274553e-06,
"loss": 0.2425,
"step": 108,
"ts_encoder_learning_rate": 9.85997246393744e-06
},
{
"epoch": 0.2873383309992586,
"grad_norm": 3.2455067886441973,
"learning_rate": 9.85997246393744e-06,
"loss": 0.1929,
"step": 109,
"ts_encoder_learning_rate": 9.85618073486382e-06
},
{
"epoch": 0.28997446247631603,
"grad_norm": 2.5488848745593913,
"learning_rate": 9.85618073486382e-06,
"loss": 0.2007,
"step": 110,
"ts_encoder_learning_rate": 9.852339101019574e-06
},
{
"epoch": 0.2926105939533734,
"grad_norm": 2.062458273578386,
"learning_rate": 9.852339101019574e-06,
"loss": 0.2245,
"step": 111,
"ts_encoder_learning_rate": 9.848447601883436e-06
},
{
"epoch": 0.29524672543043085,
"grad_norm": 1.8908032493798421,
"learning_rate": 9.848447601883436e-06,
"loss": 0.2017,
"step": 112,
"ts_encoder_learning_rate": 9.844506277446577e-06
},
{
"epoch": 0.2978828569074883,
"grad_norm": 1.8585429810855012,
"learning_rate": 9.844506277446577e-06,
"loss": 0.17,
"step": 113,
"ts_encoder_learning_rate": 9.840515168212208e-06
},
{
"epoch": 0.30051898838454566,
"grad_norm": 1.6115065133773239,
"learning_rate": 9.840515168212208e-06,
"loss": 0.2084,
"step": 114,
"ts_encoder_learning_rate": 9.836474315195148e-06
},
{
"epoch": 0.3031551198616031,
"grad_norm": 3.04800232147694,
"learning_rate": 9.836474315195148e-06,
"loss": 0.2319,
"step": 115,
"ts_encoder_learning_rate": 9.832383759921415e-06
},
{
"epoch": 0.30579125133866053,
"grad_norm": 3.3376308967003245,
"learning_rate": 9.832383759921415e-06,
"loss": 0.2059,
"step": 116,
"ts_encoder_learning_rate": 9.828243544427795e-06
},
{
"epoch": 0.3084273828157179,
"grad_norm": 1.847624347245679,
"learning_rate": 9.828243544427795e-06,
"loss": 0.1637,
"step": 117,
"ts_encoder_learning_rate": 9.824053711261405e-06
},
{
"epoch": 0.31106351429277534,
"grad_norm": 6.259456880309696,
"learning_rate": 9.824053711261405e-06,
"loss": 0.2051,
"step": 118,
"ts_encoder_learning_rate": 9.819814303479268e-06
},
{
"epoch": 0.3136996457698328,
"grad_norm": 1.248955065574141,
"learning_rate": 9.819814303479268e-06,
"loss": 0.1729,
"step": 119,
"ts_encoder_learning_rate": 9.815525364647853e-06
},
{
"epoch": 0.3163357772468902,
"grad_norm": 1.9069742114108161,
"learning_rate": 9.815525364647853e-06,
"loss": 0.1725,
"step": 120,
"ts_encoder_learning_rate": 9.811186938842645e-06
},
{
"epoch": 0.3189719087239476,
"grad_norm": 2.578824804080835,
"learning_rate": 9.811186938842645e-06,
"loss": 0.2071,
"step": 121,
"ts_encoder_learning_rate": 9.80679907064768e-06
},
{
"epoch": 0.32160804020100503,
"grad_norm": 1.4884102177576088,
"learning_rate": 9.80679907064768e-06,
"loss": 0.1971,
"step": 122,
"ts_encoder_learning_rate": 9.802361805155097e-06
},
{
"epoch": 0.32424417167806246,
"grad_norm": 3.950574735875806,
"learning_rate": 9.802361805155097e-06,
"loss": 0.2118,
"step": 123,
"ts_encoder_learning_rate": 9.797875187964661e-06
},
{
"epoch": 0.32688030315511984,
"grad_norm": 2.0658586146827194,
"learning_rate": 9.797875187964661e-06,
"loss": 0.1905,
"step": 124,
"ts_encoder_learning_rate": 9.793339265183303e-06
},
{
"epoch": 0.3295164346321773,
"grad_norm": 1.7249946266097584,
"learning_rate": 9.793339265183303e-06,
"loss": 0.1573,
"step": 125,
"ts_encoder_learning_rate": 9.788754083424654e-06
},
{
"epoch": 0.3321525661092347,
"grad_norm": 2.2599848390315667,
"learning_rate": 9.788754083424654e-06,
"loss": 0.1879,
"step": 126,
"ts_encoder_learning_rate": 9.784119689808545e-06
},
{
"epoch": 0.3347886975862921,
"grad_norm": 1.8125389329195718,
"learning_rate": 9.784119689808545e-06,
"loss": 0.2048,
"step": 127,
"ts_encoder_learning_rate": 9.779436131960544e-06
},
{
"epoch": 0.33742482906334953,
"grad_norm": 2.506219649349012,
"learning_rate": 9.779436131960544e-06,
"loss": 0.1869,
"step": 128,
"ts_encoder_learning_rate": 9.774703458011453e-06
},
{
"epoch": 0.34006096054040696,
"grad_norm": 1.8024855840730984,
"learning_rate": 9.774703458011453e-06,
"loss": 0.1962,
"step": 129,
"ts_encoder_learning_rate": 9.76992171659682e-06
},
{
"epoch": 0.34269709201746434,
"grad_norm": 1.59370844293676,
"learning_rate": 9.76992171659682e-06,
"loss": 0.1886,
"step": 130,
"ts_encoder_learning_rate": 9.765090956856437e-06
},
{
"epoch": 0.3453332234945218,
"grad_norm": 1.6796351392079987,
"learning_rate": 9.765090956856437e-06,
"loss": 0.1734,
"step": 131,
"ts_encoder_learning_rate": 9.760211228433834e-06
},
{
"epoch": 0.3479693549715792,
"grad_norm": 1.8622570561386254,
"learning_rate": 9.760211228433834e-06,
"loss": 0.2003,
"step": 132,
"ts_encoder_learning_rate": 9.755282581475769e-06
},
{
"epoch": 0.35060548644863665,
"grad_norm": 4.261853496235677,
"learning_rate": 9.755282581475769e-06,
"loss": 0.2152,
"step": 133,
"ts_encoder_learning_rate": 9.750305066631717e-06
},
{
"epoch": 0.35324161792569403,
"grad_norm": 2.084316819171521,
"learning_rate": 9.750305066631717e-06,
"loss": 0.204,
"step": 134,
"ts_encoder_learning_rate": 9.745278735053345e-06
},
{
"epoch": 0.35587774940275146,
"grad_norm": 2.1355217159376125,
"learning_rate": 9.745278735053345e-06,
"loss": 0.1812,
"step": 135,
"ts_encoder_learning_rate": 9.740203638393984e-06
},
{
"epoch": 0.3585138808798089,
"grad_norm": 2.1726864130161485,
"learning_rate": 9.740203638393984e-06,
"loss": 0.1741,
"step": 136,
"ts_encoder_learning_rate": 9.735079828808107e-06
},
{
"epoch": 0.3611500123568663,
"grad_norm": 1.82669888695553,
"learning_rate": 9.735079828808107e-06,
"loss": 0.1772,
"step": 137,
"ts_encoder_learning_rate": 9.729907358950785e-06
},
{
"epoch": 0.3637861438339237,
"grad_norm": 2.0888172211110647,
"learning_rate": 9.729907358950785e-06,
"loss": 0.1747,
"step": 138,
"ts_encoder_learning_rate": 9.724686281977146e-06
},
{
"epoch": 0.36642227531098115,
"grad_norm": 1.7793028446193322,
"learning_rate": 9.724686281977146e-06,
"loss": 0.1645,
"step": 139,
"ts_encoder_learning_rate": 9.719416651541839e-06
},
{
"epoch": 0.36905840678803853,
"grad_norm": 3.1339804740693697,
"learning_rate": 9.719416651541839e-06,
"loss": 0.2205,
"step": 140,
"ts_encoder_learning_rate": 9.714098521798466e-06
},
{
"epoch": 0.37169453826509596,
"grad_norm": 1.922012523700954,
"learning_rate": 9.714098521798466e-06,
"loss": 0.1922,
"step": 141,
"ts_encoder_learning_rate": 9.708731947399039e-06
},
{
"epoch": 0.3743306697421534,
"grad_norm": 2.111535338552465,
"learning_rate": 9.708731947399039e-06,
"loss": 0.2084,
"step": 142,
"ts_encoder_learning_rate": 9.703316983493414e-06
},
{
"epoch": 0.37696680121921083,
"grad_norm": 1.6443259615131731,
"learning_rate": 9.703316983493414e-06,
"loss": 0.1644,
"step": 143,
"ts_encoder_learning_rate": 9.697853685728721e-06
},
{
"epoch": 0.3796029326962682,
"grad_norm": 1.0337440158621962,
"learning_rate": 9.697853685728721e-06,
"loss": 0.175,
"step": 144,
"ts_encoder_learning_rate": 9.692342110248802e-06
},
{
"epoch": 0.38223906417332565,
"grad_norm": 2.6059449586383447,
"learning_rate": 9.692342110248802e-06,
"loss": 0.1749,
"step": 145,
"ts_encoder_learning_rate": 9.686782313693622e-06
},
{
"epoch": 0.3848751956503831,
"grad_norm": 1.7541337816760245,
"learning_rate": 9.686782313693622e-06,
"loss": 0.1814,
"step": 146,
"ts_encoder_learning_rate": 9.681174353198687e-06
},
{
"epoch": 0.38751132712744046,
"grad_norm": 1.9857610217097408,
"learning_rate": 9.681174353198687e-06,
"loss": 0.1817,
"step": 147,
"ts_encoder_learning_rate": 9.675518286394474e-06
},
{
"epoch": 0.3901474586044979,
"grad_norm": 2.015451167688765,
"learning_rate": 9.675518286394474e-06,
"loss": 0.1853,
"step": 148,
"ts_encoder_learning_rate": 9.669814171405818e-06
},
{
"epoch": 0.39278359008155533,
"grad_norm": 1.417012874189727,
"learning_rate": 9.669814171405818e-06,
"loss": 0.2065,
"step": 149,
"ts_encoder_learning_rate": 9.664062066851325e-06
},
{
"epoch": 0.3954197215586127,
"grad_norm": 2.8203561728923776,
"learning_rate": 9.664062066851325e-06,
"loss": 0.1729,
"step": 150,
"ts_encoder_learning_rate": 9.658262031842772e-06
},
{
"epoch": 0.39805585303567015,
"grad_norm": 2.068280212416892,
"learning_rate": 9.658262031842772e-06,
"loss": 0.1885,
"step": 151,
"ts_encoder_learning_rate": 9.65241412598449e-06
},
{
"epoch": 0.4006919845127276,
"grad_norm": 2.6419154731801933,
"learning_rate": 9.65241412598449e-06,
"loss": 0.1845,
"step": 152,
"ts_encoder_learning_rate": 9.64651840937276e-06
},
{
"epoch": 0.40332811598978496,
"grad_norm": 3.9825926281731068,
"learning_rate": 9.64651840937276e-06,
"loss": 0.1829,
"step": 153,
"ts_encoder_learning_rate": 9.640574942595195e-06
},
{
"epoch": 0.4059642474668424,
"grad_norm": 2.57569222575164,
"learning_rate": 9.640574942595195e-06,
"loss": 0.1749,
"step": 154,
"ts_encoder_learning_rate": 9.63458378673011e-06
},
{
"epoch": 0.40860037894389983,
"grad_norm": 2.1803645371235696,
"learning_rate": 9.63458378673011e-06,
"loss": 0.1854,
"step": 155,
"ts_encoder_learning_rate": 9.6285450033459e-06
},
{
"epoch": 0.41123651042095727,
"grad_norm": 4.412857746362803,
"learning_rate": 9.6285450033459e-06,
"loss": 0.1844,
"step": 156,
"ts_encoder_learning_rate": 9.622458654500408e-06
},
{
"epoch": 0.41387264189801465,
"grad_norm": 3.9208819464546725,
"learning_rate": 9.622458654500408e-06,
"loss": 0.1843,
"step": 157,
"ts_encoder_learning_rate": 9.616324802740287e-06
},
{
"epoch": 0.4165087733750721,
"grad_norm": 2.423222471726984,
"learning_rate": 9.616324802740287e-06,
"loss": 0.1552,
"step": 158,
"ts_encoder_learning_rate": 9.610143511100354e-06
},
{
"epoch": 0.4191449048521295,
"grad_norm": 1.7446627651403062,
"learning_rate": 9.610143511100354e-06,
"loss": 0.1536,
"step": 159,
"ts_encoder_learning_rate": 9.603914843102941e-06
},
{
"epoch": 0.4217810363291869,
"grad_norm": 3.9104938056757,
"learning_rate": 9.603914843102941e-06,
"loss": 0.1688,
"step": 160,
"ts_encoder_learning_rate": 9.597638862757255e-06
},
{
"epoch": 0.42441716780624433,
"grad_norm": 3.0669664248193835,
"learning_rate": 9.597638862757255e-06,
"loss": 0.1623,
"step": 161,
"ts_encoder_learning_rate": 9.591315634558698e-06
},
{
"epoch": 0.42705329928330177,
"grad_norm": 2.2417095321151446,
"learning_rate": 9.591315634558698e-06,
"loss": 0.1769,
"step": 162,
"ts_encoder_learning_rate": 9.584945223488227e-06
},
{
"epoch": 0.42968943076035915,
"grad_norm": 1.3116635599548383,
"learning_rate": 9.584945223488227e-06,
"loss": 0.1338,
"step": 163,
"ts_encoder_learning_rate": 9.57852769501167e-06
},
{
"epoch": 0.4323255622374166,
"grad_norm": 2.6909253272106,
"learning_rate": 9.57852769501167e-06,
"loss": 0.1705,
"step": 164,
"ts_encoder_learning_rate": 9.572063115079063e-06
},
{
"epoch": 0.434961693714474,
"grad_norm": 2.4035516559911456,
"learning_rate": 9.572063115079063e-06,
"loss": 0.1507,
"step": 165,
"ts_encoder_learning_rate": 9.565551550123967e-06
},
{
"epoch": 0.43759782519153145,
"grad_norm": 1.698615887190314,
"learning_rate": 9.565551550123967e-06,
"loss": 0.1584,
"step": 166,
"ts_encoder_learning_rate": 9.558993067062785e-06
},
{
"epoch": 0.44023395666858883,
"grad_norm": 1.5658851097960265,
"learning_rate": 9.558993067062785e-06,
"loss": 0.1444,
"step": 167,
"ts_encoder_learning_rate": 9.552387733294081e-06
},
{
"epoch": 0.44287008814564627,
"grad_norm": 1.428302430233315,
"learning_rate": 9.552387733294081e-06,
"loss": 0.1392,
"step": 168,
"ts_encoder_learning_rate": 9.545735616697875e-06
},
{
"epoch": 0.4455062196227037,
"grad_norm": 1.8871291912109978,
"learning_rate": 9.545735616697875e-06,
"loss": 0.163,
"step": 169,
"ts_encoder_learning_rate": 9.539036785634961e-06
},
{
"epoch": 0.4481423510997611,
"grad_norm": 1.4930705735785357,
"learning_rate": 9.539036785634961e-06,
"loss": 0.1189,
"step": 170,
"ts_encoder_learning_rate": 9.532291308946191e-06
},
{
"epoch": 0.4507784825768185,
"grad_norm": 2.2958980821132733,
"learning_rate": 9.532291308946191e-06,
"loss": 0.1475,
"step": 171,
"ts_encoder_learning_rate": 9.525499255951775e-06
},
{
"epoch": 0.45341461405387595,
"grad_norm": 2.292110960348305,
"learning_rate": 9.525499255951775e-06,
"loss": 0.1464,
"step": 172,
"ts_encoder_learning_rate": 9.518660696450567e-06
},
{
"epoch": 0.45605074553093333,
"grad_norm": 1.5583683866727895,
"learning_rate": 9.518660696450567e-06,
"loss": 0.1637,
"step": 173,
"ts_encoder_learning_rate": 9.511775700719347e-06
},
{
"epoch": 0.45868687700799077,
"grad_norm": 2.293732030943085,
"learning_rate": 9.511775700719347e-06,
"loss": 0.1425,
"step": 174,
"ts_encoder_learning_rate": 9.504844339512096e-06
},
{
"epoch": 0.4613230084850482,
"grad_norm": 2.378677779357337,
"learning_rate": 9.504844339512096e-06,
"loss": 0.1706,
"step": 175,
"ts_encoder_learning_rate": 9.497866684059278e-06
},
{
"epoch": 0.4639591399621056,
"grad_norm": 1.545023475236903,
"learning_rate": 9.497866684059278e-06,
"loss": 0.139,
"step": 176,
"ts_encoder_learning_rate": 9.490842806067095e-06
},
{
"epoch": 0.466595271439163,
"grad_norm": 3.000973914853472,
"learning_rate": 9.490842806067095e-06,
"loss": 0.1596,
"step": 177,
"ts_encoder_learning_rate": 9.483772777716767e-06
},
{
"epoch": 0.46923140291622045,
"grad_norm": 3.558064259164343,
"learning_rate": 9.483772777716767e-06,
"loss": 0.1806,
"step": 178,
"ts_encoder_learning_rate": 9.476656671663766e-06
},
{
"epoch": 0.4718675343932779,
"grad_norm": 2.3771476346252287,
"learning_rate": 9.476656671663766e-06,
"loss": 0.1517,
"step": 179,
"ts_encoder_learning_rate": 9.469494561037097e-06
},
{
"epoch": 0.47450366587033527,
"grad_norm": 1.3953249248383899,
"learning_rate": 9.469494561037097e-06,
"loss": 0.1504,
"step": 180,
"ts_encoder_learning_rate": 9.462286519438531e-06
},
{
"epoch": 0.4771397973473927,
"grad_norm": 3.0894973811179613,
"learning_rate": 9.462286519438531e-06,
"loss": 0.1471,
"step": 181,
"ts_encoder_learning_rate": 9.45503262094184e-06
},
{
"epoch": 0.47977592882445014,
"grad_norm": 3.447964797634623,
"learning_rate": 9.45503262094184e-06,
"loss": 0.1729,
"step": 182,
"ts_encoder_learning_rate": 9.44773294009206e-06
},
{
"epoch": 0.4824120603015075,
"grad_norm": 1.3569301564256495,
"learning_rate": 9.44773294009206e-06,
"loss": 0.1377,
"step": 183,
"ts_encoder_learning_rate": 9.440387551904705e-06
},
{
"epoch": 0.48504819177856495,
"grad_norm": 1.299399066206849,
"learning_rate": 9.440387551904705e-06,
"loss": 0.1474,
"step": 184,
"ts_encoder_learning_rate": 9.432996531865001e-06
},
{
"epoch": 0.4876843232556224,
"grad_norm": 2.3518650616525925,
"learning_rate": 9.432996531865001e-06,
"loss": 0.1365,
"step": 185,
"ts_encoder_learning_rate": 9.425559955927118e-06
},
{
"epoch": 0.49032045473267977,
"grad_norm": 2.444972697601273,
"learning_rate": 9.425559955927118e-06,
"loss": 0.1771,
"step": 186,
"ts_encoder_learning_rate": 9.418077900513377e-06
},
{
"epoch": 0.4929565862097372,
"grad_norm": 1.7782371185002437,
"learning_rate": 9.418077900513377e-06,
"loss": 0.1373,
"step": 187,
"ts_encoder_learning_rate": 9.410550442513475e-06
},
{
"epoch": 0.49559271768679464,
"grad_norm": 1.6778989547499836,
"learning_rate": 9.410550442513475e-06,
"loss": 0.1565,
"step": 188,
"ts_encoder_learning_rate": 9.40297765928369e-06
},
{
"epoch": 0.49822884916385207,
"grad_norm": 2.155563042258836,
"learning_rate": 9.40297765928369e-06,
"loss": 0.153,
"step": 189,
"ts_encoder_learning_rate": 9.395359628646087e-06
},
{
"epoch": 0.5008649806409095,
"grad_norm": 1.295519636961886,
"learning_rate": 9.395359628646087e-06,
"loss": 0.129,
"step": 190,
"ts_encoder_learning_rate": 9.387696428887715e-06
},
{
"epoch": 0.5035011121179669,
"grad_norm": 1.9350288061910503,
"learning_rate": 9.387696428887715e-06,
"loss": 0.1397,
"step": 191,
"ts_encoder_learning_rate": 9.37998813875981e-06
},
{
"epoch": 0.5061372435950243,
"grad_norm": 1.4020631402932546,
"learning_rate": 9.37998813875981e-06,
"loss": 0.1496,
"step": 192,
"ts_encoder_learning_rate": 9.372234837476979e-06
},
{
"epoch": 0.5087733750720818,
"grad_norm": 1.6083327990489644,
"learning_rate": 9.372234837476979e-06,
"loss": 0.1435,
"step": 193,
"ts_encoder_learning_rate": 9.364436604716389e-06
},
{
"epoch": 0.5114095065491392,
"grad_norm": 1.6706875142270174,
"learning_rate": 9.364436604716389e-06,
"loss": 0.1465,
"step": 194,
"ts_encoder_learning_rate": 9.356593520616948e-06
},
{
"epoch": 0.5140456380261965,
"grad_norm": 1.2317832812802163,
"learning_rate": 9.356593520616948e-06,
"loss": 0.1242,
"step": 195,
"ts_encoder_learning_rate": 9.348705665778479e-06
},
{
"epoch": 0.516681769503254,
"grad_norm": 2.2546259482178415,
"learning_rate": 9.348705665778479e-06,
"loss": 0.1446,
"step": 196,
"ts_encoder_learning_rate": 9.340773121260893e-06
},
{
"epoch": 0.5193179009803114,
"grad_norm": 2.4160528757749202,
"learning_rate": 9.340773121260893e-06,
"loss": 0.1426,
"step": 197,
"ts_encoder_learning_rate": 9.33279596858336e-06
},
{
"epoch": 0.5219540324573688,
"grad_norm": 2.9782825880249475,
"learning_rate": 9.33279596858336e-06,
"loss": 0.1581,
"step": 198,
"ts_encoder_learning_rate": 9.324774289723469e-06
},
{
"epoch": 0.5245901639344263,
"grad_norm": 2.2270014484031537,
"learning_rate": 9.324774289723469e-06,
"loss": 0.1456,
"step": 199,
"ts_encoder_learning_rate": 9.316708167116377e-06
},
{
"epoch": 0.5272262954114837,
"grad_norm": 2.254839404791111,
"learning_rate": 9.316708167116377e-06,
"loss": 0.1719,
"step": 200,
"ts_encoder_learning_rate": 9.308597683653976e-06
},
{
"epoch": 0.529862426888541,
"grad_norm": 2.1147336782126906,
"learning_rate": 9.308597683653976e-06,
"loss": 0.1412,
"step": 201,
"ts_encoder_learning_rate": 9.300442922684033e-06
},
{
"epoch": 0.5324985583655985,
"grad_norm": 3.403863020435584,
"learning_rate": 9.300442922684033e-06,
"loss": 0.159,
"step": 202,
"ts_encoder_learning_rate": 9.292243968009332e-06
},
{
"epoch": 0.5351346898426559,
"grad_norm": 2.1840583753378704,
"learning_rate": 9.292243968009332e-06,
"loss": 0.153,
"step": 203,
"ts_encoder_learning_rate": 9.284000903886818e-06
},
{
"epoch": 0.5377708213197133,
"grad_norm": 2.756745654144269,
"learning_rate": 9.284000903886818e-06,
"loss": 0.1496,
"step": 204,
"ts_encoder_learning_rate": 9.275713815026732e-06
},
{
"epoch": 0.5404069527967708,
"grad_norm": 1.562920226633681,
"learning_rate": 9.275713815026732e-06,
"loss": 0.1444,
"step": 205,
"ts_encoder_learning_rate": 9.26738278659173e-06
},
{
"epoch": 0.5430430842738282,
"grad_norm": 1.8988550139932143,
"learning_rate": 9.26738278659173e-06,
"loss": 0.1327,
"step": 206,
"ts_encoder_learning_rate": 9.259007904196023e-06
},
{
"epoch": 0.5456792157508856,
"grad_norm": 1.804458613750819,
"learning_rate": 9.259007904196023e-06,
"loss": 0.1502,
"step": 207,
"ts_encoder_learning_rate": 9.250589253904481e-06
},
{
"epoch": 0.548315347227943,
"grad_norm": 1.8650637771648768,
"learning_rate": 9.250589253904481e-06,
"loss": 0.1473,
"step": 208,
"ts_encoder_learning_rate": 9.242126922231763e-06
},
{
"epoch": 0.5509514787050004,
"grad_norm": 2.051385105386284,
"learning_rate": 9.242126922231763e-06,
"loss": 0.1658,
"step": 209,
"ts_encoder_learning_rate": 9.233620996141421e-06
},
{
"epoch": 0.5535876101820578,
"grad_norm": 2.3089156973651463,
"learning_rate": 9.233620996141421e-06,
"loss": 0.145,
"step": 210,
"ts_encoder_learning_rate": 9.225071563045007e-06
},
{
"epoch": 0.5562237416591153,
"grad_norm": 2.3126324557088265,
"learning_rate": 9.225071563045007e-06,
"loss": 0.1503,
"step": 211,
"ts_encoder_learning_rate": 9.216478710801171e-06
},
{
"epoch": 0.5588598731361727,
"grad_norm": 2.0335782421296047,
"learning_rate": 9.216478710801171e-06,
"loss": 0.1254,
"step": 212,
"ts_encoder_learning_rate": 9.207842527714767e-06
},
{
"epoch": 0.5614960046132301,
"grad_norm": 2.959817313362635,
"learning_rate": 9.207842527714767e-06,
"loss": 0.1344,
"step": 213,
"ts_encoder_learning_rate": 9.199163102535937e-06
},
{
"epoch": 0.5641321360902875,
"grad_norm": 2.401709370382837,
"learning_rate": 9.199163102535937e-06,
"loss": 0.1235,
"step": 214,
"ts_encoder_learning_rate": 9.190440524459203e-06
},
{
"epoch": 0.5667682675673449,
"grad_norm": 2.4698482564816437,
"learning_rate": 9.190440524459203e-06,
"loss": 0.1652,
"step": 215,
"ts_encoder_learning_rate": 9.181674883122554e-06
},
{
"epoch": 0.5694043990444023,
"grad_norm": 2.174645988827366,
"learning_rate": 9.181674883122554e-06,
"loss": 0.1191,
"step": 216,
"ts_encoder_learning_rate": 9.172866268606514e-06
},
{
"epoch": 0.5720405305214598,
"grad_norm": 1.4935255107278584,
"learning_rate": 9.172866268606514e-06,
"loss": 0.1348,
"step": 217,
"ts_encoder_learning_rate": 9.164014771433228e-06
},
{
"epoch": 0.5746766619985172,
"grad_norm": 2.261714559414658,
"learning_rate": 9.164014771433228e-06,
"loss": 0.1393,
"step": 218,
"ts_encoder_learning_rate": 9.15512048256552e-06
},
{
"epoch": 0.5773127934755746,
"grad_norm": 1.9389125772079525,
"learning_rate": 9.15512048256552e-06,
"loss": 0.1159,
"step": 219,
"ts_encoder_learning_rate": 9.146183493405976e-06
},
{
"epoch": 0.5799489249526321,
"grad_norm": 2.2307885757186376,
"learning_rate": 9.146183493405976e-06,
"loss": 0.1197,
"step": 220,
"ts_encoder_learning_rate": 9.137203895795983e-06
},
{
"epoch": 0.5825850564296894,
"grad_norm": 2.2631154256287784,
"learning_rate": 9.137203895795983e-06,
"loss": 0.1297,
"step": 221,
"ts_encoder_learning_rate": 9.128181782014801e-06
},
{
"epoch": 0.5852211879067468,
"grad_norm": 1.8377593809270902,
"learning_rate": 9.128181782014801e-06,
"loss": 0.1375,
"step": 222,
"ts_encoder_learning_rate": 9.119117244778609e-06
},
{
"epoch": 0.5878573193838043,
"grad_norm": 1.941575989624506,
"learning_rate": 9.119117244778609e-06,
"loss": 0.1381,
"step": 223,
"ts_encoder_learning_rate": 9.110010377239552e-06
},
{
"epoch": 0.5904934508608617,
"grad_norm": 2.0700642084907797,
"learning_rate": 9.110010377239552e-06,
"loss": 0.1215,
"step": 224,
"ts_encoder_learning_rate": 9.10086127298478e-06
},
{
"epoch": 0.5931295823379191,
"grad_norm": 2.175877796189538,
"learning_rate": 9.10086127298478e-06,
"loss": 0.1274,
"step": 225,
"ts_encoder_learning_rate": 9.0916700260355e-06
},
{
"epoch": 0.5957657138149766,
"grad_norm": 2.7088680551627444,
"learning_rate": 9.0916700260355e-06,
"loss": 0.1546,
"step": 226,
"ts_encoder_learning_rate": 9.082436730845993e-06
},
{
"epoch": 0.5984018452920339,
"grad_norm": 2.2127473631725634,
"learning_rate": 9.082436730845993e-06,
"loss": 0.1456,
"step": 227,
"ts_encoder_learning_rate": 9.073161482302656e-06
},
{
"epoch": 0.6010379767690913,
"grad_norm": 2.2852943362812947,
"learning_rate": 9.073161482302656e-06,
"loss": 0.1133,
"step": 228,
"ts_encoder_learning_rate": 9.063844375723014e-06
},
{
"epoch": 0.6036741082461488,
"grad_norm": 2.022226024724427,
"learning_rate": 9.063844375723014e-06,
"loss": 0.147,
"step": 229,
"ts_encoder_learning_rate": 9.054485506854756e-06
},
{
"epoch": 0.6063102397232062,
"grad_norm": 1.4127928730528008,
"learning_rate": 9.054485506854756e-06,
"loss": 0.1399,
"step": 230,
"ts_encoder_learning_rate": 9.045084971874738e-06
},
{
"epoch": 0.6089463712002636,
"grad_norm": 2.3124067231050796,
"learning_rate": 9.045084971874738e-06,
"loss": 0.166,
"step": 231,
"ts_encoder_learning_rate": 9.035642867388003e-06
},
{
"epoch": 0.6115825026773211,
"grad_norm": 2.9417960489910557,
"learning_rate": 9.035642867388003e-06,
"loss": 0.1336,
"step": 232,
"ts_encoder_learning_rate": 9.026159290426782e-06
},
{
"epoch": 0.6142186341543785,
"grad_norm": 2.037375696729251,
"learning_rate": 9.026159290426782e-06,
"loss": 0.1226,
"step": 233,
"ts_encoder_learning_rate": 9.016634338449504e-06
},
{
"epoch": 0.6168547656314358,
"grad_norm": 2.7178140969916478,
"learning_rate": 9.016634338449504e-06,
"loss": 0.1405,
"step": 234,
"ts_encoder_learning_rate": 9.007068109339783e-06
},
{
"epoch": 0.6194908971084933,
"grad_norm": 2.4572769251627804,
"learning_rate": 9.007068109339783e-06,
"loss": 0.1296,
"step": 235,
"ts_encoder_learning_rate": 8.997460701405431e-06
},
{
"epoch": 0.6221270285855507,
"grad_norm": 1.7074800943778181,
"learning_rate": 8.997460701405431e-06,
"loss": 0.1076,
"step": 236,
"ts_encoder_learning_rate": 8.987812213377423e-06
},
{
"epoch": 0.6247631600626081,
"grad_norm": 1.3569057680195367,
"learning_rate": 8.987812213377423e-06,
"loss": 0.1307,
"step": 237,
"ts_encoder_learning_rate": 8.978122744408905e-06
},
{
"epoch": 0.6273992915396656,
"grad_norm": 2.2197611340479333,
"learning_rate": 8.978122744408905e-06,
"loss": 0.1255,
"step": 238,
"ts_encoder_learning_rate": 8.968392394074164e-06
},
{
"epoch": 0.630035423016723,
"grad_norm": 2.80909657385505,
"learning_rate": 8.968392394074164e-06,
"loss": 0.1311,
"step": 239,
"ts_encoder_learning_rate": 8.9586212623676e-06
},
{
"epoch": 0.6326715544937804,
"grad_norm": 1.596591103789994,
"learning_rate": 8.9586212623676e-06,
"loss": 0.1247,
"step": 240,
"ts_encoder_learning_rate": 8.948809449702712e-06
},
{
"epoch": 0.6353076859708378,
"grad_norm": 2.255624101113016,
"learning_rate": 8.948809449702712e-06,
"loss": 0.1125,
"step": 241,
"ts_encoder_learning_rate": 8.938957056911057e-06
},
{
"epoch": 0.6379438174478952,
"grad_norm": 1.4404926531170739,
"learning_rate": 8.938957056911057e-06,
"loss": 0.1156,
"step": 242,
"ts_encoder_learning_rate": 8.929064185241214e-06
},
{
"epoch": 0.6405799489249526,
"grad_norm": 2.0628024401597465,
"learning_rate": 8.929064185241214e-06,
"loss": 0.137,
"step": 243,
"ts_encoder_learning_rate": 8.919130936357743e-06
},
{
"epoch": 0.6432160804020101,
"grad_norm": 1.7372644218298394,
"learning_rate": 8.919130936357743e-06,
"loss": 0.111,
"step": 244,
"ts_encoder_learning_rate": 8.90915741234015e-06
},
{
"epoch": 0.6458522118790675,
"grad_norm": 1.627934422877123,
"learning_rate": 8.90915741234015e-06,
"loss": 0.1334,
"step": 245,
"ts_encoder_learning_rate": 8.899143715681822e-06
},
{
"epoch": 0.6484883433561249,
"grad_norm": 1.8556278497877248,
"learning_rate": 8.899143715681822e-06,
"loss": 0.1219,
"step": 246,
"ts_encoder_learning_rate": 8.889089949288986e-06
},
{
"epoch": 0.6511244748331823,
"grad_norm": 3.1526166628811603,
"learning_rate": 8.889089949288986e-06,
"loss": 0.0894,
"step": 247,
"ts_encoder_learning_rate": 8.878996216479651e-06
},
{
"epoch": 0.6537606063102397,
"grad_norm": 2.213556128570375,
"learning_rate": 8.878996216479651e-06,
"loss": 0.1277,
"step": 248,
"ts_encoder_learning_rate": 8.868862620982534e-06
},
{
"epoch": 0.6563967377872971,
"grad_norm": 4.4965782913660854,
"learning_rate": 8.868862620982534e-06,
"loss": 0.1258,
"step": 249,
"ts_encoder_learning_rate": 8.85868926693601e-06
},
{
"epoch": 0.6590328692643546,
"grad_norm": 2.2567027094465573,
"learning_rate": 8.85868926693601e-06,
"loss": 0.1313,
"step": 250,
"ts_encoder_learning_rate": 8.84847625888703e-06
},
{
"epoch": 0.661669000741412,
"grad_norm": 2.865960904272953,
"learning_rate": 8.84847625888703e-06,
"loss": 0.1255,
"step": 251,
"ts_encoder_learning_rate": 8.838223701790057e-06
},
{
"epoch": 0.6643051322184694,
"grad_norm": 1.698484873506614,
"learning_rate": 8.838223701790057e-06,
"loss": 0.1277,
"step": 252,
"ts_encoder_learning_rate": 8.827931701005974e-06
},
{
"epoch": 0.6669412636955269,
"grad_norm": 1.7108540523901774,
"learning_rate": 8.827931701005974e-06,
"loss": 0.1238,
"step": 253,
"ts_encoder_learning_rate": 8.817600362301018e-06
},
{
"epoch": 0.6695773951725842,
"grad_norm": 2.574763087212977,
"learning_rate": 8.817600362301018e-06,
"loss": 0.119,
"step": 254,
"ts_encoder_learning_rate": 8.807229791845673e-06
},
{
"epoch": 0.6722135266496416,
"grad_norm": 1.76981658030345,
"learning_rate": 8.807229791845673e-06,
"loss": 0.1162,
"step": 255,
"ts_encoder_learning_rate": 8.7968200962136e-06
},
{
"epoch": 0.6748496581266991,
"grad_norm": 2.3402398000656532,
"learning_rate": 8.7968200962136e-06,
"loss": 0.1082,
"step": 256,
"ts_encoder_learning_rate": 8.786371382380527e-06
},
{
"epoch": 0.6774857896037565,
"grad_norm": 3.333892645282909,
"learning_rate": 8.786371382380527e-06,
"loss": 0.1226,
"step": 257,
"ts_encoder_learning_rate": 8.775883757723156e-06
},
{
"epoch": 0.6801219210808139,
"grad_norm": 2.0605502821038626,
"learning_rate": 8.775883757723156e-06,
"loss": 0.1278,
"step": 258,
"ts_encoder_learning_rate": 8.765357330018056e-06
},
{
"epoch": 0.6827580525578714,
"grad_norm": 1.8198127860386084,
"learning_rate": 8.765357330018056e-06,
"loss": 0.1104,
"step": 259,
"ts_encoder_learning_rate": 8.754792207440557e-06
},
{
"epoch": 0.6853941840349287,
"grad_norm": 1.7852327454966495,
"learning_rate": 8.754792207440557e-06,
"loss": 0.1257,
"step": 260,
"ts_encoder_learning_rate": 8.74418849856364e-06
},
{
"epoch": 0.6880303155119861,
"grad_norm": 2.3856839525596043,
"learning_rate": 8.74418849856364e-06,
"loss": 0.1232,
"step": 261,
"ts_encoder_learning_rate": 8.733546312356826e-06
},
{
"epoch": 0.6906664469890436,
"grad_norm": 2.0514309990696,
"learning_rate": 8.733546312356826e-06,
"loss": 0.106,
"step": 262,
"ts_encoder_learning_rate": 8.722865758185036e-06
},
{
"epoch": 0.693302578466101,
"grad_norm": 1.8497678205013666,
"learning_rate": 8.722865758185036e-06,
"loss": 0.0925,
"step": 263,
"ts_encoder_learning_rate": 8.712146945807494e-06
},
{
"epoch": 0.6959387099431584,
"grad_norm": 2.325593177065593,
"learning_rate": 8.712146945807494e-06,
"loss": 0.1151,
"step": 264,
"ts_encoder_learning_rate": 8.701389985376578e-06
},
{
"epoch": 0.6985748414202159,
"grad_norm": 2.4024115509182544,
"learning_rate": 8.701389985376578e-06,
"loss": 0.1351,
"step": 265,
"ts_encoder_learning_rate": 8.690594987436705e-06
},
{
"epoch": 0.7012109728972733,
"grad_norm": 2.404989824928956,
"learning_rate": 8.690594987436705e-06,
"loss": 0.1183,
"step": 266,
"ts_encoder_learning_rate": 8.679762062923176e-06
},
{
"epoch": 0.7038471043743306,
"grad_norm": 2.4255255570984904,
"learning_rate": 8.679762062923176e-06,
"loss": 0.1175,
"step": 267,
"ts_encoder_learning_rate": 8.668891323161053e-06
},
{
"epoch": 0.7064832358513881,
"grad_norm": 2.063468492850095,
"learning_rate": 8.668891323161053e-06,
"loss": 0.1136,
"step": 268,
"ts_encoder_learning_rate": 8.657982879864007e-06
},
{
"epoch": 0.7091193673284455,
"grad_norm": 2.454812018935756,
"learning_rate": 8.657982879864007e-06,
"loss": 0.1184,
"step": 269,
"ts_encoder_learning_rate": 8.647036845133171e-06
},
{
"epoch": 0.7117554988055029,
"grad_norm": 2.389510429853746,
"learning_rate": 8.647036845133171e-06,
"loss": 0.1109,
"step": 270,
"ts_encoder_learning_rate": 8.636053331455986e-06
},
{
"epoch": 0.7143916302825604,
"grad_norm": 2.534713674498363,
"learning_rate": 8.636053331455986e-06,
"loss": 0.1245,
"step": 271,
"ts_encoder_learning_rate": 8.625032451705053e-06
},
{
"epoch": 0.7170277617596178,
"grad_norm": 1.8778540259505276,
"learning_rate": 8.625032451705053e-06,
"loss": 0.1116,
"step": 272,
"ts_encoder_learning_rate": 8.613974319136959e-06
},
{
"epoch": 0.7196638932366752,
"grad_norm": 2.1437242323626053,
"learning_rate": 8.613974319136959e-06,
"loss": 0.1093,
"step": 273,
"ts_encoder_learning_rate": 8.602879047391127e-06
},
{
"epoch": 0.7223000247137326,
"grad_norm": 1.841731554668583,
"learning_rate": 8.602879047391127e-06,
"loss": 0.1155,
"step": 274,
"ts_encoder_learning_rate": 8.591746750488639e-06
},
{
"epoch": 0.72493615619079,
"grad_norm": 1.7263269186243153,
"learning_rate": 8.591746750488639e-06,
"loss": 0.1106,
"step": 275,
"ts_encoder_learning_rate": 8.580577542831072e-06
},
{
"epoch": 0.7275722876678474,
"grad_norm": 2.1861381161848144,
"learning_rate": 8.580577542831072e-06,
"loss": 0.1069,
"step": 276,
"ts_encoder_learning_rate": 8.569371539199316e-06
},
{
"epoch": 0.7302084191449049,
"grad_norm": 1.83254402858276,
"learning_rate": 8.569371539199316e-06,
"loss": 0.1062,
"step": 277,
"ts_encoder_learning_rate": 8.558128854752397e-06
},
{
"epoch": 0.7328445506219623,
"grad_norm": 1.5361832609660626,
"learning_rate": 8.558128854752397e-06,
"loss": 0.0958,
"step": 278,
"ts_encoder_learning_rate": 8.54684960502629e-06
},
{
"epoch": 0.7354806820990197,
"grad_norm": 1.6977272803107797,
"learning_rate": 8.54684960502629e-06,
"loss": 0.0986,
"step": 279,
"ts_encoder_learning_rate": 8.535533905932739e-06
},
{
"epoch": 0.7381168135760771,
"grad_norm": 1.805089674746036,
"learning_rate": 8.535533905932739e-06,
"loss": 0.0959,
"step": 280,
"ts_encoder_learning_rate": 8.52418187375806e-06
},
{
"epoch": 0.7407529450531345,
"grad_norm": 2.4198941080590153,
"learning_rate": 8.52418187375806e-06,
"loss": 0.1202,
"step": 281,
"ts_encoder_learning_rate": 8.512793625161947e-06
},
{
"epoch": 0.7433890765301919,
"grad_norm": 1.9365613559437527,
"learning_rate": 8.512793625161947e-06,
"loss": 0.1251,
"step": 282,
"ts_encoder_learning_rate": 8.501369277176275e-06
},
{
"epoch": 0.7460252080072494,
"grad_norm": 2.6757940840346475,
"learning_rate": 8.501369277176275e-06,
"loss": 0.1167,
"step": 283,
"ts_encoder_learning_rate": 8.489908947203897e-06
},
{
"epoch": 0.7486613394843068,
"grad_norm": 1.3212164606431878,
"learning_rate": 8.489908947203897e-06,
"loss": 0.0967,
"step": 284,
"ts_encoder_learning_rate": 8.478412753017433e-06
},
{
"epoch": 0.7512974709613642,
"grad_norm": 2.3824932932765255,
"learning_rate": 8.478412753017433e-06,
"loss": 0.1211,
"step": 285,
"ts_encoder_learning_rate": 8.466880812758064e-06
},
{
"epoch": 0.7539336024384217,
"grad_norm": 1.8845886753934002,
"learning_rate": 8.466880812758064e-06,
"loss": 0.1156,
"step": 286,
"ts_encoder_learning_rate": 8.455313244934324e-06
},
{
"epoch": 0.756569733915479,
"grad_norm": 1.9404694724173959,
"learning_rate": 8.455313244934324e-06,
"loss": 0.0977,
"step": 287,
"ts_encoder_learning_rate": 8.443710168420866e-06
},
{
"epoch": 0.7592058653925364,
"grad_norm": 2.3462365486027097,
"learning_rate": 8.443710168420866e-06,
"loss": 0.105,
"step": 288,
"ts_encoder_learning_rate": 8.432071702457253e-06
},
{
"epoch": 0.7618419968695939,
"grad_norm": 2.228457755561138,
"learning_rate": 8.432071702457253e-06,
"loss": 0.1221,
"step": 289,
"ts_encoder_learning_rate": 8.420397966646732e-06
},
{
"epoch": 0.7644781283466513,
"grad_norm": 1.5579438707012758,
"learning_rate": 8.420397966646732e-06,
"loss": 0.0959,
"step": 290,
"ts_encoder_learning_rate": 8.408689080954997e-06
},
{
"epoch": 0.7671142598237087,
"grad_norm": 2.5815854965292258,
"learning_rate": 8.408689080954997e-06,
"loss": 0.1037,
"step": 291,
"ts_encoder_learning_rate": 8.396945165708971e-06
},
{
"epoch": 0.7697503913007662,
"grad_norm": 1.9767105091219221,
"learning_rate": 8.396945165708971e-06,
"loss": 0.1083,
"step": 292,
"ts_encoder_learning_rate": 8.38516634159555e-06
},
{
"epoch": 0.7723865227778235,
"grad_norm": 2.164960614575108,
"learning_rate": 8.38516634159555e-06,
"loss": 0.1092,
"step": 293,
"ts_encoder_learning_rate": 8.373352729660373e-06
},
{
"epoch": 0.7750226542548809,
"grad_norm": 2.426995380324648,
"learning_rate": 8.373352729660373e-06,
"loss": 0.0968,
"step": 294,
"ts_encoder_learning_rate": 8.361504451306585e-06
},
{
"epoch": 0.7776587857319384,
"grad_norm": 2.2037799475117765,
"learning_rate": 8.361504451306585e-06,
"loss": 0.1216,
"step": 295,
"ts_encoder_learning_rate": 8.349621628293578e-06
},
{
"epoch": 0.7802949172089958,
"grad_norm": 1.5101855867509804,
"learning_rate": 8.349621628293578e-06,
"loss": 0.0865,
"step": 296,
"ts_encoder_learning_rate": 8.337704382735741e-06
},
{
"epoch": 0.7829310486860532,
"grad_norm": 1.8304482678186873,
"learning_rate": 8.337704382735741e-06,
"loss": 0.0907,
"step": 297,
"ts_encoder_learning_rate": 8.325752837101213e-06
},
{
"epoch": 0.7855671801631107,
"grad_norm": 1.3186981403774303,
"learning_rate": 8.325752837101213e-06,
"loss": 0.0953,
"step": 298,
"ts_encoder_learning_rate": 8.313767114210615e-06
},
{
"epoch": 0.7882033116401681,
"grad_norm": 1.6252129812087586,
"learning_rate": 8.313767114210615e-06,
"loss": 0.0963,
"step": 299,
"ts_encoder_learning_rate": 8.301747337235798e-06
},
{
"epoch": 0.7908394431172254,
"grad_norm": 2.5667790327516604,
"learning_rate": 8.301747337235798e-06,
"loss": 0.1127,
"step": 300,
"ts_encoder_learning_rate": 8.289693629698564e-06
},
{
"epoch": 0.7934755745942829,
"grad_norm": 1.3839390646179972,
"learning_rate": 8.289693629698564e-06,
"loss": 0.0959,
"step": 301,
"ts_encoder_learning_rate": 8.27760611546941e-06
},
{
"epoch": 0.7961117060713403,
"grad_norm": 1.619204129359444,
"learning_rate": 8.27760611546941e-06,
"loss": 0.0941,
"step": 302,
"ts_encoder_learning_rate": 8.265484918766243e-06
},
{
"epoch": 0.7987478375483977,
"grad_norm": 13.33516060264948,
"learning_rate": 8.265484918766243e-06,
"loss": 0.1133,
"step": 303,
"ts_encoder_learning_rate": 8.253330164153118e-06
},
{
"epoch": 0.8013839690254552,
"grad_norm": 2.1482523103471736,
"learning_rate": 8.253330164153118e-06,
"loss": 0.0951,
"step": 304,
"ts_encoder_learning_rate": 8.241141976538944e-06
},
{
"epoch": 0.8040201005025126,
"grad_norm": 1.765409769976939,
"learning_rate": 8.241141976538944e-06,
"loss": 0.0999,
"step": 305,
"ts_encoder_learning_rate": 8.228920481176202e-06
},
{
"epoch": 0.8066562319795699,
"grad_norm": 1.6926228572462632,
"learning_rate": 8.228920481176202e-06,
"loss": 0.0971,
"step": 306,
"ts_encoder_learning_rate": 8.216665803659671e-06
},
{
"epoch": 0.8092923634566274,
"grad_norm": 2.2379314054489203,
"learning_rate": 8.216665803659671e-06,
"loss": 0.0892,
"step": 307,
"ts_encoder_learning_rate": 8.204378069925121e-06
},
{
"epoch": 0.8119284949336848,
"grad_norm": 2.3735432898917552,
"learning_rate": 8.204378069925121e-06,
"loss": 0.1129,
"step": 308,
"ts_encoder_learning_rate": 8.192057406248028e-06
},
{
"epoch": 0.8145646264107422,
"grad_norm": 1.9923807546881467,
"learning_rate": 8.192057406248028e-06,
"loss": 0.1031,
"step": 309,
"ts_encoder_learning_rate": 8.179703939242276e-06
},
{
"epoch": 0.8172007578877997,
"grad_norm": 1.469953484901779,
"learning_rate": 8.179703939242276e-06,
"loss": 0.0928,
"step": 310,
"ts_encoder_learning_rate": 8.16731779585885e-06
},
{
"epoch": 0.8198368893648571,
"grad_norm": 1.6845119035497684,
"learning_rate": 8.16731779585885e-06,
"loss": 0.1036,
"step": 311,
"ts_encoder_learning_rate": 8.154899103384536e-06
},
{
"epoch": 0.8224730208419145,
"grad_norm": 2.997454352710499,
"learning_rate": 8.154899103384536e-06,
"loss": 0.0915,
"step": 312,
"ts_encoder_learning_rate": 8.142447989440618e-06
},
{
"epoch": 0.8251091523189719,
"grad_norm": 1.7182074416814566,
"learning_rate": 8.142447989440618e-06,
"loss": 0.1003,
"step": 313,
"ts_encoder_learning_rate": 8.129964581981554e-06
},
{
"epoch": 0.8277452837960293,
"grad_norm": 1.9353741860227904,
"learning_rate": 8.129964581981554e-06,
"loss": 0.0898,
"step": 314,
"ts_encoder_learning_rate": 8.117449009293668e-06
},
{
"epoch": 0.8303814152730867,
"grad_norm": 1.5652582875371663,
"learning_rate": 8.117449009293668e-06,
"loss": 0.0935,
"step": 315,
"ts_encoder_learning_rate": 8.104901399993837e-06
},
{
"epoch": 0.8330175467501442,
"grad_norm": 1.1621735202700743,
"learning_rate": 8.104901399993837e-06,
"loss": 0.0996,
"step": 316,
"ts_encoder_learning_rate": 8.092321883028157e-06
},
{
"epoch": 0.8356536782272016,
"grad_norm": 1.6764936566754018,
"learning_rate": 8.092321883028157e-06,
"loss": 0.1089,
"step": 317,
"ts_encoder_learning_rate": 8.079710587670633e-06
},
{
"epoch": 0.838289809704259,
"grad_norm": 2.0164848493032808,
"learning_rate": 8.079710587670633e-06,
"loss": 0.0996,
"step": 318,
"ts_encoder_learning_rate": 8.067067643521834e-06
},
{
"epoch": 0.8409259411813165,
"grad_norm": 1.6759859565355775,
"learning_rate": 8.067067643521834e-06,
"loss": 0.1023,
"step": 319,
"ts_encoder_learning_rate": 8.054393180507572e-06
},
{
"epoch": 0.8435620726583738,
"grad_norm": 1.416243499558276,
"learning_rate": 8.054393180507572e-06,
"loss": 0.093,
"step": 320,
"ts_encoder_learning_rate": 8.041687328877566e-06
},
{
"epoch": 0.8461982041354312,
"grad_norm": 2.18872702162099,
"learning_rate": 8.041687328877566e-06,
"loss": 0.0981,
"step": 321,
"ts_encoder_learning_rate": 8.0289502192041e-06
},
{
"epoch": 0.8488343356124887,
"grad_norm": 1.3033768655546698,
"learning_rate": 8.0289502192041e-06,
"loss": 0.0839,
"step": 322,
"ts_encoder_learning_rate": 8.016181982380682e-06
},
{
"epoch": 0.8514704670895461,
"grad_norm": 2.0867250492468266,
"learning_rate": 8.016181982380682e-06,
"loss": 0.1059,
"step": 323,
"ts_encoder_learning_rate": 8.003382749620704e-06
},
{
"epoch": 0.8541065985666035,
"grad_norm": 1.488852214231389,
"learning_rate": 8.003382749620704e-06,
"loss": 0.0792,
"step": 324,
"ts_encoder_learning_rate": 7.99055265245608e-06
},
{
"epoch": 0.856742730043661,
"grad_norm": 1.6343632839095115,
"learning_rate": 7.99055265245608e-06,
"loss": 0.0969,
"step": 325,
"ts_encoder_learning_rate": 7.977691822735914e-06
},
{
"epoch": 0.8593788615207183,
"grad_norm": 1.7752008703715696,
"learning_rate": 7.977691822735914e-06,
"loss": 0.1092,
"step": 326,
"ts_encoder_learning_rate": 7.96480039262513e-06
},
{
"epoch": 0.8620149929977757,
"grad_norm": 1.5781012521986801,
"learning_rate": 7.96480039262513e-06,
"loss": 0.0912,
"step": 327,
"ts_encoder_learning_rate": 7.951878494603116e-06
},
{
"epoch": 0.8646511244748332,
"grad_norm": 1.7441562007191758,
"learning_rate": 7.951878494603116e-06,
"loss": 0.0888,
"step": 328,
"ts_encoder_learning_rate": 7.938926261462366e-06
},
{
"epoch": 0.8672872559518906,
"grad_norm": 1.8345267313412936,
"learning_rate": 7.938926261462366e-06,
"loss": 0.0835,
"step": 329,
"ts_encoder_learning_rate": 7.925943826307119e-06
},
{
"epoch": 0.869923387428948,
"grad_norm": 1.9237226057153634,
"learning_rate": 7.925943826307119e-06,
"loss": 0.1003,
"step": 330,
"ts_encoder_learning_rate": 7.912931322551981e-06
},
{
"epoch": 0.8725595189060055,
"grad_norm": 2.0786928587458235,
"learning_rate": 7.912931322551981e-06,
"loss": 0.0977,
"step": 331,
"ts_encoder_learning_rate": 7.89988888392056e-06
},
{
"epoch": 0.8751956503830629,
"grad_norm": 1.137765624411406,
"learning_rate": 7.89988888392056e-06,
"loss": 0.0882,
"step": 332,
"ts_encoder_learning_rate": 7.886816644444099e-06
},
{
"epoch": 0.8778317818601202,
"grad_norm": 1.5400536066702828,
"learning_rate": 7.886816644444099e-06,
"loss": 0.0899,
"step": 333,
"ts_encoder_learning_rate": 7.873714738460075e-06
},
{
"epoch": 0.8804679133371777,
"grad_norm": 1.9149603930639032,
"learning_rate": 7.873714738460075e-06,
"loss": 0.086,
"step": 334,
"ts_encoder_learning_rate": 7.860583300610849e-06
},
{
"epoch": 0.8831040448142351,
"grad_norm": 1.845652808068975,
"learning_rate": 7.860583300610849e-06,
"loss": 0.1191,
"step": 335,
"ts_encoder_learning_rate": 7.84742246584226e-06
},
{
"epoch": 0.8857401762912925,
"grad_norm": 1.539167450730259,
"learning_rate": 7.84742246584226e-06,
"loss": 0.1018,
"step": 336,
"ts_encoder_learning_rate": 7.83423236940225e-06
},
{
"epoch": 0.88837630776835,
"grad_norm": 2.7054499134952303,
"learning_rate": 7.83423236940225e-06,
"loss": 0.0787,
"step": 337,
"ts_encoder_learning_rate": 7.821013146839467e-06
},
{
"epoch": 0.8910124392454074,
"grad_norm": 2.3960105755739356,
"learning_rate": 7.821013146839467e-06,
"loss": 0.0998,
"step": 338,
"ts_encoder_learning_rate": 7.807764934001875e-06
},
{
"epoch": 0.8936485707224647,
"grad_norm": 1.9114513463572482,
"learning_rate": 7.807764934001875e-06,
"loss": 0.0955,
"step": 339,
"ts_encoder_learning_rate": 7.794487867035358e-06
},
{
"epoch": 0.8962847021995222,
"grad_norm": 1.1940763189233343,
"learning_rate": 7.794487867035358e-06,
"loss": 0.0978,
"step": 340,
"ts_encoder_learning_rate": 7.781182082382325e-06
},
{
"epoch": 0.8989208336765796,
"grad_norm": 1.6611445100384334,
"learning_rate": 7.781182082382325e-06,
"loss": 0.0945,
"step": 341,
"ts_encoder_learning_rate": 7.767847716780297e-06
},
{
"epoch": 0.901556965153637,
"grad_norm": 1.925999851813011,
"learning_rate": 7.767847716780297e-06,
"loss": 0.1061,
"step": 342,
"ts_encoder_learning_rate": 7.754484907260513e-06
},
{
"epoch": 0.9041930966306945,
"grad_norm": 1.3700140058079462,
"learning_rate": 7.754484907260513e-06,
"loss": 0.0816,
"step": 343,
"ts_encoder_learning_rate": 7.741093791146517e-06
},
{
"epoch": 0.9068292281077519,
"grad_norm": 2.1966063055592366,
"learning_rate": 7.741093791146517e-06,
"loss": 0.1056,
"step": 344,
"ts_encoder_learning_rate": 7.727674506052744e-06
},
{
"epoch": 0.9094653595848093,
"grad_norm": 1.7516803825411136,
"learning_rate": 7.727674506052744e-06,
"loss": 0.0933,
"step": 345,
"ts_encoder_learning_rate": 7.714227189883112e-06
},
{
"epoch": 0.9121014910618667,
"grad_norm": 2.2157367065223297,
"learning_rate": 7.714227189883112e-06,
"loss": 0.0918,
"step": 346,
"ts_encoder_learning_rate": 7.700751980829601e-06
},
{
"epoch": 0.9147376225389241,
"grad_norm": 2.20733086616214,
"learning_rate": 7.700751980829601e-06,
"loss": 0.0975,
"step": 347,
"ts_encoder_learning_rate": 7.687249017370832e-06
},
{
"epoch": 0.9173737540159815,
"grad_norm": 2.084116228493792,
"learning_rate": 7.687249017370832e-06,
"loss": 0.0992,
"step": 348,
"ts_encoder_learning_rate": 7.673718438270649e-06
},
{
"epoch": 0.920009885493039,
"grad_norm": 2.084868196282067,
"learning_rate": 7.673718438270649e-06,
"loss": 0.0891,
"step": 349,
"ts_encoder_learning_rate": 7.660160382576683e-06
},
{
"epoch": 0.9226460169700964,
"grad_norm": 1.8080308541934171,
"learning_rate": 7.660160382576683e-06,
"loss": 0.0891,
"step": 350,
"ts_encoder_learning_rate": 7.646574989618938e-06
},
{
"epoch": 0.9252821484471538,
"grad_norm": 2.2542071099863397,
"learning_rate": 7.646574989618938e-06,
"loss": 0.0866,
"step": 351,
"ts_encoder_learning_rate": 7.632962399008342e-06
},
{
"epoch": 0.9279182799242112,
"grad_norm": 2.4095949616333283,
"learning_rate": 7.632962399008342e-06,
"loss": 0.0945,
"step": 352,
"ts_encoder_learning_rate": 7.619322750635327e-06
},
{
"epoch": 0.9305544114012686,
"grad_norm": 2.0875916761878934,
"learning_rate": 7.619322750635327e-06,
"loss": 0.0865,
"step": 353,
"ts_encoder_learning_rate": 7.605656184668385e-06
},
{
"epoch": 0.933190542878326,
"grad_norm": 1.7064657643571555,
"learning_rate": 7.605656184668385e-06,
"loss": 0.0933,
"step": 354,
"ts_encoder_learning_rate": 7.591962841552627e-06
},
{
"epoch": 0.9358266743553835,
"grad_norm": 2.500359725738562,
"learning_rate": 7.591962841552627e-06,
"loss": 0.0872,
"step": 355,
"ts_encoder_learning_rate": 7.578242862008336e-06
},
{
"epoch": 0.9384628058324409,
"grad_norm": 1.384078600663941,
"learning_rate": 7.578242862008336e-06,
"loss": 0.0702,
"step": 356,
"ts_encoder_learning_rate": 7.564496387029532e-06
},
{
"epoch": 0.9410989373094983,
"grad_norm": 1.5245598469180928,
"learning_rate": 7.564496387029532e-06,
"loss": 0.0801,
"step": 357,
"ts_encoder_learning_rate": 7.550723557882514e-06
},
{
"epoch": 0.9437350687865558,
"grad_norm": 2.045161639456311,
"learning_rate": 7.550723557882514e-06,
"loss": 0.0937,
"step": 358,
"ts_encoder_learning_rate": 7.536924516104411e-06
},
{
"epoch": 0.9463712002636131,
"grad_norm": 2.5839255461657027,
"learning_rate": 7.536924516104411e-06,
"loss": 0.0942,
"step": 359,
"ts_encoder_learning_rate": 7.52309940350173e-06
},
{
"epoch": 0.9490073317406705,
"grad_norm": 3.2274496289419212,
"learning_rate": 7.52309940350173e-06,
"loss": 0.1023,
"step": 360,
"ts_encoder_learning_rate": 7.509248362148889e-06
},
{
"epoch": 0.951643463217728,
"grad_norm": 1.4645186215038737,
"learning_rate": 7.509248362148889e-06,
"loss": 0.0889,
"step": 361,
"ts_encoder_learning_rate": 7.49537153438677e-06
},
{
"epoch": 0.9542795946947854,
"grad_norm": 3.4885631800713903,
"learning_rate": 7.49537153438677e-06,
"loss": 0.1,
"step": 362,
"ts_encoder_learning_rate": 7.481469062821252e-06
},
{
"epoch": 0.9569157261718428,
"grad_norm": 1.6689194932402402,
"learning_rate": 7.481469062821252e-06,
"loss": 0.0909,
"step": 363,
"ts_encoder_learning_rate": 7.467541090321735e-06
},
{
"epoch": 0.9595518576489003,
"grad_norm": 1.3923817479195213,
"learning_rate": 7.467541090321735e-06,
"loss": 0.086,
"step": 364,
"ts_encoder_learning_rate": 7.453587760019691e-06
},
{
"epoch": 0.9621879891259577,
"grad_norm": 2.1695652586948606,
"learning_rate": 7.453587760019691e-06,
"loss": 0.0755,
"step": 365,
"ts_encoder_learning_rate": 7.439609215307173e-06
},
{
"epoch": 0.964824120603015,
"grad_norm": 1.9185102579274915,
"learning_rate": 7.439609215307173e-06,
"loss": 0.0717,
"step": 366,
"ts_encoder_learning_rate": 7.42560559983536e-06
},
{
"epoch": 0.9674602520800725,
"grad_norm": 2.425906908342805,
"learning_rate": 7.42560559983536e-06,
"loss": 0.0958,
"step": 367,
"ts_encoder_learning_rate": 7.411577057513066e-06
},
{
"epoch": 0.9700963835571299,
"grad_norm": 1.4397236920539425,
"learning_rate": 7.411577057513066e-06,
"loss": 0.101,
"step": 368,
"ts_encoder_learning_rate": 7.39752373250527e-06
},
{
"epoch": 0.9727325150341873,
"grad_norm": 1.0990134832990999,
"learning_rate": 7.39752373250527e-06,
"loss": 0.0754,
"step": 369,
"ts_encoder_learning_rate": 7.383445769231628e-06
},
{
"epoch": 0.9753686465112448,
"grad_norm": 1.3029567612526982,
"learning_rate": 7.383445769231628e-06,
"loss": 0.0789,
"step": 370,
"ts_encoder_learning_rate": 7.369343312364994e-06
},
{
"epoch": 0.9780047779883022,
"grad_norm": 2.3110295949001545,
"learning_rate": 7.369343312364994e-06,
"loss": 0.0798,
"step": 371,
"ts_encoder_learning_rate": 7.355216506829933e-06
},
{
"epoch": 0.9806409094653595,
"grad_norm": 1.7411270859762809,
"learning_rate": 7.355216506829933e-06,
"loss": 0.089,
"step": 372,
"ts_encoder_learning_rate": 7.34106549780123e-06
},
{
"epoch": 0.983277040942417,
"grad_norm": 1.8926501676131235,
"learning_rate": 7.34106549780123e-06,
"loss": 0.0985,
"step": 373,
"ts_encoder_learning_rate": 7.326890430702396e-06
},
{
"epoch": 0.9859131724194744,
"grad_norm": 1.7001478537324086,
"learning_rate": 7.326890430702396e-06,
"loss": 0.0798,
"step": 374,
"ts_encoder_learning_rate": 7.312691451204178e-06
},
{
"epoch": 0.9885493038965318,
"grad_norm": 1.9333040871086815,
"learning_rate": 7.312691451204178e-06,
"loss": 0.0794,
"step": 375,
"ts_encoder_learning_rate": 7.2984687052230585e-06
},
{
"epoch": 0.9911854353735893,
"grad_norm": 1.6635256130487461,
"learning_rate": 7.2984687052230585e-06,
"loss": 0.077,
"step": 376,
"ts_encoder_learning_rate": 7.284222338919758e-06
},
{
"epoch": 0.9938215668506467,
"grad_norm": 1.7377302475474232,
"learning_rate": 7.284222338919758e-06,
"loss": 0.0828,
"step": 377,
"ts_encoder_learning_rate": 7.269952498697734e-06
},
{
"epoch": 0.9964576983277041,
"grad_norm": 1.8805139125533905,
"learning_rate": 7.269952498697734e-06,
"loss": 0.0766,
"step": 378,
"ts_encoder_learning_rate": 7.255659331201673e-06
},
{
"epoch": 0.9990938298047615,
"grad_norm": 1.3707632091787474,
"learning_rate": 7.255659331201673e-06,
"loss": 0.0824,
"step": 379,
"ts_encoder_learning_rate": 7.241342983315985e-06
},
{
"epoch": 1.0,
"grad_norm": 1.3707632091787474,
"learning_rate": 7.241342983315985e-06,
"loss": 0.0264,
"step": 380,
"ts_encoder_learning_rate": 7.227003602163296e-06
},
{
"epoch": 1.0026361314770573,
"grad_norm": 3.1286777602818665,
"learning_rate": 7.227003602163296e-06,
"loss": 0.0931,
"step": 381,
"ts_encoder_learning_rate": 7.212641335102932e-06
},
{
"epoch": 1.0052722629541149,
"grad_norm": 2.8391713475277,
"learning_rate": 7.212641335102932e-06,
"loss": 0.0722,
"step": 382,
"ts_encoder_learning_rate": 7.198256329729412e-06
},
{
"epoch": 1.0079083944311722,
"grad_norm": 1.256304357468329,
"learning_rate": 7.198256329729412e-06,
"loss": 0.0769,
"step": 383,
"ts_encoder_learning_rate": 7.183848733870917e-06
},
{
"epoch": 1.0105445259082297,
"grad_norm": 3.5250439890455767,
"learning_rate": 7.183848733870917e-06,
"loss": 0.0893,
"step": 384,
"ts_encoder_learning_rate": 7.169418695587791e-06
},
{
"epoch": 1.013180657385287,
"grad_norm": 2.626283878871451,
"learning_rate": 7.169418695587791e-06,
"loss": 0.0729,
"step": 385,
"ts_encoder_learning_rate": 7.154966363171003e-06
},
{
"epoch": 1.0158167888623446,
"grad_norm": 1.760910812924816,
"learning_rate": 7.154966363171003e-06,
"loss": 0.0879,
"step": 386,
"ts_encoder_learning_rate": 7.140491885140629e-06
},
{
"epoch": 1.018452920339402,
"grad_norm": 1.955129103690491,
"learning_rate": 7.140491885140629e-06,
"loss": 0.068,
"step": 387,
"ts_encoder_learning_rate": 7.125995410244324e-06
},
{
"epoch": 1.0210890518164593,
"grad_norm": 1.4906223976736166,
"learning_rate": 7.125995410244324e-06,
"loss": 0.0671,
"step": 388,
"ts_encoder_learning_rate": 7.1114770874558e-06
},
{
"epoch": 1.0237251832935168,
"grad_norm": 1.4849510993800188,
"learning_rate": 7.1114770874558e-06,
"loss": 0.0751,
"step": 389,
"ts_encoder_learning_rate": 7.096937065973285e-06
},
{
"epoch": 1.0263613147705741,
"grad_norm": 1.497911410516406,
"learning_rate": 7.096937065973285e-06,
"loss": 0.0673,
"step": 390,
"ts_encoder_learning_rate": 7.082375495217996e-06
},
{
"epoch": 1.0289974462476317,
"grad_norm": 1.4093983694447139,
"learning_rate": 7.082375495217996e-06,
"loss": 0.0722,
"step": 391,
"ts_encoder_learning_rate": 7.067792524832604e-06
},
{
"epoch": 1.031633577724689,
"grad_norm": 1.3429042920164558,
"learning_rate": 7.067792524832604e-06,
"loss": 0.0716,
"step": 392,
"ts_encoder_learning_rate": 7.053188304679691e-06
},
{
"epoch": 1.0342697092017463,
"grad_norm": 1.606708984123339,
"learning_rate": 7.053188304679691e-06,
"loss": 0.0745,
"step": 393,
"ts_encoder_learning_rate": 7.038562984840216e-06
},
{
"epoch": 1.0369058406788039,
"grad_norm": 1.9904571024970765,
"learning_rate": 7.038562984840216e-06,
"loss": 0.0677,
"step": 394,
"ts_encoder_learning_rate": 7.023916715611969e-06
},
{
"epoch": 1.0395419721558612,
"grad_norm": 1.4505783107760564,
"learning_rate": 7.023916715611969e-06,
"loss": 0.0647,
"step": 395,
"ts_encoder_learning_rate": 7.009249647508028e-06
},
{
"epoch": 1.0421781036329187,
"grad_norm": 1.5229728357909127,
"learning_rate": 7.009249647508028e-06,
"loss": 0.0652,
"step": 396,
"ts_encoder_learning_rate": 6.994561931255209e-06
},
{
"epoch": 1.044814235109976,
"grad_norm": 2.6769507938266277,
"learning_rate": 6.994561931255209e-06,
"loss": 0.0678,
"step": 397,
"ts_encoder_learning_rate": 6.9798537177925226e-06
},
{
"epoch": 1.0474503665870336,
"grad_norm": 2.0169207725312703,
"learning_rate": 6.9798537177925226e-06,
"loss": 0.074,
"step": 398,
"ts_encoder_learning_rate": 6.965125158269619e-06
},
{
"epoch": 1.050086498064091,
"grad_norm": 1.9292523257890648,
"learning_rate": 6.965125158269619e-06,
"loss": 0.076,
"step": 399,
"ts_encoder_learning_rate": 6.950376404045235e-06
},
{
"epoch": 1.0527226295411483,
"grad_norm": 1.841218569643746,
"learning_rate": 6.950376404045235e-06,
"loss": 0.0815,
"step": 400,
"ts_encoder_learning_rate": 6.935607606685642e-06
},
{
"epoch": 1.0553587610182058,
"grad_norm": 1.3685510522725666,
"learning_rate": 6.935607606685642e-06,
"loss": 0.0649,
"step": 401,
"ts_encoder_learning_rate": 6.9208189179630805e-06
},
{
"epoch": 1.0579948924952631,
"grad_norm": 1.2255871348116065,
"learning_rate": 6.9208189179630805e-06,
"loss": 0.0751,
"step": 402,
"ts_encoder_learning_rate": 6.906010489854209e-06
},
{
"epoch": 1.0606310239723207,
"grad_norm": 2.25284044313197,
"learning_rate": 6.906010489854209e-06,
"loss": 0.0722,
"step": 403,
"ts_encoder_learning_rate": 6.891182474538539e-06
},
{
"epoch": 1.063267155449378,
"grad_norm": 1.574007278408115,
"learning_rate": 6.891182474538539e-06,
"loss": 0.0874,
"step": 404,
"ts_encoder_learning_rate": 6.876335024396872e-06
},
{
"epoch": 1.0659032869264355,
"grad_norm": 1.4368386388721925,
"learning_rate": 6.876335024396872e-06,
"loss": 0.064,
"step": 405,
"ts_encoder_learning_rate": 6.8614682920097265e-06
},
{
"epoch": 1.0685394184034929,
"grad_norm": 1.698997766946676,
"learning_rate": 6.8614682920097265e-06,
"loss": 0.0861,
"step": 406,
"ts_encoder_learning_rate": 6.846582430155783e-06
},
{
"epoch": 1.0711755498805502,
"grad_norm": 1.621405485095503,
"learning_rate": 6.846582430155783e-06,
"loss": 0.0616,
"step": 407,
"ts_encoder_learning_rate": 6.831677591810302e-06
},
{
"epoch": 1.0738116813576077,
"grad_norm": 1.7546607458906713,
"learning_rate": 6.831677591810302e-06,
"loss": 0.0634,
"step": 408,
"ts_encoder_learning_rate": 6.816753930143558e-06
},
{
"epoch": 1.076447812834665,
"grad_norm": 1.530962724670228,
"learning_rate": 6.816753930143558e-06,
"loss": 0.0605,
"step": 409,
"ts_encoder_learning_rate": 6.801811598519268e-06
},
{
"epoch": 1.0790839443117226,
"grad_norm": 1.4049855220372447,
"learning_rate": 6.801811598519268e-06,
"loss": 0.0651,
"step": 410,
"ts_encoder_learning_rate": 6.786850750493006e-06
},
{
"epoch": 1.08172007578878,
"grad_norm": 1.2835177716818393,
"learning_rate": 6.786850750493006e-06,
"loss": 0.0588,
"step": 411,
"ts_encoder_learning_rate": 6.771871539810633e-06
},
{
"epoch": 1.0843562072658375,
"grad_norm": 2.177159514710094,
"learning_rate": 6.771871539810633e-06,
"loss": 0.0714,
"step": 412,
"ts_encoder_learning_rate": 6.7568741204067145e-06
},
{
"epoch": 1.0869923387428948,
"grad_norm": 2.0968533031020744,
"learning_rate": 6.7568741204067145e-06,
"loss": 0.075,
"step": 413,
"ts_encoder_learning_rate": 6.741858646402941e-06
},
{
"epoch": 1.0896284702199521,
"grad_norm": 2.0444573124344996,
"learning_rate": 6.741858646402941e-06,
"loss": 0.0761,
"step": 414,
"ts_encoder_learning_rate": 6.726825272106539e-06
},
{
"epoch": 1.0922646016970097,
"grad_norm": 1.8927366457463946,
"learning_rate": 6.726825272106539e-06,
"loss": 0.0843,
"step": 415,
"ts_encoder_learning_rate": 6.71177415200869e-06
},
{
"epoch": 1.094900733174067,
"grad_norm": 1.5743826612232619,
"learning_rate": 6.71177415200869e-06,
"loss": 0.0669,
"step": 416,
"ts_encoder_learning_rate": 6.696705440782939e-06
},
{
"epoch": 1.0975368646511245,
"grad_norm": 1.9849832173789321,
"learning_rate": 6.696705440782939e-06,
"loss": 0.0606,
"step": 417,
"ts_encoder_learning_rate": 6.68161929328361e-06
},
{
"epoch": 1.1001729961281819,
"grad_norm": 1.2161772899273893,
"learning_rate": 6.68161929328361e-06,
"loss": 0.0598,
"step": 418,
"ts_encoder_learning_rate": 6.66651586454421e-06
},
{
"epoch": 1.1028091276052394,
"grad_norm": 1.2393164520057445,
"learning_rate": 6.66651586454421e-06,
"loss": 0.0692,
"step": 419,
"ts_encoder_learning_rate": 6.651395309775837e-06
},
{
"epoch": 1.1054452590822967,
"grad_norm": 1.4520060898019855,
"learning_rate": 6.651395309775837e-06,
"loss": 0.0692,
"step": 420,
"ts_encoder_learning_rate": 6.636257784365585e-06
},
{
"epoch": 1.108081390559354,
"grad_norm": 2.071887988162363,
"learning_rate": 6.636257784365585e-06,
"loss": 0.0817,
"step": 421,
"ts_encoder_learning_rate": 6.62110344387495e-06
},
{
"epoch": 1.1107175220364116,
"grad_norm": 1.4450804845065022,
"learning_rate": 6.62110344387495e-06,
"loss": 0.0615,
"step": 422,
"ts_encoder_learning_rate": 6.605932444038229e-06
},
{
"epoch": 1.113353653513469,
"grad_norm": 4.131996313423228,
"learning_rate": 6.605932444038229e-06,
"loss": 0.0567,
"step": 423,
"ts_encoder_learning_rate": 6.5907449407609145e-06
},
{
"epoch": 1.1159897849905265,
"grad_norm": 1.3469924411982919,
"learning_rate": 6.5907449407609145e-06,
"loss": 0.0688,
"step": 424,
"ts_encoder_learning_rate": 6.575541090118105e-06
},
{
"epoch": 1.1186259164675838,
"grad_norm": 1.676545292506096,
"learning_rate": 6.575541090118105e-06,
"loss": 0.0707,
"step": 425,
"ts_encoder_learning_rate": 6.5603210483528864e-06
},
{
"epoch": 1.1212620479446413,
"grad_norm": 2.1293469373373086,
"learning_rate": 6.5603210483528864e-06,
"loss": 0.0783,
"step": 426,
"ts_encoder_learning_rate": 6.545084971874738e-06
},
{
"epoch": 1.1238981794216987,
"grad_norm": 2.1545361587555893,
"learning_rate": 6.545084971874738e-06,
"loss": 0.0662,
"step": 427,
"ts_encoder_learning_rate": 6.529833017257919e-06
},
{
"epoch": 1.126534310898756,
"grad_norm": 1.5431217078868176,
"learning_rate": 6.529833017257919e-06,
"loss": 0.0665,
"step": 428,
"ts_encoder_learning_rate": 6.514565341239861e-06
},
{
"epoch": 1.1291704423758135,
"grad_norm": 1.6871162084026083,
"learning_rate": 6.514565341239861e-06,
"loss": 0.0609,
"step": 429,
"ts_encoder_learning_rate": 6.499282100719558e-06
},
{
"epoch": 1.1318065738528709,
"grad_norm": 1.3008708089844792,
"learning_rate": 6.499282100719558e-06,
"loss": 0.0669,
"step": 430,
"ts_encoder_learning_rate": 6.483983452755953e-06
},
{
"epoch": 1.1344427053299284,
"grad_norm": 1.31908563414336,
"learning_rate": 6.483983452755953e-06,
"loss": 0.0681,
"step": 431,
"ts_encoder_learning_rate": 6.468669554566324e-06
},
{
"epoch": 1.1370788368069857,
"grad_norm": 2.2211285641938403,
"learning_rate": 6.468669554566324e-06,
"loss": 0.0732,
"step": 432,
"ts_encoder_learning_rate": 6.4533405635246696e-06
},
{
"epoch": 1.1397149682840433,
"grad_norm": 2.0753940060908413,
"learning_rate": 6.4533405635246696e-06,
"loss": 0.0663,
"step": 433,
"ts_encoder_learning_rate": 6.437996637160086e-06
},
{
"epoch": 1.1423510997611006,
"grad_norm": 1.4786983123231945,
"learning_rate": 6.437996637160086e-06,
"loss": 0.0546,
"step": 434,
"ts_encoder_learning_rate": 6.4226379331551625e-06
},
{
"epoch": 1.144987231238158,
"grad_norm": 1.5876247856249708,
"learning_rate": 6.4226379331551625e-06,
"loss": 0.0735,
"step": 435,
"ts_encoder_learning_rate": 6.407264609344344e-06
},
{
"epoch": 1.1476233627152155,
"grad_norm": 1.670327323230943,
"learning_rate": 6.407264609344344e-06,
"loss": 0.0604,
"step": 436,
"ts_encoder_learning_rate": 6.3918768237123175e-06
},
{
"epoch": 1.1502594941922728,
"grad_norm": 1.7007074105089541,
"learning_rate": 6.3918768237123175e-06,
"loss": 0.0506,
"step": 437,
"ts_encoder_learning_rate": 6.376474734392388e-06
},
{
"epoch": 1.1528956256693303,
"grad_norm": 0.8723701532963661,
"learning_rate": 6.376474734392388e-06,
"loss": 0.051,
"step": 438,
"ts_encoder_learning_rate": 6.361058499664856e-06
},
{
"epoch": 1.1555317571463877,
"grad_norm": 1.2073690713191445,
"learning_rate": 6.361058499664856e-06,
"loss": 0.0658,
"step": 439,
"ts_encoder_learning_rate": 6.345628277955384e-06
},
{
"epoch": 1.1581678886234452,
"grad_norm": 4.282100892926092,
"learning_rate": 6.345628277955384e-06,
"loss": 0.0638,
"step": 440,
"ts_encoder_learning_rate": 6.330184227833376e-06
},
{
"epoch": 1.1608040201005025,
"grad_norm": 1.0987411320570284,
"learning_rate": 6.330184227833376e-06,
"loss": 0.058,
"step": 441,
"ts_encoder_learning_rate": 6.3147265080103405e-06
},
{
"epoch": 1.1634401515775599,
"grad_norm": 0.9366573260467199,
"learning_rate": 6.3147265080103405e-06,
"loss": 0.0473,
"step": 442,
"ts_encoder_learning_rate": 6.299255277338265e-06
},
{
"epoch": 1.1660762830546174,
"grad_norm": 6.839529381634898,
"learning_rate": 6.299255277338265e-06,
"loss": 0.1152,
"step": 443,
"ts_encoder_learning_rate": 6.283770694807983e-06
},
{
"epoch": 1.1687124145316747,
"grad_norm": 6.3594435572609305,
"learning_rate": 6.283770694807983e-06,
"loss": 0.093,
"step": 444,
"ts_encoder_learning_rate": 6.268272919547537e-06
},
{
"epoch": 1.171348546008732,
"grad_norm": 40.08684333591222,
"learning_rate": 6.268272919547537e-06,
"loss": 0.0962,
"step": 445,
"ts_encoder_learning_rate": 6.252762110820548e-06
},
{
"epoch": 1.1739846774857896,
"grad_norm": 2.943023237687571,
"learning_rate": 6.252762110820548e-06,
"loss": 0.0901,
"step": 446,
"ts_encoder_learning_rate": 6.237238428024573e-06
},
{
"epoch": 1.176620808962847,
"grad_norm": 2.3992218806525445,
"learning_rate": 6.237238428024573e-06,
"loss": 0.0862,
"step": 447,
"ts_encoder_learning_rate": 6.2217020306894705e-06
},
{
"epoch": 1.1792569404399045,
"grad_norm": 5.0353855751257415,
"learning_rate": 6.2217020306894705e-06,
"loss": 0.0668,
"step": 448,
"ts_encoder_learning_rate": 6.2061530784757625e-06
},
{
"epoch": 1.1818930719169618,
"grad_norm": 1.8328704649956604,
"learning_rate": 6.2061530784757625e-06,
"loss": 0.0738,
"step": 449,
"ts_encoder_learning_rate": 6.1905917311729915e-06
},
{
"epoch": 1.1845292033940193,
"grad_norm": 1.5717279092672578,
"learning_rate": 6.1905917311729915e-06,
"loss": 0.0603,
"step": 450,
"ts_encoder_learning_rate": 6.175018148698077e-06
},
{
"epoch": 1.1871653348710767,
"grad_norm": 2.392989669488609,
"learning_rate": 6.175018148698077e-06,
"loss": 0.0524,
"step": 451,
"ts_encoder_learning_rate": 6.1594324910936734e-06
},
{
"epoch": 1.189801466348134,
"grad_norm": 1.5764592052091633,
"learning_rate": 6.1594324910936734e-06,
"loss": 0.0621,
"step": 452,
"ts_encoder_learning_rate": 6.143834918526528e-06
},
{
"epoch": 1.1924375978251915,
"grad_norm": 1.8238472425314327,
"learning_rate": 6.143834918526528e-06,
"loss": 0.0622,
"step": 453,
"ts_encoder_learning_rate": 6.1282255912858315e-06
},
{
"epoch": 1.1950737293022489,
"grad_norm": 2.8133947110243485,
"learning_rate": 6.1282255912858315e-06,
"loss": 0.0862,
"step": 454,
"ts_encoder_learning_rate": 6.112604669781572e-06
},
{
"epoch": 1.1977098607793064,
"grad_norm": 1.3652304574964094,
"learning_rate": 6.112604669781572e-06,
"loss": 0.0527,
"step": 455,
"ts_encoder_learning_rate": 6.096972314542889e-06
},
{
"epoch": 1.2003459922563637,
"grad_norm": 1.0715660143813739,
"learning_rate": 6.096972314542889e-06,
"loss": 0.0709,
"step": 456,
"ts_encoder_learning_rate": 6.0813286862164175e-06
},
{
"epoch": 1.2029821237334213,
"grad_norm": 1.5558524275638925,
"learning_rate": 6.0813286862164175e-06,
"loss": 0.0423,
"step": 457,
"ts_encoder_learning_rate": 6.065673945564643e-06
},
{
"epoch": 1.2056182552104786,
"grad_norm": 1.6176907363538375,
"learning_rate": 6.065673945564643e-06,
"loss": 0.0622,
"step": 458,
"ts_encoder_learning_rate": 6.050008253464247e-06
},
{
"epoch": 1.208254386687536,
"grad_norm": 0.8564869594899379,
"learning_rate": 6.050008253464247e-06,
"loss": 0.0455,
"step": 459,
"ts_encoder_learning_rate": 6.034331770904455e-06
},
{
"epoch": 1.2108905181645935,
"grad_norm": 1.1035352728404064,
"learning_rate": 6.034331770904455e-06,
"loss": 0.0563,
"step": 460,
"ts_encoder_learning_rate": 6.018644658985378e-06
},
{
"epoch": 1.2135266496416508,
"grad_norm": 1.5616128959288538,
"learning_rate": 6.018644658985378e-06,
"loss": 0.0539,
"step": 461,
"ts_encoder_learning_rate": 6.002947078916365e-06
},
{
"epoch": 1.2161627811187083,
"grad_norm": 1.8418037108772232,
"learning_rate": 6.002947078916365e-06,
"loss": 0.0659,
"step": 462,
"ts_encoder_learning_rate": 5.987239192014336e-06
},
{
"epoch": 1.2187989125957657,
"grad_norm": 1.6148182637272608,
"learning_rate": 5.987239192014336e-06,
"loss": 0.0486,
"step": 463,
"ts_encoder_learning_rate": 5.971521159702136e-06
},
{
"epoch": 1.2214350440728232,
"grad_norm": 1.1194159742020753,
"learning_rate": 5.971521159702136e-06,
"loss": 0.0517,
"step": 464,
"ts_encoder_learning_rate": 5.955793143506863e-06
},
{
"epoch": 1.2240711755498805,
"grad_norm": 2.5603301388490065,
"learning_rate": 5.955793143506863e-06,
"loss": 0.0792,
"step": 465,
"ts_encoder_learning_rate": 5.940055305058219e-06
},
{
"epoch": 1.2267073070269379,
"grad_norm": 2.805962948191509,
"learning_rate": 5.940055305058219e-06,
"loss": 0.0495,
"step": 466,
"ts_encoder_learning_rate": 5.9243078060868445e-06
},
{
"epoch": 1.2293434385039954,
"grad_norm": 1.1698319176717191,
"learning_rate": 5.9243078060868445e-06,
"loss": 0.0469,
"step": 467,
"ts_encoder_learning_rate": 5.908550808422656e-06
},
{
"epoch": 1.2319795699810527,
"grad_norm": 2.0037504801434176,
"learning_rate": 5.908550808422656e-06,
"loss": 0.0861,
"step": 468,
"ts_encoder_learning_rate": 5.892784473993184e-06
},
{
"epoch": 1.2346157014581103,
"grad_norm": 1.723869314498301,
"learning_rate": 5.892784473993184e-06,
"loss": 0.0593,
"step": 469,
"ts_encoder_learning_rate": 5.877008964821909e-06
},
{
"epoch": 1.2372518329351676,
"grad_norm": 1.6055066461843874,
"learning_rate": 5.877008964821909e-06,
"loss": 0.0626,
"step": 470,
"ts_encoder_learning_rate": 5.861224443026595e-06
},
{
"epoch": 1.2398879644122252,
"grad_norm": 1.5661164699031636,
"learning_rate": 5.861224443026595e-06,
"loss": 0.0602,
"step": 471,
"ts_encoder_learning_rate": 5.845431070817627e-06
},
{
"epoch": 1.2425240958892825,
"grad_norm": 1.0282805477784254,
"learning_rate": 5.845431070817627e-06,
"loss": 0.0524,
"step": 472,
"ts_encoder_learning_rate": 5.82962901049634e-06
},
{
"epoch": 1.2451602273663398,
"grad_norm": 1.7894572815574583,
"learning_rate": 5.82962901049634e-06,
"loss": 0.0571,
"step": 473,
"ts_encoder_learning_rate": 5.8138184244533516e-06
},
{
"epoch": 1.2477963588433973,
"grad_norm": 1.06562374064391,
"learning_rate": 5.8138184244533516e-06,
"loss": 0.057,
"step": 474,
"ts_encoder_learning_rate": 5.797999475166897e-06
},
{
"epoch": 1.2504324903204547,
"grad_norm": 2.604409235331405,
"learning_rate": 5.797999475166897e-06,
"loss": 0.0704,
"step": 475,
"ts_encoder_learning_rate": 5.782172325201155e-06
},
{
"epoch": 1.2530686217975122,
"grad_norm": 1.0601741954985433,
"learning_rate": 5.782172325201155e-06,
"loss": 0.0519,
"step": 476,
"ts_encoder_learning_rate": 5.766337137204579e-06
},
{
"epoch": 1.2557047532745695,
"grad_norm": 1.4951262075969727,
"learning_rate": 5.766337137204579e-06,
"loss": 0.0649,
"step": 477,
"ts_encoder_learning_rate": 5.7504940739082305e-06
},
{
"epoch": 1.258340884751627,
"grad_norm": 2.0377946241283964,
"learning_rate": 5.7504940739082305e-06,
"loss": 0.0709,
"step": 478,
"ts_encoder_learning_rate": 5.734643298124091e-06
},
{
"epoch": 1.2609770162286844,
"grad_norm": 1.3627445943335548,
"learning_rate": 5.734643298124091e-06,
"loss": 0.0597,
"step": 479,
"ts_encoder_learning_rate": 5.71878497274341e-06
},
{
"epoch": 1.2636131477057417,
"grad_norm": 1.514945622789671,
"learning_rate": 5.71878497274341e-06,
"loss": 0.0621,
"step": 480,
"ts_encoder_learning_rate": 5.702919260735015e-06
},
{
"epoch": 1.2662492791827993,
"grad_norm": 1.6326473802607846,
"learning_rate": 5.702919260735015e-06,
"loss": 0.0546,
"step": 481,
"ts_encoder_learning_rate": 5.6870463251436485e-06
},
{
"epoch": 1.2688854106598566,
"grad_norm": 1.1610484009982354,
"learning_rate": 5.6870463251436485e-06,
"loss": 0.0721,
"step": 482,
"ts_encoder_learning_rate": 5.671166329088278e-06
},
{
"epoch": 1.2715215421369142,
"grad_norm": 9.497222320275062,
"learning_rate": 5.671166329088278e-06,
"loss": 0.0622,
"step": 483,
"ts_encoder_learning_rate": 5.655279435760436e-06
},
{
"epoch": 1.2741576736139715,
"grad_norm": 2.5943554438763345,
"learning_rate": 5.655279435760436e-06,
"loss": 0.0611,
"step": 484,
"ts_encoder_learning_rate": 5.6393858084225305e-06
},
{
"epoch": 1.276793805091029,
"grad_norm": 1.7043776646620947,
"learning_rate": 5.6393858084225305e-06,
"loss": 0.0557,
"step": 485,
"ts_encoder_learning_rate": 5.623485610406174e-06
},
{
"epoch": 1.2794299365680863,
"grad_norm": 3.8980670638726176,
"learning_rate": 5.623485610406174e-06,
"loss": 0.0645,
"step": 486,
"ts_encoder_learning_rate": 5.6075790051105025e-06
},
{
"epoch": 1.2820660680451437,
"grad_norm": 3.6504547162269523,
"learning_rate": 5.6075790051105025e-06,
"loss": 0.0708,
"step": 487,
"ts_encoder_learning_rate": 5.5916661560004945e-06
},
{
"epoch": 1.2847021995222012,
"grad_norm": 5.021699015115789,
"learning_rate": 5.5916661560004945e-06,
"loss": 0.0595,
"step": 488,
"ts_encoder_learning_rate": 5.575747226605298e-06
},
{
"epoch": 1.2873383309992585,
"grad_norm": 1.8447348562525032,
"learning_rate": 5.575747226605298e-06,
"loss": 0.0692,
"step": 489,
"ts_encoder_learning_rate": 5.559822380516539e-06
},
{
"epoch": 1.289974462476316,
"grad_norm": 1.8812523455034513,
"learning_rate": 5.559822380516539e-06,
"loss": 0.0539,
"step": 490,
"ts_encoder_learning_rate": 5.543891781386655e-06
},
{
"epoch": 1.2926105939533734,
"grad_norm": 1.506328585661642,
"learning_rate": 5.543891781386655e-06,
"loss": 0.0398,
"step": 491,
"ts_encoder_learning_rate": 5.527955592927198e-06
},
{
"epoch": 1.295246725430431,
"grad_norm": 1.8603503730820898,
"learning_rate": 5.527955592927198e-06,
"loss": 0.0585,
"step": 492,
"ts_encoder_learning_rate": 5.512013978907157e-06
},
{
"epoch": 1.2978828569074883,
"grad_norm": 2.4648081055680846,
"learning_rate": 5.512013978907157e-06,
"loss": 0.058,
"step": 493,
"ts_encoder_learning_rate": 5.496067103151288e-06
},
{
"epoch": 1.3005189883845456,
"grad_norm": 1.4041012620060673,
"learning_rate": 5.496067103151288e-06,
"loss": 0.0443,
"step": 494,
"ts_encoder_learning_rate": 5.480115129538409e-06
},
{
"epoch": 1.3031551198616032,
"grad_norm": 1.3893300223548792,
"learning_rate": 5.480115129538409e-06,
"loss": 0.0433,
"step": 495,
"ts_encoder_learning_rate": 5.464158221999731e-06
},
{
"epoch": 1.3057912513386605,
"grad_norm": 1.2015614710865317,
"learning_rate": 5.464158221999731e-06,
"loss": 0.0483,
"step": 496,
"ts_encoder_learning_rate": 5.448196544517168e-06
},
{
"epoch": 1.3084273828157178,
"grad_norm": 1.3699932224551299,
"learning_rate": 5.448196544517168e-06,
"loss": 0.0476,
"step": 497,
"ts_encoder_learning_rate": 5.4322302611216515e-06
},
{
"epoch": 1.3110635142927753,
"grad_norm": 1.3403881800344724,
"learning_rate": 5.4322302611216515e-06,
"loss": 0.0564,
"step": 498,
"ts_encoder_learning_rate": 5.4162595358914475e-06
},
{
"epoch": 1.313699645769833,
"grad_norm": 73.58179677044741,
"learning_rate": 5.4162595358914475e-06,
"loss": 0.0646,
"step": 499,
"ts_encoder_learning_rate": 5.4002845329504675e-06
},
{
"epoch": 1.3163357772468902,
"grad_norm": 0.7574363377553233,
"learning_rate": 5.4002845329504675e-06,
"loss": 0.0572,
"step": 500,
"ts_encoder_learning_rate": 5.384305416466584e-06
},
{
"epoch": 1.3189719087239475,
"grad_norm": 9.085144033066042,
"learning_rate": 5.384305416466584e-06,
"loss": 0.0455,
"step": 501,
"ts_encoder_learning_rate": 5.368322350649942e-06
},
{
"epoch": 1.321608040201005,
"grad_norm": 1.5134736470746724,
"learning_rate": 5.368322350649942e-06,
"loss": 0.0643,
"step": 502,
"ts_encoder_learning_rate": 5.35233549975127e-06
},
{
"epoch": 1.3242441716780624,
"grad_norm": 7.310424680388469,
"learning_rate": 5.35233549975127e-06,
"loss": 0.061,
"step": 503,
"ts_encoder_learning_rate": 5.336345028060199e-06
},
{
"epoch": 1.3268803031551197,
"grad_norm": 1.1323514407618245,
"learning_rate": 5.336345028060199e-06,
"loss": 0.0549,
"step": 504,
"ts_encoder_learning_rate": 5.320351099903565e-06
},
{
"epoch": 1.3295164346321773,
"grad_norm": 1.6279841201077867,
"learning_rate": 5.320351099903565e-06,
"loss": 0.0613,
"step": 505,
"ts_encoder_learning_rate": 5.304353879643727e-06
},
{
"epoch": 1.3321525661092348,
"grad_norm": 1.3523619947920968,
"learning_rate": 5.304353879643727e-06,
"loss": 0.0472,
"step": 506,
"ts_encoder_learning_rate": 5.288353531676873e-06
},
{
"epoch": 1.3347886975862921,
"grad_norm": 1.6311287951575122,
"learning_rate": 5.288353531676873e-06,
"loss": 0.0449,
"step": 507,
"ts_encoder_learning_rate": 5.2723502204313346e-06
},
{
"epoch": 1.3374248290633495,
"grad_norm": 1.254954921395093,
"learning_rate": 5.2723502204313346e-06,
"loss": 0.0468,
"step": 508,
"ts_encoder_learning_rate": 5.256344110365896e-06
},
{
"epoch": 1.340060960540407,
"grad_norm": 1.4319071308303999,
"learning_rate": 5.256344110365896e-06,
"loss": 0.047,
"step": 509,
"ts_encoder_learning_rate": 5.240335365968104e-06
},
{
"epoch": 1.3426970920174643,
"grad_norm": 1.8236696801665577,
"learning_rate": 5.240335365968104e-06,
"loss": 0.0514,
"step": 510,
"ts_encoder_learning_rate": 5.224324151752575e-06
},
{
"epoch": 1.3453332234945217,
"grad_norm": 1.9778928788592056,
"learning_rate": 5.224324151752575e-06,
"loss": 0.052,
"step": 511,
"ts_encoder_learning_rate": 5.208310632259308e-06
},
{
"epoch": 1.3479693549715792,
"grad_norm": 1.417312718751431,
"learning_rate": 5.208310632259308e-06,
"loss": 0.0477,
"step": 512,
"ts_encoder_learning_rate": 5.192294972051992e-06
},
{
"epoch": 1.3506054864486368,
"grad_norm": 2.0176988121139523,
"learning_rate": 5.192294972051992e-06,
"loss": 0.0439,
"step": 513,
"ts_encoder_learning_rate": 5.1762773357163175e-06
},
{
"epoch": 1.353241617925694,
"grad_norm": 1.8355976352392431,
"learning_rate": 5.1762773357163175e-06,
"loss": 0.0657,
"step": 514,
"ts_encoder_learning_rate": 5.160257887858278e-06
},
{
"epoch": 1.3558777494027514,
"grad_norm": 1.6689128377623292,
"learning_rate": 5.160257887858278e-06,
"loss": 0.0557,
"step": 515,
"ts_encoder_learning_rate": 5.144236793102485e-06
},
{
"epoch": 1.358513880879809,
"grad_norm": 1.8005326862964846,
"learning_rate": 5.144236793102485e-06,
"loss": 0.0538,
"step": 516,
"ts_encoder_learning_rate": 5.128214216090478e-06
},
{
"epoch": 1.3611500123568663,
"grad_norm": 1.633570103313834,
"learning_rate": 5.128214216090478e-06,
"loss": 0.0448,
"step": 517,
"ts_encoder_learning_rate": 5.112190321479026e-06
},
{
"epoch": 1.3637861438339236,
"grad_norm": 1.207340276387567,
"learning_rate": 5.112190321479026e-06,
"loss": 0.0418,
"step": 518,
"ts_encoder_learning_rate": 5.0961652739384356e-06
},
{
"epoch": 1.3664222753109811,
"grad_norm": 0.8605880311313963,
"learning_rate": 5.0961652739384356e-06,
"loss": 0.051,
"step": 519,
"ts_encoder_learning_rate": 5.080139238150869e-06
},
{
"epoch": 1.3690584067880385,
"grad_norm": 1.0552622999396024,
"learning_rate": 5.080139238150869e-06,
"loss": 0.0545,
"step": 520,
"ts_encoder_learning_rate": 5.064112378808636e-06
},
{
"epoch": 1.371694538265096,
"grad_norm": 1.7431125427812204,
"learning_rate": 5.064112378808636e-06,
"loss": 0.0544,
"step": 521,
"ts_encoder_learning_rate": 5.048084860612516e-06
},
{
"epoch": 1.3743306697421533,
"grad_norm": 1.4698730704414043,
"learning_rate": 5.048084860612516e-06,
"loss": 0.0639,
"step": 522,
"ts_encoder_learning_rate": 5.032056848270056e-06
},
{
"epoch": 1.376966801219211,
"grad_norm": 1.8450236243661535,
"learning_rate": 5.032056848270056e-06,
"loss": 0.0467,
"step": 523,
"ts_encoder_learning_rate": 5.016028506493881e-06
},
{
"epoch": 1.3796029326962682,
"grad_norm": 2.052864394366136,
"learning_rate": 5.016028506493881e-06,
"loss": 0.0491,
"step": 524,
"ts_encoder_learning_rate": 5e-06
},
{
"epoch": 1.3822390641733255,
"grad_norm": 1.5026027017438155,
"learning_rate": 5e-06,
"loss": 0.0464,
"step": 525,
"ts_encoder_learning_rate": 4.9839714935061215e-06
},
{
"epoch": 1.384875195650383,
"grad_norm": 1.153030381312369,
"learning_rate": 4.9839714935061215e-06,
"loss": 0.0419,
"step": 526,
"ts_encoder_learning_rate": 4.967943151729945e-06
},
{
"epoch": 1.3875113271274404,
"grad_norm": 1.4490249141706013,
"learning_rate": 4.967943151729945e-06,
"loss": 0.0603,
"step": 527,
"ts_encoder_learning_rate": 4.951915139387484e-06
},
{
"epoch": 1.390147458604498,
"grad_norm": 1.4564680397959866,
"learning_rate": 4.951915139387484e-06,
"loss": 0.0477,
"step": 528,
"ts_encoder_learning_rate": 4.935887621191364e-06
},
{
"epoch": 1.3927835900815553,
"grad_norm": 1.3964892258738686,
"learning_rate": 4.935887621191364e-06,
"loss": 0.0528,
"step": 529,
"ts_encoder_learning_rate": 4.919860761849132e-06
},
{
"epoch": 1.3954197215586128,
"grad_norm": 6.281977322231947,
"learning_rate": 4.919860761849132e-06,
"loss": 0.0548,
"step": 530,
"ts_encoder_learning_rate": 4.903834726061565e-06
},
{
"epoch": 1.3980558530356701,
"grad_norm": 1.3837480562779865,
"learning_rate": 4.903834726061565e-06,
"loss": 0.0455,
"step": 531,
"ts_encoder_learning_rate": 4.887809678520976e-06
},
{
"epoch": 1.4006919845127275,
"grad_norm": 1.0021501716084598,
"learning_rate": 4.887809678520976e-06,
"loss": 0.0494,
"step": 532,
"ts_encoder_learning_rate": 4.871785783909523e-06
},
{
"epoch": 1.403328115989785,
"grad_norm": 1.151715988041567,
"learning_rate": 4.871785783909523e-06,
"loss": 0.0433,
"step": 533,
"ts_encoder_learning_rate": 4.855763206897516e-06
},
{
"epoch": 1.4059642474668423,
"grad_norm": 0.858749288851531,
"learning_rate": 4.855763206897516e-06,
"loss": 0.0362,
"step": 534,
"ts_encoder_learning_rate": 4.839742112141725e-06
},
{
"epoch": 1.4086003789439,
"grad_norm": 1.9221750234266837,
"learning_rate": 4.839742112141725e-06,
"loss": 0.0552,
"step": 535,
"ts_encoder_learning_rate": 4.823722664283684e-06
},
{
"epoch": 1.4112365104209572,
"grad_norm": 2.0598775899946347,
"learning_rate": 4.823722664283684e-06,
"loss": 0.0612,
"step": 536,
"ts_encoder_learning_rate": 4.807705027948008e-06
},
{
"epoch": 1.4138726418980148,
"grad_norm": 11.902681310300874,
"learning_rate": 4.807705027948008e-06,
"loss": 0.0572,
"step": 537,
"ts_encoder_learning_rate": 4.7916893677406925e-06
},
{
"epoch": 1.416508773375072,
"grad_norm": 1.4839974850899151,
"learning_rate": 4.7916893677406925e-06,
"loss": 0.0671,
"step": 538,
"ts_encoder_learning_rate": 4.775675848247427e-06
},
{
"epoch": 1.4191449048521294,
"grad_norm": 3.177751373971005,
"learning_rate": 4.775675848247427e-06,
"loss": 0.0592,
"step": 539,
"ts_encoder_learning_rate": 4.759664634031897e-06
},
{
"epoch": 1.421781036329187,
"grad_norm": 2.8690250587273494,
"learning_rate": 4.759664634031897e-06,
"loss": 0.0562,
"step": 540,
"ts_encoder_learning_rate": 4.743655889634105e-06
},
{
"epoch": 1.4244171678062443,
"grad_norm": 2.257175666571135,
"learning_rate": 4.743655889634105e-06,
"loss": 0.0629,
"step": 541,
"ts_encoder_learning_rate": 4.727649779568666e-06
},
{
"epoch": 1.4270532992833018,
"grad_norm": 1.276938190549169,
"learning_rate": 4.727649779568666e-06,
"loss": 0.0492,
"step": 542,
"ts_encoder_learning_rate": 4.711646468323129e-06
},
{
"epoch": 1.4296894307603591,
"grad_norm": 1.2256191207359708,
"learning_rate": 4.711646468323129e-06,
"loss": 0.0411,
"step": 543,
"ts_encoder_learning_rate": 4.695646120356275e-06
},
{
"epoch": 1.4323255622374167,
"grad_norm": 1.607726895519541,
"learning_rate": 4.695646120356275e-06,
"loss": 0.0394,
"step": 544,
"ts_encoder_learning_rate": 4.679648900096436e-06
},
{
"epoch": 1.434961693714474,
"grad_norm": 5.4916831132826704,
"learning_rate": 4.679648900096436e-06,
"loss": 0.0593,
"step": 545,
"ts_encoder_learning_rate": 4.663654971939802e-06
},
{
"epoch": 1.4375978251915313,
"grad_norm": 1.1834633367325678,
"learning_rate": 4.663654971939802e-06,
"loss": 0.0461,
"step": 546,
"ts_encoder_learning_rate": 4.64766450024873e-06
},
{
"epoch": 1.4402339566685889,
"grad_norm": 1.0570788406545149,
"learning_rate": 4.64766450024873e-06,
"loss": 0.0521,
"step": 547,
"ts_encoder_learning_rate": 4.6316776493500615e-06
},
{
"epoch": 1.4428700881456462,
"grad_norm": 1.519805292714776,
"learning_rate": 4.6316776493500615e-06,
"loss": 0.056,
"step": 548,
"ts_encoder_learning_rate": 4.615694583533418e-06
},
{
"epoch": 1.4455062196227038,
"grad_norm": 1.8886920313289481,
"learning_rate": 4.615694583533418e-06,
"loss": 0.059,
"step": 549,
"ts_encoder_learning_rate": 4.599715467049534e-06
},
{
"epoch": 1.448142351099761,
"grad_norm": 1.2634550829904962,
"learning_rate": 4.599715467049534e-06,
"loss": 0.0433,
"step": 550,
"ts_encoder_learning_rate": 4.583740464108554e-06
},
{
"epoch": 1.4507784825768186,
"grad_norm": 1.8049707104769945,
"learning_rate": 4.583740464108554e-06,
"loss": 0.0627,
"step": 551,
"ts_encoder_learning_rate": 4.56776973887835e-06
},
{
"epoch": 1.453414614053876,
"grad_norm": 1.9485167870885407,
"learning_rate": 4.56776973887835e-06,
"loss": 0.0469,
"step": 552,
"ts_encoder_learning_rate": 4.551803455482833e-06
},
{
"epoch": 1.4560507455309333,
"grad_norm": 1.267286074272727,
"learning_rate": 4.551803455482833e-06,
"loss": 0.0619,
"step": 553,
"ts_encoder_learning_rate": 4.53584177800027e-06
},
{
"epoch": 1.4586868770079908,
"grad_norm": 1.3504185426583042,
"learning_rate": 4.53584177800027e-06,
"loss": 0.0533,
"step": 554,
"ts_encoder_learning_rate": 4.5198848704615915e-06
},
{
"epoch": 1.4613230084850481,
"grad_norm": 1.0200226065433153,
"learning_rate": 4.5198848704615915e-06,
"loss": 0.0516,
"step": 555,
"ts_encoder_learning_rate": 4.503932896848713e-06
},
{
"epoch": 1.4639591399621055,
"grad_norm": 1.459926252613376,
"learning_rate": 4.503932896848713e-06,
"loss": 0.0544,
"step": 556,
"ts_encoder_learning_rate": 4.487986021092844e-06
},
{
"epoch": 1.466595271439163,
"grad_norm": 1.044163195089025,
"learning_rate": 4.487986021092844e-06,
"loss": 0.0464,
"step": 557,
"ts_encoder_learning_rate": 4.472044407072805e-06
},
{
"epoch": 1.4692314029162206,
"grad_norm": 1.667493213165678,
"learning_rate": 4.472044407072805e-06,
"loss": 0.0675,
"step": 558,
"ts_encoder_learning_rate": 4.456108218613346e-06
},
{
"epoch": 1.4718675343932779,
"grad_norm": 2.446693967731953,
"learning_rate": 4.456108218613346e-06,
"loss": 0.0499,
"step": 559,
"ts_encoder_learning_rate": 4.4401776194834615e-06
},
{
"epoch": 1.4745036658703352,
"grad_norm": 1.1699696309583159,
"learning_rate": 4.4401776194834615e-06,
"loss": 0.0542,
"step": 560,
"ts_encoder_learning_rate": 4.424252773394704e-06
},
{
"epoch": 1.4771397973473928,
"grad_norm": 1.1962425478415217,
"learning_rate": 4.424252773394704e-06,
"loss": 0.0462,
"step": 561,
"ts_encoder_learning_rate": 4.408333843999506e-06
},
{
"epoch": 1.47977592882445,
"grad_norm": 2.3242298710819758,
"learning_rate": 4.408333843999506e-06,
"loss": 0.0491,
"step": 562,
"ts_encoder_learning_rate": 4.392420994889498e-06
},
{
"epoch": 1.4824120603015074,
"grad_norm": 2.1023818406064643,
"learning_rate": 4.392420994889498e-06,
"loss": 0.0496,
"step": 563,
"ts_encoder_learning_rate": 4.376514389593827e-06
},
{
"epoch": 1.485048191778565,
"grad_norm": 2.4457184111852723,
"learning_rate": 4.376514389593827e-06,
"loss": 0.0524,
"step": 564,
"ts_encoder_learning_rate": 4.3606141915774695e-06
},
{
"epoch": 1.4876843232556225,
"grad_norm": 1.3302734845105864,
"learning_rate": 4.3606141915774695e-06,
"loss": 0.0504,
"step": 565,
"ts_encoder_learning_rate": 4.344720564239567e-06
},
{
"epoch": 1.4903204547326798,
"grad_norm": 1.1082526743380932,
"learning_rate": 4.344720564239567e-06,
"loss": 0.0481,
"step": 566,
"ts_encoder_learning_rate": 4.3288336709117246e-06
},
{
"epoch": 1.4929565862097371,
"grad_norm": 2.427309688292863,
"learning_rate": 4.3288336709117246e-06,
"loss": 0.0511,
"step": 567,
"ts_encoder_learning_rate": 4.312953674856355e-06
},
{
"epoch": 1.4955927176867947,
"grad_norm": 1.7521878257146914,
"learning_rate": 4.312953674856355e-06,
"loss": 0.0508,
"step": 568,
"ts_encoder_learning_rate": 4.297080739264987e-06
},
{
"epoch": 1.498228849163852,
"grad_norm": 1.8035517353788895,
"learning_rate": 4.297080739264987e-06,
"loss": 0.0468,
"step": 569,
"ts_encoder_learning_rate": 4.281215027256592e-06
},
{
"epoch": 1.5008649806409093,
"grad_norm": 1.3680011804510284,
"learning_rate": 4.281215027256592e-06,
"loss": 0.0615,
"step": 570,
"ts_encoder_learning_rate": 4.265356701875911e-06
},
{
"epoch": 1.5035011121179669,
"grad_norm": 1.0277278297390235,
"learning_rate": 4.265356701875911e-06,
"loss": 0.0392,
"step": 571,
"ts_encoder_learning_rate": 4.249505926091771e-06
},
{
"epoch": 1.5061372435950244,
"grad_norm": 1.5675045465594473,
"learning_rate": 4.249505926091771e-06,
"loss": 0.0511,
"step": 572,
"ts_encoder_learning_rate": 4.23366286279542e-06
},
{
"epoch": 1.5087733750720818,
"grad_norm": 2.516079496477877,
"learning_rate": 4.23366286279542e-06,
"loss": 0.0529,
"step": 573,
"ts_encoder_learning_rate": 4.217827674798845e-06
},
{
"epoch": 1.511409506549139,
"grad_norm": 2.7086489547850854,
"learning_rate": 4.217827674798845e-06,
"loss": 0.0443,
"step": 574,
"ts_encoder_learning_rate": 4.2020005248331056e-06
},
{
"epoch": 1.5140456380261966,
"grad_norm": 1.5770057283903636,
"learning_rate": 4.2020005248331056e-06,
"loss": 0.0534,
"step": 575,
"ts_encoder_learning_rate": 4.186181575546651e-06
},
{
"epoch": 1.516681769503254,
"grad_norm": 0.9542576979843979,
"learning_rate": 4.186181575546651e-06,
"loss": 0.0445,
"step": 576,
"ts_encoder_learning_rate": 4.170370989503662e-06
},
{
"epoch": 1.5193179009803113,
"grad_norm": 1.033126540105532,
"learning_rate": 4.170370989503662e-06,
"loss": 0.0436,
"step": 577,
"ts_encoder_learning_rate": 4.154568929182374e-06
},
{
"epoch": 1.5219540324573688,
"grad_norm": 1.3759475573506135,
"learning_rate": 4.154568929182374e-06,
"loss": 0.0471,
"step": 578,
"ts_encoder_learning_rate": 4.138775556973406e-06
},
{
"epoch": 1.5245901639344264,
"grad_norm": 1.715702326116523,
"learning_rate": 4.138775556973406e-06,
"loss": 0.0553,
"step": 579,
"ts_encoder_learning_rate": 4.122991035178093e-06
},
{
"epoch": 1.5272262954114837,
"grad_norm": 1.2802507956632132,
"learning_rate": 4.122991035178093e-06,
"loss": 0.0646,
"step": 580,
"ts_encoder_learning_rate": 4.107215526006818e-06
},
{
"epoch": 1.529862426888541,
"grad_norm": 1.2923645458952004,
"learning_rate": 4.107215526006818e-06,
"loss": 0.0589,
"step": 581,
"ts_encoder_learning_rate": 4.091449191577346e-06
},
{
"epoch": 1.5324985583655986,
"grad_norm": 1.275873998852951,
"learning_rate": 4.091449191577346e-06,
"loss": 0.051,
"step": 582,
"ts_encoder_learning_rate": 4.075692193913156e-06
},
{
"epoch": 1.5351346898426559,
"grad_norm": 1.5143761646518399,
"learning_rate": 4.075692193913156e-06,
"loss": 0.0422,
"step": 583,
"ts_encoder_learning_rate": 4.059944694941783e-06
},
{
"epoch": 1.5377708213197132,
"grad_norm": 1.2547966531700192,
"learning_rate": 4.059944694941783e-06,
"loss": 0.0645,
"step": 584,
"ts_encoder_learning_rate": 4.04420685649314e-06
},
{
"epoch": 1.5404069527967708,
"grad_norm": 1.7483804817058275,
"learning_rate": 4.04420685649314e-06,
"loss": 0.0446,
"step": 585,
"ts_encoder_learning_rate": 4.028478840297867e-06
},
{
"epoch": 1.5430430842738283,
"grad_norm": 1.863160809736345,
"learning_rate": 4.028478840297867e-06,
"loss": 0.0556,
"step": 586,
"ts_encoder_learning_rate": 4.012760807985665e-06
},
{
"epoch": 1.5456792157508856,
"grad_norm": 1.3419436863472527,
"learning_rate": 4.012760807985665e-06,
"loss": 0.05,
"step": 587,
"ts_encoder_learning_rate": 3.997052921083637e-06
},
{
"epoch": 1.548315347227943,
"grad_norm": 1.1909742103770653,
"learning_rate": 3.997052921083637e-06,
"loss": 0.0445,
"step": 588,
"ts_encoder_learning_rate": 3.9813553410146225e-06
},
{
"epoch": 1.5509514787050005,
"grad_norm": 1.0564200056890343,
"learning_rate": 3.9813553410146225e-06,
"loss": 0.0665,
"step": 589,
"ts_encoder_learning_rate": 3.965668229095546e-06
},
{
"epoch": 1.5535876101820578,
"grad_norm": 6.866040081379579,
"learning_rate": 3.965668229095546e-06,
"loss": 0.054,
"step": 590,
"ts_encoder_learning_rate": 3.949991746535753e-06
},
{
"epoch": 1.5562237416591151,
"grad_norm": 1.584656026556296,
"learning_rate": 3.949991746535753e-06,
"loss": 0.0476,
"step": 591,
"ts_encoder_learning_rate": 3.934326054435358e-06
},
{
"epoch": 1.5588598731361727,
"grad_norm": 1.3961799847840088,
"learning_rate": 3.934326054435358e-06,
"loss": 0.0534,
"step": 592,
"ts_encoder_learning_rate": 3.918671313783583e-06
},
{
"epoch": 1.5614960046132302,
"grad_norm": 0.8651238980949738,
"learning_rate": 3.918671313783583e-06,
"loss": 0.0465,
"step": 593,
"ts_encoder_learning_rate": 3.903027685457112e-06
},
{
"epoch": 1.5641321360902873,
"grad_norm": 1.4641882274691738,
"learning_rate": 3.903027685457112e-06,
"loss": 0.0382,
"step": 594,
"ts_encoder_learning_rate": 3.887395330218429e-06
},
{
"epoch": 1.5667682675673449,
"grad_norm": 1.312732235302505,
"learning_rate": 3.887395330218429e-06,
"loss": 0.0434,
"step": 595,
"ts_encoder_learning_rate": 3.87177440871417e-06
},
{
"epoch": 1.5694043990444024,
"grad_norm": 1.082210100085236,
"learning_rate": 3.87177440871417e-06,
"loss": 0.0455,
"step": 596,
"ts_encoder_learning_rate": 3.856165081473474e-06
},
{
"epoch": 1.5720405305214598,
"grad_norm": 1.1636550431612551,
"learning_rate": 3.856165081473474e-06,
"loss": 0.0521,
"step": 597,
"ts_encoder_learning_rate": 3.840567508906328e-06
},
{
"epoch": 1.574676661998517,
"grad_norm": 0.9722535818497965,
"learning_rate": 3.840567508906328e-06,
"loss": 0.0479,
"step": 598,
"ts_encoder_learning_rate": 3.824981851301924e-06
},
{
"epoch": 1.5773127934755746,
"grad_norm": 1.0986855549101457,
"learning_rate": 3.824981851301924e-06,
"loss": 0.0444,
"step": 599,
"ts_encoder_learning_rate": 3.809408268827009e-06
},
{
"epoch": 1.5799489249526322,
"grad_norm": 3.8382139919055662,
"learning_rate": 3.809408268827009e-06,
"loss": 0.0481,
"step": 600,
"ts_encoder_learning_rate": 3.7938469215242374e-06
},
{
"epoch": 1.5825850564296893,
"grad_norm": 0.8061311507375318,
"learning_rate": 3.7938469215242374e-06,
"loss": 0.0556,
"step": 601,
"ts_encoder_learning_rate": 3.778297969310529e-06
},
{
"epoch": 1.5852211879067468,
"grad_norm": 1.1509891000241945,
"learning_rate": 3.778297969310529e-06,
"loss": 0.0474,
"step": 602,
"ts_encoder_learning_rate": 3.7627615719754294e-06
},
{
"epoch": 1.5878573193838044,
"grad_norm": 0.7746498139881461,
"learning_rate": 3.7627615719754294e-06,
"loss": 0.0495,
"step": 603,
"ts_encoder_learning_rate": 3.7472378891794537e-06
},
{
"epoch": 1.5904934508608617,
"grad_norm": 1.1470163275979566,
"learning_rate": 3.7472378891794537e-06,
"loss": 0.035,
"step": 604,
"ts_encoder_learning_rate": 3.731727080452464e-06
},
{
"epoch": 1.593129582337919,
"grad_norm": 1.0156962254575825,
"learning_rate": 3.731727080452464e-06,
"loss": 0.0511,
"step": 605,
"ts_encoder_learning_rate": 3.7162293051920185e-06
},
{
"epoch": 1.5957657138149766,
"grad_norm": 0.6709332984734167,
"learning_rate": 3.7162293051920185e-06,
"loss": 0.0381,
"step": 606,
"ts_encoder_learning_rate": 3.7007447226617367e-06
},
{
"epoch": 1.5984018452920339,
"grad_norm": 1.0740219504440236,
"learning_rate": 3.7007447226617367e-06,
"loss": 0.0379,
"step": 607,
"ts_encoder_learning_rate": 3.685273491989661e-06
},
{
"epoch": 1.6010379767690912,
"grad_norm": 1.0725681809883107,
"learning_rate": 3.685273491989661e-06,
"loss": 0.0405,
"step": 608,
"ts_encoder_learning_rate": 3.669815772166625e-06
},
{
"epoch": 1.6036741082461488,
"grad_norm": 1.0834624903360521,
"learning_rate": 3.669815772166625e-06,
"loss": 0.0355,
"step": 609,
"ts_encoder_learning_rate": 3.654371722044616e-06
},
{
"epoch": 1.6063102397232063,
"grad_norm": 1.3539103412709967,
"learning_rate": 3.654371722044616e-06,
"loss": 0.0367,
"step": 610,
"ts_encoder_learning_rate": 3.638941500335145e-06
},
{
"epoch": 1.6089463712002636,
"grad_norm": 1.73704351662127,
"learning_rate": 3.638941500335145e-06,
"loss": 0.0357,
"step": 611,
"ts_encoder_learning_rate": 3.6235252656076138e-06
},
{
"epoch": 1.611582502677321,
"grad_norm": 1.0546272097596496,
"learning_rate": 3.6235252656076138e-06,
"loss": 0.0513,
"step": 612,
"ts_encoder_learning_rate": 3.608123176287685e-06
},
{
"epoch": 1.6142186341543785,
"grad_norm": 1.1384580684741372,
"learning_rate": 3.608123176287685e-06,
"loss": 0.0492,
"step": 613,
"ts_encoder_learning_rate": 3.5927353906556583e-06
},
{
"epoch": 1.6168547656314358,
"grad_norm": 1.0408680243087836,
"learning_rate": 3.5927353906556583e-06,
"loss": 0.0493,
"step": 614,
"ts_encoder_learning_rate": 3.5773620668448384e-06
},
{
"epoch": 1.6194908971084931,
"grad_norm": 1.2408867536402586,
"learning_rate": 3.5773620668448384e-06,
"loss": 0.0477,
"step": 615,
"ts_encoder_learning_rate": 3.562003362839914e-06
},
{
"epoch": 1.6221270285855507,
"grad_norm": 0.9012275494297888,
"learning_rate": 3.562003362839914e-06,
"loss": 0.0374,
"step": 616,
"ts_encoder_learning_rate": 3.5466594364753325e-06
},
{
"epoch": 1.6247631600626082,
"grad_norm": 1.542557802018261,
"learning_rate": 3.5466594364753325e-06,
"loss": 0.0563,
"step": 617,
"ts_encoder_learning_rate": 3.531330445433677e-06
},
{
"epoch": 1.6273992915396656,
"grad_norm": 1.3600810197945168,
"learning_rate": 3.531330445433677e-06,
"loss": 0.0508,
"step": 618,
"ts_encoder_learning_rate": 3.516016547244047e-06
},
{
"epoch": 1.6300354230167229,
"grad_norm": 1.1139555587082588,
"learning_rate": 3.516016547244047e-06,
"loss": 0.0383,
"step": 619,
"ts_encoder_learning_rate": 3.500717899280442e-06
},
{
"epoch": 1.6326715544937804,
"grad_norm": 1.8530076436307588,
"learning_rate": 3.500717899280442e-06,
"loss": 0.0444,
"step": 620,
"ts_encoder_learning_rate": 3.48543465876014e-06
},
{
"epoch": 1.6353076859708378,
"grad_norm": 0.9664504630825478,
"learning_rate": 3.48543465876014e-06,
"loss": 0.0564,
"step": 621,
"ts_encoder_learning_rate": 3.4701669827420827e-06
},
{
"epoch": 1.637943817447895,
"grad_norm": 2.6957722854149035,
"learning_rate": 3.4701669827420827e-06,
"loss": 0.0524,
"step": 622,
"ts_encoder_learning_rate": 3.4549150281252635e-06
},
{
"epoch": 1.6405799489249526,
"grad_norm": 0.8782042487398507,
"learning_rate": 3.4549150281252635e-06,
"loss": 0.0345,
"step": 623,
"ts_encoder_learning_rate": 3.4396789516471152e-06
},
{
"epoch": 1.6432160804020102,
"grad_norm": 1.455788121211078,
"learning_rate": 3.4396789516471152e-06,
"loss": 0.0494,
"step": 624,
"ts_encoder_learning_rate": 3.424458909881897e-06
},
{
"epoch": 1.6458522118790675,
"grad_norm": 1.1390293600922257,
"learning_rate": 3.424458909881897e-06,
"loss": 0.0451,
"step": 625,
"ts_encoder_learning_rate": 3.409255059239086e-06
},
{
"epoch": 1.6484883433561248,
"grad_norm": 1.0205461298328486,
"learning_rate": 3.409255059239086e-06,
"loss": 0.0481,
"step": 626,
"ts_encoder_learning_rate": 3.3940675559617724e-06
},
{
"epoch": 1.6511244748331824,
"grad_norm": 1.3290866058317974,
"learning_rate": 3.3940675559617724e-06,
"loss": 0.0379,
"step": 627,
"ts_encoder_learning_rate": 3.37889655612505e-06
},
{
"epoch": 1.6537606063102397,
"grad_norm": 1.588969233408268,
"learning_rate": 3.37889655612505e-06,
"loss": 0.0475,
"step": 628,
"ts_encoder_learning_rate": 3.363742215634416e-06
},
{
"epoch": 1.656396737787297,
"grad_norm": 1.5323219144753453,
"learning_rate": 3.363742215634416e-06,
"loss": 0.0535,
"step": 629,
"ts_encoder_learning_rate": 3.3486046902241663e-06
},
{
"epoch": 1.6590328692643546,
"grad_norm": 1.2117312516941228,
"learning_rate": 3.3486046902241663e-06,
"loss": 0.0456,
"step": 630,
"ts_encoder_learning_rate": 3.3334841354557923e-06
},
{
"epoch": 1.661669000741412,
"grad_norm": 1.1542851651812696,
"learning_rate": 3.3334841354557923e-06,
"loss": 0.0454,
"step": 631,
"ts_encoder_learning_rate": 3.318380706716392e-06
},
{
"epoch": 1.6643051322184694,
"grad_norm": 1.7354976924553471,
"learning_rate": 3.318380706716392e-06,
"loss": 0.0383,
"step": 632,
"ts_encoder_learning_rate": 3.303294559217063e-06
},
{
"epoch": 1.6669412636955268,
"grad_norm": 1.839192030247768,
"learning_rate": 3.303294559217063e-06,
"loss": 0.0452,
"step": 633,
"ts_encoder_learning_rate": 3.288225847991312e-06
},
{
"epoch": 1.6695773951725843,
"grad_norm": 1.1075074885202028,
"learning_rate": 3.288225847991312e-06,
"loss": 0.0381,
"step": 634,
"ts_encoder_learning_rate": 3.273174727893463e-06
},
{
"epoch": 1.6722135266496416,
"grad_norm": 1.0570238025860814,
"learning_rate": 3.273174727893463e-06,
"loss": 0.0494,
"step": 635,
"ts_encoder_learning_rate": 3.2581413535970597e-06
},
{
"epoch": 1.674849658126699,
"grad_norm": 1.4464126650801725,
"learning_rate": 3.2581413535970597e-06,
"loss": 0.0327,
"step": 636,
"ts_encoder_learning_rate": 3.2431258795932863e-06
},
{
"epoch": 1.6774857896037565,
"grad_norm": 4.97131370712164,
"learning_rate": 3.2431258795932863e-06,
"loss": 0.0518,
"step": 637,
"ts_encoder_learning_rate": 3.228128460189368e-06
},
{
"epoch": 1.680121921080814,
"grad_norm": 1.3774085333033086,
"learning_rate": 3.228128460189368e-06,
"loss": 0.0368,
"step": 638,
"ts_encoder_learning_rate": 3.213149249506997e-06
},
{
"epoch": 1.6827580525578714,
"grad_norm": 1.5023786656225406,
"learning_rate": 3.213149249506997e-06,
"loss": 0.0339,
"step": 639,
"ts_encoder_learning_rate": 3.198188401480734e-06
},
{
"epoch": 1.6853941840349287,
"grad_norm": 1.3990085291401217,
"learning_rate": 3.198188401480734e-06,
"loss": 0.0459,
"step": 640,
"ts_encoder_learning_rate": 3.183246069856443e-06
},
{
"epoch": 1.6880303155119862,
"grad_norm": 3.826763148327695,
"learning_rate": 3.183246069856443e-06,
"loss": 0.0384,
"step": 641,
"ts_encoder_learning_rate": 3.1683224081897e-06
},
{
"epoch": 1.6906664469890436,
"grad_norm": 3.421422432395587,
"learning_rate": 3.1683224081897e-06,
"loss": 0.0464,
"step": 642,
"ts_encoder_learning_rate": 3.1534175698442194e-06
},
{
"epoch": 1.6933025784661009,
"grad_norm": 3.7657139495021323,
"learning_rate": 3.1534175698442194e-06,
"loss": 0.0403,
"step": 643,
"ts_encoder_learning_rate": 3.1385317079902743e-06
},
{
"epoch": 1.6959387099431584,
"grad_norm": 37.29958018939492,
"learning_rate": 3.1385317079902743e-06,
"loss": 0.0498,
"step": 644,
"ts_encoder_learning_rate": 3.12366497560313e-06
},
{
"epoch": 1.698574841420216,
"grad_norm": 4.349734482528857,
"learning_rate": 3.12366497560313e-06,
"loss": 0.0505,
"step": 645,
"ts_encoder_learning_rate": 3.1088175254614616e-06
},
{
"epoch": 1.7012109728972733,
"grad_norm": 48.489588646859666,
"learning_rate": 3.1088175254614616e-06,
"loss": 0.0382,
"step": 646,
"ts_encoder_learning_rate": 3.093989510145792e-06
},
{
"epoch": 1.7038471043743306,
"grad_norm": 6.78448657127166,
"learning_rate": 3.093989510145792e-06,
"loss": 0.0508,
"step": 647,
"ts_encoder_learning_rate": 3.079181082036922e-06
},
{
"epoch": 1.7064832358513882,
"grad_norm": 3.23674986315901,
"learning_rate": 3.079181082036922e-06,
"loss": 0.0446,
"step": 648,
"ts_encoder_learning_rate": 3.0643923933143603e-06
},
{
"epoch": 1.7091193673284455,
"grad_norm": 1.178341628440116,
"learning_rate": 3.0643923933143603e-06,
"loss": 0.0516,
"step": 649,
"ts_encoder_learning_rate": 3.049623595954766e-06
},
{
"epoch": 1.7117554988055028,
"grad_norm": 8.830888469474749,
"learning_rate": 3.049623595954766e-06,
"loss": 0.0417,
"step": 650,
"ts_encoder_learning_rate": 3.0348748417303826e-06
},
{
"epoch": 1.7143916302825604,
"grad_norm": 1.8437992927036981,
"learning_rate": 3.0348748417303826e-06,
"loss": 0.035,
"step": 651,
"ts_encoder_learning_rate": 3.020146282207479e-06
},
{
"epoch": 1.717027761759618,
"grad_norm": 47.65658053142364,
"learning_rate": 3.020146282207479e-06,
"loss": 0.0541,
"step": 652,
"ts_encoder_learning_rate": 3.005438068744792e-06
},
{
"epoch": 1.7196638932366752,
"grad_norm": 1.1030676770500603,
"learning_rate": 3.005438068744792e-06,
"loss": 0.0392,
"step": 653,
"ts_encoder_learning_rate": 2.9907503524919734e-06
},
{
"epoch": 1.7223000247137326,
"grad_norm": 16.670627222356522,
"learning_rate": 2.9907503524919734e-06,
"loss": 0.0437,
"step": 654,
"ts_encoder_learning_rate": 2.976083284388031e-06
},
{
"epoch": 1.72493615619079,
"grad_norm": 0.9106485328504088,
"learning_rate": 2.976083284388031e-06,
"loss": 0.049,
"step": 655,
"ts_encoder_learning_rate": 2.9614370151597837e-06
},
{
"epoch": 1.7275722876678474,
"grad_norm": 1.3803980461708971,
"learning_rate": 2.9614370151597837e-06,
"loss": 0.0394,
"step": 656,
"ts_encoder_learning_rate": 2.9468116953203107e-06
},
{
"epoch": 1.7302084191449048,
"grad_norm": 0.9915306370393331,
"learning_rate": 2.9468116953203107e-06,
"loss": 0.0486,
"step": 657,
"ts_encoder_learning_rate": 2.932207475167398e-06
},
{
"epoch": 1.7328445506219623,
"grad_norm": 1.6041614843590046,
"learning_rate": 2.932207475167398e-06,
"loss": 0.0406,
"step": 658,
"ts_encoder_learning_rate": 2.9176245047820064e-06
},
{
"epoch": 1.7354806820990198,
"grad_norm": 1.3499944374287252,
"learning_rate": 2.9176245047820064e-06,
"loss": 0.0403,
"step": 659,
"ts_encoder_learning_rate": 2.9030629340267165e-06
},
{
"epoch": 1.738116813576077,
"grad_norm": 0.8297929219673496,
"learning_rate": 2.9030629340267165e-06,
"loss": 0.0401,
"step": 660,
"ts_encoder_learning_rate": 2.8885229125442022e-06
},
{
"epoch": 1.7407529450531345,
"grad_norm": 1.0327297756381613,
"learning_rate": 2.8885229125442022e-06,
"loss": 0.0347,
"step": 661,
"ts_encoder_learning_rate": 2.8740045897556766e-06
},
{
"epoch": 1.743389076530192,
"grad_norm": 1.294243398194849,
"learning_rate": 2.8740045897556766e-06,
"loss": 0.0382,
"step": 662,
"ts_encoder_learning_rate": 2.859508114859374e-06
},
{
"epoch": 1.7460252080072494,
"grad_norm": 9.88848400216231,
"learning_rate": 2.859508114859374e-06,
"loss": 0.0437,
"step": 663,
"ts_encoder_learning_rate": 2.845033636828998e-06
},
{
"epoch": 1.7486613394843067,
"grad_norm": 1.3051859972411644,
"learning_rate": 2.845033636828998e-06,
"loss": 0.0506,
"step": 664,
"ts_encoder_learning_rate": 2.83058130441221e-06
},
{
"epoch": 1.7512974709613642,
"grad_norm": 1.3312709718052185,
"learning_rate": 2.83058130441221e-06,
"loss": 0.0262,
"step": 665,
"ts_encoder_learning_rate": 2.8161512661290847e-06
},
{
"epoch": 1.7539336024384218,
"grad_norm": 1.1582478757709687,
"learning_rate": 2.8161512661290847e-06,
"loss": 0.0452,
"step": 666,
"ts_encoder_learning_rate": 2.80174367027059e-06
},
{
"epoch": 1.7565697339154789,
"grad_norm": 1.2069057310106728,
"learning_rate": 2.80174367027059e-06,
"loss": 0.0371,
"step": 667,
"ts_encoder_learning_rate": 2.7873586648970686e-06
},
{
"epoch": 1.7592058653925364,
"grad_norm": 1.2044293435585265,
"learning_rate": 2.7873586648970686e-06,
"loss": 0.0443,
"step": 668,
"ts_encoder_learning_rate": 2.772996397836704e-06
},
{
"epoch": 1.761841996869594,
"grad_norm": 1.0589558487998179,
"learning_rate": 2.772996397836704e-06,
"loss": 0.0357,
"step": 669,
"ts_encoder_learning_rate": 2.7586570166840154e-06
},
{
"epoch": 1.7644781283466513,
"grad_norm": 1.1371065267074214,
"learning_rate": 2.7586570166840154e-06,
"loss": 0.0405,
"step": 670,
"ts_encoder_learning_rate": 2.7443406687983267e-06
},
{
"epoch": 1.7671142598237086,
"grad_norm": 1.310944403130682,
"learning_rate": 2.7443406687983267e-06,
"loss": 0.0334,
"step": 671,
"ts_encoder_learning_rate": 2.7300475013022666e-06
},
{
"epoch": 1.7697503913007662,
"grad_norm": 1.611743339204791,
"learning_rate": 2.7300475013022666e-06,
"loss": 0.0486,
"step": 672,
"ts_encoder_learning_rate": 2.7157776610802416e-06
},
{
"epoch": 1.7723865227778235,
"grad_norm": 2.1145133691029625,
"learning_rate": 2.7157776610802416e-06,
"loss": 0.0484,
"step": 673,
"ts_encoder_learning_rate": 2.7015312947769436e-06
},
{
"epoch": 1.7750226542548808,
"grad_norm": 1.8011066906635667,
"learning_rate": 2.7015312947769436e-06,
"loss": 0.0472,
"step": 674,
"ts_encoder_learning_rate": 2.687308548795825e-06
},
{
"epoch": 1.7776587857319384,
"grad_norm": 14.318871488368272,
"learning_rate": 2.687308548795825e-06,
"loss": 0.0397,
"step": 675,
"ts_encoder_learning_rate": 2.6731095692976073e-06
},
{
"epoch": 1.780294917208996,
"grad_norm": 0.9235148193782242,
"learning_rate": 2.6731095692976073e-06,
"loss": 0.03,
"step": 676,
"ts_encoder_learning_rate": 2.6589345021987725e-06
},
{
"epoch": 1.7829310486860532,
"grad_norm": 7.592255856926825,
"learning_rate": 2.6589345021987725e-06,
"loss": 0.0348,
"step": 677,
"ts_encoder_learning_rate": 2.6447834931700688e-06
},
{
"epoch": 1.7855671801631106,
"grad_norm": 1.7017607950556997,
"learning_rate": 2.6447834931700688e-06,
"loss": 0.0429,
"step": 678,
"ts_encoder_learning_rate": 2.6306566876350072e-06
},
{
"epoch": 1.788203311640168,
"grad_norm": 1.1703754969173203,
"learning_rate": 2.6306566876350072e-06,
"loss": 0.0534,
"step": 679,
"ts_encoder_learning_rate": 2.6165542307683744e-06
},
{
"epoch": 1.7908394431172254,
"grad_norm": 1.255297983170475,
"learning_rate": 2.6165542307683744e-06,
"loss": 0.043,
"step": 680,
"ts_encoder_learning_rate": 2.6024762674947313e-06
},
{
"epoch": 1.7934755745942828,
"grad_norm": 1.5888017813716921,
"learning_rate": 2.6024762674947313e-06,
"loss": 0.0343,
"step": 681,
"ts_encoder_learning_rate": 2.588422942486932e-06
},
{
"epoch": 1.7961117060713403,
"grad_norm": 1.7718676183781328,
"learning_rate": 2.588422942486932e-06,
"loss": 0.0486,
"step": 682,
"ts_encoder_learning_rate": 2.5743944001646394e-06
},
{
"epoch": 1.7987478375483978,
"grad_norm": 0.9086652386668309,
"learning_rate": 2.5743944001646394e-06,
"loss": 0.0497,
"step": 683,
"ts_encoder_learning_rate": 2.5603907846928277e-06
},
{
"epoch": 1.8013839690254552,
"grad_norm": 1.1341128763629043,
"learning_rate": 2.5603907846928277e-06,
"loss": 0.0425,
"step": 684,
"ts_encoder_learning_rate": 2.5464122399803126e-06
},
{
"epoch": 1.8040201005025125,
"grad_norm": 2.5998127513101315,
"learning_rate": 2.5464122399803126e-06,
"loss": 0.0574,
"step": 685,
"ts_encoder_learning_rate": 2.532458909678266e-06
},
{
"epoch": 1.80665623197957,
"grad_norm": 4.542124494248002,
"learning_rate": 2.532458909678266e-06,
"loss": 0.0462,
"step": 686,
"ts_encoder_learning_rate": 2.5185309371787515e-06
},
{
"epoch": 1.8092923634566274,
"grad_norm": 1.0616553515388587,
"learning_rate": 2.5185309371787515e-06,
"loss": 0.0446,
"step": 687,
"ts_encoder_learning_rate": 2.50462846561323e-06
},
{
"epoch": 1.8119284949336847,
"grad_norm": 0.7064894341667076,
"learning_rate": 2.50462846561323e-06,
"loss": 0.035,
"step": 688,
"ts_encoder_learning_rate": 2.4907516378511137e-06
},
{
"epoch": 1.8145646264107422,
"grad_norm": 0.673676553652552,
"learning_rate": 2.4907516378511137e-06,
"loss": 0.0456,
"step": 689,
"ts_encoder_learning_rate": 2.4769005964982718e-06
},
{
"epoch": 1.8172007578877998,
"grad_norm": 1.0526217413305237,
"learning_rate": 2.4769005964982718e-06,
"loss": 0.0408,
"step": 690,
"ts_encoder_learning_rate": 2.46307548389559e-06
},
{
"epoch": 1.819836889364857,
"grad_norm": 0.8741534711615824,
"learning_rate": 2.46307548389559e-06,
"loss": 0.0369,
"step": 691,
"ts_encoder_learning_rate": 2.4492764421174863e-06
},
{
"epoch": 1.8224730208419144,
"grad_norm": 1.558424208864291,
"learning_rate": 2.4492764421174863e-06,
"loss": 0.0403,
"step": 692,
"ts_encoder_learning_rate": 2.43550361297047e-06
},
{
"epoch": 1.825109152318972,
"grad_norm": 0.8337291977316967,
"learning_rate": 2.43550361297047e-06,
"loss": 0.0352,
"step": 693,
"ts_encoder_learning_rate": 2.4217571379916673e-06
},
{
"epoch": 1.8277452837960293,
"grad_norm": 1.017616972872044,
"learning_rate": 2.4217571379916673e-06,
"loss": 0.0552,
"step": 694,
"ts_encoder_learning_rate": 2.408037158447375e-06
},
{
"epoch": 1.8303814152730866,
"grad_norm": 0.7844845835056601,
"learning_rate": 2.408037158447375e-06,
"loss": 0.0435,
"step": 695,
"ts_encoder_learning_rate": 2.394343815331616e-06
},
{
"epoch": 1.8330175467501442,
"grad_norm": 1.1004307198779026,
"learning_rate": 2.394343815331616e-06,
"loss": 0.0314,
"step": 696,
"ts_encoder_learning_rate": 2.3806772493646725e-06
},
{
"epoch": 1.8356536782272017,
"grad_norm": 1.1688405779568678,
"learning_rate": 2.3806772493646725e-06,
"loss": 0.0269,
"step": 697,
"ts_encoder_learning_rate": 2.3670376009916596e-06
},
{
"epoch": 1.838289809704259,
"grad_norm": 1.0701865886101891,
"learning_rate": 2.3670376009916596e-06,
"loss": 0.044,
"step": 698,
"ts_encoder_learning_rate": 2.353425010381063e-06
},
{
"epoch": 1.8409259411813164,
"grad_norm": 1.360235946458034,
"learning_rate": 2.353425010381063e-06,
"loss": 0.0384,
"step": 699,
"ts_encoder_learning_rate": 2.339839617423318e-06
},
{
"epoch": 1.843562072658374,
"grad_norm": 0.8062724598386658,
"learning_rate": 2.339839617423318e-06,
"loss": 0.0331,
"step": 700,
"ts_encoder_learning_rate": 2.3262815617293517e-06
},
{
"epoch": 1.8461982041354312,
"grad_norm": 1.1455549172176551,
"learning_rate": 2.3262815617293517e-06,
"loss": 0.0542,
"step": 701,
"ts_encoder_learning_rate": 2.31275098262917e-06
},
{
"epoch": 1.8488343356124886,
"grad_norm": 0.703436998992223,
"learning_rate": 2.31275098262917e-06,
"loss": 0.0313,
"step": 702,
"ts_encoder_learning_rate": 2.2992480191704003e-06
},
{
"epoch": 1.851470467089546,
"grad_norm": 0.8764547755682153,
"learning_rate": 2.2992480191704003e-06,
"loss": 0.0333,
"step": 703,
"ts_encoder_learning_rate": 2.28577281011689e-06
},
{
"epoch": 1.8541065985666036,
"grad_norm": 1.23716774475027,
"learning_rate": 2.28577281011689e-06,
"loss": 0.0301,
"step": 704,
"ts_encoder_learning_rate": 2.272325493947257e-06
},
{
"epoch": 1.856742730043661,
"grad_norm": 11.281594612889576,
"learning_rate": 2.272325493947257e-06,
"loss": 0.0312,
"step": 705,
"ts_encoder_learning_rate": 2.2589062088534837e-06
},
{
"epoch": 1.8593788615207183,
"grad_norm": 0.9987575070457472,
"learning_rate": 2.2589062088534837e-06,
"loss": 0.0417,
"step": 706,
"ts_encoder_learning_rate": 2.245515092739488e-06
},
{
"epoch": 1.8620149929977758,
"grad_norm": 1.0116065835025165,
"learning_rate": 2.245515092739488e-06,
"loss": 0.0496,
"step": 707,
"ts_encoder_learning_rate": 2.2321522832197036e-06
},
{
"epoch": 1.8646511244748332,
"grad_norm": 1.3806751773781658,
"learning_rate": 2.2321522832197036e-06,
"loss": 0.039,
"step": 708,
"ts_encoder_learning_rate": 2.2188179176176767e-06
},
{
"epoch": 1.8672872559518905,
"grad_norm": 0.9901586798069452,
"learning_rate": 2.2188179176176767e-06,
"loss": 0.041,
"step": 709,
"ts_encoder_learning_rate": 2.2055121329646416e-06
},
{
"epoch": 1.869923387428948,
"grad_norm": 0.9660208947927406,
"learning_rate": 2.2055121329646416e-06,
"loss": 0.0263,
"step": 710,
"ts_encoder_learning_rate": 2.1922350659981262e-06
},
{
"epoch": 1.8725595189060056,
"grad_norm": 0.9148657839084163,
"learning_rate": 2.1922350659981262e-06,
"loss": 0.0382,
"step": 711,
"ts_encoder_learning_rate": 2.178986853160535e-06
},
{
"epoch": 1.875195650383063,
"grad_norm": 0.9900246938077546,
"learning_rate": 2.178986853160535e-06,
"loss": 0.0482,
"step": 712,
"ts_encoder_learning_rate": 2.165767630597752e-06
},
{
"epoch": 1.8778317818601202,
"grad_norm": 0.995911258816107,
"learning_rate": 2.165767630597752e-06,
"loss": 0.0383,
"step": 713,
"ts_encoder_learning_rate": 2.1525775341577404e-06
},
{
"epoch": 1.8804679133371778,
"grad_norm": 0.9529466866489111,
"learning_rate": 2.1525775341577404e-06,
"loss": 0.0336,
"step": 714,
"ts_encoder_learning_rate": 2.139416699389153e-06
},
{
"epoch": 1.883104044814235,
"grad_norm": 0.6476905341796513,
"learning_rate": 2.139416699389153e-06,
"loss": 0.0274,
"step": 715,
"ts_encoder_learning_rate": 2.126285261539926e-06
},
{
"epoch": 1.8857401762912924,
"grad_norm": 1.1085422585469236,
"learning_rate": 2.126285261539926e-06,
"loss": 0.0361,
"step": 716,
"ts_encoder_learning_rate": 2.1131833555559037e-06
},
{
"epoch": 1.88837630776835,
"grad_norm": 1.0042700649998133,
"learning_rate": 2.1131833555559037e-06,
"loss": 0.0288,
"step": 717,
"ts_encoder_learning_rate": 2.1001111160794387e-06
},
{
"epoch": 1.8910124392454075,
"grad_norm": 0.9521264639042284,
"learning_rate": 2.1001111160794387e-06,
"loss": 0.0421,
"step": 718,
"ts_encoder_learning_rate": 2.08706867744802e-06
},
{
"epoch": 1.8936485707224646,
"grad_norm": 0.931714414769153,
"learning_rate": 2.08706867744802e-06,
"loss": 0.0366,
"step": 719,
"ts_encoder_learning_rate": 2.074056173692881e-06
},
{
"epoch": 1.8962847021995222,
"grad_norm": 0.754100564730907,
"learning_rate": 2.074056173692881e-06,
"loss": 0.0303,
"step": 720,
"ts_encoder_learning_rate": 2.061073738537635e-06
},
{
"epoch": 1.8989208336765797,
"grad_norm": 0.7479604079430805,
"learning_rate": 2.061073738537635e-06,
"loss": 0.0466,
"step": 721,
"ts_encoder_learning_rate": 2.0481215053968874e-06
},
{
"epoch": 1.901556965153637,
"grad_norm": 0.7804885190676818,
"learning_rate": 2.0481215053968874e-06,
"loss": 0.0295,
"step": 722,
"ts_encoder_learning_rate": 2.0351996073748713e-06
},
{
"epoch": 1.9041930966306944,
"grad_norm": 0.9239500456881254,
"learning_rate": 2.0351996073748713e-06,
"loss": 0.0418,
"step": 723,
"ts_encoder_learning_rate": 2.0223081772640867e-06
},
{
"epoch": 1.906829228107752,
"grad_norm": 1.0393746665064,
"learning_rate": 2.0223081772640867e-06,
"loss": 0.0352,
"step": 724,
"ts_encoder_learning_rate": 2.00944734754392e-06
},
{
"epoch": 1.9094653595848095,
"grad_norm": 0.7757057471334936,
"learning_rate": 2.00944734754392e-06,
"loss": 0.0334,
"step": 725,
"ts_encoder_learning_rate": 1.9966172503792986e-06
},
{
"epoch": 1.9121014910618666,
"grad_norm": 0.7982941003952496,
"learning_rate": 1.9966172503792986e-06,
"loss": 0.0341,
"step": 726,
"ts_encoder_learning_rate": 1.983818017619318e-06
},
{
"epoch": 1.914737622538924,
"grad_norm": 1.0301075039556638,
"learning_rate": 1.983818017619318e-06,
"loss": 0.0407,
"step": 727,
"ts_encoder_learning_rate": 1.971049780795901e-06
},
{
"epoch": 1.9173737540159816,
"grad_norm": 0.9027944740938065,
"learning_rate": 1.971049780795901e-06,
"loss": 0.0325,
"step": 728,
"ts_encoder_learning_rate": 1.9583126711224342e-06
},
{
"epoch": 1.920009885493039,
"grad_norm": 1.0093899617917834,
"learning_rate": 1.9583126711224342e-06,
"loss": 0.0343,
"step": 729,
"ts_encoder_learning_rate": 1.945606819492429e-06
},
{
"epoch": 1.9226460169700963,
"grad_norm": 1.6230911664954315,
"learning_rate": 1.945606819492429e-06,
"loss": 0.0467,
"step": 730,
"ts_encoder_learning_rate": 1.932932356478168e-06
},
{
"epoch": 1.9252821484471538,
"grad_norm": 1.169611902284653,
"learning_rate": 1.932932356478168e-06,
"loss": 0.0346,
"step": 731,
"ts_encoder_learning_rate": 1.9202894123293677e-06
},
{
"epoch": 1.9279182799242112,
"grad_norm": 0.8882211427092304,
"learning_rate": 1.9202894123293677e-06,
"loss": 0.0271,
"step": 732,
"ts_encoder_learning_rate": 1.9076781169718426e-06
},
{
"epoch": 1.9305544114012685,
"grad_norm": 0.7788137841169445,
"learning_rate": 1.9076781169718426e-06,
"loss": 0.0317,
"step": 733,
"ts_encoder_learning_rate": 1.895098600006164e-06
},
{
"epoch": 1.933190542878326,
"grad_norm": 0.9177124549747325,
"learning_rate": 1.895098600006164e-06,
"loss": 0.0294,
"step": 734,
"ts_encoder_learning_rate": 1.8825509907063328e-06
},
{
"epoch": 1.9358266743553836,
"grad_norm": 1.1488053411805068,
"learning_rate": 1.8825509907063328e-06,
"loss": 0.0319,
"step": 735,
"ts_encoder_learning_rate": 1.8700354180184465e-06
},
{
"epoch": 1.938462805832441,
"grad_norm": 0.9810496010092534,
"learning_rate": 1.8700354180184465e-06,
"loss": 0.0348,
"step": 736,
"ts_encoder_learning_rate": 1.857552010559382e-06
},
{
"epoch": 1.9410989373094982,
"grad_norm": 0.5266455663863897,
"learning_rate": 1.857552010559382e-06,
"loss": 0.0166,
"step": 737,
"ts_encoder_learning_rate": 1.8451008966154622e-06
},
{
"epoch": 1.9437350687865558,
"grad_norm": 0.6289054316907776,
"learning_rate": 1.8451008966154622e-06,
"loss": 0.0296,
"step": 738,
"ts_encoder_learning_rate": 1.8326822041411524e-06
},
{
"epoch": 1.946371200263613,
"grad_norm": 0.9572882282852405,
"learning_rate": 1.8326822041411524e-06,
"loss": 0.0445,
"step": 739,
"ts_encoder_learning_rate": 1.8202960607577246e-06
},
{
"epoch": 1.9490073317406704,
"grad_norm": 1.314826995873295,
"learning_rate": 1.8202960607577246e-06,
"loss": 0.0348,
"step": 740,
"ts_encoder_learning_rate": 1.8079425937519729e-06
},
{
"epoch": 1.951643463217728,
"grad_norm": 0.996575258797314,
"learning_rate": 1.8079425937519729e-06,
"loss": 0.0378,
"step": 741,
"ts_encoder_learning_rate": 1.7956219300748796e-06
},
{
"epoch": 1.9542795946947855,
"grad_norm": 1.0607844107395987,
"learning_rate": 1.7956219300748796e-06,
"loss": 0.0329,
"step": 742,
"ts_encoder_learning_rate": 1.7833341963403312e-06
},
{
"epoch": 1.9569157261718428,
"grad_norm": 0.9094930783526255,
"learning_rate": 1.7833341963403312e-06,
"loss": 0.0473,
"step": 743,
"ts_encoder_learning_rate": 1.771079518823799e-06
},
{
"epoch": 1.9595518576489002,
"grad_norm": 1.1039356442587034,
"learning_rate": 1.771079518823799e-06,
"loss": 0.0422,
"step": 744,
"ts_encoder_learning_rate": 1.7588580234610592e-06
},
{
"epoch": 1.9621879891259577,
"grad_norm": 0.8174282643803666,
"learning_rate": 1.7588580234610592e-06,
"loss": 0.0406,
"step": 745,
"ts_encoder_learning_rate": 1.7466698358468825e-06
},
{
"epoch": 1.964824120603015,
"grad_norm": 0.7214652835866359,
"learning_rate": 1.7466698358468825e-06,
"loss": 0.0326,
"step": 746,
"ts_encoder_learning_rate": 1.7345150812337564e-06
},
{
"epoch": 1.9674602520800724,
"grad_norm": 1.2667444333402988,
"learning_rate": 1.7345150812337564e-06,
"loss": 0.0415,
"step": 747,
"ts_encoder_learning_rate": 1.7223938845305932e-06
},
{
"epoch": 1.97009638355713,
"grad_norm": 0.9773679348538253,
"learning_rate": 1.7223938845305932e-06,
"loss": 0.0369,
"step": 748,
"ts_encoder_learning_rate": 1.7103063703014372e-06
},
{
"epoch": 1.9727325150341875,
"grad_norm": 1.1325567784328214,
"learning_rate": 1.7103063703014372e-06,
"loss": 0.0364,
"step": 749,
"ts_encoder_learning_rate": 1.6982526627642043e-06
},
{
"epoch": 1.9753686465112448,
"grad_norm": 0.8996954549073614,
"learning_rate": 1.6982526627642043e-06,
"loss": 0.0329,
"step": 750,
"ts_encoder_learning_rate": 1.6862328857893856e-06
},
{
"epoch": 1.978004777988302,
"grad_norm": 0.7139793070917391,
"learning_rate": 1.6862328857893856e-06,
"loss": 0.036,
"step": 751,
"ts_encoder_learning_rate": 1.6742471628987894e-06
},
{
"epoch": 1.9806409094653596,
"grad_norm": 0.7560937228601913,
"learning_rate": 1.6742471628987894e-06,
"loss": 0.0334,
"step": 752,
"ts_encoder_learning_rate": 1.6622956172642601e-06
},
{
"epoch": 1.983277040942417,
"grad_norm": 1.6103163078678424,
"learning_rate": 1.6622956172642601e-06,
"loss": 0.0371,
"step": 753,
"ts_encoder_learning_rate": 1.6503783717064247e-06
},
{
"epoch": 1.9859131724194743,
"grad_norm": 1.6781996659316394,
"learning_rate": 1.6503783717064247e-06,
"loss": 0.0394,
"step": 754,
"ts_encoder_learning_rate": 1.6384955486934157e-06
},
{
"epoch": 1.9885493038965318,
"grad_norm": 1.69153268173132,
"learning_rate": 1.6384955486934157e-06,
"loss": 0.0311,
"step": 755,
"ts_encoder_learning_rate": 1.6266472703396286e-06
},
{
"epoch": 1.9911854353735894,
"grad_norm": 0.7719824022746361,
"learning_rate": 1.6266472703396286e-06,
"loss": 0.0262,
"step": 756,
"ts_encoder_learning_rate": 1.6148336584044539e-06
},
{
"epoch": 1.9938215668506467,
"grad_norm": 1.3733252437657573,
"learning_rate": 1.6148336584044539e-06,
"loss": 0.0455,
"step": 757,
"ts_encoder_learning_rate": 1.6030548342910302e-06
},
{
"epoch": 1.996457698327704,
"grad_norm": 0.9175183384074306,
"learning_rate": 1.6030548342910302e-06,
"loss": 0.0322,
"step": 758,
"ts_encoder_learning_rate": 1.5913109190450033e-06
},
{
"epoch": 1.9990938298047616,
"grad_norm": 1.1099095764607132,
"learning_rate": 1.5913109190450033e-06,
"loss": 0.0394,
"step": 759,
"ts_encoder_learning_rate": 1.5796020333532696e-06
},
{
"epoch": 2.0,
"grad_norm": 1.1099095764607132,
"learning_rate": 1.5796020333532696e-06,
"loss": 0.0092,
"step": 760,
"ts_encoder_learning_rate": 1.567928297542749e-06
},
{
"epoch": 2.0026361314770575,
"grad_norm": 0.9713547183808008,
"learning_rate": 1.567928297542749e-06,
"loss": 0.023,
"step": 761,
"ts_encoder_learning_rate": 1.5562898315791354e-06
},
{
"epoch": 2.0052722629541146,
"grad_norm": 0.7009540897546592,
"learning_rate": 1.5562898315791354e-06,
"loss": 0.0244,
"step": 762,
"ts_encoder_learning_rate": 1.544686755065677e-06
},
{
"epoch": 2.007908394431172,
"grad_norm": 0.6101415210273791,
"learning_rate": 1.544686755065677e-06,
"loss": 0.0277,
"step": 763,
"ts_encoder_learning_rate": 1.5331191872419349e-06
},
{
"epoch": 2.0105445259082297,
"grad_norm": 1.326219222691662,
"learning_rate": 1.5331191872419349e-06,
"loss": 0.0299,
"step": 764,
"ts_encoder_learning_rate": 1.5215872469825682e-06
},
{
"epoch": 2.0131806573852873,
"grad_norm": 1.0899654240221532,
"learning_rate": 1.5215872469825682e-06,
"loss": 0.0272,
"step": 765,
"ts_encoder_learning_rate": 1.510091052796105e-06
},
{
"epoch": 2.0158167888623444,
"grad_norm": 1.087434101218195,
"learning_rate": 1.510091052796105e-06,
"loss": 0.0369,
"step": 766,
"ts_encoder_learning_rate": 1.4986307228237268e-06
},
{
"epoch": 2.018452920339402,
"grad_norm": 0.7445449957284473,
"learning_rate": 1.4986307228237268e-06,
"loss": 0.0281,
"step": 767,
"ts_encoder_learning_rate": 1.4872063748380544e-06
},
{
"epoch": 2.0210890518164595,
"grad_norm": 0.7216822829913329,
"learning_rate": 1.4872063748380544e-06,
"loss": 0.0268,
"step": 768,
"ts_encoder_learning_rate": 1.4758181262419425e-06
},
{
"epoch": 2.0237251832935166,
"grad_norm": 0.6931261426498003,
"learning_rate": 1.4758181262419425e-06,
"loss": 0.0282,
"step": 769,
"ts_encoder_learning_rate": 1.4644660940672628e-06
},
{
"epoch": 2.026361314770574,
"grad_norm": 0.9133884937103188,
"learning_rate": 1.4644660940672628e-06,
"loss": 0.0322,
"step": 770,
"ts_encoder_learning_rate": 1.4531503949737107e-06
},
{
"epoch": 2.0289974462476317,
"grad_norm": 1.0086856147825554,
"learning_rate": 1.4531503949737107e-06,
"loss": 0.0363,
"step": 771,
"ts_encoder_learning_rate": 1.4418711452476048e-06
},
{
"epoch": 2.031633577724689,
"grad_norm": 1.6070882260232144,
"learning_rate": 1.4418711452476048e-06,
"loss": 0.0249,
"step": 772,
"ts_encoder_learning_rate": 1.4306284608006837e-06
},
{
"epoch": 2.0342697092017463,
"grad_norm": 0.7314536247624556,
"learning_rate": 1.4306284608006837e-06,
"loss": 0.0274,
"step": 773,
"ts_encoder_learning_rate": 1.4194224571689286e-06
},
{
"epoch": 2.036905840678804,
"grad_norm": 0.9803513438084768,
"learning_rate": 1.4194224571689286e-06,
"loss": 0.0262,
"step": 774,
"ts_encoder_learning_rate": 1.4082532495113627e-06
},
{
"epoch": 2.0395419721558614,
"grad_norm": 0.8684478482698204,
"learning_rate": 1.4082532495113627e-06,
"loss": 0.0347,
"step": 775,
"ts_encoder_learning_rate": 1.3971209526088764e-06
},
{
"epoch": 2.0421781036329185,
"grad_norm": 1.6463822697507045,
"learning_rate": 1.3971209526088764e-06,
"loss": 0.0199,
"step": 776,
"ts_encoder_learning_rate": 1.3860256808630429e-06
},
{
"epoch": 2.044814235109976,
"grad_norm": 0.6542575533228376,
"learning_rate": 1.3860256808630429e-06,
"loss": 0.0162,
"step": 777,
"ts_encoder_learning_rate": 1.3749675482949487e-06
},
{
"epoch": 2.0474503665870336,
"grad_norm": 0.8414387552707323,
"learning_rate": 1.3749675482949487e-06,
"loss": 0.0194,
"step": 778,
"ts_encoder_learning_rate": 1.3639466685440133e-06
},
{
"epoch": 2.050086498064091,
"grad_norm": 0.8702132467221131,
"learning_rate": 1.3639466685440133e-06,
"loss": 0.0225,
"step": 779,
"ts_encoder_learning_rate": 1.3529631548668298e-06
},
{
"epoch": 2.0527226295411483,
"grad_norm": 0.7835222402353583,
"learning_rate": 1.3529631548668298e-06,
"loss": 0.0254,
"step": 780,
"ts_encoder_learning_rate": 1.3420171201359933e-06
},
{
"epoch": 2.055358761018206,
"grad_norm": 0.8617901557793036,
"learning_rate": 1.3420171201359933e-06,
"loss": 0.0214,
"step": 781,
"ts_encoder_learning_rate": 1.331108676838948e-06
},
{
"epoch": 2.0579948924952634,
"grad_norm": 0.8593905981383287,
"learning_rate": 1.331108676838948e-06,
"loss": 0.0215,
"step": 782,
"ts_encoder_learning_rate": 1.3202379370768254e-06
},
{
"epoch": 2.0606310239723205,
"grad_norm": 0.9813541929977725,
"learning_rate": 1.3202379370768254e-06,
"loss": 0.0244,
"step": 783,
"ts_encoder_learning_rate": 1.3094050125632973e-06
},
{
"epoch": 2.063267155449378,
"grad_norm": 0.973576019815649,
"learning_rate": 1.3094050125632973e-06,
"loss": 0.0267,
"step": 784,
"ts_encoder_learning_rate": 1.298610014623423e-06
},
{
"epoch": 2.0659032869264355,
"grad_norm": 0.6999339833328826,
"learning_rate": 1.298610014623423e-06,
"loss": 0.025,
"step": 785,
"ts_encoder_learning_rate": 1.2878530541925077e-06
},
{
"epoch": 2.0685394184034926,
"grad_norm": 1.0152957492148167,
"learning_rate": 1.2878530541925077e-06,
"loss": 0.0229,
"step": 786,
"ts_encoder_learning_rate": 1.2771342418149658e-06
},
{
"epoch": 2.07117554988055,
"grad_norm": 0.8698513957975991,
"learning_rate": 1.2771342418149658e-06,
"loss": 0.0265,
"step": 787,
"ts_encoder_learning_rate": 1.2664536876431755e-06
},
{
"epoch": 2.0738116813576077,
"grad_norm": 1.52591637899822,
"learning_rate": 1.2664536876431755e-06,
"loss": 0.0301,
"step": 788,
"ts_encoder_learning_rate": 1.2558115014363592e-06
},
{
"epoch": 2.0764478128346653,
"grad_norm": 0.9350945862866684,
"learning_rate": 1.2558115014363592e-06,
"loss": 0.0302,
"step": 789,
"ts_encoder_learning_rate": 1.2452077925594435e-06
},
{
"epoch": 2.0790839443117224,
"grad_norm": 0.8787004897897062,
"learning_rate": 1.2452077925594435e-06,
"loss": 0.0231,
"step": 790,
"ts_encoder_learning_rate": 1.234642669981946e-06
},
{
"epoch": 2.08172007578878,
"grad_norm": 0.6989519656800383,
"learning_rate": 1.234642669981946e-06,
"loss": 0.0277,
"step": 791,
"ts_encoder_learning_rate": 1.2241162422768444e-06
},
{
"epoch": 2.0843562072658375,
"grad_norm": 0.6816146317530842,
"learning_rate": 1.2241162422768444e-06,
"loss": 0.018,
"step": 792,
"ts_encoder_learning_rate": 1.2136286176194744e-06
},
{
"epoch": 2.0869923387428946,
"grad_norm": 0.9698620591378605,
"learning_rate": 1.2136286176194744e-06,
"loss": 0.0299,
"step": 793,
"ts_encoder_learning_rate": 1.203179903786401e-06
},
{
"epoch": 2.089628470219952,
"grad_norm": 1.0709130550444075,
"learning_rate": 1.203179903786401e-06,
"loss": 0.0267,
"step": 794,
"ts_encoder_learning_rate": 1.1927702081543279e-06
},
{
"epoch": 2.0922646016970097,
"grad_norm": 0.8147652241024573,
"learning_rate": 1.1927702081543279e-06,
"loss": 0.026,
"step": 795,
"ts_encoder_learning_rate": 1.1823996376989849e-06
},
{
"epoch": 2.094900733174067,
"grad_norm": 0.9483527494174168,
"learning_rate": 1.1823996376989849e-06,
"loss": 0.0363,
"step": 796,
"ts_encoder_learning_rate": 1.1720682989940264e-06
},
{
"epoch": 2.0975368646511243,
"grad_norm": 1.2061966993790179,
"learning_rate": 1.1720682989940264e-06,
"loss": 0.0204,
"step": 797,
"ts_encoder_learning_rate": 1.1617762982099446e-06
},
{
"epoch": 2.100172996128182,
"grad_norm": 0.6048278244859194,
"learning_rate": 1.1617762982099446e-06,
"loss": 0.025,
"step": 798,
"ts_encoder_learning_rate": 1.1515237411129698e-06
},
{
"epoch": 2.1028091276052394,
"grad_norm": 0.9014144838221269,
"learning_rate": 1.1515237411129698e-06,
"loss": 0.0231,
"step": 799,
"ts_encoder_learning_rate": 1.141310733063991e-06
},
{
"epoch": 2.1054452590822965,
"grad_norm": 0.779763420400092,
"learning_rate": 1.141310733063991e-06,
"loss": 0.0227,
"step": 800,
"ts_encoder_learning_rate": 1.1311373790174656e-06
},
{
"epoch": 2.108081390559354,
"grad_norm": 1.1737340769983258,
"learning_rate": 1.1311373790174656e-06,
"loss": 0.0204,
"step": 801,
"ts_encoder_learning_rate": 1.1210037835203508e-06
},
{
"epoch": 2.1107175220364116,
"grad_norm": 0.9202975123489521,
"learning_rate": 1.1210037835203508e-06,
"loss": 0.023,
"step": 802,
"ts_encoder_learning_rate": 1.1109100507110133e-06
},
{
"epoch": 2.113353653513469,
"grad_norm": 1.612705002454156,
"learning_rate": 1.1109100507110133e-06,
"loss": 0.0165,
"step": 803,
"ts_encoder_learning_rate": 1.1008562843181796e-06
},
{
"epoch": 2.1159897849905263,
"grad_norm": 0.6460378715478632,
"learning_rate": 1.1008562843181796e-06,
"loss": 0.0315,
"step": 804,
"ts_encoder_learning_rate": 1.0908425876598512e-06
},
{
"epoch": 2.118625916467584,
"grad_norm": 0.9987512843535336,
"learning_rate": 1.0908425876598512e-06,
"loss": 0.0295,
"step": 805,
"ts_encoder_learning_rate": 1.0808690636422587e-06
},
{
"epoch": 2.1212620479446413,
"grad_norm": 1.0149200333623514,
"learning_rate": 1.0808690636422587e-06,
"loss": 0.0211,
"step": 806,
"ts_encoder_learning_rate": 1.0709358147587883e-06
},
{
"epoch": 2.1238981794216985,
"grad_norm": 0.9119696321626057,
"learning_rate": 1.0709358147587883e-06,
"loss": 0.02,
"step": 807,
"ts_encoder_learning_rate": 1.0610429430889451e-06
},
{
"epoch": 2.126534310898756,
"grad_norm": 1.0352913040595793,
"learning_rate": 1.0610429430889451e-06,
"loss": 0.0275,
"step": 808,
"ts_encoder_learning_rate": 1.0511905502972885e-06
},
{
"epoch": 2.1291704423758135,
"grad_norm": 1.0733762586021238,
"learning_rate": 1.0511905502972885e-06,
"loss": 0.0183,
"step": 809,
"ts_encoder_learning_rate": 1.041378737632402e-06
},
{
"epoch": 2.131806573852871,
"grad_norm": 1.1892591837270536,
"learning_rate": 1.041378737632402e-06,
"loss": 0.0264,
"step": 810,
"ts_encoder_learning_rate": 1.031607605925839e-06
},
{
"epoch": 2.134442705329928,
"grad_norm": 0.886404626398071,
"learning_rate": 1.031607605925839e-06,
"loss": 0.0183,
"step": 811,
"ts_encoder_learning_rate": 1.0218772555910955e-06
},
{
"epoch": 2.1370788368069857,
"grad_norm": 0.7677446140505582,
"learning_rate": 1.0218772555910955e-06,
"loss": 0.0174,
"step": 812,
"ts_encoder_learning_rate": 1.0121877866225783e-06
},
{
"epoch": 2.1397149682840433,
"grad_norm": 0.9647219206963211,
"learning_rate": 1.0121877866225783e-06,
"loss": 0.0312,
"step": 813,
"ts_encoder_learning_rate": 1.0025392985945703e-06
},
{
"epoch": 2.1423510997611004,
"grad_norm": 1.18116857549236,
"learning_rate": 1.0025392985945703e-06,
"loss": 0.0328,
"step": 814,
"ts_encoder_learning_rate": 9.929318906602176e-07
},
{
"epoch": 2.144987231238158,
"grad_norm": 1.2020647866794596,
"learning_rate": 9.929318906602176e-07,
"loss": 0.0248,
"step": 815,
"ts_encoder_learning_rate": 9.833656615504978e-07
},
{
"epoch": 2.1476233627152155,
"grad_norm": 0.8370574190670566,
"learning_rate": 9.833656615504978e-07,
"loss": 0.0146,
"step": 816,
"ts_encoder_learning_rate": 9.738407095732195e-07
},
{
"epoch": 2.150259494192273,
"grad_norm": 1.0299221864599144,
"learning_rate": 9.738407095732195e-07,
"loss": 0.0217,
"step": 817,
"ts_encoder_learning_rate": 9.643571326119982e-07
},
{
"epoch": 2.15289562566933,
"grad_norm": 0.8018415484592077,
"learning_rate": 9.643571326119982e-07,
"loss": 0.0139,
"step": 818,
"ts_encoder_learning_rate": 9.549150281252633e-07
},
{
"epoch": 2.1555317571463877,
"grad_norm": 0.5028892166090512,
"learning_rate": 9.549150281252633e-07,
"loss": 0.0197,
"step": 819,
"ts_encoder_learning_rate": 9.455144931452459e-07
},
{
"epoch": 2.158167888623445,
"grad_norm": 0.6355920739705047,
"learning_rate": 9.455144931452459e-07,
"loss": 0.0251,
"step": 820,
"ts_encoder_learning_rate": 9.361556242769871e-07
},
{
"epoch": 2.1608040201005023,
"grad_norm": 1.1083085846789678,
"learning_rate": 9.361556242769871e-07,
"loss": 0.02,
"step": 821,
"ts_encoder_learning_rate": 9.26838517697346e-07
},
{
"epoch": 2.16344015157756,
"grad_norm": 0.9876632473531536,
"learning_rate": 9.26838517697346e-07,
"loss": 0.0351,
"step": 822,
"ts_encoder_learning_rate": 9.175632691540065e-07
},
{
"epoch": 2.1660762830546174,
"grad_norm": 2.2693420737855523,
"learning_rate": 9.175632691540065e-07,
"loss": 0.0276,
"step": 823,
"ts_encoder_learning_rate": 9.083299739645007e-07
},
{
"epoch": 2.168712414531675,
"grad_norm": 23.226392431703353,
"learning_rate": 9.083299739645007e-07,
"loss": 0.0242,
"step": 824,
"ts_encoder_learning_rate": 8.991387270152202e-07
},
{
"epoch": 2.171348546008732,
"grad_norm": 0.75772536779788,
"learning_rate": 8.991387270152202e-07,
"loss": 0.0178,
"step": 825,
"ts_encoder_learning_rate": 8.899896227604509e-07
},
{
"epoch": 2.1739846774857896,
"grad_norm": 0.7879892794840004,
"learning_rate": 8.899896227604509e-07,
"loss": 0.021,
"step": 826,
"ts_encoder_learning_rate": 8.808827552213917e-07
},
{
"epoch": 2.176620808962847,
"grad_norm": 0.6285382943818703,
"learning_rate": 8.808827552213917e-07,
"loss": 0.0275,
"step": 827,
"ts_encoder_learning_rate": 8.718182179851998e-07
},
{
"epoch": 2.1792569404399043,
"grad_norm": 0.8438059272532128,
"learning_rate": 8.718182179851998e-07,
"loss": 0.0222,
"step": 828,
"ts_encoder_learning_rate": 8.627961042040183e-07
},
{
"epoch": 2.181893071916962,
"grad_norm": 0.8841129842435451,
"learning_rate": 8.627961042040183e-07,
"loss": 0.0263,
"step": 829,
"ts_encoder_learning_rate": 8.538165065940263e-07
},
{
"epoch": 2.1845292033940193,
"grad_norm": 1.1647962750768701,
"learning_rate": 8.538165065940263e-07,
"loss": 0.0287,
"step": 830,
"ts_encoder_learning_rate": 8.448795174344803e-07
},
{
"epoch": 2.187165334871077,
"grad_norm": 0.6223444639742729,
"learning_rate": 8.448795174344803e-07,
"loss": 0.0182,
"step": 831,
"ts_encoder_learning_rate": 8.359852285667752e-07
},
{
"epoch": 2.189801466348134,
"grad_norm": 0.848108350576564,
"learning_rate": 8.359852285667752e-07,
"loss": 0.0278,
"step": 832,
"ts_encoder_learning_rate": 8.271337313934869e-07
},
{
"epoch": 2.1924375978251915,
"grad_norm": 0.6976201249959373,
"learning_rate": 8.271337313934869e-07,
"loss": 0.0254,
"step": 833,
"ts_encoder_learning_rate": 8.183251168774476e-07
},
{
"epoch": 2.195073729302249,
"grad_norm": 0.839401112904285,
"learning_rate": 8.183251168774476e-07,
"loss": 0.0146,
"step": 834,
"ts_encoder_learning_rate": 8.095594755407971e-07
},
{
"epoch": 2.197709860779306,
"grad_norm": 1.2538696240617628,
"learning_rate": 8.095594755407971e-07,
"loss": 0.0247,
"step": 835,
"ts_encoder_learning_rate": 8.008368974640634e-07
},
{
"epoch": 2.2003459922563637,
"grad_norm": 2.0495039413289633,
"learning_rate": 8.008368974640634e-07,
"loss": 0.0259,
"step": 836,
"ts_encoder_learning_rate": 7.921574722852343e-07
},
{
"epoch": 2.2029821237334213,
"grad_norm": 0.8059355568372082,
"learning_rate": 7.921574722852343e-07,
"loss": 0.0254,
"step": 837,
"ts_encoder_learning_rate": 7.835212891988292e-07
},
{
"epoch": 2.205618255210479,
"grad_norm": 1.1603161094730907,
"learning_rate": 7.835212891988292e-07,
"loss": 0.0222,
"step": 838,
"ts_encoder_learning_rate": 7.749284369549954e-07
},
{
"epoch": 2.208254386687536,
"grad_norm": 0.642346193891705,
"learning_rate": 7.749284369549954e-07,
"loss": 0.0173,
"step": 839,
"ts_encoder_learning_rate": 7.663790038585794e-07
},
{
"epoch": 2.2108905181645935,
"grad_norm": 0.969152855410992,
"learning_rate": 7.663790038585794e-07,
"loss": 0.0195,
"step": 840,
"ts_encoder_learning_rate": 7.578730777682386e-07
},
{
"epoch": 2.213526649641651,
"grad_norm": 0.6006548208059819,
"learning_rate": 7.578730777682386e-07,
"loss": 0.0187,
"step": 841,
"ts_encoder_learning_rate": 7.494107460955207e-07
},
{
"epoch": 2.216162781118708,
"grad_norm": 0.9560113777748233,
"learning_rate": 7.494107460955207e-07,
"loss": 0.019,
"step": 842,
"ts_encoder_learning_rate": 7.409920958039795e-07
},
{
"epoch": 2.2187989125957657,
"grad_norm": 1.0871309619249734,
"learning_rate": 7.409920958039795e-07,
"loss": 0.0196,
"step": 843,
"ts_encoder_learning_rate": 7.326172134082704e-07
},
{
"epoch": 2.221435044072823,
"grad_norm": 0.8419045626414003,
"learning_rate": 7.326172134082704e-07,
"loss": 0.0326,
"step": 844,
"ts_encoder_learning_rate": 7.242861849732696e-07
},
{
"epoch": 2.2240711755498808,
"grad_norm": 1.0419111956667741,
"learning_rate": 7.242861849732696e-07,
"loss": 0.0314,
"step": 845,
"ts_encoder_learning_rate": 7.159990961131818e-07
},
{
"epoch": 2.226707307026938,
"grad_norm": 0.8401412848739376,
"learning_rate": 7.159990961131818e-07,
"loss": 0.0304,
"step": 846,
"ts_encoder_learning_rate": 7.077560319906696e-07
},
{
"epoch": 2.2293434385039954,
"grad_norm": 0.9818544247472701,
"learning_rate": 7.077560319906696e-07,
"loss": 0.0328,
"step": 847,
"ts_encoder_learning_rate": 6.995570773159693e-07
},
{
"epoch": 2.231979569981053,
"grad_norm": 0.9988984189818604,
"learning_rate": 6.995570773159693e-07,
"loss": 0.036,
"step": 848,
"ts_encoder_learning_rate": 6.914023163460248e-07
},
{
"epoch": 2.23461570145811,
"grad_norm": 1.8121563715937015,
"learning_rate": 6.914023163460248e-07,
"loss": 0.0293,
"step": 849,
"ts_encoder_learning_rate": 6.832918328836247e-07
},
{
"epoch": 2.2372518329351676,
"grad_norm": 1.4137491922964698,
"learning_rate": 6.832918328836247e-07,
"loss": 0.0284,
"step": 850,
"ts_encoder_learning_rate": 6.752257102765325e-07
},
{
"epoch": 2.239887964412225,
"grad_norm": 0.7043298270791407,
"learning_rate": 6.752257102765325e-07,
"loss": 0.0254,
"step": 851,
"ts_encoder_learning_rate": 6.6720403141664e-07
},
{
"epoch": 2.2425240958892827,
"grad_norm": 0.9273068808985826,
"learning_rate": 6.6720403141664e-07,
"loss": 0.0254,
"step": 852,
"ts_encoder_learning_rate": 6.592268787391077e-07
},
{
"epoch": 2.24516022736634,
"grad_norm": 0.9385646367897301,
"learning_rate": 6.592268787391077e-07,
"loss": 0.0298,
"step": 853,
"ts_encoder_learning_rate": 6.512943342215234e-07
},
{
"epoch": 2.2477963588433973,
"grad_norm": 0.6600040913818298,
"learning_rate": 6.512943342215234e-07,
"loss": 0.0237,
"step": 854,
"ts_encoder_learning_rate": 6.43406479383053e-07
},
{
"epoch": 2.250432490320455,
"grad_norm": 0.6282127208289683,
"learning_rate": 6.43406479383053e-07,
"loss": 0.0213,
"step": 855,
"ts_encoder_learning_rate": 6.355633952836115e-07
},
{
"epoch": 2.253068621797512,
"grad_norm": 0.6788883483435632,
"learning_rate": 6.355633952836115e-07,
"loss": 0.021,
"step": 856,
"ts_encoder_learning_rate": 6.277651625230219e-07
},
{
"epoch": 2.2557047532745695,
"grad_norm": 0.6668730348823718,
"learning_rate": 6.277651625230219e-07,
"loss": 0.0236,
"step": 857,
"ts_encoder_learning_rate": 6.200118612401918e-07
},
{
"epoch": 2.258340884751627,
"grad_norm": 7.060799735251775,
"learning_rate": 6.200118612401918e-07,
"loss": 0.0297,
"step": 858,
"ts_encoder_learning_rate": 6.12303571112286e-07
},
{
"epoch": 2.2609770162286846,
"grad_norm": 1.046944161496044,
"learning_rate": 6.12303571112286e-07,
"loss": 0.0273,
"step": 859,
"ts_encoder_learning_rate": 6.04640371353914e-07
},
{
"epoch": 2.2636131477057417,
"grad_norm": 1.1099528570927184,
"learning_rate": 6.04640371353914e-07,
"loss": 0.0252,
"step": 860,
"ts_encoder_learning_rate": 5.9702234071631e-07
},
{
"epoch": 2.2662492791827993,
"grad_norm": 0.9773409732921918,
"learning_rate": 5.9702234071631e-07,
"loss": 0.0294,
"step": 861,
"ts_encoder_learning_rate": 5.89449557486525e-07
},
{
"epoch": 2.268885410659857,
"grad_norm": 1.0917243460679,
"learning_rate": 5.89449557486525e-07,
"loss": 0.0138,
"step": 862,
"ts_encoder_learning_rate": 5.819220994866237e-07
},
{
"epoch": 2.271521542136914,
"grad_norm": 0.6888106894141612,
"learning_rate": 5.819220994866237e-07,
"loss": 0.0144,
"step": 863,
"ts_encoder_learning_rate": 5.744400440728826e-07
},
{
"epoch": 2.2741576736139715,
"grad_norm": 0.633492770066237,
"learning_rate": 5.744400440728826e-07,
"loss": 0.0232,
"step": 864,
"ts_encoder_learning_rate": 5.670034681349995e-07
},
{
"epoch": 2.276793805091029,
"grad_norm": 6.0165889346314865,
"learning_rate": 5.670034681349995e-07,
"loss": 0.025,
"step": 865,
"ts_encoder_learning_rate": 5.596124480952975e-07
},
{
"epoch": 2.2794299365680866,
"grad_norm": 1.5182521559787252,
"learning_rate": 5.596124480952975e-07,
"loss": 0.0268,
"step": 866,
"ts_encoder_learning_rate": 5.522670599079416e-07
},
{
"epoch": 2.2820660680451437,
"grad_norm": 0.8001358979720962,
"learning_rate": 5.522670599079416e-07,
"loss": 0.0213,
"step": 867,
"ts_encoder_learning_rate": 5.449673790581611e-07
},
{
"epoch": 2.284702199522201,
"grad_norm": 1.8105317634620746,
"learning_rate": 5.449673790581611e-07,
"loss": 0.0252,
"step": 868,
"ts_encoder_learning_rate": 5.377134805614714e-07
},
{
"epoch": 2.2873383309992588,
"grad_norm": 0.9912234653856702,
"learning_rate": 5.377134805614714e-07,
"loss": 0.0237,
"step": 869,
"ts_encoder_learning_rate": 5.305054389629022e-07
},
{
"epoch": 2.289974462476316,
"grad_norm": 0.7760075581423171,
"learning_rate": 5.305054389629022e-07,
"loss": 0.0235,
"step": 870,
"ts_encoder_learning_rate": 5.233433283362349e-07
},
{
"epoch": 2.2926105939533734,
"grad_norm": 0.6707225852541246,
"learning_rate": 5.233433283362349e-07,
"loss": 0.0217,
"step": 871,
"ts_encoder_learning_rate": 5.162272222832349e-07
},
{
"epoch": 2.295246725430431,
"grad_norm": 0.6919555345400337,
"learning_rate": 5.162272222832349e-07,
"loss": 0.027,
"step": 872,
"ts_encoder_learning_rate": 5.091571939329049e-07
},
{
"epoch": 2.2978828569074885,
"grad_norm": 0.8176259644215725,
"learning_rate": 5.091571939329049e-07,
"loss": 0.0196,
"step": 873,
"ts_encoder_learning_rate": 5.021333159407232e-07
},
{
"epoch": 2.3005189883845456,
"grad_norm": 8.688779032036406,
"learning_rate": 5.021333159407232e-07,
"loss": 0.0222,
"step": 874,
"ts_encoder_learning_rate": 4.951556604879049e-07
},
{
"epoch": 2.303155119861603,
"grad_norm": 0.4934714850429601,
"learning_rate": 4.951556604879049e-07,
"loss": 0.0225,
"step": 875,
"ts_encoder_learning_rate": 4.882242992806546e-07
},
{
"epoch": 2.3057912513386607,
"grad_norm": 0.6932226455966719,
"learning_rate": 4.882242992806546e-07,
"loss": 0.0279,
"step": 876,
"ts_encoder_learning_rate": 4.813393035494329e-07
},
{
"epoch": 2.308427382815718,
"grad_norm": 0.785391963657986,
"learning_rate": 4.813393035494329e-07,
"loss": 0.0257,
"step": 877,
"ts_encoder_learning_rate": 4.745007440482252e-07
},
{
"epoch": 2.3110635142927753,
"grad_norm": 0.7002142776657593,
"learning_rate": 4.745007440482252e-07,
"loss": 0.0167,
"step": 878,
"ts_encoder_learning_rate": 4.677086910538092e-07
},
{
"epoch": 2.313699645769833,
"grad_norm": 0.6570845075498787,
"learning_rate": 4.677086910538092e-07,
"loss": 0.0204,
"step": 879,
"ts_encoder_learning_rate": 4.6096321436504e-07
},
{
"epoch": 2.3163357772468904,
"grad_norm": 0.708907031204872,
"learning_rate": 4.6096321436504e-07,
"loss": 0.0194,
"step": 880,
"ts_encoder_learning_rate": 4.542643833021254e-07
},
{
"epoch": 2.3189719087239475,
"grad_norm": 0.7432937034337077,
"learning_rate": 4.542643833021254e-07,
"loss": 0.0244,
"step": 881,
"ts_encoder_learning_rate": 4.4761226670592074e-07
},
{
"epoch": 2.321608040201005,
"grad_norm": 1.0057609426812313,
"learning_rate": 4.4761226670592074e-07,
"loss": 0.0243,
"step": 882,
"ts_encoder_learning_rate": 4.410069329372152e-07
},
{
"epoch": 2.3242441716780626,
"grad_norm": 0.9748291224257548,
"learning_rate": 4.410069329372152e-07,
"loss": 0.0261,
"step": 883,
"ts_encoder_learning_rate": 4.344484498760343e-07
},
{
"epoch": 2.3268803031551197,
"grad_norm": 0.7468342295758325,
"learning_rate": 4.344484498760343e-07,
"loss": 0.017,
"step": 884,
"ts_encoder_learning_rate": 4.279368849209381e-07
},
{
"epoch": 2.3295164346321773,
"grad_norm": 0.4990301640406677,
"learning_rate": 4.279368849209381e-07,
"loss": 0.0163,
"step": 885,
"ts_encoder_learning_rate": 4.214723049883307e-07
},
{
"epoch": 2.332152566109235,
"grad_norm": 0.8120692295636992,
"learning_rate": 4.214723049883307e-07,
"loss": 0.0271,
"step": 886,
"ts_encoder_learning_rate": 4.150547765117746e-07
},
{
"epoch": 2.334788697586292,
"grad_norm": 0.8689858151247111,
"learning_rate": 4.150547765117746e-07,
"loss": 0.0172,
"step": 887,
"ts_encoder_learning_rate": 4.086843654413031e-07
},
{
"epoch": 2.3374248290633495,
"grad_norm": 0.7557921121418426,
"learning_rate": 4.086843654413031e-07,
"loss": 0.0148,
"step": 888,
"ts_encoder_learning_rate": 4.0236113724274716e-07
},
{
"epoch": 2.340060960540407,
"grad_norm": 0.631642992248285,
"learning_rate": 4.0236113724274716e-07,
"loss": 0.0256,
"step": 889,
"ts_encoder_learning_rate": 3.960851568970586e-07
},
{
"epoch": 2.342697092017464,
"grad_norm": 1.080316244149369,
"learning_rate": 3.960851568970586e-07,
"loss": 0.0217,
"step": 890,
"ts_encoder_learning_rate": 3.8985648889964755e-07
},
{
"epoch": 2.3453332234945217,
"grad_norm": 1.054131562422457,
"learning_rate": 3.8985648889964755e-07,
"loss": 0.0211,
"step": 891,
"ts_encoder_learning_rate": 3.83675197259713e-07
},
{
"epoch": 2.347969354971579,
"grad_norm": 0.8646617497823404,
"learning_rate": 3.83675197259713e-07,
"loss": 0.019,
"step": 892,
"ts_encoder_learning_rate": 3.77541345499593e-07
},
{
"epoch": 2.3506054864486368,
"grad_norm": 0.7244425140139443,
"learning_rate": 3.77541345499593e-07,
"loss": 0.0184,
"step": 893,
"ts_encoder_learning_rate": 3.7145499665410147e-07
},
{
"epoch": 2.353241617925694,
"grad_norm": 0.5515618977177071,
"learning_rate": 3.7145499665410147e-07,
"loss": 0.0143,
"step": 894,
"ts_encoder_learning_rate": 3.6541621326989183e-07
},
{
"epoch": 2.3558777494027514,
"grad_norm": 0.7590154170192799,
"learning_rate": 3.6541621326989183e-07,
"loss": 0.0241,
"step": 895,
"ts_encoder_learning_rate": 3.5942505740480583e-07
},
{
"epoch": 2.358513880879809,
"grad_norm": 0.7373124344034562,
"learning_rate": 3.5942505740480583e-07,
"loss": 0.0203,
"step": 896,
"ts_encoder_learning_rate": 3.534815906272404e-07
},
{
"epoch": 2.361150012356866,
"grad_norm": 0.7737906407252233,
"learning_rate": 3.534815906272404e-07,
"loss": 0.0122,
"step": 897,
"ts_encoder_learning_rate": 3.475858740155108e-07
},
{
"epoch": 2.3637861438339236,
"grad_norm": 0.5846122157036439,
"learning_rate": 3.475858740155108e-07,
"loss": 0.0213,
"step": 898,
"ts_encoder_learning_rate": 3.417379681572297e-07
},
{
"epoch": 2.366422275310981,
"grad_norm": 1.1971330946069132,
"learning_rate": 3.417379681572297e-07,
"loss": 0.0182,
"step": 899,
"ts_encoder_learning_rate": 3.359379331486762e-07
},
{
"epoch": 2.3690584067880387,
"grad_norm": 0.5894377764075366,
"learning_rate": 3.359379331486762e-07,
"loss": 0.0228,
"step": 900,
"ts_encoder_learning_rate": 3.301858285941845e-07
},
{
"epoch": 2.371694538265096,
"grad_norm": 0.6964796421497281,
"learning_rate": 3.301858285941845e-07,
"loss": 0.019,
"step": 901,
"ts_encoder_learning_rate": 3.2448171360552837e-07
},
{
"epoch": 2.3743306697421533,
"grad_norm": 0.8068516225517265,
"learning_rate": 3.2448171360552837e-07,
"loss": 0.0242,
"step": 902,
"ts_encoder_learning_rate": 3.18825646801314e-07
},
{
"epoch": 2.376966801219211,
"grad_norm": 0.6128923162506175,
"learning_rate": 3.18825646801314e-07,
"loss": 0.0219,
"step": 903,
"ts_encoder_learning_rate": 3.1321768630638073e-07
},
{
"epoch": 2.379602932696268,
"grad_norm": 0.9632236483373098,
"learning_rate": 3.1321768630638073e-07,
"loss": 0.0234,
"step": 904,
"ts_encoder_learning_rate": 3.076578897511978e-07
},
{
"epoch": 2.3822390641733255,
"grad_norm": 0.7461189266458699,
"learning_rate": 3.076578897511978e-07,
"loss": 0.0144,
"step": 905,
"ts_encoder_learning_rate": 3.0214631427127883e-07
},
{
"epoch": 2.384875195650383,
"grad_norm": 0.716902757344509,
"learning_rate": 3.0214631427127883e-07,
"loss": 0.0177,
"step": 906,
"ts_encoder_learning_rate": 2.966830165065876e-07
},
{
"epoch": 2.3875113271274406,
"grad_norm": 0.5861169684467917,
"learning_rate": 2.966830165065876e-07,
"loss": 0.027,
"step": 907,
"ts_encoder_learning_rate": 2.912680526009626e-07
},
{
"epoch": 2.3901474586044977,
"grad_norm": 0.834726511672464,
"learning_rate": 2.912680526009626e-07,
"loss": 0.0231,
"step": 908,
"ts_encoder_learning_rate": 2.8590147820153513e-07
},
{
"epoch": 2.3927835900815553,
"grad_norm": 0.6958964808420537,
"learning_rate": 2.8590147820153513e-07,
"loss": 0.0211,
"step": 909,
"ts_encoder_learning_rate": 2.8058334845816214e-07
},
{
"epoch": 2.395419721558613,
"grad_norm": 0.7097540987940018,
"learning_rate": 2.8058334845816214e-07,
"loss": 0.0169,
"step": 910,
"ts_encoder_learning_rate": 2.7531371802285436e-07
},
{
"epoch": 2.39805585303567,
"grad_norm": 0.8778409447989682,
"learning_rate": 2.7531371802285436e-07,
"loss": 0.026,
"step": 911,
"ts_encoder_learning_rate": 2.7009264104921606e-07
},
{
"epoch": 2.4006919845127275,
"grad_norm": 0.7010540368539773,
"learning_rate": 2.7009264104921606e-07,
"loss": 0.0175,
"step": 912,
"ts_encoder_learning_rate": 2.6492017119189415e-07
},
{
"epoch": 2.403328115989785,
"grad_norm": 0.5997922863707355,
"learning_rate": 2.6492017119189415e-07,
"loss": 0.0222,
"step": 913,
"ts_encoder_learning_rate": 2.5979636160601673e-07
},
{
"epoch": 2.4059642474668426,
"grad_norm": 0.667743261533583,
"learning_rate": 2.5979636160601673e-07,
"loss": 0.0197,
"step": 914,
"ts_encoder_learning_rate": 2.547212649466568e-07
},
{
"epoch": 2.4086003789438997,
"grad_norm": 0.8452729220439927,
"learning_rate": 2.547212649466568e-07,
"loss": 0.0197,
"step": 915,
"ts_encoder_learning_rate": 2.4969493336828353e-07
},
{
"epoch": 2.411236510420957,
"grad_norm": 0.726874711910585,
"learning_rate": 2.4969493336828353e-07,
"loss": 0.024,
"step": 916,
"ts_encoder_learning_rate": 2.447174185242324e-07
},
{
"epoch": 2.4138726418980148,
"grad_norm": 0.6390019959390945,
"learning_rate": 2.447174185242324e-07,
"loss": 0.0109,
"step": 917,
"ts_encoder_learning_rate": 2.397887715661679e-07
},
{
"epoch": 2.416508773375072,
"grad_norm": 0.844652867992998,
"learning_rate": 2.397887715661679e-07,
"loss": 0.0276,
"step": 918,
"ts_encoder_learning_rate": 2.3490904314356412e-07
},
{
"epoch": 2.4191449048521294,
"grad_norm": 0.8450103641231302,
"learning_rate": 2.3490904314356412e-07,
"loss": 0.0209,
"step": 919,
"ts_encoder_learning_rate": 2.3007828340318117e-07
},
{
"epoch": 2.421781036329187,
"grad_norm": 0.9914129840085654,
"learning_rate": 2.3007828340318117e-07,
"loss": 0.0224,
"step": 920,
"ts_encoder_learning_rate": 2.2529654198854834e-07
},
{
"epoch": 2.4244171678062445,
"grad_norm": 0.7485782865431817,
"learning_rate": 2.2529654198854834e-07,
"loss": 0.0231,
"step": 921,
"ts_encoder_learning_rate": 2.205638680394573e-07
},
{
"epoch": 2.4270532992833016,
"grad_norm": 0.863071039677985,
"learning_rate": 2.205638680394573e-07,
"loss": 0.0217,
"step": 922,
"ts_encoder_learning_rate": 2.1588031019145638e-07
},
{
"epoch": 2.429689430760359,
"grad_norm": 0.7178423022311772,
"learning_rate": 2.1588031019145638e-07,
"loss": 0.0245,
"step": 923,
"ts_encoder_learning_rate": 2.1124591657534776e-07
},
{
"epoch": 2.4323255622374167,
"grad_norm": 0.8272616881685937,
"learning_rate": 2.1124591657534776e-07,
"loss": 0.0199,
"step": 924,
"ts_encoder_learning_rate": 2.0666073481669714e-07
},
{
"epoch": 2.434961693714474,
"grad_norm": 0.5263023342082227,
"learning_rate": 2.0666073481669714e-07,
"loss": 0.0223,
"step": 925,
"ts_encoder_learning_rate": 2.0212481203534083e-07
},
{
"epoch": 2.4375978251915313,
"grad_norm": 1.1822141493073524,
"learning_rate": 2.0212481203534083e-07,
"loss": 0.0273,
"step": 926,
"ts_encoder_learning_rate": 1.9763819484490353e-07
},
{
"epoch": 2.440233956668589,
"grad_norm": 0.6818080172841297,
"learning_rate": 1.9763819484490353e-07,
"loss": 0.02,
"step": 927,
"ts_encoder_learning_rate": 1.932009293523196e-07
},
{
"epoch": 2.4428700881456464,
"grad_norm": 0.9008331212699131,
"learning_rate": 1.932009293523196e-07,
"loss": 0.0216,
"step": 928,
"ts_encoder_learning_rate": 1.8881306115735632e-07
},
{
"epoch": 2.4455062196227035,
"grad_norm": 0.7779126298027532,
"learning_rate": 1.8881306115735632e-07,
"loss": 0.0185,
"step": 929,
"ts_encoder_learning_rate": 1.8447463535214872e-07
},
{
"epoch": 2.448142351099761,
"grad_norm": 0.9120921557733357,
"learning_rate": 1.8447463535214872e-07,
"loss": 0.0186,
"step": 930,
"ts_encoder_learning_rate": 1.801856965207338e-07
},
{
"epoch": 2.4507784825768186,
"grad_norm": 0.740476134046954,
"learning_rate": 1.801856965207338e-07,
"loss": 0.02,
"step": 931,
"ts_encoder_learning_rate": 1.7594628873859488e-07
},
{
"epoch": 2.4534146140538757,
"grad_norm": 0.720558833321943,
"learning_rate": 1.7594628873859488e-07,
"loss": 0.0165,
"step": 932,
"ts_encoder_learning_rate": 1.7175645557220567e-07
},
{
"epoch": 2.4560507455309333,
"grad_norm": 0.7232369354288679,
"learning_rate": 1.7175645557220567e-07,
"loss": 0.0258,
"step": 933,
"ts_encoder_learning_rate": 1.6761624007858524e-07
},
{
"epoch": 2.458686877007991,
"grad_norm": 0.9619568884736648,
"learning_rate": 1.6761624007858524e-07,
"loss": 0.0206,
"step": 934,
"ts_encoder_learning_rate": 1.6352568480485277e-07
},
{
"epoch": 2.4613230084850484,
"grad_norm": 0.5484139865997792,
"learning_rate": 1.6352568480485277e-07,
"loss": 0.0174,
"step": 935,
"ts_encoder_learning_rate": 1.594848317877934e-07
},
{
"epoch": 2.4639591399621055,
"grad_norm": 1.1119566633908704,
"learning_rate": 1.594848317877934e-07,
"loss": 0.024,
"step": 936,
"ts_encoder_learning_rate": 1.5549372255342367e-07
},
{
"epoch": 2.466595271439163,
"grad_norm": 0.8218791802362867,
"learning_rate": 1.5549372255342367e-07,
"loss": 0.0193,
"step": 937,
"ts_encoder_learning_rate": 1.5155239811656562e-07
},
{
"epoch": 2.4692314029162206,
"grad_norm": 0.6933235377212601,
"learning_rate": 1.5155239811656562e-07,
"loss": 0.0288,
"step": 938,
"ts_encoder_learning_rate": 1.4766089898042678e-07
},
{
"epoch": 2.4718675343932777,
"grad_norm": 0.953369090336964,
"learning_rate": 1.4766089898042678e-07,
"loss": 0.0214,
"step": 939,
"ts_encoder_learning_rate": 1.4381926513618139e-07
},
{
"epoch": 2.474503665870335,
"grad_norm": 0.6976764387240867,
"learning_rate": 1.4381926513618139e-07,
"loss": 0.0197,
"step": 940,
"ts_encoder_learning_rate": 1.4002753606256082e-07
},
{
"epoch": 2.4771397973473928,
"grad_norm": 0.7526896911937908,
"learning_rate": 1.4002753606256082e-07,
"loss": 0.0205,
"step": 941,
"ts_encoder_learning_rate": 1.362857507254478e-07
},
{
"epoch": 2.4797759288244503,
"grad_norm": 0.7732644266125883,
"learning_rate": 1.362857507254478e-07,
"loss": 0.0196,
"step": 942,
"ts_encoder_learning_rate": 1.3259394757747678e-07
},
{
"epoch": 2.4824120603015074,
"grad_norm": 0.7898334854513247,
"learning_rate": 1.3259394757747678e-07,
"loss": 0.0168,
"step": 943,
"ts_encoder_learning_rate": 1.2895216455763582e-07
},
{
"epoch": 2.485048191778565,
"grad_norm": 0.7203527294510174,
"learning_rate": 1.2895216455763582e-07,
"loss": 0.0215,
"step": 944,
"ts_encoder_learning_rate": 1.253604390908819e-07
},
{
"epoch": 2.4876843232556225,
"grad_norm": 0.8318588601172171,
"learning_rate": 1.253604390908819e-07,
"loss": 0.0233,
"step": 945,
"ts_encoder_learning_rate": 1.2181880808775026e-07
},
{
"epoch": 2.4903204547326796,
"grad_norm": 0.7763925821106455,
"learning_rate": 1.2181880808775026e-07,
"loss": 0.0231,
"step": 946,
"ts_encoder_learning_rate": 1.1832730794397951e-07
},
{
"epoch": 2.492956586209737,
"grad_norm": 0.7417703735767751,
"learning_rate": 1.1832730794397951e-07,
"loss": 0.0202,
"step": 947,
"ts_encoder_learning_rate": 1.1488597454013539e-07
},
{
"epoch": 2.4955927176867947,
"grad_norm": 0.7347992893208377,
"learning_rate": 1.1488597454013539e-07,
"loss": 0.0121,
"step": 948,
"ts_encoder_learning_rate": 1.1149484324124326e-07
},
{
"epoch": 2.4982288491638522,
"grad_norm": 0.5208636481653479,
"learning_rate": 1.1149484324124326e-07,
"loss": 0.0184,
"step": 949,
"ts_encoder_learning_rate": 1.0815394889642339e-07
},
{
"epoch": 2.5008649806409093,
"grad_norm": 0.6580687022583558,
"learning_rate": 1.0815394889642339e-07,
"loss": 0.0253,
"step": 950,
"ts_encoder_learning_rate": 1.0486332583853565e-07
},
{
"epoch": 2.503501112117967,
"grad_norm": 0.7791631897968705,
"learning_rate": 1.0486332583853565e-07,
"loss": 0.0177,
"step": 951,
"ts_encoder_learning_rate": 1.0162300788382263e-07
},
{
"epoch": 2.5061372435950244,
"grad_norm": 0.7718676787617951,
"learning_rate": 1.0162300788382263e-07,
"loss": 0.0242,
"step": 952,
"ts_encoder_learning_rate": 9.843302833156377e-08
},
{
"epoch": 2.5087733750720815,
"grad_norm": 0.9526114922481819,
"learning_rate": 9.843302833156377e-08,
"loss": 0.018,
"step": 953,
"ts_encoder_learning_rate": 9.529341996373675e-08
},
{
"epoch": 2.511409506549139,
"grad_norm": 0.6723748361084942,
"learning_rate": 9.529341996373675e-08,
"loss": 0.0142,
"step": 954,
"ts_encoder_learning_rate": 9.22042150446728e-08
},
{
"epoch": 2.5140456380261966,
"grad_norm": 0.640693460278807,
"learning_rate": 9.22042150446728e-08,
"loss": 0.0248,
"step": 955,
"ts_encoder_learning_rate": 8.916544532073413e-08
},
{
"epoch": 2.516681769503254,
"grad_norm": 0.7438871182485605,
"learning_rate": 8.916544532073413e-08,
"loss": 0.0176,
"step": 956,
"ts_encoder_learning_rate": 8.617714201998084e-08
},
{
"epoch": 2.5193179009803113,
"grad_norm": 0.6536893914893551,
"learning_rate": 8.617714201998084e-08,
"loss": 0.0197,
"step": 957,
"ts_encoder_learning_rate": 8.323933585185184e-08
},
{
"epoch": 2.521954032457369,
"grad_norm": 0.7966005611731805,
"learning_rate": 8.323933585185184e-08,
"loss": 0.0202,
"step": 958,
"ts_encoder_learning_rate": 8.035205700685167e-08
},
{
"epoch": 2.5245901639344264,
"grad_norm": 0.7751864100873821,
"learning_rate": 8.035205700685167e-08,
"loss": 0.0245,
"step": 959,
"ts_encoder_learning_rate": 7.7515335156238e-08
},
{
"epoch": 2.5272262954114835,
"grad_norm": 0.6397557109288652,
"learning_rate": 7.7515335156238e-08,
"loss": 0.0138,
"step": 960,
"ts_encoder_learning_rate": 7.47291994517163e-08
},
{
"epoch": 2.529862426888541,
"grad_norm": 0.9115949923033936,
"learning_rate": 7.47291994517163e-08,
"loss": 0.0278,
"step": 961,
"ts_encoder_learning_rate": 7.199367852514239e-08
},
{
"epoch": 2.5324985583655986,
"grad_norm": 0.6977436866064831,
"learning_rate": 7.199367852514239e-08,
"loss": 0.0169,
"step": 962,
"ts_encoder_learning_rate": 6.930880048822531e-08
},
{
"epoch": 2.535134689842656,
"grad_norm": 0.8119048152627732,
"learning_rate": 6.930880048822531e-08,
"loss": 0.0222,
"step": 963,
"ts_encoder_learning_rate": 6.667459293224155e-08
},
{
"epoch": 2.537770821319713,
"grad_norm": 0.6593343852854229,
"learning_rate": 6.667459293224155e-08,
"loss": 0.0226,
"step": 964,
"ts_encoder_learning_rate": 6.409108292774912e-08
},
{
"epoch": 2.5404069527967708,
"grad_norm": 0.7024263781864509,
"learning_rate": 6.409108292774912e-08,
"loss": 0.0175,
"step": 965,
"ts_encoder_learning_rate": 6.15582970243117e-08
},
{
"epoch": 2.5430430842738283,
"grad_norm": 0.5434330332434761,
"learning_rate": 6.15582970243117e-08,
"loss": 0.0229,
"step": 966,
"ts_encoder_learning_rate": 5.907626125022159e-08
},
{
"epoch": 2.5456792157508854,
"grad_norm": 0.9794479292209439,
"learning_rate": 5.907626125022159e-08,
"loss": 0.0244,
"step": 967,
"ts_encoder_learning_rate": 5.6645001112237694e-08
},
{
"epoch": 2.548315347227943,
"grad_norm": 0.9128017914715376,
"learning_rate": 5.6645001112237694e-08,
"loss": 0.0223,
"step": 968,
"ts_encoder_learning_rate": 5.426454159531913e-08
},
{
"epoch": 2.5509514787050005,
"grad_norm": 0.8322790378300886,
"learning_rate": 5.426454159531913e-08,
"loss": 0.0167,
"step": 969,
"ts_encoder_learning_rate": 5.1934907162370374e-08
},
{
"epoch": 2.553587610182058,
"grad_norm": 0.5828552348921294,
"learning_rate": 5.1934907162370374e-08,
"loss": 0.0233,
"step": 970,
"ts_encoder_learning_rate": 4.9656121753990924e-08
},
{
"epoch": 2.556223741659115,
"grad_norm": 0.6297264426484448,
"learning_rate": 4.9656121753990924e-08,
"loss": 0.0248,
"step": 971,
"ts_encoder_learning_rate": 4.742820878822496e-08
},
{
"epoch": 2.5588598731361727,
"grad_norm": 0.6449652253099856,
"learning_rate": 4.742820878822496e-08,
"loss": 0.018,
"step": 972,
"ts_encoder_learning_rate": 4.52511911603265e-08
},
{
"epoch": 2.5614960046132302,
"grad_norm": 0.8662936136802849,
"learning_rate": 4.52511911603265e-08,
"loss": 0.0207,
"step": 973,
"ts_encoder_learning_rate": 4.312509124251907e-08
},
{
"epoch": 2.5641321360902873,
"grad_norm": 0.8396191031005396,
"learning_rate": 4.312509124251907e-08,
"loss": 0.0229,
"step": 974,
"ts_encoder_learning_rate": 4.104993088376974e-08
},
{
"epoch": 2.566768267567345,
"grad_norm": 0.7510456306691026,
"learning_rate": 4.104993088376974e-08,
"loss": 0.0153,
"step": 975,
"ts_encoder_learning_rate": 3.902573140956101e-08
},
{
"epoch": 2.5694043990444024,
"grad_norm": 0.5430731666125107,
"learning_rate": 3.902573140956101e-08,
"loss": 0.0222,
"step": 976,
"ts_encoder_learning_rate": 3.705251362167484e-08
},
{
"epoch": 2.57204053052146,
"grad_norm": 0.750719791150226,
"learning_rate": 3.705251362167484e-08,
"loss": 0.0133,
"step": 977,
"ts_encoder_learning_rate": 3.513029779797783e-08
},
{
"epoch": 2.574676661998517,
"grad_norm": 0.6890160305022875,
"learning_rate": 3.513029779797783e-08,
"loss": 0.0226,
"step": 978,
"ts_encoder_learning_rate": 3.325910369220975e-08
},
{
"epoch": 2.5773127934755746,
"grad_norm": 0.640217064006493,
"learning_rate": 3.325910369220975e-08,
"loss": 0.0217,
"step": 979,
"ts_encoder_learning_rate": 3.143895053378698e-08
},
{
"epoch": 2.579948924952632,
"grad_norm": 0.650608671275551,
"learning_rate": 3.143895053378698e-08,
"loss": 0.021,
"step": 980,
"ts_encoder_learning_rate": 2.966985702759828e-08
},
{
"epoch": 2.5825850564296893,
"grad_norm": 1.4308555266835152,
"learning_rate": 2.966985702759828e-08,
"loss": 0.0281,
"step": 981,
"ts_encoder_learning_rate": 2.7951841353817676e-08
},
{
"epoch": 2.585221187906747,
"grad_norm": 1.0043277890456705,
"learning_rate": 2.7951841353817676e-08,
"loss": 0.0205,
"step": 982,
"ts_encoder_learning_rate": 2.6284921167712975e-08
},
{
"epoch": 2.5878573193838044,
"grad_norm": 0.7699218451759371,
"learning_rate": 2.6284921167712975e-08,
"loss": 0.0201,
"step": 983,
"ts_encoder_learning_rate": 2.4669113599469774e-08
},
{
"epoch": 2.590493450860862,
"grad_norm": 0.6897052459896869,
"learning_rate": 2.4669113599469774e-08,
"loss": 0.0242,
"step": 984,
"ts_encoder_learning_rate": 2.3104435254008852e-08
},
{
"epoch": 2.593129582337919,
"grad_norm": 0.7228194509828196,
"learning_rate": 2.3104435254008852e-08,
"loss": 0.0285,
"step": 985,
"ts_encoder_learning_rate": 2.159090221082294e-08
},
{
"epoch": 2.5957657138149766,
"grad_norm": 0.8174809900239196,
"learning_rate": 2.159090221082294e-08,
"loss": 0.0167,
"step": 986,
"ts_encoder_learning_rate": 2.012853002380466e-08
},
{
"epoch": 2.5984018452920337,
"grad_norm": 0.7653138858818684,
"learning_rate": 2.012853002380466e-08,
"loss": 0.0125,
"step": 987,
"ts_encoder_learning_rate": 1.8717333721091634e-08
},
{
"epoch": 2.601037976769091,
"grad_norm": 0.5150577808719591,
"learning_rate": 1.8717333721091634e-08,
"loss": 0.0201,
"step": 988,
"ts_encoder_learning_rate": 1.735732780490884e-08
},
{
"epoch": 2.6036741082461488,
"grad_norm": 0.7752673495066984,
"learning_rate": 1.735732780490884e-08,
"loss": 0.0122,
"step": 989,
"ts_encoder_learning_rate": 1.6048526251421502e-08
},
{
"epoch": 2.6063102397232063,
"grad_norm": 0.6444134054280553,
"learning_rate": 1.6048526251421502e-08,
"loss": 0.0202,
"step": 990,
"ts_encoder_learning_rate": 1.4790942510590767e-08
},
{
"epoch": 2.608946371200264,
"grad_norm": 0.6604248178517298,
"learning_rate": 1.4790942510590767e-08,
"loss": 0.0234,
"step": 991,
"ts_encoder_learning_rate": 1.3584589506034362e-08
},
{
"epoch": 2.611582502677321,
"grad_norm": 0.8166087014626134,
"learning_rate": 1.3584589506034362e-08,
"loss": 0.0212,
"step": 992,
"ts_encoder_learning_rate": 1.2429479634897268e-08
},
{
"epoch": 2.6142186341543785,
"grad_norm": 0.5979082769485419,
"learning_rate": 1.2429479634897268e-08,
"loss": 0.018,
"step": 993,
"ts_encoder_learning_rate": 1.132562476771959e-08
},
{
"epoch": 2.6168547656314356,
"grad_norm": 0.6136562468042444,
"learning_rate": 1.132562476771959e-08,
"loss": 0.026,
"step": 994,
"ts_encoder_learning_rate": 1.0273036248318325e-08
},
{
"epoch": 2.619490897108493,
"grad_norm": 0.738729241027843,
"learning_rate": 1.0273036248318325e-08,
"loss": 0.0182,
"step": 995,
"ts_encoder_learning_rate": 9.27172489366912e-09
},
{
"epoch": 2.6221270285855507,
"grad_norm": 0.6076766225952003,
"learning_rate": 9.27172489366912e-09,
"loss": 0.017,
"step": 996,
"ts_encoder_learning_rate": 8.321700993795812e-09
},
{
"epoch": 2.6247631600626082,
"grad_norm": 0.8828040407540239,
"learning_rate": 8.321700993795812e-09,
"loss": 0.0183,
"step": 997,
"ts_encoder_learning_rate": 7.422974311662723e-09
},
{
"epoch": 2.627399291539666,
"grad_norm": 0.6622087750202462,
"learning_rate": 7.422974311662723e-09,
"loss": 0.0174,
"step": 998,
"ts_encoder_learning_rate": 6.575554083078084e-09
},
{
"epoch": 2.630035423016723,
"grad_norm": 0.76863578860787,
"learning_rate": 6.575554083078084e-09,
"loss": 0.0191,
"step": 999,
"ts_encoder_learning_rate": 5.779449016595773e-09
},
{
"epoch": 2.6326715544937804,
"grad_norm": 0.633605481547031,
"learning_rate": 5.779449016595773e-09,
"loss": 0.0188,
"step": 1000,
"ts_encoder_learning_rate": 5.034667293427053e-09
},
{
"epoch": 2.6326715544937804,
"step": 1000,
"total_flos": 869424341942272.0,
"train_loss": 0.32716700187977404,
"train_runtime": 47494.4505,
"train_samples_per_second": 10.78,
"train_steps_per_second": 0.021,
"ts_encoder_learning_rate": 5.034667293427053e-09
}
],
"logging_steps": 1.0,
"max_steps": 1000,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 869424341942272.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}