| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.6326715544937804, | |
| "eval_steps": 500, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.002636131477057418, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 14.8018, | |
| "step": 1, | |
| "ts_encoder_learning_rate": 0.0 | |
| }, | |
| { | |
| "epoch": 0.005272262954114836, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 14.8252, | |
| "step": 2, | |
| "ts_encoder_learning_rate": 0.0 | |
| }, | |
| { | |
| "epoch": 0.007908394431172255, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 15.0107, | |
| "step": 3, | |
| "ts_encoder_learning_rate": 0.0 | |
| }, | |
| { | |
| "epoch": 0.010544525908229673, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 14.9011, | |
| "step": 4, | |
| "ts_encoder_learning_rate": 0.0 | |
| }, | |
| { | |
| "epoch": 0.01318065738528709, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 14.897, | |
| "step": 5, | |
| "ts_encoder_learning_rate": 0.0 | |
| }, | |
| { | |
| "epoch": 0.01581678886234451, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 14.9028, | |
| "step": 6, | |
| "ts_encoder_learning_rate": 0.0 | |
| }, | |
| { | |
| "epoch": 0.018452920339401928, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 14.7914, | |
| "step": 7, | |
| "ts_encoder_learning_rate": 0.0 | |
| }, | |
| { | |
| "epoch": 0.021089051816459346, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 15.1729, | |
| "step": 8, | |
| "ts_encoder_learning_rate": 0.0 | |
| }, | |
| { | |
| "epoch": 0.023725183293516763, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 14.7748, | |
| "step": 9, | |
| "ts_encoder_learning_rate": 0.0 | |
| }, | |
| { | |
| "epoch": 0.02636131477057418, | |
| "grad_norm": 1096.9997559030376, | |
| "learning_rate": 0.0, | |
| "loss": 14.735, | |
| "step": 10, | |
| "ts_encoder_learning_rate": 5.000000000000001e-07 | |
| }, | |
| { | |
| "epoch": 0.0289974462476316, | |
| "grad_norm": 1096.9997559030376, | |
| "learning_rate": 5.000000000000001e-07, | |
| "loss": 14.8943, | |
| "step": 11, | |
| "ts_encoder_learning_rate": 5.000000000000001e-07 | |
| }, | |
| { | |
| "epoch": 0.03163357772468902, | |
| "grad_norm": 1305.2661541342568, | |
| "learning_rate": 5.000000000000001e-07, | |
| "loss": 14.5478, | |
| "step": 12, | |
| "ts_encoder_learning_rate": 1.0000000000000002e-06 | |
| }, | |
| { | |
| "epoch": 0.034269709201746434, | |
| "grad_norm": 1279.5606119323502, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 14.542, | |
| "step": 13, | |
| "ts_encoder_learning_rate": 1.5e-06 | |
| }, | |
| { | |
| "epoch": 0.036905840678803856, | |
| "grad_norm": 1391.7635545286846, | |
| "learning_rate": 1.5e-06, | |
| "loss": 12.2412, | |
| "step": 14, | |
| "ts_encoder_learning_rate": 2.0000000000000003e-06 | |
| }, | |
| { | |
| "epoch": 0.03954197215586128, | |
| "grad_norm": 1848.3283721710281, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 7.6029, | |
| "step": 15, | |
| "ts_encoder_learning_rate": 2.5e-06 | |
| }, | |
| { | |
| "epoch": 0.04217810363291869, | |
| "grad_norm": 1848.3283721710281, | |
| "learning_rate": 2.5e-06, | |
| "loss": 4.8804, | |
| "step": 16, | |
| "ts_encoder_learning_rate": 2.5e-06 | |
| }, | |
| { | |
| "epoch": 0.04481423510997611, | |
| "grad_norm": 1759.395829823404, | |
| "learning_rate": 2.5e-06, | |
| "loss": 4.8283, | |
| "step": 17, | |
| "ts_encoder_learning_rate": 3e-06 | |
| }, | |
| { | |
| "epoch": 0.04745036658703353, | |
| "grad_norm": 1407.6124078931969, | |
| "learning_rate": 3e-06, | |
| "loss": 3.5724, | |
| "step": 18, | |
| "ts_encoder_learning_rate": 3.5e-06 | |
| }, | |
| { | |
| "epoch": 0.05008649806409095, | |
| "grad_norm": 372.4651747290722, | |
| "learning_rate": 3.5e-06, | |
| "loss": 1.9155, | |
| "step": 19, | |
| "ts_encoder_learning_rate": 4.000000000000001e-06 | |
| }, | |
| { | |
| "epoch": 0.05272262954114836, | |
| "grad_norm": 938.4571697479417, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 1.9811, | |
| "step": 20, | |
| "ts_encoder_learning_rate": 4.5e-06 | |
| }, | |
| { | |
| "epoch": 0.055358761018205783, | |
| "grad_norm": 544.7103035134295, | |
| "learning_rate": 4.5e-06, | |
| "loss": 1.7949, | |
| "step": 21, | |
| "ts_encoder_learning_rate": 5e-06 | |
| }, | |
| { | |
| "epoch": 0.0579948924952632, | |
| "grad_norm": 454.5593620001429, | |
| "learning_rate": 5e-06, | |
| "loss": 1.9186, | |
| "step": 22, | |
| "ts_encoder_learning_rate": 5.500000000000001e-06 | |
| }, | |
| { | |
| "epoch": 0.06063102397232062, | |
| "grad_norm": 1081.6948838381863, | |
| "learning_rate": 5.500000000000001e-06, | |
| "loss": 2.2253, | |
| "step": 23, | |
| "ts_encoder_learning_rate": 6e-06 | |
| }, | |
| { | |
| "epoch": 0.06326715544937804, | |
| "grad_norm": 614.5108980638121, | |
| "learning_rate": 6e-06, | |
| "loss": 2.0748, | |
| "step": 24, | |
| "ts_encoder_learning_rate": 6.5000000000000004e-06 | |
| }, | |
| { | |
| "epoch": 0.06590328692643546, | |
| "grad_norm": 510.8405053914244, | |
| "learning_rate": 6.5000000000000004e-06, | |
| "loss": 0.985, | |
| "step": 25, | |
| "ts_encoder_learning_rate": 7e-06 | |
| }, | |
| { | |
| "epoch": 0.06853941840349287, | |
| "grad_norm": 187.64105795677528, | |
| "learning_rate": 7e-06, | |
| "loss": 0.9204, | |
| "step": 26, | |
| "ts_encoder_learning_rate": 7.500000000000001e-06 | |
| }, | |
| { | |
| "epoch": 0.07117554988055029, | |
| "grad_norm": 239.94638266940726, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 0.7961, | |
| "step": 27, | |
| "ts_encoder_learning_rate": 8.000000000000001e-06 | |
| }, | |
| { | |
| "epoch": 0.07381168135760771, | |
| "grad_norm": 204.37461156136854, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.7757, | |
| "step": 28, | |
| "ts_encoder_learning_rate": 8.5e-06 | |
| }, | |
| { | |
| "epoch": 0.07644781283466513, | |
| "grad_norm": 394.9877265614201, | |
| "learning_rate": 8.5e-06, | |
| "loss": 0.8967, | |
| "step": 29, | |
| "ts_encoder_learning_rate": 9e-06 | |
| }, | |
| { | |
| "epoch": 0.07908394431172255, | |
| "grad_norm": 115.99747240190717, | |
| "learning_rate": 9e-06, | |
| "loss": 0.6932, | |
| "step": 30, | |
| "ts_encoder_learning_rate": 9.5e-06 | |
| }, | |
| { | |
| "epoch": 0.08172007578877996, | |
| "grad_norm": 203.24289413853845, | |
| "learning_rate": 9.5e-06, | |
| "loss": 0.6311, | |
| "step": 31, | |
| "ts_encoder_learning_rate": 1e-05 | |
| }, | |
| { | |
| "epoch": 0.08435620726583738, | |
| "grad_norm": 97.33610135135591, | |
| "learning_rate": 1e-05, | |
| "loss": 0.5115, | |
| "step": 32, | |
| "ts_encoder_learning_rate": 9.999974308631955e-06 | |
| }, | |
| { | |
| "epoch": 0.0869923387428948, | |
| "grad_norm": 346.00707857529187, | |
| "learning_rate": 9.999974308631955e-06, | |
| "loss": 0.4777, | |
| "step": 33, | |
| "ts_encoder_learning_rate": 9.999897234791831e-06 | |
| }, | |
| { | |
| "epoch": 0.08962847021995222, | |
| "grad_norm": 29.349977310917716, | |
| "learning_rate": 9.999897234791831e-06, | |
| "loss": 0.4537, | |
| "step": 34, | |
| "ts_encoder_learning_rate": 9.999768779271687e-06 | |
| }, | |
| { | |
| "epoch": 0.09226460169700963, | |
| "grad_norm": 112.56606641016495, | |
| "learning_rate": 9.999768779271687e-06, | |
| "loss": 0.8332, | |
| "step": 35, | |
| "ts_encoder_learning_rate": 9.999588943391597e-06 | |
| }, | |
| { | |
| "epoch": 0.09490073317406705, | |
| "grad_norm": 465.069049932412, | |
| "learning_rate": 9.999588943391597e-06, | |
| "loss": 1.0421, | |
| "step": 36, | |
| "ts_encoder_learning_rate": 9.999357728999657e-06 | |
| }, | |
| { | |
| "epoch": 0.09753686465112447, | |
| "grad_norm": 158.97965025080174, | |
| "learning_rate": 9.999357728999657e-06, | |
| "loss": 0.5741, | |
| "step": 37, | |
| "ts_encoder_learning_rate": 9.99907513847195e-06 | |
| }, | |
| { | |
| "epoch": 0.1001729961281819, | |
| "grad_norm": 194.59482532664853, | |
| "learning_rate": 9.99907513847195e-06, | |
| "loss": 0.484, | |
| "step": 38, | |
| "ts_encoder_learning_rate": 9.998741174712534e-06 | |
| }, | |
| { | |
| "epoch": 0.10280912760523932, | |
| "grad_norm": 161.100341083291, | |
| "learning_rate": 9.998741174712534e-06, | |
| "loss": 0.5337, | |
| "step": 39, | |
| "ts_encoder_learning_rate": 9.9983558411534e-06 | |
| }, | |
| { | |
| "epoch": 0.10544525908229672, | |
| "grad_norm": 27.126218719305797, | |
| "learning_rate": 9.9983558411534e-06, | |
| "loss": 0.5538, | |
| "step": 40, | |
| "ts_encoder_learning_rate": 9.997919141754448e-06 | |
| }, | |
| { | |
| "epoch": 0.10808139055935415, | |
| "grad_norm": 33.98098658560328, | |
| "learning_rate": 9.997919141754448e-06, | |
| "loss": 0.7161, | |
| "step": 41, | |
| "ts_encoder_learning_rate": 9.99743108100344e-06 | |
| }, | |
| { | |
| "epoch": 0.11071752203641157, | |
| "grad_norm": 39.621572486406116, | |
| "learning_rate": 9.99743108100344e-06, | |
| "loss": 0.3756, | |
| "step": 42, | |
| "ts_encoder_learning_rate": 9.996891663915955e-06 | |
| }, | |
| { | |
| "epoch": 0.11335365351346899, | |
| "grad_norm": 14.92159451052064, | |
| "learning_rate": 9.996891663915955e-06, | |
| "loss": 0.4255, | |
| "step": 43, | |
| "ts_encoder_learning_rate": 9.99630089603534e-06 | |
| }, | |
| { | |
| "epoch": 0.1159897849905264, | |
| "grad_norm": 13.462409209351557, | |
| "learning_rate": 9.99630089603534e-06, | |
| "loss": 0.4359, | |
| "step": 44, | |
| "ts_encoder_learning_rate": 9.995658783432645e-06 | |
| }, | |
| { | |
| "epoch": 0.11862591646758382, | |
| "grad_norm": 5.901719710197823, | |
| "learning_rate": 9.995658783432645e-06, | |
| "loss": 0.3448, | |
| "step": 45, | |
| "ts_encoder_learning_rate": 9.994965332706574e-06 | |
| }, | |
| { | |
| "epoch": 0.12126204794464124, | |
| "grad_norm": 32.35793808957399, | |
| "learning_rate": 9.994965332706574e-06, | |
| "loss": 0.4025, | |
| "step": 46, | |
| "ts_encoder_learning_rate": 9.994220550983404e-06 | |
| }, | |
| { | |
| "epoch": 0.12389817942169866, | |
| "grad_norm": 31.71042211582521, | |
| "learning_rate": 9.994220550983404e-06, | |
| "loss": 0.5392, | |
| "step": 47, | |
| "ts_encoder_learning_rate": 9.993424445916923e-06 | |
| }, | |
| { | |
| "epoch": 0.12653431089875608, | |
| "grad_norm": 21.433144223039534, | |
| "learning_rate": 9.993424445916923e-06, | |
| "loss": 0.4416, | |
| "step": 48, | |
| "ts_encoder_learning_rate": 9.992577025688338e-06 | |
| }, | |
| { | |
| "epoch": 0.1291704423758135, | |
| "grad_norm": 9.093244564527424, | |
| "learning_rate": 9.992577025688338e-06, | |
| "loss": 0.3626, | |
| "step": 49, | |
| "ts_encoder_learning_rate": 9.991678299006206e-06 | |
| }, | |
| { | |
| "epoch": 0.13180657385287092, | |
| "grad_norm": 4.629496592285392, | |
| "learning_rate": 9.991678299006206e-06, | |
| "loss": 0.3829, | |
| "step": 50, | |
| "ts_encoder_learning_rate": 9.990728275106332e-06 | |
| }, | |
| { | |
| "epoch": 0.13444270532992833, | |
| "grad_norm": 4.275838877635983, | |
| "learning_rate": 9.990728275106332e-06, | |
| "loss": 0.3262, | |
| "step": 51, | |
| "ts_encoder_learning_rate": 9.989726963751683e-06 | |
| }, | |
| { | |
| "epoch": 0.13707883680698574, | |
| "grad_norm": 4.249381704965988, | |
| "learning_rate": 9.989726963751683e-06, | |
| "loss": 0.3252, | |
| "step": 52, | |
| "ts_encoder_learning_rate": 9.98867437523228e-06 | |
| }, | |
| { | |
| "epoch": 0.13971496828404317, | |
| "grad_norm": 6.056412755943698, | |
| "learning_rate": 9.98867437523228e-06, | |
| "loss": 0.276, | |
| "step": 53, | |
| "ts_encoder_learning_rate": 9.987570520365105e-06 | |
| }, | |
| { | |
| "epoch": 0.14235109976110058, | |
| "grad_norm": 5.208273616470221, | |
| "learning_rate": 9.987570520365105e-06, | |
| "loss": 0.2754, | |
| "step": 54, | |
| "ts_encoder_learning_rate": 9.986415410493966e-06 | |
| }, | |
| { | |
| "epoch": 0.14498723123815802, | |
| "grad_norm": 3.7353303435304244, | |
| "learning_rate": 9.986415410493966e-06, | |
| "loss": 0.2529, | |
| "step": 55, | |
| "ts_encoder_learning_rate": 9.98520905748941e-06 | |
| }, | |
| { | |
| "epoch": 0.14762336271521542, | |
| "grad_norm": 3.933015208783408, | |
| "learning_rate": 9.98520905748941e-06, | |
| "loss": 0.2508, | |
| "step": 56, | |
| "ts_encoder_learning_rate": 9.983951473748579e-06 | |
| }, | |
| { | |
| "epoch": 0.15025949419227283, | |
| "grad_norm": 5.206399280832945, | |
| "learning_rate": 9.983951473748579e-06, | |
| "loss": 0.2651, | |
| "step": 57, | |
| "ts_encoder_learning_rate": 9.982642672195093e-06 | |
| }, | |
| { | |
| "epoch": 0.15289562566933027, | |
| "grad_norm": 4.167850168478869, | |
| "learning_rate": 9.982642672195093e-06, | |
| "loss": 0.2868, | |
| "step": 58, | |
| "ts_encoder_learning_rate": 9.98128266627891e-06 | |
| }, | |
| { | |
| "epoch": 0.15553175714638767, | |
| "grad_norm": 9.226577568221924, | |
| "learning_rate": 9.98128266627891e-06, | |
| "loss": 0.2833, | |
| "step": 59, | |
| "ts_encoder_learning_rate": 9.979871469976197e-06 | |
| }, | |
| { | |
| "epoch": 0.1581678886234451, | |
| "grad_norm": 3.7123405016527316, | |
| "learning_rate": 9.979871469976197e-06, | |
| "loss": 0.2578, | |
| "step": 60, | |
| "ts_encoder_learning_rate": 9.978409097789178e-06 | |
| }, | |
| { | |
| "epoch": 0.16080402010050251, | |
| "grad_norm": 3.049493694179588, | |
| "learning_rate": 9.978409097789178e-06, | |
| "loss": 0.2509, | |
| "step": 61, | |
| "ts_encoder_learning_rate": 9.976895564745993e-06 | |
| }, | |
| { | |
| "epoch": 0.16344015157755992, | |
| "grad_norm": 2.8746718898314314, | |
| "learning_rate": 9.976895564745993e-06, | |
| "loss": 0.2367, | |
| "step": 62, | |
| "ts_encoder_learning_rate": 9.975330886400531e-06 | |
| }, | |
| { | |
| "epoch": 0.16607628305461736, | |
| "grad_norm": 2.1229971064439614, | |
| "learning_rate": 9.975330886400531e-06, | |
| "loss": 0.2456, | |
| "step": 63, | |
| "ts_encoder_learning_rate": 9.973715078832288e-06 | |
| }, | |
| { | |
| "epoch": 0.16871241453167476, | |
| "grad_norm": 1.9770119063232516, | |
| "learning_rate": 9.973715078832288e-06, | |
| "loss": 0.2251, | |
| "step": 64, | |
| "ts_encoder_learning_rate": 9.972048158646184e-06 | |
| }, | |
| { | |
| "epoch": 0.17134854600873217, | |
| "grad_norm": 2.5667195491487016, | |
| "learning_rate": 9.972048158646184e-06, | |
| "loss": 0.2445, | |
| "step": 65, | |
| "ts_encoder_learning_rate": 9.970330142972403e-06 | |
| }, | |
| { | |
| "epoch": 0.1739846774857896, | |
| "grad_norm": 2.1113696627510117, | |
| "learning_rate": 9.970330142972403e-06, | |
| "loss": 0.2574, | |
| "step": 66, | |
| "ts_encoder_learning_rate": 9.968561049466214e-06 | |
| }, | |
| { | |
| "epoch": 0.17662080896284701, | |
| "grad_norm": 1.6243156106778176, | |
| "learning_rate": 9.968561049466214e-06, | |
| "loss": 0.2303, | |
| "step": 67, | |
| "ts_encoder_learning_rate": 9.966740896307791e-06 | |
| }, | |
| { | |
| "epoch": 0.17925694043990445, | |
| "grad_norm": 1.6849608770961007, | |
| "learning_rate": 9.966740896307791e-06, | |
| "loss": 0.2243, | |
| "step": 68, | |
| "ts_encoder_learning_rate": 9.964869702202023e-06 | |
| }, | |
| { | |
| "epoch": 0.18189307191696186, | |
| "grad_norm": 3.92330386577651, | |
| "learning_rate": 9.964869702202023e-06, | |
| "loss": 0.2441, | |
| "step": 69, | |
| "ts_encoder_learning_rate": 9.962947486378325e-06 | |
| }, | |
| { | |
| "epoch": 0.18452920339401926, | |
| "grad_norm": 2.582255962327349, | |
| "learning_rate": 9.962947486378325e-06, | |
| "loss": 0.2398, | |
| "step": 70, | |
| "ts_encoder_learning_rate": 9.96097426859044e-06 | |
| }, | |
| { | |
| "epoch": 0.1871653348710767, | |
| "grad_norm": 1.7505976231929627, | |
| "learning_rate": 9.96097426859044e-06, | |
| "loss": 0.2189, | |
| "step": 71, | |
| "ts_encoder_learning_rate": 9.95895006911623e-06 | |
| }, | |
| { | |
| "epoch": 0.1898014663481341, | |
| "grad_norm": 2.1937879121282116, | |
| "learning_rate": 9.95895006911623e-06, | |
| "loss": 0.2301, | |
| "step": 72, | |
| "ts_encoder_learning_rate": 9.956874908757482e-06 | |
| }, | |
| { | |
| "epoch": 0.19243759782519154, | |
| "grad_norm": 2.1666041664824265, | |
| "learning_rate": 9.956874908757482e-06, | |
| "loss": 0.2432, | |
| "step": 73, | |
| "ts_encoder_learning_rate": 9.954748808839675e-06 | |
| }, | |
| { | |
| "epoch": 0.19507372930224895, | |
| "grad_norm": 1.7390080947257538, | |
| "learning_rate": 9.954748808839675e-06, | |
| "loss": 0.2475, | |
| "step": 74, | |
| "ts_encoder_learning_rate": 9.952571791211776e-06 | |
| }, | |
| { | |
| "epoch": 0.19770986077930636, | |
| "grad_norm": 1.8227633742041385, | |
| "learning_rate": 9.952571791211776e-06, | |
| "loss": 0.205, | |
| "step": 75, | |
| "ts_encoder_learning_rate": 9.950343878246011e-06 | |
| }, | |
| { | |
| "epoch": 0.2003459922563638, | |
| "grad_norm": 2.023155095367864, | |
| "learning_rate": 9.950343878246011e-06, | |
| "loss": 0.222, | |
| "step": 76, | |
| "ts_encoder_learning_rate": 9.948065092837631e-06 | |
| }, | |
| { | |
| "epoch": 0.2029821237334212, | |
| "grad_norm": 2.5393740081357885, | |
| "learning_rate": 9.948065092837631e-06, | |
| "loss": 0.2264, | |
| "step": 77, | |
| "ts_encoder_learning_rate": 9.945735458404681e-06 | |
| }, | |
| { | |
| "epoch": 0.20561825521047863, | |
| "grad_norm": 1.9070977453067859, | |
| "learning_rate": 9.945735458404681e-06, | |
| "loss": 0.1948, | |
| "step": 78, | |
| "ts_encoder_learning_rate": 9.943354998887763e-06 | |
| }, | |
| { | |
| "epoch": 0.20825438668753604, | |
| "grad_norm": 2.228467040055354, | |
| "learning_rate": 9.943354998887763e-06, | |
| "loss": 0.2352, | |
| "step": 79, | |
| "ts_encoder_learning_rate": 9.94092373874978e-06 | |
| }, | |
| { | |
| "epoch": 0.21089051816459345, | |
| "grad_norm": 2.1754655976070985, | |
| "learning_rate": 9.94092373874978e-06, | |
| "loss": 0.2051, | |
| "step": 80, | |
| "ts_encoder_learning_rate": 9.938441702975689e-06 | |
| }, | |
| { | |
| "epoch": 0.21352664964165088, | |
| "grad_norm": 1.5948561463002036, | |
| "learning_rate": 9.938441702975689e-06, | |
| "loss": 0.2104, | |
| "step": 81, | |
| "ts_encoder_learning_rate": 9.935908917072253e-06 | |
| }, | |
| { | |
| "epoch": 0.2161627811187083, | |
| "grad_norm": 2.9184103349665955, | |
| "learning_rate": 9.935908917072253e-06, | |
| "loss": 0.2214, | |
| "step": 82, | |
| "ts_encoder_learning_rate": 9.93332540706776e-06 | |
| }, | |
| { | |
| "epoch": 0.21879891259576573, | |
| "grad_norm": 2.533165757626651, | |
| "learning_rate": 9.93332540706776e-06, | |
| "loss": 0.2182, | |
| "step": 83, | |
| "ts_encoder_learning_rate": 9.930691199511775e-06 | |
| }, | |
| { | |
| "epoch": 0.22143504407282313, | |
| "grad_norm": 2.4172123332256072, | |
| "learning_rate": 9.930691199511775e-06, | |
| "loss": 0.2477, | |
| "step": 84, | |
| "ts_encoder_learning_rate": 9.928006321474859e-06 | |
| }, | |
| { | |
| "epoch": 0.22407117554988054, | |
| "grad_norm": 2.2355360356090865, | |
| "learning_rate": 9.928006321474859e-06, | |
| "loss": 0.2153, | |
| "step": 85, | |
| "ts_encoder_learning_rate": 9.925270800548285e-06 | |
| }, | |
| { | |
| "epoch": 0.22670730702693798, | |
| "grad_norm": 1.325879276978752, | |
| "learning_rate": 9.925270800548285e-06, | |
| "loss": 0.2106, | |
| "step": 86, | |
| "ts_encoder_learning_rate": 9.922484664843763e-06 | |
| }, | |
| { | |
| "epoch": 0.22934343850399538, | |
| "grad_norm": 2.105985586302988, | |
| "learning_rate": 9.922484664843763e-06, | |
| "loss": 0.2033, | |
| "step": 87, | |
| "ts_encoder_learning_rate": 9.91964794299315e-06 | |
| }, | |
| { | |
| "epoch": 0.2319795699810528, | |
| "grad_norm": 1.627701668717812, | |
| "learning_rate": 9.91964794299315e-06, | |
| "loss": 0.2063, | |
| "step": 88, | |
| "ts_encoder_learning_rate": 9.916760664148148e-06 | |
| }, | |
| { | |
| "epoch": 0.23461570145811023, | |
| "grad_norm": 1.7548289492746183, | |
| "learning_rate": 9.916760664148148e-06, | |
| "loss": 0.2023, | |
| "step": 89, | |
| "ts_encoder_learning_rate": 9.91382285798002e-06 | |
| }, | |
| { | |
| "epoch": 0.23725183293516763, | |
| "grad_norm": 1.2779758188322887, | |
| "learning_rate": 9.91382285798002e-06, | |
| "loss": 0.1859, | |
| "step": 90, | |
| "ts_encoder_learning_rate": 9.910834554679266e-06 | |
| }, | |
| { | |
| "epoch": 0.23988796441222507, | |
| "grad_norm": 1.4309994411495965, | |
| "learning_rate": 9.910834554679266e-06, | |
| "loss": 0.1985, | |
| "step": 91, | |
| "ts_encoder_learning_rate": 9.907795784955327e-06 | |
| }, | |
| { | |
| "epoch": 0.24252409588928248, | |
| "grad_norm": 1.578028044854179, | |
| "learning_rate": 9.907795784955327e-06, | |
| "loss": 0.197, | |
| "step": 92, | |
| "ts_encoder_learning_rate": 9.904706580036265e-06 | |
| }, | |
| { | |
| "epoch": 0.24516022736633988, | |
| "grad_norm": 1.4796574606576762, | |
| "learning_rate": 9.904706580036265e-06, | |
| "loss": 0.1819, | |
| "step": 93, | |
| "ts_encoder_learning_rate": 9.901566971668437e-06 | |
| }, | |
| { | |
| "epoch": 0.24779635884339732, | |
| "grad_norm": 1.4288017254755137, | |
| "learning_rate": 9.901566971668437e-06, | |
| "loss": 0.2007, | |
| "step": 94, | |
| "ts_encoder_learning_rate": 9.898376992116179e-06 | |
| }, | |
| { | |
| "epoch": 0.2504324903204547, | |
| "grad_norm": 2.1391069510229137, | |
| "learning_rate": 9.898376992116179e-06, | |
| "loss": 0.2147, | |
| "step": 95, | |
| "ts_encoder_learning_rate": 9.895136674161466e-06 | |
| }, | |
| { | |
| "epoch": 0.25306862179751216, | |
| "grad_norm": 1.4103238390166355, | |
| "learning_rate": 9.895136674161466e-06, | |
| "loss": 0.1678, | |
| "step": 96, | |
| "ts_encoder_learning_rate": 9.891846051103578e-06 | |
| }, | |
| { | |
| "epoch": 0.2557047532745696, | |
| "grad_norm": 1.7272702030863043, | |
| "learning_rate": 9.891846051103578e-06, | |
| "loss": 0.212, | |
| "step": 97, | |
| "ts_encoder_learning_rate": 9.888505156758758e-06 | |
| }, | |
| { | |
| "epoch": 0.258340884751627, | |
| "grad_norm": 1.465691780286817, | |
| "learning_rate": 9.888505156758758e-06, | |
| "loss": 0.1886, | |
| "step": 98, | |
| "ts_encoder_learning_rate": 9.885114025459865e-06 | |
| }, | |
| { | |
| "epoch": 0.2609770162286844, | |
| "grad_norm": 1.5753790137474581, | |
| "learning_rate": 9.885114025459865e-06, | |
| "loss": 0.1846, | |
| "step": 99, | |
| "ts_encoder_learning_rate": 9.881672692056022e-06 | |
| }, | |
| { | |
| "epoch": 0.26361314770574185, | |
| "grad_norm": 6.38876297290493, | |
| "learning_rate": 9.881672692056022e-06, | |
| "loss": 0.221, | |
| "step": 100, | |
| "ts_encoder_learning_rate": 9.878181191912251e-06 | |
| }, | |
| { | |
| "epoch": 0.2662492791827992, | |
| "grad_norm": 8.212765911278606, | |
| "learning_rate": 9.878181191912251e-06, | |
| "loss": 0.2088, | |
| "step": 101, | |
| "ts_encoder_learning_rate": 9.874639560909118e-06 | |
| }, | |
| { | |
| "epoch": 0.26888541065985666, | |
| "grad_norm": 97.72702166479309, | |
| "learning_rate": 9.874639560909118e-06, | |
| "loss": 0.2366, | |
| "step": 102, | |
| "ts_encoder_learning_rate": 9.871047835442365e-06 | |
| }, | |
| { | |
| "epoch": 0.2715215421369141, | |
| "grad_norm": 97.72702166479309, | |
| "learning_rate": 9.871047835442365e-06, | |
| "loss": 0.2448, | |
| "step": 103, | |
| "ts_encoder_learning_rate": 9.871047835442365e-06 | |
| }, | |
| { | |
| "epoch": 0.2741576736139715, | |
| "grad_norm": 52.034102007317564, | |
| "learning_rate": 9.871047835442365e-06, | |
| "loss": 0.2336, | |
| "step": 104, | |
| "ts_encoder_learning_rate": 9.867406052422525e-06 | |
| }, | |
| { | |
| "epoch": 0.2767938050910289, | |
| "grad_norm": 52.034102007317564, | |
| "learning_rate": 9.867406052422525e-06, | |
| "loss": 0.2643, | |
| "step": 105, | |
| "ts_encoder_learning_rate": 9.867406052422525e-06 | |
| }, | |
| { | |
| "epoch": 0.27942993656808635, | |
| "grad_norm": 52.034102007317564, | |
| "learning_rate": 9.867406052422525e-06, | |
| "loss": 0.2327, | |
| "step": 106, | |
| "ts_encoder_learning_rate": 9.867406052422525e-06 | |
| }, | |
| { | |
| "epoch": 0.2820660680451437, | |
| "grad_norm": 9.876750537498761, | |
| "learning_rate": 9.867406052422525e-06, | |
| "loss": 0.2537, | |
| "step": 107, | |
| "ts_encoder_learning_rate": 9.863714249274553e-06 | |
| }, | |
| { | |
| "epoch": 0.28470219952220116, | |
| "grad_norm": 10.163299154086943, | |
| "learning_rate": 9.863714249274553e-06, | |
| "loss": 0.2425, | |
| "step": 108, | |
| "ts_encoder_learning_rate": 9.85997246393744e-06 | |
| }, | |
| { | |
| "epoch": 0.2873383309992586, | |
| "grad_norm": 3.2455067886441973, | |
| "learning_rate": 9.85997246393744e-06, | |
| "loss": 0.1929, | |
| "step": 109, | |
| "ts_encoder_learning_rate": 9.85618073486382e-06 | |
| }, | |
| { | |
| "epoch": 0.28997446247631603, | |
| "grad_norm": 2.5488848745593913, | |
| "learning_rate": 9.85618073486382e-06, | |
| "loss": 0.2007, | |
| "step": 110, | |
| "ts_encoder_learning_rate": 9.852339101019574e-06 | |
| }, | |
| { | |
| "epoch": 0.2926105939533734, | |
| "grad_norm": 2.062458273578386, | |
| "learning_rate": 9.852339101019574e-06, | |
| "loss": 0.2245, | |
| "step": 111, | |
| "ts_encoder_learning_rate": 9.848447601883436e-06 | |
| }, | |
| { | |
| "epoch": 0.29524672543043085, | |
| "grad_norm": 1.8908032493798421, | |
| "learning_rate": 9.848447601883436e-06, | |
| "loss": 0.2017, | |
| "step": 112, | |
| "ts_encoder_learning_rate": 9.844506277446577e-06 | |
| }, | |
| { | |
| "epoch": 0.2978828569074883, | |
| "grad_norm": 1.8585429810855012, | |
| "learning_rate": 9.844506277446577e-06, | |
| "loss": 0.17, | |
| "step": 113, | |
| "ts_encoder_learning_rate": 9.840515168212208e-06 | |
| }, | |
| { | |
| "epoch": 0.30051898838454566, | |
| "grad_norm": 1.6115065133773239, | |
| "learning_rate": 9.840515168212208e-06, | |
| "loss": 0.2084, | |
| "step": 114, | |
| "ts_encoder_learning_rate": 9.836474315195148e-06 | |
| }, | |
| { | |
| "epoch": 0.3031551198616031, | |
| "grad_norm": 3.04800232147694, | |
| "learning_rate": 9.836474315195148e-06, | |
| "loss": 0.2319, | |
| "step": 115, | |
| "ts_encoder_learning_rate": 9.832383759921415e-06 | |
| }, | |
| { | |
| "epoch": 0.30579125133866053, | |
| "grad_norm": 3.3376308967003245, | |
| "learning_rate": 9.832383759921415e-06, | |
| "loss": 0.2059, | |
| "step": 116, | |
| "ts_encoder_learning_rate": 9.828243544427795e-06 | |
| }, | |
| { | |
| "epoch": 0.3084273828157179, | |
| "grad_norm": 1.847624347245679, | |
| "learning_rate": 9.828243544427795e-06, | |
| "loss": 0.1637, | |
| "step": 117, | |
| "ts_encoder_learning_rate": 9.824053711261405e-06 | |
| }, | |
| { | |
| "epoch": 0.31106351429277534, | |
| "grad_norm": 6.259456880309696, | |
| "learning_rate": 9.824053711261405e-06, | |
| "loss": 0.2051, | |
| "step": 118, | |
| "ts_encoder_learning_rate": 9.819814303479268e-06 | |
| }, | |
| { | |
| "epoch": 0.3136996457698328, | |
| "grad_norm": 1.248955065574141, | |
| "learning_rate": 9.819814303479268e-06, | |
| "loss": 0.1729, | |
| "step": 119, | |
| "ts_encoder_learning_rate": 9.815525364647853e-06 | |
| }, | |
| { | |
| "epoch": 0.3163357772468902, | |
| "grad_norm": 1.9069742114108161, | |
| "learning_rate": 9.815525364647853e-06, | |
| "loss": 0.1725, | |
| "step": 120, | |
| "ts_encoder_learning_rate": 9.811186938842645e-06 | |
| }, | |
| { | |
| "epoch": 0.3189719087239476, | |
| "grad_norm": 2.578824804080835, | |
| "learning_rate": 9.811186938842645e-06, | |
| "loss": 0.2071, | |
| "step": 121, | |
| "ts_encoder_learning_rate": 9.80679907064768e-06 | |
| }, | |
| { | |
| "epoch": 0.32160804020100503, | |
| "grad_norm": 1.4884102177576088, | |
| "learning_rate": 9.80679907064768e-06, | |
| "loss": 0.1971, | |
| "step": 122, | |
| "ts_encoder_learning_rate": 9.802361805155097e-06 | |
| }, | |
| { | |
| "epoch": 0.32424417167806246, | |
| "grad_norm": 3.950574735875806, | |
| "learning_rate": 9.802361805155097e-06, | |
| "loss": 0.2118, | |
| "step": 123, | |
| "ts_encoder_learning_rate": 9.797875187964661e-06 | |
| }, | |
| { | |
| "epoch": 0.32688030315511984, | |
| "grad_norm": 2.0658586146827194, | |
| "learning_rate": 9.797875187964661e-06, | |
| "loss": 0.1905, | |
| "step": 124, | |
| "ts_encoder_learning_rate": 9.793339265183303e-06 | |
| }, | |
| { | |
| "epoch": 0.3295164346321773, | |
| "grad_norm": 1.7249946266097584, | |
| "learning_rate": 9.793339265183303e-06, | |
| "loss": 0.1573, | |
| "step": 125, | |
| "ts_encoder_learning_rate": 9.788754083424654e-06 | |
| }, | |
| { | |
| "epoch": 0.3321525661092347, | |
| "grad_norm": 2.2599848390315667, | |
| "learning_rate": 9.788754083424654e-06, | |
| "loss": 0.1879, | |
| "step": 126, | |
| "ts_encoder_learning_rate": 9.784119689808545e-06 | |
| }, | |
| { | |
| "epoch": 0.3347886975862921, | |
| "grad_norm": 1.8125389329195718, | |
| "learning_rate": 9.784119689808545e-06, | |
| "loss": 0.2048, | |
| "step": 127, | |
| "ts_encoder_learning_rate": 9.779436131960544e-06 | |
| }, | |
| { | |
| "epoch": 0.33742482906334953, | |
| "grad_norm": 2.506219649349012, | |
| "learning_rate": 9.779436131960544e-06, | |
| "loss": 0.1869, | |
| "step": 128, | |
| "ts_encoder_learning_rate": 9.774703458011453e-06 | |
| }, | |
| { | |
| "epoch": 0.34006096054040696, | |
| "grad_norm": 1.8024855840730984, | |
| "learning_rate": 9.774703458011453e-06, | |
| "loss": 0.1962, | |
| "step": 129, | |
| "ts_encoder_learning_rate": 9.76992171659682e-06 | |
| }, | |
| { | |
| "epoch": 0.34269709201746434, | |
| "grad_norm": 1.59370844293676, | |
| "learning_rate": 9.76992171659682e-06, | |
| "loss": 0.1886, | |
| "step": 130, | |
| "ts_encoder_learning_rate": 9.765090956856437e-06 | |
| }, | |
| { | |
| "epoch": 0.3453332234945218, | |
| "grad_norm": 1.6796351392079987, | |
| "learning_rate": 9.765090956856437e-06, | |
| "loss": 0.1734, | |
| "step": 131, | |
| "ts_encoder_learning_rate": 9.760211228433834e-06 | |
| }, | |
| { | |
| "epoch": 0.3479693549715792, | |
| "grad_norm": 1.8622570561386254, | |
| "learning_rate": 9.760211228433834e-06, | |
| "loss": 0.2003, | |
| "step": 132, | |
| "ts_encoder_learning_rate": 9.755282581475769e-06 | |
| }, | |
| { | |
| "epoch": 0.35060548644863665, | |
| "grad_norm": 4.261853496235677, | |
| "learning_rate": 9.755282581475769e-06, | |
| "loss": 0.2152, | |
| "step": 133, | |
| "ts_encoder_learning_rate": 9.750305066631717e-06 | |
| }, | |
| { | |
| "epoch": 0.35324161792569403, | |
| "grad_norm": 2.084316819171521, | |
| "learning_rate": 9.750305066631717e-06, | |
| "loss": 0.204, | |
| "step": 134, | |
| "ts_encoder_learning_rate": 9.745278735053345e-06 | |
| }, | |
| { | |
| "epoch": 0.35587774940275146, | |
| "grad_norm": 2.1355217159376125, | |
| "learning_rate": 9.745278735053345e-06, | |
| "loss": 0.1812, | |
| "step": 135, | |
| "ts_encoder_learning_rate": 9.740203638393984e-06 | |
| }, | |
| { | |
| "epoch": 0.3585138808798089, | |
| "grad_norm": 2.1726864130161485, | |
| "learning_rate": 9.740203638393984e-06, | |
| "loss": 0.1741, | |
| "step": 136, | |
| "ts_encoder_learning_rate": 9.735079828808107e-06 | |
| }, | |
| { | |
| "epoch": 0.3611500123568663, | |
| "grad_norm": 1.82669888695553, | |
| "learning_rate": 9.735079828808107e-06, | |
| "loss": 0.1772, | |
| "step": 137, | |
| "ts_encoder_learning_rate": 9.729907358950785e-06 | |
| }, | |
| { | |
| "epoch": 0.3637861438339237, | |
| "grad_norm": 2.0888172211110647, | |
| "learning_rate": 9.729907358950785e-06, | |
| "loss": 0.1747, | |
| "step": 138, | |
| "ts_encoder_learning_rate": 9.724686281977146e-06 | |
| }, | |
| { | |
| "epoch": 0.36642227531098115, | |
| "grad_norm": 1.7793028446193322, | |
| "learning_rate": 9.724686281977146e-06, | |
| "loss": 0.1645, | |
| "step": 139, | |
| "ts_encoder_learning_rate": 9.719416651541839e-06 | |
| }, | |
| { | |
| "epoch": 0.36905840678803853, | |
| "grad_norm": 3.1339804740693697, | |
| "learning_rate": 9.719416651541839e-06, | |
| "loss": 0.2205, | |
| "step": 140, | |
| "ts_encoder_learning_rate": 9.714098521798466e-06 | |
| }, | |
| { | |
| "epoch": 0.37169453826509596, | |
| "grad_norm": 1.922012523700954, | |
| "learning_rate": 9.714098521798466e-06, | |
| "loss": 0.1922, | |
| "step": 141, | |
| "ts_encoder_learning_rate": 9.708731947399039e-06 | |
| }, | |
| { | |
| "epoch": 0.3743306697421534, | |
| "grad_norm": 2.111535338552465, | |
| "learning_rate": 9.708731947399039e-06, | |
| "loss": 0.2084, | |
| "step": 142, | |
| "ts_encoder_learning_rate": 9.703316983493414e-06 | |
| }, | |
| { | |
| "epoch": 0.37696680121921083, | |
| "grad_norm": 1.6443259615131731, | |
| "learning_rate": 9.703316983493414e-06, | |
| "loss": 0.1644, | |
| "step": 143, | |
| "ts_encoder_learning_rate": 9.697853685728721e-06 | |
| }, | |
| { | |
| "epoch": 0.3796029326962682, | |
| "grad_norm": 1.0337440158621962, | |
| "learning_rate": 9.697853685728721e-06, | |
| "loss": 0.175, | |
| "step": 144, | |
| "ts_encoder_learning_rate": 9.692342110248802e-06 | |
| }, | |
| { | |
| "epoch": 0.38223906417332565, | |
| "grad_norm": 2.6059449586383447, | |
| "learning_rate": 9.692342110248802e-06, | |
| "loss": 0.1749, | |
| "step": 145, | |
| "ts_encoder_learning_rate": 9.686782313693622e-06 | |
| }, | |
| { | |
| "epoch": 0.3848751956503831, | |
| "grad_norm": 1.7541337816760245, | |
| "learning_rate": 9.686782313693622e-06, | |
| "loss": 0.1814, | |
| "step": 146, | |
| "ts_encoder_learning_rate": 9.681174353198687e-06 | |
| }, | |
| { | |
| "epoch": 0.38751132712744046, | |
| "grad_norm": 1.9857610217097408, | |
| "learning_rate": 9.681174353198687e-06, | |
| "loss": 0.1817, | |
| "step": 147, | |
| "ts_encoder_learning_rate": 9.675518286394474e-06 | |
| }, | |
| { | |
| "epoch": 0.3901474586044979, | |
| "grad_norm": 2.015451167688765, | |
| "learning_rate": 9.675518286394474e-06, | |
| "loss": 0.1853, | |
| "step": 148, | |
| "ts_encoder_learning_rate": 9.669814171405818e-06 | |
| }, | |
| { | |
| "epoch": 0.39278359008155533, | |
| "grad_norm": 1.417012874189727, | |
| "learning_rate": 9.669814171405818e-06, | |
| "loss": 0.2065, | |
| "step": 149, | |
| "ts_encoder_learning_rate": 9.664062066851325e-06 | |
| }, | |
| { | |
| "epoch": 0.3954197215586127, | |
| "grad_norm": 2.8203561728923776, | |
| "learning_rate": 9.664062066851325e-06, | |
| "loss": 0.1729, | |
| "step": 150, | |
| "ts_encoder_learning_rate": 9.658262031842772e-06 | |
| }, | |
| { | |
| "epoch": 0.39805585303567015, | |
| "grad_norm": 2.068280212416892, | |
| "learning_rate": 9.658262031842772e-06, | |
| "loss": 0.1885, | |
| "step": 151, | |
| "ts_encoder_learning_rate": 9.65241412598449e-06 | |
| }, | |
| { | |
| "epoch": 0.4006919845127276, | |
| "grad_norm": 2.6419154731801933, | |
| "learning_rate": 9.65241412598449e-06, | |
| "loss": 0.1845, | |
| "step": 152, | |
| "ts_encoder_learning_rate": 9.64651840937276e-06 | |
| }, | |
| { | |
| "epoch": 0.40332811598978496, | |
| "grad_norm": 3.9825926281731068, | |
| "learning_rate": 9.64651840937276e-06, | |
| "loss": 0.1829, | |
| "step": 153, | |
| "ts_encoder_learning_rate": 9.640574942595195e-06 | |
| }, | |
| { | |
| "epoch": 0.4059642474668424, | |
| "grad_norm": 2.57569222575164, | |
| "learning_rate": 9.640574942595195e-06, | |
| "loss": 0.1749, | |
| "step": 154, | |
| "ts_encoder_learning_rate": 9.63458378673011e-06 | |
| }, | |
| { | |
| "epoch": 0.40860037894389983, | |
| "grad_norm": 2.1803645371235696, | |
| "learning_rate": 9.63458378673011e-06, | |
| "loss": 0.1854, | |
| "step": 155, | |
| "ts_encoder_learning_rate": 9.6285450033459e-06 | |
| }, | |
| { | |
| "epoch": 0.41123651042095727, | |
| "grad_norm": 4.412857746362803, | |
| "learning_rate": 9.6285450033459e-06, | |
| "loss": 0.1844, | |
| "step": 156, | |
| "ts_encoder_learning_rate": 9.622458654500408e-06 | |
| }, | |
| { | |
| "epoch": 0.41387264189801465, | |
| "grad_norm": 3.9208819464546725, | |
| "learning_rate": 9.622458654500408e-06, | |
| "loss": 0.1843, | |
| "step": 157, | |
| "ts_encoder_learning_rate": 9.616324802740287e-06 | |
| }, | |
| { | |
| "epoch": 0.4165087733750721, | |
| "grad_norm": 2.423222471726984, | |
| "learning_rate": 9.616324802740287e-06, | |
| "loss": 0.1552, | |
| "step": 158, | |
| "ts_encoder_learning_rate": 9.610143511100354e-06 | |
| }, | |
| { | |
| "epoch": 0.4191449048521295, | |
| "grad_norm": 1.7446627651403062, | |
| "learning_rate": 9.610143511100354e-06, | |
| "loss": 0.1536, | |
| "step": 159, | |
| "ts_encoder_learning_rate": 9.603914843102941e-06 | |
| }, | |
| { | |
| "epoch": 0.4217810363291869, | |
| "grad_norm": 3.9104938056757, | |
| "learning_rate": 9.603914843102941e-06, | |
| "loss": 0.1688, | |
| "step": 160, | |
| "ts_encoder_learning_rate": 9.597638862757255e-06 | |
| }, | |
| { | |
| "epoch": 0.42441716780624433, | |
| "grad_norm": 3.0669664248193835, | |
| "learning_rate": 9.597638862757255e-06, | |
| "loss": 0.1623, | |
| "step": 161, | |
| "ts_encoder_learning_rate": 9.591315634558698e-06 | |
| }, | |
| { | |
| "epoch": 0.42705329928330177, | |
| "grad_norm": 2.2417095321151446, | |
| "learning_rate": 9.591315634558698e-06, | |
| "loss": 0.1769, | |
| "step": 162, | |
| "ts_encoder_learning_rate": 9.584945223488227e-06 | |
| }, | |
| { | |
| "epoch": 0.42968943076035915, | |
| "grad_norm": 1.3116635599548383, | |
| "learning_rate": 9.584945223488227e-06, | |
| "loss": 0.1338, | |
| "step": 163, | |
| "ts_encoder_learning_rate": 9.57852769501167e-06 | |
| }, | |
| { | |
| "epoch": 0.4323255622374166, | |
| "grad_norm": 2.6909253272106, | |
| "learning_rate": 9.57852769501167e-06, | |
| "loss": 0.1705, | |
| "step": 164, | |
| "ts_encoder_learning_rate": 9.572063115079063e-06 | |
| }, | |
| { | |
| "epoch": 0.434961693714474, | |
| "grad_norm": 2.4035516559911456, | |
| "learning_rate": 9.572063115079063e-06, | |
| "loss": 0.1507, | |
| "step": 165, | |
| "ts_encoder_learning_rate": 9.565551550123967e-06 | |
| }, | |
| { | |
| "epoch": 0.43759782519153145, | |
| "grad_norm": 1.698615887190314, | |
| "learning_rate": 9.565551550123967e-06, | |
| "loss": 0.1584, | |
| "step": 166, | |
| "ts_encoder_learning_rate": 9.558993067062785e-06 | |
| }, | |
| { | |
| "epoch": 0.44023395666858883, | |
| "grad_norm": 1.5658851097960265, | |
| "learning_rate": 9.558993067062785e-06, | |
| "loss": 0.1444, | |
| "step": 167, | |
| "ts_encoder_learning_rate": 9.552387733294081e-06 | |
| }, | |
| { | |
| "epoch": 0.44287008814564627, | |
| "grad_norm": 1.428302430233315, | |
| "learning_rate": 9.552387733294081e-06, | |
| "loss": 0.1392, | |
| "step": 168, | |
| "ts_encoder_learning_rate": 9.545735616697875e-06 | |
| }, | |
| { | |
| "epoch": 0.4455062196227037, | |
| "grad_norm": 1.8871291912109978, | |
| "learning_rate": 9.545735616697875e-06, | |
| "loss": 0.163, | |
| "step": 169, | |
| "ts_encoder_learning_rate": 9.539036785634961e-06 | |
| }, | |
| { | |
| "epoch": 0.4481423510997611, | |
| "grad_norm": 1.4930705735785357, | |
| "learning_rate": 9.539036785634961e-06, | |
| "loss": 0.1189, | |
| "step": 170, | |
| "ts_encoder_learning_rate": 9.532291308946191e-06 | |
| }, | |
| { | |
| "epoch": 0.4507784825768185, | |
| "grad_norm": 2.2958980821132733, | |
| "learning_rate": 9.532291308946191e-06, | |
| "loss": 0.1475, | |
| "step": 171, | |
| "ts_encoder_learning_rate": 9.525499255951775e-06 | |
| }, | |
| { | |
| "epoch": 0.45341461405387595, | |
| "grad_norm": 2.292110960348305, | |
| "learning_rate": 9.525499255951775e-06, | |
| "loss": 0.1464, | |
| "step": 172, | |
| "ts_encoder_learning_rate": 9.518660696450567e-06 | |
| }, | |
| { | |
| "epoch": 0.45605074553093333, | |
| "grad_norm": 1.5583683866727895, | |
| "learning_rate": 9.518660696450567e-06, | |
| "loss": 0.1637, | |
| "step": 173, | |
| "ts_encoder_learning_rate": 9.511775700719347e-06 | |
| }, | |
| { | |
| "epoch": 0.45868687700799077, | |
| "grad_norm": 2.293732030943085, | |
| "learning_rate": 9.511775700719347e-06, | |
| "loss": 0.1425, | |
| "step": 174, | |
| "ts_encoder_learning_rate": 9.504844339512096e-06 | |
| }, | |
| { | |
| "epoch": 0.4613230084850482, | |
| "grad_norm": 2.378677779357337, | |
| "learning_rate": 9.504844339512096e-06, | |
| "loss": 0.1706, | |
| "step": 175, | |
| "ts_encoder_learning_rate": 9.497866684059278e-06 | |
| }, | |
| { | |
| "epoch": 0.4639591399621056, | |
| "grad_norm": 1.545023475236903, | |
| "learning_rate": 9.497866684059278e-06, | |
| "loss": 0.139, | |
| "step": 176, | |
| "ts_encoder_learning_rate": 9.490842806067095e-06 | |
| }, | |
| { | |
| "epoch": 0.466595271439163, | |
| "grad_norm": 3.000973914853472, | |
| "learning_rate": 9.490842806067095e-06, | |
| "loss": 0.1596, | |
| "step": 177, | |
| "ts_encoder_learning_rate": 9.483772777716767e-06 | |
| }, | |
| { | |
| "epoch": 0.46923140291622045, | |
| "grad_norm": 3.558064259164343, | |
| "learning_rate": 9.483772777716767e-06, | |
| "loss": 0.1806, | |
| "step": 178, | |
| "ts_encoder_learning_rate": 9.476656671663766e-06 | |
| }, | |
| { | |
| "epoch": 0.4718675343932779, | |
| "grad_norm": 2.3771476346252287, | |
| "learning_rate": 9.476656671663766e-06, | |
| "loss": 0.1517, | |
| "step": 179, | |
| "ts_encoder_learning_rate": 9.469494561037097e-06 | |
| }, | |
| { | |
| "epoch": 0.47450366587033527, | |
| "grad_norm": 1.3953249248383899, | |
| "learning_rate": 9.469494561037097e-06, | |
| "loss": 0.1504, | |
| "step": 180, | |
| "ts_encoder_learning_rate": 9.462286519438531e-06 | |
| }, | |
| { | |
| "epoch": 0.4771397973473927, | |
| "grad_norm": 3.0894973811179613, | |
| "learning_rate": 9.462286519438531e-06, | |
| "loss": 0.1471, | |
| "step": 181, | |
| "ts_encoder_learning_rate": 9.45503262094184e-06 | |
| }, | |
| { | |
| "epoch": 0.47977592882445014, | |
| "grad_norm": 3.447964797634623, | |
| "learning_rate": 9.45503262094184e-06, | |
| "loss": 0.1729, | |
| "step": 182, | |
| "ts_encoder_learning_rate": 9.44773294009206e-06 | |
| }, | |
| { | |
| "epoch": 0.4824120603015075, | |
| "grad_norm": 1.3569301564256495, | |
| "learning_rate": 9.44773294009206e-06, | |
| "loss": 0.1377, | |
| "step": 183, | |
| "ts_encoder_learning_rate": 9.440387551904705e-06 | |
| }, | |
| { | |
| "epoch": 0.48504819177856495, | |
| "grad_norm": 1.299399066206849, | |
| "learning_rate": 9.440387551904705e-06, | |
| "loss": 0.1474, | |
| "step": 184, | |
| "ts_encoder_learning_rate": 9.432996531865001e-06 | |
| }, | |
| { | |
| "epoch": 0.4876843232556224, | |
| "grad_norm": 2.3518650616525925, | |
| "learning_rate": 9.432996531865001e-06, | |
| "loss": 0.1365, | |
| "step": 185, | |
| "ts_encoder_learning_rate": 9.425559955927118e-06 | |
| }, | |
| { | |
| "epoch": 0.49032045473267977, | |
| "grad_norm": 2.444972697601273, | |
| "learning_rate": 9.425559955927118e-06, | |
| "loss": 0.1771, | |
| "step": 186, | |
| "ts_encoder_learning_rate": 9.418077900513377e-06 | |
| }, | |
| { | |
| "epoch": 0.4929565862097372, | |
| "grad_norm": 1.7782371185002437, | |
| "learning_rate": 9.418077900513377e-06, | |
| "loss": 0.1373, | |
| "step": 187, | |
| "ts_encoder_learning_rate": 9.410550442513475e-06 | |
| }, | |
| { | |
| "epoch": 0.49559271768679464, | |
| "grad_norm": 1.6778989547499836, | |
| "learning_rate": 9.410550442513475e-06, | |
| "loss": 0.1565, | |
| "step": 188, | |
| "ts_encoder_learning_rate": 9.40297765928369e-06 | |
| }, | |
| { | |
| "epoch": 0.49822884916385207, | |
| "grad_norm": 2.155563042258836, | |
| "learning_rate": 9.40297765928369e-06, | |
| "loss": 0.153, | |
| "step": 189, | |
| "ts_encoder_learning_rate": 9.395359628646087e-06 | |
| }, | |
| { | |
| "epoch": 0.5008649806409095, | |
| "grad_norm": 1.295519636961886, | |
| "learning_rate": 9.395359628646087e-06, | |
| "loss": 0.129, | |
| "step": 190, | |
| "ts_encoder_learning_rate": 9.387696428887715e-06 | |
| }, | |
| { | |
| "epoch": 0.5035011121179669, | |
| "grad_norm": 1.9350288061910503, | |
| "learning_rate": 9.387696428887715e-06, | |
| "loss": 0.1397, | |
| "step": 191, | |
| "ts_encoder_learning_rate": 9.37998813875981e-06 | |
| }, | |
| { | |
| "epoch": 0.5061372435950243, | |
| "grad_norm": 1.4020631402932546, | |
| "learning_rate": 9.37998813875981e-06, | |
| "loss": 0.1496, | |
| "step": 192, | |
| "ts_encoder_learning_rate": 9.372234837476979e-06 | |
| }, | |
| { | |
| "epoch": 0.5087733750720818, | |
| "grad_norm": 1.6083327990489644, | |
| "learning_rate": 9.372234837476979e-06, | |
| "loss": 0.1435, | |
| "step": 193, | |
| "ts_encoder_learning_rate": 9.364436604716389e-06 | |
| }, | |
| { | |
| "epoch": 0.5114095065491392, | |
| "grad_norm": 1.6706875142270174, | |
| "learning_rate": 9.364436604716389e-06, | |
| "loss": 0.1465, | |
| "step": 194, | |
| "ts_encoder_learning_rate": 9.356593520616948e-06 | |
| }, | |
| { | |
| "epoch": 0.5140456380261965, | |
| "grad_norm": 1.2317832812802163, | |
| "learning_rate": 9.356593520616948e-06, | |
| "loss": 0.1242, | |
| "step": 195, | |
| "ts_encoder_learning_rate": 9.348705665778479e-06 | |
| }, | |
| { | |
| "epoch": 0.516681769503254, | |
| "grad_norm": 2.2546259482178415, | |
| "learning_rate": 9.348705665778479e-06, | |
| "loss": 0.1446, | |
| "step": 196, | |
| "ts_encoder_learning_rate": 9.340773121260893e-06 | |
| }, | |
| { | |
| "epoch": 0.5193179009803114, | |
| "grad_norm": 2.4160528757749202, | |
| "learning_rate": 9.340773121260893e-06, | |
| "loss": 0.1426, | |
| "step": 197, | |
| "ts_encoder_learning_rate": 9.33279596858336e-06 | |
| }, | |
| { | |
| "epoch": 0.5219540324573688, | |
| "grad_norm": 2.9782825880249475, | |
| "learning_rate": 9.33279596858336e-06, | |
| "loss": 0.1581, | |
| "step": 198, | |
| "ts_encoder_learning_rate": 9.324774289723469e-06 | |
| }, | |
| { | |
| "epoch": 0.5245901639344263, | |
| "grad_norm": 2.2270014484031537, | |
| "learning_rate": 9.324774289723469e-06, | |
| "loss": 0.1456, | |
| "step": 199, | |
| "ts_encoder_learning_rate": 9.316708167116377e-06 | |
| }, | |
| { | |
| "epoch": 0.5272262954114837, | |
| "grad_norm": 2.254839404791111, | |
| "learning_rate": 9.316708167116377e-06, | |
| "loss": 0.1719, | |
| "step": 200, | |
| "ts_encoder_learning_rate": 9.308597683653976e-06 | |
| }, | |
| { | |
| "epoch": 0.529862426888541, | |
| "grad_norm": 2.1147336782126906, | |
| "learning_rate": 9.308597683653976e-06, | |
| "loss": 0.1412, | |
| "step": 201, | |
| "ts_encoder_learning_rate": 9.300442922684033e-06 | |
| }, | |
| { | |
| "epoch": 0.5324985583655985, | |
| "grad_norm": 3.403863020435584, | |
| "learning_rate": 9.300442922684033e-06, | |
| "loss": 0.159, | |
| "step": 202, | |
| "ts_encoder_learning_rate": 9.292243968009332e-06 | |
| }, | |
| { | |
| "epoch": 0.5351346898426559, | |
| "grad_norm": 2.1840583753378704, | |
| "learning_rate": 9.292243968009332e-06, | |
| "loss": 0.153, | |
| "step": 203, | |
| "ts_encoder_learning_rate": 9.284000903886818e-06 | |
| }, | |
| { | |
| "epoch": 0.5377708213197133, | |
| "grad_norm": 2.756745654144269, | |
| "learning_rate": 9.284000903886818e-06, | |
| "loss": 0.1496, | |
| "step": 204, | |
| "ts_encoder_learning_rate": 9.275713815026732e-06 | |
| }, | |
| { | |
| "epoch": 0.5404069527967708, | |
| "grad_norm": 1.562920226633681, | |
| "learning_rate": 9.275713815026732e-06, | |
| "loss": 0.1444, | |
| "step": 205, | |
| "ts_encoder_learning_rate": 9.26738278659173e-06 | |
| }, | |
| { | |
| "epoch": 0.5430430842738282, | |
| "grad_norm": 1.8988550139932143, | |
| "learning_rate": 9.26738278659173e-06, | |
| "loss": 0.1327, | |
| "step": 206, | |
| "ts_encoder_learning_rate": 9.259007904196023e-06 | |
| }, | |
| { | |
| "epoch": 0.5456792157508856, | |
| "grad_norm": 1.804458613750819, | |
| "learning_rate": 9.259007904196023e-06, | |
| "loss": 0.1502, | |
| "step": 207, | |
| "ts_encoder_learning_rate": 9.250589253904481e-06 | |
| }, | |
| { | |
| "epoch": 0.548315347227943, | |
| "grad_norm": 1.8650637771648768, | |
| "learning_rate": 9.250589253904481e-06, | |
| "loss": 0.1473, | |
| "step": 208, | |
| "ts_encoder_learning_rate": 9.242126922231763e-06 | |
| }, | |
| { | |
| "epoch": 0.5509514787050004, | |
| "grad_norm": 2.051385105386284, | |
| "learning_rate": 9.242126922231763e-06, | |
| "loss": 0.1658, | |
| "step": 209, | |
| "ts_encoder_learning_rate": 9.233620996141421e-06 | |
| }, | |
| { | |
| "epoch": 0.5535876101820578, | |
| "grad_norm": 2.3089156973651463, | |
| "learning_rate": 9.233620996141421e-06, | |
| "loss": 0.145, | |
| "step": 210, | |
| "ts_encoder_learning_rate": 9.225071563045007e-06 | |
| }, | |
| { | |
| "epoch": 0.5562237416591153, | |
| "grad_norm": 2.3126324557088265, | |
| "learning_rate": 9.225071563045007e-06, | |
| "loss": 0.1503, | |
| "step": 211, | |
| "ts_encoder_learning_rate": 9.216478710801171e-06 | |
| }, | |
| { | |
| "epoch": 0.5588598731361727, | |
| "grad_norm": 2.0335782421296047, | |
| "learning_rate": 9.216478710801171e-06, | |
| "loss": 0.1254, | |
| "step": 212, | |
| "ts_encoder_learning_rate": 9.207842527714767e-06 | |
| }, | |
| { | |
| "epoch": 0.5614960046132301, | |
| "grad_norm": 2.959817313362635, | |
| "learning_rate": 9.207842527714767e-06, | |
| "loss": 0.1344, | |
| "step": 213, | |
| "ts_encoder_learning_rate": 9.199163102535937e-06 | |
| }, | |
| { | |
| "epoch": 0.5641321360902875, | |
| "grad_norm": 2.401709370382837, | |
| "learning_rate": 9.199163102535937e-06, | |
| "loss": 0.1235, | |
| "step": 214, | |
| "ts_encoder_learning_rate": 9.190440524459203e-06 | |
| }, | |
| { | |
| "epoch": 0.5667682675673449, | |
| "grad_norm": 2.4698482564816437, | |
| "learning_rate": 9.190440524459203e-06, | |
| "loss": 0.1652, | |
| "step": 215, | |
| "ts_encoder_learning_rate": 9.181674883122554e-06 | |
| }, | |
| { | |
| "epoch": 0.5694043990444023, | |
| "grad_norm": 2.174645988827366, | |
| "learning_rate": 9.181674883122554e-06, | |
| "loss": 0.1191, | |
| "step": 216, | |
| "ts_encoder_learning_rate": 9.172866268606514e-06 | |
| }, | |
| { | |
| "epoch": 0.5720405305214598, | |
| "grad_norm": 1.4935255107278584, | |
| "learning_rate": 9.172866268606514e-06, | |
| "loss": 0.1348, | |
| "step": 217, | |
| "ts_encoder_learning_rate": 9.164014771433228e-06 | |
| }, | |
| { | |
| "epoch": 0.5746766619985172, | |
| "grad_norm": 2.261714559414658, | |
| "learning_rate": 9.164014771433228e-06, | |
| "loss": 0.1393, | |
| "step": 218, | |
| "ts_encoder_learning_rate": 9.15512048256552e-06 | |
| }, | |
| { | |
| "epoch": 0.5773127934755746, | |
| "grad_norm": 1.9389125772079525, | |
| "learning_rate": 9.15512048256552e-06, | |
| "loss": 0.1159, | |
| "step": 219, | |
| "ts_encoder_learning_rate": 9.146183493405976e-06 | |
| }, | |
| { | |
| "epoch": 0.5799489249526321, | |
| "grad_norm": 2.2307885757186376, | |
| "learning_rate": 9.146183493405976e-06, | |
| "loss": 0.1197, | |
| "step": 220, | |
| "ts_encoder_learning_rate": 9.137203895795983e-06 | |
| }, | |
| { | |
| "epoch": 0.5825850564296894, | |
| "grad_norm": 2.2631154256287784, | |
| "learning_rate": 9.137203895795983e-06, | |
| "loss": 0.1297, | |
| "step": 221, | |
| "ts_encoder_learning_rate": 9.128181782014801e-06 | |
| }, | |
| { | |
| "epoch": 0.5852211879067468, | |
| "grad_norm": 1.8377593809270902, | |
| "learning_rate": 9.128181782014801e-06, | |
| "loss": 0.1375, | |
| "step": 222, | |
| "ts_encoder_learning_rate": 9.119117244778609e-06 | |
| }, | |
| { | |
| "epoch": 0.5878573193838043, | |
| "grad_norm": 1.941575989624506, | |
| "learning_rate": 9.119117244778609e-06, | |
| "loss": 0.1381, | |
| "step": 223, | |
| "ts_encoder_learning_rate": 9.110010377239552e-06 | |
| }, | |
| { | |
| "epoch": 0.5904934508608617, | |
| "grad_norm": 2.0700642084907797, | |
| "learning_rate": 9.110010377239552e-06, | |
| "loss": 0.1215, | |
| "step": 224, | |
| "ts_encoder_learning_rate": 9.10086127298478e-06 | |
| }, | |
| { | |
| "epoch": 0.5931295823379191, | |
| "grad_norm": 2.175877796189538, | |
| "learning_rate": 9.10086127298478e-06, | |
| "loss": 0.1274, | |
| "step": 225, | |
| "ts_encoder_learning_rate": 9.0916700260355e-06 | |
| }, | |
| { | |
| "epoch": 0.5957657138149766, | |
| "grad_norm": 2.7088680551627444, | |
| "learning_rate": 9.0916700260355e-06, | |
| "loss": 0.1546, | |
| "step": 226, | |
| "ts_encoder_learning_rate": 9.082436730845993e-06 | |
| }, | |
| { | |
| "epoch": 0.5984018452920339, | |
| "grad_norm": 2.2127473631725634, | |
| "learning_rate": 9.082436730845993e-06, | |
| "loss": 0.1456, | |
| "step": 227, | |
| "ts_encoder_learning_rate": 9.073161482302656e-06 | |
| }, | |
| { | |
| "epoch": 0.6010379767690913, | |
| "grad_norm": 2.2852943362812947, | |
| "learning_rate": 9.073161482302656e-06, | |
| "loss": 0.1133, | |
| "step": 228, | |
| "ts_encoder_learning_rate": 9.063844375723014e-06 | |
| }, | |
| { | |
| "epoch": 0.6036741082461488, | |
| "grad_norm": 2.022226024724427, | |
| "learning_rate": 9.063844375723014e-06, | |
| "loss": 0.147, | |
| "step": 229, | |
| "ts_encoder_learning_rate": 9.054485506854756e-06 | |
| }, | |
| { | |
| "epoch": 0.6063102397232062, | |
| "grad_norm": 1.4127928730528008, | |
| "learning_rate": 9.054485506854756e-06, | |
| "loss": 0.1399, | |
| "step": 230, | |
| "ts_encoder_learning_rate": 9.045084971874738e-06 | |
| }, | |
| { | |
| "epoch": 0.6089463712002636, | |
| "grad_norm": 2.3124067231050796, | |
| "learning_rate": 9.045084971874738e-06, | |
| "loss": 0.166, | |
| "step": 231, | |
| "ts_encoder_learning_rate": 9.035642867388003e-06 | |
| }, | |
| { | |
| "epoch": 0.6115825026773211, | |
| "grad_norm": 2.9417960489910557, | |
| "learning_rate": 9.035642867388003e-06, | |
| "loss": 0.1336, | |
| "step": 232, | |
| "ts_encoder_learning_rate": 9.026159290426782e-06 | |
| }, | |
| { | |
| "epoch": 0.6142186341543785, | |
| "grad_norm": 2.037375696729251, | |
| "learning_rate": 9.026159290426782e-06, | |
| "loss": 0.1226, | |
| "step": 233, | |
| "ts_encoder_learning_rate": 9.016634338449504e-06 | |
| }, | |
| { | |
| "epoch": 0.6168547656314358, | |
| "grad_norm": 2.7178140969916478, | |
| "learning_rate": 9.016634338449504e-06, | |
| "loss": 0.1405, | |
| "step": 234, | |
| "ts_encoder_learning_rate": 9.007068109339783e-06 | |
| }, | |
| { | |
| "epoch": 0.6194908971084933, | |
| "grad_norm": 2.4572769251627804, | |
| "learning_rate": 9.007068109339783e-06, | |
| "loss": 0.1296, | |
| "step": 235, | |
| "ts_encoder_learning_rate": 8.997460701405431e-06 | |
| }, | |
| { | |
| "epoch": 0.6221270285855507, | |
| "grad_norm": 1.7074800943778181, | |
| "learning_rate": 8.997460701405431e-06, | |
| "loss": 0.1076, | |
| "step": 236, | |
| "ts_encoder_learning_rate": 8.987812213377423e-06 | |
| }, | |
| { | |
| "epoch": 0.6247631600626081, | |
| "grad_norm": 1.3569057680195367, | |
| "learning_rate": 8.987812213377423e-06, | |
| "loss": 0.1307, | |
| "step": 237, | |
| "ts_encoder_learning_rate": 8.978122744408905e-06 | |
| }, | |
| { | |
| "epoch": 0.6273992915396656, | |
| "grad_norm": 2.2197611340479333, | |
| "learning_rate": 8.978122744408905e-06, | |
| "loss": 0.1255, | |
| "step": 238, | |
| "ts_encoder_learning_rate": 8.968392394074164e-06 | |
| }, | |
| { | |
| "epoch": 0.630035423016723, | |
| "grad_norm": 2.80909657385505, | |
| "learning_rate": 8.968392394074164e-06, | |
| "loss": 0.1311, | |
| "step": 239, | |
| "ts_encoder_learning_rate": 8.9586212623676e-06 | |
| }, | |
| { | |
| "epoch": 0.6326715544937804, | |
| "grad_norm": 1.596591103789994, | |
| "learning_rate": 8.9586212623676e-06, | |
| "loss": 0.1247, | |
| "step": 240, | |
| "ts_encoder_learning_rate": 8.948809449702712e-06 | |
| }, | |
| { | |
| "epoch": 0.6353076859708378, | |
| "grad_norm": 2.255624101113016, | |
| "learning_rate": 8.948809449702712e-06, | |
| "loss": 0.1125, | |
| "step": 241, | |
| "ts_encoder_learning_rate": 8.938957056911057e-06 | |
| }, | |
| { | |
| "epoch": 0.6379438174478952, | |
| "grad_norm": 1.4404926531170739, | |
| "learning_rate": 8.938957056911057e-06, | |
| "loss": 0.1156, | |
| "step": 242, | |
| "ts_encoder_learning_rate": 8.929064185241214e-06 | |
| }, | |
| { | |
| "epoch": 0.6405799489249526, | |
| "grad_norm": 2.0628024401597465, | |
| "learning_rate": 8.929064185241214e-06, | |
| "loss": 0.137, | |
| "step": 243, | |
| "ts_encoder_learning_rate": 8.919130936357743e-06 | |
| }, | |
| { | |
| "epoch": 0.6432160804020101, | |
| "grad_norm": 1.7372644218298394, | |
| "learning_rate": 8.919130936357743e-06, | |
| "loss": 0.111, | |
| "step": 244, | |
| "ts_encoder_learning_rate": 8.90915741234015e-06 | |
| }, | |
| { | |
| "epoch": 0.6458522118790675, | |
| "grad_norm": 1.627934422877123, | |
| "learning_rate": 8.90915741234015e-06, | |
| "loss": 0.1334, | |
| "step": 245, | |
| "ts_encoder_learning_rate": 8.899143715681822e-06 | |
| }, | |
| { | |
| "epoch": 0.6484883433561249, | |
| "grad_norm": 1.8556278497877248, | |
| "learning_rate": 8.899143715681822e-06, | |
| "loss": 0.1219, | |
| "step": 246, | |
| "ts_encoder_learning_rate": 8.889089949288986e-06 | |
| }, | |
| { | |
| "epoch": 0.6511244748331823, | |
| "grad_norm": 3.1526166628811603, | |
| "learning_rate": 8.889089949288986e-06, | |
| "loss": 0.0894, | |
| "step": 247, | |
| "ts_encoder_learning_rate": 8.878996216479651e-06 | |
| }, | |
| { | |
| "epoch": 0.6537606063102397, | |
| "grad_norm": 2.213556128570375, | |
| "learning_rate": 8.878996216479651e-06, | |
| "loss": 0.1277, | |
| "step": 248, | |
| "ts_encoder_learning_rate": 8.868862620982534e-06 | |
| }, | |
| { | |
| "epoch": 0.6563967377872971, | |
| "grad_norm": 4.4965782913660854, | |
| "learning_rate": 8.868862620982534e-06, | |
| "loss": 0.1258, | |
| "step": 249, | |
| "ts_encoder_learning_rate": 8.85868926693601e-06 | |
| }, | |
| { | |
| "epoch": 0.6590328692643546, | |
| "grad_norm": 2.2567027094465573, | |
| "learning_rate": 8.85868926693601e-06, | |
| "loss": 0.1313, | |
| "step": 250, | |
| "ts_encoder_learning_rate": 8.84847625888703e-06 | |
| }, | |
| { | |
| "epoch": 0.661669000741412, | |
| "grad_norm": 2.865960904272953, | |
| "learning_rate": 8.84847625888703e-06, | |
| "loss": 0.1255, | |
| "step": 251, | |
| "ts_encoder_learning_rate": 8.838223701790057e-06 | |
| }, | |
| { | |
| "epoch": 0.6643051322184694, | |
| "grad_norm": 1.698484873506614, | |
| "learning_rate": 8.838223701790057e-06, | |
| "loss": 0.1277, | |
| "step": 252, | |
| "ts_encoder_learning_rate": 8.827931701005974e-06 | |
| }, | |
| { | |
| "epoch": 0.6669412636955269, | |
| "grad_norm": 1.7108540523901774, | |
| "learning_rate": 8.827931701005974e-06, | |
| "loss": 0.1238, | |
| "step": 253, | |
| "ts_encoder_learning_rate": 8.817600362301018e-06 | |
| }, | |
| { | |
| "epoch": 0.6695773951725842, | |
| "grad_norm": 2.574763087212977, | |
| "learning_rate": 8.817600362301018e-06, | |
| "loss": 0.119, | |
| "step": 254, | |
| "ts_encoder_learning_rate": 8.807229791845673e-06 | |
| }, | |
| { | |
| "epoch": 0.6722135266496416, | |
| "grad_norm": 1.76981658030345, | |
| "learning_rate": 8.807229791845673e-06, | |
| "loss": 0.1162, | |
| "step": 255, | |
| "ts_encoder_learning_rate": 8.7968200962136e-06 | |
| }, | |
| { | |
| "epoch": 0.6748496581266991, | |
| "grad_norm": 2.3402398000656532, | |
| "learning_rate": 8.7968200962136e-06, | |
| "loss": 0.1082, | |
| "step": 256, | |
| "ts_encoder_learning_rate": 8.786371382380527e-06 | |
| }, | |
| { | |
| "epoch": 0.6774857896037565, | |
| "grad_norm": 3.333892645282909, | |
| "learning_rate": 8.786371382380527e-06, | |
| "loss": 0.1226, | |
| "step": 257, | |
| "ts_encoder_learning_rate": 8.775883757723156e-06 | |
| }, | |
| { | |
| "epoch": 0.6801219210808139, | |
| "grad_norm": 2.0605502821038626, | |
| "learning_rate": 8.775883757723156e-06, | |
| "loss": 0.1278, | |
| "step": 258, | |
| "ts_encoder_learning_rate": 8.765357330018056e-06 | |
| }, | |
| { | |
| "epoch": 0.6827580525578714, | |
| "grad_norm": 1.8198127860386084, | |
| "learning_rate": 8.765357330018056e-06, | |
| "loss": 0.1104, | |
| "step": 259, | |
| "ts_encoder_learning_rate": 8.754792207440557e-06 | |
| }, | |
| { | |
| "epoch": 0.6853941840349287, | |
| "grad_norm": 1.7852327454966495, | |
| "learning_rate": 8.754792207440557e-06, | |
| "loss": 0.1257, | |
| "step": 260, | |
| "ts_encoder_learning_rate": 8.74418849856364e-06 | |
| }, | |
| { | |
| "epoch": 0.6880303155119861, | |
| "grad_norm": 2.3856839525596043, | |
| "learning_rate": 8.74418849856364e-06, | |
| "loss": 0.1232, | |
| "step": 261, | |
| "ts_encoder_learning_rate": 8.733546312356826e-06 | |
| }, | |
| { | |
| "epoch": 0.6906664469890436, | |
| "grad_norm": 2.0514309990696, | |
| "learning_rate": 8.733546312356826e-06, | |
| "loss": 0.106, | |
| "step": 262, | |
| "ts_encoder_learning_rate": 8.722865758185036e-06 | |
| }, | |
| { | |
| "epoch": 0.693302578466101, | |
| "grad_norm": 1.8497678205013666, | |
| "learning_rate": 8.722865758185036e-06, | |
| "loss": 0.0925, | |
| "step": 263, | |
| "ts_encoder_learning_rate": 8.712146945807494e-06 | |
| }, | |
| { | |
| "epoch": 0.6959387099431584, | |
| "grad_norm": 2.325593177065593, | |
| "learning_rate": 8.712146945807494e-06, | |
| "loss": 0.1151, | |
| "step": 264, | |
| "ts_encoder_learning_rate": 8.701389985376578e-06 | |
| }, | |
| { | |
| "epoch": 0.6985748414202159, | |
| "grad_norm": 2.4024115509182544, | |
| "learning_rate": 8.701389985376578e-06, | |
| "loss": 0.1351, | |
| "step": 265, | |
| "ts_encoder_learning_rate": 8.690594987436705e-06 | |
| }, | |
| { | |
| "epoch": 0.7012109728972733, | |
| "grad_norm": 2.404989824928956, | |
| "learning_rate": 8.690594987436705e-06, | |
| "loss": 0.1183, | |
| "step": 266, | |
| "ts_encoder_learning_rate": 8.679762062923176e-06 | |
| }, | |
| { | |
| "epoch": 0.7038471043743306, | |
| "grad_norm": 2.4255255570984904, | |
| "learning_rate": 8.679762062923176e-06, | |
| "loss": 0.1175, | |
| "step": 267, | |
| "ts_encoder_learning_rate": 8.668891323161053e-06 | |
| }, | |
| { | |
| "epoch": 0.7064832358513881, | |
| "grad_norm": 2.063468492850095, | |
| "learning_rate": 8.668891323161053e-06, | |
| "loss": 0.1136, | |
| "step": 268, | |
| "ts_encoder_learning_rate": 8.657982879864007e-06 | |
| }, | |
| { | |
| "epoch": 0.7091193673284455, | |
| "grad_norm": 2.454812018935756, | |
| "learning_rate": 8.657982879864007e-06, | |
| "loss": 0.1184, | |
| "step": 269, | |
| "ts_encoder_learning_rate": 8.647036845133171e-06 | |
| }, | |
| { | |
| "epoch": 0.7117554988055029, | |
| "grad_norm": 2.389510429853746, | |
| "learning_rate": 8.647036845133171e-06, | |
| "loss": 0.1109, | |
| "step": 270, | |
| "ts_encoder_learning_rate": 8.636053331455986e-06 | |
| }, | |
| { | |
| "epoch": 0.7143916302825604, | |
| "grad_norm": 2.534713674498363, | |
| "learning_rate": 8.636053331455986e-06, | |
| "loss": 0.1245, | |
| "step": 271, | |
| "ts_encoder_learning_rate": 8.625032451705053e-06 | |
| }, | |
| { | |
| "epoch": 0.7170277617596178, | |
| "grad_norm": 1.8778540259505276, | |
| "learning_rate": 8.625032451705053e-06, | |
| "loss": 0.1116, | |
| "step": 272, | |
| "ts_encoder_learning_rate": 8.613974319136959e-06 | |
| }, | |
| { | |
| "epoch": 0.7196638932366752, | |
| "grad_norm": 2.1437242323626053, | |
| "learning_rate": 8.613974319136959e-06, | |
| "loss": 0.1093, | |
| "step": 273, | |
| "ts_encoder_learning_rate": 8.602879047391127e-06 | |
| }, | |
| { | |
| "epoch": 0.7223000247137326, | |
| "grad_norm": 1.841731554668583, | |
| "learning_rate": 8.602879047391127e-06, | |
| "loss": 0.1155, | |
| "step": 274, | |
| "ts_encoder_learning_rate": 8.591746750488639e-06 | |
| }, | |
| { | |
| "epoch": 0.72493615619079, | |
| "grad_norm": 1.7263269186243153, | |
| "learning_rate": 8.591746750488639e-06, | |
| "loss": 0.1106, | |
| "step": 275, | |
| "ts_encoder_learning_rate": 8.580577542831072e-06 | |
| }, | |
| { | |
| "epoch": 0.7275722876678474, | |
| "grad_norm": 2.1861381161848144, | |
| "learning_rate": 8.580577542831072e-06, | |
| "loss": 0.1069, | |
| "step": 276, | |
| "ts_encoder_learning_rate": 8.569371539199316e-06 | |
| }, | |
| { | |
| "epoch": 0.7302084191449049, | |
| "grad_norm": 1.83254402858276, | |
| "learning_rate": 8.569371539199316e-06, | |
| "loss": 0.1062, | |
| "step": 277, | |
| "ts_encoder_learning_rate": 8.558128854752397e-06 | |
| }, | |
| { | |
| "epoch": 0.7328445506219623, | |
| "grad_norm": 1.5361832609660626, | |
| "learning_rate": 8.558128854752397e-06, | |
| "loss": 0.0958, | |
| "step": 278, | |
| "ts_encoder_learning_rate": 8.54684960502629e-06 | |
| }, | |
| { | |
| "epoch": 0.7354806820990197, | |
| "grad_norm": 1.6977272803107797, | |
| "learning_rate": 8.54684960502629e-06, | |
| "loss": 0.0986, | |
| "step": 279, | |
| "ts_encoder_learning_rate": 8.535533905932739e-06 | |
| }, | |
| { | |
| "epoch": 0.7381168135760771, | |
| "grad_norm": 1.805089674746036, | |
| "learning_rate": 8.535533905932739e-06, | |
| "loss": 0.0959, | |
| "step": 280, | |
| "ts_encoder_learning_rate": 8.52418187375806e-06 | |
| }, | |
| { | |
| "epoch": 0.7407529450531345, | |
| "grad_norm": 2.4198941080590153, | |
| "learning_rate": 8.52418187375806e-06, | |
| "loss": 0.1202, | |
| "step": 281, | |
| "ts_encoder_learning_rate": 8.512793625161947e-06 | |
| }, | |
| { | |
| "epoch": 0.7433890765301919, | |
| "grad_norm": 1.9365613559437527, | |
| "learning_rate": 8.512793625161947e-06, | |
| "loss": 0.1251, | |
| "step": 282, | |
| "ts_encoder_learning_rate": 8.501369277176275e-06 | |
| }, | |
| { | |
| "epoch": 0.7460252080072494, | |
| "grad_norm": 2.6757940840346475, | |
| "learning_rate": 8.501369277176275e-06, | |
| "loss": 0.1167, | |
| "step": 283, | |
| "ts_encoder_learning_rate": 8.489908947203897e-06 | |
| }, | |
| { | |
| "epoch": 0.7486613394843068, | |
| "grad_norm": 1.3212164606431878, | |
| "learning_rate": 8.489908947203897e-06, | |
| "loss": 0.0967, | |
| "step": 284, | |
| "ts_encoder_learning_rate": 8.478412753017433e-06 | |
| }, | |
| { | |
| "epoch": 0.7512974709613642, | |
| "grad_norm": 2.3824932932765255, | |
| "learning_rate": 8.478412753017433e-06, | |
| "loss": 0.1211, | |
| "step": 285, | |
| "ts_encoder_learning_rate": 8.466880812758064e-06 | |
| }, | |
| { | |
| "epoch": 0.7539336024384217, | |
| "grad_norm": 1.8845886753934002, | |
| "learning_rate": 8.466880812758064e-06, | |
| "loss": 0.1156, | |
| "step": 286, | |
| "ts_encoder_learning_rate": 8.455313244934324e-06 | |
| }, | |
| { | |
| "epoch": 0.756569733915479, | |
| "grad_norm": 1.9404694724173959, | |
| "learning_rate": 8.455313244934324e-06, | |
| "loss": 0.0977, | |
| "step": 287, | |
| "ts_encoder_learning_rate": 8.443710168420866e-06 | |
| }, | |
| { | |
| "epoch": 0.7592058653925364, | |
| "grad_norm": 2.3462365486027097, | |
| "learning_rate": 8.443710168420866e-06, | |
| "loss": 0.105, | |
| "step": 288, | |
| "ts_encoder_learning_rate": 8.432071702457253e-06 | |
| }, | |
| { | |
| "epoch": 0.7618419968695939, | |
| "grad_norm": 2.228457755561138, | |
| "learning_rate": 8.432071702457253e-06, | |
| "loss": 0.1221, | |
| "step": 289, | |
| "ts_encoder_learning_rate": 8.420397966646732e-06 | |
| }, | |
| { | |
| "epoch": 0.7644781283466513, | |
| "grad_norm": 1.5579438707012758, | |
| "learning_rate": 8.420397966646732e-06, | |
| "loss": 0.0959, | |
| "step": 290, | |
| "ts_encoder_learning_rate": 8.408689080954997e-06 | |
| }, | |
| { | |
| "epoch": 0.7671142598237087, | |
| "grad_norm": 2.5815854965292258, | |
| "learning_rate": 8.408689080954997e-06, | |
| "loss": 0.1037, | |
| "step": 291, | |
| "ts_encoder_learning_rate": 8.396945165708971e-06 | |
| }, | |
| { | |
| "epoch": 0.7697503913007662, | |
| "grad_norm": 1.9767105091219221, | |
| "learning_rate": 8.396945165708971e-06, | |
| "loss": 0.1083, | |
| "step": 292, | |
| "ts_encoder_learning_rate": 8.38516634159555e-06 | |
| }, | |
| { | |
| "epoch": 0.7723865227778235, | |
| "grad_norm": 2.164960614575108, | |
| "learning_rate": 8.38516634159555e-06, | |
| "loss": 0.1092, | |
| "step": 293, | |
| "ts_encoder_learning_rate": 8.373352729660373e-06 | |
| }, | |
| { | |
| "epoch": 0.7750226542548809, | |
| "grad_norm": 2.426995380324648, | |
| "learning_rate": 8.373352729660373e-06, | |
| "loss": 0.0968, | |
| "step": 294, | |
| "ts_encoder_learning_rate": 8.361504451306585e-06 | |
| }, | |
| { | |
| "epoch": 0.7776587857319384, | |
| "grad_norm": 2.2037799475117765, | |
| "learning_rate": 8.361504451306585e-06, | |
| "loss": 0.1216, | |
| "step": 295, | |
| "ts_encoder_learning_rate": 8.349621628293578e-06 | |
| }, | |
| { | |
| "epoch": 0.7802949172089958, | |
| "grad_norm": 1.5101855867509804, | |
| "learning_rate": 8.349621628293578e-06, | |
| "loss": 0.0865, | |
| "step": 296, | |
| "ts_encoder_learning_rate": 8.337704382735741e-06 | |
| }, | |
| { | |
| "epoch": 0.7829310486860532, | |
| "grad_norm": 1.8304482678186873, | |
| "learning_rate": 8.337704382735741e-06, | |
| "loss": 0.0907, | |
| "step": 297, | |
| "ts_encoder_learning_rate": 8.325752837101213e-06 | |
| }, | |
| { | |
| "epoch": 0.7855671801631107, | |
| "grad_norm": 1.3186981403774303, | |
| "learning_rate": 8.325752837101213e-06, | |
| "loss": 0.0953, | |
| "step": 298, | |
| "ts_encoder_learning_rate": 8.313767114210615e-06 | |
| }, | |
| { | |
| "epoch": 0.7882033116401681, | |
| "grad_norm": 1.6252129812087586, | |
| "learning_rate": 8.313767114210615e-06, | |
| "loss": 0.0963, | |
| "step": 299, | |
| "ts_encoder_learning_rate": 8.301747337235798e-06 | |
| }, | |
| { | |
| "epoch": 0.7908394431172254, | |
| "grad_norm": 2.5667790327516604, | |
| "learning_rate": 8.301747337235798e-06, | |
| "loss": 0.1127, | |
| "step": 300, | |
| "ts_encoder_learning_rate": 8.289693629698564e-06 | |
| }, | |
| { | |
| "epoch": 0.7934755745942829, | |
| "grad_norm": 1.3839390646179972, | |
| "learning_rate": 8.289693629698564e-06, | |
| "loss": 0.0959, | |
| "step": 301, | |
| "ts_encoder_learning_rate": 8.27760611546941e-06 | |
| }, | |
| { | |
| "epoch": 0.7961117060713403, | |
| "grad_norm": 1.619204129359444, | |
| "learning_rate": 8.27760611546941e-06, | |
| "loss": 0.0941, | |
| "step": 302, | |
| "ts_encoder_learning_rate": 8.265484918766243e-06 | |
| }, | |
| { | |
| "epoch": 0.7987478375483977, | |
| "grad_norm": 13.33516060264948, | |
| "learning_rate": 8.265484918766243e-06, | |
| "loss": 0.1133, | |
| "step": 303, | |
| "ts_encoder_learning_rate": 8.253330164153118e-06 | |
| }, | |
| { | |
| "epoch": 0.8013839690254552, | |
| "grad_norm": 2.1482523103471736, | |
| "learning_rate": 8.253330164153118e-06, | |
| "loss": 0.0951, | |
| "step": 304, | |
| "ts_encoder_learning_rate": 8.241141976538944e-06 | |
| }, | |
| { | |
| "epoch": 0.8040201005025126, | |
| "grad_norm": 1.765409769976939, | |
| "learning_rate": 8.241141976538944e-06, | |
| "loss": 0.0999, | |
| "step": 305, | |
| "ts_encoder_learning_rate": 8.228920481176202e-06 | |
| }, | |
| { | |
| "epoch": 0.8066562319795699, | |
| "grad_norm": 1.6926228572462632, | |
| "learning_rate": 8.228920481176202e-06, | |
| "loss": 0.0971, | |
| "step": 306, | |
| "ts_encoder_learning_rate": 8.216665803659671e-06 | |
| }, | |
| { | |
| "epoch": 0.8092923634566274, | |
| "grad_norm": 2.2379314054489203, | |
| "learning_rate": 8.216665803659671e-06, | |
| "loss": 0.0892, | |
| "step": 307, | |
| "ts_encoder_learning_rate": 8.204378069925121e-06 | |
| }, | |
| { | |
| "epoch": 0.8119284949336848, | |
| "grad_norm": 2.3735432898917552, | |
| "learning_rate": 8.204378069925121e-06, | |
| "loss": 0.1129, | |
| "step": 308, | |
| "ts_encoder_learning_rate": 8.192057406248028e-06 | |
| }, | |
| { | |
| "epoch": 0.8145646264107422, | |
| "grad_norm": 1.9923807546881467, | |
| "learning_rate": 8.192057406248028e-06, | |
| "loss": 0.1031, | |
| "step": 309, | |
| "ts_encoder_learning_rate": 8.179703939242276e-06 | |
| }, | |
| { | |
| "epoch": 0.8172007578877997, | |
| "grad_norm": 1.469953484901779, | |
| "learning_rate": 8.179703939242276e-06, | |
| "loss": 0.0928, | |
| "step": 310, | |
| "ts_encoder_learning_rate": 8.16731779585885e-06 | |
| }, | |
| { | |
| "epoch": 0.8198368893648571, | |
| "grad_norm": 1.6845119035497684, | |
| "learning_rate": 8.16731779585885e-06, | |
| "loss": 0.1036, | |
| "step": 311, | |
| "ts_encoder_learning_rate": 8.154899103384536e-06 | |
| }, | |
| { | |
| "epoch": 0.8224730208419145, | |
| "grad_norm": 2.997454352710499, | |
| "learning_rate": 8.154899103384536e-06, | |
| "loss": 0.0915, | |
| "step": 312, | |
| "ts_encoder_learning_rate": 8.142447989440618e-06 | |
| }, | |
| { | |
| "epoch": 0.8251091523189719, | |
| "grad_norm": 1.7182074416814566, | |
| "learning_rate": 8.142447989440618e-06, | |
| "loss": 0.1003, | |
| "step": 313, | |
| "ts_encoder_learning_rate": 8.129964581981554e-06 | |
| }, | |
| { | |
| "epoch": 0.8277452837960293, | |
| "grad_norm": 1.9353741860227904, | |
| "learning_rate": 8.129964581981554e-06, | |
| "loss": 0.0898, | |
| "step": 314, | |
| "ts_encoder_learning_rate": 8.117449009293668e-06 | |
| }, | |
| { | |
| "epoch": 0.8303814152730867, | |
| "grad_norm": 1.5652582875371663, | |
| "learning_rate": 8.117449009293668e-06, | |
| "loss": 0.0935, | |
| "step": 315, | |
| "ts_encoder_learning_rate": 8.104901399993837e-06 | |
| }, | |
| { | |
| "epoch": 0.8330175467501442, | |
| "grad_norm": 1.1621735202700743, | |
| "learning_rate": 8.104901399993837e-06, | |
| "loss": 0.0996, | |
| "step": 316, | |
| "ts_encoder_learning_rate": 8.092321883028157e-06 | |
| }, | |
| { | |
| "epoch": 0.8356536782272016, | |
| "grad_norm": 1.6764936566754018, | |
| "learning_rate": 8.092321883028157e-06, | |
| "loss": 0.1089, | |
| "step": 317, | |
| "ts_encoder_learning_rate": 8.079710587670633e-06 | |
| }, | |
| { | |
| "epoch": 0.838289809704259, | |
| "grad_norm": 2.0164848493032808, | |
| "learning_rate": 8.079710587670633e-06, | |
| "loss": 0.0996, | |
| "step": 318, | |
| "ts_encoder_learning_rate": 8.067067643521834e-06 | |
| }, | |
| { | |
| "epoch": 0.8409259411813165, | |
| "grad_norm": 1.6759859565355775, | |
| "learning_rate": 8.067067643521834e-06, | |
| "loss": 0.1023, | |
| "step": 319, | |
| "ts_encoder_learning_rate": 8.054393180507572e-06 | |
| }, | |
| { | |
| "epoch": 0.8435620726583738, | |
| "grad_norm": 1.416243499558276, | |
| "learning_rate": 8.054393180507572e-06, | |
| "loss": 0.093, | |
| "step": 320, | |
| "ts_encoder_learning_rate": 8.041687328877566e-06 | |
| }, | |
| { | |
| "epoch": 0.8461982041354312, | |
| "grad_norm": 2.18872702162099, | |
| "learning_rate": 8.041687328877566e-06, | |
| "loss": 0.0981, | |
| "step": 321, | |
| "ts_encoder_learning_rate": 8.0289502192041e-06 | |
| }, | |
| { | |
| "epoch": 0.8488343356124887, | |
| "grad_norm": 1.3033768655546698, | |
| "learning_rate": 8.0289502192041e-06, | |
| "loss": 0.0839, | |
| "step": 322, | |
| "ts_encoder_learning_rate": 8.016181982380682e-06 | |
| }, | |
| { | |
| "epoch": 0.8514704670895461, | |
| "grad_norm": 2.0867250492468266, | |
| "learning_rate": 8.016181982380682e-06, | |
| "loss": 0.1059, | |
| "step": 323, | |
| "ts_encoder_learning_rate": 8.003382749620704e-06 | |
| }, | |
| { | |
| "epoch": 0.8541065985666035, | |
| "grad_norm": 1.488852214231389, | |
| "learning_rate": 8.003382749620704e-06, | |
| "loss": 0.0792, | |
| "step": 324, | |
| "ts_encoder_learning_rate": 7.99055265245608e-06 | |
| }, | |
| { | |
| "epoch": 0.856742730043661, | |
| "grad_norm": 1.6343632839095115, | |
| "learning_rate": 7.99055265245608e-06, | |
| "loss": 0.0969, | |
| "step": 325, | |
| "ts_encoder_learning_rate": 7.977691822735914e-06 | |
| }, | |
| { | |
| "epoch": 0.8593788615207183, | |
| "grad_norm": 1.7752008703715696, | |
| "learning_rate": 7.977691822735914e-06, | |
| "loss": 0.1092, | |
| "step": 326, | |
| "ts_encoder_learning_rate": 7.96480039262513e-06 | |
| }, | |
| { | |
| "epoch": 0.8620149929977757, | |
| "grad_norm": 1.5781012521986801, | |
| "learning_rate": 7.96480039262513e-06, | |
| "loss": 0.0912, | |
| "step": 327, | |
| "ts_encoder_learning_rate": 7.951878494603116e-06 | |
| }, | |
| { | |
| "epoch": 0.8646511244748332, | |
| "grad_norm": 1.7441562007191758, | |
| "learning_rate": 7.951878494603116e-06, | |
| "loss": 0.0888, | |
| "step": 328, | |
| "ts_encoder_learning_rate": 7.938926261462366e-06 | |
| }, | |
| { | |
| "epoch": 0.8672872559518906, | |
| "grad_norm": 1.8345267313412936, | |
| "learning_rate": 7.938926261462366e-06, | |
| "loss": 0.0835, | |
| "step": 329, | |
| "ts_encoder_learning_rate": 7.925943826307119e-06 | |
| }, | |
| { | |
| "epoch": 0.869923387428948, | |
| "grad_norm": 1.9237226057153634, | |
| "learning_rate": 7.925943826307119e-06, | |
| "loss": 0.1003, | |
| "step": 330, | |
| "ts_encoder_learning_rate": 7.912931322551981e-06 | |
| }, | |
| { | |
| "epoch": 0.8725595189060055, | |
| "grad_norm": 2.0786928587458235, | |
| "learning_rate": 7.912931322551981e-06, | |
| "loss": 0.0977, | |
| "step": 331, | |
| "ts_encoder_learning_rate": 7.89988888392056e-06 | |
| }, | |
| { | |
| "epoch": 0.8751956503830629, | |
| "grad_norm": 1.137765624411406, | |
| "learning_rate": 7.89988888392056e-06, | |
| "loss": 0.0882, | |
| "step": 332, | |
| "ts_encoder_learning_rate": 7.886816644444099e-06 | |
| }, | |
| { | |
| "epoch": 0.8778317818601202, | |
| "grad_norm": 1.5400536066702828, | |
| "learning_rate": 7.886816644444099e-06, | |
| "loss": 0.0899, | |
| "step": 333, | |
| "ts_encoder_learning_rate": 7.873714738460075e-06 | |
| }, | |
| { | |
| "epoch": 0.8804679133371777, | |
| "grad_norm": 1.9149603930639032, | |
| "learning_rate": 7.873714738460075e-06, | |
| "loss": 0.086, | |
| "step": 334, | |
| "ts_encoder_learning_rate": 7.860583300610849e-06 | |
| }, | |
| { | |
| "epoch": 0.8831040448142351, | |
| "grad_norm": 1.845652808068975, | |
| "learning_rate": 7.860583300610849e-06, | |
| "loss": 0.1191, | |
| "step": 335, | |
| "ts_encoder_learning_rate": 7.84742246584226e-06 | |
| }, | |
| { | |
| "epoch": 0.8857401762912925, | |
| "grad_norm": 1.539167450730259, | |
| "learning_rate": 7.84742246584226e-06, | |
| "loss": 0.1018, | |
| "step": 336, | |
| "ts_encoder_learning_rate": 7.83423236940225e-06 | |
| }, | |
| { | |
| "epoch": 0.88837630776835, | |
| "grad_norm": 2.7054499134952303, | |
| "learning_rate": 7.83423236940225e-06, | |
| "loss": 0.0787, | |
| "step": 337, | |
| "ts_encoder_learning_rate": 7.821013146839467e-06 | |
| }, | |
| { | |
| "epoch": 0.8910124392454074, | |
| "grad_norm": 2.3960105755739356, | |
| "learning_rate": 7.821013146839467e-06, | |
| "loss": 0.0998, | |
| "step": 338, | |
| "ts_encoder_learning_rate": 7.807764934001875e-06 | |
| }, | |
| { | |
| "epoch": 0.8936485707224647, | |
| "grad_norm": 1.9114513463572482, | |
| "learning_rate": 7.807764934001875e-06, | |
| "loss": 0.0955, | |
| "step": 339, | |
| "ts_encoder_learning_rate": 7.794487867035358e-06 | |
| }, | |
| { | |
| "epoch": 0.8962847021995222, | |
| "grad_norm": 1.1940763189233343, | |
| "learning_rate": 7.794487867035358e-06, | |
| "loss": 0.0978, | |
| "step": 340, | |
| "ts_encoder_learning_rate": 7.781182082382325e-06 | |
| }, | |
| { | |
| "epoch": 0.8989208336765796, | |
| "grad_norm": 1.6611445100384334, | |
| "learning_rate": 7.781182082382325e-06, | |
| "loss": 0.0945, | |
| "step": 341, | |
| "ts_encoder_learning_rate": 7.767847716780297e-06 | |
| }, | |
| { | |
| "epoch": 0.901556965153637, | |
| "grad_norm": 1.925999851813011, | |
| "learning_rate": 7.767847716780297e-06, | |
| "loss": 0.1061, | |
| "step": 342, | |
| "ts_encoder_learning_rate": 7.754484907260513e-06 | |
| }, | |
| { | |
| "epoch": 0.9041930966306945, | |
| "grad_norm": 1.3700140058079462, | |
| "learning_rate": 7.754484907260513e-06, | |
| "loss": 0.0816, | |
| "step": 343, | |
| "ts_encoder_learning_rate": 7.741093791146517e-06 | |
| }, | |
| { | |
| "epoch": 0.9068292281077519, | |
| "grad_norm": 2.1966063055592366, | |
| "learning_rate": 7.741093791146517e-06, | |
| "loss": 0.1056, | |
| "step": 344, | |
| "ts_encoder_learning_rate": 7.727674506052744e-06 | |
| }, | |
| { | |
| "epoch": 0.9094653595848093, | |
| "grad_norm": 1.7516803825411136, | |
| "learning_rate": 7.727674506052744e-06, | |
| "loss": 0.0933, | |
| "step": 345, | |
| "ts_encoder_learning_rate": 7.714227189883112e-06 | |
| }, | |
| { | |
| "epoch": 0.9121014910618667, | |
| "grad_norm": 2.2157367065223297, | |
| "learning_rate": 7.714227189883112e-06, | |
| "loss": 0.0918, | |
| "step": 346, | |
| "ts_encoder_learning_rate": 7.700751980829601e-06 | |
| }, | |
| { | |
| "epoch": 0.9147376225389241, | |
| "grad_norm": 2.20733086616214, | |
| "learning_rate": 7.700751980829601e-06, | |
| "loss": 0.0975, | |
| "step": 347, | |
| "ts_encoder_learning_rate": 7.687249017370832e-06 | |
| }, | |
| { | |
| "epoch": 0.9173737540159815, | |
| "grad_norm": 2.084116228493792, | |
| "learning_rate": 7.687249017370832e-06, | |
| "loss": 0.0992, | |
| "step": 348, | |
| "ts_encoder_learning_rate": 7.673718438270649e-06 | |
| }, | |
| { | |
| "epoch": 0.920009885493039, | |
| "grad_norm": 2.084868196282067, | |
| "learning_rate": 7.673718438270649e-06, | |
| "loss": 0.0891, | |
| "step": 349, | |
| "ts_encoder_learning_rate": 7.660160382576683e-06 | |
| }, | |
| { | |
| "epoch": 0.9226460169700964, | |
| "grad_norm": 1.8080308541934171, | |
| "learning_rate": 7.660160382576683e-06, | |
| "loss": 0.0891, | |
| "step": 350, | |
| "ts_encoder_learning_rate": 7.646574989618938e-06 | |
| }, | |
| { | |
| "epoch": 0.9252821484471538, | |
| "grad_norm": 2.2542071099863397, | |
| "learning_rate": 7.646574989618938e-06, | |
| "loss": 0.0866, | |
| "step": 351, | |
| "ts_encoder_learning_rate": 7.632962399008342e-06 | |
| }, | |
| { | |
| "epoch": 0.9279182799242112, | |
| "grad_norm": 2.4095949616333283, | |
| "learning_rate": 7.632962399008342e-06, | |
| "loss": 0.0945, | |
| "step": 352, | |
| "ts_encoder_learning_rate": 7.619322750635327e-06 | |
| }, | |
| { | |
| "epoch": 0.9305544114012686, | |
| "grad_norm": 2.0875916761878934, | |
| "learning_rate": 7.619322750635327e-06, | |
| "loss": 0.0865, | |
| "step": 353, | |
| "ts_encoder_learning_rate": 7.605656184668385e-06 | |
| }, | |
| { | |
| "epoch": 0.933190542878326, | |
| "grad_norm": 1.7064657643571555, | |
| "learning_rate": 7.605656184668385e-06, | |
| "loss": 0.0933, | |
| "step": 354, | |
| "ts_encoder_learning_rate": 7.591962841552627e-06 | |
| }, | |
| { | |
| "epoch": 0.9358266743553835, | |
| "grad_norm": 2.500359725738562, | |
| "learning_rate": 7.591962841552627e-06, | |
| "loss": 0.0872, | |
| "step": 355, | |
| "ts_encoder_learning_rate": 7.578242862008336e-06 | |
| }, | |
| { | |
| "epoch": 0.9384628058324409, | |
| "grad_norm": 1.384078600663941, | |
| "learning_rate": 7.578242862008336e-06, | |
| "loss": 0.0702, | |
| "step": 356, | |
| "ts_encoder_learning_rate": 7.564496387029532e-06 | |
| }, | |
| { | |
| "epoch": 0.9410989373094983, | |
| "grad_norm": 1.5245598469180928, | |
| "learning_rate": 7.564496387029532e-06, | |
| "loss": 0.0801, | |
| "step": 357, | |
| "ts_encoder_learning_rate": 7.550723557882514e-06 | |
| }, | |
| { | |
| "epoch": 0.9437350687865558, | |
| "grad_norm": 2.045161639456311, | |
| "learning_rate": 7.550723557882514e-06, | |
| "loss": 0.0937, | |
| "step": 358, | |
| "ts_encoder_learning_rate": 7.536924516104411e-06 | |
| }, | |
| { | |
| "epoch": 0.9463712002636131, | |
| "grad_norm": 2.5839255461657027, | |
| "learning_rate": 7.536924516104411e-06, | |
| "loss": 0.0942, | |
| "step": 359, | |
| "ts_encoder_learning_rate": 7.52309940350173e-06 | |
| }, | |
| { | |
| "epoch": 0.9490073317406705, | |
| "grad_norm": 3.2274496289419212, | |
| "learning_rate": 7.52309940350173e-06, | |
| "loss": 0.1023, | |
| "step": 360, | |
| "ts_encoder_learning_rate": 7.509248362148889e-06 | |
| }, | |
| { | |
| "epoch": 0.951643463217728, | |
| "grad_norm": 1.4645186215038737, | |
| "learning_rate": 7.509248362148889e-06, | |
| "loss": 0.0889, | |
| "step": 361, | |
| "ts_encoder_learning_rate": 7.49537153438677e-06 | |
| }, | |
| { | |
| "epoch": 0.9542795946947854, | |
| "grad_norm": 3.4885631800713903, | |
| "learning_rate": 7.49537153438677e-06, | |
| "loss": 0.1, | |
| "step": 362, | |
| "ts_encoder_learning_rate": 7.481469062821252e-06 | |
| }, | |
| { | |
| "epoch": 0.9569157261718428, | |
| "grad_norm": 1.6689194932402402, | |
| "learning_rate": 7.481469062821252e-06, | |
| "loss": 0.0909, | |
| "step": 363, | |
| "ts_encoder_learning_rate": 7.467541090321735e-06 | |
| }, | |
| { | |
| "epoch": 0.9595518576489003, | |
| "grad_norm": 1.3923817479195213, | |
| "learning_rate": 7.467541090321735e-06, | |
| "loss": 0.086, | |
| "step": 364, | |
| "ts_encoder_learning_rate": 7.453587760019691e-06 | |
| }, | |
| { | |
| "epoch": 0.9621879891259577, | |
| "grad_norm": 2.1695652586948606, | |
| "learning_rate": 7.453587760019691e-06, | |
| "loss": 0.0755, | |
| "step": 365, | |
| "ts_encoder_learning_rate": 7.439609215307173e-06 | |
| }, | |
| { | |
| "epoch": 0.964824120603015, | |
| "grad_norm": 1.9185102579274915, | |
| "learning_rate": 7.439609215307173e-06, | |
| "loss": 0.0717, | |
| "step": 366, | |
| "ts_encoder_learning_rate": 7.42560559983536e-06 | |
| }, | |
| { | |
| "epoch": 0.9674602520800725, | |
| "grad_norm": 2.425906908342805, | |
| "learning_rate": 7.42560559983536e-06, | |
| "loss": 0.0958, | |
| "step": 367, | |
| "ts_encoder_learning_rate": 7.411577057513066e-06 | |
| }, | |
| { | |
| "epoch": 0.9700963835571299, | |
| "grad_norm": 1.4397236920539425, | |
| "learning_rate": 7.411577057513066e-06, | |
| "loss": 0.101, | |
| "step": 368, | |
| "ts_encoder_learning_rate": 7.39752373250527e-06 | |
| }, | |
| { | |
| "epoch": 0.9727325150341873, | |
| "grad_norm": 1.0990134832990999, | |
| "learning_rate": 7.39752373250527e-06, | |
| "loss": 0.0754, | |
| "step": 369, | |
| "ts_encoder_learning_rate": 7.383445769231628e-06 | |
| }, | |
| { | |
| "epoch": 0.9753686465112448, | |
| "grad_norm": 1.3029567612526982, | |
| "learning_rate": 7.383445769231628e-06, | |
| "loss": 0.0789, | |
| "step": 370, | |
| "ts_encoder_learning_rate": 7.369343312364994e-06 | |
| }, | |
| { | |
| "epoch": 0.9780047779883022, | |
| "grad_norm": 2.3110295949001545, | |
| "learning_rate": 7.369343312364994e-06, | |
| "loss": 0.0798, | |
| "step": 371, | |
| "ts_encoder_learning_rate": 7.355216506829933e-06 | |
| }, | |
| { | |
| "epoch": 0.9806409094653595, | |
| "grad_norm": 1.7411270859762809, | |
| "learning_rate": 7.355216506829933e-06, | |
| "loss": 0.089, | |
| "step": 372, | |
| "ts_encoder_learning_rate": 7.34106549780123e-06 | |
| }, | |
| { | |
| "epoch": 0.983277040942417, | |
| "grad_norm": 1.8926501676131235, | |
| "learning_rate": 7.34106549780123e-06, | |
| "loss": 0.0985, | |
| "step": 373, | |
| "ts_encoder_learning_rate": 7.326890430702396e-06 | |
| }, | |
| { | |
| "epoch": 0.9859131724194744, | |
| "grad_norm": 1.7001478537324086, | |
| "learning_rate": 7.326890430702396e-06, | |
| "loss": 0.0798, | |
| "step": 374, | |
| "ts_encoder_learning_rate": 7.312691451204178e-06 | |
| }, | |
| { | |
| "epoch": 0.9885493038965318, | |
| "grad_norm": 1.9333040871086815, | |
| "learning_rate": 7.312691451204178e-06, | |
| "loss": 0.0794, | |
| "step": 375, | |
| "ts_encoder_learning_rate": 7.2984687052230585e-06 | |
| }, | |
| { | |
| "epoch": 0.9911854353735893, | |
| "grad_norm": 1.6635256130487461, | |
| "learning_rate": 7.2984687052230585e-06, | |
| "loss": 0.077, | |
| "step": 376, | |
| "ts_encoder_learning_rate": 7.284222338919758e-06 | |
| }, | |
| { | |
| "epoch": 0.9938215668506467, | |
| "grad_norm": 1.7377302475474232, | |
| "learning_rate": 7.284222338919758e-06, | |
| "loss": 0.0828, | |
| "step": 377, | |
| "ts_encoder_learning_rate": 7.269952498697734e-06 | |
| }, | |
| { | |
| "epoch": 0.9964576983277041, | |
| "grad_norm": 1.8805139125533905, | |
| "learning_rate": 7.269952498697734e-06, | |
| "loss": 0.0766, | |
| "step": 378, | |
| "ts_encoder_learning_rate": 7.255659331201673e-06 | |
| }, | |
| { | |
| "epoch": 0.9990938298047615, | |
| "grad_norm": 1.3707632091787474, | |
| "learning_rate": 7.255659331201673e-06, | |
| "loss": 0.0824, | |
| "step": 379, | |
| "ts_encoder_learning_rate": 7.241342983315985e-06 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.3707632091787474, | |
| "learning_rate": 7.241342983315985e-06, | |
| "loss": 0.0264, | |
| "step": 380, | |
| "ts_encoder_learning_rate": 7.227003602163296e-06 | |
| }, | |
| { | |
| "epoch": 1.0026361314770573, | |
| "grad_norm": 3.1286777602818665, | |
| "learning_rate": 7.227003602163296e-06, | |
| "loss": 0.0931, | |
| "step": 381, | |
| "ts_encoder_learning_rate": 7.212641335102932e-06 | |
| }, | |
| { | |
| "epoch": 1.0052722629541149, | |
| "grad_norm": 2.8391713475277, | |
| "learning_rate": 7.212641335102932e-06, | |
| "loss": 0.0722, | |
| "step": 382, | |
| "ts_encoder_learning_rate": 7.198256329729412e-06 | |
| }, | |
| { | |
| "epoch": 1.0079083944311722, | |
| "grad_norm": 1.256304357468329, | |
| "learning_rate": 7.198256329729412e-06, | |
| "loss": 0.0769, | |
| "step": 383, | |
| "ts_encoder_learning_rate": 7.183848733870917e-06 | |
| }, | |
| { | |
| "epoch": 1.0105445259082297, | |
| "grad_norm": 3.5250439890455767, | |
| "learning_rate": 7.183848733870917e-06, | |
| "loss": 0.0893, | |
| "step": 384, | |
| "ts_encoder_learning_rate": 7.169418695587791e-06 | |
| }, | |
| { | |
| "epoch": 1.013180657385287, | |
| "grad_norm": 2.626283878871451, | |
| "learning_rate": 7.169418695587791e-06, | |
| "loss": 0.0729, | |
| "step": 385, | |
| "ts_encoder_learning_rate": 7.154966363171003e-06 | |
| }, | |
| { | |
| "epoch": 1.0158167888623446, | |
| "grad_norm": 1.760910812924816, | |
| "learning_rate": 7.154966363171003e-06, | |
| "loss": 0.0879, | |
| "step": 386, | |
| "ts_encoder_learning_rate": 7.140491885140629e-06 | |
| }, | |
| { | |
| "epoch": 1.018452920339402, | |
| "grad_norm": 1.955129103690491, | |
| "learning_rate": 7.140491885140629e-06, | |
| "loss": 0.068, | |
| "step": 387, | |
| "ts_encoder_learning_rate": 7.125995410244324e-06 | |
| }, | |
| { | |
| "epoch": 1.0210890518164593, | |
| "grad_norm": 1.4906223976736166, | |
| "learning_rate": 7.125995410244324e-06, | |
| "loss": 0.0671, | |
| "step": 388, | |
| "ts_encoder_learning_rate": 7.1114770874558e-06 | |
| }, | |
| { | |
| "epoch": 1.0237251832935168, | |
| "grad_norm": 1.4849510993800188, | |
| "learning_rate": 7.1114770874558e-06, | |
| "loss": 0.0751, | |
| "step": 389, | |
| "ts_encoder_learning_rate": 7.096937065973285e-06 | |
| }, | |
| { | |
| "epoch": 1.0263613147705741, | |
| "grad_norm": 1.497911410516406, | |
| "learning_rate": 7.096937065973285e-06, | |
| "loss": 0.0673, | |
| "step": 390, | |
| "ts_encoder_learning_rate": 7.082375495217996e-06 | |
| }, | |
| { | |
| "epoch": 1.0289974462476317, | |
| "grad_norm": 1.4093983694447139, | |
| "learning_rate": 7.082375495217996e-06, | |
| "loss": 0.0722, | |
| "step": 391, | |
| "ts_encoder_learning_rate": 7.067792524832604e-06 | |
| }, | |
| { | |
| "epoch": 1.031633577724689, | |
| "grad_norm": 1.3429042920164558, | |
| "learning_rate": 7.067792524832604e-06, | |
| "loss": 0.0716, | |
| "step": 392, | |
| "ts_encoder_learning_rate": 7.053188304679691e-06 | |
| }, | |
| { | |
| "epoch": 1.0342697092017463, | |
| "grad_norm": 1.606708984123339, | |
| "learning_rate": 7.053188304679691e-06, | |
| "loss": 0.0745, | |
| "step": 393, | |
| "ts_encoder_learning_rate": 7.038562984840216e-06 | |
| }, | |
| { | |
| "epoch": 1.0369058406788039, | |
| "grad_norm": 1.9904571024970765, | |
| "learning_rate": 7.038562984840216e-06, | |
| "loss": 0.0677, | |
| "step": 394, | |
| "ts_encoder_learning_rate": 7.023916715611969e-06 | |
| }, | |
| { | |
| "epoch": 1.0395419721558612, | |
| "grad_norm": 1.4505783107760564, | |
| "learning_rate": 7.023916715611969e-06, | |
| "loss": 0.0647, | |
| "step": 395, | |
| "ts_encoder_learning_rate": 7.009249647508028e-06 | |
| }, | |
| { | |
| "epoch": 1.0421781036329187, | |
| "grad_norm": 1.5229728357909127, | |
| "learning_rate": 7.009249647508028e-06, | |
| "loss": 0.0652, | |
| "step": 396, | |
| "ts_encoder_learning_rate": 6.994561931255209e-06 | |
| }, | |
| { | |
| "epoch": 1.044814235109976, | |
| "grad_norm": 2.6769507938266277, | |
| "learning_rate": 6.994561931255209e-06, | |
| "loss": 0.0678, | |
| "step": 397, | |
| "ts_encoder_learning_rate": 6.9798537177925226e-06 | |
| }, | |
| { | |
| "epoch": 1.0474503665870336, | |
| "grad_norm": 2.0169207725312703, | |
| "learning_rate": 6.9798537177925226e-06, | |
| "loss": 0.074, | |
| "step": 398, | |
| "ts_encoder_learning_rate": 6.965125158269619e-06 | |
| }, | |
| { | |
| "epoch": 1.050086498064091, | |
| "grad_norm": 1.9292523257890648, | |
| "learning_rate": 6.965125158269619e-06, | |
| "loss": 0.076, | |
| "step": 399, | |
| "ts_encoder_learning_rate": 6.950376404045235e-06 | |
| }, | |
| { | |
| "epoch": 1.0527226295411483, | |
| "grad_norm": 1.841218569643746, | |
| "learning_rate": 6.950376404045235e-06, | |
| "loss": 0.0815, | |
| "step": 400, | |
| "ts_encoder_learning_rate": 6.935607606685642e-06 | |
| }, | |
| { | |
| "epoch": 1.0553587610182058, | |
| "grad_norm": 1.3685510522725666, | |
| "learning_rate": 6.935607606685642e-06, | |
| "loss": 0.0649, | |
| "step": 401, | |
| "ts_encoder_learning_rate": 6.9208189179630805e-06 | |
| }, | |
| { | |
| "epoch": 1.0579948924952631, | |
| "grad_norm": 1.2255871348116065, | |
| "learning_rate": 6.9208189179630805e-06, | |
| "loss": 0.0751, | |
| "step": 402, | |
| "ts_encoder_learning_rate": 6.906010489854209e-06 | |
| }, | |
| { | |
| "epoch": 1.0606310239723207, | |
| "grad_norm": 2.25284044313197, | |
| "learning_rate": 6.906010489854209e-06, | |
| "loss": 0.0722, | |
| "step": 403, | |
| "ts_encoder_learning_rate": 6.891182474538539e-06 | |
| }, | |
| { | |
| "epoch": 1.063267155449378, | |
| "grad_norm": 1.574007278408115, | |
| "learning_rate": 6.891182474538539e-06, | |
| "loss": 0.0874, | |
| "step": 404, | |
| "ts_encoder_learning_rate": 6.876335024396872e-06 | |
| }, | |
| { | |
| "epoch": 1.0659032869264355, | |
| "grad_norm": 1.4368386388721925, | |
| "learning_rate": 6.876335024396872e-06, | |
| "loss": 0.064, | |
| "step": 405, | |
| "ts_encoder_learning_rate": 6.8614682920097265e-06 | |
| }, | |
| { | |
| "epoch": 1.0685394184034929, | |
| "grad_norm": 1.698997766946676, | |
| "learning_rate": 6.8614682920097265e-06, | |
| "loss": 0.0861, | |
| "step": 406, | |
| "ts_encoder_learning_rate": 6.846582430155783e-06 | |
| }, | |
| { | |
| "epoch": 1.0711755498805502, | |
| "grad_norm": 1.621405485095503, | |
| "learning_rate": 6.846582430155783e-06, | |
| "loss": 0.0616, | |
| "step": 407, | |
| "ts_encoder_learning_rate": 6.831677591810302e-06 | |
| }, | |
| { | |
| "epoch": 1.0738116813576077, | |
| "grad_norm": 1.7546607458906713, | |
| "learning_rate": 6.831677591810302e-06, | |
| "loss": 0.0634, | |
| "step": 408, | |
| "ts_encoder_learning_rate": 6.816753930143558e-06 | |
| }, | |
| { | |
| "epoch": 1.076447812834665, | |
| "grad_norm": 1.530962724670228, | |
| "learning_rate": 6.816753930143558e-06, | |
| "loss": 0.0605, | |
| "step": 409, | |
| "ts_encoder_learning_rate": 6.801811598519268e-06 | |
| }, | |
| { | |
| "epoch": 1.0790839443117226, | |
| "grad_norm": 1.4049855220372447, | |
| "learning_rate": 6.801811598519268e-06, | |
| "loss": 0.0651, | |
| "step": 410, | |
| "ts_encoder_learning_rate": 6.786850750493006e-06 | |
| }, | |
| { | |
| "epoch": 1.08172007578878, | |
| "grad_norm": 1.2835177716818393, | |
| "learning_rate": 6.786850750493006e-06, | |
| "loss": 0.0588, | |
| "step": 411, | |
| "ts_encoder_learning_rate": 6.771871539810633e-06 | |
| }, | |
| { | |
| "epoch": 1.0843562072658375, | |
| "grad_norm": 2.177159514710094, | |
| "learning_rate": 6.771871539810633e-06, | |
| "loss": 0.0714, | |
| "step": 412, | |
| "ts_encoder_learning_rate": 6.7568741204067145e-06 | |
| }, | |
| { | |
| "epoch": 1.0869923387428948, | |
| "grad_norm": 2.0968533031020744, | |
| "learning_rate": 6.7568741204067145e-06, | |
| "loss": 0.075, | |
| "step": 413, | |
| "ts_encoder_learning_rate": 6.741858646402941e-06 | |
| }, | |
| { | |
| "epoch": 1.0896284702199521, | |
| "grad_norm": 2.0444573124344996, | |
| "learning_rate": 6.741858646402941e-06, | |
| "loss": 0.0761, | |
| "step": 414, | |
| "ts_encoder_learning_rate": 6.726825272106539e-06 | |
| }, | |
| { | |
| "epoch": 1.0922646016970097, | |
| "grad_norm": 1.8927366457463946, | |
| "learning_rate": 6.726825272106539e-06, | |
| "loss": 0.0843, | |
| "step": 415, | |
| "ts_encoder_learning_rate": 6.71177415200869e-06 | |
| }, | |
| { | |
| "epoch": 1.094900733174067, | |
| "grad_norm": 1.5743826612232619, | |
| "learning_rate": 6.71177415200869e-06, | |
| "loss": 0.0669, | |
| "step": 416, | |
| "ts_encoder_learning_rate": 6.696705440782939e-06 | |
| }, | |
| { | |
| "epoch": 1.0975368646511245, | |
| "grad_norm": 1.9849832173789321, | |
| "learning_rate": 6.696705440782939e-06, | |
| "loss": 0.0606, | |
| "step": 417, | |
| "ts_encoder_learning_rate": 6.68161929328361e-06 | |
| }, | |
| { | |
| "epoch": 1.1001729961281819, | |
| "grad_norm": 1.2161772899273893, | |
| "learning_rate": 6.68161929328361e-06, | |
| "loss": 0.0598, | |
| "step": 418, | |
| "ts_encoder_learning_rate": 6.66651586454421e-06 | |
| }, | |
| { | |
| "epoch": 1.1028091276052394, | |
| "grad_norm": 1.2393164520057445, | |
| "learning_rate": 6.66651586454421e-06, | |
| "loss": 0.0692, | |
| "step": 419, | |
| "ts_encoder_learning_rate": 6.651395309775837e-06 | |
| }, | |
| { | |
| "epoch": 1.1054452590822967, | |
| "grad_norm": 1.4520060898019855, | |
| "learning_rate": 6.651395309775837e-06, | |
| "loss": 0.0692, | |
| "step": 420, | |
| "ts_encoder_learning_rate": 6.636257784365585e-06 | |
| }, | |
| { | |
| "epoch": 1.108081390559354, | |
| "grad_norm": 2.071887988162363, | |
| "learning_rate": 6.636257784365585e-06, | |
| "loss": 0.0817, | |
| "step": 421, | |
| "ts_encoder_learning_rate": 6.62110344387495e-06 | |
| }, | |
| { | |
| "epoch": 1.1107175220364116, | |
| "grad_norm": 1.4450804845065022, | |
| "learning_rate": 6.62110344387495e-06, | |
| "loss": 0.0615, | |
| "step": 422, | |
| "ts_encoder_learning_rate": 6.605932444038229e-06 | |
| }, | |
| { | |
| "epoch": 1.113353653513469, | |
| "grad_norm": 4.131996313423228, | |
| "learning_rate": 6.605932444038229e-06, | |
| "loss": 0.0567, | |
| "step": 423, | |
| "ts_encoder_learning_rate": 6.5907449407609145e-06 | |
| }, | |
| { | |
| "epoch": 1.1159897849905265, | |
| "grad_norm": 1.3469924411982919, | |
| "learning_rate": 6.5907449407609145e-06, | |
| "loss": 0.0688, | |
| "step": 424, | |
| "ts_encoder_learning_rate": 6.575541090118105e-06 | |
| }, | |
| { | |
| "epoch": 1.1186259164675838, | |
| "grad_norm": 1.676545292506096, | |
| "learning_rate": 6.575541090118105e-06, | |
| "loss": 0.0707, | |
| "step": 425, | |
| "ts_encoder_learning_rate": 6.5603210483528864e-06 | |
| }, | |
| { | |
| "epoch": 1.1212620479446413, | |
| "grad_norm": 2.1293469373373086, | |
| "learning_rate": 6.5603210483528864e-06, | |
| "loss": 0.0783, | |
| "step": 426, | |
| "ts_encoder_learning_rate": 6.545084971874738e-06 | |
| }, | |
| { | |
| "epoch": 1.1238981794216987, | |
| "grad_norm": 2.1545361587555893, | |
| "learning_rate": 6.545084971874738e-06, | |
| "loss": 0.0662, | |
| "step": 427, | |
| "ts_encoder_learning_rate": 6.529833017257919e-06 | |
| }, | |
| { | |
| "epoch": 1.126534310898756, | |
| "grad_norm": 1.5431217078868176, | |
| "learning_rate": 6.529833017257919e-06, | |
| "loss": 0.0665, | |
| "step": 428, | |
| "ts_encoder_learning_rate": 6.514565341239861e-06 | |
| }, | |
| { | |
| "epoch": 1.1291704423758135, | |
| "grad_norm": 1.6871162084026083, | |
| "learning_rate": 6.514565341239861e-06, | |
| "loss": 0.0609, | |
| "step": 429, | |
| "ts_encoder_learning_rate": 6.499282100719558e-06 | |
| }, | |
| { | |
| "epoch": 1.1318065738528709, | |
| "grad_norm": 1.3008708089844792, | |
| "learning_rate": 6.499282100719558e-06, | |
| "loss": 0.0669, | |
| "step": 430, | |
| "ts_encoder_learning_rate": 6.483983452755953e-06 | |
| }, | |
| { | |
| "epoch": 1.1344427053299284, | |
| "grad_norm": 1.31908563414336, | |
| "learning_rate": 6.483983452755953e-06, | |
| "loss": 0.0681, | |
| "step": 431, | |
| "ts_encoder_learning_rate": 6.468669554566324e-06 | |
| }, | |
| { | |
| "epoch": 1.1370788368069857, | |
| "grad_norm": 2.2211285641938403, | |
| "learning_rate": 6.468669554566324e-06, | |
| "loss": 0.0732, | |
| "step": 432, | |
| "ts_encoder_learning_rate": 6.4533405635246696e-06 | |
| }, | |
| { | |
| "epoch": 1.1397149682840433, | |
| "grad_norm": 2.0753940060908413, | |
| "learning_rate": 6.4533405635246696e-06, | |
| "loss": 0.0663, | |
| "step": 433, | |
| "ts_encoder_learning_rate": 6.437996637160086e-06 | |
| }, | |
| { | |
| "epoch": 1.1423510997611006, | |
| "grad_norm": 1.4786983123231945, | |
| "learning_rate": 6.437996637160086e-06, | |
| "loss": 0.0546, | |
| "step": 434, | |
| "ts_encoder_learning_rate": 6.4226379331551625e-06 | |
| }, | |
| { | |
| "epoch": 1.144987231238158, | |
| "grad_norm": 1.5876247856249708, | |
| "learning_rate": 6.4226379331551625e-06, | |
| "loss": 0.0735, | |
| "step": 435, | |
| "ts_encoder_learning_rate": 6.407264609344344e-06 | |
| }, | |
| { | |
| "epoch": 1.1476233627152155, | |
| "grad_norm": 1.670327323230943, | |
| "learning_rate": 6.407264609344344e-06, | |
| "loss": 0.0604, | |
| "step": 436, | |
| "ts_encoder_learning_rate": 6.3918768237123175e-06 | |
| }, | |
| { | |
| "epoch": 1.1502594941922728, | |
| "grad_norm": 1.7007074105089541, | |
| "learning_rate": 6.3918768237123175e-06, | |
| "loss": 0.0506, | |
| "step": 437, | |
| "ts_encoder_learning_rate": 6.376474734392388e-06 | |
| }, | |
| { | |
| "epoch": 1.1528956256693303, | |
| "grad_norm": 0.8723701532963661, | |
| "learning_rate": 6.376474734392388e-06, | |
| "loss": 0.051, | |
| "step": 438, | |
| "ts_encoder_learning_rate": 6.361058499664856e-06 | |
| }, | |
| { | |
| "epoch": 1.1555317571463877, | |
| "grad_norm": 1.2073690713191445, | |
| "learning_rate": 6.361058499664856e-06, | |
| "loss": 0.0658, | |
| "step": 439, | |
| "ts_encoder_learning_rate": 6.345628277955384e-06 | |
| }, | |
| { | |
| "epoch": 1.1581678886234452, | |
| "grad_norm": 4.282100892926092, | |
| "learning_rate": 6.345628277955384e-06, | |
| "loss": 0.0638, | |
| "step": 440, | |
| "ts_encoder_learning_rate": 6.330184227833376e-06 | |
| }, | |
| { | |
| "epoch": 1.1608040201005025, | |
| "grad_norm": 1.0987411320570284, | |
| "learning_rate": 6.330184227833376e-06, | |
| "loss": 0.058, | |
| "step": 441, | |
| "ts_encoder_learning_rate": 6.3147265080103405e-06 | |
| }, | |
| { | |
| "epoch": 1.1634401515775599, | |
| "grad_norm": 0.9366573260467199, | |
| "learning_rate": 6.3147265080103405e-06, | |
| "loss": 0.0473, | |
| "step": 442, | |
| "ts_encoder_learning_rate": 6.299255277338265e-06 | |
| }, | |
| { | |
| "epoch": 1.1660762830546174, | |
| "grad_norm": 6.839529381634898, | |
| "learning_rate": 6.299255277338265e-06, | |
| "loss": 0.1152, | |
| "step": 443, | |
| "ts_encoder_learning_rate": 6.283770694807983e-06 | |
| }, | |
| { | |
| "epoch": 1.1687124145316747, | |
| "grad_norm": 6.3594435572609305, | |
| "learning_rate": 6.283770694807983e-06, | |
| "loss": 0.093, | |
| "step": 444, | |
| "ts_encoder_learning_rate": 6.268272919547537e-06 | |
| }, | |
| { | |
| "epoch": 1.171348546008732, | |
| "grad_norm": 40.08684333591222, | |
| "learning_rate": 6.268272919547537e-06, | |
| "loss": 0.0962, | |
| "step": 445, | |
| "ts_encoder_learning_rate": 6.252762110820548e-06 | |
| }, | |
| { | |
| "epoch": 1.1739846774857896, | |
| "grad_norm": 2.943023237687571, | |
| "learning_rate": 6.252762110820548e-06, | |
| "loss": 0.0901, | |
| "step": 446, | |
| "ts_encoder_learning_rate": 6.237238428024573e-06 | |
| }, | |
| { | |
| "epoch": 1.176620808962847, | |
| "grad_norm": 2.3992218806525445, | |
| "learning_rate": 6.237238428024573e-06, | |
| "loss": 0.0862, | |
| "step": 447, | |
| "ts_encoder_learning_rate": 6.2217020306894705e-06 | |
| }, | |
| { | |
| "epoch": 1.1792569404399045, | |
| "grad_norm": 5.0353855751257415, | |
| "learning_rate": 6.2217020306894705e-06, | |
| "loss": 0.0668, | |
| "step": 448, | |
| "ts_encoder_learning_rate": 6.2061530784757625e-06 | |
| }, | |
| { | |
| "epoch": 1.1818930719169618, | |
| "grad_norm": 1.8328704649956604, | |
| "learning_rate": 6.2061530784757625e-06, | |
| "loss": 0.0738, | |
| "step": 449, | |
| "ts_encoder_learning_rate": 6.1905917311729915e-06 | |
| }, | |
| { | |
| "epoch": 1.1845292033940193, | |
| "grad_norm": 1.5717279092672578, | |
| "learning_rate": 6.1905917311729915e-06, | |
| "loss": 0.0603, | |
| "step": 450, | |
| "ts_encoder_learning_rate": 6.175018148698077e-06 | |
| }, | |
| { | |
| "epoch": 1.1871653348710767, | |
| "grad_norm": 2.392989669488609, | |
| "learning_rate": 6.175018148698077e-06, | |
| "loss": 0.0524, | |
| "step": 451, | |
| "ts_encoder_learning_rate": 6.1594324910936734e-06 | |
| }, | |
| { | |
| "epoch": 1.189801466348134, | |
| "grad_norm": 1.5764592052091633, | |
| "learning_rate": 6.1594324910936734e-06, | |
| "loss": 0.0621, | |
| "step": 452, | |
| "ts_encoder_learning_rate": 6.143834918526528e-06 | |
| }, | |
| { | |
| "epoch": 1.1924375978251915, | |
| "grad_norm": 1.8238472425314327, | |
| "learning_rate": 6.143834918526528e-06, | |
| "loss": 0.0622, | |
| "step": 453, | |
| "ts_encoder_learning_rate": 6.1282255912858315e-06 | |
| }, | |
| { | |
| "epoch": 1.1950737293022489, | |
| "grad_norm": 2.8133947110243485, | |
| "learning_rate": 6.1282255912858315e-06, | |
| "loss": 0.0862, | |
| "step": 454, | |
| "ts_encoder_learning_rate": 6.112604669781572e-06 | |
| }, | |
| { | |
| "epoch": 1.1977098607793064, | |
| "grad_norm": 1.3652304574964094, | |
| "learning_rate": 6.112604669781572e-06, | |
| "loss": 0.0527, | |
| "step": 455, | |
| "ts_encoder_learning_rate": 6.096972314542889e-06 | |
| }, | |
| { | |
| "epoch": 1.2003459922563637, | |
| "grad_norm": 1.0715660143813739, | |
| "learning_rate": 6.096972314542889e-06, | |
| "loss": 0.0709, | |
| "step": 456, | |
| "ts_encoder_learning_rate": 6.0813286862164175e-06 | |
| }, | |
| { | |
| "epoch": 1.2029821237334213, | |
| "grad_norm": 1.5558524275638925, | |
| "learning_rate": 6.0813286862164175e-06, | |
| "loss": 0.0423, | |
| "step": 457, | |
| "ts_encoder_learning_rate": 6.065673945564643e-06 | |
| }, | |
| { | |
| "epoch": 1.2056182552104786, | |
| "grad_norm": 1.6176907363538375, | |
| "learning_rate": 6.065673945564643e-06, | |
| "loss": 0.0622, | |
| "step": 458, | |
| "ts_encoder_learning_rate": 6.050008253464247e-06 | |
| }, | |
| { | |
| "epoch": 1.208254386687536, | |
| "grad_norm": 0.8564869594899379, | |
| "learning_rate": 6.050008253464247e-06, | |
| "loss": 0.0455, | |
| "step": 459, | |
| "ts_encoder_learning_rate": 6.034331770904455e-06 | |
| }, | |
| { | |
| "epoch": 1.2108905181645935, | |
| "grad_norm": 1.1035352728404064, | |
| "learning_rate": 6.034331770904455e-06, | |
| "loss": 0.0563, | |
| "step": 460, | |
| "ts_encoder_learning_rate": 6.018644658985378e-06 | |
| }, | |
| { | |
| "epoch": 1.2135266496416508, | |
| "grad_norm": 1.5616128959288538, | |
| "learning_rate": 6.018644658985378e-06, | |
| "loss": 0.0539, | |
| "step": 461, | |
| "ts_encoder_learning_rate": 6.002947078916365e-06 | |
| }, | |
| { | |
| "epoch": 1.2161627811187083, | |
| "grad_norm": 1.8418037108772232, | |
| "learning_rate": 6.002947078916365e-06, | |
| "loss": 0.0659, | |
| "step": 462, | |
| "ts_encoder_learning_rate": 5.987239192014336e-06 | |
| }, | |
| { | |
| "epoch": 1.2187989125957657, | |
| "grad_norm": 1.6148182637272608, | |
| "learning_rate": 5.987239192014336e-06, | |
| "loss": 0.0486, | |
| "step": 463, | |
| "ts_encoder_learning_rate": 5.971521159702136e-06 | |
| }, | |
| { | |
| "epoch": 1.2214350440728232, | |
| "grad_norm": 1.1194159742020753, | |
| "learning_rate": 5.971521159702136e-06, | |
| "loss": 0.0517, | |
| "step": 464, | |
| "ts_encoder_learning_rate": 5.955793143506863e-06 | |
| }, | |
| { | |
| "epoch": 1.2240711755498805, | |
| "grad_norm": 2.5603301388490065, | |
| "learning_rate": 5.955793143506863e-06, | |
| "loss": 0.0792, | |
| "step": 465, | |
| "ts_encoder_learning_rate": 5.940055305058219e-06 | |
| }, | |
| { | |
| "epoch": 1.2267073070269379, | |
| "grad_norm": 2.805962948191509, | |
| "learning_rate": 5.940055305058219e-06, | |
| "loss": 0.0495, | |
| "step": 466, | |
| "ts_encoder_learning_rate": 5.9243078060868445e-06 | |
| }, | |
| { | |
| "epoch": 1.2293434385039954, | |
| "grad_norm": 1.1698319176717191, | |
| "learning_rate": 5.9243078060868445e-06, | |
| "loss": 0.0469, | |
| "step": 467, | |
| "ts_encoder_learning_rate": 5.908550808422656e-06 | |
| }, | |
| { | |
| "epoch": 1.2319795699810527, | |
| "grad_norm": 2.0037504801434176, | |
| "learning_rate": 5.908550808422656e-06, | |
| "loss": 0.0861, | |
| "step": 468, | |
| "ts_encoder_learning_rate": 5.892784473993184e-06 | |
| }, | |
| { | |
| "epoch": 1.2346157014581103, | |
| "grad_norm": 1.723869314498301, | |
| "learning_rate": 5.892784473993184e-06, | |
| "loss": 0.0593, | |
| "step": 469, | |
| "ts_encoder_learning_rate": 5.877008964821909e-06 | |
| }, | |
| { | |
| "epoch": 1.2372518329351676, | |
| "grad_norm": 1.6055066461843874, | |
| "learning_rate": 5.877008964821909e-06, | |
| "loss": 0.0626, | |
| "step": 470, | |
| "ts_encoder_learning_rate": 5.861224443026595e-06 | |
| }, | |
| { | |
| "epoch": 1.2398879644122252, | |
| "grad_norm": 1.5661164699031636, | |
| "learning_rate": 5.861224443026595e-06, | |
| "loss": 0.0602, | |
| "step": 471, | |
| "ts_encoder_learning_rate": 5.845431070817627e-06 | |
| }, | |
| { | |
| "epoch": 1.2425240958892825, | |
| "grad_norm": 1.0282805477784254, | |
| "learning_rate": 5.845431070817627e-06, | |
| "loss": 0.0524, | |
| "step": 472, | |
| "ts_encoder_learning_rate": 5.82962901049634e-06 | |
| }, | |
| { | |
| "epoch": 1.2451602273663398, | |
| "grad_norm": 1.7894572815574583, | |
| "learning_rate": 5.82962901049634e-06, | |
| "loss": 0.0571, | |
| "step": 473, | |
| "ts_encoder_learning_rate": 5.8138184244533516e-06 | |
| }, | |
| { | |
| "epoch": 1.2477963588433973, | |
| "grad_norm": 1.06562374064391, | |
| "learning_rate": 5.8138184244533516e-06, | |
| "loss": 0.057, | |
| "step": 474, | |
| "ts_encoder_learning_rate": 5.797999475166897e-06 | |
| }, | |
| { | |
| "epoch": 1.2504324903204547, | |
| "grad_norm": 2.604409235331405, | |
| "learning_rate": 5.797999475166897e-06, | |
| "loss": 0.0704, | |
| "step": 475, | |
| "ts_encoder_learning_rate": 5.782172325201155e-06 | |
| }, | |
| { | |
| "epoch": 1.2530686217975122, | |
| "grad_norm": 1.0601741954985433, | |
| "learning_rate": 5.782172325201155e-06, | |
| "loss": 0.0519, | |
| "step": 476, | |
| "ts_encoder_learning_rate": 5.766337137204579e-06 | |
| }, | |
| { | |
| "epoch": 1.2557047532745695, | |
| "grad_norm": 1.4951262075969727, | |
| "learning_rate": 5.766337137204579e-06, | |
| "loss": 0.0649, | |
| "step": 477, | |
| "ts_encoder_learning_rate": 5.7504940739082305e-06 | |
| }, | |
| { | |
| "epoch": 1.258340884751627, | |
| "grad_norm": 2.0377946241283964, | |
| "learning_rate": 5.7504940739082305e-06, | |
| "loss": 0.0709, | |
| "step": 478, | |
| "ts_encoder_learning_rate": 5.734643298124091e-06 | |
| }, | |
| { | |
| "epoch": 1.2609770162286844, | |
| "grad_norm": 1.3627445943335548, | |
| "learning_rate": 5.734643298124091e-06, | |
| "loss": 0.0597, | |
| "step": 479, | |
| "ts_encoder_learning_rate": 5.71878497274341e-06 | |
| }, | |
| { | |
| "epoch": 1.2636131477057417, | |
| "grad_norm": 1.514945622789671, | |
| "learning_rate": 5.71878497274341e-06, | |
| "loss": 0.0621, | |
| "step": 480, | |
| "ts_encoder_learning_rate": 5.702919260735015e-06 | |
| }, | |
| { | |
| "epoch": 1.2662492791827993, | |
| "grad_norm": 1.6326473802607846, | |
| "learning_rate": 5.702919260735015e-06, | |
| "loss": 0.0546, | |
| "step": 481, | |
| "ts_encoder_learning_rate": 5.6870463251436485e-06 | |
| }, | |
| { | |
| "epoch": 1.2688854106598566, | |
| "grad_norm": 1.1610484009982354, | |
| "learning_rate": 5.6870463251436485e-06, | |
| "loss": 0.0721, | |
| "step": 482, | |
| "ts_encoder_learning_rate": 5.671166329088278e-06 | |
| }, | |
| { | |
| "epoch": 1.2715215421369142, | |
| "grad_norm": 9.497222320275062, | |
| "learning_rate": 5.671166329088278e-06, | |
| "loss": 0.0622, | |
| "step": 483, | |
| "ts_encoder_learning_rate": 5.655279435760436e-06 | |
| }, | |
| { | |
| "epoch": 1.2741576736139715, | |
| "grad_norm": 2.5943554438763345, | |
| "learning_rate": 5.655279435760436e-06, | |
| "loss": 0.0611, | |
| "step": 484, | |
| "ts_encoder_learning_rate": 5.6393858084225305e-06 | |
| }, | |
| { | |
| "epoch": 1.276793805091029, | |
| "grad_norm": 1.7043776646620947, | |
| "learning_rate": 5.6393858084225305e-06, | |
| "loss": 0.0557, | |
| "step": 485, | |
| "ts_encoder_learning_rate": 5.623485610406174e-06 | |
| }, | |
| { | |
| "epoch": 1.2794299365680863, | |
| "grad_norm": 3.8980670638726176, | |
| "learning_rate": 5.623485610406174e-06, | |
| "loss": 0.0645, | |
| "step": 486, | |
| "ts_encoder_learning_rate": 5.6075790051105025e-06 | |
| }, | |
| { | |
| "epoch": 1.2820660680451437, | |
| "grad_norm": 3.6504547162269523, | |
| "learning_rate": 5.6075790051105025e-06, | |
| "loss": 0.0708, | |
| "step": 487, | |
| "ts_encoder_learning_rate": 5.5916661560004945e-06 | |
| }, | |
| { | |
| "epoch": 1.2847021995222012, | |
| "grad_norm": 5.021699015115789, | |
| "learning_rate": 5.5916661560004945e-06, | |
| "loss": 0.0595, | |
| "step": 488, | |
| "ts_encoder_learning_rate": 5.575747226605298e-06 | |
| }, | |
| { | |
| "epoch": 1.2873383309992585, | |
| "grad_norm": 1.8447348562525032, | |
| "learning_rate": 5.575747226605298e-06, | |
| "loss": 0.0692, | |
| "step": 489, | |
| "ts_encoder_learning_rate": 5.559822380516539e-06 | |
| }, | |
| { | |
| "epoch": 1.289974462476316, | |
| "grad_norm": 1.8812523455034513, | |
| "learning_rate": 5.559822380516539e-06, | |
| "loss": 0.0539, | |
| "step": 490, | |
| "ts_encoder_learning_rate": 5.543891781386655e-06 | |
| }, | |
| { | |
| "epoch": 1.2926105939533734, | |
| "grad_norm": 1.506328585661642, | |
| "learning_rate": 5.543891781386655e-06, | |
| "loss": 0.0398, | |
| "step": 491, | |
| "ts_encoder_learning_rate": 5.527955592927198e-06 | |
| }, | |
| { | |
| "epoch": 1.295246725430431, | |
| "grad_norm": 1.8603503730820898, | |
| "learning_rate": 5.527955592927198e-06, | |
| "loss": 0.0585, | |
| "step": 492, | |
| "ts_encoder_learning_rate": 5.512013978907157e-06 | |
| }, | |
| { | |
| "epoch": 1.2978828569074883, | |
| "grad_norm": 2.4648081055680846, | |
| "learning_rate": 5.512013978907157e-06, | |
| "loss": 0.058, | |
| "step": 493, | |
| "ts_encoder_learning_rate": 5.496067103151288e-06 | |
| }, | |
| { | |
| "epoch": 1.3005189883845456, | |
| "grad_norm": 1.4041012620060673, | |
| "learning_rate": 5.496067103151288e-06, | |
| "loss": 0.0443, | |
| "step": 494, | |
| "ts_encoder_learning_rate": 5.480115129538409e-06 | |
| }, | |
| { | |
| "epoch": 1.3031551198616032, | |
| "grad_norm": 1.3893300223548792, | |
| "learning_rate": 5.480115129538409e-06, | |
| "loss": 0.0433, | |
| "step": 495, | |
| "ts_encoder_learning_rate": 5.464158221999731e-06 | |
| }, | |
| { | |
| "epoch": 1.3057912513386605, | |
| "grad_norm": 1.2015614710865317, | |
| "learning_rate": 5.464158221999731e-06, | |
| "loss": 0.0483, | |
| "step": 496, | |
| "ts_encoder_learning_rate": 5.448196544517168e-06 | |
| }, | |
| { | |
| "epoch": 1.3084273828157178, | |
| "grad_norm": 1.3699932224551299, | |
| "learning_rate": 5.448196544517168e-06, | |
| "loss": 0.0476, | |
| "step": 497, | |
| "ts_encoder_learning_rate": 5.4322302611216515e-06 | |
| }, | |
| { | |
| "epoch": 1.3110635142927753, | |
| "grad_norm": 1.3403881800344724, | |
| "learning_rate": 5.4322302611216515e-06, | |
| "loss": 0.0564, | |
| "step": 498, | |
| "ts_encoder_learning_rate": 5.4162595358914475e-06 | |
| }, | |
| { | |
| "epoch": 1.313699645769833, | |
| "grad_norm": 73.58179677044741, | |
| "learning_rate": 5.4162595358914475e-06, | |
| "loss": 0.0646, | |
| "step": 499, | |
| "ts_encoder_learning_rate": 5.4002845329504675e-06 | |
| }, | |
| { | |
| "epoch": 1.3163357772468902, | |
| "grad_norm": 0.7574363377553233, | |
| "learning_rate": 5.4002845329504675e-06, | |
| "loss": 0.0572, | |
| "step": 500, | |
| "ts_encoder_learning_rate": 5.384305416466584e-06 | |
| }, | |
| { | |
| "epoch": 1.3189719087239475, | |
| "grad_norm": 9.085144033066042, | |
| "learning_rate": 5.384305416466584e-06, | |
| "loss": 0.0455, | |
| "step": 501, | |
| "ts_encoder_learning_rate": 5.368322350649942e-06 | |
| }, | |
| { | |
| "epoch": 1.321608040201005, | |
| "grad_norm": 1.5134736470746724, | |
| "learning_rate": 5.368322350649942e-06, | |
| "loss": 0.0643, | |
| "step": 502, | |
| "ts_encoder_learning_rate": 5.35233549975127e-06 | |
| }, | |
| { | |
| "epoch": 1.3242441716780624, | |
| "grad_norm": 7.310424680388469, | |
| "learning_rate": 5.35233549975127e-06, | |
| "loss": 0.061, | |
| "step": 503, | |
| "ts_encoder_learning_rate": 5.336345028060199e-06 | |
| }, | |
| { | |
| "epoch": 1.3268803031551197, | |
| "grad_norm": 1.1323514407618245, | |
| "learning_rate": 5.336345028060199e-06, | |
| "loss": 0.0549, | |
| "step": 504, | |
| "ts_encoder_learning_rate": 5.320351099903565e-06 | |
| }, | |
| { | |
| "epoch": 1.3295164346321773, | |
| "grad_norm": 1.6279841201077867, | |
| "learning_rate": 5.320351099903565e-06, | |
| "loss": 0.0613, | |
| "step": 505, | |
| "ts_encoder_learning_rate": 5.304353879643727e-06 | |
| }, | |
| { | |
| "epoch": 1.3321525661092348, | |
| "grad_norm": 1.3523619947920968, | |
| "learning_rate": 5.304353879643727e-06, | |
| "loss": 0.0472, | |
| "step": 506, | |
| "ts_encoder_learning_rate": 5.288353531676873e-06 | |
| }, | |
| { | |
| "epoch": 1.3347886975862921, | |
| "grad_norm": 1.6311287951575122, | |
| "learning_rate": 5.288353531676873e-06, | |
| "loss": 0.0449, | |
| "step": 507, | |
| "ts_encoder_learning_rate": 5.2723502204313346e-06 | |
| }, | |
| { | |
| "epoch": 1.3374248290633495, | |
| "grad_norm": 1.254954921395093, | |
| "learning_rate": 5.2723502204313346e-06, | |
| "loss": 0.0468, | |
| "step": 508, | |
| "ts_encoder_learning_rate": 5.256344110365896e-06 | |
| }, | |
| { | |
| "epoch": 1.340060960540407, | |
| "grad_norm": 1.4319071308303999, | |
| "learning_rate": 5.256344110365896e-06, | |
| "loss": 0.047, | |
| "step": 509, | |
| "ts_encoder_learning_rate": 5.240335365968104e-06 | |
| }, | |
| { | |
| "epoch": 1.3426970920174643, | |
| "grad_norm": 1.8236696801665577, | |
| "learning_rate": 5.240335365968104e-06, | |
| "loss": 0.0514, | |
| "step": 510, | |
| "ts_encoder_learning_rate": 5.224324151752575e-06 | |
| }, | |
| { | |
| "epoch": 1.3453332234945217, | |
| "grad_norm": 1.9778928788592056, | |
| "learning_rate": 5.224324151752575e-06, | |
| "loss": 0.052, | |
| "step": 511, | |
| "ts_encoder_learning_rate": 5.208310632259308e-06 | |
| }, | |
| { | |
| "epoch": 1.3479693549715792, | |
| "grad_norm": 1.417312718751431, | |
| "learning_rate": 5.208310632259308e-06, | |
| "loss": 0.0477, | |
| "step": 512, | |
| "ts_encoder_learning_rate": 5.192294972051992e-06 | |
| }, | |
| { | |
| "epoch": 1.3506054864486368, | |
| "grad_norm": 2.0176988121139523, | |
| "learning_rate": 5.192294972051992e-06, | |
| "loss": 0.0439, | |
| "step": 513, | |
| "ts_encoder_learning_rate": 5.1762773357163175e-06 | |
| }, | |
| { | |
| "epoch": 1.353241617925694, | |
| "grad_norm": 1.8355976352392431, | |
| "learning_rate": 5.1762773357163175e-06, | |
| "loss": 0.0657, | |
| "step": 514, | |
| "ts_encoder_learning_rate": 5.160257887858278e-06 | |
| }, | |
| { | |
| "epoch": 1.3558777494027514, | |
| "grad_norm": 1.6689128377623292, | |
| "learning_rate": 5.160257887858278e-06, | |
| "loss": 0.0557, | |
| "step": 515, | |
| "ts_encoder_learning_rate": 5.144236793102485e-06 | |
| }, | |
| { | |
| "epoch": 1.358513880879809, | |
| "grad_norm": 1.8005326862964846, | |
| "learning_rate": 5.144236793102485e-06, | |
| "loss": 0.0538, | |
| "step": 516, | |
| "ts_encoder_learning_rate": 5.128214216090478e-06 | |
| }, | |
| { | |
| "epoch": 1.3611500123568663, | |
| "grad_norm": 1.633570103313834, | |
| "learning_rate": 5.128214216090478e-06, | |
| "loss": 0.0448, | |
| "step": 517, | |
| "ts_encoder_learning_rate": 5.112190321479026e-06 | |
| }, | |
| { | |
| "epoch": 1.3637861438339236, | |
| "grad_norm": 1.207340276387567, | |
| "learning_rate": 5.112190321479026e-06, | |
| "loss": 0.0418, | |
| "step": 518, | |
| "ts_encoder_learning_rate": 5.0961652739384356e-06 | |
| }, | |
| { | |
| "epoch": 1.3664222753109811, | |
| "grad_norm": 0.8605880311313963, | |
| "learning_rate": 5.0961652739384356e-06, | |
| "loss": 0.051, | |
| "step": 519, | |
| "ts_encoder_learning_rate": 5.080139238150869e-06 | |
| }, | |
| { | |
| "epoch": 1.3690584067880385, | |
| "grad_norm": 1.0552622999396024, | |
| "learning_rate": 5.080139238150869e-06, | |
| "loss": 0.0545, | |
| "step": 520, | |
| "ts_encoder_learning_rate": 5.064112378808636e-06 | |
| }, | |
| { | |
| "epoch": 1.371694538265096, | |
| "grad_norm": 1.7431125427812204, | |
| "learning_rate": 5.064112378808636e-06, | |
| "loss": 0.0544, | |
| "step": 521, | |
| "ts_encoder_learning_rate": 5.048084860612516e-06 | |
| }, | |
| { | |
| "epoch": 1.3743306697421533, | |
| "grad_norm": 1.4698730704414043, | |
| "learning_rate": 5.048084860612516e-06, | |
| "loss": 0.0639, | |
| "step": 522, | |
| "ts_encoder_learning_rate": 5.032056848270056e-06 | |
| }, | |
| { | |
| "epoch": 1.376966801219211, | |
| "grad_norm": 1.8450236243661535, | |
| "learning_rate": 5.032056848270056e-06, | |
| "loss": 0.0467, | |
| "step": 523, | |
| "ts_encoder_learning_rate": 5.016028506493881e-06 | |
| }, | |
| { | |
| "epoch": 1.3796029326962682, | |
| "grad_norm": 2.052864394366136, | |
| "learning_rate": 5.016028506493881e-06, | |
| "loss": 0.0491, | |
| "step": 524, | |
| "ts_encoder_learning_rate": 5e-06 | |
| }, | |
| { | |
| "epoch": 1.3822390641733255, | |
| "grad_norm": 1.5026027017438155, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0464, | |
| "step": 525, | |
| "ts_encoder_learning_rate": 4.9839714935061215e-06 | |
| }, | |
| { | |
| "epoch": 1.384875195650383, | |
| "grad_norm": 1.153030381312369, | |
| "learning_rate": 4.9839714935061215e-06, | |
| "loss": 0.0419, | |
| "step": 526, | |
| "ts_encoder_learning_rate": 4.967943151729945e-06 | |
| }, | |
| { | |
| "epoch": 1.3875113271274404, | |
| "grad_norm": 1.4490249141706013, | |
| "learning_rate": 4.967943151729945e-06, | |
| "loss": 0.0603, | |
| "step": 527, | |
| "ts_encoder_learning_rate": 4.951915139387484e-06 | |
| }, | |
| { | |
| "epoch": 1.390147458604498, | |
| "grad_norm": 1.4564680397959866, | |
| "learning_rate": 4.951915139387484e-06, | |
| "loss": 0.0477, | |
| "step": 528, | |
| "ts_encoder_learning_rate": 4.935887621191364e-06 | |
| }, | |
| { | |
| "epoch": 1.3927835900815553, | |
| "grad_norm": 1.3964892258738686, | |
| "learning_rate": 4.935887621191364e-06, | |
| "loss": 0.0528, | |
| "step": 529, | |
| "ts_encoder_learning_rate": 4.919860761849132e-06 | |
| }, | |
| { | |
| "epoch": 1.3954197215586128, | |
| "grad_norm": 6.281977322231947, | |
| "learning_rate": 4.919860761849132e-06, | |
| "loss": 0.0548, | |
| "step": 530, | |
| "ts_encoder_learning_rate": 4.903834726061565e-06 | |
| }, | |
| { | |
| "epoch": 1.3980558530356701, | |
| "grad_norm": 1.3837480562779865, | |
| "learning_rate": 4.903834726061565e-06, | |
| "loss": 0.0455, | |
| "step": 531, | |
| "ts_encoder_learning_rate": 4.887809678520976e-06 | |
| }, | |
| { | |
| "epoch": 1.4006919845127275, | |
| "grad_norm": 1.0021501716084598, | |
| "learning_rate": 4.887809678520976e-06, | |
| "loss": 0.0494, | |
| "step": 532, | |
| "ts_encoder_learning_rate": 4.871785783909523e-06 | |
| }, | |
| { | |
| "epoch": 1.403328115989785, | |
| "grad_norm": 1.151715988041567, | |
| "learning_rate": 4.871785783909523e-06, | |
| "loss": 0.0433, | |
| "step": 533, | |
| "ts_encoder_learning_rate": 4.855763206897516e-06 | |
| }, | |
| { | |
| "epoch": 1.4059642474668423, | |
| "grad_norm": 0.858749288851531, | |
| "learning_rate": 4.855763206897516e-06, | |
| "loss": 0.0362, | |
| "step": 534, | |
| "ts_encoder_learning_rate": 4.839742112141725e-06 | |
| }, | |
| { | |
| "epoch": 1.4086003789439, | |
| "grad_norm": 1.9221750234266837, | |
| "learning_rate": 4.839742112141725e-06, | |
| "loss": 0.0552, | |
| "step": 535, | |
| "ts_encoder_learning_rate": 4.823722664283684e-06 | |
| }, | |
| { | |
| "epoch": 1.4112365104209572, | |
| "grad_norm": 2.0598775899946347, | |
| "learning_rate": 4.823722664283684e-06, | |
| "loss": 0.0612, | |
| "step": 536, | |
| "ts_encoder_learning_rate": 4.807705027948008e-06 | |
| }, | |
| { | |
| "epoch": 1.4138726418980148, | |
| "grad_norm": 11.902681310300874, | |
| "learning_rate": 4.807705027948008e-06, | |
| "loss": 0.0572, | |
| "step": 537, | |
| "ts_encoder_learning_rate": 4.7916893677406925e-06 | |
| }, | |
| { | |
| "epoch": 1.416508773375072, | |
| "grad_norm": 1.4839974850899151, | |
| "learning_rate": 4.7916893677406925e-06, | |
| "loss": 0.0671, | |
| "step": 538, | |
| "ts_encoder_learning_rate": 4.775675848247427e-06 | |
| }, | |
| { | |
| "epoch": 1.4191449048521294, | |
| "grad_norm": 3.177751373971005, | |
| "learning_rate": 4.775675848247427e-06, | |
| "loss": 0.0592, | |
| "step": 539, | |
| "ts_encoder_learning_rate": 4.759664634031897e-06 | |
| }, | |
| { | |
| "epoch": 1.421781036329187, | |
| "grad_norm": 2.8690250587273494, | |
| "learning_rate": 4.759664634031897e-06, | |
| "loss": 0.0562, | |
| "step": 540, | |
| "ts_encoder_learning_rate": 4.743655889634105e-06 | |
| }, | |
| { | |
| "epoch": 1.4244171678062443, | |
| "grad_norm": 2.257175666571135, | |
| "learning_rate": 4.743655889634105e-06, | |
| "loss": 0.0629, | |
| "step": 541, | |
| "ts_encoder_learning_rate": 4.727649779568666e-06 | |
| }, | |
| { | |
| "epoch": 1.4270532992833018, | |
| "grad_norm": 1.276938190549169, | |
| "learning_rate": 4.727649779568666e-06, | |
| "loss": 0.0492, | |
| "step": 542, | |
| "ts_encoder_learning_rate": 4.711646468323129e-06 | |
| }, | |
| { | |
| "epoch": 1.4296894307603591, | |
| "grad_norm": 1.2256191207359708, | |
| "learning_rate": 4.711646468323129e-06, | |
| "loss": 0.0411, | |
| "step": 543, | |
| "ts_encoder_learning_rate": 4.695646120356275e-06 | |
| }, | |
| { | |
| "epoch": 1.4323255622374167, | |
| "grad_norm": 1.607726895519541, | |
| "learning_rate": 4.695646120356275e-06, | |
| "loss": 0.0394, | |
| "step": 544, | |
| "ts_encoder_learning_rate": 4.679648900096436e-06 | |
| }, | |
| { | |
| "epoch": 1.434961693714474, | |
| "grad_norm": 5.4916831132826704, | |
| "learning_rate": 4.679648900096436e-06, | |
| "loss": 0.0593, | |
| "step": 545, | |
| "ts_encoder_learning_rate": 4.663654971939802e-06 | |
| }, | |
| { | |
| "epoch": 1.4375978251915313, | |
| "grad_norm": 1.1834633367325678, | |
| "learning_rate": 4.663654971939802e-06, | |
| "loss": 0.0461, | |
| "step": 546, | |
| "ts_encoder_learning_rate": 4.64766450024873e-06 | |
| }, | |
| { | |
| "epoch": 1.4402339566685889, | |
| "grad_norm": 1.0570788406545149, | |
| "learning_rate": 4.64766450024873e-06, | |
| "loss": 0.0521, | |
| "step": 547, | |
| "ts_encoder_learning_rate": 4.6316776493500615e-06 | |
| }, | |
| { | |
| "epoch": 1.4428700881456462, | |
| "grad_norm": 1.519805292714776, | |
| "learning_rate": 4.6316776493500615e-06, | |
| "loss": 0.056, | |
| "step": 548, | |
| "ts_encoder_learning_rate": 4.615694583533418e-06 | |
| }, | |
| { | |
| "epoch": 1.4455062196227038, | |
| "grad_norm": 1.8886920313289481, | |
| "learning_rate": 4.615694583533418e-06, | |
| "loss": 0.059, | |
| "step": 549, | |
| "ts_encoder_learning_rate": 4.599715467049534e-06 | |
| }, | |
| { | |
| "epoch": 1.448142351099761, | |
| "grad_norm": 1.2634550829904962, | |
| "learning_rate": 4.599715467049534e-06, | |
| "loss": 0.0433, | |
| "step": 550, | |
| "ts_encoder_learning_rate": 4.583740464108554e-06 | |
| }, | |
| { | |
| "epoch": 1.4507784825768186, | |
| "grad_norm": 1.8049707104769945, | |
| "learning_rate": 4.583740464108554e-06, | |
| "loss": 0.0627, | |
| "step": 551, | |
| "ts_encoder_learning_rate": 4.56776973887835e-06 | |
| }, | |
| { | |
| "epoch": 1.453414614053876, | |
| "grad_norm": 1.9485167870885407, | |
| "learning_rate": 4.56776973887835e-06, | |
| "loss": 0.0469, | |
| "step": 552, | |
| "ts_encoder_learning_rate": 4.551803455482833e-06 | |
| }, | |
| { | |
| "epoch": 1.4560507455309333, | |
| "grad_norm": 1.267286074272727, | |
| "learning_rate": 4.551803455482833e-06, | |
| "loss": 0.0619, | |
| "step": 553, | |
| "ts_encoder_learning_rate": 4.53584177800027e-06 | |
| }, | |
| { | |
| "epoch": 1.4586868770079908, | |
| "grad_norm": 1.3504185426583042, | |
| "learning_rate": 4.53584177800027e-06, | |
| "loss": 0.0533, | |
| "step": 554, | |
| "ts_encoder_learning_rate": 4.5198848704615915e-06 | |
| }, | |
| { | |
| "epoch": 1.4613230084850481, | |
| "grad_norm": 1.0200226065433153, | |
| "learning_rate": 4.5198848704615915e-06, | |
| "loss": 0.0516, | |
| "step": 555, | |
| "ts_encoder_learning_rate": 4.503932896848713e-06 | |
| }, | |
| { | |
| "epoch": 1.4639591399621055, | |
| "grad_norm": 1.459926252613376, | |
| "learning_rate": 4.503932896848713e-06, | |
| "loss": 0.0544, | |
| "step": 556, | |
| "ts_encoder_learning_rate": 4.487986021092844e-06 | |
| }, | |
| { | |
| "epoch": 1.466595271439163, | |
| "grad_norm": 1.044163195089025, | |
| "learning_rate": 4.487986021092844e-06, | |
| "loss": 0.0464, | |
| "step": 557, | |
| "ts_encoder_learning_rate": 4.472044407072805e-06 | |
| }, | |
| { | |
| "epoch": 1.4692314029162206, | |
| "grad_norm": 1.667493213165678, | |
| "learning_rate": 4.472044407072805e-06, | |
| "loss": 0.0675, | |
| "step": 558, | |
| "ts_encoder_learning_rate": 4.456108218613346e-06 | |
| }, | |
| { | |
| "epoch": 1.4718675343932779, | |
| "grad_norm": 2.446693967731953, | |
| "learning_rate": 4.456108218613346e-06, | |
| "loss": 0.0499, | |
| "step": 559, | |
| "ts_encoder_learning_rate": 4.4401776194834615e-06 | |
| }, | |
| { | |
| "epoch": 1.4745036658703352, | |
| "grad_norm": 1.1699696309583159, | |
| "learning_rate": 4.4401776194834615e-06, | |
| "loss": 0.0542, | |
| "step": 560, | |
| "ts_encoder_learning_rate": 4.424252773394704e-06 | |
| }, | |
| { | |
| "epoch": 1.4771397973473928, | |
| "grad_norm": 1.1962425478415217, | |
| "learning_rate": 4.424252773394704e-06, | |
| "loss": 0.0462, | |
| "step": 561, | |
| "ts_encoder_learning_rate": 4.408333843999506e-06 | |
| }, | |
| { | |
| "epoch": 1.47977592882445, | |
| "grad_norm": 2.3242298710819758, | |
| "learning_rate": 4.408333843999506e-06, | |
| "loss": 0.0491, | |
| "step": 562, | |
| "ts_encoder_learning_rate": 4.392420994889498e-06 | |
| }, | |
| { | |
| "epoch": 1.4824120603015074, | |
| "grad_norm": 2.1023818406064643, | |
| "learning_rate": 4.392420994889498e-06, | |
| "loss": 0.0496, | |
| "step": 563, | |
| "ts_encoder_learning_rate": 4.376514389593827e-06 | |
| }, | |
| { | |
| "epoch": 1.485048191778565, | |
| "grad_norm": 2.4457184111852723, | |
| "learning_rate": 4.376514389593827e-06, | |
| "loss": 0.0524, | |
| "step": 564, | |
| "ts_encoder_learning_rate": 4.3606141915774695e-06 | |
| }, | |
| { | |
| "epoch": 1.4876843232556225, | |
| "grad_norm": 1.3302734845105864, | |
| "learning_rate": 4.3606141915774695e-06, | |
| "loss": 0.0504, | |
| "step": 565, | |
| "ts_encoder_learning_rate": 4.344720564239567e-06 | |
| }, | |
| { | |
| "epoch": 1.4903204547326798, | |
| "grad_norm": 1.1082526743380932, | |
| "learning_rate": 4.344720564239567e-06, | |
| "loss": 0.0481, | |
| "step": 566, | |
| "ts_encoder_learning_rate": 4.3288336709117246e-06 | |
| }, | |
| { | |
| "epoch": 1.4929565862097371, | |
| "grad_norm": 2.427309688292863, | |
| "learning_rate": 4.3288336709117246e-06, | |
| "loss": 0.0511, | |
| "step": 567, | |
| "ts_encoder_learning_rate": 4.312953674856355e-06 | |
| }, | |
| { | |
| "epoch": 1.4955927176867947, | |
| "grad_norm": 1.7521878257146914, | |
| "learning_rate": 4.312953674856355e-06, | |
| "loss": 0.0508, | |
| "step": 568, | |
| "ts_encoder_learning_rate": 4.297080739264987e-06 | |
| }, | |
| { | |
| "epoch": 1.498228849163852, | |
| "grad_norm": 1.8035517353788895, | |
| "learning_rate": 4.297080739264987e-06, | |
| "loss": 0.0468, | |
| "step": 569, | |
| "ts_encoder_learning_rate": 4.281215027256592e-06 | |
| }, | |
| { | |
| "epoch": 1.5008649806409093, | |
| "grad_norm": 1.3680011804510284, | |
| "learning_rate": 4.281215027256592e-06, | |
| "loss": 0.0615, | |
| "step": 570, | |
| "ts_encoder_learning_rate": 4.265356701875911e-06 | |
| }, | |
| { | |
| "epoch": 1.5035011121179669, | |
| "grad_norm": 1.0277278297390235, | |
| "learning_rate": 4.265356701875911e-06, | |
| "loss": 0.0392, | |
| "step": 571, | |
| "ts_encoder_learning_rate": 4.249505926091771e-06 | |
| }, | |
| { | |
| "epoch": 1.5061372435950244, | |
| "grad_norm": 1.5675045465594473, | |
| "learning_rate": 4.249505926091771e-06, | |
| "loss": 0.0511, | |
| "step": 572, | |
| "ts_encoder_learning_rate": 4.23366286279542e-06 | |
| }, | |
| { | |
| "epoch": 1.5087733750720818, | |
| "grad_norm": 2.516079496477877, | |
| "learning_rate": 4.23366286279542e-06, | |
| "loss": 0.0529, | |
| "step": 573, | |
| "ts_encoder_learning_rate": 4.217827674798845e-06 | |
| }, | |
| { | |
| "epoch": 1.511409506549139, | |
| "grad_norm": 2.7086489547850854, | |
| "learning_rate": 4.217827674798845e-06, | |
| "loss": 0.0443, | |
| "step": 574, | |
| "ts_encoder_learning_rate": 4.2020005248331056e-06 | |
| }, | |
| { | |
| "epoch": 1.5140456380261966, | |
| "grad_norm": 1.5770057283903636, | |
| "learning_rate": 4.2020005248331056e-06, | |
| "loss": 0.0534, | |
| "step": 575, | |
| "ts_encoder_learning_rate": 4.186181575546651e-06 | |
| }, | |
| { | |
| "epoch": 1.516681769503254, | |
| "grad_norm": 0.9542576979843979, | |
| "learning_rate": 4.186181575546651e-06, | |
| "loss": 0.0445, | |
| "step": 576, | |
| "ts_encoder_learning_rate": 4.170370989503662e-06 | |
| }, | |
| { | |
| "epoch": 1.5193179009803113, | |
| "grad_norm": 1.033126540105532, | |
| "learning_rate": 4.170370989503662e-06, | |
| "loss": 0.0436, | |
| "step": 577, | |
| "ts_encoder_learning_rate": 4.154568929182374e-06 | |
| }, | |
| { | |
| "epoch": 1.5219540324573688, | |
| "grad_norm": 1.3759475573506135, | |
| "learning_rate": 4.154568929182374e-06, | |
| "loss": 0.0471, | |
| "step": 578, | |
| "ts_encoder_learning_rate": 4.138775556973406e-06 | |
| }, | |
| { | |
| "epoch": 1.5245901639344264, | |
| "grad_norm": 1.715702326116523, | |
| "learning_rate": 4.138775556973406e-06, | |
| "loss": 0.0553, | |
| "step": 579, | |
| "ts_encoder_learning_rate": 4.122991035178093e-06 | |
| }, | |
| { | |
| "epoch": 1.5272262954114837, | |
| "grad_norm": 1.2802507956632132, | |
| "learning_rate": 4.122991035178093e-06, | |
| "loss": 0.0646, | |
| "step": 580, | |
| "ts_encoder_learning_rate": 4.107215526006818e-06 | |
| }, | |
| { | |
| "epoch": 1.529862426888541, | |
| "grad_norm": 1.2923645458952004, | |
| "learning_rate": 4.107215526006818e-06, | |
| "loss": 0.0589, | |
| "step": 581, | |
| "ts_encoder_learning_rate": 4.091449191577346e-06 | |
| }, | |
| { | |
| "epoch": 1.5324985583655986, | |
| "grad_norm": 1.275873998852951, | |
| "learning_rate": 4.091449191577346e-06, | |
| "loss": 0.051, | |
| "step": 582, | |
| "ts_encoder_learning_rate": 4.075692193913156e-06 | |
| }, | |
| { | |
| "epoch": 1.5351346898426559, | |
| "grad_norm": 1.5143761646518399, | |
| "learning_rate": 4.075692193913156e-06, | |
| "loss": 0.0422, | |
| "step": 583, | |
| "ts_encoder_learning_rate": 4.059944694941783e-06 | |
| }, | |
| { | |
| "epoch": 1.5377708213197132, | |
| "grad_norm": 1.2547966531700192, | |
| "learning_rate": 4.059944694941783e-06, | |
| "loss": 0.0645, | |
| "step": 584, | |
| "ts_encoder_learning_rate": 4.04420685649314e-06 | |
| }, | |
| { | |
| "epoch": 1.5404069527967708, | |
| "grad_norm": 1.7483804817058275, | |
| "learning_rate": 4.04420685649314e-06, | |
| "loss": 0.0446, | |
| "step": 585, | |
| "ts_encoder_learning_rate": 4.028478840297867e-06 | |
| }, | |
| { | |
| "epoch": 1.5430430842738283, | |
| "grad_norm": 1.863160809736345, | |
| "learning_rate": 4.028478840297867e-06, | |
| "loss": 0.0556, | |
| "step": 586, | |
| "ts_encoder_learning_rate": 4.012760807985665e-06 | |
| }, | |
| { | |
| "epoch": 1.5456792157508856, | |
| "grad_norm": 1.3419436863472527, | |
| "learning_rate": 4.012760807985665e-06, | |
| "loss": 0.05, | |
| "step": 587, | |
| "ts_encoder_learning_rate": 3.997052921083637e-06 | |
| }, | |
| { | |
| "epoch": 1.548315347227943, | |
| "grad_norm": 1.1909742103770653, | |
| "learning_rate": 3.997052921083637e-06, | |
| "loss": 0.0445, | |
| "step": 588, | |
| "ts_encoder_learning_rate": 3.9813553410146225e-06 | |
| }, | |
| { | |
| "epoch": 1.5509514787050005, | |
| "grad_norm": 1.0564200056890343, | |
| "learning_rate": 3.9813553410146225e-06, | |
| "loss": 0.0665, | |
| "step": 589, | |
| "ts_encoder_learning_rate": 3.965668229095546e-06 | |
| }, | |
| { | |
| "epoch": 1.5535876101820578, | |
| "grad_norm": 6.866040081379579, | |
| "learning_rate": 3.965668229095546e-06, | |
| "loss": 0.054, | |
| "step": 590, | |
| "ts_encoder_learning_rate": 3.949991746535753e-06 | |
| }, | |
| { | |
| "epoch": 1.5562237416591151, | |
| "grad_norm": 1.584656026556296, | |
| "learning_rate": 3.949991746535753e-06, | |
| "loss": 0.0476, | |
| "step": 591, | |
| "ts_encoder_learning_rate": 3.934326054435358e-06 | |
| }, | |
| { | |
| "epoch": 1.5588598731361727, | |
| "grad_norm": 1.3961799847840088, | |
| "learning_rate": 3.934326054435358e-06, | |
| "loss": 0.0534, | |
| "step": 592, | |
| "ts_encoder_learning_rate": 3.918671313783583e-06 | |
| }, | |
| { | |
| "epoch": 1.5614960046132302, | |
| "grad_norm": 0.8651238980949738, | |
| "learning_rate": 3.918671313783583e-06, | |
| "loss": 0.0465, | |
| "step": 593, | |
| "ts_encoder_learning_rate": 3.903027685457112e-06 | |
| }, | |
| { | |
| "epoch": 1.5641321360902873, | |
| "grad_norm": 1.4641882274691738, | |
| "learning_rate": 3.903027685457112e-06, | |
| "loss": 0.0382, | |
| "step": 594, | |
| "ts_encoder_learning_rate": 3.887395330218429e-06 | |
| }, | |
| { | |
| "epoch": 1.5667682675673449, | |
| "grad_norm": 1.312732235302505, | |
| "learning_rate": 3.887395330218429e-06, | |
| "loss": 0.0434, | |
| "step": 595, | |
| "ts_encoder_learning_rate": 3.87177440871417e-06 | |
| }, | |
| { | |
| "epoch": 1.5694043990444024, | |
| "grad_norm": 1.082210100085236, | |
| "learning_rate": 3.87177440871417e-06, | |
| "loss": 0.0455, | |
| "step": 596, | |
| "ts_encoder_learning_rate": 3.856165081473474e-06 | |
| }, | |
| { | |
| "epoch": 1.5720405305214598, | |
| "grad_norm": 1.1636550431612551, | |
| "learning_rate": 3.856165081473474e-06, | |
| "loss": 0.0521, | |
| "step": 597, | |
| "ts_encoder_learning_rate": 3.840567508906328e-06 | |
| }, | |
| { | |
| "epoch": 1.574676661998517, | |
| "grad_norm": 0.9722535818497965, | |
| "learning_rate": 3.840567508906328e-06, | |
| "loss": 0.0479, | |
| "step": 598, | |
| "ts_encoder_learning_rate": 3.824981851301924e-06 | |
| }, | |
| { | |
| "epoch": 1.5773127934755746, | |
| "grad_norm": 1.0986855549101457, | |
| "learning_rate": 3.824981851301924e-06, | |
| "loss": 0.0444, | |
| "step": 599, | |
| "ts_encoder_learning_rate": 3.809408268827009e-06 | |
| }, | |
| { | |
| "epoch": 1.5799489249526322, | |
| "grad_norm": 3.8382139919055662, | |
| "learning_rate": 3.809408268827009e-06, | |
| "loss": 0.0481, | |
| "step": 600, | |
| "ts_encoder_learning_rate": 3.7938469215242374e-06 | |
| }, | |
| { | |
| "epoch": 1.5825850564296893, | |
| "grad_norm": 0.8061311507375318, | |
| "learning_rate": 3.7938469215242374e-06, | |
| "loss": 0.0556, | |
| "step": 601, | |
| "ts_encoder_learning_rate": 3.778297969310529e-06 | |
| }, | |
| { | |
| "epoch": 1.5852211879067468, | |
| "grad_norm": 1.1509891000241945, | |
| "learning_rate": 3.778297969310529e-06, | |
| "loss": 0.0474, | |
| "step": 602, | |
| "ts_encoder_learning_rate": 3.7627615719754294e-06 | |
| }, | |
| { | |
| "epoch": 1.5878573193838044, | |
| "grad_norm": 0.7746498139881461, | |
| "learning_rate": 3.7627615719754294e-06, | |
| "loss": 0.0495, | |
| "step": 603, | |
| "ts_encoder_learning_rate": 3.7472378891794537e-06 | |
| }, | |
| { | |
| "epoch": 1.5904934508608617, | |
| "grad_norm": 1.1470163275979566, | |
| "learning_rate": 3.7472378891794537e-06, | |
| "loss": 0.035, | |
| "step": 604, | |
| "ts_encoder_learning_rate": 3.731727080452464e-06 | |
| }, | |
| { | |
| "epoch": 1.593129582337919, | |
| "grad_norm": 1.0156962254575825, | |
| "learning_rate": 3.731727080452464e-06, | |
| "loss": 0.0511, | |
| "step": 605, | |
| "ts_encoder_learning_rate": 3.7162293051920185e-06 | |
| }, | |
| { | |
| "epoch": 1.5957657138149766, | |
| "grad_norm": 0.6709332984734167, | |
| "learning_rate": 3.7162293051920185e-06, | |
| "loss": 0.0381, | |
| "step": 606, | |
| "ts_encoder_learning_rate": 3.7007447226617367e-06 | |
| }, | |
| { | |
| "epoch": 1.5984018452920339, | |
| "grad_norm": 1.0740219504440236, | |
| "learning_rate": 3.7007447226617367e-06, | |
| "loss": 0.0379, | |
| "step": 607, | |
| "ts_encoder_learning_rate": 3.685273491989661e-06 | |
| }, | |
| { | |
| "epoch": 1.6010379767690912, | |
| "grad_norm": 1.0725681809883107, | |
| "learning_rate": 3.685273491989661e-06, | |
| "loss": 0.0405, | |
| "step": 608, | |
| "ts_encoder_learning_rate": 3.669815772166625e-06 | |
| }, | |
| { | |
| "epoch": 1.6036741082461488, | |
| "grad_norm": 1.0834624903360521, | |
| "learning_rate": 3.669815772166625e-06, | |
| "loss": 0.0355, | |
| "step": 609, | |
| "ts_encoder_learning_rate": 3.654371722044616e-06 | |
| }, | |
| { | |
| "epoch": 1.6063102397232063, | |
| "grad_norm": 1.3539103412709967, | |
| "learning_rate": 3.654371722044616e-06, | |
| "loss": 0.0367, | |
| "step": 610, | |
| "ts_encoder_learning_rate": 3.638941500335145e-06 | |
| }, | |
| { | |
| "epoch": 1.6089463712002636, | |
| "grad_norm": 1.73704351662127, | |
| "learning_rate": 3.638941500335145e-06, | |
| "loss": 0.0357, | |
| "step": 611, | |
| "ts_encoder_learning_rate": 3.6235252656076138e-06 | |
| }, | |
| { | |
| "epoch": 1.611582502677321, | |
| "grad_norm": 1.0546272097596496, | |
| "learning_rate": 3.6235252656076138e-06, | |
| "loss": 0.0513, | |
| "step": 612, | |
| "ts_encoder_learning_rate": 3.608123176287685e-06 | |
| }, | |
| { | |
| "epoch": 1.6142186341543785, | |
| "grad_norm": 1.1384580684741372, | |
| "learning_rate": 3.608123176287685e-06, | |
| "loss": 0.0492, | |
| "step": 613, | |
| "ts_encoder_learning_rate": 3.5927353906556583e-06 | |
| }, | |
| { | |
| "epoch": 1.6168547656314358, | |
| "grad_norm": 1.0408680243087836, | |
| "learning_rate": 3.5927353906556583e-06, | |
| "loss": 0.0493, | |
| "step": 614, | |
| "ts_encoder_learning_rate": 3.5773620668448384e-06 | |
| }, | |
| { | |
| "epoch": 1.6194908971084931, | |
| "grad_norm": 1.2408867536402586, | |
| "learning_rate": 3.5773620668448384e-06, | |
| "loss": 0.0477, | |
| "step": 615, | |
| "ts_encoder_learning_rate": 3.562003362839914e-06 | |
| }, | |
| { | |
| "epoch": 1.6221270285855507, | |
| "grad_norm": 0.9012275494297888, | |
| "learning_rate": 3.562003362839914e-06, | |
| "loss": 0.0374, | |
| "step": 616, | |
| "ts_encoder_learning_rate": 3.5466594364753325e-06 | |
| }, | |
| { | |
| "epoch": 1.6247631600626082, | |
| "grad_norm": 1.542557802018261, | |
| "learning_rate": 3.5466594364753325e-06, | |
| "loss": 0.0563, | |
| "step": 617, | |
| "ts_encoder_learning_rate": 3.531330445433677e-06 | |
| }, | |
| { | |
| "epoch": 1.6273992915396656, | |
| "grad_norm": 1.3600810197945168, | |
| "learning_rate": 3.531330445433677e-06, | |
| "loss": 0.0508, | |
| "step": 618, | |
| "ts_encoder_learning_rate": 3.516016547244047e-06 | |
| }, | |
| { | |
| "epoch": 1.6300354230167229, | |
| "grad_norm": 1.1139555587082588, | |
| "learning_rate": 3.516016547244047e-06, | |
| "loss": 0.0383, | |
| "step": 619, | |
| "ts_encoder_learning_rate": 3.500717899280442e-06 | |
| }, | |
| { | |
| "epoch": 1.6326715544937804, | |
| "grad_norm": 1.8530076436307588, | |
| "learning_rate": 3.500717899280442e-06, | |
| "loss": 0.0444, | |
| "step": 620, | |
| "ts_encoder_learning_rate": 3.48543465876014e-06 | |
| }, | |
| { | |
| "epoch": 1.6353076859708378, | |
| "grad_norm": 0.9664504630825478, | |
| "learning_rate": 3.48543465876014e-06, | |
| "loss": 0.0564, | |
| "step": 621, | |
| "ts_encoder_learning_rate": 3.4701669827420827e-06 | |
| }, | |
| { | |
| "epoch": 1.637943817447895, | |
| "grad_norm": 2.6957722854149035, | |
| "learning_rate": 3.4701669827420827e-06, | |
| "loss": 0.0524, | |
| "step": 622, | |
| "ts_encoder_learning_rate": 3.4549150281252635e-06 | |
| }, | |
| { | |
| "epoch": 1.6405799489249526, | |
| "grad_norm": 0.8782042487398507, | |
| "learning_rate": 3.4549150281252635e-06, | |
| "loss": 0.0345, | |
| "step": 623, | |
| "ts_encoder_learning_rate": 3.4396789516471152e-06 | |
| }, | |
| { | |
| "epoch": 1.6432160804020102, | |
| "grad_norm": 1.455788121211078, | |
| "learning_rate": 3.4396789516471152e-06, | |
| "loss": 0.0494, | |
| "step": 624, | |
| "ts_encoder_learning_rate": 3.424458909881897e-06 | |
| }, | |
| { | |
| "epoch": 1.6458522118790675, | |
| "grad_norm": 1.1390293600922257, | |
| "learning_rate": 3.424458909881897e-06, | |
| "loss": 0.0451, | |
| "step": 625, | |
| "ts_encoder_learning_rate": 3.409255059239086e-06 | |
| }, | |
| { | |
| "epoch": 1.6484883433561248, | |
| "grad_norm": 1.0205461298328486, | |
| "learning_rate": 3.409255059239086e-06, | |
| "loss": 0.0481, | |
| "step": 626, | |
| "ts_encoder_learning_rate": 3.3940675559617724e-06 | |
| }, | |
| { | |
| "epoch": 1.6511244748331824, | |
| "grad_norm": 1.3290866058317974, | |
| "learning_rate": 3.3940675559617724e-06, | |
| "loss": 0.0379, | |
| "step": 627, | |
| "ts_encoder_learning_rate": 3.37889655612505e-06 | |
| }, | |
| { | |
| "epoch": 1.6537606063102397, | |
| "grad_norm": 1.588969233408268, | |
| "learning_rate": 3.37889655612505e-06, | |
| "loss": 0.0475, | |
| "step": 628, | |
| "ts_encoder_learning_rate": 3.363742215634416e-06 | |
| }, | |
| { | |
| "epoch": 1.656396737787297, | |
| "grad_norm": 1.5323219144753453, | |
| "learning_rate": 3.363742215634416e-06, | |
| "loss": 0.0535, | |
| "step": 629, | |
| "ts_encoder_learning_rate": 3.3486046902241663e-06 | |
| }, | |
| { | |
| "epoch": 1.6590328692643546, | |
| "grad_norm": 1.2117312516941228, | |
| "learning_rate": 3.3486046902241663e-06, | |
| "loss": 0.0456, | |
| "step": 630, | |
| "ts_encoder_learning_rate": 3.3334841354557923e-06 | |
| }, | |
| { | |
| "epoch": 1.661669000741412, | |
| "grad_norm": 1.1542851651812696, | |
| "learning_rate": 3.3334841354557923e-06, | |
| "loss": 0.0454, | |
| "step": 631, | |
| "ts_encoder_learning_rate": 3.318380706716392e-06 | |
| }, | |
| { | |
| "epoch": 1.6643051322184694, | |
| "grad_norm": 1.7354976924553471, | |
| "learning_rate": 3.318380706716392e-06, | |
| "loss": 0.0383, | |
| "step": 632, | |
| "ts_encoder_learning_rate": 3.303294559217063e-06 | |
| }, | |
| { | |
| "epoch": 1.6669412636955268, | |
| "grad_norm": 1.839192030247768, | |
| "learning_rate": 3.303294559217063e-06, | |
| "loss": 0.0452, | |
| "step": 633, | |
| "ts_encoder_learning_rate": 3.288225847991312e-06 | |
| }, | |
| { | |
| "epoch": 1.6695773951725843, | |
| "grad_norm": 1.1075074885202028, | |
| "learning_rate": 3.288225847991312e-06, | |
| "loss": 0.0381, | |
| "step": 634, | |
| "ts_encoder_learning_rate": 3.273174727893463e-06 | |
| }, | |
| { | |
| "epoch": 1.6722135266496416, | |
| "grad_norm": 1.0570238025860814, | |
| "learning_rate": 3.273174727893463e-06, | |
| "loss": 0.0494, | |
| "step": 635, | |
| "ts_encoder_learning_rate": 3.2581413535970597e-06 | |
| }, | |
| { | |
| "epoch": 1.674849658126699, | |
| "grad_norm": 1.4464126650801725, | |
| "learning_rate": 3.2581413535970597e-06, | |
| "loss": 0.0327, | |
| "step": 636, | |
| "ts_encoder_learning_rate": 3.2431258795932863e-06 | |
| }, | |
| { | |
| "epoch": 1.6774857896037565, | |
| "grad_norm": 4.97131370712164, | |
| "learning_rate": 3.2431258795932863e-06, | |
| "loss": 0.0518, | |
| "step": 637, | |
| "ts_encoder_learning_rate": 3.228128460189368e-06 | |
| }, | |
| { | |
| "epoch": 1.680121921080814, | |
| "grad_norm": 1.3774085333033086, | |
| "learning_rate": 3.228128460189368e-06, | |
| "loss": 0.0368, | |
| "step": 638, | |
| "ts_encoder_learning_rate": 3.213149249506997e-06 | |
| }, | |
| { | |
| "epoch": 1.6827580525578714, | |
| "grad_norm": 1.5023786656225406, | |
| "learning_rate": 3.213149249506997e-06, | |
| "loss": 0.0339, | |
| "step": 639, | |
| "ts_encoder_learning_rate": 3.198188401480734e-06 | |
| }, | |
| { | |
| "epoch": 1.6853941840349287, | |
| "grad_norm": 1.3990085291401217, | |
| "learning_rate": 3.198188401480734e-06, | |
| "loss": 0.0459, | |
| "step": 640, | |
| "ts_encoder_learning_rate": 3.183246069856443e-06 | |
| }, | |
| { | |
| "epoch": 1.6880303155119862, | |
| "grad_norm": 3.826763148327695, | |
| "learning_rate": 3.183246069856443e-06, | |
| "loss": 0.0384, | |
| "step": 641, | |
| "ts_encoder_learning_rate": 3.1683224081897e-06 | |
| }, | |
| { | |
| "epoch": 1.6906664469890436, | |
| "grad_norm": 3.421422432395587, | |
| "learning_rate": 3.1683224081897e-06, | |
| "loss": 0.0464, | |
| "step": 642, | |
| "ts_encoder_learning_rate": 3.1534175698442194e-06 | |
| }, | |
| { | |
| "epoch": 1.6933025784661009, | |
| "grad_norm": 3.7657139495021323, | |
| "learning_rate": 3.1534175698442194e-06, | |
| "loss": 0.0403, | |
| "step": 643, | |
| "ts_encoder_learning_rate": 3.1385317079902743e-06 | |
| }, | |
| { | |
| "epoch": 1.6959387099431584, | |
| "grad_norm": 37.29958018939492, | |
| "learning_rate": 3.1385317079902743e-06, | |
| "loss": 0.0498, | |
| "step": 644, | |
| "ts_encoder_learning_rate": 3.12366497560313e-06 | |
| }, | |
| { | |
| "epoch": 1.698574841420216, | |
| "grad_norm": 4.349734482528857, | |
| "learning_rate": 3.12366497560313e-06, | |
| "loss": 0.0505, | |
| "step": 645, | |
| "ts_encoder_learning_rate": 3.1088175254614616e-06 | |
| }, | |
| { | |
| "epoch": 1.7012109728972733, | |
| "grad_norm": 48.489588646859666, | |
| "learning_rate": 3.1088175254614616e-06, | |
| "loss": 0.0382, | |
| "step": 646, | |
| "ts_encoder_learning_rate": 3.093989510145792e-06 | |
| }, | |
| { | |
| "epoch": 1.7038471043743306, | |
| "grad_norm": 6.78448657127166, | |
| "learning_rate": 3.093989510145792e-06, | |
| "loss": 0.0508, | |
| "step": 647, | |
| "ts_encoder_learning_rate": 3.079181082036922e-06 | |
| }, | |
| { | |
| "epoch": 1.7064832358513882, | |
| "grad_norm": 3.23674986315901, | |
| "learning_rate": 3.079181082036922e-06, | |
| "loss": 0.0446, | |
| "step": 648, | |
| "ts_encoder_learning_rate": 3.0643923933143603e-06 | |
| }, | |
| { | |
| "epoch": 1.7091193673284455, | |
| "grad_norm": 1.178341628440116, | |
| "learning_rate": 3.0643923933143603e-06, | |
| "loss": 0.0516, | |
| "step": 649, | |
| "ts_encoder_learning_rate": 3.049623595954766e-06 | |
| }, | |
| { | |
| "epoch": 1.7117554988055028, | |
| "grad_norm": 8.830888469474749, | |
| "learning_rate": 3.049623595954766e-06, | |
| "loss": 0.0417, | |
| "step": 650, | |
| "ts_encoder_learning_rate": 3.0348748417303826e-06 | |
| }, | |
| { | |
| "epoch": 1.7143916302825604, | |
| "grad_norm": 1.8437992927036981, | |
| "learning_rate": 3.0348748417303826e-06, | |
| "loss": 0.035, | |
| "step": 651, | |
| "ts_encoder_learning_rate": 3.020146282207479e-06 | |
| }, | |
| { | |
| "epoch": 1.717027761759618, | |
| "grad_norm": 47.65658053142364, | |
| "learning_rate": 3.020146282207479e-06, | |
| "loss": 0.0541, | |
| "step": 652, | |
| "ts_encoder_learning_rate": 3.005438068744792e-06 | |
| }, | |
| { | |
| "epoch": 1.7196638932366752, | |
| "grad_norm": 1.1030676770500603, | |
| "learning_rate": 3.005438068744792e-06, | |
| "loss": 0.0392, | |
| "step": 653, | |
| "ts_encoder_learning_rate": 2.9907503524919734e-06 | |
| }, | |
| { | |
| "epoch": 1.7223000247137326, | |
| "grad_norm": 16.670627222356522, | |
| "learning_rate": 2.9907503524919734e-06, | |
| "loss": 0.0437, | |
| "step": 654, | |
| "ts_encoder_learning_rate": 2.976083284388031e-06 | |
| }, | |
| { | |
| "epoch": 1.72493615619079, | |
| "grad_norm": 0.9106485328504088, | |
| "learning_rate": 2.976083284388031e-06, | |
| "loss": 0.049, | |
| "step": 655, | |
| "ts_encoder_learning_rate": 2.9614370151597837e-06 | |
| }, | |
| { | |
| "epoch": 1.7275722876678474, | |
| "grad_norm": 1.3803980461708971, | |
| "learning_rate": 2.9614370151597837e-06, | |
| "loss": 0.0394, | |
| "step": 656, | |
| "ts_encoder_learning_rate": 2.9468116953203107e-06 | |
| }, | |
| { | |
| "epoch": 1.7302084191449048, | |
| "grad_norm": 0.9915306370393331, | |
| "learning_rate": 2.9468116953203107e-06, | |
| "loss": 0.0486, | |
| "step": 657, | |
| "ts_encoder_learning_rate": 2.932207475167398e-06 | |
| }, | |
| { | |
| "epoch": 1.7328445506219623, | |
| "grad_norm": 1.6041614843590046, | |
| "learning_rate": 2.932207475167398e-06, | |
| "loss": 0.0406, | |
| "step": 658, | |
| "ts_encoder_learning_rate": 2.9176245047820064e-06 | |
| }, | |
| { | |
| "epoch": 1.7354806820990198, | |
| "grad_norm": 1.3499944374287252, | |
| "learning_rate": 2.9176245047820064e-06, | |
| "loss": 0.0403, | |
| "step": 659, | |
| "ts_encoder_learning_rate": 2.9030629340267165e-06 | |
| }, | |
| { | |
| "epoch": 1.738116813576077, | |
| "grad_norm": 0.8297929219673496, | |
| "learning_rate": 2.9030629340267165e-06, | |
| "loss": 0.0401, | |
| "step": 660, | |
| "ts_encoder_learning_rate": 2.8885229125442022e-06 | |
| }, | |
| { | |
| "epoch": 1.7407529450531345, | |
| "grad_norm": 1.0327297756381613, | |
| "learning_rate": 2.8885229125442022e-06, | |
| "loss": 0.0347, | |
| "step": 661, | |
| "ts_encoder_learning_rate": 2.8740045897556766e-06 | |
| }, | |
| { | |
| "epoch": 1.743389076530192, | |
| "grad_norm": 1.294243398194849, | |
| "learning_rate": 2.8740045897556766e-06, | |
| "loss": 0.0382, | |
| "step": 662, | |
| "ts_encoder_learning_rate": 2.859508114859374e-06 | |
| }, | |
| { | |
| "epoch": 1.7460252080072494, | |
| "grad_norm": 9.88848400216231, | |
| "learning_rate": 2.859508114859374e-06, | |
| "loss": 0.0437, | |
| "step": 663, | |
| "ts_encoder_learning_rate": 2.845033636828998e-06 | |
| }, | |
| { | |
| "epoch": 1.7486613394843067, | |
| "grad_norm": 1.3051859972411644, | |
| "learning_rate": 2.845033636828998e-06, | |
| "loss": 0.0506, | |
| "step": 664, | |
| "ts_encoder_learning_rate": 2.83058130441221e-06 | |
| }, | |
| { | |
| "epoch": 1.7512974709613642, | |
| "grad_norm": 1.3312709718052185, | |
| "learning_rate": 2.83058130441221e-06, | |
| "loss": 0.0262, | |
| "step": 665, | |
| "ts_encoder_learning_rate": 2.8161512661290847e-06 | |
| }, | |
| { | |
| "epoch": 1.7539336024384218, | |
| "grad_norm": 1.1582478757709687, | |
| "learning_rate": 2.8161512661290847e-06, | |
| "loss": 0.0452, | |
| "step": 666, | |
| "ts_encoder_learning_rate": 2.80174367027059e-06 | |
| }, | |
| { | |
| "epoch": 1.7565697339154789, | |
| "grad_norm": 1.2069057310106728, | |
| "learning_rate": 2.80174367027059e-06, | |
| "loss": 0.0371, | |
| "step": 667, | |
| "ts_encoder_learning_rate": 2.7873586648970686e-06 | |
| }, | |
| { | |
| "epoch": 1.7592058653925364, | |
| "grad_norm": 1.2044293435585265, | |
| "learning_rate": 2.7873586648970686e-06, | |
| "loss": 0.0443, | |
| "step": 668, | |
| "ts_encoder_learning_rate": 2.772996397836704e-06 | |
| }, | |
| { | |
| "epoch": 1.761841996869594, | |
| "grad_norm": 1.0589558487998179, | |
| "learning_rate": 2.772996397836704e-06, | |
| "loss": 0.0357, | |
| "step": 669, | |
| "ts_encoder_learning_rate": 2.7586570166840154e-06 | |
| }, | |
| { | |
| "epoch": 1.7644781283466513, | |
| "grad_norm": 1.1371065267074214, | |
| "learning_rate": 2.7586570166840154e-06, | |
| "loss": 0.0405, | |
| "step": 670, | |
| "ts_encoder_learning_rate": 2.7443406687983267e-06 | |
| }, | |
| { | |
| "epoch": 1.7671142598237086, | |
| "grad_norm": 1.310944403130682, | |
| "learning_rate": 2.7443406687983267e-06, | |
| "loss": 0.0334, | |
| "step": 671, | |
| "ts_encoder_learning_rate": 2.7300475013022666e-06 | |
| }, | |
| { | |
| "epoch": 1.7697503913007662, | |
| "grad_norm": 1.611743339204791, | |
| "learning_rate": 2.7300475013022666e-06, | |
| "loss": 0.0486, | |
| "step": 672, | |
| "ts_encoder_learning_rate": 2.7157776610802416e-06 | |
| }, | |
| { | |
| "epoch": 1.7723865227778235, | |
| "grad_norm": 2.1145133691029625, | |
| "learning_rate": 2.7157776610802416e-06, | |
| "loss": 0.0484, | |
| "step": 673, | |
| "ts_encoder_learning_rate": 2.7015312947769436e-06 | |
| }, | |
| { | |
| "epoch": 1.7750226542548808, | |
| "grad_norm": 1.8011066906635667, | |
| "learning_rate": 2.7015312947769436e-06, | |
| "loss": 0.0472, | |
| "step": 674, | |
| "ts_encoder_learning_rate": 2.687308548795825e-06 | |
| }, | |
| { | |
| "epoch": 1.7776587857319384, | |
| "grad_norm": 14.318871488368272, | |
| "learning_rate": 2.687308548795825e-06, | |
| "loss": 0.0397, | |
| "step": 675, | |
| "ts_encoder_learning_rate": 2.6731095692976073e-06 | |
| }, | |
| { | |
| "epoch": 1.780294917208996, | |
| "grad_norm": 0.9235148193782242, | |
| "learning_rate": 2.6731095692976073e-06, | |
| "loss": 0.03, | |
| "step": 676, | |
| "ts_encoder_learning_rate": 2.6589345021987725e-06 | |
| }, | |
| { | |
| "epoch": 1.7829310486860532, | |
| "grad_norm": 7.592255856926825, | |
| "learning_rate": 2.6589345021987725e-06, | |
| "loss": 0.0348, | |
| "step": 677, | |
| "ts_encoder_learning_rate": 2.6447834931700688e-06 | |
| }, | |
| { | |
| "epoch": 1.7855671801631106, | |
| "grad_norm": 1.7017607950556997, | |
| "learning_rate": 2.6447834931700688e-06, | |
| "loss": 0.0429, | |
| "step": 678, | |
| "ts_encoder_learning_rate": 2.6306566876350072e-06 | |
| }, | |
| { | |
| "epoch": 1.788203311640168, | |
| "grad_norm": 1.1703754969173203, | |
| "learning_rate": 2.6306566876350072e-06, | |
| "loss": 0.0534, | |
| "step": 679, | |
| "ts_encoder_learning_rate": 2.6165542307683744e-06 | |
| }, | |
| { | |
| "epoch": 1.7908394431172254, | |
| "grad_norm": 1.255297983170475, | |
| "learning_rate": 2.6165542307683744e-06, | |
| "loss": 0.043, | |
| "step": 680, | |
| "ts_encoder_learning_rate": 2.6024762674947313e-06 | |
| }, | |
| { | |
| "epoch": 1.7934755745942828, | |
| "grad_norm": 1.5888017813716921, | |
| "learning_rate": 2.6024762674947313e-06, | |
| "loss": 0.0343, | |
| "step": 681, | |
| "ts_encoder_learning_rate": 2.588422942486932e-06 | |
| }, | |
| { | |
| "epoch": 1.7961117060713403, | |
| "grad_norm": 1.7718676183781328, | |
| "learning_rate": 2.588422942486932e-06, | |
| "loss": 0.0486, | |
| "step": 682, | |
| "ts_encoder_learning_rate": 2.5743944001646394e-06 | |
| }, | |
| { | |
| "epoch": 1.7987478375483978, | |
| "grad_norm": 0.9086652386668309, | |
| "learning_rate": 2.5743944001646394e-06, | |
| "loss": 0.0497, | |
| "step": 683, | |
| "ts_encoder_learning_rate": 2.5603907846928277e-06 | |
| }, | |
| { | |
| "epoch": 1.8013839690254552, | |
| "grad_norm": 1.1341128763629043, | |
| "learning_rate": 2.5603907846928277e-06, | |
| "loss": 0.0425, | |
| "step": 684, | |
| "ts_encoder_learning_rate": 2.5464122399803126e-06 | |
| }, | |
| { | |
| "epoch": 1.8040201005025125, | |
| "grad_norm": 2.5998127513101315, | |
| "learning_rate": 2.5464122399803126e-06, | |
| "loss": 0.0574, | |
| "step": 685, | |
| "ts_encoder_learning_rate": 2.532458909678266e-06 | |
| }, | |
| { | |
| "epoch": 1.80665623197957, | |
| "grad_norm": 4.542124494248002, | |
| "learning_rate": 2.532458909678266e-06, | |
| "loss": 0.0462, | |
| "step": 686, | |
| "ts_encoder_learning_rate": 2.5185309371787515e-06 | |
| }, | |
| { | |
| "epoch": 1.8092923634566274, | |
| "grad_norm": 1.0616553515388587, | |
| "learning_rate": 2.5185309371787515e-06, | |
| "loss": 0.0446, | |
| "step": 687, | |
| "ts_encoder_learning_rate": 2.50462846561323e-06 | |
| }, | |
| { | |
| "epoch": 1.8119284949336847, | |
| "grad_norm": 0.7064894341667076, | |
| "learning_rate": 2.50462846561323e-06, | |
| "loss": 0.035, | |
| "step": 688, | |
| "ts_encoder_learning_rate": 2.4907516378511137e-06 | |
| }, | |
| { | |
| "epoch": 1.8145646264107422, | |
| "grad_norm": 0.673676553652552, | |
| "learning_rate": 2.4907516378511137e-06, | |
| "loss": 0.0456, | |
| "step": 689, | |
| "ts_encoder_learning_rate": 2.4769005964982718e-06 | |
| }, | |
| { | |
| "epoch": 1.8172007578877998, | |
| "grad_norm": 1.0526217413305237, | |
| "learning_rate": 2.4769005964982718e-06, | |
| "loss": 0.0408, | |
| "step": 690, | |
| "ts_encoder_learning_rate": 2.46307548389559e-06 | |
| }, | |
| { | |
| "epoch": 1.819836889364857, | |
| "grad_norm": 0.8741534711615824, | |
| "learning_rate": 2.46307548389559e-06, | |
| "loss": 0.0369, | |
| "step": 691, | |
| "ts_encoder_learning_rate": 2.4492764421174863e-06 | |
| }, | |
| { | |
| "epoch": 1.8224730208419144, | |
| "grad_norm": 1.558424208864291, | |
| "learning_rate": 2.4492764421174863e-06, | |
| "loss": 0.0403, | |
| "step": 692, | |
| "ts_encoder_learning_rate": 2.43550361297047e-06 | |
| }, | |
| { | |
| "epoch": 1.825109152318972, | |
| "grad_norm": 0.8337291977316967, | |
| "learning_rate": 2.43550361297047e-06, | |
| "loss": 0.0352, | |
| "step": 693, | |
| "ts_encoder_learning_rate": 2.4217571379916673e-06 | |
| }, | |
| { | |
| "epoch": 1.8277452837960293, | |
| "grad_norm": 1.017616972872044, | |
| "learning_rate": 2.4217571379916673e-06, | |
| "loss": 0.0552, | |
| "step": 694, | |
| "ts_encoder_learning_rate": 2.408037158447375e-06 | |
| }, | |
| { | |
| "epoch": 1.8303814152730866, | |
| "grad_norm": 0.7844845835056601, | |
| "learning_rate": 2.408037158447375e-06, | |
| "loss": 0.0435, | |
| "step": 695, | |
| "ts_encoder_learning_rate": 2.394343815331616e-06 | |
| }, | |
| { | |
| "epoch": 1.8330175467501442, | |
| "grad_norm": 1.1004307198779026, | |
| "learning_rate": 2.394343815331616e-06, | |
| "loss": 0.0314, | |
| "step": 696, | |
| "ts_encoder_learning_rate": 2.3806772493646725e-06 | |
| }, | |
| { | |
| "epoch": 1.8356536782272017, | |
| "grad_norm": 1.1688405779568678, | |
| "learning_rate": 2.3806772493646725e-06, | |
| "loss": 0.0269, | |
| "step": 697, | |
| "ts_encoder_learning_rate": 2.3670376009916596e-06 | |
| }, | |
| { | |
| "epoch": 1.838289809704259, | |
| "grad_norm": 1.0701865886101891, | |
| "learning_rate": 2.3670376009916596e-06, | |
| "loss": 0.044, | |
| "step": 698, | |
| "ts_encoder_learning_rate": 2.353425010381063e-06 | |
| }, | |
| { | |
| "epoch": 1.8409259411813164, | |
| "grad_norm": 1.360235946458034, | |
| "learning_rate": 2.353425010381063e-06, | |
| "loss": 0.0384, | |
| "step": 699, | |
| "ts_encoder_learning_rate": 2.339839617423318e-06 | |
| }, | |
| { | |
| "epoch": 1.843562072658374, | |
| "grad_norm": 0.8062724598386658, | |
| "learning_rate": 2.339839617423318e-06, | |
| "loss": 0.0331, | |
| "step": 700, | |
| "ts_encoder_learning_rate": 2.3262815617293517e-06 | |
| }, | |
| { | |
| "epoch": 1.8461982041354312, | |
| "grad_norm": 1.1455549172176551, | |
| "learning_rate": 2.3262815617293517e-06, | |
| "loss": 0.0542, | |
| "step": 701, | |
| "ts_encoder_learning_rate": 2.31275098262917e-06 | |
| }, | |
| { | |
| "epoch": 1.8488343356124886, | |
| "grad_norm": 0.703436998992223, | |
| "learning_rate": 2.31275098262917e-06, | |
| "loss": 0.0313, | |
| "step": 702, | |
| "ts_encoder_learning_rate": 2.2992480191704003e-06 | |
| }, | |
| { | |
| "epoch": 1.851470467089546, | |
| "grad_norm": 0.8764547755682153, | |
| "learning_rate": 2.2992480191704003e-06, | |
| "loss": 0.0333, | |
| "step": 703, | |
| "ts_encoder_learning_rate": 2.28577281011689e-06 | |
| }, | |
| { | |
| "epoch": 1.8541065985666036, | |
| "grad_norm": 1.23716774475027, | |
| "learning_rate": 2.28577281011689e-06, | |
| "loss": 0.0301, | |
| "step": 704, | |
| "ts_encoder_learning_rate": 2.272325493947257e-06 | |
| }, | |
| { | |
| "epoch": 1.856742730043661, | |
| "grad_norm": 11.281594612889576, | |
| "learning_rate": 2.272325493947257e-06, | |
| "loss": 0.0312, | |
| "step": 705, | |
| "ts_encoder_learning_rate": 2.2589062088534837e-06 | |
| }, | |
| { | |
| "epoch": 1.8593788615207183, | |
| "grad_norm": 0.9987575070457472, | |
| "learning_rate": 2.2589062088534837e-06, | |
| "loss": 0.0417, | |
| "step": 706, | |
| "ts_encoder_learning_rate": 2.245515092739488e-06 | |
| }, | |
| { | |
| "epoch": 1.8620149929977758, | |
| "grad_norm": 1.0116065835025165, | |
| "learning_rate": 2.245515092739488e-06, | |
| "loss": 0.0496, | |
| "step": 707, | |
| "ts_encoder_learning_rate": 2.2321522832197036e-06 | |
| }, | |
| { | |
| "epoch": 1.8646511244748332, | |
| "grad_norm": 1.3806751773781658, | |
| "learning_rate": 2.2321522832197036e-06, | |
| "loss": 0.039, | |
| "step": 708, | |
| "ts_encoder_learning_rate": 2.2188179176176767e-06 | |
| }, | |
| { | |
| "epoch": 1.8672872559518905, | |
| "grad_norm": 0.9901586798069452, | |
| "learning_rate": 2.2188179176176767e-06, | |
| "loss": 0.041, | |
| "step": 709, | |
| "ts_encoder_learning_rate": 2.2055121329646416e-06 | |
| }, | |
| { | |
| "epoch": 1.869923387428948, | |
| "grad_norm": 0.9660208947927406, | |
| "learning_rate": 2.2055121329646416e-06, | |
| "loss": 0.0263, | |
| "step": 710, | |
| "ts_encoder_learning_rate": 2.1922350659981262e-06 | |
| }, | |
| { | |
| "epoch": 1.8725595189060056, | |
| "grad_norm": 0.9148657839084163, | |
| "learning_rate": 2.1922350659981262e-06, | |
| "loss": 0.0382, | |
| "step": 711, | |
| "ts_encoder_learning_rate": 2.178986853160535e-06 | |
| }, | |
| { | |
| "epoch": 1.875195650383063, | |
| "grad_norm": 0.9900246938077546, | |
| "learning_rate": 2.178986853160535e-06, | |
| "loss": 0.0482, | |
| "step": 712, | |
| "ts_encoder_learning_rate": 2.165767630597752e-06 | |
| }, | |
| { | |
| "epoch": 1.8778317818601202, | |
| "grad_norm": 0.995911258816107, | |
| "learning_rate": 2.165767630597752e-06, | |
| "loss": 0.0383, | |
| "step": 713, | |
| "ts_encoder_learning_rate": 2.1525775341577404e-06 | |
| }, | |
| { | |
| "epoch": 1.8804679133371778, | |
| "grad_norm": 0.9529466866489111, | |
| "learning_rate": 2.1525775341577404e-06, | |
| "loss": 0.0336, | |
| "step": 714, | |
| "ts_encoder_learning_rate": 2.139416699389153e-06 | |
| }, | |
| { | |
| "epoch": 1.883104044814235, | |
| "grad_norm": 0.6476905341796513, | |
| "learning_rate": 2.139416699389153e-06, | |
| "loss": 0.0274, | |
| "step": 715, | |
| "ts_encoder_learning_rate": 2.126285261539926e-06 | |
| }, | |
| { | |
| "epoch": 1.8857401762912924, | |
| "grad_norm": 1.1085422585469236, | |
| "learning_rate": 2.126285261539926e-06, | |
| "loss": 0.0361, | |
| "step": 716, | |
| "ts_encoder_learning_rate": 2.1131833555559037e-06 | |
| }, | |
| { | |
| "epoch": 1.88837630776835, | |
| "grad_norm": 1.0042700649998133, | |
| "learning_rate": 2.1131833555559037e-06, | |
| "loss": 0.0288, | |
| "step": 717, | |
| "ts_encoder_learning_rate": 2.1001111160794387e-06 | |
| }, | |
| { | |
| "epoch": 1.8910124392454075, | |
| "grad_norm": 0.9521264639042284, | |
| "learning_rate": 2.1001111160794387e-06, | |
| "loss": 0.0421, | |
| "step": 718, | |
| "ts_encoder_learning_rate": 2.08706867744802e-06 | |
| }, | |
| { | |
| "epoch": 1.8936485707224646, | |
| "grad_norm": 0.931714414769153, | |
| "learning_rate": 2.08706867744802e-06, | |
| "loss": 0.0366, | |
| "step": 719, | |
| "ts_encoder_learning_rate": 2.074056173692881e-06 | |
| }, | |
| { | |
| "epoch": 1.8962847021995222, | |
| "grad_norm": 0.754100564730907, | |
| "learning_rate": 2.074056173692881e-06, | |
| "loss": 0.0303, | |
| "step": 720, | |
| "ts_encoder_learning_rate": 2.061073738537635e-06 | |
| }, | |
| { | |
| "epoch": 1.8989208336765797, | |
| "grad_norm": 0.7479604079430805, | |
| "learning_rate": 2.061073738537635e-06, | |
| "loss": 0.0466, | |
| "step": 721, | |
| "ts_encoder_learning_rate": 2.0481215053968874e-06 | |
| }, | |
| { | |
| "epoch": 1.901556965153637, | |
| "grad_norm": 0.7804885190676818, | |
| "learning_rate": 2.0481215053968874e-06, | |
| "loss": 0.0295, | |
| "step": 722, | |
| "ts_encoder_learning_rate": 2.0351996073748713e-06 | |
| }, | |
| { | |
| "epoch": 1.9041930966306944, | |
| "grad_norm": 0.9239500456881254, | |
| "learning_rate": 2.0351996073748713e-06, | |
| "loss": 0.0418, | |
| "step": 723, | |
| "ts_encoder_learning_rate": 2.0223081772640867e-06 | |
| }, | |
| { | |
| "epoch": 1.906829228107752, | |
| "grad_norm": 1.0393746665064, | |
| "learning_rate": 2.0223081772640867e-06, | |
| "loss": 0.0352, | |
| "step": 724, | |
| "ts_encoder_learning_rate": 2.00944734754392e-06 | |
| }, | |
| { | |
| "epoch": 1.9094653595848095, | |
| "grad_norm": 0.7757057471334936, | |
| "learning_rate": 2.00944734754392e-06, | |
| "loss": 0.0334, | |
| "step": 725, | |
| "ts_encoder_learning_rate": 1.9966172503792986e-06 | |
| }, | |
| { | |
| "epoch": 1.9121014910618666, | |
| "grad_norm": 0.7982941003952496, | |
| "learning_rate": 1.9966172503792986e-06, | |
| "loss": 0.0341, | |
| "step": 726, | |
| "ts_encoder_learning_rate": 1.983818017619318e-06 | |
| }, | |
| { | |
| "epoch": 1.914737622538924, | |
| "grad_norm": 1.0301075039556638, | |
| "learning_rate": 1.983818017619318e-06, | |
| "loss": 0.0407, | |
| "step": 727, | |
| "ts_encoder_learning_rate": 1.971049780795901e-06 | |
| }, | |
| { | |
| "epoch": 1.9173737540159816, | |
| "grad_norm": 0.9027944740938065, | |
| "learning_rate": 1.971049780795901e-06, | |
| "loss": 0.0325, | |
| "step": 728, | |
| "ts_encoder_learning_rate": 1.9583126711224342e-06 | |
| }, | |
| { | |
| "epoch": 1.920009885493039, | |
| "grad_norm": 1.0093899617917834, | |
| "learning_rate": 1.9583126711224342e-06, | |
| "loss": 0.0343, | |
| "step": 729, | |
| "ts_encoder_learning_rate": 1.945606819492429e-06 | |
| }, | |
| { | |
| "epoch": 1.9226460169700963, | |
| "grad_norm": 1.6230911664954315, | |
| "learning_rate": 1.945606819492429e-06, | |
| "loss": 0.0467, | |
| "step": 730, | |
| "ts_encoder_learning_rate": 1.932932356478168e-06 | |
| }, | |
| { | |
| "epoch": 1.9252821484471538, | |
| "grad_norm": 1.169611902284653, | |
| "learning_rate": 1.932932356478168e-06, | |
| "loss": 0.0346, | |
| "step": 731, | |
| "ts_encoder_learning_rate": 1.9202894123293677e-06 | |
| }, | |
| { | |
| "epoch": 1.9279182799242112, | |
| "grad_norm": 0.8882211427092304, | |
| "learning_rate": 1.9202894123293677e-06, | |
| "loss": 0.0271, | |
| "step": 732, | |
| "ts_encoder_learning_rate": 1.9076781169718426e-06 | |
| }, | |
| { | |
| "epoch": 1.9305544114012685, | |
| "grad_norm": 0.7788137841169445, | |
| "learning_rate": 1.9076781169718426e-06, | |
| "loss": 0.0317, | |
| "step": 733, | |
| "ts_encoder_learning_rate": 1.895098600006164e-06 | |
| }, | |
| { | |
| "epoch": 1.933190542878326, | |
| "grad_norm": 0.9177124549747325, | |
| "learning_rate": 1.895098600006164e-06, | |
| "loss": 0.0294, | |
| "step": 734, | |
| "ts_encoder_learning_rate": 1.8825509907063328e-06 | |
| }, | |
| { | |
| "epoch": 1.9358266743553836, | |
| "grad_norm": 1.1488053411805068, | |
| "learning_rate": 1.8825509907063328e-06, | |
| "loss": 0.0319, | |
| "step": 735, | |
| "ts_encoder_learning_rate": 1.8700354180184465e-06 | |
| }, | |
| { | |
| "epoch": 1.938462805832441, | |
| "grad_norm": 0.9810496010092534, | |
| "learning_rate": 1.8700354180184465e-06, | |
| "loss": 0.0348, | |
| "step": 736, | |
| "ts_encoder_learning_rate": 1.857552010559382e-06 | |
| }, | |
| { | |
| "epoch": 1.9410989373094982, | |
| "grad_norm": 0.5266455663863897, | |
| "learning_rate": 1.857552010559382e-06, | |
| "loss": 0.0166, | |
| "step": 737, | |
| "ts_encoder_learning_rate": 1.8451008966154622e-06 | |
| }, | |
| { | |
| "epoch": 1.9437350687865558, | |
| "grad_norm": 0.6289054316907776, | |
| "learning_rate": 1.8451008966154622e-06, | |
| "loss": 0.0296, | |
| "step": 738, | |
| "ts_encoder_learning_rate": 1.8326822041411524e-06 | |
| }, | |
| { | |
| "epoch": 1.946371200263613, | |
| "grad_norm": 0.9572882282852405, | |
| "learning_rate": 1.8326822041411524e-06, | |
| "loss": 0.0445, | |
| "step": 739, | |
| "ts_encoder_learning_rate": 1.8202960607577246e-06 | |
| }, | |
| { | |
| "epoch": 1.9490073317406704, | |
| "grad_norm": 1.314826995873295, | |
| "learning_rate": 1.8202960607577246e-06, | |
| "loss": 0.0348, | |
| "step": 740, | |
| "ts_encoder_learning_rate": 1.8079425937519729e-06 | |
| }, | |
| { | |
| "epoch": 1.951643463217728, | |
| "grad_norm": 0.996575258797314, | |
| "learning_rate": 1.8079425937519729e-06, | |
| "loss": 0.0378, | |
| "step": 741, | |
| "ts_encoder_learning_rate": 1.7956219300748796e-06 | |
| }, | |
| { | |
| "epoch": 1.9542795946947855, | |
| "grad_norm": 1.0607844107395987, | |
| "learning_rate": 1.7956219300748796e-06, | |
| "loss": 0.0329, | |
| "step": 742, | |
| "ts_encoder_learning_rate": 1.7833341963403312e-06 | |
| }, | |
| { | |
| "epoch": 1.9569157261718428, | |
| "grad_norm": 0.9094930783526255, | |
| "learning_rate": 1.7833341963403312e-06, | |
| "loss": 0.0473, | |
| "step": 743, | |
| "ts_encoder_learning_rate": 1.771079518823799e-06 | |
| }, | |
| { | |
| "epoch": 1.9595518576489002, | |
| "grad_norm": 1.1039356442587034, | |
| "learning_rate": 1.771079518823799e-06, | |
| "loss": 0.0422, | |
| "step": 744, | |
| "ts_encoder_learning_rate": 1.7588580234610592e-06 | |
| }, | |
| { | |
| "epoch": 1.9621879891259577, | |
| "grad_norm": 0.8174282643803666, | |
| "learning_rate": 1.7588580234610592e-06, | |
| "loss": 0.0406, | |
| "step": 745, | |
| "ts_encoder_learning_rate": 1.7466698358468825e-06 | |
| }, | |
| { | |
| "epoch": 1.964824120603015, | |
| "grad_norm": 0.7214652835866359, | |
| "learning_rate": 1.7466698358468825e-06, | |
| "loss": 0.0326, | |
| "step": 746, | |
| "ts_encoder_learning_rate": 1.7345150812337564e-06 | |
| }, | |
| { | |
| "epoch": 1.9674602520800724, | |
| "grad_norm": 1.2667444333402988, | |
| "learning_rate": 1.7345150812337564e-06, | |
| "loss": 0.0415, | |
| "step": 747, | |
| "ts_encoder_learning_rate": 1.7223938845305932e-06 | |
| }, | |
| { | |
| "epoch": 1.97009638355713, | |
| "grad_norm": 0.9773679348538253, | |
| "learning_rate": 1.7223938845305932e-06, | |
| "loss": 0.0369, | |
| "step": 748, | |
| "ts_encoder_learning_rate": 1.7103063703014372e-06 | |
| }, | |
| { | |
| "epoch": 1.9727325150341875, | |
| "grad_norm": 1.1325567784328214, | |
| "learning_rate": 1.7103063703014372e-06, | |
| "loss": 0.0364, | |
| "step": 749, | |
| "ts_encoder_learning_rate": 1.6982526627642043e-06 | |
| }, | |
| { | |
| "epoch": 1.9753686465112448, | |
| "grad_norm": 0.8996954549073614, | |
| "learning_rate": 1.6982526627642043e-06, | |
| "loss": 0.0329, | |
| "step": 750, | |
| "ts_encoder_learning_rate": 1.6862328857893856e-06 | |
| }, | |
| { | |
| "epoch": 1.978004777988302, | |
| "grad_norm": 0.7139793070917391, | |
| "learning_rate": 1.6862328857893856e-06, | |
| "loss": 0.036, | |
| "step": 751, | |
| "ts_encoder_learning_rate": 1.6742471628987894e-06 | |
| }, | |
| { | |
| "epoch": 1.9806409094653596, | |
| "grad_norm": 0.7560937228601913, | |
| "learning_rate": 1.6742471628987894e-06, | |
| "loss": 0.0334, | |
| "step": 752, | |
| "ts_encoder_learning_rate": 1.6622956172642601e-06 | |
| }, | |
| { | |
| "epoch": 1.983277040942417, | |
| "grad_norm": 1.6103163078678424, | |
| "learning_rate": 1.6622956172642601e-06, | |
| "loss": 0.0371, | |
| "step": 753, | |
| "ts_encoder_learning_rate": 1.6503783717064247e-06 | |
| }, | |
| { | |
| "epoch": 1.9859131724194743, | |
| "grad_norm": 1.6781996659316394, | |
| "learning_rate": 1.6503783717064247e-06, | |
| "loss": 0.0394, | |
| "step": 754, | |
| "ts_encoder_learning_rate": 1.6384955486934157e-06 | |
| }, | |
| { | |
| "epoch": 1.9885493038965318, | |
| "grad_norm": 1.69153268173132, | |
| "learning_rate": 1.6384955486934157e-06, | |
| "loss": 0.0311, | |
| "step": 755, | |
| "ts_encoder_learning_rate": 1.6266472703396286e-06 | |
| }, | |
| { | |
| "epoch": 1.9911854353735894, | |
| "grad_norm": 0.7719824022746361, | |
| "learning_rate": 1.6266472703396286e-06, | |
| "loss": 0.0262, | |
| "step": 756, | |
| "ts_encoder_learning_rate": 1.6148336584044539e-06 | |
| }, | |
| { | |
| "epoch": 1.9938215668506467, | |
| "grad_norm": 1.3733252437657573, | |
| "learning_rate": 1.6148336584044539e-06, | |
| "loss": 0.0455, | |
| "step": 757, | |
| "ts_encoder_learning_rate": 1.6030548342910302e-06 | |
| }, | |
| { | |
| "epoch": 1.996457698327704, | |
| "grad_norm": 0.9175183384074306, | |
| "learning_rate": 1.6030548342910302e-06, | |
| "loss": 0.0322, | |
| "step": 758, | |
| "ts_encoder_learning_rate": 1.5913109190450033e-06 | |
| }, | |
| { | |
| "epoch": 1.9990938298047616, | |
| "grad_norm": 1.1099095764607132, | |
| "learning_rate": 1.5913109190450033e-06, | |
| "loss": 0.0394, | |
| "step": 759, | |
| "ts_encoder_learning_rate": 1.5796020333532696e-06 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.1099095764607132, | |
| "learning_rate": 1.5796020333532696e-06, | |
| "loss": 0.0092, | |
| "step": 760, | |
| "ts_encoder_learning_rate": 1.567928297542749e-06 | |
| }, | |
| { | |
| "epoch": 2.0026361314770575, | |
| "grad_norm": 0.9713547183808008, | |
| "learning_rate": 1.567928297542749e-06, | |
| "loss": 0.023, | |
| "step": 761, | |
| "ts_encoder_learning_rate": 1.5562898315791354e-06 | |
| }, | |
| { | |
| "epoch": 2.0052722629541146, | |
| "grad_norm": 0.7009540897546592, | |
| "learning_rate": 1.5562898315791354e-06, | |
| "loss": 0.0244, | |
| "step": 762, | |
| "ts_encoder_learning_rate": 1.544686755065677e-06 | |
| }, | |
| { | |
| "epoch": 2.007908394431172, | |
| "grad_norm": 0.6101415210273791, | |
| "learning_rate": 1.544686755065677e-06, | |
| "loss": 0.0277, | |
| "step": 763, | |
| "ts_encoder_learning_rate": 1.5331191872419349e-06 | |
| }, | |
| { | |
| "epoch": 2.0105445259082297, | |
| "grad_norm": 1.326219222691662, | |
| "learning_rate": 1.5331191872419349e-06, | |
| "loss": 0.0299, | |
| "step": 764, | |
| "ts_encoder_learning_rate": 1.5215872469825682e-06 | |
| }, | |
| { | |
| "epoch": 2.0131806573852873, | |
| "grad_norm": 1.0899654240221532, | |
| "learning_rate": 1.5215872469825682e-06, | |
| "loss": 0.0272, | |
| "step": 765, | |
| "ts_encoder_learning_rate": 1.510091052796105e-06 | |
| }, | |
| { | |
| "epoch": 2.0158167888623444, | |
| "grad_norm": 1.087434101218195, | |
| "learning_rate": 1.510091052796105e-06, | |
| "loss": 0.0369, | |
| "step": 766, | |
| "ts_encoder_learning_rate": 1.4986307228237268e-06 | |
| }, | |
| { | |
| "epoch": 2.018452920339402, | |
| "grad_norm": 0.7445449957284473, | |
| "learning_rate": 1.4986307228237268e-06, | |
| "loss": 0.0281, | |
| "step": 767, | |
| "ts_encoder_learning_rate": 1.4872063748380544e-06 | |
| }, | |
| { | |
| "epoch": 2.0210890518164595, | |
| "grad_norm": 0.7216822829913329, | |
| "learning_rate": 1.4872063748380544e-06, | |
| "loss": 0.0268, | |
| "step": 768, | |
| "ts_encoder_learning_rate": 1.4758181262419425e-06 | |
| }, | |
| { | |
| "epoch": 2.0237251832935166, | |
| "grad_norm": 0.6931261426498003, | |
| "learning_rate": 1.4758181262419425e-06, | |
| "loss": 0.0282, | |
| "step": 769, | |
| "ts_encoder_learning_rate": 1.4644660940672628e-06 | |
| }, | |
| { | |
| "epoch": 2.026361314770574, | |
| "grad_norm": 0.9133884937103188, | |
| "learning_rate": 1.4644660940672628e-06, | |
| "loss": 0.0322, | |
| "step": 770, | |
| "ts_encoder_learning_rate": 1.4531503949737107e-06 | |
| }, | |
| { | |
| "epoch": 2.0289974462476317, | |
| "grad_norm": 1.0086856147825554, | |
| "learning_rate": 1.4531503949737107e-06, | |
| "loss": 0.0363, | |
| "step": 771, | |
| "ts_encoder_learning_rate": 1.4418711452476048e-06 | |
| }, | |
| { | |
| "epoch": 2.031633577724689, | |
| "grad_norm": 1.6070882260232144, | |
| "learning_rate": 1.4418711452476048e-06, | |
| "loss": 0.0249, | |
| "step": 772, | |
| "ts_encoder_learning_rate": 1.4306284608006837e-06 | |
| }, | |
| { | |
| "epoch": 2.0342697092017463, | |
| "grad_norm": 0.7314536247624556, | |
| "learning_rate": 1.4306284608006837e-06, | |
| "loss": 0.0274, | |
| "step": 773, | |
| "ts_encoder_learning_rate": 1.4194224571689286e-06 | |
| }, | |
| { | |
| "epoch": 2.036905840678804, | |
| "grad_norm": 0.9803513438084768, | |
| "learning_rate": 1.4194224571689286e-06, | |
| "loss": 0.0262, | |
| "step": 774, | |
| "ts_encoder_learning_rate": 1.4082532495113627e-06 | |
| }, | |
| { | |
| "epoch": 2.0395419721558614, | |
| "grad_norm": 0.8684478482698204, | |
| "learning_rate": 1.4082532495113627e-06, | |
| "loss": 0.0347, | |
| "step": 775, | |
| "ts_encoder_learning_rate": 1.3971209526088764e-06 | |
| }, | |
| { | |
| "epoch": 2.0421781036329185, | |
| "grad_norm": 1.6463822697507045, | |
| "learning_rate": 1.3971209526088764e-06, | |
| "loss": 0.0199, | |
| "step": 776, | |
| "ts_encoder_learning_rate": 1.3860256808630429e-06 | |
| }, | |
| { | |
| "epoch": 2.044814235109976, | |
| "grad_norm": 0.6542575533228376, | |
| "learning_rate": 1.3860256808630429e-06, | |
| "loss": 0.0162, | |
| "step": 777, | |
| "ts_encoder_learning_rate": 1.3749675482949487e-06 | |
| }, | |
| { | |
| "epoch": 2.0474503665870336, | |
| "grad_norm": 0.8414387552707323, | |
| "learning_rate": 1.3749675482949487e-06, | |
| "loss": 0.0194, | |
| "step": 778, | |
| "ts_encoder_learning_rate": 1.3639466685440133e-06 | |
| }, | |
| { | |
| "epoch": 2.050086498064091, | |
| "grad_norm": 0.8702132467221131, | |
| "learning_rate": 1.3639466685440133e-06, | |
| "loss": 0.0225, | |
| "step": 779, | |
| "ts_encoder_learning_rate": 1.3529631548668298e-06 | |
| }, | |
| { | |
| "epoch": 2.0527226295411483, | |
| "grad_norm": 0.7835222402353583, | |
| "learning_rate": 1.3529631548668298e-06, | |
| "loss": 0.0254, | |
| "step": 780, | |
| "ts_encoder_learning_rate": 1.3420171201359933e-06 | |
| }, | |
| { | |
| "epoch": 2.055358761018206, | |
| "grad_norm": 0.8617901557793036, | |
| "learning_rate": 1.3420171201359933e-06, | |
| "loss": 0.0214, | |
| "step": 781, | |
| "ts_encoder_learning_rate": 1.331108676838948e-06 | |
| }, | |
| { | |
| "epoch": 2.0579948924952634, | |
| "grad_norm": 0.8593905981383287, | |
| "learning_rate": 1.331108676838948e-06, | |
| "loss": 0.0215, | |
| "step": 782, | |
| "ts_encoder_learning_rate": 1.3202379370768254e-06 | |
| }, | |
| { | |
| "epoch": 2.0606310239723205, | |
| "grad_norm": 0.9813541929977725, | |
| "learning_rate": 1.3202379370768254e-06, | |
| "loss": 0.0244, | |
| "step": 783, | |
| "ts_encoder_learning_rate": 1.3094050125632973e-06 | |
| }, | |
| { | |
| "epoch": 2.063267155449378, | |
| "grad_norm": 0.973576019815649, | |
| "learning_rate": 1.3094050125632973e-06, | |
| "loss": 0.0267, | |
| "step": 784, | |
| "ts_encoder_learning_rate": 1.298610014623423e-06 | |
| }, | |
| { | |
| "epoch": 2.0659032869264355, | |
| "grad_norm": 0.6999339833328826, | |
| "learning_rate": 1.298610014623423e-06, | |
| "loss": 0.025, | |
| "step": 785, | |
| "ts_encoder_learning_rate": 1.2878530541925077e-06 | |
| }, | |
| { | |
| "epoch": 2.0685394184034926, | |
| "grad_norm": 1.0152957492148167, | |
| "learning_rate": 1.2878530541925077e-06, | |
| "loss": 0.0229, | |
| "step": 786, | |
| "ts_encoder_learning_rate": 1.2771342418149658e-06 | |
| }, | |
| { | |
| "epoch": 2.07117554988055, | |
| "grad_norm": 0.8698513957975991, | |
| "learning_rate": 1.2771342418149658e-06, | |
| "loss": 0.0265, | |
| "step": 787, | |
| "ts_encoder_learning_rate": 1.2664536876431755e-06 | |
| }, | |
| { | |
| "epoch": 2.0738116813576077, | |
| "grad_norm": 1.52591637899822, | |
| "learning_rate": 1.2664536876431755e-06, | |
| "loss": 0.0301, | |
| "step": 788, | |
| "ts_encoder_learning_rate": 1.2558115014363592e-06 | |
| }, | |
| { | |
| "epoch": 2.0764478128346653, | |
| "grad_norm": 0.9350945862866684, | |
| "learning_rate": 1.2558115014363592e-06, | |
| "loss": 0.0302, | |
| "step": 789, | |
| "ts_encoder_learning_rate": 1.2452077925594435e-06 | |
| }, | |
| { | |
| "epoch": 2.0790839443117224, | |
| "grad_norm": 0.8787004897897062, | |
| "learning_rate": 1.2452077925594435e-06, | |
| "loss": 0.0231, | |
| "step": 790, | |
| "ts_encoder_learning_rate": 1.234642669981946e-06 | |
| }, | |
| { | |
| "epoch": 2.08172007578878, | |
| "grad_norm": 0.6989519656800383, | |
| "learning_rate": 1.234642669981946e-06, | |
| "loss": 0.0277, | |
| "step": 791, | |
| "ts_encoder_learning_rate": 1.2241162422768444e-06 | |
| }, | |
| { | |
| "epoch": 2.0843562072658375, | |
| "grad_norm": 0.6816146317530842, | |
| "learning_rate": 1.2241162422768444e-06, | |
| "loss": 0.018, | |
| "step": 792, | |
| "ts_encoder_learning_rate": 1.2136286176194744e-06 | |
| }, | |
| { | |
| "epoch": 2.0869923387428946, | |
| "grad_norm": 0.9698620591378605, | |
| "learning_rate": 1.2136286176194744e-06, | |
| "loss": 0.0299, | |
| "step": 793, | |
| "ts_encoder_learning_rate": 1.203179903786401e-06 | |
| }, | |
| { | |
| "epoch": 2.089628470219952, | |
| "grad_norm": 1.0709130550444075, | |
| "learning_rate": 1.203179903786401e-06, | |
| "loss": 0.0267, | |
| "step": 794, | |
| "ts_encoder_learning_rate": 1.1927702081543279e-06 | |
| }, | |
| { | |
| "epoch": 2.0922646016970097, | |
| "grad_norm": 0.8147652241024573, | |
| "learning_rate": 1.1927702081543279e-06, | |
| "loss": 0.026, | |
| "step": 795, | |
| "ts_encoder_learning_rate": 1.1823996376989849e-06 | |
| }, | |
| { | |
| "epoch": 2.094900733174067, | |
| "grad_norm": 0.9483527494174168, | |
| "learning_rate": 1.1823996376989849e-06, | |
| "loss": 0.0363, | |
| "step": 796, | |
| "ts_encoder_learning_rate": 1.1720682989940264e-06 | |
| }, | |
| { | |
| "epoch": 2.0975368646511243, | |
| "grad_norm": 1.2061966993790179, | |
| "learning_rate": 1.1720682989940264e-06, | |
| "loss": 0.0204, | |
| "step": 797, | |
| "ts_encoder_learning_rate": 1.1617762982099446e-06 | |
| }, | |
| { | |
| "epoch": 2.100172996128182, | |
| "grad_norm": 0.6048278244859194, | |
| "learning_rate": 1.1617762982099446e-06, | |
| "loss": 0.025, | |
| "step": 798, | |
| "ts_encoder_learning_rate": 1.1515237411129698e-06 | |
| }, | |
| { | |
| "epoch": 2.1028091276052394, | |
| "grad_norm": 0.9014144838221269, | |
| "learning_rate": 1.1515237411129698e-06, | |
| "loss": 0.0231, | |
| "step": 799, | |
| "ts_encoder_learning_rate": 1.141310733063991e-06 | |
| }, | |
| { | |
| "epoch": 2.1054452590822965, | |
| "grad_norm": 0.779763420400092, | |
| "learning_rate": 1.141310733063991e-06, | |
| "loss": 0.0227, | |
| "step": 800, | |
| "ts_encoder_learning_rate": 1.1311373790174656e-06 | |
| }, | |
| { | |
| "epoch": 2.108081390559354, | |
| "grad_norm": 1.1737340769983258, | |
| "learning_rate": 1.1311373790174656e-06, | |
| "loss": 0.0204, | |
| "step": 801, | |
| "ts_encoder_learning_rate": 1.1210037835203508e-06 | |
| }, | |
| { | |
| "epoch": 2.1107175220364116, | |
| "grad_norm": 0.9202975123489521, | |
| "learning_rate": 1.1210037835203508e-06, | |
| "loss": 0.023, | |
| "step": 802, | |
| "ts_encoder_learning_rate": 1.1109100507110133e-06 | |
| }, | |
| { | |
| "epoch": 2.113353653513469, | |
| "grad_norm": 1.612705002454156, | |
| "learning_rate": 1.1109100507110133e-06, | |
| "loss": 0.0165, | |
| "step": 803, | |
| "ts_encoder_learning_rate": 1.1008562843181796e-06 | |
| }, | |
| { | |
| "epoch": 2.1159897849905263, | |
| "grad_norm": 0.6460378715478632, | |
| "learning_rate": 1.1008562843181796e-06, | |
| "loss": 0.0315, | |
| "step": 804, | |
| "ts_encoder_learning_rate": 1.0908425876598512e-06 | |
| }, | |
| { | |
| "epoch": 2.118625916467584, | |
| "grad_norm": 0.9987512843535336, | |
| "learning_rate": 1.0908425876598512e-06, | |
| "loss": 0.0295, | |
| "step": 805, | |
| "ts_encoder_learning_rate": 1.0808690636422587e-06 | |
| }, | |
| { | |
| "epoch": 2.1212620479446413, | |
| "grad_norm": 1.0149200333623514, | |
| "learning_rate": 1.0808690636422587e-06, | |
| "loss": 0.0211, | |
| "step": 806, | |
| "ts_encoder_learning_rate": 1.0709358147587883e-06 | |
| }, | |
| { | |
| "epoch": 2.1238981794216985, | |
| "grad_norm": 0.9119696321626057, | |
| "learning_rate": 1.0709358147587883e-06, | |
| "loss": 0.02, | |
| "step": 807, | |
| "ts_encoder_learning_rate": 1.0610429430889451e-06 | |
| }, | |
| { | |
| "epoch": 2.126534310898756, | |
| "grad_norm": 1.0352913040595793, | |
| "learning_rate": 1.0610429430889451e-06, | |
| "loss": 0.0275, | |
| "step": 808, | |
| "ts_encoder_learning_rate": 1.0511905502972885e-06 | |
| }, | |
| { | |
| "epoch": 2.1291704423758135, | |
| "grad_norm": 1.0733762586021238, | |
| "learning_rate": 1.0511905502972885e-06, | |
| "loss": 0.0183, | |
| "step": 809, | |
| "ts_encoder_learning_rate": 1.041378737632402e-06 | |
| }, | |
| { | |
| "epoch": 2.131806573852871, | |
| "grad_norm": 1.1892591837270536, | |
| "learning_rate": 1.041378737632402e-06, | |
| "loss": 0.0264, | |
| "step": 810, | |
| "ts_encoder_learning_rate": 1.031607605925839e-06 | |
| }, | |
| { | |
| "epoch": 2.134442705329928, | |
| "grad_norm": 0.886404626398071, | |
| "learning_rate": 1.031607605925839e-06, | |
| "loss": 0.0183, | |
| "step": 811, | |
| "ts_encoder_learning_rate": 1.0218772555910955e-06 | |
| }, | |
| { | |
| "epoch": 2.1370788368069857, | |
| "grad_norm": 0.7677446140505582, | |
| "learning_rate": 1.0218772555910955e-06, | |
| "loss": 0.0174, | |
| "step": 812, | |
| "ts_encoder_learning_rate": 1.0121877866225783e-06 | |
| }, | |
| { | |
| "epoch": 2.1397149682840433, | |
| "grad_norm": 0.9647219206963211, | |
| "learning_rate": 1.0121877866225783e-06, | |
| "loss": 0.0312, | |
| "step": 813, | |
| "ts_encoder_learning_rate": 1.0025392985945703e-06 | |
| }, | |
| { | |
| "epoch": 2.1423510997611004, | |
| "grad_norm": 1.18116857549236, | |
| "learning_rate": 1.0025392985945703e-06, | |
| "loss": 0.0328, | |
| "step": 814, | |
| "ts_encoder_learning_rate": 9.929318906602176e-07 | |
| }, | |
| { | |
| "epoch": 2.144987231238158, | |
| "grad_norm": 1.2020647866794596, | |
| "learning_rate": 9.929318906602176e-07, | |
| "loss": 0.0248, | |
| "step": 815, | |
| "ts_encoder_learning_rate": 9.833656615504978e-07 | |
| }, | |
| { | |
| "epoch": 2.1476233627152155, | |
| "grad_norm": 0.8370574190670566, | |
| "learning_rate": 9.833656615504978e-07, | |
| "loss": 0.0146, | |
| "step": 816, | |
| "ts_encoder_learning_rate": 9.738407095732195e-07 | |
| }, | |
| { | |
| "epoch": 2.150259494192273, | |
| "grad_norm": 1.0299221864599144, | |
| "learning_rate": 9.738407095732195e-07, | |
| "loss": 0.0217, | |
| "step": 817, | |
| "ts_encoder_learning_rate": 9.643571326119982e-07 | |
| }, | |
| { | |
| "epoch": 2.15289562566933, | |
| "grad_norm": 0.8018415484592077, | |
| "learning_rate": 9.643571326119982e-07, | |
| "loss": 0.0139, | |
| "step": 818, | |
| "ts_encoder_learning_rate": 9.549150281252633e-07 | |
| }, | |
| { | |
| "epoch": 2.1555317571463877, | |
| "grad_norm": 0.5028892166090512, | |
| "learning_rate": 9.549150281252633e-07, | |
| "loss": 0.0197, | |
| "step": 819, | |
| "ts_encoder_learning_rate": 9.455144931452459e-07 | |
| }, | |
| { | |
| "epoch": 2.158167888623445, | |
| "grad_norm": 0.6355920739705047, | |
| "learning_rate": 9.455144931452459e-07, | |
| "loss": 0.0251, | |
| "step": 820, | |
| "ts_encoder_learning_rate": 9.361556242769871e-07 | |
| }, | |
| { | |
| "epoch": 2.1608040201005023, | |
| "grad_norm": 1.1083085846789678, | |
| "learning_rate": 9.361556242769871e-07, | |
| "loss": 0.02, | |
| "step": 821, | |
| "ts_encoder_learning_rate": 9.26838517697346e-07 | |
| }, | |
| { | |
| "epoch": 2.16344015157756, | |
| "grad_norm": 0.9876632473531536, | |
| "learning_rate": 9.26838517697346e-07, | |
| "loss": 0.0351, | |
| "step": 822, | |
| "ts_encoder_learning_rate": 9.175632691540065e-07 | |
| }, | |
| { | |
| "epoch": 2.1660762830546174, | |
| "grad_norm": 2.2693420737855523, | |
| "learning_rate": 9.175632691540065e-07, | |
| "loss": 0.0276, | |
| "step": 823, | |
| "ts_encoder_learning_rate": 9.083299739645007e-07 | |
| }, | |
| { | |
| "epoch": 2.168712414531675, | |
| "grad_norm": 23.226392431703353, | |
| "learning_rate": 9.083299739645007e-07, | |
| "loss": 0.0242, | |
| "step": 824, | |
| "ts_encoder_learning_rate": 8.991387270152202e-07 | |
| }, | |
| { | |
| "epoch": 2.171348546008732, | |
| "grad_norm": 0.75772536779788, | |
| "learning_rate": 8.991387270152202e-07, | |
| "loss": 0.0178, | |
| "step": 825, | |
| "ts_encoder_learning_rate": 8.899896227604509e-07 | |
| }, | |
| { | |
| "epoch": 2.1739846774857896, | |
| "grad_norm": 0.7879892794840004, | |
| "learning_rate": 8.899896227604509e-07, | |
| "loss": 0.021, | |
| "step": 826, | |
| "ts_encoder_learning_rate": 8.808827552213917e-07 | |
| }, | |
| { | |
| "epoch": 2.176620808962847, | |
| "grad_norm": 0.6285382943818703, | |
| "learning_rate": 8.808827552213917e-07, | |
| "loss": 0.0275, | |
| "step": 827, | |
| "ts_encoder_learning_rate": 8.718182179851998e-07 | |
| }, | |
| { | |
| "epoch": 2.1792569404399043, | |
| "grad_norm": 0.8438059272532128, | |
| "learning_rate": 8.718182179851998e-07, | |
| "loss": 0.0222, | |
| "step": 828, | |
| "ts_encoder_learning_rate": 8.627961042040183e-07 | |
| }, | |
| { | |
| "epoch": 2.181893071916962, | |
| "grad_norm": 0.8841129842435451, | |
| "learning_rate": 8.627961042040183e-07, | |
| "loss": 0.0263, | |
| "step": 829, | |
| "ts_encoder_learning_rate": 8.538165065940263e-07 | |
| }, | |
| { | |
| "epoch": 2.1845292033940193, | |
| "grad_norm": 1.1647962750768701, | |
| "learning_rate": 8.538165065940263e-07, | |
| "loss": 0.0287, | |
| "step": 830, | |
| "ts_encoder_learning_rate": 8.448795174344803e-07 | |
| }, | |
| { | |
| "epoch": 2.187165334871077, | |
| "grad_norm": 0.6223444639742729, | |
| "learning_rate": 8.448795174344803e-07, | |
| "loss": 0.0182, | |
| "step": 831, | |
| "ts_encoder_learning_rate": 8.359852285667752e-07 | |
| }, | |
| { | |
| "epoch": 2.189801466348134, | |
| "grad_norm": 0.848108350576564, | |
| "learning_rate": 8.359852285667752e-07, | |
| "loss": 0.0278, | |
| "step": 832, | |
| "ts_encoder_learning_rate": 8.271337313934869e-07 | |
| }, | |
| { | |
| "epoch": 2.1924375978251915, | |
| "grad_norm": 0.6976201249959373, | |
| "learning_rate": 8.271337313934869e-07, | |
| "loss": 0.0254, | |
| "step": 833, | |
| "ts_encoder_learning_rate": 8.183251168774476e-07 | |
| }, | |
| { | |
| "epoch": 2.195073729302249, | |
| "grad_norm": 0.839401112904285, | |
| "learning_rate": 8.183251168774476e-07, | |
| "loss": 0.0146, | |
| "step": 834, | |
| "ts_encoder_learning_rate": 8.095594755407971e-07 | |
| }, | |
| { | |
| "epoch": 2.197709860779306, | |
| "grad_norm": 1.2538696240617628, | |
| "learning_rate": 8.095594755407971e-07, | |
| "loss": 0.0247, | |
| "step": 835, | |
| "ts_encoder_learning_rate": 8.008368974640634e-07 | |
| }, | |
| { | |
| "epoch": 2.2003459922563637, | |
| "grad_norm": 2.0495039413289633, | |
| "learning_rate": 8.008368974640634e-07, | |
| "loss": 0.0259, | |
| "step": 836, | |
| "ts_encoder_learning_rate": 7.921574722852343e-07 | |
| }, | |
| { | |
| "epoch": 2.2029821237334213, | |
| "grad_norm": 0.8059355568372082, | |
| "learning_rate": 7.921574722852343e-07, | |
| "loss": 0.0254, | |
| "step": 837, | |
| "ts_encoder_learning_rate": 7.835212891988292e-07 | |
| }, | |
| { | |
| "epoch": 2.205618255210479, | |
| "grad_norm": 1.1603161094730907, | |
| "learning_rate": 7.835212891988292e-07, | |
| "loss": 0.0222, | |
| "step": 838, | |
| "ts_encoder_learning_rate": 7.749284369549954e-07 | |
| }, | |
| { | |
| "epoch": 2.208254386687536, | |
| "grad_norm": 0.642346193891705, | |
| "learning_rate": 7.749284369549954e-07, | |
| "loss": 0.0173, | |
| "step": 839, | |
| "ts_encoder_learning_rate": 7.663790038585794e-07 | |
| }, | |
| { | |
| "epoch": 2.2108905181645935, | |
| "grad_norm": 0.969152855410992, | |
| "learning_rate": 7.663790038585794e-07, | |
| "loss": 0.0195, | |
| "step": 840, | |
| "ts_encoder_learning_rate": 7.578730777682386e-07 | |
| }, | |
| { | |
| "epoch": 2.213526649641651, | |
| "grad_norm": 0.6006548208059819, | |
| "learning_rate": 7.578730777682386e-07, | |
| "loss": 0.0187, | |
| "step": 841, | |
| "ts_encoder_learning_rate": 7.494107460955207e-07 | |
| }, | |
| { | |
| "epoch": 2.216162781118708, | |
| "grad_norm": 0.9560113777748233, | |
| "learning_rate": 7.494107460955207e-07, | |
| "loss": 0.019, | |
| "step": 842, | |
| "ts_encoder_learning_rate": 7.409920958039795e-07 | |
| }, | |
| { | |
| "epoch": 2.2187989125957657, | |
| "grad_norm": 1.0871309619249734, | |
| "learning_rate": 7.409920958039795e-07, | |
| "loss": 0.0196, | |
| "step": 843, | |
| "ts_encoder_learning_rate": 7.326172134082704e-07 | |
| }, | |
| { | |
| "epoch": 2.221435044072823, | |
| "grad_norm": 0.8419045626414003, | |
| "learning_rate": 7.326172134082704e-07, | |
| "loss": 0.0326, | |
| "step": 844, | |
| "ts_encoder_learning_rate": 7.242861849732696e-07 | |
| }, | |
| { | |
| "epoch": 2.2240711755498808, | |
| "grad_norm": 1.0419111956667741, | |
| "learning_rate": 7.242861849732696e-07, | |
| "loss": 0.0314, | |
| "step": 845, | |
| "ts_encoder_learning_rate": 7.159990961131818e-07 | |
| }, | |
| { | |
| "epoch": 2.226707307026938, | |
| "grad_norm": 0.8401412848739376, | |
| "learning_rate": 7.159990961131818e-07, | |
| "loss": 0.0304, | |
| "step": 846, | |
| "ts_encoder_learning_rate": 7.077560319906696e-07 | |
| }, | |
| { | |
| "epoch": 2.2293434385039954, | |
| "grad_norm": 0.9818544247472701, | |
| "learning_rate": 7.077560319906696e-07, | |
| "loss": 0.0328, | |
| "step": 847, | |
| "ts_encoder_learning_rate": 6.995570773159693e-07 | |
| }, | |
| { | |
| "epoch": 2.231979569981053, | |
| "grad_norm": 0.9988984189818604, | |
| "learning_rate": 6.995570773159693e-07, | |
| "loss": 0.036, | |
| "step": 848, | |
| "ts_encoder_learning_rate": 6.914023163460248e-07 | |
| }, | |
| { | |
| "epoch": 2.23461570145811, | |
| "grad_norm": 1.8121563715937015, | |
| "learning_rate": 6.914023163460248e-07, | |
| "loss": 0.0293, | |
| "step": 849, | |
| "ts_encoder_learning_rate": 6.832918328836247e-07 | |
| }, | |
| { | |
| "epoch": 2.2372518329351676, | |
| "grad_norm": 1.4137491922964698, | |
| "learning_rate": 6.832918328836247e-07, | |
| "loss": 0.0284, | |
| "step": 850, | |
| "ts_encoder_learning_rate": 6.752257102765325e-07 | |
| }, | |
| { | |
| "epoch": 2.239887964412225, | |
| "grad_norm": 0.7043298270791407, | |
| "learning_rate": 6.752257102765325e-07, | |
| "loss": 0.0254, | |
| "step": 851, | |
| "ts_encoder_learning_rate": 6.6720403141664e-07 | |
| }, | |
| { | |
| "epoch": 2.2425240958892827, | |
| "grad_norm": 0.9273068808985826, | |
| "learning_rate": 6.6720403141664e-07, | |
| "loss": 0.0254, | |
| "step": 852, | |
| "ts_encoder_learning_rate": 6.592268787391077e-07 | |
| }, | |
| { | |
| "epoch": 2.24516022736634, | |
| "grad_norm": 0.9385646367897301, | |
| "learning_rate": 6.592268787391077e-07, | |
| "loss": 0.0298, | |
| "step": 853, | |
| "ts_encoder_learning_rate": 6.512943342215234e-07 | |
| }, | |
| { | |
| "epoch": 2.2477963588433973, | |
| "grad_norm": 0.6600040913818298, | |
| "learning_rate": 6.512943342215234e-07, | |
| "loss": 0.0237, | |
| "step": 854, | |
| "ts_encoder_learning_rate": 6.43406479383053e-07 | |
| }, | |
| { | |
| "epoch": 2.250432490320455, | |
| "grad_norm": 0.6282127208289683, | |
| "learning_rate": 6.43406479383053e-07, | |
| "loss": 0.0213, | |
| "step": 855, | |
| "ts_encoder_learning_rate": 6.355633952836115e-07 | |
| }, | |
| { | |
| "epoch": 2.253068621797512, | |
| "grad_norm": 0.6788883483435632, | |
| "learning_rate": 6.355633952836115e-07, | |
| "loss": 0.021, | |
| "step": 856, | |
| "ts_encoder_learning_rate": 6.277651625230219e-07 | |
| }, | |
| { | |
| "epoch": 2.2557047532745695, | |
| "grad_norm": 0.6668730348823718, | |
| "learning_rate": 6.277651625230219e-07, | |
| "loss": 0.0236, | |
| "step": 857, | |
| "ts_encoder_learning_rate": 6.200118612401918e-07 | |
| }, | |
| { | |
| "epoch": 2.258340884751627, | |
| "grad_norm": 7.060799735251775, | |
| "learning_rate": 6.200118612401918e-07, | |
| "loss": 0.0297, | |
| "step": 858, | |
| "ts_encoder_learning_rate": 6.12303571112286e-07 | |
| }, | |
| { | |
| "epoch": 2.2609770162286846, | |
| "grad_norm": 1.046944161496044, | |
| "learning_rate": 6.12303571112286e-07, | |
| "loss": 0.0273, | |
| "step": 859, | |
| "ts_encoder_learning_rate": 6.04640371353914e-07 | |
| }, | |
| { | |
| "epoch": 2.2636131477057417, | |
| "grad_norm": 1.1099528570927184, | |
| "learning_rate": 6.04640371353914e-07, | |
| "loss": 0.0252, | |
| "step": 860, | |
| "ts_encoder_learning_rate": 5.9702234071631e-07 | |
| }, | |
| { | |
| "epoch": 2.2662492791827993, | |
| "grad_norm": 0.9773409732921918, | |
| "learning_rate": 5.9702234071631e-07, | |
| "loss": 0.0294, | |
| "step": 861, | |
| "ts_encoder_learning_rate": 5.89449557486525e-07 | |
| }, | |
| { | |
| "epoch": 2.268885410659857, | |
| "grad_norm": 1.0917243460679, | |
| "learning_rate": 5.89449557486525e-07, | |
| "loss": 0.0138, | |
| "step": 862, | |
| "ts_encoder_learning_rate": 5.819220994866237e-07 | |
| }, | |
| { | |
| "epoch": 2.271521542136914, | |
| "grad_norm": 0.6888106894141612, | |
| "learning_rate": 5.819220994866237e-07, | |
| "loss": 0.0144, | |
| "step": 863, | |
| "ts_encoder_learning_rate": 5.744400440728826e-07 | |
| }, | |
| { | |
| "epoch": 2.2741576736139715, | |
| "grad_norm": 0.633492770066237, | |
| "learning_rate": 5.744400440728826e-07, | |
| "loss": 0.0232, | |
| "step": 864, | |
| "ts_encoder_learning_rate": 5.670034681349995e-07 | |
| }, | |
| { | |
| "epoch": 2.276793805091029, | |
| "grad_norm": 6.0165889346314865, | |
| "learning_rate": 5.670034681349995e-07, | |
| "loss": 0.025, | |
| "step": 865, | |
| "ts_encoder_learning_rate": 5.596124480952975e-07 | |
| }, | |
| { | |
| "epoch": 2.2794299365680866, | |
| "grad_norm": 1.5182521559787252, | |
| "learning_rate": 5.596124480952975e-07, | |
| "loss": 0.0268, | |
| "step": 866, | |
| "ts_encoder_learning_rate": 5.522670599079416e-07 | |
| }, | |
| { | |
| "epoch": 2.2820660680451437, | |
| "grad_norm": 0.8001358979720962, | |
| "learning_rate": 5.522670599079416e-07, | |
| "loss": 0.0213, | |
| "step": 867, | |
| "ts_encoder_learning_rate": 5.449673790581611e-07 | |
| }, | |
| { | |
| "epoch": 2.284702199522201, | |
| "grad_norm": 1.8105317634620746, | |
| "learning_rate": 5.449673790581611e-07, | |
| "loss": 0.0252, | |
| "step": 868, | |
| "ts_encoder_learning_rate": 5.377134805614714e-07 | |
| }, | |
| { | |
| "epoch": 2.2873383309992588, | |
| "grad_norm": 0.9912234653856702, | |
| "learning_rate": 5.377134805614714e-07, | |
| "loss": 0.0237, | |
| "step": 869, | |
| "ts_encoder_learning_rate": 5.305054389629022e-07 | |
| }, | |
| { | |
| "epoch": 2.289974462476316, | |
| "grad_norm": 0.7760075581423171, | |
| "learning_rate": 5.305054389629022e-07, | |
| "loss": 0.0235, | |
| "step": 870, | |
| "ts_encoder_learning_rate": 5.233433283362349e-07 | |
| }, | |
| { | |
| "epoch": 2.2926105939533734, | |
| "grad_norm": 0.6707225852541246, | |
| "learning_rate": 5.233433283362349e-07, | |
| "loss": 0.0217, | |
| "step": 871, | |
| "ts_encoder_learning_rate": 5.162272222832349e-07 | |
| }, | |
| { | |
| "epoch": 2.295246725430431, | |
| "grad_norm": 0.6919555345400337, | |
| "learning_rate": 5.162272222832349e-07, | |
| "loss": 0.027, | |
| "step": 872, | |
| "ts_encoder_learning_rate": 5.091571939329049e-07 | |
| }, | |
| { | |
| "epoch": 2.2978828569074885, | |
| "grad_norm": 0.8176259644215725, | |
| "learning_rate": 5.091571939329049e-07, | |
| "loss": 0.0196, | |
| "step": 873, | |
| "ts_encoder_learning_rate": 5.021333159407232e-07 | |
| }, | |
| { | |
| "epoch": 2.3005189883845456, | |
| "grad_norm": 8.688779032036406, | |
| "learning_rate": 5.021333159407232e-07, | |
| "loss": 0.0222, | |
| "step": 874, | |
| "ts_encoder_learning_rate": 4.951556604879049e-07 | |
| }, | |
| { | |
| "epoch": 2.303155119861603, | |
| "grad_norm": 0.4934714850429601, | |
| "learning_rate": 4.951556604879049e-07, | |
| "loss": 0.0225, | |
| "step": 875, | |
| "ts_encoder_learning_rate": 4.882242992806546e-07 | |
| }, | |
| { | |
| "epoch": 2.3057912513386607, | |
| "grad_norm": 0.6932226455966719, | |
| "learning_rate": 4.882242992806546e-07, | |
| "loss": 0.0279, | |
| "step": 876, | |
| "ts_encoder_learning_rate": 4.813393035494329e-07 | |
| }, | |
| { | |
| "epoch": 2.308427382815718, | |
| "grad_norm": 0.785391963657986, | |
| "learning_rate": 4.813393035494329e-07, | |
| "loss": 0.0257, | |
| "step": 877, | |
| "ts_encoder_learning_rate": 4.745007440482252e-07 | |
| }, | |
| { | |
| "epoch": 2.3110635142927753, | |
| "grad_norm": 0.7002142776657593, | |
| "learning_rate": 4.745007440482252e-07, | |
| "loss": 0.0167, | |
| "step": 878, | |
| "ts_encoder_learning_rate": 4.677086910538092e-07 | |
| }, | |
| { | |
| "epoch": 2.313699645769833, | |
| "grad_norm": 0.6570845075498787, | |
| "learning_rate": 4.677086910538092e-07, | |
| "loss": 0.0204, | |
| "step": 879, | |
| "ts_encoder_learning_rate": 4.6096321436504e-07 | |
| }, | |
| { | |
| "epoch": 2.3163357772468904, | |
| "grad_norm": 0.708907031204872, | |
| "learning_rate": 4.6096321436504e-07, | |
| "loss": 0.0194, | |
| "step": 880, | |
| "ts_encoder_learning_rate": 4.542643833021254e-07 | |
| }, | |
| { | |
| "epoch": 2.3189719087239475, | |
| "grad_norm": 0.7432937034337077, | |
| "learning_rate": 4.542643833021254e-07, | |
| "loss": 0.0244, | |
| "step": 881, | |
| "ts_encoder_learning_rate": 4.4761226670592074e-07 | |
| }, | |
| { | |
| "epoch": 2.321608040201005, | |
| "grad_norm": 1.0057609426812313, | |
| "learning_rate": 4.4761226670592074e-07, | |
| "loss": 0.0243, | |
| "step": 882, | |
| "ts_encoder_learning_rate": 4.410069329372152e-07 | |
| }, | |
| { | |
| "epoch": 2.3242441716780626, | |
| "grad_norm": 0.9748291224257548, | |
| "learning_rate": 4.410069329372152e-07, | |
| "loss": 0.0261, | |
| "step": 883, | |
| "ts_encoder_learning_rate": 4.344484498760343e-07 | |
| }, | |
| { | |
| "epoch": 2.3268803031551197, | |
| "grad_norm": 0.7468342295758325, | |
| "learning_rate": 4.344484498760343e-07, | |
| "loss": 0.017, | |
| "step": 884, | |
| "ts_encoder_learning_rate": 4.279368849209381e-07 | |
| }, | |
| { | |
| "epoch": 2.3295164346321773, | |
| "grad_norm": 0.4990301640406677, | |
| "learning_rate": 4.279368849209381e-07, | |
| "loss": 0.0163, | |
| "step": 885, | |
| "ts_encoder_learning_rate": 4.214723049883307e-07 | |
| }, | |
| { | |
| "epoch": 2.332152566109235, | |
| "grad_norm": 0.8120692295636992, | |
| "learning_rate": 4.214723049883307e-07, | |
| "loss": 0.0271, | |
| "step": 886, | |
| "ts_encoder_learning_rate": 4.150547765117746e-07 | |
| }, | |
| { | |
| "epoch": 2.334788697586292, | |
| "grad_norm": 0.8689858151247111, | |
| "learning_rate": 4.150547765117746e-07, | |
| "loss": 0.0172, | |
| "step": 887, | |
| "ts_encoder_learning_rate": 4.086843654413031e-07 | |
| }, | |
| { | |
| "epoch": 2.3374248290633495, | |
| "grad_norm": 0.7557921121418426, | |
| "learning_rate": 4.086843654413031e-07, | |
| "loss": 0.0148, | |
| "step": 888, | |
| "ts_encoder_learning_rate": 4.0236113724274716e-07 | |
| }, | |
| { | |
| "epoch": 2.340060960540407, | |
| "grad_norm": 0.631642992248285, | |
| "learning_rate": 4.0236113724274716e-07, | |
| "loss": 0.0256, | |
| "step": 889, | |
| "ts_encoder_learning_rate": 3.960851568970586e-07 | |
| }, | |
| { | |
| "epoch": 2.342697092017464, | |
| "grad_norm": 1.080316244149369, | |
| "learning_rate": 3.960851568970586e-07, | |
| "loss": 0.0217, | |
| "step": 890, | |
| "ts_encoder_learning_rate": 3.8985648889964755e-07 | |
| }, | |
| { | |
| "epoch": 2.3453332234945217, | |
| "grad_norm": 1.054131562422457, | |
| "learning_rate": 3.8985648889964755e-07, | |
| "loss": 0.0211, | |
| "step": 891, | |
| "ts_encoder_learning_rate": 3.83675197259713e-07 | |
| }, | |
| { | |
| "epoch": 2.347969354971579, | |
| "grad_norm": 0.8646617497823404, | |
| "learning_rate": 3.83675197259713e-07, | |
| "loss": 0.019, | |
| "step": 892, | |
| "ts_encoder_learning_rate": 3.77541345499593e-07 | |
| }, | |
| { | |
| "epoch": 2.3506054864486368, | |
| "grad_norm": 0.7244425140139443, | |
| "learning_rate": 3.77541345499593e-07, | |
| "loss": 0.0184, | |
| "step": 893, | |
| "ts_encoder_learning_rate": 3.7145499665410147e-07 | |
| }, | |
| { | |
| "epoch": 2.353241617925694, | |
| "grad_norm": 0.5515618977177071, | |
| "learning_rate": 3.7145499665410147e-07, | |
| "loss": 0.0143, | |
| "step": 894, | |
| "ts_encoder_learning_rate": 3.6541621326989183e-07 | |
| }, | |
| { | |
| "epoch": 2.3558777494027514, | |
| "grad_norm": 0.7590154170192799, | |
| "learning_rate": 3.6541621326989183e-07, | |
| "loss": 0.0241, | |
| "step": 895, | |
| "ts_encoder_learning_rate": 3.5942505740480583e-07 | |
| }, | |
| { | |
| "epoch": 2.358513880879809, | |
| "grad_norm": 0.7373124344034562, | |
| "learning_rate": 3.5942505740480583e-07, | |
| "loss": 0.0203, | |
| "step": 896, | |
| "ts_encoder_learning_rate": 3.534815906272404e-07 | |
| }, | |
| { | |
| "epoch": 2.361150012356866, | |
| "grad_norm": 0.7737906407252233, | |
| "learning_rate": 3.534815906272404e-07, | |
| "loss": 0.0122, | |
| "step": 897, | |
| "ts_encoder_learning_rate": 3.475858740155108e-07 | |
| }, | |
| { | |
| "epoch": 2.3637861438339236, | |
| "grad_norm": 0.5846122157036439, | |
| "learning_rate": 3.475858740155108e-07, | |
| "loss": 0.0213, | |
| "step": 898, | |
| "ts_encoder_learning_rate": 3.417379681572297e-07 | |
| }, | |
| { | |
| "epoch": 2.366422275310981, | |
| "grad_norm": 1.1971330946069132, | |
| "learning_rate": 3.417379681572297e-07, | |
| "loss": 0.0182, | |
| "step": 899, | |
| "ts_encoder_learning_rate": 3.359379331486762e-07 | |
| }, | |
| { | |
| "epoch": 2.3690584067880387, | |
| "grad_norm": 0.5894377764075366, | |
| "learning_rate": 3.359379331486762e-07, | |
| "loss": 0.0228, | |
| "step": 900, | |
| "ts_encoder_learning_rate": 3.301858285941845e-07 | |
| }, | |
| { | |
| "epoch": 2.371694538265096, | |
| "grad_norm": 0.6964796421497281, | |
| "learning_rate": 3.301858285941845e-07, | |
| "loss": 0.019, | |
| "step": 901, | |
| "ts_encoder_learning_rate": 3.2448171360552837e-07 | |
| }, | |
| { | |
| "epoch": 2.3743306697421533, | |
| "grad_norm": 0.8068516225517265, | |
| "learning_rate": 3.2448171360552837e-07, | |
| "loss": 0.0242, | |
| "step": 902, | |
| "ts_encoder_learning_rate": 3.18825646801314e-07 | |
| }, | |
| { | |
| "epoch": 2.376966801219211, | |
| "grad_norm": 0.6128923162506175, | |
| "learning_rate": 3.18825646801314e-07, | |
| "loss": 0.0219, | |
| "step": 903, | |
| "ts_encoder_learning_rate": 3.1321768630638073e-07 | |
| }, | |
| { | |
| "epoch": 2.379602932696268, | |
| "grad_norm": 0.9632236483373098, | |
| "learning_rate": 3.1321768630638073e-07, | |
| "loss": 0.0234, | |
| "step": 904, | |
| "ts_encoder_learning_rate": 3.076578897511978e-07 | |
| }, | |
| { | |
| "epoch": 2.3822390641733255, | |
| "grad_norm": 0.7461189266458699, | |
| "learning_rate": 3.076578897511978e-07, | |
| "loss": 0.0144, | |
| "step": 905, | |
| "ts_encoder_learning_rate": 3.0214631427127883e-07 | |
| }, | |
| { | |
| "epoch": 2.384875195650383, | |
| "grad_norm": 0.716902757344509, | |
| "learning_rate": 3.0214631427127883e-07, | |
| "loss": 0.0177, | |
| "step": 906, | |
| "ts_encoder_learning_rate": 2.966830165065876e-07 | |
| }, | |
| { | |
| "epoch": 2.3875113271274406, | |
| "grad_norm": 0.5861169684467917, | |
| "learning_rate": 2.966830165065876e-07, | |
| "loss": 0.027, | |
| "step": 907, | |
| "ts_encoder_learning_rate": 2.912680526009626e-07 | |
| }, | |
| { | |
| "epoch": 2.3901474586044977, | |
| "grad_norm": 0.834726511672464, | |
| "learning_rate": 2.912680526009626e-07, | |
| "loss": 0.0231, | |
| "step": 908, | |
| "ts_encoder_learning_rate": 2.8590147820153513e-07 | |
| }, | |
| { | |
| "epoch": 2.3927835900815553, | |
| "grad_norm": 0.6958964808420537, | |
| "learning_rate": 2.8590147820153513e-07, | |
| "loss": 0.0211, | |
| "step": 909, | |
| "ts_encoder_learning_rate": 2.8058334845816214e-07 | |
| }, | |
| { | |
| "epoch": 2.395419721558613, | |
| "grad_norm": 0.7097540987940018, | |
| "learning_rate": 2.8058334845816214e-07, | |
| "loss": 0.0169, | |
| "step": 910, | |
| "ts_encoder_learning_rate": 2.7531371802285436e-07 | |
| }, | |
| { | |
| "epoch": 2.39805585303567, | |
| "grad_norm": 0.8778409447989682, | |
| "learning_rate": 2.7531371802285436e-07, | |
| "loss": 0.026, | |
| "step": 911, | |
| "ts_encoder_learning_rate": 2.7009264104921606e-07 | |
| }, | |
| { | |
| "epoch": 2.4006919845127275, | |
| "grad_norm": 0.7010540368539773, | |
| "learning_rate": 2.7009264104921606e-07, | |
| "loss": 0.0175, | |
| "step": 912, | |
| "ts_encoder_learning_rate": 2.6492017119189415e-07 | |
| }, | |
| { | |
| "epoch": 2.403328115989785, | |
| "grad_norm": 0.5997922863707355, | |
| "learning_rate": 2.6492017119189415e-07, | |
| "loss": 0.0222, | |
| "step": 913, | |
| "ts_encoder_learning_rate": 2.5979636160601673e-07 | |
| }, | |
| { | |
| "epoch": 2.4059642474668426, | |
| "grad_norm": 0.667743261533583, | |
| "learning_rate": 2.5979636160601673e-07, | |
| "loss": 0.0197, | |
| "step": 914, | |
| "ts_encoder_learning_rate": 2.547212649466568e-07 | |
| }, | |
| { | |
| "epoch": 2.4086003789438997, | |
| "grad_norm": 0.8452729220439927, | |
| "learning_rate": 2.547212649466568e-07, | |
| "loss": 0.0197, | |
| "step": 915, | |
| "ts_encoder_learning_rate": 2.4969493336828353e-07 | |
| }, | |
| { | |
| "epoch": 2.411236510420957, | |
| "grad_norm": 0.726874711910585, | |
| "learning_rate": 2.4969493336828353e-07, | |
| "loss": 0.024, | |
| "step": 916, | |
| "ts_encoder_learning_rate": 2.447174185242324e-07 | |
| }, | |
| { | |
| "epoch": 2.4138726418980148, | |
| "grad_norm": 0.6390019959390945, | |
| "learning_rate": 2.447174185242324e-07, | |
| "loss": 0.0109, | |
| "step": 917, | |
| "ts_encoder_learning_rate": 2.397887715661679e-07 | |
| }, | |
| { | |
| "epoch": 2.416508773375072, | |
| "grad_norm": 0.844652867992998, | |
| "learning_rate": 2.397887715661679e-07, | |
| "loss": 0.0276, | |
| "step": 918, | |
| "ts_encoder_learning_rate": 2.3490904314356412e-07 | |
| }, | |
| { | |
| "epoch": 2.4191449048521294, | |
| "grad_norm": 0.8450103641231302, | |
| "learning_rate": 2.3490904314356412e-07, | |
| "loss": 0.0209, | |
| "step": 919, | |
| "ts_encoder_learning_rate": 2.3007828340318117e-07 | |
| }, | |
| { | |
| "epoch": 2.421781036329187, | |
| "grad_norm": 0.9914129840085654, | |
| "learning_rate": 2.3007828340318117e-07, | |
| "loss": 0.0224, | |
| "step": 920, | |
| "ts_encoder_learning_rate": 2.2529654198854834e-07 | |
| }, | |
| { | |
| "epoch": 2.4244171678062445, | |
| "grad_norm": 0.7485782865431817, | |
| "learning_rate": 2.2529654198854834e-07, | |
| "loss": 0.0231, | |
| "step": 921, | |
| "ts_encoder_learning_rate": 2.205638680394573e-07 | |
| }, | |
| { | |
| "epoch": 2.4270532992833016, | |
| "grad_norm": 0.863071039677985, | |
| "learning_rate": 2.205638680394573e-07, | |
| "loss": 0.0217, | |
| "step": 922, | |
| "ts_encoder_learning_rate": 2.1588031019145638e-07 | |
| }, | |
| { | |
| "epoch": 2.429689430760359, | |
| "grad_norm": 0.7178423022311772, | |
| "learning_rate": 2.1588031019145638e-07, | |
| "loss": 0.0245, | |
| "step": 923, | |
| "ts_encoder_learning_rate": 2.1124591657534776e-07 | |
| }, | |
| { | |
| "epoch": 2.4323255622374167, | |
| "grad_norm": 0.8272616881685937, | |
| "learning_rate": 2.1124591657534776e-07, | |
| "loss": 0.0199, | |
| "step": 924, | |
| "ts_encoder_learning_rate": 2.0666073481669714e-07 | |
| }, | |
| { | |
| "epoch": 2.434961693714474, | |
| "grad_norm": 0.5263023342082227, | |
| "learning_rate": 2.0666073481669714e-07, | |
| "loss": 0.0223, | |
| "step": 925, | |
| "ts_encoder_learning_rate": 2.0212481203534083e-07 | |
| }, | |
| { | |
| "epoch": 2.4375978251915313, | |
| "grad_norm": 1.1822141493073524, | |
| "learning_rate": 2.0212481203534083e-07, | |
| "loss": 0.0273, | |
| "step": 926, | |
| "ts_encoder_learning_rate": 1.9763819484490353e-07 | |
| }, | |
| { | |
| "epoch": 2.440233956668589, | |
| "grad_norm": 0.6818080172841297, | |
| "learning_rate": 1.9763819484490353e-07, | |
| "loss": 0.02, | |
| "step": 927, | |
| "ts_encoder_learning_rate": 1.932009293523196e-07 | |
| }, | |
| { | |
| "epoch": 2.4428700881456464, | |
| "grad_norm": 0.9008331212699131, | |
| "learning_rate": 1.932009293523196e-07, | |
| "loss": 0.0216, | |
| "step": 928, | |
| "ts_encoder_learning_rate": 1.8881306115735632e-07 | |
| }, | |
| { | |
| "epoch": 2.4455062196227035, | |
| "grad_norm": 0.7779126298027532, | |
| "learning_rate": 1.8881306115735632e-07, | |
| "loss": 0.0185, | |
| "step": 929, | |
| "ts_encoder_learning_rate": 1.8447463535214872e-07 | |
| }, | |
| { | |
| "epoch": 2.448142351099761, | |
| "grad_norm": 0.9120921557733357, | |
| "learning_rate": 1.8447463535214872e-07, | |
| "loss": 0.0186, | |
| "step": 930, | |
| "ts_encoder_learning_rate": 1.801856965207338e-07 | |
| }, | |
| { | |
| "epoch": 2.4507784825768186, | |
| "grad_norm": 0.740476134046954, | |
| "learning_rate": 1.801856965207338e-07, | |
| "loss": 0.02, | |
| "step": 931, | |
| "ts_encoder_learning_rate": 1.7594628873859488e-07 | |
| }, | |
| { | |
| "epoch": 2.4534146140538757, | |
| "grad_norm": 0.720558833321943, | |
| "learning_rate": 1.7594628873859488e-07, | |
| "loss": 0.0165, | |
| "step": 932, | |
| "ts_encoder_learning_rate": 1.7175645557220567e-07 | |
| }, | |
| { | |
| "epoch": 2.4560507455309333, | |
| "grad_norm": 0.7232369354288679, | |
| "learning_rate": 1.7175645557220567e-07, | |
| "loss": 0.0258, | |
| "step": 933, | |
| "ts_encoder_learning_rate": 1.6761624007858524e-07 | |
| }, | |
| { | |
| "epoch": 2.458686877007991, | |
| "grad_norm": 0.9619568884736648, | |
| "learning_rate": 1.6761624007858524e-07, | |
| "loss": 0.0206, | |
| "step": 934, | |
| "ts_encoder_learning_rate": 1.6352568480485277e-07 | |
| }, | |
| { | |
| "epoch": 2.4613230084850484, | |
| "grad_norm": 0.5484139865997792, | |
| "learning_rate": 1.6352568480485277e-07, | |
| "loss": 0.0174, | |
| "step": 935, | |
| "ts_encoder_learning_rate": 1.594848317877934e-07 | |
| }, | |
| { | |
| "epoch": 2.4639591399621055, | |
| "grad_norm": 1.1119566633908704, | |
| "learning_rate": 1.594848317877934e-07, | |
| "loss": 0.024, | |
| "step": 936, | |
| "ts_encoder_learning_rate": 1.5549372255342367e-07 | |
| }, | |
| { | |
| "epoch": 2.466595271439163, | |
| "grad_norm": 0.8218791802362867, | |
| "learning_rate": 1.5549372255342367e-07, | |
| "loss": 0.0193, | |
| "step": 937, | |
| "ts_encoder_learning_rate": 1.5155239811656562e-07 | |
| }, | |
| { | |
| "epoch": 2.4692314029162206, | |
| "grad_norm": 0.6933235377212601, | |
| "learning_rate": 1.5155239811656562e-07, | |
| "loss": 0.0288, | |
| "step": 938, | |
| "ts_encoder_learning_rate": 1.4766089898042678e-07 | |
| }, | |
| { | |
| "epoch": 2.4718675343932777, | |
| "grad_norm": 0.953369090336964, | |
| "learning_rate": 1.4766089898042678e-07, | |
| "loss": 0.0214, | |
| "step": 939, | |
| "ts_encoder_learning_rate": 1.4381926513618139e-07 | |
| }, | |
| { | |
| "epoch": 2.474503665870335, | |
| "grad_norm": 0.6976764387240867, | |
| "learning_rate": 1.4381926513618139e-07, | |
| "loss": 0.0197, | |
| "step": 940, | |
| "ts_encoder_learning_rate": 1.4002753606256082e-07 | |
| }, | |
| { | |
| "epoch": 2.4771397973473928, | |
| "grad_norm": 0.7526896911937908, | |
| "learning_rate": 1.4002753606256082e-07, | |
| "loss": 0.0205, | |
| "step": 941, | |
| "ts_encoder_learning_rate": 1.362857507254478e-07 | |
| }, | |
| { | |
| "epoch": 2.4797759288244503, | |
| "grad_norm": 0.7732644266125883, | |
| "learning_rate": 1.362857507254478e-07, | |
| "loss": 0.0196, | |
| "step": 942, | |
| "ts_encoder_learning_rate": 1.3259394757747678e-07 | |
| }, | |
| { | |
| "epoch": 2.4824120603015074, | |
| "grad_norm": 0.7898334854513247, | |
| "learning_rate": 1.3259394757747678e-07, | |
| "loss": 0.0168, | |
| "step": 943, | |
| "ts_encoder_learning_rate": 1.2895216455763582e-07 | |
| }, | |
| { | |
| "epoch": 2.485048191778565, | |
| "grad_norm": 0.7203527294510174, | |
| "learning_rate": 1.2895216455763582e-07, | |
| "loss": 0.0215, | |
| "step": 944, | |
| "ts_encoder_learning_rate": 1.253604390908819e-07 | |
| }, | |
| { | |
| "epoch": 2.4876843232556225, | |
| "grad_norm": 0.8318588601172171, | |
| "learning_rate": 1.253604390908819e-07, | |
| "loss": 0.0233, | |
| "step": 945, | |
| "ts_encoder_learning_rate": 1.2181880808775026e-07 | |
| }, | |
| { | |
| "epoch": 2.4903204547326796, | |
| "grad_norm": 0.7763925821106455, | |
| "learning_rate": 1.2181880808775026e-07, | |
| "loss": 0.0231, | |
| "step": 946, | |
| "ts_encoder_learning_rate": 1.1832730794397951e-07 | |
| }, | |
| { | |
| "epoch": 2.492956586209737, | |
| "grad_norm": 0.7417703735767751, | |
| "learning_rate": 1.1832730794397951e-07, | |
| "loss": 0.0202, | |
| "step": 947, | |
| "ts_encoder_learning_rate": 1.1488597454013539e-07 | |
| }, | |
| { | |
| "epoch": 2.4955927176867947, | |
| "grad_norm": 0.7347992893208377, | |
| "learning_rate": 1.1488597454013539e-07, | |
| "loss": 0.0121, | |
| "step": 948, | |
| "ts_encoder_learning_rate": 1.1149484324124326e-07 | |
| }, | |
| { | |
| "epoch": 2.4982288491638522, | |
| "grad_norm": 0.5208636481653479, | |
| "learning_rate": 1.1149484324124326e-07, | |
| "loss": 0.0184, | |
| "step": 949, | |
| "ts_encoder_learning_rate": 1.0815394889642339e-07 | |
| }, | |
| { | |
| "epoch": 2.5008649806409093, | |
| "grad_norm": 0.6580687022583558, | |
| "learning_rate": 1.0815394889642339e-07, | |
| "loss": 0.0253, | |
| "step": 950, | |
| "ts_encoder_learning_rate": 1.0486332583853565e-07 | |
| }, | |
| { | |
| "epoch": 2.503501112117967, | |
| "grad_norm": 0.7791631897968705, | |
| "learning_rate": 1.0486332583853565e-07, | |
| "loss": 0.0177, | |
| "step": 951, | |
| "ts_encoder_learning_rate": 1.0162300788382263e-07 | |
| }, | |
| { | |
| "epoch": 2.5061372435950244, | |
| "grad_norm": 0.7718676787617951, | |
| "learning_rate": 1.0162300788382263e-07, | |
| "loss": 0.0242, | |
| "step": 952, | |
| "ts_encoder_learning_rate": 9.843302833156377e-08 | |
| }, | |
| { | |
| "epoch": 2.5087733750720815, | |
| "grad_norm": 0.9526114922481819, | |
| "learning_rate": 9.843302833156377e-08, | |
| "loss": 0.018, | |
| "step": 953, | |
| "ts_encoder_learning_rate": 9.529341996373675e-08 | |
| }, | |
| { | |
| "epoch": 2.511409506549139, | |
| "grad_norm": 0.6723748361084942, | |
| "learning_rate": 9.529341996373675e-08, | |
| "loss": 0.0142, | |
| "step": 954, | |
| "ts_encoder_learning_rate": 9.22042150446728e-08 | |
| }, | |
| { | |
| "epoch": 2.5140456380261966, | |
| "grad_norm": 0.640693460278807, | |
| "learning_rate": 9.22042150446728e-08, | |
| "loss": 0.0248, | |
| "step": 955, | |
| "ts_encoder_learning_rate": 8.916544532073413e-08 | |
| }, | |
| { | |
| "epoch": 2.516681769503254, | |
| "grad_norm": 0.7438871182485605, | |
| "learning_rate": 8.916544532073413e-08, | |
| "loss": 0.0176, | |
| "step": 956, | |
| "ts_encoder_learning_rate": 8.617714201998084e-08 | |
| }, | |
| { | |
| "epoch": 2.5193179009803113, | |
| "grad_norm": 0.6536893914893551, | |
| "learning_rate": 8.617714201998084e-08, | |
| "loss": 0.0197, | |
| "step": 957, | |
| "ts_encoder_learning_rate": 8.323933585185184e-08 | |
| }, | |
| { | |
| "epoch": 2.521954032457369, | |
| "grad_norm": 0.7966005611731805, | |
| "learning_rate": 8.323933585185184e-08, | |
| "loss": 0.0202, | |
| "step": 958, | |
| "ts_encoder_learning_rate": 8.035205700685167e-08 | |
| }, | |
| { | |
| "epoch": 2.5245901639344264, | |
| "grad_norm": 0.7751864100873821, | |
| "learning_rate": 8.035205700685167e-08, | |
| "loss": 0.0245, | |
| "step": 959, | |
| "ts_encoder_learning_rate": 7.7515335156238e-08 | |
| }, | |
| { | |
| "epoch": 2.5272262954114835, | |
| "grad_norm": 0.6397557109288652, | |
| "learning_rate": 7.7515335156238e-08, | |
| "loss": 0.0138, | |
| "step": 960, | |
| "ts_encoder_learning_rate": 7.47291994517163e-08 | |
| }, | |
| { | |
| "epoch": 2.529862426888541, | |
| "grad_norm": 0.9115949923033936, | |
| "learning_rate": 7.47291994517163e-08, | |
| "loss": 0.0278, | |
| "step": 961, | |
| "ts_encoder_learning_rate": 7.199367852514239e-08 | |
| }, | |
| { | |
| "epoch": 2.5324985583655986, | |
| "grad_norm": 0.6977436866064831, | |
| "learning_rate": 7.199367852514239e-08, | |
| "loss": 0.0169, | |
| "step": 962, | |
| "ts_encoder_learning_rate": 6.930880048822531e-08 | |
| }, | |
| { | |
| "epoch": 2.535134689842656, | |
| "grad_norm": 0.8119048152627732, | |
| "learning_rate": 6.930880048822531e-08, | |
| "loss": 0.0222, | |
| "step": 963, | |
| "ts_encoder_learning_rate": 6.667459293224155e-08 | |
| }, | |
| { | |
| "epoch": 2.537770821319713, | |
| "grad_norm": 0.6593343852854229, | |
| "learning_rate": 6.667459293224155e-08, | |
| "loss": 0.0226, | |
| "step": 964, | |
| "ts_encoder_learning_rate": 6.409108292774912e-08 | |
| }, | |
| { | |
| "epoch": 2.5404069527967708, | |
| "grad_norm": 0.7024263781864509, | |
| "learning_rate": 6.409108292774912e-08, | |
| "loss": 0.0175, | |
| "step": 965, | |
| "ts_encoder_learning_rate": 6.15582970243117e-08 | |
| }, | |
| { | |
| "epoch": 2.5430430842738283, | |
| "grad_norm": 0.5434330332434761, | |
| "learning_rate": 6.15582970243117e-08, | |
| "loss": 0.0229, | |
| "step": 966, | |
| "ts_encoder_learning_rate": 5.907626125022159e-08 | |
| }, | |
| { | |
| "epoch": 2.5456792157508854, | |
| "grad_norm": 0.9794479292209439, | |
| "learning_rate": 5.907626125022159e-08, | |
| "loss": 0.0244, | |
| "step": 967, | |
| "ts_encoder_learning_rate": 5.6645001112237694e-08 | |
| }, | |
| { | |
| "epoch": 2.548315347227943, | |
| "grad_norm": 0.9128017914715376, | |
| "learning_rate": 5.6645001112237694e-08, | |
| "loss": 0.0223, | |
| "step": 968, | |
| "ts_encoder_learning_rate": 5.426454159531913e-08 | |
| }, | |
| { | |
| "epoch": 2.5509514787050005, | |
| "grad_norm": 0.8322790378300886, | |
| "learning_rate": 5.426454159531913e-08, | |
| "loss": 0.0167, | |
| "step": 969, | |
| "ts_encoder_learning_rate": 5.1934907162370374e-08 | |
| }, | |
| { | |
| "epoch": 2.553587610182058, | |
| "grad_norm": 0.5828552348921294, | |
| "learning_rate": 5.1934907162370374e-08, | |
| "loss": 0.0233, | |
| "step": 970, | |
| "ts_encoder_learning_rate": 4.9656121753990924e-08 | |
| }, | |
| { | |
| "epoch": 2.556223741659115, | |
| "grad_norm": 0.6297264426484448, | |
| "learning_rate": 4.9656121753990924e-08, | |
| "loss": 0.0248, | |
| "step": 971, | |
| "ts_encoder_learning_rate": 4.742820878822496e-08 | |
| }, | |
| { | |
| "epoch": 2.5588598731361727, | |
| "grad_norm": 0.6449652253099856, | |
| "learning_rate": 4.742820878822496e-08, | |
| "loss": 0.018, | |
| "step": 972, | |
| "ts_encoder_learning_rate": 4.52511911603265e-08 | |
| }, | |
| { | |
| "epoch": 2.5614960046132302, | |
| "grad_norm": 0.8662936136802849, | |
| "learning_rate": 4.52511911603265e-08, | |
| "loss": 0.0207, | |
| "step": 973, | |
| "ts_encoder_learning_rate": 4.312509124251907e-08 | |
| }, | |
| { | |
| "epoch": 2.5641321360902873, | |
| "grad_norm": 0.8396191031005396, | |
| "learning_rate": 4.312509124251907e-08, | |
| "loss": 0.0229, | |
| "step": 974, | |
| "ts_encoder_learning_rate": 4.104993088376974e-08 | |
| }, | |
| { | |
| "epoch": 2.566768267567345, | |
| "grad_norm": 0.7510456306691026, | |
| "learning_rate": 4.104993088376974e-08, | |
| "loss": 0.0153, | |
| "step": 975, | |
| "ts_encoder_learning_rate": 3.902573140956101e-08 | |
| }, | |
| { | |
| "epoch": 2.5694043990444024, | |
| "grad_norm": 0.5430731666125107, | |
| "learning_rate": 3.902573140956101e-08, | |
| "loss": 0.0222, | |
| "step": 976, | |
| "ts_encoder_learning_rate": 3.705251362167484e-08 | |
| }, | |
| { | |
| "epoch": 2.57204053052146, | |
| "grad_norm": 0.750719791150226, | |
| "learning_rate": 3.705251362167484e-08, | |
| "loss": 0.0133, | |
| "step": 977, | |
| "ts_encoder_learning_rate": 3.513029779797783e-08 | |
| }, | |
| { | |
| "epoch": 2.574676661998517, | |
| "grad_norm": 0.6890160305022875, | |
| "learning_rate": 3.513029779797783e-08, | |
| "loss": 0.0226, | |
| "step": 978, | |
| "ts_encoder_learning_rate": 3.325910369220975e-08 | |
| }, | |
| { | |
| "epoch": 2.5773127934755746, | |
| "grad_norm": 0.640217064006493, | |
| "learning_rate": 3.325910369220975e-08, | |
| "loss": 0.0217, | |
| "step": 979, | |
| "ts_encoder_learning_rate": 3.143895053378698e-08 | |
| }, | |
| { | |
| "epoch": 2.579948924952632, | |
| "grad_norm": 0.650608671275551, | |
| "learning_rate": 3.143895053378698e-08, | |
| "loss": 0.021, | |
| "step": 980, | |
| "ts_encoder_learning_rate": 2.966985702759828e-08 | |
| }, | |
| { | |
| "epoch": 2.5825850564296893, | |
| "grad_norm": 1.4308555266835152, | |
| "learning_rate": 2.966985702759828e-08, | |
| "loss": 0.0281, | |
| "step": 981, | |
| "ts_encoder_learning_rate": 2.7951841353817676e-08 | |
| }, | |
| { | |
| "epoch": 2.585221187906747, | |
| "grad_norm": 1.0043277890456705, | |
| "learning_rate": 2.7951841353817676e-08, | |
| "loss": 0.0205, | |
| "step": 982, | |
| "ts_encoder_learning_rate": 2.6284921167712975e-08 | |
| }, | |
| { | |
| "epoch": 2.5878573193838044, | |
| "grad_norm": 0.7699218451759371, | |
| "learning_rate": 2.6284921167712975e-08, | |
| "loss": 0.0201, | |
| "step": 983, | |
| "ts_encoder_learning_rate": 2.4669113599469774e-08 | |
| }, | |
| { | |
| "epoch": 2.590493450860862, | |
| "grad_norm": 0.6897052459896869, | |
| "learning_rate": 2.4669113599469774e-08, | |
| "loss": 0.0242, | |
| "step": 984, | |
| "ts_encoder_learning_rate": 2.3104435254008852e-08 | |
| }, | |
| { | |
| "epoch": 2.593129582337919, | |
| "grad_norm": 0.7228194509828196, | |
| "learning_rate": 2.3104435254008852e-08, | |
| "loss": 0.0285, | |
| "step": 985, | |
| "ts_encoder_learning_rate": 2.159090221082294e-08 | |
| }, | |
| { | |
| "epoch": 2.5957657138149766, | |
| "grad_norm": 0.8174809900239196, | |
| "learning_rate": 2.159090221082294e-08, | |
| "loss": 0.0167, | |
| "step": 986, | |
| "ts_encoder_learning_rate": 2.012853002380466e-08 | |
| }, | |
| { | |
| "epoch": 2.5984018452920337, | |
| "grad_norm": 0.7653138858818684, | |
| "learning_rate": 2.012853002380466e-08, | |
| "loss": 0.0125, | |
| "step": 987, | |
| "ts_encoder_learning_rate": 1.8717333721091634e-08 | |
| }, | |
| { | |
| "epoch": 2.601037976769091, | |
| "grad_norm": 0.5150577808719591, | |
| "learning_rate": 1.8717333721091634e-08, | |
| "loss": 0.0201, | |
| "step": 988, | |
| "ts_encoder_learning_rate": 1.735732780490884e-08 | |
| }, | |
| { | |
| "epoch": 2.6036741082461488, | |
| "grad_norm": 0.7752673495066984, | |
| "learning_rate": 1.735732780490884e-08, | |
| "loss": 0.0122, | |
| "step": 989, | |
| "ts_encoder_learning_rate": 1.6048526251421502e-08 | |
| }, | |
| { | |
| "epoch": 2.6063102397232063, | |
| "grad_norm": 0.6444134054280553, | |
| "learning_rate": 1.6048526251421502e-08, | |
| "loss": 0.0202, | |
| "step": 990, | |
| "ts_encoder_learning_rate": 1.4790942510590767e-08 | |
| }, | |
| { | |
| "epoch": 2.608946371200264, | |
| "grad_norm": 0.6604248178517298, | |
| "learning_rate": 1.4790942510590767e-08, | |
| "loss": 0.0234, | |
| "step": 991, | |
| "ts_encoder_learning_rate": 1.3584589506034362e-08 | |
| }, | |
| { | |
| "epoch": 2.611582502677321, | |
| "grad_norm": 0.8166087014626134, | |
| "learning_rate": 1.3584589506034362e-08, | |
| "loss": 0.0212, | |
| "step": 992, | |
| "ts_encoder_learning_rate": 1.2429479634897268e-08 | |
| }, | |
| { | |
| "epoch": 2.6142186341543785, | |
| "grad_norm": 0.5979082769485419, | |
| "learning_rate": 1.2429479634897268e-08, | |
| "loss": 0.018, | |
| "step": 993, | |
| "ts_encoder_learning_rate": 1.132562476771959e-08 | |
| }, | |
| { | |
| "epoch": 2.6168547656314356, | |
| "grad_norm": 0.6136562468042444, | |
| "learning_rate": 1.132562476771959e-08, | |
| "loss": 0.026, | |
| "step": 994, | |
| "ts_encoder_learning_rate": 1.0273036248318325e-08 | |
| }, | |
| { | |
| "epoch": 2.619490897108493, | |
| "grad_norm": 0.738729241027843, | |
| "learning_rate": 1.0273036248318325e-08, | |
| "loss": 0.0182, | |
| "step": 995, | |
| "ts_encoder_learning_rate": 9.27172489366912e-09 | |
| }, | |
| { | |
| "epoch": 2.6221270285855507, | |
| "grad_norm": 0.6076766225952003, | |
| "learning_rate": 9.27172489366912e-09, | |
| "loss": 0.017, | |
| "step": 996, | |
| "ts_encoder_learning_rate": 8.321700993795812e-09 | |
| }, | |
| { | |
| "epoch": 2.6247631600626082, | |
| "grad_norm": 0.8828040407540239, | |
| "learning_rate": 8.321700993795812e-09, | |
| "loss": 0.0183, | |
| "step": 997, | |
| "ts_encoder_learning_rate": 7.422974311662723e-09 | |
| }, | |
| { | |
| "epoch": 2.627399291539666, | |
| "grad_norm": 0.6622087750202462, | |
| "learning_rate": 7.422974311662723e-09, | |
| "loss": 0.0174, | |
| "step": 998, | |
| "ts_encoder_learning_rate": 6.575554083078084e-09 | |
| }, | |
| { | |
| "epoch": 2.630035423016723, | |
| "grad_norm": 0.76863578860787, | |
| "learning_rate": 6.575554083078084e-09, | |
| "loss": 0.0191, | |
| "step": 999, | |
| "ts_encoder_learning_rate": 5.779449016595773e-09 | |
| }, | |
| { | |
| "epoch": 2.6326715544937804, | |
| "grad_norm": 0.633605481547031, | |
| "learning_rate": 5.779449016595773e-09, | |
| "loss": 0.0188, | |
| "step": 1000, | |
| "ts_encoder_learning_rate": 5.034667293427053e-09 | |
| }, | |
| { | |
| "epoch": 2.6326715544937804, | |
| "step": 1000, | |
| "total_flos": 869424341942272.0, | |
| "train_loss": 0.32716700187977404, | |
| "train_runtime": 47494.4505, | |
| "train_samples_per_second": 10.78, | |
| "train_steps_per_second": 0.021, | |
| "ts_encoder_learning_rate": 5.034667293427053e-09 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 869424341942272.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |