| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.6039927404718695, | |
| "eval_steps": 500, | |
| "global_step": 400, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01161524500907441, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 4.6825, | |
| "step": 1, | |
| "ts_encoder_learning_rate": 0.0 | |
| }, | |
| { | |
| "epoch": 0.02323049001814882, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 4.6428, | |
| "step": 2, | |
| "ts_encoder_learning_rate": 0.0 | |
| }, | |
| { | |
| "epoch": 0.03484573502722323, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 4.6696, | |
| "step": 3, | |
| "ts_encoder_learning_rate": 0.0 | |
| }, | |
| { | |
| "epoch": 0.04646098003629764, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 4.6588, | |
| "step": 4, | |
| "ts_encoder_learning_rate": 0.0 | |
| }, | |
| { | |
| "epoch": 0.05807622504537205, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 4.7245, | |
| "step": 5, | |
| "ts_encoder_learning_rate": 0.0 | |
| }, | |
| { | |
| "epoch": 0.06969147005444647, | |
| "grad_norm": 152.50412651338738, | |
| "learning_rate": 0.0, | |
| "loss": 4.6365, | |
| "step": 6, | |
| "ts_encoder_learning_rate": 1.25e-06 | |
| }, | |
| { | |
| "epoch": 0.08130671506352087, | |
| "grad_norm": 153.96358887797103, | |
| "learning_rate": 1.25e-06, | |
| "loss": 4.6399, | |
| "step": 7, | |
| "ts_encoder_learning_rate": 2.5e-06 | |
| }, | |
| { | |
| "epoch": 0.09292196007259527, | |
| "grad_norm": 119.30991989360129, | |
| "learning_rate": 2.5e-06, | |
| "loss": 4.0109, | |
| "step": 8, | |
| "ts_encoder_learning_rate": 3.7500000000000005e-06 | |
| }, | |
| { | |
| "epoch": 0.10453720508166969, | |
| "grad_norm": 62.980179337580374, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "loss": 2.8417, | |
| "step": 9, | |
| "ts_encoder_learning_rate": 5e-06 | |
| }, | |
| { | |
| "epoch": 0.1161524500907441, | |
| "grad_norm": 33.513562064012554, | |
| "learning_rate": 5e-06, | |
| "loss": 2.1341, | |
| "step": 10, | |
| "ts_encoder_learning_rate": 6.25e-06 | |
| }, | |
| { | |
| "epoch": 0.1277676950998185, | |
| "grad_norm": 14.313077235644876, | |
| "learning_rate": 6.25e-06, | |
| "loss": 1.7057, | |
| "step": 11, | |
| "ts_encoder_learning_rate": 7.500000000000001e-06 | |
| }, | |
| { | |
| "epoch": 0.13938294010889293, | |
| "grad_norm": 6.369652791543636, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 1.3565, | |
| "step": 12, | |
| "ts_encoder_learning_rate": 8.750000000000001e-06 | |
| }, | |
| { | |
| "epoch": 0.15099818511796734, | |
| "grad_norm": 3.731701365388006, | |
| "learning_rate": 8.750000000000001e-06, | |
| "loss": 1.1662, | |
| "step": 13, | |
| "ts_encoder_learning_rate": 1e-05 | |
| }, | |
| { | |
| "epoch": 0.16261343012704174, | |
| "grad_norm": 2.6361585613163587, | |
| "learning_rate": 1e-05, | |
| "loss": 1.065, | |
| "step": 14, | |
| "ts_encoder_learning_rate": 9.999839429671632e-06 | |
| }, | |
| { | |
| "epoch": 0.17422867513611615, | |
| "grad_norm": 1.7275515554655163, | |
| "learning_rate": 9.999839429671632e-06, | |
| "loss": 1.0108, | |
| "step": 15, | |
| "ts_encoder_learning_rate": 9.999357728999657e-06 | |
| }, | |
| { | |
| "epoch": 0.18584392014519055, | |
| "grad_norm": 1.3395365357042155, | |
| "learning_rate": 9.999357728999657e-06, | |
| "loss": 0.9446, | |
| "step": 16, | |
| "ts_encoder_learning_rate": 9.99855492892281e-06 | |
| }, | |
| { | |
| "epoch": 0.19745916515426498, | |
| "grad_norm": 1.2724446763819477, | |
| "learning_rate": 9.99855492892281e-06, | |
| "loss": 0.8875, | |
| "step": 17, | |
| "ts_encoder_learning_rate": 9.99743108100344e-06 | |
| }, | |
| { | |
| "epoch": 0.20907441016333939, | |
| "grad_norm": 1.1656594933692108, | |
| "learning_rate": 9.99743108100344e-06, | |
| "loss": 0.8609, | |
| "step": 18, | |
| "ts_encoder_learning_rate": 9.9959862574242e-06 | |
| }, | |
| { | |
| "epoch": 0.2206896551724138, | |
| "grad_norm": 0.8157300015925317, | |
| "learning_rate": 9.9959862574242e-06, | |
| "loss": 0.8392, | |
| "step": 19, | |
| "ts_encoder_learning_rate": 9.994220550983404e-06 | |
| }, | |
| { | |
| "epoch": 0.2323049001814882, | |
| "grad_norm": 0.8647381094727201, | |
| "learning_rate": 9.994220550983404e-06, | |
| "loss": 0.8233, | |
| "step": 20, | |
| "ts_encoder_learning_rate": 9.992134075089085e-06 | |
| }, | |
| { | |
| "epoch": 0.24392014519056263, | |
| "grad_norm": 0.7179729634940382, | |
| "learning_rate": 9.992134075089085e-06, | |
| "loss": 0.7767, | |
| "step": 21, | |
| "ts_encoder_learning_rate": 9.989726963751683e-06 | |
| }, | |
| { | |
| "epoch": 0.255535390199637, | |
| "grad_norm": 0.5610758217332802, | |
| "learning_rate": 9.989726963751683e-06, | |
| "loss": 0.7525, | |
| "step": 22, | |
| "ts_encoder_learning_rate": 9.986999371575465e-06 | |
| }, | |
| { | |
| "epoch": 0.2671506352087114, | |
| "grad_norm": 0.6140193378709962, | |
| "learning_rate": 9.986999371575465e-06, | |
| "loss": 0.7503, | |
| "step": 23, | |
| "ts_encoder_learning_rate": 9.983951473748579e-06 | |
| }, | |
| { | |
| "epoch": 0.27876588021778587, | |
| "grad_norm": 0.5463424314841056, | |
| "learning_rate": 9.983951473748579e-06, | |
| "loss": 0.7361, | |
| "step": 24, | |
| "ts_encoder_learning_rate": 9.980583466031808e-06 | |
| }, | |
| { | |
| "epoch": 0.29038112522686027, | |
| "grad_norm": 0.5456053434098116, | |
| "learning_rate": 9.980583466031808e-06, | |
| "loss": 0.7445, | |
| "step": 25, | |
| "ts_encoder_learning_rate": 9.976895564745993e-06 | |
| }, | |
| { | |
| "epoch": 0.3019963702359347, | |
| "grad_norm": 0.4724488207315688, | |
| "learning_rate": 9.976895564745993e-06, | |
| "loss": 0.7258, | |
| "step": 26, | |
| "ts_encoder_learning_rate": 9.97288800675814e-06 | |
| }, | |
| { | |
| "epoch": 0.3136116152450091, | |
| "grad_norm": 0.43473472160091314, | |
| "learning_rate": 9.97288800675814e-06, | |
| "loss": 0.7131, | |
| "step": 27, | |
| "ts_encoder_learning_rate": 9.968561049466214e-06 | |
| }, | |
| { | |
| "epoch": 0.3252268602540835, | |
| "grad_norm": 0.4651449922676477, | |
| "learning_rate": 9.968561049466214e-06, | |
| "loss": 0.7125, | |
| "step": 28, | |
| "ts_encoder_learning_rate": 9.963914970782594e-06 | |
| }, | |
| { | |
| "epoch": 0.3368421052631579, | |
| "grad_norm": 0.42304487514126465, | |
| "learning_rate": 9.963914970782594e-06, | |
| "loss": 0.6958, | |
| "step": 29, | |
| "ts_encoder_learning_rate": 9.95895006911623e-06 | |
| }, | |
| { | |
| "epoch": 0.3484573502722323, | |
| "grad_norm": 0.37512478085470513, | |
| "learning_rate": 9.95895006911623e-06, | |
| "loss": 0.6815, | |
| "step": 30, | |
| "ts_encoder_learning_rate": 9.953666663353485e-06 | |
| }, | |
| { | |
| "epoch": 0.3600725952813067, | |
| "grad_norm": 0.38681860428776565, | |
| "learning_rate": 9.953666663353485e-06, | |
| "loss": 0.682, | |
| "step": 31, | |
| "ts_encoder_learning_rate": 9.948065092837631e-06 | |
| }, | |
| { | |
| "epoch": 0.3716878402903811, | |
| "grad_norm": 0.3878313981786008, | |
| "learning_rate": 9.948065092837631e-06, | |
| "loss": 0.6705, | |
| "step": 32, | |
| "ts_encoder_learning_rate": 9.942145717347077e-06 | |
| }, | |
| { | |
| "epoch": 0.38330308529945556, | |
| "grad_norm": 0.3483493230771332, | |
| "learning_rate": 9.942145717347077e-06, | |
| "loss": 0.6517, | |
| "step": 33, | |
| "ts_encoder_learning_rate": 9.935908917072253e-06 | |
| }, | |
| { | |
| "epoch": 0.39491833030852996, | |
| "grad_norm": 0.32925851151692315, | |
| "learning_rate": 9.935908917072253e-06, | |
| "loss": 0.6611, | |
| "step": 34, | |
| "ts_encoder_learning_rate": 9.92935509259118e-06 | |
| }, | |
| { | |
| "epoch": 0.40653357531760437, | |
| "grad_norm": 0.3655692991223213, | |
| "learning_rate": 9.92935509259118e-06, | |
| "loss": 0.6449, | |
| "step": 35, | |
| "ts_encoder_learning_rate": 9.922484664843763e-06 | |
| }, | |
| { | |
| "epoch": 0.41814882032667877, | |
| "grad_norm": 0.3582656975961708, | |
| "learning_rate": 9.922484664843763e-06, | |
| "loss": 0.6522, | |
| "step": 36, | |
| "ts_encoder_learning_rate": 9.915298075104735e-06 | |
| }, | |
| { | |
| "epoch": 0.4297640653357532, | |
| "grad_norm": 0.33379075182945417, | |
| "learning_rate": 9.915298075104735e-06, | |
| "loss": 0.6416, | |
| "step": 37, | |
| "ts_encoder_learning_rate": 9.907795784955327e-06 | |
| }, | |
| { | |
| "epoch": 0.4413793103448276, | |
| "grad_norm": 0.3347132110396014, | |
| "learning_rate": 9.907795784955327e-06, | |
| "loss": 0.6539, | |
| "step": 38, | |
| "ts_encoder_learning_rate": 9.899978276253617e-06 | |
| }, | |
| { | |
| "epoch": 0.452994555353902, | |
| "grad_norm": 0.3241158793623529, | |
| "learning_rate": 9.899978276253617e-06, | |
| "loss": 0.6438, | |
| "step": 39, | |
| "ts_encoder_learning_rate": 9.891846051103578e-06 | |
| }, | |
| { | |
| "epoch": 0.4646098003629764, | |
| "grad_norm": 0.3455452567899899, | |
| "learning_rate": 9.891846051103578e-06, | |
| "loss": 0.6316, | |
| "step": 40, | |
| "ts_encoder_learning_rate": 9.883399631822836e-06 | |
| }, | |
| { | |
| "epoch": 0.4762250453720508, | |
| "grad_norm": 0.31313557573101863, | |
| "learning_rate": 9.883399631822836e-06, | |
| "loss": 0.6389, | |
| "step": 41, | |
| "ts_encoder_learning_rate": 9.874639560909118e-06 | |
| }, | |
| { | |
| "epoch": 0.48784029038112525, | |
| "grad_norm": 0.3280416621294979, | |
| "learning_rate": 9.874639560909118e-06, | |
| "loss": 0.6285, | |
| "step": 42, | |
| "ts_encoder_learning_rate": 9.86556640100541e-06 | |
| }, | |
| { | |
| "epoch": 0.49945553539019966, | |
| "grad_norm": 0.3198451666750831, | |
| "learning_rate": 9.86556640100541e-06, | |
| "loss": 0.6356, | |
| "step": 43, | |
| "ts_encoder_learning_rate": 9.85618073486382e-06 | |
| }, | |
| { | |
| "epoch": 0.511070780399274, | |
| "grad_norm": 0.32065369243985437, | |
| "learning_rate": 9.85618073486382e-06, | |
| "loss": 0.6301, | |
| "step": 44, | |
| "ts_encoder_learning_rate": 9.846483165308142e-06 | |
| }, | |
| { | |
| "epoch": 0.5226860254083484, | |
| "grad_norm": 0.2985686533559952, | |
| "learning_rate": 9.846483165308142e-06, | |
| "loss": 0.6094, | |
| "step": 45, | |
| "ts_encoder_learning_rate": 9.836474315195148e-06 | |
| }, | |
| { | |
| "epoch": 0.5343012704174228, | |
| "grad_norm": 0.3043913719441071, | |
| "learning_rate": 9.836474315195148e-06, | |
| "loss": 0.618, | |
| "step": 46, | |
| "ts_encoder_learning_rate": 9.826154827374578e-06 | |
| }, | |
| { | |
| "epoch": 0.5459165154264973, | |
| "grad_norm": 0.29426029916433744, | |
| "learning_rate": 9.826154827374578e-06, | |
| "loss": 0.6117, | |
| "step": 47, | |
| "ts_encoder_learning_rate": 9.815525364647853e-06 | |
| }, | |
| { | |
| "epoch": 0.5575317604355717, | |
| "grad_norm": 0.29759373582076726, | |
| "learning_rate": 9.815525364647853e-06, | |
| "loss": 0.6102, | |
| "step": 48, | |
| "ts_encoder_learning_rate": 9.804586609725499e-06 | |
| }, | |
| { | |
| "epoch": 0.5691470054446461, | |
| "grad_norm": 0.2991170372194726, | |
| "learning_rate": 9.804586609725499e-06, | |
| "loss": 0.5973, | |
| "step": 49, | |
| "ts_encoder_learning_rate": 9.793339265183303e-06 | |
| }, | |
| { | |
| "epoch": 0.5807622504537205, | |
| "grad_norm": 0.297629927322108, | |
| "learning_rate": 9.793339265183303e-06, | |
| "loss": 0.5997, | |
| "step": 50, | |
| "ts_encoder_learning_rate": 9.781784053417192e-06 | |
| }, | |
| { | |
| "epoch": 0.592377495462795, | |
| "grad_norm": 0.29559157031475897, | |
| "learning_rate": 9.781784053417192e-06, | |
| "loss": 0.6012, | |
| "step": 51, | |
| "ts_encoder_learning_rate": 9.76992171659682e-06 | |
| }, | |
| { | |
| "epoch": 0.6039927404718693, | |
| "grad_norm": 0.30135176793549534, | |
| "learning_rate": 9.76992171659682e-06, | |
| "loss": 0.5997, | |
| "step": 52, | |
| "ts_encoder_learning_rate": 9.757753016617917e-06 | |
| }, | |
| { | |
| "epoch": 0.6156079854809438, | |
| "grad_norm": 0.2830877744764034, | |
| "learning_rate": 9.757753016617917e-06, | |
| "loss": 0.593, | |
| "step": 53, | |
| "ts_encoder_learning_rate": 9.745278735053345e-06 | |
| }, | |
| { | |
| "epoch": 0.6272232304900182, | |
| "grad_norm": 0.30643259357036984, | |
| "learning_rate": 9.745278735053345e-06, | |
| "loss": 0.5861, | |
| "step": 54, | |
| "ts_encoder_learning_rate": 9.732499673102895e-06 | |
| }, | |
| { | |
| "epoch": 0.6388384754990926, | |
| "grad_norm": 0.33677739208267987, | |
| "learning_rate": 9.732499673102895e-06, | |
| "loss": 0.579, | |
| "step": 55, | |
| "ts_encoder_learning_rate": 9.719416651541839e-06 | |
| }, | |
| { | |
| "epoch": 0.650453720508167, | |
| "grad_norm": 0.3205752816564006, | |
| "learning_rate": 9.719416651541839e-06, | |
| "loss": 0.589, | |
| "step": 56, | |
| "ts_encoder_learning_rate": 9.706030510668202e-06 | |
| }, | |
| { | |
| "epoch": 0.6620689655172414, | |
| "grad_norm": 0.3136856624465887, | |
| "learning_rate": 9.706030510668202e-06, | |
| "loss": 0.5719, | |
| "step": 57, | |
| "ts_encoder_learning_rate": 9.692342110248802e-06 | |
| }, | |
| { | |
| "epoch": 0.6736842105263158, | |
| "grad_norm": 0.3538385796052594, | |
| "learning_rate": 9.692342110248802e-06, | |
| "loss": 0.5836, | |
| "step": 58, | |
| "ts_encoder_learning_rate": 9.678352329464018e-06 | |
| }, | |
| { | |
| "epoch": 0.6852994555353902, | |
| "grad_norm": 0.3412039613534201, | |
| "learning_rate": 9.678352329464018e-06, | |
| "loss": 0.5776, | |
| "step": 59, | |
| "ts_encoder_learning_rate": 9.664062066851325e-06 | |
| }, | |
| { | |
| "epoch": 0.6969147005444646, | |
| "grad_norm": 0.3470773794842348, | |
| "learning_rate": 9.664062066851325e-06, | |
| "loss": 0.5851, | |
| "step": 60, | |
| "ts_encoder_learning_rate": 9.649472240247588e-06 | |
| }, | |
| { | |
| "epoch": 0.708529945553539, | |
| "grad_norm": 0.3212528977463762, | |
| "learning_rate": 9.649472240247588e-06, | |
| "loss": 0.5739, | |
| "step": 61, | |
| "ts_encoder_learning_rate": 9.63458378673011e-06 | |
| }, | |
| { | |
| "epoch": 0.7201451905626134, | |
| "grad_norm": 0.3628948373052999, | |
| "learning_rate": 9.63458378673011e-06, | |
| "loss": 0.5667, | |
| "step": 62, | |
| "ts_encoder_learning_rate": 9.619397662556434e-06 | |
| }, | |
| { | |
| "epoch": 0.7317604355716878, | |
| "grad_norm": 0.35120878235548797, | |
| "learning_rate": 9.619397662556434e-06, | |
| "loss": 0.5746, | |
| "step": 63, | |
| "ts_encoder_learning_rate": 9.603914843102941e-06 | |
| }, | |
| { | |
| "epoch": 0.7433756805807622, | |
| "grad_norm": 0.34896054004358656, | |
| "learning_rate": 9.603914843102941e-06, | |
| "loss": 0.5683, | |
| "step": 64, | |
| "ts_encoder_learning_rate": 9.588136322802194e-06 | |
| }, | |
| { | |
| "epoch": 0.7549909255898367, | |
| "grad_norm": 0.34262893904308916, | |
| "learning_rate": 9.588136322802194e-06, | |
| "loss": 0.5576, | |
| "step": 65, | |
| "ts_encoder_learning_rate": 9.572063115079063e-06 | |
| }, | |
| { | |
| "epoch": 0.7666061705989111, | |
| "grad_norm": 0.3380023238243971, | |
| "learning_rate": 9.572063115079063e-06, | |
| "loss": 0.553, | |
| "step": 66, | |
| "ts_encoder_learning_rate": 9.555696252285648e-06 | |
| }, | |
| { | |
| "epoch": 0.7782214156079855, | |
| "grad_norm": 0.3875484812249266, | |
| "learning_rate": 9.555696252285648e-06, | |
| "loss": 0.5596, | |
| "step": 67, | |
| "ts_encoder_learning_rate": 9.539036785634961e-06 | |
| }, | |
| { | |
| "epoch": 0.7898366606170599, | |
| "grad_norm": 0.35930423509307985, | |
| "learning_rate": 9.539036785634961e-06, | |
| "loss": 0.5545, | |
| "step": 68, | |
| "ts_encoder_learning_rate": 9.522085785133415e-06 | |
| }, | |
| { | |
| "epoch": 0.8014519056261343, | |
| "grad_norm": 0.3791388166119568, | |
| "learning_rate": 9.522085785133415e-06, | |
| "loss": 0.5513, | |
| "step": 69, | |
| "ts_encoder_learning_rate": 9.504844339512096e-06 | |
| }, | |
| { | |
| "epoch": 0.8130671506352087, | |
| "grad_norm": 0.34685014988411594, | |
| "learning_rate": 9.504844339512096e-06, | |
| "loss": 0.5516, | |
| "step": 70, | |
| "ts_encoder_learning_rate": 9.48731355615684e-06 | |
| }, | |
| { | |
| "epoch": 0.8246823956442831, | |
| "grad_norm": 0.35909523874401894, | |
| "learning_rate": 9.48731355615684e-06, | |
| "loss": 0.5425, | |
| "step": 71, | |
| "ts_encoder_learning_rate": 9.469494561037097e-06 | |
| }, | |
| { | |
| "epoch": 0.8362976406533575, | |
| "grad_norm": 0.4403897777700719, | |
| "learning_rate": 9.469494561037097e-06, | |
| "loss": 0.5329, | |
| "step": 72, | |
| "ts_encoder_learning_rate": 9.451388498633635e-06 | |
| }, | |
| { | |
| "epoch": 0.847912885662432, | |
| "grad_norm": 0.3818885699775511, | |
| "learning_rate": 9.451388498633635e-06, | |
| "loss": 0.5365, | |
| "step": 73, | |
| "ts_encoder_learning_rate": 9.432996531865001e-06 | |
| }, | |
| { | |
| "epoch": 0.8595281306715064, | |
| "grad_norm": 0.40435312969694975, | |
| "learning_rate": 9.432996531865001e-06, | |
| "loss": 0.5315, | |
| "step": 74, | |
| "ts_encoder_learning_rate": 9.414319842012855e-06 | |
| }, | |
| { | |
| "epoch": 0.8711433756805808, | |
| "grad_norm": 0.4248553104454013, | |
| "learning_rate": 9.414319842012855e-06, | |
| "loss": 0.533, | |
| "step": 75, | |
| "ts_encoder_learning_rate": 9.395359628646087e-06 | |
| }, | |
| { | |
| "epoch": 0.8827586206896552, | |
| "grad_norm": 0.39206791521135576, | |
| "learning_rate": 9.395359628646087e-06, | |
| "loss": 0.5256, | |
| "step": 76, | |
| "ts_encoder_learning_rate": 9.376117109543769e-06 | |
| }, | |
| { | |
| "epoch": 0.8943738656987296, | |
| "grad_norm": 0.36377005854333166, | |
| "learning_rate": 9.376117109543769e-06, | |
| "loss": 0.5194, | |
| "step": 77, | |
| "ts_encoder_learning_rate": 9.356593520616948e-06 | |
| }, | |
| { | |
| "epoch": 0.905989110707804, | |
| "grad_norm": 0.43146303271047914, | |
| "learning_rate": 9.356593520616948e-06, | |
| "loss": 0.5267, | |
| "step": 78, | |
| "ts_encoder_learning_rate": 9.336790115829255e-06 | |
| }, | |
| { | |
| "epoch": 0.9176043557168784, | |
| "grad_norm": 0.3621214209550119, | |
| "learning_rate": 9.336790115829255e-06, | |
| "loss": 0.5218, | |
| "step": 79, | |
| "ts_encoder_learning_rate": 9.316708167116377e-06 | |
| }, | |
| { | |
| "epoch": 0.9292196007259528, | |
| "grad_norm": 0.40377960504482446, | |
| "learning_rate": 9.316708167116377e-06, | |
| "loss": 0.5214, | |
| "step": 80, | |
| "ts_encoder_learning_rate": 9.296348964304351e-06 | |
| }, | |
| { | |
| "epoch": 0.9408348457350272, | |
| "grad_norm": 0.3806316238680835, | |
| "learning_rate": 9.296348964304351e-06, | |
| "loss": 0.5102, | |
| "step": 81, | |
| "ts_encoder_learning_rate": 9.275713815026732e-06 | |
| }, | |
| { | |
| "epoch": 0.9524500907441016, | |
| "grad_norm": 0.387322276165842, | |
| "learning_rate": 9.275713815026732e-06, | |
| "loss": 0.5069, | |
| "step": 82, | |
| "ts_encoder_learning_rate": 9.254804044640596e-06 | |
| }, | |
| { | |
| "epoch": 0.964065335753176, | |
| "grad_norm": 0.46164777708230237, | |
| "learning_rate": 9.254804044640596e-06, | |
| "loss": 0.4985, | |
| "step": 83, | |
| "ts_encoder_learning_rate": 9.233620996141421e-06 | |
| }, | |
| { | |
| "epoch": 0.9756805807622505, | |
| "grad_norm": 0.4208251582273127, | |
| "learning_rate": 9.233620996141421e-06, | |
| "loss": 0.5086, | |
| "step": 84, | |
| "ts_encoder_learning_rate": 9.212166030076832e-06 | |
| }, | |
| { | |
| "epoch": 0.9872958257713249, | |
| "grad_norm": 0.43078427861557256, | |
| "learning_rate": 9.212166030076832e-06, | |
| "loss": 0.525, | |
| "step": 85, | |
| "ts_encoder_learning_rate": 9.190440524459203e-06 | |
| }, | |
| { | |
| "epoch": 0.9989110707803993, | |
| "grad_norm": 0.47136114267604184, | |
| "learning_rate": 9.190440524459203e-06, | |
| "loss": 0.504, | |
| "step": 86, | |
| "ts_encoder_learning_rate": 9.168445874677168e-06 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.47136114267604184, | |
| "learning_rate": 9.168445874677168e-06, | |
| "loss": 0.0433, | |
| "step": 87, | |
| "ts_encoder_learning_rate": 9.146183493405976e-06 | |
| }, | |
| { | |
| "epoch": 1.0116152450090745, | |
| "grad_norm": 0.4459150481878699, | |
| "learning_rate": 9.146183493405976e-06, | |
| "loss": 0.4937, | |
| "step": 88, | |
| "ts_encoder_learning_rate": 9.12365481051678e-06 | |
| }, | |
| { | |
| "epoch": 1.0232304900181488, | |
| "grad_norm": 0.4648677558952652, | |
| "learning_rate": 9.12365481051678e-06, | |
| "loss": 0.4814, | |
| "step": 89, | |
| "ts_encoder_learning_rate": 9.10086127298478e-06 | |
| }, | |
| { | |
| "epoch": 1.0348457350272233, | |
| "grad_norm": 0.424833617058968, | |
| "learning_rate": 9.10086127298478e-06, | |
| "loss": 0.4872, | |
| "step": 90, | |
| "ts_encoder_learning_rate": 9.077804344796302e-06 | |
| }, | |
| { | |
| "epoch": 1.0464609800362976, | |
| "grad_norm": 0.5447833529096903, | |
| "learning_rate": 9.077804344796302e-06, | |
| "loss": 0.465, | |
| "step": 91, | |
| "ts_encoder_learning_rate": 9.054485506854756e-06 | |
| }, | |
| { | |
| "epoch": 1.0580762250453721, | |
| "grad_norm": 0.5645327960600057, | |
| "learning_rate": 9.054485506854756e-06, | |
| "loss": 0.465, | |
| "step": 92, | |
| "ts_encoder_learning_rate": 9.030906256885528e-06 | |
| }, | |
| { | |
| "epoch": 1.0696914700544464, | |
| "grad_norm": 0.5707423730743318, | |
| "learning_rate": 9.030906256885528e-06, | |
| "loss": 0.4664, | |
| "step": 93, | |
| "ts_encoder_learning_rate": 9.007068109339783e-06 | |
| }, | |
| { | |
| "epoch": 1.081306715063521, | |
| "grad_norm": 0.49970416310158533, | |
| "learning_rate": 9.007068109339783e-06, | |
| "loss": 0.458, | |
| "step": 94, | |
| "ts_encoder_learning_rate": 8.982972595297195e-06 | |
| }, | |
| { | |
| "epoch": 1.0929219600725952, | |
| "grad_norm": 0.4760304595595311, | |
| "learning_rate": 8.982972595297195e-06, | |
| "loss": 0.4669, | |
| "step": 95, | |
| "ts_encoder_learning_rate": 8.9586212623676e-06 | |
| }, | |
| { | |
| "epoch": 1.1045372050816697, | |
| "grad_norm": 0.6546521338664805, | |
| "learning_rate": 8.9586212623676e-06, | |
| "loss": 0.4631, | |
| "step": 96, | |
| "ts_encoder_learning_rate": 8.93401567459161e-06 | |
| }, | |
| { | |
| "epoch": 1.116152450090744, | |
| "grad_norm": 0.6820617032835106, | |
| "learning_rate": 8.93401567459161e-06, | |
| "loss": 0.4556, | |
| "step": 97, | |
| "ts_encoder_learning_rate": 8.90915741234015e-06 | |
| }, | |
| { | |
| "epoch": 1.1277676950998186, | |
| "grad_norm": 0.4791674527928274, | |
| "learning_rate": 8.90915741234015e-06, | |
| "loss": 0.4461, | |
| "step": 98, | |
| "ts_encoder_learning_rate": 8.884048072212952e-06 | |
| }, | |
| { | |
| "epoch": 1.1393829401088928, | |
| "grad_norm": 0.8342160989342361, | |
| "learning_rate": 8.884048072212952e-06, | |
| "loss": 0.4665, | |
| "step": 99, | |
| "ts_encoder_learning_rate": 8.85868926693601e-06 | |
| }, | |
| { | |
| "epoch": 1.1509981851179674, | |
| "grad_norm": 0.6077292569121415, | |
| "learning_rate": 8.85868926693601e-06, | |
| "loss": 0.4492, | |
| "step": 100, | |
| "ts_encoder_learning_rate": 8.833082625258003e-06 | |
| }, | |
| { | |
| "epoch": 1.1626134301270417, | |
| "grad_norm": 0.5647955360914533, | |
| "learning_rate": 8.833082625258003e-06, | |
| "loss": 0.4435, | |
| "step": 101, | |
| "ts_encoder_learning_rate": 8.807229791845673e-06 | |
| }, | |
| { | |
| "epoch": 1.1742286751361162, | |
| "grad_norm": 0.6044541014322447, | |
| "learning_rate": 8.807229791845673e-06, | |
| "loss": 0.4427, | |
| "step": 102, | |
| "ts_encoder_learning_rate": 8.781132427178203e-06 | |
| }, | |
| { | |
| "epoch": 1.1858439201451905, | |
| "grad_norm": 0.5437886950515926, | |
| "learning_rate": 8.781132427178203e-06, | |
| "loss": 0.4427, | |
| "step": 103, | |
| "ts_encoder_learning_rate": 8.754792207440557e-06 | |
| }, | |
| { | |
| "epoch": 1.197459165154265, | |
| "grad_norm": 0.5946421067486096, | |
| "learning_rate": 8.754792207440557e-06, | |
| "loss": 0.4327, | |
| "step": 104, | |
| "ts_encoder_learning_rate": 8.728210824415829e-06 | |
| }, | |
| { | |
| "epoch": 1.2090744101633395, | |
| "grad_norm": 0.6434751459915273, | |
| "learning_rate": 8.728210824415829e-06, | |
| "loss": 0.4342, | |
| "step": 105, | |
| "ts_encoder_learning_rate": 8.701389985376578e-06 | |
| }, | |
| { | |
| "epoch": 1.2206896551724138, | |
| "grad_norm": 0.5051510187934748, | |
| "learning_rate": 8.701389985376578e-06, | |
| "loss": 0.4398, | |
| "step": 106, | |
| "ts_encoder_learning_rate": 8.674331412975178e-06 | |
| }, | |
| { | |
| "epoch": 1.232304900181488, | |
| "grad_norm": 0.5595267159920397, | |
| "learning_rate": 8.674331412975178e-06, | |
| "loss": 0.442, | |
| "step": 107, | |
| "ts_encoder_learning_rate": 8.647036845133171e-06 | |
| }, | |
| { | |
| "epoch": 1.2439201451905626, | |
| "grad_norm": 0.525092656163426, | |
| "learning_rate": 8.647036845133171e-06, | |
| "loss": 0.43, | |
| "step": 108, | |
| "ts_encoder_learning_rate": 8.619508034929646e-06 | |
| }, | |
| { | |
| "epoch": 1.2555353901996371, | |
| "grad_norm": 0.5105752951496461, | |
| "learning_rate": 8.619508034929646e-06, | |
| "loss": 0.4218, | |
| "step": 109, | |
| "ts_encoder_learning_rate": 8.591746750488639e-06 | |
| }, | |
| { | |
| "epoch": 1.2671506352087114, | |
| "grad_norm": 0.5231024334601619, | |
| "learning_rate": 8.591746750488639e-06, | |
| "loss": 0.4182, | |
| "step": 110, | |
| "ts_encoder_learning_rate": 8.563754774865574e-06 | |
| }, | |
| { | |
| "epoch": 1.278765880217786, | |
| "grad_norm": 0.4819620849275164, | |
| "learning_rate": 8.563754774865574e-06, | |
| "loss": 0.4246, | |
| "step": 111, | |
| "ts_encoder_learning_rate": 8.535533905932739e-06 | |
| }, | |
| { | |
| "epoch": 1.2903811252268602, | |
| "grad_norm": 0.5106626492387893, | |
| "learning_rate": 8.535533905932739e-06, | |
| "loss": 0.4156, | |
| "step": 112, | |
| "ts_encoder_learning_rate": 8.507085956263808e-06 | |
| }, | |
| { | |
| "epoch": 1.3019963702359347, | |
| "grad_norm": 0.48163467909413527, | |
| "learning_rate": 8.507085956263808e-06, | |
| "loss": 0.3944, | |
| "step": 113, | |
| "ts_encoder_learning_rate": 8.478412753017433e-06 | |
| }, | |
| { | |
| "epoch": 1.313611615245009, | |
| "grad_norm": 0.4708287815222732, | |
| "learning_rate": 8.478412753017433e-06, | |
| "loss": 0.4198, | |
| "step": 114, | |
| "ts_encoder_learning_rate": 8.449516137819875e-06 | |
| }, | |
| { | |
| "epoch": 1.3252268602540835, | |
| "grad_norm": 0.45335927979980417, | |
| "learning_rate": 8.449516137819875e-06, | |
| "loss": 0.4004, | |
| "step": 115, | |
| "ts_encoder_learning_rate": 8.420397966646732e-06 | |
| }, | |
| { | |
| "epoch": 1.3368421052631578, | |
| "grad_norm": 0.4731479114137599, | |
| "learning_rate": 8.420397966646732e-06, | |
| "loss": 0.4012, | |
| "step": 116, | |
| "ts_encoder_learning_rate": 8.391060109703725e-06 | |
| }, | |
| { | |
| "epoch": 1.3484573502722323, | |
| "grad_norm": 0.45763071454798787, | |
| "learning_rate": 8.391060109703725e-06, | |
| "loss": 0.4043, | |
| "step": 117, | |
| "ts_encoder_learning_rate": 8.361504451306585e-06 | |
| }, | |
| { | |
| "epoch": 1.3600725952813066, | |
| "grad_norm": 0.49218004748616023, | |
| "learning_rate": 8.361504451306585e-06, | |
| "loss": 0.3804, | |
| "step": 118, | |
| "ts_encoder_learning_rate": 8.331732889760021e-06 | |
| }, | |
| { | |
| "epoch": 1.3716878402903812, | |
| "grad_norm": 0.48411439748425, | |
| "learning_rate": 8.331732889760021e-06, | |
| "loss": 0.3768, | |
| "step": 119, | |
| "ts_encoder_learning_rate": 8.301747337235798e-06 | |
| }, | |
| { | |
| "epoch": 1.3833030852994557, | |
| "grad_norm": 0.5158115467010486, | |
| "learning_rate": 8.301747337235798e-06, | |
| "loss": 0.3893, | |
| "step": 120, | |
| "ts_encoder_learning_rate": 8.271549719649923e-06 | |
| }, | |
| { | |
| "epoch": 1.39491833030853, | |
| "grad_norm": 0.4717657156486388, | |
| "learning_rate": 8.271549719649923e-06, | |
| "loss": 0.3929, | |
| "step": 121, | |
| "ts_encoder_learning_rate": 8.241141976538944e-06 | |
| }, | |
| { | |
| "epoch": 1.4065335753176043, | |
| "grad_norm": 0.48449961022029603, | |
| "learning_rate": 8.241141976538944e-06, | |
| "loss": 0.3903, | |
| "step": 122, | |
| "ts_encoder_learning_rate": 8.210526060935377e-06 | |
| }, | |
| { | |
| "epoch": 1.4181488203266788, | |
| "grad_norm": 0.5590109000920023, | |
| "learning_rate": 8.210526060935377e-06, | |
| "loss": 0.3731, | |
| "step": 123, | |
| "ts_encoder_learning_rate": 8.179703939242276e-06 | |
| }, | |
| { | |
| "epoch": 1.4297640653357533, | |
| "grad_norm": 0.48980864166559324, | |
| "learning_rate": 8.179703939242276e-06, | |
| "loss": 0.37, | |
| "step": 124, | |
| "ts_encoder_learning_rate": 8.148677591106919e-06 | |
| }, | |
| { | |
| "epoch": 1.4413793103448276, | |
| "grad_norm": 0.49306252756918445, | |
| "learning_rate": 8.148677591106919e-06, | |
| "loss": 0.3821, | |
| "step": 125, | |
| "ts_encoder_learning_rate": 8.117449009293668e-06 | |
| }, | |
| { | |
| "epoch": 1.4529945553539019, | |
| "grad_norm": 0.5343587386921387, | |
| "learning_rate": 8.117449009293668e-06, | |
| "loss": 0.388, | |
| "step": 126, | |
| "ts_encoder_learning_rate": 8.08602019955598e-06 | |
| }, | |
| { | |
| "epoch": 1.4646098003629764, | |
| "grad_norm": 0.4932498699505098, | |
| "learning_rate": 8.08602019955598e-06, | |
| "loss": 0.3689, | |
| "step": 127, | |
| "ts_encoder_learning_rate": 8.054393180507572e-06 | |
| }, | |
| { | |
| "epoch": 1.476225045372051, | |
| "grad_norm": 0.5115591275832498, | |
| "learning_rate": 8.054393180507572e-06, | |
| "loss": 0.3705, | |
| "step": 128, | |
| "ts_encoder_learning_rate": 8.022569983492781e-06 | |
| }, | |
| { | |
| "epoch": 1.4878402903811252, | |
| "grad_norm": 0.5293741893039812, | |
| "learning_rate": 8.022569983492781e-06, | |
| "loss": 0.3757, | |
| "step": 129, | |
| "ts_encoder_learning_rate": 7.99055265245608e-06 | |
| }, | |
| { | |
| "epoch": 1.4994555353901997, | |
| "grad_norm": 0.4820946133813763, | |
| "learning_rate": 7.99055265245608e-06, | |
| "loss": 0.3774, | |
| "step": 130, | |
| "ts_encoder_learning_rate": 7.958343243810818e-06 | |
| }, | |
| { | |
| "epoch": 1.511070780399274, | |
| "grad_norm": 0.5715571029957693, | |
| "learning_rate": 7.958343243810818e-06, | |
| "loss": 0.3536, | |
| "step": 131, | |
| "ts_encoder_learning_rate": 7.925943826307119e-06 | |
| }, | |
| { | |
| "epoch": 1.5226860254083485, | |
| "grad_norm": 0.45688383927056603, | |
| "learning_rate": 7.925943826307119e-06, | |
| "loss": 0.3781, | |
| "step": 132, | |
| "ts_encoder_learning_rate": 7.89335648089903e-06 | |
| }, | |
| { | |
| "epoch": 1.5343012704174228, | |
| "grad_norm": 0.5417387120089402, | |
| "learning_rate": 7.89335648089903e-06, | |
| "loss": 0.3849, | |
| "step": 133, | |
| "ts_encoder_learning_rate": 7.860583300610849e-06 | |
| }, | |
| { | |
| "epoch": 1.5459165154264973, | |
| "grad_norm": 0.48249798152601187, | |
| "learning_rate": 7.860583300610849e-06, | |
| "loss": 0.3614, | |
| "step": 134, | |
| "ts_encoder_learning_rate": 7.827626390402707e-06 | |
| }, | |
| { | |
| "epoch": 1.5575317604355718, | |
| "grad_norm": 0.49148409141245863, | |
| "learning_rate": 7.827626390402707e-06, | |
| "loss": 0.3604, | |
| "step": 135, | |
| "ts_encoder_learning_rate": 7.794487867035358e-06 | |
| }, | |
| { | |
| "epoch": 1.5691470054446461, | |
| "grad_norm": 0.5575426863669594, | |
| "learning_rate": 7.794487867035358e-06, | |
| "loss": 0.3582, | |
| "step": 136, | |
| "ts_encoder_learning_rate": 7.761169858934238e-06 | |
| }, | |
| { | |
| "epoch": 1.5807622504537204, | |
| "grad_norm": 0.47759386240015317, | |
| "learning_rate": 7.761169858934238e-06, | |
| "loss": 0.364, | |
| "step": 137, | |
| "ts_encoder_learning_rate": 7.727674506052744e-06 | |
| }, | |
| { | |
| "epoch": 1.592377495462795, | |
| "grad_norm": 0.5295175062956325, | |
| "learning_rate": 7.727674506052744e-06, | |
| "loss": 0.3543, | |
| "step": 138, | |
| "ts_encoder_learning_rate": 7.694003959734802e-06 | |
| }, | |
| { | |
| "epoch": 1.6039927404718695, | |
| "grad_norm": 0.6604920964139046, | |
| "learning_rate": 7.694003959734802e-06, | |
| "loss": 0.3607, | |
| "step": 139, | |
| "ts_encoder_learning_rate": 7.660160382576683e-06 | |
| }, | |
| { | |
| "epoch": 1.6156079854809438, | |
| "grad_norm": 0.5412776964498659, | |
| "learning_rate": 7.660160382576683e-06, | |
| "loss": 0.3494, | |
| "step": 140, | |
| "ts_encoder_learning_rate": 7.626145948288107e-06 | |
| }, | |
| { | |
| "epoch": 1.627223230490018, | |
| "grad_norm": 0.4885017642227708, | |
| "learning_rate": 7.626145948288107e-06, | |
| "loss": 0.3563, | |
| "step": 141, | |
| "ts_encoder_learning_rate": 7.591962841552627e-06 | |
| }, | |
| { | |
| "epoch": 1.6388384754990926, | |
| "grad_norm": 0.6310454372740532, | |
| "learning_rate": 7.591962841552627e-06, | |
| "loss": 0.3513, | |
| "step": 142, | |
| "ts_encoder_learning_rate": 7.55761325788731e-06 | |
| }, | |
| { | |
| "epoch": 1.650453720508167, | |
| "grad_norm": 0.44382432912819253, | |
| "learning_rate": 7.55761325788731e-06, | |
| "loss": 0.3562, | |
| "step": 143, | |
| "ts_encoder_learning_rate": 7.52309940350173e-06 | |
| }, | |
| { | |
| "epoch": 1.6620689655172414, | |
| "grad_norm": 0.5462511565321053, | |
| "learning_rate": 7.52309940350173e-06, | |
| "loss": 0.3461, | |
| "step": 144, | |
| "ts_encoder_learning_rate": 7.488423495156258e-06 | |
| }, | |
| { | |
| "epoch": 1.6736842105263157, | |
| "grad_norm": 0.4959866346692337, | |
| "learning_rate": 7.488423495156258e-06, | |
| "loss": 0.3615, | |
| "step": 145, | |
| "ts_encoder_learning_rate": 7.453587760019691e-06 | |
| }, | |
| { | |
| "epoch": 1.6852994555353902, | |
| "grad_norm": 0.481447728937644, | |
| "learning_rate": 7.453587760019691e-06, | |
| "loss": 0.3532, | |
| "step": 146, | |
| "ts_encoder_learning_rate": 7.4185944355261996e-06 | |
| }, | |
| { | |
| "epoch": 1.6969147005444647, | |
| "grad_norm": 0.4773235171153849, | |
| "learning_rate": 7.4185944355261996e-06, | |
| "loss": 0.3646, | |
| "step": 147, | |
| "ts_encoder_learning_rate": 7.383445769231628e-06 | |
| }, | |
| { | |
| "epoch": 1.708529945553539, | |
| "grad_norm": 0.4922147267880892, | |
| "learning_rate": 7.383445769231628e-06, | |
| "loss": 0.349, | |
| "step": 148, | |
| "ts_encoder_learning_rate": 7.348144018669129e-06 | |
| }, | |
| { | |
| "epoch": 1.7201451905626133, | |
| "grad_norm": 0.500323445240717, | |
| "learning_rate": 7.348144018669129e-06, | |
| "loss": 0.3271, | |
| "step": 149, | |
| "ts_encoder_learning_rate": 7.312691451204178e-06 | |
| }, | |
| { | |
| "epoch": 1.7317604355716878, | |
| "grad_norm": 0.47751019497636976, | |
| "learning_rate": 7.312691451204178e-06, | |
| "loss": 0.3635, | |
| "step": 150, | |
| "ts_encoder_learning_rate": 7.277090343888931e-06 | |
| }, | |
| { | |
| "epoch": 1.7433756805807623, | |
| "grad_norm": 0.4939172584022833, | |
| "learning_rate": 7.277090343888931e-06, | |
| "loss": 0.3522, | |
| "step": 151, | |
| "ts_encoder_learning_rate": 7.241342983315985e-06 | |
| }, | |
| { | |
| "epoch": 1.7549909255898366, | |
| "grad_norm": 0.526535710546132, | |
| "learning_rate": 7.241342983315985e-06, | |
| "loss": 0.3379, | |
| "step": 152, | |
| "ts_encoder_learning_rate": 7.205451665471515e-06 | |
| }, | |
| { | |
| "epoch": 1.7666061705989111, | |
| "grad_norm": 0.48512244017684314, | |
| "learning_rate": 7.205451665471515e-06, | |
| "loss": 0.3671, | |
| "step": 153, | |
| "ts_encoder_learning_rate": 7.169418695587791e-06 | |
| }, | |
| { | |
| "epoch": 1.7782214156079856, | |
| "grad_norm": 0.5249208108710423, | |
| "learning_rate": 7.169418695587791e-06, | |
| "loss": 0.353, | |
| "step": 154, | |
| "ts_encoder_learning_rate": 7.1332463879951404e-06 | |
| }, | |
| { | |
| "epoch": 1.78983666061706, | |
| "grad_norm": 0.5939685667617669, | |
| "learning_rate": 7.1332463879951404e-06, | |
| "loss": 0.3338, | |
| "step": 155, | |
| "ts_encoder_learning_rate": 7.096937065973285e-06 | |
| }, | |
| { | |
| "epoch": 1.8014519056261342, | |
| "grad_norm": 0.46870716209574953, | |
| "learning_rate": 7.096937065973285e-06, | |
| "loss": 0.3258, | |
| "step": 156, | |
| "ts_encoder_learning_rate": 7.060493061602128e-06 | |
| }, | |
| { | |
| "epoch": 1.8130671506352087, | |
| "grad_norm": 0.5004844023800996, | |
| "learning_rate": 7.060493061602128e-06, | |
| "loss": 0.3338, | |
| "step": 157, | |
| "ts_encoder_learning_rate": 7.023916715611969e-06 | |
| }, | |
| { | |
| "epoch": 1.8246823956442833, | |
| "grad_norm": 0.6018225120929914, | |
| "learning_rate": 7.023916715611969e-06, | |
| "loss": 0.3559, | |
| "step": 158, | |
| "ts_encoder_learning_rate": 6.987210377233165e-06 | |
| }, | |
| { | |
| "epoch": 1.8362976406533575, | |
| "grad_norm": 0.5115688533474527, | |
| "learning_rate": 6.987210377233165e-06, | |
| "loss": 0.3215, | |
| "step": 159, | |
| "ts_encoder_learning_rate": 6.950376404045235e-06 | |
| }, | |
| { | |
| "epoch": 1.8479128856624318, | |
| "grad_norm": 0.5453997597445909, | |
| "learning_rate": 6.950376404045235e-06, | |
| "loss": 0.3288, | |
| "step": 160, | |
| "ts_encoder_learning_rate": 6.913417161825449e-06 | |
| }, | |
| { | |
| "epoch": 1.8595281306715064, | |
| "grad_norm": 0.5468491048869575, | |
| "learning_rate": 6.913417161825449e-06, | |
| "loss": 0.3351, | |
| "step": 161, | |
| "ts_encoder_learning_rate": 6.876335024396872e-06 | |
| }, | |
| { | |
| "epoch": 1.8711433756805809, | |
| "grad_norm": 0.5127435043219846, | |
| "learning_rate": 6.876335024396872e-06, | |
| "loss": 0.3261, | |
| "step": 162, | |
| "ts_encoder_learning_rate": 6.839132373475894e-06 | |
| }, | |
| { | |
| "epoch": 1.8827586206896552, | |
| "grad_norm": 0.5334108745888081, | |
| "learning_rate": 6.839132373475894e-06, | |
| "loss": 0.315, | |
| "step": 163, | |
| "ts_encoder_learning_rate": 6.801811598519268e-06 | |
| }, | |
| { | |
| "epoch": 1.8943738656987295, | |
| "grad_norm": 0.5154441355243857, | |
| "learning_rate": 6.801811598519268e-06, | |
| "loss": 0.3427, | |
| "step": 164, | |
| "ts_encoder_learning_rate": 6.764375096570628e-06 | |
| }, | |
| { | |
| "epoch": 1.905989110707804, | |
| "grad_norm": 0.5561507577872592, | |
| "learning_rate": 6.764375096570628e-06, | |
| "loss": 0.3259, | |
| "step": 165, | |
| "ts_encoder_learning_rate": 6.726825272106539e-06 | |
| }, | |
| { | |
| "epoch": 1.9176043557168785, | |
| "grad_norm": 0.5291334612556954, | |
| "learning_rate": 6.726825272106539e-06, | |
| "loss": 0.3198, | |
| "step": 166, | |
| "ts_encoder_learning_rate": 6.689164536882059e-06 | |
| }, | |
| { | |
| "epoch": 1.9292196007259528, | |
| "grad_norm": 0.5011785801262693, | |
| "learning_rate": 6.689164536882059e-06, | |
| "loss": 0.3187, | |
| "step": 167, | |
| "ts_encoder_learning_rate": 6.651395309775837e-06 | |
| }, | |
| { | |
| "epoch": 1.940834845735027, | |
| "grad_norm": 0.5750688070408072, | |
| "learning_rate": 6.651395309775837e-06, | |
| "loss": 0.2997, | |
| "step": 168, | |
| "ts_encoder_learning_rate": 6.6135200166347505e-06 | |
| }, | |
| { | |
| "epoch": 1.9524500907441016, | |
| "grad_norm": 0.5043052526525891, | |
| "learning_rate": 6.6135200166347505e-06, | |
| "loss": 0.3145, | |
| "step": 169, | |
| "ts_encoder_learning_rate": 6.575541090118105e-06 | |
| }, | |
| { | |
| "epoch": 1.964065335753176, | |
| "grad_norm": 0.4896874363768986, | |
| "learning_rate": 6.575541090118105e-06, | |
| "loss": 0.3009, | |
| "step": 170, | |
| "ts_encoder_learning_rate": 6.537460969541378e-06 | |
| }, | |
| { | |
| "epoch": 1.9756805807622504, | |
| "grad_norm": 0.5088309056638338, | |
| "learning_rate": 6.537460969541378e-06, | |
| "loss": 0.3129, | |
| "step": 171, | |
| "ts_encoder_learning_rate": 6.499282100719558e-06 | |
| }, | |
| { | |
| "epoch": 1.987295825771325, | |
| "grad_norm": 0.49023494845007226, | |
| "learning_rate": 6.499282100719558e-06, | |
| "loss": 0.3082, | |
| "step": 172, | |
| "ts_encoder_learning_rate": 6.461006935810048e-06 | |
| }, | |
| { | |
| "epoch": 1.9989110707803994, | |
| "grad_norm": 0.5280481482147419, | |
| "learning_rate": 6.461006935810048e-06, | |
| "loss": 0.302, | |
| "step": 173, | |
| "ts_encoder_learning_rate": 6.4226379331551625e-06 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.5280481482147419, | |
| "learning_rate": 6.4226379331551625e-06, | |
| "loss": 0.0333, | |
| "step": 174, | |
| "ts_encoder_learning_rate": 6.384177557124247e-06 | |
| }, | |
| { | |
| "epoch": 2.0116152450090743, | |
| "grad_norm": 0.5645755484448244, | |
| "learning_rate": 6.384177557124247e-06, | |
| "loss": 0.2784, | |
| "step": 175, | |
| "ts_encoder_learning_rate": 6.345628277955384e-06 | |
| }, | |
| { | |
| "epoch": 2.023230490018149, | |
| "grad_norm": 0.7089731024549738, | |
| "learning_rate": 6.345628277955384e-06, | |
| "loss": 0.2864, | |
| "step": 176, | |
| "ts_encoder_learning_rate": 6.306992571596742e-06 | |
| }, | |
| { | |
| "epoch": 2.0348457350272233, | |
| "grad_norm": 0.5602666036492625, | |
| "learning_rate": 6.306992571596742e-06, | |
| "loss": 0.2881, | |
| "step": 177, | |
| "ts_encoder_learning_rate": 6.268272919547537e-06 | |
| }, | |
| { | |
| "epoch": 2.0464609800362976, | |
| "grad_norm": 0.6075921236619318, | |
| "learning_rate": 6.268272919547537e-06, | |
| "loss": 0.2798, | |
| "step": 178, | |
| "ts_encoder_learning_rate": 6.229471808698673e-06 | |
| }, | |
| { | |
| "epoch": 2.058076225045372, | |
| "grad_norm": 0.6984592942833859, | |
| "learning_rate": 6.229471808698673e-06, | |
| "loss": 0.2673, | |
| "step": 179, | |
| "ts_encoder_learning_rate": 6.1905917311729915e-06 | |
| }, | |
| { | |
| "epoch": 2.0696914700544466, | |
| "grad_norm": 0.5022810378072105, | |
| "learning_rate": 6.1905917311729915e-06, | |
| "loss": 0.2849, | |
| "step": 180, | |
| "ts_encoder_learning_rate": 6.151635184165219e-06 | |
| }, | |
| { | |
| "epoch": 2.081306715063521, | |
| "grad_norm": 0.5093282715650761, | |
| "learning_rate": 6.151635184165219e-06, | |
| "loss": 0.3026, | |
| "step": 181, | |
| "ts_encoder_learning_rate": 6.112604669781572e-06 | |
| }, | |
| { | |
| "epoch": 2.0929219600725952, | |
| "grad_norm": 0.6048006185139588, | |
| "learning_rate": 6.112604669781572e-06, | |
| "loss": 0.2835, | |
| "step": 182, | |
| "ts_encoder_learning_rate": 6.073502694879059e-06 | |
| }, | |
| { | |
| "epoch": 2.1045372050816695, | |
| "grad_norm": 0.5065083654679191, | |
| "learning_rate": 6.073502694879059e-06, | |
| "loss": 0.2782, | |
| "step": 183, | |
| "ts_encoder_learning_rate": 6.034331770904455e-06 | |
| }, | |
| { | |
| "epoch": 2.1161524500907443, | |
| "grad_norm": 0.5598494532388394, | |
| "learning_rate": 6.034331770904455e-06, | |
| "loss": 0.2825, | |
| "step": 184, | |
| "ts_encoder_learning_rate": 5.9950944137330125e-06 | |
| }, | |
| { | |
| "epoch": 2.1277676950998186, | |
| "grad_norm": 0.530982359241071, | |
| "learning_rate": 5.9950944137330125e-06, | |
| "loss": 0.2702, | |
| "step": 185, | |
| "ts_encoder_learning_rate": 5.955793143506863e-06 | |
| }, | |
| { | |
| "epoch": 2.139382940108893, | |
| "grad_norm": 0.509154289045405, | |
| "learning_rate": 5.955793143506863e-06, | |
| "loss": 0.2785, | |
| "step": 186, | |
| "ts_encoder_learning_rate": 5.916430484473149e-06 | |
| }, | |
| { | |
| "epoch": 2.150998185117967, | |
| "grad_norm": 0.5119364886674341, | |
| "learning_rate": 5.916430484473149e-06, | |
| "loss": 0.2895, | |
| "step": 187, | |
| "ts_encoder_learning_rate": 5.877008964821909e-06 | |
| }, | |
| { | |
| "epoch": 2.162613430127042, | |
| "grad_norm": 0.5034054419615417, | |
| "learning_rate": 5.877008964821909e-06, | |
| "loss": 0.2936, | |
| "step": 188, | |
| "ts_encoder_learning_rate": 5.837531116523683e-06 | |
| }, | |
| { | |
| "epoch": 2.174228675136116, | |
| "grad_norm": 0.5673133335829393, | |
| "learning_rate": 5.837531116523683e-06, | |
| "loss": 0.2762, | |
| "step": 189, | |
| "ts_encoder_learning_rate": 5.797999475166897e-06 | |
| }, | |
| { | |
| "epoch": 2.1858439201451905, | |
| "grad_norm": 0.5459438366130899, | |
| "learning_rate": 5.797999475166897e-06, | |
| "loss": 0.2718, | |
| "step": 190, | |
| "ts_encoder_learning_rate": 5.7584165797950055e-06 | |
| }, | |
| { | |
| "epoch": 2.1974591651542648, | |
| "grad_norm": 0.5533315060900448, | |
| "learning_rate": 5.7584165797950055e-06, | |
| "loss": 0.2754, | |
| "step": 191, | |
| "ts_encoder_learning_rate": 5.71878497274341e-06 | |
| }, | |
| { | |
| "epoch": 2.2090744101633395, | |
| "grad_norm": 0.6022264770129343, | |
| "learning_rate": 5.71878497274341e-06, | |
| "loss": 0.2743, | |
| "step": 192, | |
| "ts_encoder_learning_rate": 5.679107199476174e-06 | |
| }, | |
| { | |
| "epoch": 2.220689655172414, | |
| "grad_norm": 0.5540881606867427, | |
| "learning_rate": 5.679107199476174e-06, | |
| "loss": 0.2878, | |
| "step": 193, | |
| "ts_encoder_learning_rate": 5.6393858084225305e-06 | |
| }, | |
| { | |
| "epoch": 2.232304900181488, | |
| "grad_norm": 0.5577935030781893, | |
| "learning_rate": 5.6393858084225305e-06, | |
| "loss": 0.2677, | |
| "step": 194, | |
| "ts_encoder_learning_rate": 5.599623350813202e-06 | |
| }, | |
| { | |
| "epoch": 2.243920145190563, | |
| "grad_norm": 0.5369265581968106, | |
| "learning_rate": 5.599623350813202e-06, | |
| "loss": 0.26, | |
| "step": 195, | |
| "ts_encoder_learning_rate": 5.559822380516539e-06 | |
| }, | |
| { | |
| "epoch": 2.255535390199637, | |
| "grad_norm": 0.5350922011535203, | |
| "learning_rate": 5.559822380516539e-06, | |
| "loss": 0.2936, | |
| "step": 196, | |
| "ts_encoder_learning_rate": 5.5199854538744905e-06 | |
| }, | |
| { | |
| "epoch": 2.2671506352087114, | |
| "grad_norm": 0.534100714512928, | |
| "learning_rate": 5.5199854538744905e-06, | |
| "loss": 0.2756, | |
| "step": 197, | |
| "ts_encoder_learning_rate": 5.480115129538409e-06 | |
| }, | |
| { | |
| "epoch": 2.2787658802177857, | |
| "grad_norm": 0.5103849200596632, | |
| "learning_rate": 5.480115129538409e-06, | |
| "loss": 0.2498, | |
| "step": 198, | |
| "ts_encoder_learning_rate": 5.440213968304728e-06 | |
| }, | |
| { | |
| "epoch": 2.2903811252268604, | |
| "grad_norm": 0.5702739710122362, | |
| "learning_rate": 5.440213968304728e-06, | |
| "loss": 0.2708, | |
| "step": 199, | |
| "ts_encoder_learning_rate": 5.4002845329504675e-06 | |
| }, | |
| { | |
| "epoch": 2.3019963702359347, | |
| "grad_norm": 0.5216996613028344, | |
| "learning_rate": 5.4002845329504675e-06, | |
| "loss": 0.2668, | |
| "step": 200, | |
| "ts_encoder_learning_rate": 5.360329388068649e-06 | |
| }, | |
| { | |
| "epoch": 2.313611615245009, | |
| "grad_norm": 0.5388651334047538, | |
| "learning_rate": 5.360329388068649e-06, | |
| "loss": 0.2703, | |
| "step": 201, | |
| "ts_encoder_learning_rate": 5.320351099903565e-06 | |
| }, | |
| { | |
| "epoch": 2.3252268602540833, | |
| "grad_norm": 0.6001939955314459, | |
| "learning_rate": 5.320351099903565e-06, | |
| "loss": 0.261, | |
| "step": 202, | |
| "ts_encoder_learning_rate": 5.2803522361859596e-06 | |
| }, | |
| { | |
| "epoch": 2.336842105263158, | |
| "grad_norm": 0.4980937517422858, | |
| "learning_rate": 5.2803522361859596e-06, | |
| "loss": 0.2404, | |
| "step": 203, | |
| "ts_encoder_learning_rate": 5.240335365968104e-06 | |
| }, | |
| { | |
| "epoch": 2.3484573502722323, | |
| "grad_norm": 0.5342465570777747, | |
| "learning_rate": 5.240335365968104e-06, | |
| "loss": 0.2654, | |
| "step": 204, | |
| "ts_encoder_learning_rate": 5.2003030594587964e-06 | |
| }, | |
| { | |
| "epoch": 2.3600725952813066, | |
| "grad_norm": 0.5621041789253622, | |
| "learning_rate": 5.2003030594587964e-06, | |
| "loss": 0.2733, | |
| "step": 205, | |
| "ts_encoder_learning_rate": 5.160257887858278e-06 | |
| }, | |
| { | |
| "epoch": 2.371687840290381, | |
| "grad_norm": 0.5582661532374841, | |
| "learning_rate": 5.160257887858278e-06, | |
| "loss": 0.2597, | |
| "step": 206, | |
| "ts_encoder_learning_rate": 5.120202423193085e-06 | |
| }, | |
| { | |
| "epoch": 2.3833030852994557, | |
| "grad_norm": 0.5783285206586468, | |
| "learning_rate": 5.120202423193085e-06, | |
| "loss": 0.2525, | |
| "step": 207, | |
| "ts_encoder_learning_rate": 5.080139238150869e-06 | |
| }, | |
| { | |
| "epoch": 2.39491833030853, | |
| "grad_norm": 0.4822046196118616, | |
| "learning_rate": 5.080139238150869e-06, | |
| "loss": 0.2518, | |
| "step": 208, | |
| "ts_encoder_learning_rate": 5.040070905915139e-06 | |
| }, | |
| { | |
| "epoch": 2.4065335753176043, | |
| "grad_norm": 0.5559233478270967, | |
| "learning_rate": 5.040070905915139e-06, | |
| "loss": 0.28, | |
| "step": 209, | |
| "ts_encoder_learning_rate": 5e-06 | |
| }, | |
| { | |
| "epoch": 2.418148820326679, | |
| "grad_norm": 0.48901203329897386, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2515, | |
| "step": 210, | |
| "ts_encoder_learning_rate": 4.959929094084862e-06 | |
| }, | |
| { | |
| "epoch": 2.4297640653357533, | |
| "grad_norm": 0.48746183943137245, | |
| "learning_rate": 4.959929094084862e-06, | |
| "loss": 0.2518, | |
| "step": 211, | |
| "ts_encoder_learning_rate": 4.919860761849132e-06 | |
| }, | |
| { | |
| "epoch": 2.4413793103448276, | |
| "grad_norm": 0.5009144470099608, | |
| "learning_rate": 4.919860761849132e-06, | |
| "loss": 0.2532, | |
| "step": 212, | |
| "ts_encoder_learning_rate": 4.879797576806915e-06 | |
| }, | |
| { | |
| "epoch": 2.452994555353902, | |
| "grad_norm": 0.4965510882041085, | |
| "learning_rate": 4.879797576806915e-06, | |
| "loss": 0.243, | |
| "step": 213, | |
| "ts_encoder_learning_rate": 4.839742112141725e-06 | |
| }, | |
| { | |
| "epoch": 2.464609800362976, | |
| "grad_norm": 0.48351081176855426, | |
| "learning_rate": 4.839742112141725e-06, | |
| "loss": 0.2596, | |
| "step": 214, | |
| "ts_encoder_learning_rate": 4.799696940541204e-06 | |
| }, | |
| { | |
| "epoch": 2.476225045372051, | |
| "grad_norm": 0.5172008324226596, | |
| "learning_rate": 4.799696940541204e-06, | |
| "loss": 0.236, | |
| "step": 215, | |
| "ts_encoder_learning_rate": 4.759664634031897e-06 | |
| }, | |
| { | |
| "epoch": 2.487840290381125, | |
| "grad_norm": 0.5571046196144138, | |
| "learning_rate": 4.759664634031897e-06, | |
| "loss": 0.2479, | |
| "step": 216, | |
| "ts_encoder_learning_rate": 4.719647763814041e-06 | |
| }, | |
| { | |
| "epoch": 2.4994555353901995, | |
| "grad_norm": 0.5421950769241719, | |
| "learning_rate": 4.719647763814041e-06, | |
| "loss": 0.2691, | |
| "step": 217, | |
| "ts_encoder_learning_rate": 4.679648900096436e-06 | |
| }, | |
| { | |
| "epoch": 2.5110707803992742, | |
| "grad_norm": 0.5330899469762319, | |
| "learning_rate": 4.679648900096436e-06, | |
| "loss": 0.2579, | |
| "step": 218, | |
| "ts_encoder_learning_rate": 4.6396706119313526e-06 | |
| }, | |
| { | |
| "epoch": 2.5226860254083485, | |
| "grad_norm": 0.5460154625334825, | |
| "learning_rate": 4.6396706119313526e-06, | |
| "loss": 0.2496, | |
| "step": 219, | |
| "ts_encoder_learning_rate": 4.599715467049534e-06 | |
| }, | |
| { | |
| "epoch": 2.534301270417423, | |
| "grad_norm": 0.5440271431449177, | |
| "learning_rate": 4.599715467049534e-06, | |
| "loss": 0.2455, | |
| "step": 220, | |
| "ts_encoder_learning_rate": 4.559786031695275e-06 | |
| }, | |
| { | |
| "epoch": 2.545916515426497, | |
| "grad_norm": 0.5133166382378065, | |
| "learning_rate": 4.559786031695275e-06, | |
| "loss": 0.2636, | |
| "step": 221, | |
| "ts_encoder_learning_rate": 4.5198848704615915e-06 | |
| }, | |
| { | |
| "epoch": 2.557531760435572, | |
| "grad_norm": 0.5366585588218753, | |
| "learning_rate": 4.5198848704615915e-06, | |
| "loss": 0.247, | |
| "step": 222, | |
| "ts_encoder_learning_rate": 4.480014546125511e-06 | |
| }, | |
| { | |
| "epoch": 2.569147005444646, | |
| "grad_norm": 0.5474900733819053, | |
| "learning_rate": 4.480014546125511e-06, | |
| "loss": 0.2795, | |
| "step": 223, | |
| "ts_encoder_learning_rate": 4.4401776194834615e-06 | |
| }, | |
| { | |
| "epoch": 2.5807622504537204, | |
| "grad_norm": 0.4975693304306332, | |
| "learning_rate": 4.4401776194834615e-06, | |
| "loss": 0.2401, | |
| "step": 224, | |
| "ts_encoder_learning_rate": 4.4003766491867984e-06 | |
| }, | |
| { | |
| "epoch": 2.592377495462795, | |
| "grad_norm": 0.5106865786465035, | |
| "learning_rate": 4.4003766491867984e-06, | |
| "loss": 0.242, | |
| "step": 225, | |
| "ts_encoder_learning_rate": 4.3606141915774695e-06 | |
| }, | |
| { | |
| "epoch": 2.6039927404718695, | |
| "grad_norm": 0.5168549085943859, | |
| "learning_rate": 4.3606141915774695e-06, | |
| "loss": 0.2335, | |
| "step": 226, | |
| "ts_encoder_learning_rate": 4.320892800523827e-06 | |
| }, | |
| { | |
| "epoch": 2.6156079854809438, | |
| "grad_norm": 0.5211543822385405, | |
| "learning_rate": 4.320892800523827e-06, | |
| "loss": 0.2493, | |
| "step": 227, | |
| "ts_encoder_learning_rate": 4.281215027256592e-06 | |
| }, | |
| { | |
| "epoch": 2.627223230490018, | |
| "grad_norm": 0.5127128225307483, | |
| "learning_rate": 4.281215027256592e-06, | |
| "loss": 0.2514, | |
| "step": 228, | |
| "ts_encoder_learning_rate": 4.241583420204998e-06 | |
| }, | |
| { | |
| "epoch": 2.6388384754990923, | |
| "grad_norm": 0.5378693778374266, | |
| "learning_rate": 4.241583420204998e-06, | |
| "loss": 0.2399, | |
| "step": 229, | |
| "ts_encoder_learning_rate": 4.2020005248331056e-06 | |
| }, | |
| { | |
| "epoch": 2.650453720508167, | |
| "grad_norm": 0.5721336179178363, | |
| "learning_rate": 4.2020005248331056e-06, | |
| "loss": 0.2558, | |
| "step": 230, | |
| "ts_encoder_learning_rate": 4.162468883476319e-06 | |
| }, | |
| { | |
| "epoch": 2.6620689655172414, | |
| "grad_norm": 0.5507499405880641, | |
| "learning_rate": 4.162468883476319e-06, | |
| "loss": 0.2423, | |
| "step": 231, | |
| "ts_encoder_learning_rate": 4.122991035178093e-06 | |
| }, | |
| { | |
| "epoch": 2.6736842105263157, | |
| "grad_norm": 0.5191297273890276, | |
| "learning_rate": 4.122991035178093e-06, | |
| "loss": 0.2387, | |
| "step": 232, | |
| "ts_encoder_learning_rate": 4.083569515526853e-06 | |
| }, | |
| { | |
| "epoch": 2.6852994555353904, | |
| "grad_norm": 0.5056716954679873, | |
| "learning_rate": 4.083569515526853e-06, | |
| "loss": 0.2379, | |
| "step": 233, | |
| "ts_encoder_learning_rate": 4.04420685649314e-06 | |
| }, | |
| { | |
| "epoch": 2.6969147005444647, | |
| "grad_norm": 0.5020728789197858, | |
| "learning_rate": 4.04420685649314e-06, | |
| "loss": 0.2341, | |
| "step": 234, | |
| "ts_encoder_learning_rate": 4.004905586266988e-06 | |
| }, | |
| { | |
| "epoch": 2.708529945553539, | |
| "grad_norm": 0.49710632823598544, | |
| "learning_rate": 4.004905586266988e-06, | |
| "loss": 0.2142, | |
| "step": 235, | |
| "ts_encoder_learning_rate": 3.965668229095546e-06 | |
| }, | |
| { | |
| "epoch": 2.7201451905626133, | |
| "grad_norm": 0.46282201836826814, | |
| "learning_rate": 3.965668229095546e-06, | |
| "loss": 0.25, | |
| "step": 236, | |
| "ts_encoder_learning_rate": 3.926497305120943e-06 | |
| }, | |
| { | |
| "epoch": 2.7317604355716876, | |
| "grad_norm": 0.5037406395194425, | |
| "learning_rate": 3.926497305120943e-06, | |
| "loss": 0.2423, | |
| "step": 237, | |
| "ts_encoder_learning_rate": 3.887395330218429e-06 | |
| }, | |
| { | |
| "epoch": 2.7433756805807623, | |
| "grad_norm": 0.5502464701719969, | |
| "learning_rate": 3.887395330218429e-06, | |
| "loss": 0.2371, | |
| "step": 238, | |
| "ts_encoder_learning_rate": 3.848364815834782e-06 | |
| }, | |
| { | |
| "epoch": 2.7549909255898366, | |
| "grad_norm": 0.488890082077443, | |
| "learning_rate": 3.848364815834782e-06, | |
| "loss": 0.2367, | |
| "step": 239, | |
| "ts_encoder_learning_rate": 3.809408268827009e-06 | |
| }, | |
| { | |
| "epoch": 2.7666061705989113, | |
| "grad_norm": 0.5122316791585159, | |
| "learning_rate": 3.809408268827009e-06, | |
| "loss": 0.2506, | |
| "step": 240, | |
| "ts_encoder_learning_rate": 3.7705281913013286e-06 | |
| }, | |
| { | |
| "epoch": 2.7782214156079856, | |
| "grad_norm": 0.4868899262078824, | |
| "learning_rate": 3.7705281913013286e-06, | |
| "loss": 0.2413, | |
| "step": 241, | |
| "ts_encoder_learning_rate": 3.731727080452464e-06 | |
| }, | |
| { | |
| "epoch": 2.78983666061706, | |
| "grad_norm": 0.5086075968707988, | |
| "learning_rate": 3.731727080452464e-06, | |
| "loss": 0.2421, | |
| "step": 242, | |
| "ts_encoder_learning_rate": 3.6930074284032613e-06 | |
| }, | |
| { | |
| "epoch": 2.801451905626134, | |
| "grad_norm": 0.49235271981217643, | |
| "learning_rate": 3.6930074284032613e-06, | |
| "loss": 0.2406, | |
| "step": 243, | |
| "ts_encoder_learning_rate": 3.654371722044616e-06 | |
| }, | |
| { | |
| "epoch": 2.8130671506352085, | |
| "grad_norm": 0.49103994839708176, | |
| "learning_rate": 3.654371722044616e-06, | |
| "loss": 0.2439, | |
| "step": 244, | |
| "ts_encoder_learning_rate": 3.6158224428757538e-06 | |
| }, | |
| { | |
| "epoch": 2.8246823956442833, | |
| "grad_norm": 0.9993476254292818, | |
| "learning_rate": 3.6158224428757538e-06, | |
| "loss": 0.2268, | |
| "step": 245, | |
| "ts_encoder_learning_rate": 3.5773620668448384e-06 | |
| }, | |
| { | |
| "epoch": 2.8362976406533575, | |
| "grad_norm": 0.5159214069274143, | |
| "learning_rate": 3.5773620668448384e-06, | |
| "loss": 0.2388, | |
| "step": 246, | |
| "ts_encoder_learning_rate": 3.538993064189954e-06 | |
| }, | |
| { | |
| "epoch": 2.847912885662432, | |
| "grad_norm": 0.543713795445949, | |
| "learning_rate": 3.538993064189954e-06, | |
| "loss": 0.2284, | |
| "step": 247, | |
| "ts_encoder_learning_rate": 3.500717899280442e-06 | |
| }, | |
| { | |
| "epoch": 2.8595281306715066, | |
| "grad_norm": 0.5140037192841642, | |
| "learning_rate": 3.500717899280442e-06, | |
| "loss": 0.2305, | |
| "step": 248, | |
| "ts_encoder_learning_rate": 3.4625390304586224e-06 | |
| }, | |
| { | |
| "epoch": 2.871143375680581, | |
| "grad_norm": 0.49143474790675895, | |
| "learning_rate": 3.4625390304586224e-06, | |
| "loss": 0.2348, | |
| "step": 249, | |
| "ts_encoder_learning_rate": 3.424458909881897e-06 | |
| }, | |
| { | |
| "epoch": 2.882758620689655, | |
| "grad_norm": 0.5203399617942011, | |
| "learning_rate": 3.424458909881897e-06, | |
| "loss": 0.2175, | |
| "step": 250, | |
| "ts_encoder_learning_rate": 3.386479983365251e-06 | |
| }, | |
| { | |
| "epoch": 2.8943738656987295, | |
| "grad_norm": 0.5363618954072708, | |
| "learning_rate": 3.386479983365251e-06, | |
| "loss": 0.2289, | |
| "step": 251, | |
| "ts_encoder_learning_rate": 3.3486046902241663e-06 | |
| }, | |
| { | |
| "epoch": 2.9059891107078037, | |
| "grad_norm": 0.49208512127705756, | |
| "learning_rate": 3.3486046902241663e-06, | |
| "loss": 0.2386, | |
| "step": 252, | |
| "ts_encoder_learning_rate": 3.310835463117942e-06 | |
| }, | |
| { | |
| "epoch": 2.9176043557168785, | |
| "grad_norm": 0.48789242095969204, | |
| "learning_rate": 3.310835463117942e-06, | |
| "loss": 0.2392, | |
| "step": 253, | |
| "ts_encoder_learning_rate": 3.273174727893463e-06 | |
| }, | |
| { | |
| "epoch": 2.9292196007259528, | |
| "grad_norm": 0.5163396778042415, | |
| "learning_rate": 3.273174727893463e-06, | |
| "loss": 0.2392, | |
| "step": 254, | |
| "ts_encoder_learning_rate": 3.235624903429374e-06 | |
| }, | |
| { | |
| "epoch": 2.940834845735027, | |
| "grad_norm": 0.4839363209051733, | |
| "learning_rate": 3.235624903429374e-06, | |
| "loss": 0.2294, | |
| "step": 255, | |
| "ts_encoder_learning_rate": 3.198188401480734e-06 | |
| }, | |
| { | |
| "epoch": 2.952450090744102, | |
| "grad_norm": 0.5099295694573828, | |
| "learning_rate": 3.198188401480734e-06, | |
| "loss": 0.214, | |
| "step": 256, | |
| "ts_encoder_learning_rate": 3.160867626524107e-06 | |
| }, | |
| { | |
| "epoch": 2.964065335753176, | |
| "grad_norm": 0.52866992195366, | |
| "learning_rate": 3.160867626524107e-06, | |
| "loss": 0.232, | |
| "step": 257, | |
| "ts_encoder_learning_rate": 3.12366497560313e-06 | |
| }, | |
| { | |
| "epoch": 2.9756805807622504, | |
| "grad_norm": 0.5016653572033554, | |
| "learning_rate": 3.12366497560313e-06, | |
| "loss": 0.2477, | |
| "step": 258, | |
| "ts_encoder_learning_rate": 3.0865828381745515e-06 | |
| }, | |
| { | |
| "epoch": 2.9872958257713247, | |
| "grad_norm": 0.5704722771230331, | |
| "learning_rate": 3.0865828381745515e-06, | |
| "loss": 0.2215, | |
| "step": 259, | |
| "ts_encoder_learning_rate": 3.049623595954766e-06 | |
| }, | |
| { | |
| "epoch": 2.9989110707803994, | |
| "grad_norm": 0.5049693668147675, | |
| "learning_rate": 3.049623595954766e-06, | |
| "loss": 0.2239, | |
| "step": 260, | |
| "ts_encoder_learning_rate": 3.0127896227668367e-06 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.5049693668147675, | |
| "learning_rate": 3.0127896227668367e-06, | |
| "loss": 0.0215, | |
| "step": 261, | |
| "ts_encoder_learning_rate": 2.976083284388031e-06 | |
| }, | |
| { | |
| "epoch": 3.0116152450090743, | |
| "grad_norm": 0.6043291464224838, | |
| "learning_rate": 2.976083284388031e-06, | |
| "loss": 0.2239, | |
| "step": 262, | |
| "ts_encoder_learning_rate": 2.9395069383978725e-06 | |
| }, | |
| { | |
| "epoch": 3.023230490018149, | |
| "grad_norm": 0.46138071995137575, | |
| "learning_rate": 2.9395069383978725e-06, | |
| "loss": 0.1959, | |
| "step": 263, | |
| "ts_encoder_learning_rate": 2.9030629340267165e-06 | |
| }, | |
| { | |
| "epoch": 3.0348457350272233, | |
| "grad_norm": 0.6605611111234634, | |
| "learning_rate": 2.9030629340267165e-06, | |
| "loss": 0.2223, | |
| "step": 264, | |
| "ts_encoder_learning_rate": 2.8667536120048616e-06 | |
| }, | |
| { | |
| "epoch": 3.0464609800362976, | |
| "grad_norm": 0.4886966776948408, | |
| "learning_rate": 2.8667536120048616e-06, | |
| "loss": 0.2147, | |
| "step": 265, | |
| "ts_encoder_learning_rate": 2.83058130441221e-06 | |
| }, | |
| { | |
| "epoch": 3.058076225045372, | |
| "grad_norm": 0.603343292555356, | |
| "learning_rate": 2.83058130441221e-06, | |
| "loss": 0.2122, | |
| "step": 266, | |
| "ts_encoder_learning_rate": 2.794548334528486e-06 | |
| }, | |
| { | |
| "epoch": 3.0696914700544466, | |
| "grad_norm": 0.46864057728976466, | |
| "learning_rate": 2.794548334528486e-06, | |
| "loss": 0.187, | |
| "step": 267, | |
| "ts_encoder_learning_rate": 2.7586570166840154e-06 | |
| }, | |
| { | |
| "epoch": 3.081306715063521, | |
| "grad_norm": 0.5904508040534621, | |
| "learning_rate": 2.7586570166840154e-06, | |
| "loss": 0.191, | |
| "step": 268, | |
| "ts_encoder_learning_rate": 2.7229096561110703e-06 | |
| }, | |
| { | |
| "epoch": 3.0929219600725952, | |
| "grad_norm": 0.5389347532364875, | |
| "learning_rate": 2.7229096561110703e-06, | |
| "loss": 0.2183, | |
| "step": 269, | |
| "ts_encoder_learning_rate": 2.687308548795825e-06 | |
| }, | |
| { | |
| "epoch": 3.1045372050816695, | |
| "grad_norm": 0.5130748388642997, | |
| "learning_rate": 2.687308548795825e-06, | |
| "loss": 0.2239, | |
| "step": 270, | |
| "ts_encoder_learning_rate": 2.651855981330872e-06 | |
| }, | |
| { | |
| "epoch": 3.1161524500907443, | |
| "grad_norm": 0.49543600582732883, | |
| "learning_rate": 2.651855981330872e-06, | |
| "loss": 0.1958, | |
| "step": 271, | |
| "ts_encoder_learning_rate": 2.6165542307683744e-06 | |
| }, | |
| { | |
| "epoch": 3.1277676950998186, | |
| "grad_norm": 0.517787257194227, | |
| "learning_rate": 2.6165542307683744e-06, | |
| "loss": 0.2171, | |
| "step": 272, | |
| "ts_encoder_learning_rate": 2.5814055644738013e-06 | |
| }, | |
| { | |
| "epoch": 3.139382940108893, | |
| "grad_norm": 0.5435576653784301, | |
| "learning_rate": 2.5814055644738013e-06, | |
| "loss": 0.2144, | |
| "step": 273, | |
| "ts_encoder_learning_rate": 2.5464122399803126e-06 | |
| }, | |
| { | |
| "epoch": 3.150998185117967, | |
| "grad_norm": 0.5217681007016235, | |
| "learning_rate": 2.5464122399803126e-06, | |
| "loss": 0.1963, | |
| "step": 274, | |
| "ts_encoder_learning_rate": 2.5115765048437445e-06 | |
| }, | |
| { | |
| "epoch": 3.162613430127042, | |
| "grad_norm": 0.4918846481089564, | |
| "learning_rate": 2.5115765048437445e-06, | |
| "loss": 0.2244, | |
| "step": 275, | |
| "ts_encoder_learning_rate": 2.4769005964982718e-06 | |
| }, | |
| { | |
| "epoch": 3.174228675136116, | |
| "grad_norm": 0.4834963808627837, | |
| "learning_rate": 2.4769005964982718e-06, | |
| "loss": 0.2125, | |
| "step": 276, | |
| "ts_encoder_learning_rate": 2.4423867421126923e-06 | |
| }, | |
| { | |
| "epoch": 3.1858439201451905, | |
| "grad_norm": 0.4937777628741182, | |
| "learning_rate": 2.4423867421126923e-06, | |
| "loss": 0.2181, | |
| "step": 277, | |
| "ts_encoder_learning_rate": 2.408037158447375e-06 | |
| }, | |
| { | |
| "epoch": 3.1974591651542648, | |
| "grad_norm": 0.46282845264960937, | |
| "learning_rate": 2.408037158447375e-06, | |
| "loss": 0.213, | |
| "step": 278, | |
| "ts_encoder_learning_rate": 2.3738540517118953e-06 | |
| }, | |
| { | |
| "epoch": 3.2090744101633395, | |
| "grad_norm": 0.5150835355954807, | |
| "learning_rate": 2.3738540517118953e-06, | |
| "loss": 0.1852, | |
| "step": 279, | |
| "ts_encoder_learning_rate": 2.339839617423318e-06 | |
| }, | |
| { | |
| "epoch": 3.220689655172414, | |
| "grad_norm": 0.49870953616723984, | |
| "learning_rate": 2.339839617423318e-06, | |
| "loss": 0.207, | |
| "step": 280, | |
| "ts_encoder_learning_rate": 2.305996040265198e-06 | |
| }, | |
| { | |
| "epoch": 3.232304900181488, | |
| "grad_norm": 0.49174431699593274, | |
| "learning_rate": 2.305996040265198e-06, | |
| "loss": 0.2162, | |
| "step": 281, | |
| "ts_encoder_learning_rate": 2.272325493947257e-06 | |
| }, | |
| { | |
| "epoch": 3.243920145190563, | |
| "grad_norm": 0.4875355155859377, | |
| "learning_rate": 2.272325493947257e-06, | |
| "loss": 0.2243, | |
| "step": 282, | |
| "ts_encoder_learning_rate": 2.238830141065765e-06 | |
| }, | |
| { | |
| "epoch": 3.255535390199637, | |
| "grad_norm": 0.5107168259211297, | |
| "learning_rate": 2.238830141065765e-06, | |
| "loss": 0.201, | |
| "step": 283, | |
| "ts_encoder_learning_rate": 2.2055121329646416e-06 | |
| }, | |
| { | |
| "epoch": 3.2671506352087114, | |
| "grad_norm": 0.4819085595049116, | |
| "learning_rate": 2.2055121329646416e-06, | |
| "loss": 0.1981, | |
| "step": 284, | |
| "ts_encoder_learning_rate": 2.1723736095972946e-06 | |
| }, | |
| { | |
| "epoch": 3.2787658802177857, | |
| "grad_norm": 0.4477936237119145, | |
| "learning_rate": 2.1723736095972946e-06, | |
| "loss": 0.1959, | |
| "step": 285, | |
| "ts_encoder_learning_rate": 2.139416699389153e-06 | |
| }, | |
| { | |
| "epoch": 3.2903811252268604, | |
| "grad_norm": 0.4701102863028192, | |
| "learning_rate": 2.139416699389153e-06, | |
| "loss": 0.1936, | |
| "step": 286, | |
| "ts_encoder_learning_rate": 2.1066435191009717e-06 | |
| }, | |
| { | |
| "epoch": 3.3019963702359347, | |
| "grad_norm": 0.5076017279364189, | |
| "learning_rate": 2.1066435191009717e-06, | |
| "loss": 0.214, | |
| "step": 287, | |
| "ts_encoder_learning_rate": 2.074056173692881e-06 | |
| }, | |
| { | |
| "epoch": 3.313611615245009, | |
| "grad_norm": 0.480769257020111, | |
| "learning_rate": 2.074056173692881e-06, | |
| "loss": 0.1793, | |
| "step": 288, | |
| "ts_encoder_learning_rate": 2.041656756189184e-06 | |
| }, | |
| { | |
| "epoch": 3.3252268602540833, | |
| "grad_norm": 0.4957168313559018, | |
| "learning_rate": 2.041656756189184e-06, | |
| "loss": 0.2041, | |
| "step": 289, | |
| "ts_encoder_learning_rate": 2.00944734754392e-06 | |
| }, | |
| { | |
| "epoch": 3.336842105263158, | |
| "grad_norm": 0.4710750376178812, | |
| "learning_rate": 2.00944734754392e-06, | |
| "loss": 0.1945, | |
| "step": 290, | |
| "ts_encoder_learning_rate": 1.977430016507222e-06 | |
| }, | |
| { | |
| "epoch": 3.3484573502722323, | |
| "grad_norm": 0.45013646064857127, | |
| "learning_rate": 1.977430016507222e-06, | |
| "loss": 0.1944, | |
| "step": 291, | |
| "ts_encoder_learning_rate": 1.945606819492429e-06 | |
| }, | |
| { | |
| "epoch": 3.3600725952813066, | |
| "grad_norm": 0.4963393211559648, | |
| "learning_rate": 1.945606819492429e-06, | |
| "loss": 0.2029, | |
| "step": 292, | |
| "ts_encoder_learning_rate": 1.913979800444021e-06 | |
| }, | |
| { | |
| "epoch": 3.371687840290381, | |
| "grad_norm": 0.45636659614066805, | |
| "learning_rate": 1.913979800444021e-06, | |
| "loss": 0.2042, | |
| "step": 293, | |
| "ts_encoder_learning_rate": 1.8825509907063328e-06 | |
| }, | |
| { | |
| "epoch": 3.3833030852994557, | |
| "grad_norm": 0.48404604144273267, | |
| "learning_rate": 1.8825509907063328e-06, | |
| "loss": 0.2073, | |
| "step": 294, | |
| "ts_encoder_learning_rate": 1.8513224088930814e-06 | |
| }, | |
| { | |
| "epoch": 3.39491833030853, | |
| "grad_norm": 0.4903949616722962, | |
| "learning_rate": 1.8513224088930814e-06, | |
| "loss": 0.2117, | |
| "step": 295, | |
| "ts_encoder_learning_rate": 1.8202960607577246e-06 | |
| }, | |
| { | |
| "epoch": 3.4065335753176043, | |
| "grad_norm": 0.4749952038477868, | |
| "learning_rate": 1.8202960607577246e-06, | |
| "loss": 0.2105, | |
| "step": 296, | |
| "ts_encoder_learning_rate": 1.7894739390646227e-06 | |
| }, | |
| { | |
| "epoch": 3.418148820326679, | |
| "grad_norm": 0.49308201942231306, | |
| "learning_rate": 1.7894739390646227e-06, | |
| "loss": 0.1854, | |
| "step": 297, | |
| "ts_encoder_learning_rate": 1.7588580234610592e-06 | |
| }, | |
| { | |
| "epoch": 3.4297640653357533, | |
| "grad_norm": 0.44330828627883645, | |
| "learning_rate": 1.7588580234610592e-06, | |
| "loss": 0.2049, | |
| "step": 298, | |
| "ts_encoder_learning_rate": 1.728450280350079e-06 | |
| }, | |
| { | |
| "epoch": 3.4413793103448276, | |
| "grad_norm": 0.47198428442224044, | |
| "learning_rate": 1.728450280350079e-06, | |
| "loss": 0.1917, | |
| "step": 299, | |
| "ts_encoder_learning_rate": 1.6982526627642043e-06 | |
| }, | |
| { | |
| "epoch": 3.452994555353902, | |
| "grad_norm": 0.47052296840051827, | |
| "learning_rate": 1.6982526627642043e-06, | |
| "loss": 0.1966, | |
| "step": 300, | |
| "ts_encoder_learning_rate": 1.6682671102399806e-06 | |
| }, | |
| { | |
| "epoch": 3.464609800362976, | |
| "grad_norm": 0.47469220902280884, | |
| "learning_rate": 1.6682671102399806e-06, | |
| "loss": 0.1993, | |
| "step": 301, | |
| "ts_encoder_learning_rate": 1.6384955486934157e-06 | |
| }, | |
| { | |
| "epoch": 3.476225045372051, | |
| "grad_norm": 0.5047215872734404, | |
| "learning_rate": 1.6384955486934157e-06, | |
| "loss": 0.2087, | |
| "step": 302, | |
| "ts_encoder_learning_rate": 1.6089398902962767e-06 | |
| }, | |
| { | |
| "epoch": 3.487840290381125, | |
| "grad_norm": 0.46226600784092325, | |
| "learning_rate": 1.6089398902962767e-06, | |
| "loss": 0.2223, | |
| "step": 303, | |
| "ts_encoder_learning_rate": 1.5796020333532696e-06 | |
| }, | |
| { | |
| "epoch": 3.4994555353901995, | |
| "grad_norm": 0.49692738160329974, | |
| "learning_rate": 1.5796020333532696e-06, | |
| "loss": 0.2098, | |
| "step": 304, | |
| "ts_encoder_learning_rate": 1.5504838621801272e-06 | |
| }, | |
| { | |
| "epoch": 3.5110707803992742, | |
| "grad_norm": 0.44096965404662336, | |
| "learning_rate": 1.5504838621801272e-06, | |
| "loss": 0.1917, | |
| "step": 305, | |
| "ts_encoder_learning_rate": 1.5215872469825682e-06 | |
| }, | |
| { | |
| "epoch": 3.5226860254083485, | |
| "grad_norm": 0.46470354099812156, | |
| "learning_rate": 1.5215872469825682e-06, | |
| "loss": 0.2024, | |
| "step": 306, | |
| "ts_encoder_learning_rate": 1.4929140437361916e-06 | |
| }, | |
| { | |
| "epoch": 3.534301270417423, | |
| "grad_norm": 0.6245744672781995, | |
| "learning_rate": 1.4929140437361916e-06, | |
| "loss": 0.1932, | |
| "step": 307, | |
| "ts_encoder_learning_rate": 1.4644660940672628e-06 | |
| }, | |
| { | |
| "epoch": 3.545916515426497, | |
| "grad_norm": 0.4616850273696799, | |
| "learning_rate": 1.4644660940672628e-06, | |
| "loss": 0.1894, | |
| "step": 308, | |
| "ts_encoder_learning_rate": 1.4362452251344283e-06 | |
| }, | |
| { | |
| "epoch": 3.557531760435572, | |
| "grad_norm": 0.46072956667527437, | |
| "learning_rate": 1.4362452251344283e-06, | |
| "loss": 0.1939, | |
| "step": 309, | |
| "ts_encoder_learning_rate": 1.4082532495113627e-06 | |
| }, | |
| { | |
| "epoch": 3.569147005444646, | |
| "grad_norm": 0.4622635293176079, | |
| "learning_rate": 1.4082532495113627e-06, | |
| "loss": 0.1986, | |
| "step": 310, | |
| "ts_encoder_learning_rate": 1.3804919650703551e-06 | |
| }, | |
| { | |
| "epoch": 3.5807622504537204, | |
| "grad_norm": 0.5048261638194425, | |
| "learning_rate": 1.3804919650703551e-06, | |
| "loss": 0.1999, | |
| "step": 311, | |
| "ts_encoder_learning_rate": 1.3529631548668298e-06 | |
| }, | |
| { | |
| "epoch": 3.592377495462795, | |
| "grad_norm": 0.4265055997084881, | |
| "learning_rate": 1.3529631548668298e-06, | |
| "loss": 0.207, | |
| "step": 312, | |
| "ts_encoder_learning_rate": 1.3256685870248227e-06 | |
| }, | |
| { | |
| "epoch": 3.6039927404718695, | |
| "grad_norm": 0.43467381019959384, | |
| "learning_rate": 1.3256685870248227e-06, | |
| "loss": 0.1809, | |
| "step": 313, | |
| "ts_encoder_learning_rate": 1.298610014623423e-06 | |
| }, | |
| { | |
| "epoch": 3.6156079854809438, | |
| "grad_norm": 0.47840223124389564, | |
| "learning_rate": 1.298610014623423e-06, | |
| "loss": 0.1985, | |
| "step": 314, | |
| "ts_encoder_learning_rate": 1.2717891755841722e-06 | |
| }, | |
| { | |
| "epoch": 3.627223230490018, | |
| "grad_norm": 0.4736005082533798, | |
| "learning_rate": 1.2717891755841722e-06, | |
| "loss": 0.2029, | |
| "step": 315, | |
| "ts_encoder_learning_rate": 1.2452077925594435e-06 | |
| }, | |
| { | |
| "epoch": 3.6388384754990923, | |
| "grad_norm": 0.44359091955078794, | |
| "learning_rate": 1.2452077925594435e-06, | |
| "loss": 0.1974, | |
| "step": 316, | |
| "ts_encoder_learning_rate": 1.2188675728217986e-06 | |
| }, | |
| { | |
| "epoch": 3.650453720508167, | |
| "grad_norm": 0.4835930654461023, | |
| "learning_rate": 1.2188675728217986e-06, | |
| "loss": 0.2004, | |
| "step": 317, | |
| "ts_encoder_learning_rate": 1.1927702081543279e-06 | |
| }, | |
| { | |
| "epoch": 3.6620689655172414, | |
| "grad_norm": 0.4950865344011163, | |
| "learning_rate": 1.1927702081543279e-06, | |
| "loss": 0.1971, | |
| "step": 318, | |
| "ts_encoder_learning_rate": 1.166917374742e-06 | |
| }, | |
| { | |
| "epoch": 3.6736842105263157, | |
| "grad_norm": 0.4542349082763096, | |
| "learning_rate": 1.166917374742e-06, | |
| "loss": 0.2029, | |
| "step": 319, | |
| "ts_encoder_learning_rate": 1.141310733063991e-06 | |
| }, | |
| { | |
| "epoch": 3.6852994555353904, | |
| "grad_norm": 0.4722201030579586, | |
| "learning_rate": 1.141310733063991e-06, | |
| "loss": 0.1871, | |
| "step": 320, | |
| "ts_encoder_learning_rate": 1.1159519277870507e-06 | |
| }, | |
| { | |
| "epoch": 3.6969147005444647, | |
| "grad_norm": 0.44859200333215415, | |
| "learning_rate": 1.1159519277870507e-06, | |
| "loss": 0.1912, | |
| "step": 321, | |
| "ts_encoder_learning_rate": 1.0908425876598512e-06 | |
| }, | |
| { | |
| "epoch": 3.708529945553539, | |
| "grad_norm": 0.46643463691801845, | |
| "learning_rate": 1.0908425876598512e-06, | |
| "loss": 0.1845, | |
| "step": 322, | |
| "ts_encoder_learning_rate": 1.0659843254083919e-06 | |
| }, | |
| { | |
| "epoch": 3.7201451905626133, | |
| "grad_norm": 0.4306281262155224, | |
| "learning_rate": 1.0659843254083919e-06, | |
| "loss": 0.1779, | |
| "step": 323, | |
| "ts_encoder_learning_rate": 1.041378737632402e-06 | |
| }, | |
| { | |
| "epoch": 3.7317604355716876, | |
| "grad_norm": 0.4753156573309627, | |
| "learning_rate": 1.041378737632402e-06, | |
| "loss": 0.1989, | |
| "step": 324, | |
| "ts_encoder_learning_rate": 1.0170274047028068e-06 | |
| }, | |
| { | |
| "epoch": 3.7433756805807623, | |
| "grad_norm": 0.45875837749866927, | |
| "learning_rate": 1.0170274047028068e-06, | |
| "loss": 0.1912, | |
| "step": 325, | |
| "ts_encoder_learning_rate": 9.929318906602176e-07 | |
| }, | |
| { | |
| "epoch": 3.7549909255898366, | |
| "grad_norm": 0.4823250266490247, | |
| "learning_rate": 9.929318906602176e-07, | |
| "loss": 0.194, | |
| "step": 326, | |
| "ts_encoder_learning_rate": 9.690937431144725e-07 | |
| }, | |
| { | |
| "epoch": 3.7666061705989113, | |
| "grad_norm": 0.4354804449964891, | |
| "learning_rate": 9.690937431144725e-07, | |
| "loss": 0.193, | |
| "step": 327, | |
| "ts_encoder_learning_rate": 9.455144931452459e-07 | |
| }, | |
| { | |
| "epoch": 3.7782214156079856, | |
| "grad_norm": 0.4634865596978523, | |
| "learning_rate": 9.455144931452459e-07, | |
| "loss": 0.2046, | |
| "step": 328, | |
| "ts_encoder_learning_rate": 9.221956552036992e-07 | |
| }, | |
| { | |
| "epoch": 3.78983666061706, | |
| "grad_norm": 0.455260620824431, | |
| "learning_rate": 9.221956552036992e-07, | |
| "loss": 0.1939, | |
| "step": 329, | |
| "ts_encoder_learning_rate": 8.991387270152202e-07 | |
| }, | |
| { | |
| "epoch": 3.801451905626134, | |
| "grad_norm": 0.49649814336003645, | |
| "learning_rate": 8.991387270152202e-07, | |
| "loss": 0.2102, | |
| "step": 330, | |
| "ts_encoder_learning_rate": 8.76345189483222e-07 | |
| }, | |
| { | |
| "epoch": 3.8130671506352085, | |
| "grad_norm": 0.49114892681403455, | |
| "learning_rate": 8.76345189483222e-07, | |
| "loss": 0.1963, | |
| "step": 331, | |
| "ts_encoder_learning_rate": 8.538165065940263e-07 | |
| }, | |
| { | |
| "epoch": 3.8246823956442833, | |
| "grad_norm": 0.5286315843085168, | |
| "learning_rate": 8.538165065940263e-07, | |
| "loss": 0.2193, | |
| "step": 332, | |
| "ts_encoder_learning_rate": 8.315541253228332e-07 | |
| }, | |
| { | |
| "epoch": 3.8362976406533575, | |
| "grad_norm": 0.4405444221304671, | |
| "learning_rate": 8.315541253228332e-07, | |
| "loss": 0.1914, | |
| "step": 333, | |
| "ts_encoder_learning_rate": 8.095594755407971e-07 | |
| }, | |
| { | |
| "epoch": 3.847912885662432, | |
| "grad_norm": 0.4698521600536052, | |
| "learning_rate": 8.095594755407971e-07, | |
| "loss": 0.2235, | |
| "step": 334, | |
| "ts_encoder_learning_rate": 7.878339699231702e-07 | |
| }, | |
| { | |
| "epoch": 3.8595281306715066, | |
| "grad_norm": 0.4371598229337324, | |
| "learning_rate": 7.878339699231702e-07, | |
| "loss": 0.1839, | |
| "step": 335, | |
| "ts_encoder_learning_rate": 7.663790038585794e-07 | |
| }, | |
| { | |
| "epoch": 3.871143375680581, | |
| "grad_norm": 0.45962066234037935, | |
| "learning_rate": 7.663790038585794e-07, | |
| "loss": 0.1995, | |
| "step": 336, | |
| "ts_encoder_learning_rate": 7.451959553594051e-07 | |
| }, | |
| { | |
| "epoch": 3.882758620689655, | |
| "grad_norm": 0.42173091596380397, | |
| "learning_rate": 7.451959553594051e-07, | |
| "loss": 0.1827, | |
| "step": 337, | |
| "ts_encoder_learning_rate": 7.242861849732696e-07 | |
| }, | |
| { | |
| "epoch": 3.8943738656987295, | |
| "grad_norm": 0.4331430485638045, | |
| "learning_rate": 7.242861849732696e-07, | |
| "loss": 0.2012, | |
| "step": 338, | |
| "ts_encoder_learning_rate": 7.036510356956494e-07 | |
| }, | |
| { | |
| "epoch": 3.9059891107078037, | |
| "grad_norm": 0.4465736646292548, | |
| "learning_rate": 7.036510356956494e-07, | |
| "loss": 0.1989, | |
| "step": 339, | |
| "ts_encoder_learning_rate": 6.832918328836247e-07 | |
| }, | |
| { | |
| "epoch": 3.9176043557168785, | |
| "grad_norm": 0.43493507516276353, | |
| "learning_rate": 6.832918328836247e-07, | |
| "loss": 0.1951, | |
| "step": 340, | |
| "ts_encoder_learning_rate": 6.632098841707458e-07 | |
| }, | |
| { | |
| "epoch": 3.9292196007259528, | |
| "grad_norm": 0.42038926625601386, | |
| "learning_rate": 6.632098841707458e-07, | |
| "loss": 0.1915, | |
| "step": 341, | |
| "ts_encoder_learning_rate": 6.43406479383053e-07 | |
| }, | |
| { | |
| "epoch": 3.940834845735027, | |
| "grad_norm": 0.46480078485720294, | |
| "learning_rate": 6.43406479383053e-07, | |
| "loss": 0.1917, | |
| "step": 342, | |
| "ts_encoder_learning_rate": 6.238828904562316e-07 | |
| }, | |
| { | |
| "epoch": 3.952450090744102, | |
| "grad_norm": 0.4490758579669617, | |
| "learning_rate": 6.238828904562316e-07, | |
| "loss": 0.2059, | |
| "step": 343, | |
| "ts_encoder_learning_rate": 6.04640371353914e-07 | |
| }, | |
| { | |
| "epoch": 3.964065335753176, | |
| "grad_norm": 0.4613515058442109, | |
| "learning_rate": 6.04640371353914e-07, | |
| "loss": 0.1985, | |
| "step": 344, | |
| "ts_encoder_learning_rate": 5.856801579871457e-07 | |
| }, | |
| { | |
| "epoch": 3.9756805807622504, | |
| "grad_norm": 0.4456006110376394, | |
| "learning_rate": 5.856801579871457e-07, | |
| "loss": 0.2025, | |
| "step": 345, | |
| "ts_encoder_learning_rate": 5.670034681349995e-07 | |
| }, | |
| { | |
| "epoch": 3.9872958257713247, | |
| "grad_norm": 0.42933039768161857, | |
| "learning_rate": 5.670034681349995e-07, | |
| "loss": 0.2012, | |
| "step": 346, | |
| "ts_encoder_learning_rate": 5.486115013663668e-07 | |
| }, | |
| { | |
| "epoch": 3.9989110707803994, | |
| "grad_norm": 0.44999338968368285, | |
| "learning_rate": 5.486115013663668e-07, | |
| "loss": 0.187, | |
| "step": 347, | |
| "ts_encoder_learning_rate": 5.305054389629022e-07 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.44999338968368285, | |
| "learning_rate": 5.305054389629022e-07, | |
| "loss": 0.0145, | |
| "step": 348, | |
| "ts_encoder_learning_rate": 5.126864438431628e-07 | |
| }, | |
| { | |
| "epoch": 4.011615245009074, | |
| "grad_norm": 0.42250491238346477, | |
| "learning_rate": 5.126864438431628e-07, | |
| "loss": 0.1884, | |
| "step": 349, | |
| "ts_encoder_learning_rate": 4.951556604879049e-07 | |
| }, | |
| { | |
| "epoch": 4.023230490018149, | |
| "grad_norm": 0.4230629092251735, | |
| "learning_rate": 4.951556604879049e-07, | |
| "loss": 0.1905, | |
| "step": 350, | |
| "ts_encoder_learning_rate": 4.779142148665855e-07 | |
| }, | |
| { | |
| "epoch": 4.034845735027223, | |
| "grad_norm": 0.4174529397738527, | |
| "learning_rate": 4.779142148665855e-07, | |
| "loss": 0.1658, | |
| "step": 351, | |
| "ts_encoder_learning_rate": 4.6096321436504e-07 | |
| }, | |
| { | |
| "epoch": 4.046460980036298, | |
| "grad_norm": 0.43118539472845935, | |
| "learning_rate": 4.6096321436504e-07, | |
| "loss": 0.184, | |
| "step": 352, | |
| "ts_encoder_learning_rate": 4.4430374771435245e-07 | |
| }, | |
| { | |
| "epoch": 4.058076225045372, | |
| "grad_norm": 0.41001720611475784, | |
| "learning_rate": 4.4430374771435245e-07, | |
| "loss": 0.1828, | |
| "step": 353, | |
| "ts_encoder_learning_rate": 4.279368849209381e-07 | |
| }, | |
| { | |
| "epoch": 4.069691470054447, | |
| "grad_norm": 0.4308084296921054, | |
| "learning_rate": 4.279368849209381e-07, | |
| "loss": 0.1964, | |
| "step": 354, | |
| "ts_encoder_learning_rate": 4.1186367719780737e-07 | |
| }, | |
| { | |
| "epoch": 4.081306715063521, | |
| "grad_norm": 0.4407009368252455, | |
| "learning_rate": 4.1186367719780737e-07, | |
| "loss": 0.1735, | |
| "step": 355, | |
| "ts_encoder_learning_rate": 3.960851568970586e-07 | |
| }, | |
| { | |
| "epoch": 4.092921960072595, | |
| "grad_norm": 0.41256069394964856, | |
| "learning_rate": 3.960851568970586e-07, | |
| "loss": 0.1911, | |
| "step": 356, | |
| "ts_encoder_learning_rate": 3.8060233744356634e-07 | |
| }, | |
| { | |
| "epoch": 4.1045372050816695, | |
| "grad_norm": 0.4221589175474974, | |
| "learning_rate": 3.8060233744356634e-07, | |
| "loss": 0.1863, | |
| "step": 357, | |
| "ts_encoder_learning_rate": 3.6541621326989183e-07 | |
| }, | |
| { | |
| "epoch": 4.116152450090744, | |
| "grad_norm": 0.42751127372899456, | |
| "learning_rate": 3.6541621326989183e-07, | |
| "loss": 0.2028, | |
| "step": 358, | |
| "ts_encoder_learning_rate": 3.5052775975241203e-07 | |
| }, | |
| { | |
| "epoch": 4.127767695099818, | |
| "grad_norm": 0.40851979892771395, | |
| "learning_rate": 3.5052775975241203e-07, | |
| "loss": 0.1718, | |
| "step": 359, | |
| "ts_encoder_learning_rate": 3.359379331486762e-07 | |
| }, | |
| { | |
| "epoch": 4.139382940108893, | |
| "grad_norm": 0.41178087270431546, | |
| "learning_rate": 3.359379331486762e-07, | |
| "loss": 0.181, | |
| "step": 360, | |
| "ts_encoder_learning_rate": 3.216476705359839e-07 | |
| }, | |
| { | |
| "epoch": 4.150998185117968, | |
| "grad_norm": 0.4104761665945451, | |
| "learning_rate": 3.216476705359839e-07, | |
| "loss": 0.1872, | |
| "step": 361, | |
| "ts_encoder_learning_rate": 3.076578897511978e-07 | |
| }, | |
| { | |
| "epoch": 4.162613430127042, | |
| "grad_norm": 0.41012969922765047, | |
| "learning_rate": 3.076578897511978e-07, | |
| "loss": 0.182, | |
| "step": 362, | |
| "ts_encoder_learning_rate": 2.939694893317979e-07 | |
| }, | |
| { | |
| "epoch": 4.174228675136116, | |
| "grad_norm": 0.41789852835375363, | |
| "learning_rate": 2.939694893317979e-07, | |
| "loss": 0.1937, | |
| "step": 363, | |
| "ts_encoder_learning_rate": 2.8058334845816214e-07 | |
| }, | |
| { | |
| "epoch": 4.1858439201451905, | |
| "grad_norm": 0.42143167915544566, | |
| "learning_rate": 2.8058334845816214e-07, | |
| "loss": 0.1838, | |
| "step": 364, | |
| "ts_encoder_learning_rate": 2.6750032689710604e-07 | |
| }, | |
| { | |
| "epoch": 4.197459165154265, | |
| "grad_norm": 0.39213432837711776, | |
| "learning_rate": 2.6750032689710604e-07, | |
| "loss": 0.1742, | |
| "step": 365, | |
| "ts_encoder_learning_rate": 2.547212649466568e-07 | |
| }, | |
| { | |
| "epoch": 4.209074410163339, | |
| "grad_norm": 0.3958142973041478, | |
| "learning_rate": 2.547212649466568e-07, | |
| "loss": 0.1919, | |
| "step": 366, | |
| "ts_encoder_learning_rate": 2.4224698338208344e-07 | |
| }, | |
| { | |
| "epoch": 4.220689655172414, | |
| "grad_norm": 0.44213215894104213, | |
| "learning_rate": 2.4224698338208344e-07, | |
| "loss": 0.1841, | |
| "step": 367, | |
| "ts_encoder_learning_rate": 2.3007828340318117e-07 | |
| }, | |
| { | |
| "epoch": 4.2323049001814885, | |
| "grad_norm": 0.4245037412302445, | |
| "learning_rate": 2.3007828340318117e-07, | |
| "loss": 0.1891, | |
| "step": 368, | |
| "ts_encoder_learning_rate": 2.1821594658280932e-07 | |
| }, | |
| { | |
| "epoch": 4.243920145190563, | |
| "grad_norm": 0.4132437127742664, | |
| "learning_rate": 2.1821594658280932e-07, | |
| "loss": 0.1803, | |
| "step": 369, | |
| "ts_encoder_learning_rate": 2.0666073481669714e-07 | |
| }, | |
| { | |
| "epoch": 4.255535390199637, | |
| "grad_norm": 0.411342871117412, | |
| "learning_rate": 2.0666073481669714e-07, | |
| "loss": 0.1812, | |
| "step": 370, | |
| "ts_encoder_learning_rate": 1.9541339027450256e-07 | |
| }, | |
| { | |
| "epoch": 4.267150635208711, | |
| "grad_norm": 0.42070511786632736, | |
| "learning_rate": 1.9541339027450256e-07, | |
| "loss": 0.1781, | |
| "step": 371, | |
| "ts_encoder_learning_rate": 1.8447463535214872e-07 | |
| }, | |
| { | |
| "epoch": 4.278765880217786, | |
| "grad_norm": 0.3960054704602751, | |
| "learning_rate": 1.8447463535214872e-07, | |
| "loss": 0.1872, | |
| "step": 372, | |
| "ts_encoder_learning_rate": 1.7384517262542255e-07 | |
| }, | |
| { | |
| "epoch": 4.29038112522686, | |
| "grad_norm": 0.4146312365888249, | |
| "learning_rate": 1.7384517262542255e-07, | |
| "loss": 0.1996, | |
| "step": 373, | |
| "ts_encoder_learning_rate": 1.6352568480485277e-07 | |
| }, | |
| { | |
| "epoch": 4.301996370235934, | |
| "grad_norm": 0.4302254069498354, | |
| "learning_rate": 1.6352568480485277e-07, | |
| "loss": 0.1799, | |
| "step": 374, | |
| "ts_encoder_learning_rate": 1.5351683469185973e-07 | |
| }, | |
| { | |
| "epoch": 4.3136116152450095, | |
| "grad_norm": 0.4048487003041662, | |
| "learning_rate": 1.5351683469185973e-07, | |
| "loss": 0.1853, | |
| "step": 375, | |
| "ts_encoder_learning_rate": 1.4381926513618139e-07 | |
| }, | |
| { | |
| "epoch": 4.325226860254084, | |
| "grad_norm": 0.4340645587831662, | |
| "learning_rate": 1.4381926513618139e-07, | |
| "loss": 0.1745, | |
| "step": 376, | |
| "ts_encoder_learning_rate": 1.3443359899458997e-07 | |
| }, | |
| { | |
| "epoch": 4.336842105263158, | |
| "grad_norm": 0.419931335758943, | |
| "learning_rate": 1.3443359899458997e-07, | |
| "loss": 0.2001, | |
| "step": 377, | |
| "ts_encoder_learning_rate": 1.253604390908819e-07 | |
| }, | |
| { | |
| "epoch": 4.348457350272232, | |
| "grad_norm": 0.41065254305787063, | |
| "learning_rate": 1.253604390908819e-07, | |
| "loss": 0.1856, | |
| "step": 378, | |
| "ts_encoder_learning_rate": 1.1660036817716492e-07 | |
| }, | |
| { | |
| "epoch": 4.360072595281307, | |
| "grad_norm": 0.4386933347694567, | |
| "learning_rate": 1.1660036817716492e-07, | |
| "loss": 0.1885, | |
| "step": 379, | |
| "ts_encoder_learning_rate": 1.0815394889642339e-07 | |
| }, | |
| { | |
| "epoch": 4.371687840290381, | |
| "grad_norm": 0.4206901632288436, | |
| "learning_rate": 1.0815394889642339e-07, | |
| "loss": 0.1855, | |
| "step": 380, | |
| "ts_encoder_learning_rate": 1.0002172374638519e-07 | |
| }, | |
| { | |
| "epoch": 4.383303085299455, | |
| "grad_norm": 0.45345577005415333, | |
| "learning_rate": 1.0002172374638519e-07, | |
| "loss": 0.201, | |
| "step": 381, | |
| "ts_encoder_learning_rate": 9.22042150446728e-08 | |
| }, | |
| { | |
| "epoch": 4.3949183303085295, | |
| "grad_norm": 0.42825131486091655, | |
| "learning_rate": 9.22042150446728e-08, | |
| "loss": 0.1977, | |
| "step": 382, | |
| "ts_encoder_learning_rate": 8.470192489526519e-08 | |
| }, | |
| { | |
| "epoch": 4.406533575317605, | |
| "grad_norm": 0.4180892476123211, | |
| "learning_rate": 8.470192489526519e-08, | |
| "loss": 0.1819, | |
| "step": 383, | |
| "ts_encoder_learning_rate": 7.7515335156238e-08 | |
| }, | |
| { | |
| "epoch": 4.418148820326679, | |
| "grad_norm": 0.3988898186373836, | |
| "learning_rate": 7.7515335156238e-08, | |
| "loss": 0.1845, | |
| "step": 384, | |
| "ts_encoder_learning_rate": 7.064490740882057e-08 | |
| }, | |
| { | |
| "epoch": 4.429764065335753, | |
| "grad_norm": 0.4224273125164809, | |
| "learning_rate": 7.064490740882057e-08, | |
| "loss": 0.1831, | |
| "step": 385, | |
| "ts_encoder_learning_rate": 6.409108292774912e-08 | |
| }, | |
| { | |
| "epoch": 4.441379310344828, | |
| "grad_norm": 0.40574161987282553, | |
| "learning_rate": 6.409108292774912e-08, | |
| "loss": 0.1884, | |
| "step": 386, | |
| "ts_encoder_learning_rate": 5.785428265292381e-08 | |
| }, | |
| { | |
| "epoch": 4.452994555353902, | |
| "grad_norm": 0.4205040886436754, | |
| "learning_rate": 5.785428265292381e-08, | |
| "loss": 0.1854, | |
| "step": 387, | |
| "ts_encoder_learning_rate": 5.1934907162370374e-08 | |
| }, | |
| { | |
| "epoch": 4.464609800362976, | |
| "grad_norm": 0.41648710747921297, | |
| "learning_rate": 5.1934907162370374e-08, | |
| "loss": 0.1756, | |
| "step": 388, | |
| "ts_encoder_learning_rate": 4.63333366465174e-08 | |
| }, | |
| { | |
| "epoch": 4.4762250453720505, | |
| "grad_norm": 0.39540267589707684, | |
| "learning_rate": 4.63333366465174e-08, | |
| "loss": 0.1831, | |
| "step": 389, | |
| "ts_encoder_learning_rate": 4.104993088376974e-08 | |
| }, | |
| { | |
| "epoch": 4.487840290381126, | |
| "grad_norm": 0.3927817907648021, | |
| "learning_rate": 4.104993088376974e-08, | |
| "loss": 0.1742, | |
| "step": 390, | |
| "ts_encoder_learning_rate": 3.608502921740753e-08 | |
| }, | |
| { | |
| "epoch": 4.4994555353902, | |
| "grad_norm": 0.43059843726946884, | |
| "learning_rate": 3.608502921740753e-08, | |
| "loss": 0.1922, | |
| "step": 391, | |
| "ts_encoder_learning_rate": 3.143895053378698e-08 | |
| }, | |
| { | |
| "epoch": 4.511070780399274, | |
| "grad_norm": 0.41821997175820497, | |
| "learning_rate": 3.143895053378698e-08, | |
| "loss": 0.188, | |
| "step": 392, | |
| "ts_encoder_learning_rate": 2.7111993241860646e-08 | |
| }, | |
| { | |
| "epoch": 4.5226860254083485, | |
| "grad_norm": 0.44898297045915464, | |
| "learning_rate": 2.7111993241860646e-08, | |
| "loss": 0.195, | |
| "step": 393, | |
| "ts_encoder_learning_rate": 2.3104435254008852e-08 | |
| }, | |
| { | |
| "epoch": 4.534301270417423, | |
| "grad_norm": 0.41081766017860594, | |
| "learning_rate": 2.3104435254008852e-08, | |
| "loss": 0.1764, | |
| "step": 394, | |
| "ts_encoder_learning_rate": 1.9416533968193428e-08 | |
| }, | |
| { | |
| "epoch": 4.545916515426497, | |
| "grad_norm": 0.4267367649318197, | |
| "learning_rate": 1.9416533968193428e-08, | |
| "loss": 0.178, | |
| "step": 395, | |
| "ts_encoder_learning_rate": 1.6048526251421502e-08 | |
| }, | |
| { | |
| "epoch": 4.557531760435571, | |
| "grad_norm": 0.40492762389862497, | |
| "learning_rate": 1.6048526251421502e-08, | |
| "loss": 0.1833, | |
| "step": 396, | |
| "ts_encoder_learning_rate": 1.3000628424535978e-08 | |
| }, | |
| { | |
| "epoch": 4.569147005444647, | |
| "grad_norm": 0.41530646634421503, | |
| "learning_rate": 1.3000628424535978e-08, | |
| "loss": 0.1764, | |
| "step": 397, | |
| "ts_encoder_learning_rate": 1.0273036248318325e-08 | |
| }, | |
| { | |
| "epoch": 4.580762250453721, | |
| "grad_norm": 0.41054541493317387, | |
| "learning_rate": 1.0273036248318325e-08, | |
| "loss": 0.1884, | |
| "step": 398, | |
| "ts_encoder_learning_rate": 7.865924910916977e-09 | |
| }, | |
| { | |
| "epoch": 4.592377495462795, | |
| "grad_norm": 0.3927284291620028, | |
| "learning_rate": 7.865924910916977e-09, | |
| "loss": 0.174, | |
| "step": 399, | |
| "ts_encoder_learning_rate": 5.779449016595773e-09 | |
| }, | |
| { | |
| "epoch": 4.6039927404718695, | |
| "grad_norm": 0.40810543942098576, | |
| "learning_rate": 5.779449016595773e-09, | |
| "loss": 0.1944, | |
| "step": 400, | |
| "ts_encoder_learning_rate": 4.0137425758018935e-09 | |
| }, | |
| { | |
| "epoch": 4.6039927404718695, | |
| "step": 400, | |
| "total_flos": 667646607294464.0, | |
| "train_loss": 0.43487690573791044, | |
| "train_runtime": 29492.4665, | |
| "train_samples_per_second": 6.944, | |
| "train_steps_per_second": 0.014, | |
| "ts_encoder_learning_rate": 4.0137425758018935e-09 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 400, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 667646607294464.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
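
The trace above is a Hugging Face Transformers `trainer_state.json` checkpoint log: a `log_history` list of per-step records (`step`, `loss`, `learning_rate`, `ts_encoder_learning_rate`, `grad_norm`) followed by a final summary record and the trainer configuration. As a minimal sketch of how such a log can be inspected — the filename `trainer_state.json` and the use of matplotlib are assumptions for illustration, not part of the original run — the following Python loads the per-step entries and plots the logged loss and the two learning-rate schedules against `step`:

```python
# Minimal sketch (assumption: the log above is saved as "trainer_state.json").
# Loads the Trainer checkpoint state and plots the logged loss and
# learning-rate schedules from its log_history.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only per-step records; the final summary entry reports
# train_loss/runtime statistics instead of a per-step "loss"/"learning_rate".
logs = [e for e in state["log_history"] if "loss" in e and "learning_rate" in e]

steps = [e["step"] for e in logs]
loss = [e["loss"] for e in logs]
lr = [e["learning_rate"] for e in logs]
ts_lr = [e["ts_encoder_learning_rate"] for e in logs]

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax_loss.plot(steps, loss)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lr, label="learning_rate")
ax_lr.plot(steps, ts_lr, label="ts_encoder_learning_rate")
ax_lr.set_xlabel("step")
ax_lr.set_ylabel("learning rate")
ax_lr.legend()
fig.tight_layout()
plt.show()
```

Read this way, the log shows the expected shape for this run: a short warm-up from a zero learning rate, the loss dropping from roughly 4.7 to around 0.19–0.20 by step 400, and both learning rates decaying toward zero as training stops at `max_steps` = 400 (about 4.6 of the configured 5 epochs).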