| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.44236188478930466, | |
| "eval_steps": 500, | |
| "global_step": 40500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00019996132455658605, | |
| "loss": 42.0911, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.0001999226491131721, | |
| "loss": 38.7852, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00019988397366975814, | |
| "loss": 31.5332, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00019984529822634418, | |
| "loss": 25.7456, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.0001998066227829302, | |
| "loss": 20.5779, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00019976794733951624, | |
| "loss": 17.4712, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00019972927189610228, | |
| "loss": 14.2287, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00019969059645268835, | |
| "loss": 12.748, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.0001996519210092744, | |
| "loss": 11.9603, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00019961324556586044, | |
| "loss": 12.6114, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00019957457012244646, | |
| "loss": 11.2424, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.0001995358946790325, | |
| "loss": 10.4185, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019949721923561854, | |
| "loss": 10.6495, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019945854379220458, | |
| "loss": 8.6583, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019941986834879063, | |
| "loss": 7.9045, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019938119290537667, | |
| "loss": 7.5867, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019934251746196271, | |
| "loss": 6.344, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019930384201854876, | |
| "loss": 7.0004, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0001992651665751348, | |
| "loss": 6.0177, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019922649113172082, | |
| "loss": 6.4546, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019918781568830686, | |
| "loss": 4.9639, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0001991491402448929, | |
| "loss": 4.5082, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019911046480147897, | |
| "loss": 4.9607, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019907178935806501, | |
| "loss": 4.6557, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019903311391465106, | |
| "loss": 3.7942, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0001989944384712371, | |
| "loss": 3.3466, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019895576302782312, | |
| "loss": 4.2555, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019891708758440916, | |
| "loss": 3.7983, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0001988784121409952, | |
| "loss": 3.6397, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019883973669758125, | |
| "loss": 3.2206, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0001988010612541673, | |
| "loss": 2.9513, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019876238581075333, | |
| "loss": 3.443, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019872371036733938, | |
| "loss": 2.6171, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019868503492392542, | |
| "loss": 2.6626, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019864635948051143, | |
| "loss": 3.2079, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019860768403709748, | |
| "loss": 2.679, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00019856900859368352, | |
| "loss": 3.2509, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0001985303331502696, | |
| "loss": 2.3529, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019849165770685563, | |
| "loss": 2.3721, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019845298226344168, | |
| "loss": 2.7719, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019841430682002772, | |
| "loss": 2.3059, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019837563137661374, | |
| "loss": 2.9214, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019833695593319978, | |
| "loss": 2.4541, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019829828048978582, | |
| "loss": 2.3267, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019825960504637186, | |
| "loss": 2.1945, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0001982209296029579, | |
| "loss": 2.3966, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019818225415954395, | |
| "loss": 2.5349, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019814357871613, | |
| "loss": 2.0588, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019810490327271604, | |
| "loss": 3.1209, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019806622782930205, | |
| "loss": 2.3281, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0001980275523858881, | |
| "loss": 2.0749, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019798887694247414, | |
| "loss": 2.1665, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0001979502014990602, | |
| "loss": 2.5256, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019791152605564625, | |
| "loss": 2.3435, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0001978728506122323, | |
| "loss": 2.2333, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019783417516881834, | |
| "loss": 1.9695, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019779549972540435, | |
| "loss": 2.3046, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0001977568242819904, | |
| "loss": 2.1951, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019771814883857644, | |
| "loss": 2.2141, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019767947339516248, | |
| "loss": 2.3285, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019764079795174853, | |
| "loss": 1.9263, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019760212250833457, | |
| "loss": 2.4391, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0001975634470649206, | |
| "loss": 2.2386, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019752477162150665, | |
| "loss": 1.9979, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019748609617809267, | |
| "loss": 2.2926, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0001974474207346787, | |
| "loss": 2.0263, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019740874529126476, | |
| "loss": 2.3533, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019737006984785083, | |
| "loss": 2.0248, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019733139440443687, | |
| "loss": 1.5322, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0001972927189610229, | |
| "loss": 1.2563, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019725404351760896, | |
| "loss": 1.2361, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019721536807419497, | |
| "loss": 1.3821, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019717669263078101, | |
| "loss": 1.0988, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019713801718736706, | |
| "loss": 1.2244, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0001970993417439531, | |
| "loss": 0.9095, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019706066630053914, | |
| "loss": 1.2458, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0001970219908571252, | |
| "loss": 1.1168, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019698331541371123, | |
| "loss": 0.7974, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019694463997029727, | |
| "loss": 1.0594, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0001969059645268833, | |
| "loss": 1.2522, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019686728908346933, | |
| "loss": 0.8916, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0001968286136400554, | |
| "loss": 0.9284, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019678993819664144, | |
| "loss": 0.7177, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0001967512627532275, | |
| "loss": 1.0662, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019671258730981353, | |
| "loss": 1.0509, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019667391186639957, | |
| "loss": 1.0486, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0001966352364229856, | |
| "loss": 0.9541, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019659656097957163, | |
| "loss": 1.1056, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019655788553615768, | |
| "loss": 1.0613, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019651921009274372, | |
| "loss": 0.9647, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00019648053464932976, | |
| "loss": 0.8281, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0001964418592059158, | |
| "loss": 0.8205, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00019640318376250185, | |
| "loss": 0.8107, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0001963645083190879, | |
| "loss": 0.9087, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00019632583287567393, | |
| "loss": 0.7476, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00019628715743225995, | |
| "loss": 0.8191, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00019624848198884602, | |
| "loss": 1.0138, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00019620980654543206, | |
| "loss": 1.0121, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0001961711311020181, | |
| "loss": 0.7376, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00019613245565860415, | |
| "loss": 0.8335, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0001960937802151902, | |
| "loss": 0.9411, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0001960551047717762, | |
| "loss": 0.8631, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00019601642932836225, | |
| "loss": 0.936, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0001959777538849483, | |
| "loss": 0.8524, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00019593907844153434, | |
| "loss": 0.7093, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00019590040299812038, | |
| "loss": 0.8302, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00019586172755470642, | |
| "loss": 0.8756, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00019582305211129247, | |
| "loss": 0.7728, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0001957843766678785, | |
| "loss": 0.809, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00019574570122446455, | |
| "loss": 0.7282, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00019570702578105057, | |
| "loss": 0.8608, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00019566835033763664, | |
| "loss": 1.0078, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00019562967489422268, | |
| "loss": 0.9773, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00019559099945080872, | |
| "loss": 0.7969, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00019555232400739477, | |
| "loss": 0.9988, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0001955136485639808, | |
| "loss": 0.7552, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019547497312056685, | |
| "loss": 0.8775, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019543629767715287, | |
| "loss": 0.8895, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0001953976222337389, | |
| "loss": 0.8106, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019535894679032496, | |
| "loss": 0.6546, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.000195320271346911, | |
| "loss": 0.9039, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019528159590349704, | |
| "loss": 0.652, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019524292046008308, | |
| "loss": 0.6561, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019520424501666913, | |
| "loss": 1.046, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019516556957325517, | |
| "loss": 0.8783, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019512689412984119, | |
| "loss": 0.7351, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019508821868642726, | |
| "loss": 0.733, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0001950495432430133, | |
| "loss": 0.7675, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019501086779959934, | |
| "loss": 0.9451, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019497219235618539, | |
| "loss": 0.9686, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019493351691277143, | |
| "loss": 0.6083, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019489484146935747, | |
| "loss": 0.8619, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0001948561660259435, | |
| "loss": 0.6557, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019481749058252953, | |
| "loss": 0.8819, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019477881513911557, | |
| "loss": 0.8356, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019474013969570162, | |
| "loss": 0.8211, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019470146425228766, | |
| "loss": 0.8393, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0001946627888088737, | |
| "loss": 1.0301, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019462411336545975, | |
| "loss": 0.7435, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0001945854379220458, | |
| "loss": 0.71, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0001945467624786318, | |
| "loss": 0.7786, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019450808703521787, | |
| "loss": 1.1273, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00019446941159180392, | |
| "loss": 0.923, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00019443073614838996, | |
| "loss": 0.8656, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.000194392060704976, | |
| "loss": 0.8191, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00019435338526156205, | |
| "loss": 0.8924, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0001943147098181481, | |
| "loss": 0.9004, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0001942760343747341, | |
| "loss": 0.6538, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00019423735893132015, | |
| "loss": 0.8669, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0001941986834879062, | |
| "loss": 0.9103, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00019416000804449223, | |
| "loss": 0.8853, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00019412133260107828, | |
| "loss": 0.7989, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00019408265715766432, | |
| "loss": 0.6957, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00019404398171425036, | |
| "loss": 0.8685, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0001940053062708364, | |
| "loss": 0.6701, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00019396663082742242, | |
| "loss": 0.7488, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0001939279553840085, | |
| "loss": 0.9214, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00019388927994059454, | |
| "loss": 0.7879, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00019385060449718058, | |
| "loss": 0.8522, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00019381192905376662, | |
| "loss": 0.9119, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00019377325361035267, | |
| "loss": 0.6229, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0001937345781669387, | |
| "loss": 0.8, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00019369590272352472, | |
| "loss": 0.6705, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00019365722728011077, | |
| "loss": 0.8694, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0001936185518366968, | |
| "loss": 0.7932, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00019357987639328285, | |
| "loss": 0.7311, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0001935412009498689, | |
| "loss": 0.844, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00019350252550645494, | |
| "loss": 0.8428, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019346385006304098, | |
| "loss": 0.8791, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019342517461962703, | |
| "loss": 0.9576, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019338649917621304, | |
| "loss": 0.821, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0001933478237327991, | |
| "loss": 1.0343, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019330914828938515, | |
| "loss": 0.862, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0001932704728459712, | |
| "loss": 0.6914, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019323179740255724, | |
| "loss": 1.0047, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019319312195914328, | |
| "loss": 0.7347, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019315444651572933, | |
| "loss": 0.7331, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019311577107231534, | |
| "loss": 0.9639, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019307709562890139, | |
| "loss": 0.7824, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019303842018548743, | |
| "loss": 0.8321, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019299974474207347, | |
| "loss": 1.053, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019296106929865951, | |
| "loss": 0.677, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019292239385524556, | |
| "loss": 0.8771, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0001928837184118316, | |
| "loss": 0.7547, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019284504296841764, | |
| "loss": 0.7911, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019280636752500366, | |
| "loss": 0.8772, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019276769208158973, | |
| "loss": 1.0254, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019272901663817577, | |
| "loss": 0.9881, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019269034119476182, | |
| "loss": 0.9809, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019265166575134786, | |
| "loss": 0.6407, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0001926129903079339, | |
| "loss": 0.8552, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019257431486451994, | |
| "loss": 0.5715, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019253563942110596, | |
| "loss": 0.7908, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.000192496963977692, | |
| "loss": 0.8544, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019245828853427805, | |
| "loss": 0.7795, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0001924196130908641, | |
| "loss": 0.7534, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019238093764745013, | |
| "loss": 0.9141, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019234226220403618, | |
| "loss": 0.6377, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019230358676062222, | |
| "loss": 0.8392, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019226491131720826, | |
| "loss": 0.8541, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0001922262358737943, | |
| "loss": 0.7969, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019218756043038035, | |
| "loss": 0.6434, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0001921488849869664, | |
| "loss": 0.9645, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019211020954355243, | |
| "loss": 0.8545, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019207153410013848, | |
| "loss": 0.669, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019203285865672452, | |
| "loss": 0.7878, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019199418321331056, | |
| "loss": 0.6872, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019195550776989658, | |
| "loss": 0.7578, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019191683232648262, | |
| "loss": 0.6626, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019187815688306866, | |
| "loss": 0.7433, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0001918394814396547, | |
| "loss": 0.8421, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019180080599624075, | |
| "loss": 0.8302, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0001917621305528268, | |
| "loss": 0.7689, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019172345510941284, | |
| "loss": 0.7695, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019168477966599888, | |
| "loss": 0.8601, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019164610422258492, | |
| "loss": 1.0576, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019160742877917097, | |
| "loss": 0.6168, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.000191568753335757, | |
| "loss": 0.8053, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019153007789234305, | |
| "loss": 0.6034, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0001914914024489291, | |
| "loss": 0.8146, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019145272700551514, | |
| "loss": 0.813, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019141405156210118, | |
| "loss": 0.7254, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019137537611868722, | |
| "loss": 0.8516, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019133670067527324, | |
| "loss": 0.8619, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019129802523185928, | |
| "loss": 0.8323, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019125934978844533, | |
| "loss": 0.802, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019122067434503137, | |
| "loss": 0.9431, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0001911819989016174, | |
| "loss": 0.833, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019114332345820345, | |
| "loss": 0.8785, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0001911046480147895, | |
| "loss": 0.894, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019106597257137554, | |
| "loss": 0.9139, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019102729712796158, | |
| "loss": 0.7368, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019098862168454763, | |
| "loss": 0.8103, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019094994624113367, | |
| "loss": 0.8547, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0001909112707977197, | |
| "loss": 0.7595, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019087259535430576, | |
| "loss": 0.7432, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0001908339199108918, | |
| "loss": 0.8002, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019079524446747784, | |
| "loss": 0.7238, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019075656902406386, | |
| "loss": 0.7368, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0001907178935806499, | |
| "loss": 0.7619, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019067921813723594, | |
| "loss": 0.8555, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.000190640542693822, | |
| "loss": 0.7968, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019060186725040803, | |
| "loss": 0.6167, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019056319180699407, | |
| "loss": 0.7268, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019052451636358012, | |
| "loss": 0.7624, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00019048584092016616, | |
| "loss": 0.7498, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0001904471654767522, | |
| "loss": 0.7913, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00019040849003333825, | |
| "loss": 0.9818, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0001903698145899243, | |
| "loss": 0.9653, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00019033113914651033, | |
| "loss": 0.8576, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00019029246370309637, | |
| "loss": 0.7793, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00019025378825968242, | |
| "loss": 0.9986, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00019021511281626846, | |
| "loss": 0.6204, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00019017643737285448, | |
| "loss": 0.7432, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00019013776192944052, | |
| "loss": 0.7728, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00019009908648602656, | |
| "loss": 0.924, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0001900604110426126, | |
| "loss": 0.8306, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00019002173559919865, | |
| "loss": 0.9331, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0001899830601557847, | |
| "loss": 0.9156, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00018994438471237073, | |
| "loss": 0.7475, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00018990570926895678, | |
| "loss": 0.8014, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00018986703382554282, | |
| "loss": 0.7636, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00018982835838212886, | |
| "loss": 0.8878, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0001897896829387149, | |
| "loss": 0.7146, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00018975100749530095, | |
| "loss": 0.7577, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.000189712332051887, | |
| "loss": 0.9388, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00018967365660847304, | |
| "loss": 0.7735, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00018963498116505908, | |
| "loss": 0.6801, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0001895963057216451, | |
| "loss": 0.7908, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00018955763027823114, | |
| "loss": 0.7054, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00018951895483481718, | |
| "loss": 0.8082, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00018948027939140322, | |
| "loss": 0.7959, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00018944160394798927, | |
| "loss": 0.8319, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0001894029285045753, | |
| "loss": 0.7559, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00018936425306116135, | |
| "loss": 0.6439, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0001893255776177474, | |
| "loss": 0.7906, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00018928690217433344, | |
| "loss": 0.9517, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00018924822673091948, | |
| "loss": 0.8082, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00018920955128750552, | |
| "loss": 0.8872, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00018917087584409157, | |
| "loss": 0.6533, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0001891322004006776, | |
| "loss": 0.8846, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00018909352495726365, | |
| "loss": 0.7644, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0001890548495138497, | |
| "loss": 0.9197, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0001890161740704357, | |
| "loss": 0.8356, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00018897749862702176, | |
| "loss": 0.7626, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0001889388231836078, | |
| "loss": 0.7978, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00018890014774019384, | |
| "loss": 0.9382, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00018886147229677988, | |
| "loss": 0.8213, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00018882279685336593, | |
| "loss": 0.8098, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00018878412140995197, | |
| "loss": 0.6624, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00018874544596653801, | |
| "loss": 0.6901, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00018870677052312406, | |
| "loss": 0.8449, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0001886680950797101, | |
| "loss": 0.992, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00018862941963629614, | |
| "loss": 0.8163, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00018859074419288219, | |
| "loss": 0.9079, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00018855206874946823, | |
| "loss": 0.6967, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00018851339330605427, | |
| "loss": 0.7634, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018847471786264032, | |
| "loss": 0.881, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018843604241922633, | |
| "loss": 0.9108, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018839736697581237, | |
| "loss": 0.7132, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018835869153239842, | |
| "loss": 0.5067, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018832001608898446, | |
| "loss": 1.1357, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0001882813406455705, | |
| "loss": 0.7256, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018824266520215655, | |
| "loss": 0.6846, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0001882039897587426, | |
| "loss": 0.8358, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018816531431532863, | |
| "loss": 0.7776, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018812663887191468, | |
| "loss": 0.5573, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018808796342850072, | |
| "loss": 0.6548, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018804928798508676, | |
| "loss": 0.7813, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0001880106125416728, | |
| "loss": 0.853, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018797193709825885, | |
| "loss": 0.757, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0001879332616548449, | |
| "loss": 0.7511, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018789458621143093, | |
| "loss": 0.8809, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018785591076801698, | |
| "loss": 0.6439, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.000187817235324603, | |
| "loss": 0.6401, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018777855988118904, | |
| "loss": 0.9463, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018773988443777508, | |
| "loss": 0.7206, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018770120899436112, | |
| "loss": 0.738, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018766253355094716, | |
| "loss": 0.8078, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0001876238581075332, | |
| "loss": 0.8814, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018758518266411925, | |
| "loss": 0.7841, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0001875465072207053, | |
| "loss": 0.9534, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018750783177729134, | |
| "loss": 0.7588, | |
| "step": 16150 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00018746915633387738, | |
| "loss": 0.7467, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00018743048089046342, | |
| "loss": 0.7402, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00018739180544704947, | |
| "loss": 0.7391, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0001873531300036355, | |
| "loss": 0.93, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00018731445456022155, | |
| "loss": 0.673, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0001872757791168076, | |
| "loss": 0.8719, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0001872371036733936, | |
| "loss": 0.7977, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00018719842822997965, | |
| "loss": 0.8446, | |
| "step": 16550 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0001871597527865657, | |
| "loss": 0.5509, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00018712107734315174, | |
| "loss": 0.7187, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00018708240189973778, | |
| "loss": 0.7886, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00018704372645632383, | |
| "loss": 0.787, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00018700505101290987, | |
| "loss": 0.8182, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0001869663755694959, | |
| "loss": 0.7996, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00018692770012608195, | |
| "loss": 1.0537, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.000186889024682668, | |
| "loss": 0.7795, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00018685034923925404, | |
| "loss": 0.6382, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00018681167379584008, | |
| "loss": 0.8503, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00018677299835242613, | |
| "loss": 0.7172, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00018673432290901217, | |
| "loss": 0.8269, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0001866956474655982, | |
| "loss": 0.8608, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00018665697202218423, | |
| "loss": 0.5488, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00018661829657877027, | |
| "loss": 0.6198, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00018657962113535631, | |
| "loss": 0.6294, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00018654094569194236, | |
| "loss": 0.6832, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0001865022702485284, | |
| "loss": 0.7857, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018646359480511444, | |
| "loss": 0.873, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018642491936170051, | |
| "loss": 0.8379, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018638624391828653, | |
| "loss": 0.716, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018634756847487257, | |
| "loss": 0.7123, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018630889303145862, | |
| "loss": 0.7582, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018627021758804466, | |
| "loss": 0.8126, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0001862315421446307, | |
| "loss": 0.8564, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018619286670121674, | |
| "loss": 0.5869, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0001861541912578028, | |
| "loss": 0.7508, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018611551581438883, | |
| "loss": 0.7061, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018607684037097485, | |
| "loss": 0.7345, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0001860381649275609, | |
| "loss": 0.5775, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018599948948414693, | |
| "loss": 0.7817, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018596081404073298, | |
| "loss": 0.7201, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018592213859731902, | |
| "loss": 0.8352, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018588346315390506, | |
| "loss": 0.7986, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018584478771049113, | |
| "loss": 0.5892, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018580611226707715, | |
| "loss": 0.6573, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0001857674368236632, | |
| "loss": 0.7291, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018572876138024923, | |
| "loss": 0.8477, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018569008593683528, | |
| "loss": 0.7634, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018565141049342132, | |
| "loss": 0.5596, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018561273505000736, | |
| "loss": 0.7536, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0001855740596065934, | |
| "loss": 0.8015, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00018553538416317945, | |
| "loss": 0.9044, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018549670871976547, | |
| "loss": 0.7212, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001854580332763515, | |
| "loss": 0.6835, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018541935783293755, | |
| "loss": 0.6431, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001853806823895236, | |
| "loss": 0.6776, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018534200694610964, | |
| "loss": 0.8134, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018530333150269568, | |
| "loss": 0.7613, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018526465605928175, | |
| "loss": 0.6909, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018522598061586777, | |
| "loss": 0.5647, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001851873051724538, | |
| "loss": 0.845, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018514862972903985, | |
| "loss": 0.6676, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001851099542856259, | |
| "loss": 0.608, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018507127884221194, | |
| "loss": 0.6545, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018503260339879798, | |
| "loss": 0.8084, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018499392795538402, | |
| "loss": 0.9323, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018495525251197007, | |
| "loss": 0.7761, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018491657706855608, | |
| "loss": 0.7525, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018487790162514213, | |
| "loss": 0.7387, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018483922618172817, | |
| "loss": 0.7412, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001848005507383142, | |
| "loss": 0.6455, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018476187529490026, | |
| "loss": 0.6401, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018472319985148633, | |
| "loss": 0.7524, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018468452440807237, | |
| "loss": 0.8381, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018464584896465838, | |
| "loss": 0.7317, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018460717352124443, | |
| "loss": 0.7321, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00018456849807783047, | |
| "loss": 0.8627, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001845298226344165, | |
| "loss": 0.8806, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00018449114719100256, | |
| "loss": 0.6949, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0001844524717475886, | |
| "loss": 0.8466, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00018441379630417464, | |
| "loss": 0.468, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00018437512086076069, | |
| "loss": 0.8107, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0001843364454173467, | |
| "loss": 0.8214, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00018429776997393274, | |
| "loss": 0.6371, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0001842590945305188, | |
| "loss": 0.8139, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00018422041908710483, | |
| "loss": 0.8821, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00018418174364369087, | |
| "loss": 0.749, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00018414306820027694, | |
| "loss": 0.7666, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.000184104392756863, | |
| "loss": 0.804, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.000184065717313449, | |
| "loss": 0.8258, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00018402704187003505, | |
| "loss": 0.6565, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0001839883664266211, | |
| "loss": 0.6481, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00018394969098320713, | |
| "loss": 0.5938, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00018391101553979317, | |
| "loss": 0.6611, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00018387234009637922, | |
| "loss": 0.9062, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00018383366465296526, | |
| "loss": 0.6141, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0001837949892095513, | |
| "loss": 0.6457, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00018375631376613735, | |
| "loss": 0.5349, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00018371763832272336, | |
| "loss": 0.6687, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0001836789628793094, | |
| "loss": 1.0448, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00018364028743589545, | |
| "loss": 0.8059, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0001836016119924815, | |
| "loss": 0.6748, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00018356293654906756, | |
| "loss": 0.5979, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0001835242611056536, | |
| "loss": 0.8469, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018348558566223962, | |
| "loss": 0.7463, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018344691021882566, | |
| "loss": 0.7493, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0001834082347754117, | |
| "loss": 0.8654, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018336955933199775, | |
| "loss": 0.7216, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0001833308838885838, | |
| "loss": 0.7847, | |
| "step": 21550 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018329220844516984, | |
| "loss": 0.5339, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018325353300175588, | |
| "loss": 0.7045, | |
| "step": 21650 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018321485755834192, | |
| "loss": 0.6995, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018317618211492797, | |
| "loss": 0.736, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018313750667151398, | |
| "loss": 0.7212, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018309883122810002, | |
| "loss": 0.6062, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018306015578468607, | |
| "loss": 0.889, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0001830214803412721, | |
| "loss": 0.6812, | |
| "step": 21950 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018298280489785818, | |
| "loss": 0.7713, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018294412945444422, | |
| "loss": 0.7462, | |
| "step": 22050 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018290545401103027, | |
| "loss": 0.5084, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018286677856761628, | |
| "loss": 0.6875, | |
| "step": 22150 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018282810312420233, | |
| "loss": 0.8552, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018278942768078837, | |
| "loss": 0.7549, | |
| "step": 22250 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0001827507522373744, | |
| "loss": 0.6307, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018271207679396045, | |
| "loss": 1.0293, | |
| "step": 22350 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0001826734013505465, | |
| "loss": 0.7603, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018263472590713254, | |
| "loss": 0.5218, | |
| "step": 22450 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018259605046371858, | |
| "loss": 0.5962, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0001825573750203046, | |
| "loss": 0.7793, | |
| "step": 22550 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018251869957689064, | |
| "loss": 0.6511, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00018248002413347669, | |
| "loss": 0.6589, | |
| "step": 22650 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00018244134869006273, | |
| "loss": 0.826, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0001824026732466488, | |
| "loss": 0.7561, | |
| "step": 22750 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00018236399780323484, | |
| "loss": 0.7605, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00018232532235982088, | |
| "loss": 0.7887, | |
| "step": 22850 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0001822866469164069, | |
| "loss": 0.6065, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00018224797147299294, | |
| "loss": 0.7631, | |
| "step": 22950 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00018220929602957899, | |
| "loss": 0.6708, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00018217062058616503, | |
| "loss": 0.8115, | |
| "step": 23050 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00018213194514275107, | |
| "loss": 0.5469, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00018209326969933712, | |
| "loss": 0.751, | |
| "step": 23150 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00018205459425592316, | |
| "loss": 0.6424, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0001820159188125092, | |
| "loss": 0.7017, | |
| "step": 23250 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00018197724336909522, | |
| "loss": 0.615, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00018193856792568126, | |
| "loss": 0.9175, | |
| "step": 23350 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0001818998924822673, | |
| "loss": 0.9438, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00018186121703885335, | |
| "loss": 0.8183, | |
| "step": 23450 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00018182254159543942, | |
| "loss": 0.9829, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00018178386615202546, | |
| "loss": 0.7703, | |
| "step": 23550 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0001817451907086115, | |
| "loss": 0.6007, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00018170651526519752, | |
| "loss": 0.6095, | |
| "step": 23650 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00018166783982178356, | |
| "loss": 0.6811, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0001816291643783696, | |
| "loss": 0.6791, | |
| "step": 23750 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00018159048893495565, | |
| "loss": 0.8032, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0001815518134915417, | |
| "loss": 0.6968, | |
| "step": 23850 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00018151313804812773, | |
| "loss": 0.7912, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018147446260471378, | |
| "loss": 0.6557, | |
| "step": 23950 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018143578716129982, | |
| "loss": 0.7041, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018139711171788584, | |
| "loss": 0.7028, | |
| "step": 24050 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018135843627447188, | |
| "loss": 0.5454, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018131976083105792, | |
| "loss": 0.8485, | |
| "step": 24150 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018128108538764396, | |
| "loss": 0.6944, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018124240994423003, | |
| "loss": 0.6429, | |
| "step": 24250 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018120373450081608, | |
| "loss": 0.9911, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018116505905740212, | |
| "loss": 0.7624, | |
| "step": 24350 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018112638361398814, | |
| "loss": 0.8377, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018108770817057418, | |
| "loss": 0.7377, | |
| "step": 24450 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018104903272716022, | |
| "loss": 0.8191, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018101035728374627, | |
| "loss": 0.6292, | |
| "step": 24550 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0001809716818403323, | |
| "loss": 0.7387, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018093300639691835, | |
| "loss": 0.7166, | |
| "step": 24650 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0001808943309535044, | |
| "loss": 0.787, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018085565551009044, | |
| "loss": 0.6451, | |
| "step": 24750 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018081698006667645, | |
| "loss": 0.8142, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0001807783046232625, | |
| "loss": 0.9096, | |
| "step": 24850 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018073962917984854, | |
| "loss": 0.6828, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018070095373643458, | |
| "loss": 0.5445, | |
| "step": 24950 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018066227829302065, | |
| "loss": 0.8181, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00018047301300240095, | |
| "loss": 1.124, | |
| "step": 25050 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00018043403698044964, | |
| "loss": 1.1854, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00018039506095849834, | |
| "loss": 1.0665, | |
| "step": 25150 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00018035608493654706, | |
| "loss": 0.8882, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00018031710891459575, | |
| "loss": 1.0253, | |
| "step": 25250 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00018027813289264445, | |
| "loss": 1.0734, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00018023915687069315, | |
| "loss": 1.0606, | |
| "step": 25350 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00018020018084874187, | |
| "loss": 0.9589, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00018016120482679056, | |
| "loss": 1.0562, | |
| "step": 25450 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00018012222880483926, | |
| "loss": 0.8676, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00018008325278288798, | |
| "loss": 1.0997, | |
| "step": 25550 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00018004427676093668, | |
| "loss": 0.9763, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00018000530073898537, | |
| "loss": 0.8347, | |
| "step": 25650 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0001799663247170341, | |
| "loss": 0.9396, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0001799273486950828, | |
| "loss": 0.9281, | |
| "step": 25750 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0001798883726731315, | |
| "loss": 0.9826, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0001798493966511802, | |
| "loss": 0.8583, | |
| "step": 25850 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0001798104206292289, | |
| "loss": 0.8509, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00017977144460727763, | |
| "loss": 0.8912, | |
| "step": 25950 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0001797324685853263, | |
| "loss": 0.8786, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00017969349256337502, | |
| "loss": 0.8482, | |
| "step": 26050 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00017965451654142374, | |
| "loss": 0.9426, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00017961554051947244, | |
| "loss": 0.9505, | |
| "step": 26150 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00017957656449752113, | |
| "loss": 0.8555, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00017953758847556983, | |
| "loss": 1.1169, | |
| "step": 26250 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017949861245361855, | |
| "loss": 0.8806, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017945963643166725, | |
| "loss": 0.9295, | |
| "step": 26350 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017942066040971594, | |
| "loss": 0.8931, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017938168438776466, | |
| "loss": 0.9139, | |
| "step": 26450 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017934270836581336, | |
| "loss": 0.9318, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017930373234386205, | |
| "loss": 1.0256, | |
| "step": 26550 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017926475632191078, | |
| "loss": 0.9042, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017922578029995947, | |
| "loss": 0.8945, | |
| "step": 26650 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017918680427800817, | |
| "loss": 0.8622, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0001791478282560569, | |
| "loss": 0.8348, | |
| "step": 26750 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017910885223410559, | |
| "loss": 1.0544, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0001790698762121543, | |
| "loss": 0.7097, | |
| "step": 26850 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017903090019020298, | |
| "loss": 0.8808, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0001789919241682517, | |
| "loss": 0.896, | |
| "step": 26950 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017895294814630042, | |
| "loss": 1.0487, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017891397212434912, | |
| "loss": 0.9996, | |
| "step": 27050 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0001788749961023978, | |
| "loss": 0.9624, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0001788360200804465, | |
| "loss": 0.9344, | |
| "step": 27150 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017879704405849523, | |
| "loss": 0.9103, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017875806803654393, | |
| "loss": 0.7311, | |
| "step": 27250 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017871909201459262, | |
| "loss": 0.9748, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017868011599264134, | |
| "loss": 0.7231, | |
| "step": 27350 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017864113997069004, | |
| "loss": 0.9844, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017860216394873874, | |
| "loss": 0.9322, | |
| "step": 27450 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017856318792678746, | |
| "loss": 0.9103, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00017852421190483615, | |
| "loss": 1.0132, | |
| "step": 27550 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00017848523588288485, | |
| "loss": 0.8617, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00017844625986093354, | |
| "loss": 1.0296, | |
| "step": 27650 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00017840728383898227, | |
| "loss": 1.0048, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.000178368307817031, | |
| "loss": 1.1557, | |
| "step": 27750 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00017832933179507966, | |
| "loss": 0.7993, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00017829035577312838, | |
| "loss": 1.002, | |
| "step": 27850 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0001782513797511771, | |
| "loss": 1.0392, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0001782124037292258, | |
| "loss": 0.8991, | |
| "step": 27950 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0001781734277072745, | |
| "loss": 0.8488, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0001781344516853232, | |
| "loss": 0.8418, | |
| "step": 28050 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0001780954756633719, | |
| "loss": 1.02, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0001780564996414206, | |
| "loss": 1.0404, | |
| "step": 28150 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0001780175236194693, | |
| "loss": 0.9571, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00017797854759751803, | |
| "loss": 0.7724, | |
| "step": 28250 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00017793957157556672, | |
| "loss": 1.0129, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00017790059555361542, | |
| "loss": 0.8916, | |
| "step": 28350 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00017786161953166414, | |
| "loss": 0.9504, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00017782264350971283, | |
| "loss": 0.8393, | |
| "step": 28450 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00017778366748776153, | |
| "loss": 0.7675, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00017774469146581023, | |
| "loss": 0.8273, | |
| "step": 28550 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00017770571544385895, | |
| "loss": 0.8967, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00017766673942190764, | |
| "loss": 1.084, | |
| "step": 28650 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00017762776339995634, | |
| "loss": 0.7741, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00017758878737800506, | |
| "loss": 1.1056, | |
| "step": 28750 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00017754981135605378, | |
| "loss": 1.183, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00017751083533410245, | |
| "loss": 1.0375, | |
| "step": 28850 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00017747185931215118, | |
| "loss": 1.2414, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00017743288329019987, | |
| "loss": 1.2209, | |
| "step": 28950 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0001773939072682486, | |
| "loss": 0.8581, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0001773549312462973, | |
| "loss": 1.0284, | |
| "step": 29050 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00017731595522434598, | |
| "loss": 0.8502, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0001772769792023947, | |
| "loss": 0.6947, | |
| "step": 29150 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0001772380031804434, | |
| "loss": 0.7579, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0001771990271584921, | |
| "loss": 0.9771, | |
| "step": 29250 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00017716005113654082, | |
| "loss": 0.8661, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00017712107511458952, | |
| "loss": 0.8433, | |
| "step": 29350 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0001770820990926382, | |
| "loss": 0.9419, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0001770431230706869, | |
| "loss": 1.076, | |
| "step": 29450 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00017700414704873563, | |
| "loss": 0.9966, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00017696517102678433, | |
| "loss": 0.7618, | |
| "step": 29550 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00017692619500483302, | |
| "loss": 0.8269, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00017688721898288174, | |
| "loss": 0.8109, | |
| "step": 29650 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00017684824296093047, | |
| "loss": 0.7426, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00017680926693897913, | |
| "loss": 0.9972, | |
| "step": 29750 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00017677029091702786, | |
| "loss": 0.7991, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00017673131489507655, | |
| "loss": 0.7988, | |
| "step": 29850 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00017669233887312527, | |
| "loss": 0.7364, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00017665336285117397, | |
| "loss": 1.0258, | |
| "step": 29950 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00017661438682922267, | |
| "loss": 1.0606, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0001765754108072714, | |
| "loss": 0.909, | |
| "step": 30050 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00017653643478532008, | |
| "loss": 0.8428, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00017649745876336878, | |
| "loss": 0.8707, | |
| "step": 30150 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0001764584827414175, | |
| "loss": 0.7147, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0001764195067194662, | |
| "loss": 0.89, | |
| "step": 30250 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0001763805306975149, | |
| "loss": 0.847, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0001763415546755636, | |
| "loss": 0.7826, | |
| "step": 30350 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0001763025786536123, | |
| "loss": 0.8614, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.000176263602631661, | |
| "loss": 0.8078, | |
| "step": 30450 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0001762246266097097, | |
| "loss": 0.872, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00017618565058775842, | |
| "loss": 0.8734, | |
| "step": 30550 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00017614667456580715, | |
| "loss": 0.8836, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00017610769854385582, | |
| "loss": 1.042, | |
| "step": 30650 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00017606872252190454, | |
| "loss": 0.8561, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00017602974649995323, | |
| "loss": 0.6824, | |
| "step": 30750 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00017599077047800196, | |
| "loss": 0.9277, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00017595179445605065, | |
| "loss": 0.9887, | |
| "step": 30850 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00017591281843409935, | |
| "loss": 0.7925, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00017587384241214807, | |
| "loss": 0.8944, | |
| "step": 30950 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00017583486639019674, | |
| "loss": 1.2576, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00017579589036824546, | |
| "loss": 1.0837, | |
| "step": 31050 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00017575691434629418, | |
| "loss": 1.0205, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00017571793832434288, | |
| "loss": 1.1691, | |
| "step": 31150 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00017567896230239157, | |
| "loss": 0.9541, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00017563998628044027, | |
| "loss": 0.7735, | |
| "step": 31250 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.000175601010258489, | |
| "loss": 0.9219, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0001755620342365377, | |
| "loss": 0.6201, | |
| "step": 31350 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00017552305821458638, | |
| "loss": 0.8204, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0001754840821926351, | |
| "loss": 0.7272, | |
| "step": 31450 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0001754451061706838, | |
| "loss": 0.864, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0001754061301487325, | |
| "loss": 0.9935, | |
| "step": 31550 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00017536715412678122, | |
| "loss": 0.8651, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00017532817810482992, | |
| "loss": 0.8508, | |
| "step": 31650 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0001752892020828786, | |
| "loss": 0.863, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00017525022606092733, | |
| "loss": 0.9272, | |
| "step": 31750 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00017521125003897603, | |
| "loss": 0.9609, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00017517227401702475, | |
| "loss": 1.1736, | |
| "step": 31850 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00017513329799507342, | |
| "loss": 0.8571, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00017509432197312214, | |
| "loss": 0.758, | |
| "step": 31950 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00017505534595117086, | |
| "loss": 1.0157, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00017501636992921956, | |
| "loss": 0.762, | |
| "step": 32050 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00017497739390726826, | |
| "loss": 0.7206, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00017493841788531695, | |
| "loss": 0.9902, | |
| "step": 32150 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00017489944186336567, | |
| "loss": 0.8943, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00017486046584141437, | |
| "loss": 0.9721, | |
| "step": 32250 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00017482148981946306, | |
| "loss": 0.9522, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0001747825137975118, | |
| "loss": 0.9819, | |
| "step": 32350 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00017474353777556048, | |
| "loss": 1.0563, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00017470456175360918, | |
| "loss": 0.782, | |
| "step": 32450 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0001746655857316579, | |
| "loss": 0.9609, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0001746266097097066, | |
| "loss": 0.9329, | |
| "step": 32550 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0001745876336877553, | |
| "loss": 0.7019, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00017454865766580401, | |
| "loss": 0.9395, | |
| "step": 32650 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0001745096816438527, | |
| "loss": 0.7248, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00017447070562190143, | |
| "loss": 0.7116, | |
| "step": 32750 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0001744317295999501, | |
| "loss": 0.92, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00017439275357799882, | |
| "loss": 0.8105, | |
| "step": 32850 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00017435377755604755, | |
| "loss": 0.8492, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00017431480153409624, | |
| "loss": 0.8305, | |
| "step": 32950 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00017427582551214494, | |
| "loss": 0.882, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00016390108569805799, | |
| "loss": 40.3284, | |
| "step": 33050 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00016384647311968892, | |
| "loss": 41.3929, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0001637918605413199, | |
| "loss": 40.5771, | |
| "step": 33150 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00016373724796295083, | |
| "loss": 40.1587, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0001636826353845818, | |
| "loss": 38.4849, | |
| "step": 33250 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00016362802280621276, | |
| "loss": 40.8953, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0001635734102278437, | |
| "loss": 41.1837, | |
| "step": 33350 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00016351879764947463, | |
| "loss": 41.4111, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0001634641850711056, | |
| "loss": 39.7779, | |
| "step": 33450 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00016340957249273653, | |
| "loss": 39.6051, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0001633549599143675, | |
| "loss": 39.1987, | |
| "step": 33550 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00016330034733599844, | |
| "loss": 36.4834, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0001632457347576294, | |
| "loss": 38.9442, | |
| "step": 33650 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00016319112217926034, | |
| "loss": 38.7699, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00016313650960089128, | |
| "loss": 38.1662, | |
| "step": 33750 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00016308189702252224, | |
| "loss": 38.3107, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00016302728444415318, | |
| "loss": 37.137, | |
| "step": 33850 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00016297267186578414, | |
| "loss": 39.8413, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00016291805928741508, | |
| "loss": 37.834, | |
| "step": 33950 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00016286344670904602, | |
| "loss": 38.752, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00016280883413067698, | |
| "loss": 38.9749, | |
| "step": 34050 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00016275422155230792, | |
| "loss": 37.0203, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00016269960897393888, | |
| "loss": 37.8575, | |
| "step": 34150 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00016264499639556982, | |
| "loss": 36.1197, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0001625903838172008, | |
| "loss": 38.9567, | |
| "step": 34250 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00016253577123883172, | |
| "loss": 36.921, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001624811586604627, | |
| "loss": 37.7047, | |
| "step": 34350 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00016242654608209365, | |
| "loss": 37.8749, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001623719335037246, | |
| "loss": 36.0547, | |
| "step": 34450 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00016231732092535553, | |
| "loss": 35.7079, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001622627083469865, | |
| "loss": 35.5162, | |
| "step": 34550 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00016220809576861743, | |
| "loss": 35.6316, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001621534831902484, | |
| "loss": 37.1081, | |
| "step": 34650 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00016209887061187933, | |
| "loss": 35.4266, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001620442580335103, | |
| "loss": 35.8718, | |
| "step": 34750 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00016198964545514124, | |
| "loss": 34.2143, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00016193503287677217, | |
| "loss": 34.0882, | |
| "step": 34850 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00016188042029840314, | |
| "loss": 32.8758, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00016182580772003408, | |
| "loss": 32.0339, | |
| "step": 34950 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00016177119514166504, | |
| "loss": 31.4164, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00016171658256329598, | |
| "loss": 31.8205, | |
| "step": 35050 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00016166196998492694, | |
| "loss": 32.6587, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00016160735740655788, | |
| "loss": 31.8695, | |
| "step": 35150 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00016155274482818882, | |
| "loss": 31.0461, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00016149813224981978, | |
| "loss": 30.1198, | |
| "step": 35250 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00016144351967145072, | |
| "loss": 28.9032, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00016138890709308169, | |
| "loss": 30.3631, | |
| "step": 35350 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00016133429451471265, | |
| "loss": 29.2617, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0001612796819363436, | |
| "loss": 28.4782, | |
| "step": 35450 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00016122506935797455, | |
| "loss": 28.6378, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0001611704567796055, | |
| "loss": 28.3341, | |
| "step": 35550 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00016111584420123645, | |
| "loss": 27.6153, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0001610612316228674, | |
| "loss": 26.5044, | |
| "step": 35650 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00016100661904449833, | |
| "loss": 26.8876, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0001609520064661293, | |
| "loss": 26.9291, | |
| "step": 35750 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00016089739388776023, | |
| "loss": 24.024, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0001608427813093912, | |
| "loss": 24.5533, | |
| "step": 35850 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00016078816873102213, | |
| "loss": 24.6948, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0001607335561526531, | |
| "loss": 22.2483, | |
| "step": 35950 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00016067894357428404, | |
| "loss": 21.7253, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00016062433099591497, | |
| "loss": 20.7581, | |
| "step": 36050 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00016056971841754594, | |
| "loss": 18.6484, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00016051510583917688, | |
| "loss": 19.3484, | |
| "step": 36150 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00016046049326080784, | |
| "loss": 15.8305, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00016040588068243878, | |
| "loss": 16.644, | |
| "step": 36250 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00016035126810406972, | |
| "loss": 16.1415, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00016029665552570068, | |
| "loss": 16.2331, | |
| "step": 36350 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00016024204294733162, | |
| "loss": 13.7222, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00016018743036896258, | |
| "loss": 13.1968, | |
| "step": 36450 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00016013281779059355, | |
| "loss": 13.7183, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00016007820521222449, | |
| "loss": 13.6719, | |
| "step": 36550 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00016002359263385545, | |
| "loss": 12.565, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0001599689800554864, | |
| "loss": 11.7014, | |
| "step": 36650 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00015991436747711735, | |
| "loss": 11.9391, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0001598597548987483, | |
| "loss": 10.8187, | |
| "step": 36750 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00015980514232037923, | |
| "loss": 9.9151, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0001597505297420102, | |
| "loss": 8.8924, | |
| "step": 36850 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00015969591716364113, | |
| "loss": 7.7144, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0001596413045852721, | |
| "loss": 6.7915, | |
| "step": 36950 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00015958669200690303, | |
| "loss": 6.1585, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.000159532079428534, | |
| "loss": 6.4101, | |
| "step": 37050 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00015947746685016494, | |
| "loss": 4.6158, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00015942285427179587, | |
| "loss": 4.76, | |
| "step": 37150 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00015936824169342684, | |
| "loss": 4.0994, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00015931362911505778, | |
| "loss": 4.7396, | |
| "step": 37250 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00015925901653668874, | |
| "loss": 3.6542, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00015920440395831968, | |
| "loss": 3.4333, | |
| "step": 37350 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00015914979137995064, | |
| "loss": 4.575, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00015909517880158158, | |
| "loss": 3.3926, | |
| "step": 37450 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00015904056622321252, | |
| "loss": 3.3063, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00015898595364484348, | |
| "loss": 2.9068, | |
| "step": 37550 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00015893134106647445, | |
| "loss": 2.9475, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00015887672848810538, | |
| "loss": 2.94, | |
| "step": 37650 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00015882211590973635, | |
| "loss": 3.2924, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.0001587675033313673, | |
| "loss": 3.4012, | |
| "step": 37750 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00015871289075299825, | |
| "loss": 2.8093, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.0001586582781746292, | |
| "loss": 3.115, | |
| "step": 37850 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00015860366559626015, | |
| "loss": 2.4926, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.0001585490530178911, | |
| "loss": 2.3319, | |
| "step": 37950 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00015849444043952203, | |
| "loss": 2.4095, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.000158439827861153, | |
| "loss": 2.563, | |
| "step": 38050 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00015838521528278393, | |
| "loss": 2.5545, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0001583306027044149, | |
| "loss": 2.0663, | |
| "step": 38150 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00015827599012604583, | |
| "loss": 2.0732, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0001582213775476768, | |
| "loss": 2.9127, | |
| "step": 38250 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00015816676496930774, | |
| "loss": 2.2365, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00015811215239093867, | |
| "loss": 3.9376, | |
| "step": 38350 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00015805753981256964, | |
| "loss": 2.0433, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00015800292723420058, | |
| "loss": 2.1487, | |
| "step": 38450 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00015794831465583154, | |
| "loss": 1.8283, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00015789370207746248, | |
| "loss": 1.5619, | |
| "step": 38550 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00015783908949909342, | |
| "loss": 1.6508, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00015778447692072438, | |
| "loss": 1.8076, | |
| "step": 38650 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00015772986434235535, | |
| "loss": 1.5081, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0001576752517639863, | |
| "loss": 1.7372, | |
| "step": 38750 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00015762063918561725, | |
| "loss": 1.504, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00015756602660724819, | |
| "loss": 1.4685, | |
| "step": 38850 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00015751141402887915, | |
| "loss": 1.366, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0001574568014505101, | |
| "loss": 1.3556, | |
| "step": 38950 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00015740218887214105, | |
| "loss": 1.328, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.000157347576293772, | |
| "loss": 1.672, | |
| "step": 39050 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00015729296371540293, | |
| "loss": 1.2776, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0001572383511370339, | |
| "loss": 1.619, | |
| "step": 39150 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00015718373855866483, | |
| "loss": 1.4484, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0001571291259802958, | |
| "loss": 1.4561, | |
| "step": 39250 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00015707451340192673, | |
| "loss": 1.5445, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0001570199008235577, | |
| "loss": 1.6477, | |
| "step": 39350 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00015696528824518864, | |
| "loss": 1.483, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00015691067566681957, | |
| "loss": 1.4913, | |
| "step": 39450 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00015685606308845054, | |
| "loss": 1.2746, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00015680145051008148, | |
| "loss": 1.4588, | |
| "step": 39550 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00015674683793171244, | |
| "loss": 1.3793, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00015669222535334338, | |
| "loss": 1.4776, | |
| "step": 39650 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00015663761277497434, | |
| "loss": 1.7906, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0001565830001966053, | |
| "loss": 1.4083, | |
| "step": 39750 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00015652838761823624, | |
| "loss": 1.5248, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0001564737750398672, | |
| "loss": 1.2159, | |
| "step": 39850 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00015641916246149815, | |
| "loss": 1.4073, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00015636454988312908, | |
| "loss": 1.2702, | |
| "step": 39950 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00015630993730476005, | |
| "loss": 1.33, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.000156255324726391, | |
| "loss": 1.4365, | |
| "step": 40050 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00015620071214802195, | |
| "loss": 1.2484, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0001561460995696529, | |
| "loss": 1.2985, | |
| "step": 40150 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00015609148699128385, | |
| "loss": 1.3169, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0001560368744129148, | |
| "loss": 1.2415, | |
| "step": 40250 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00015598226183454573, | |
| "loss": 1.0357, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0001559276492561767, | |
| "loss": 1.3613, | |
| "step": 40350 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00015587303667780763, | |
| "loss": 1.1524, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0001558184240994386, | |
| "loss": 1.4132, | |
| "step": 40450 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00015576381152106953, | |
| "loss": 1.4276, | |
| "step": 40500 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 183108, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "total_flos": 2.90367192517632e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |