{ "best_metric": null, "best_model_checkpoint": null, "epoch": 16.922064491868007, "eval_steps": 200, "global_step": 180000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "action_loss": 0.6916, "epoch": 0.004700573469963335, "learning_rate": 4.1666666666666667e-07, "llm_loss": 6.1121, "loss": 6.8037, "step": 50 }, { "action_loss": 0.661, "epoch": 0.00940114693992667, "learning_rate": 8.333333333333333e-07, "llm_loss": 4.0994, "loss": 4.7604, "step": 100 }, { "action_loss": 0.5095, "epoch": 0.014101720409890007, "learning_rate": 1.25e-06, "llm_loss": 1.4575, "loss": 1.967, "step": 150 }, { "action_loss": 0.1982, "epoch": 0.01880229387985334, "learning_rate": 1.6666666666666667e-06, "llm_loss": 0.0177, "loss": 0.2158, "step": 200 }, { "action_loss": 0.0885, "epoch": 0.02350286734981668, "learning_rate": 2.0833333333333334e-06, "llm_loss": 0.004, "loss": 0.0925, "step": 250 }, { "action_loss": 0.0596, "epoch": 0.028203440819780014, "learning_rate": 2.5e-06, "llm_loss": 0.001, "loss": 0.0606, "step": 300 }, { "action_loss": 0.0526, "epoch": 0.03290401428974335, "learning_rate": 2.916666666666667e-06, "llm_loss": 0.0003, "loss": 0.0529, "step": 350 }, { "action_loss": 0.0391, "epoch": 0.03760458775970668, "learning_rate": 3.3333333333333333e-06, "llm_loss": 0.0001, "loss": 0.0392, "step": 400 }, { "action_loss": 0.032, "epoch": 0.04230516122967002, "learning_rate": 3.7500000000000005e-06, "llm_loss": 0.0, "loss": 0.032, "step": 450 }, { "action_loss": 0.0399, "epoch": 0.04700573469963336, "learning_rate": 4.166666666666667e-06, "llm_loss": 0.0, "loss": 0.0399, "step": 500 }, { "action_loss": 0.0399, "epoch": 0.05170630816959669, "learning_rate": 4.583333333333333e-06, "llm_loss": 0.0, "loss": 0.0399, "step": 550 }, { "action_loss": 0.0293, "epoch": 0.05640688163956003, "learning_rate": 5e-06, "llm_loss": 0.0, "loss": 0.0293, "step": 600 }, { "action_loss": 0.0275, "epoch": 0.06110745510952336, "learning_rate": 5.416666666666667e-06, "llm_loss": 0.0, "loss": 0.0275, "step": 650 }, { "action_loss": 0.0271, "epoch": 0.0658080285794867, "learning_rate": 5.833333333333334e-06, "llm_loss": 0.0, "loss": 0.0271, "step": 700 }, { "action_loss": 0.0192, "epoch": 0.07050860204945003, "learning_rate": 6.25e-06, "llm_loss": 0.0, "loss": 0.0192, "step": 750 }, { "action_loss": 0.0403, "epoch": 0.07520917551941336, "learning_rate": 6.666666666666667e-06, "llm_loss": 0.0, "loss": 0.0403, "step": 800 }, { "action_loss": 0.0223, "epoch": 0.07990974898937671, "learning_rate": 7.083333333333335e-06, "llm_loss": 0.0, "loss": 0.0223, "step": 850 }, { "action_loss": 0.044, "epoch": 0.08461032245934004, "learning_rate": 7.500000000000001e-06, "llm_loss": 0.0, "loss": 0.044, "step": 900 }, { "action_loss": 0.042, "epoch": 0.08931089592930337, "learning_rate": 7.916666666666667e-06, "llm_loss": 0.0, "loss": 0.042, "step": 950 }, { "action_loss": 0.0339, "epoch": 0.09401146939926672, "learning_rate": 8.333333333333334e-06, "llm_loss": 0.0, "loss": 0.0339, "step": 1000 }, { "action_loss": 0.0358, "epoch": 0.09871204286923005, "learning_rate": 8.750000000000001e-06, "llm_loss": 0.0, "loss": 0.0358, "step": 1050 }, { "action_loss": 0.0404, "epoch": 0.10341261633919338, "learning_rate": 9.166666666666666e-06, "llm_loss": 0.0, "loss": 0.0404, "step": 1100 }, { "action_loss": 0.0273, "epoch": 0.10811318980915671, "learning_rate": 9.583333333333335e-06, "llm_loss": 0.0, "loss": 0.0273, "step": 1150 }, { "action_loss": 0.0266, "epoch": 0.11281376327912006, "learning_rate": 1e-05, "llm_loss": 0.0, "loss": 0.0266, "step": 1200 }, { "action_loss": 0.0334, "epoch": 0.11751433674908339, "learning_rate": 1.0416666666666668e-05, "llm_loss": 0.0, "loss": 0.0334, "step": 1250 }, { "action_loss": 0.0227, "epoch": 0.12221491021904672, "learning_rate": 1.0833333333333334e-05, "llm_loss": 0.0, "loss": 0.0227, "step": 1300 }, { "action_loss": 0.0281, "epoch": 0.12691548368901007, "learning_rate": 1.125e-05, "llm_loss": 0.0, "loss": 0.0281, "step": 1350 }, { "action_loss": 0.0337, "epoch": 0.1316160571589734, "learning_rate": 1.1666666666666668e-05, "llm_loss": 0.0, "loss": 0.0337, "step": 1400 }, { "action_loss": 0.0286, "epoch": 0.13631663062893673, "learning_rate": 1.2083333333333333e-05, "llm_loss": 0.0, "loss": 0.0286, "step": 1450 }, { "action_loss": 0.0235, "epoch": 0.14101720409890006, "learning_rate": 1.25e-05, "llm_loss": 0.0, "loss": 0.0236, "step": 1500 }, { "action_loss": 0.029, "epoch": 0.1457177775688634, "learning_rate": 1.2916666666666668e-05, "llm_loss": 0.0, "loss": 0.029, "step": 1550 }, { "action_loss": 0.0294, "epoch": 0.15041835103882673, "learning_rate": 1.3333333333333333e-05, "llm_loss": 0.0, "loss": 0.0294, "step": 1600 }, { "action_loss": 0.0361, "epoch": 0.15511892450879008, "learning_rate": 1.375e-05, "llm_loss": 0.0, "loss": 0.0361, "step": 1650 }, { "action_loss": 0.0217, "epoch": 0.15981949797875342, "learning_rate": 1.416666666666667e-05, "llm_loss": 0.0, "loss": 0.0217, "step": 1700 }, { "action_loss": 0.0234, "epoch": 0.16452007144871675, "learning_rate": 1.4583333333333333e-05, "llm_loss": 0.0, "loss": 0.0234, "step": 1750 }, { "action_loss": 0.0319, "epoch": 0.16922064491868008, "learning_rate": 1.5000000000000002e-05, "llm_loss": 0.0, "loss": 0.0319, "step": 1800 }, { "action_loss": 0.0328, "epoch": 0.1739212183886434, "learning_rate": 1.5416666666666668e-05, "llm_loss": 0.0, "loss": 0.0328, "step": 1850 }, { "action_loss": 0.023, "epoch": 0.17862179185860674, "learning_rate": 1.5833333333333333e-05, "llm_loss": 0.0, "loss": 0.023, "step": 1900 }, { "action_loss": 0.0301, "epoch": 0.18332236532857007, "learning_rate": 1.6250000000000002e-05, "llm_loss": 0.0, "loss": 0.0301, "step": 1950 }, { "action_loss": 0.0323, "epoch": 0.18802293879853343, "learning_rate": 1.6666666666666667e-05, "llm_loss": 0.0, "loss": 0.0323, "step": 2000 }, { "action_loss": 0.0378, "epoch": 0.19272351226849677, "learning_rate": 1.7083333333333333e-05, "llm_loss": 0.0, "loss": 0.0378, "step": 2050 }, { "action_loss": 0.0238, "epoch": 0.1974240857384601, "learning_rate": 1.7500000000000002e-05, "llm_loss": 0.0, "loss": 0.0238, "step": 2100 }, { "action_loss": 0.0197, "epoch": 0.20212465920842343, "learning_rate": 1.7916666666666667e-05, "llm_loss": 0.0, "loss": 0.0197, "step": 2150 }, { "action_loss": 0.0321, "epoch": 0.20682523267838676, "learning_rate": 1.8333333333333333e-05, "llm_loss": 0.0, "loss": 0.0321, "step": 2200 }, { "action_loss": 0.0183, "epoch": 0.2115258061483501, "learning_rate": 1.8750000000000002e-05, "llm_loss": 0.0, "loss": 0.0183, "step": 2250 }, { "action_loss": 0.0294, "epoch": 0.21622637961831342, "learning_rate": 1.916666666666667e-05, "llm_loss": 0.0, "loss": 0.0294, "step": 2300 }, { "action_loss": 0.0294, "epoch": 0.22092695308827676, "learning_rate": 1.9583333333333333e-05, "llm_loss": 0.0, "loss": 0.0294, "step": 2350 }, { "action_loss": 0.0391, "epoch": 0.22562752655824012, "learning_rate": 2e-05, "llm_loss": 0.0, "loss": 0.0391, "step": 2400 }, { "action_loss": 0.0323, "epoch": 0.23032810002820345, "learning_rate": 1.9999997814670797e-05, "llm_loss": 0.0, "loss": 0.0323, "step": 2450 }, { "action_loss": 0.0219, "epoch": 0.23502867349816678, "learning_rate": 1.9999991258684133e-05, "llm_loss": 0.0, "loss": 0.0219, "step": 2500 }, { "action_loss": 0.0295, "epoch": 0.2397292469681301, "learning_rate": 1.9999980332042875e-05, "llm_loss": 0.0, "loss": 0.0296, "step": 2550 }, { "action_loss": 0.0407, "epoch": 0.24442982043809344, "learning_rate": 1.9999965034751806e-05, "llm_loss": 0.0, "loss": 0.0407, "step": 2600 }, { "action_loss": 0.0299, "epoch": 0.24913039390805677, "learning_rate": 1.9999945366817607e-05, "llm_loss": 0.0, "loss": 0.0299, "step": 2650 }, { "action_loss": 0.0505, "epoch": 0.25383096737802013, "learning_rate": 1.999992132824887e-05, "llm_loss": 0.0, "loss": 0.0505, "step": 2700 }, { "action_loss": 0.0309, "epoch": 0.25853154084798347, "learning_rate": 1.999989291905611e-05, "llm_loss": 0.0, "loss": 0.0309, "step": 2750 }, { "action_loss": 0.0167, "epoch": 0.2632321143179468, "learning_rate": 1.9999860139251737e-05, "llm_loss": 0.0, "loss": 0.0167, "step": 2800 }, { "action_loss": 0.0231, "epoch": 0.26793268778791013, "learning_rate": 1.9999822988850083e-05, "llm_loss": 0.0, "loss": 0.0231, "step": 2850 }, { "action_loss": 0.0221, "epoch": 0.27263326125787346, "learning_rate": 1.999978146786738e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 2900 }, { "action_loss": 0.022, "epoch": 0.2773338347278368, "learning_rate": 1.9999735576321776e-05, "llm_loss": 0.0, "loss": 0.022, "step": 2950 }, { "action_loss": 0.0377, "epoch": 0.2820344081978001, "learning_rate": 1.9999685314233333e-05, "llm_loss": 0.0, "loss": 0.0377, "step": 3000 }, { "action_loss": 0.0355, "epoch": 0.28673498166776346, "learning_rate": 1.9999630681624014e-05, "llm_loss": 0.0, "loss": 0.0355, "step": 3050 }, { "action_loss": 0.0143, "epoch": 0.2914355551377268, "learning_rate": 1.9999571678517702e-05, "llm_loss": 0.0, "loss": 0.0143, "step": 3100 }, { "action_loss": 0.0344, "epoch": 0.2961361286076901, "learning_rate": 1.999950830494018e-05, "llm_loss": 0.0, "loss": 0.0344, "step": 3150 }, { "action_loss": 0.0243, "epoch": 0.30083670207765345, "learning_rate": 1.9999440560919153e-05, "llm_loss": 0.0, "loss": 0.0243, "step": 3200 }, { "action_loss": 0.0253, "epoch": 0.3055372755476168, "learning_rate": 1.9999368446484222e-05, "llm_loss": 0.0, "loss": 0.0253, "step": 3250 }, { "action_loss": 0.0303, "epoch": 0.31023784901758017, "learning_rate": 1.999929196166691e-05, "llm_loss": 0.0, "loss": 0.0303, "step": 3300 }, { "action_loss": 0.031, "epoch": 0.3149384224875435, "learning_rate": 1.9999211106500644e-05, "llm_loss": 0.0, "loss": 0.031, "step": 3350 }, { "action_loss": 0.0265, "epoch": 0.31963899595750683, "learning_rate": 1.9999125881020766e-05, "llm_loss": 0.0, "loss": 0.0265, "step": 3400 }, { "action_loss": 0.029, "epoch": 0.32433956942747016, "learning_rate": 1.9999036285264526e-05, "llm_loss": 0.0, "loss": 0.029, "step": 3450 }, { "action_loss": 0.0385, "epoch": 0.3290401428974335, "learning_rate": 1.9998942319271076e-05, "llm_loss": 0.0, "loss": 0.0385, "step": 3500 }, { "action_loss": 0.0275, "epoch": 0.33374071636739683, "learning_rate": 1.9998843983081492e-05, "llm_loss": 0.0, "loss": 0.0275, "step": 3550 }, { "action_loss": 0.0174, "epoch": 0.33844128983736016, "learning_rate": 1.9998741276738753e-05, "llm_loss": 0.0, "loss": 0.0174, "step": 3600 }, { "action_loss": 0.0219, "epoch": 0.3431418633073235, "learning_rate": 1.9998634200287745e-05, "llm_loss": 0.0, "loss": 0.0219, "step": 3650 }, { "action_loss": 0.0321, "epoch": 0.3478424367772868, "learning_rate": 1.999852275377527e-05, "llm_loss": 0.0, "loss": 0.0321, "step": 3700 }, { "action_loss": 0.0319, "epoch": 0.35254301024725015, "learning_rate": 1.9998406937250035e-05, "llm_loss": 0.0, "loss": 0.0319, "step": 3750 }, { "action_loss": 0.0346, "epoch": 0.3572435837172135, "learning_rate": 1.9998286750762662e-05, "llm_loss": 0.0, "loss": 0.0346, "step": 3800 }, { "action_loss": 0.0271, "epoch": 0.3619441571871768, "learning_rate": 1.999816219436568e-05, "llm_loss": 0.0, "loss": 0.0271, "step": 3850 }, { "action_loss": 0.0185, "epoch": 0.36664473065714015, "learning_rate": 1.999803326811353e-05, "llm_loss": 0.0, "loss": 0.0185, "step": 3900 }, { "action_loss": 0.0349, "epoch": 0.3713453041271035, "learning_rate": 1.9997899972062553e-05, "llm_loss": 0.0, "loss": 0.0349, "step": 3950 }, { "action_loss": 0.0266, "epoch": 0.37604587759706687, "learning_rate": 1.999776230627102e-05, "llm_loss": 0.0, "loss": 0.0266, "step": 4000 }, { "action_loss": 0.0208, "epoch": 0.3807464510670302, "learning_rate": 1.999762027079909e-05, "llm_loss": 0.0, "loss": 0.0208, "step": 4050 }, { "action_loss": 0.0223, "epoch": 0.38544702453699353, "learning_rate": 1.999747386570885e-05, "llm_loss": 0.0, "loss": 0.0223, "step": 4100 }, { "action_loss": 0.0237, "epoch": 0.39014759800695686, "learning_rate": 1.9997323091064284e-05, "llm_loss": 0.0, "loss": 0.0237, "step": 4150 }, { "action_loss": 0.0189, "epoch": 0.3948481714769202, "learning_rate": 1.9997167946931293e-05, "llm_loss": 0.0, "loss": 0.0189, "step": 4200 }, { "action_loss": 0.034, "epoch": 0.3995487449468835, "learning_rate": 1.999700843337768e-05, "llm_loss": 0.0, "loss": 0.034, "step": 4250 }, { "action_loss": 0.029, "epoch": 0.40424931841684686, "learning_rate": 1.999684455047317e-05, "llm_loss": 0.0, "loss": 0.029, "step": 4300 }, { "action_loss": 0.0333, "epoch": 0.4089498918868102, "learning_rate": 1.9996676298289387e-05, "llm_loss": 0.0, "loss": 0.0333, "step": 4350 }, { "action_loss": 0.0273, "epoch": 0.4136504653567735, "learning_rate": 1.9996503676899863e-05, "llm_loss": 0.0, "loss": 0.0273, "step": 4400 }, { "action_loss": 0.0327, "epoch": 0.41835103882673685, "learning_rate": 1.9996326686380055e-05, "llm_loss": 0.0, "loss": 0.0327, "step": 4450 }, { "action_loss": 0.0246, "epoch": 0.4230516122967002, "learning_rate": 1.9996148996814977e-05, "llm_loss": 0.0, "loss": 0.0246, "step": 4500 }, { "action_loss": 0.0338, "epoch": 0.4277521857666635, "learning_rate": 1.9995963355647256e-05, "llm_loss": 0.0, "loss": 0.0338, "step": 4550 }, { "action_loss": 0.0329, "epoch": 0.43245275923662685, "learning_rate": 1.9995773345585396e-05, "llm_loss": 0.0, "loss": 0.0329, "step": 4600 }, { "action_loss": 0.0328, "epoch": 0.4371533327065902, "learning_rate": 1.9995578966712457e-05, "llm_loss": 0.0, "loss": 0.0328, "step": 4650 }, { "action_loss": 0.0241, "epoch": 0.4418539061765535, "learning_rate": 1.9995380219113383e-05, "llm_loss": 0.0, "loss": 0.0241, "step": 4700 }, { "action_loss": 0.0175, "epoch": 0.4465544796465169, "learning_rate": 1.999517710287505e-05, "llm_loss": 0.0, "loss": 0.0175, "step": 4750 }, { "action_loss": 0.0282, "epoch": 0.45125505311648023, "learning_rate": 1.9994969618086223e-05, "llm_loss": 0.0, "loss": 0.0282, "step": 4800 }, { "action_loss": 0.0199, "epoch": 0.45595562658644356, "learning_rate": 1.9994757764837595e-05, "llm_loss": 0.0, "loss": 0.0199, "step": 4850 }, { "action_loss": 0.0261, "epoch": 0.4606562000564069, "learning_rate": 1.999454154322176e-05, "llm_loss": 0.0, "loss": 0.0261, "step": 4900 }, { "action_loss": 0.0204, "epoch": 0.4653567735263702, "learning_rate": 1.9994320953333215e-05, "llm_loss": 0.0, "loss": 0.0204, "step": 4950 }, { "action_loss": 0.0259, "epoch": 0.47005734699633356, "learning_rate": 1.9994095995268374e-05, "llm_loss": 0.0, "loss": 0.0259, "step": 5000 }, { "action_loss": 0.0329, "epoch": 0.4747579204662969, "learning_rate": 1.9993866669125558e-05, "llm_loss": 0.0, "loss": 0.0329, "step": 5050 }, { "action_loss": 0.0252, "epoch": 0.4794584939362602, "learning_rate": 1.9993632975005004e-05, "llm_loss": 0.0, "loss": 0.0252, "step": 5100 }, { "action_loss": 0.0209, "epoch": 0.48415906740622355, "learning_rate": 1.9993394913008846e-05, "llm_loss": 0.0, "loss": 0.0209, "step": 5150 }, { "action_loss": 0.0297, "epoch": 0.4888596408761869, "learning_rate": 1.999315248324113e-05, "llm_loss": 0.0, "loss": 0.0297, "step": 5200 }, { "action_loss": 0.0301, "epoch": 0.4935602143461502, "learning_rate": 1.9992910664558917e-05, "llm_loss": 0.0, "loss": 0.0301, "step": 5250 }, { "action_loss": 0.021, "epoch": 0.49826078781611355, "learning_rate": 1.9992659586917962e-05, "llm_loss": 0.0, "loss": 0.021, "step": 5300 }, { "action_loss": 0.0242, "epoch": 0.5029613612860769, "learning_rate": 1.999240414182684e-05, "llm_loss": 0.0, "loss": 0.0242, "step": 5350 }, { "action_loss": 0.0271, "epoch": 0.5076619347560403, "learning_rate": 1.9992144329397196e-05, "llm_loss": 0.0, "loss": 0.0271, "step": 5400 }, { "action_loss": 0.0245, "epoch": 0.5123625082260036, "learning_rate": 1.9991880149742583e-05, "llm_loss": 0.0, "loss": 0.0245, "step": 5450 }, { "action_loss": 0.027, "epoch": 0.5170630816959669, "learning_rate": 1.999161160297847e-05, "llm_loss": 0.0, "loss": 0.0271, "step": 5500 }, { "action_loss": 0.0221, "epoch": 0.5217636551659303, "learning_rate": 1.9991338689222225e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 5550 }, { "action_loss": 0.0164, "epoch": 0.5264642286358936, "learning_rate": 1.999106140859313e-05, "llm_loss": 0.0, "loss": 0.0164, "step": 5600 }, { "action_loss": 0.0259, "epoch": 0.5311648021058569, "learning_rate": 1.9990779761212376e-05, "llm_loss": 0.0, "loss": 0.0259, "step": 5650 }, { "action_loss": 0.0302, "epoch": 0.5358653755758203, "learning_rate": 1.9990493747203062e-05, "llm_loss": 0.0, "loss": 0.0302, "step": 5700 }, { "action_loss": 0.0384, "epoch": 0.5405659490457836, "learning_rate": 1.9990203366690197e-05, "llm_loss": 0.0, "loss": 0.0384, "step": 5750 }, { "action_loss": 0.0329, "epoch": 0.5452665225157469, "learning_rate": 1.998990861980069e-05, "llm_loss": 0.0, "loss": 0.0329, "step": 5800 }, { "action_loss": 0.0237, "epoch": 0.5499670959857103, "learning_rate": 1.9989609506663373e-05, "llm_loss": 0.0, "loss": 0.0237, "step": 5850 }, { "action_loss": 0.0169, "epoch": 0.5546676694556736, "learning_rate": 1.9989306027408968e-05, "llm_loss": 0.0, "loss": 0.0169, "step": 5900 }, { "action_loss": 0.0317, "epoch": 0.5593682429256369, "learning_rate": 1.998899818217012e-05, "llm_loss": 0.0, "loss": 0.0317, "step": 5950 }, { "action_loss": 0.0232, "epoch": 0.5640688163956002, "learning_rate": 1.9988685971081386e-05, "llm_loss": 0.0, "loss": 0.0232, "step": 6000 }, { "action_loss": 0.0156, "epoch": 0.5687693898655636, "learning_rate": 1.998836939427921e-05, "llm_loss": 0.0, "loss": 0.0156, "step": 6050 }, { "action_loss": 0.0199, "epoch": 0.5734699633355269, "learning_rate": 1.9988048451901964e-05, "llm_loss": 0.0, "loss": 0.0199, "step": 6100 }, { "action_loss": 0.0234, "epoch": 0.5781705368054902, "learning_rate": 1.9987723144089918e-05, "llm_loss": 0.0, "loss": 0.0234, "step": 6150 }, { "action_loss": 0.0275, "epoch": 0.5828711102754536, "learning_rate": 1.9987393470985256e-05, "llm_loss": 0.0, "loss": 0.0275, "step": 6200 }, { "action_loss": 0.0278, "epoch": 0.5875716837454169, "learning_rate": 1.9987059432732065e-05, "llm_loss": 0.0, "loss": 0.0278, "step": 6250 }, { "action_loss": 0.0275, "epoch": 0.5922722572153802, "learning_rate": 1.998672102947634e-05, "llm_loss": 0.0, "loss": 0.0275, "step": 6300 }, { "action_loss": 0.0263, "epoch": 0.5969728306853436, "learning_rate": 1.9986378261365987e-05, "llm_loss": 0.0, "loss": 0.0263, "step": 6350 }, { "action_loss": 0.0406, "epoch": 0.6016734041553069, "learning_rate": 1.998603112855082e-05, "llm_loss": 0.0, "loss": 0.0406, "step": 6400 }, { "action_loss": 0.0209, "epoch": 0.6063739776252702, "learning_rate": 1.998567963118256e-05, "llm_loss": 0.0, "loss": 0.0209, "step": 6450 }, { "action_loss": 0.0291, "epoch": 0.6110745510952336, "learning_rate": 1.998532376941483e-05, "llm_loss": 0.0, "loss": 0.0291, "step": 6500 }, { "action_loss": 0.0299, "epoch": 0.615775124565197, "learning_rate": 1.9984963543403164e-05, "llm_loss": 0.0, "loss": 0.0299, "step": 6550 }, { "action_loss": 0.0236, "epoch": 0.6204756980351603, "learning_rate": 1.998459895330501e-05, "llm_loss": 0.0, "loss": 0.0236, "step": 6600 }, { "action_loss": 0.0209, "epoch": 0.6251762715051237, "learning_rate": 1.9984229999279713e-05, "llm_loss": 0.0, "loss": 0.0209, "step": 6650 }, { "action_loss": 0.0214, "epoch": 0.629876844975087, "learning_rate": 1.9983856681488537e-05, "llm_loss": 0.0, "loss": 0.0214, "step": 6700 }, { "action_loss": 0.0261, "epoch": 0.6345774184450503, "learning_rate": 1.998347900009464e-05, "llm_loss": 0.0, "loss": 0.0261, "step": 6750 }, { "action_loss": 0.0217, "epoch": 0.6392779919150137, "learning_rate": 1.9983096955263095e-05, "llm_loss": 0.0, "loss": 0.0217, "step": 6800 }, { "action_loss": 0.0188, "epoch": 0.643978565384977, "learning_rate": 1.998271054716088e-05, "llm_loss": 0.0, "loss": 0.0188, "step": 6850 }, { "action_loss": 0.0286, "epoch": 0.6486791388549403, "learning_rate": 1.9982319775956883e-05, "llm_loss": 0.0, "loss": 0.0286, "step": 6900 }, { "action_loss": 0.0276, "epoch": 0.6533797123249037, "learning_rate": 1.9981924641821897e-05, "llm_loss": 0.0, "loss": 0.0276, "step": 6950 }, { "action_loss": 0.03, "epoch": 0.658080285794867, "learning_rate": 1.9981525144928618e-05, "llm_loss": 0.0, "loss": 0.03, "step": 7000 }, { "action_loss": 0.0343, "epoch": 0.6627808592648303, "learning_rate": 1.9981121285451657e-05, "llm_loss": 0.0, "loss": 0.0343, "step": 7050 }, { "action_loss": 0.0265, "epoch": 0.6674814327347937, "learning_rate": 1.9980713063567524e-05, "llm_loss": 0.0, "loss": 0.0265, "step": 7100 }, { "action_loss": 0.0232, "epoch": 0.672182006204757, "learning_rate": 1.998030047945464e-05, "llm_loss": 0.0, "loss": 0.0232, "step": 7150 }, { "action_loss": 0.0229, "epoch": 0.6768825796747203, "learning_rate": 1.9979883533293334e-05, "llm_loss": 0.0, "loss": 0.0229, "step": 7200 }, { "action_loss": 0.0177, "epoch": 0.6815831531446837, "learning_rate": 1.9979462225265834e-05, "llm_loss": 0.0, "loss": 0.0177, "step": 7250 }, { "action_loss": 0.028, "epoch": 0.686283726614647, "learning_rate": 1.997903655555628e-05, "llm_loss": 0.0, "loss": 0.028, "step": 7300 }, { "action_loss": 0.0299, "epoch": 0.6909843000846103, "learning_rate": 1.9978606524350727e-05, "llm_loss": 0.0, "loss": 0.0299, "step": 7350 }, { "action_loss": 0.0316, "epoch": 0.6956848735545736, "learning_rate": 1.9978172131837112e-05, "llm_loss": 0.0, "loss": 0.0316, "step": 7400 }, { "action_loss": 0.0301, "epoch": 0.700385447024537, "learning_rate": 1.99777333782053e-05, "llm_loss": 0.0, "loss": 0.0301, "step": 7450 }, { "action_loss": 0.0464, "epoch": 0.7050860204945003, "learning_rate": 1.9977290263647057e-05, "llm_loss": 0.0, "loss": 0.0464, "step": 7500 }, { "action_loss": 0.014, "epoch": 0.7097865939644636, "learning_rate": 1.9976842788356054e-05, "llm_loss": 0.0, "loss": 0.014, "step": 7550 }, { "action_loss": 0.017, "epoch": 0.714487167434427, "learning_rate": 1.9976390952527865e-05, "llm_loss": 0.0, "loss": 0.017, "step": 7600 }, { "action_loss": 0.0159, "epoch": 0.7191877409043903, "learning_rate": 1.997593475635997e-05, "llm_loss": 0.0, "loss": 0.0159, "step": 7650 }, { "action_loss": 0.0269, "epoch": 0.7238883143743536, "learning_rate": 1.997547420005176e-05, "llm_loss": 0.0, "loss": 0.0269, "step": 7700 }, { "action_loss": 0.0182, "epoch": 0.728588887844317, "learning_rate": 1.9975009283804527e-05, "llm_loss": 0.0, "loss": 0.0182, "step": 7750 }, { "action_loss": 0.0234, "epoch": 0.7332894613142803, "learning_rate": 1.997454000782147e-05, "llm_loss": 0.0, "loss": 0.0234, "step": 7800 }, { "action_loss": 0.026, "epoch": 0.7379900347842436, "learning_rate": 1.9974066372307694e-05, "llm_loss": 0.0, "loss": 0.026, "step": 7850 }, { "action_loss": 0.0368, "epoch": 0.742690608254207, "learning_rate": 1.997358837747021e-05, "llm_loss": 0.0, "loss": 0.0368, "step": 7900 }, { "action_loss": 0.0298, "epoch": 0.7473911817241703, "learning_rate": 1.9973106023517927e-05, "llm_loss": 0.0, "loss": 0.0298, "step": 7950 }, { "action_loss": 0.0433, "epoch": 0.7520917551941337, "learning_rate": 1.9972619310661678e-05, "llm_loss": 0.0, "loss": 0.0433, "step": 8000 }, { "action_loss": 0.0289, "epoch": 0.7567923286640971, "learning_rate": 1.9972128239114173e-05, "llm_loss": 0.0, "loss": 0.0289, "step": 8050 }, { "action_loss": 0.03, "epoch": 0.7614929021340604, "learning_rate": 1.9971632809090057e-05, "llm_loss": 0.0, "loss": 0.03, "step": 8100 }, { "action_loss": 0.0278, "epoch": 0.7661934756040237, "learning_rate": 1.9971133020805856e-05, "llm_loss": 0.0, "loss": 0.0278, "step": 8150 }, { "action_loss": 0.011, "epoch": 0.7708940490739871, "learning_rate": 1.997062887448001e-05, "llm_loss": 0.0, "loss": 0.011, "step": 8200 }, { "action_loss": 0.024, "epoch": 0.7755946225439504, "learning_rate": 1.9970120370332876e-05, "llm_loss": 0.0, "loss": 0.024, "step": 8250 }, { "action_loss": 0.0333, "epoch": 0.7802951960139137, "learning_rate": 1.9969607508586688e-05, "llm_loss": 0.0, "loss": 0.0333, "step": 8300 }, { "action_loss": 0.0292, "epoch": 0.7849957694838771, "learning_rate": 1.996909028946561e-05, "llm_loss": 0.0, "loss": 0.0292, "step": 8350 }, { "action_loss": 0.0231, "epoch": 0.7896963429538404, "learning_rate": 1.99685687131957e-05, "llm_loss": 0.0, "loss": 0.0231, "step": 8400 }, { "action_loss": 0.033, "epoch": 0.7943969164238037, "learning_rate": 1.9968042780004917e-05, "llm_loss": 0.0, "loss": 0.033, "step": 8450 }, { "action_loss": 0.0161, "epoch": 0.799097489893767, "learning_rate": 1.9967512490123134e-05, "llm_loss": 0.0, "loss": 0.0161, "step": 8500 }, { "action_loss": 0.0244, "epoch": 0.8037980633637304, "learning_rate": 1.9966977843782117e-05, "llm_loss": 0.0, "loss": 0.0244, "step": 8550 }, { "action_loss": 0.0166, "epoch": 0.8084986368336937, "learning_rate": 1.9966438841215545e-05, "llm_loss": 0.0, "loss": 0.0166, "step": 8600 }, { "action_loss": 0.0232, "epoch": 0.813199210303657, "learning_rate": 1.9965895482659e-05, "llm_loss": 0.0, "loss": 0.0232, "step": 8650 }, { "action_loss": 0.0451, "epoch": 0.8178997837736204, "learning_rate": 1.9965347768349958e-05, "llm_loss": 0.0, "loss": 0.0451, "step": 8700 }, { "action_loss": 0.0363, "epoch": 0.8226003572435837, "learning_rate": 1.9964795698527816e-05, "llm_loss": 0.0, "loss": 0.0363, "step": 8750 }, { "action_loss": 0.0262, "epoch": 0.827300930713547, "learning_rate": 1.9964239273433854e-05, "llm_loss": 0.0, "loss": 0.0262, "step": 8800 }, { "action_loss": 0.0202, "epoch": 0.8320015041835104, "learning_rate": 1.9963678493311273e-05, "llm_loss": 0.0, "loss": 0.0202, "step": 8850 }, { "action_loss": 0.0242, "epoch": 0.8367020776534737, "learning_rate": 1.9963113358405174e-05, "llm_loss": 0.0, "loss": 0.0242, "step": 8900 }, { "action_loss": 0.0236, "epoch": 0.841402651123437, "learning_rate": 1.9962543868962547e-05, "llm_loss": 0.0, "loss": 0.0236, "step": 8950 }, { "action_loss": 0.0238, "epoch": 0.8461032245934004, "learning_rate": 1.9961970025232306e-05, "llm_loss": 0.0, "loss": 0.0238, "step": 9000 }, { "action_loss": 0.0224, "epoch": 0.8508037980633637, "learning_rate": 1.996139182746526e-05, "llm_loss": 0.0, "loss": 0.0224, "step": 9050 }, { "action_loss": 0.0323, "epoch": 0.855504371533327, "learning_rate": 1.996080927591411e-05, "llm_loss": 0.0, "loss": 0.0323, "step": 9100 }, { "action_loss": 0.0378, "epoch": 0.8602049450032904, "learning_rate": 1.9960222370833477e-05, "llm_loss": 0.0, "loss": 0.0378, "step": 9150 }, { "action_loss": 0.0297, "epoch": 0.8649055184732537, "learning_rate": 1.9959631112479876e-05, "llm_loss": 0.0, "loss": 0.0297, "step": 9200 }, { "action_loss": 0.0169, "epoch": 0.869606091943217, "learning_rate": 1.995903550111172e-05, "llm_loss": 0.0, "loss": 0.0169, "step": 9250 }, { "action_loss": 0.0347, "epoch": 0.8743066654131804, "learning_rate": 1.995843553698934e-05, "llm_loss": 0.0, "loss": 0.0347, "step": 9300 }, { "action_loss": 0.033, "epoch": 0.8790072388831437, "learning_rate": 1.9957831220374953e-05, "llm_loss": 0.0, "loss": 0.033, "step": 9350 }, { "action_loss": 0.0334, "epoch": 0.883707812353107, "learning_rate": 1.9957222551532686e-05, "llm_loss": 0.0, "loss": 0.0334, "step": 9400 }, { "action_loss": 0.016, "epoch": 0.8884083858230705, "learning_rate": 1.9956609530728574e-05, "llm_loss": 0.0, "loss": 0.016, "step": 9450 }, { "action_loss": 0.0181, "epoch": 0.8931089592930338, "learning_rate": 1.9955992158230533e-05, "llm_loss": 0.0, "loss": 0.0181, "step": 9500 }, { "action_loss": 0.0166, "epoch": 0.8978095327629971, "learning_rate": 1.995537043430841e-05, "llm_loss": 0.0, "loss": 0.0166, "step": 9550 }, { "action_loss": 0.0258, "epoch": 0.9025101062329605, "learning_rate": 1.9954744359233935e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 9600 }, { "action_loss": 0.0239, "epoch": 0.9072106797029238, "learning_rate": 1.9954113933280737e-05, "llm_loss": 0.0, "loss": 0.0239, "step": 9650 }, { "action_loss": 0.0201, "epoch": 0.9119112531728871, "learning_rate": 1.995347915672436e-05, "llm_loss": 0.0, "loss": 0.0201, "step": 9700 }, { "action_loss": 0.0355, "epoch": 0.9166118266428505, "learning_rate": 1.9952840029842244e-05, "llm_loss": 0.0, "loss": 0.0355, "step": 9750 }, { "action_loss": 0.0218, "epoch": 0.9213124001128138, "learning_rate": 1.995219655291373e-05, "llm_loss": 0.0, "loss": 0.0218, "step": 9800 }, { "action_loss": 0.0167, "epoch": 0.9260129735827771, "learning_rate": 1.9951548726220057e-05, "llm_loss": 0.0, "loss": 0.0167, "step": 9850 }, { "action_loss": 0.0104, "epoch": 0.9307135470527405, "learning_rate": 1.9950896550044366e-05, "llm_loss": 0.0, "loss": 0.0104, "step": 9900 }, { "action_loss": 0.0298, "epoch": 0.9354141205227038, "learning_rate": 1.9950240024671705e-05, "llm_loss": 0.0, "loss": 0.0298, "step": 9950 }, { "action_loss": 0.0425, "epoch": 0.9401146939926671, "learning_rate": 1.9949579150389015e-05, "llm_loss": 0.0, "loss": 0.0425, "step": 10000 }, { "action_loss": 0.0172, "epoch": 0.9448152674626304, "learning_rate": 1.9948913927485147e-05, "llm_loss": 0.0, "loss": 0.0172, "step": 10050 }, { "action_loss": 0.019, "epoch": 0.9495158409325938, "learning_rate": 1.9948244356250847e-05, "llm_loss": 0.0, "loss": 0.019, "step": 10100 }, { "action_loss": 0.0234, "epoch": 0.9542164144025571, "learning_rate": 1.994757043697875e-05, "llm_loss": 0.0, "loss": 0.0234, "step": 10150 }, { "action_loss": 0.024, "epoch": 0.9589169878725204, "learning_rate": 1.994689216996342e-05, "llm_loss": 0.0, "loss": 0.024, "step": 10200 }, { "action_loss": 0.013, "epoch": 0.9636175613424838, "learning_rate": 1.9946209555501293e-05, "llm_loss": 0.0, "loss": 0.013, "step": 10250 }, { "action_loss": 0.0136, "epoch": 0.9683181348124471, "learning_rate": 1.994552259389072e-05, "llm_loss": 0.0, "loss": 0.0136, "step": 10300 }, { "action_loss": 0.0299, "epoch": 0.9730187082824104, "learning_rate": 1.994483128543195e-05, "llm_loss": 0.0, "loss": 0.0299, "step": 10350 }, { "action_loss": 0.0218, "epoch": 0.9777192817523738, "learning_rate": 1.9944135630427127e-05, "llm_loss": 0.0, "loss": 0.0218, "step": 10400 }, { "action_loss": 0.0352, "epoch": 0.9824198552223371, "learning_rate": 1.99434356291803e-05, "llm_loss": 0.0, "loss": 0.0352, "step": 10450 }, { "action_loss": 0.0288, "epoch": 0.9871204286923004, "learning_rate": 1.9942731281997413e-05, "llm_loss": 0.0, "loss": 0.0288, "step": 10500 }, { "action_loss": 0.0239, "epoch": 0.9918210021622638, "learning_rate": 1.9942022589186316e-05, "llm_loss": 0.0, "loss": 0.0239, "step": 10550 }, { "action_loss": 0.0397, "epoch": 0.9965215756322271, "learning_rate": 1.9941309551056755e-05, "llm_loss": 0.0, "loss": 0.0397, "step": 10600 }, { "action_loss": 0.029, "epoch": 1.0012221491021904, "learning_rate": 1.9940592167920367e-05, "llm_loss": 0.0, "loss": 0.029, "step": 10650 }, { "action_loss": 0.0201, "epoch": 1.0059227225721539, "learning_rate": 1.9939870440090706e-05, "llm_loss": 0.0, "loss": 0.0201, "step": 10700 }, { "action_loss": 0.0262, "epoch": 1.010623296042117, "learning_rate": 1.9939144367883205e-05, "llm_loss": 0.0, "loss": 0.0262, "step": 10750 }, { "action_loss": 0.0242, "epoch": 1.0153238695120805, "learning_rate": 1.9938413951615212e-05, "llm_loss": 0.0, "loss": 0.0242, "step": 10800 }, { "action_loss": 0.0271, "epoch": 1.0200244429820438, "learning_rate": 1.9937679191605964e-05, "llm_loss": 0.0, "loss": 0.0271, "step": 10850 }, { "action_loss": 0.0362, "epoch": 1.0247250164520072, "learning_rate": 1.9936940088176602e-05, "llm_loss": 0.0, "loss": 0.0362, "step": 10900 }, { "action_loss": 0.0175, "epoch": 1.0294255899219704, "learning_rate": 1.9936196641650165e-05, "llm_loss": 0.0, "loss": 0.0175, "step": 10950 }, { "action_loss": 0.0289, "epoch": 1.0341261633919339, "learning_rate": 1.993544885235158e-05, "llm_loss": 0.0, "loss": 0.0289, "step": 11000 }, { "action_loss": 0.0359, "epoch": 1.038826736861897, "learning_rate": 1.9934696720607682e-05, "llm_loss": 0.0, "loss": 0.0359, "step": 11050 }, { "action_loss": 0.0417, "epoch": 1.0435273103318605, "learning_rate": 1.9933940246747208e-05, "llm_loss": 0.0, "loss": 0.0417, "step": 11100 }, { "action_loss": 0.0157, "epoch": 1.0482278838018237, "learning_rate": 1.9933179431100783e-05, "llm_loss": 0.0, "loss": 0.0157, "step": 11150 }, { "action_loss": 0.0349, "epoch": 1.0529284572717872, "learning_rate": 1.9932414274000932e-05, "llm_loss": 0.0, "loss": 0.0349, "step": 11200 }, { "action_loss": 0.0255, "epoch": 1.0576290307417504, "learning_rate": 1.9931644775782085e-05, "llm_loss": 0.0, "loss": 0.0255, "step": 11250 }, { "action_loss": 0.0268, "epoch": 1.0623296042117139, "learning_rate": 1.9930870936780557e-05, "llm_loss": 0.0, "loss": 0.0268, "step": 11300 }, { "action_loss": 0.0217, "epoch": 1.067030177681677, "learning_rate": 1.993009275733457e-05, "llm_loss": 0.0, "loss": 0.0217, "step": 11350 }, { "action_loss": 0.025, "epoch": 1.0717307511516405, "learning_rate": 1.992931023778424e-05, "llm_loss": 0.0, "loss": 0.025, "step": 11400 }, { "action_loss": 0.0145, "epoch": 1.0764313246216037, "learning_rate": 1.9928523378471573e-05, "llm_loss": 0.0, "loss": 0.0145, "step": 11450 }, { "action_loss": 0.0151, "epoch": 1.0811318980915672, "learning_rate": 1.992773217974049e-05, "llm_loss": 0.0, "loss": 0.0151, "step": 11500 }, { "action_loss": 0.0174, "epoch": 1.0858324715615306, "learning_rate": 1.9926936641936786e-05, "llm_loss": 0.0, "loss": 0.0174, "step": 11550 }, { "action_loss": 0.0189, "epoch": 1.0905330450314938, "learning_rate": 1.9926136765408167e-05, "llm_loss": 0.0, "loss": 0.0189, "step": 11600 }, { "action_loss": 0.0252, "epoch": 1.095233618501457, "learning_rate": 1.9925332550504234e-05, "llm_loss": 0.0, "loss": 0.0252, "step": 11650 }, { "action_loss": 0.0089, "epoch": 1.0999341919714205, "learning_rate": 1.9924523997576484e-05, "llm_loss": 0.0, "loss": 0.0089, "step": 11700 }, { "action_loss": 0.0335, "epoch": 1.104634765441384, "learning_rate": 1.99237111069783e-05, "llm_loss": 0.0, "loss": 0.0335, "step": 11750 }, { "action_loss": 0.0239, "epoch": 1.1093353389113472, "learning_rate": 1.9922893879064978e-05, "llm_loss": 0.0, "loss": 0.0239, "step": 11800 }, { "action_loss": 0.0206, "epoch": 1.1140359123813106, "learning_rate": 1.9922072314193692e-05, "llm_loss": 0.0, "loss": 0.0206, "step": 11850 }, { "action_loss": 0.0334, "epoch": 1.1187364858512738, "learning_rate": 1.9921246412723523e-05, "llm_loss": 0.0, "loss": 0.0334, "step": 11900 }, { "action_loss": 0.0254, "epoch": 1.1234370593212373, "learning_rate": 1.9920416175015445e-05, "llm_loss": 0.0, "loss": 0.0254, "step": 11950 }, { "action_loss": 0.0222, "epoch": 1.1281376327912005, "learning_rate": 1.991958160143233e-05, "llm_loss": 0.0, "loss": 0.0222, "step": 12000 }, { "action_loss": 0.0202, "epoch": 1.132838206261164, "learning_rate": 1.991874269233893e-05, "llm_loss": 0.0, "loss": 0.0202, "step": 12050 }, { "action_loss": 0.0167, "epoch": 1.1375387797311272, "learning_rate": 1.991789944810192e-05, "llm_loss": 0.0, "loss": 0.0167, "step": 12100 }, { "action_loss": 0.0313, "epoch": 1.1422393532010906, "learning_rate": 1.991705186908984e-05, "llm_loss": 0.0, "loss": 0.0313, "step": 12150 }, { "action_loss": 0.0241, "epoch": 1.1469399266710538, "learning_rate": 1.991619995567314e-05, "llm_loss": 0.0, "loss": 0.0241, "step": 12200 }, { "action_loss": 0.0253, "epoch": 1.1516405001410173, "learning_rate": 1.991534370822417e-05, "llm_loss": 0.0, "loss": 0.0253, "step": 12250 }, { "action_loss": 0.0305, "epoch": 1.1563410736109805, "learning_rate": 1.991448312711716e-05, "llm_loss": 0.0, "loss": 0.0305, "step": 12300 }, { "action_loss": 0.0318, "epoch": 1.161041647080944, "learning_rate": 1.991361821272824e-05, "llm_loss": 0.0, "loss": 0.0318, "step": 12350 }, { "action_loss": 0.0331, "epoch": 1.1657422205509071, "learning_rate": 1.991274896543544e-05, "llm_loss": 0.0, "loss": 0.0331, "step": 12400 }, { "action_loss": 0.0223, "epoch": 1.1704427940208706, "learning_rate": 1.991187538561867e-05, "llm_loss": 0.0, "loss": 0.0223, "step": 12450 }, { "action_loss": 0.0158, "epoch": 1.1751433674908338, "learning_rate": 1.991099747365975e-05, "llm_loss": 0.0, "loss": 0.0158, "step": 12500 }, { "action_loss": 0.0411, "epoch": 1.1798439409607973, "learning_rate": 1.9910115229942383e-05, "llm_loss": 0.0, "loss": 0.0411, "step": 12550 }, { "action_loss": 0.0214, "epoch": 1.1845445144307605, "learning_rate": 1.990922865485216e-05, "llm_loss": 0.0, "loss": 0.0214, "step": 12600 }, { "action_loss": 0.0157, "epoch": 1.189245087900724, "learning_rate": 1.9908337748776585e-05, "llm_loss": 0.0, "loss": 0.0157, "step": 12650 }, { "action_loss": 0.026, "epoch": 1.1939456613706871, "learning_rate": 1.9907442512105034e-05, "llm_loss": 0.0, "loss": 0.026, "step": 12700 }, { "action_loss": 0.0253, "epoch": 1.1986462348406506, "learning_rate": 1.9906542945228787e-05, "llm_loss": 0.0, "loss": 0.0253, "step": 12750 }, { "action_loss": 0.0228, "epoch": 1.2033468083106138, "learning_rate": 1.990563904854102e-05, "llm_loss": 0.0, "loss": 0.0228, "step": 12800 }, { "action_loss": 0.0214, "epoch": 1.2080473817805772, "learning_rate": 1.9904730822436786e-05, "llm_loss": 0.0, "loss": 0.0214, "step": 12850 }, { "action_loss": 0.0277, "epoch": 1.2127479552505407, "learning_rate": 1.9903818267313037e-05, "llm_loss": 0.0, "loss": 0.0277, "step": 12900 }, { "action_loss": 0.0281, "epoch": 1.217448528720504, "learning_rate": 1.9902901383568633e-05, "llm_loss": 0.0, "loss": 0.0281, "step": 12950 }, { "action_loss": 0.0136, "epoch": 1.2221491021904671, "learning_rate": 1.9901980171604303e-05, "llm_loss": 0.0, "loss": 0.0136, "step": 13000 }, { "action_loss": 0.0295, "epoch": 1.2268496756604306, "learning_rate": 1.990105463182268e-05, "llm_loss": 0.0, "loss": 0.0295, "step": 13050 }, { "action_loss": 0.0497, "epoch": 1.231550249130394, "learning_rate": 1.9900124764628285e-05, "llm_loss": 0.0, "loss": 0.0497, "step": 13100 }, { "action_loss": 0.0278, "epoch": 1.2362508226003572, "learning_rate": 1.9899190570427535e-05, "llm_loss": 0.0, "loss": 0.0278, "step": 13150 }, { "action_loss": 0.0197, "epoch": 1.2409513960703205, "learning_rate": 1.9898252049628725e-05, "llm_loss": 0.0, "loss": 0.0197, "step": 13200 }, { "action_loss": 0.0443, "epoch": 1.245651969540284, "learning_rate": 1.989730920264206e-05, "llm_loss": 0.0, "loss": 0.0443, "step": 13250 }, { "action_loss": 0.0259, "epoch": 1.2503525430102473, "learning_rate": 1.989636202987962e-05, "llm_loss": 0.0, "loss": 0.0259, "step": 13300 }, { "action_loss": 0.0284, "epoch": 1.2550531164802106, "learning_rate": 1.989541053175539e-05, "llm_loss": 0.0, "loss": 0.0284, "step": 13350 }, { "action_loss": 0.0192, "epoch": 1.2597536899501738, "learning_rate": 1.9894454708685225e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 13400 }, { "action_loss": 0.0298, "epoch": 1.2644542634201372, "learning_rate": 1.9893494561086895e-05, "llm_loss": 0.0, "loss": 0.0298, "step": 13450 }, { "action_loss": 0.0252, "epoch": 1.2691548368901007, "learning_rate": 1.989253008938004e-05, "llm_loss": 0.0, "loss": 0.0252, "step": 13500 }, { "action_loss": 0.02, "epoch": 1.2738554103600639, "learning_rate": 1.9891561293986197e-05, "llm_loss": 0.0, "loss": 0.02, "step": 13550 }, { "action_loss": 0.0272, "epoch": 1.2785559838300273, "learning_rate": 1.98905881753288e-05, "llm_loss": 0.0, "loss": 0.0272, "step": 13600 }, { "action_loss": 0.0338, "epoch": 1.2832565572999906, "learning_rate": 1.988961073383316e-05, "llm_loss": 0.0, "loss": 0.0338, "step": 13650 }, { "action_loss": 0.0284, "epoch": 1.287957130769954, "learning_rate": 1.9888628969926485e-05, "llm_loss": 0.0, "loss": 0.0284, "step": 13700 }, { "action_loss": 0.0279, "epoch": 1.2926577042399172, "learning_rate": 1.9887642884037876e-05, "llm_loss": 0.0, "loss": 0.0279, "step": 13750 }, { "action_loss": 0.0256, "epoch": 1.2973582777098807, "learning_rate": 1.9886652476598306e-05, "llm_loss": 0.0, "loss": 0.0256, "step": 13800 }, { "action_loss": 0.0267, "epoch": 1.3020588511798439, "learning_rate": 1.9885657748040655e-05, "llm_loss": 0.0, "loss": 0.0267, "step": 13850 }, { "action_loss": 0.0264, "epoch": 1.3067594246498073, "learning_rate": 1.988465869879969e-05, "llm_loss": 0.0, "loss": 0.0264, "step": 13900 }, { "action_loss": 0.0191, "epoch": 1.3114599981197705, "learning_rate": 1.988365532931205e-05, "llm_loss": 0.0, "loss": 0.0191, "step": 13950 }, { "action_loss": 0.0257, "epoch": 1.316160571589734, "learning_rate": 1.988264764001628e-05, "llm_loss": 0.0, "loss": 0.0257, "step": 14000 }, { "action_loss": 0.0294, "epoch": 1.3208611450596972, "learning_rate": 1.9881635631352808e-05, "llm_loss": 0.0, "loss": 0.0294, "step": 14050 }, { "action_loss": 0.0159, "epoch": 1.3255617185296606, "learning_rate": 1.9880619303763944e-05, "llm_loss": 0.0, "loss": 0.0159, "step": 14100 }, { "action_loss": 0.0251, "epoch": 1.3302622919996239, "learning_rate": 1.9879598657693894e-05, "llm_loss": 0.0, "loss": 0.0251, "step": 14150 }, { "action_loss": 0.016, "epoch": 1.3349628654695873, "learning_rate": 1.9878573693588744e-05, "llm_loss": 0.0, "loss": 0.016, "step": 14200 }, { "action_loss": 0.0223, "epoch": 1.3396634389395508, "learning_rate": 1.9877544411896474e-05, "llm_loss": 0.0, "loss": 0.0223, "step": 14250 }, { "action_loss": 0.0291, "epoch": 1.344364012409514, "learning_rate": 1.9876510813066946e-05, "llm_loss": 0.0, "loss": 0.0291, "step": 14300 }, { "action_loss": 0.0285, "epoch": 1.3490645858794772, "learning_rate": 1.9875472897551913e-05, "llm_loss": 0.0, "loss": 0.0285, "step": 14350 }, { "action_loss": 0.0352, "epoch": 1.3537651593494406, "learning_rate": 1.9874430665805006e-05, "llm_loss": 0.0, "loss": 0.0352, "step": 14400 }, { "action_loss": 0.0234, "epoch": 1.358465732819404, "learning_rate": 1.987338411828176e-05, "llm_loss": 0.0, "loss": 0.0234, "step": 14450 }, { "action_loss": 0.0286, "epoch": 1.3631663062893673, "learning_rate": 1.9872333255439573e-05, "llm_loss": 0.0, "loss": 0.0286, "step": 14500 }, { "action_loss": 0.0089, "epoch": 1.3678668797593305, "learning_rate": 1.987127807773775e-05, "llm_loss": 0.0, "loss": 0.0089, "step": 14550 }, { "action_loss": 0.0181, "epoch": 1.372567453229294, "learning_rate": 1.987021858563747e-05, "llm_loss": 0.0, "loss": 0.0181, "step": 14600 }, { "action_loss": 0.0198, "epoch": 1.3772680266992574, "learning_rate": 1.98691547796018e-05, "llm_loss": 0.0, "loss": 0.0198, "step": 14650 }, { "action_loss": 0.0288, "epoch": 1.3819686001692206, "learning_rate": 1.9868086660095695e-05, "llm_loss": 0.0, "loss": 0.0288, "step": 14700 }, { "action_loss": 0.0168, "epoch": 1.3866691736391838, "learning_rate": 1.9867014227585992e-05, "llm_loss": 0.0, "loss": 0.0168, "step": 14750 }, { "action_loss": 0.0213, "epoch": 1.3913697471091473, "learning_rate": 1.9865937482541416e-05, "llm_loss": 0.0, "loss": 0.0213, "step": 14800 }, { "action_loss": 0.0158, "epoch": 1.3960703205791107, "learning_rate": 1.9864856425432574e-05, "llm_loss": 0.0, "loss": 0.0158, "step": 14850 }, { "action_loss": 0.0387, "epoch": 1.400770894049074, "learning_rate": 1.986377105673196e-05, "llm_loss": 0.0, "loss": 0.0387, "step": 14900 }, { "action_loss": 0.0278, "epoch": 1.4054714675190374, "learning_rate": 1.9862681376913954e-05, "llm_loss": 0.0, "loss": 0.0278, "step": 14950 }, { "action_loss": 0.0278, "epoch": 1.4101720409890006, "learning_rate": 1.9861587386454812e-05, "llm_loss": 0.0, "loss": 0.0278, "step": 15000 }, { "action_loss": 0.0227, "epoch": 1.414872614458964, "learning_rate": 1.9860489085832685e-05, "llm_loss": 0.0, "loss": 0.0227, "step": 15050 }, { "action_loss": 0.0258, "epoch": 1.4195731879289273, "learning_rate": 1.98593864755276e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 15100 }, { "action_loss": 0.0287, "epoch": 1.4242737613988907, "learning_rate": 1.9858279556021475e-05, "llm_loss": 0.0, "loss": 0.0287, "step": 15150 }, { "action_loss": 0.019, "epoch": 1.428974334868854, "learning_rate": 1.9857168327798098e-05, "llm_loss": 0.0, "loss": 0.019, "step": 15200 }, { "action_loss": 0.0198, "epoch": 1.4336749083388174, "learning_rate": 1.9856052791343153e-05, "llm_loss": 0.0, "loss": 0.0198, "step": 15250 }, { "action_loss": 0.0163, "epoch": 1.4383754818087806, "learning_rate": 1.9854932947144208e-05, "llm_loss": 0.0, "loss": 0.0163, "step": 15300 }, { "action_loss": 0.0132, "epoch": 1.443076055278744, "learning_rate": 1.9853808795690704e-05, "llm_loss": 0.0, "loss": 0.0132, "step": 15350 }, { "action_loss": 0.0341, "epoch": 1.4477766287487073, "learning_rate": 1.9852680337473965e-05, "llm_loss": 0.0, "loss": 0.0341, "step": 15400 }, { "action_loss": 0.0166, "epoch": 1.4524772022186707, "learning_rate": 1.985154757298721e-05, "llm_loss": 0.0, "loss": 0.0166, "step": 15450 }, { "action_loss": 0.0286, "epoch": 1.457177775688634, "learning_rate": 1.985041050272553e-05, "llm_loss": 0.0, "loss": 0.0286, "step": 15500 }, { "action_loss": 0.0213, "epoch": 1.4618783491585974, "learning_rate": 1.9849269127185892e-05, "llm_loss": 0.0, "loss": 0.0213, "step": 15550 }, { "action_loss": 0.0262, "epoch": 1.4665789226285608, "learning_rate": 1.984812344686716e-05, "llm_loss": 0.0, "loss": 0.0262, "step": 15600 }, { "action_loss": 0.0211, "epoch": 1.471279496098524, "learning_rate": 1.984697346227007e-05, "llm_loss": 0.0, "loss": 0.0211, "step": 15650 }, { "action_loss": 0.0186, "epoch": 1.4759800695684873, "learning_rate": 1.9845819173897237e-05, "llm_loss": 0.0, "loss": 0.0186, "step": 15700 }, { "action_loss": 0.0221, "epoch": 1.4806806430384507, "learning_rate": 1.984466058225317e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 15750 }, { "action_loss": 0.0327, "epoch": 1.4853812165084141, "learning_rate": 1.984349768784424e-05, "llm_loss": 0.0, "loss": 0.0327, "step": 15800 }, { "action_loss": 0.0398, "epoch": 1.4900817899783774, "learning_rate": 1.9842330491178712e-05, "llm_loss": 0.0, "loss": 0.0398, "step": 15850 }, { "action_loss": 0.0268, "epoch": 1.4947823634483406, "learning_rate": 1.9841158992766735e-05, "llm_loss": 0.0, "loss": 0.0268, "step": 15900 }, { "action_loss": 0.0183, "epoch": 1.499482936918304, "learning_rate": 1.9839983193120317e-05, "llm_loss": 0.0, "loss": 0.0183, "step": 15950 }, { "action_loss": 0.0253, "epoch": 1.5041835103882675, "learning_rate": 1.983880309275337e-05, "llm_loss": 0.0, "loss": 0.0253, "step": 16000 }, { "action_loss": 0.0177, "epoch": 1.5088840838582307, "learning_rate": 1.9837618692181674e-05, "llm_loss": 0.0, "loss": 0.0177, "step": 16050 }, { "action_loss": 0.0129, "epoch": 1.513584657328194, "learning_rate": 1.9836429991922884e-05, "llm_loss": 0.0, "loss": 0.0129, "step": 16100 }, { "action_loss": 0.025, "epoch": 1.5182852307981574, "learning_rate": 1.983523699249655e-05, "llm_loss": 0.0, "loss": 0.025, "step": 16150 }, { "action_loss": 0.0158, "epoch": 1.5229858042681208, "learning_rate": 1.9834039694424082e-05, "llm_loss": 0.0, "loss": 0.0158, "step": 16200 }, { "action_loss": 0.025, "epoch": 1.527686377738084, "learning_rate": 1.9832838098228786e-05, "llm_loss": 0.0, "loss": 0.025, "step": 16250 }, { "action_loss": 0.0246, "epoch": 1.5323869512080472, "learning_rate": 1.9831632204435833e-05, "llm_loss": 0.0, "loss": 0.0246, "step": 16300 }, { "action_loss": 0.0285, "epoch": 1.5370875246780107, "learning_rate": 1.9830422013572283e-05, "llm_loss": 0.0, "loss": 0.0285, "step": 16350 }, { "action_loss": 0.0408, "epoch": 1.5417880981479741, "learning_rate": 1.9829207526167064e-05, "llm_loss": 0.0, "loss": 0.0408, "step": 16400 }, { "action_loss": 0.0256, "epoch": 1.5464886716179373, "learning_rate": 1.982798874275099e-05, "llm_loss": 0.0, "loss": 0.0256, "step": 16450 }, { "action_loss": 0.0246, "epoch": 1.5511892450879006, "learning_rate": 1.9826765663856745e-05, "llm_loss": 0.0, "loss": 0.0246, "step": 16500 }, { "action_loss": 0.0364, "epoch": 1.555889818557864, "learning_rate": 1.9825538290018903e-05, "llm_loss": 0.0, "loss": 0.0364, "step": 16550 }, { "action_loss": 0.0095, "epoch": 1.5605903920278275, "learning_rate": 1.9824306621773902e-05, "llm_loss": 0.0, "loss": 0.0095, "step": 16600 }, { "action_loss": 0.0318, "epoch": 1.5652909654977907, "learning_rate": 1.9823070659660062e-05, "llm_loss": 0.0, "loss": 0.0318, "step": 16650 }, { "action_loss": 0.0163, "epoch": 1.5699915389677541, "learning_rate": 1.9821830404217583e-05, "llm_loss": 0.0, "loss": 0.0163, "step": 16700 }, { "action_loss": 0.03, "epoch": 1.5746921124377176, "learning_rate": 1.9820585855988535e-05, "llm_loss": 0.0, "loss": 0.03, "step": 16750 }, { "action_loss": 0.0473, "epoch": 1.5793926859076808, "learning_rate": 1.9819337015516867e-05, "llm_loss": 0.0, "loss": 0.0473, "step": 16800 }, { "action_loss": 0.0481, "epoch": 1.584093259377644, "learning_rate": 1.981808388334841e-05, "llm_loss": 0.0, "loss": 0.0481, "step": 16850 }, { "action_loss": 0.0205, "epoch": 1.5887938328476074, "learning_rate": 1.9816826460030855e-05, "llm_loss": 0.0, "loss": 0.0205, "step": 16900 }, { "action_loss": 0.025, "epoch": 1.5934944063175709, "learning_rate": 1.981556474611379e-05, "llm_loss": 0.0, "loss": 0.025, "step": 16950 }, { "action_loss": 0.0149, "epoch": 1.598194979787534, "learning_rate": 1.9814298742148665e-05, "llm_loss": 0.0, "loss": 0.0149, "step": 17000 }, { "action_loss": 0.0153, "epoch": 1.6028955532574973, "learning_rate": 1.9813028448688797e-05, "llm_loss": 0.0, "loss": 0.0153, "step": 17050 }, { "action_loss": 0.0258, "epoch": 1.6075961267274608, "learning_rate": 1.98117538662894e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 17100 }, { "action_loss": 0.022, "epoch": 1.6122967001974242, "learning_rate": 1.9810474995507545e-05, "llm_loss": 0.0, "loss": 0.022, "step": 17150 }, { "action_loss": 0.0168, "epoch": 1.6169972736673874, "learning_rate": 1.9809191836902185e-05, "llm_loss": 0.0, "loss": 0.0168, "step": 17200 }, { "action_loss": 0.0258, "epoch": 1.6216978471373507, "learning_rate": 1.980790439103414e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 17250 }, { "action_loss": 0.0156, "epoch": 1.626398420607314, "learning_rate": 1.980661265846611e-05, "llm_loss": 0.0, "loss": 0.0156, "step": 17300 }, { "action_loss": 0.0218, "epoch": 1.6310989940772775, "learning_rate": 1.9805316639762676e-05, "llm_loss": 0.0, "loss": 0.0218, "step": 17350 }, { "action_loss": 0.0126, "epoch": 1.6357995675472408, "learning_rate": 1.980401633549027e-05, "llm_loss": 0.0, "loss": 0.0126, "step": 17400 }, { "action_loss": 0.0254, "epoch": 1.640500141017204, "learning_rate": 1.9802711746217222e-05, "llm_loss": 0.0, "loss": 0.0254, "step": 17450 }, { "action_loss": 0.0337, "epoch": 1.6452007144871674, "learning_rate": 1.9801402872513713e-05, "llm_loss": 0.0, "loss": 0.0337, "step": 17500 }, { "action_loss": 0.0263, "epoch": 1.6499012879571309, "learning_rate": 1.9800089714951817e-05, "llm_loss": 0.0, "loss": 0.0263, "step": 17550 }, { "action_loss": 0.0298, "epoch": 1.654601861427094, "learning_rate": 1.9798772274105463e-05, "llm_loss": 0.0, "loss": 0.0298, "step": 17600 }, { "action_loss": 0.0205, "epoch": 1.6593024348970573, "learning_rate": 1.9797450550550465e-05, "llm_loss": 0.0, "loss": 0.0205, "step": 17650 }, { "action_loss": 0.0315, "epoch": 1.6640030083670208, "learning_rate": 1.9796124544864498e-05, "llm_loss": 0.0, "loss": 0.0315, "step": 17700 }, { "action_loss": 0.0219, "epoch": 1.6687035818369842, "learning_rate": 1.9794794257627117e-05, "llm_loss": 0.0, "loss": 0.0219, "step": 17750 }, { "action_loss": 0.0267, "epoch": 1.6734041553069474, "learning_rate": 1.9793459689419744e-05, "llm_loss": 0.0, "loss": 0.0267, "step": 17800 }, { "action_loss": 0.0186, "epoch": 1.6781047287769106, "learning_rate": 1.9792120840825674e-05, "llm_loss": 0.0, "loss": 0.0186, "step": 17850 }, { "action_loss": 0.0227, "epoch": 1.682805302246874, "learning_rate": 1.979077771243007e-05, "llm_loss": 0.0, "loss": 0.0227, "step": 17900 }, { "action_loss": 0.0345, "epoch": 1.6875058757168375, "learning_rate": 1.978943030481997e-05, "llm_loss": 0.0, "loss": 0.0345, "step": 17950 }, { "action_loss": 0.0345, "epoch": 1.6922064491868007, "learning_rate": 1.978807861858428e-05, "llm_loss": 0.0, "loss": 0.0345, "step": 18000 }, { "action_loss": 0.0277, "epoch": 1.696907022656764, "learning_rate": 1.9786722654313773e-05, "llm_loss": 0.0, "loss": 0.0277, "step": 18050 }, { "action_loss": 0.03, "epoch": 1.7016075961267274, "learning_rate": 1.9785362412601097e-05, "llm_loss": 0.0, "loss": 0.03, "step": 18100 }, { "action_loss": 0.0129, "epoch": 1.7063081695966908, "learning_rate": 1.9783997894040766e-05, "llm_loss": 0.0, "loss": 0.0129, "step": 18150 }, { "action_loss": 0.024, "epoch": 1.711008743066654, "learning_rate": 1.9782629099229166e-05, "llm_loss": 0.0, "loss": 0.024, "step": 18200 }, { "action_loss": 0.0255, "epoch": 1.7157093165366175, "learning_rate": 1.9781256028764547e-05, "llm_loss": 0.0, "loss": 0.0255, "step": 18250 }, { "action_loss": 0.0194, "epoch": 1.720409890006581, "learning_rate": 1.9779878683247038e-05, "llm_loss": 0.0, "loss": 0.0194, "step": 18300 }, { "action_loss": 0.0227, "epoch": 1.7251104634765442, "learning_rate": 1.9778497063278622e-05, "llm_loss": 0.0, "loss": 0.0227, "step": 18350 }, { "action_loss": 0.0186, "epoch": 1.7298110369465074, "learning_rate": 1.9777111169463162e-05, "llm_loss": 0.0, "loss": 0.0186, "step": 18400 }, { "action_loss": 0.0157, "epoch": 1.7345116104164708, "learning_rate": 1.9775721002406385e-05, "llm_loss": 0.0, "loss": 0.0157, "step": 18450 }, { "action_loss": 0.0273, "epoch": 1.7392121838864343, "learning_rate": 1.9774326562715885e-05, "llm_loss": 0.0, "loss": 0.0273, "step": 18500 }, { "action_loss": 0.0357, "epoch": 1.7439127573563975, "learning_rate": 1.977292785100112e-05, "llm_loss": 0.0, "loss": 0.0357, "step": 18550 }, { "action_loss": 0.0151, "epoch": 1.7486133308263607, "learning_rate": 1.9771524867873428e-05, "llm_loss": 0.0, "loss": 0.0151, "step": 18600 }, { "action_loss": 0.0285, "epoch": 1.7533139042963242, "learning_rate": 1.9770117613945996e-05, "llm_loss": 0.0, "loss": 0.0285, "step": 18650 }, { "action_loss": 0.0288, "epoch": 1.7580144777662876, "learning_rate": 1.976870608983389e-05, "llm_loss": 0.0, "loss": 0.0288, "step": 18700 }, { "action_loss": 0.0254, "epoch": 1.7627150512362508, "learning_rate": 1.9767290296154044e-05, "llm_loss": 0.0, "loss": 0.0254, "step": 18750 }, { "action_loss": 0.0213, "epoch": 1.767415624706214, "learning_rate": 1.9765870233525243e-05, "llm_loss": 0.0, "loss": 0.0213, "step": 18800 }, { "action_loss": 0.0191, "epoch": 1.7721161981761775, "learning_rate": 1.9764445902568154e-05, "llm_loss": 0.0, "loss": 0.0191, "step": 18850 }, { "action_loss": 0.0243, "epoch": 1.776816771646141, "learning_rate": 1.9763017303905305e-05, "llm_loss": 0.0, "loss": 0.0243, "step": 18900 }, { "action_loss": 0.0175, "epoch": 1.7815173451161042, "learning_rate": 1.9761584438161084e-05, "llm_loss": 0.0, "loss": 0.0175, "step": 18950 }, { "action_loss": 0.0259, "epoch": 1.7862179185860674, "learning_rate": 1.9760147305961747e-05, "llm_loss": 0.0, "loss": 0.0259, "step": 19000 }, { "action_loss": 0.0225, "epoch": 1.7909184920560308, "learning_rate": 1.975870590793542e-05, "llm_loss": 0.0, "loss": 0.0225, "step": 19050 }, { "action_loss": 0.0317, "epoch": 1.7956190655259943, "learning_rate": 1.9757260244712086e-05, "llm_loss": 0.0, "loss": 0.0317, "step": 19100 }, { "action_loss": 0.0228, "epoch": 1.8003196389959575, "learning_rate": 1.9755810316923597e-05, "llm_loss": 0.0, "loss": 0.0228, "step": 19150 }, { "action_loss": 0.0133, "epoch": 1.8050202124659207, "learning_rate": 1.9754356125203658e-05, "llm_loss": 0.0, "loss": 0.0133, "step": 19200 }, { "action_loss": 0.0192, "epoch": 1.8097207859358841, "learning_rate": 1.975289767018786e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 19250 }, { "action_loss": 0.0142, "epoch": 1.8144213594058476, "learning_rate": 1.9751434952513637e-05, "llm_loss": 0.0, "loss": 0.0142, "step": 19300 }, { "action_loss": 0.0382, "epoch": 1.8191219328758108, "learning_rate": 1.9749967972820293e-05, "llm_loss": 0.0, "loss": 0.0382, "step": 19350 }, { "action_loss": 0.0285, "epoch": 1.823822506345774, "learning_rate": 1.9748496731748993e-05, "llm_loss": 0.0, "loss": 0.0285, "step": 19400 }, { "action_loss": 0.0277, "epoch": 1.8285230798157375, "learning_rate": 1.974702122994277e-05, "llm_loss": 0.0, "loss": 0.0277, "step": 19450 }, { "action_loss": 0.0185, "epoch": 1.833223653285701, "learning_rate": 1.9745541468046515e-05, "llm_loss": 0.0, "loss": 0.0185, "step": 19500 }, { "action_loss": 0.0156, "epoch": 1.8379242267556641, "learning_rate": 1.9744057446706977e-05, "llm_loss": 0.0, "loss": 0.0156, "step": 19550 }, { "action_loss": 0.0347, "epoch": 1.8426248002256276, "learning_rate": 1.9742569166572778e-05, "llm_loss": 0.0, "loss": 0.0348, "step": 19600 }, { "action_loss": 0.0253, "epoch": 1.847325373695591, "learning_rate": 1.9741076628294387e-05, "llm_loss": 0.0, "loss": 0.0253, "step": 19650 }, { "action_loss": 0.0289, "epoch": 1.8520259471655542, "learning_rate": 1.9739579832524146e-05, "llm_loss": 0.0, "loss": 0.0289, "step": 19700 }, { "action_loss": 0.0244, "epoch": 1.8567265206355175, "learning_rate": 1.9738078779916255e-05, "llm_loss": 0.0, "loss": 0.0244, "step": 19750 }, { "action_loss": 0.0304, "epoch": 1.861427094105481, "learning_rate": 1.973657347112677e-05, "llm_loss": 0.0, "loss": 0.0304, "step": 19800 }, { "action_loss": 0.0309, "epoch": 1.8661276675754443, "learning_rate": 1.9735063906813608e-05, "llm_loss": 0.0, "loss": 0.0309, "step": 19850 }, { "action_loss": 0.0312, "epoch": 1.8708282410454076, "learning_rate": 1.973355008763655e-05, "llm_loss": 0.0, "loss": 0.0312, "step": 19900 }, { "action_loss": 0.0269, "epoch": 1.8755288145153708, "learning_rate": 1.9732032014257236e-05, "llm_loss": 0.0, "loss": 0.0269, "step": 19950 }, { "action_loss": 0.0221, "epoch": 1.8802293879853342, "learning_rate": 1.9730509687339164e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 20000 }, { "action_loss": 0.0123, "epoch": 1.8849299614552977, "learning_rate": 1.972898310754769e-05, "llm_loss": 0.0, "loss": 0.0123, "step": 20050 }, { "action_loss": 0.0184, "epoch": 1.889630534925261, "learning_rate": 1.972745227555003e-05, "llm_loss": 0.0, "loss": 0.0184, "step": 20100 }, { "action_loss": 0.0258, "epoch": 1.8943311083952241, "learning_rate": 1.9725917192015254e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 20150 }, { "action_loss": 0.0375, "epoch": 1.8990316818651876, "learning_rate": 1.9724377857614304e-05, "llm_loss": 0.0, "loss": 0.0375, "step": 20200 }, { "action_loss": 0.0222, "epoch": 1.903732255335151, "learning_rate": 1.9722834273019966e-05, "llm_loss": 0.0, "loss": 0.0222, "step": 20250 }, { "action_loss": 0.0218, "epoch": 1.9084328288051142, "learning_rate": 1.9721286438906885e-05, "llm_loss": 0.0, "loss": 0.0218, "step": 20300 }, { "action_loss": 0.0327, "epoch": 1.9131334022750774, "learning_rate": 1.971973435595157e-05, "llm_loss": 0.0, "loss": 0.0327, "step": 20350 }, { "action_loss": 0.028, "epoch": 1.9178339757450409, "learning_rate": 1.971817802483238e-05, "llm_loss": 0.0, "loss": 0.028, "step": 20400 }, { "action_loss": 0.0193, "epoch": 1.9225345492150043, "learning_rate": 1.9716617446229537e-05, "llm_loss": 0.0, "loss": 0.0193, "step": 20450 }, { "action_loss": 0.0251, "epoch": 1.9272351226849675, "learning_rate": 1.971505262082512e-05, "llm_loss": 0.0, "loss": 0.0251, "step": 20500 }, { "action_loss": 0.0158, "epoch": 1.9319356961549308, "learning_rate": 1.9713483549303052e-05, "llm_loss": 0.0, "loss": 0.0158, "step": 20550 }, { "action_loss": 0.0223, "epoch": 1.9366362696248942, "learning_rate": 1.9711910232349127e-05, "llm_loss": 0.0, "loss": 0.0223, "step": 20600 }, { "action_loss": 0.028, "epoch": 1.9413368430948577, "learning_rate": 1.9710332670650988e-05, "llm_loss": 0.0, "loss": 0.028, "step": 20650 }, { "action_loss": 0.0138, "epoch": 1.9460374165648209, "learning_rate": 1.9708750864898132e-05, "llm_loss": 0.0, "loss": 0.0138, "step": 20700 }, { "action_loss": 0.0167, "epoch": 1.950737990034784, "learning_rate": 1.970716481578191e-05, "llm_loss": 0.0, "loss": 0.0167, "step": 20750 }, { "action_loss": 0.0217, "epoch": 1.9554385635047475, "learning_rate": 1.9705574523995532e-05, "llm_loss": 0.0, "loss": 0.0217, "step": 20800 }, { "action_loss": 0.0177, "epoch": 1.960139136974711, "learning_rate": 1.970397999023406e-05, "llm_loss": 0.0, "loss": 0.0177, "step": 20850 }, { "action_loss": 0.0251, "epoch": 1.9648397104446742, "learning_rate": 1.9702381215194413e-05, "llm_loss": 0.0, "loss": 0.0251, "step": 20900 }, { "action_loss": 0.034, "epoch": 1.9695402839146374, "learning_rate": 1.9700778199575358e-05, "llm_loss": 0.0, "loss": 0.034, "step": 20950 }, { "action_loss": 0.0213, "epoch": 1.9742408573846009, "learning_rate": 1.9699170944077516e-05, "llm_loss": 0.0, "loss": 0.0213, "step": 21000 }, { "action_loss": 0.0274, "epoch": 1.9789414308545643, "learning_rate": 1.969755944940337e-05, "llm_loss": 0.0, "loss": 0.0274, "step": 21050 }, { "action_loss": 0.0217, "epoch": 1.9836420043245275, "learning_rate": 1.9695943716257243e-05, "llm_loss": 0.0, "loss": 0.0217, "step": 21100 }, { "action_loss": 0.0251, "epoch": 1.988342577794491, "learning_rate": 1.969432374534532e-05, "llm_loss": 0.0, "loss": 0.0252, "step": 21150 }, { "action_loss": 0.0231, "epoch": 1.9930431512644544, "learning_rate": 1.9692699537375634e-05, "llm_loss": 0.0, "loss": 0.0231, "step": 21200 }, { "action_loss": 0.0216, "epoch": 1.9977437247344176, "learning_rate": 1.969107109305807e-05, "llm_loss": 0.0, "loss": 0.0216, "step": 21250 }, { "action_loss": 0.025, "epoch": 2.002444298204381, "learning_rate": 1.968943841310437e-05, "llm_loss": 0.0, "loss": 0.025, "step": 21300 }, { "action_loss": 0.0119, "epoch": 2.007144871674344, "learning_rate": 1.9687801498228114e-05, "llm_loss": 0.0, "loss": 0.0119, "step": 21350 }, { "action_loss": 0.0236, "epoch": 2.0118454451443077, "learning_rate": 1.968616034914475e-05, "llm_loss": 0.0, "loss": 0.0236, "step": 21400 }, { "action_loss": 0.0176, "epoch": 2.016546018614271, "learning_rate": 1.9684514966571566e-05, "llm_loss": 0.0, "loss": 0.0176, "step": 21450 }, { "action_loss": 0.0191, "epoch": 2.021246592084234, "learning_rate": 1.96828653512277e-05, "llm_loss": 0.0, "loss": 0.0191, "step": 21500 }, { "action_loss": 0.0255, "epoch": 2.0259471655541974, "learning_rate": 1.9681211503834144e-05, "llm_loss": 0.0, "loss": 0.0255, "step": 21550 }, { "action_loss": 0.0117, "epoch": 2.030647739024161, "learning_rate": 1.9679553425113736e-05, "llm_loss": 0.0, "loss": 0.0117, "step": 21600 }, { "action_loss": 0.018, "epoch": 2.0353483124941243, "learning_rate": 1.967789111579117e-05, "llm_loss": 0.0, "loss": 0.018, "step": 21650 }, { "action_loss": 0.0375, "epoch": 2.0400488859640875, "learning_rate": 1.967622457659298e-05, "llm_loss": 0.0, "loss": 0.0375, "step": 21700 }, { "action_loss": 0.0239, "epoch": 2.044749459434051, "learning_rate": 1.9674553808247557e-05, "llm_loss": 0.0, "loss": 0.0239, "step": 21750 }, { "action_loss": 0.0194, "epoch": 2.0494500329040144, "learning_rate": 1.9672878811485135e-05, "llm_loss": 0.0, "loss": 0.0194, "step": 21800 }, { "action_loss": 0.0388, "epoch": 2.0541506063739776, "learning_rate": 1.96711995870378e-05, "llm_loss": 0.0, "loss": 0.0388, "step": 21850 }, { "action_loss": 0.0249, "epoch": 2.058851179843941, "learning_rate": 1.966951613563948e-05, "llm_loss": 0.0, "loss": 0.0249, "step": 21900 }, { "action_loss": 0.0258, "epoch": 2.0635517533139045, "learning_rate": 1.966782845802596e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 21950 }, { "action_loss": 0.0286, "epoch": 2.0682523267838677, "learning_rate": 1.9666136554934858e-05, "llm_loss": 0.0, "loss": 0.0286, "step": 22000 }, { "action_loss": 0.027, "epoch": 2.072952900253831, "learning_rate": 1.9664440427105654e-05, "llm_loss": 0.0, "loss": 0.027, "step": 22050 }, { "action_loss": 0.0265, "epoch": 2.077653473723794, "learning_rate": 1.9662740075279664e-05, "llm_loss": 0.0, "loss": 0.0265, "step": 22100 }, { "action_loss": 0.0263, "epoch": 2.082354047193758, "learning_rate": 1.9661035500200053e-05, "llm_loss": 0.0, "loss": 0.0263, "step": 22150 }, { "action_loss": 0.0226, "epoch": 2.087054620663721, "learning_rate": 1.9659326702611835e-05, "llm_loss": 0.0, "loss": 0.0226, "step": 22200 }, { "action_loss": 0.0306, "epoch": 2.0917551941336843, "learning_rate": 1.9657613683261866e-05, "llm_loss": 0.0, "loss": 0.0306, "step": 22250 }, { "action_loss": 0.0385, "epoch": 2.0964557676036475, "learning_rate": 1.965589644289885e-05, "llm_loss": 0.0, "loss": 0.0385, "step": 22300 }, { "action_loss": 0.0091, "epoch": 2.101156341073611, "learning_rate": 1.9654174982273332e-05, "llm_loss": 0.0, "loss": 0.0091, "step": 22350 }, { "action_loss": 0.0289, "epoch": 2.1058569145435744, "learning_rate": 1.9652449302137705e-05, "llm_loss": 0.0, "loss": 0.0289, "step": 22400 }, { "action_loss": 0.0095, "epoch": 2.1105574880135376, "learning_rate": 1.9650719403246203e-05, "llm_loss": 0.0, "loss": 0.0095, "step": 22450 }, { "action_loss": 0.0219, "epoch": 2.115258061483501, "learning_rate": 1.9648985286354904e-05, "llm_loss": 0.0, "loss": 0.0219, "step": 22500 }, { "action_loss": 0.0282, "epoch": 2.1199586349534645, "learning_rate": 1.9647246952221734e-05, "llm_loss": 0.0, "loss": 0.0282, "step": 22550 }, { "action_loss": 0.0158, "epoch": 2.1246592084234277, "learning_rate": 1.9645504401606463e-05, "llm_loss": 0.0, "loss": 0.0158, "step": 22600 }, { "action_loss": 0.038, "epoch": 2.129359781893391, "learning_rate": 1.9643757635270694e-05, "llm_loss": 0.0, "loss": 0.038, "step": 22650 }, { "action_loss": 0.0218, "epoch": 2.134060355363354, "learning_rate": 1.9642006653977885e-05, "llm_loss": 0.0, "loss": 0.0218, "step": 22700 }, { "action_loss": 0.0289, "epoch": 2.138760928833318, "learning_rate": 1.9640251458493325e-05, "llm_loss": 0.0, "loss": 0.0289, "step": 22750 }, { "action_loss": 0.0257, "epoch": 2.143461502303281, "learning_rate": 1.963849204958415e-05, "llm_loss": 0.0, "loss": 0.0257, "step": 22800 }, { "action_loss": 0.0377, "epoch": 2.1481620757732443, "learning_rate": 1.963672842801934e-05, "llm_loss": 0.0, "loss": 0.0377, "step": 22850 }, { "action_loss": 0.0321, "epoch": 2.1528626492432075, "learning_rate": 1.9634960594569717e-05, "llm_loss": 0.0, "loss": 0.0321, "step": 22900 }, { "action_loss": 0.0316, "epoch": 2.157563222713171, "learning_rate": 1.9633188550007933e-05, "llm_loss": 0.0, "loss": 0.0316, "step": 22950 }, { "action_loss": 0.0158, "epoch": 2.1622637961831344, "learning_rate": 1.9631412295108494e-05, "llm_loss": 0.0, "loss": 0.0158, "step": 23000 }, { "action_loss": 0.0284, "epoch": 2.1669643696530976, "learning_rate": 1.9629631830647736e-05, "llm_loss": 0.0, "loss": 0.0284, "step": 23050 }, { "action_loss": 0.0216, "epoch": 2.1716649431230612, "learning_rate": 1.9627847157403842e-05, "llm_loss": 0.0, "loss": 0.0216, "step": 23100 }, { "action_loss": 0.0192, "epoch": 2.1763655165930245, "learning_rate": 1.962605827615683e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 23150 }, { "action_loss": 0.0282, "epoch": 2.1810660900629877, "learning_rate": 1.962426518768856e-05, "llm_loss": 0.0, "loss": 0.0282, "step": 23200 }, { "action_loss": 0.0186, "epoch": 2.185766663532951, "learning_rate": 1.9622467892782732e-05, "llm_loss": 0.0, "loss": 0.0186, "step": 23250 }, { "action_loss": 0.026, "epoch": 2.190467237002914, "learning_rate": 1.962066639222488e-05, "llm_loss": 0.0, "loss": 0.026, "step": 23300 }, { "action_loss": 0.0246, "epoch": 2.195167810472878, "learning_rate": 1.9618860686802378e-05, "llm_loss": 0.0, "loss": 0.0246, "step": 23350 }, { "action_loss": 0.0187, "epoch": 2.199868383942841, "learning_rate": 1.9617050777304435e-05, "llm_loss": 0.0, "loss": 0.0187, "step": 23400 }, { "action_loss": 0.0218, "epoch": 2.2045689574128042, "learning_rate": 1.9615236664522108e-05, "llm_loss": 0.0, "loss": 0.0218, "step": 23450 }, { "action_loss": 0.0249, "epoch": 2.209269530882768, "learning_rate": 1.9613418349248276e-05, "llm_loss": 0.0, "loss": 0.0249, "step": 23500 }, { "action_loss": 0.0222, "epoch": 2.213970104352731, "learning_rate": 1.961159583227767e-05, "llm_loss": 0.0, "loss": 0.0222, "step": 23550 }, { "action_loss": 0.0155, "epoch": 2.2186706778226943, "learning_rate": 1.9609769114406845e-05, "llm_loss": 0.0, "loss": 0.0155, "step": 23600 }, { "action_loss": 0.0225, "epoch": 2.2233712512926576, "learning_rate": 1.9607938196434198e-05, "llm_loss": 0.0, "loss": 0.0225, "step": 23650 }, { "action_loss": 0.0347, "epoch": 2.2280718247626212, "learning_rate": 1.960610307915996e-05, "llm_loss": 0.0, "loss": 0.0347, "step": 23700 }, { "action_loss": 0.0193, "epoch": 2.2327723982325844, "learning_rate": 1.96042637633862e-05, "llm_loss": 0.0, "loss": 0.0193, "step": 23750 }, { "action_loss": 0.028, "epoch": 2.2374729717025477, "learning_rate": 1.960242024991682e-05, "llm_loss": 0.0, "loss": 0.028, "step": 23800 }, { "action_loss": 0.0247, "epoch": 2.242173545172511, "learning_rate": 1.960057253955755e-05, "llm_loss": 0.0, "loss": 0.0247, "step": 23850 }, { "action_loss": 0.0271, "epoch": 2.2468741186424745, "learning_rate": 1.9598720633115973e-05, "llm_loss": 0.0, "loss": 0.0271, "step": 23900 }, { "action_loss": 0.0085, "epoch": 2.2515746921124378, "learning_rate": 1.9596864531401488e-05, "llm_loss": 0.0, "loss": 0.0085, "step": 23950 }, { "action_loss": 0.0278, "epoch": 2.256275265582401, "learning_rate": 1.959500423522533e-05, "llm_loss": 0.0, "loss": 0.0278, "step": 24000 }, { "action_loss": 0.0347, "epoch": 2.260975839052364, "learning_rate": 1.9593139745400575e-05, "llm_loss": 0.0, "loss": 0.0347, "step": 24050 }, { "action_loss": 0.0185, "epoch": 2.265676412522328, "learning_rate": 1.959127106274213e-05, "llm_loss": 0.0, "loss": 0.0185, "step": 24100 }, { "action_loss": 0.0203, "epoch": 2.270376985992291, "learning_rate": 1.958939818806673e-05, "llm_loss": 0.0, "loss": 0.0203, "step": 24150 }, { "action_loss": 0.0129, "epoch": 2.2750775594622543, "learning_rate": 1.958752112219294e-05, "llm_loss": 0.0, "loss": 0.0129, "step": 24200 }, { "action_loss": 0.021, "epoch": 2.2797781329322175, "learning_rate": 1.958563986594117e-05, "llm_loss": 0.0, "loss": 0.021, "step": 24250 }, { "action_loss": 0.0276, "epoch": 2.284478706402181, "learning_rate": 1.9583754420133646e-05, "llm_loss": 0.0, "loss": 0.0276, "step": 24300 }, { "action_loss": 0.0339, "epoch": 2.2891792798721444, "learning_rate": 1.9581864785594433e-05, "llm_loss": 0.0, "loss": 0.0339, "step": 24350 }, { "action_loss": 0.0246, "epoch": 2.2938798533421076, "learning_rate": 1.957997096314943e-05, "llm_loss": 0.0, "loss": 0.0246, "step": 24400 }, { "action_loss": 0.0348, "epoch": 2.2985804268120713, "learning_rate": 1.9578072953626357e-05, "llm_loss": 0.0, "loss": 0.0348, "step": 24450 }, { "action_loss": 0.0227, "epoch": 2.3032810002820345, "learning_rate": 1.9576170757854773e-05, "llm_loss": 0.0, "loss": 0.0227, "step": 24500 }, { "action_loss": 0.0232, "epoch": 2.3079815737519978, "learning_rate": 1.957426437666606e-05, "llm_loss": 0.0, "loss": 0.0232, "step": 24550 }, { "action_loss": 0.03, "epoch": 2.312682147221961, "learning_rate": 1.9572353810893433e-05, "llm_loss": 0.0, "loss": 0.03, "step": 24600 }, { "action_loss": 0.0354, "epoch": 2.317382720691924, "learning_rate": 1.9570439061371936e-05, "llm_loss": 0.0, "loss": 0.0354, "step": 24650 }, { "action_loss": 0.0373, "epoch": 2.322083294161888, "learning_rate": 1.956852012893844e-05, "llm_loss": 0.0, "loss": 0.0373, "step": 24700 }, { "action_loss": 0.0217, "epoch": 2.326783867631851, "learning_rate": 1.9566597014431644e-05, "llm_loss": 0.0, "loss": 0.0217, "step": 24750 }, { "action_loss": 0.0473, "epoch": 2.3314844411018143, "learning_rate": 1.9564669718692078e-05, "llm_loss": 0.0, "loss": 0.0473, "step": 24800 }, { "action_loss": 0.0238, "epoch": 2.336185014571778, "learning_rate": 1.9562738242562097e-05, "llm_loss": 0.0, "loss": 0.0238, "step": 24850 }, { "action_loss": 0.0244, "epoch": 2.340885588041741, "learning_rate": 1.9560802586885876e-05, "llm_loss": 0.0, "loss": 0.0244, "step": 24900 }, { "action_loss": 0.0278, "epoch": 2.3455861615117044, "learning_rate": 1.9558862752509433e-05, "llm_loss": 0.0, "loss": 0.0278, "step": 24950 }, { "action_loss": 0.0117, "epoch": 2.3502867349816676, "learning_rate": 1.95569187402806e-05, "llm_loss": 0.0, "loss": 0.0117, "step": 25000 }, { "action_loss": 0.0189, "epoch": 2.3549873084516313, "learning_rate": 1.9554970551049037e-05, "llm_loss": 0.0, "loss": 0.0189, "step": 25050 }, { "action_loss": 0.0051, "epoch": 2.3596878819215945, "learning_rate": 1.9553018185666234e-05, "llm_loss": 0.0, "loss": 0.0051, "step": 25100 }, { "action_loss": 0.0247, "epoch": 2.3643884553915577, "learning_rate": 1.9551061644985503e-05, "llm_loss": 0.0, "loss": 0.0247, "step": 25150 }, { "action_loss": 0.0172, "epoch": 2.369089028861521, "learning_rate": 1.9549100929861977e-05, "llm_loss": 0.0, "loss": 0.0172, "step": 25200 }, { "action_loss": 0.0175, "epoch": 2.3737896023314846, "learning_rate": 1.954713604115262e-05, "llm_loss": 0.0, "loss": 0.0175, "step": 25250 }, { "action_loss": 0.0258, "epoch": 2.378490175801448, "learning_rate": 1.9545166979716217e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 25300 }, { "action_loss": 0.0315, "epoch": 2.383190749271411, "learning_rate": 1.9543193746413385e-05, "llm_loss": 0.0, "loss": 0.0315, "step": 25350 }, { "action_loss": 0.029, "epoch": 2.3878913227413743, "learning_rate": 1.9541216342106544e-05, "llm_loss": 0.0, "loss": 0.029, "step": 25400 }, { "action_loss": 0.0334, "epoch": 2.392591896211338, "learning_rate": 1.9539234767659957e-05, "llm_loss": 0.0, "loss": 0.0334, "step": 25450 }, { "action_loss": 0.0285, "epoch": 2.397292469681301, "learning_rate": 1.9537249023939703e-05, "llm_loss": 0.0, "loss": 0.0285, "step": 25500 }, { "action_loss": 0.0195, "epoch": 2.4019930431512644, "learning_rate": 1.9535259111813682e-05, "llm_loss": 0.0, "loss": 0.0195, "step": 25550 }, { "action_loss": 0.0124, "epoch": 2.4066936166212276, "learning_rate": 1.9533265032151615e-05, "llm_loss": 0.0, "loss": 0.0124, "step": 25600 }, { "action_loss": 0.0272, "epoch": 2.4113941900911913, "learning_rate": 1.9531266785825046e-05, "llm_loss": 0.0, "loss": 0.0272, "step": 25650 }, { "action_loss": 0.0145, "epoch": 2.4160947635611545, "learning_rate": 1.9529264373707346e-05, "llm_loss": 0.0, "loss": 0.0145, "step": 25700 }, { "action_loss": 0.0315, "epoch": 2.4207953370311177, "learning_rate": 1.9527257796673692e-05, "llm_loss": 0.0, "loss": 0.0315, "step": 25750 }, { "action_loss": 0.012, "epoch": 2.4254959105010814, "learning_rate": 1.9525247055601095e-05, "llm_loss": 0.0, "loss": 0.012, "step": 25800 }, { "action_loss": 0.0121, "epoch": 2.4301964839710446, "learning_rate": 1.9523232151368383e-05, "llm_loss": 0.0, "loss": 0.0121, "step": 25850 }, { "action_loss": 0.0214, "epoch": 2.434897057441008, "learning_rate": 1.95212130848562e-05, "llm_loss": 0.0, "loss": 0.0214, "step": 25900 }, { "action_loss": 0.0152, "epoch": 2.439597630910971, "learning_rate": 1.9519189856947008e-05, "llm_loss": 0.0, "loss": 0.0152, "step": 25950 }, { "action_loss": 0.0204, "epoch": 2.4442982043809343, "learning_rate": 1.9517162468525093e-05, "llm_loss": 0.0, "loss": 0.0204, "step": 26000 }, { "action_loss": 0.0271, "epoch": 2.448998777850898, "learning_rate": 1.9515130920476563e-05, "llm_loss": 0.0, "loss": 0.0271, "step": 26050 }, { "action_loss": 0.0195, "epoch": 2.453699351320861, "learning_rate": 1.9513095213689327e-05, "llm_loss": 0.0, "loss": 0.0195, "step": 26100 }, { "action_loss": 0.0251, "epoch": 2.4583999247908244, "learning_rate": 1.951105534905313e-05, "llm_loss": 0.0, "loss": 0.0251, "step": 26150 }, { "action_loss": 0.032, "epoch": 2.463100498260788, "learning_rate": 1.950901132745953e-05, "llm_loss": 0.0, "loss": 0.032, "step": 26200 }, { "action_loss": 0.0288, "epoch": 2.4678010717307513, "learning_rate": 1.9506963149801894e-05, "llm_loss": 0.0, "loss": 0.0288, "step": 26250 }, { "action_loss": 0.0322, "epoch": 2.4725016452007145, "learning_rate": 1.9504910816975408e-05, "llm_loss": 0.0, "loss": 0.0322, "step": 26300 }, { "action_loss": 0.0389, "epoch": 2.4772022186706777, "learning_rate": 1.9502854329877083e-05, "llm_loss": 0.0, "loss": 0.0389, "step": 26350 }, { "action_loss": 0.0279, "epoch": 2.481902792140641, "learning_rate": 1.9500793689405736e-05, "llm_loss": 0.0, "loss": 0.0279, "step": 26400 }, { "action_loss": 0.0315, "epoch": 2.4866033656106046, "learning_rate": 1.9498728896462002e-05, "llm_loss": 0.0, "loss": 0.0315, "step": 26450 }, { "action_loss": 0.0266, "epoch": 2.491303939080568, "learning_rate": 1.9496659951948332e-05, "llm_loss": 0.0, "loss": 0.0266, "step": 26500 }, { "action_loss": 0.0128, "epoch": 2.496004512550531, "learning_rate": 1.9494586856768994e-05, "llm_loss": 0.0, "loss": 0.0128, "step": 26550 }, { "action_loss": 0.0211, "epoch": 2.5007050860204947, "learning_rate": 1.9492509611830063e-05, "llm_loss": 0.0, "loss": 0.0211, "step": 26600 }, { "action_loss": 0.0216, "epoch": 2.505405659490458, "learning_rate": 1.9490428218039432e-05, "llm_loss": 0.0, "loss": 0.0216, "step": 26650 }, { "action_loss": 0.0221, "epoch": 2.510106232960421, "learning_rate": 1.9488342676306812e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 26700 }, { "action_loss": 0.0213, "epoch": 2.5148068064303843, "learning_rate": 1.9486252987543715e-05, "llm_loss": 0.0, "loss": 0.0213, "step": 26750 }, { "action_loss": 0.0221, "epoch": 2.5195073799003476, "learning_rate": 1.9484159152663476e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 26800 }, { "action_loss": 0.0268, "epoch": 2.5242079533703112, "learning_rate": 1.948206117258124e-05, "llm_loss": 0.0, "loss": 0.0268, "step": 26850 }, { "action_loss": 0.047, "epoch": 2.5289085268402745, "learning_rate": 1.947995904821396e-05, "llm_loss": 0.0, "loss": 0.047, "step": 26900 }, { "action_loss": 0.0159, "epoch": 2.5336091003102377, "learning_rate": 1.9477852780480404e-05, "llm_loss": 0.0, "loss": 0.0159, "step": 26950 }, { "action_loss": 0.0253, "epoch": 2.5383096737802013, "learning_rate": 1.9475742370301153e-05, "llm_loss": 0.0, "loss": 0.0253, "step": 27000 }, { "action_loss": 0.0267, "epoch": 2.5430102472501646, "learning_rate": 1.947362781859859e-05, "llm_loss": 0.0, "loss": 0.0267, "step": 27050 }, { "action_loss": 0.0252, "epoch": 2.5477108207201278, "learning_rate": 1.947150912629691e-05, "llm_loss": 0.0, "loss": 0.0252, "step": 27100 }, { "action_loss": 0.0156, "epoch": 2.5524113941900914, "learning_rate": 1.9469386294322135e-05, "llm_loss": 0.0, "loss": 0.0156, "step": 27150 }, { "action_loss": 0.0117, "epoch": 2.5571119676600547, "learning_rate": 1.9467259323602067e-05, "llm_loss": 0.0, "loss": 0.0117, "step": 27200 }, { "action_loss": 0.023, "epoch": 2.561812541130018, "learning_rate": 1.9465128215066342e-05, "llm_loss": 0.0, "loss": 0.023, "step": 27250 }, { "action_loss": 0.0352, "epoch": 2.566513114599981, "learning_rate": 1.9462992969646392e-05, "llm_loss": 0.0, "loss": 0.0352, "step": 27300 }, { "action_loss": 0.0288, "epoch": 2.5712136880699443, "learning_rate": 1.9460853588275454e-05, "llm_loss": 0.0, "loss": 0.0288, "step": 27350 }, { "action_loss": 0.0148, "epoch": 2.575914261539908, "learning_rate": 1.945871007188859e-05, "llm_loss": 0.0, "loss": 0.0148, "step": 27400 }, { "action_loss": 0.019, "epoch": 2.580614835009871, "learning_rate": 1.9456562421422648e-05, "llm_loss": 0.0, "loss": 0.019, "step": 27450 }, { "action_loss": 0.0413, "epoch": 2.5853154084798344, "learning_rate": 1.9454410637816296e-05, "llm_loss": 0.0, "loss": 0.0413, "step": 27500 }, { "action_loss": 0.0225, "epoch": 2.590015981949798, "learning_rate": 1.9452254722010008e-05, "llm_loss": 0.0, "loss": 0.0225, "step": 27550 }, { "action_loss": 0.0249, "epoch": 2.5947165554197613, "learning_rate": 1.9450094674946056e-05, "llm_loss": 0.0, "loss": 0.0249, "step": 27600 }, { "action_loss": 0.0127, "epoch": 2.5994171288897245, "learning_rate": 1.9447930497568528e-05, "llm_loss": 0.0, "loss": 0.0127, "step": 27650 }, { "action_loss": 0.0114, "epoch": 2.6041177023596878, "learning_rate": 1.9445762190823308e-05, "llm_loss": 0.0, "loss": 0.0114, "step": 27700 }, { "action_loss": 0.0317, "epoch": 2.608818275829651, "learning_rate": 1.944358975565809e-05, "llm_loss": 0.0, "loss": 0.0317, "step": 27750 }, { "action_loss": 0.0344, "epoch": 2.6135188492996146, "learning_rate": 1.944141319302237e-05, "llm_loss": 0.0, "loss": 0.0344, "step": 27800 }, { "action_loss": 0.0313, "epoch": 2.618219422769578, "learning_rate": 1.9439232503867455e-05, "llm_loss": 0.0, "loss": 0.0313, "step": 27850 }, { "action_loss": 0.0282, "epoch": 2.622919996239541, "learning_rate": 1.943704768914644e-05, "llm_loss": 0.0, "loss": 0.0282, "step": 27900 }, { "action_loss": 0.0258, "epoch": 2.6276205697095047, "learning_rate": 1.9434858749814244e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 27950 }, { "action_loss": 0.0219, "epoch": 2.632321143179468, "learning_rate": 1.9432665686827567e-05, "llm_loss": 0.0, "loss": 0.0219, "step": 28000 }, { "action_loss": 0.0146, "epoch": 2.637021716649431, "learning_rate": 1.9430468501144928e-05, "llm_loss": 0.0, "loss": 0.0146, "step": 28050 }, { "action_loss": 0.0141, "epoch": 2.6417222901193944, "learning_rate": 1.942826719372664e-05, "llm_loss": 0.0, "loss": 0.0141, "step": 28100 }, { "action_loss": 0.0224, "epoch": 2.6464228635893576, "learning_rate": 1.9426061765534822e-05, "llm_loss": 0.0, "loss": 0.0224, "step": 28150 }, { "action_loss": 0.0206, "epoch": 2.6511234370593213, "learning_rate": 1.9423852217533386e-05, "llm_loss": 0.0, "loss": 0.0206, "step": 28200 }, { "action_loss": 0.0314, "epoch": 2.6558240105292845, "learning_rate": 1.9421638550688057e-05, "llm_loss": 0.0, "loss": 0.0314, "step": 28250 }, { "action_loss": 0.022, "epoch": 2.6605245839992477, "learning_rate": 1.9419420765966345e-05, "llm_loss": 0.0, "loss": 0.022, "step": 28300 }, { "action_loss": 0.0328, "epoch": 2.6652251574692114, "learning_rate": 1.9417198864337572e-05, "llm_loss": 0.0, "loss": 0.0328, "step": 28350 }, { "action_loss": 0.0279, "epoch": 2.6699257309391746, "learning_rate": 1.941497284677286e-05, "llm_loss": 0.0, "loss": 0.0279, "step": 28400 }, { "action_loss": 0.0173, "epoch": 2.674626304409138, "learning_rate": 1.941274271424512e-05, "llm_loss": 0.0, "loss": 0.0173, "step": 28450 }, { "action_loss": 0.0248, "epoch": 2.6793268778791015, "learning_rate": 1.9410508467729064e-05, "llm_loss": 0.0, "loss": 0.0248, "step": 28500 }, { "action_loss": 0.029, "epoch": 2.6840274513490647, "learning_rate": 1.940827010820121e-05, "llm_loss": 0.0, "loss": 0.029, "step": 28550 }, { "action_loss": 0.0185, "epoch": 2.688728024819028, "learning_rate": 1.9406027636639867e-05, "llm_loss": 0.0, "loss": 0.0185, "step": 28600 }, { "action_loss": 0.0276, "epoch": 2.693428598288991, "learning_rate": 1.940378105402514e-05, "llm_loss": 0.0, "loss": 0.0276, "step": 28650 }, { "action_loss": 0.0284, "epoch": 2.6981291717589544, "learning_rate": 1.940153036133894e-05, "llm_loss": 0.0, "loss": 0.0284, "step": 28700 }, { "action_loss": 0.0252, "epoch": 2.702829745228918, "learning_rate": 1.939927555956496e-05, "llm_loss": 0.0, "loss": 0.0252, "step": 28750 }, { "action_loss": 0.0324, "epoch": 2.7075303186988813, "learning_rate": 1.9397016649688705e-05, "llm_loss": 0.0, "loss": 0.0324, "step": 28800 }, { "action_loss": 0.0221, "epoch": 2.7122308921688445, "learning_rate": 1.9394753632697464e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 28850 }, { "action_loss": 0.0181, "epoch": 2.716931465638808, "learning_rate": 1.9392486509580316e-05, "llm_loss": 0.0, "loss": 0.0181, "step": 28900 }, { "action_loss": 0.0323, "epoch": 2.7216320391087714, "learning_rate": 1.9390215281328157e-05, "llm_loss": 0.0, "loss": 0.0323, "step": 28950 }, { "action_loss": 0.0478, "epoch": 2.7263326125787346, "learning_rate": 1.9387939948933655e-05, "llm_loss": 0.0, "loss": 0.0478, "step": 29000 }, { "action_loss": 0.0214, "epoch": 2.731033186048698, "learning_rate": 1.938566051339128e-05, "llm_loss": 0.0, "loss": 0.0214, "step": 29050 }, { "action_loss": 0.0208, "epoch": 2.735733759518661, "learning_rate": 1.9383376975697302e-05, "llm_loss": 0.0, "loss": 0.0208, "step": 29100 }, { "action_loss": 0.0218, "epoch": 2.7404343329886247, "learning_rate": 1.9381089336849773e-05, "llm_loss": 0.0, "loss": 0.0218, "step": 29150 }, { "action_loss": 0.0226, "epoch": 2.745134906458588, "learning_rate": 1.9378797597848536e-05, "llm_loss": 0.0, "loss": 0.0227, "step": 29200 }, { "action_loss": 0.0219, "epoch": 2.749835479928551, "learning_rate": 1.937650175969524e-05, "llm_loss": 0.0, "loss": 0.0219, "step": 29250 }, { "action_loss": 0.0242, "epoch": 2.754536053398515, "learning_rate": 1.9374201823393316e-05, "llm_loss": 0.0, "loss": 0.0242, "step": 29300 }, { "action_loss": 0.0238, "epoch": 2.759236626868478, "learning_rate": 1.9371897789947984e-05, "llm_loss": 0.0, "loss": 0.0238, "step": 29350 }, { "action_loss": 0.0291, "epoch": 2.7639372003384413, "learning_rate": 1.936958966036626e-05, "llm_loss": 0.0, "loss": 0.0291, "step": 29400 }, { "action_loss": 0.0193, "epoch": 2.7686377738084045, "learning_rate": 1.9367277435656953e-05, "llm_loss": 0.0, "loss": 0.0193, "step": 29450 }, { "action_loss": 0.0277, "epoch": 2.7733383472783677, "learning_rate": 1.9364961116830653e-05, "llm_loss": 0.0, "loss": 0.0277, "step": 29500 }, { "action_loss": 0.0281, "epoch": 2.7780389207483314, "learning_rate": 1.9362640704899744e-05, "llm_loss": 0.0, "loss": 0.0281, "step": 29550 }, { "action_loss": 0.0115, "epoch": 2.7827394942182946, "learning_rate": 1.93603162008784e-05, "llm_loss": 0.0, "loss": 0.0115, "step": 29600 }, { "action_loss": 0.0244, "epoch": 2.787440067688258, "learning_rate": 1.9357987605782576e-05, "llm_loss": 0.0, "loss": 0.0244, "step": 29650 }, { "action_loss": 0.0309, "epoch": 2.7921406411582215, "learning_rate": 1.9355654920630033e-05, "llm_loss": 0.0, "loss": 0.0309, "step": 29700 }, { "action_loss": 0.0243, "epoch": 2.7968412146281847, "learning_rate": 1.93533181464403e-05, "llm_loss": 0.0, "loss": 0.0243, "step": 29750 }, { "action_loss": 0.0371, "epoch": 2.801541788098148, "learning_rate": 1.9350977284234704e-05, "llm_loss": 0.0, "loss": 0.0371, "step": 29800 }, { "action_loss": 0.0457, "epoch": 2.8062423615681116, "learning_rate": 1.9348632335036354e-05, "llm_loss": 0.0, "loss": 0.0457, "step": 29850 }, { "action_loss": 0.0174, "epoch": 2.810942935038075, "learning_rate": 1.934628329987015e-05, "llm_loss": 0.0, "loss": 0.0174, "step": 29900 }, { "action_loss": 0.0274, "epoch": 2.815643508508038, "learning_rate": 1.934393017976277e-05, "llm_loss": 0.0, "loss": 0.0274, "step": 29950 }, { "action_loss": 0.0181, "epoch": 2.8203440819780012, "learning_rate": 1.934157297574269e-05, "llm_loss": 0.0, "loss": 0.0181, "step": 30000 }, { "action_loss": 0.0273, "epoch": 2.8250446554479645, "learning_rate": 1.933921168884016e-05, "llm_loss": 0.0, "loss": 0.0273, "step": 30050 }, { "action_loss": 0.0312, "epoch": 2.829745228917928, "learning_rate": 1.9336846320087212e-05, "llm_loss": 0.0, "loss": 0.0312, "step": 30100 }, { "action_loss": 0.0371, "epoch": 2.8344458023878913, "learning_rate": 1.9334476870517675e-05, "llm_loss": 0.0, "loss": 0.0371, "step": 30150 }, { "action_loss": 0.022, "epoch": 2.8391463758578546, "learning_rate": 1.9332103341167154e-05, "llm_loss": 0.0, "loss": 0.022, "step": 30200 }, { "action_loss": 0.0139, "epoch": 2.8438469493278182, "learning_rate": 1.9329725733073034e-05, "llm_loss": 0.0, "loss": 0.0139, "step": 30250 }, { "action_loss": 0.0269, "epoch": 2.8485475227977815, "learning_rate": 1.9327344047274485e-05, "llm_loss": 0.0, "loss": 0.0269, "step": 30300 }, { "action_loss": 0.0247, "epoch": 2.8532480962677447, "learning_rate": 1.9324958284812468e-05, "llm_loss": 0.0, "loss": 0.0247, "step": 30350 }, { "action_loss": 0.0247, "epoch": 2.857948669737708, "learning_rate": 1.9322568446729716e-05, "llm_loss": 0.0, "loss": 0.0247, "step": 30400 }, { "action_loss": 0.0317, "epoch": 2.862649243207671, "learning_rate": 1.932017453407074e-05, "llm_loss": 0.0, "loss": 0.0317, "step": 30450 }, { "action_loss": 0.0174, "epoch": 2.8673498166776348, "learning_rate": 1.931777654788184e-05, "llm_loss": 0.0, "loss": 0.0174, "step": 30500 }, { "action_loss": 0.0246, "epoch": 2.872050390147598, "learning_rate": 1.9315374489211096e-05, "llm_loss": 0.0, "loss": 0.0246, "step": 30550 }, { "action_loss": 0.0351, "epoch": 2.876750963617561, "learning_rate": 1.9312968359108365e-05, "llm_loss": 0.0, "loss": 0.0351, "step": 30600 }, { "action_loss": 0.0093, "epoch": 2.881451537087525, "learning_rate": 1.9310558158625286e-05, "llm_loss": 0.0, "loss": 0.0093, "step": 30650 }, { "action_loss": 0.0333, "epoch": 2.886152110557488, "learning_rate": 1.930814388881527e-05, "llm_loss": 0.0, "loss": 0.0333, "step": 30700 }, { "action_loss": 0.0215, "epoch": 2.8908526840274513, "learning_rate": 1.9305725550733516e-05, "llm_loss": 0.0, "loss": 0.0215, "step": 30750 }, { "action_loss": 0.0303, "epoch": 2.8955532574974145, "learning_rate": 1.9303303145436997e-05, "llm_loss": 0.0, "loss": 0.0303, "step": 30800 }, { "action_loss": 0.0151, "epoch": 2.9002538309673778, "learning_rate": 1.9300876673984463e-05, "llm_loss": 0.0, "loss": 0.0151, "step": 30850 }, { "action_loss": 0.0271, "epoch": 2.9049544044373414, "learning_rate": 1.929844613743644e-05, "llm_loss": 0.0, "loss": 0.0271, "step": 30900 }, { "action_loss": 0.0279, "epoch": 2.9096549779073047, "learning_rate": 1.9296011536855235e-05, "llm_loss": 0.0, "loss": 0.0279, "step": 30950 }, { "action_loss": 0.0218, "epoch": 2.914355551377268, "learning_rate": 1.9293572873304925e-05, "llm_loss": 0.0, "loss": 0.0218, "step": 31000 }, { "action_loss": 0.0176, "epoch": 2.9190561248472315, "learning_rate": 1.9291130147851374e-05, "llm_loss": 0.0, "loss": 0.0176, "step": 31050 }, { "action_loss": 0.0312, "epoch": 2.9237566983171948, "learning_rate": 1.9288683361562204e-05, "llm_loss": 0.0, "loss": 0.0312, "step": 31100 }, { "action_loss": 0.0084, "epoch": 2.928457271787158, "learning_rate": 1.928623251550683e-05, "llm_loss": 0.0, "loss": 0.0084, "step": 31150 }, { "action_loss": 0.0335, "epoch": 2.9331578452571216, "learning_rate": 1.9283777610756427e-05, "llm_loss": 0.0, "loss": 0.0335, "step": 31200 }, { "action_loss": 0.0215, "epoch": 2.937858418727085, "learning_rate": 1.928131864838396e-05, "llm_loss": 0.0, "loss": 0.0215, "step": 31250 }, { "action_loss": 0.0324, "epoch": 2.942558992197048, "learning_rate": 1.9278855629464145e-05, "llm_loss": 0.0, "loss": 0.0324, "step": 31300 }, { "action_loss": 0.0115, "epoch": 2.9472595656670113, "learning_rate": 1.927638855507349e-05, "llm_loss": 0.0, "loss": 0.0115, "step": 31350 }, { "action_loss": 0.0272, "epoch": 2.9519601391369745, "learning_rate": 1.9273917426290264e-05, "llm_loss": 0.0, "loss": 0.0272, "step": 31400 }, { "action_loss": 0.0279, "epoch": 2.956660712606938, "learning_rate": 1.927144224419452e-05, "llm_loss": 0.0, "loss": 0.0279, "step": 31450 }, { "action_loss": 0.009, "epoch": 2.9613612860769014, "learning_rate": 1.9268963009868073e-05, "llm_loss": 0.0, "loss": 0.009, "step": 31500 }, { "action_loss": 0.0361, "epoch": 2.9660618595468646, "learning_rate": 1.926647972439451e-05, "llm_loss": 0.0, "loss": 0.0361, "step": 31550 }, { "action_loss": 0.0268, "epoch": 2.9707624330168283, "learning_rate": 1.926399238885919e-05, "llm_loss": 0.0, "loss": 0.0268, "step": 31600 }, { "action_loss": 0.0371, "epoch": 2.9754630064867915, "learning_rate": 1.9261501004349248e-05, "llm_loss": 0.0, "loss": 0.0371, "step": 31650 }, { "action_loss": 0.0319, "epoch": 2.9801635799567547, "learning_rate": 1.9259005571953577e-05, "llm_loss": 0.0, "loss": 0.0319, "step": 31700 }, { "action_loss": 0.014, "epoch": 2.984864153426718, "learning_rate": 1.9256506092762845e-05, "llm_loss": 0.0, "loss": 0.014, "step": 31750 }, { "action_loss": 0.0198, "epoch": 2.989564726896681, "learning_rate": 1.925400256786949e-05, "llm_loss": 0.0, "loss": 0.0198, "step": 31800 }, { "action_loss": 0.022, "epoch": 2.994265300366645, "learning_rate": 1.925149499836772e-05, "llm_loss": 0.0, "loss": 0.022, "step": 31850 }, { "action_loss": 0.0179, "epoch": 2.998965873836608, "learning_rate": 1.9248983385353504e-05, "llm_loss": 0.0, "loss": 0.0179, "step": 31900 }, { "action_loss": 0.0265, "epoch": 3.0036664473065713, "learning_rate": 1.9246467729924587e-05, "llm_loss": 0.0, "loss": 0.0265, "step": 31950 }, { "action_loss": 0.0135, "epoch": 3.008367020776535, "learning_rate": 1.9243948033180475e-05, "llm_loss": 0.0, "loss": 0.0135, "step": 32000 }, { "action_loss": 0.0394, "epoch": 3.013067594246498, "learning_rate": 1.9241424296222435e-05, "llm_loss": 0.0, "loss": 0.0394, "step": 32050 }, { "action_loss": 0.0152, "epoch": 3.0177681677164614, "learning_rate": 1.9238896520153513e-05, "llm_loss": 0.0, "loss": 0.0152, "step": 32100 }, { "action_loss": 0.0219, "epoch": 3.0224687411864246, "learning_rate": 1.9236364706078512e-05, "llm_loss": 0.0, "loss": 0.0219, "step": 32150 }, { "action_loss": 0.0396, "epoch": 3.0271693146563883, "learning_rate": 1.9233828855104e-05, "llm_loss": 0.0, "loss": 0.0396, "step": 32200 }, { "action_loss": 0.0218, "epoch": 3.0318698881263515, "learning_rate": 1.923128896833831e-05, "llm_loss": 0.0, "loss": 0.0218, "step": 32250 }, { "action_loss": 0.0233, "epoch": 3.0365704615963147, "learning_rate": 1.9228745046891545e-05, "llm_loss": 0.0, "loss": 0.0233, "step": 32300 }, { "action_loss": 0.0313, "epoch": 3.041271035066278, "learning_rate": 1.922619709187556e-05, "llm_loss": 0.0, "loss": 0.0313, "step": 32350 }, { "action_loss": 0.0144, "epoch": 3.0459716085362416, "learning_rate": 1.9223645104403987e-05, "llm_loss": 0.0, "loss": 0.0144, "step": 32400 }, { "action_loss": 0.0149, "epoch": 3.050672182006205, "learning_rate": 1.9221089085592203e-05, "llm_loss": 0.0, "loss": 0.0149, "step": 32450 }, { "action_loss": 0.0304, "epoch": 3.055372755476168, "learning_rate": 1.921852903655736e-05, "llm_loss": 0.0, "loss": 0.0304, "step": 32500 }, { "action_loss": 0.0143, "epoch": 3.0600733289461313, "learning_rate": 1.921596495841837e-05, "llm_loss": 0.0, "loss": 0.0143, "step": 32550 }, { "action_loss": 0.0351, "epoch": 3.064773902416095, "learning_rate": 1.9213396852295906e-05, "llm_loss": 0.0, "loss": 0.0351, "step": 32600 }, { "action_loss": 0.0207, "epoch": 3.069474475886058, "learning_rate": 1.9210824719312395e-05, "llm_loss": 0.0, "loss": 0.0207, "step": 32650 }, { "action_loss": 0.0217, "epoch": 3.0741750493560214, "learning_rate": 1.9208248560592024e-05, "llm_loss": 0.0, "loss": 0.0217, "step": 32700 }, { "action_loss": 0.0272, "epoch": 3.0788756228259846, "learning_rate": 1.9205668377260757e-05, "llm_loss": 0.0, "loss": 0.0272, "step": 32750 }, { "action_loss": 0.0209, "epoch": 3.0835761962959483, "learning_rate": 1.9203084170446294e-05, "llm_loss": 0.0, "loss": 0.0209, "step": 32800 }, { "action_loss": 0.0191, "epoch": 3.0882767697659115, "learning_rate": 1.9200495941278105e-05, "llm_loss": 0.0, "loss": 0.0191, "step": 32850 }, { "action_loss": 0.0275, "epoch": 3.0929773432358747, "learning_rate": 1.9197903690887417e-05, "llm_loss": 0.0, "loss": 0.0275, "step": 32900 }, { "action_loss": 0.0207, "epoch": 3.097677916705838, "learning_rate": 1.9195307420407217e-05, "llm_loss": 0.0, "loss": 0.0207, "step": 32950 }, { "action_loss": 0.0243, "epoch": 3.1023784901758016, "learning_rate": 1.9192707130972245e-05, "llm_loss": 0.0, "loss": 0.0243, "step": 33000 }, { "action_loss": 0.0112, "epoch": 3.107079063645765, "learning_rate": 1.9190102823719e-05, "llm_loss": 0.0, "loss": 0.0112, "step": 33050 }, { "action_loss": 0.0275, "epoch": 3.111779637115728, "learning_rate": 1.9187494499785728e-05, "llm_loss": 0.0, "loss": 0.0275, "step": 33100 }, { "action_loss": 0.0285, "epoch": 3.1164802105856912, "learning_rate": 1.9184882160312448e-05, "llm_loss": 0.0, "loss": 0.0285, "step": 33150 }, { "action_loss": 0.0258, "epoch": 3.121180784055655, "learning_rate": 1.9182265806440918e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 33200 }, { "action_loss": 0.026, "epoch": 3.125881357525618, "learning_rate": 1.917964543931466e-05, "llm_loss": 0.0, "loss": 0.026, "step": 33250 }, { "action_loss": 0.0245, "epoch": 3.1305819309955814, "learning_rate": 1.9177021060078943e-05, "llm_loss": 0.0, "loss": 0.0245, "step": 33300 }, { "action_loss": 0.0148, "epoch": 3.135282504465545, "learning_rate": 1.9174392669880803e-05, "llm_loss": 0.0, "loss": 0.0148, "step": 33350 }, { "action_loss": 0.0377, "epoch": 3.1399830779355082, "learning_rate": 1.917176026986901e-05, "llm_loss": 0.0, "loss": 0.0377, "step": 33400 }, { "action_loss": 0.018, "epoch": 3.1446836514054715, "learning_rate": 1.9169123861194097e-05, "llm_loss": 0.0, "loss": 0.018, "step": 33450 }, { "action_loss": 0.0223, "epoch": 3.1493842248754347, "learning_rate": 1.916648344500835e-05, "llm_loss": 0.0, "loss": 0.0223, "step": 33500 }, { "action_loss": 0.03, "epoch": 3.1540847983453983, "learning_rate": 1.916383902246581e-05, "llm_loss": 0.0, "loss": 0.03, "step": 33550 }, { "action_loss": 0.012, "epoch": 3.1587853718153616, "learning_rate": 1.9161190594722254e-05, "llm_loss": 0.0, "loss": 0.012, "step": 33600 }, { "action_loss": 0.0189, "epoch": 3.163485945285325, "learning_rate": 1.9158538162935227e-05, "llm_loss": 0.0, "loss": 0.0189, "step": 33650 }, { "action_loss": 0.0206, "epoch": 3.168186518755288, "learning_rate": 1.9155881728264016e-05, "llm_loss": 0.0, "loss": 0.0206, "step": 33700 }, { "action_loss": 0.0187, "epoch": 3.1728870922252517, "learning_rate": 1.915322129186965e-05, "llm_loss": 0.0, "loss": 0.0187, "step": 33750 }, { "action_loss": 0.0289, "epoch": 3.177587665695215, "learning_rate": 1.915055685491492e-05, "llm_loss": 0.0, "loss": 0.0289, "step": 33800 }, { "action_loss": 0.0179, "epoch": 3.182288239165178, "learning_rate": 1.9147888418564364e-05, "llm_loss": 0.0, "loss": 0.0179, "step": 33850 }, { "action_loss": 0.0303, "epoch": 3.1869888126351413, "learning_rate": 1.914521598398426e-05, "llm_loss": 0.0, "loss": 0.0303, "step": 33900 }, { "action_loss": 0.037, "epoch": 3.191689386105105, "learning_rate": 1.9142539552342638e-05, "llm_loss": 0.0, "loss": 0.037, "step": 33950 }, { "action_loss": 0.0437, "epoch": 3.196389959575068, "learning_rate": 1.9139859124809275e-05, "llm_loss": 0.0, "loss": 0.0437, "step": 34000 }, { "action_loss": 0.0212, "epoch": 3.2010905330450314, "learning_rate": 1.91371747025557e-05, "llm_loss": 0.0, "loss": 0.0212, "step": 34050 }, { "action_loss": 0.028, "epoch": 3.2057911065149947, "learning_rate": 1.9134486286755174e-05, "llm_loss": 0.0, "loss": 0.028, "step": 34100 }, { "action_loss": 0.0322, "epoch": 3.2104916799849583, "learning_rate": 1.9131793878582715e-05, "llm_loss": 0.0, "loss": 0.0322, "step": 34150 }, { "action_loss": 0.0201, "epoch": 3.2151922534549215, "learning_rate": 1.912909747921508e-05, "llm_loss": 0.0, "loss": 0.0201, "step": 34200 }, { "action_loss": 0.0142, "epoch": 3.2198928269248848, "learning_rate": 1.912639708983078e-05, "llm_loss": 0.0, "loss": 0.0142, "step": 34250 }, { "action_loss": 0.0242, "epoch": 3.224593400394848, "learning_rate": 1.9123692711610058e-05, "llm_loss": 0.0, "loss": 0.0243, "step": 34300 }, { "action_loss": 0.0147, "epoch": 3.2292939738648117, "learning_rate": 1.9120984345734907e-05, "llm_loss": 0.0, "loss": 0.0147, "step": 34350 }, { "action_loss": 0.0164, "epoch": 3.233994547334775, "learning_rate": 1.9118271993389057e-05, "llm_loss": 0.0, "loss": 0.0164, "step": 34400 }, { "action_loss": 0.0235, "epoch": 3.238695120804738, "learning_rate": 1.9115555655757987e-05, "llm_loss": 0.0, "loss": 0.0235, "step": 34450 }, { "action_loss": 0.0216, "epoch": 3.2433956942747013, "learning_rate": 1.911283533402892e-05, "llm_loss": 0.0, "loss": 0.0216, "step": 34500 }, { "action_loss": 0.0105, "epoch": 3.248096267744665, "learning_rate": 1.911011102939081e-05, "llm_loss": 0.0, "loss": 0.0105, "step": 34550 }, { "action_loss": 0.0307, "epoch": 3.252796841214628, "learning_rate": 1.910738274303436e-05, "llm_loss": 0.0, "loss": 0.0307, "step": 34600 }, { "action_loss": 0.0282, "epoch": 3.2574974146845914, "learning_rate": 1.910465047615201e-05, "llm_loss": 0.0, "loss": 0.0282, "step": 34650 }, { "action_loss": 0.0173, "epoch": 3.262197988154555, "learning_rate": 1.910191422993794e-05, "llm_loss": 0.0, "loss": 0.0173, "step": 34700 }, { "action_loss": 0.0222, "epoch": 3.2668985616245183, "learning_rate": 1.909917400558807e-05, "llm_loss": 0.0, "loss": 0.0222, "step": 34750 }, { "action_loss": 0.033, "epoch": 3.2715991350944815, "learning_rate": 1.9096429804300062e-05, "llm_loss": 0.0, "loss": 0.033, "step": 34800 }, { "action_loss": 0.0285, "epoch": 3.2762997085644447, "learning_rate": 1.9093681627273306e-05, "llm_loss": 0.0, "loss": 0.0285, "step": 34850 }, { "action_loss": 0.0238, "epoch": 3.281000282034408, "learning_rate": 1.9090929475708943e-05, "llm_loss": 0.0, "loss": 0.0238, "step": 34900 }, { "action_loss": 0.0254, "epoch": 3.2857008555043716, "learning_rate": 1.9088173350809836e-05, "llm_loss": 0.0, "loss": 0.0254, "step": 34950 }, { "action_loss": 0.0169, "epoch": 3.290401428974335, "learning_rate": 1.9085413253780603e-05, "llm_loss": 0.0, "loss": 0.0169, "step": 35000 }, { "action_loss": 0.0146, "epoch": 3.295102002444298, "learning_rate": 1.9082649185827583e-05, "llm_loss": 0.0, "loss": 0.0146, "step": 35050 }, { "action_loss": 0.0206, "epoch": 3.2998025759142617, "learning_rate": 1.9079881148158854e-05, "llm_loss": 0.0, "loss": 0.0206, "step": 35100 }, { "action_loss": 0.0238, "epoch": 3.304503149384225, "learning_rate": 1.9077109141984235e-05, "llm_loss": 0.0, "loss": 0.0238, "step": 35150 }, { "action_loss": 0.0312, "epoch": 3.309203722854188, "learning_rate": 1.9074333168515272e-05, "llm_loss": 0.0, "loss": 0.0312, "step": 35200 }, { "action_loss": 0.024, "epoch": 3.3139042963241514, "learning_rate": 1.907155322896525e-05, "llm_loss": 0.0, "loss": 0.024, "step": 35250 }, { "action_loss": 0.0248, "epoch": 3.318604869794115, "learning_rate": 1.9068769324549186e-05, "llm_loss": 0.0, "loss": 0.0248, "step": 35300 }, { "action_loss": 0.0183, "epoch": 3.3233054432640783, "learning_rate": 1.9065981456483827e-05, "llm_loss": 0.0, "loss": 0.0183, "step": 35350 }, { "action_loss": 0.0175, "epoch": 3.3280060167340415, "learning_rate": 1.9063189625987655e-05, "llm_loss": 0.0, "loss": 0.0175, "step": 35400 }, { "action_loss": 0.0177, "epoch": 3.3327065902040047, "learning_rate": 1.906039383428089e-05, "llm_loss": 0.0, "loss": 0.0177, "step": 35450 }, { "action_loss": 0.0111, "epoch": 3.3374071636739684, "learning_rate": 1.9057594082585464e-05, "llm_loss": 0.0, "loss": 0.0111, "step": 35500 }, { "action_loss": 0.0231, "epoch": 3.3421077371439316, "learning_rate": 1.9054790372125066e-05, "llm_loss": 0.0, "loss": 0.0231, "step": 35550 }, { "action_loss": 0.0106, "epoch": 3.346808310613895, "learning_rate": 1.90519827041251e-05, "llm_loss": 0.0, "loss": 0.0106, "step": 35600 }, { "action_loss": 0.0258, "epoch": 3.351508884083858, "learning_rate": 1.904917107981269e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 35650 }, { "action_loss": 0.0249, "epoch": 3.3562094575538217, "learning_rate": 1.904635550041672e-05, "llm_loss": 0.0, "loss": 0.0249, "step": 35700 }, { "action_loss": 0.0304, "epoch": 3.360910031023785, "learning_rate": 1.9043535967167766e-05, "llm_loss": 0.0, "loss": 0.0304, "step": 35750 }, { "action_loss": 0.0269, "epoch": 3.365610604493748, "learning_rate": 1.904071248129816e-05, "llm_loss": 0.0, "loss": 0.0269, "step": 35800 }, { "action_loss": 0.0342, "epoch": 3.3703111779637114, "learning_rate": 1.9037885044041947e-05, "llm_loss": 0.0, "loss": 0.0342, "step": 35850 }, { "action_loss": 0.0146, "epoch": 3.375011751433675, "learning_rate": 1.9035053656634904e-05, "llm_loss": 0.0, "loss": 0.0146, "step": 35900 }, { "action_loss": 0.0242, "epoch": 3.3797123249036383, "learning_rate": 1.9032218320314535e-05, "llm_loss": 0.0, "loss": 0.0242, "step": 35950 }, { "action_loss": 0.0321, "epoch": 3.3844128983736015, "learning_rate": 1.902937903632007e-05, "llm_loss": 0.0, "loss": 0.0321, "step": 36000 }, { "action_loss": 0.025, "epoch": 3.389113471843565, "learning_rate": 1.9026535805892456e-05, "llm_loss": 0.0, "loss": 0.025, "step": 36050 }, { "action_loss": 0.0166, "epoch": 3.3938140453135284, "learning_rate": 1.902368863027438e-05, "llm_loss": 0.0, "loss": 0.0166, "step": 36100 }, { "action_loss": 0.0201, "epoch": 3.3985146187834916, "learning_rate": 1.902083751071024e-05, "llm_loss": 0.0, "loss": 0.0201, "step": 36150 }, { "action_loss": 0.032, "epoch": 3.403215192253455, "learning_rate": 1.9017982448446165e-05, "llm_loss": 0.0, "loss": 0.032, "step": 36200 }, { "action_loss": 0.022, "epoch": 3.407915765723418, "learning_rate": 1.9015123444730007e-05, "llm_loss": 0.0, "loss": 0.022, "step": 36250 }, { "action_loss": 0.0313, "epoch": 3.4126163391933817, "learning_rate": 1.901226050081134e-05, "llm_loss": 0.0, "loss": 0.0313, "step": 36300 }, { "action_loss": 0.028, "epoch": 3.417316912663345, "learning_rate": 1.900939361794145e-05, "llm_loss": 0.0, "loss": 0.028, "step": 36350 }, { "action_loss": 0.0159, "epoch": 3.422017486133308, "learning_rate": 1.9006522797373362e-05, "llm_loss": 0.0, "loss": 0.0159, "step": 36400 }, { "action_loss": 0.0107, "epoch": 3.426718059603272, "learning_rate": 1.900364804036181e-05, "llm_loss": 0.0, "loss": 0.0107, "step": 36450 }, { "action_loss": 0.0304, "epoch": 3.431418633073235, "learning_rate": 1.9000769348163255e-05, "llm_loss": 0.0, "loss": 0.0304, "step": 36500 }, { "action_loss": 0.011, "epoch": 3.4361192065431982, "learning_rate": 1.8997886722035873e-05, "llm_loss": 0.0, "loss": 0.011, "step": 36550 }, { "action_loss": 0.0174, "epoch": 3.4408197800131615, "learning_rate": 1.899500016323956e-05, "llm_loss": 0.0, "loss": 0.0174, "step": 36600 }, { "action_loss": 0.0278, "epoch": 3.445520353483125, "learning_rate": 1.8992109673035936e-05, "llm_loss": 0.0, "loss": 0.0278, "step": 36650 }, { "action_loss": 0.0187, "epoch": 3.4502209269530884, "learning_rate": 1.8989215252688335e-05, "llm_loss": 0.0, "loss": 0.0187, "step": 36700 }, { "action_loss": 0.0127, "epoch": 3.4549215004230516, "learning_rate": 1.8986316903461803e-05, "llm_loss": 0.0, "loss": 0.0127, "step": 36750 }, { "action_loss": 0.0316, "epoch": 3.459622073893015, "learning_rate": 1.8983414626623116e-05, "llm_loss": 0.0, "loss": 0.0316, "step": 36800 }, { "action_loss": 0.0349, "epoch": 3.4643226473629785, "learning_rate": 1.898050842344076e-05, "llm_loss": 0.0, "loss": 0.0349, "step": 36850 }, { "action_loss": 0.0284, "epoch": 3.4690232208329417, "learning_rate": 1.8977598295184935e-05, "llm_loss": 0.0, "loss": 0.0284, "step": 36900 }, { "action_loss": 0.0182, "epoch": 3.473723794302905, "learning_rate": 1.8974684243127556e-05, "llm_loss": 0.0, "loss": 0.0182, "step": 36950 }, { "action_loss": 0.0233, "epoch": 3.478424367772868, "learning_rate": 1.8971766268542262e-05, "llm_loss": 0.0, "loss": 0.0233, "step": 37000 }, { "action_loss": 0.0413, "epoch": 3.483124941242832, "learning_rate": 1.8968844372704394e-05, "llm_loss": 0.0, "loss": 0.0413, "step": 37050 }, { "action_loss": 0.0283, "epoch": 3.487825514712795, "learning_rate": 1.896591855689102e-05, "llm_loss": 0.0, "loss": 0.0283, "step": 37100 }, { "action_loss": 0.0115, "epoch": 3.4925260881827582, "learning_rate": 1.8962988822380905e-05, "llm_loss": 0.0, "loss": 0.0115, "step": 37150 }, { "action_loss": 0.0364, "epoch": 3.4972266616527214, "learning_rate": 1.8960055170454544e-05, "llm_loss": 0.0, "loss": 0.0364, "step": 37200 }, { "action_loss": 0.0287, "epoch": 3.501927235122685, "learning_rate": 1.895711760239413e-05, "llm_loss": 0.0, "loss": 0.0287, "step": 37250 }, { "action_loss": 0.0185, "epoch": 3.5066278085926483, "learning_rate": 1.8954176119483575e-05, "llm_loss": 0.0, "loss": 0.0185, "step": 37300 }, { "action_loss": 0.0146, "epoch": 3.5113283820626116, "learning_rate": 1.8951230723008507e-05, "llm_loss": 0.0, "loss": 0.0146, "step": 37350 }, { "action_loss": 0.0299, "epoch": 3.516028955532575, "learning_rate": 1.894828141425625e-05, "llm_loss": 0.0, "loss": 0.0299, "step": 37400 }, { "action_loss": 0.0308, "epoch": 3.5207295290025384, "learning_rate": 1.8945328194515847e-05, "llm_loss": 0.0, "loss": 0.0308, "step": 37450 }, { "action_loss": 0.0301, "epoch": 3.5254301024725017, "learning_rate": 1.8942371065078057e-05, "llm_loss": 0.0, "loss": 0.0301, "step": 37500 }, { "action_loss": 0.0366, "epoch": 3.530130675942465, "learning_rate": 1.8939410027235332e-05, "llm_loss": 0.0, "loss": 0.0366, "step": 37550 }, { "action_loss": 0.0212, "epoch": 3.534831249412428, "learning_rate": 1.8936445082281845e-05, "llm_loss": 0.0, "loss": 0.0212, "step": 37600 }, { "action_loss": 0.0206, "epoch": 3.5395318228823918, "learning_rate": 1.893347623151347e-05, "llm_loss": 0.0, "loss": 0.0206, "step": 37650 }, { "action_loss": 0.0312, "epoch": 3.544232396352355, "learning_rate": 1.8930503476227787e-05, "llm_loss": 0.0, "loss": 0.0312, "step": 37700 }, { "action_loss": 0.0187, "epoch": 3.548932969822318, "learning_rate": 1.8927526817724095e-05, "llm_loss": 0.0, "loss": 0.0187, "step": 37750 }, { "action_loss": 0.0344, "epoch": 3.553633543292282, "learning_rate": 1.8924546257303382e-05, "llm_loss": 0.0, "loss": 0.0344, "step": 37800 }, { "action_loss": 0.0244, "epoch": 3.558334116762245, "learning_rate": 1.8921561796268354e-05, "llm_loss": 0.0, "loss": 0.0244, "step": 37850 }, { "action_loss": 0.0235, "epoch": 3.5630346902322083, "learning_rate": 1.8918573435923414e-05, "llm_loss": 0.0, "loss": 0.0235, "step": 37900 }, { "action_loss": 0.0147, "epoch": 3.5677352637021715, "learning_rate": 1.8915581177574667e-05, "llm_loss": 0.0, "loss": 0.0147, "step": 37950 }, { "action_loss": 0.0139, "epoch": 3.5724358371721348, "learning_rate": 1.8912585022529943e-05, "llm_loss": 0.0, "loss": 0.0139, "step": 38000 }, { "action_loss": 0.0284, "epoch": 3.5771364106420984, "learning_rate": 1.890958497209874e-05, "llm_loss": 0.0, "loss": 0.0284, "step": 38050 }, { "action_loss": 0.0246, "epoch": 3.5818369841120616, "learning_rate": 1.890658102759229e-05, "llm_loss": 0.0, "loss": 0.0246, "step": 38100 }, { "action_loss": 0.0155, "epoch": 3.586537557582025, "learning_rate": 1.8903573190323508e-05, "llm_loss": 0.0, "loss": 0.0155, "step": 38150 }, { "action_loss": 0.0356, "epoch": 3.5912381310519885, "learning_rate": 1.8900561461607023e-05, "llm_loss": 0.0, "loss": 0.0356, "step": 38200 }, { "action_loss": 0.018, "epoch": 3.5959387045219517, "learning_rate": 1.8897545842759154e-05, "llm_loss": 0.0, "loss": 0.0181, "step": 38250 }, { "action_loss": 0.0151, "epoch": 3.600639277991915, "learning_rate": 1.8894526335097928e-05, "llm_loss": 0.0, "loss": 0.0151, "step": 38300 }, { "action_loss": 0.027, "epoch": 3.605339851461878, "learning_rate": 1.889150293994307e-05, "llm_loss": 0.0, "loss": 0.027, "step": 38350 }, { "action_loss": 0.0195, "epoch": 3.6100404249318414, "learning_rate": 1.8888475658615993e-05, "llm_loss": 0.0, "loss": 0.0195, "step": 38400 }, { "action_loss": 0.0403, "epoch": 3.614740998401805, "learning_rate": 1.888544449243983e-05, "llm_loss": 0.0, "loss": 0.0403, "step": 38450 }, { "action_loss": 0.0309, "epoch": 3.6194415718717683, "learning_rate": 1.888240944273939e-05, "llm_loss": 0.0, "loss": 0.0309, "step": 38500 }, { "action_loss": 0.0272, "epoch": 3.6241421453417315, "learning_rate": 1.8879370510841202e-05, "llm_loss": 0.0, "loss": 0.0272, "step": 38550 }, { "action_loss": 0.0221, "epoch": 3.628842718811695, "learning_rate": 1.8876327698073467e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 38600 }, { "action_loss": 0.0213, "epoch": 3.6335432922816584, "learning_rate": 1.88732810057661e-05, "llm_loss": 0.0, "loss": 0.0213, "step": 38650 }, { "action_loss": 0.0316, "epoch": 3.6382438657516216, "learning_rate": 1.887023043525071e-05, "llm_loss": 0.0, "loss": 0.0316, "step": 38700 }, { "action_loss": 0.0135, "epoch": 3.6429444392215853, "learning_rate": 1.886717598786059e-05, "llm_loss": 0.0, "loss": 0.0135, "step": 38750 }, { "action_loss": 0.02, "epoch": 3.6476450126915485, "learning_rate": 1.8864117664930738e-05, "llm_loss": 0.0, "loss": 0.02, "step": 38800 }, { "action_loss": 0.0238, "epoch": 3.6523455861615117, "learning_rate": 1.8861055467797843e-05, "llm_loss": 0.0, "loss": 0.0238, "step": 38850 }, { "action_loss": 0.0178, "epoch": 3.657046159631475, "learning_rate": 1.8857989397800284e-05, "llm_loss": 0.0, "loss": 0.0178, "step": 38900 }, { "action_loss": 0.0117, "epoch": 3.661746733101438, "learning_rate": 1.885491945627814e-05, "llm_loss": 0.0, "loss": 0.0117, "step": 38950 }, { "action_loss": 0.0077, "epoch": 3.666447306571402, "learning_rate": 1.885184564457317e-05, "llm_loss": 0.0, "loss": 0.0077, "step": 39000 }, { "action_loss": 0.014, "epoch": 3.671147880041365, "learning_rate": 1.8848767964028846e-05, "llm_loss": 0.0, "loss": 0.014, "step": 39050 }, { "action_loss": 0.0198, "epoch": 3.6758484535113283, "learning_rate": 1.8845686415990298e-05, "llm_loss": 0.0, "loss": 0.0198, "step": 39100 }, { "action_loss": 0.0355, "epoch": 3.680549026981292, "learning_rate": 1.884260100180438e-05, "llm_loss": 0.0, "loss": 0.0355, "step": 39150 }, { "action_loss": 0.0232, "epoch": 3.685249600451255, "learning_rate": 1.883951172281962e-05, "llm_loss": 0.0, "loss": 0.0232, "step": 39200 }, { "action_loss": 0.0088, "epoch": 3.6899501739212184, "learning_rate": 1.8836418580386227e-05, "llm_loss": 0.0, "loss": 0.0088, "step": 39250 }, { "action_loss": 0.0283, "epoch": 3.6946507473911816, "learning_rate": 1.8833321575856117e-05, "llm_loss": 0.0, "loss": 0.0283, "step": 39300 }, { "action_loss": 0.0255, "epoch": 3.699351320861145, "learning_rate": 1.883022071058288e-05, "llm_loss": 0.0, "loss": 0.0255, "step": 39350 }, { "action_loss": 0.0277, "epoch": 3.7040518943311085, "learning_rate": 1.8827115985921806e-05, "llm_loss": 0.0, "loss": 0.0277, "step": 39400 }, { "action_loss": 0.0305, "epoch": 3.7087524678010717, "learning_rate": 1.8824007403229852e-05, "llm_loss": 0.0, "loss": 0.0305, "step": 39450 }, { "action_loss": 0.0433, "epoch": 3.713453041271035, "learning_rate": 1.882089496386568e-05, "llm_loss": 0.0, "loss": 0.0433, "step": 39500 }, { "action_loss": 0.0236, "epoch": 3.7181536147409986, "learning_rate": 1.8817778669189633e-05, "llm_loss": 0.0, "loss": 0.0236, "step": 39550 }, { "action_loss": 0.0312, "epoch": 3.722854188210962, "learning_rate": 1.881465852056373e-05, "llm_loss": 0.0, "loss": 0.0312, "step": 39600 }, { "action_loss": 0.0263, "epoch": 3.727554761680925, "learning_rate": 1.881153451935169e-05, "llm_loss": 0.0, "loss": 0.0263, "step": 39650 }, { "action_loss": 0.017, "epoch": 3.7322553351508883, "learning_rate": 1.8808406666918903e-05, "llm_loss": 0.0, "loss": 0.017, "step": 39700 }, { "action_loss": 0.0235, "epoch": 3.7369559086208515, "learning_rate": 1.8805274964632444e-05, "llm_loss": 0.0, "loss": 0.0235, "step": 39750 }, { "action_loss": 0.027, "epoch": 3.741656482090815, "learning_rate": 1.8802139413861075e-05, "llm_loss": 0.0, "loss": 0.027, "step": 39800 }, { "action_loss": 0.0175, "epoch": 3.7463570555607784, "learning_rate": 1.8799000015975242e-05, "llm_loss": 0.0, "loss": 0.0175, "step": 39850 }, { "action_loss": 0.0324, "epoch": 3.7510576290307416, "learning_rate": 1.8795856772347064e-05, "llm_loss": 0.0, "loss": 0.0324, "step": 39900 }, { "action_loss": 0.0413, "epoch": 3.7557582025007052, "learning_rate": 1.8792709684350344e-05, "llm_loss": 0.0, "loss": 0.0413, "step": 39950 }, { "action_loss": 0.02, "epoch": 3.7604587759706685, "learning_rate": 1.8789558753360572e-05, "llm_loss": 0.0, "loss": 0.02, "step": 40000 }, { "action_loss": 0.012, "epoch": 3.7651593494406317, "learning_rate": 1.8786403980754908e-05, "llm_loss": 0.0, "loss": 0.012, "step": 40050 }, { "action_loss": 0.0238, "epoch": 3.7698599229105954, "learning_rate": 1.8783245367912196e-05, "llm_loss": 0.0, "loss": 0.0238, "step": 40100 }, { "action_loss": 0.026, "epoch": 3.7745604963805586, "learning_rate": 1.878008291621296e-05, "llm_loss": 0.0, "loss": 0.026, "step": 40150 }, { "action_loss": 0.0254, "epoch": 3.779261069850522, "learning_rate": 1.87769166270394e-05, "llm_loss": 0.0, "loss": 0.0254, "step": 40200 }, { "action_loss": 0.0271, "epoch": 3.783961643320485, "learning_rate": 1.8773746501775388e-05, "llm_loss": 0.0, "loss": 0.0271, "step": 40250 }, { "action_loss": 0.0228, "epoch": 3.7886622167904482, "learning_rate": 1.877057254180648e-05, "llm_loss": 0.0, "loss": 0.0228, "step": 40300 }, { "action_loss": 0.0208, "epoch": 3.793362790260412, "learning_rate": 1.8767394748519908e-05, "llm_loss": 0.0, "loss": 0.0208, "step": 40350 }, { "action_loss": 0.0222, "epoch": 3.798063363730375, "learning_rate": 1.8764213123304576e-05, "llm_loss": 0.0, "loss": 0.0222, "step": 40400 }, { "action_loss": 0.0406, "epoch": 3.8027639372003383, "learning_rate": 1.8761027667551063e-05, "llm_loss": 0.0, "loss": 0.0406, "step": 40450 }, { "action_loss": 0.0268, "epoch": 3.807464510670302, "learning_rate": 1.8757838382651622e-05, "llm_loss": 0.0, "loss": 0.0268, "step": 40500 }, { "action_loss": 0.0178, "epoch": 3.8121650841402652, "learning_rate": 1.875464527000018e-05, "llm_loss": 0.0, "loss": 0.0178, "step": 40550 }, { "action_loss": 0.0393, "epoch": 3.8168656576102284, "learning_rate": 1.875144833099234e-05, "llm_loss": 0.0, "loss": 0.0393, "step": 40600 }, { "action_loss": 0.037, "epoch": 3.8215662310801917, "learning_rate": 1.874824756702537e-05, "llm_loss": 0.0, "loss": 0.037, "step": 40650 }, { "action_loss": 0.0269, "epoch": 3.826266804550155, "learning_rate": 1.8745042979498223e-05, "llm_loss": 0.0, "loss": 0.0269, "step": 40700 }, { "action_loss": 0.0281, "epoch": 3.8309673780201186, "learning_rate": 1.874183456981151e-05, "llm_loss": 0.0, "loss": 0.0281, "step": 40750 }, { "action_loss": 0.0403, "epoch": 3.8356679514900818, "learning_rate": 1.873862233936751e-05, "llm_loss": 0.0, "loss": 0.0403, "step": 40800 }, { "action_loss": 0.0339, "epoch": 3.840368524960045, "learning_rate": 1.8735406289570193e-05, "llm_loss": 0.0, "loss": 0.0339, "step": 40850 }, { "action_loss": 0.0176, "epoch": 3.8450690984300087, "learning_rate": 1.8732186421825174e-05, "llm_loss": 0.0, "loss": 0.0176, "step": 40900 }, { "action_loss": 0.0311, "epoch": 3.849769671899972, "learning_rate": 1.8728962737539752e-05, "llm_loss": 0.0, "loss": 0.0311, "step": 40950 }, { "action_loss": 0.0194, "epoch": 3.854470245369935, "learning_rate": 1.8725735238122888e-05, "llm_loss": 0.0, "loss": 0.0194, "step": 41000 }, { "action_loss": 0.0303, "epoch": 3.8591708188398983, "learning_rate": 1.872250392498521e-05, "llm_loss": 0.0, "loss": 0.0303, "step": 41050 }, { "action_loss": 0.0143, "epoch": 3.8638713923098615, "learning_rate": 1.8719268799539022e-05, "llm_loss": 0.0, "loss": 0.0143, "step": 41100 }, { "action_loss": 0.0323, "epoch": 3.868571965779825, "learning_rate": 1.8716029863198274e-05, "llm_loss": 0.0, "loss": 0.0323, "step": 41150 }, { "action_loss": 0.031, "epoch": 3.8732725392497884, "learning_rate": 1.8712787117378606e-05, "llm_loss": 0.0, "loss": 0.031, "step": 41200 }, { "action_loss": 0.0232, "epoch": 3.8779731127197516, "learning_rate": 1.8709540563497312e-05, "llm_loss": 0.0, "loss": 0.0232, "step": 41250 }, { "action_loss": 0.0132, "epoch": 3.8826736861897153, "learning_rate": 1.870629020297334e-05, "llm_loss": 0.0, "loss": 0.0132, "step": 41300 }, { "action_loss": 0.0163, "epoch": 3.8873742596596785, "learning_rate": 1.8703036037227314e-05, "llm_loss": 0.0, "loss": 0.0163, "step": 41350 }, { "action_loss": 0.0151, "epoch": 3.8920748331296418, "learning_rate": 1.8699778067681524e-05, "llm_loss": 0.0, "loss": 0.0151, "step": 41400 }, { "action_loss": 0.0259, "epoch": 3.8967754065996054, "learning_rate": 1.8696516295759914e-05, "llm_loss": 0.0, "loss": 0.0259, "step": 41450 }, { "action_loss": 0.0293, "epoch": 3.9014759800695686, "learning_rate": 1.8693250722888095e-05, "llm_loss": 0.0, "loss": 0.0293, "step": 41500 }, { "action_loss": 0.0285, "epoch": 3.906176553539532, "learning_rate": 1.8689981350493337e-05, "llm_loss": 0.0, "loss": 0.0285, "step": 41550 }, { "action_loss": 0.0203, "epoch": 3.910877127009495, "learning_rate": 1.8686708180004568e-05, "llm_loss": 0.0, "loss": 0.0203, "step": 41600 }, { "action_loss": 0.0294, "epoch": 3.9155777004794583, "learning_rate": 1.868343121285238e-05, "llm_loss": 0.0, "loss": 0.0294, "step": 41650 }, { "action_loss": 0.0413, "epoch": 3.920278273949422, "learning_rate": 1.8680150450469022e-05, "llm_loss": 0.0, "loss": 0.0413, "step": 41700 }, { "action_loss": 0.0252, "epoch": 3.924978847419385, "learning_rate": 1.867686589428841e-05, "llm_loss": 0.0, "loss": 0.0252, "step": 41750 }, { "action_loss": 0.0076, "epoch": 3.9296794208893484, "learning_rate": 1.8673577545746107e-05, "llm_loss": 0.0, "loss": 0.0076, "step": 41800 }, { "action_loss": 0.0202, "epoch": 3.934379994359312, "learning_rate": 1.8670285406279335e-05, "llm_loss": 0.0, "loss": 0.0202, "step": 41850 }, { "action_loss": 0.0111, "epoch": 3.9390805678292753, "learning_rate": 1.866698947732698e-05, "llm_loss": 0.0, "loss": 0.0111, "step": 41900 }, { "action_loss": 0.0174, "epoch": 3.9437811412992385, "learning_rate": 1.8663689760329576e-05, "llm_loss": 0.0, "loss": 0.0174, "step": 41950 }, { "action_loss": 0.0274, "epoch": 3.9484817147692017, "learning_rate": 1.866038625672932e-05, "llm_loss": 0.0, "loss": 0.0274, "step": 42000 }, { "action_loss": 0.0105, "epoch": 3.953182288239165, "learning_rate": 1.8657078967970063e-05, "llm_loss": 0.0, "loss": 0.0105, "step": 42050 }, { "action_loss": 0.0208, "epoch": 3.9578828617091286, "learning_rate": 1.86537678954973e-05, "llm_loss": 0.0, "loss": 0.0208, "step": 42100 }, { "action_loss": 0.0212, "epoch": 3.962583435179092, "learning_rate": 1.8650453040758194e-05, "llm_loss": 0.0, "loss": 0.0212, "step": 42150 }, { "action_loss": 0.0314, "epoch": 3.967284008649055, "learning_rate": 1.8647134405201554e-05, "llm_loss": 0.0, "loss": 0.0314, "step": 42200 }, { "action_loss": 0.0176, "epoch": 3.9719845821190187, "learning_rate": 1.864381199027784e-05, "llm_loss": 0.0, "loss": 0.0176, "step": 42250 }, { "action_loss": 0.0241, "epoch": 3.976685155588982, "learning_rate": 1.8640485797439166e-05, "llm_loss": 0.0, "loss": 0.0241, "step": 42300 }, { "action_loss": 0.0083, "epoch": 3.981385729058945, "learning_rate": 1.8637155828139297e-05, "llm_loss": 0.0, "loss": 0.0083, "step": 42350 }, { "action_loss": 0.0235, "epoch": 3.9860863025289084, "learning_rate": 1.8633822083833654e-05, "llm_loss": 0.0, "loss": 0.0235, "step": 42400 }, { "action_loss": 0.0213, "epoch": 3.9907868759988716, "learning_rate": 1.8630484565979298e-05, "llm_loss": 0.0, "loss": 0.0213, "step": 42450 }, { "action_loss": 0.0177, "epoch": 3.9954874494688353, "learning_rate": 1.8627143276034943e-05, "llm_loss": 0.0, "loss": 0.0177, "step": 42500 }, { "action_loss": 0.017, "epoch": 4.000188022938799, "learning_rate": 1.8623798215460957e-05, "llm_loss": 0.0, "loss": 0.017, "step": 42550 }, { "action_loss": 0.0427, "epoch": 4.004888596408762, "learning_rate": 1.862044938571935e-05, "llm_loss": 0.0, "loss": 0.0427, "step": 42600 }, { "action_loss": 0.0177, "epoch": 4.009589169878725, "learning_rate": 1.8617096788273778e-05, "llm_loss": 0.0, "loss": 0.0177, "step": 42650 }, { "action_loss": 0.0174, "epoch": 4.014289743348688, "learning_rate": 1.8613740424589553e-05, "llm_loss": 0.0, "loss": 0.0174, "step": 42700 }, { "action_loss": 0.0137, "epoch": 4.018990316818652, "learning_rate": 1.861038029613362e-05, "llm_loss": 0.0, "loss": 0.0137, "step": 42750 }, { "action_loss": 0.0175, "epoch": 4.0236908902886155, "learning_rate": 1.860701640437458e-05, "llm_loss": 0.0, "loss": 0.0175, "step": 42800 }, { "action_loss": 0.0323, "epoch": 4.028391463758578, "learning_rate": 1.8603648750782676e-05, "llm_loss": 0.0, "loss": 0.0323, "step": 42850 }, { "action_loss": 0.0147, "epoch": 4.033092037228542, "learning_rate": 1.8600277336829796e-05, "llm_loss": 0.0, "loss": 0.0147, "step": 42900 }, { "action_loss": 0.0239, "epoch": 4.037792610698506, "learning_rate": 1.859690216398946e-05, "llm_loss": 0.0, "loss": 0.0239, "step": 42950 }, { "action_loss": 0.0211, "epoch": 4.042493184168468, "learning_rate": 1.8593523233736855e-05, "llm_loss": 0.0, "loss": 0.0211, "step": 43000 }, { "action_loss": 0.0401, "epoch": 4.047193757638432, "learning_rate": 1.859014054754879e-05, "llm_loss": 0.0, "loss": 0.0401, "step": 43050 }, { "action_loss": 0.0169, "epoch": 4.051894331108395, "learning_rate": 1.8586754106903714e-05, "llm_loss": 0.0, "loss": 0.0169, "step": 43100 }, { "action_loss": 0.0236, "epoch": 4.0565949045783585, "learning_rate": 1.858336391328173e-05, "llm_loss": 0.0, "loss": 0.0236, "step": 43150 }, { "action_loss": 0.0228, "epoch": 4.061295478048322, "learning_rate": 1.857996996816458e-05, "llm_loss": 0.0, "loss": 0.0228, "step": 43200 }, { "action_loss": 0.0239, "epoch": 4.065996051518285, "learning_rate": 1.857657227303564e-05, "llm_loss": 0.0, "loss": 0.0239, "step": 43250 }, { "action_loss": 0.0277, "epoch": 4.070696624988249, "learning_rate": 1.857317082937992e-05, "llm_loss": 0.0, "loss": 0.0277, "step": 43300 }, { "action_loss": 0.0274, "epoch": 4.075397198458212, "learning_rate": 1.8569765638684086e-05, "llm_loss": 0.0, "loss": 0.0274, "step": 43350 }, { "action_loss": 0.0392, "epoch": 4.080097771928175, "learning_rate": 1.8566356702436422e-05, "llm_loss": 0.0, "loss": 0.0392, "step": 43400 }, { "action_loss": 0.0234, "epoch": 4.084798345398139, "learning_rate": 1.8562944022126857e-05, "llm_loss": 0.0, "loss": 0.0234, "step": 43450 }, { "action_loss": 0.02, "epoch": 4.089498918868102, "learning_rate": 1.8559527599246965e-05, "llm_loss": 0.0, "loss": 0.02, "step": 43500 }, { "action_loss": 0.0272, "epoch": 4.094199492338065, "learning_rate": 1.8556107435289936e-05, "llm_loss": 0.0, "loss": 0.0272, "step": 43550 }, { "action_loss": 0.0264, "epoch": 4.098900065808029, "learning_rate": 1.8552683531750618e-05, "llm_loss": 0.0, "loss": 0.0264, "step": 43600 }, { "action_loss": 0.0255, "epoch": 4.103600639277992, "learning_rate": 1.8549255890125476e-05, "llm_loss": 0.0, "loss": 0.0255, "step": 43650 }, { "action_loss": 0.0235, "epoch": 4.108301212747955, "learning_rate": 1.8545824511912615e-05, "llm_loss": 0.0, "loss": 0.0235, "step": 43700 }, { "action_loss": 0.0392, "epoch": 4.113001786217919, "learning_rate": 1.8542389398611774e-05, "llm_loss": 0.0, "loss": 0.0392, "step": 43750 }, { "action_loss": 0.02, "epoch": 4.117702359687882, "learning_rate": 1.853895055172433e-05, "llm_loss": 0.0, "loss": 0.02, "step": 43800 }, { "action_loss": 0.0207, "epoch": 4.122402933157845, "learning_rate": 1.8535507972753275e-05, "llm_loss": 0.0, "loss": 0.0207, "step": 43850 }, { "action_loss": 0.0126, "epoch": 4.127103506627809, "learning_rate": 1.853206166320325e-05, "llm_loss": 0.0, "loss": 0.0126, "step": 43900 }, { "action_loss": 0.0272, "epoch": 4.131804080097772, "learning_rate": 1.8528611624580517e-05, "llm_loss": 0.0, "loss": 0.0272, "step": 43950 }, { "action_loss": 0.02, "epoch": 4.1365046535677354, "learning_rate": 1.8525226970237096e-05, "llm_loss": 0.0, "loss": 0.02, "step": 44000 }, { "action_loss": 0.0333, "epoch": 4.141205227037698, "learning_rate": 1.8521769552500558e-05, "llm_loss": 0.0, "loss": 0.0333, "step": 44050 }, { "action_loss": 0.0176, "epoch": 4.145905800507662, "learning_rate": 1.8518308410189644e-05, "llm_loss": 0.0, "loss": 0.0176, "step": 44100 }, { "action_loss": 0.0271, "epoch": 4.1506063739776256, "learning_rate": 1.8514843544817096e-05, "llm_loss": 0.0, "loss": 0.0271, "step": 44150 }, { "action_loss": 0.0177, "epoch": 4.155306947447588, "learning_rate": 1.8511374957897294e-05, "llm_loss": 0.0, "loss": 0.0177, "step": 44200 }, { "action_loss": 0.0182, "epoch": 4.160007520917552, "learning_rate": 1.8507902650946238e-05, "llm_loss": 0.0, "loss": 0.0182, "step": 44250 }, { "action_loss": 0.018, "epoch": 4.164708094387516, "learning_rate": 1.850442662548155e-05, "llm_loss": 0.0, "loss": 0.018, "step": 44300 }, { "action_loss": 0.0158, "epoch": 4.169408667857478, "learning_rate": 1.8500946883022486e-05, "llm_loss": 0.0, "loss": 0.0158, "step": 44350 }, { "action_loss": 0.0371, "epoch": 4.174109241327442, "learning_rate": 1.849746342508992e-05, "llm_loss": 0.0, "loss": 0.0371, "step": 44400 }, { "action_loss": 0.0182, "epoch": 4.178809814797405, "learning_rate": 1.8493976253206355e-05, "llm_loss": 0.0, "loss": 0.0182, "step": 44450 }, { "action_loss": 0.0179, "epoch": 4.1835103882673685, "learning_rate": 1.8490485368895914e-05, "llm_loss": 0.0, "loss": 0.0179, "step": 44500 }, { "action_loss": 0.0254, "epoch": 4.188210961737332, "learning_rate": 1.848699077368434e-05, "llm_loss": 0.0, "loss": 0.0254, "step": 44550 }, { "action_loss": 0.015, "epoch": 4.192911535207295, "learning_rate": 1.8483492469099008e-05, "llm_loss": 0.0, "loss": 0.015, "step": 44600 }, { "action_loss": 0.0243, "epoch": 4.197612108677259, "learning_rate": 1.84799904566689e-05, "llm_loss": 0.0, "loss": 0.0243, "step": 44650 }, { "action_loss": 0.0277, "epoch": 4.202312682147222, "learning_rate": 1.8476484737924632e-05, "llm_loss": 0.0, "loss": 0.0277, "step": 44700 }, { "action_loss": 0.0112, "epoch": 4.207013255617185, "learning_rate": 1.8472975314398432e-05, "llm_loss": 0.0, "loss": 0.0112, "step": 44750 }, { "action_loss": 0.033, "epoch": 4.211713829087149, "learning_rate": 1.846946218762415e-05, "llm_loss": 0.0, "loss": 0.033, "step": 44800 }, { "action_loss": 0.0251, "epoch": 4.2164144025571115, "learning_rate": 1.846594535913725e-05, "llm_loss": 0.0, "loss": 0.0251, "step": 44850 }, { "action_loss": 0.0201, "epoch": 4.221114976027075, "learning_rate": 1.846242483047482e-05, "llm_loss": 0.0, "loss": 0.0201, "step": 44900 }, { "action_loss": 0.0174, "epoch": 4.225815549497039, "learning_rate": 1.8458900603175564e-05, "llm_loss": 0.0, "loss": 0.0174, "step": 44950 }, { "action_loss": 0.025, "epoch": 4.230516122967002, "learning_rate": 1.8455372678779796e-05, "llm_loss": 0.0, "loss": 0.025, "step": 45000 }, { "action_loss": 0.0185, "epoch": 4.235216696436965, "learning_rate": 1.845184105882946e-05, "llm_loss": 0.0, "loss": 0.0185, "step": 45050 }, { "action_loss": 0.0353, "epoch": 4.239917269906929, "learning_rate": 1.84483057448681e-05, "llm_loss": 0.0, "loss": 0.0353, "step": 45100 }, { "action_loss": 0.0374, "epoch": 4.244617843376892, "learning_rate": 1.8444766738440884e-05, "llm_loss": 0.0, "loss": 0.0374, "step": 45150 }, { "action_loss": 0.0267, "epoch": 4.249318416846855, "learning_rate": 1.844122404109459e-05, "llm_loss": 0.0, "loss": 0.0267, "step": 45200 }, { "action_loss": 0.0233, "epoch": 4.254018990316819, "learning_rate": 1.8437677654377606e-05, "llm_loss": 0.0, "loss": 0.0233, "step": 45250 }, { "action_loss": 0.0192, "epoch": 4.258719563786782, "learning_rate": 1.8434127579839942e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 45300 }, { "action_loss": 0.0243, "epoch": 4.2634201372567455, "learning_rate": 1.8430573819033215e-05, "llm_loss": 0.0, "loss": 0.0243, "step": 45350 }, { "action_loss": 0.0139, "epoch": 4.268120710726708, "learning_rate": 1.8427016373510646e-05, "llm_loss": 0.0, "loss": 0.0139, "step": 45400 }, { "action_loss": 0.0302, "epoch": 4.272821284196672, "learning_rate": 1.8423455244827078e-05, "llm_loss": 0.0, "loss": 0.0302, "step": 45450 }, { "action_loss": 0.0271, "epoch": 4.277521857666636, "learning_rate": 1.841989043453896e-05, "llm_loss": 0.0, "loss": 0.0271, "step": 45500 }, { "action_loss": 0.0298, "epoch": 4.282222431136598, "learning_rate": 1.8416321944204343e-05, "llm_loss": 0.0, "loss": 0.0298, "step": 45550 }, { "action_loss": 0.0391, "epoch": 4.286923004606562, "learning_rate": 1.84127497753829e-05, "llm_loss": 0.0, "loss": 0.0391, "step": 45600 }, { "action_loss": 0.0389, "epoch": 4.291623578076526, "learning_rate": 1.8409173929635894e-05, "llm_loss": 0.0, "loss": 0.0389, "step": 45650 }, { "action_loss": 0.0173, "epoch": 4.2963241515464885, "learning_rate": 1.8405594408526212e-05, "llm_loss": 0.0, "loss": 0.0173, "step": 45700 }, { "action_loss": 0.0239, "epoch": 4.301024725016452, "learning_rate": 1.840201121361834e-05, "llm_loss": 0.0, "loss": 0.0239, "step": 45750 }, { "action_loss": 0.0263, "epoch": 4.305725298486415, "learning_rate": 1.839842434647837e-05, "llm_loss": 0.0, "loss": 0.0263, "step": 45800 }, { "action_loss": 0.0235, "epoch": 4.310425871956379, "learning_rate": 1.8394833808673995e-05, "llm_loss": 0.0, "loss": 0.0235, "step": 45850 }, { "action_loss": 0.021, "epoch": 4.315126445426342, "learning_rate": 1.839123960177452e-05, "llm_loss": 0.0, "loss": 0.021, "step": 45900 }, { "action_loss": 0.0268, "epoch": 4.319827018896305, "learning_rate": 1.8387641727350853e-05, "llm_loss": 0.0, "loss": 0.0268, "step": 45950 }, { "action_loss": 0.0471, "epoch": 4.324527592366269, "learning_rate": 1.8384040186975498e-05, "llm_loss": 0.0, "loss": 0.0471, "step": 46000 }, { "action_loss": 0.0177, "epoch": 4.329228165836232, "learning_rate": 1.8380434982222564e-05, "llm_loss": 0.0, "loss": 0.0177, "step": 46050 }, { "action_loss": 0.0168, "epoch": 4.333928739306195, "learning_rate": 1.8376826114667764e-05, "llm_loss": 0.0, "loss": 0.0168, "step": 46100 }, { "action_loss": 0.022, "epoch": 4.338629312776159, "learning_rate": 1.8373213585888415e-05, "llm_loss": 0.0, "loss": 0.022, "step": 46150 }, { "action_loss": 0.023, "epoch": 4.3433298862461225, "learning_rate": 1.8369597397463427e-05, "llm_loss": 0.0, "loss": 0.023, "step": 46200 }, { "action_loss": 0.0147, "epoch": 4.348030459716085, "learning_rate": 1.836597755097331e-05, "llm_loss": 0.0, "loss": 0.0147, "step": 46250 }, { "action_loss": 0.0165, "epoch": 4.352731033186049, "learning_rate": 1.8362354048000174e-05, "llm_loss": 0.0, "loss": 0.0165, "step": 46300 }, { "action_loss": 0.0278, "epoch": 4.357431606656012, "learning_rate": 1.8358726890127733e-05, "llm_loss": 0.0, "loss": 0.0278, "step": 46350 }, { "action_loss": 0.0247, "epoch": 4.362132180125975, "learning_rate": 1.8355096078941294e-05, "llm_loss": 0.0, "loss": 0.0247, "step": 46400 }, { "action_loss": 0.0336, "epoch": 4.366832753595939, "learning_rate": 1.8351461616027756e-05, "llm_loss": 0.0, "loss": 0.0336, "step": 46450 }, { "action_loss": 0.0241, "epoch": 4.371533327065902, "learning_rate": 1.8347823502975625e-05, "llm_loss": 0.0, "loss": 0.0241, "step": 46500 }, { "action_loss": 0.0218, "epoch": 4.3762339005358655, "learning_rate": 1.834418174137499e-05, "llm_loss": 0.0, "loss": 0.0218, "step": 46550 }, { "action_loss": 0.0341, "epoch": 4.380934474005828, "learning_rate": 1.8340536332817544e-05, "llm_loss": 0.0, "loss": 0.0341, "step": 46600 }, { "action_loss": 0.0132, "epoch": 4.385635047475792, "learning_rate": 1.833688727889657e-05, "llm_loss": 0.0, "loss": 0.0132, "step": 46650 }, { "action_loss": 0.0221, "epoch": 4.390335620945756, "learning_rate": 1.8333234581206943e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 46700 }, { "action_loss": 0.0255, "epoch": 4.395036194415718, "learning_rate": 1.8329578241345136e-05, "llm_loss": 0.0, "loss": 0.0255, "step": 46750 }, { "action_loss": 0.0307, "epoch": 4.399736767885682, "learning_rate": 1.8325918260909207e-05, "llm_loss": 0.0, "loss": 0.0307, "step": 46800 }, { "action_loss": 0.0171, "epoch": 4.404437341355646, "learning_rate": 1.8322254641498807e-05, "llm_loss": 0.0, "loss": 0.0171, "step": 46850 }, { "action_loss": 0.0243, "epoch": 4.4091379148256085, "learning_rate": 1.8318587384715184e-05, "llm_loss": 0.0, "loss": 0.0243, "step": 46900 }, { "action_loss": 0.0302, "epoch": 4.413838488295572, "learning_rate": 1.831491649216117e-05, "llm_loss": 0.0, "loss": 0.0302, "step": 46950 }, { "action_loss": 0.024, "epoch": 4.418539061765536, "learning_rate": 1.831124196544118e-05, "llm_loss": 0.0, "loss": 0.024, "step": 47000 }, { "action_loss": 0.0103, "epoch": 4.423239635235499, "learning_rate": 1.8307563806161232e-05, "llm_loss": 0.0, "loss": 0.0103, "step": 47050 }, { "action_loss": 0.0202, "epoch": 4.427940208705462, "learning_rate": 1.830388201592892e-05, "llm_loss": 0.0, "loss": 0.0202, "step": 47100 }, { "action_loss": 0.0272, "epoch": 4.432640782175425, "learning_rate": 1.8300196596353425e-05, "llm_loss": 0.0, "loss": 0.0272, "step": 47150 }, { "action_loss": 0.0233, "epoch": 4.437341355645389, "learning_rate": 1.829650754904553e-05, "llm_loss": 0.0, "loss": 0.0233, "step": 47200 }, { "action_loss": 0.0327, "epoch": 4.442041929115352, "learning_rate": 1.829281487561758e-05, "llm_loss": 0.0, "loss": 0.0327, "step": 47250 }, { "action_loss": 0.0178, "epoch": 4.446742502585315, "learning_rate": 1.8289118577683517e-05, "llm_loss": 0.0, "loss": 0.0178, "step": 47300 }, { "action_loss": 0.023, "epoch": 4.451443076055279, "learning_rate": 1.8285418656858874e-05, "llm_loss": 0.0, "loss": 0.023, "step": 47350 }, { "action_loss": 0.0198, "epoch": 4.4561436495252424, "learning_rate": 1.8281715114760756e-05, "llm_loss": 0.0, "loss": 0.0198, "step": 47400 }, { "action_loss": 0.044, "epoch": 4.460844222995205, "learning_rate": 1.827800795300785e-05, "llm_loss": 0.0, "loss": 0.044, "step": 47450 }, { "action_loss": 0.0358, "epoch": 4.465544796465169, "learning_rate": 1.827429717322044e-05, "llm_loss": 0.0, "loss": 0.0358, "step": 47500 }, { "action_loss": 0.031, "epoch": 4.470245369935132, "learning_rate": 1.8270582777020377e-05, "llm_loss": 0.0, "loss": 0.031, "step": 47550 }, { "action_loss": 0.0288, "epoch": 4.474945943405095, "learning_rate": 1.8266864766031093e-05, "llm_loss": 0.0, "loss": 0.0288, "step": 47600 }, { "action_loss": 0.0208, "epoch": 4.479646516875059, "learning_rate": 1.8263143141877604e-05, "llm_loss": 0.0, "loss": 0.0208, "step": 47650 }, { "action_loss": 0.0335, "epoch": 4.484347090345022, "learning_rate": 1.8259417906186506e-05, "llm_loss": 0.0, "loss": 0.0335, "step": 47700 }, { "action_loss": 0.0301, "epoch": 4.489047663814985, "learning_rate": 1.825568906058598e-05, "llm_loss": 0.0, "loss": 0.0301, "step": 47750 }, { "action_loss": 0.0269, "epoch": 4.493748237284949, "learning_rate": 1.825195660670576e-05, "llm_loss": 0.0, "loss": 0.0269, "step": 47800 }, { "action_loss": 0.0232, "epoch": 4.498448810754912, "learning_rate": 1.8248220546177192e-05, "llm_loss": 0.0, "loss": 0.0232, "step": 47850 }, { "action_loss": 0.0206, "epoch": 4.5031493842248755, "learning_rate": 1.8244480880633165e-05, "llm_loss": 0.0, "loss": 0.0206, "step": 47900 }, { "action_loss": 0.0254, "epoch": 4.507849957694839, "learning_rate": 1.824073761170817e-05, "llm_loss": 0.0, "loss": 0.0254, "step": 47950 }, { "action_loss": 0.0279, "epoch": 4.512550531164802, "learning_rate": 1.823699074103826e-05, "llm_loss": 0.0, "loss": 0.0279, "step": 48000 }, { "action_loss": 0.0243, "epoch": 4.517251104634766, "learning_rate": 1.8233240270261064e-05, "llm_loss": 0.0, "loss": 0.0243, "step": 48050 }, { "action_loss": 0.0175, "epoch": 4.521951678104728, "learning_rate": 1.822948620101578e-05, "llm_loss": 0.0, "loss": 0.0175, "step": 48100 }, { "action_loss": 0.0462, "epoch": 4.526652251574692, "learning_rate": 1.8225728534943187e-05, "llm_loss": 0.0, "loss": 0.0462, "step": 48150 }, { "action_loss": 0.0072, "epoch": 4.531352825044656, "learning_rate": 1.8221967273685635e-05, "llm_loss": 0.0, "loss": 0.0072, "step": 48200 }, { "action_loss": 0.0277, "epoch": 4.5360533985146185, "learning_rate": 1.8218202418887035e-05, "llm_loss": 0.0, "loss": 0.0277, "step": 48250 }, { "action_loss": 0.0144, "epoch": 4.540753971984582, "learning_rate": 1.8214433972192887e-05, "llm_loss": 0.0, "loss": 0.0144, "step": 48300 }, { "action_loss": 0.0082, "epoch": 4.545454545454545, "learning_rate": 1.821066193525024e-05, "llm_loss": 0.0, "loss": 0.0082, "step": 48350 }, { "action_loss": 0.0173, "epoch": 4.550155118924509, "learning_rate": 1.8206886309707733e-05, "llm_loss": 0.0, "loss": 0.0173, "step": 48400 }, { "action_loss": 0.0227, "epoch": 4.554855692394472, "learning_rate": 1.8203107097215555e-05, "llm_loss": 0.0, "loss": 0.0227, "step": 48450 }, { "action_loss": 0.0176, "epoch": 4.559556265864435, "learning_rate": 1.8199324299425472e-05, "llm_loss": 0.0, "loss": 0.0176, "step": 48500 }, { "action_loss": 0.011, "epoch": 4.564256839334399, "learning_rate": 1.819553791799082e-05, "llm_loss": 0.0, "loss": 0.011, "step": 48550 }, { "action_loss": 0.0171, "epoch": 4.568957412804362, "learning_rate": 1.819174795456649e-05, "llm_loss": 0.0, "loss": 0.0171, "step": 48600 }, { "action_loss": 0.0143, "epoch": 4.573657986274325, "learning_rate": 1.8187954410808947e-05, "llm_loss": 0.0, "loss": 0.0143, "step": 48650 }, { "action_loss": 0.046, "epoch": 4.578358559744289, "learning_rate": 1.8184157288376226e-05, "llm_loss": 0.0, "loss": 0.046, "step": 48700 }, { "action_loss": 0.0242, "epoch": 4.5830591332142525, "learning_rate": 1.8180356588927913e-05, "llm_loss": 0.0, "loss": 0.0242, "step": 48750 }, { "action_loss": 0.0204, "epoch": 4.587759706684215, "learning_rate": 1.8176552314125167e-05, "llm_loss": 0.0, "loss": 0.0204, "step": 48800 }, { "action_loss": 0.0183, "epoch": 4.592460280154179, "learning_rate": 1.8172744465630704e-05, "llm_loss": 0.0, "loss": 0.0183, "step": 48850 }, { "action_loss": 0.0231, "epoch": 4.597160853624143, "learning_rate": 1.816893304510881e-05, "llm_loss": 0.0, "loss": 0.0231, "step": 48900 }, { "action_loss": 0.0352, "epoch": 4.601861427094105, "learning_rate": 1.816511805422532e-05, "llm_loss": 0.0, "loss": 0.0352, "step": 48950 }, { "action_loss": 0.0232, "epoch": 4.606562000564069, "learning_rate": 1.816129949464764e-05, "llm_loss": 0.0, "loss": 0.0232, "step": 49000 }, { "action_loss": 0.0267, "epoch": 4.611262574034032, "learning_rate": 1.815747736804473e-05, "llm_loss": 0.0, "loss": 0.0267, "step": 49050 }, { "action_loss": 0.0166, "epoch": 4.6159631475039955, "learning_rate": 1.815365167608711e-05, "llm_loss": 0.0, "loss": 0.0166, "step": 49100 }, { "action_loss": 0.0178, "epoch": 4.620663720973959, "learning_rate": 1.8149822420446864e-05, "llm_loss": 0.0, "loss": 0.0178, "step": 49150 }, { "action_loss": 0.0294, "epoch": 4.625364294443922, "learning_rate": 1.8145989602797626e-05, "llm_loss": 0.0, "loss": 0.0294, "step": 49200 }, { "action_loss": 0.0249, "epoch": 4.630064867913886, "learning_rate": 1.8142153224814588e-05, "llm_loss": 0.0, "loss": 0.0249, "step": 49250 }, { "action_loss": 0.0204, "epoch": 4.634765441383848, "learning_rate": 1.8138313288174508e-05, "llm_loss": 0.0, "loss": 0.0204, "step": 49300 }, { "action_loss": 0.0165, "epoch": 4.639466014853812, "learning_rate": 1.8134469794555677e-05, "llm_loss": 0.0, "loss": 0.0165, "step": 49350 }, { "action_loss": 0.0144, "epoch": 4.644166588323776, "learning_rate": 1.8130622745637966e-05, "llm_loss": 0.0, "loss": 0.0144, "step": 49400 }, { "action_loss": 0.0183, "epoch": 4.6488671617937385, "learning_rate": 1.8126772143102785e-05, "llm_loss": 0.0, "loss": 0.0183, "step": 49450 }, { "action_loss": 0.0239, "epoch": 4.653567735263702, "learning_rate": 1.81229179886331e-05, "llm_loss": 0.0, "loss": 0.0239, "step": 49500 }, { "action_loss": 0.0318, "epoch": 4.658268308733666, "learning_rate": 1.811906028391343e-05, "llm_loss": 0.0, "loss": 0.0318, "step": 49550 }, { "action_loss": 0.0345, "epoch": 4.662968882203629, "learning_rate": 1.8115199030629846e-05, "llm_loss": 0.0, "loss": 0.0345, "step": 49600 }, { "action_loss": 0.025, "epoch": 4.667669455673592, "learning_rate": 1.8111334230469974e-05, "llm_loss": 0.0, "loss": 0.025, "step": 49650 }, { "action_loss": 0.051, "epoch": 4.672370029143556, "learning_rate": 1.8107465885122985e-05, "llm_loss": 0.0, "loss": 0.051, "step": 49700 }, { "action_loss": 0.0341, "epoch": 4.677070602613519, "learning_rate": 1.8103593996279595e-05, "llm_loss": 0.0, "loss": 0.0341, "step": 49750 }, { "action_loss": 0.0229, "epoch": 4.681771176083482, "learning_rate": 1.809971856563208e-05, "llm_loss": 0.0, "loss": 0.0229, "step": 49800 }, { "action_loss": 0.0173, "epoch": 4.686471749553445, "learning_rate": 1.8095839594874255e-05, "llm_loss": 0.0, "loss": 0.0173, "step": 49850 }, { "action_loss": 0.0306, "epoch": 4.691172323023409, "learning_rate": 1.8091957085701487e-05, "llm_loss": 0.0, "loss": 0.0306, "step": 49900 }, { "action_loss": 0.0244, "epoch": 4.6958728964933725, "learning_rate": 1.8088071039810692e-05, "llm_loss": 0.0, "loss": 0.0244, "step": 49950 }, { "action_loss": 0.0303, "epoch": 4.700573469963335, "learning_rate": 1.808418145890032e-05, "llm_loss": 0.0, "loss": 0.0303, "step": 50000 }, { "action_loss": 0.0176, "epoch": 4.705274043433299, "learning_rate": 1.808028834467038e-05, "llm_loss": 0.0, "loss": 0.0176, "step": 50050 }, { "action_loss": 0.0303, "epoch": 4.709974616903263, "learning_rate": 1.8076391698822416e-05, "llm_loss": 0.0, "loss": 0.0303, "step": 50100 }, { "action_loss": 0.021, "epoch": 4.714675190373225, "learning_rate": 1.8072491523059527e-05, "llm_loss": 0.0, "loss": 0.021, "step": 50150 }, { "action_loss": 0.0271, "epoch": 4.719375763843189, "learning_rate": 1.806858781908633e-05, "llm_loss": 0.0, "loss": 0.0271, "step": 50200 }, { "action_loss": 0.0278, "epoch": 4.724076337313152, "learning_rate": 1.8064680588609014e-05, "llm_loss": 0.0, "loss": 0.0278, "step": 50250 }, { "action_loss": 0.0193, "epoch": 4.7287769107831155, "learning_rate": 1.8060769833335293e-05, "llm_loss": 0.0, "loss": 0.0193, "step": 50300 }, { "action_loss": 0.0299, "epoch": 4.733477484253079, "learning_rate": 1.8056855554974425e-05, "llm_loss": 0.0, "loss": 0.0299, "step": 50350 }, { "action_loss": 0.0132, "epoch": 4.738178057723042, "learning_rate": 1.8052937755237208e-05, "llm_loss": 0.0, "loss": 0.0132, "step": 50400 }, { "action_loss": 0.0171, "epoch": 4.742878631193006, "learning_rate": 1.8049016435835975e-05, "llm_loss": 0.0, "loss": 0.0171, "step": 50450 }, { "action_loss": 0.0234, "epoch": 4.747579204662969, "learning_rate": 1.80450915984846e-05, "llm_loss": 0.0, "loss": 0.0234, "step": 50500 }, { "action_loss": 0.0405, "epoch": 4.752279778132932, "learning_rate": 1.8041163244898497e-05, "llm_loss": 0.0, "loss": 0.0405, "step": 50550 }, { "action_loss": 0.0227, "epoch": 4.756980351602896, "learning_rate": 1.803723137679462e-05, "llm_loss": 0.0, "loss": 0.0227, "step": 50600 }, { "action_loss": 0.0294, "epoch": 4.761680925072859, "learning_rate": 1.8033295995891448e-05, "llm_loss": 0.0, "loss": 0.0294, "step": 50650 }, { "action_loss": 0.0106, "epoch": 4.766381498542822, "learning_rate": 1.8029357103909007e-05, "llm_loss": 0.0, "loss": 0.0106, "step": 50700 }, { "action_loss": 0.0425, "epoch": 4.771082072012786, "learning_rate": 1.8025414702568846e-05, "llm_loss": 0.0, "loss": 0.0425, "step": 50750 }, { "action_loss": 0.0446, "epoch": 4.775782645482749, "learning_rate": 1.8021468793594058e-05, "llm_loss": 0.0, "loss": 0.0446, "step": 50800 }, { "action_loss": 0.0322, "epoch": 4.780483218952712, "learning_rate": 1.8017519378709264e-05, "llm_loss": 0.0, "loss": 0.0322, "step": 50850 }, { "action_loss": 0.024, "epoch": 4.785183792422676, "learning_rate": 1.8013566459640616e-05, "llm_loss": 0.0, "loss": 0.024, "step": 50900 }, { "action_loss": 0.0115, "epoch": 4.789884365892639, "learning_rate": 1.8009610038115807e-05, "llm_loss": 0.0, "loss": 0.0115, "step": 50950 }, { "action_loss": 0.0228, "epoch": 4.794584939362602, "learning_rate": 1.8005650115864044e-05, "llm_loss": 0.0, "loss": 0.0228, "step": 51000 }, { "action_loss": 0.032, "epoch": 4.799285512832565, "learning_rate": 1.800168669461608e-05, "llm_loss": 0.0, "loss": 0.032, "step": 51050 }, { "action_loss": 0.0101, "epoch": 4.803986086302529, "learning_rate": 1.799771977610419e-05, "llm_loss": 0.0, "loss": 0.0101, "step": 51100 }, { "action_loss": 0.0206, "epoch": 4.808686659772492, "learning_rate": 1.7993749362062178e-05, "llm_loss": 0.0, "loss": 0.0206, "step": 51150 }, { "action_loss": 0.0276, "epoch": 4.813387233242455, "learning_rate": 1.798977545422538e-05, "llm_loss": 0.0, "loss": 0.0276, "step": 51200 }, { "action_loss": 0.0227, "epoch": 4.818087806712419, "learning_rate": 1.798579805433065e-05, "llm_loss": 0.0, "loss": 0.0227, "step": 51250 }, { "action_loss": 0.0346, "epoch": 4.8227883801823825, "learning_rate": 1.798181716411637e-05, "llm_loss": 0.0, "loss": 0.0346, "step": 51300 }, { "action_loss": 0.0171, "epoch": 4.827488953652345, "learning_rate": 1.7977832785322464e-05, "llm_loss": 0.0, "loss": 0.0171, "step": 51350 }, { "action_loss": 0.0239, "epoch": 4.832189527122309, "learning_rate": 1.797384491969036e-05, "llm_loss": 0.0, "loss": 0.0239, "step": 51400 }, { "action_loss": 0.0205, "epoch": 4.836890100592273, "learning_rate": 1.7969853568963015e-05, "llm_loss": 0.0, "loss": 0.0205, "step": 51450 }, { "action_loss": 0.0044, "epoch": 4.841590674062235, "learning_rate": 1.7965858734884916e-05, "llm_loss": 0.0, "loss": 0.0044, "step": 51500 }, { "action_loss": 0.0431, "epoch": 4.846291247532199, "learning_rate": 1.796186041920207e-05, "llm_loss": 0.0, "loss": 0.0431, "step": 51550 }, { "action_loss": 0.0175, "epoch": 4.850991821002163, "learning_rate": 1.7957858623662002e-05, "llm_loss": 0.0, "loss": 0.0175, "step": 51600 }, { "action_loss": 0.0312, "epoch": 4.8556923944721255, "learning_rate": 1.7953853350013762e-05, "llm_loss": 0.0, "loss": 0.0312, "step": 51650 }, { "action_loss": 0.024, "epoch": 4.860392967942089, "learning_rate": 1.7949844600007913e-05, "llm_loss": 0.0, "loss": 0.024, "step": 51700 }, { "action_loss": 0.0202, "epoch": 4.865093541412052, "learning_rate": 1.7945832375396547e-05, "llm_loss": 0.0, "loss": 0.0202, "step": 51750 }, { "action_loss": 0.0348, "epoch": 4.869794114882016, "learning_rate": 1.7941816677933274e-05, "llm_loss": 0.0, "loss": 0.0348, "step": 51800 }, { "action_loss": 0.0281, "epoch": 4.874494688351979, "learning_rate": 1.793779750937321e-05, "llm_loss": 0.0, "loss": 0.0281, "step": 51850 }, { "action_loss": 0.0269, "epoch": 4.879195261821942, "learning_rate": 1.7933774871473e-05, "llm_loss": 0.0, "loss": 0.0269, "step": 51900 }, { "action_loss": 0.028, "epoch": 4.883895835291906, "learning_rate": 1.7929748765990803e-05, "llm_loss": 0.0, "loss": 0.028, "step": 51950 }, { "action_loss": 0.0164, "epoch": 4.8885964087618685, "learning_rate": 1.792571919468629e-05, "llm_loss": 0.0, "loss": 0.0164, "step": 52000 }, { "action_loss": 0.0236, "epoch": 4.893296982231832, "learning_rate": 1.792168615932065e-05, "llm_loss": 0.0, "loss": 0.0236, "step": 52050 }, { "action_loss": 0.0226, "epoch": 4.897997555701796, "learning_rate": 1.7917649661656585e-05, "llm_loss": 0.0, "loss": 0.0226, "step": 52100 }, { "action_loss": 0.0313, "epoch": 4.902698129171759, "learning_rate": 1.791360970345831e-05, "llm_loss": 0.0, "loss": 0.0313, "step": 52150 }, { "action_loss": 0.0292, "epoch": 4.907398702641722, "learning_rate": 1.7909566286491556e-05, "llm_loss": 0.0, "loss": 0.0292, "step": 52200 }, { "action_loss": 0.0145, "epoch": 4.912099276111686, "learning_rate": 1.7905519412523557e-05, "llm_loss": 0.0, "loss": 0.0145, "step": 52250 }, { "action_loss": 0.0143, "epoch": 4.916799849581649, "learning_rate": 1.7901469083323065e-05, "llm_loss": 0.0, "loss": 0.0143, "step": 52300 }, { "action_loss": 0.0163, "epoch": 4.921500423051612, "learning_rate": 1.7897415300660342e-05, "llm_loss": 0.0, "loss": 0.0163, "step": 52350 }, { "action_loss": 0.0204, "epoch": 4.926200996521576, "learning_rate": 1.7893358066307158e-05, "llm_loss": 0.0, "loss": 0.0204, "step": 52400 }, { "action_loss": 0.0342, "epoch": 4.930901569991539, "learning_rate": 1.788929738203679e-05, "llm_loss": 0.0, "loss": 0.0342, "step": 52450 }, { "action_loss": 0.0369, "epoch": 4.9356021434615025, "learning_rate": 1.7885233249624025e-05, "llm_loss": 0.0, "loss": 0.0369, "step": 52500 }, { "action_loss": 0.0211, "epoch": 4.940302716931465, "learning_rate": 1.7881165670845156e-05, "llm_loss": 0.0, "loss": 0.0211, "step": 52550 }, { "action_loss": 0.0171, "epoch": 4.945003290401429, "learning_rate": 1.7877094647477984e-05, "llm_loss": 0.0, "loss": 0.0171, "step": 52600 }, { "action_loss": 0.0208, "epoch": 4.949703863871393, "learning_rate": 1.7873020181301813e-05, "llm_loss": 0.0, "loss": 0.0208, "step": 52650 }, { "action_loss": 0.0127, "epoch": 4.954404437341355, "learning_rate": 1.7868942274097455e-05, "llm_loss": 0.0, "loss": 0.0127, "step": 52700 }, { "action_loss": 0.0144, "epoch": 4.959105010811319, "learning_rate": 1.7864860927647224e-05, "llm_loss": 0.0, "loss": 0.0144, "step": 52750 }, { "action_loss": 0.019, "epoch": 4.963805584281282, "learning_rate": 1.786077614373493e-05, "llm_loss": 0.0, "loss": 0.019, "step": 52800 }, { "action_loss": 0.0263, "epoch": 4.9685061577512455, "learning_rate": 1.7856687924145904e-05, "llm_loss": 0.0, "loss": 0.0263, "step": 52850 }, { "action_loss": 0.0267, "epoch": 4.973206731221209, "learning_rate": 1.7852596270666957e-05, "llm_loss": 0.0, "loss": 0.0267, "step": 52900 }, { "action_loss": 0.0191, "epoch": 4.977907304691172, "learning_rate": 1.784850118508642e-05, "llm_loss": 0.0, "loss": 0.0191, "step": 52950 }, { "action_loss": 0.0139, "epoch": 4.982607878161136, "learning_rate": 1.7844402669194104e-05, "llm_loss": 0.0, "loss": 0.0139, "step": 53000 }, { "action_loss": 0.0175, "epoch": 4.987308451631099, "learning_rate": 1.784030072478134e-05, "llm_loss": 0.0, "loss": 0.0175, "step": 53050 }, { "action_loss": 0.0238, "epoch": 4.992009025101062, "learning_rate": 1.7836195353640945e-05, "llm_loss": 0.0, "loss": 0.0238, "step": 53100 }, { "action_loss": 0.0204, "epoch": 4.996709598571026, "learning_rate": 1.7832086557567234e-05, "llm_loss": 0.0, "loss": 0.0204, "step": 53150 }, { "action_loss": 0.0238, "epoch": 5.001410172040989, "learning_rate": 1.7827974338356027e-05, "llm_loss": 0.0, "loss": 0.0238, "step": 53200 }, { "action_loss": 0.0245, "epoch": 5.006110745510952, "learning_rate": 1.7823858697804626e-05, "llm_loss": 0.0, "loss": 0.0245, "step": 53250 }, { "action_loss": 0.0329, "epoch": 5.010811318980916, "learning_rate": 1.7819739637711846e-05, "llm_loss": 0.0, "loss": 0.0329, "step": 53300 }, { "action_loss": 0.014, "epoch": 5.015511892450879, "learning_rate": 1.781561715987798e-05, "llm_loss": 0.0, "loss": 0.014, "step": 53350 }, { "action_loss": 0.0212, "epoch": 5.020212465920842, "learning_rate": 1.7811491266104825e-05, "llm_loss": 0.0, "loss": 0.0212, "step": 53400 }, { "action_loss": 0.0267, "epoch": 5.024913039390806, "learning_rate": 1.780736195819567e-05, "llm_loss": 0.0, "loss": 0.0267, "step": 53450 }, { "action_loss": 0.0196, "epoch": 5.029613612860769, "learning_rate": 1.7803229237955294e-05, "llm_loss": 0.0, "loss": 0.0196, "step": 53500 }, { "action_loss": 0.0278, "epoch": 5.034314186330732, "learning_rate": 1.7799093107189968e-05, "llm_loss": 0.0, "loss": 0.0278, "step": 53550 }, { "action_loss": 0.0177, "epoch": 5.039014759800696, "learning_rate": 1.7794953567707448e-05, "llm_loss": 0.0, "loss": 0.0177, "step": 53600 }, { "action_loss": 0.0327, "epoch": 5.043715333270659, "learning_rate": 1.7790810621316993e-05, "llm_loss": 0.0, "loss": 0.0327, "step": 53650 }, { "action_loss": 0.0233, "epoch": 5.0484159067406225, "learning_rate": 1.778666426982934e-05, "llm_loss": 0.0, "loss": 0.0233, "step": 53700 }, { "action_loss": 0.0287, "epoch": 5.053116480210586, "learning_rate": 1.7782514515056716e-05, "llm_loss": 0.0, "loss": 0.0287, "step": 53750 }, { "action_loss": 0.0144, "epoch": 5.057817053680549, "learning_rate": 1.777836135881284e-05, "llm_loss": 0.0, "loss": 0.0144, "step": 53800 }, { "action_loss": 0.0238, "epoch": 5.062517627150513, "learning_rate": 1.7774204802912913e-05, "llm_loss": 0.0, "loss": 0.0238, "step": 53850 }, { "action_loss": 0.029, "epoch": 5.067218200620475, "learning_rate": 1.7770044849173624e-05, "llm_loss": 0.0, "loss": 0.029, "step": 53900 }, { "action_loss": 0.024, "epoch": 5.071918774090439, "learning_rate": 1.776588149941315e-05, "llm_loss": 0.0, "loss": 0.024, "step": 53950 }, { "action_loss": 0.0244, "epoch": 5.076619347560403, "learning_rate": 1.776171475545114e-05, "llm_loss": 0.0, "loss": 0.0244, "step": 54000 }, { "action_loss": 0.012, "epoch": 5.0813199210303654, "learning_rate": 1.7757544619108745e-05, "llm_loss": 0.0, "loss": 0.012, "step": 54050 }, { "action_loss": 0.007, "epoch": 5.086020494500329, "learning_rate": 1.7753371092208582e-05, "llm_loss": 0.0, "loss": 0.007, "step": 54100 }, { "action_loss": 0.0171, "epoch": 5.090721067970293, "learning_rate": 1.7749194176574765e-05, "llm_loss": 0.0, "loss": 0.0171, "step": 54150 }, { "action_loss": 0.0279, "epoch": 5.0954216414402556, "learning_rate": 1.7745013874032874e-05, "llm_loss": 0.0, "loss": 0.0279, "step": 54200 }, { "action_loss": 0.0178, "epoch": 5.100122214910219, "learning_rate": 1.774083018640998e-05, "llm_loss": 0.0, "loss": 0.0178, "step": 54250 }, { "action_loss": 0.0192, "epoch": 5.104822788380182, "learning_rate": 1.7736643115534623e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 54300 }, { "action_loss": 0.0197, "epoch": 5.109523361850146, "learning_rate": 1.7732452663236842e-05, "llm_loss": 0.0, "loss": 0.0197, "step": 54350 }, { "action_loss": 0.0208, "epoch": 5.114223935320109, "learning_rate": 1.772825883134813e-05, "llm_loss": 0.0, "loss": 0.0208, "step": 54400 }, { "action_loss": 0.0212, "epoch": 5.118924508790072, "learning_rate": 1.772406162170147e-05, "llm_loss": 0.0, "loss": 0.0212, "step": 54450 }, { "action_loss": 0.035, "epoch": 5.123625082260036, "learning_rate": 1.771986103613132e-05, "llm_loss": 0.0, "loss": 0.035, "step": 54500 }, { "action_loss": 0.0165, "epoch": 5.128325655729999, "learning_rate": 1.7715657076473617e-05, "llm_loss": 0.0, "loss": 0.0165, "step": 54550 }, { "action_loss": 0.0218, "epoch": 5.133026229199962, "learning_rate": 1.7711449744565757e-05, "llm_loss": 0.0, "loss": 0.0218, "step": 54600 }, { "action_loss": 0.0109, "epoch": 5.137726802669926, "learning_rate": 1.770723904224663e-05, "llm_loss": 0.0, "loss": 0.0109, "step": 54650 }, { "action_loss": 0.0131, "epoch": 5.142427376139889, "learning_rate": 1.770302497135659e-05, "llm_loss": 0.0, "loss": 0.0131, "step": 54700 }, { "action_loss": 0.0227, "epoch": 5.147127949609852, "learning_rate": 1.7698807533737456e-05, "llm_loss": 0.0, "loss": 0.0227, "step": 54750 }, { "action_loss": 0.0167, "epoch": 5.151828523079816, "learning_rate": 1.7694586731232535e-05, "llm_loss": 0.0, "loss": 0.0167, "step": 54800 }, { "action_loss": 0.0207, "epoch": 5.156529096549779, "learning_rate": 1.769036256568659e-05, "llm_loss": 0.0, "loss": 0.0207, "step": 54850 }, { "action_loss": 0.0305, "epoch": 5.161229670019742, "learning_rate": 1.768613503894586e-05, "llm_loss": 0.0, "loss": 0.0305, "step": 54900 }, { "action_loss": 0.0264, "epoch": 5.165930243489706, "learning_rate": 1.7681904152858054e-05, "llm_loss": 0.0, "loss": 0.0264, "step": 54950 }, { "action_loss": 0.0318, "epoch": 5.170630816959669, "learning_rate": 1.767766990927235e-05, "llm_loss": 0.0, "loss": 0.0318, "step": 55000 }, { "action_loss": 0.0311, "epoch": 5.1753313904296325, "learning_rate": 1.7673432310039385e-05, "llm_loss": 0.0, "loss": 0.0311, "step": 55050 }, { "action_loss": 0.0101, "epoch": 5.180031963899596, "learning_rate": 1.7669191357011274e-05, "llm_loss": 0.0, "loss": 0.0101, "step": 55100 }, { "action_loss": 0.0074, "epoch": 5.184732537369559, "learning_rate": 1.766494705204159e-05, "llm_loss": 0.0, "loss": 0.0074, "step": 55150 }, { "action_loss": 0.0157, "epoch": 5.189433110839523, "learning_rate": 1.7660699396985378e-05, "llm_loss": 0.0, "loss": 0.0157, "step": 55200 }, { "action_loss": 0.0265, "epoch": 5.194133684309485, "learning_rate": 1.7656448393699136e-05, "llm_loss": 0.0, "loss": 0.0265, "step": 55250 }, { "action_loss": 0.023, "epoch": 5.198834257779449, "learning_rate": 1.7652194044040838e-05, "llm_loss": 0.0, "loss": 0.023, "step": 55300 }, { "action_loss": 0.0108, "epoch": 5.203534831249413, "learning_rate": 1.7647936349869913e-05, "llm_loss": 0.0, "loss": 0.0108, "step": 55350 }, { "action_loss": 0.0111, "epoch": 5.2082354047193755, "learning_rate": 1.7643675313047253e-05, "llm_loss": 0.0, "loss": 0.0111, "step": 55400 }, { "action_loss": 0.0172, "epoch": 5.212935978189339, "learning_rate": 1.763941093543521e-05, "llm_loss": 0.0, "loss": 0.0172, "step": 55450 }, { "action_loss": 0.0165, "epoch": 5.217636551659303, "learning_rate": 1.7635143218897604e-05, "llm_loss": 0.0, "loss": 0.0165, "step": 55500 }, { "action_loss": 0.0231, "epoch": 5.222337125129266, "learning_rate": 1.76308721652997e-05, "llm_loss": 0.0, "loss": 0.0231, "step": 55550 }, { "action_loss": 0.0259, "epoch": 5.227037698599229, "learning_rate": 1.7626597776508242e-05, "llm_loss": 0.0, "loss": 0.0259, "step": 55600 }, { "action_loss": 0.0242, "epoch": 5.231738272069192, "learning_rate": 1.7622320054391405e-05, "llm_loss": 0.0, "loss": 0.0242, "step": 55650 }, { "action_loss": 0.0258, "epoch": 5.236438845539156, "learning_rate": 1.761803900081884e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 55700 }, { "action_loss": 0.0163, "epoch": 5.241139419009119, "learning_rate": 1.7613754617661655e-05, "llm_loss": 0.0, "loss": 0.0163, "step": 55750 }, { "action_loss": 0.0295, "epoch": 5.245839992479082, "learning_rate": 1.7609466906792404e-05, "llm_loss": 0.0, "loss": 0.0295, "step": 55800 }, { "action_loss": 0.0133, "epoch": 5.250540565949046, "learning_rate": 1.760517587008509e-05, "llm_loss": 0.0, "loss": 0.0133, "step": 55850 }, { "action_loss": 0.0187, "epoch": 5.2552411394190095, "learning_rate": 1.7600881509415194e-05, "llm_loss": 0.0, "loss": 0.0187, "step": 55900 }, { "action_loss": 0.0274, "epoch": 5.259941712888972, "learning_rate": 1.7596583826659627e-05, "llm_loss": 0.0, "loss": 0.0274, "step": 55950 }, { "action_loss": 0.0197, "epoch": 5.264642286358936, "learning_rate": 1.7592282823696758e-05, "llm_loss": 0.0, "loss": 0.0197, "step": 56000 }, { "action_loss": 0.0327, "epoch": 5.269342859828899, "learning_rate": 1.758797850240641e-05, "llm_loss": 0.0, "loss": 0.0327, "step": 56050 }, { "action_loss": 0.0265, "epoch": 5.274043433298862, "learning_rate": 1.7583670864669854e-05, "llm_loss": 0.0, "loss": 0.0265, "step": 56100 }, { "action_loss": 0.0241, "epoch": 5.278744006768826, "learning_rate": 1.7579359912369814e-05, "llm_loss": 0.0, "loss": 0.0241, "step": 56150 }, { "action_loss": 0.0163, "epoch": 5.283444580238789, "learning_rate": 1.7575045647390456e-05, "llm_loss": 0.0, "loss": 0.0163, "step": 56200 }, { "action_loss": 0.0311, "epoch": 5.2881451537087525, "learning_rate": 1.7570728071617402e-05, "llm_loss": 0.0, "loss": 0.0311, "step": 56250 }, { "action_loss": 0.0112, "epoch": 5.292845727178716, "learning_rate": 1.7566407186937713e-05, "llm_loss": 0.0, "loss": 0.0112, "step": 56300 }, { "action_loss": 0.0198, "epoch": 5.297546300648679, "learning_rate": 1.75620829952399e-05, "llm_loss": 0.0, "loss": 0.0198, "step": 56350 }, { "action_loss": 0.0229, "epoch": 5.302246874118643, "learning_rate": 1.7557755498413926e-05, "llm_loss": 0.0, "loss": 0.0229, "step": 56400 }, { "action_loss": 0.0175, "epoch": 5.306947447588605, "learning_rate": 1.7553424698351183e-05, "llm_loss": 0.0, "loss": 0.0175, "step": 56450 }, { "action_loss": 0.0172, "epoch": 5.311648021058569, "learning_rate": 1.7549090596944522e-05, "llm_loss": 0.0, "loss": 0.0172, "step": 56500 }, { "action_loss": 0.0141, "epoch": 5.316348594528533, "learning_rate": 1.7544753196088232e-05, "llm_loss": 0.0, "loss": 0.0141, "step": 56550 }, { "action_loss": 0.0299, "epoch": 5.3210491679984955, "learning_rate": 1.7540412497678035e-05, "llm_loss": 0.0, "loss": 0.0299, "step": 56600 }, { "action_loss": 0.0172, "epoch": 5.325749741468459, "learning_rate": 1.753606850361111e-05, "llm_loss": 0.0, "loss": 0.0172, "step": 56650 }, { "action_loss": 0.0334, "epoch": 5.330450314938423, "learning_rate": 1.7531721215786062e-05, "llm_loss": 0.0, "loss": 0.0334, "step": 56700 }, { "action_loss": 0.021, "epoch": 5.335150888408386, "learning_rate": 1.7527370636102948e-05, "llm_loss": 0.0, "loss": 0.021, "step": 56750 }, { "action_loss": 0.0358, "epoch": 5.339851461878349, "learning_rate": 1.7523016766463253e-05, "llm_loss": 0.0, "loss": 0.0358, "step": 56800 }, { "action_loss": 0.0381, "epoch": 5.344552035348313, "learning_rate": 1.7518659608769904e-05, "llm_loss": 0.0, "loss": 0.0381, "step": 56850 }, { "action_loss": 0.0238, "epoch": 5.349252608818276, "learning_rate": 1.7514299164927272e-05, "llm_loss": 0.0, "loss": 0.0238, "step": 56900 }, { "action_loss": 0.0197, "epoch": 5.353953182288239, "learning_rate": 1.7509935436841155e-05, "llm_loss": 0.0, "loss": 0.0197, "step": 56950 }, { "action_loss": 0.0324, "epoch": 5.358653755758202, "learning_rate": 1.7505568426418788e-05, "llm_loss": 0.0, "loss": 0.0324, "step": 57000 }, { "action_loss": 0.0169, "epoch": 5.363354329228166, "learning_rate": 1.750119813556884e-05, "llm_loss": 0.0, "loss": 0.0169, "step": 57050 }, { "action_loss": 0.0263, "epoch": 5.3680549026981295, "learning_rate": 1.7496824566201424e-05, "llm_loss": 0.0, "loss": 0.0263, "step": 57100 }, { "action_loss": 0.0196, "epoch": 5.372755476168092, "learning_rate": 1.7492447720228067e-05, "llm_loss": 0.0, "loss": 0.0196, "step": 57150 }, { "action_loss": 0.0235, "epoch": 5.377456049638056, "learning_rate": 1.7488067599561747e-05, "llm_loss": 0.0, "loss": 0.0235, "step": 57200 }, { "action_loss": 0.0074, "epoch": 5.38215662310802, "learning_rate": 1.748368420611686e-05, "llm_loss": 0.0, "loss": 0.0074, "step": 57250 }, { "action_loss": 0.014, "epoch": 5.386857196577982, "learning_rate": 1.7479297541809245e-05, "llm_loss": 0.0, "loss": 0.014, "step": 57300 }, { "action_loss": 0.0289, "epoch": 5.391557770047946, "learning_rate": 1.7474907608556154e-05, "llm_loss": 0.0, "loss": 0.0289, "step": 57350 }, { "action_loss": 0.0146, "epoch": 5.396258343517909, "learning_rate": 1.7470514408276284e-05, "llm_loss": 0.0, "loss": 0.0146, "step": 57400 }, { "action_loss": 0.0246, "epoch": 5.4009589169878724, "learning_rate": 1.746611794288975e-05, "llm_loss": 0.0, "loss": 0.0246, "step": 57450 }, { "action_loss": 0.0292, "epoch": 5.405659490457836, "learning_rate": 1.7461718214318094e-05, "llm_loss": 0.0, "loss": 0.0292, "step": 57500 }, { "action_loss": 0.0125, "epoch": 5.410360063927799, "learning_rate": 1.745731522448429e-05, "llm_loss": 0.0, "loss": 0.0125, "step": 57550 }, { "action_loss": 0.0341, "epoch": 5.4150606373977626, "learning_rate": 1.745290897531274e-05, "llm_loss": 0.0, "loss": 0.0341, "step": 57600 }, { "action_loss": 0.0179, "epoch": 5.419761210867726, "learning_rate": 1.744849946872925e-05, "llm_loss": 0.0, "loss": 0.0179, "step": 57650 }, { "action_loss": 0.0221, "epoch": 5.424461784337689, "learning_rate": 1.744408670666108e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 57700 }, { "action_loss": 0.0212, "epoch": 5.429162357807653, "learning_rate": 1.743967069103689e-05, "llm_loss": 0.0, "loss": 0.0212, "step": 57750 }, { "action_loss": 0.025, "epoch": 5.433862931277616, "learning_rate": 1.7435251423786774e-05, "llm_loss": 0.0, "loss": 0.025, "step": 57800 }, { "action_loss": 0.0264, "epoch": 5.438563504747579, "learning_rate": 1.7430828906842234e-05, "llm_loss": 0.0, "loss": 0.0264, "step": 57850 }, { "action_loss": 0.0213, "epoch": 5.443264078217543, "learning_rate": 1.742640314213621e-05, "llm_loss": 0.0, "loss": 0.0213, "step": 57900 }, { "action_loss": 0.021, "epoch": 5.4479646516875055, "learning_rate": 1.742197413160305e-05, "llm_loss": 0.0, "loss": 0.021, "step": 57950 }, { "action_loss": 0.0172, "epoch": 5.452665225157469, "learning_rate": 1.741763055404462e-05, "llm_loss": 0.0, "loss": 0.0172, "step": 58000 }, { "action_loss": 0.0197, "epoch": 5.457365798627433, "learning_rate": 1.7413195122486003e-05, "llm_loss": 0.0, "loss": 0.0197, "step": 58050 }, { "action_loss": 0.014, "epoch": 5.462066372097396, "learning_rate": 1.740875645087302e-05, "llm_loss": 0.0, "loss": 0.014, "step": 58100 }, { "action_loss": 0.0406, "epoch": 5.466766945567359, "learning_rate": 1.740431454114567e-05, "llm_loss": 0.0, "loss": 0.0406, "step": 58150 }, { "action_loss": 0.0242, "epoch": 5.471467519037322, "learning_rate": 1.7399869395245358e-05, "llm_loss": 0.0, "loss": 0.0242, "step": 58200 }, { "action_loss": 0.0359, "epoch": 5.476168092507286, "learning_rate": 1.7395421015114897e-05, "llm_loss": 0.0, "loss": 0.0359, "step": 58250 }, { "action_loss": 0.0337, "epoch": 5.480868665977249, "learning_rate": 1.7390969402698537e-05, "llm_loss": 0.0, "loss": 0.0337, "step": 58300 }, { "action_loss": 0.0241, "epoch": 5.485569239447212, "learning_rate": 1.7386514559941913e-05, "llm_loss": 0.0, "loss": 0.0241, "step": 58350 }, { "action_loss": 0.0206, "epoch": 5.490269812917176, "learning_rate": 1.738205648879209e-05, "llm_loss": 0.0, "loss": 0.0206, "step": 58400 }, { "action_loss": 0.0235, "epoch": 5.4949703863871395, "learning_rate": 1.7377595191197536e-05, "llm_loss": 0.0, "loss": 0.0235, "step": 58450 }, { "action_loss": 0.0236, "epoch": 5.499670959857102, "learning_rate": 1.7373130669108137e-05, "llm_loss": 0.0, "loss": 0.0236, "step": 58500 }, { "action_loss": 0.0233, "epoch": 5.504371533327066, "learning_rate": 1.7368662924475175e-05, "llm_loss": 0.0, "loss": 0.0233, "step": 58550 }, { "action_loss": 0.0196, "epoch": 5.50907210679703, "learning_rate": 1.736419195925136e-05, "llm_loss": 0.0, "loss": 0.0196, "step": 58600 }, { "action_loss": 0.0299, "epoch": 5.513772680266992, "learning_rate": 1.7359717775390788e-05, "llm_loss": 0.0, "loss": 0.0299, "step": 58650 }, { "action_loss": 0.0166, "epoch": 5.518473253736956, "learning_rate": 1.7355240374848975e-05, "llm_loss": 0.0, "loss": 0.0166, "step": 58700 }, { "action_loss": 0.0332, "epoch": 5.52317382720692, "learning_rate": 1.735075975958284e-05, "llm_loss": 0.0, "loss": 0.0332, "step": 58750 }, { "action_loss": 0.0267, "epoch": 5.5278744006768825, "learning_rate": 1.7346275931550706e-05, "llm_loss": 0.0, "loss": 0.0267, "step": 58800 }, { "action_loss": 0.0209, "epoch": 5.532574974146846, "learning_rate": 1.7341788892712307e-05, "llm_loss": 0.0, "loss": 0.0209, "step": 58850 }, { "action_loss": 0.0184, "epoch": 5.537275547616809, "learning_rate": 1.7337298645028764e-05, "llm_loss": 0.0, "loss": 0.0184, "step": 58900 }, { "action_loss": 0.0313, "epoch": 5.541976121086773, "learning_rate": 1.733280519046262e-05, "llm_loss": 0.0, "loss": 0.0313, "step": 58950 }, { "action_loss": 0.0236, "epoch": 5.546676694556736, "learning_rate": 1.7328308530977806e-05, "llm_loss": 0.0, "loss": 0.0236, "step": 59000 }, { "action_loss": 0.0336, "epoch": 5.551377268026699, "learning_rate": 1.732380866853966e-05, "llm_loss": 0.0, "loss": 0.0336, "step": 59050 }, { "action_loss": 0.0231, "epoch": 5.556077841496663, "learning_rate": 1.7319305605114916e-05, "llm_loss": 0.0, "loss": 0.0231, "step": 59100 }, { "action_loss": 0.0269, "epoch": 5.5607784149666255, "learning_rate": 1.731479934267171e-05, "llm_loss": 0.0, "loss": 0.0269, "step": 59150 }, { "action_loss": 0.0162, "epoch": 5.565478988436589, "learning_rate": 1.731028988317958e-05, "llm_loss": 0.0, "loss": 0.0162, "step": 59200 }, { "action_loss": 0.0289, "epoch": 5.570179561906553, "learning_rate": 1.730577722860945e-05, "llm_loss": 0.0, "loss": 0.0289, "step": 59250 }, { "action_loss": 0.0335, "epoch": 5.574880135376516, "learning_rate": 1.7301261380933647e-05, "llm_loss": 0.0, "loss": 0.0335, "step": 59300 }, { "action_loss": 0.0265, "epoch": 5.579580708846479, "learning_rate": 1.72967423421259e-05, "llm_loss": 0.0, "loss": 0.0265, "step": 59350 }, { "action_loss": 0.0254, "epoch": 5.584281282316443, "learning_rate": 1.7292220114161323e-05, "llm_loss": 0.0, "loss": 0.0254, "step": 59400 }, { "action_loss": 0.039, "epoch": 5.588981855786406, "learning_rate": 1.7287694699016427e-05, "llm_loss": 0.0, "loss": 0.039, "step": 59450 }, { "action_loss": 0.0405, "epoch": 5.593682429256369, "learning_rate": 1.7283166098669118e-05, "llm_loss": 0.0, "loss": 0.0405, "step": 59500 }, { "action_loss": 0.0107, "epoch": 5.598383002726333, "learning_rate": 1.727863431509869e-05, "llm_loss": 0.0, "loss": 0.0107, "step": 59550 }, { "action_loss": 0.0268, "epoch": 5.603083576196296, "learning_rate": 1.7274099350285832e-05, "llm_loss": 0.0, "loss": 0.0268, "step": 59600 }, { "action_loss": 0.0297, "epoch": 5.6077841496662595, "learning_rate": 1.7269561206212625e-05, "llm_loss": 0.0, "loss": 0.0297, "step": 59650 }, { "action_loss": 0.0304, "epoch": 5.612484723136222, "learning_rate": 1.726501988486253e-05, "llm_loss": 0.0, "loss": 0.0304, "step": 59700 }, { "action_loss": 0.0234, "epoch": 5.617185296606186, "learning_rate": 1.726047538822041e-05, "llm_loss": 0.0, "loss": 0.0234, "step": 59750 }, { "action_loss": 0.0248, "epoch": 5.62188587007615, "learning_rate": 1.7255927718272503e-05, "llm_loss": 0.0, "loss": 0.0248, "step": 59800 }, { "action_loss": 0.0287, "epoch": 5.626586443546112, "learning_rate": 1.725137687700645e-05, "llm_loss": 0.0, "loss": 0.0287, "step": 59850 }, { "action_loss": 0.0346, "epoch": 5.631287017016076, "learning_rate": 1.724682286641126e-05, "llm_loss": 0.0, "loss": 0.0346, "step": 59900 }, { "action_loss": 0.0205, "epoch": 5.635987590486039, "learning_rate": 1.7242265688477338e-05, "llm_loss": 0.0, "loss": 0.0205, "step": 59950 }, { "action_loss": 0.0136, "epoch": 5.6406881639560025, "learning_rate": 1.723770534519647e-05, "llm_loss": 0.0, "loss": 0.0136, "step": 60000 }, { "action_loss": 0.0228, "epoch": 5.645388737425966, "learning_rate": 1.7233141838561827e-05, "llm_loss": 0.0, "loss": 0.0228, "step": 60050 }, { "action_loss": 0.0315, "epoch": 5.650089310895929, "learning_rate": 1.722857517056796e-05, "llm_loss": 0.0, "loss": 0.0315, "step": 60100 }, { "action_loss": 0.0099, "epoch": 5.654789884365893, "learning_rate": 1.722400534321081e-05, "llm_loss": 0.0, "loss": 0.0099, "step": 60150 }, { "action_loss": 0.0299, "epoch": 5.659490457835856, "learning_rate": 1.7219432358487685e-05, "llm_loss": 0.0, "loss": 0.0299, "step": 60200 }, { "action_loss": 0.0235, "epoch": 5.664191031305819, "learning_rate": 1.721485621839728e-05, "llm_loss": 0.0, "loss": 0.0235, "step": 60250 }, { "action_loss": 0.016, "epoch": 5.668891604775783, "learning_rate": 1.7210276924939678e-05, "llm_loss": 0.0, "loss": 0.016, "step": 60300 }, { "action_loss": 0.034, "epoch": 5.673592178245746, "learning_rate": 1.7205694480116324e-05, "llm_loss": 0.0, "loss": 0.034, "step": 60350 }, { "action_loss": 0.0223, "epoch": 5.678292751715709, "learning_rate": 1.720110888593005e-05, "llm_loss": 0.0, "loss": 0.0223, "step": 60400 }, { "action_loss": 0.0304, "epoch": 5.682993325185673, "learning_rate": 1.7196520144385065e-05, "llm_loss": 0.0, "loss": 0.0304, "step": 60450 }, { "action_loss": 0.0258, "epoch": 5.6876938986556365, "learning_rate": 1.7191928257486947e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 60500 }, { "action_loss": 0.0289, "epoch": 5.692394472125599, "learning_rate": 1.7187333227242654e-05, "llm_loss": 0.0, "loss": 0.0289, "step": 60550 }, { "action_loss": 0.0274, "epoch": 5.697095045595563, "learning_rate": 1.7182735055660524e-05, "llm_loss": 0.0, "loss": 0.0274, "step": 60600 }, { "action_loss": 0.0174, "epoch": 5.701795619065526, "learning_rate": 1.717813374475025e-05, "llm_loss": 0.0, "loss": 0.0174, "step": 60650 }, { "action_loss": 0.0225, "epoch": 5.706496192535489, "learning_rate": 1.7173529296522914e-05, "llm_loss": 0.0, "loss": 0.0225, "step": 60700 }, { "action_loss": 0.0244, "epoch": 5.711196766005453, "learning_rate": 1.716892171299096e-05, "llm_loss": 0.0, "loss": 0.0244, "step": 60750 }, { "action_loss": 0.023, "epoch": 5.715897339475416, "learning_rate": 1.7164310996168207e-05, "llm_loss": 0.0, "loss": 0.023, "step": 60800 }, { "action_loss": 0.025, "epoch": 5.7205979129453794, "learning_rate": 1.7159697148069843e-05, "llm_loss": 0.0, "loss": 0.025, "step": 60850 }, { "action_loss": 0.0306, "epoch": 5.725298486415342, "learning_rate": 1.7155080170712423e-05, "llm_loss": 0.0, "loss": 0.0306, "step": 60900 }, { "action_loss": 0.011, "epoch": 5.729999059885306, "learning_rate": 1.7150460066113868e-05, "llm_loss": 0.0, "loss": 0.011, "step": 60950 }, { "action_loss": 0.0246, "epoch": 5.7346996333552696, "learning_rate": 1.7145836836293467e-05, "llm_loss": 0.0, "loss": 0.0246, "step": 61000 }, { "action_loss": 0.0184, "epoch": 5.739400206825232, "learning_rate": 1.7141210483271882e-05, "llm_loss": 0.0, "loss": 0.0184, "step": 61050 }, { "action_loss": 0.0066, "epoch": 5.744100780295196, "learning_rate": 1.7136581009071126e-05, "llm_loss": 0.0, "loss": 0.0066, "step": 61100 }, { "action_loss": 0.0209, "epoch": 5.74880135376516, "learning_rate": 1.713194841571459e-05, "llm_loss": 0.0, "loss": 0.0209, "step": 61150 }, { "action_loss": 0.0112, "epoch": 5.753501927235122, "learning_rate": 1.712731270522702e-05, "llm_loss": 0.0, "loss": 0.0112, "step": 61200 }, { "action_loss": 0.013, "epoch": 5.758202500705086, "learning_rate": 1.7122673879634528e-05, "llm_loss": 0.0, "loss": 0.013, "step": 61250 }, { "action_loss": 0.0214, "epoch": 5.76290307417505, "learning_rate": 1.7118031940964584e-05, "llm_loss": 0.0, "loss": 0.0214, "step": 61300 }, { "action_loss": 0.0402, "epoch": 5.7676036476450125, "learning_rate": 1.711338689124602e-05, "llm_loss": 0.0, "loss": 0.0402, "step": 61350 }, { "action_loss": 0.0237, "epoch": 5.772304221114976, "learning_rate": 1.7108738732509033e-05, "llm_loss": 0.0, "loss": 0.0237, "step": 61400 }, { "action_loss": 0.0167, "epoch": 5.777004794584939, "learning_rate": 1.7104087466785172e-05, "llm_loss": 0.0, "loss": 0.0167, "step": 61450 }, { "action_loss": 0.0211, "epoch": 5.781705368054903, "learning_rate": 1.7099433096107344e-05, "llm_loss": 0.0, "loss": 0.0211, "step": 61500 }, { "action_loss": 0.0195, "epoch": 5.786405941524866, "learning_rate": 1.709477562250982e-05, "llm_loss": 0.0, "loss": 0.0195, "step": 61550 }, { "action_loss": 0.0174, "epoch": 5.791106514994829, "learning_rate": 1.709011504802822e-05, "llm_loss": 0.0, "loss": 0.0174, "step": 61600 }, { "action_loss": 0.0199, "epoch": 5.795807088464793, "learning_rate": 1.7085451374699522e-05, "llm_loss": 0.0, "loss": 0.0199, "step": 61650 }, { "action_loss": 0.0183, "epoch": 5.800507661934756, "learning_rate": 1.708078460456206e-05, "llm_loss": 0.0, "loss": 0.0183, "step": 61700 }, { "action_loss": 0.0137, "epoch": 5.805208235404719, "learning_rate": 1.7076114739655515e-05, "llm_loss": 0.0, "loss": 0.0137, "step": 61750 }, { "action_loss": 0.0178, "epoch": 5.809908808874683, "learning_rate": 1.7071441782020932e-05, "llm_loss": 0.0, "loss": 0.0178, "step": 61800 }, { "action_loss": 0.0303, "epoch": 5.814609382344646, "learning_rate": 1.7066765733700696e-05, "llm_loss": 0.0, "loss": 0.0303, "step": 61850 }, { "action_loss": 0.0288, "epoch": 5.819309955814609, "learning_rate": 1.706208659673855e-05, "llm_loss": 0.0, "loss": 0.0288, "step": 61900 }, { "action_loss": 0.0323, "epoch": 5.824010529284573, "learning_rate": 1.705749804788618e-05, "llm_loss": 0.0, "loss": 0.0323, "step": 61950 }, { "action_loss": 0.0163, "epoch": 5.828711102754536, "learning_rate": 1.705281280144778e-05, "llm_loss": 0.0, "loss": 0.0163, "step": 62000 }, { "action_loss": 0.0296, "epoch": 5.833411676224499, "learning_rate": 1.7048124472465816e-05, "llm_loss": 0.0, "loss": 0.0296, "step": 62050 }, { "action_loss": 0.0237, "epoch": 5.838112249694463, "learning_rate": 1.704343306298941e-05, "llm_loss": 0.0, "loss": 0.0237, "step": 62100 }, { "action_loss": 0.0198, "epoch": 5.842812823164426, "learning_rate": 1.7038738575069002e-05, "llm_loss": 0.0, "loss": 0.0198, "step": 62150 }, { "action_loss": 0.0105, "epoch": 5.8475133966343895, "learning_rate": 1.7034041010756396e-05, "llm_loss": 0.0, "loss": 0.0105, "step": 62200 }, { "action_loss": 0.0188, "epoch": 5.852213970104353, "learning_rate": 1.7029340372104737e-05, "llm_loss": 0.0, "loss": 0.0188, "step": 62250 }, { "action_loss": 0.0297, "epoch": 5.856914543574316, "learning_rate": 1.702463666116852e-05, "llm_loss": 0.0, "loss": 0.0297, "step": 62300 }, { "action_loss": 0.019, "epoch": 5.86161511704428, "learning_rate": 1.7019929880003568e-05, "llm_loss": 0.0, "loss": 0.019, "step": 62350 }, { "action_loss": 0.0096, "epoch": 5.866315690514242, "learning_rate": 1.701522003066706e-05, "llm_loss": 0.0, "loss": 0.0096, "step": 62400 }, { "action_loss": 0.0096, "epoch": 5.871016263984206, "learning_rate": 1.7010507115217506e-05, "llm_loss": 0.0, "loss": 0.0096, "step": 62450 }, { "action_loss": 0.0137, "epoch": 5.87571683745417, "learning_rate": 1.7005791135714764e-05, "llm_loss": 0.0, "loss": 0.0137, "step": 62500 }, { "action_loss": 0.0185, "epoch": 5.8804174109241325, "learning_rate": 1.7001072094220027e-05, "llm_loss": 0.0, "loss": 0.0185, "step": 62550 }, { "action_loss": 0.0301, "epoch": 5.885117984394096, "learning_rate": 1.6996349992795824e-05, "llm_loss": 0.0, "loss": 0.0301, "step": 62600 }, { "action_loss": 0.0258, "epoch": 5.889818557864059, "learning_rate": 1.6991624833506025e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 62650 }, { "action_loss": 0.0267, "epoch": 5.894519131334023, "learning_rate": 1.6986896618415838e-05, "llm_loss": 0.0, "loss": 0.0267, "step": 62700 }, { "action_loss": 0.0189, "epoch": 5.899219704803986, "learning_rate": 1.69821653495918e-05, "llm_loss": 0.0, "loss": 0.0189, "step": 62750 }, { "action_loss": 0.0232, "epoch": 5.903920278273949, "learning_rate": 1.6977431029101795e-05, "llm_loss": 0.0, "loss": 0.0232, "step": 62800 }, { "action_loss": 0.0073, "epoch": 5.908620851743913, "learning_rate": 1.6972693659015026e-05, "llm_loss": 0.0, "loss": 0.0073, "step": 62850 }, { "action_loss": 0.0203, "epoch": 5.913321425213876, "learning_rate": 1.6967953241402038e-05, "llm_loss": 0.0, "loss": 0.0203, "step": 62900 }, { "action_loss": 0.0191, "epoch": 5.918021998683839, "learning_rate": 1.6963209778334704e-05, "llm_loss": 0.0, "loss": 0.0191, "step": 62950 }, { "action_loss": 0.0292, "epoch": 5.922722572153803, "learning_rate": 1.695846327188623e-05, "llm_loss": 0.0, "loss": 0.0292, "step": 63000 }, { "action_loss": 0.0203, "epoch": 5.9274231456237665, "learning_rate": 1.6953713724131152e-05, "llm_loss": 0.0, "loss": 0.0203, "step": 63050 }, { "action_loss": 0.0233, "epoch": 5.932123719093729, "learning_rate": 1.694896113714534e-05, "llm_loss": 0.0, "loss": 0.0233, "step": 63100 }, { "action_loss": 0.017, "epoch": 5.936824292563693, "learning_rate": 1.694420551300598e-05, "llm_loss": 0.0, "loss": 0.017, "step": 63150 }, { "action_loss": 0.0171, "epoch": 5.941524866033657, "learning_rate": 1.6939446853791594e-05, "llm_loss": 0.0, "loss": 0.0171, "step": 63200 }, { "action_loss": 0.0137, "epoch": 5.946225439503619, "learning_rate": 1.6934685161582032e-05, "llm_loss": 0.0, "loss": 0.0137, "step": 63250 }, { "action_loss": 0.0242, "epoch": 5.950926012973583, "learning_rate": 1.692992043845847e-05, "llm_loss": 0.0, "loss": 0.0242, "step": 63300 }, { "action_loss": 0.0235, "epoch": 5.955626586443546, "learning_rate": 1.6925152686503396e-05, "llm_loss": 0.0, "loss": 0.0235, "step": 63350 }, { "action_loss": 0.0164, "epoch": 5.9603271599135095, "learning_rate": 1.692038190780064e-05, "llm_loss": 0.0, "loss": 0.0164, "step": 63400 }, { "action_loss": 0.0198, "epoch": 5.965027733383473, "learning_rate": 1.6915608104435347e-05, "llm_loss": 0.0, "loss": 0.0198, "step": 63450 }, { "action_loss": 0.0363, "epoch": 5.969728306853436, "learning_rate": 1.6910831278493976e-05, "llm_loss": 0.0, "loss": 0.0363, "step": 63500 }, { "action_loss": 0.0135, "epoch": 5.9744288803234, "learning_rate": 1.6906051432064323e-05, "llm_loss": 0.0, "loss": 0.0135, "step": 63550 }, { "action_loss": 0.01, "epoch": 5.979129453793362, "learning_rate": 1.690126856723549e-05, "llm_loss": 0.0, "loss": 0.01, "step": 63600 }, { "action_loss": 0.0171, "epoch": 5.983830027263326, "learning_rate": 1.6896482686097904e-05, "llm_loss": 0.0, "loss": 0.0171, "step": 63650 }, { "action_loss": 0.0134, "epoch": 5.98853060073329, "learning_rate": 1.6891693790743312e-05, "llm_loss": 0.0, "loss": 0.0134, "step": 63700 }, { "action_loss": 0.0331, "epoch": 5.9932311742032525, "learning_rate": 1.6886901883264773e-05, "llm_loss": 0.0, "loss": 0.0331, "step": 63750 }, { "action_loss": 0.0241, "epoch": 5.997931747673216, "learning_rate": 1.6882106965756675e-05, "llm_loss": 0.0, "loss": 0.0241, "step": 63800 }, { "action_loss": 0.0208, "epoch": 6.00263232114318, "learning_rate": 1.6877309040314706e-05, "llm_loss": 0.0, "loss": 0.0208, "step": 63850 }, { "action_loss": 0.04, "epoch": 6.007332894613143, "learning_rate": 1.687250810903587e-05, "llm_loss": 0.0, "loss": 0.04, "step": 63900 }, { "action_loss": 0.0172, "epoch": 6.012033468083106, "learning_rate": 1.6867704174018503e-05, "llm_loss": 0.0, "loss": 0.0172, "step": 63950 }, { "action_loss": 0.0063, "epoch": 6.01673404155307, "learning_rate": 1.686289723736223e-05, "llm_loss": 0.0, "loss": 0.0063, "step": 64000 }, { "action_loss": 0.0269, "epoch": 6.021434615023033, "learning_rate": 1.6858087301168004e-05, "llm_loss": 0.0, "loss": 0.0269, "step": 64050 }, { "action_loss": 0.0361, "epoch": 6.026135188492996, "learning_rate": 1.685327436753808e-05, "llm_loss": 0.0, "loss": 0.0361, "step": 64100 }, { "action_loss": 0.0134, "epoch": 6.030835761962959, "learning_rate": 1.6848458438576033e-05, "llm_loss": 0.0, "loss": 0.0134, "step": 64150 }, { "action_loss": 0.0148, "epoch": 6.035536335432923, "learning_rate": 1.6843639516386737e-05, "llm_loss": 0.0, "loss": 0.0148, "step": 64200 }, { "action_loss": 0.0284, "epoch": 6.0402369089028864, "learning_rate": 1.6838817603076378e-05, "llm_loss": 0.0, "loss": 0.0284, "step": 64250 }, { "action_loss": 0.0271, "epoch": 6.044937482372849, "learning_rate": 1.6833992700752448e-05, "llm_loss": 0.0, "loss": 0.0271, "step": 64300 }, { "action_loss": 0.0336, "epoch": 6.049638055842813, "learning_rate": 1.682916481152375e-05, "llm_loss": 0.0, "loss": 0.0336, "step": 64350 }, { "action_loss": 0.0163, "epoch": 6.0543386293127766, "learning_rate": 1.6824333937500394e-05, "llm_loss": 0.0, "loss": 0.0163, "step": 64400 }, { "action_loss": 0.0239, "epoch": 6.059039202782739, "learning_rate": 1.681950008079378e-05, "llm_loss": 0.0, "loss": 0.0239, "step": 64450 }, { "action_loss": 0.016, "epoch": 6.063739776252703, "learning_rate": 1.6814663243516628e-05, "llm_loss": 0.0, "loss": 0.016, "step": 64500 }, { "action_loss": 0.0361, "epoch": 6.068440349722666, "learning_rate": 1.6809823427782954e-05, "llm_loss": 0.0, "loss": 0.0361, "step": 64550 }, { "action_loss": 0.0106, "epoch": 6.073140923192629, "learning_rate": 1.6804980635708074e-05, "llm_loss": 0.0, "loss": 0.0106, "step": 64600 }, { "action_loss": 0.0264, "epoch": 6.077841496662593, "learning_rate": 1.680013486940861e-05, "llm_loss": 0.0, "loss": 0.0264, "step": 64650 }, { "action_loss": 0.0158, "epoch": 6.082542070132556, "learning_rate": 1.6795286131002475e-05, "llm_loss": 0.0, "loss": 0.0158, "step": 64700 }, { "action_loss": 0.0181, "epoch": 6.0872426436025195, "learning_rate": 1.6790434422608895e-05, "llm_loss": 0.0, "loss": 0.0181, "step": 64750 }, { "action_loss": 0.0333, "epoch": 6.091943217072483, "learning_rate": 1.6785579746348377e-05, "llm_loss": 0.0, "loss": 0.0333, "step": 64800 }, { "action_loss": 0.0199, "epoch": 6.096643790542446, "learning_rate": 1.678072210434274e-05, "llm_loss": 0.0, "loss": 0.0199, "step": 64850 }, { "action_loss": 0.0097, "epoch": 6.10134436401241, "learning_rate": 1.6775861498715096e-05, "llm_loss": 0.0, "loss": 0.0097, "step": 64900 }, { "action_loss": 0.0164, "epoch": 6.106044937482372, "learning_rate": 1.6770997931589845e-05, "llm_loss": 0.0, "loss": 0.0164, "step": 64950 }, { "action_loss": 0.0267, "epoch": 6.110745510952336, "learning_rate": 1.6766131405092687e-05, "llm_loss": 0.0, "loss": 0.0267, "step": 65000 }, { "action_loss": 0.0218, "epoch": 6.1154460844223, "learning_rate": 1.6761261921350613e-05, "llm_loss": 0.0, "loss": 0.0218, "step": 65050 }, { "action_loss": 0.0168, "epoch": 6.1201466578922625, "learning_rate": 1.675638948249191e-05, "llm_loss": 0.0, "loss": 0.0168, "step": 65100 }, { "action_loss": 0.0165, "epoch": 6.124847231362226, "learning_rate": 1.6751514090646158e-05, "llm_loss": 0.0, "loss": 0.0165, "step": 65150 }, { "action_loss": 0.0165, "epoch": 6.12954780483219, "learning_rate": 1.6746635747944216e-05, "llm_loss": 0.0, "loss": 0.0165, "step": 65200 }, { "action_loss": 0.0385, "epoch": 6.134248378302153, "learning_rate": 1.6741754456518246e-05, "llm_loss": 0.0, "loss": 0.0385, "step": 65250 }, { "action_loss": 0.0226, "epoch": 6.138948951772116, "learning_rate": 1.6736870218501697e-05, "llm_loss": 0.0, "loss": 0.0226, "step": 65300 }, { "action_loss": 0.0202, "epoch": 6.143649525242079, "learning_rate": 1.6731983036029294e-05, "llm_loss": 0.0, "loss": 0.0202, "step": 65350 }, { "action_loss": 0.02, "epoch": 6.148350098712043, "learning_rate": 1.6727092911237065e-05, "llm_loss": 0.0, "loss": 0.02, "step": 65400 }, { "action_loss": 0.0272, "epoch": 6.153050672182006, "learning_rate": 1.6722199846262314e-05, "llm_loss": 0.0, "loss": 0.0272, "step": 65450 }, { "action_loss": 0.0336, "epoch": 6.157751245651969, "learning_rate": 1.6717303843243632e-05, "llm_loss": 0.0, "loss": 0.0336, "step": 65500 }, { "action_loss": 0.017, "epoch": 6.162451819121933, "learning_rate": 1.6712404904320893e-05, "llm_loss": 0.0, "loss": 0.017, "step": 65550 }, { "action_loss": 0.0292, "epoch": 6.1671523925918965, "learning_rate": 1.6707503031635258e-05, "llm_loss": 0.0, "loss": 0.0292, "step": 65600 }, { "action_loss": 0.0239, "epoch": 6.171852966061859, "learning_rate": 1.6702598227329172e-05, "llm_loss": 0.0, "loss": 0.0239, "step": 65650 }, { "action_loss": 0.0226, "epoch": 6.176553539531823, "learning_rate": 1.6697690493546353e-05, "llm_loss": 0.0, "loss": 0.0226, "step": 65700 }, { "action_loss": 0.0305, "epoch": 6.181254113001787, "learning_rate": 1.66927798324318e-05, "llm_loss": 0.0, "loss": 0.0305, "step": 65750 }, { "action_loss": 0.0318, "epoch": 6.185954686471749, "learning_rate": 1.6687866246131803e-05, "llm_loss": 0.0, "loss": 0.0318, "step": 65800 }, { "action_loss": 0.0294, "epoch": 6.190655259941713, "learning_rate": 1.668294973679392e-05, "llm_loss": 0.0, "loss": 0.0294, "step": 65850 }, { "action_loss": 0.0457, "epoch": 6.195355833411676, "learning_rate": 1.6678030306566984e-05, "llm_loss": 0.0, "loss": 0.0457, "step": 65900 }, { "action_loss": 0.0204, "epoch": 6.2000564068816395, "learning_rate": 1.667310795760112e-05, "llm_loss": 0.0, "loss": 0.0204, "step": 65950 }, { "action_loss": 0.0169, "epoch": 6.204756980351603, "learning_rate": 1.666818269204771e-05, "llm_loss": 0.0, "loss": 0.0169, "step": 66000 }, { "action_loss": 0.0153, "epoch": 6.209457553821566, "learning_rate": 1.666325451205942e-05, "llm_loss": 0.0, "loss": 0.0153, "step": 66050 }, { "action_loss": 0.0234, "epoch": 6.21415812729153, "learning_rate": 1.6658323419790195e-05, "llm_loss": 0.0, "loss": 0.0234, "step": 66100 }, { "action_loss": 0.02, "epoch": 6.218858700761493, "learning_rate": 1.665338941739524e-05, "llm_loss": 0.0, "loss": 0.02, "step": 66150 }, { "action_loss": 0.0197, "epoch": 6.223559274231456, "learning_rate": 1.6648452507031043e-05, "llm_loss": 0.0, "loss": 0.0197, "step": 66200 }, { "action_loss": 0.0152, "epoch": 6.22825984770142, "learning_rate": 1.6643512690855363e-05, "llm_loss": 0.0, "loss": 0.0152, "step": 66250 }, { "action_loss": 0.0298, "epoch": 6.2329604211713825, "learning_rate": 1.6638569971027216e-05, "llm_loss": 0.0, "loss": 0.0298, "step": 66300 }, { "action_loss": 0.0295, "epoch": 6.237660994641346, "learning_rate": 1.6633624349706902e-05, "llm_loss": 0.0, "loss": 0.0295, "step": 66350 }, { "action_loss": 0.0189, "epoch": 6.24236156811131, "learning_rate": 1.662867582905598e-05, "llm_loss": 0.0, "loss": 0.0189, "step": 66400 }, { "action_loss": 0.0233, "epoch": 6.247062141581273, "learning_rate": 1.662372441123728e-05, "llm_loss": 0.0, "loss": 0.0233, "step": 66450 }, { "action_loss": 0.034, "epoch": 6.251762715051236, "learning_rate": 1.66187700984149e-05, "llm_loss": 0.0, "loss": 0.034, "step": 66500 }, { "action_loss": 0.0249, "epoch": 6.2564632885212, "learning_rate": 1.6613812892754196e-05, "llm_loss": 0.0, "loss": 0.0249, "step": 66550 }, { "action_loss": 0.0283, "epoch": 6.261163861991163, "learning_rate": 1.6608852796421802e-05, "llm_loss": 0.0, "loss": 0.0283, "step": 66600 }, { "action_loss": 0.0196, "epoch": 6.265864435461126, "learning_rate": 1.66038898115856e-05, "llm_loss": 0.0, "loss": 0.0196, "step": 66650 }, { "action_loss": 0.0156, "epoch": 6.27056500893109, "learning_rate": 1.659892394041474e-05, "llm_loss": 0.0, "loss": 0.0156, "step": 66700 }, { "action_loss": 0.0266, "epoch": 6.275265582401053, "learning_rate": 1.6593955185079634e-05, "llm_loss": 0.0, "loss": 0.0266, "step": 66750 }, { "action_loss": 0.024, "epoch": 6.2799661558710165, "learning_rate": 1.658898354775196e-05, "llm_loss": 0.0, "loss": 0.024, "step": 66800 }, { "action_loss": 0.0243, "epoch": 6.284666729340979, "learning_rate": 1.6584009030604653e-05, "llm_loss": 0.0, "loss": 0.0243, "step": 66850 }, { "action_loss": 0.0309, "epoch": 6.289367302810943, "learning_rate": 1.65790316358119e-05, "llm_loss": 0.0, "loss": 0.0309, "step": 66900 }, { "action_loss": 0.0139, "epoch": 6.294067876280907, "learning_rate": 1.6574051365549147e-05, "llm_loss": 0.0, "loss": 0.0139, "step": 66950 }, { "action_loss": 0.0192, "epoch": 6.298768449750869, "learning_rate": 1.6569068221993105e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 67000 }, { "action_loss": 0.0258, "epoch": 6.303469023220833, "learning_rate": 1.6564082207321736e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 67050 }, { "action_loss": 0.025, "epoch": 6.308169596690797, "learning_rate": 1.655909332371425e-05, "llm_loss": 0.0, "loss": 0.025, "step": 67100 }, { "action_loss": 0.0179, "epoch": 6.3128701701607595, "learning_rate": 1.6554101573351127e-05, "llm_loss": 0.0, "loss": 0.0179, "step": 67150 }, { "action_loss": 0.0065, "epoch": 6.317570743630723, "learning_rate": 1.6549106958414087e-05, "llm_loss": 0.0, "loss": 0.0065, "step": 67200 }, { "action_loss": 0.0296, "epoch": 6.322271317100686, "learning_rate": 1.6544109481086106e-05, "llm_loss": 0.0, "loss": 0.0296, "step": 67250 }, { "action_loss": 0.017, "epoch": 6.32697189057065, "learning_rate": 1.6539109143551407e-05, "llm_loss": 0.0, "loss": 0.017, "step": 67300 }, { "action_loss": 0.0227, "epoch": 6.331672464040613, "learning_rate": 1.653410594799547e-05, "llm_loss": 0.0, "loss": 0.0227, "step": 67350 }, { "action_loss": 0.0129, "epoch": 6.336373037510576, "learning_rate": 1.652909989660502e-05, "llm_loss": 0.0, "loss": 0.0129, "step": 67400 }, { "action_loss": 0.029, "epoch": 6.34107361098054, "learning_rate": 1.6524090991568033e-05, "llm_loss": 0.0, "loss": 0.029, "step": 67450 }, { "action_loss": 0.0195, "epoch": 6.345774184450503, "learning_rate": 1.651907923507373e-05, "llm_loss": 0.0, "loss": 0.0195, "step": 67500 }, { "action_loss": 0.0229, "epoch": 6.350474757920466, "learning_rate": 1.651406462931257e-05, "llm_loss": 0.0, "loss": 0.0229, "step": 67550 }, { "action_loss": 0.0172, "epoch": 6.35517533139043, "learning_rate": 1.650904717647628e-05, "llm_loss": 0.0, "loss": 0.0172, "step": 67600 }, { "action_loss": 0.0281, "epoch": 6.359875904860393, "learning_rate": 1.650402687875781e-05, "llm_loss": 0.0, "loss": 0.0281, "step": 67650 }, { "action_loss": 0.0284, "epoch": 6.364576478330356, "learning_rate": 1.649900373835136e-05, "llm_loss": 0.0, "loss": 0.0284, "step": 67700 }, { "action_loss": 0.0259, "epoch": 6.36927705180032, "learning_rate": 1.649397775745237e-05, "llm_loss": 0.0, "loss": 0.0259, "step": 67750 }, { "action_loss": 0.0127, "epoch": 6.373977625270283, "learning_rate": 1.6488948938257534e-05, "llm_loss": 0.0, "loss": 0.0127, "step": 67800 }, { "action_loss": 0.0223, "epoch": 6.378678198740246, "learning_rate": 1.6483917282964766e-05, "llm_loss": 0.0, "loss": 0.0223, "step": 67850 }, { "action_loss": 0.025, "epoch": 6.38337877221021, "learning_rate": 1.647888279377324e-05, "llm_loss": 0.0, "loss": 0.025, "step": 67900 }, { "action_loss": 0.0138, "epoch": 6.388079345680173, "learning_rate": 1.6473845472883357e-05, "llm_loss": 0.0, "loss": 0.0138, "step": 67950 }, { "action_loss": 0.0168, "epoch": 6.392779919150136, "learning_rate": 1.6468805322496754e-05, "llm_loss": 0.0, "loss": 0.0168, "step": 68000 }, { "action_loss": 0.0173, "epoch": 6.397480492620099, "learning_rate": 1.6463762344816314e-05, "llm_loss": 0.0, "loss": 0.0173, "step": 68050 }, { "action_loss": 0.0278, "epoch": 6.402181066090063, "learning_rate": 1.6458716542046144e-05, "llm_loss": 0.0, "loss": 0.0278, "step": 68100 }, { "action_loss": 0.0274, "epoch": 6.4068816395600265, "learning_rate": 1.6453667916391598e-05, "llm_loss": 0.0, "loss": 0.0274, "step": 68150 }, { "action_loss": 0.034, "epoch": 6.411582213029989, "learning_rate": 1.6448616470059254e-05, "llm_loss": 0.0, "loss": 0.034, "step": 68200 }, { "action_loss": 0.0333, "epoch": 6.416282786499953, "learning_rate": 1.644356220525693e-05, "llm_loss": 0.0, "loss": 0.0333, "step": 68250 }, { "action_loss": 0.0285, "epoch": 6.420983359969917, "learning_rate": 1.6438505124193667e-05, "llm_loss": 0.0, "loss": 0.0285, "step": 68300 }, { "action_loss": 0.0363, "epoch": 6.425683933439879, "learning_rate": 1.6433445229079746e-05, "llm_loss": 0.0, "loss": 0.0363, "step": 68350 }, { "action_loss": 0.0201, "epoch": 6.430384506909843, "learning_rate": 1.6428382522126675e-05, "llm_loss": 0.0, "loss": 0.0201, "step": 68400 }, { "action_loss": 0.0268, "epoch": 6.435085080379807, "learning_rate": 1.642331700554719e-05, "llm_loss": 0.0, "loss": 0.0268, "step": 68450 }, { "action_loss": 0.0166, "epoch": 6.4397856538497695, "learning_rate": 1.6418248681555252e-05, "llm_loss": 0.0, "loss": 0.0166, "step": 68500 }, { "action_loss": 0.0402, "epoch": 6.444486227319733, "learning_rate": 1.6413177552366056e-05, "llm_loss": 0.0, "loss": 0.0402, "step": 68550 }, { "action_loss": 0.0167, "epoch": 6.449186800789696, "learning_rate": 1.640810362019602e-05, "llm_loss": 0.0, "loss": 0.0167, "step": 68600 }, { "action_loss": 0.0136, "epoch": 6.45388737425966, "learning_rate": 1.6403026887262784e-05, "llm_loss": 0.0, "loss": 0.0136, "step": 68650 }, { "action_loss": 0.0229, "epoch": 6.458587947729623, "learning_rate": 1.6397947355785215e-05, "llm_loss": 0.0, "loss": 0.0229, "step": 68700 }, { "action_loss": 0.0227, "epoch": 6.463288521199586, "learning_rate": 1.6392865027983405e-05, "llm_loss": 0.0, "loss": 0.0227, "step": 68750 }, { "action_loss": 0.0143, "epoch": 6.46798909466955, "learning_rate": 1.6387779906078664e-05, "llm_loss": 0.0, "loss": 0.0143, "step": 68800 }, { "action_loss": 0.0226, "epoch": 6.472689668139513, "learning_rate": 1.638269199229352e-05, "llm_loss": 0.0, "loss": 0.0226, "step": 68850 }, { "action_loss": 0.0261, "epoch": 6.477390241609476, "learning_rate": 1.6377601288851733e-05, "llm_loss": 0.0, "loss": 0.0261, "step": 68900 }, { "action_loss": 0.0361, "epoch": 6.48209081507944, "learning_rate": 1.6372507797978273e-05, "llm_loss": 0.0, "loss": 0.0361, "step": 68950 }, { "action_loss": 0.0268, "epoch": 6.486791388549403, "learning_rate": 1.6367411521899335e-05, "llm_loss": 0.0, "loss": 0.0268, "step": 69000 }, { "action_loss": 0.0142, "epoch": 6.491491962019366, "learning_rate": 1.6362312462842323e-05, "llm_loss": 0.0, "loss": 0.0142, "step": 69050 }, { "action_loss": 0.0072, "epoch": 6.49619253548933, "learning_rate": 1.635721062303586e-05, "llm_loss": 0.0, "loss": 0.0072, "step": 69100 }, { "action_loss": 0.016, "epoch": 6.500893108959293, "learning_rate": 1.635210600470979e-05, "llm_loss": 0.0, "loss": 0.016, "step": 69150 }, { "action_loss": 0.0297, "epoch": 6.505593682429256, "learning_rate": 1.6346998610095168e-05, "llm_loss": 0.0, "loss": 0.0297, "step": 69200 }, { "action_loss": 0.0131, "epoch": 6.51029425589922, "learning_rate": 1.6341888441424255e-05, "llm_loss": 0.0, "loss": 0.0131, "step": 69250 }, { "action_loss": 0.0349, "epoch": 6.514994829369183, "learning_rate": 1.6336775500930536e-05, "llm_loss": 0.0, "loss": 0.0349, "step": 69300 }, { "action_loss": 0.0296, "epoch": 6.5196954028391465, "learning_rate": 1.6331659790848702e-05, "llm_loss": 0.0, "loss": 0.0296, "step": 69350 }, { "action_loss": 0.0329, "epoch": 6.52439597630911, "learning_rate": 1.6326541313414658e-05, "llm_loss": 0.0, "loss": 0.0329, "step": 69400 }, { "action_loss": 0.0131, "epoch": 6.529096549779073, "learning_rate": 1.632142007086551e-05, "llm_loss": 0.0, "loss": 0.0131, "step": 69450 }, { "action_loss": 0.0224, "epoch": 6.533797123249037, "learning_rate": 1.631629606543958e-05, "llm_loss": 0.0, "loss": 0.0224, "step": 69500 }, { "action_loss": 0.0225, "epoch": 6.538497696718999, "learning_rate": 1.6311169299376398e-05, "llm_loss": 0.0, "loss": 0.0225, "step": 69550 }, { "action_loss": 0.0286, "epoch": 6.543198270188963, "learning_rate": 1.63060397749167e-05, "llm_loss": 0.0, "loss": 0.0286, "step": 69600 }, { "action_loss": 0.0227, "epoch": 6.547898843658927, "learning_rate": 1.630090749430242e-05, "llm_loss": 0.0, "loss": 0.0227, "step": 69650 }, { "action_loss": 0.0066, "epoch": 6.5525994171288895, "learning_rate": 1.6295772459776708e-05, "llm_loss": 0.0, "loss": 0.0066, "step": 69700 }, { "action_loss": 0.017, "epoch": 6.557299990598853, "learning_rate": 1.629063467358391e-05, "llm_loss": 0.0, "loss": 0.017, "step": 69750 }, { "action_loss": 0.0329, "epoch": 6.562000564068816, "learning_rate": 1.6285494137969574e-05, "llm_loss": 0.0, "loss": 0.0329, "step": 69800 }, { "action_loss": 0.0175, "epoch": 6.56670113753878, "learning_rate": 1.6280350855180456e-05, "llm_loss": 0.0, "loss": 0.0175, "step": 69850 }, { "action_loss": 0.0192, "epoch": 6.571401711008743, "learning_rate": 1.627520482746451e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 69900 }, { "action_loss": 0.0166, "epoch": 6.576102284478706, "learning_rate": 1.6270056057070886e-05, "llm_loss": 0.0, "loss": 0.0166, "step": 69950 }, { "action_loss": 0.0214, "epoch": 6.58080285794867, "learning_rate": 1.6264904546249937e-05, "llm_loss": 0.0, "loss": 0.0214, "step": 70000 }, { "action_loss": 0.031, "epoch": 6.585503431418633, "learning_rate": 1.6259750297253216e-05, "llm_loss": 0.0, "loss": 0.031, "step": 70050 }, { "action_loss": 0.0197, "epoch": 6.590204004888596, "learning_rate": 1.6254593312333458e-05, "llm_loss": 0.0, "loss": 0.0197, "step": 70100 }, { "action_loss": 0.0235, "epoch": 6.59490457835856, "learning_rate": 1.6249433593744618e-05, "llm_loss": 0.0, "loss": 0.0235, "step": 70150 }, { "action_loss": 0.0101, "epoch": 6.5996051518285235, "learning_rate": 1.6244271143741827e-05, "llm_loss": 0.0, "loss": 0.0101, "step": 70200 }, { "action_loss": 0.019, "epoch": 6.604305725298486, "learning_rate": 1.623910596458141e-05, "llm_loss": 0.0, "loss": 0.019, "step": 70250 }, { "action_loss": 0.0234, "epoch": 6.60900629876845, "learning_rate": 1.62339380585209e-05, "llm_loss": 0.0, "loss": 0.0234, "step": 70300 }, { "action_loss": 0.0454, "epoch": 6.613706872238414, "learning_rate": 1.622876742781901e-05, "llm_loss": 0.0, "loss": 0.0454, "step": 70350 }, { "action_loss": 0.0195, "epoch": 6.618407445708376, "learning_rate": 1.6223594074735643e-05, "llm_loss": 0.0, "loss": 0.0195, "step": 70400 }, { "action_loss": 0.0224, "epoch": 6.62310801917834, "learning_rate": 1.62184180015319e-05, "llm_loss": 0.0, "loss": 0.0224, "step": 70450 }, { "action_loss": 0.0231, "epoch": 6.627808592648303, "learning_rate": 1.6213239210470058e-05, "llm_loss": 0.0, "loss": 0.0231, "step": 70500 }, { "action_loss": 0.0172, "epoch": 6.6325091661182665, "learning_rate": 1.6208057703813595e-05, "llm_loss": 0.0, "loss": 0.0172, "step": 70550 }, { "action_loss": 0.0342, "epoch": 6.63720973958823, "learning_rate": 1.6202873483827167e-05, "llm_loss": 0.0, "loss": 0.0342, "step": 70600 }, { "action_loss": 0.0316, "epoch": 6.641910313058193, "learning_rate": 1.6197686552776626e-05, "llm_loss": 0.0, "loss": 0.0316, "step": 70650 }, { "action_loss": 0.0124, "epoch": 6.646610886528157, "learning_rate": 1.6192496912928994e-05, "llm_loss": 0.0, "loss": 0.0124, "step": 70700 }, { "action_loss": 0.0164, "epoch": 6.651311459998119, "learning_rate": 1.6187304566552492e-05, "llm_loss": 0.0, "loss": 0.0164, "step": 70750 }, { "action_loss": 0.0237, "epoch": 6.656012033468083, "learning_rate": 1.6182109515916516e-05, "llm_loss": 0.0, "loss": 0.0237, "step": 70800 }, { "action_loss": 0.0132, "epoch": 6.660712606938047, "learning_rate": 1.617691176329164e-05, "llm_loss": 0.0, "loss": 0.0132, "step": 70850 }, { "action_loss": 0.0248, "epoch": 6.6654131804080095, "learning_rate": 1.6171711310949633e-05, "llm_loss": 0.0, "loss": 0.0248, "step": 70900 }, { "action_loss": 0.0226, "epoch": 6.670113753877973, "learning_rate": 1.6166508161163428e-05, "llm_loss": 0.0, "loss": 0.0226, "step": 70950 }, { "action_loss": 0.0296, "epoch": 6.674814327347937, "learning_rate": 1.6161302316207146e-05, "llm_loss": 0.0, "loss": 0.0296, "step": 71000 }, { "action_loss": 0.0127, "epoch": 6.6795149008179, "learning_rate": 1.6156093778356082e-05, "llm_loss": 0.0, "loss": 0.0127, "step": 71050 }, { "action_loss": 0.0266, "epoch": 6.684215474287863, "learning_rate": 1.6150882549886717e-05, "llm_loss": 0.0, "loss": 0.0266, "step": 71100 }, { "action_loss": 0.0173, "epoch": 6.688916047757827, "learning_rate": 1.6145668633076693e-05, "llm_loss": 0.0, "loss": 0.0173, "step": 71150 }, { "action_loss": 0.03, "epoch": 6.69361662122779, "learning_rate": 1.6140452030204837e-05, "llm_loss": 0.0, "loss": 0.03, "step": 71200 }, { "action_loss": 0.0318, "epoch": 6.698317194697753, "learning_rate": 1.613523274355115e-05, "llm_loss": 0.0, "loss": 0.0318, "step": 71250 }, { "action_loss": 0.0171, "epoch": 6.703017768167716, "learning_rate": 1.6130010775396803e-05, "llm_loss": 0.0, "loss": 0.0171, "step": 71300 }, { "action_loss": 0.0158, "epoch": 6.70771834163768, "learning_rate": 1.612478612802414e-05, "llm_loss": 0.0, "loss": 0.0158, "step": 71350 }, { "action_loss": 0.0391, "epoch": 6.712418915107643, "learning_rate": 1.6119558803716677e-05, "llm_loss": 0.0, "loss": 0.0391, "step": 71400 }, { "action_loss": 0.033, "epoch": 6.717119488577606, "learning_rate": 1.6114328804759097e-05, "llm_loss": 0.0, "loss": 0.033, "step": 71450 }, { "action_loss": 0.0162, "epoch": 6.72182006204757, "learning_rate": 1.6109096133437254e-05, "llm_loss": 0.0, "loss": 0.0162, "step": 71500 }, { "action_loss": 0.021, "epoch": 6.726520635517533, "learning_rate": 1.6103860792038172e-05, "llm_loss": 0.0, "loss": 0.021, "step": 71550 }, { "action_loss": 0.0301, "epoch": 6.731221208987496, "learning_rate": 1.6098622782850034e-05, "llm_loss": 0.0, "loss": 0.0301, "step": 71600 }, { "action_loss": 0.0291, "epoch": 6.73592178245746, "learning_rate": 1.6093382108162202e-05, "llm_loss": 0.0, "loss": 0.0291, "step": 71650 }, { "action_loss": 0.0214, "epoch": 6.740622355927423, "learning_rate": 1.6088138770265192e-05, "llm_loss": 0.0, "loss": 0.0214, "step": 71700 }, { "action_loss": 0.0322, "epoch": 6.745322929397386, "learning_rate": 1.6082892771450693e-05, "llm_loss": 0.0, "loss": 0.0322, "step": 71750 }, { "action_loss": 0.0099, "epoch": 6.75002350286735, "learning_rate": 1.6077644114011543e-05, "llm_loss": 0.0, "loss": 0.0099, "step": 71800 }, { "action_loss": 0.0164, "epoch": 6.754724076337313, "learning_rate": 1.607239280024176e-05, "llm_loss": 0.0, "loss": 0.0164, "step": 71850 }, { "action_loss": 0.0154, "epoch": 6.7594246498072765, "learning_rate": 1.6067138832436504e-05, "llm_loss": 0.0, "loss": 0.0154, "step": 71900 }, { "action_loss": 0.0236, "epoch": 6.76412522327724, "learning_rate": 1.6061882212892116e-05, "llm_loss": 0.0, "loss": 0.0236, "step": 71950 }, { "action_loss": 0.0133, "epoch": 6.768825796747203, "learning_rate": 1.6056622943906074e-05, "llm_loss": 0.0, "loss": 0.0133, "step": 72000 }, { "action_loss": 0.0263, "epoch": 6.773526370217167, "learning_rate": 1.6051361027777033e-05, "llm_loss": 0.0, "loss": 0.0263, "step": 72050 }, { "action_loss": 0.0199, "epoch": 6.77822694368713, "learning_rate": 1.6046096466804795e-05, "llm_loss": 0.0, "loss": 0.0199, "step": 72100 }, { "action_loss": 0.0132, "epoch": 6.782927517157093, "learning_rate": 1.6040829263290315e-05, "llm_loss": 0.0, "loss": 0.0132, "step": 72150 }, { "action_loss": 0.0164, "epoch": 6.787628090627057, "learning_rate": 1.6035559419535714e-05, "llm_loss": 0.0, "loss": 0.0164, "step": 72200 }, { "action_loss": 0.0175, "epoch": 6.7923286640970195, "learning_rate": 1.603028693784426e-05, "llm_loss": 0.0, "loss": 0.0175, "step": 72250 }, { "action_loss": 0.019, "epoch": 6.797029237566983, "learning_rate": 1.602501182052037e-05, "llm_loss": 0.0, "loss": 0.019, "step": 72300 }, { "action_loss": 0.0124, "epoch": 6.801729811036947, "learning_rate": 1.601973406986962e-05, "llm_loss": 0.0, "loss": 0.0124, "step": 72350 }, { "action_loss": 0.0202, "epoch": 6.80643038450691, "learning_rate": 1.601445368819874e-05, "llm_loss": 0.0, "loss": 0.0202, "step": 72400 }, { "action_loss": 0.0304, "epoch": 6.811130957976873, "learning_rate": 1.6009170677815592e-05, "llm_loss": 0.0, "loss": 0.0304, "step": 72450 }, { "action_loss": 0.026, "epoch": 6.815831531446836, "learning_rate": 1.600388504102921e-05, "llm_loss": 0.0, "loss": 0.026, "step": 72500 }, { "action_loss": 0.0192, "epoch": 6.8205321049168, "learning_rate": 1.599859678014976e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 72550 }, { "action_loss": 0.0165, "epoch": 6.825232678386763, "learning_rate": 1.5993305897488567e-05, "llm_loss": 0.0, "loss": 0.0165, "step": 72600 }, { "action_loss": 0.0198, "epoch": 6.829933251856726, "learning_rate": 1.5988012395358086e-05, "llm_loss": 0.0, "loss": 0.0198, "step": 72650 }, { "action_loss": 0.018, "epoch": 6.83463382532669, "learning_rate": 1.5982716276071933e-05, "llm_loss": 0.0, "loss": 0.018, "step": 72700 }, { "action_loss": 0.0159, "epoch": 6.8393343987966535, "learning_rate": 1.5977417541944856e-05, "llm_loss": 0.0, "loss": 0.0159, "step": 72750 }, { "action_loss": 0.0149, "epoch": 6.844034972266616, "learning_rate": 1.5972116195292753e-05, "llm_loss": 0.0, "loss": 0.0149, "step": 72800 }, { "action_loss": 0.0134, "epoch": 6.84873554573658, "learning_rate": 1.5966812238432663e-05, "llm_loss": 0.0, "loss": 0.0134, "step": 72850 }, { "action_loss": 0.0268, "epoch": 6.853436119206544, "learning_rate": 1.5961505673682762e-05, "llm_loss": 0.0, "loss": 0.0268, "step": 72900 }, { "action_loss": 0.0225, "epoch": 6.858136692676506, "learning_rate": 1.5956196503362367e-05, "llm_loss": 0.0, "loss": 0.0225, "step": 72950 }, { "action_loss": 0.0099, "epoch": 6.86283726614647, "learning_rate": 1.595088472979194e-05, "llm_loss": 0.0, "loss": 0.0099, "step": 73000 }, { "action_loss": 0.0202, "epoch": 6.867537839616433, "learning_rate": 1.594557035529307e-05, "llm_loss": 0.0, "loss": 0.0202, "step": 73050 }, { "action_loss": 0.0195, "epoch": 6.8722384130863965, "learning_rate": 1.5940253382188488e-05, "llm_loss": 0.0, "loss": 0.0195, "step": 73100 }, { "action_loss": 0.0109, "epoch": 6.87693898655636, "learning_rate": 1.593493381280207e-05, "llm_loss": 0.0, "loss": 0.0109, "step": 73150 }, { "action_loss": 0.032, "epoch": 6.881639560026323, "learning_rate": 1.592961164945881e-05, "llm_loss": 0.0, "loss": 0.032, "step": 73200 }, { "action_loss": 0.0285, "epoch": 6.886340133496287, "learning_rate": 1.592428689448485e-05, "llm_loss": 0.0, "loss": 0.0285, "step": 73250 }, { "action_loss": 0.0226, "epoch": 6.89104070696625, "learning_rate": 1.591895955020745e-05, "llm_loss": 0.0, "loss": 0.0226, "step": 73300 }, { "action_loss": 0.0228, "epoch": 6.895741280436213, "learning_rate": 1.5913629618955017e-05, "llm_loss": 0.0, "loss": 0.0228, "step": 73350 }, { "action_loss": 0.0284, "epoch": 6.900441853906177, "learning_rate": 1.5908297103057082e-05, "llm_loss": 0.0, "loss": 0.0284, "step": 73400 }, { "action_loss": 0.0204, "epoch": 6.9051424273761395, "learning_rate": 1.59029620048443e-05, "llm_loss": 0.0, "loss": 0.0204, "step": 73450 }, { "action_loss": 0.0221, "epoch": 6.909843000846103, "learning_rate": 1.5897624326648465e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 73500 }, { "action_loss": 0.0168, "epoch": 6.914543574316067, "learning_rate": 1.5892284070802495e-05, "llm_loss": 0.0, "loss": 0.0168, "step": 73550 }, { "action_loss": 0.0355, "epoch": 6.91924414778603, "learning_rate": 1.5886941239640428e-05, "llm_loss": 0.0, "loss": 0.0355, "step": 73600 }, { "action_loss": 0.0258, "epoch": 6.923944721255993, "learning_rate": 1.5881595835497436e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 73650 }, { "action_loss": 0.029, "epoch": 6.928645294725957, "learning_rate": 1.5876247860709813e-05, "llm_loss": 0.0, "loss": 0.029, "step": 73700 }, { "action_loss": 0.0192, "epoch": 6.93334586819592, "learning_rate": 1.587089731761498e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 73750 }, { "action_loss": 0.0168, "epoch": 6.938046441665883, "learning_rate": 1.5865544208551464e-05, "llm_loss": 0.0, "loss": 0.0168, "step": 73800 }, { "action_loss": 0.0134, "epoch": 6.942747015135847, "learning_rate": 1.586018853585894e-05, "llm_loss": 0.0, "loss": 0.0134, "step": 73850 }, { "action_loss": 0.0297, "epoch": 6.94744758860581, "learning_rate": 1.585483030187818e-05, "llm_loss": 0.0, "loss": 0.0297, "step": 73900 }, { "action_loss": 0.0122, "epoch": 6.9521481620757735, "learning_rate": 1.5849469508951093e-05, "llm_loss": 0.0, "loss": 0.0122, "step": 73950 }, { "action_loss": 0.0415, "epoch": 6.956848735545736, "learning_rate": 1.584410615942069e-05, "llm_loss": 0.0, "loss": 0.0415, "step": 74000 }, { "action_loss": 0.0097, "epoch": 6.9615493090157, "learning_rate": 1.5838740255631115e-05, "llm_loss": 0.0, "loss": 0.0097, "step": 74050 }, { "action_loss": 0.018, "epoch": 6.966249882485664, "learning_rate": 1.583337179992762e-05, "llm_loss": 0.0, "loss": 0.018, "step": 74100 }, { "action_loss": 0.0352, "epoch": 6.970950455955626, "learning_rate": 1.582800079465657e-05, "llm_loss": 0.0, "loss": 0.0352, "step": 74150 }, { "action_loss": 0.0327, "epoch": 6.97565102942559, "learning_rate": 1.5822627242165453e-05, "llm_loss": 0.0, "loss": 0.0327, "step": 74200 }, { "action_loss": 0.0321, "epoch": 6.980351602895553, "learning_rate": 1.581725114480286e-05, "llm_loss": 0.0, "loss": 0.0321, "step": 74250 }, { "action_loss": 0.0107, "epoch": 6.9850521763655165, "learning_rate": 1.5811872504918502e-05, "llm_loss": 0.0, "loss": 0.0107, "step": 74300 }, { "action_loss": 0.0121, "epoch": 6.98975274983548, "learning_rate": 1.5806491324863197e-05, "llm_loss": 0.0, "loss": 0.0121, "step": 74350 }, { "action_loss": 0.0244, "epoch": 6.994453323305443, "learning_rate": 1.5801107606988878e-05, "llm_loss": 0.0, "loss": 0.0244, "step": 74400 }, { "action_loss": 0.029, "epoch": 6.999153896775407, "learning_rate": 1.5795721353648584e-05, "llm_loss": 0.0, "loss": 0.029, "step": 74450 }, { "action_loss": 0.034, "epoch": 7.00385447024537, "learning_rate": 1.579033256719646e-05, "llm_loss": 0.0, "loss": 0.034, "step": 74500 }, { "action_loss": 0.0099, "epoch": 7.008555043715333, "learning_rate": 1.578494124998776e-05, "llm_loss": 0.0, "loss": 0.0099, "step": 74550 }, { "action_loss": 0.0253, "epoch": 7.013255617185297, "learning_rate": 1.5779547404378847e-05, "llm_loss": 0.0, "loss": 0.0253, "step": 74600 }, { "action_loss": 0.0258, "epoch": 7.01795619065526, "learning_rate": 1.5774151032727186e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 74650 }, { "action_loss": 0.0218, "epoch": 7.022656764125223, "learning_rate": 1.5768752137391352e-05, "llm_loss": 0.0, "loss": 0.0218, "step": 74700 }, { "action_loss": 0.01, "epoch": 7.027357337595187, "learning_rate": 1.5763350720731006e-05, "llm_loss": 0.0, "loss": 0.01, "step": 74750 }, { "action_loss": 0.0195, "epoch": 7.0320579110651495, "learning_rate": 1.575794678510693e-05, "llm_loss": 0.0, "loss": 0.0195, "step": 74800 }, { "action_loss": 0.0334, "epoch": 7.036758484535113, "learning_rate": 1.5752540332881e-05, "llm_loss": 0.0, "loss": 0.0334, "step": 74850 }, { "action_loss": 0.0102, "epoch": 7.041459058005077, "learning_rate": 1.5747131366416192e-05, "llm_loss": 0.0, "loss": 0.0102, "step": 74900 }, { "action_loss": 0.0199, "epoch": 7.04615963147504, "learning_rate": 1.5741719888076573e-05, "llm_loss": 0.0, "loss": 0.0199, "step": 74950 }, { "action_loss": 0.0132, "epoch": 7.050860204945003, "learning_rate": 1.5736305900227325e-05, "llm_loss": 0.0, "loss": 0.0132, "step": 75000 }, { "action_loss": 0.0191, "epoch": 7.055560778414967, "learning_rate": 1.5730889405234713e-05, "llm_loss": 0.0, "loss": 0.0191, "step": 75050 }, { "action_loss": 0.0403, "epoch": 7.06026135188493, "learning_rate": 1.57254704054661e-05, "llm_loss": 0.0, "loss": 0.0403, "step": 75100 }, { "action_loss": 0.0213, "epoch": 7.064961925354893, "learning_rate": 1.572004890328995e-05, "llm_loss": 0.0, "loss": 0.0213, "step": 75150 }, { "action_loss": 0.0106, "epoch": 7.069662498824856, "learning_rate": 1.5714624901075817e-05, "llm_loss": 0.0, "loss": 0.0106, "step": 75200 }, { "action_loss": 0.0232, "epoch": 7.07436307229482, "learning_rate": 1.570919840119434e-05, "llm_loss": 0.0, "loss": 0.0232, "step": 75250 }, { "action_loss": 0.0381, "epoch": 7.0790636457647835, "learning_rate": 1.5703769406017263e-05, "llm_loss": 0.0, "loss": 0.0381, "step": 75300 }, { "action_loss": 0.0402, "epoch": 7.083764219234746, "learning_rate": 1.569833791791741e-05, "llm_loss": 0.0, "loss": 0.0402, "step": 75350 }, { "action_loss": 0.0289, "epoch": 7.08846479270471, "learning_rate": 1.5692903939268704e-05, "llm_loss": 0.0, "loss": 0.0289, "step": 75400 }, { "action_loss": 0.0196, "epoch": 7.093165366174674, "learning_rate": 1.5687467472446153e-05, "llm_loss": 0.0, "loss": 0.0196, "step": 75450 }, { "action_loss": 0.0098, "epoch": 7.097865939644636, "learning_rate": 1.568202851982584e-05, "llm_loss": 0.0, "loss": 0.0098, "step": 75500 }, { "action_loss": 0.0227, "epoch": 7.1025665131146, "learning_rate": 1.5676587083784955e-05, "llm_loss": 0.0, "loss": 0.0227, "step": 75550 }, { "action_loss": 0.0067, "epoch": 7.107267086584564, "learning_rate": 1.567114316670176e-05, "llm_loss": 0.0, "loss": 0.0067, "step": 75600 }, { "action_loss": 0.0259, "epoch": 7.1119676600545265, "learning_rate": 1.5665696770955606e-05, "llm_loss": 0.0, "loss": 0.0259, "step": 75650 }, { "action_loss": 0.0261, "epoch": 7.11666823352449, "learning_rate": 1.5660247898926927e-05, "llm_loss": 0.0, "loss": 0.0261, "step": 75700 }, { "action_loss": 0.0159, "epoch": 7.121368806994453, "learning_rate": 1.565479655299724e-05, "llm_loss": 0.0, "loss": 0.0159, "step": 75750 }, { "action_loss": 0.0162, "epoch": 7.126069380464417, "learning_rate": 1.564934273554914e-05, "llm_loss": 0.0, "loss": 0.0162, "step": 75800 }, { "action_loss": 0.026, "epoch": 7.13076995393438, "learning_rate": 1.5643886448966306e-05, "llm_loss": 0.0, "loss": 0.026, "step": 75850 }, { "action_loss": 0.0166, "epoch": 7.135470527404343, "learning_rate": 1.563842769563349e-05, "llm_loss": 0.0, "loss": 0.0166, "step": 75900 }, { "action_loss": 0.0169, "epoch": 7.140171100874307, "learning_rate": 1.5632966477936532e-05, "llm_loss": 0.0, "loss": 0.0169, "step": 75950 }, { "action_loss": 0.0173, "epoch": 7.14487167434427, "learning_rate": 1.5627502798262343e-05, "llm_loss": 0.0, "loss": 0.0173, "step": 76000 }, { "action_loss": 0.0129, "epoch": 7.149572247814233, "learning_rate": 1.5622036658998906e-05, "llm_loss": 0.0, "loss": 0.0129, "step": 76050 }, { "action_loss": 0.0224, "epoch": 7.154272821284197, "learning_rate": 1.5616568062535292e-05, "llm_loss": 0.0, "loss": 0.0224, "step": 76100 }, { "action_loss": 0.0301, "epoch": 7.15897339475416, "learning_rate": 1.561109701126163e-05, "llm_loss": 0.0, "loss": 0.0301, "step": 76150 }, { "action_loss": 0.0425, "epoch": 7.163673968224123, "learning_rate": 1.5605623507569133e-05, "llm_loss": 0.0, "loss": 0.0425, "step": 76200 }, { "action_loss": 0.0197, "epoch": 7.168374541694087, "learning_rate": 1.5600147553850082e-05, "llm_loss": 0.0, "loss": 0.0197, "step": 76250 }, { "action_loss": 0.0241, "epoch": 7.17307511516405, "learning_rate": 1.5594669152497833e-05, "llm_loss": 0.0, "loss": 0.0241, "step": 76300 }, { "action_loss": 0.0153, "epoch": 7.177775688634013, "learning_rate": 1.55891883059068e-05, "llm_loss": 0.0, "loss": 0.0153, "step": 76350 }, { "action_loss": 0.0236, "epoch": 7.182476262103977, "learning_rate": 1.558370501647248e-05, "llm_loss": 0.0, "loss": 0.0236, "step": 76400 }, { "action_loss": 0.0288, "epoch": 7.18717683557394, "learning_rate": 1.557821928659143e-05, "llm_loss": 0.0, "loss": 0.0288, "step": 76450 }, { "action_loss": 0.0358, "epoch": 7.1918774090439035, "learning_rate": 1.5572731118661275e-05, "llm_loss": 0.0, "loss": 0.0358, "step": 76500 }, { "action_loss": 0.0179, "epoch": 7.196577982513866, "learning_rate": 1.5567240515080706e-05, "llm_loss": 0.0, "loss": 0.0179, "step": 76550 }, { "action_loss": 0.0233, "epoch": 7.20127855598383, "learning_rate": 1.556174747824948e-05, "llm_loss": 0.0, "loss": 0.0233, "step": 76600 }, { "action_loss": 0.0192, "epoch": 7.205979129453794, "learning_rate": 1.555625201056841e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 76650 }, { "action_loss": 0.0319, "epoch": 7.210679702923756, "learning_rate": 1.555075411443938e-05, "llm_loss": 0.0, "loss": 0.0319, "step": 76700 }, { "action_loss": 0.0233, "epoch": 7.21538027639372, "learning_rate": 1.5545253792265338e-05, "llm_loss": 0.0, "loss": 0.0233, "step": 76750 }, { "action_loss": 0.0214, "epoch": 7.220080849863684, "learning_rate": 1.553975104645028e-05, "llm_loss": 0.0, "loss": 0.0214, "step": 76800 }, { "action_loss": 0.0201, "epoch": 7.2247814233336465, "learning_rate": 1.5534245879399273e-05, "llm_loss": 0.0, "loss": 0.0201, "step": 76850 }, { "action_loss": 0.0292, "epoch": 7.22948199680361, "learning_rate": 1.5528738293518434e-05, "llm_loss": 0.0, "loss": 0.0292, "step": 76900 }, { "action_loss": 0.0135, "epoch": 7.234182570273573, "learning_rate": 1.552322829121494e-05, "llm_loss": 0.0, "loss": 0.0135, "step": 76950 }, { "action_loss": 0.0172, "epoch": 7.238883143743537, "learning_rate": 1.5517715874897028e-05, "llm_loss": 0.0, "loss": 0.0172, "step": 77000 }, { "action_loss": 0.0217, "epoch": 7.2435837172135, "learning_rate": 1.5512201046973987e-05, "llm_loss": 0.0, "loss": 0.0217, "step": 77050 }, { "action_loss": 0.038, "epoch": 7.248284290683463, "learning_rate": 1.5506683809856155e-05, "llm_loss": 0.0, "loss": 0.038, "step": 77100 }, { "action_loss": 0.039, "epoch": 7.252984864153427, "learning_rate": 1.5501164165954935e-05, "llm_loss": 0.0, "loss": 0.039, "step": 77150 }, { "action_loss": 0.0153, "epoch": 7.25768543762339, "learning_rate": 1.549564211768277e-05, "llm_loss": 0.0, "loss": 0.0153, "step": 77200 }, { "action_loss": 0.0153, "epoch": 7.262386011093353, "learning_rate": 1.549011766745316e-05, "llm_loss": 0.0, "loss": 0.0153, "step": 77250 }, { "action_loss": 0.0226, "epoch": 7.267086584563317, "learning_rate": 1.548459081768065e-05, "llm_loss": 0.0, "loss": 0.0226, "step": 77300 }, { "action_loss": 0.0185, "epoch": 7.2717871580332805, "learning_rate": 1.5479061570780848e-05, "llm_loss": 0.0, "loss": 0.0185, "step": 77350 }, { "action_loss": 0.0232, "epoch": 7.276487731503243, "learning_rate": 1.5473529929170386e-05, "llm_loss": 0.0, "loss": 0.0232, "step": 77400 }, { "action_loss": 0.0165, "epoch": 7.281188304973207, "learning_rate": 1.5467995895266962e-05, "llm_loss": 0.0, "loss": 0.0165, "step": 77450 }, { "action_loss": 0.0183, "epoch": 7.28588887844317, "learning_rate": 1.5462459471489314e-05, "llm_loss": 0.0, "loss": 0.0183, "step": 77500 }, { "action_loss": 0.0254, "epoch": 7.290589451913133, "learning_rate": 1.5456920660257218e-05, "llm_loss": 0.0, "loss": 0.0254, "step": 77550 }, { "action_loss": 0.0189, "epoch": 7.295290025383097, "learning_rate": 1.5451379463991508e-05, "llm_loss": 0.0, "loss": 0.0189, "step": 77600 }, { "action_loss": 0.0374, "epoch": 7.29999059885306, "learning_rate": 1.5445835885114044e-05, "llm_loss": 0.0, "loss": 0.0374, "step": 77650 }, { "action_loss": 0.0333, "epoch": 7.3046911723230235, "learning_rate": 1.5440289926047737e-05, "llm_loss": 0.0, "loss": 0.0333, "step": 77700 }, { "action_loss": 0.0221, "epoch": 7.309391745792987, "learning_rate": 1.5434741589216537e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 77750 }, { "action_loss": 0.0156, "epoch": 7.31409231926295, "learning_rate": 1.5429190877045433e-05, "llm_loss": 0.0, "loss": 0.0156, "step": 77800 }, { "action_loss": 0.0281, "epoch": 7.318792892732914, "learning_rate": 1.542363779196045e-05, "llm_loss": 0.0, "loss": 0.0281, "step": 77850 }, { "action_loss": 0.0062, "epoch": 7.323493466202876, "learning_rate": 1.5418082336388657e-05, "llm_loss": 0.0, "loss": 0.0062, "step": 77900 }, { "action_loss": 0.0161, "epoch": 7.32819403967284, "learning_rate": 1.541252451275815e-05, "llm_loss": 0.0, "loss": 0.0161, "step": 77950 }, { "action_loss": 0.0217, "epoch": 7.332894613142804, "learning_rate": 1.5406964323498062e-05, "llm_loss": 0.0, "loss": 0.0217, "step": 78000 }, { "action_loss": 0.0102, "epoch": 7.337595186612766, "learning_rate": 1.540140177103856e-05, "llm_loss": 0.0, "loss": 0.0102, "step": 78050 }, { "action_loss": 0.0189, "epoch": 7.34229576008273, "learning_rate": 1.5395836857810853e-05, "llm_loss": 0.0, "loss": 0.0189, "step": 78100 }, { "action_loss": 0.0394, "epoch": 7.346996333552694, "learning_rate": 1.5390269586247172e-05, "llm_loss": 0.0, "loss": 0.0394, "step": 78150 }, { "action_loss": 0.0124, "epoch": 7.3516969070226565, "learning_rate": 1.5384699958780777e-05, "llm_loss": 0.0, "loss": 0.0124, "step": 78200 }, { "action_loss": 0.0099, "epoch": 7.35639748049262, "learning_rate": 1.5379239440512905e-05, "llm_loss": 0.0, "loss": 0.0099, "step": 78250 }, { "action_loss": 0.0184, "epoch": 7.361098053962584, "learning_rate": 1.5373665155541787e-05, "llm_loss": 0.0, "loss": 0.0184, "step": 78300 }, { "action_loss": 0.0388, "epoch": 7.365798627432547, "learning_rate": 1.5368088521925185e-05, "llm_loss": 0.0, "loss": 0.0388, "step": 78350 }, { "action_loss": 0.0244, "epoch": 7.37049920090251, "learning_rate": 1.5362509542100465e-05, "llm_loss": 0.0, "loss": 0.0244, "step": 78400 }, { "action_loss": 0.027, "epoch": 7.375199774372473, "learning_rate": 1.5356928218505998e-05, "llm_loss": 0.0, "loss": 0.027, "step": 78450 }, { "action_loss": 0.0236, "epoch": 7.379900347842437, "learning_rate": 1.535134455358119e-05, "llm_loss": 0.0, "loss": 0.0236, "step": 78500 }, { "action_loss": 0.0253, "epoch": 7.3846009213124, "learning_rate": 1.5345758549766477e-05, "llm_loss": 0.0, "loss": 0.0253, "step": 78550 }, { "action_loss": 0.0363, "epoch": 7.389301494782363, "learning_rate": 1.5340170209503304e-05, "llm_loss": 0.0, "loss": 0.0363, "step": 78600 }, { "action_loss": 0.0236, "epoch": 7.394002068252327, "learning_rate": 1.5334579535234146e-05, "llm_loss": 0.0, "loss": 0.0236, "step": 78650 }, { "action_loss": 0.019, "epoch": 7.3987026417222905, "learning_rate": 1.5328986529402497e-05, "llm_loss": 0.0, "loss": 0.019, "step": 78700 }, { "action_loss": 0.0272, "epoch": 7.403403215192253, "learning_rate": 1.5323391194452864e-05, "llm_loss": 0.0, "loss": 0.0272, "step": 78750 }, { "action_loss": 0.0225, "epoch": 7.408103788662217, "learning_rate": 1.5317793532830787e-05, "llm_loss": 0.0, "loss": 0.0225, "step": 78800 }, { "action_loss": 0.0134, "epoch": 7.41280436213218, "learning_rate": 1.53121935469828e-05, "llm_loss": 0.0, "loss": 0.0134, "step": 78850 }, { "action_loss": 0.0282, "epoch": 7.417504935602143, "learning_rate": 1.5306591239356477e-05, "llm_loss": 0.0, "loss": 0.0282, "step": 78900 }, { "action_loss": 0.0239, "epoch": 7.422205509072107, "learning_rate": 1.5300986612400386e-05, "llm_loss": 0.0, "loss": 0.0239, "step": 78950 }, { "action_loss": 0.0317, "epoch": 7.42690608254207, "learning_rate": 1.5295379668564124e-05, "llm_loss": 0.0, "loss": 0.0317, "step": 79000 }, { "action_loss": 0.0157, "epoch": 7.4316066560120335, "learning_rate": 1.5289770410298293e-05, "llm_loss": 0.0, "loss": 0.0157, "step": 79050 }, { "action_loss": 0.0245, "epoch": 7.436307229481997, "learning_rate": 1.5284158840054507e-05, "llm_loss": 0.0, "loss": 0.0245, "step": 79100 }, { "action_loss": 0.0136, "epoch": 7.44100780295196, "learning_rate": 1.5278544960285392e-05, "llm_loss": 0.0, "loss": 0.0136, "step": 79150 }, { "action_loss": 0.0184, "epoch": 7.445708376421924, "learning_rate": 1.527304111977483e-05, "llm_loss": 0.0, "loss": 0.0184, "step": 79200 }, { "action_loss": 0.013, "epoch": 7.450408949891886, "learning_rate": 1.526742267438525e-05, "llm_loss": 0.0, "loss": 0.013, "step": 79250 }, { "action_loss": 0.0162, "epoch": 7.45510952336185, "learning_rate": 1.5261801926785148e-05, "llm_loss": 0.0, "loss": 0.0162, "step": 79300 }, { "action_loss": 0.0117, "epoch": 7.459810096831814, "learning_rate": 1.5256178879431163e-05, "llm_loss": 0.0, "loss": 0.0117, "step": 79350 }, { "action_loss": 0.0355, "epoch": 7.4645106703017765, "learning_rate": 1.5250553534780932e-05, "llm_loss": 0.0, "loss": 0.0355, "step": 79400 }, { "action_loss": 0.032, "epoch": 7.46921124377174, "learning_rate": 1.5244925895293106e-05, "llm_loss": 0.0, "loss": 0.032, "step": 79450 }, { "action_loss": 0.032, "epoch": 7.473911817241704, "learning_rate": 1.523929596342733e-05, "llm_loss": 0.0, "loss": 0.032, "step": 79500 }, { "action_loss": 0.0194, "epoch": 7.478612390711667, "learning_rate": 1.5233663741644256e-05, "llm_loss": 0.0, "loss": 0.0194, "step": 79550 }, { "action_loss": 0.0291, "epoch": 7.48331296418163, "learning_rate": 1.5228029232405534e-05, "llm_loss": 0.0, "loss": 0.0291, "step": 79600 }, { "action_loss": 0.0217, "epoch": 7.488013537651593, "learning_rate": 1.5222505196435454e-05, "llm_loss": 0.0, "loss": 0.0217, "step": 79650 }, { "action_loss": 0.0064, "epoch": 7.492714111121557, "learning_rate": 1.5216866165300831e-05, "llm_loss": 0.0, "loss": 0.0064, "step": 79700 }, { "action_loss": 0.0167, "epoch": 7.49741468459152, "learning_rate": 1.5211224854052211e-05, "llm_loss": 0.0, "loss": 0.0167, "step": 79750 }, { "action_loss": 0.0227, "epoch": 7.502115258061483, "learning_rate": 1.5205581265155216e-05, "llm_loss": 0.0, "loss": 0.0227, "step": 79800 }, { "action_loss": 0.0286, "epoch": 7.506815831531447, "learning_rate": 1.5199935401076466e-05, "llm_loss": 0.0, "loss": 0.0286, "step": 79850 }, { "action_loss": 0.0157, "epoch": 7.5115164050014105, "learning_rate": 1.5194287264283576e-05, "llm_loss": 0.0, "loss": 0.0157, "step": 79900 }, { "action_loss": 0.0164, "epoch": 7.516216978471373, "learning_rate": 1.5188636857245153e-05, "llm_loss": 0.0, "loss": 0.0164, "step": 79950 }, { "action_loss": 0.0159, "epoch": 7.520917551941337, "learning_rate": 1.51829841824308e-05, "llm_loss": 0.0, "loss": 0.0159, "step": 80000 }, { "action_loss": 0.0247, "epoch": 7.525618125411301, "learning_rate": 1.5177329242311104e-05, "llm_loss": 0.0, "loss": 0.0247, "step": 80050 }, { "action_loss": 0.0233, "epoch": 7.530318698881263, "learning_rate": 1.5171672039357646e-05, "llm_loss": 0.0, "loss": 0.0233, "step": 80100 }, { "action_loss": 0.0227, "epoch": 7.535019272351227, "learning_rate": 1.5166012576042997e-05, "llm_loss": 0.0, "loss": 0.0227, "step": 80150 }, { "action_loss": 0.0172, "epoch": 7.53971984582119, "learning_rate": 1.5160350854840715e-05, "llm_loss": 0.0, "loss": 0.0172, "step": 80200 }, { "action_loss": 0.025, "epoch": 7.5444204192911535, "learning_rate": 1.5154686878225348e-05, "llm_loss": 0.0, "loss": 0.025, "step": 80250 }, { "action_loss": 0.0194, "epoch": 7.549120992761117, "learning_rate": 1.5149020648672425e-05, "llm_loss": 0.0, "loss": 0.0194, "step": 80300 }, { "action_loss": 0.0162, "epoch": 7.55382156623108, "learning_rate": 1.5143352168658458e-05, "llm_loss": 0.0, "loss": 0.0162, "step": 80350 }, { "action_loss": 0.0167, "epoch": 7.558522139701044, "learning_rate": 1.513768144066095e-05, "llm_loss": 0.0, "loss": 0.0167, "step": 80400 }, { "action_loss": 0.0303, "epoch": 7.563222713171007, "learning_rate": 1.5132008467158383e-05, "llm_loss": 0.0, "loss": 0.0303, "step": 80450 }, { "action_loss": 0.0258, "epoch": 7.56792328664097, "learning_rate": 1.5126333250630217e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 80500 }, { "action_loss": 0.0227, "epoch": 7.572623860110934, "learning_rate": 1.5120655793556895e-05, "llm_loss": 0.0, "loss": 0.0227, "step": 80550 }, { "action_loss": 0.0349, "epoch": 7.5773244335808965, "learning_rate": 1.5114976098419842e-05, "llm_loss": 0.0, "loss": 0.0349, "step": 80600 }, { "action_loss": 0.0167, "epoch": 7.58202500705086, "learning_rate": 1.5109294167701456e-05, "llm_loss": 0.0, "loss": 0.0167, "step": 80650 }, { "action_loss": 0.0091, "epoch": 7.586725580520824, "learning_rate": 1.5103610003885121e-05, "llm_loss": 0.0, "loss": 0.0091, "step": 80700 }, { "action_loss": 0.0168, "epoch": 7.591426153990787, "learning_rate": 1.509792360945518e-05, "llm_loss": 0.0, "loss": 0.0168, "step": 80750 }, { "action_loss": 0.0282, "epoch": 7.59612672746075, "learning_rate": 1.5092234986896976e-05, "llm_loss": 0.0, "loss": 0.0282, "step": 80800 }, { "action_loss": 0.0189, "epoch": 7.600827300930714, "learning_rate": 1.50865441386968e-05, "llm_loss": 0.0, "loss": 0.0189, "step": 80850 }, { "action_loss": 0.023, "epoch": 7.605527874400677, "learning_rate": 1.508085106734193e-05, "llm_loss": 0.0, "loss": 0.023, "step": 80900 }, { "action_loss": 0.0191, "epoch": 7.61022844787064, "learning_rate": 1.5075155775320615e-05, "llm_loss": 0.0, "loss": 0.0191, "step": 80950 }, { "action_loss": 0.0222, "epoch": 7.614929021340604, "learning_rate": 1.5069458265122072e-05, "llm_loss": 0.0, "loss": 0.0222, "step": 81000 }, { "action_loss": 0.0226, "epoch": 7.619629594810567, "learning_rate": 1.506375853923649e-05, "llm_loss": 0.0, "loss": 0.0226, "step": 81050 }, { "action_loss": 0.0383, "epoch": 7.6243301682805305, "learning_rate": 1.505805660015502e-05, "llm_loss": 0.0, "loss": 0.0383, "step": 81100 }, { "action_loss": 0.0188, "epoch": 7.629030741750493, "learning_rate": 1.5052352450369787e-05, "llm_loss": 0.0, "loss": 0.0188, "step": 81150 }, { "action_loss": 0.0192, "epoch": 7.633731315220457, "learning_rate": 1.5046646092373883e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 81200 }, { "action_loss": 0.0179, "epoch": 7.638431888690421, "learning_rate": 1.5040937528661357e-05, "llm_loss": 0.0, "loss": 0.0179, "step": 81250 }, { "action_loss": 0.0226, "epoch": 7.643132462160383, "learning_rate": 1.5035226761727228e-05, "llm_loss": 0.0, "loss": 0.0226, "step": 81300 }, { "action_loss": 0.0156, "epoch": 7.647833035630347, "learning_rate": 1.5029513794067479e-05, "llm_loss": 0.0, "loss": 0.0156, "step": 81350 }, { "action_loss": 0.0207, "epoch": 7.65253360910031, "learning_rate": 1.5023798628179056e-05, "llm_loss": 0.0, "loss": 0.0207, "step": 81400 }, { "action_loss": 0.0164, "epoch": 7.657234182570273, "learning_rate": 1.5018081266559855e-05, "llm_loss": 0.0, "loss": 0.0164, "step": 81450 }, { "action_loss": 0.0193, "epoch": 7.661934756040237, "learning_rate": 1.5012361711708745e-05, "llm_loss": 0.0, "loss": 0.0193, "step": 81500 }, { "action_loss": 0.0249, "epoch": 7.6666353295102, "learning_rate": 1.5006639966125549e-05, "llm_loss": 0.0, "loss": 0.0249, "step": 81550 }, { "action_loss": 0.0163, "epoch": 7.6713359029801635, "learning_rate": 1.5000916032311045e-05, "llm_loss": 0.0, "loss": 0.0163, "step": 81600 }, { "action_loss": 0.0192, "epoch": 7.676036476450127, "learning_rate": 1.4995189912766968e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 81650 }, { "action_loss": 0.0255, "epoch": 7.68073704992009, "learning_rate": 1.498946160999601e-05, "llm_loss": 0.0, "loss": 0.0255, "step": 81700 }, { "action_loss": 0.0293, "epoch": 7.685437623390054, "learning_rate": 1.4983731126501814e-05, "llm_loss": 0.0, "loss": 0.0293, "step": 81750 }, { "action_loss": 0.0279, "epoch": 7.690138196860017, "learning_rate": 1.4977998464788983e-05, "llm_loss": 0.0, "loss": 0.0279, "step": 81800 }, { "action_loss": 0.0302, "epoch": 7.69483877032998, "learning_rate": 1.4972263627363065e-05, "llm_loss": 0.0, "loss": 0.0302, "step": 81850 }, { "action_loss": 0.0211, "epoch": 7.699539343799944, "learning_rate": 1.4966526616730561e-05, "llm_loss": 0.0, "loss": 0.0211, "step": 81900 }, { "action_loss": 0.0227, "epoch": 7.704239917269907, "learning_rate": 1.4960787435398925e-05, "llm_loss": 0.0, "loss": 0.0227, "step": 81950 }, { "action_loss": 0.0231, "epoch": 7.70894049073987, "learning_rate": 1.4955046085876555e-05, "llm_loss": 0.0, "loss": 0.0231, "step": 82000 }, { "action_loss": 0.025, "epoch": 7.713641064209834, "learning_rate": 1.4949302570672797e-05, "llm_loss": 0.0, "loss": 0.025, "step": 82050 }, { "action_loss": 0.0319, "epoch": 7.718341637679797, "learning_rate": 1.4943556892297948e-05, "llm_loss": 0.0, "loss": 0.0319, "step": 82100 }, { "action_loss": 0.0222, "epoch": 7.72304221114976, "learning_rate": 1.493780905326325e-05, "llm_loss": 0.0, "loss": 0.0222, "step": 82150 }, { "action_loss": 0.0325, "epoch": 7.727742784619724, "learning_rate": 1.4932059056080883e-05, "llm_loss": 0.0, "loss": 0.0325, "step": 82200 }, { "action_loss": 0.0332, "epoch": 7.732443358089687, "learning_rate": 1.4926306903263974e-05, "llm_loss": 0.0, "loss": 0.0332, "step": 82250 }, { "action_loss": 0.036, "epoch": 7.73714393155965, "learning_rate": 1.4920552597326597e-05, "llm_loss": 0.0, "loss": 0.036, "step": 82300 }, { "action_loss": 0.0165, "epoch": 7.741844505029613, "learning_rate": 1.4914796140783758e-05, "llm_loss": 0.0, "loss": 0.0165, "step": 82350 }, { "action_loss": 0.0363, "epoch": 7.746545078499577, "learning_rate": 1.490903753615141e-05, "llm_loss": 0.0, "loss": 0.0363, "step": 82400 }, { "action_loss": 0.0285, "epoch": 7.7512456519695405, "learning_rate": 1.4903276785946442e-05, "llm_loss": 0.0, "loss": 0.0285, "step": 82450 }, { "action_loss": 0.0199, "epoch": 7.755946225439503, "learning_rate": 1.489751389268668e-05, "llm_loss": 0.0, "loss": 0.0199, "step": 82500 }, { "action_loss": 0.0258, "epoch": 7.760646798909467, "learning_rate": 1.4891748858890888e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 82550 }, { "action_loss": 0.0223, "epoch": 7.765347372379431, "learning_rate": 1.4885981687078767e-05, "llm_loss": 0.0, "loss": 0.0223, "step": 82600 }, { "action_loss": 0.0371, "epoch": 7.770047945849393, "learning_rate": 1.488021237977095e-05, "llm_loss": 0.0, "loss": 0.0371, "step": 82650 }, { "action_loss": 0.0031, "epoch": 7.774748519319357, "learning_rate": 1.4874440939489006e-05, "llm_loss": 0.0, "loss": 0.0031, "step": 82700 }, { "action_loss": 0.0164, "epoch": 7.779449092789321, "learning_rate": 1.486866736875543e-05, "llm_loss": 0.0, "loss": 0.0164, "step": 82750 }, { "action_loss": 0.0316, "epoch": 7.7841496662592835, "learning_rate": 1.4862891670093655e-05, "llm_loss": 0.0, "loss": 0.0316, "step": 82800 }, { "action_loss": 0.023, "epoch": 7.788850239729247, "learning_rate": 1.4857113846028042e-05, "llm_loss": 0.0, "loss": 0.023, "step": 82850 }, { "action_loss": 0.0171, "epoch": 7.79355081319921, "learning_rate": 1.4851333899083877e-05, "llm_loss": 0.0, "loss": 0.0171, "step": 82900 }, { "action_loss": 0.0241, "epoch": 7.798251386669174, "learning_rate": 1.4845551831787384e-05, "llm_loss": 0.0, "loss": 0.0241, "step": 82950 }, { "action_loss": 0.0285, "epoch": 7.802951960139137, "learning_rate": 1.4839767646665702e-05, "llm_loss": 0.0, "loss": 0.0285, "step": 83000 }, { "action_loss": 0.0368, "epoch": 7.8076525336091, "learning_rate": 1.4833981346246901e-05, "llm_loss": 0.0, "loss": 0.0368, "step": 83050 }, { "action_loss": 0.0187, "epoch": 7.812353107079064, "learning_rate": 1.4828192933059981e-05, "llm_loss": 0.0, "loss": 0.0187, "step": 83100 }, { "action_loss": 0.017, "epoch": 7.8170536805490265, "learning_rate": 1.4822402409634853e-05, "llm_loss": 0.0, "loss": 0.017, "step": 83150 }, { "action_loss": 0.0193, "epoch": 7.82175425401899, "learning_rate": 1.481660977850236e-05, "llm_loss": 0.0, "loss": 0.0193, "step": 83200 }, { "action_loss": 0.0224, "epoch": 7.826454827488954, "learning_rate": 1.481081504219426e-05, "llm_loss": 0.0, "loss": 0.0224, "step": 83250 }, { "action_loss": 0.016, "epoch": 7.831155400958917, "learning_rate": 1.4805018203243235e-05, "llm_loss": 0.0, "loss": 0.016, "step": 83300 }, { "action_loss": 0.0231, "epoch": 7.83585597442888, "learning_rate": 1.479921926418289e-05, "llm_loss": 0.0, "loss": 0.0231, "step": 83350 }, { "action_loss": 0.012, "epoch": 7.840556547898844, "learning_rate": 1.479341822754774e-05, "llm_loss": 0.0, "loss": 0.012, "step": 83400 }, { "action_loss": 0.0225, "epoch": 7.845257121368807, "learning_rate": 1.4787615095873223e-05, "llm_loss": 0.0, "loss": 0.0225, "step": 83450 }, { "action_loss": 0.0253, "epoch": 7.84995769483877, "learning_rate": 1.478180987169568e-05, "llm_loss": 0.0, "loss": 0.0253, "step": 83500 }, { "action_loss": 0.0137, "epoch": 7.854658268308734, "learning_rate": 1.477600255755239e-05, "llm_loss": 0.0, "loss": 0.0137, "step": 83550 }, { "action_loss": 0.0153, "epoch": 7.859358841778697, "learning_rate": 1.4770193155981521e-05, "llm_loss": 0.0, "loss": 0.0153, "step": 83600 }, { "action_loss": 0.0448, "epoch": 7.8640594152486605, "learning_rate": 1.4764381669522165e-05, "llm_loss": 0.0, "loss": 0.0448, "step": 83650 }, { "action_loss": 0.0135, "epoch": 7.868759988718624, "learning_rate": 1.475856810071433e-05, "llm_loss": 0.0, "loss": 0.0135, "step": 83700 }, { "action_loss": 0.0333, "epoch": 7.873460562188587, "learning_rate": 1.4752752452098924e-05, "llm_loss": 0.0, "loss": 0.0333, "step": 83750 }, { "action_loss": 0.0195, "epoch": 7.878161135658551, "learning_rate": 1.4746934726217767e-05, "llm_loss": 0.0, "loss": 0.0195, "step": 83800 }, { "action_loss": 0.0131, "epoch": 7.882861709128513, "learning_rate": 1.4741114925613595e-05, "llm_loss": 0.0, "loss": 0.0131, "step": 83850 }, { "action_loss": 0.0233, "epoch": 7.887562282598477, "learning_rate": 1.473540951057661e-05, "llm_loss": 0.0, "loss": 0.0233, "step": 83900 }, { "action_loss": 0.0219, "epoch": 7.892262856068441, "learning_rate": 1.4729585609525967e-05, "llm_loss": 0.0, "loss": 0.0219, "step": 83950 }, { "action_loss": 0.0258, "epoch": 7.8969634295384035, "learning_rate": 1.472375964133501e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 84000 }, { "action_loss": 0.015, "epoch": 7.901664003008367, "learning_rate": 1.4717931608550074e-05, "llm_loss": 0.0, "loss": 0.015, "step": 84050 }, { "action_loss": 0.015, "epoch": 7.90636457647833, "learning_rate": 1.4712101513718386e-05, "llm_loss": 0.0, "loss": 0.015, "step": 84100 }, { "action_loss": 0.0323, "epoch": 7.911065149948294, "learning_rate": 1.470626935938809e-05, "llm_loss": 0.0, "loss": 0.0323, "step": 84150 }, { "action_loss": 0.0228, "epoch": 7.915765723418257, "learning_rate": 1.4700435148108217e-05, "llm_loss": 0.0, "loss": 0.0228, "step": 84200 }, { "action_loss": 0.0267, "epoch": 7.92046629688822, "learning_rate": 1.4694598882428702e-05, "llm_loss": 0.0, "loss": 0.0267, "step": 84250 }, { "action_loss": 0.0183, "epoch": 7.925166870358184, "learning_rate": 1.4688760564900375e-05, "llm_loss": 0.0, "loss": 0.0183, "step": 84300 }, { "action_loss": 0.0176, "epoch": 7.929867443828147, "learning_rate": 1.4682920198074968e-05, "llm_loss": 0.0, "loss": 0.0176, "step": 84350 }, { "action_loss": 0.0326, "epoch": 7.93456801729811, "learning_rate": 1.4677077784505105e-05, "llm_loss": 0.0, "loss": 0.0326, "step": 84400 }, { "action_loss": 0.0313, "epoch": 7.939268590768074, "learning_rate": 1.4671233326744308e-05, "llm_loss": 0.0, "loss": 0.0313, "step": 84450 }, { "action_loss": 0.0168, "epoch": 7.9439691642380375, "learning_rate": 1.4665386827346985e-05, "llm_loss": 0.0, "loss": 0.0168, "step": 84500 }, { "action_loss": 0.0257, "epoch": 7.948669737708, "learning_rate": 1.4659538288868445e-05, "llm_loss": 0.0, "loss": 0.0257, "step": 84550 }, { "action_loss": 0.0196, "epoch": 7.953370311177964, "learning_rate": 1.4653687713864883e-05, "llm_loss": 0.0, "loss": 0.0196, "step": 84600 }, { "action_loss": 0.0277, "epoch": 7.958070884647927, "learning_rate": 1.4647835104893387e-05, "llm_loss": 0.0, "loss": 0.0277, "step": 84650 }, { "action_loss": 0.0245, "epoch": 7.96277145811789, "learning_rate": 1.464198046451193e-05, "llm_loss": 0.0, "loss": 0.0245, "step": 84700 }, { "action_loss": 0.0186, "epoch": 7.967472031587854, "learning_rate": 1.4636123795279375e-05, "llm_loss": 0.0, "loss": 0.0186, "step": 84750 }, { "action_loss": 0.0277, "epoch": 7.972172605057817, "learning_rate": 1.4630265099755474e-05, "llm_loss": 0.0, "loss": 0.0277, "step": 84800 }, { "action_loss": 0.0293, "epoch": 7.97687317852778, "learning_rate": 1.4624404380500864e-05, "llm_loss": 0.0, "loss": 0.0293, "step": 84850 }, { "action_loss": 0.0302, "epoch": 7.981573751997744, "learning_rate": 1.4618541640077062e-05, "llm_loss": 0.0, "loss": 0.0302, "step": 84900 }, { "action_loss": 0.0124, "epoch": 7.986274325467707, "learning_rate": 1.4612676881046473e-05, "llm_loss": 0.0, "loss": 0.0124, "step": 84950 }, { "action_loss": 0.017, "epoch": 7.9909748989376705, "learning_rate": 1.4606810105972385e-05, "llm_loss": 0.0, "loss": 0.017, "step": 85000 }, { "action_loss": 0.015, "epoch": 7.995675472407633, "learning_rate": 1.460094131741896e-05, "llm_loss": 0.0, "loss": 0.015, "step": 85050 }, { "action_loss": 0.0347, "epoch": 8.000376045877598, "learning_rate": 1.4595070517951247e-05, "llm_loss": 0.0, "loss": 0.0347, "step": 85100 }, { "action_loss": 0.024, "epoch": 8.00507661934756, "learning_rate": 1.4589197710135172e-05, "llm_loss": 0.0, "loss": 0.024, "step": 85150 }, { "action_loss": 0.0188, "epoch": 8.009777192817523, "learning_rate": 1.4583322896537539e-05, "llm_loss": 0.0, "loss": 0.0188, "step": 85200 }, { "action_loss": 0.019, "epoch": 8.014477766287486, "learning_rate": 1.4577446079726032e-05, "llm_loss": 0.0, "loss": 0.019, "step": 85250 }, { "action_loss": 0.0387, "epoch": 8.01917833975745, "learning_rate": 1.4571567262269204e-05, "llm_loss": 0.0, "loss": 0.0387, "step": 85300 }, { "action_loss": 0.0223, "epoch": 8.023878913227414, "learning_rate": 1.4565686446736483e-05, "llm_loss": 0.0, "loss": 0.0223, "step": 85350 }, { "action_loss": 0.0235, "epoch": 8.028579486697376, "learning_rate": 1.4559803635698173e-05, "llm_loss": 0.0, "loss": 0.0235, "step": 85400 }, { "action_loss": 0.0191, "epoch": 8.03328006016734, "learning_rate": 1.4553918831725455e-05, "llm_loss": 0.0, "loss": 0.0191, "step": 85450 }, { "action_loss": 0.0267, "epoch": 8.037980633637304, "learning_rate": 1.454803203739037e-05, "llm_loss": 0.0, "loss": 0.0267, "step": 85500 }, { "action_loss": 0.0314, "epoch": 8.042681207107266, "learning_rate": 1.4542143255265835e-05, "llm_loss": 0.0, "loss": 0.0314, "step": 85550 }, { "action_loss": 0.0251, "epoch": 8.047381780577231, "learning_rate": 1.4536252487925637e-05, "llm_loss": 0.0, "loss": 0.0251, "step": 85600 }, { "action_loss": 0.0122, "epoch": 8.052082354047194, "learning_rate": 1.4530359737944433e-05, "llm_loss": 0.0, "loss": 0.0122, "step": 85650 }, { "action_loss": 0.0348, "epoch": 8.056782927517157, "learning_rate": 1.4524465007897739e-05, "llm_loss": 0.0, "loss": 0.0348, "step": 85700 }, { "action_loss": 0.0324, "epoch": 8.061483500987121, "learning_rate": 1.4518568300361936e-05, "llm_loss": 0.0, "loss": 0.0324, "step": 85750 }, { "action_loss": 0.0356, "epoch": 8.066184074457084, "learning_rate": 1.4512669617914282e-05, "llm_loss": 0.0, "loss": 0.0356, "step": 85800 }, { "action_loss": 0.0179, "epoch": 8.070884647927047, "learning_rate": 1.4506768963132882e-05, "llm_loss": 0.0, "loss": 0.0179, "step": 85850 }, { "action_loss": 0.0262, "epoch": 8.075585221397011, "learning_rate": 1.4500866338596717e-05, "llm_loss": 0.0, "loss": 0.0262, "step": 85900 }, { "action_loss": 0.0242, "epoch": 8.080285794866974, "learning_rate": 1.4494961746885619e-05, "llm_loss": 0.0, "loss": 0.0242, "step": 85950 }, { "action_loss": 0.0126, "epoch": 8.084986368336937, "learning_rate": 1.448905519058028e-05, "llm_loss": 0.0, "loss": 0.0126, "step": 86000 }, { "action_loss": 0.0119, "epoch": 8.089686941806901, "learning_rate": 1.4483146672262262e-05, "llm_loss": 0.0, "loss": 0.0119, "step": 86050 }, { "action_loss": 0.0217, "epoch": 8.094387515276864, "learning_rate": 1.4477236194513974e-05, "llm_loss": 0.0, "loss": 0.0217, "step": 86100 }, { "action_loss": 0.0318, "epoch": 8.099088088746827, "learning_rate": 1.4471323759918678e-05, "llm_loss": 0.0, "loss": 0.0318, "step": 86150 }, { "action_loss": 0.0322, "epoch": 8.10378866221679, "learning_rate": 1.4465409371060504e-05, "llm_loss": 0.0, "loss": 0.0322, "step": 86200 }, { "action_loss": 0.0258, "epoch": 8.108489235686754, "learning_rate": 1.4459493030524425e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 86250 }, { "action_loss": 0.0253, "epoch": 8.113189809156717, "learning_rate": 1.4453574740896272e-05, "llm_loss": 0.0, "loss": 0.0253, "step": 86300 }, { "action_loss": 0.0187, "epoch": 8.11789038262668, "learning_rate": 1.4447654504762734e-05, "llm_loss": 0.0, "loss": 0.0187, "step": 86350 }, { "action_loss": 0.0213, "epoch": 8.122590956096644, "learning_rate": 1.4441732324711332e-05, "llm_loss": 0.0, "loss": 0.0213, "step": 86400 }, { "action_loss": 0.0469, "epoch": 8.127291529566607, "learning_rate": 1.4435808203330459e-05, "llm_loss": 0.0, "loss": 0.0469, "step": 86450 }, { "action_loss": 0.0219, "epoch": 8.13199210303657, "learning_rate": 1.4429882143209344e-05, "llm_loss": 0.0, "loss": 0.0219, "step": 86500 }, { "action_loss": 0.0202, "epoch": 8.136692676506534, "learning_rate": 1.442395414693806e-05, "llm_loss": 0.0, "loss": 0.0202, "step": 86550 }, { "action_loss": 0.0181, "epoch": 8.141393249976497, "learning_rate": 1.4418024217107535e-05, "llm_loss": 0.0, "loss": 0.0181, "step": 86600 }, { "action_loss": 0.0105, "epoch": 8.14609382344646, "learning_rate": 1.441209235630954e-05, "llm_loss": 0.0, "loss": 0.0105, "step": 86650 }, { "action_loss": 0.0298, "epoch": 8.150794396916424, "learning_rate": 1.4406158567136688e-05, "llm_loss": 0.0, "loss": 0.0298, "step": 86700 }, { "action_loss": 0.0225, "epoch": 8.155494970386387, "learning_rate": 1.4400222852182435e-05, "llm_loss": 0.0, "loss": 0.0225, "step": 86750 }, { "action_loss": 0.0287, "epoch": 8.16019554385635, "learning_rate": 1.4394285214041081e-05, "llm_loss": 0.0, "loss": 0.0287, "step": 86800 }, { "action_loss": 0.0291, "epoch": 8.164896117326315, "learning_rate": 1.4388345655307764e-05, "llm_loss": 0.0, "loss": 0.0291, "step": 86850 }, { "action_loss": 0.025, "epoch": 8.169596690796277, "learning_rate": 1.438240417857846e-05, "llm_loss": 0.0, "loss": 0.025, "step": 86900 }, { "action_loss": 0.0342, "epoch": 8.17429726426624, "learning_rate": 1.4376460786449986e-05, "llm_loss": 0.0, "loss": 0.0342, "step": 86950 }, { "action_loss": 0.0294, "epoch": 8.178997837736205, "learning_rate": 1.4370515481519997e-05, "llm_loss": 0.0, "loss": 0.0294, "step": 87000 }, { "action_loss": 0.0217, "epoch": 8.183698411206167, "learning_rate": 1.4364568266386985e-05, "llm_loss": 0.0, "loss": 0.0217, "step": 87050 }, { "action_loss": 0.0097, "epoch": 8.18839898467613, "learning_rate": 1.435861914365027e-05, "llm_loss": 0.0, "loss": 0.0097, "step": 87100 }, { "action_loss": 0.0191, "epoch": 8.193099558146093, "learning_rate": 1.4352668115910011e-05, "llm_loss": 0.0, "loss": 0.0191, "step": 87150 }, { "action_loss": 0.0212, "epoch": 8.197800131616058, "learning_rate": 1.4346715185767206e-05, "llm_loss": 0.0, "loss": 0.0212, "step": 87200 }, { "action_loss": 0.0214, "epoch": 8.20250070508602, "learning_rate": 1.4340760355823666e-05, "llm_loss": 0.0, "loss": 0.0214, "step": 87250 }, { "action_loss": 0.0251, "epoch": 8.207201278555983, "learning_rate": 1.4334803628682055e-05, "llm_loss": 0.0, "loss": 0.0251, "step": 87300 }, { "action_loss": 0.0165, "epoch": 8.211901852025948, "learning_rate": 1.4328845006945847e-05, "llm_loss": 0.0, "loss": 0.0165, "step": 87350 }, { "action_loss": 0.0255, "epoch": 8.21660242549591, "learning_rate": 1.4322884493219351e-05, "llm_loss": 0.0, "loss": 0.0255, "step": 87400 }, { "action_loss": 0.0191, "epoch": 8.221302998965873, "learning_rate": 1.4316922090107712e-05, "llm_loss": 0.0, "loss": 0.0191, "step": 87450 }, { "action_loss": 0.0208, "epoch": 8.226003572435838, "learning_rate": 1.4310957800216887e-05, "llm_loss": 0.0, "loss": 0.0208, "step": 87500 }, { "action_loss": 0.0226, "epoch": 8.2307041459058, "learning_rate": 1.4304991626153663e-05, "llm_loss": 0.0, "loss": 0.0226, "step": 87550 }, { "action_loss": 0.0273, "epoch": 8.235404719375763, "learning_rate": 1.4299023570525656e-05, "llm_loss": 0.0, "loss": 0.0273, "step": 87600 }, { "action_loss": 0.0194, "epoch": 8.240105292845728, "learning_rate": 1.4293053635941292e-05, "llm_loss": 0.0, "loss": 0.0194, "step": 87650 }, { "action_loss": 0.0238, "epoch": 8.24480586631569, "learning_rate": 1.4287081825009832e-05, "llm_loss": 0.0, "loss": 0.0238, "step": 87700 }, { "action_loss": 0.0191, "epoch": 8.249506439785653, "learning_rate": 1.4281108140341346e-05, "llm_loss": 0.0, "loss": 0.0191, "step": 87750 }, { "action_loss": 0.0302, "epoch": 8.254207013255618, "learning_rate": 1.4275132584546731e-05, "llm_loss": 0.0, "loss": 0.0302, "step": 87800 }, { "action_loss": 0.0339, "epoch": 8.25890758672558, "learning_rate": 1.4269155160237694e-05, "llm_loss": 0.0, "loss": 0.0339, "step": 87850 }, { "action_loss": 0.0256, "epoch": 8.263608160195544, "learning_rate": 1.4263175870026769e-05, "llm_loss": 0.0, "loss": 0.0256, "step": 87900 }, { "action_loss": 0.0201, "epoch": 8.268308733665506, "learning_rate": 1.4257194716527291e-05, "llm_loss": 0.0, "loss": 0.0201, "step": 87950 }, { "action_loss": 0.0235, "epoch": 8.273009307135471, "learning_rate": 1.425121170235343e-05, "llm_loss": 0.0, "loss": 0.0235, "step": 88000 }, { "action_loss": 0.0323, "epoch": 8.277709880605434, "learning_rate": 1.4245226830120145e-05, "llm_loss": 0.0, "loss": 0.0323, "step": 88050 }, { "action_loss": 0.0183, "epoch": 8.282410454075396, "learning_rate": 1.4239240102443226e-05, "llm_loss": 0.0, "loss": 0.0183, "step": 88100 }, { "action_loss": 0.0308, "epoch": 8.287111027545361, "learning_rate": 1.4233251521939263e-05, "llm_loss": 0.0, "loss": 0.0308, "step": 88150 }, { "action_loss": 0.0245, "epoch": 8.291811601015324, "learning_rate": 1.4227261091225669e-05, "llm_loss": 0.0, "loss": 0.0245, "step": 88200 }, { "action_loss": 0.0192, "epoch": 8.296512174485287, "learning_rate": 1.4221268812920643e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 88250 }, { "action_loss": 0.0166, "epoch": 8.301212747955251, "learning_rate": 1.4215274689643217e-05, "llm_loss": 0.0, "loss": 0.0166, "step": 88300 }, { "action_loss": 0.0164, "epoch": 8.305913321425214, "learning_rate": 1.4209278724013209e-05, "llm_loss": 0.0, "loss": 0.0164, "step": 88350 }, { "action_loss": 0.0193, "epoch": 8.310613894895177, "learning_rate": 1.4203280918651259e-05, "llm_loss": 0.0, "loss": 0.0193, "step": 88400 }, { "action_loss": 0.0219, "epoch": 8.315314468365141, "learning_rate": 1.4197281276178796e-05, "llm_loss": 0.0, "loss": 0.0219, "step": 88450 }, { "action_loss": 0.0059, "epoch": 8.320015041835104, "learning_rate": 1.4191279799218061e-05, "llm_loss": 0.0, "loss": 0.0059, "step": 88500 }, { "action_loss": 0.0157, "epoch": 8.324715615305067, "learning_rate": 1.4185276490392095e-05, "llm_loss": 0.0, "loss": 0.0157, "step": 88550 }, { "action_loss": 0.0125, "epoch": 8.329416188775031, "learning_rate": 1.4179271352324739e-05, "llm_loss": 0.0, "loss": 0.0125, "step": 88600 }, { "action_loss": 0.0127, "epoch": 8.334116762244994, "learning_rate": 1.4173264387640632e-05, "llm_loss": 0.0, "loss": 0.0127, "step": 88650 }, { "action_loss": 0.0123, "epoch": 8.338817335714957, "learning_rate": 1.416725559896522e-05, "llm_loss": 0.0, "loss": 0.0123, "step": 88700 }, { "action_loss": 0.0195, "epoch": 8.343517909184921, "learning_rate": 1.416124498892473e-05, "llm_loss": 0.0, "loss": 0.0195, "step": 88750 }, { "action_loss": 0.0183, "epoch": 8.348218482654884, "learning_rate": 1.4155232560146202e-05, "llm_loss": 0.0, "loss": 0.0183, "step": 88800 }, { "action_loss": 0.0225, "epoch": 8.352919056124847, "learning_rate": 1.4149218315257454e-05, "llm_loss": 0.0, "loss": 0.0225, "step": 88850 }, { "action_loss": 0.0172, "epoch": 8.35761962959481, "learning_rate": 1.4143202256887116e-05, "llm_loss": 0.0, "loss": 0.0172, "step": 88900 }, { "action_loss": 0.0293, "epoch": 8.362320203064774, "learning_rate": 1.4137184387664598e-05, "llm_loss": 0.0, "loss": 0.0293, "step": 88950 }, { "action_loss": 0.0199, "epoch": 8.367020776534737, "learning_rate": 1.4131164710220104e-05, "llm_loss": 0.0, "loss": 0.0199, "step": 89000 }, { "action_loss": 0.0132, "epoch": 8.3717213500047, "learning_rate": 1.4125143227184632e-05, "llm_loss": 0.0, "loss": 0.0132, "step": 89050 }, { "action_loss": 0.032, "epoch": 8.376421923474664, "learning_rate": 1.4119119941189967e-05, "llm_loss": 0.0, "loss": 0.032, "step": 89100 }, { "action_loss": 0.0062, "epoch": 8.381122496944627, "learning_rate": 1.4113094854868679e-05, "llm_loss": 0.0, "loss": 0.0062, "step": 89150 }, { "action_loss": 0.009, "epoch": 8.38582307041459, "learning_rate": 1.4107067970854126e-05, "llm_loss": 0.0, "loss": 0.009, "step": 89200 }, { "action_loss": 0.0159, "epoch": 8.390523643884555, "learning_rate": 1.4101039291780456e-05, "llm_loss": 0.0, "loss": 0.0159, "step": 89250 }, { "action_loss": 0.0298, "epoch": 8.395224217354517, "learning_rate": 1.4095008820282598e-05, "llm_loss": 0.0, "loss": 0.0298, "step": 89300 }, { "action_loss": 0.0457, "epoch": 8.39992479082448, "learning_rate": 1.4088976558996267e-05, "llm_loss": 0.0, "loss": 0.0457, "step": 89350 }, { "action_loss": 0.0166, "epoch": 8.404625364294445, "learning_rate": 1.4082942510557956e-05, "llm_loss": 0.0, "loss": 0.0166, "step": 89400 }, { "action_loss": 0.0281, "epoch": 8.409325937764407, "learning_rate": 1.407690667760494e-05, "llm_loss": 0.0, "loss": 0.0281, "step": 89450 }, { "action_loss": 0.0171, "epoch": 8.41402651123437, "learning_rate": 1.4070869062775282e-05, "llm_loss": 0.0, "loss": 0.0171, "step": 89500 }, { "action_loss": 0.0219, "epoch": 8.418727084704335, "learning_rate": 1.406482966870781e-05, "llm_loss": 0.0, "loss": 0.0219, "step": 89550 }, { "action_loss": 0.0213, "epoch": 8.423427658174298, "learning_rate": 1.4058788498042138e-05, "llm_loss": 0.0, "loss": 0.0213, "step": 89600 }, { "action_loss": 0.0169, "epoch": 8.42812823164426, "learning_rate": 1.405274555341866e-05, "llm_loss": 0.0, "loss": 0.0169, "step": 89650 }, { "action_loss": 0.0231, "epoch": 8.432828805114223, "learning_rate": 1.4046700837478533e-05, "llm_loss": 0.0, "loss": 0.0231, "step": 89700 }, { "action_loss": 0.0292, "epoch": 8.437529378584188, "learning_rate": 1.4040654352863702e-05, "llm_loss": 0.0, "loss": 0.0292, "step": 89750 }, { "action_loss": 0.0197, "epoch": 8.44222995205415, "learning_rate": 1.4034606102216877e-05, "llm_loss": 0.0, "loss": 0.0197, "step": 89800 }, { "action_loss": 0.0258, "epoch": 8.446930525524113, "learning_rate": 1.4028556088181542e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 89850 }, { "action_loss": 0.0091, "epoch": 8.451631098994078, "learning_rate": 1.4022504313401954e-05, "llm_loss": 0.0, "loss": 0.0091, "step": 89900 }, { "action_loss": 0.013, "epoch": 8.45633167246404, "learning_rate": 1.4016450780523133e-05, "llm_loss": 0.0, "loss": 0.013, "step": 89950 }, { "action_loss": 0.0165, "epoch": 8.461032245934003, "learning_rate": 1.4010395492190871e-05, "llm_loss": 0.0, "loss": 0.0165, "step": 90000 }, { "action_loss": 0.0189, "epoch": 8.465732819403968, "learning_rate": 1.400433845105173e-05, "llm_loss": 0.0, "loss": 0.0189, "step": 90050 }, { "action_loss": 0.0165, "epoch": 8.47043339287393, "learning_rate": 1.399827965975304e-05, "llm_loss": 0.0, "loss": 0.0165, "step": 90100 }, { "action_loss": 0.0199, "epoch": 8.475133966343893, "learning_rate": 1.3992219120942883e-05, "llm_loss": 0.0, "loss": 0.0199, "step": 90150 }, { "action_loss": 0.0328, "epoch": 8.479834539813858, "learning_rate": 1.3986156837270118e-05, "llm_loss": 0.0, "loss": 0.0328, "step": 90200 }, { "action_loss": 0.0259, "epoch": 8.48453511328382, "learning_rate": 1.3980092811384363e-05, "llm_loss": 0.0, "loss": 0.0259, "step": 90250 }, { "action_loss": 0.0282, "epoch": 8.489235686753783, "learning_rate": 1.3974027045935994e-05, "llm_loss": 0.0, "loss": 0.0282, "step": 90300 }, { "action_loss": 0.0278, "epoch": 8.493936260223748, "learning_rate": 1.3967959543576152e-05, "llm_loss": 0.0, "loss": 0.0278, "step": 90350 }, { "action_loss": 0.0193, "epoch": 8.49863683369371, "learning_rate": 1.3961890306956734e-05, "llm_loss": 0.0, "loss": 0.0193, "step": 90400 }, { "action_loss": 0.0079, "epoch": 8.503337407163674, "learning_rate": 1.3955819338730395e-05, "llm_loss": 0.0, "loss": 0.0079, "step": 90450 }, { "action_loss": 0.0159, "epoch": 8.508037980633638, "learning_rate": 1.394974664155055e-05, "llm_loss": 0.0, "loss": 0.0159, "step": 90500 }, { "action_loss": 0.0192, "epoch": 8.512738554103601, "learning_rate": 1.3943672218071367e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 90550 }, { "action_loss": 0.0095, "epoch": 8.517439127573564, "learning_rate": 1.3937596070947767e-05, "llm_loss": 0.0, "loss": 0.0095, "step": 90600 }, { "action_loss": 0.0316, "epoch": 8.522139701043528, "learning_rate": 1.393151820283543e-05, "llm_loss": 0.0, "loss": 0.0316, "step": 90650 }, { "action_loss": 0.0236, "epoch": 8.526840274513491, "learning_rate": 1.3925438616390783e-05, "llm_loss": 0.0, "loss": 0.0236, "step": 90700 }, { "action_loss": 0.0379, "epoch": 8.531540847983454, "learning_rate": 1.3919357314271e-05, "llm_loss": 0.0, "loss": 0.0379, "step": 90750 }, { "action_loss": 0.0162, "epoch": 8.536241421453417, "learning_rate": 1.3913274299134019e-05, "llm_loss": 0.0, "loss": 0.0162, "step": 90800 }, { "action_loss": 0.0243, "epoch": 8.540941994923381, "learning_rate": 1.3907189573638516e-05, "llm_loss": 0.0, "loss": 0.0243, "step": 90850 }, { "action_loss": 0.0416, "epoch": 8.545642568393344, "learning_rate": 1.3901103140443912e-05, "llm_loss": 0.0, "loss": 0.0416, "step": 90900 }, { "action_loss": 0.0233, "epoch": 8.550343141863307, "learning_rate": 1.3895015002210384e-05, "llm_loss": 0.0, "loss": 0.0233, "step": 90950 }, { "action_loss": 0.0211, "epoch": 8.555043715333271, "learning_rate": 1.3888925161598847e-05, "llm_loss": 0.0, "loss": 0.0211, "step": 91000 }, { "action_loss": 0.0317, "epoch": 8.559744288803234, "learning_rate": 1.3882833621270966e-05, "llm_loss": 0.0, "loss": 0.0317, "step": 91050 }, { "action_loss": 0.0255, "epoch": 8.564444862273197, "learning_rate": 1.3876740383889138e-05, "llm_loss": 0.0, "loss": 0.0255, "step": 91100 }, { "action_loss": 0.03, "epoch": 8.569145435743161, "learning_rate": 1.3870645452116513e-05, "llm_loss": 0.0, "loss": 0.03, "step": 91150 }, { "action_loss": 0.0152, "epoch": 8.573846009213124, "learning_rate": 1.386454882861698e-05, "llm_loss": 0.0, "loss": 0.0152, "step": 91200 }, { "action_loss": 0.0122, "epoch": 8.578546582683087, "learning_rate": 1.385845051605516e-05, "llm_loss": 0.0, "loss": 0.0122, "step": 91250 }, { "action_loss": 0.0155, "epoch": 8.583247156153051, "learning_rate": 1.385235051709642e-05, "llm_loss": 0.0, "loss": 0.0155, "step": 91300 }, { "action_loss": 0.0322, "epoch": 8.587947729623014, "learning_rate": 1.3846248834406862e-05, "llm_loss": 0.0, "loss": 0.0322, "step": 91350 }, { "action_loss": 0.0212, "epoch": 8.592648303092977, "learning_rate": 1.384014547065332e-05, "llm_loss": 0.0, "loss": 0.0212, "step": 91400 }, { "action_loss": 0.0192, "epoch": 8.59734887656294, "learning_rate": 1.3834040428503371e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 91450 }, { "action_loss": 0.0118, "epoch": 8.602049450032904, "learning_rate": 1.3827933710625312e-05, "llm_loss": 0.0, "loss": 0.0118, "step": 91500 }, { "action_loss": 0.0137, "epoch": 8.606750023502867, "learning_rate": 1.3821825319688187e-05, "llm_loss": 0.0, "loss": 0.0137, "step": 91550 }, { "action_loss": 0.0153, "epoch": 8.61145059697283, "learning_rate": 1.3815715258361764e-05, "llm_loss": 0.0, "loss": 0.0153, "step": 91600 }, { "action_loss": 0.0228, "epoch": 8.616151170442794, "learning_rate": 1.3809603529316542e-05, "llm_loss": 0.0, "loss": 0.0228, "step": 91650 }, { "action_loss": 0.0262, "epoch": 8.620851743912757, "learning_rate": 1.3803490135223747e-05, "llm_loss": 0.0, "loss": 0.0262, "step": 91700 }, { "action_loss": 0.0059, "epoch": 8.62555231738272, "learning_rate": 1.3797375078755339e-05, "llm_loss": 0.0, "loss": 0.0059, "step": 91750 }, { "action_loss": 0.0256, "epoch": 8.630252890852685, "learning_rate": 1.3791258362584e-05, "llm_loss": 0.0, "loss": 0.0256, "step": 91800 }, { "action_loss": 0.0123, "epoch": 8.634953464322647, "learning_rate": 1.3785139989383129e-05, "llm_loss": 0.0, "loss": 0.0123, "step": 91850 }, { "action_loss": 0.0292, "epoch": 8.63965403779261, "learning_rate": 1.3779019961826869e-05, "llm_loss": 0.0, "loss": 0.0292, "step": 91900 }, { "action_loss": 0.0129, "epoch": 8.644354611262575, "learning_rate": 1.377289828259007e-05, "llm_loss": 0.0, "loss": 0.0129, "step": 91950 }, { "action_loss": 0.0131, "epoch": 8.649055184732537, "learning_rate": 1.376677495434831e-05, "llm_loss": 0.0, "loss": 0.0131, "step": 92000 }, { "action_loss": 0.0163, "epoch": 8.6537557582025, "learning_rate": 1.3760649979777885e-05, "llm_loss": 0.0, "loss": 0.0163, "step": 92050 }, { "action_loss": 0.0293, "epoch": 8.658456331672465, "learning_rate": 1.3754523361555812e-05, "llm_loss": 0.0, "loss": 0.0293, "step": 92100 }, { "action_loss": 0.0224, "epoch": 8.663156905142428, "learning_rate": 1.3748395102359825e-05, "llm_loss": 0.0, "loss": 0.0224, "step": 92150 }, { "action_loss": 0.01, "epoch": 8.66785747861239, "learning_rate": 1.3742265204868386e-05, "llm_loss": 0.0, "loss": 0.01, "step": 92200 }, { "action_loss": 0.0228, "epoch": 8.672558052082355, "learning_rate": 1.3736133671760654e-05, "llm_loss": 0.0, "loss": 0.0228, "step": 92250 }, { "action_loss": 0.032, "epoch": 8.677258625552318, "learning_rate": 1.3730000505716516e-05, "llm_loss": 0.0, "loss": 0.032, "step": 92300 }, { "action_loss": 0.0156, "epoch": 8.68195919902228, "learning_rate": 1.3723865709416568e-05, "llm_loss": 0.0, "loss": 0.0156, "step": 92350 }, { "action_loss": 0.0322, "epoch": 8.686659772492245, "learning_rate": 1.3717729285542123e-05, "llm_loss": 0.0, "loss": 0.0322, "step": 92400 }, { "action_loss": 0.0184, "epoch": 8.691360345962208, "learning_rate": 1.3711591236775199e-05, "llm_loss": 0.0, "loss": 0.0184, "step": 92450 }, { "action_loss": 0.0223, "epoch": 8.69606091943217, "learning_rate": 1.3705451565798531e-05, "llm_loss": 0.0, "loss": 0.0223, "step": 92500 }, { "action_loss": 0.0202, "epoch": 8.700761492902133, "learning_rate": 1.3699310275295557e-05, "llm_loss": 0.0, "loss": 0.0202, "step": 92550 }, { "action_loss": 0.0155, "epoch": 8.705462066372098, "learning_rate": 1.3693167367950424e-05, "llm_loss": 0.0, "loss": 0.0155, "step": 92600 }, { "action_loss": 0.0083, "epoch": 8.71016263984206, "learning_rate": 1.3687022846447988e-05, "llm_loss": 0.0, "loss": 0.0083, "step": 92650 }, { "action_loss": 0.025, "epoch": 8.714863213312023, "learning_rate": 1.368087671347381e-05, "llm_loss": 0.0, "loss": 0.025, "step": 92700 }, { "action_loss": 0.0126, "epoch": 8.719563786781988, "learning_rate": 1.3674728971714155e-05, "llm_loss": 0.0, "loss": 0.0126, "step": 92750 }, { "action_loss": 0.0161, "epoch": 8.72426436025195, "learning_rate": 1.3668579623855994e-05, "llm_loss": 0.0, "loss": 0.0161, "step": 92800 }, { "action_loss": 0.0272, "epoch": 8.728964933721914, "learning_rate": 1.3662428672586991e-05, "llm_loss": 0.0, "loss": 0.0272, "step": 92850 }, { "action_loss": 0.0094, "epoch": 8.733665507191878, "learning_rate": 1.3656276120595519e-05, "llm_loss": 0.0, "loss": 0.0094, "step": 92900 }, { "action_loss": 0.0263, "epoch": 8.73836608066184, "learning_rate": 1.3650121970570652e-05, "llm_loss": 0.0, "loss": 0.0263, "step": 92950 }, { "action_loss": 0.0262, "epoch": 8.743066654131804, "learning_rate": 1.3643966225202153e-05, "llm_loss": 0.0, "loss": 0.0262, "step": 93000 }, { "action_loss": 0.0343, "epoch": 8.747767227601768, "learning_rate": 1.3637808887180488e-05, "llm_loss": 0.0, "loss": 0.0343, "step": 93050 }, { "action_loss": 0.0251, "epoch": 8.752467801071731, "learning_rate": 1.3631649959196822e-05, "llm_loss": 0.0, "loss": 0.0251, "step": 93100 }, { "action_loss": 0.0196, "epoch": 8.757168374541694, "learning_rate": 1.3625489443943014e-05, "llm_loss": 0.0, "loss": 0.0196, "step": 93150 }, { "action_loss": 0.0147, "epoch": 8.761868948011656, "learning_rate": 1.361932734411161e-05, "llm_loss": 0.0, "loss": 0.0147, "step": 93200 }, { "action_loss": 0.0158, "epoch": 8.766569521481621, "learning_rate": 1.361316366239586e-05, "llm_loss": 0.0, "loss": 0.0158, "step": 93250 }, { "action_loss": 0.0312, "epoch": 8.771270094951584, "learning_rate": 1.360699840148969e-05, "llm_loss": 0.0, "loss": 0.0312, "step": 93300 }, { "action_loss": 0.0217, "epoch": 8.775970668421547, "learning_rate": 1.3600831564087736e-05, "llm_loss": 0.0, "loss": 0.0217, "step": 93350 }, { "action_loss": 0.015, "epoch": 8.780671241891511, "learning_rate": 1.3594663152885299e-05, "llm_loss": 0.0, "loss": 0.015, "step": 93400 }, { "action_loss": 0.0197, "epoch": 8.785371815361474, "learning_rate": 1.3588493170578388e-05, "llm_loss": 0.0, "loss": 0.0197, "step": 93450 }, { "action_loss": 0.0345, "epoch": 8.790072388831437, "learning_rate": 1.3582321619863689e-05, "llm_loss": 0.0, "loss": 0.0345, "step": 93500 }, { "action_loss": 0.0198, "epoch": 8.794772962301401, "learning_rate": 1.3576148503438579e-05, "llm_loss": 0.0, "loss": 0.0198, "step": 93550 }, { "action_loss": 0.0212, "epoch": 8.799473535771364, "learning_rate": 1.3569973824001119e-05, "llm_loss": 0.0, "loss": 0.0212, "step": 93600 }, { "action_loss": 0.0337, "epoch": 8.804174109241327, "learning_rate": 1.3563797584250043e-05, "llm_loss": 0.0, "loss": 0.0337, "step": 93650 }, { "action_loss": 0.0159, "epoch": 8.808874682711291, "learning_rate": 1.355761978688478e-05, "llm_loss": 0.0, "loss": 0.0159, "step": 93700 }, { "action_loss": 0.0154, "epoch": 8.813575256181254, "learning_rate": 1.3551440434605429e-05, "llm_loss": 0.0, "loss": 0.0154, "step": 93750 }, { "action_loss": 0.0225, "epoch": 8.818275829651217, "learning_rate": 1.3545259530112782e-05, "llm_loss": 0.0, "loss": 0.0225, "step": 93800 }, { "action_loss": 0.0127, "epoch": 8.822976403121181, "learning_rate": 1.3539077076108291e-05, "llm_loss": 0.0, "loss": 0.0127, "step": 93850 }, { "action_loss": 0.0144, "epoch": 8.827676976591144, "learning_rate": 1.3532893075294102e-05, "llm_loss": 0.0, "loss": 0.0144, "step": 93900 }, { "action_loss": 0.0196, "epoch": 8.832377550061107, "learning_rate": 1.352670753037303e-05, "llm_loss": 0.0, "loss": 0.0196, "step": 93950 }, { "action_loss": 0.0212, "epoch": 8.837078123531072, "learning_rate": 1.3520520444048564e-05, "llm_loss": 0.0, "loss": 0.0212, "step": 94000 }, { "action_loss": 0.0264, "epoch": 8.841778697001034, "learning_rate": 1.3514331819024868e-05, "llm_loss": 0.0, "loss": 0.0264, "step": 94050 }, { "action_loss": 0.0274, "epoch": 8.846479270470997, "learning_rate": 1.3508141658006782e-05, "llm_loss": 0.0, "loss": 0.0274, "step": 94100 }, { "action_loss": 0.0118, "epoch": 8.851179843940962, "learning_rate": 1.3501949963699807e-05, "llm_loss": 0.0, "loss": 0.0118, "step": 94150 }, { "action_loss": 0.0285, "epoch": 8.855880417410924, "learning_rate": 1.3495756738810128e-05, "llm_loss": 0.0, "loss": 0.0285, "step": 94200 }, { "action_loss": 0.0222, "epoch": 8.860580990880887, "learning_rate": 1.348956198604459e-05, "llm_loss": 0.0, "loss": 0.0222, "step": 94250 }, { "action_loss": 0.0378, "epoch": 8.86528156435085, "learning_rate": 1.3483365708110706e-05, "llm_loss": 0.0, "loss": 0.0378, "step": 94300 }, { "action_loss": 0.0126, "epoch": 8.869982137820815, "learning_rate": 1.3477167907716658e-05, "llm_loss": 0.0, "loss": 0.0126, "step": 94350 }, { "action_loss": 0.0219, "epoch": 8.874682711290777, "learning_rate": 1.3470968587571291e-05, "llm_loss": 0.0, "loss": 0.0219, "step": 94400 }, { "action_loss": 0.0264, "epoch": 8.87938328476074, "learning_rate": 1.3464767750384125e-05, "llm_loss": 0.0, "loss": 0.0264, "step": 94450 }, { "action_loss": 0.0284, "epoch": 8.884083858230705, "learning_rate": 1.3458565398865322e-05, "llm_loss": 0.0, "loss": 0.0284, "step": 94500 }, { "action_loss": 0.0157, "epoch": 8.888784431700667, "learning_rate": 1.3452361535725723e-05, "llm_loss": 0.0, "loss": 0.0157, "step": 94550 }, { "action_loss": 0.0186, "epoch": 8.89348500517063, "learning_rate": 1.344615616367683e-05, "llm_loss": 0.0, "loss": 0.0186, "step": 94600 }, { "action_loss": 0.0338, "epoch": 8.898185578640595, "learning_rate": 1.343994928543079e-05, "llm_loss": 0.0, "loss": 0.0338, "step": 94650 }, { "action_loss": 0.0159, "epoch": 8.902886152110558, "learning_rate": 1.3433740903700425e-05, "llm_loss": 0.0, "loss": 0.0159, "step": 94700 }, { "action_loss": 0.0193, "epoch": 8.90758672558052, "learning_rate": 1.3427531021199203e-05, "llm_loss": 0.0, "loss": 0.0193, "step": 94750 }, { "action_loss": 0.0195, "epoch": 8.912287299050485, "learning_rate": 1.342131964064125e-05, "llm_loss": 0.0, "loss": 0.0195, "step": 94800 }, { "action_loss": 0.0288, "epoch": 8.916987872520448, "learning_rate": 1.3415106764741353e-05, "llm_loss": 0.0, "loss": 0.0288, "step": 94850 }, { "action_loss": 0.0182, "epoch": 8.92168844599041, "learning_rate": 1.3408892396214941e-05, "llm_loss": 0.0, "loss": 0.0182, "step": 94900 }, { "action_loss": 0.0221, "epoch": 8.926389019460375, "learning_rate": 1.340267653777811e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 94950 }, { "action_loss": 0.0121, "epoch": 8.931089592930338, "learning_rate": 1.3396459192147594e-05, "llm_loss": 0.0, "loss": 0.0121, "step": 95000 }, { "action_loss": 0.0296, "epoch": 8.9357901664003, "learning_rate": 1.3390240362040783e-05, "llm_loss": 0.0, "loss": 0.0296, "step": 95050 }, { "action_loss": 0.0121, "epoch": 8.940490739870263, "learning_rate": 1.3384020050175717e-05, "llm_loss": 0.0, "loss": 0.0121, "step": 95100 }, { "action_loss": 0.0295, "epoch": 8.945191313340228, "learning_rate": 1.337779825927108e-05, "llm_loss": 0.0, "loss": 0.0295, "step": 95150 }, { "action_loss": 0.0248, "epoch": 8.94989188681019, "learning_rate": 1.3371574992046207e-05, "llm_loss": 0.0, "loss": 0.0248, "step": 95200 }, { "action_loss": 0.0242, "epoch": 8.954592460280153, "learning_rate": 1.3365350251221076e-05, "llm_loss": 0.0, "loss": 0.0242, "step": 95250 }, { "action_loss": 0.021, "epoch": 8.959293033750118, "learning_rate": 1.3359124039516304e-05, "llm_loss": 0.0, "loss": 0.021, "step": 95300 }, { "action_loss": 0.0199, "epoch": 8.96399360722008, "learning_rate": 1.3352896359653159e-05, "llm_loss": 0.0, "loss": 0.0199, "step": 95350 }, { "action_loss": 0.0317, "epoch": 8.968694180690044, "learning_rate": 1.3346667214353546e-05, "llm_loss": 0.0, "loss": 0.0317, "step": 95400 }, { "action_loss": 0.0355, "epoch": 8.973394754160008, "learning_rate": 1.3340436606340008e-05, "llm_loss": 0.0, "loss": 0.0355, "step": 95450 }, { "action_loss": 0.0194, "epoch": 8.97809532762997, "learning_rate": 1.3334204538335738e-05, "llm_loss": 0.0, "loss": 0.0194, "step": 95500 }, { "action_loss": 0.016, "epoch": 8.982795901099934, "learning_rate": 1.3327971013064556e-05, "llm_loss": 0.0, "loss": 0.016, "step": 95550 }, { "action_loss": 0.0157, "epoch": 8.987496474569898, "learning_rate": 1.3321736033250925e-05, "llm_loss": 0.0, "loss": 0.0157, "step": 95600 }, { "action_loss": 0.0298, "epoch": 8.992197048039861, "learning_rate": 1.331549960161994e-05, "llm_loss": 0.0, "loss": 0.0298, "step": 95650 }, { "action_loss": 0.0271, "epoch": 8.996897621509824, "learning_rate": 1.3309261720897332e-05, "llm_loss": 0.0, "loss": 0.0271, "step": 95700 }, { "action_loss": 0.014, "epoch": 9.001598194979788, "learning_rate": 1.3303022393809466e-05, "llm_loss": 0.0, "loss": 0.014, "step": 95750 }, { "action_loss": 0.0314, "epoch": 9.006298768449751, "learning_rate": 1.3296781623083339e-05, "llm_loss": 0.0, "loss": 0.0314, "step": 95800 }, { "action_loss": 0.006, "epoch": 9.010999341919714, "learning_rate": 1.3290539411446577e-05, "llm_loss": 0.0, "loss": 0.006, "step": 95850 }, { "action_loss": 0.0221, "epoch": 9.015699915389678, "learning_rate": 1.328429576162744e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 95900 }, { "action_loss": 0.0193, "epoch": 9.020400488859641, "learning_rate": 1.3278050676354815e-05, "llm_loss": 0.0, "loss": 0.0193, "step": 95950 }, { "action_loss": 0.0215, "epoch": 9.025101062329604, "learning_rate": 1.3271804158358212e-05, "llm_loss": 0.0, "loss": 0.0215, "step": 96000 }, { "action_loss": 0.0126, "epoch": 9.029801635799567, "learning_rate": 1.3265556210367771e-05, "llm_loss": 0.0, "loss": 0.0126, "step": 96050 }, { "action_loss": 0.0151, "epoch": 9.034502209269531, "learning_rate": 1.3259306835114264e-05, "llm_loss": 0.0, "loss": 0.0151, "step": 96100 }, { "action_loss": 0.0317, "epoch": 9.039202782739494, "learning_rate": 1.3253056035329066e-05, "llm_loss": 0.0, "loss": 0.0317, "step": 96150 }, { "action_loss": 0.0189, "epoch": 9.043903356209457, "learning_rate": 1.3246803813744198e-05, "llm_loss": 0.0, "loss": 0.0189, "step": 96200 }, { "action_loss": 0.0127, "epoch": 9.048603929679421, "learning_rate": 1.324055017309229e-05, "llm_loss": 0.0, "loss": 0.0127, "step": 96250 }, { "action_loss": 0.0275, "epoch": 9.053304503149384, "learning_rate": 1.3234295116106595e-05, "llm_loss": 0.0, "loss": 0.0275, "step": 96300 }, { "action_loss": 0.0357, "epoch": 9.058005076619347, "learning_rate": 1.3228038645520983e-05, "llm_loss": 0.0, "loss": 0.0357, "step": 96350 }, { "action_loss": 0.0314, "epoch": 9.062705650089312, "learning_rate": 1.3221780764069947e-05, "llm_loss": 0.0, "loss": 0.0314, "step": 96400 }, { "action_loss": 0.0204, "epoch": 9.067406223559274, "learning_rate": 1.321552147448859e-05, "llm_loss": 0.0, "loss": 0.0204, "step": 96450 }, { "action_loss": 0.0262, "epoch": 9.072106797029237, "learning_rate": 1.3209260779512634e-05, "llm_loss": 0.0, "loss": 0.0262, "step": 96500 }, { "action_loss": 0.0211, "epoch": 9.076807370499202, "learning_rate": 1.3202998681878419e-05, "llm_loss": 0.0, "loss": 0.0211, "step": 96550 }, { "action_loss": 0.0127, "epoch": 9.081507943969164, "learning_rate": 1.3196735184322886e-05, "llm_loss": 0.0, "loss": 0.0127, "step": 96600 }, { "action_loss": 0.0213, "epoch": 9.086208517439127, "learning_rate": 1.3190470289583603e-05, "llm_loss": 0.0, "loss": 0.0213, "step": 96650 }, { "action_loss": 0.0183, "epoch": 9.090909090909092, "learning_rate": 1.318420400039874e-05, "llm_loss": 0.0, "loss": 0.0183, "step": 96700 }, { "action_loss": 0.023, "epoch": 9.095609664379054, "learning_rate": 1.3177936319507077e-05, "llm_loss": 0.0, "loss": 0.023, "step": 96750 }, { "action_loss": 0.0116, "epoch": 9.100310237849017, "learning_rate": 1.3171667249648002e-05, "llm_loss": 0.0, "loss": 0.0116, "step": 96800 }, { "action_loss": 0.0159, "epoch": 9.10501081131898, "learning_rate": 1.316539679356151e-05, "llm_loss": 0.0, "loss": 0.0159, "step": 96850 }, { "action_loss": 0.0274, "epoch": 9.109711384788945, "learning_rate": 1.3159124953988209e-05, "llm_loss": 0.0, "loss": 0.0274, "step": 96900 }, { "action_loss": 0.0182, "epoch": 9.114411958258907, "learning_rate": 1.31528517336693e-05, "llm_loss": 0.0, "loss": 0.0182, "step": 96950 }, { "action_loss": 0.0175, "epoch": 9.11911253172887, "learning_rate": 1.31465771353466e-05, "llm_loss": 0.0, "loss": 0.0175, "step": 97000 }, { "action_loss": 0.0156, "epoch": 9.123813105198835, "learning_rate": 1.3140301161762515e-05, "llm_loss": 0.0, "loss": 0.0156, "step": 97050 }, { "action_loss": 0.0354, "epoch": 9.128513678668797, "learning_rate": 1.313402381566006e-05, "llm_loss": 0.0, "loss": 0.0354, "step": 97100 }, { "action_loss": 0.0252, "epoch": 9.13321425213876, "learning_rate": 1.3127745099782852e-05, "llm_loss": 0.0, "loss": 0.0252, "step": 97150 }, { "action_loss": 0.0122, "epoch": 9.137914825608725, "learning_rate": 1.31214650168751e-05, "llm_loss": 0.0, "loss": 0.0122, "step": 97200 }, { "action_loss": 0.0264, "epoch": 9.142615399078688, "learning_rate": 1.3115183569681614e-05, "llm_loss": 0.0, "loss": 0.0264, "step": 97250 }, { "action_loss": 0.0163, "epoch": 9.14731597254865, "learning_rate": 1.31089007609478e-05, "llm_loss": 0.0, "loss": 0.0163, "step": 97300 }, { "action_loss": 0.0253, "epoch": 9.152016546018615, "learning_rate": 1.310261659341966e-05, "llm_loss": 0.0, "loss": 0.0253, "step": 97350 }, { "action_loss": 0.0178, "epoch": 9.156717119488578, "learning_rate": 1.3096331069843786e-05, "llm_loss": 0.0, "loss": 0.0178, "step": 97400 }, { "action_loss": 0.0224, "epoch": 9.16141769295854, "learning_rate": 1.3090044192967372e-05, "llm_loss": 0.0, "loss": 0.0224, "step": 97450 }, { "action_loss": 0.0097, "epoch": 9.166118266428505, "learning_rate": 1.3083755965538195e-05, "llm_loss": 0.0, "loss": 0.0097, "step": 97500 }, { "action_loss": 0.018, "epoch": 9.170818839898468, "learning_rate": 1.307746639030462e-05, "llm_loss": 0.0, "loss": 0.018, "step": 97550 }, { "action_loss": 0.0188, "epoch": 9.17551941336843, "learning_rate": 1.3071175470015609e-05, "llm_loss": 0.0, "loss": 0.0188, "step": 97600 }, { "action_loss": 0.0157, "epoch": 9.180219986838395, "learning_rate": 1.3064883207420706e-05, "llm_loss": 0.0, "loss": 0.0157, "step": 97650 }, { "action_loss": 0.0175, "epoch": 9.184920560308358, "learning_rate": 1.305858960527005e-05, "llm_loss": 0.0, "loss": 0.0175, "step": 97700 }, { "action_loss": 0.0158, "epoch": 9.18962113377832, "learning_rate": 1.3052294666314352e-05, "llm_loss": 0.0, "loss": 0.0158, "step": 97750 }, { "action_loss": 0.0159, "epoch": 9.194321707248283, "learning_rate": 1.3045998393304915e-05, "llm_loss": 0.0, "loss": 0.0159, "step": 97800 }, { "action_loss": 0.025, "epoch": 9.199022280718248, "learning_rate": 1.3039700788993635e-05, "llm_loss": 0.0, "loss": 0.025, "step": 97850 }, { "action_loss": 0.0284, "epoch": 9.20372285418821, "learning_rate": 1.3033401856132968e-05, "llm_loss": 0.0, "loss": 0.0284, "step": 97900 }, { "action_loss": 0.0334, "epoch": 9.208423427658174, "learning_rate": 1.3027101597475969e-05, "llm_loss": 0.0, "loss": 0.0334, "step": 97950 }, { "action_loss": 0.0322, "epoch": 9.213124001128138, "learning_rate": 1.3020800015776262e-05, "llm_loss": 0.0, "loss": 0.0322, "step": 98000 }, { "action_loss": 0.0174, "epoch": 9.217824574598101, "learning_rate": 1.3014497113788055e-05, "llm_loss": 0.0, "loss": 0.0174, "step": 98050 }, { "action_loss": 0.0233, "epoch": 9.222525148068064, "learning_rate": 1.3008192894266132e-05, "llm_loss": 0.0, "loss": 0.0233, "step": 98100 }, { "action_loss": 0.0281, "epoch": 9.227225721538028, "learning_rate": 1.3001887359965851e-05, "llm_loss": 0.0, "loss": 0.0281, "step": 98150 }, { "action_loss": 0.0238, "epoch": 9.231926295007991, "learning_rate": 1.2995580513643144e-05, "llm_loss": 0.0, "loss": 0.0238, "step": 98200 }, { "action_loss": 0.0199, "epoch": 9.236626868477954, "learning_rate": 1.2989272358054526e-05, "llm_loss": 0.0, "loss": 0.0199, "step": 98250 }, { "action_loss": 0.0228, "epoch": 9.241327441947918, "learning_rate": 1.2982962895957062e-05, "llm_loss": 0.0, "loss": 0.0228, "step": 98300 }, { "action_loss": 0.0321, "epoch": 9.246028015417881, "learning_rate": 1.2976652130108416e-05, "llm_loss": 0.0, "loss": 0.0321, "step": 98350 }, { "action_loss": 0.0194, "epoch": 9.250728588887844, "learning_rate": 1.2970340063266801e-05, "llm_loss": 0.0, "loss": 0.0194, "step": 98400 }, { "action_loss": 0.0193, "epoch": 9.255429162357808, "learning_rate": 1.296402669819101e-05, "llm_loss": 0.0, "loss": 0.0193, "step": 98450 }, { "action_loss": 0.0286, "epoch": 9.260129735827771, "learning_rate": 1.2957712037640393e-05, "llm_loss": 0.0, "loss": 0.0286, "step": 98500 }, { "action_loss": 0.0384, "epoch": 9.264830309297734, "learning_rate": 1.2951396084374878e-05, "llm_loss": 0.0, "loss": 0.0384, "step": 98550 }, { "action_loss": 0.0319, "epoch": 9.269530882767697, "learning_rate": 1.294507884115495e-05, "llm_loss": 0.0, "loss": 0.0319, "step": 98600 }, { "action_loss": 0.0384, "epoch": 9.274231456237661, "learning_rate": 1.2938760310741663e-05, "llm_loss": 0.0, "loss": 0.0384, "step": 98650 }, { "action_loss": 0.0101, "epoch": 9.278932029707624, "learning_rate": 1.2932440495896626e-05, "llm_loss": 0.0, "loss": 0.0101, "step": 98700 }, { "action_loss": 0.025, "epoch": 9.283632603177587, "learning_rate": 1.2926119399382018e-05, "llm_loss": 0.0, "loss": 0.025, "step": 98750 }, { "action_loss": 0.0125, "epoch": 9.288333176647551, "learning_rate": 1.2919797023960571e-05, "llm_loss": 0.0, "loss": 0.0125, "step": 98800 }, { "action_loss": 0.0309, "epoch": 9.293033750117514, "learning_rate": 1.2913473372395584e-05, "llm_loss": 0.0, "loss": 0.0309, "step": 98850 }, { "action_loss": 0.0189, "epoch": 9.297734323587477, "learning_rate": 1.2907148447450908e-05, "llm_loss": 0.0, "loss": 0.0189, "step": 98900 }, { "action_loss": 0.0358, "epoch": 9.302434897057442, "learning_rate": 1.2900822251890948e-05, "llm_loss": 0.0, "loss": 0.0358, "step": 98950 }, { "action_loss": 0.0303, "epoch": 9.307135470527404, "learning_rate": 1.2894494788480668e-05, "llm_loss": 0.0, "loss": 0.0303, "step": 99000 }, { "action_loss": 0.0116, "epoch": 9.311836043997367, "learning_rate": 1.2888166059985596e-05, "llm_loss": 0.0, "loss": 0.0116, "step": 99050 }, { "action_loss": 0.019, "epoch": 9.316536617467332, "learning_rate": 1.288183606917179e-05, "llm_loss": 0.0, "loss": 0.019, "step": 99100 }, { "action_loss": 0.0219, "epoch": 9.321237190937294, "learning_rate": 1.2875504818805877e-05, "llm_loss": 0.0, "loss": 0.0219, "step": 99150 }, { "action_loss": 0.0157, "epoch": 9.325937764407257, "learning_rate": 1.2869172311655033e-05, "llm_loss": 0.0, "loss": 0.0157, "step": 99200 }, { "action_loss": 0.0284, "epoch": 9.330638337877222, "learning_rate": 1.2862838550486977e-05, "llm_loss": 0.0, "loss": 0.0284, "step": 99250 }, { "action_loss": 0.0096, "epoch": 9.335338911347185, "learning_rate": 1.2856503538069985e-05, "llm_loss": 0.0, "loss": 0.0097, "step": 99300 }, { "action_loss": 0.029, "epoch": 9.340039484817147, "learning_rate": 1.2850167277172871e-05, "llm_loss": 0.0, "loss": 0.029, "step": 99350 }, { "action_loss": 0.0142, "epoch": 9.344740058287112, "learning_rate": 1.2843829770564998e-05, "llm_loss": 0.0, "loss": 0.0142, "step": 99400 }, { "action_loss": 0.0288, "epoch": 9.349440631757075, "learning_rate": 1.2837491021016276e-05, "llm_loss": 0.0, "loss": 0.0288, "step": 99450 }, { "action_loss": 0.0407, "epoch": 9.354141205227037, "learning_rate": 1.283115103129715e-05, "llm_loss": 0.0, "loss": 0.0407, "step": 99500 }, { "action_loss": 0.0359, "epoch": 9.358841778697, "learning_rate": 1.2824809804178617e-05, "llm_loss": 0.0, "loss": 0.0359, "step": 99550 }, { "action_loss": 0.0246, "epoch": 9.363542352166965, "learning_rate": 1.2818467342432213e-05, "llm_loss": 0.0, "loss": 0.0246, "step": 99600 }, { "action_loss": 0.0207, "epoch": 9.368242925636928, "learning_rate": 1.2812123648830008e-05, "llm_loss": 0.0, "loss": 0.0207, "step": 99650 }, { "action_loss": 0.0119, "epoch": 9.37294349910689, "learning_rate": 1.2805778726144615e-05, "llm_loss": 0.0, "loss": 0.0119, "step": 99700 }, { "action_loss": 0.0161, "epoch": 9.377644072576855, "learning_rate": 1.2799432577149184e-05, "llm_loss": 0.0, "loss": 0.0161, "step": 99750 }, { "action_loss": 0.0283, "epoch": 9.382344646046818, "learning_rate": 1.2793085204617397e-05, "llm_loss": 0.0, "loss": 0.0283, "step": 99800 }, { "action_loss": 0.0084, "epoch": 9.38704521951678, "learning_rate": 1.2786736611323477e-05, "llm_loss": 0.0, "loss": 0.0084, "step": 99850 }, { "action_loss": 0.029, "epoch": 9.391745792986745, "learning_rate": 1.2780386800042176e-05, "llm_loss": 0.0, "loss": 0.029, "step": 99900 }, { "action_loss": 0.0184, "epoch": 9.396446366456708, "learning_rate": 1.2774035773548776e-05, "llm_loss": 0.0, "loss": 0.0184, "step": 99950 }, { "action_loss": 0.0126, "epoch": 9.40114693992667, "learning_rate": 1.27676835346191e-05, "llm_loss": 0.0, "loss": 0.0126, "step": 100000 }, { "action_loss": 0.0093, "epoch": 9.405847513396635, "learning_rate": 1.2761330086029489e-05, "llm_loss": 0.0, "loss": 0.0093, "step": 100050 }, { "action_loss": 0.0268, "epoch": 9.410548086866598, "learning_rate": 1.2754975430556824e-05, "llm_loss": 0.0, "loss": 0.0268, "step": 100100 }, { "action_loss": 0.022, "epoch": 9.41524866033656, "learning_rate": 1.2748619570978505e-05, "llm_loss": 0.0, "loss": 0.022, "step": 100150 }, { "action_loss": 0.0285, "epoch": 9.419949233806525, "learning_rate": 1.2742262510072459e-05, "llm_loss": 0.0, "loss": 0.0285, "step": 100200 }, { "action_loss": 0.0189, "epoch": 9.424649807276488, "learning_rate": 1.2735904250617144e-05, "llm_loss": 0.0, "loss": 0.0189, "step": 100250 }, { "action_loss": 0.0281, "epoch": 9.42935038074645, "learning_rate": 1.2729544795391538e-05, "llm_loss": 0.0, "loss": 0.0281, "step": 100300 }, { "action_loss": 0.0292, "epoch": 9.434050954216413, "learning_rate": 1.2723184147175136e-05, "llm_loss": 0.0, "loss": 0.0292, "step": 100350 }, { "action_loss": 0.0121, "epoch": 9.438751527686378, "learning_rate": 1.2716822308747965e-05, "llm_loss": 0.0, "loss": 0.0121, "step": 100400 }, { "action_loss": 0.0128, "epoch": 9.44345210115634, "learning_rate": 1.2710459282890564e-05, "llm_loss": 0.0, "loss": 0.0128, "step": 100450 }, { "action_loss": 0.0155, "epoch": 9.448152674626304, "learning_rate": 1.2704095072383999e-05, "llm_loss": 0.0, "loss": 0.0155, "step": 100500 }, { "action_loss": 0.0187, "epoch": 9.452853248096268, "learning_rate": 1.2697729680009847e-05, "llm_loss": 0.0, "loss": 0.0187, "step": 100550 }, { "action_loss": 0.028, "epoch": 9.457553821566231, "learning_rate": 1.2691363108550204e-05, "llm_loss": 0.0, "loss": 0.028, "step": 100600 }, { "action_loss": 0.0216, "epoch": 9.462254395036194, "learning_rate": 1.2684995360787676e-05, "llm_loss": 0.0, "loss": 0.0216, "step": 100650 }, { "action_loss": 0.0187, "epoch": 9.466954968506158, "learning_rate": 1.2678626439505395e-05, "llm_loss": 0.0, "loss": 0.0187, "step": 100700 }, { "action_loss": 0.022, "epoch": 9.471655541976121, "learning_rate": 1.2672256347486998e-05, "llm_loss": 0.0, "loss": 0.022, "step": 100750 }, { "action_loss": 0.0229, "epoch": 9.476356115446084, "learning_rate": 1.2665885087516627e-05, "llm_loss": 0.0, "loss": 0.0229, "step": 100800 }, { "action_loss": 0.0064, "epoch": 9.481056688916048, "learning_rate": 1.265951266237895e-05, "llm_loss": 0.0, "loss": 0.0064, "step": 100850 }, { "action_loss": 0.0323, "epoch": 9.485757262386011, "learning_rate": 1.2653139074859135e-05, "llm_loss": 0.0, "loss": 0.0323, "step": 100900 }, { "action_loss": 0.0262, "epoch": 9.490457835855974, "learning_rate": 1.264676432774286e-05, "llm_loss": 0.0, "loss": 0.0262, "step": 100950 }, { "action_loss": 0.0312, "epoch": 9.495158409325938, "learning_rate": 1.2640388423816304e-05, "llm_loss": 0.0, "loss": 0.0312, "step": 101000 }, { "action_loss": 0.0256, "epoch": 9.499858982795901, "learning_rate": 1.263401136586616e-05, "llm_loss": 0.0, "loss": 0.0256, "step": 101050 }, { "action_loss": 0.0344, "epoch": 9.504559556265864, "learning_rate": 1.2627633156679626e-05, "llm_loss": 0.0, "loss": 0.0344, "step": 101100 }, { "action_loss": 0.0188, "epoch": 9.509260129735829, "learning_rate": 1.2621253799044393e-05, "llm_loss": 0.0, "loss": 0.0188, "step": 101150 }, { "action_loss": 0.0253, "epoch": 9.513960703205791, "learning_rate": 1.2614873295748667e-05, "llm_loss": 0.0, "loss": 0.0253, "step": 101200 }, { "action_loss": 0.0128, "epoch": 9.518661276675754, "learning_rate": 1.2608491649581141e-05, "llm_loss": 0.0, "loss": 0.0128, "step": 101250 }, { "action_loss": 0.0353, "epoch": 9.523361850145719, "learning_rate": 1.2602108863331018e-05, "llm_loss": 0.0, "loss": 0.0353, "step": 101300 }, { "action_loss": 0.0125, "epoch": 9.528062423615681, "learning_rate": 1.2595724939788e-05, "llm_loss": 0.0, "loss": 0.0125, "step": 101350 }, { "action_loss": 0.0182, "epoch": 9.532762997085644, "learning_rate": 1.2589339881742274e-05, "llm_loss": 0.0, "loss": 0.0182, "step": 101400 }, { "action_loss": 0.0224, "epoch": 9.537463570555607, "learning_rate": 1.2582953691984531e-05, "llm_loss": 0.0, "loss": 0.0224, "step": 101450 }, { "action_loss": 0.0051, "epoch": 9.542164144025572, "learning_rate": 1.2576566373305964e-05, "llm_loss": 0.0, "loss": 0.0051, "step": 101500 }, { "action_loss": 0.0261, "epoch": 9.546864717495534, "learning_rate": 1.2570177928498246e-05, "llm_loss": 0.0, "loss": 0.0261, "step": 101550 }, { "action_loss": 0.0244, "epoch": 9.551565290965497, "learning_rate": 1.256378836035355e-05, "llm_loss": 0.0, "loss": 0.0244, "step": 101600 }, { "action_loss": 0.0288, "epoch": 9.556265864435462, "learning_rate": 1.2557397671664541e-05, "llm_loss": 0.0, "loss": 0.0288, "step": 101650 }, { "action_loss": 0.0178, "epoch": 9.560966437905424, "learning_rate": 1.2551005865224363e-05, "llm_loss": 0.0, "loss": 0.0178, "step": 101700 }, { "action_loss": 0.0175, "epoch": 9.565667011375387, "learning_rate": 1.2544612943826665e-05, "llm_loss": 0.0, "loss": 0.0175, "step": 101750 }, { "action_loss": 0.0093, "epoch": 9.570367584845352, "learning_rate": 1.2538218910265567e-05, "llm_loss": 0.0, "loss": 0.0093, "step": 101800 }, { "action_loss": 0.0159, "epoch": 9.575068158315315, "learning_rate": 1.2531823767335685e-05, "llm_loss": 0.0, "loss": 0.0159, "step": 101850 }, { "action_loss": 0.0222, "epoch": 9.579768731785277, "learning_rate": 1.2525427517832118e-05, "llm_loss": 0.0, "loss": 0.0222, "step": 101900 }, { "action_loss": 0.0289, "epoch": 9.584469305255242, "learning_rate": 1.251903016455045e-05, "llm_loss": 0.0, "loss": 0.0289, "step": 101950 }, { "action_loss": 0.029, "epoch": 9.589169878725205, "learning_rate": 1.2512631710286745e-05, "llm_loss": 0.0, "loss": 0.029, "step": 102000 }, { "action_loss": 0.0097, "epoch": 9.593870452195167, "learning_rate": 1.2506232157837547e-05, "llm_loss": 0.0, "loss": 0.0097, "step": 102050 }, { "action_loss": 0.0204, "epoch": 9.59857102566513, "learning_rate": 1.2499831509999883e-05, "llm_loss": 0.0, "loss": 0.0204, "step": 102100 }, { "action_loss": 0.0191, "epoch": 9.603271599135095, "learning_rate": 1.249342976957126e-05, "llm_loss": 0.0, "loss": 0.0191, "step": 102150 }, { "action_loss": 0.0328, "epoch": 9.607972172605058, "learning_rate": 1.248702693934965e-05, "llm_loss": 0.0, "loss": 0.0328, "step": 102200 }, { "action_loss": 0.0217, "epoch": 9.61267274607502, "learning_rate": 1.2480623022133525e-05, "llm_loss": 0.0, "loss": 0.0217, "step": 102250 }, { "action_loss": 0.0177, "epoch": 9.617373319544985, "learning_rate": 1.2474218020721808e-05, "llm_loss": 0.0, "loss": 0.0177, "step": 102300 }, { "action_loss": 0.0123, "epoch": 9.622073893014948, "learning_rate": 1.2467811937913912e-05, "llm_loss": 0.0, "loss": 0.0123, "step": 102350 }, { "action_loss": 0.019, "epoch": 9.62677446648491, "learning_rate": 1.2461404776509716e-05, "llm_loss": 0.0, "loss": 0.019, "step": 102400 }, { "action_loss": 0.0211, "epoch": 9.631475039954875, "learning_rate": 1.2454996539309571e-05, "llm_loss": 0.0, "loss": 0.0211, "step": 102450 }, { "action_loss": 0.0228, "epoch": 9.636175613424838, "learning_rate": 1.2448587229114297e-05, "llm_loss": 0.0, "loss": 0.0228, "step": 102500 }, { "action_loss": 0.027, "epoch": 9.6408761868948, "learning_rate": 1.2442176848725187e-05, "llm_loss": 0.0, "loss": 0.027, "step": 102550 }, { "action_loss": 0.0158, "epoch": 9.645576760364765, "learning_rate": 1.2435765400943999e-05, "llm_loss": 0.0, "loss": 0.0158, "step": 102600 }, { "action_loss": 0.0207, "epoch": 9.650277333834728, "learning_rate": 1.2429352888572956e-05, "llm_loss": 0.0, "loss": 0.0207, "step": 102650 }, { "action_loss": 0.0095, "epoch": 9.65497790730469, "learning_rate": 1.2422939314414748e-05, "llm_loss": 0.0, "loss": 0.0095, "step": 102700 }, { "action_loss": 0.016, "epoch": 9.659678480774655, "learning_rate": 1.241652468127253e-05, "llm_loss": 0.0, "loss": 0.016, "step": 102750 }, { "action_loss": 0.0355, "epoch": 9.664379054244618, "learning_rate": 1.241010899194992e-05, "llm_loss": 0.0, "loss": 0.0355, "step": 102800 }, { "action_loss": 0.024, "epoch": 9.66907962771458, "learning_rate": 1.2403692249251001e-05, "llm_loss": 0.0, "loss": 0.024, "step": 102850 }, { "action_loss": 0.0157, "epoch": 9.673780201184545, "learning_rate": 1.2397274455980302e-05, "llm_loss": 0.0, "loss": 0.0157, "step": 102900 }, { "action_loss": 0.0162, "epoch": 9.678480774654508, "learning_rate": 1.2390855614942825e-05, "llm_loss": 0.0, "loss": 0.0162, "step": 102950 }, { "action_loss": 0.0194, "epoch": 9.68318134812447, "learning_rate": 1.2384435728944029e-05, "llm_loss": 0.0, "loss": 0.0194, "step": 103000 }, { "action_loss": 0.0186, "epoch": 9.687881921594435, "learning_rate": 1.2378014800789828e-05, "llm_loss": 0.0, "loss": 0.0186, "step": 103050 }, { "action_loss": 0.0196, "epoch": 9.692582495064398, "learning_rate": 1.2371592833286585e-05, "llm_loss": 0.0, "loss": 0.0196, "step": 103100 }, { "action_loss": 0.0187, "epoch": 9.697283068534361, "learning_rate": 1.2365169829241125e-05, "llm_loss": 0.0, "loss": 0.0187, "step": 103150 }, { "action_loss": 0.0124, "epoch": 9.701983642004324, "learning_rate": 1.2358745791460724e-05, "llm_loss": 0.0, "loss": 0.0124, "step": 103200 }, { "action_loss": 0.0192, "epoch": 9.706684215474288, "learning_rate": 1.2352320722753112e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 103250 }, { "action_loss": 0.0221, "epoch": 9.711384788944251, "learning_rate": 1.2345894625926462e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 103300 }, { "action_loss": 0.0224, "epoch": 9.716085362414214, "learning_rate": 1.2339467503789406e-05, "llm_loss": 0.0, "loss": 0.0224, "step": 103350 }, { "action_loss": 0.0095, "epoch": 9.720785935884178, "learning_rate": 1.2333039359151016e-05, "llm_loss": 0.0, "loss": 0.0095, "step": 103400 }, { "action_loss": 0.022, "epoch": 9.725486509354141, "learning_rate": 1.2326610194820814e-05, "llm_loss": 0.0, "loss": 0.022, "step": 103450 }, { "action_loss": 0.0303, "epoch": 9.730187082824104, "learning_rate": 1.2320180013608773e-05, "llm_loss": 0.0, "loss": 0.0303, "step": 103500 }, { "action_loss": 0.0189, "epoch": 9.734887656294068, "learning_rate": 1.2313748818325302e-05, "llm_loss": 0.0, "loss": 0.0189, "step": 103550 }, { "action_loss": 0.0184, "epoch": 9.739588229764031, "learning_rate": 1.2307316611781258e-05, "llm_loss": 0.0, "loss": 0.0184, "step": 103600 }, { "action_loss": 0.0125, "epoch": 9.744288803233994, "learning_rate": 1.2300883396787939e-05, "llm_loss": 0.0, "loss": 0.0125, "step": 103650 }, { "action_loss": 0.0217, "epoch": 9.748989376703959, "learning_rate": 1.2294449176157079e-05, "llm_loss": 0.0, "loss": 0.0217, "step": 103700 }, { "action_loss": 0.0158, "epoch": 9.753689950173921, "learning_rate": 1.2288013952700862e-05, "llm_loss": 0.0, "loss": 0.0158, "step": 103750 }, { "action_loss": 0.0223, "epoch": 9.758390523643884, "learning_rate": 1.22815777292319e-05, "llm_loss": 0.0, "loss": 0.0223, "step": 103800 }, { "action_loss": 0.0245, "epoch": 9.763091097113849, "learning_rate": 1.227514050856325e-05, "llm_loss": 0.0, "loss": 0.0245, "step": 103850 }, { "action_loss": 0.0328, "epoch": 9.767791670583811, "learning_rate": 1.2268702293508399e-05, "llm_loss": 0.0, "loss": 0.0328, "step": 103900 }, { "action_loss": 0.0156, "epoch": 9.772492244053774, "learning_rate": 1.2262263086881274e-05, "llm_loss": 0.0, "loss": 0.0156, "step": 103950 }, { "action_loss": 0.0215, "epoch": 9.777192817523737, "learning_rate": 1.2255822891496231e-05, "llm_loss": 0.0, "loss": 0.0215, "step": 104000 }, { "action_loss": 0.0253, "epoch": 9.781893390993702, "learning_rate": 1.2249381710168055e-05, "llm_loss": 0.0, "loss": 0.0253, "step": 104050 }, { "action_loss": 0.0084, "epoch": 9.786593964463664, "learning_rate": 1.224293954571197e-05, "llm_loss": 0.0, "loss": 0.0084, "step": 104100 }, { "action_loss": 0.0331, "epoch": 9.791294537933627, "learning_rate": 1.2236496400943624e-05, "llm_loss": 0.0, "loss": 0.0331, "step": 104150 }, { "action_loss": 0.0167, "epoch": 9.795995111403592, "learning_rate": 1.2230052278679098e-05, "llm_loss": 0.0, "loss": 0.0167, "step": 104200 }, { "action_loss": 0.0216, "epoch": 9.800695684873554, "learning_rate": 1.2223607181734898e-05, "llm_loss": 0.0, "loss": 0.0216, "step": 104250 }, { "action_loss": 0.0288, "epoch": 9.805396258343517, "learning_rate": 1.2217161112927956e-05, "llm_loss": 0.0, "loss": 0.0288, "step": 104300 }, { "action_loss": 0.0147, "epoch": 9.810096831813482, "learning_rate": 1.2210714075075627e-05, "llm_loss": 0.0, "loss": 0.0147, "step": 104350 }, { "action_loss": 0.0168, "epoch": 9.814797405283445, "learning_rate": 1.2204266070995689e-05, "llm_loss": 0.0, "loss": 0.0168, "step": 104400 }, { "action_loss": 0.0316, "epoch": 9.819497978753407, "learning_rate": 1.2197817103506349e-05, "llm_loss": 0.0, "loss": 0.0316, "step": 104450 }, { "action_loss": 0.0124, "epoch": 9.824198552223372, "learning_rate": 1.2191367175426225e-05, "llm_loss": 0.0, "loss": 0.0124, "step": 104500 }, { "action_loss": 0.029, "epoch": 9.828899125693335, "learning_rate": 1.2184916289574362e-05, "llm_loss": 0.0, "loss": 0.029, "step": 104550 }, { "action_loss": 0.0258, "epoch": 9.833599699163297, "learning_rate": 1.2178464448770226e-05, "llm_loss": 0.0, "loss": 0.0258, "step": 104600 }, { "action_loss": 0.0189, "epoch": 9.838300272633262, "learning_rate": 1.217201165583369e-05, "llm_loss": 0.0, "loss": 0.0189, "step": 104650 }, { "action_loss": 0.0245, "epoch": 9.843000846103225, "learning_rate": 1.2165557913585056e-05, "llm_loss": 0.0, "loss": 0.0245, "step": 104700 }, { "action_loss": 0.0095, "epoch": 9.847701419573188, "learning_rate": 1.215910322484503e-05, "llm_loss": 0.0, "loss": 0.0095, "step": 104750 }, { "action_loss": 0.0261, "epoch": 9.852401993043152, "learning_rate": 1.2152647592434736e-05, "llm_loss": 0.0, "loss": 0.0261, "step": 104800 }, { "action_loss": 0.0266, "epoch": 9.857102566513115, "learning_rate": 1.214619101917571e-05, "llm_loss": 0.0, "loss": 0.0266, "step": 104850 }, { "action_loss": 0.0363, "epoch": 9.861803139983078, "learning_rate": 1.2139733507889902e-05, "llm_loss": 0.0, "loss": 0.0363, "step": 104900 }, { "action_loss": 0.0185, "epoch": 9.86650371345304, "learning_rate": 1.213327506139967e-05, "llm_loss": 0.0, "loss": 0.0185, "step": 104950 }, { "action_loss": 0.0184, "epoch": 9.871204286923005, "learning_rate": 1.2126815682527777e-05, "llm_loss": 0.0, "loss": 0.0184, "step": 105000 }, { "action_loss": 0.0351, "epoch": 9.875904860392968, "learning_rate": 1.2120355374097399e-05, "llm_loss": 0.0, "loss": 0.0351, "step": 105050 }, { "action_loss": 0.0124, "epoch": 9.88060543386293, "learning_rate": 1.2113894138932114e-05, "llm_loss": 0.0, "loss": 0.0124, "step": 105100 }, { "action_loss": 0.0196, "epoch": 9.885306007332895, "learning_rate": 1.2107431979855914e-05, "llm_loss": 0.0, "loss": 0.0196, "step": 105150 }, { "action_loss": 0.0164, "epoch": 9.890006580802858, "learning_rate": 1.210096889969318e-05, "llm_loss": 0.0, "loss": 0.0164, "step": 105200 }, { "action_loss": 0.0222, "epoch": 9.89470715427282, "learning_rate": 1.2094504901268704e-05, "llm_loss": 0.0, "loss": 0.0222, "step": 105250 }, { "action_loss": 0.0316, "epoch": 9.899407727742785, "learning_rate": 1.2088039987407682e-05, "llm_loss": 0.0, "loss": 0.0316, "step": 105300 }, { "action_loss": 0.0259, "epoch": 9.904108301212748, "learning_rate": 1.2081574160935708e-05, "llm_loss": 0.0, "loss": 0.0259, "step": 105350 }, { "action_loss": 0.0123, "epoch": 9.90880887468271, "learning_rate": 1.2075107424678774e-05, "llm_loss": 0.0, "loss": 0.0123, "step": 105400 }, { "action_loss": 0.0289, "epoch": 9.913509448152675, "learning_rate": 1.2068639781463264e-05, "llm_loss": 0.0, "loss": 0.0289, "step": 105450 }, { "action_loss": 0.0199, "epoch": 9.918210021622638, "learning_rate": 1.2062171234115971e-05, "llm_loss": 0.0, "loss": 0.0199, "step": 105500 }, { "action_loss": 0.015, "epoch": 9.9229105950926, "learning_rate": 1.2055701785464074e-05, "llm_loss": 0.0, "loss": 0.015, "step": 105550 }, { "action_loss": 0.0222, "epoch": 9.927611168562565, "learning_rate": 1.2049231438335144e-05, "llm_loss": 0.0, "loss": 0.0222, "step": 105600 }, { "action_loss": 0.0306, "epoch": 9.932311742032528, "learning_rate": 1.2042760195557151e-05, "llm_loss": 0.0, "loss": 0.0306, "step": 105650 }, { "action_loss": 0.0177, "epoch": 9.937012315502491, "learning_rate": 1.2036288059958454e-05, "llm_loss": 0.0, "loss": 0.0177, "step": 105700 }, { "action_loss": 0.0192, "epoch": 9.941712888972454, "learning_rate": 1.2029815034367805e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 105750 }, { "action_loss": 0.0185, "epoch": 9.946413462442418, "learning_rate": 1.202334112161434e-05, "llm_loss": 0.0, "loss": 0.0185, "step": 105800 }, { "action_loss": 0.0149, "epoch": 9.951114035912381, "learning_rate": 1.2016866324527588e-05, "llm_loss": 0.0, "loss": 0.0149, "step": 105850 }, { "action_loss": 0.0284, "epoch": 9.955814609382344, "learning_rate": 1.2010390645937458e-05, "llm_loss": 0.0, "loss": 0.0284, "step": 105900 }, { "action_loss": 0.029, "epoch": 9.960515182852308, "learning_rate": 1.2003914088674248e-05, "llm_loss": 0.0, "loss": 0.029, "step": 105950 }, { "action_loss": 0.0147, "epoch": 9.965215756322271, "learning_rate": 1.1997436655568642e-05, "llm_loss": 0.0, "loss": 0.0147, "step": 106000 }, { "action_loss": 0.0156, "epoch": 9.969916329792234, "learning_rate": 1.1990958349451702e-05, "llm_loss": 0.0, "loss": 0.0156, "step": 106050 }, { "action_loss": 0.0412, "epoch": 9.974616903262199, "learning_rate": 1.1984479173154877e-05, "llm_loss": 0.0, "loss": 0.0412, "step": 106100 }, { "action_loss": 0.022, "epoch": 9.979317476732161, "learning_rate": 1.1977999129509991e-05, "llm_loss": 0.0, "loss": 0.022, "step": 106150 }, { "action_loss": 0.0157, "epoch": 9.984018050202124, "learning_rate": 1.1971518221349255e-05, "llm_loss": 0.0, "loss": 0.0157, "step": 106200 }, { "action_loss": 0.0142, "epoch": 9.988718623672089, "learning_rate": 1.196503645150525e-05, "llm_loss": 0.0, "loss": 0.0142, "step": 106250 }, { "action_loss": 0.0256, "epoch": 9.993419197142051, "learning_rate": 1.1958553822810935e-05, "llm_loss": 0.0, "loss": 0.0256, "step": 106300 }, { "action_loss": 0.0312, "epoch": 9.998119770612014, "learning_rate": 1.1952070338099643e-05, "llm_loss": 0.0, "loss": 0.0312, "step": 106350 }, { "action_loss": 0.0316, "epoch": 10.002820344081979, "learning_rate": 1.1945586000205087e-05, "llm_loss": 0.0, "loss": 0.0316, "step": 106400 }, { "action_loss": 0.0321, "epoch": 10.007520917551942, "learning_rate": 1.193910081196135e-05, "llm_loss": 0.0, "loss": 0.0321, "step": 106450 }, { "action_loss": 0.0172, "epoch": 10.012221491021904, "learning_rate": 1.1932614776202885e-05, "llm_loss": 0.0, "loss": 0.0172, "step": 106500 }, { "action_loss": 0.013, "epoch": 10.016922064491869, "learning_rate": 1.1926127895764516e-05, "llm_loss": 0.0, "loss": 0.013, "step": 106550 }, { "action_loss": 0.0124, "epoch": 10.021622637961832, "learning_rate": 1.191964017348144e-05, "llm_loss": 0.0, "loss": 0.0124, "step": 106600 }, { "action_loss": 0.0216, "epoch": 10.026323211431794, "learning_rate": 1.1913151612189218e-05, "llm_loss": 0.0, "loss": 0.0216, "step": 106650 }, { "action_loss": 0.0224, "epoch": 10.031023784901757, "learning_rate": 1.1906662214723774e-05, "llm_loss": 0.0, "loss": 0.0224, "step": 106700 }, { "action_loss": 0.035, "epoch": 10.035724358371722, "learning_rate": 1.1900171983921407e-05, "llm_loss": 0.0, "loss": 0.035, "step": 106750 }, { "action_loss": 0.0173, "epoch": 10.040424931841684, "learning_rate": 1.1893680922618774e-05, "llm_loss": 0.0, "loss": 0.0173, "step": 106800 }, { "action_loss": 0.0193, "epoch": 10.045125505311647, "learning_rate": 1.1887189033652895e-05, "llm_loss": 0.0, "loss": 0.0193, "step": 106850 }, { "action_loss": 0.0323, "epoch": 10.049826078781612, "learning_rate": 1.188069631986115e-05, "llm_loss": 0.0, "loss": 0.0323, "step": 106900 }, { "action_loss": 0.0162, "epoch": 10.054526652251575, "learning_rate": 1.187420278408129e-05, "llm_loss": 0.0, "loss": 0.0162, "step": 106950 }, { "action_loss": 0.0185, "epoch": 10.059227225721537, "learning_rate": 1.1867708429151413e-05, "llm_loss": 0.0, "loss": 0.0185, "step": 107000 }, { "action_loss": 0.0194, "epoch": 10.063927799191502, "learning_rate": 1.186121325790998e-05, "llm_loss": 0.0, "loss": 0.0194, "step": 107050 }, { "action_loss": 0.0054, "epoch": 10.068628372661465, "learning_rate": 1.1854717273195808e-05, "llm_loss": 0.0, "loss": 0.0054, "step": 107100 }, { "action_loss": 0.0093, "epoch": 10.073328946131427, "learning_rate": 1.1848220477848071e-05, "llm_loss": 0.0, "loss": 0.0093, "step": 107150 }, { "action_loss": 0.0336, "epoch": 10.078029519601392, "learning_rate": 1.1841722874706297e-05, "llm_loss": 0.0, "loss": 0.0336, "step": 107200 }, { "action_loss": 0.0095, "epoch": 10.082730093071355, "learning_rate": 1.1835224466610366e-05, "llm_loss": 0.0, "loss": 0.0095, "step": 107250 }, { "action_loss": 0.0251, "epoch": 10.087430666541318, "learning_rate": 1.1828725256400508e-05, "llm_loss": 0.0, "loss": 0.0251, "step": 107300 }, { "action_loss": 0.0119, "epoch": 10.092131240011282, "learning_rate": 1.1822225246917307e-05, "llm_loss": 0.0, "loss": 0.0119, "step": 107350 }, { "action_loss": 0.0208, "epoch": 10.096831813481245, "learning_rate": 1.1815724441001699e-05, "llm_loss": 0.0, "loss": 0.0208, "step": 107400 }, { "action_loss": 0.0199, "epoch": 10.101532386951208, "learning_rate": 1.1809222841494958e-05, "llm_loss": 0.0, "loss": 0.0199, "step": 107450 }, { "action_loss": 0.0272, "epoch": 10.106232960421172, "learning_rate": 1.1802720451238714e-05, "llm_loss": 0.0, "loss": 0.0272, "step": 107500 }, { "action_loss": 0.0087, "epoch": 10.110933533891135, "learning_rate": 1.1796217273074939e-05, "llm_loss": 0.0, "loss": 0.0087, "step": 107550 }, { "action_loss": 0.0321, "epoch": 10.115634107361098, "learning_rate": 1.1789713309845952e-05, "llm_loss": 0.0, "loss": 0.0321, "step": 107600 }, { "action_loss": 0.0192, "epoch": 10.12033468083106, "learning_rate": 1.1783208564394408e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 107650 }, { "action_loss": 0.0093, "epoch": 10.125035254301025, "learning_rate": 1.1776703039563317e-05, "llm_loss": 0.0, "loss": 0.0093, "step": 107700 }, { "action_loss": 0.0158, "epoch": 10.129735827770988, "learning_rate": 1.1770196738196015e-05, "llm_loss": 0.0, "loss": 0.0158, "step": 107750 }, { "action_loss": 0.0333, "epoch": 10.13443640124095, "learning_rate": 1.1763689663136191e-05, "llm_loss": 0.0, "loss": 0.0333, "step": 107800 }, { "action_loss": 0.0226, "epoch": 10.139136974710915, "learning_rate": 1.1757181817227856e-05, "llm_loss": 0.0, "loss": 0.0226, "step": 107850 }, { "action_loss": 0.0314, "epoch": 10.143837548180878, "learning_rate": 1.1750673203315375e-05, "llm_loss": 0.0, "loss": 0.0314, "step": 107900 }, { "action_loss": 0.0247, "epoch": 10.14853812165084, "learning_rate": 1.1744163824243435e-05, "llm_loss": 0.0, "loss": 0.0247, "step": 107950 }, { "action_loss": 0.0288, "epoch": 10.153238695120805, "learning_rate": 1.1737653682857067e-05, "llm_loss": 0.0, "loss": 0.0288, "step": 108000 }, { "action_loss": 0.0156, "epoch": 10.157939268590768, "learning_rate": 1.173114278200163e-05, "llm_loss": 0.0, "loss": 0.0156, "step": 108050 }, { "action_loss": 0.018, "epoch": 10.162639842060731, "learning_rate": 1.1724631124522816e-05, "llm_loss": 0.0, "loss": 0.018, "step": 108100 }, { "action_loss": 0.013, "epoch": 10.167340415530695, "learning_rate": 1.1718118713266651e-05, "llm_loss": 0.0, "loss": 0.013, "step": 108150 }, { "action_loss": 0.0122, "epoch": 10.172040989000658, "learning_rate": 1.1711605551079485e-05, "llm_loss": 0.0, "loss": 0.0122, "step": 108200 }, { "action_loss": 0.009, "epoch": 10.176741562470621, "learning_rate": 1.1705091640807997e-05, "llm_loss": 0.0, "loss": 0.009, "step": 108250 }, { "action_loss": 0.0317, "epoch": 10.181442135940586, "learning_rate": 1.16985769852992e-05, "llm_loss": 0.0, "loss": 0.0317, "step": 108300 }, { "action_loss": 0.0161, "epoch": 10.186142709410548, "learning_rate": 1.1692061587400421e-05, "llm_loss": 0.0, "loss": 0.0161, "step": 108350 }, { "action_loss": 0.0119, "epoch": 10.190843282880511, "learning_rate": 1.168554544995932e-05, "llm_loss": 0.0, "loss": 0.0119, "step": 108400 }, { "action_loss": 0.0233, "epoch": 10.195543856350474, "learning_rate": 1.1679028575823881e-05, "llm_loss": 0.0, "loss": 0.0233, "step": 108450 }, { "action_loss": 0.0307, "epoch": 10.200244429820438, "learning_rate": 1.1672510967842403e-05, "llm_loss": 0.0, "loss": 0.0307, "step": 108500 }, { "action_loss": 0.025, "epoch": 10.204945003290401, "learning_rate": 1.1665992628863516e-05, "llm_loss": 0.0, "loss": 0.025, "step": 108550 }, { "action_loss": 0.0146, "epoch": 10.209645576760364, "learning_rate": 1.1659473561736157e-05, "llm_loss": 0.0, "loss": 0.0146, "step": 108600 }, { "action_loss": 0.0312, "epoch": 10.214346150230329, "learning_rate": 1.1652953769309592e-05, "llm_loss": 0.0, "loss": 0.0312, "step": 108650 }, { "action_loss": 0.0273, "epoch": 10.219046723700291, "learning_rate": 1.1646433254433394e-05, "llm_loss": 0.0, "loss": 0.0273, "step": 108700 }, { "action_loss": 0.0244, "epoch": 10.223747297170254, "learning_rate": 1.163991201995746e-05, "llm_loss": 0.0, "loss": 0.0244, "step": 108750 }, { "action_loss": 0.0216, "epoch": 10.228447870640219, "learning_rate": 1.1633390068732003e-05, "llm_loss": 0.0, "loss": 0.0216, "step": 108800 }, { "action_loss": 0.0223, "epoch": 10.233148444110181, "learning_rate": 1.1626867403607539e-05, "llm_loss": 0.0, "loss": 0.0223, "step": 108850 }, { "action_loss": 0.0147, "epoch": 10.237849017580144, "learning_rate": 1.1620344027434904e-05, "llm_loss": 0.0, "loss": 0.0147, "step": 108900 }, { "action_loss": 0.0228, "epoch": 10.242549591050109, "learning_rate": 1.1613819943065246e-05, "llm_loss": 0.0, "loss": 0.0228, "step": 108950 }, { "action_loss": 0.0188, "epoch": 10.247250164520072, "learning_rate": 1.1607295153350016e-05, "llm_loss": 0.0, "loss": 0.0188, "step": 109000 }, { "action_loss": 0.0279, "epoch": 10.251950737990034, "learning_rate": 1.1600769661140978e-05, "llm_loss": 0.0, "loss": 0.0279, "step": 109050 }, { "action_loss": 0.0129, "epoch": 10.256651311459999, "learning_rate": 1.1594243469290203e-05, "llm_loss": 0.0, "loss": 0.0129, "step": 109100 }, { "action_loss": 0.0264, "epoch": 10.261351884929962, "learning_rate": 1.1587716580650062e-05, "llm_loss": 0.0, "loss": 0.0264, "step": 109150 }, { "action_loss": 0.0409, "epoch": 10.266052458399924, "learning_rate": 1.1581188998073239e-05, "llm_loss": 0.0, "loss": 0.0409, "step": 109200 }, { "action_loss": 0.0302, "epoch": 10.270753031869889, "learning_rate": 1.1574660724412716e-05, "llm_loss": 0.0, "loss": 0.0302, "step": 109250 }, { "action_loss": 0.0198, "epoch": 10.275453605339852, "learning_rate": 1.1568131762521782e-05, "llm_loss": 0.0, "loss": 0.0198, "step": 109300 }, { "action_loss": 0.0317, "epoch": 10.280154178809815, "learning_rate": 1.1561602115254016e-05, "llm_loss": 0.0, "loss": 0.0317, "step": 109350 }, { "action_loss": 0.033, "epoch": 10.284854752279777, "learning_rate": 1.155507178546331e-05, "llm_loss": 0.0, "loss": 0.033, "step": 109400 }, { "action_loss": 0.0239, "epoch": 10.289555325749742, "learning_rate": 1.1548540776003845e-05, "llm_loss": 0.0, "loss": 0.0239, "step": 109450 }, { "action_loss": 0.0221, "epoch": 10.294255899219705, "learning_rate": 1.1542009089730102e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 109500 }, { "action_loss": 0.0244, "epoch": 10.298956472689667, "learning_rate": 1.1535476729496864e-05, "llm_loss": 0.0, "loss": 0.0244, "step": 109550 }, { "action_loss": 0.0295, "epoch": 10.303657046159632, "learning_rate": 1.1528943698159192e-05, "llm_loss": 0.0, "loss": 0.0295, "step": 109600 }, { "action_loss": 0.0272, "epoch": 10.308357619629595, "learning_rate": 1.1522409998572459e-05, "llm_loss": 0.0, "loss": 0.0272, "step": 109650 }, { "action_loss": 0.0293, "epoch": 10.313058193099558, "learning_rate": 1.151587563359232e-05, "llm_loss": 0.0, "loss": 0.0293, "step": 109700 }, { "action_loss": 0.0216, "epoch": 10.317758766569522, "learning_rate": 1.1509340606074721e-05, "llm_loss": 0.0, "loss": 0.0216, "step": 109750 }, { "action_loss": 0.0256, "epoch": 10.322459340039485, "learning_rate": 1.1502804918875899e-05, "llm_loss": 0.0, "loss": 0.0256, "step": 109800 }, { "action_loss": 0.0102, "epoch": 10.327159913509448, "learning_rate": 1.1496268574852382e-05, "llm_loss": 0.0, "loss": 0.0102, "step": 109850 }, { "action_loss": 0.0179, "epoch": 10.331860486979412, "learning_rate": 1.1489731576860982e-05, "llm_loss": 0.0, "loss": 0.0179, "step": 109900 }, { "action_loss": 0.0425, "epoch": 10.336561060449375, "learning_rate": 1.1483193927758795e-05, "llm_loss": 0.0, "loss": 0.0425, "step": 109950 }, { "action_loss": 0.0211, "epoch": 10.341261633919338, "learning_rate": 1.1476655630403209e-05, "llm_loss": 0.0, "loss": 0.0211, "step": 110000 }, { "action_loss": 0.0116, "epoch": 10.345962207389302, "learning_rate": 1.1470116687651888e-05, "llm_loss": 0.0, "loss": 0.0116, "step": 110050 }, { "action_loss": 0.0094, "epoch": 10.350662780859265, "learning_rate": 1.1463577102362782e-05, "llm_loss": 0.0, "loss": 0.0094, "step": 110100 }, { "action_loss": 0.0237, "epoch": 10.355363354329228, "learning_rate": 1.1457036877394114e-05, "llm_loss": 0.0, "loss": 0.0237, "step": 110150 }, { "action_loss": 0.0134, "epoch": 10.360063927799192, "learning_rate": 1.1450496015604399e-05, "llm_loss": 0.0, "loss": 0.0134, "step": 110200 }, { "action_loss": 0.0234, "epoch": 10.364764501269155, "learning_rate": 1.1443954519852423e-05, "llm_loss": 0.0, "loss": 0.0234, "step": 110250 }, { "action_loss": 0.0313, "epoch": 10.369465074739118, "learning_rate": 1.143741239299725e-05, "llm_loss": 0.0, "loss": 0.0313, "step": 110300 }, { "action_loss": 0.0095, "epoch": 10.37416564820908, "learning_rate": 1.1430869637898218e-05, "llm_loss": 0.0, "loss": 0.0095, "step": 110350 }, { "action_loss": 0.027, "epoch": 10.378866221679045, "learning_rate": 1.1424326257414949e-05, "llm_loss": 0.0, "loss": 0.027, "step": 110400 }, { "action_loss": 0.0155, "epoch": 10.383566795149008, "learning_rate": 1.1417782254407323e-05, "llm_loss": 0.0, "loss": 0.0155, "step": 110450 }, { "action_loss": 0.0183, "epoch": 10.38826736861897, "learning_rate": 1.1411237631735505e-05, "llm_loss": 0.0, "loss": 0.0183, "step": 110500 }, { "action_loss": 0.0278, "epoch": 10.392967942088935, "learning_rate": 1.1404692392259924e-05, "llm_loss": 0.0, "loss": 0.0278, "step": 110550 }, { "action_loss": 0.0146, "epoch": 10.397668515558898, "learning_rate": 1.139814653884128e-05, "llm_loss": 0.0, "loss": 0.0146, "step": 110600 }, { "action_loss": 0.0125, "epoch": 10.402369089028861, "learning_rate": 1.1391600074340542e-05, "llm_loss": 0.0, "loss": 0.0125, "step": 110650 }, { "action_loss": 0.0063, "epoch": 10.407069662498825, "learning_rate": 1.1385053001618948e-05, "llm_loss": 0.0, "loss": 0.0063, "step": 110700 }, { "action_loss": 0.0154, "epoch": 10.411770235968788, "learning_rate": 1.1378505323537998e-05, "llm_loss": 0.0, "loss": 0.0154, "step": 110750 }, { "action_loss": 0.0182, "epoch": 10.416470809438751, "learning_rate": 1.1371957042959463e-05, "llm_loss": 0.0, "loss": 0.0182, "step": 110800 }, { "action_loss": 0.0245, "epoch": 10.421171382908716, "learning_rate": 1.1365408162745365e-05, "llm_loss": 0.0, "loss": 0.0245, "step": 110850 }, { "action_loss": 0.0191, "epoch": 10.425871956378678, "learning_rate": 1.1358858685757999e-05, "llm_loss": 0.0, "loss": 0.0191, "step": 110900 }, { "action_loss": 0.0183, "epoch": 10.430572529848641, "learning_rate": 1.1352308614859922e-05, "llm_loss": 0.0, "loss": 0.0183, "step": 110950 }, { "action_loss": 0.0152, "epoch": 10.435273103318606, "learning_rate": 1.134575795291394e-05, "llm_loss": 0.0, "loss": 0.0152, "step": 111000 }, { "action_loss": 0.0213, "epoch": 10.439973676788568, "learning_rate": 1.1339206702783127e-05, "llm_loss": 0.0, "loss": 0.0213, "step": 111050 }, { "action_loss": 0.0184, "epoch": 10.444674250258531, "learning_rate": 1.1332654867330808e-05, "llm_loss": 0.0, "loss": 0.0184, "step": 111100 }, { "action_loss": 0.0187, "epoch": 10.449374823728494, "learning_rate": 1.1326102449420571e-05, "llm_loss": 0.0, "loss": 0.0187, "step": 111150 }, { "action_loss": 0.0223, "epoch": 10.454075397198459, "learning_rate": 1.1319549451916251e-05, "llm_loss": 0.0, "loss": 0.0223, "step": 111200 }, { "action_loss": 0.0124, "epoch": 10.458775970668421, "learning_rate": 1.131299587768194e-05, "llm_loss": 0.0, "loss": 0.0124, "step": 111250 }, { "action_loss": 0.0251, "epoch": 10.463476544138384, "learning_rate": 1.1306441729581978e-05, "llm_loss": 0.0, "loss": 0.0251, "step": 111300 }, { "action_loss": 0.0205, "epoch": 10.468177117608349, "learning_rate": 1.1299887010480965e-05, "llm_loss": 0.0, "loss": 0.0205, "step": 111350 }, { "action_loss": 0.0215, "epoch": 10.472877691078311, "learning_rate": 1.1293331723243746e-05, "llm_loss": 0.0, "loss": 0.0215, "step": 111400 }, { "action_loss": 0.0151, "epoch": 10.477578264548274, "learning_rate": 1.1286775870735404e-05, "llm_loss": 0.0, "loss": 0.0151, "step": 111450 }, { "action_loss": 0.0122, "epoch": 10.482278838018239, "learning_rate": 1.1280219455821285e-05, "llm_loss": 0.0, "loss": 0.0122, "step": 111500 }, { "action_loss": 0.0212, "epoch": 10.486979411488202, "learning_rate": 1.1273662481366972e-05, "llm_loss": 0.0, "loss": 0.0212, "step": 111550 }, { "action_loss": 0.0154, "epoch": 10.491679984958164, "learning_rate": 1.1267104950238296e-05, "llm_loss": 0.0, "loss": 0.0154, "step": 111600 }, { "action_loss": 0.0126, "epoch": 10.496380558428129, "learning_rate": 1.126054686530133e-05, "llm_loss": 0.0, "loss": 0.0126, "step": 111650 }, { "action_loss": 0.0153, "epoch": 10.501081131898092, "learning_rate": 1.1253988229422385e-05, "llm_loss": 0.0, "loss": 0.0153, "step": 111700 }, { "action_loss": 0.0252, "epoch": 10.505781705368054, "learning_rate": 1.1247429045468023e-05, "llm_loss": 0.0, "loss": 0.0252, "step": 111750 }, { "action_loss": 0.0084, "epoch": 10.510482278838019, "learning_rate": 1.1240869316305035e-05, "llm_loss": 0.0, "loss": 0.0084, "step": 111800 }, { "action_loss": 0.0158, "epoch": 10.515182852307982, "learning_rate": 1.1234309044800453e-05, "llm_loss": 0.0, "loss": 0.0158, "step": 111850 }, { "action_loss": 0.0224, "epoch": 10.519883425777945, "learning_rate": 1.1227748233821553e-05, "llm_loss": 0.0, "loss": 0.0224, "step": 111900 }, { "action_loss": 0.0116, "epoch": 10.524583999247909, "learning_rate": 1.1221186886235838e-05, "llm_loss": 0.0, "loss": 0.0116, "step": 111950 }, { "action_loss": 0.023, "epoch": 10.529284572717872, "learning_rate": 1.121462500491105e-05, "llm_loss": 0.0, "loss": 0.023, "step": 112000 }, { "action_loss": 0.0151, "epoch": 10.533985146187835, "learning_rate": 1.120806259271516e-05, "llm_loss": 0.0, "loss": 0.0151, "step": 112050 }, { "action_loss": 0.0312, "epoch": 10.538685719657797, "learning_rate": 1.120149965251638e-05, "llm_loss": 0.0, "loss": 0.0312, "step": 112100 }, { "action_loss": 0.027, "epoch": 10.543386293127762, "learning_rate": 1.1194936187183138e-05, "llm_loss": 0.0, "loss": 0.027, "step": 112150 }, { "action_loss": 0.0291, "epoch": 10.548086866597725, "learning_rate": 1.118837219958411e-05, "llm_loss": 0.0, "loss": 0.0291, "step": 112200 }, { "action_loss": 0.0271, "epoch": 10.552787440067688, "learning_rate": 1.1181807692588186e-05, "llm_loss": 0.0, "loss": 0.0271, "step": 112250 }, { "action_loss": 0.0185, "epoch": 10.557488013537652, "learning_rate": 1.1175242669064487e-05, "llm_loss": 0.0, "loss": 0.0185, "step": 112300 }, { "action_loss": 0.0219, "epoch": 10.562188587007615, "learning_rate": 1.1168677131882368e-05, "llm_loss": 0.0, "loss": 0.0219, "step": 112350 }, { "action_loss": 0.0294, "epoch": 10.566889160477578, "learning_rate": 1.1162111083911393e-05, "llm_loss": 0.0, "loss": 0.0294, "step": 112400 }, { "action_loss": 0.0116, "epoch": 10.571589733947542, "learning_rate": 1.1155544528021358e-05, "llm_loss": 0.0, "loss": 0.0116, "step": 112450 }, { "action_loss": 0.0156, "epoch": 10.576290307417505, "learning_rate": 1.114897746708228e-05, "llm_loss": 0.0, "loss": 0.0156, "step": 112500 }, { "action_loss": 0.031, "epoch": 10.580990880887468, "learning_rate": 1.1142409903964401e-05, "llm_loss": 0.0, "loss": 0.031, "step": 112550 }, { "action_loss": 0.0125, "epoch": 10.585691454357432, "learning_rate": 1.1135841841538175e-05, "llm_loss": 0.0, "loss": 0.0125, "step": 112600 }, { "action_loss": 0.0192, "epoch": 10.590392027827395, "learning_rate": 1.112927328267428e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 112650 }, { "action_loss": 0.0211, "epoch": 10.595092601297358, "learning_rate": 1.112270423024361e-05, "llm_loss": 0.0, "loss": 0.0211, "step": 112700 }, { "action_loss": 0.0089, "epoch": 10.599793174767322, "learning_rate": 1.1116134687117269e-05, "llm_loss": 0.0, "loss": 0.0089, "step": 112750 }, { "action_loss": 0.0318, "epoch": 10.604493748237285, "learning_rate": 1.1109564656166583e-05, "llm_loss": 0.0, "loss": 0.0318, "step": 112800 }, { "action_loss": 0.0314, "epoch": 10.609194321707248, "learning_rate": 1.1102994140263086e-05, "llm_loss": 0.0, "loss": 0.0314, "step": 112850 }, { "action_loss": 0.0215, "epoch": 10.61389489517721, "learning_rate": 1.1096423142278527e-05, "llm_loss": 0.0, "loss": 0.0215, "step": 112900 }, { "action_loss": 0.0158, "epoch": 10.618595468647175, "learning_rate": 1.1089851665084866e-05, "llm_loss": 0.0, "loss": 0.0158, "step": 112950 }, { "action_loss": 0.041, "epoch": 10.623296042117138, "learning_rate": 1.108327971155427e-05, "llm_loss": 0.0, "loss": 0.041, "step": 113000 }, { "action_loss": 0.0218, "epoch": 10.6279966155871, "learning_rate": 1.1076707284559114e-05, "llm_loss": 0.0, "loss": 0.0218, "step": 113050 }, { "action_loss": 0.0151, "epoch": 10.632697189057065, "learning_rate": 1.1070134386971985e-05, "llm_loss": 0.0, "loss": 0.0151, "step": 113100 }, { "action_loss": 0.0253, "epoch": 10.637397762527028, "learning_rate": 1.106356102166567e-05, "llm_loss": 0.0, "loss": 0.0253, "step": 113150 }, { "action_loss": 0.0089, "epoch": 10.642098335996991, "learning_rate": 1.105698719151316e-05, "llm_loss": 0.0, "loss": 0.0089, "step": 113200 }, { "action_loss": 0.0121, "epoch": 10.646798909466956, "learning_rate": 1.1050412899387655e-05, "llm_loss": 0.0, "loss": 0.0121, "step": 113250 }, { "action_loss": 0.0228, "epoch": 10.651499482936918, "learning_rate": 1.1043838148162556e-05, "llm_loss": 0.0, "loss": 0.0228, "step": 113300 }, { "action_loss": 0.0191, "epoch": 10.656200056406881, "learning_rate": 1.1037262940711455e-05, "llm_loss": 0.0, "loss": 0.0191, "step": 113350 }, { "action_loss": 0.0273, "epoch": 10.660900629876846, "learning_rate": 1.1030687279908155e-05, "llm_loss": 0.0, "loss": 0.0273, "step": 113400 }, { "action_loss": 0.019, "epoch": 10.665601203346808, "learning_rate": 1.102411116862665e-05, "llm_loss": 0.0, "loss": 0.019, "step": 113450 }, { "action_loss": 0.0332, "epoch": 10.670301776816771, "learning_rate": 1.101753460974114e-05, "llm_loss": 0.0, "loss": 0.0332, "step": 113500 }, { "action_loss": 0.0099, "epoch": 10.675002350286736, "learning_rate": 1.1010957606126007e-05, "llm_loss": 0.0, "loss": 0.0099, "step": 113550 }, { "action_loss": 0.0352, "epoch": 10.679702923756698, "learning_rate": 1.1004380160655836e-05, "llm_loss": 0.0, "loss": 0.0352, "step": 113600 }, { "action_loss": 0.0328, "epoch": 10.684403497226661, "learning_rate": 1.0997802276205407e-05, "llm_loss": 0.0, "loss": 0.0328, "step": 113650 }, { "action_loss": 0.0226, "epoch": 10.689104070696626, "learning_rate": 1.0991223955649688e-05, "llm_loss": 0.0, "loss": 0.0226, "step": 113700 }, { "action_loss": 0.029, "epoch": 10.693804644166589, "learning_rate": 1.0984645201863838e-05, "llm_loss": 0.0, "loss": 0.029, "step": 113750 }, { "action_loss": 0.0196, "epoch": 10.698505217636551, "learning_rate": 1.0978066017723202e-05, "llm_loss": 0.0, "loss": 0.0196, "step": 113800 }, { "action_loss": 0.0159, "epoch": 10.703205791106516, "learning_rate": 1.0971486406103318e-05, "llm_loss": 0.0, "loss": 0.0159, "step": 113850 }, { "action_loss": 0.0238, "epoch": 10.707906364576479, "learning_rate": 1.0964906369879913e-05, "llm_loss": 0.0, "loss": 0.0238, "step": 113900 }, { "action_loss": 0.0201, "epoch": 10.712606938046441, "learning_rate": 1.0958325911928893e-05, "llm_loss": 0.0, "loss": 0.0201, "step": 113950 }, { "action_loss": 0.035, "epoch": 10.717307511516404, "learning_rate": 1.0951745035126352e-05, "llm_loss": 0.0, "loss": 0.035, "step": 114000 }, { "action_loss": 0.0187, "epoch": 10.722008084986369, "learning_rate": 1.0945163742348566e-05, "llm_loss": 0.0, "loss": 0.0187, "step": 114050 }, { "action_loss": 0.0225, "epoch": 10.726708658456332, "learning_rate": 1.0938582036471995e-05, "llm_loss": 0.0, "loss": 0.0225, "step": 114100 }, { "action_loss": 0.019, "epoch": 10.731409231926294, "learning_rate": 1.0931999920373275e-05, "llm_loss": 0.0, "loss": 0.019, "step": 114150 }, { "action_loss": 0.0252, "epoch": 10.736109805396259, "learning_rate": 1.092541739692923e-05, "llm_loss": 0.0, "loss": 0.0252, "step": 114200 }, { "action_loss": 0.0112, "epoch": 10.740810378866222, "learning_rate": 1.0918834469016848e-05, "llm_loss": 0.0, "loss": 0.0112, "step": 114250 }, { "action_loss": 0.0404, "epoch": 10.745510952336184, "learning_rate": 1.0912251139513307e-05, "llm_loss": 0.0, "loss": 0.0404, "step": 114300 }, { "action_loss": 0.0218, "epoch": 10.750211525806149, "learning_rate": 1.0905667411295954e-05, "llm_loss": 0.0, "loss": 0.0218, "step": 114350 }, { "action_loss": 0.0234, "epoch": 10.754912099276112, "learning_rate": 1.0899083287242314e-05, "llm_loss": 0.0, "loss": 0.0234, "step": 114400 }, { "action_loss": 0.0349, "epoch": 10.759612672746075, "learning_rate": 1.0892498770230077e-05, "llm_loss": 0.0, "loss": 0.0349, "step": 114450 }, { "action_loss": 0.0216, "epoch": 10.76431324621604, "learning_rate": 1.0885913863137116e-05, "llm_loss": 0.0, "loss": 0.0216, "step": 114500 }, { "action_loss": 0.0133, "epoch": 10.769013819686002, "learning_rate": 1.0879328568841467e-05, "llm_loss": 0.0, "loss": 0.0133, "step": 114550 }, { "action_loss": 0.0385, "epoch": 10.773714393155965, "learning_rate": 1.087274289022134e-05, "llm_loss": 0.0, "loss": 0.0385, "step": 114600 }, { "action_loss": 0.015, "epoch": 10.778414966625927, "learning_rate": 1.0866156830155103e-05, "llm_loss": 0.0, "loss": 0.015, "step": 114650 }, { "action_loss": 0.025, "epoch": 10.783115540095892, "learning_rate": 1.0859570391521308e-05, "llm_loss": 0.0, "loss": 0.025, "step": 114700 }, { "action_loss": 0.0289, "epoch": 10.787816113565855, "learning_rate": 1.0852983577198653e-05, "llm_loss": 0.0, "loss": 0.0289, "step": 114750 }, { "action_loss": 0.0345, "epoch": 10.792516687035818, "learning_rate": 1.0846396390066012e-05, "llm_loss": 0.0, "loss": 0.0345, "step": 114800 }, { "action_loss": 0.0155, "epoch": 10.797217260505782, "learning_rate": 1.0839808833002423e-05, "llm_loss": 0.0, "loss": 0.0155, "step": 114850 }, { "action_loss": 0.0195, "epoch": 10.801917833975745, "learning_rate": 1.0833220908887079e-05, "llm_loss": 0.0, "loss": 0.0195, "step": 114900 }, { "action_loss": 0.0273, "epoch": 10.806618407445708, "learning_rate": 1.0826632620599338e-05, "llm_loss": 0.0, "loss": 0.0273, "step": 114950 }, { "action_loss": 0.0224, "epoch": 10.811318980915672, "learning_rate": 1.0820043971018717e-05, "llm_loss": 0.0, "loss": 0.0224, "step": 115000 }, { "action_loss": 0.0248, "epoch": 10.816019554385635, "learning_rate": 1.0813454963024885e-05, "llm_loss": 0.0, "loss": 0.0248, "step": 115050 }, { "action_loss": 0.0184, "epoch": 10.820720127855598, "learning_rate": 1.0806865599497676e-05, "llm_loss": 0.0, "loss": 0.0184, "step": 115100 }, { "action_loss": 0.0237, "epoch": 10.825420701325562, "learning_rate": 1.0800275883317078e-05, "llm_loss": 0.0, "loss": 0.0237, "step": 115150 }, { "action_loss": 0.0119, "epoch": 10.830121274795525, "learning_rate": 1.0793685817363225e-05, "llm_loss": 0.0, "loss": 0.0119, "step": 115200 }, { "action_loss": 0.0154, "epoch": 10.834821848265488, "learning_rate": 1.0787095404516412e-05, "llm_loss": 0.0, "loss": 0.0154, "step": 115250 }, { "action_loss": 0.0224, "epoch": 10.839522421735452, "learning_rate": 1.0780504647657084e-05, "llm_loss": 0.0, "loss": 0.0224, "step": 115300 }, { "action_loss": 0.0218, "epoch": 10.844222995205415, "learning_rate": 1.0773913549665835e-05, "llm_loss": 0.0, "loss": 0.0218, "step": 115350 }, { "action_loss": 0.0237, "epoch": 10.848923568675378, "learning_rate": 1.0767322113423413e-05, "llm_loss": 0.0, "loss": 0.0237, "step": 115400 }, { "action_loss": 0.0124, "epoch": 10.853624142145343, "learning_rate": 1.0760730341810704e-05, "llm_loss": 0.0, "loss": 0.0124, "step": 115450 }, { "action_loss": 0.0212, "epoch": 10.858324715615305, "learning_rate": 1.0754138237708746e-05, "llm_loss": 0.0, "loss": 0.0212, "step": 115500 }, { "action_loss": 0.0179, "epoch": 10.863025289085268, "learning_rate": 1.0747545803998727e-05, "llm_loss": 0.0, "loss": 0.0179, "step": 115550 }, { "action_loss": 0.0178, "epoch": 10.867725862555233, "learning_rate": 1.0740953043561975e-05, "llm_loss": 0.0, "loss": 0.0178, "step": 115600 }, { "action_loss": 0.0278, "epoch": 10.872426436025195, "learning_rate": 1.0734359959279954e-05, "llm_loss": 0.0, "loss": 0.0278, "step": 115650 }, { "action_loss": 0.0192, "epoch": 10.877127009495158, "learning_rate": 1.072776655403428e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 115700 }, { "action_loss": 0.0311, "epoch": 10.881827582965121, "learning_rate": 1.0721172830706706e-05, "llm_loss": 0.0, "loss": 0.0311, "step": 115750 }, { "action_loss": 0.0305, "epoch": 10.886528156435086, "learning_rate": 1.0714578792179123e-05, "llm_loss": 0.0, "loss": 0.0305, "step": 115800 }, { "action_loss": 0.0193, "epoch": 10.891228729905048, "learning_rate": 1.0707984441333557e-05, "llm_loss": 0.0, "loss": 0.0193, "step": 115850 }, { "action_loss": 0.0142, "epoch": 10.895929303375011, "learning_rate": 1.0701389781052175e-05, "llm_loss": 0.0, "loss": 0.0142, "step": 115900 }, { "action_loss": 0.0302, "epoch": 10.900629876844976, "learning_rate": 1.069479481421728e-05, "llm_loss": 0.0, "loss": 0.0302, "step": 115950 }, { "action_loss": 0.0324, "epoch": 10.905330450314938, "learning_rate": 1.0688199543711304e-05, "llm_loss": 0.0, "loss": 0.0324, "step": 116000 }, { "action_loss": 0.0179, "epoch": 10.910031023784901, "learning_rate": 1.0681603972416816e-05, "llm_loss": 0.0, "loss": 0.0179, "step": 116050 }, { "action_loss": 0.0353, "epoch": 10.914731597254866, "learning_rate": 1.0675008103216515e-05, "llm_loss": 0.0, "loss": 0.0353, "step": 116100 }, { "action_loss": 0.0159, "epoch": 10.919432170724829, "learning_rate": 1.0668411938993228e-05, "llm_loss": 0.0, "loss": 0.0159, "step": 116150 }, { "action_loss": 0.0209, "epoch": 10.924132744194791, "learning_rate": 1.0661815482629918e-05, "llm_loss": 0.0, "loss": 0.0209, "step": 116200 }, { "action_loss": 0.028, "epoch": 10.928833317664756, "learning_rate": 1.0655218737009664e-05, "llm_loss": 0.0, "loss": 0.028, "step": 116250 }, { "action_loss": 0.0317, "epoch": 10.933533891134719, "learning_rate": 1.0648621705015682e-05, "llm_loss": 0.0, "loss": 0.0317, "step": 116300 }, { "action_loss": 0.0334, "epoch": 10.938234464604681, "learning_rate": 1.064202438953131e-05, "llm_loss": 0.0, "loss": 0.0334, "step": 116350 }, { "action_loss": 0.0118, "epoch": 10.942935038074644, "learning_rate": 1.0635426793440006e-05, "llm_loss": 0.0, "loss": 0.0118, "step": 116400 }, { "action_loss": 0.0166, "epoch": 10.947635611544609, "learning_rate": 1.0628828919625355e-05, "llm_loss": 0.0, "loss": 0.0166, "step": 116450 }, { "action_loss": 0.0311, "epoch": 10.952336185014572, "learning_rate": 1.062223077097107e-05, "llm_loss": 0.0, "loss": 0.0311, "step": 116500 }, { "action_loss": 0.0192, "epoch": 10.957036758484534, "learning_rate": 1.0615632350360965e-05, "llm_loss": 0.0, "loss": 0.0192, "step": 116550 }, { "action_loss": 0.009, "epoch": 10.961737331954499, "learning_rate": 1.0609033660678992e-05, "llm_loss": 0.0, "loss": 0.009, "step": 116600 }, { "action_loss": 0.0185, "epoch": 10.966437905424462, "learning_rate": 1.0602434704809205e-05, "llm_loss": 0.0, "loss": 0.0185, "step": 116650 }, { "action_loss": 0.0081, "epoch": 10.971138478894424, "learning_rate": 1.0595835485635788e-05, "llm_loss": 0.0, "loss": 0.0081, "step": 116700 }, { "action_loss": 0.034, "epoch": 10.975839052364389, "learning_rate": 1.0589236006043033e-05, "llm_loss": 0.0, "loss": 0.034, "step": 116750 }, { "action_loss": 0.0091, "epoch": 10.980539625834352, "learning_rate": 1.0582636268915349e-05, "llm_loss": 0.0, "loss": 0.0091, "step": 116800 }, { "action_loss": 0.0168, "epoch": 10.985240199304315, "learning_rate": 1.0576036277137254e-05, "llm_loss": 0.0, "loss": 0.0168, "step": 116850 }, { "action_loss": 0.0177, "epoch": 10.989940772774279, "learning_rate": 1.0569436033593378e-05, "llm_loss": 0.0, "loss": 0.0177, "step": 116900 }, { "action_loss": 0.0209, "epoch": 10.994641346244242, "learning_rate": 1.0562835541168466e-05, "llm_loss": 0.0, "loss": 0.0209, "step": 116950 }, { "action_loss": 0.0217, "epoch": 10.999341919714205, "learning_rate": 1.0556234802747363e-05, "llm_loss": 0.0, "loss": 0.0217, "step": 117000 }, { "action_loss": 0.0221, "epoch": 11.00404249318417, "learning_rate": 1.0549633821215026e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 117050 }, { "action_loss": 0.0198, "epoch": 11.008743066654132, "learning_rate": 1.0543032599456523e-05, "llm_loss": 0.0, "loss": 0.0198, "step": 117100 }, { "action_loss": 0.0247, "epoch": 11.013443640124095, "learning_rate": 1.0536431140357018e-05, "llm_loss": 0.0, "loss": 0.0247, "step": 117150 }, { "action_loss": 0.0252, "epoch": 11.01814421359406, "learning_rate": 1.0529829446801785e-05, "llm_loss": 0.0, "loss": 0.0252, "step": 117200 }, { "action_loss": 0.0188, "epoch": 11.022844787064022, "learning_rate": 1.05232275216762e-05, "llm_loss": 0.0, "loss": 0.0188, "step": 117250 }, { "action_loss": 0.0308, "epoch": 11.027545360533985, "learning_rate": 1.051662536786574e-05, "llm_loss": 0.0, "loss": 0.0308, "step": 117300 }, { "action_loss": 0.0222, "epoch": 11.03224593400395, "learning_rate": 1.0510022988255979e-05, "llm_loss": 0.0, "loss": 0.0222, "step": 117350 }, { "action_loss": 0.026, "epoch": 11.036946507473912, "learning_rate": 1.0503420385732588e-05, "llm_loss": 0.0, "loss": 0.026, "step": 117400 }, { "action_loss": 0.0226, "epoch": 11.041647080943875, "learning_rate": 1.0496817563181347e-05, "llm_loss": 0.0, "loss": 0.0226, "step": 117450 }, { "action_loss": 0.0129, "epoch": 11.046347654413838, "learning_rate": 1.0490214523488118e-05, "llm_loss": 0.0, "loss": 0.0129, "step": 117500 }, { "action_loss": 0.0319, "epoch": 11.051048227883802, "learning_rate": 1.0483611269538865e-05, "llm_loss": 0.0, "loss": 0.0319, "step": 117550 }, { "action_loss": 0.0149, "epoch": 11.055748801353765, "learning_rate": 1.0477007804219646e-05, "llm_loss": 0.0, "loss": 0.0149, "step": 117600 }, { "action_loss": 0.0222, "epoch": 11.060449374823728, "learning_rate": 1.0470404130416609e-05, "llm_loss": 0.0, "loss": 0.0222, "step": 117650 }, { "action_loss": 0.0124, "epoch": 11.065149948293692, "learning_rate": 1.0463800251016e-05, "llm_loss": 0.0, "loss": 0.0124, "step": 117700 }, { "action_loss": 0.0084, "epoch": 11.069850521763655, "learning_rate": 1.0457196168904137e-05, "llm_loss": 0.0, "loss": 0.0084, "step": 117750 }, { "action_loss": 0.0253, "epoch": 11.074551095233618, "learning_rate": 1.0450591886967449e-05, "llm_loss": 0.0, "loss": 0.0253, "step": 117800 }, { "action_loss": 0.0227, "epoch": 11.079251668703582, "learning_rate": 1.0443987408092437e-05, "llm_loss": 0.0, "loss": 0.0227, "step": 117850 }, { "action_loss": 0.0155, "epoch": 11.083952242173545, "learning_rate": 1.0437382735165699e-05, "llm_loss": 0.0, "loss": 0.0155, "step": 117900 }, { "action_loss": 0.0212, "epoch": 11.088652815643508, "learning_rate": 1.0430777871073908e-05, "llm_loss": 0.0, "loss": 0.0212, "step": 117950 }, { "action_loss": 0.0186, "epoch": 11.093353389113473, "learning_rate": 1.042417281870382e-05, "llm_loss": 0.0, "loss": 0.0186, "step": 118000 }, { "action_loss": 0.0307, "epoch": 11.098053962583435, "learning_rate": 1.0417567580942285e-05, "llm_loss": 0.0, "loss": 0.0307, "step": 118050 }, { "action_loss": 0.0296, "epoch": 11.102754536053398, "learning_rate": 1.0410962160676223e-05, "llm_loss": 0.0, "loss": 0.0296, "step": 118100 }, { "action_loss": 0.0096, "epoch": 11.107455109523363, "learning_rate": 1.040435656079264e-05, "llm_loss": 0.0, "loss": 0.0096, "step": 118150 }, { "action_loss": 0.0352, "epoch": 11.112155682993325, "learning_rate": 1.0397750784178616e-05, "llm_loss": 0.0, "loss": 0.0352, "step": 118200 }, { "action_loss": 0.0243, "epoch": 11.116856256463288, "learning_rate": 1.039114483372131e-05, "llm_loss": 0.0, "loss": 0.0243, "step": 118250 }, { "action_loss": 0.0278, "epoch": 11.121556829933251, "learning_rate": 1.0384538712307957e-05, "llm_loss": 0.0, "loss": 0.0278, "step": 118300 }, { "action_loss": 0.0226, "epoch": 11.126257403403216, "learning_rate": 1.037793242282587e-05, "llm_loss": 0.0, "loss": 0.0226, "step": 118350 }, { "action_loss": 0.0382, "epoch": 11.130957976873178, "learning_rate": 1.0371325968162432e-05, "llm_loss": 0.0, "loss": 0.0382, "step": 118400 }, { "action_loss": 0.022, "epoch": 11.135658550343141, "learning_rate": 1.0364719351205094e-05, "llm_loss": 0.0, "loss": 0.022, "step": 118450 }, { "action_loss": 0.0186, "epoch": 11.140359123813106, "learning_rate": 1.0358112574841391e-05, "llm_loss": 0.0, "loss": 0.0186, "step": 118500 }, { "action_loss": 0.0187, "epoch": 11.145059697283068, "learning_rate": 1.035150564195891e-05, "llm_loss": 0.0, "loss": 0.0187, "step": 118550 }, { "action_loss": 0.0217, "epoch": 11.149760270753031, "learning_rate": 1.0344898555445319e-05, "llm_loss": 0.0, "loss": 0.0217, "step": 118600 }, { "action_loss": 0.0144, "epoch": 11.154460844222996, "learning_rate": 1.0338291318188355e-05, "llm_loss": 0.0, "loss": 0.0144, "step": 118650 }, { "action_loss": 0.0143, "epoch": 11.159161417692959, "learning_rate": 1.0331683933075808e-05, "llm_loss": 0.0, "loss": 0.0143, "step": 118700 }, { "action_loss": 0.0186, "epoch": 11.163861991162921, "learning_rate": 1.0325076402995543e-05, "llm_loss": 0.0, "loss": 0.0186, "step": 118750 }, { "action_loss": 0.0175, "epoch": 11.168562564632886, "learning_rate": 1.031846873083549e-05, "llm_loss": 0.0, "loss": 0.0175, "step": 118800 }, { "action_loss": 0.0124, "epoch": 11.173263138102849, "learning_rate": 1.0311860919483633e-05, "llm_loss": 0.0, "loss": 0.0124, "step": 118850 }, { "action_loss": 0.0331, "epoch": 11.177963711572811, "learning_rate": 1.0305252971828022e-05, "llm_loss": 0.0, "loss": 0.0331, "step": 118900 }, { "action_loss": 0.0329, "epoch": 11.182664285042776, "learning_rate": 1.029864489075676e-05, "llm_loss": 0.0, "loss": 0.0329, "step": 118950 }, { "action_loss": 0.0188, "epoch": 11.187364858512739, "learning_rate": 1.029203667915802e-05, "llm_loss": 0.0, "loss": 0.0188, "step": 119000 }, { "action_loss": 0.0351, "epoch": 11.192065431982702, "learning_rate": 1.028542833992002e-05, "llm_loss": 0.0, "loss": 0.0351, "step": 119050 }, { "action_loss": 0.0235, "epoch": 11.196766005452666, "learning_rate": 1.0278819875931046e-05, "llm_loss": 0.0, "loss": 0.0235, "step": 119100 }, { "action_loss": 0.0239, "epoch": 11.201466578922629, "learning_rate": 1.0272211290079426e-05, "llm_loss": 0.0, "loss": 0.0239, "step": 119150 }, { "action_loss": 0.0132, "epoch": 11.206167152392592, "learning_rate": 1.0265602585253552e-05, "llm_loss": 0.0, "loss": 0.0132, "step": 119200 }, { "action_loss": 0.0235, "epoch": 11.210867725862554, "learning_rate": 1.0258993764341861e-05, "llm_loss": 0.0, "loss": 0.0235, "step": 119250 }, { "action_loss": 0.0159, "epoch": 11.215568299332519, "learning_rate": 1.025238483023284e-05, "llm_loss": 0.0014, "loss": 0.0173, "step": 119300 }, { "action_loss": 0.0346, "epoch": 11.220268872802482, "learning_rate": 1.0245775785815033e-05, "llm_loss": 0.0, "loss": 0.0346, "step": 119350 }, { "action_loss": 0.0224, "epoch": 11.224969446272445, "learning_rate": 1.0239166633977023e-05, "llm_loss": 0.0, "loss": 0.0224, "step": 119400 }, { "action_loss": 0.0246, "epoch": 11.229670019742409, "learning_rate": 1.0232557377607449e-05, "llm_loss": 0.0, "loss": 0.0246, "step": 119450 }, { "action_loss": 0.0066, "epoch": 11.234370593212372, "learning_rate": 1.0225948019594987e-05, "llm_loss": 0.0, "loss": 0.0066, "step": 119500 }, { "action_loss": 0.0197, "epoch": 11.239071166682335, "learning_rate": 1.0219338562828365e-05, "llm_loss": 0.0, "loss": 0.0197, "step": 119550 }, { "action_loss": 0.0137, "epoch": 11.2437717401523, "learning_rate": 1.021272901019635e-05, "llm_loss": 0.0, "loss": 0.0137, "step": 119600 }, { "action_loss": 0.0154, "epoch": 11.248472313622262, "learning_rate": 1.020611936458775e-05, "llm_loss": 0.0, "loss": 0.0154, "step": 119650 }, { "action_loss": 0.0215, "epoch": 11.253172887092225, "learning_rate": 1.0199509628891417e-05, "llm_loss": 0.0, "loss": 0.0215, "step": 119700 }, { "action_loss": 0.0145, "epoch": 11.25787346056219, "learning_rate": 1.0192899805996241e-05, "llm_loss": 0.0, "loss": 0.0145, "step": 119750 }, { "action_loss": 0.0343, "epoch": 11.262574034032152, "learning_rate": 1.018628989879115e-05, "llm_loss": 0.0, "loss": 0.0343, "step": 119800 }, { "action_loss": 0.0216, "epoch": 11.267274607502115, "learning_rate": 1.0179679910165106e-05, "llm_loss": 0.0, "loss": 0.0216, "step": 119850 }, { "action_loss": 0.0136, "epoch": 11.27197518097208, "learning_rate": 1.0173069843007111e-05, "llm_loss": 0.0, "loss": 0.0136, "step": 119900 }, { "action_loss": 0.0356, "epoch": 11.276675754442042, "learning_rate": 1.0166459700206199e-05, "llm_loss": 0.0, "loss": 0.0356, "step": 119950 }, { "action_loss": 0.0181, "epoch": 11.281376327912005, "learning_rate": 1.015984948465144e-05, "llm_loss": 0.0, "loss": 0.0181, "step": 120000 }, { "action_loss": 0.0221, "epoch": 11.286076901381968, "learning_rate": 1.015323919923193e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 120050 }, { "action_loss": 0.021, "epoch": 11.290777474851932, "learning_rate": 1.0146628846836798e-05, "llm_loss": 0.0, "loss": 0.021, "step": 120100 }, { "action_loss": 0.0105, "epoch": 11.295478048321895, "learning_rate": 1.014001843035521e-05, "llm_loss": 0.0, "loss": 0.0105, "step": 120150 }, { "action_loss": 0.0325, "epoch": 11.300178621791858, "learning_rate": 1.0133407952676346e-05, "llm_loss": 0.0, "loss": 0.0325, "step": 120200 }, { "action_loss": 0.0352, "epoch": 11.304879195261822, "learning_rate": 1.0126797416689426e-05, "llm_loss": 0.0, "loss": 0.0352, "step": 120250 }, { "action_loss": 0.0311, "epoch": 11.309579768731785, "learning_rate": 1.0120186825283684e-05, "llm_loss": 0.0, "loss": 0.0311, "step": 120300 }, { "action_loss": 0.0221, "epoch": 11.314280342201748, "learning_rate": 1.0113576181348387e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 120350 }, { "action_loss": 0.0182, "epoch": 11.318980915671712, "learning_rate": 1.0106965487772824e-05, "llm_loss": 0.0, "loss": 0.0182, "step": 120400 }, { "action_loss": 0.0184, "epoch": 11.323681489141675, "learning_rate": 1.0100354747446294e-05, "llm_loss": 0.0, "loss": 0.0184, "step": 120450 }, { "action_loss": 0.0272, "epoch": 11.328382062611638, "learning_rate": 1.0093743963258135e-05, "llm_loss": 0.0, "loss": 0.0272, "step": 120500 }, { "action_loss": 0.0221, "epoch": 11.333082636081603, "learning_rate": 1.0087133138097691e-05, "llm_loss": 0.0, "loss": 0.0221, "step": 120550 }, { "action_loss": 0.0293, "epoch": 11.337783209551565, "learning_rate": 1.0080522274854329e-05, "llm_loss": 0.0, "loss": 0.0293, "step": 120600 }, { "action_loss": 0.0262, "epoch": 11.342483783021528, "learning_rate": 1.007391137641743e-05, "llm_loss": 0.0, "loss": 0.0262, "step": 120650 }, { "action_loss": 0.0251, "epoch": 11.347184356491493, "learning_rate": 1.0067300445676398e-05, "llm_loss": 0.0, "loss": 0.0251, "step": 120700 }, { "action_loss": 0.0229, "epoch": 11.351884929961455, "learning_rate": 1.0060689485520639e-05, "llm_loss": 0.0, "loss": 0.0229, "step": 120750 }, { "action_loss": 0.0281, "epoch": 11.356585503431418, "learning_rate": 1.0054078498839574e-05, "llm_loss": 0.0, "loss": 0.0281, "step": 120800 }, { "action_loss": 0.0186, "epoch": 11.361286076901383, "learning_rate": 1.0047467488522647e-05, "llm_loss": 0.0, "loss": 0.0186, "step": 120850 }, { "action_loss": 0.0128, "epoch": 11.365986650371346, "learning_rate": 1.00408564574593e-05, "llm_loss": 0.0, "loss": 0.0128, "step": 120900 }, { "action_loss": 0.0325, "epoch": 11.370687223841308, "learning_rate": 1.0034245408538994e-05, "llm_loss": 0.0, "loss": 0.0325, "step": 120950 }, { "action_loss": 0.0173, "epoch": 11.375387797311271, "learning_rate": 1.002763434465119e-05, "llm_loss": 0.0, "loss": 0.0173, "step": 121000 }, { "action_loss": 0.0255, "epoch": 11.380088370781236, "learning_rate": 1.0021023268685355e-05, "llm_loss": 0.0, "loss": 0.0255, "step": 121050 }, { "action_loss": 0.0178, "epoch": 11.384788944251198, "learning_rate": 1.001441218353097e-05, "llm_loss": 0.0, "loss": 0.0178, "step": 121100 }, { "action_loss": 0.0303, "epoch": 11.389489517721161, "learning_rate": 1.0007801092077513e-05, "llm_loss": 0.0, "loss": 0.0303, "step": 121150 }, { "action_loss": 0.0224, "epoch": 11.394190091191126, "learning_rate": 1.0001189997214462e-05, "llm_loss": 0.0, "loss": 0.0224, "step": 121200 }, { "action_loss": 0.0126, "epoch": 11.398890664661089, "learning_rate": 9.994578901831304e-06, "llm_loss": 0.0, "loss": 0.0126, "step": 121250 }, { "action_loss": 0.0228, "epoch": 11.403591238131051, "learning_rate": 9.987967808817522e-06, "llm_loss": 0.0, "loss": 0.0228, "step": 121300 }, { "action_loss": 0.0173, "epoch": 11.408291811601016, "learning_rate": 9.981356721062602e-06, "llm_loss": 0.0, "loss": 0.0173, "step": 121350 }, { "action_loss": 0.0244, "epoch": 11.412992385070979, "learning_rate": 9.974745641456026e-06, "llm_loss": 0.0, "loss": 0.0244, "step": 121400 }, { "action_loss": 0.0276, "epoch": 11.417692958540941, "learning_rate": 9.968134572887263e-06, "llm_loss": 0.0, "loss": 0.0276, "step": 121450 }, { "action_loss": 0.0185, "epoch": 11.422393532010906, "learning_rate": 9.96152351824579e-06, "llm_loss": 0.0, "loss": 0.0185, "step": 121500 }, { "action_loss": 0.0183, "epoch": 11.427094105480869, "learning_rate": 9.954912480421073e-06, "llm_loss": 0.0, "loss": 0.0183, "step": 121550 }, { "action_loss": 0.0221, "epoch": 11.431794678950832, "learning_rate": 9.948301462302572e-06, "llm_loss": 0.0, "loss": 0.0221, "step": 121600 }, { "action_loss": 0.0153, "epoch": 11.436495252420796, "learning_rate": 9.941690466779734e-06, "llm_loss": 0.0, "loss": 0.0153, "step": 121650 }, { "action_loss": 0.0249, "epoch": 11.441195825890759, "learning_rate": 9.935079496742001e-06, "llm_loss": 0.0, "loss": 0.0249, "step": 121700 }, { "action_loss": 0.0183, "epoch": 11.445896399360722, "learning_rate": 9.928468555078802e-06, "llm_loss": 0.0, "loss": 0.0183, "step": 121750 }, { "action_loss": 0.028, "epoch": 11.450596972830684, "learning_rate": 9.921857644679556e-06, "llm_loss": 0.0, "loss": 0.028, "step": 121800 }, { "action_loss": 0.0143, "epoch": 11.455297546300649, "learning_rate": 9.915246768433662e-06, "llm_loss": 0.0, "loss": 0.0143, "step": 121850 }, { "action_loss": 0.0322, "epoch": 11.459998119770612, "learning_rate": 9.908635929230515e-06, "llm_loss": 0.0, "loss": 0.0322, "step": 121900 }, { "action_loss": 0.0317, "epoch": 11.464698693240575, "learning_rate": 9.902025129959478e-06, "llm_loss": 0.0, "loss": 0.0317, "step": 121950 }, { "action_loss": 0.0226, "epoch": 11.469399266710539, "learning_rate": 9.895414373509912e-06, "llm_loss": 0.0, "loss": 0.0226, "step": 122000 }, { "action_loss": 0.0084, "epoch": 11.474099840180502, "learning_rate": 9.888803662771149e-06, "llm_loss": 0.0, "loss": 0.0084, "step": 122050 }, { "action_loss": 0.0339, "epoch": 11.478800413650465, "learning_rate": 9.882193000632507e-06, "llm_loss": 0.0, "loss": 0.0339, "step": 122100 }, { "action_loss": 0.0163, "epoch": 11.48350098712043, "learning_rate": 9.875582389983281e-06, "llm_loss": 0.0, "loss": 0.0163, "step": 122150 }, { "action_loss": 0.0205, "epoch": 11.488201560590392, "learning_rate": 9.868971833712745e-06, "llm_loss": 0.0, "loss": 0.0205, "step": 122200 }, { "action_loss": 0.0172, "epoch": 11.492902134060355, "learning_rate": 9.862361334710142e-06, "llm_loss": 0.0, "loss": 0.0172, "step": 122250 }, { "action_loss": 0.0147, "epoch": 11.49760270753032, "learning_rate": 9.855750895864698e-06, "llm_loss": 0.0, "loss": 0.0147, "step": 122300 }, { "action_loss": 0.0244, "epoch": 11.502303281000282, "learning_rate": 9.849140520065614e-06, "llm_loss": 0.0, "loss": 0.0244, "step": 122350 }, { "action_loss": 0.0249, "epoch": 11.507003854470245, "learning_rate": 9.84253021020205e-06, "llm_loss": 0.0, "loss": 0.0249, "step": 122400 }, { "action_loss": 0.0221, "epoch": 11.51170442794021, "learning_rate": 9.835919969163154e-06, "llm_loss": 0.0, "loss": 0.0221, "step": 122450 }, { "action_loss": 0.0085, "epoch": 11.516405001410172, "learning_rate": 9.829309799838035e-06, "llm_loss": 0.0, "loss": 0.0085, "step": 122500 }, { "action_loss": 0.0185, "epoch": 11.521105574880135, "learning_rate": 9.822699705115771e-06, "llm_loss": 0.0, "loss": 0.0185, "step": 122550 }, { "action_loss": 0.0276, "epoch": 11.5258061483501, "learning_rate": 9.81608968788541e-06, "llm_loss": 0.0, "loss": 0.0276, "step": 122600 }, { "action_loss": 0.0191, "epoch": 11.530506721820062, "learning_rate": 9.809479751035966e-06, "llm_loss": 0.0, "loss": 0.0191, "step": 122650 }, { "action_loss": 0.0221, "epoch": 11.535207295290025, "learning_rate": 9.802869897456413e-06, "llm_loss": 0.0, "loss": 0.0221, "step": 122700 }, { "action_loss": 0.0316, "epoch": 11.53990786875999, "learning_rate": 9.796260130035694e-06, "llm_loss": 0.0, "loss": 0.0316, "step": 122750 }, { "action_loss": 0.0155, "epoch": 11.544608442229952, "learning_rate": 9.789650451662713e-06, "llm_loss": 0.0, "loss": 0.0155, "step": 122800 }, { "action_loss": 0.0087, "epoch": 11.549309015699915, "learning_rate": 9.783040865226333e-06, "llm_loss": 0.0, "loss": 0.0087, "step": 122850 }, { "action_loss": 0.0108, "epoch": 11.554009589169878, "learning_rate": 9.77643137361538e-06, "llm_loss": 0.0, "loss": 0.0108, "step": 122900 }, { "action_loss": 0.015, "epoch": 11.558710162639843, "learning_rate": 9.769821979718639e-06, "llm_loss": 0.0, "loss": 0.015, "step": 122950 }, { "action_loss": 0.0315, "epoch": 11.563410736109805, "learning_rate": 9.763212686424848e-06, "llm_loss": 0.0, "loss": 0.0315, "step": 123000 }, { "action_loss": 0.0202, "epoch": 11.568111309579768, "learning_rate": 9.756603496622702e-06, "llm_loss": 0.0, "loss": 0.0202, "step": 123050 }, { "action_loss": 0.0087, "epoch": 11.572811883049733, "learning_rate": 9.749994413200854e-06, "llm_loss": 0.0, "loss": 0.0087, "step": 123100 }, { "action_loss": 0.0176, "epoch": 11.577512456519695, "learning_rate": 9.743385439047906e-06, "llm_loss": 0.0, "loss": 0.0176, "step": 123150 }, { "action_loss": 0.0248, "epoch": 11.582213029989658, "learning_rate": 9.73677657705242e-06, "llm_loss": 0.0, "loss": 0.0248, "step": 123200 }, { "action_loss": 0.0156, "epoch": 11.586913603459623, "learning_rate": 9.730167830102904e-06, "llm_loss": 0.0, "loss": 0.0156, "step": 123250 }, { "action_loss": 0.0202, "epoch": 11.591614176929586, "learning_rate": 9.723559201087806e-06, "llm_loss": 0.0, "loss": 0.0202, "step": 123300 }, { "action_loss": 0.019, "epoch": 11.596314750399548, "learning_rate": 9.71695069289554e-06, "llm_loss": 0.0, "loss": 0.019, "step": 123350 }, { "action_loss": 0.0268, "epoch": 11.601015323869513, "learning_rate": 9.710342308414458e-06, "llm_loss": 0.0, "loss": 0.0268, "step": 123400 }, { "action_loss": 0.0174, "epoch": 11.605715897339476, "learning_rate": 9.70373405053286e-06, "llm_loss": 0.0, "loss": 0.0174, "step": 123450 }, { "action_loss": 0.0156, "epoch": 11.610416470809438, "learning_rate": 9.697125922138987e-06, "llm_loss": 0.0, "loss": 0.0156, "step": 123500 }, { "action_loss": 0.0408, "epoch": 11.615117044279401, "learning_rate": 9.690517926121026e-06, "llm_loss": 0.0, "loss": 0.0408, "step": 123550 }, { "action_loss": 0.0287, "epoch": 11.619817617749366, "learning_rate": 9.68391006536711e-06, "llm_loss": 0.0, "loss": 0.0287, "step": 123600 }, { "action_loss": 0.0279, "epoch": 11.624518191219329, "learning_rate": 9.677302342765306e-06, "llm_loss": 0.0, "loss": 0.0279, "step": 123650 }, { "action_loss": 0.0246, "epoch": 11.629218764689291, "learning_rate": 9.670694761203625e-06, "llm_loss": 0.0, "loss": 0.0246, "step": 123700 }, { "action_loss": 0.0155, "epoch": 11.633919338159256, "learning_rate": 9.664087323570015e-06, "llm_loss": 0.0, "loss": 0.0155, "step": 123750 }, { "action_loss": 0.0218, "epoch": 11.638619911629219, "learning_rate": 9.657480032752364e-06, "llm_loss": 0.0, "loss": 0.0218, "step": 123800 }, { "action_loss": 0.0118, "epoch": 11.643320485099181, "learning_rate": 9.65087289163849e-06, "llm_loss": 0.0, "loss": 0.0118, "step": 123850 }, { "action_loss": 0.0181, "epoch": 11.648021058569146, "learning_rate": 9.644265903116147e-06, "llm_loss": 0.0, "loss": 0.0181, "step": 123900 }, { "action_loss": 0.031, "epoch": 11.652721632039109, "learning_rate": 9.637659070073027e-06, "llm_loss": 0.0, "loss": 0.031, "step": 123950 }, { "action_loss": 0.0173, "epoch": 11.657422205509071, "learning_rate": 9.63105239539675e-06, "llm_loss": 0.0, "loss": 0.0173, "step": 124000 }, { "action_loss": 0.0207, "epoch": 11.662122778979036, "learning_rate": 9.62444588197487e-06, "llm_loss": 0.0, "loss": 0.0207, "step": 124050 }, { "action_loss": 0.0161, "epoch": 11.666823352448999, "learning_rate": 9.617839532694866e-06, "llm_loss": 0.0, "loss": 0.0161, "step": 124100 }, { "action_loss": 0.018, "epoch": 11.671523925918962, "learning_rate": 9.611233350444148e-06, "llm_loss": 0.0, "loss": 0.018, "step": 124150 }, { "action_loss": 0.0286, "epoch": 11.676224499388926, "learning_rate": 9.604627338110053e-06, "llm_loss": 0.0, "loss": 0.0286, "step": 124200 }, { "action_loss": 0.0158, "epoch": 11.680925072858889, "learning_rate": 9.598021498579847e-06, "llm_loss": 0.0, "loss": 0.0158, "step": 124250 }, { "action_loss": 0.0234, "epoch": 11.685625646328852, "learning_rate": 9.591415834740706e-06, "llm_loss": 0.0, "loss": 0.0234, "step": 124300 }, { "action_loss": 0.0271, "epoch": 11.690326219798816, "learning_rate": 9.584810349479749e-06, "llm_loss": 0.0, "loss": 0.0271, "step": 124350 }, { "action_loss": 0.0178, "epoch": 11.695026793268779, "learning_rate": 9.578205045684007e-06, "llm_loss": 0.0, "loss": 0.0178, "step": 124400 }, { "action_loss": 0.022, "epoch": 11.699727366738742, "learning_rate": 9.571599926240429e-06, "llm_loss": 0.0, "loss": 0.022, "step": 124450 }, { "action_loss": 0.0172, "epoch": 11.704427940208706, "learning_rate": 9.56499499403589e-06, "llm_loss": 0.0, "loss": 0.0172, "step": 124500 }, { "action_loss": 0.0183, "epoch": 11.70912851367867, "learning_rate": 9.558390251957183e-06, "llm_loss": 0.0, "loss": 0.0183, "step": 124550 }, { "action_loss": 0.0194, "epoch": 11.713829087148632, "learning_rate": 9.55178570289101e-06, "llm_loss": 0.0, "loss": 0.0194, "step": 124600 }, { "action_loss": 0.0376, "epoch": 11.718529660618595, "learning_rate": 9.545181349723996e-06, "llm_loss": 0.0, "loss": 0.0376, "step": 124650 }, { "action_loss": 0.0216, "epoch": 11.72323023408856, "learning_rate": 9.538577195342678e-06, "llm_loss": 0.0, "loss": 0.0216, "step": 124700 }, { "action_loss": 0.0172, "epoch": 11.727930807558522, "learning_rate": 9.531973242633505e-06, "llm_loss": 0.0, "loss": 0.0172, "step": 124750 }, { "action_loss": 0.0249, "epoch": 11.732631381028485, "learning_rate": 9.525369494482843e-06, "llm_loss": 0.0, "loss": 0.0249, "step": 124800 }, { "action_loss": 0.0345, "epoch": 11.73733195449845, "learning_rate": 9.51876595377696e-06, "llm_loss": 0.0, "loss": 0.0345, "step": 124850 }, { "action_loss": 0.0151, "epoch": 11.742032527968412, "learning_rate": 9.512162623402042e-06, "llm_loss": 0.0, "loss": 0.0151, "step": 124900 }, { "action_loss": 0.0116, "epoch": 11.746733101438375, "learning_rate": 9.505559506244176e-06, "llm_loss": 0.0, "loss": 0.0116, "step": 124950 }, { "action_loss": 0.0278, "epoch": 11.75143367490834, "learning_rate": 9.49895660518936e-06, "llm_loss": 0.0, "loss": 0.0278, "step": 125000 }, { "action_loss": 0.0222, "epoch": 11.756134248378302, "learning_rate": 9.492353923123498e-06, "llm_loss": 0.0, "loss": 0.0222, "step": 125050 }, { "action_loss": 0.0153, "epoch": 11.760834821848265, "learning_rate": 9.485751462932399e-06, "llm_loss": 0.0, "loss": 0.0153, "step": 125100 }, { "action_loss": 0.0183, "epoch": 11.76553539531823, "learning_rate": 9.479149227501765e-06, "llm_loss": 0.0, "loss": 0.0183, "step": 125150 }, { "action_loss": 0.0185, "epoch": 11.770235968788192, "learning_rate": 9.472547219717212e-06, "llm_loss": 0.0, "loss": 0.0185, "step": 125200 }, { "action_loss": 0.0383, "epoch": 11.774936542258155, "learning_rate": 9.465945442464253e-06, "llm_loss": 0.0, "loss": 0.0383, "step": 125250 }, { "action_loss": 0.0282, "epoch": 11.779637115728118, "learning_rate": 9.459343898628298e-06, "llm_loss": 0.0, "loss": 0.0282, "step": 125300 }, { "action_loss": 0.0156, "epoch": 11.784337689198082, "learning_rate": 9.452742591094658e-06, "llm_loss": 0.0, "loss": 0.0156, "step": 125350 }, { "action_loss": 0.0152, "epoch": 11.789038262668045, "learning_rate": 9.446141522748536e-06, "llm_loss": 0.0, "loss": 0.0152, "step": 125400 }, { "action_loss": 0.0247, "epoch": 11.793738836138008, "learning_rate": 9.439540696475037e-06, "llm_loss": 0.0, "loss": 0.0247, "step": 125450 }, { "action_loss": 0.0179, "epoch": 11.798439409607973, "learning_rate": 9.432940115159152e-06, "llm_loss": 0.0, "loss": 0.0179, "step": 125500 }, { "action_loss": 0.0382, "epoch": 11.803139983077935, "learning_rate": 9.426339781685775e-06, "llm_loss": 0.0, "loss": 0.0382, "step": 125550 }, { "action_loss": 0.0244, "epoch": 11.807840556547898, "learning_rate": 9.419739698939683e-06, "llm_loss": 0.0, "loss": 0.0244, "step": 125600 }, { "action_loss": 0.0256, "epoch": 11.812541130017863, "learning_rate": 9.41313986980555e-06, "llm_loss": 0.0, "loss": 0.0256, "step": 125650 }, { "action_loss": 0.0231, "epoch": 11.817241703487825, "learning_rate": 9.406540297167932e-06, "llm_loss": 0.0, "loss": 0.0231, "step": 125700 }, { "action_loss": 0.0151, "epoch": 11.821942276957788, "learning_rate": 9.399940983911277e-06, "llm_loss": 0.0, "loss": 0.0151, "step": 125750 }, { "action_loss": 0.0123, "epoch": 11.826642850427753, "learning_rate": 9.39334193291992e-06, "llm_loss": 0.0, "loss": 0.0123, "step": 125800 }, { "action_loss": 0.0234, "epoch": 11.831343423897716, "learning_rate": 9.386743147078083e-06, "llm_loss": 0.0, "loss": 0.0234, "step": 125850 }, { "action_loss": 0.0151, "epoch": 11.836043997367678, "learning_rate": 9.380144629269869e-06, "llm_loss": 0.0, "loss": 0.0151, "step": 125900 }, { "action_loss": 0.0274, "epoch": 11.840744570837643, "learning_rate": 9.373546382379262e-06, "llm_loss": 0.0, "loss": 0.0274, "step": 125950 }, { "action_loss": 0.0174, "epoch": 11.845445144307606, "learning_rate": 9.366948409290135e-06, "llm_loss": 0.0, "loss": 0.0174, "step": 126000 }, { "action_loss": 0.0317, "epoch": 11.850145717777568, "learning_rate": 9.360350712886235e-06, "llm_loss": 0.0, "loss": 0.0317, "step": 126050 }, { "action_loss": 0.0277, "epoch": 11.854846291247533, "learning_rate": 9.353753296051186e-06, "llm_loss": 0.0, "loss": 0.0277, "step": 126100 }, { "action_loss": 0.0123, "epoch": 11.859546864717496, "learning_rate": 9.347156161668498e-06, "llm_loss": 0.0, "loss": 0.0123, "step": 126150 }, { "action_loss": 0.0197, "epoch": 11.864247438187459, "learning_rate": 9.34055931262155e-06, "llm_loss": 0.0, "loss": 0.0197, "step": 126200 }, { "action_loss": 0.0152, "epoch": 11.868948011657423, "learning_rate": 9.333962751793601e-06, "llm_loss": 0.0, "loss": 0.0152, "step": 126250 }, { "action_loss": 0.0112, "epoch": 11.873648585127386, "learning_rate": 9.32736648206778e-06, "llm_loss": 0.0, "loss": 0.0112, "step": 126300 }, { "action_loss": 0.0336, "epoch": 11.878349158597349, "learning_rate": 9.320770506327095e-06, "llm_loss": 0.0, "loss": 0.0336, "step": 126350 }, { "action_loss": 0.025, "epoch": 11.883049732067311, "learning_rate": 9.314174827454421e-06, "llm_loss": 0.0, "loss": 0.025, "step": 126400 }, { "action_loss": 0.0203, "epoch": 11.887750305537276, "learning_rate": 9.307579448332504e-06, "llm_loss": 0.0, "loss": 0.0203, "step": 126450 }, { "action_loss": 0.0212, "epoch": 11.892450879007239, "learning_rate": 9.300984371843955e-06, "llm_loss": 0.0, "loss": 0.0212, "step": 126500 }, { "action_loss": 0.0112, "epoch": 11.897151452477202, "learning_rate": 9.294389600871261e-06, "llm_loss": 0.0, "loss": 0.0112, "step": 126550 }, { "action_loss": 0.0212, "epoch": 11.901852025947166, "learning_rate": 9.28779513829677e-06, "llm_loss": 0.0, "loss": 0.0212, "step": 126600 }, { "action_loss": 0.0154, "epoch": 11.906552599417129, "learning_rate": 9.281200987002692e-06, "llm_loss": 0.0, "loss": 0.0154, "step": 126650 }, { "action_loss": 0.0389, "epoch": 11.911253172887092, "learning_rate": 9.274607149871113e-06, "llm_loss": 0.0, "loss": 0.0389, "step": 126700 }, { "action_loss": 0.0184, "epoch": 11.915953746357056, "learning_rate": 9.26801362978397e-06, "llm_loss": 0.0, "loss": 0.0184, "step": 126750 }, { "action_loss": 0.0192, "epoch": 11.920654319827019, "learning_rate": 9.261420429623067e-06, "llm_loss": 0.0, "loss": 0.0192, "step": 126800 }, { "action_loss": 0.0322, "epoch": 11.925354893296982, "learning_rate": 9.254827552270064e-06, "llm_loss": 0.0, "loss": 0.0322, "step": 126850 }, { "action_loss": 0.032, "epoch": 11.930055466766946, "learning_rate": 9.248235000606482e-06, "llm_loss": 0.0, "loss": 0.032, "step": 126900 }, { "action_loss": 0.0123, "epoch": 11.934756040236909, "learning_rate": 9.241642777513709e-06, "llm_loss": 0.0, "loss": 0.0123, "step": 126950 }, { "action_loss": 0.0208, "epoch": 11.939456613706872, "learning_rate": 9.235050885872968e-06, "llm_loss": 0.0, "loss": 0.0208, "step": 127000 }, { "action_loss": 0.0216, "epoch": 11.944157187176836, "learning_rate": 9.228459328565354e-06, "llm_loss": 0.0, "loss": 0.0216, "step": 127050 }, { "action_loss": 0.0087, "epoch": 11.9488577606468, "learning_rate": 9.221868108471811e-06, "llm_loss": 0.0, "loss": 0.0087, "step": 127100 }, { "action_loss": 0.0334, "epoch": 11.953558334116762, "learning_rate": 9.215277228473141e-06, "llm_loss": 0.0, "loss": 0.0334, "step": 127150 }, { "action_loss": 0.0147, "epoch": 11.958258907586725, "learning_rate": 9.208686691449989e-06, "llm_loss": 0.0, "loss": 0.0147, "step": 127200 }, { "action_loss": 0.0247, "epoch": 11.96295948105669, "learning_rate": 9.202096500282855e-06, "llm_loss": 0.0, "loss": 0.0247, "step": 127250 }, { "action_loss": 0.0191, "epoch": 11.967660054526652, "learning_rate": 9.195506657852082e-06, "llm_loss": 0.0, "loss": 0.0191, "step": 127300 }, { "action_loss": 0.0256, "epoch": 11.972360627996615, "learning_rate": 9.18891716703787e-06, "llm_loss": 0.0, "loss": 0.0256, "step": 127350 }, { "action_loss": 0.0216, "epoch": 11.97706120146658, "learning_rate": 9.182328030720257e-06, "llm_loss": 0.0, "loss": 0.0216, "step": 127400 }, { "action_loss": 0.0125, "epoch": 11.981761774936542, "learning_rate": 9.175739251779133e-06, "llm_loss": 0.0, "loss": 0.0125, "step": 127450 }, { "action_loss": 0.0216, "epoch": 11.986462348406505, "learning_rate": 9.169150833094226e-06, "llm_loss": 0.0, "loss": 0.0216, "step": 127500 }, { "action_loss": 0.0218, "epoch": 11.99116292187647, "learning_rate": 9.16256277754511e-06, "llm_loss": 0.0, "loss": 0.0218, "step": 127550 }, { "action_loss": 0.025, "epoch": 11.995863495346432, "learning_rate": 9.155975088011197e-06, "llm_loss": 0.0, "loss": 0.025, "step": 127600 }, { "action_loss": 0.0154, "epoch": 12.000564068816395, "learning_rate": 9.149387767371742e-06, "llm_loss": 0.0, "loss": 0.0154, "step": 127650 }, { "action_loss": 0.0244, "epoch": 12.00526464228636, "learning_rate": 9.142800818505838e-06, "llm_loss": 0.0, "loss": 0.0244, "step": 127700 }, { "action_loss": 0.0173, "epoch": 12.009965215756322, "learning_rate": 9.136214244292416e-06, "llm_loss": 0.0, "loss": 0.0173, "step": 127750 }, { "action_loss": 0.0232, "epoch": 12.014665789226285, "learning_rate": 9.129628047610241e-06, "llm_loss": 0.0, "loss": 0.0232, "step": 127800 }, { "action_loss": 0.0274, "epoch": 12.01936636269625, "learning_rate": 9.123042231337915e-06, "llm_loss": 0.0, "loss": 0.0274, "step": 127850 }, { "action_loss": 0.019, "epoch": 12.024066936166212, "learning_rate": 9.116456798353879e-06, "llm_loss": 0.0, "loss": 0.019, "step": 127900 }, { "action_loss": 0.0208, "epoch": 12.028767509636175, "learning_rate": 9.10987175153639e-06, "llm_loss": 0.0, "loss": 0.0208, "step": 127950 }, { "action_loss": 0.0128, "epoch": 12.03346808310614, "learning_rate": 9.103287093763555e-06, "llm_loss": 0.0, "loss": 0.0128, "step": 128000 }, { "action_loss": 0.0312, "epoch": 12.038168656576103, "learning_rate": 9.096702827913302e-06, "llm_loss": 0.0, "loss": 0.0312, "step": 128050 }, { "action_loss": 0.0249, "epoch": 12.042869230046065, "learning_rate": 9.090118956863385e-06, "llm_loss": 0.0, "loss": 0.0249, "step": 128100 }, { "action_loss": 0.0172, "epoch": 12.047569803516028, "learning_rate": 9.083535483491391e-06, "llm_loss": 0.0, "loss": 0.0172, "step": 128150 }, { "action_loss": 0.0311, "epoch": 12.052270376985993, "learning_rate": 9.076952410674733e-06, "llm_loss": 0.0, "loss": 0.0311, "step": 128200 }, { "action_loss": 0.0274, "epoch": 12.056970950455955, "learning_rate": 9.070369741290646e-06, "llm_loss": 0.0, "loss": 0.0274, "step": 128250 }, { "action_loss": 0.0209, "epoch": 12.061671523925918, "learning_rate": 9.06378747821619e-06, "llm_loss": 0.0, "loss": 0.0209, "step": 128300 }, { "action_loss": 0.0277, "epoch": 12.066372097395883, "learning_rate": 9.057205624328248e-06, "llm_loss": 0.0, "loss": 0.0277, "step": 128350 }, { "action_loss": 0.0162, "epoch": 12.071072670865846, "learning_rate": 9.050624182503523e-06, "llm_loss": 0.0, "loss": 0.0162, "step": 128400 }, { "action_loss": 0.0303, "epoch": 12.075773244335808, "learning_rate": 9.044043155618536e-06, "llm_loss": 0.0, "loss": 0.0303, "step": 128450 }, { "action_loss": 0.0184, "epoch": 12.080473817805773, "learning_rate": 9.037462546549633e-06, "llm_loss": 0.0, "loss": 0.0184, "step": 128500 }, { "action_loss": 0.0183, "epoch": 12.085174391275736, "learning_rate": 9.030882358172972e-06, "llm_loss": 0.0, "loss": 0.0183, "step": 128550 }, { "action_loss": 0.0178, "epoch": 12.089874964745698, "learning_rate": 9.024302593364526e-06, "llm_loss": 0.0, "loss": 0.0178, "step": 128600 }, { "action_loss": 0.027, "epoch": 12.094575538215663, "learning_rate": 9.01772325500009e-06, "llm_loss": 0.0, "loss": 0.027, "step": 128650 }, { "action_loss": 0.0314, "epoch": 12.099276111685626, "learning_rate": 9.011144345955267e-06, "llm_loss": 0.0, "loss": 0.0314, "step": 128700 }, { "action_loss": 0.0177, "epoch": 12.103976685155589, "learning_rate": 9.004565869105472e-06, "llm_loss": 0.0, "loss": 0.0177, "step": 128750 }, { "action_loss": 0.0022, "epoch": 12.108677258625553, "learning_rate": 8.99798782732593e-06, "llm_loss": 0.0, "loss": 0.0022, "step": 128800 }, { "action_loss": 0.0191, "epoch": 12.113377832095516, "learning_rate": 8.991410223491689e-06, "llm_loss": 0.0, "loss": 0.0191, "step": 128850 }, { "action_loss": 0.0234, "epoch": 12.118078405565479, "learning_rate": 8.98483306047758e-06, "llm_loss": 0.0, "loss": 0.0234, "step": 128900 }, { "action_loss": 0.0207, "epoch": 12.122778979035441, "learning_rate": 8.978256341158264e-06, "llm_loss": 0.0, "loss": 0.0207, "step": 128950 }, { "action_loss": 0.0193, "epoch": 12.127479552505406, "learning_rate": 8.9716800684082e-06, "llm_loss": 0.0, "loss": 0.0193, "step": 129000 }, { "action_loss": 0.0264, "epoch": 12.132180125975369, "learning_rate": 8.96510424510165e-06, "llm_loss": 0.0, "loss": 0.0264, "step": 129050 }, { "action_loss": 0.0302, "epoch": 12.136880699445332, "learning_rate": 8.958528874112686e-06, "llm_loss": 0.0, "loss": 0.0302, "step": 129100 }, { "action_loss": 0.025, "epoch": 12.141581272915296, "learning_rate": 8.951953958315177e-06, "llm_loss": 0.0, "loss": 0.025, "step": 129150 }, { "action_loss": 0.0255, "epoch": 12.146281846385259, "learning_rate": 8.945379500582792e-06, "llm_loss": 0.0, "loss": 0.0255, "step": 129200 }, { "action_loss": 0.022, "epoch": 12.150982419855222, "learning_rate": 8.938805503789e-06, "llm_loss": 0.0, "loss": 0.022, "step": 129250 }, { "action_loss": 0.027, "epoch": 12.155682993325186, "learning_rate": 8.932231970807075e-06, "llm_loss": 0.0, "loss": 0.027, "step": 129300 }, { "action_loss": 0.0345, "epoch": 12.160383566795149, "learning_rate": 8.925658904510082e-06, "llm_loss": 0.0, "loss": 0.0345, "step": 129350 }, { "action_loss": 0.0277, "epoch": 12.165084140265112, "learning_rate": 8.919086307770884e-06, "llm_loss": 0.0, "loss": 0.0277, "step": 129400 }, { "action_loss": 0.0224, "epoch": 12.169784713735076, "learning_rate": 8.91251418346214e-06, "llm_loss": 0.0, "loss": 0.0224, "step": 129450 }, { "action_loss": 0.0254, "epoch": 12.174485287205039, "learning_rate": 8.905942534456296e-06, "llm_loss": 0.0, "loss": 0.0254, "step": 129500 }, { "action_loss": 0.0286, "epoch": 12.179185860675002, "learning_rate": 8.8993713636256e-06, "llm_loss": 0.0, "loss": 0.0286, "step": 129550 }, { "action_loss": 0.0177, "epoch": 12.183886434144966, "learning_rate": 8.892800673842085e-06, "llm_loss": 0.0, "loss": 0.0177, "step": 129600 }, { "action_loss": 0.0375, "epoch": 12.18858700761493, "learning_rate": 8.886230467977574e-06, "llm_loss": 0.0, "loss": 0.0375, "step": 129650 }, { "action_loss": 0.0246, "epoch": 12.193287581084892, "learning_rate": 8.87966074890368e-06, "llm_loss": 0.0, "loss": 0.0246, "step": 129700 }, { "action_loss": 0.0187, "epoch": 12.197988154554857, "learning_rate": 8.873091519491807e-06, "llm_loss": 0.0, "loss": 0.0187, "step": 129750 }, { "action_loss": 0.0142, "epoch": 12.20268872802482, "learning_rate": 8.866522782613135e-06, "llm_loss": 0.0, "loss": 0.0142, "step": 129800 }, { "action_loss": 0.0213, "epoch": 12.207389301494782, "learning_rate": 8.859954541138634e-06, "llm_loss": 0.0, "loss": 0.0213, "step": 129850 }, { "action_loss": 0.0151, "epoch": 12.212089874964745, "learning_rate": 8.853386797939062e-06, "llm_loss": 0.0, "loss": 0.0151, "step": 129900 }, { "action_loss": 0.0309, "epoch": 12.21679044843471, "learning_rate": 8.846819555884957e-06, "llm_loss": 0.0, "loss": 0.0309, "step": 129950 }, { "action_loss": 0.0269, "epoch": 12.221491021904672, "learning_rate": 8.840252817846629e-06, "llm_loss": 0.0, "loss": 0.0269, "step": 130000 }, { "action_loss": 0.0203, "epoch": 12.226191595374635, "learning_rate": 8.833686586694179e-06, "llm_loss": 0.0, "loss": 0.0203, "step": 130050 }, { "action_loss": 0.0282, "epoch": 12.2308921688446, "learning_rate": 8.827120865297482e-06, "llm_loss": 0.0, "loss": 0.0282, "step": 130100 }, { "action_loss": 0.0322, "epoch": 12.235592742314562, "learning_rate": 8.820555656526191e-06, "llm_loss": 0.0, "loss": 0.0322, "step": 130150 }, { "action_loss": 0.0117, "epoch": 12.240293315784525, "learning_rate": 8.813990963249733e-06, "llm_loss": 0.0, "loss": 0.0117, "step": 130200 }, { "action_loss": 0.0183, "epoch": 12.24499388925449, "learning_rate": 8.807426788337317e-06, "llm_loss": 0.0, "loss": 0.0183, "step": 130250 }, { "action_loss": 0.0246, "epoch": 12.249694462724452, "learning_rate": 8.80086313465791e-06, "llm_loss": 0.0, "loss": 0.0246, "step": 130300 }, { "action_loss": 0.0347, "epoch": 12.254395036194415, "learning_rate": 8.794300005080268e-06, "llm_loss": 0.0, "loss": 0.0347, "step": 130350 }, { "action_loss": 0.0227, "epoch": 12.25909560966438, "learning_rate": 8.787737402472907e-06, "llm_loss": 0.0, "loss": 0.0227, "step": 130400 }, { "action_loss": 0.0215, "epoch": 12.263796183134342, "learning_rate": 8.781175329704119e-06, "llm_loss": 0.0, "loss": 0.0215, "step": 130450 }, { "action_loss": 0.0099, "epoch": 12.268496756604305, "learning_rate": 8.774613789641958e-06, "llm_loss": 0.0, "loss": 0.0099, "step": 130500 }, { "action_loss": 0.015, "epoch": 12.27319733007427, "learning_rate": 8.768052785154257e-06, "llm_loss": 0.0, "loss": 0.015, "step": 130550 }, { "action_loss": 0.02, "epoch": 12.277897903544233, "learning_rate": 8.7614923191086e-06, "llm_loss": 0.0, "loss": 0.02, "step": 130600 }, { "action_loss": 0.0302, "epoch": 12.282598477014195, "learning_rate": 8.754932394372342e-06, "llm_loss": 0.0, "loss": 0.0302, "step": 130650 }, { "action_loss": 0.0327, "epoch": 12.287299050484158, "learning_rate": 8.748373013812612e-06, "llm_loss": 0.0, "loss": 0.0327, "step": 130700 }, { "action_loss": 0.0153, "epoch": 12.291999623954123, "learning_rate": 8.741814180296277e-06, "llm_loss": 0.0, "loss": 0.0153, "step": 130750 }, { "action_loss": 0.0214, "epoch": 12.296700197424085, "learning_rate": 8.735255896689986e-06, "llm_loss": 0.0, "loss": 0.0214, "step": 130800 }, { "action_loss": 0.0093, "epoch": 12.301400770894048, "learning_rate": 8.728698165860142e-06, "llm_loss": 0.0, "loss": 0.0093, "step": 130850 }, { "action_loss": 0.0358, "epoch": 12.306101344364013, "learning_rate": 8.7221409906729e-06, "llm_loss": 0.0, "loss": 0.0358, "step": 130900 }, { "action_loss": 0.0364, "epoch": 12.310801917833976, "learning_rate": 8.715584373994184e-06, "llm_loss": 0.0, "loss": 0.0364, "step": 130950 }, { "action_loss": 0.0179, "epoch": 12.315502491303938, "learning_rate": 8.709028318689662e-06, "llm_loss": 0.0, "loss": 0.0179, "step": 131000 }, { "action_loss": 0.0273, "epoch": 12.320203064773903, "learning_rate": 8.702472827624768e-06, "llm_loss": 0.0, "loss": 0.0273, "step": 131050 }, { "action_loss": 0.0246, "epoch": 12.324903638243866, "learning_rate": 8.695917903664676e-06, "llm_loss": 0.0, "loss": 0.0246, "step": 131100 }, { "action_loss": 0.015, "epoch": 12.329604211713828, "learning_rate": 8.689363549674322e-06, "llm_loss": 0.0, "loss": 0.015, "step": 131150 }, { "action_loss": 0.0203, "epoch": 12.334304785183793, "learning_rate": 8.682809768518391e-06, "llm_loss": 0.0, "loss": 0.0203, "step": 131200 }, { "action_loss": 0.0293, "epoch": 12.339005358653756, "learning_rate": 8.676256563061318e-06, "llm_loss": 0.0, "loss": 0.0293, "step": 131250 }, { "action_loss": 0.0256, "epoch": 12.343705932123719, "learning_rate": 8.669703936167282e-06, "llm_loss": 0.0, "loss": 0.0256, "step": 131300 }, { "action_loss": 0.0267, "epoch": 12.348406505593683, "learning_rate": 8.663151890700216e-06, "llm_loss": 0.0, "loss": 0.0267, "step": 131350 }, { "action_loss": 0.016, "epoch": 12.353107079063646, "learning_rate": 8.656600429523793e-06, "llm_loss": 0.0, "loss": 0.016, "step": 131400 }, { "action_loss": 0.0221, "epoch": 12.357807652533609, "learning_rate": 8.650049555501431e-06, "llm_loss": 0.0, "loss": 0.0221, "step": 131450 }, { "action_loss": 0.0121, "epoch": 12.362508226003573, "learning_rate": 8.643499271496298e-06, "llm_loss": 0.0, "loss": 0.0121, "step": 131500 }, { "action_loss": 0.0162, "epoch": 12.367208799473536, "learning_rate": 8.636949580371296e-06, "llm_loss": 0.0, "loss": 0.0162, "step": 131550 }, { "action_loss": 0.0203, "epoch": 12.371909372943499, "learning_rate": 8.630400484989076e-06, "llm_loss": 0.0, "loss": 0.0203, "step": 131600 }, { "action_loss": 0.0156, "epoch": 12.376609946413463, "learning_rate": 8.623851988212018e-06, "llm_loss": 0.0, "loss": 0.0156, "step": 131650 }, { "action_loss": 0.0179, "epoch": 12.381310519883426, "learning_rate": 8.617304092902246e-06, "llm_loss": 0.0, "loss": 0.0179, "step": 131700 }, { "action_loss": 0.0155, "epoch": 12.386011093353389, "learning_rate": 8.610756801921624e-06, "llm_loss": 0.0, "loss": 0.0155, "step": 131750 }, { "action_loss": 0.0145, "epoch": 12.390711666823352, "learning_rate": 8.60421011813175e-06, "llm_loss": 0.0, "loss": 0.0145, "step": 131800 }, { "action_loss": 0.0252, "epoch": 12.395412240293316, "learning_rate": 8.597664044393958e-06, "llm_loss": 0.0, "loss": 0.0252, "step": 131850 }, { "action_loss": 0.008, "epoch": 12.400112813763279, "learning_rate": 8.591118583569306e-06, "llm_loss": 0.0, "loss": 0.008, "step": 131900 }, { "action_loss": 0.0049, "epoch": 12.404813387233242, "learning_rate": 8.584704629366522e-06, "llm_loss": 0.0, "loss": 0.0049, "step": 131950 }, { "action_loss": 0.0268, "epoch": 12.409513960703206, "learning_rate": 8.578160390549563e-06, "llm_loss": 0.0, "loss": 0.0268, "step": 132000 }, { "action_loss": 0.0244, "epoch": 12.41421453417317, "learning_rate": 8.571616773170126e-06, "llm_loss": 0.0, "loss": 0.0244, "step": 132050 }, { "action_loss": 0.0245, "epoch": 12.418915107643132, "learning_rate": 8.56507378008821e-06, "llm_loss": 0.0, "loss": 0.0245, "step": 132100 }, { "action_loss": 0.0238, "epoch": 12.423615681113096, "learning_rate": 8.558531414163527e-06, "llm_loss": 0.0, "loss": 0.0238, "step": 132150 }, { "action_loss": 0.0219, "epoch": 12.42831625458306, "learning_rate": 8.551989678255524e-06, "llm_loss": 0.0, "loss": 0.0219, "step": 132200 }, { "action_loss": 0.0022, "epoch": 12.433016828053022, "learning_rate": 8.545448575223369e-06, "llm_loss": 0.0, "loss": 0.0022, "step": 132250 }, { "action_loss": 0.0271, "epoch": 12.437717401522987, "learning_rate": 8.538908107925956e-06, "llm_loss": 0.0, "loss": 0.0271, "step": 132300 }, { "action_loss": 0.0166, "epoch": 12.44241797499295, "learning_rate": 8.532368279221901e-06, "llm_loss": 0.0, "loss": 0.0166, "step": 132350 }, { "action_loss": 0.012, "epoch": 12.447118548462912, "learning_rate": 8.525829091969538e-06, "llm_loss": 0.0, "loss": 0.012, "step": 132400 }, { "action_loss": 0.0112, "epoch": 12.451819121932877, "learning_rate": 8.519290549026921e-06, "llm_loss": 0.0, "loss": 0.0112, "step": 132450 }, { "action_loss": 0.022, "epoch": 12.45651969540284, "learning_rate": 8.51275265325183e-06, "llm_loss": 0.0, "loss": 0.022, "step": 132500 }, { "action_loss": 0.016, "epoch": 12.461220268872802, "learning_rate": 8.50621540750175e-06, "llm_loss": 0.0, "loss": 0.016, "step": 132550 }, { "action_loss": 0.0122, "epoch": 12.465920842342765, "learning_rate": 8.499678814633889e-06, "llm_loss": 0.0, "loss": 0.0122, "step": 132600 }, { "action_loss": 0.0148, "epoch": 12.47062141581273, "learning_rate": 8.49314287750517e-06, "llm_loss": 0.0, "loss": 0.0148, "step": 132650 }, { "action_loss": 0.0056, "epoch": 12.475321989282692, "learning_rate": 8.486607598972225e-06, "llm_loss": 0.0, "loss": 0.0056, "step": 132700 }, { "action_loss": 0.0327, "epoch": 12.480022562752655, "learning_rate": 8.480072981891404e-06, "llm_loss": 0.0, "loss": 0.0327, "step": 132750 }, { "action_loss": 0.0151, "epoch": 12.48472313622262, "learning_rate": 8.473539029118766e-06, "llm_loss": 0.0, "loss": 0.0151, "step": 132800 }, { "action_loss": 0.0283, "epoch": 12.489423709692582, "learning_rate": 8.467005743510072e-06, "llm_loss": 0.0, "loss": 0.0283, "step": 132850 }, { "action_loss": 0.0279, "epoch": 12.494124283162545, "learning_rate": 8.460473127920805e-06, "llm_loss": 0.0, "loss": 0.0279, "step": 132900 }, { "action_loss": 0.0087, "epoch": 12.49882485663251, "learning_rate": 8.453941185206145e-06, "llm_loss": 0.0, "loss": 0.0087, "step": 132950 }, { "action_loss": 0.0206, "epoch": 12.503525430102473, "learning_rate": 8.447409918220985e-06, "llm_loss": 0.0, "loss": 0.0206, "step": 133000 }, { "action_loss": 0.0226, "epoch": 12.508226003572435, "learning_rate": 8.440879329819912e-06, "llm_loss": 0.0, "loss": 0.0226, "step": 133050 }, { "action_loss": 0.0218, "epoch": 12.5129265770424, "learning_rate": 8.434349422857225e-06, "llm_loss": 0.0, "loss": 0.0218, "step": 133100 }, { "action_loss": 0.0253, "epoch": 12.517627150512363, "learning_rate": 8.427820200186924e-06, "llm_loss": 0.0, "loss": 0.0253, "step": 133150 }, { "action_loss": 0.0224, "epoch": 12.522327723982325, "learning_rate": 8.421291664662709e-06, "llm_loss": 0.0, "loss": 0.0224, "step": 133200 }, { "action_loss": 0.0115, "epoch": 12.52702829745229, "learning_rate": 8.414763819137986e-06, "llm_loss": 0.0, "loss": 0.0115, "step": 133250 }, { "action_loss": 0.0186, "epoch": 12.531728870922253, "learning_rate": 8.408236666465844e-06, "llm_loss": 0.0, "loss": 0.0186, "step": 133300 }, { "action_loss": 0.0195, "epoch": 12.536429444392216, "learning_rate": 8.401710209499082e-06, "llm_loss": 0.0, "loss": 0.0195, "step": 133350 }, { "action_loss": 0.021, "epoch": 12.54113001786218, "learning_rate": 8.395184451090193e-06, "llm_loss": 0.0, "loss": 0.021, "step": 133400 }, { "action_loss": 0.0106, "epoch": 12.545830591332143, "learning_rate": 8.388659394091362e-06, "llm_loss": 0.0, "loss": 0.0106, "step": 133450 }, { "action_loss": 0.0118, "epoch": 12.550531164802106, "learning_rate": 8.382135041354466e-06, "llm_loss": 0.0, "loss": 0.0118, "step": 133500 }, { "action_loss": 0.022, "epoch": 12.555231738272068, "learning_rate": 8.37561139573108e-06, "llm_loss": 0.0, "loss": 0.022, "step": 133550 }, { "action_loss": 0.0379, "epoch": 12.559932311742033, "learning_rate": 8.36908846007247e-06, "llm_loss": 0.0, "loss": 0.0379, "step": 133600 }, { "action_loss": 0.0083, "epoch": 12.564632885211996, "learning_rate": 8.362566237229579e-06, "llm_loss": 0.0, "loss": 0.0083, "step": 133650 }, { "action_loss": 0.0339, "epoch": 12.569333458681959, "learning_rate": 8.356044730053055e-06, "llm_loss": 0.0, "loss": 0.0339, "step": 133700 }, { "action_loss": 0.039, "epoch": 12.574034032151923, "learning_rate": 8.349523941393224e-06, "llm_loss": 0.0, "loss": 0.039, "step": 133750 }, { "action_loss": 0.0184, "epoch": 12.578734605621886, "learning_rate": 8.3430038741001e-06, "llm_loss": 0.0, "loss": 0.0184, "step": 133800 }, { "action_loss": 0.0222, "epoch": 12.583435179091849, "learning_rate": 8.33648453102338e-06, "llm_loss": 0.0, "loss": 0.0222, "step": 133850 }, { "action_loss": 0.0189, "epoch": 12.588135752561813, "learning_rate": 8.329965915012451e-06, "llm_loss": 0.0, "loss": 0.0189, "step": 133900 }, { "action_loss": 0.0312, "epoch": 12.592836326031776, "learning_rate": 8.323448028916374e-06, "llm_loss": 0.0, "loss": 0.0312, "step": 133950 }, { "action_loss": 0.0208, "epoch": 12.597536899501739, "learning_rate": 8.316930875583892e-06, "llm_loss": 0.0, "loss": 0.0208, "step": 134000 }, { "action_loss": 0.0216, "epoch": 12.602237472971703, "learning_rate": 8.310414457863437e-06, "llm_loss": 0.0, "loss": 0.0216, "step": 134050 }, { "action_loss": 0.0146, "epoch": 12.606938046441666, "learning_rate": 8.303898778603105e-06, "llm_loss": 0.0, "loss": 0.0146, "step": 134100 }, { "action_loss": 0.018, "epoch": 12.611638619911629, "learning_rate": 8.297383840650679e-06, "llm_loss": 0.0, "loss": 0.018, "step": 134150 }, { "action_loss": 0.0251, "epoch": 12.616339193381593, "learning_rate": 8.29086964685362e-06, "llm_loss": 0.0, "loss": 0.0251, "step": 134200 }, { "action_loss": 0.0118, "epoch": 12.621039766851556, "learning_rate": 8.284356200059051e-06, "llm_loss": 0.0, "loss": 0.0118, "step": 134250 }, { "action_loss": 0.0145, "epoch": 12.625740340321519, "learning_rate": 8.277843503113788e-06, "llm_loss": 0.0, "loss": 0.0145, "step": 134300 }, { "action_loss": 0.0144, "epoch": 12.630440913791482, "learning_rate": 8.2713315588643e-06, "llm_loss": 0.0, "loss": 0.0144, "step": 134350 }, { "action_loss": 0.0122, "epoch": 12.635141487261446, "learning_rate": 8.264820370156743e-06, "llm_loss": 0.0, "loss": 0.0122, "step": 134400 }, { "action_loss": 0.031, "epoch": 12.639842060731409, "learning_rate": 8.258309939836926e-06, "llm_loss": 0.0, "loss": 0.031, "step": 134450 }, { "action_loss": 0.0272, "epoch": 12.644542634201372, "learning_rate": 8.25180027075034e-06, "llm_loss": 0.0, "loss": 0.0272, "step": 134500 }, { "action_loss": 0.0281, "epoch": 12.649243207671336, "learning_rate": 8.245291365742141e-06, "llm_loss": 0.0, "loss": 0.0281, "step": 134550 }, { "action_loss": 0.0233, "epoch": 12.6539437811413, "learning_rate": 8.238783227657147e-06, "llm_loss": 0.0, "loss": 0.0233, "step": 134600 }, { "action_loss": 0.0218, "epoch": 12.658644354611262, "learning_rate": 8.232275859339842e-06, "llm_loss": 0.0, "loss": 0.0218, "step": 134650 }, { "action_loss": 0.0079, "epoch": 12.663344928081226, "learning_rate": 8.22576926363438e-06, "llm_loss": 0.0, "loss": 0.0079, "step": 134700 }, { "action_loss": 0.0143, "epoch": 12.66804550155119, "learning_rate": 8.219263443384563e-06, "llm_loss": 0.0, "loss": 0.0143, "step": 134750 }, { "action_loss": 0.0148, "epoch": 12.672746075021152, "learning_rate": 8.212758401433868e-06, "llm_loss": 0.0, "loss": 0.0148, "step": 134800 }, { "action_loss": 0.0151, "epoch": 12.677446648491117, "learning_rate": 8.206254140625425e-06, "llm_loss": 0.0, "loss": 0.0151, "step": 134850 }, { "action_loss": 0.0084, "epoch": 12.68214722196108, "learning_rate": 8.19975066380203e-06, "llm_loss": 0.0, "loss": 0.0084, "step": 134900 }, { "action_loss": 0.0351, "epoch": 12.686847795431042, "learning_rate": 8.193247973806122e-06, "llm_loss": 0.0, "loss": 0.0351, "step": 134950 }, { "action_loss": 0.0275, "epoch": 12.691548368901007, "learning_rate": 8.186746073479808e-06, "llm_loss": 0.0, "loss": 0.0275, "step": 135000 }, { "action_loss": 0.0173, "epoch": 12.69624894237097, "learning_rate": 8.180244965664845e-06, "llm_loss": 0.0, "loss": 0.0173, "step": 135050 }, { "action_loss": 0.0233, "epoch": 12.700949515840932, "learning_rate": 8.173744653202647e-06, "llm_loss": 0.0, "loss": 0.0233, "step": 135100 }, { "action_loss": 0.0149, "epoch": 12.705650089310897, "learning_rate": 8.167245138934283e-06, "llm_loss": 0.0, "loss": 0.0149, "step": 135150 }, { "action_loss": 0.0207, "epoch": 12.71035066278086, "learning_rate": 8.16074642570046e-06, "llm_loss": 0.0, "loss": 0.0207, "step": 135200 }, { "action_loss": 0.0188, "epoch": 12.715051236250822, "learning_rate": 8.154248516341547e-06, "llm_loss": 0.0, "loss": 0.0188, "step": 135250 }, { "action_loss": 0.0157, "epoch": 12.719751809720785, "learning_rate": 8.14775141369756e-06, "llm_loss": 0.0, "loss": 0.0157, "step": 135300 }, { "action_loss": 0.0142, "epoch": 12.72445238319075, "learning_rate": 8.14125512060816e-06, "llm_loss": 0.0, "loss": 0.0142, "step": 135350 }, { "action_loss": 0.0146, "epoch": 12.729152956660712, "learning_rate": 8.134759639912655e-06, "llm_loss": 0.0, "loss": 0.0146, "step": 135400 }, { "action_loss": 0.0281, "epoch": 12.733853530130675, "learning_rate": 8.128264974449996e-06, "llm_loss": 0.0, "loss": 0.0281, "step": 135450 }, { "action_loss": 0.0112, "epoch": 12.73855410360064, "learning_rate": 8.121771127058782e-06, "llm_loss": 0.0, "loss": 0.0112, "step": 135500 }, { "action_loss": 0.0117, "epoch": 12.743254677070603, "learning_rate": 8.11527810057725e-06, "llm_loss": 0.0, "loss": 0.0117, "step": 135550 }, { "action_loss": 0.0149, "epoch": 12.747955250540565, "learning_rate": 8.108785897843276e-06, "llm_loss": 0.0, "loss": 0.0149, "step": 135600 }, { "action_loss": 0.0207, "epoch": 12.75265582401053, "learning_rate": 8.102294521694388e-06, "llm_loss": 0.0, "loss": 0.0207, "step": 135650 }, { "action_loss": 0.0225, "epoch": 12.757356397480493, "learning_rate": 8.095803974967741e-06, "llm_loss": 0.0, "loss": 0.0225, "step": 135700 }, { "action_loss": 0.0251, "epoch": 12.762056970950455, "learning_rate": 8.089314260500132e-06, "llm_loss": 0.0, "loss": 0.0251, "step": 135750 }, { "action_loss": 0.0237, "epoch": 12.76675754442042, "learning_rate": 8.082825381127997e-06, "llm_loss": 0.0, "loss": 0.0237, "step": 135800 }, { "action_loss": 0.0084, "epoch": 12.771458117890383, "learning_rate": 8.076337339687395e-06, "llm_loss": 0.0, "loss": 0.0084, "step": 135850 }, { "action_loss": 0.0194, "epoch": 12.776158691360346, "learning_rate": 8.069850139014031e-06, "llm_loss": 0.0, "loss": 0.0194, "step": 135900 }, { "action_loss": 0.0284, "epoch": 12.78085926483031, "learning_rate": 8.063363781943243e-06, "llm_loss": 0.0, "loss": 0.0284, "step": 135950 }, { "action_loss": 0.0239, "epoch": 12.785559838300273, "learning_rate": 8.05687827130999e-06, "llm_loss": 0.0, "loss": 0.0239, "step": 136000 }, { "action_loss": 0.0214, "epoch": 12.790260411770236, "learning_rate": 8.05039360994887e-06, "llm_loss": 0.0, "loss": 0.0214, "step": 136050 }, { "action_loss": 0.0192, "epoch": 12.794960985240198, "learning_rate": 8.043909800694106e-06, "llm_loss": 0.0, "loss": 0.0192, "step": 136100 }, { "action_loss": 0.0149, "epoch": 12.799661558710163, "learning_rate": 8.037426846379551e-06, "llm_loss": 0.0, "loss": 0.0149, "step": 136150 }, { "action_loss": 0.0277, "epoch": 12.804362132180126, "learning_rate": 8.030944749838683e-06, "llm_loss": 0.0, "loss": 0.0277, "step": 136200 }, { "action_loss": 0.0407, "epoch": 12.809062705650089, "learning_rate": 8.024463513904608e-06, "llm_loss": 0.0, "loss": 0.0407, "step": 136250 }, { "action_loss": 0.0166, "epoch": 12.813763279120053, "learning_rate": 8.017983141410043e-06, "llm_loss": 0.0, "loss": 0.0166, "step": 136300 }, { "action_loss": 0.0305, "epoch": 12.818463852590016, "learning_rate": 8.011503635187347e-06, "llm_loss": 0.0, "loss": 0.0305, "step": 136350 }, { "action_loss": 0.026, "epoch": 12.823164426059979, "learning_rate": 8.005024998068486e-06, "llm_loss": 0.0, "loss": 0.026, "step": 136400 }, { "action_loss": 0.0174, "epoch": 12.827864999529943, "learning_rate": 7.998547232885053e-06, "llm_loss": 0.0, "loss": 0.0174, "step": 136450 }, { "action_loss": 0.0258, "epoch": 12.832565572999906, "learning_rate": 7.992070342468257e-06, "llm_loss": 0.0, "loss": 0.0258, "step": 136500 }, { "action_loss": 0.0217, "epoch": 12.837266146469869, "learning_rate": 7.985594329648925e-06, "llm_loss": 0.0, "loss": 0.0217, "step": 136550 }, { "action_loss": 0.0182, "epoch": 12.841966719939833, "learning_rate": 7.979119197257505e-06, "llm_loss": 0.0, "loss": 0.0182, "step": 136600 }, { "action_loss": 0.0243, "epoch": 12.846667293409796, "learning_rate": 7.97264494812405e-06, "llm_loss": 0.0, "loss": 0.0243, "step": 136650 }, { "action_loss": 0.0332, "epoch": 12.851367866879759, "learning_rate": 7.966171585078238e-06, "llm_loss": 0.0, "loss": 0.0332, "step": 136700 }, { "action_loss": 0.0211, "epoch": 12.856068440349723, "learning_rate": 7.959699110949356e-06, "llm_loss": 0.0, "loss": 0.0211, "step": 136750 }, { "action_loss": 0.0313, "epoch": 12.860769013819686, "learning_rate": 7.953227528566293e-06, "llm_loss": 0.0, "loss": 0.0313, "step": 136800 }, { "action_loss": 0.0178, "epoch": 12.865469587289649, "learning_rate": 7.946756840757564e-06, "llm_loss": 0.0, "loss": 0.0178, "step": 136850 }, { "action_loss": 0.0215, "epoch": 12.870170160759614, "learning_rate": 7.940287050351283e-06, "llm_loss": 0.0, "loss": 0.0215, "step": 136900 }, { "action_loss": 0.0221, "epoch": 12.874870734229576, "learning_rate": 7.933818160175175e-06, "llm_loss": 0.0, "loss": 0.0221, "step": 136950 }, { "action_loss": 0.0296, "epoch": 12.879571307699539, "learning_rate": 7.927350173056571e-06, "llm_loss": 0.0, "loss": 0.0296, "step": 137000 }, { "action_loss": 0.0248, "epoch": 12.884271881169504, "learning_rate": 7.92088309182241e-06, "llm_loss": 0.0, "loss": 0.0248, "step": 137050 }, { "action_loss": 0.0357, "epoch": 12.888972454639466, "learning_rate": 7.914416919299225e-06, "llm_loss": 0.0, "loss": 0.0357, "step": 137100 }, { "action_loss": 0.0245, "epoch": 12.89367302810943, "learning_rate": 7.907951658313167e-06, "llm_loss": 0.0, "loss": 0.0245, "step": 137150 }, { "action_loss": 0.021, "epoch": 12.898373601579392, "learning_rate": 7.901487311689974e-06, "llm_loss": 0.0, "loss": 0.021, "step": 137200 }, { "action_loss": 0.0152, "epoch": 12.903074175049356, "learning_rate": 7.895023882254996e-06, "llm_loss": 0.0, "loss": 0.0152, "step": 137250 }, { "action_loss": 0.0177, "epoch": 12.90777474851932, "learning_rate": 7.888561372833176e-06, "llm_loss": 0.0, "loss": 0.0177, "step": 137300 }, { "action_loss": 0.0151, "epoch": 12.912475321989282, "learning_rate": 7.882099786249054e-06, "llm_loss": 0.0, "loss": 0.0151, "step": 137350 }, { "action_loss": 0.0225, "epoch": 12.917175895459247, "learning_rate": 7.875639125326774e-06, "llm_loss": 0.0, "loss": 0.0225, "step": 137400 }, { "action_loss": 0.0143, "epoch": 12.92187646892921, "learning_rate": 7.869179392890065e-06, "llm_loss": 0.0, "loss": 0.0143, "step": 137450 }, { "action_loss": 0.0356, "epoch": 12.926577042399172, "learning_rate": 7.862720591762257e-06, "llm_loss": 0.0, "loss": 0.0356, "step": 137500 }, { "action_loss": 0.0219, "epoch": 12.931277615869137, "learning_rate": 7.85626272476627e-06, "llm_loss": 0.0, "loss": 0.0219, "step": 137550 }, { "action_loss": 0.0306, "epoch": 12.9359781893391, "learning_rate": 7.84980579472462e-06, "llm_loss": 0.0, "loss": 0.0306, "step": 137600 }, { "action_loss": 0.021, "epoch": 12.940678762809062, "learning_rate": 7.843349804459412e-06, "llm_loss": 0.0, "loss": 0.021, "step": 137650 }, { "action_loss": 0.0169, "epoch": 12.945379336279027, "learning_rate": 7.836894756792334e-06, "llm_loss": 0.0, "loss": 0.0169, "step": 137700 }, { "action_loss": 0.0301, "epoch": 12.95007990974899, "learning_rate": 7.830440654544666e-06, "llm_loss": 0.0, "loss": 0.0301, "step": 137750 }, { "action_loss": 0.0114, "epoch": 12.954780483218952, "learning_rate": 7.82398750053728e-06, "llm_loss": 0.0, "loss": 0.0114, "step": 137800 }, { "action_loss": 0.0019, "epoch": 12.959481056688915, "learning_rate": 7.81753529759063e-06, "llm_loss": 0.0, "loss": 0.0019, "step": 137850 }, { "action_loss": 0.0057, "epoch": 12.96418163015888, "learning_rate": 7.811084048524749e-06, "llm_loss": 0.0, "loss": 0.0057, "step": 137900 }, { "action_loss": 0.0246, "epoch": 12.968882203628842, "learning_rate": 7.804633756159258e-06, "llm_loss": 0.0, "loss": 0.0246, "step": 137950 }, { "action_loss": 0.0185, "epoch": 12.973582777098805, "learning_rate": 7.798184423313362e-06, "llm_loss": 0.0, "loss": 0.0185, "step": 138000 }, { "action_loss": 0.0283, "epoch": 12.97828335056877, "learning_rate": 7.791736052805844e-06, "llm_loss": 0.0, "loss": 0.0283, "step": 138050 }, { "action_loss": 0.0263, "epoch": 12.982983924038733, "learning_rate": 7.785288647455065e-06, "llm_loss": 0.0, "loss": 0.0263, "step": 138100 }, { "action_loss": 0.0246, "epoch": 12.987684497508695, "learning_rate": 7.77884221007897e-06, "llm_loss": 0.0, "loss": 0.0246, "step": 138150 }, { "action_loss": 0.0274, "epoch": 12.99238507097866, "learning_rate": 7.772396743495068e-06, "llm_loss": 0.0, "loss": 0.0274, "step": 138200 }, { "action_loss": 0.0182, "epoch": 12.997085644448623, "learning_rate": 7.765952250520459e-06, "llm_loss": 0.0, "loss": 0.0182, "step": 138250 }, { "action_loss": 0.0244, "epoch": 13.001786217918585, "learning_rate": 7.759508733971807e-06, "llm_loss": 0.0, "loss": 0.0244, "step": 138300 }, { "action_loss": 0.0182, "epoch": 13.00648679138855, "learning_rate": 7.753066196665357e-06, "llm_loss": 0.0, "loss": 0.0182, "step": 138350 }, { "action_loss": 0.014, "epoch": 13.011187364858513, "learning_rate": 7.74662464141692e-06, "llm_loss": 0.0, "loss": 0.014, "step": 138400 }, { "action_loss": 0.0184, "epoch": 13.015887938328476, "learning_rate": 7.740184071041879e-06, "llm_loss": 0.0, "loss": 0.0184, "step": 138450 }, { "action_loss": 0.0142, "epoch": 13.02058851179844, "learning_rate": 7.733744488355191e-06, "llm_loss": 0.0, "loss": 0.0142, "step": 138500 }, { "action_loss": 0.0289, "epoch": 13.025289085268403, "learning_rate": 7.727305896171372e-06, "llm_loss": 0.0, "loss": 0.0289, "step": 138550 }, { "action_loss": 0.0284, "epoch": 13.029989658738366, "learning_rate": 7.720868297304514e-06, "llm_loss": 0.0, "loss": 0.0284, "step": 138600 }, { "action_loss": 0.0188, "epoch": 13.03469023220833, "learning_rate": 7.714431694568272e-06, "llm_loss": 0.0, "loss": 0.0188, "step": 138650 }, { "action_loss": 0.0345, "epoch": 13.039390805678293, "learning_rate": 7.70799609077586e-06, "llm_loss": 0.0, "loss": 0.0345, "step": 138700 }, { "action_loss": 0.0156, "epoch": 13.044091379148256, "learning_rate": 7.701561488740068e-06, "llm_loss": 0.0, "loss": 0.0156, "step": 138750 }, { "action_loss": 0.0301, "epoch": 13.048791952618219, "learning_rate": 7.695127891273235e-06, "llm_loss": 0.0, "loss": 0.0301, "step": 138800 }, { "action_loss": 0.0082, "epoch": 13.053492526088183, "learning_rate": 7.68869530118727e-06, "llm_loss": 0.0, "loss": 0.0082, "step": 138850 }, { "action_loss": 0.0249, "epoch": 13.058193099558146, "learning_rate": 7.682263721293638e-06, "llm_loss": 0.0, "loss": 0.0249, "step": 138900 }, { "action_loss": 0.0215, "epoch": 13.062893673028109, "learning_rate": 7.675833154403363e-06, "llm_loss": 0.0, "loss": 0.0215, "step": 138950 }, { "action_loss": 0.0279, "epoch": 13.067594246498073, "learning_rate": 7.669403603327026e-06, "llm_loss": 0.0, "loss": 0.0279, "step": 139000 }, { "action_loss": 0.0208, "epoch": 13.072294819968036, "learning_rate": 7.662975070874761e-06, "llm_loss": 0.0, "loss": 0.0208, "step": 139050 }, { "action_loss": 0.0275, "epoch": 13.076995393437999, "learning_rate": 7.656547559856264e-06, "llm_loss": 0.0, "loss": 0.0275, "step": 139100 }, { "action_loss": 0.0445, "epoch": 13.081695966907963, "learning_rate": 7.650121073080778e-06, "llm_loss": 0.0, "loss": 0.0445, "step": 139150 }, { "action_loss": 0.0222, "epoch": 13.086396540377926, "learning_rate": 7.643695613357103e-06, "llm_loss": 0.0, "loss": 0.0222, "step": 139200 }, { "action_loss": 0.0144, "epoch": 13.091097113847889, "learning_rate": 7.637271183493587e-06, "llm_loss": 0.0, "loss": 0.0144, "step": 139250 }, { "action_loss": 0.0204, "epoch": 13.095797687317853, "learning_rate": 7.63084778629813e-06, "llm_loss": 0.0, "loss": 0.0204, "step": 139300 }, { "action_loss": 0.0354, "epoch": 13.100498260787816, "learning_rate": 7.624425424578177e-06, "llm_loss": 0.0, "loss": 0.0354, "step": 139350 }, { "action_loss": 0.0215, "epoch": 13.105198834257779, "learning_rate": 7.618004101140723e-06, "llm_loss": 0.0, "loss": 0.0215, "step": 139400 }, { "action_loss": 0.0142, "epoch": 13.109899407727744, "learning_rate": 7.611583818792311e-06, "llm_loss": 0.0, "loss": 0.0142, "step": 139450 }, { "action_loss": 0.0399, "epoch": 13.114599981197706, "learning_rate": 7.60516458033903e-06, "llm_loss": 0.0, "loss": 0.0399, "step": 139500 }, { "action_loss": 0.0046, "epoch": 13.119300554667669, "learning_rate": 7.598746388586503e-06, "llm_loss": 0.0, "loss": 0.0046, "step": 139550 }, { "action_loss": 0.0159, "epoch": 13.124001128137634, "learning_rate": 7.592329246339905e-06, "llm_loss": 0.0, "loss": 0.0159, "step": 139600 }, { "action_loss": 0.0234, "epoch": 13.128701701607596, "learning_rate": 7.58591315640395e-06, "llm_loss": 0.0, "loss": 0.0234, "step": 139650 }, { "action_loss": 0.0053, "epoch": 13.13340227507756, "learning_rate": 7.579498121582894e-06, "llm_loss": 0.0, "loss": 0.0053, "step": 139700 }, { "action_loss": 0.0109, "epoch": 13.138102848547522, "learning_rate": 7.573084144680527e-06, "llm_loss": 0.0, "loss": 0.0109, "step": 139750 }, { "action_loss": 0.0174, "epoch": 13.142803422017487, "learning_rate": 7.56667122850018e-06, "llm_loss": 0.0, "loss": 0.0174, "step": 139800 }, { "action_loss": 0.0116, "epoch": 13.14750399548745, "learning_rate": 7.560259375844719e-06, "llm_loss": 0.0, "loss": 0.0116, "step": 139850 }, { "action_loss": 0.0183, "epoch": 13.152204568957412, "learning_rate": 7.553848589516545e-06, "llm_loss": 0.0, "loss": 0.0183, "step": 139900 }, { "action_loss": 0.0213, "epoch": 13.156905142427377, "learning_rate": 7.547438872317596e-06, "llm_loss": 0.0, "loss": 0.0213, "step": 139950 }, { "action_loss": 0.0229, "epoch": 13.16160571589734, "learning_rate": 7.541030227049339e-06, "llm_loss": 0.0, "loss": 0.0229, "step": 140000 }, { "action_loss": 0.0154, "epoch": 13.166306289367302, "learning_rate": 7.534622656512777e-06, "llm_loss": 0.0, "loss": 0.0154, "step": 140050 }, { "action_loss": 0.0306, "epoch": 13.171006862837267, "learning_rate": 7.528216163508435e-06, "llm_loss": 0.0, "loss": 0.0306, "step": 140100 }, { "action_loss": 0.031, "epoch": 13.17570743630723, "learning_rate": 7.521810750836375e-06, "llm_loss": 0.0, "loss": 0.031, "step": 140150 }, { "action_loss": 0.0307, "epoch": 13.180408009777192, "learning_rate": 7.515406421296183e-06, "llm_loss": 0.0, "loss": 0.0307, "step": 140200 }, { "action_loss": 0.017, "epoch": 13.185108583247157, "learning_rate": 7.509003177686975e-06, "llm_loss": 0.0, "loss": 0.017, "step": 140250 }, { "action_loss": 0.0162, "epoch": 13.18980915671712, "learning_rate": 7.502601022807387e-06, "llm_loss": 0.0, "loss": 0.0162, "step": 140300 }, { "action_loss": 0.0217, "epoch": 13.194509730187082, "learning_rate": 7.496199959455584e-06, "llm_loss": 0.0, "loss": 0.0217, "step": 140350 }, { "action_loss": 0.0208, "epoch": 13.199210303657047, "learning_rate": 7.489799990429256e-06, "llm_loss": 0.0, "loss": 0.0208, "step": 140400 }, { "action_loss": 0.0292, "epoch": 13.20391087712701, "learning_rate": 7.483401118525605e-06, "llm_loss": 0.0, "loss": 0.0292, "step": 140450 }, { "action_loss": 0.0247, "epoch": 13.208611450596973, "learning_rate": 7.47700334654136e-06, "llm_loss": 0.0, "loss": 0.0247, "step": 140500 }, { "action_loss": 0.0177, "epoch": 13.213312024066937, "learning_rate": 7.470606677272771e-06, "llm_loss": 0.0, "loss": 0.0177, "step": 140550 }, { "action_loss": 0.0191, "epoch": 13.2180125975369, "learning_rate": 7.4642111135156e-06, "llm_loss": 0.0, "loss": 0.0191, "step": 140600 }, { "action_loss": 0.0314, "epoch": 13.222713171006863, "learning_rate": 7.4578166580651335e-06, "llm_loss": 0.0, "loss": 0.0314, "step": 140650 }, { "action_loss": 0.0382, "epoch": 13.227413744476825, "learning_rate": 7.451423313716166e-06, "llm_loss": 0.0, "loss": 0.0382, "step": 140700 }, { "action_loss": 0.0295, "epoch": 13.23211431794679, "learning_rate": 7.445031083263012e-06, "llm_loss": 0.0, "loss": 0.0295, "step": 140750 }, { "action_loss": 0.021, "epoch": 13.236814891416753, "learning_rate": 7.4386399694994986e-06, "llm_loss": 0.0, "loss": 0.021, "step": 140800 }, { "action_loss": 0.024, "epoch": 13.241515464886715, "learning_rate": 7.432249975218962e-06, "llm_loss": 0.0, "loss": 0.024, "step": 140850 }, { "action_loss": 0.0279, "epoch": 13.24621603835668, "learning_rate": 7.425861103214249e-06, "llm_loss": 0.0, "loss": 0.0279, "step": 140900 }, { "action_loss": 0.0243, "epoch": 13.250916611826643, "learning_rate": 7.419473356277718e-06, "llm_loss": 0.0, "loss": 0.0243, "step": 140950 }, { "action_loss": 0.0176, "epoch": 13.255617185296606, "learning_rate": 7.413086737201234e-06, "llm_loss": 0.0, "loss": 0.0176, "step": 141000 }, { "action_loss": 0.0113, "epoch": 13.26031775876657, "learning_rate": 7.406701248776174e-06, "llm_loss": 0.0, "loss": 0.0113, "step": 141050 }, { "action_loss": 0.0183, "epoch": 13.265018332236533, "learning_rate": 7.400316893793411e-06, "llm_loss": 0.0, "loss": 0.0183, "step": 141100 }, { "action_loss": 0.0268, "epoch": 13.269718905706496, "learning_rate": 7.393933675043334e-06, "llm_loss": 0.0, "loss": 0.0268, "step": 141150 }, { "action_loss": 0.0221, "epoch": 13.27441947917646, "learning_rate": 7.38755159531583e-06, "llm_loss": 0.0, "loss": 0.0221, "step": 141200 }, { "action_loss": 0.0268, "epoch": 13.279120052646423, "learning_rate": 7.381170657400281e-06, "llm_loss": 0.0, "loss": 0.0268, "step": 141250 }, { "action_loss": 0.0175, "epoch": 13.283820626116386, "learning_rate": 7.374790864085583e-06, "llm_loss": 0.0, "loss": 0.0175, "step": 141300 }, { "action_loss": 0.012, "epoch": 13.28852119958635, "learning_rate": 7.3684122181601245e-06, "llm_loss": 0.0, "loss": 0.012, "step": 141350 }, { "action_loss": 0.0148, "epoch": 13.293221773056313, "learning_rate": 7.362034722411798e-06, "llm_loss": 0.0, "loss": 0.0148, "step": 141400 }, { "action_loss": 0.0324, "epoch": 13.297922346526276, "learning_rate": 7.355658379627981e-06, "llm_loss": 0.0, "loss": 0.0324, "step": 141450 }, { "action_loss": 0.0208, "epoch": 13.302622919996239, "learning_rate": 7.3492831925955574e-06, "llm_loss": 0.0, "loss": 0.0208, "step": 141500 }, { "action_loss": 0.0236, "epoch": 13.307323493466203, "learning_rate": 7.342909164100906e-06, "llm_loss": 0.0, "loss": 0.0236, "step": 141550 }, { "action_loss": 0.0253, "epoch": 13.312024066936166, "learning_rate": 7.336536296929895e-06, "llm_loss": 0.0, "loss": 0.0253, "step": 141600 }, { "action_loss": 0.0155, "epoch": 13.316724640406129, "learning_rate": 7.3301645938678915e-06, "llm_loss": 0.0, "loss": 0.0155, "step": 141650 }, { "action_loss": 0.024, "epoch": 13.321425213876093, "learning_rate": 7.323794057699742e-06, "llm_loss": 0.0, "loss": 0.024, "step": 141700 }, { "action_loss": 0.0255, "epoch": 13.326125787346056, "learning_rate": 7.317424691209794e-06, "llm_loss": 0.0, "loss": 0.0255, "step": 141750 }, { "action_loss": 0.0328, "epoch": 13.330826360816019, "learning_rate": 7.311056497181881e-06, "llm_loss": 0.0, "loss": 0.0328, "step": 141800 }, { "action_loss": 0.0048, "epoch": 13.335526934285983, "learning_rate": 7.3046894783993225e-06, "llm_loss": 0.0, "loss": 0.0048, "step": 141850 }, { "action_loss": 0.0278, "epoch": 13.340227507755946, "learning_rate": 7.2983236376449226e-06, "llm_loss": 0.0, "loss": 0.0278, "step": 141900 }, { "action_loss": 0.0292, "epoch": 13.344928081225909, "learning_rate": 7.291958977700978e-06, "llm_loss": 0.0, "loss": 0.0292, "step": 141950 }, { "action_loss": 0.0124, "epoch": 13.349628654695874, "learning_rate": 7.285595501349259e-06, "llm_loss": 0.0, "loss": 0.0124, "step": 142000 }, { "action_loss": 0.0235, "epoch": 13.354329228165836, "learning_rate": 7.279233211371025e-06, "llm_loss": 0.0, "loss": 0.0235, "step": 142050 }, { "action_loss": 0.0144, "epoch": 13.3590298016358, "learning_rate": 7.272872110547016e-06, "llm_loss": 0.0, "loss": 0.0144, "step": 142100 }, { "action_loss": 0.0147, "epoch": 13.363730375105764, "learning_rate": 7.266512201657452e-06, "llm_loss": 0.0, "loss": 0.0147, "step": 142150 }, { "action_loss": 0.015, "epoch": 13.368430948575726, "learning_rate": 7.260153487482034e-06, "llm_loss": 0.0, "loss": 0.015, "step": 142200 }, { "action_loss": 0.0317, "epoch": 13.37313152204569, "learning_rate": 7.253795970799935e-06, "llm_loss": 0.0, "loss": 0.0317, "step": 142250 }, { "action_loss": 0.02, "epoch": 13.377832095515654, "learning_rate": 7.247439654389814e-06, "llm_loss": 0.0, "loss": 0.02, "step": 142300 }, { "action_loss": 0.0155, "epoch": 13.382532668985617, "learning_rate": 7.241084541029792e-06, "llm_loss": 0.0, "loss": 0.0155, "step": 142350 }, { "action_loss": 0.0212, "epoch": 13.38723324245558, "learning_rate": 7.234730633497477e-06, "llm_loss": 0.0, "loss": 0.0212, "step": 142400 }, { "action_loss": 0.0154, "epoch": 13.391933815925542, "learning_rate": 7.2283779345699455e-06, "llm_loss": 0.0, "loss": 0.0154, "step": 142450 }, { "action_loss": 0.0149, "epoch": 13.396634389395507, "learning_rate": 7.22202644702374e-06, "llm_loss": 0.0, "loss": 0.0149, "step": 142500 }, { "action_loss": 0.0215, "epoch": 13.40133496286547, "learning_rate": 7.215676173634884e-06, "llm_loss": 0.0, "loss": 0.0215, "step": 142550 }, { "action_loss": 0.0312, "epoch": 13.406035536335432, "learning_rate": 7.2093271171788635e-06, "llm_loss": 0.0, "loss": 0.0312, "step": 142600 }, { "action_loss": 0.033, "epoch": 13.410736109805397, "learning_rate": 7.202979280430633e-06, "llm_loss": 0.0, "loss": 0.033, "step": 142650 }, { "action_loss": 0.0086, "epoch": 13.41543668327536, "learning_rate": 7.196632666164616e-06, "llm_loss": 0.0, "loss": 0.0086, "step": 142700 }, { "action_loss": 0.0163, "epoch": 13.420137256745322, "learning_rate": 7.190287277154704e-06, "llm_loss": 0.0, "loss": 0.0163, "step": 142750 }, { "action_loss": 0.0244, "epoch": 13.424837830215287, "learning_rate": 7.183943116174245e-06, "llm_loss": 0.0, "loss": 0.0244, "step": 142800 }, { "action_loss": 0.0119, "epoch": 13.42953840368525, "learning_rate": 7.177600185996056e-06, "llm_loss": 0.0, "loss": 0.0119, "step": 142850 }, { "action_loss": 0.0115, "epoch": 13.434238977155212, "learning_rate": 7.171258489392417e-06, "llm_loss": 0.0, "loss": 0.0115, "step": 142900 }, { "action_loss": 0.0212, "epoch": 13.438939550625177, "learning_rate": 7.164918029135064e-06, "llm_loss": 0.0, "loss": 0.0212, "step": 142950 }, { "action_loss": 0.0221, "epoch": 13.44364012409514, "learning_rate": 7.158578807995199e-06, "llm_loss": 0.0, "loss": 0.0221, "step": 143000 }, { "action_loss": 0.0267, "epoch": 13.448340697565103, "learning_rate": 7.1522408287434774e-06, "llm_loss": 0.0, "loss": 0.0267, "step": 143050 }, { "action_loss": 0.0281, "epoch": 13.453041271035067, "learning_rate": 7.145904094150015e-06, "llm_loss": 0.0, "loss": 0.0281, "step": 143100 }, { "action_loss": 0.0183, "epoch": 13.45774184450503, "learning_rate": 7.139568606984379e-06, "llm_loss": 0.0, "loss": 0.0183, "step": 143150 }, { "action_loss": 0.0113, "epoch": 13.462442417974993, "learning_rate": 7.133234370015596e-06, "llm_loss": 0.0, "loss": 0.0113, "step": 143200 }, { "action_loss": 0.0218, "epoch": 13.467142991444955, "learning_rate": 7.126901386012148e-06, "llm_loss": 0.0, "loss": 0.0218, "step": 143250 }, { "action_loss": 0.0151, "epoch": 13.47184356491492, "learning_rate": 7.120569657741961e-06, "llm_loss": 0.0, "loss": 0.0151, "step": 143300 }, { "action_loss": 0.0178, "epoch": 13.476544138384883, "learning_rate": 7.114239187972416e-06, "llm_loss": 0.0, "loss": 0.0178, "step": 143350 }, { "action_loss": 0.0122, "epoch": 13.481244711854846, "learning_rate": 7.107909979470349e-06, "llm_loss": 0.0, "loss": 0.0122, "step": 143400 }, { "action_loss": 0.0172, "epoch": 13.48594528532481, "learning_rate": 7.101582035002039e-06, "llm_loss": 0.0, "loss": 0.0172, "step": 143450 }, { "action_loss": 0.0244, "epoch": 13.490645858794773, "learning_rate": 7.095255357333215e-06, "llm_loss": 0.0, "loss": 0.0244, "step": 143500 }, { "action_loss": 0.0119, "epoch": 13.495346432264736, "learning_rate": 7.088929949229054e-06, "llm_loss": 0.0, "loss": 0.0119, "step": 143550 }, { "action_loss": 0.0216, "epoch": 13.5000470057347, "learning_rate": 7.082605813454172e-06, "llm_loss": 0.0, "loss": 0.0216, "step": 143600 }, { "action_loss": 0.0152, "epoch": 13.504747579204663, "learning_rate": 7.076282952772634e-06, "llm_loss": 0.0, "loss": 0.0152, "step": 143650 }, { "action_loss": 0.0314, "epoch": 13.509448152674626, "learning_rate": 7.069961369947947e-06, "llm_loss": 0.0, "loss": 0.0314, "step": 143700 }, { "action_loss": 0.0115, "epoch": 13.51414872614459, "learning_rate": 7.063641067743059e-06, "llm_loss": 0.0, "loss": 0.0115, "step": 143750 }, { "action_loss": 0.0307, "epoch": 13.518849299614553, "learning_rate": 7.057322048920356e-06, "llm_loss": 0.0, "loss": 0.0307, "step": 143800 }, { "action_loss": 0.0245, "epoch": 13.523549873084516, "learning_rate": 7.051004316241672e-06, "llm_loss": 0.0, "loss": 0.0245, "step": 143850 }, { "action_loss": 0.0185, "epoch": 13.52825044655448, "learning_rate": 7.044687872468263e-06, "llm_loss": 0.0, "loss": 0.0185, "step": 143900 }, { "action_loss": 0.0234, "epoch": 13.532951020024443, "learning_rate": 7.0383727203608356e-06, "llm_loss": 0.0, "loss": 0.0234, "step": 143950 }, { "action_loss": 0.03, "epoch": 13.537651593494406, "learning_rate": 7.032058862679527e-06, "llm_loss": 0.0, "loss": 0.03, "step": 144000 }, { "action_loss": 0.016, "epoch": 13.54235216696437, "learning_rate": 7.025746302183907e-06, "llm_loss": 0.0, "loss": 0.016, "step": 144050 }, { "action_loss": 0.0308, "epoch": 13.547052740434333, "learning_rate": 7.019435041632982e-06, "llm_loss": 0.0, "loss": 0.0308, "step": 144100 }, { "action_loss": 0.0179, "epoch": 13.551753313904296, "learning_rate": 7.013125083785193e-06, "llm_loss": 0.0, "loss": 0.0179, "step": 144150 }, { "action_loss": 0.0242, "epoch": 13.55645388737426, "learning_rate": 7.006816431398398e-06, "llm_loss": 0.0, "loss": 0.0242, "step": 144200 }, { "action_loss": 0.0242, "epoch": 13.561154460844223, "learning_rate": 7.0005090872298955e-06, "llm_loss": 0.0, "loss": 0.0242, "step": 144250 }, { "action_loss": 0.012, "epoch": 13.565855034314186, "learning_rate": 6.994203054036415e-06, "llm_loss": 0.0, "loss": 0.012, "step": 144300 }, { "action_loss": 0.035, "epoch": 13.570555607784149, "learning_rate": 6.987898334574103e-06, "llm_loss": 0.0, "loss": 0.035, "step": 144350 }, { "action_loss": 0.0142, "epoch": 13.575256181254113, "learning_rate": 6.981594931598539e-06, "llm_loss": 0.0, "loss": 0.0142, "step": 144400 }, { "action_loss": 0.0285, "epoch": 13.579956754724076, "learning_rate": 6.975292847864725e-06, "llm_loss": 0.0, "loss": 0.0285, "step": 144450 }, { "action_loss": 0.0108, "epoch": 13.584657328194039, "learning_rate": 6.968992086127089e-06, "llm_loss": 0.0, "loss": 0.0108, "step": 144500 }, { "action_loss": 0.012, "epoch": 13.589357901664004, "learning_rate": 6.962692649139474e-06, "llm_loss": 0.0, "loss": 0.012, "step": 144550 }, { "action_loss": 0.024, "epoch": 13.594058475133966, "learning_rate": 6.956394539655153e-06, "llm_loss": 0.0, "loss": 0.024, "step": 144600 }, { "action_loss": 0.023, "epoch": 13.59875904860393, "learning_rate": 6.950097760426814e-06, "llm_loss": 0.0, "loss": 0.023, "step": 144650 }, { "action_loss": 0.0161, "epoch": 13.603459622073894, "learning_rate": 6.94380231420656e-06, "llm_loss": 0.0, "loss": 0.0161, "step": 144700 }, { "action_loss": 0.0148, "epoch": 13.608160195543856, "learning_rate": 6.93750820374592e-06, "llm_loss": 0.0, "loss": 0.0148, "step": 144750 }, { "action_loss": 0.024, "epoch": 13.61286076901382, "learning_rate": 6.931215431795834e-06, "llm_loss": 0.0, "loss": 0.024, "step": 144800 }, { "action_loss": 0.0139, "epoch": 13.617561342483784, "learning_rate": 6.924924001106655e-06, "llm_loss": 0.0, "loss": 0.0139, "step": 144850 }, { "action_loss": 0.0112, "epoch": 13.622261915953747, "learning_rate": 6.918633914428154e-06, "llm_loss": 0.0, "loss": 0.0112, "step": 144900 }, { "action_loss": 0.0252, "epoch": 13.62696248942371, "learning_rate": 6.912345174509517e-06, "llm_loss": 0.0, "loss": 0.0252, "step": 144950 }, { "action_loss": 0.0157, "epoch": 13.631663062893672, "learning_rate": 6.9060577840993316e-06, "llm_loss": 0.0, "loss": 0.0157, "step": 145000 }, { "action_loss": 0.021, "epoch": 13.636363636363637, "learning_rate": 6.899771745945601e-06, "llm_loss": 0.0, "loss": 0.021, "step": 145050 }, { "action_loss": 0.0149, "epoch": 13.6410642098336, "learning_rate": 6.893487062795748e-06, "llm_loss": 0.0, "loss": 0.0149, "step": 145100 }, { "action_loss": 0.021, "epoch": 13.645764783303562, "learning_rate": 6.887203737396577e-06, "llm_loss": 0.0, "loss": 0.021, "step": 145150 }, { "action_loss": 0.015, "epoch": 13.650465356773527, "learning_rate": 6.880921772494325e-06, "llm_loss": 0.0, "loss": 0.015, "step": 145200 }, { "action_loss": 0.0151, "epoch": 13.65516593024349, "learning_rate": 6.874641170834624e-06, "llm_loss": 0.0, "loss": 0.0151, "step": 145250 }, { "action_loss": 0.0213, "epoch": 13.659866503713452, "learning_rate": 6.8683619351625065e-06, "llm_loss": 0.0, "loss": 0.0213, "step": 145300 }, { "action_loss": 0.0225, "epoch": 13.664567077183417, "learning_rate": 6.862084068222414e-06, "llm_loss": 0.0, "loss": 0.0225, "step": 145350 }, { "action_loss": 0.0251, "epoch": 13.66926765065338, "learning_rate": 6.855807572758189e-06, "llm_loss": 0.0, "loss": 0.0251, "step": 145400 }, { "action_loss": 0.0335, "epoch": 13.673968224123342, "learning_rate": 6.8495324515130744e-06, "llm_loss": 0.0, "loss": 0.0335, "step": 145450 }, { "action_loss": 0.0182, "epoch": 13.678668797593307, "learning_rate": 6.8432587072297065e-06, "llm_loss": 0.0, "loss": 0.0182, "step": 145500 }, { "action_loss": 0.0113, "epoch": 13.68336937106327, "learning_rate": 6.836986342650127e-06, "llm_loss": 0.0, "loss": 0.0113, "step": 145550 }, { "action_loss": 0.0153, "epoch": 13.688069944533233, "learning_rate": 6.830715360515773e-06, "llm_loss": 0.0, "loss": 0.0153, "step": 145600 }, { "action_loss": 0.0263, "epoch": 13.692770518003197, "learning_rate": 6.824445763567475e-06, "llm_loss": 0.0, "loss": 0.0263, "step": 145650 }, { "action_loss": 0.0107, "epoch": 13.69747109147316, "learning_rate": 6.818177554545462e-06, "llm_loss": 0.0, "loss": 0.0107, "step": 145700 }, { "action_loss": 0.0269, "epoch": 13.702171664943123, "learning_rate": 6.811910736189353e-06, "llm_loss": 0.0, "loss": 0.0269, "step": 145750 }, { "action_loss": 0.0178, "epoch": 13.706872238413087, "learning_rate": 6.80564531123816e-06, "llm_loss": 0.0, "loss": 0.0178, "step": 145800 }, { "action_loss": 0.0082, "epoch": 13.71157281188305, "learning_rate": 6.799381282430284e-06, "llm_loss": 0.0, "loss": 0.0082, "step": 145850 }, { "action_loss": 0.025, "epoch": 13.716273385353013, "learning_rate": 6.793118652503522e-06, "llm_loss": 0.0, "loss": 0.025, "step": 145900 }, { "action_loss": 0.0341, "epoch": 13.720973958822977, "learning_rate": 6.786857424195052e-06, "llm_loss": 0.0, "loss": 0.0341, "step": 145950 }, { "action_loss": 0.011, "epoch": 13.72567453229294, "learning_rate": 6.780597600241452e-06, "llm_loss": 0.0, "loss": 0.011, "step": 146000 }, { "action_loss": 0.0208, "epoch": 13.730375105762903, "learning_rate": 6.774339183378663e-06, "llm_loss": 0.0, "loss": 0.0208, "step": 146050 }, { "action_loss": 0.0209, "epoch": 13.735075679232866, "learning_rate": 6.768082176342034e-06, "llm_loss": 0.0, "loss": 0.0209, "step": 146100 }, { "action_loss": 0.0231, "epoch": 13.73977625270283, "learning_rate": 6.761826581866287e-06, "llm_loss": 0.0, "loss": 0.0231, "step": 146150 }, { "action_loss": 0.0251, "epoch": 13.744476826172793, "learning_rate": 6.755572402685531e-06, "llm_loss": 0.0, "loss": 0.0251, "step": 146200 }, { "action_loss": 0.0152, "epoch": 13.749177399642756, "learning_rate": 6.74931964153325e-06, "llm_loss": 0.0, "loss": 0.0152, "step": 146250 }, { "action_loss": 0.016, "epoch": 13.75387797311272, "learning_rate": 6.7430683011423125e-06, "llm_loss": 0.0, "loss": 0.016, "step": 146300 }, { "action_loss": 0.0224, "epoch": 13.758578546582683, "learning_rate": 6.73681838424497e-06, "llm_loss": 0.0, "loss": 0.0224, "step": 146350 }, { "action_loss": 0.0322, "epoch": 13.763279120052646, "learning_rate": 6.7305698935728445e-06, "llm_loss": 0.0, "loss": 0.0322, "step": 146400 }, { "action_loss": 0.0291, "epoch": 13.76797969352261, "learning_rate": 6.7243228318569395e-06, "llm_loss": 0.0, "loss": 0.0291, "step": 146450 }, { "action_loss": 0.0115, "epoch": 13.772680266992573, "learning_rate": 6.718077201827631e-06, "llm_loss": 0.0, "loss": 0.0115, "step": 146500 }, { "action_loss": 0.0255, "epoch": 13.777380840462536, "learning_rate": 6.7118330062146715e-06, "llm_loss": 0.0, "loss": 0.0255, "step": 146550 }, { "action_loss": 0.023, "epoch": 13.7820814139325, "learning_rate": 6.705590247747187e-06, "llm_loss": 0.0, "loss": 0.023, "step": 146600 }, { "action_loss": 0.0275, "epoch": 13.786781987402463, "learning_rate": 6.699348929153668e-06, "llm_loss": 0.0, "loss": 0.0275, "step": 146650 }, { "action_loss": 0.015, "epoch": 13.791482560872426, "learning_rate": 6.693109053161989e-06, "llm_loss": 0.0, "loss": 0.015, "step": 146700 }, { "action_loss": 0.0278, "epoch": 13.796183134342389, "learning_rate": 6.686870622499382e-06, "llm_loss": 0.0, "loss": 0.0278, "step": 146750 }, { "action_loss": 0.0298, "epoch": 13.800883707812353, "learning_rate": 6.680633639892453e-06, "llm_loss": 0.0, "loss": 0.0298, "step": 146800 }, { "action_loss": 0.0169, "epoch": 13.805584281282316, "learning_rate": 6.6743981080671785e-06, "llm_loss": 0.0, "loss": 0.0169, "step": 146850 }, { "action_loss": 0.0178, "epoch": 13.810284854752279, "learning_rate": 6.66816402974889e-06, "llm_loss": 0.0, "loss": 0.0178, "step": 146900 }, { "action_loss": 0.0172, "epoch": 13.814985428222244, "learning_rate": 6.661931407662292e-06, "llm_loss": 0.0, "loss": 0.0172, "step": 146950 }, { "action_loss": 0.0244, "epoch": 13.819686001692206, "learning_rate": 6.655700244531453e-06, "llm_loss": 0.0, "loss": 0.0244, "step": 147000 }, { "action_loss": 0.0249, "epoch": 13.824386575162169, "learning_rate": 6.649470543079799e-06, "llm_loss": 0.0, "loss": 0.0249, "step": 147050 }, { "action_loss": 0.0277, "epoch": 13.829087148632134, "learning_rate": 6.64324230603012e-06, "llm_loss": 0.0, "loss": 0.0277, "step": 147100 }, { "action_loss": 0.0189, "epoch": 13.833787722102096, "learning_rate": 6.637015536104565e-06, "llm_loss": 0.0, "loss": 0.0189, "step": 147150 }, { "action_loss": 0.0156, "epoch": 13.83848829557206, "learning_rate": 6.630790236024644e-06, "llm_loss": 0.0, "loss": 0.0156, "step": 147200 }, { "action_loss": 0.0207, "epoch": 13.843188869042024, "learning_rate": 6.6245664085112235e-06, "llm_loss": 0.0, "loss": 0.0207, "step": 147250 }, { "action_loss": 0.0145, "epoch": 13.847889442511987, "learning_rate": 6.618344056284525e-06, "llm_loss": 0.0, "loss": 0.0145, "step": 147300 }, { "action_loss": 0.0114, "epoch": 13.85259001598195, "learning_rate": 6.612123182064128e-06, "llm_loss": 0.0, "loss": 0.0114, "step": 147350 }, { "action_loss": 0.0311, "epoch": 13.857290589451914, "learning_rate": 6.605903788568962e-06, "llm_loss": 0.0, "loss": 0.0311, "step": 147400 }, { "action_loss": 0.0135, "epoch": 13.861991162921877, "learning_rate": 6.5996858785173105e-06, "llm_loss": 0.0, "loss": 0.0135, "step": 147450 }, { "action_loss": 0.0293, "epoch": 13.86669173639184, "learning_rate": 6.5934694546268095e-06, "llm_loss": 0.0, "loss": 0.0293, "step": 147500 }, { "action_loss": 0.0177, "epoch": 13.871392309861804, "learning_rate": 6.587254519614447e-06, "llm_loss": 0.0, "loss": 0.0177, "step": 147550 }, { "action_loss": 0.0295, "epoch": 13.876092883331767, "learning_rate": 6.581041076196561e-06, "llm_loss": 0.0, "loss": 0.0295, "step": 147600 }, { "action_loss": 0.0238, "epoch": 13.88079345680173, "learning_rate": 6.574829127088834e-06, "llm_loss": 0.0, "loss": 0.0238, "step": 147650 }, { "action_loss": 0.0283, "epoch": 13.885494030271694, "learning_rate": 6.5686186750062945e-06, "llm_loss": 0.0, "loss": 0.0283, "step": 147700 }, { "action_loss": 0.0243, "epoch": 13.890194603741657, "learning_rate": 6.562409722663319e-06, "llm_loss": 0.0, "loss": 0.0243, "step": 147750 }, { "action_loss": 0.021, "epoch": 13.89489517721162, "learning_rate": 6.5562022727736305e-06, "llm_loss": 0.0, "loss": 0.021, "step": 147800 }, { "action_loss": 0.0264, "epoch": 13.899595750681582, "learning_rate": 6.549996328050296e-06, "llm_loss": 0.0, "loss": 0.0264, "step": 147850 }, { "action_loss": 0.0346, "epoch": 13.904296324151547, "learning_rate": 6.543791891205715e-06, "llm_loss": 0.0, "loss": 0.0346, "step": 147900 }, { "action_loss": 0.028, "epoch": 13.90899689762151, "learning_rate": 6.53758896495164e-06, "llm_loss": 0.0, "loss": 0.028, "step": 147950 }, { "action_loss": 0.0186, "epoch": 13.913697471091472, "learning_rate": 6.531387551999155e-06, "llm_loss": 0.0, "loss": 0.0186, "step": 148000 }, { "action_loss": 0.0113, "epoch": 13.918398044561437, "learning_rate": 6.525187655058687e-06, "llm_loss": 0.0, "loss": 0.0113, "step": 148050 }, { "action_loss": 0.0308, "epoch": 13.9230986180314, "learning_rate": 6.518989276840003e-06, "llm_loss": 0.0, "loss": 0.0308, "step": 148100 }, { "action_loss": 0.0218, "epoch": 13.927799191501363, "learning_rate": 6.5127924200521965e-06, "llm_loss": 0.0, "loss": 0.0218, "step": 148150 }, { "action_loss": 0.0282, "epoch": 13.932499764971327, "learning_rate": 6.506597087403703e-06, "llm_loss": 0.0, "loss": 0.0282, "step": 148200 }, { "action_loss": 0.0136, "epoch": 13.93720033844129, "learning_rate": 6.500403281602295e-06, "llm_loss": 0.0, "loss": 0.0136, "step": 148250 }, { "action_loss": 0.0396, "epoch": 13.941900911911253, "learning_rate": 6.494211005355069e-06, "llm_loss": 0.0, "loss": 0.0396, "step": 148300 }, { "action_loss": 0.0146, "epoch": 13.946601485381217, "learning_rate": 6.4880202613684586e-06, "llm_loss": 0.0, "loss": 0.0146, "step": 148350 }, { "action_loss": 0.0141, "epoch": 13.95130205885118, "learning_rate": 6.481831052348228e-06, "llm_loss": 0.0, "loss": 0.0141, "step": 148400 }, { "action_loss": 0.0309, "epoch": 13.956002632321143, "learning_rate": 6.475643380999469e-06, "llm_loss": 0.0, "loss": 0.0309, "step": 148450 }, { "action_loss": 0.015, "epoch": 13.960703205791106, "learning_rate": 6.469457250026599e-06, "llm_loss": 0.0, "loss": 0.015, "step": 148500 }, { "action_loss": 0.0217, "epoch": 13.96540377926107, "learning_rate": 6.463272662133366e-06, "llm_loss": 0.0, "loss": 0.0217, "step": 148550 }, { "action_loss": 0.0073, "epoch": 13.970104352731033, "learning_rate": 6.4570896200228415e-06, "llm_loss": 0.0, "loss": 0.0073, "step": 148600 }, { "action_loss": 0.0178, "epoch": 13.974804926200996, "learning_rate": 6.450908126397424e-06, "llm_loss": 0.0, "loss": 0.0178, "step": 148650 }, { "action_loss": 0.0239, "epoch": 13.97950549967096, "learning_rate": 6.444728183958831e-06, "llm_loss": 0.0, "loss": 0.0239, "step": 148700 }, { "action_loss": 0.0252, "epoch": 13.984206073140923, "learning_rate": 6.438549795408107e-06, "llm_loss": 0.0, "loss": 0.0252, "step": 148750 }, { "action_loss": 0.0244, "epoch": 13.988906646610886, "learning_rate": 6.432372963445612e-06, "llm_loss": 0.0, "loss": 0.0244, "step": 148800 }, { "action_loss": 0.0184, "epoch": 13.99360722008085, "learning_rate": 6.4261976907710264e-06, "llm_loss": 0.0, "loss": 0.0184, "step": 148850 }, { "action_loss": 0.0174, "epoch": 13.998307793550813, "learning_rate": 6.420023980083356e-06, "llm_loss": 0.0, "loss": 0.0174, "step": 148900 }, { "action_loss": 0.0184, "epoch": 14.003008367020776, "learning_rate": 6.4138518340809155e-06, "llm_loss": 0.0, "loss": 0.0184, "step": 148950 }, { "action_loss": 0.0205, "epoch": 14.00770894049074, "learning_rate": 6.407681255461339e-06, "llm_loss": 0.0, "loss": 0.0205, "step": 149000 }, { "action_loss": 0.0173, "epoch": 14.012409513960703, "learning_rate": 6.401512246921576e-06, "llm_loss": 0.0, "loss": 0.0173, "step": 149050 }, { "action_loss": 0.0123, "epoch": 14.017110087430666, "learning_rate": 6.395344811157891e-06, "llm_loss": 0.0, "loss": 0.0123, "step": 149100 }, { "action_loss": 0.025, "epoch": 14.02181066090063, "learning_rate": 6.389178950865857e-06, "llm_loss": 0.0, "loss": 0.025, "step": 149150 }, { "action_loss": 0.0241, "epoch": 14.026511234370593, "learning_rate": 6.3830146687403635e-06, "llm_loss": 0.0, "loss": 0.0241, "step": 149200 }, { "action_loss": 0.0135, "epoch": 14.031211807840556, "learning_rate": 6.376851967475608e-06, "llm_loss": 0.0, "loss": 0.0135, "step": 149250 }, { "action_loss": 0.0292, "epoch": 14.03591238131052, "learning_rate": 6.3706908497650935e-06, "llm_loss": 0.0, "loss": 0.0292, "step": 149300 }, { "action_loss": 0.016, "epoch": 14.040612954780483, "learning_rate": 6.3645313183016345e-06, "llm_loss": 0.0, "loss": 0.016, "step": 149350 }, { "action_loss": 0.0117, "epoch": 14.045313528250446, "learning_rate": 6.358373375777354e-06, "llm_loss": 0.0, "loss": 0.0117, "step": 149400 }, { "action_loss": 0.0225, "epoch": 14.05001410172041, "learning_rate": 6.352217024883678e-06, "llm_loss": 0.0, "loss": 0.0225, "step": 149450 }, { "action_loss": 0.0219, "epoch": 14.054714675190374, "learning_rate": 6.346062268311336e-06, "llm_loss": 0.0, "loss": 0.0219, "step": 149500 }, { "action_loss": 0.0198, "epoch": 14.059415248660336, "learning_rate": 6.339909108750364e-06, "llm_loss": 0.0, "loss": 0.0198, "step": 149550 }, { "action_loss": 0.0181, "epoch": 14.064115822130299, "learning_rate": 6.333757548890095e-06, "llm_loss": 0.0, "loss": 0.0181, "step": 149600 }, { "action_loss": 0.018, "epoch": 14.068816395600264, "learning_rate": 6.327607591419167e-06, "llm_loss": 0.0, "loss": 0.018, "step": 149650 }, { "action_loss": 0.0267, "epoch": 14.073516969070226, "learning_rate": 6.321459239025516e-06, "llm_loss": 0.0, "loss": 0.0267, "step": 149700 }, { "action_loss": 0.0258, "epoch": 14.07821754254019, "learning_rate": 6.315312494396382e-06, "llm_loss": 0.0, "loss": 0.0258, "step": 149750 }, { "action_loss": 0.0281, "epoch": 14.082918116010154, "learning_rate": 6.309167360218287e-06, "llm_loss": 0.0, "loss": 0.0281, "step": 149800 }, { "action_loss": 0.0177, "epoch": 14.087618689480117, "learning_rate": 6.303023839177065e-06, "llm_loss": 0.0, "loss": 0.0177, "step": 149850 }, { "action_loss": 0.0334, "epoch": 14.09231926295008, "learning_rate": 6.296881933957838e-06, "llm_loss": 0.0, "loss": 0.0334, "step": 149900 }, { "action_loss": 0.0248, "epoch": 14.097019836420044, "learning_rate": 6.290741647245024e-06, "llm_loss": 0.0, "loss": 0.0248, "step": 149950 }, { "action_loss": 0.0321, "epoch": 14.101720409890007, "learning_rate": 6.284602981722334e-06, "llm_loss": 0.0, "loss": 0.0321, "step": 150000 }, { "action_loss": 0.0075, "epoch": 14.10642098335997, "learning_rate": 6.278465940072767e-06, "llm_loss": 0.0, "loss": 0.0075, "step": 150050 }, { "action_loss": 0.0212, "epoch": 14.111121556829934, "learning_rate": 6.272330524978613e-06, "llm_loss": 0.0, "loss": 0.0212, "step": 150100 }, { "action_loss": 0.0202, "epoch": 14.115822130299897, "learning_rate": 6.266196739121453e-06, "llm_loss": 0.0, "loss": 0.0202, "step": 150150 }, { "action_loss": 0.0262, "epoch": 14.12052270376986, "learning_rate": 6.260064585182157e-06, "llm_loss": 0.0, "loss": 0.0262, "step": 150200 }, { "action_loss": 0.0181, "epoch": 14.125223277239824, "learning_rate": 6.25393406584088e-06, "llm_loss": 0.0, "loss": 0.0181, "step": 150250 }, { "action_loss": 0.005, "epoch": 14.129923850709787, "learning_rate": 6.24780518377706e-06, "llm_loss": 0.0, "loss": 0.005, "step": 150300 }, { "action_loss": 0.0266, "epoch": 14.13462442417975, "learning_rate": 6.241677941669428e-06, "llm_loss": 0.0, "loss": 0.0266, "step": 150350 }, { "action_loss": 0.0165, "epoch": 14.139324997649712, "learning_rate": 6.235552342195985e-06, "llm_loss": 0.0, "loss": 0.0165, "step": 150400 }, { "action_loss": 0.0136, "epoch": 14.144025571119677, "learning_rate": 6.2294283880340246e-06, "llm_loss": 0.0, "loss": 0.0136, "step": 150450 }, { "action_loss": 0.0308, "epoch": 14.14872614458964, "learning_rate": 6.223306081860117e-06, "llm_loss": 0.0, "loss": 0.0308, "step": 150500 }, { "action_loss": 0.0156, "epoch": 14.153426718059603, "learning_rate": 6.217185426350113e-06, "llm_loss": 0.0, "loss": 0.0156, "step": 150550 }, { "action_loss": 0.0151, "epoch": 14.158127291529567, "learning_rate": 6.211066424179144e-06, "llm_loss": 0.0, "loss": 0.0151, "step": 150600 }, { "action_loss": 0.0242, "epoch": 14.16282786499953, "learning_rate": 6.20494907802162e-06, "llm_loss": 0.0, "loss": 0.0242, "step": 150650 }, { "action_loss": 0.0177, "epoch": 14.167528438469493, "learning_rate": 6.198833390551214e-06, "llm_loss": 0.0, "loss": 0.0177, "step": 150700 }, { "action_loss": 0.0255, "epoch": 14.172229011939457, "learning_rate": 6.192719364440891e-06, "llm_loss": 0.0, "loss": 0.0255, "step": 150750 }, { "action_loss": 0.0176, "epoch": 14.17692958540942, "learning_rate": 6.186607002362883e-06, "llm_loss": 0.0, "loss": 0.0176, "step": 150800 }, { "action_loss": 0.0176, "epoch": 14.181630158879383, "learning_rate": 6.180496306988693e-06, "llm_loss": 0.0, "loss": 0.0176, "step": 150850 }, { "action_loss": 0.0249, "epoch": 14.186330732349347, "learning_rate": 6.174387280989096e-06, "llm_loss": 0.0, "loss": 0.0249, "step": 150900 }, { "action_loss": 0.0278, "epoch": 14.19103130581931, "learning_rate": 6.16827992703414e-06, "llm_loss": 0.0, "loss": 0.0278, "step": 150950 }, { "action_loss": 0.0179, "epoch": 14.195731879289273, "learning_rate": 6.162174247793141e-06, "llm_loss": 0.0, "loss": 0.0179, "step": 151000 }, { "action_loss": 0.0259, "epoch": 14.200432452759237, "learning_rate": 6.1560702459346845e-06, "llm_loss": 0.0, "loss": 0.0259, "step": 151050 }, { "action_loss": 0.0206, "epoch": 14.2051330262292, "learning_rate": 6.14996792412662e-06, "llm_loss": 0.0, "loss": 0.0206, "step": 151100 }, { "action_loss": 0.0208, "epoch": 14.209833599699163, "learning_rate": 6.143867285036061e-06, "llm_loss": 0.0, "loss": 0.0208, "step": 151150 }, { "action_loss": 0.0142, "epoch": 14.214534173169127, "learning_rate": 6.137768331329392e-06, "llm_loss": 0.0, "loss": 0.0142, "step": 151200 }, { "action_loss": 0.0181, "epoch": 14.21923474663909, "learning_rate": 6.131671065672256e-06, "llm_loss": 0.0, "loss": 0.0181, "step": 151250 }, { "action_loss": 0.0143, "epoch": 14.223935320109053, "learning_rate": 6.125575490729561e-06, "llm_loss": 0.0, "loss": 0.0143, "step": 151300 }, { "action_loss": 0.022, "epoch": 14.228635893579016, "learning_rate": 6.119481609165472e-06, "llm_loss": 0.0, "loss": 0.022, "step": 151350 }, { "action_loss": 0.0337, "epoch": 14.23333646704898, "learning_rate": 6.1133894236434164e-06, "llm_loss": 0.0, "loss": 0.0337, "step": 151400 }, { "action_loss": 0.0307, "epoch": 14.238037040518943, "learning_rate": 6.107298936826086e-06, "llm_loss": 0.0, "loss": 0.0307, "step": 151450 }, { "action_loss": 0.0186, "epoch": 14.242737613988906, "learning_rate": 6.101210151375417e-06, "llm_loss": 0.0, "loss": 0.0186, "step": 151500 }, { "action_loss": 0.0271, "epoch": 14.24743818745887, "learning_rate": 6.095123069952614e-06, "llm_loss": 0.0, "loss": 0.0271, "step": 151550 }, { "action_loss": 0.0222, "epoch": 14.252138760928833, "learning_rate": 6.089037695218135e-06, "llm_loss": 0.0, "loss": 0.0222, "step": 151600 }, { "action_loss": 0.0272, "epoch": 14.256839334398796, "learning_rate": 6.082954029831682e-06, "llm_loss": 0.0, "loss": 0.0272, "step": 151650 }, { "action_loss": 0.0149, "epoch": 14.26153990786876, "learning_rate": 6.07687207645222e-06, "llm_loss": 0.0, "loss": 0.0149, "step": 151700 }, { "action_loss": 0.0155, "epoch": 14.266240481338723, "learning_rate": 6.070791837737965e-06, "llm_loss": 0.0, "loss": 0.0155, "step": 151750 }, { "action_loss": 0.027, "epoch": 14.270941054808686, "learning_rate": 6.06471331634638e-06, "llm_loss": 0.0, "loss": 0.027, "step": 151800 }, { "action_loss": 0.0238, "epoch": 14.27564162827865, "learning_rate": 6.058636514934181e-06, "llm_loss": 0.0, "loss": 0.0238, "step": 151850 }, { "action_loss": 0.0346, "epoch": 14.280342201748613, "learning_rate": 6.052561436157329e-06, "llm_loss": 0.0, "loss": 0.0346, "step": 151900 }, { "action_loss": 0.024, "epoch": 14.285042775218576, "learning_rate": 6.046488082671034e-06, "llm_loss": 0.0, "loss": 0.024, "step": 151950 }, { "action_loss": 0.0235, "epoch": 14.28974334868854, "learning_rate": 6.04041645712975e-06, "llm_loss": 0.0, "loss": 0.0235, "step": 152000 }, { "action_loss": 0.0224, "epoch": 14.294443922158504, "learning_rate": 6.0343465621871774e-06, "llm_loss": 0.0, "loss": 0.0224, "step": 152050 }, { "action_loss": 0.0209, "epoch": 14.299144495628466, "learning_rate": 6.028278400496261e-06, "llm_loss": 0.0, "loss": 0.0209, "step": 152100 }, { "action_loss": 0.0181, "epoch": 14.30384506909843, "learning_rate": 6.022211974709187e-06, "llm_loss": 0.0, "loss": 0.0181, "step": 152150 }, { "action_loss": 0.0245, "epoch": 14.308545642568394, "learning_rate": 6.016147287477382e-06, "llm_loss": 0.0, "loss": 0.0245, "step": 152200 }, { "action_loss": 0.0236, "epoch": 14.313246216038356, "learning_rate": 6.010084341451516e-06, "llm_loss": 0.0, "loss": 0.0236, "step": 152250 }, { "action_loss": 0.0309, "epoch": 14.31794678950832, "learning_rate": 6.004023139281492e-06, "llm_loss": 0.0, "loss": 0.0309, "step": 152300 }, { "action_loss": 0.0154, "epoch": 14.322647362978284, "learning_rate": 5.997963683616454e-06, "llm_loss": 0.0, "loss": 0.0154, "step": 152350 }, { "action_loss": 0.0175, "epoch": 14.327347936448247, "learning_rate": 5.991905977104788e-06, "llm_loss": 0.0, "loss": 0.0175, "step": 152400 }, { "action_loss": 0.0078, "epoch": 14.33204850991821, "learning_rate": 5.9858500223941066e-06, "llm_loss": 0.0, "loss": 0.0078, "step": 152450 }, { "action_loss": 0.0195, "epoch": 14.336749083388174, "learning_rate": 5.979795822131267e-06, "llm_loss": 0.0, "loss": 0.0195, "step": 152500 }, { "action_loss": 0.0214, "epoch": 14.341449656858137, "learning_rate": 5.973743378962343e-06, "llm_loss": 0.0, "loss": 0.0214, "step": 152550 }, { "action_loss": 0.0272, "epoch": 14.3461502303281, "learning_rate": 5.967692695532657e-06, "llm_loss": 0.0, "loss": 0.0272, "step": 152600 }, { "action_loss": 0.0217, "epoch": 14.350850803798064, "learning_rate": 5.961643774486754e-06, "llm_loss": 0.0, "loss": 0.0217, "step": 152650 }, { "action_loss": 0.0182, "epoch": 14.355551377268027, "learning_rate": 5.955596618468413e-06, "llm_loss": 0.0, "loss": 0.0182, "step": 152700 }, { "action_loss": 0.0194, "epoch": 14.36025195073799, "learning_rate": 5.949551230120638e-06, "llm_loss": 0.0, "loss": 0.0194, "step": 152750 }, { "action_loss": 0.0282, "epoch": 14.364952524207954, "learning_rate": 5.943507612085661e-06, "llm_loss": 0.0, "loss": 0.0282, "step": 152800 }, { "action_loss": 0.0142, "epoch": 14.369653097677917, "learning_rate": 5.937465767004942e-06, "llm_loss": 0.0, "loss": 0.0142, "step": 152850 }, { "action_loss": 0.0153, "epoch": 14.37435367114788, "learning_rate": 5.931425697519166e-06, "llm_loss": 0.0, "loss": 0.0153, "step": 152900 }, { "action_loss": 0.0216, "epoch": 14.379054244617844, "learning_rate": 5.9253874062682396e-06, "llm_loss": 0.0, "loss": 0.0216, "step": 152950 }, { "action_loss": 0.0263, "epoch": 14.383754818087807, "learning_rate": 5.9193508958912945e-06, "llm_loss": 0.0, "loss": 0.0263, "step": 153000 }, { "action_loss": 0.0172, "epoch": 14.38845539155777, "learning_rate": 5.913316169026683e-06, "llm_loss": 0.0, "loss": 0.0172, "step": 153050 }, { "action_loss": 0.0182, "epoch": 14.393155965027733, "learning_rate": 5.9072832283119765e-06, "llm_loss": 0.0, "loss": 0.0182, "step": 153100 }, { "action_loss": 0.0163, "epoch": 14.397856538497697, "learning_rate": 5.90125207638397e-06, "llm_loss": 0.0, "loss": 0.0163, "step": 153150 }, { "action_loss": 0.0122, "epoch": 14.40255711196766, "learning_rate": 5.89522271587867e-06, "llm_loss": 0.0, "loss": 0.0122, "step": 153200 }, { "action_loss": 0.0206, "epoch": 14.407257685437623, "learning_rate": 5.8891951494313096e-06, "llm_loss": 0.0, "loss": 0.0206, "step": 153250 }, { "action_loss": 0.0211, "epoch": 14.411958258907587, "learning_rate": 5.883169379676328e-06, "llm_loss": 0.0, "loss": 0.0211, "step": 153300 }, { "action_loss": 0.0081, "epoch": 14.41665883237755, "learning_rate": 5.877145409247387e-06, "llm_loss": 0.0, "loss": 0.0081, "step": 153350 }, { "action_loss": 0.0087, "epoch": 14.421359405847513, "learning_rate": 5.871123240777352e-06, "llm_loss": 0.0, "loss": 0.0087, "step": 153400 }, { "action_loss": 0.0085, "epoch": 14.426059979317477, "learning_rate": 5.8651028768983155e-06, "llm_loss": 0.0, "loss": 0.0085, "step": 153450 }, { "action_loss": 0.0279, "epoch": 14.43076055278744, "learning_rate": 5.859084320241568e-06, "llm_loss": 0.0, "loss": 0.0279, "step": 153500 }, { "action_loss": 0.0196, "epoch": 14.435461126257403, "learning_rate": 5.853067573437612e-06, "llm_loss": 0.0, "loss": 0.0196, "step": 153550 }, { "action_loss": 0.0388, "epoch": 14.440161699727367, "learning_rate": 5.8470526391161665e-06, "llm_loss": 0.0, "loss": 0.0388, "step": 153600 }, { "action_loss": 0.0211, "epoch": 14.44486227319733, "learning_rate": 5.841039519906153e-06, "llm_loss": 0.0, "loss": 0.0211, "step": 153650 }, { "action_loss": 0.0153, "epoch": 14.449562846667293, "learning_rate": 5.8350282184357024e-06, "llm_loss": 0.0, "loss": 0.0153, "step": 153700 }, { "action_loss": 0.0354, "epoch": 14.454263420137258, "learning_rate": 5.829018737332144e-06, "llm_loss": 0.0, "loss": 0.0354, "step": 153750 }, { "action_loss": 0.0177, "epoch": 14.45896399360722, "learning_rate": 5.823011079222021e-06, "llm_loss": 0.0, "loss": 0.0177, "step": 153800 }, { "action_loss": 0.0303, "epoch": 14.463664567077183, "learning_rate": 5.8170052467310734e-06, "llm_loss": 0.0, "loss": 0.0303, "step": 153850 }, { "action_loss": 0.0196, "epoch": 14.468365140547146, "learning_rate": 5.811001242484248e-06, "llm_loss": 0.0, "loss": 0.0196, "step": 153900 }, { "action_loss": 0.0175, "epoch": 14.47306571401711, "learning_rate": 5.804999069105688e-06, "llm_loss": 0.0, "loss": 0.0175, "step": 153950 }, { "action_loss": 0.0267, "epoch": 14.477766287487073, "learning_rate": 5.798998729218738e-06, "llm_loss": 0.0, "loss": 0.0267, "step": 154000 }, { "action_loss": 0.0198, "epoch": 14.482466860957036, "learning_rate": 5.793000225445941e-06, "llm_loss": 0.0, "loss": 0.0198, "step": 154050 }, { "action_loss": 0.0143, "epoch": 14.487167434427, "learning_rate": 5.7870035604090416e-06, "llm_loss": 0.0, "loss": 0.0143, "step": 154100 }, { "action_loss": 0.0151, "epoch": 14.491868007896963, "learning_rate": 5.781008736728975e-06, "llm_loss": 0.0, "loss": 0.0151, "step": 154150 }, { "action_loss": 0.0177, "epoch": 14.496568581366926, "learning_rate": 5.7750157570258735e-06, "llm_loss": 0.0, "loss": 0.0177, "step": 154200 }, { "action_loss": 0.0246, "epoch": 14.50126915483689, "learning_rate": 5.769024623919064e-06, "llm_loss": 0.0, "loss": 0.0246, "step": 154250 }, { "action_loss": 0.0304, "epoch": 14.505969728306853, "learning_rate": 5.763035340027071e-06, "llm_loss": 0.0, "loss": 0.0304, "step": 154300 }, { "action_loss": 0.0247, "epoch": 14.510670301776816, "learning_rate": 5.757047907967597e-06, "llm_loss": 0.0, "loss": 0.0247, "step": 154350 }, { "action_loss": 0.0306, "epoch": 14.51537087524678, "learning_rate": 5.751062330357549e-06, "llm_loss": 0.0, "loss": 0.0306, "step": 154400 }, { "action_loss": 0.0143, "epoch": 14.520071448716743, "learning_rate": 5.7450786098130196e-06, "llm_loss": 0.0, "loss": 0.0143, "step": 154450 }, { "action_loss": 0.0237, "epoch": 14.524772022186706, "learning_rate": 5.7390967489492845e-06, "llm_loss": 0.0, "loss": 0.0237, "step": 154500 }, { "action_loss": 0.0307, "epoch": 14.52947259565667, "learning_rate": 5.7331167503808135e-06, "llm_loss": 0.0, "loss": 0.0307, "step": 154550 }, { "action_loss": 0.0169, "epoch": 14.534173169126634, "learning_rate": 5.72713861672126e-06, "llm_loss": 0.0, "loss": 0.0169, "step": 154600 }, { "action_loss": 0.0208, "epoch": 14.538873742596596, "learning_rate": 5.72116235058346e-06, "llm_loss": 0.0, "loss": 0.0208, "step": 154650 }, { "action_loss": 0.0122, "epoch": 14.543574316066561, "learning_rate": 5.715187954579437e-06, "llm_loss": 0.0, "loss": 0.0122, "step": 154700 }, { "action_loss": 0.0142, "epoch": 14.548274889536524, "learning_rate": 5.709215431320394e-06, "llm_loss": 0.0, "loss": 0.0142, "step": 154750 }, { "action_loss": 0.0246, "epoch": 14.552975463006486, "learning_rate": 5.703244783416719e-06, "llm_loss": 0.0, "loss": 0.0246, "step": 154800 }, { "action_loss": 0.0122, "epoch": 14.557676036476451, "learning_rate": 5.697276013477977e-06, "llm_loss": 0.0, "loss": 0.0122, "step": 154850 }, { "action_loss": 0.014, "epoch": 14.562376609946414, "learning_rate": 5.691309124112913e-06, "llm_loss": 0.0, "loss": 0.014, "step": 154900 }, { "action_loss": 0.0305, "epoch": 14.567077183416377, "learning_rate": 5.685344117929453e-06, "llm_loss": 0.0, "loss": 0.0305, "step": 154950 }, { "action_loss": 0.0276, "epoch": 14.57177775688634, "learning_rate": 5.679380997534696e-06, "llm_loss": 0.0, "loss": 0.0276, "step": 155000 }, { "action_loss": 0.021, "epoch": 14.576478330356304, "learning_rate": 5.673419765534915e-06, "llm_loss": 0.0, "loss": 0.021, "step": 155050 }, { "action_loss": 0.0263, "epoch": 14.581178903826267, "learning_rate": 5.66746042453557e-06, "llm_loss": 0.0, "loss": 0.0263, "step": 155100 }, { "action_loss": 0.0147, "epoch": 14.58587947729623, "learning_rate": 5.661502977141274e-06, "llm_loss": 0.0, "loss": 0.0147, "step": 155150 }, { "action_loss": 0.0307, "epoch": 14.590580050766194, "learning_rate": 5.65554742595583e-06, "llm_loss": 0.0, "loss": 0.0307, "step": 155200 }, { "action_loss": 0.0052, "epoch": 14.595280624236157, "learning_rate": 5.649593773582205e-06, "llm_loss": 0.0, "loss": 0.0052, "step": 155250 }, { "action_loss": 0.0296, "epoch": 14.59998119770612, "learning_rate": 5.643642022622539e-06, "llm_loss": 0.0, "loss": 0.0296, "step": 155300 }, { "action_loss": 0.0083, "epoch": 14.604681771176084, "learning_rate": 5.637692175678136e-06, "llm_loss": 0.0, "loss": 0.0083, "step": 155350 }, { "action_loss": 0.0245, "epoch": 14.609382344646047, "learning_rate": 5.631744235349469e-06, "llm_loss": 0.0, "loss": 0.0245, "step": 155400 }, { "action_loss": 0.0205, "epoch": 14.61408291811601, "learning_rate": 5.6257982042361835e-06, "llm_loss": 0.0, "loss": 0.0205, "step": 155450 }, { "action_loss": 0.0113, "epoch": 14.618783491585974, "learning_rate": 5.619854084937085e-06, "llm_loss": 0.0, "loss": 0.0113, "step": 155500 }, { "action_loss": 0.0254, "epoch": 14.623484065055937, "learning_rate": 5.6139118800501465e-06, "llm_loss": 0.0, "loss": 0.0254, "step": 155550 }, { "action_loss": 0.0103, "epoch": 14.6281846385259, "learning_rate": 5.607971592172501e-06, "llm_loss": 0.0, "loss": 0.0103, "step": 155600 }, { "action_loss": 0.0409, "epoch": 14.632885211995863, "learning_rate": 5.6020332239004475e-06, "llm_loss": 0.0, "loss": 0.0409, "step": 155650 }, { "action_loss": 0.0212, "epoch": 14.637585785465827, "learning_rate": 5.59609677782944e-06, "llm_loss": 0.0, "loss": 0.0212, "step": 155700 }, { "action_loss": 0.0241, "epoch": 14.64228635893579, "learning_rate": 5.590162256554102e-06, "llm_loss": 0.0, "loss": 0.0241, "step": 155750 }, { "action_loss": 0.0146, "epoch": 14.646986932405753, "learning_rate": 5.584229662668206e-06, "llm_loss": 0.0, "loss": 0.0146, "step": 155800 }, { "action_loss": 0.0262, "epoch": 14.651687505875717, "learning_rate": 5.57829899876469e-06, "llm_loss": 0.0, "loss": 0.0262, "step": 155850 }, { "action_loss": 0.0201, "epoch": 14.65638807934568, "learning_rate": 5.572370267435638e-06, "llm_loss": 0.0, "loss": 0.0201, "step": 155900 }, { "action_loss": 0.0115, "epoch": 14.661088652815643, "learning_rate": 5.5664434712723e-06, "llm_loss": 0.0, "loss": 0.0115, "step": 155950 }, { "action_loss": 0.0181, "epoch": 14.665789226285607, "learning_rate": 5.560518612865075e-06, "llm_loss": 0.0, "loss": 0.0181, "step": 156000 }, { "action_loss": 0.0453, "epoch": 14.67048979975557, "learning_rate": 5.554595694803518e-06, "llm_loss": 0.0, "loss": 0.0453, "step": 156050 }, { "action_loss": 0.0183, "epoch": 14.675190373225533, "learning_rate": 5.548674719676333e-06, "llm_loss": 0.0, "loss": 0.0183, "step": 156100 }, { "action_loss": 0.03, "epoch": 14.679890946695497, "learning_rate": 5.542755690071377e-06, "llm_loss": 0.0, "loss": 0.03, "step": 156150 }, { "action_loss": 0.0342, "epoch": 14.68459152016546, "learning_rate": 5.536838608575653e-06, "llm_loss": 0.0, "loss": 0.0342, "step": 156200 }, { "action_loss": 0.015, "epoch": 14.689292093635423, "learning_rate": 5.5309234777753225e-06, "llm_loss": 0.0, "loss": 0.015, "step": 156250 }, { "action_loss": 0.0119, "epoch": 14.693992667105388, "learning_rate": 5.525010300255676e-06, "llm_loss": 0.0, "loss": 0.0119, "step": 156300 }, { "action_loss": 0.0233, "epoch": 14.69869324057535, "learning_rate": 5.5190990786011686e-06, "llm_loss": 0.0, "loss": 0.0233, "step": 156350 }, { "action_loss": 0.0335, "epoch": 14.703393814045313, "learning_rate": 5.5131898153953924e-06, "llm_loss": 0.0, "loss": 0.0335, "step": 156400 }, { "action_loss": 0.0108, "epoch": 14.708094387515278, "learning_rate": 5.507282513221082e-06, "llm_loss": 0.0, "loss": 0.0108, "step": 156450 }, { "action_loss": 0.0143, "epoch": 14.71279496098524, "learning_rate": 5.501377174660121e-06, "llm_loss": 0.0, "loss": 0.0143, "step": 156500 }, { "action_loss": 0.037, "epoch": 14.717495534455203, "learning_rate": 5.495473802293527e-06, "llm_loss": 0.0, "loss": 0.037, "step": 156550 }, { "action_loss": 0.0245, "epoch": 14.722196107925168, "learning_rate": 5.489572398701467e-06, "llm_loss": 0.0, "loss": 0.0245, "step": 156600 }, { "action_loss": 0.0185, "epoch": 14.72689668139513, "learning_rate": 5.483672966463245e-06, "llm_loss": 0.0, "loss": 0.0185, "step": 156650 }, { "action_loss": 0.0233, "epoch": 14.731597254865093, "learning_rate": 5.4777755081572915e-06, "llm_loss": 0.0, "loss": 0.0233, "step": 156700 }, { "action_loss": 0.0306, "epoch": 14.736297828335056, "learning_rate": 5.47188002636119e-06, "llm_loss": 0.0, "loss": 0.0306, "step": 156750 }, { "action_loss": 0.0166, "epoch": 14.74099840180502, "learning_rate": 5.465986523651654e-06, "llm_loss": 0.0, "loss": 0.0166, "step": 156800 }, { "action_loss": 0.0319, "epoch": 14.745698975274983, "learning_rate": 5.460095002604533e-06, "llm_loss": 0.0, "loss": 0.0319, "step": 156850 }, { "action_loss": 0.0089, "epoch": 14.750399548744946, "learning_rate": 5.454205465794808e-06, "llm_loss": 0.0, "loss": 0.0089, "step": 156900 }, { "action_loss": 0.0145, "epoch": 14.75510012221491, "learning_rate": 5.448317915796596e-06, "llm_loss": 0.0, "loss": 0.0145, "step": 156950 }, { "action_loss": 0.0404, "epoch": 14.759800695684874, "learning_rate": 5.442432355183142e-06, "llm_loss": 0.0, "loss": 0.0404, "step": 157000 }, { "action_loss": 0.0204, "epoch": 14.764501269154836, "learning_rate": 5.436548786526825e-06, "llm_loss": 0.0, "loss": 0.0204, "step": 157050 }, { "action_loss": 0.034, "epoch": 14.7692018426248, "learning_rate": 5.430667212399152e-06, "llm_loss": 0.0, "loss": 0.034, "step": 157100 }, { "action_loss": 0.0346, "epoch": 14.773902416094764, "learning_rate": 5.424787635370761e-06, "llm_loss": 0.0, "loss": 0.0346, "step": 157150 }, { "action_loss": 0.0242, "epoch": 14.778602989564726, "learning_rate": 5.418910058011406e-06, "llm_loss": 0.0, "loss": 0.0242, "step": 157200 }, { "action_loss": 0.0168, "epoch": 14.783303563034691, "learning_rate": 5.413034482889981e-06, "llm_loss": 0.0, "loss": 0.0168, "step": 157250 }, { "action_loss": 0.0151, "epoch": 14.788004136504654, "learning_rate": 5.4071609125744986e-06, "llm_loss": 0.0, "loss": 0.0151, "step": 157300 }, { "action_loss": 0.0051, "epoch": 14.792704709974617, "learning_rate": 5.4012893496320954e-06, "llm_loss": 0.0, "loss": 0.0051, "step": 157350 }, { "action_loss": 0.0239, "epoch": 14.797405283444581, "learning_rate": 5.3954197966290315e-06, "llm_loss": 0.0, "loss": 0.0239, "step": 157400 }, { "action_loss": 0.0153, "epoch": 14.802105856914544, "learning_rate": 5.38955225613069e-06, "llm_loss": 0.0, "loss": 0.0153, "step": 157450 }, { "action_loss": 0.0213, "epoch": 14.806806430384507, "learning_rate": 5.3836867307015675e-06, "llm_loss": 0.0, "loss": 0.0213, "step": 157500 }, { "action_loss": 0.0229, "epoch": 14.81150700385447, "learning_rate": 5.377823222905287e-06, "llm_loss": 0.0, "loss": 0.0229, "step": 157550 }, { "action_loss": 0.0109, "epoch": 14.816207577324434, "learning_rate": 5.371961735304587e-06, "llm_loss": 0.0, "loss": 0.0109, "step": 157600 }, { "action_loss": 0.0173, "epoch": 14.820908150794397, "learning_rate": 5.3661022704613265e-06, "llm_loss": 0.0, "loss": 0.0173, "step": 157650 }, { "action_loss": 0.0183, "epoch": 14.82560872426436, "learning_rate": 5.360244830936473e-06, "llm_loss": 0.0, "loss": 0.0183, "step": 157700 }, { "action_loss": 0.0247, "epoch": 14.830309297734324, "learning_rate": 5.354389419290116e-06, "llm_loss": 0.0, "loss": 0.0247, "step": 157750 }, { "action_loss": 0.0143, "epoch": 14.835009871204287, "learning_rate": 5.348536038081455e-06, "llm_loss": 0.0, "loss": 0.0143, "step": 157800 }, { "action_loss": 0.0337, "epoch": 14.83971044467425, "learning_rate": 5.3426846898688054e-06, "llm_loss": 0.0, "loss": 0.0337, "step": 157850 }, { "action_loss": 0.0216, "epoch": 14.844411018144214, "learning_rate": 5.336835377209588e-06, "llm_loss": 0.0, "loss": 0.0216, "step": 157900 }, { "action_loss": 0.0239, "epoch": 14.849111591614177, "learning_rate": 5.33098810266034e-06, "llm_loss": 0.0, "loss": 0.0239, "step": 157950 }, { "action_loss": 0.021, "epoch": 14.85381216508414, "learning_rate": 5.325142868776705e-06, "llm_loss": 0.0, "loss": 0.021, "step": 158000 }, { "action_loss": 0.0121, "epoch": 14.858512738554104, "learning_rate": 5.319299678113432e-06, "llm_loss": 0.0, "loss": 0.0121, "step": 158050 }, { "action_loss": 0.0234, "epoch": 14.863213312024067, "learning_rate": 5.3134585332243895e-06, "llm_loss": 0.0, "loss": 0.0234, "step": 158100 }, { "action_loss": 0.0282, "epoch": 14.86791388549403, "learning_rate": 5.30761943666253e-06, "llm_loss": 0.0, "loss": 0.0282, "step": 158150 }, { "action_loss": 0.0184, "epoch": 14.872614458963994, "learning_rate": 5.3017823909799295e-06, "llm_loss": 0.0, "loss": 0.0184, "step": 158200 }, { "action_loss": 0.0204, "epoch": 14.877315032433957, "learning_rate": 5.295947398727763e-06, "llm_loss": 0.0, "loss": 0.0204, "step": 158250 }, { "action_loss": 0.0405, "epoch": 14.88201560590392, "learning_rate": 5.2901144624563014e-06, "llm_loss": 0.0, "loss": 0.0405, "step": 158300 }, { "action_loss": 0.0305, "epoch": 14.886716179373884, "learning_rate": 5.284283584714925e-06, "llm_loss": 0.0, "loss": 0.0305, "step": 158350 }, { "action_loss": 0.0316, "epoch": 14.891416752843847, "learning_rate": 5.278454768052108e-06, "llm_loss": 0.0, "loss": 0.0316, "step": 158400 }, { "action_loss": 0.0175, "epoch": 14.89611732631381, "learning_rate": 5.272628015015431e-06, "llm_loss": 0.0, "loss": 0.0175, "step": 158450 }, { "action_loss": 0.0245, "epoch": 14.900817899783773, "learning_rate": 5.2668033281515676e-06, "llm_loss": 0.0, "loss": 0.0245, "step": 158500 }, { "action_loss": 0.0268, "epoch": 14.905518473253737, "learning_rate": 5.260980710006289e-06, "llm_loss": 0.0, "loss": 0.0268, "step": 158550 }, { "action_loss": 0.0291, "epoch": 14.9102190467237, "learning_rate": 5.255160163124462e-06, "llm_loss": 0.0, "loss": 0.0291, "step": 158600 }, { "action_loss": 0.0049, "epoch": 14.914919620193663, "learning_rate": 5.249341690050051e-06, "llm_loss": 0.0, "loss": 0.0049, "step": 158650 }, { "action_loss": 0.0217, "epoch": 14.919620193663627, "learning_rate": 5.24352529332611e-06, "llm_loss": 0.0, "loss": 0.0217, "step": 158700 }, { "action_loss": 0.0398, "epoch": 14.92432076713359, "learning_rate": 5.237710975494789e-06, "llm_loss": 0.0, "loss": 0.0398, "step": 158750 }, { "action_loss": 0.0108, "epoch": 14.929021340603553, "learning_rate": 5.231898739097325e-06, "llm_loss": 0.0, "loss": 0.0108, "step": 158800 }, { "action_loss": 0.0212, "epoch": 14.933721914073518, "learning_rate": 5.226088586674054e-06, "llm_loss": 0.0, "loss": 0.0212, "step": 158850 }, { "action_loss": 0.0049, "epoch": 14.93842248754348, "learning_rate": 5.2202805207643865e-06, "llm_loss": 0.0, "loss": 0.0049, "step": 158900 }, { "action_loss": 0.0256, "epoch": 14.943123061013443, "learning_rate": 5.214474543906835e-06, "llm_loss": 0.0, "loss": 0.0256, "step": 158950 }, { "action_loss": 0.0201, "epoch": 14.947823634483408, "learning_rate": 5.208670658638996e-06, "llm_loss": 0.0, "loss": 0.0201, "step": 159000 }, { "action_loss": 0.0181, "epoch": 14.95252420795337, "learning_rate": 5.202868867497542e-06, "llm_loss": 0.0, "loss": 0.0181, "step": 159050 }, { "action_loss": 0.021, "epoch": 14.957224781423333, "learning_rate": 5.197069173018241e-06, "llm_loss": 0.0, "loss": 0.021, "step": 159100 }, { "action_loss": 0.0144, "epoch": 14.961925354893298, "learning_rate": 5.191271577735943e-06, "llm_loss": 0.0, "loss": 0.0144, "step": 159150 }, { "action_loss": 0.0331, "epoch": 14.96662592836326, "learning_rate": 5.185476084184579e-06, "llm_loss": 0.0, "loss": 0.0331, "step": 159200 }, { "action_loss": 0.005, "epoch": 14.971326501833223, "learning_rate": 5.179682694897159e-06, "llm_loss": 0.0, "loss": 0.005, "step": 159250 }, { "action_loss": 0.0144, "epoch": 14.976027075303186, "learning_rate": 5.173891412405778e-06, "llm_loss": 0.0, "loss": 0.0144, "step": 159300 }, { "action_loss": 0.0134, "epoch": 14.98072764877315, "learning_rate": 5.168102239241606e-06, "llm_loss": 0.0, "loss": 0.0134, "step": 159350 }, { "action_loss": 0.0399, "epoch": 14.985428222243113, "learning_rate": 5.162315177934893e-06, "llm_loss": 0.0, "loss": 0.0399, "step": 159400 }, { "action_loss": 0.0318, "epoch": 14.990128795713076, "learning_rate": 5.156530231014967e-06, "llm_loss": 0.0, "loss": 0.0318, "step": 159450 }, { "action_loss": 0.0341, "epoch": 14.99482936918304, "learning_rate": 5.15074740101023e-06, "llm_loss": 0.0, "loss": 0.0341, "step": 159500 }, { "action_loss": 0.0245, "epoch": 14.999529942653004, "learning_rate": 5.144966690448159e-06, "llm_loss": 0.0, "loss": 0.0245, "step": 159550 }, { "action_loss": 0.0108, "epoch": 15.004230516122966, "learning_rate": 5.139188101855306e-06, "llm_loss": 0.0, "loss": 0.0108, "step": 159600 }, { "action_loss": 0.0174, "epoch": 15.00893108959293, "learning_rate": 5.133411637757297e-06, "llm_loss": 0.0, "loss": 0.0174, "step": 159650 }, { "action_loss": 0.0353, "epoch": 15.013631663062894, "learning_rate": 5.1276373006788216e-06, "llm_loss": 0.0, "loss": 0.0353, "step": 159700 }, { "action_loss": 0.0292, "epoch": 15.018332236532856, "learning_rate": 5.121865093143648e-06, "llm_loss": 0.0, "loss": 0.0292, "step": 159750 }, { "action_loss": 0.0188, "epoch": 15.023032810002821, "learning_rate": 5.11609501767461e-06, "llm_loss": 0.0, "loss": 0.0188, "step": 159800 }, { "action_loss": 0.0221, "epoch": 15.027733383472784, "learning_rate": 5.110327076793613e-06, "llm_loss": 0.0, "loss": 0.0221, "step": 159850 }, { "action_loss": 0.0215, "epoch": 15.032433956942747, "learning_rate": 5.1045612730216246e-06, "llm_loss": 0.0, "loss": 0.0215, "step": 159900 }, { "action_loss": 0.0227, "epoch": 15.037134530412711, "learning_rate": 5.098912861176882e-06, "llm_loss": 0.0, "loss": 0.0227, "step": 159950 }, { "action_loss": 0.0254, "epoch": 15.041835103882674, "learning_rate": 5.093151296314442e-06, "llm_loss": 0.0, "loss": 0.0254, "step": 160000 }, { "action_loss": 0.0139, "epoch": 15.046535677352637, "learning_rate": 5.087391876067956e-06, "llm_loss": 0.0, "loss": 0.0139, "step": 160050 }, { "action_loss": 0.0154, "epoch": 15.051236250822601, "learning_rate": 5.0816346029546745e-06, "llm_loss": 0.0, "loss": 0.0154, "step": 160100 }, { "action_loss": 0.0117, "epoch": 15.055936824292564, "learning_rate": 5.075879479490902e-06, "llm_loss": 0.0, "loss": 0.0117, "step": 160150 }, { "action_loss": 0.0168, "epoch": 15.060637397762527, "learning_rate": 5.070126508192006e-06, "llm_loss": 0.0, "loss": 0.0168, "step": 160200 }, { "action_loss": 0.02, "epoch": 15.06533797123249, "learning_rate": 5.064375691572417e-06, "llm_loss": 0.0, "loss": 0.02, "step": 160250 }, { "action_loss": 0.0288, "epoch": 15.070038544702454, "learning_rate": 5.05862703214562e-06, "llm_loss": 0.0, "loss": 0.0288, "step": 160300 }, { "action_loss": 0.0223, "epoch": 15.074739118172417, "learning_rate": 5.052880532424152e-06, "llm_loss": 0.0, "loss": 0.0223, "step": 160350 }, { "action_loss": 0.0188, "epoch": 15.07943969164238, "learning_rate": 5.047136194919617e-06, "llm_loss": 0.0, "loss": 0.0188, "step": 160400 }, { "action_loss": 0.0169, "epoch": 15.084140265112344, "learning_rate": 5.04139402214267e-06, "llm_loss": 0.0, "loss": 0.0169, "step": 160450 }, { "action_loss": 0.0119, "epoch": 15.088840838582307, "learning_rate": 5.035654016603013e-06, "llm_loss": 0.0, "loss": 0.0119, "step": 160500 }, { "action_loss": 0.024, "epoch": 15.09354141205227, "learning_rate": 5.029916180809409e-06, "llm_loss": 0.0, "loss": 0.024, "step": 160550 }, { "action_loss": 0.0303, "epoch": 15.098241985522234, "learning_rate": 5.024180517269671e-06, "llm_loss": 0.0, "loss": 0.0303, "step": 160600 }, { "action_loss": 0.0219, "epoch": 15.102942558992197, "learning_rate": 5.018447028490661e-06, "llm_loss": 0.0, "loss": 0.0219, "step": 160650 }, { "action_loss": 0.0319, "epoch": 15.10764313246216, "learning_rate": 5.012715716978289e-06, "llm_loss": 0.0, "loss": 0.0319, "step": 160700 }, { "action_loss": 0.0186, "epoch": 15.112343705932124, "learning_rate": 5.0069865852375195e-06, "llm_loss": 0.0, "loss": 0.0186, "step": 160750 }, { "action_loss": 0.0141, "epoch": 15.117044279402087, "learning_rate": 5.001259635772358e-06, "llm_loss": 0.0, "loss": 0.0141, "step": 160800 }, { "action_loss": 0.0114, "epoch": 15.12174485287205, "learning_rate": 4.9955348710858585e-06, "llm_loss": 0.0, "loss": 0.0114, "step": 160850 }, { "action_loss": 0.0114, "epoch": 15.126445426342014, "learning_rate": 4.9898122936801205e-06, "llm_loss": 0.0, "loss": 0.0114, "step": 160900 }, { "action_loss": 0.0174, "epoch": 15.131145999811977, "learning_rate": 4.984091906056287e-06, "llm_loss": 0.0, "loss": 0.0174, "step": 160950 }, { "action_loss": 0.0251, "epoch": 15.13584657328194, "learning_rate": 4.9783737107145445e-06, "llm_loss": 0.0, "loss": 0.0251, "step": 161000 }, { "action_loss": 0.0269, "epoch": 15.140547146751905, "learning_rate": 4.972657710154119e-06, "llm_loss": 0.0, "loss": 0.0269, "step": 161050 }, { "action_loss": 0.0117, "epoch": 15.145247720221867, "learning_rate": 4.966943906873286e-06, "llm_loss": 0.0, "loss": 0.0117, "step": 161100 }, { "action_loss": 0.0173, "epoch": 15.14994829369183, "learning_rate": 4.961232303369344e-06, "llm_loss": 0.0, "loss": 0.0173, "step": 161150 }, { "action_loss": 0.0181, "epoch": 15.154648867161793, "learning_rate": 4.955522902138643e-06, "llm_loss": 0.0, "loss": 0.0181, "step": 161200 }, { "action_loss": 0.0209, "epoch": 15.159349440631757, "learning_rate": 4.9498157056765686e-06, "llm_loss": 0.0, "loss": 0.0209, "step": 161250 }, { "action_loss": 0.0165, "epoch": 15.16405001410172, "learning_rate": 4.944110716477541e-06, "llm_loss": 0.0, "loss": 0.0165, "step": 161300 }, { "action_loss": 0.0115, "epoch": 15.168750587571683, "learning_rate": 4.938407937035021e-06, "llm_loss": 0.0, "loss": 0.0115, "step": 161350 }, { "action_loss": 0.0106, "epoch": 15.173451161041648, "learning_rate": 4.932707369841489e-06, "llm_loss": 0.0, "loss": 0.0106, "step": 161400 }, { "action_loss": 0.0145, "epoch": 15.17815173451161, "learning_rate": 4.9270090173884734e-06, "llm_loss": 0.0, "loss": 0.0145, "step": 161450 }, { "action_loss": 0.0084, "epoch": 15.182852307981573, "learning_rate": 4.921312882166529e-06, "llm_loss": 0.0, "loss": 0.0084, "step": 161500 }, { "action_loss": 0.0083, "epoch": 15.187552881451538, "learning_rate": 4.915618966665242e-06, "llm_loss": 0.0, "loss": 0.0083, "step": 161550 }, { "action_loss": 0.0245, "epoch": 15.1922534549215, "learning_rate": 4.909927273373229e-06, "llm_loss": 0.0, "loss": 0.0245, "step": 161600 }, { "action_loss": 0.0244, "epoch": 15.196954028391463, "learning_rate": 4.9042378047781335e-06, "llm_loss": 0.0, "loss": 0.0244, "step": 161650 }, { "action_loss": 0.0249, "epoch": 15.201654601861428, "learning_rate": 4.898550563366628e-06, "llm_loss": 0.0, "loss": 0.0249, "step": 161700 }, { "action_loss": 0.0243, "epoch": 15.20635517533139, "learning_rate": 4.892865551624413e-06, "llm_loss": 0.0, "loss": 0.0243, "step": 161750 }, { "action_loss": 0.0214, "epoch": 15.211055748801353, "learning_rate": 4.887182772036212e-06, "llm_loss": 0.0, "loss": 0.0214, "step": 161800 }, { "action_loss": 0.0174, "epoch": 15.215756322271318, "learning_rate": 4.8815022270857726e-06, "llm_loss": 0.0, "loss": 0.0174, "step": 161850 }, { "action_loss": 0.0215, "epoch": 15.22045689574128, "learning_rate": 4.8758239192558734e-06, "llm_loss": 0.0, "loss": 0.0215, "step": 161900 }, { "action_loss": 0.0139, "epoch": 15.225157469211243, "learning_rate": 4.870147851028297e-06, "llm_loss": 0.0, "loss": 0.0139, "step": 161950 }, { "action_loss": 0.03, "epoch": 15.229858042681206, "learning_rate": 4.864474024883868e-06, "llm_loss": 0.0, "loss": 0.03, "step": 162000 }, { "action_loss": 0.0233, "epoch": 15.23455861615117, "learning_rate": 4.858802443302418e-06, "llm_loss": 0.0, "loss": 0.0233, "step": 162050 }, { "action_loss": 0.0086, "epoch": 15.239259189621134, "learning_rate": 4.853133108762804e-06, "llm_loss": 0.0, "loss": 0.0086, "step": 162100 }, { "action_loss": 0.0149, "epoch": 15.243959763091096, "learning_rate": 4.847466023742896e-06, "llm_loss": 0.0, "loss": 0.0149, "step": 162150 }, { "action_loss": 0.0204, "epoch": 15.248660336561061, "learning_rate": 4.841801190719586e-06, "llm_loss": 0.0, "loss": 0.0204, "step": 162200 }, { "action_loss": 0.0304, "epoch": 15.253360910031024, "learning_rate": 4.836138612168777e-06, "llm_loss": 0.0, "loss": 0.0304, "step": 162250 }, { "action_loss": 0.0188, "epoch": 15.258061483500986, "learning_rate": 4.830478290565393e-06, "llm_loss": 0.0, "loss": 0.0188, "step": 162300 }, { "action_loss": 0.0298, "epoch": 15.262762056970951, "learning_rate": 4.824820228383361e-06, "llm_loss": 0.0, "loss": 0.0298, "step": 162350 }, { "action_loss": 0.022, "epoch": 15.267462630440914, "learning_rate": 4.819164428095629e-06, "llm_loss": 0.0, "loss": 0.022, "step": 162400 }, { "action_loss": 0.0542, "epoch": 15.272163203910877, "learning_rate": 4.813510892174153e-06, "llm_loss": 0.0, "loss": 0.0542, "step": 162450 }, { "action_loss": 0.0256, "epoch": 15.276863777380841, "learning_rate": 4.807859623089903e-06, "llm_loss": 0.0, "loss": 0.0256, "step": 162500 }, { "action_loss": 0.0203, "epoch": 15.281564350850804, "learning_rate": 4.802210623312854e-06, "llm_loss": 0.0, "loss": 0.0203, "step": 162550 }, { "action_loss": 0.0298, "epoch": 15.286264924320767, "learning_rate": 4.796563895311992e-06, "llm_loss": 0.0, "loss": 0.0298, "step": 162600 }, { "action_loss": 0.0354, "epoch": 15.290965497790731, "learning_rate": 4.790919441555309e-06, "llm_loss": 0.0, "loss": 0.0354, "step": 162650 }, { "action_loss": 0.0265, "epoch": 15.295666071260694, "learning_rate": 4.785277264509805e-06, "llm_loss": 0.0, "loss": 0.0265, "step": 162700 }, { "action_loss": 0.0218, "epoch": 15.300366644730657, "learning_rate": 4.779637366641475e-06, "llm_loss": 0.0, "loss": 0.0218, "step": 162750 }, { "action_loss": 0.0212, "epoch": 15.305067218200621, "learning_rate": 4.773999750415332e-06, "llm_loss": 0.0, "loss": 0.0212, "step": 162800 }, { "action_loss": 0.018, "epoch": 15.309767791670584, "learning_rate": 4.768364418295384e-06, "llm_loss": 0.0, "loss": 0.018, "step": 162850 }, { "action_loss": 0.0147, "epoch": 15.314468365140547, "learning_rate": 4.762731372744644e-06, "llm_loss": 0.0, "loss": 0.0147, "step": 162900 }, { "action_loss": 0.0222, "epoch": 15.31916893861051, "learning_rate": 4.757100616225121e-06, "llm_loss": 0.0, "loss": 0.0222, "step": 162950 }, { "action_loss": 0.0313, "epoch": 15.323869512080474, "learning_rate": 4.7514721511978276e-06, "llm_loss": 0.0, "loss": 0.0313, "step": 163000 }, { "action_loss": 0.0233, "epoch": 15.328570085550437, "learning_rate": 4.745845980122773e-06, "llm_loss": 0.0, "loss": 0.0233, "step": 163050 }, { "action_loss": 0.0138, "epoch": 15.3332706590204, "learning_rate": 4.740222105458966e-06, "llm_loss": 0.0, "loss": 0.0138, "step": 163100 }, { "action_loss": 0.0112, "epoch": 15.337971232490364, "learning_rate": 4.7346005296644085e-06, "llm_loss": 0.0, "loss": 0.0112, "step": 163150 }, { "action_loss": 0.0084, "epoch": 15.342671805960327, "learning_rate": 4.728981255196104e-06, "llm_loss": 0.0, "loss": 0.0084, "step": 163200 }, { "action_loss": 0.012, "epoch": 15.34737237943029, "learning_rate": 4.723364284510037e-06, "llm_loss": 0.0, "loss": 0.012, "step": 163250 }, { "action_loss": 0.0221, "epoch": 15.352072952900254, "learning_rate": 4.717749620061199e-06, "llm_loss": 0.0, "loss": 0.0221, "step": 163300 }, { "action_loss": 0.022, "epoch": 15.356773526370217, "learning_rate": 4.7121372643035646e-06, "llm_loss": 0.0, "loss": 0.022, "step": 163350 }, { "action_loss": 0.0112, "epoch": 15.36147409984018, "learning_rate": 4.706527219690106e-06, "llm_loss": 0.0, "loss": 0.0112, "step": 163400 }, { "action_loss": 0.0188, "epoch": 15.366174673310145, "learning_rate": 4.700919488672785e-06, "llm_loss": 0.0, "loss": 0.0188, "step": 163450 }, { "action_loss": 0.017, "epoch": 15.370875246780107, "learning_rate": 4.695314073702541e-06, "llm_loss": 0.0, "loss": 0.017, "step": 163500 }, { "action_loss": 0.0102, "epoch": 15.37557582025007, "learning_rate": 4.689710977229315e-06, "llm_loss": 0.0, "loss": 0.0102, "step": 163550 }, { "action_loss": 0.0289, "epoch": 15.380276393720035, "learning_rate": 4.684110201702027e-06, "llm_loss": 0.0, "loss": 0.0289, "step": 163600 }, { "action_loss": 0.0251, "epoch": 15.384976967189997, "learning_rate": 4.6785117495685865e-06, "llm_loss": 0.0, "loss": 0.0251, "step": 163650 }, { "action_loss": 0.0306, "epoch": 15.38967754065996, "learning_rate": 4.672915623275883e-06, "llm_loss": 0.0, "loss": 0.0306, "step": 163700 }, { "action_loss": 0.0144, "epoch": 15.394378114129925, "learning_rate": 4.667321825269795e-06, "llm_loss": 0.0, "loss": 0.0144, "step": 163750 }, { "action_loss": 0.0303, "epoch": 15.399078687599888, "learning_rate": 4.661730357995179e-06, "llm_loss": 0.0, "loss": 0.0303, "step": 163800 }, { "action_loss": 0.0224, "epoch": 15.40377926106985, "learning_rate": 4.656141223895874e-06, "llm_loss": 0.0, "loss": 0.0224, "step": 163850 }, { "action_loss": 0.0174, "epoch": 15.408479834539813, "learning_rate": 4.650554425414701e-06, "llm_loss": 0.0, "loss": 0.0174, "step": 163900 }, { "action_loss": 0.0191, "epoch": 15.413180408009778, "learning_rate": 4.644969964993457e-06, "llm_loss": 0.0, "loss": 0.0191, "step": 163950 }, { "action_loss": 0.0254, "epoch": 15.41788098147974, "learning_rate": 4.639387845072923e-06, "llm_loss": 0.0, "loss": 0.0254, "step": 164000 }, { "action_loss": 0.0134, "epoch": 15.422581554949703, "learning_rate": 4.633808068092848e-06, "llm_loss": 0.0, "loss": 0.0134, "step": 164050 }, { "action_loss": 0.0346, "epoch": 15.427282128419668, "learning_rate": 4.6282306364919675e-06, "llm_loss": 0.0, "loss": 0.0346, "step": 164100 }, { "action_loss": 0.026, "epoch": 15.43198270188963, "learning_rate": 4.6226555527079784e-06, "llm_loss": 0.0, "loss": 0.026, "step": 164150 }, { "action_loss": 0.0234, "epoch": 15.436683275359593, "learning_rate": 4.617082819177564e-06, "llm_loss": 0.0, "loss": 0.0234, "step": 164200 }, { "action_loss": 0.0041, "epoch": 15.441383848829558, "learning_rate": 4.61151243833638e-06, "llm_loss": 0.0, "loss": 0.0041, "step": 164250 }, { "action_loss": 0.0266, "epoch": 15.44608442229952, "learning_rate": 4.60594441261904e-06, "llm_loss": 0.0, "loss": 0.0266, "step": 164300 }, { "action_loss": 0.0232, "epoch": 15.450784995769483, "learning_rate": 4.600378744459143e-06, "llm_loss": 0.0, "loss": 0.0232, "step": 164350 }, { "action_loss": 0.0175, "epoch": 15.455485569239448, "learning_rate": 4.5948154362892526e-06, "llm_loss": 0.0, "loss": 0.0175, "step": 164400 }, { "action_loss": 0.0172, "epoch": 15.46018614270941, "learning_rate": 4.589254490540901e-06, "llm_loss": 0.0, "loss": 0.0172, "step": 164450 }, { "action_loss": 0.0225, "epoch": 15.464886716179373, "learning_rate": 4.583695909644586e-06, "llm_loss": 0.0, "loss": 0.0225, "step": 164500 }, { "action_loss": 0.0163, "epoch": 15.469587289649338, "learning_rate": 4.578139696029776e-06, "llm_loss": 0.0, "loss": 0.0163, "step": 164550 }, { "action_loss": 0.0266, "epoch": 15.4742878631193, "learning_rate": 4.5725858521249e-06, "llm_loss": 0.0, "loss": 0.0266, "step": 164600 }, { "action_loss": 0.0313, "epoch": 15.478988436589264, "learning_rate": 4.567034380357353e-06, "llm_loss": 0.0, "loss": 0.0313, "step": 164650 }, { "action_loss": 0.0177, "epoch": 15.483689010059226, "learning_rate": 4.561485283153496e-06, "llm_loss": 0.0, "loss": 0.0177, "step": 164700 }, { "action_loss": 0.0184, "epoch": 15.488389583529191, "learning_rate": 4.555938562938649e-06, "llm_loss": 0.0, "loss": 0.0184, "step": 164750 }, { "action_loss": 0.0201, "epoch": 15.493090156999154, "learning_rate": 4.550394222137093e-06, "llm_loss": 0.0, "loss": 0.0201, "step": 164800 }, { "action_loss": 0.0131, "epoch": 15.497790730469116, "learning_rate": 4.544852263172072e-06, "llm_loss": 0.0, "loss": 0.0131, "step": 164850 }, { "action_loss": 0.0177, "epoch": 15.502491303939081, "learning_rate": 4.539312688465787e-06, "llm_loss": 0.0, "loss": 0.0177, "step": 164900 }, { "action_loss": 0.017, "epoch": 15.507191877409044, "learning_rate": 4.533775500439393e-06, "llm_loss": 0.0, "loss": 0.017, "step": 164950 }, { "action_loss": 0.0113, "epoch": 15.511892450879007, "learning_rate": 4.528240701513007e-06, "llm_loss": 0.0, "loss": 0.0113, "step": 165000 }, { "action_loss": 0.0218, "epoch": 15.516593024348971, "learning_rate": 4.522708294105707e-06, "llm_loss": 0.0, "loss": 0.0218, "step": 165050 }, { "action_loss": 0.0109, "epoch": 15.521293597818934, "learning_rate": 4.517178280635509e-06, "llm_loss": 0.0, "loss": 0.0109, "step": 165100 }, { "action_loss": 0.0152, "epoch": 15.525994171288897, "learning_rate": 4.511650663519399e-06, "llm_loss": 0.0, "loss": 0.0152, "step": 165150 }, { "action_loss": 0.0077, "epoch": 15.530694744758861, "learning_rate": 4.506125445173309e-06, "llm_loss": 0.0, "loss": 0.0077, "step": 165200 }, { "action_loss": 0.028, "epoch": 15.535395318228824, "learning_rate": 4.5006026280121206e-06, "llm_loss": 0.0, "loss": 0.028, "step": 165250 }, { "action_loss": 0.0234, "epoch": 15.540095891698787, "learning_rate": 4.495082214449673e-06, "llm_loss": 0.0, "loss": 0.0234, "step": 165300 }, { "action_loss": 0.0228, "epoch": 15.544796465168751, "learning_rate": 4.489564206898747e-06, "llm_loss": 0.0, "loss": 0.0228, "step": 165350 }, { "action_loss": 0.0146, "epoch": 15.549497038638714, "learning_rate": 4.4840486077710765e-06, "llm_loss": 0.0, "loss": 0.0146, "step": 165400 }, { "action_loss": 0.0239, "epoch": 15.554197612108677, "learning_rate": 4.4785354194773414e-06, "llm_loss": 0.0, "loss": 0.0239, "step": 165450 }, { "action_loss": 0.026, "epoch": 15.558898185578641, "learning_rate": 4.473024644427168e-06, "llm_loss": 0.0, "loss": 0.026, "step": 165500 }, { "action_loss": 0.0203, "epoch": 15.563598759048604, "learning_rate": 4.4675162850291274e-06, "llm_loss": 0.0, "loss": 0.0203, "step": 165550 }, { "action_loss": 0.0144, "epoch": 15.568299332518567, "learning_rate": 4.462010343690737e-06, "llm_loss": 0.0, "loss": 0.0144, "step": 165600 }, { "action_loss": 0.0207, "epoch": 15.57299990598853, "learning_rate": 4.456506822818452e-06, "llm_loss": 0.0, "loss": 0.0207, "step": 165650 }, { "action_loss": 0.0149, "epoch": 15.577700479458494, "learning_rate": 4.451005724817682e-06, "llm_loss": 0.0, "loss": 0.0149, "step": 165700 }, { "action_loss": 0.0128, "epoch": 15.582401052928457, "learning_rate": 4.4455070520927575e-06, "llm_loss": 0.0, "loss": 0.0128, "step": 165750 }, { "action_loss": 0.01, "epoch": 15.58710162639842, "learning_rate": 4.440010807046966e-06, "llm_loss": 0.0, "loss": 0.01, "step": 165800 }, { "action_loss": 0.0115, "epoch": 15.591802199868384, "learning_rate": 4.434516992082529e-06, "llm_loss": 0.0, "loss": 0.0115, "step": 165850 }, { "action_loss": 0.0105, "epoch": 15.596502773338347, "learning_rate": 4.4290256096006034e-06, "llm_loss": 0.0, "loss": 0.0105, "step": 165900 }, { "action_loss": 0.0115, "epoch": 15.60120334680831, "learning_rate": 4.423536662001288e-06, "llm_loss": 0.0, "loss": 0.0115, "step": 165950 }, { "action_loss": 0.0206, "epoch": 15.605903920278275, "learning_rate": 4.418050151683614e-06, "llm_loss": 0.0, "loss": 0.0206, "step": 166000 }, { "action_loss": 0.0191, "epoch": 15.610604493748237, "learning_rate": 4.412566081045544e-06, "llm_loss": 0.0, "loss": 0.0191, "step": 166050 }, { "action_loss": 0.0184, "epoch": 15.6153050672182, "learning_rate": 4.407084452483978e-06, "llm_loss": 0.0, "loss": 0.0184, "step": 166100 }, { "action_loss": 0.0137, "epoch": 15.620005640688165, "learning_rate": 4.401605268394753e-06, "llm_loss": 0.0, "loss": 0.0137, "step": 166150 }, { "action_loss": 0.0104, "epoch": 15.624706214158127, "learning_rate": 4.39612853117263e-06, "llm_loss": 0.0, "loss": 0.0104, "step": 166200 }, { "action_loss": 0.0199, "epoch": 15.62940678762809, "learning_rate": 4.3906542432113045e-06, "llm_loss": 0.0, "loss": 0.0199, "step": 166250 }, { "action_loss": 0.0109, "epoch": 15.634107361098055, "learning_rate": 4.3851824069034e-06, "llm_loss": 0.0, "loss": 0.0109, "step": 166300 }, { "action_loss": 0.0309, "epoch": 15.638807934568018, "learning_rate": 4.379713024640471e-06, "llm_loss": 0.0, "loss": 0.0309, "step": 166350 }, { "action_loss": 0.0391, "epoch": 15.64350850803798, "learning_rate": 4.374246098812996e-06, "llm_loss": 0.0, "loss": 0.0391, "step": 166400 }, { "action_loss": 0.0221, "epoch": 15.648209081507943, "learning_rate": 4.368781631810386e-06, "llm_loss": 0.0, "loss": 0.0221, "step": 166450 }, { "action_loss": 0.0159, "epoch": 15.652909654977908, "learning_rate": 4.363319626020967e-06, "llm_loss": 0.0, "loss": 0.0159, "step": 166500 }, { "action_loss": 0.0237, "epoch": 15.65761022844787, "learning_rate": 4.357860083831995e-06, "llm_loss": 0.0, "loss": 0.0237, "step": 166550 }, { "action_loss": 0.034, "epoch": 15.662310801917833, "learning_rate": 4.352403007629653e-06, "llm_loss": 0.0, "loss": 0.034, "step": 166600 }, { "action_loss": 0.0233, "epoch": 15.667011375387798, "learning_rate": 4.3469483997990415e-06, "llm_loss": 0.0, "loss": 0.0233, "step": 166650 }, { "action_loss": 0.0246, "epoch": 15.67171194885776, "learning_rate": 4.341496262724183e-06, "llm_loss": 0.0, "loss": 0.0246, "step": 166700 }, { "action_loss": 0.0165, "epoch": 15.676412522327723, "learning_rate": 4.33604659878802e-06, "llm_loss": 0.0, "loss": 0.0165, "step": 166750 }, { "action_loss": 0.0105, "epoch": 15.681113095797688, "learning_rate": 4.330599410372415e-06, "llm_loss": 0.0, "loss": 0.0105, "step": 166800 }, { "action_loss": 0.0172, "epoch": 15.68581366926765, "learning_rate": 4.325154699858148e-06, "llm_loss": 0.0, "loss": 0.0172, "step": 166850 }, { "action_loss": 0.0226, "epoch": 15.690514242737613, "learning_rate": 4.3197124696249195e-06, "llm_loss": 0.0, "loss": 0.0226, "step": 166900 }, { "action_loss": 0.0279, "epoch": 15.695214816207578, "learning_rate": 4.314272722051335e-06, "llm_loss": 0.0, "loss": 0.0279, "step": 166950 }, { "action_loss": 0.0274, "epoch": 15.69991538967754, "learning_rate": 4.308835459514926e-06, "llm_loss": 0.0, "loss": 0.0274, "step": 167000 }, { "action_loss": 0.0276, "epoch": 15.704615963147504, "learning_rate": 4.303400684392135e-06, "llm_loss": 0.0, "loss": 0.0276, "step": 167050 }, { "action_loss": 0.025, "epoch": 15.709316536617468, "learning_rate": 4.297968399058315e-06, "llm_loss": 0.0, "loss": 0.025, "step": 167100 }, { "action_loss": 0.018, "epoch": 15.71401711008743, "learning_rate": 4.292538605887735e-06, "llm_loss": 0.0, "loss": 0.018, "step": 167150 }, { "action_loss": 0.0441, "epoch": 15.718717683557394, "learning_rate": 4.287111307253568e-06, "llm_loss": 0.0, "loss": 0.0441, "step": 167200 }, { "action_loss": 0.0238, "epoch": 15.723418257027358, "learning_rate": 4.281686505527909e-06, "llm_loss": 0.0, "loss": 0.0238, "step": 167250 }, { "action_loss": 0.0173, "epoch": 15.728118830497321, "learning_rate": 4.276264203081744e-06, "llm_loss": 0.0, "loss": 0.0173, "step": 167300 }, { "action_loss": 0.027, "epoch": 15.732819403967284, "learning_rate": 4.270844402284978e-06, "llm_loss": 0.0, "loss": 0.027, "step": 167350 }, { "action_loss": 0.0345, "epoch": 15.737519977437248, "learning_rate": 4.265427105506424e-06, "llm_loss": 0.0, "loss": 0.0345, "step": 167400 }, { "action_loss": 0.0149, "epoch": 15.742220550907211, "learning_rate": 4.260012315113796e-06, "llm_loss": 0.0, "loss": 0.0149, "step": 167450 }, { "action_loss": 0.0227, "epoch": 15.746921124377174, "learning_rate": 4.2546000334737145e-06, "llm_loss": 0.0, "loss": 0.0227, "step": 167500 }, { "action_loss": 0.0135, "epoch": 15.751621697847137, "learning_rate": 4.249190262951702e-06, "llm_loss": 0.0, "loss": 0.0135, "step": 167550 }, { "action_loss": 0.0085, "epoch": 15.756322271317101, "learning_rate": 4.243783005912185e-06, "llm_loss": 0.0, "loss": 0.0085, "step": 167600 }, { "action_loss": 0.0339, "epoch": 15.761022844787064, "learning_rate": 4.238378264718491e-06, "llm_loss": 0.0, "loss": 0.0339, "step": 167650 }, { "action_loss": 0.021, "epoch": 15.765723418257027, "learning_rate": 4.2329760417328484e-06, "llm_loss": 0.0, "loss": 0.021, "step": 167700 }, { "action_loss": 0.0213, "epoch": 15.770423991726991, "learning_rate": 4.227576339316382e-06, "llm_loss": 0.0, "loss": 0.0213, "step": 167750 }, { "action_loss": 0.027, "epoch": 15.775124565196954, "learning_rate": 4.2221791598291185e-06, "llm_loss": 0.0, "loss": 0.027, "step": 167800 }, { "action_loss": 0.0272, "epoch": 15.779825138666917, "learning_rate": 4.216784505629985e-06, "llm_loss": 0.0, "loss": 0.0272, "step": 167850 }, { "action_loss": 0.0117, "epoch": 15.784525712136881, "learning_rate": 4.211392379076793e-06, "llm_loss": 0.0, "loss": 0.0117, "step": 167900 }, { "action_loss": 0.0143, "epoch": 15.789226285606844, "learning_rate": 4.206002782526259e-06, "llm_loss": 0.0, "loss": 0.0143, "step": 167950 }, { "action_loss": 0.024, "epoch": 15.793926859076807, "learning_rate": 4.200615718333995e-06, "llm_loss": 0.0, "loss": 0.024, "step": 168000 }, { "action_loss": 0.0102, "epoch": 15.798627432546771, "learning_rate": 4.195231188854503e-06, "llm_loss": 0.0, "loss": 0.0102, "step": 168050 }, { "action_loss": 0.0225, "epoch": 15.803328006016734, "learning_rate": 4.1898491964411714e-06, "llm_loss": 0.0, "loss": 0.0225, "step": 168100 }, { "action_loss": 0.0207, "epoch": 15.808028579486697, "learning_rate": 4.184469743446288e-06, "llm_loss": 0.0, "loss": 0.0207, "step": 168150 }, { "action_loss": 0.018, "epoch": 15.81272915295666, "learning_rate": 4.179092832221028e-06, "llm_loss": 0.0, "loss": 0.018, "step": 168200 }, { "action_loss": 0.0081, "epoch": 15.817429726426624, "learning_rate": 4.1737184651154564e-06, "llm_loss": 0.0, "loss": 0.0081, "step": 168250 }, { "action_loss": 0.0299, "epoch": 15.822130299896587, "learning_rate": 4.168346644478525e-06, "llm_loss": 0.0, "loss": 0.0299, "step": 168300 }, { "action_loss": 0.0212, "epoch": 15.82683087336655, "learning_rate": 4.162977372658072e-06, "llm_loss": 0.0, "loss": 0.0212, "step": 168350 }, { "action_loss": 0.0324, "epoch": 15.831531446836514, "learning_rate": 4.157610652000825e-06, "llm_loss": 0.0, "loss": 0.0324, "step": 168400 }, { "action_loss": 0.0149, "epoch": 15.836232020306477, "learning_rate": 4.1522464848523924e-06, "llm_loss": 0.0, "loss": 0.0149, "step": 168450 }, { "action_loss": 0.0247, "epoch": 15.84093259377644, "learning_rate": 4.1468848735572695e-06, "llm_loss": 0.0, "loss": 0.0247, "step": 168500 }, { "action_loss": 0.0271, "epoch": 15.845633167246405, "learning_rate": 4.141525820458833e-06, "llm_loss": 0.0, "loss": 0.0271, "step": 168550 }, { "action_loss": 0.0217, "epoch": 15.850333740716367, "learning_rate": 4.136169327899342e-06, "llm_loss": 0.0, "loss": 0.0217, "step": 168600 }, { "action_loss": 0.0228, "epoch": 15.85503431418633, "learning_rate": 4.130815398219936e-06, "llm_loss": 0.0, "loss": 0.0228, "step": 168650 }, { "action_loss": 0.023, "epoch": 15.859734887656295, "learning_rate": 4.12546403376064e-06, "llm_loss": 0.0, "loss": 0.023, "step": 168700 }, { "action_loss": 0.0277, "epoch": 15.864435461126257, "learning_rate": 4.120115236860343e-06, "llm_loss": 0.0, "loss": 0.0277, "step": 168750 }, { "action_loss": 0.0206, "epoch": 15.86913603459622, "learning_rate": 4.114769009856827e-06, "llm_loss": 0.0, "loss": 0.0206, "step": 168800 }, { "action_loss": 0.0165, "epoch": 15.873836608066185, "learning_rate": 4.109425355086748e-06, "llm_loss": 0.0, "loss": 0.0165, "step": 168850 }, { "action_loss": 0.0212, "epoch": 15.878537181536148, "learning_rate": 4.104084274885627e-06, "llm_loss": 0.0, "loss": 0.0212, "step": 168900 }, { "action_loss": 0.0228, "epoch": 15.88323775500611, "learning_rate": 4.098745771587873e-06, "llm_loss": 0.0, "loss": 0.0228, "step": 168950 }, { "action_loss": 0.0148, "epoch": 15.887938328476075, "learning_rate": 4.093409847526762e-06, "llm_loss": 0.0, "loss": 0.0148, "step": 169000 }, { "action_loss": 0.018, "epoch": 15.892638901946038, "learning_rate": 4.088076505034445e-06, "llm_loss": 0.0, "loss": 0.018, "step": 169050 }, { "action_loss": 0.0082, "epoch": 15.897339475416, "learning_rate": 4.082745746441943e-06, "llm_loss": 0.0, "loss": 0.0082, "step": 169100 }, { "action_loss": 0.0251, "epoch": 15.902040048885965, "learning_rate": 4.07741757407915e-06, "llm_loss": 0.0, "loss": 0.0251, "step": 169150 }, { "action_loss": 0.017, "epoch": 15.906740622355928, "learning_rate": 4.072091990274826e-06, "llm_loss": 0.0, "loss": 0.017, "step": 169200 }, { "action_loss": 0.0078, "epoch": 15.91144119582589, "learning_rate": 4.066768997356604e-06, "llm_loss": 0.0, "loss": 0.0078, "step": 169250 }, { "action_loss": 0.017, "epoch": 15.916141769295853, "learning_rate": 4.06144859765098e-06, "llm_loss": 0.0, "loss": 0.017, "step": 169300 }, { "action_loss": 0.0084, "epoch": 15.920842342765818, "learning_rate": 4.0561307934833215e-06, "llm_loss": 0.0, "loss": 0.0084, "step": 169350 }, { "action_loss": 0.028, "epoch": 15.92554291623578, "learning_rate": 4.050815587177858e-06, "llm_loss": 0.0, "loss": 0.028, "step": 169400 }, { "action_loss": 0.0239, "epoch": 15.930243489705743, "learning_rate": 4.045502981057683e-06, "llm_loss": 0.0, "loss": 0.0239, "step": 169450 }, { "action_loss": 0.0063, "epoch": 15.934944063175708, "learning_rate": 4.040192977444761e-06, "llm_loss": 0.0, "loss": 0.0063, "step": 169500 }, { "action_loss": 0.0281, "epoch": 15.93964463664567, "learning_rate": 4.034885578659905e-06, "llm_loss": 0.0, "loss": 0.0281, "step": 169550 }, { "action_loss": 0.0154, "epoch": 15.944345210115634, "learning_rate": 4.029580787022801e-06, "llm_loss": 0.0, "loss": 0.0154, "step": 169600 }, { "action_loss": 0.0226, "epoch": 15.949045783585598, "learning_rate": 4.024278604851994e-06, "llm_loss": 0.0, "loss": 0.0226, "step": 169650 }, { "action_loss": 0.0175, "epoch": 15.95374635705556, "learning_rate": 4.018979034464888e-06, "llm_loss": 0.0, "loss": 0.0175, "step": 169700 }, { "action_loss": 0.0213, "epoch": 15.958446930525524, "learning_rate": 4.013682078177739e-06, "llm_loss": 0.0, "loss": 0.0213, "step": 169750 }, { "action_loss": 0.0188, "epoch": 15.963147503995488, "learning_rate": 4.008387738305667e-06, "llm_loss": 0.0, "loss": 0.0188, "step": 169800 }, { "action_loss": 0.033, "epoch": 15.967848077465451, "learning_rate": 4.003096017162649e-06, "llm_loss": 0.0, "loss": 0.033, "step": 169850 }, { "action_loss": 0.0164, "epoch": 15.972548650935414, "learning_rate": 3.997806917061515e-06, "llm_loss": 0.0, "loss": 0.0164, "step": 169900 }, { "action_loss": 0.0346, "epoch": 15.977249224405377, "learning_rate": 3.992520440313949e-06, "llm_loss": 0.0, "loss": 0.0346, "step": 169950 }, { "action_loss": 0.0173, "epoch": 15.981949797875341, "learning_rate": 3.987236589230492e-06, "llm_loss": 0.0, "loss": 0.0173, "step": 170000 }, { "action_loss": 0.0201, "epoch": 15.986650371345304, "learning_rate": 3.981955366120532e-06, "llm_loss": 0.0, "loss": 0.0201, "step": 170050 }, { "action_loss": 0.0305, "epoch": 15.991350944815267, "learning_rate": 3.976676773292313e-06, "llm_loss": 0.0, "loss": 0.0305, "step": 170100 }, { "action_loss": 0.024, "epoch": 15.996051518285231, "learning_rate": 3.971400813052927e-06, "llm_loss": 0.0, "loss": 0.024, "step": 170150 }, { "action_loss": 0.0181, "epoch": 16.000752091755196, "learning_rate": 3.9661274877083155e-06, "llm_loss": 0.0, "loss": 0.0181, "step": 170200 }, { "action_loss": 0.0149, "epoch": 16.005452665225157, "learning_rate": 3.960856799563273e-06, "llm_loss": 0.0, "loss": 0.0149, "step": 170250 }, { "action_loss": 0.0179, "epoch": 16.01015323869512, "learning_rate": 3.9555887509214295e-06, "llm_loss": 0.0, "loss": 0.0179, "step": 170300 }, { "action_loss": 0.0201, "epoch": 16.014853812165086, "learning_rate": 3.950323344085274e-06, "llm_loss": 0.0, "loss": 0.0201, "step": 170350 }, { "action_loss": 0.011, "epoch": 16.019554385635047, "learning_rate": 3.945060581356135e-06, "llm_loss": 0.0, "loss": 0.011, "step": 170400 }, { "action_loss": 0.012, "epoch": 16.02425495910501, "learning_rate": 3.939800465034187e-06, "llm_loss": 0.0, "loss": 0.012, "step": 170450 }, { "action_loss": 0.0375, "epoch": 16.028955532574972, "learning_rate": 3.9345429974184455e-06, "llm_loss": 0.0, "loss": 0.0375, "step": 170500 }, { "action_loss": 0.0171, "epoch": 16.033656106044937, "learning_rate": 3.929288180806773e-06, "llm_loss": 0.0, "loss": 0.0171, "step": 170550 }, { "action_loss": 0.0231, "epoch": 16.0383566795149, "learning_rate": 3.924036017495867e-06, "llm_loss": 0.0, "loss": 0.0231, "step": 170600 }, { "action_loss": 0.021, "epoch": 16.043057252984863, "learning_rate": 3.918786509781274e-06, "llm_loss": 0.0, "loss": 0.021, "step": 170650 }, { "action_loss": 0.0212, "epoch": 16.047757826454827, "learning_rate": 3.913539659957367e-06, "llm_loss": 0.0, "loss": 0.0212, "step": 170700 }, { "action_loss": 0.0245, "epoch": 16.05245839992479, "learning_rate": 3.908295470317369e-06, "llm_loss": 0.0, "loss": 0.0245, "step": 170750 }, { "action_loss": 0.0216, "epoch": 16.057158973394753, "learning_rate": 3.903053943153335e-06, "llm_loss": 0.0, "loss": 0.0216, "step": 170800 }, { "action_loss": 0.0209, "epoch": 16.061859546864717, "learning_rate": 3.897815080756159e-06, "llm_loss": 0.0, "loss": 0.0209, "step": 170850 }, { "action_loss": 0.0345, "epoch": 16.06656012033468, "learning_rate": 3.892578885415568e-06, "llm_loss": 0.0, "loss": 0.0345, "step": 170900 }, { "action_loss": 0.0048, "epoch": 16.071260693804643, "learning_rate": 3.887345359420124e-06, "llm_loss": 0.0, "loss": 0.0048, "step": 170950 }, { "action_loss": 0.0132, "epoch": 16.075961267274607, "learning_rate": 3.882114505057221e-06, "llm_loss": 0.0, "loss": 0.0132, "step": 171000 }, { "action_loss": 0.0105, "epoch": 16.080661840744572, "learning_rate": 3.8768863246130935e-06, "llm_loss": 0.0, "loss": 0.0105, "step": 171050 }, { "action_loss": 0.0171, "epoch": 16.085362414214533, "learning_rate": 3.871660820372791e-06, "llm_loss": 0.0, "loss": 0.0171, "step": 171100 }, { "action_loss": 0.0246, "epoch": 16.090062987684497, "learning_rate": 3.866437994620208e-06, "llm_loss": 0.0, "loss": 0.0246, "step": 171150 }, { "action_loss": 0.0422, "epoch": 16.094763561154462, "learning_rate": 3.8612178496380625e-06, "llm_loss": 0.0, "loss": 0.0422, "step": 171200 }, { "action_loss": 0.032, "epoch": 16.099464134624423, "learning_rate": 3.856000387707901e-06, "llm_loss": 0.0, "loss": 0.032, "step": 171250 }, { "action_loss": 0.0214, "epoch": 16.104164708094387, "learning_rate": 3.850785611110099e-06, "llm_loss": 0.0, "loss": 0.0214, "step": 171300 }, { "action_loss": 0.0206, "epoch": 16.108865281564352, "learning_rate": 3.845573522123857e-06, "llm_loss": 0.0, "loss": 0.0206, "step": 171350 }, { "action_loss": 0.0174, "epoch": 16.113565855034313, "learning_rate": 3.840364123027199e-06, "llm_loss": 0.0, "loss": 0.0174, "step": 171400 }, { "action_loss": 0.0174, "epoch": 16.118266428504278, "learning_rate": 3.835157416096978e-06, "llm_loss": 0.0, "loss": 0.0174, "step": 171450 }, { "action_loss": 0.0228, "epoch": 16.122967001974242, "learning_rate": 3.829953403608868e-06, "llm_loss": 0.0, "loss": 0.0228, "step": 171500 }, { "action_loss": 0.0212, "epoch": 16.127667575444203, "learning_rate": 3.824752087837366e-06, "llm_loss": 0.0, "loss": 0.0212, "step": 171550 }, { "action_loss": 0.011, "epoch": 16.132368148914168, "learning_rate": 3.819553471055785e-06, "llm_loss": 0.0, "loss": 0.011, "step": 171600 }, { "action_loss": 0.0303, "epoch": 16.137068722384132, "learning_rate": 3.8143575555362656e-06, "llm_loss": 0.0, "loss": 0.0303, "step": 171650 }, { "action_loss": 0.024, "epoch": 16.141769295854093, "learning_rate": 3.809164343549764e-06, "llm_loss": 0.0, "loss": 0.024, "step": 171700 }, { "action_loss": 0.0277, "epoch": 16.146469869324058, "learning_rate": 3.8039738373660552e-06, "llm_loss": 0.0, "loss": 0.0277, "step": 171750 }, { "action_loss": 0.0182, "epoch": 16.151170442794022, "learning_rate": 3.7987860392537345e-06, "llm_loss": 0.0, "loss": 0.0182, "step": 171800 }, { "action_loss": 0.0296, "epoch": 16.155871016263983, "learning_rate": 3.793600951480214e-06, "llm_loss": 0.0, "loss": 0.0296, "step": 171850 }, { "action_loss": 0.0074, "epoch": 16.160571589733948, "learning_rate": 3.78841857631171e-06, "llm_loss": 0.0, "loss": 0.0074, "step": 171900 }, { "action_loss": 0.0184, "epoch": 16.165272163203912, "learning_rate": 3.7832389160132664e-06, "llm_loss": 0.0, "loss": 0.0184, "step": 171950 }, { "action_loss": 0.0117, "epoch": 16.169972736673873, "learning_rate": 3.7780619728487368e-06, "llm_loss": 0.0, "loss": 0.0117, "step": 172000 }, { "action_loss": 0.0138, "epoch": 16.174673310143838, "learning_rate": 3.7728877490807835e-06, "llm_loss": 0.0, "loss": 0.0138, "step": 172050 }, { "action_loss": 0.0251, "epoch": 16.179373883613803, "learning_rate": 3.7677162469708863e-06, "llm_loss": 0.0, "loss": 0.0251, "step": 172100 }, { "action_loss": 0.017, "epoch": 16.184074457083764, "learning_rate": 3.7625474687793286e-06, "llm_loss": 0.0, "loss": 0.017, "step": 172150 }, { "action_loss": 0.0177, "epoch": 16.188775030553728, "learning_rate": 3.757381416765209e-06, "llm_loss": 0.0, "loss": 0.0177, "step": 172200 }, { "action_loss": 0.0239, "epoch": 16.19347560402369, "learning_rate": 3.7522180931864317e-06, "llm_loss": 0.0, "loss": 0.0239, "step": 172250 }, { "action_loss": 0.0207, "epoch": 16.198176177493654, "learning_rate": 3.747057500299709e-06, "llm_loss": 0.0, "loss": 0.0207, "step": 172300 }, { "action_loss": 0.0177, "epoch": 16.20287675096362, "learning_rate": 3.7418996403605613e-06, "llm_loss": 0.0, "loss": 0.0177, "step": 172350 }, { "action_loss": 0.0142, "epoch": 16.20757732443358, "learning_rate": 3.7367445156233107e-06, "llm_loss": 0.0, "loss": 0.0142, "step": 172400 }, { "action_loss": 0.0215, "epoch": 16.212277897903544, "learning_rate": 3.7315921283410873e-06, "llm_loss": 0.0, "loss": 0.0215, "step": 172450 }, { "action_loss": 0.0301, "epoch": 16.21697847137351, "learning_rate": 3.726442480765826e-06, "llm_loss": 0.0, "loss": 0.0301, "step": 172500 }, { "action_loss": 0.0198, "epoch": 16.22167904484347, "learning_rate": 3.721295575148257e-06, "llm_loss": 0.0, "loss": 0.0198, "step": 172550 }, { "action_loss": 0.0117, "epoch": 16.226379618313434, "learning_rate": 3.716151413737922e-06, "llm_loss": 0.0, "loss": 0.0117, "step": 172600 }, { "action_loss": 0.0256, "epoch": 16.2310801917834, "learning_rate": 3.7110099987831516e-06, "llm_loss": 0.0, "loss": 0.0256, "step": 172650 }, { "action_loss": 0.0187, "epoch": 16.23578076525336, "learning_rate": 3.705871332531087e-06, "llm_loss": 0.0, "loss": 0.0187, "step": 172700 }, { "action_loss": 0.0292, "epoch": 16.240481338723324, "learning_rate": 3.7007354172276634e-06, "llm_loss": 0.0, "loss": 0.0292, "step": 172750 }, { "action_loss": 0.0196, "epoch": 16.24518191219329, "learning_rate": 3.695602255117613e-06, "llm_loss": 0.0, "loss": 0.0196, "step": 172800 }, { "action_loss": 0.0146, "epoch": 16.24988248566325, "learning_rate": 3.690471848444467e-06, "llm_loss": 0.0, "loss": 0.0146, "step": 172850 }, { "action_loss": 0.0323, "epoch": 16.254583059133214, "learning_rate": 3.68534419945055e-06, "llm_loss": 0.0, "loss": 0.0323, "step": 172900 }, { "action_loss": 0.0197, "epoch": 16.25928363260318, "learning_rate": 3.6802193103769825e-06, "llm_loss": 0.0, "loss": 0.0197, "step": 172950 }, { "action_loss": 0.0352, "epoch": 16.26398420607314, "learning_rate": 3.6750971834636784e-06, "llm_loss": 0.0, "loss": 0.0352, "step": 173000 }, { "action_loss": 0.0141, "epoch": 16.268684779543104, "learning_rate": 3.6699778209493444e-06, "llm_loss": 0.0, "loss": 0.0141, "step": 173050 }, { "action_loss": 0.0177, "epoch": 16.27338535301307, "learning_rate": 3.6648612250714787e-06, "llm_loss": 0.0, "loss": 0.0177, "step": 173100 }, { "action_loss": 0.0236, "epoch": 16.27808592648303, "learning_rate": 3.659747398066371e-06, "llm_loss": 0.0, "loss": 0.0236, "step": 173150 }, { "action_loss": 0.0115, "epoch": 16.282786499952994, "learning_rate": 3.6546363421691e-06, "llm_loss": 0.0, "loss": 0.0115, "step": 173200 }, { "action_loss": 0.0208, "epoch": 16.28748707342296, "learning_rate": 3.6495280596135375e-06, "llm_loss": 0.0, "loss": 0.0208, "step": 173250 }, { "action_loss": 0.0145, "epoch": 16.29218764689292, "learning_rate": 3.644422552632333e-06, "llm_loss": 0.0, "loss": 0.0145, "step": 173300 }, { "action_loss": 0.0275, "epoch": 16.296888220362884, "learning_rate": 3.6393198234569303e-06, "llm_loss": 0.0, "loss": 0.0275, "step": 173350 }, { "action_loss": 0.0242, "epoch": 16.30158879383285, "learning_rate": 3.634219874317565e-06, "llm_loss": 0.0, "loss": 0.0242, "step": 173400 }, { "action_loss": 0.0146, "epoch": 16.30628936730281, "learning_rate": 3.6291227074432413e-06, "llm_loss": 0.0, "loss": 0.0146, "step": 173450 }, { "action_loss": 0.0308, "epoch": 16.310989940772775, "learning_rate": 3.6240283250617602e-06, "llm_loss": 0.0, "loss": 0.0308, "step": 173500 }, { "action_loss": 0.0332, "epoch": 16.31569051424274, "learning_rate": 3.6189367293997025e-06, "llm_loss": 0.0, "loss": 0.0332, "step": 173550 }, { "action_loss": 0.0246, "epoch": 16.3203910877127, "learning_rate": 3.613847922682432e-06, "llm_loss": 0.0, "loss": 0.0246, "step": 173600 }, { "action_loss": 0.0378, "epoch": 16.325091661182665, "learning_rate": 3.608761907134091e-06, "llm_loss": 0.0, "loss": 0.0378, "step": 173650 }, { "action_loss": 0.0244, "epoch": 16.32979223465263, "learning_rate": 3.6036786849776027e-06, "llm_loss": 0.0, "loss": 0.0244, "step": 173700 }, { "action_loss": 0.0117, "epoch": 16.33449280812259, "learning_rate": 3.5985982584346715e-06, "llm_loss": 0.0, "loss": 0.0117, "step": 173750 }, { "action_loss": 0.0078, "epoch": 16.339193381592555, "learning_rate": 3.5935206297257774e-06, "llm_loss": 0.0, "loss": 0.0078, "step": 173800 }, { "action_loss": 0.0235, "epoch": 16.34389395506252, "learning_rate": 3.5884458010701774e-06, "llm_loss": 0.0, "loss": 0.0235, "step": 173850 }, { "action_loss": 0.0136, "epoch": 16.34859452853248, "learning_rate": 3.5833737746859076e-06, "llm_loss": 0.0, "loss": 0.0136, "step": 173900 }, { "action_loss": 0.0138, "epoch": 16.353295102002445, "learning_rate": 3.5783045527897774e-06, "llm_loss": 0.0, "loss": 0.0138, "step": 173950 }, { "action_loss": 0.0244, "epoch": 16.35799567547241, "learning_rate": 3.5732381375973692e-06, "llm_loss": 0.0, "loss": 0.0244, "step": 174000 }, { "action_loss": 0.021, "epoch": 16.36269624894237, "learning_rate": 3.568174531323043e-06, "llm_loss": 0.0, "loss": 0.021, "step": 174050 }, { "action_loss": 0.0324, "epoch": 16.367396822412335, "learning_rate": 3.563113736179924e-06, "llm_loss": 0.0, "loss": 0.0324, "step": 174100 }, { "action_loss": 0.0213, "epoch": 16.372097395882296, "learning_rate": 3.5580557543799143e-06, "llm_loss": 0.0, "loss": 0.0213, "step": 174150 }, { "action_loss": 0.0079, "epoch": 16.37679796935226, "learning_rate": 3.553000588133685e-06, "llm_loss": 0.0, "loss": 0.0079, "step": 174200 }, { "action_loss": 0.0148, "epoch": 16.381498542822225, "learning_rate": 3.547948239650677e-06, "llm_loss": 0.0, "loss": 0.0148, "step": 174250 }, { "action_loss": 0.0227, "epoch": 16.386199116292186, "learning_rate": 3.5428987111391e-06, "llm_loss": 0.0, "loss": 0.0227, "step": 174300 }, { "action_loss": 0.0185, "epoch": 16.39089968976215, "learning_rate": 3.5378520048059315e-06, "llm_loss": 0.0, "loss": 0.0185, "step": 174350 }, { "action_loss": 0.0117, "epoch": 16.395600263232115, "learning_rate": 3.53280812285691e-06, "llm_loss": 0.0, "loss": 0.0117, "step": 174400 }, { "action_loss": 0.0214, "epoch": 16.400300836702076, "learning_rate": 3.527767067496547e-06, "llm_loss": 0.0, "loss": 0.0214, "step": 174450 }, { "action_loss": 0.0145, "epoch": 16.40500141017204, "learning_rate": 3.5227288409281145e-06, "llm_loss": 0.0, "loss": 0.0145, "step": 174500 }, { "action_loss": 0.0449, "epoch": 16.409701983642005, "learning_rate": 3.5176934453536503e-06, "llm_loss": 0.0, "loss": 0.0449, "step": 174550 }, { "action_loss": 0.017, "epoch": 16.414402557111966, "learning_rate": 3.512660882973953e-06, "llm_loss": 0.0, "loss": 0.017, "step": 174600 }, { "action_loss": 0.0196, "epoch": 16.41910313058193, "learning_rate": 3.5076311559885846e-06, "llm_loss": 0.0, "loss": 0.0196, "step": 174650 }, { "action_loss": 0.0079, "epoch": 16.423803704051895, "learning_rate": 3.502604266595866e-06, "llm_loss": 0.0, "loss": 0.0079, "step": 174700 }, { "action_loss": 0.0108, "epoch": 16.428504277521856, "learning_rate": 3.4975802169928797e-06, "llm_loss": 0.0, "loss": 0.0108, "step": 174750 }, { "action_loss": 0.0202, "epoch": 16.43320485099182, "learning_rate": 3.4925590093754657e-06, "llm_loss": 0.0, "loss": 0.0202, "step": 174800 }, { "action_loss": 0.0246, "epoch": 16.437905424461785, "learning_rate": 3.4875406459382254e-06, "llm_loss": 0.0, "loss": 0.0246, "step": 174850 }, { "action_loss": 0.0341, "epoch": 16.442605997931746, "learning_rate": 3.482525128874509e-06, "llm_loss": 0.0, "loss": 0.0341, "step": 174900 }, { "action_loss": 0.026, "epoch": 16.44730657140171, "learning_rate": 3.4775124603764287e-06, "llm_loss": 0.0, "loss": 0.026, "step": 174950 }, { "action_loss": 0.0204, "epoch": 16.452007144871676, "learning_rate": 3.472502642634852e-06, "llm_loss": 0.0, "loss": 0.0204, "step": 175000 }, { "action_loss": 0.035, "epoch": 16.456707718341637, "learning_rate": 3.4674956778393975e-06, "llm_loss": 0.0, "loss": 0.035, "step": 175050 }, { "action_loss": 0.0262, "epoch": 16.4614082918116, "learning_rate": 3.462491568178441e-06, "llm_loss": 0.0, "loss": 0.0262, "step": 175100 }, { "action_loss": 0.0102, "epoch": 16.466108865281566, "learning_rate": 3.457490315839107e-06, "llm_loss": 0.0, "loss": 0.0102, "step": 175150 }, { "action_loss": 0.0202, "epoch": 16.470809438751527, "learning_rate": 3.4524919230072705e-06, "llm_loss": 0.0, "loss": 0.0202, "step": 175200 }, { "action_loss": 0.0216, "epoch": 16.47551001222149, "learning_rate": 3.447496391867564e-06, "llm_loss": 0.0, "loss": 0.0216, "step": 175250 }, { "action_loss": 0.0235, "epoch": 16.480210585691456, "learning_rate": 3.4425037246033556e-06, "llm_loss": 0.0, "loss": 0.0235, "step": 175300 }, { "action_loss": 0.0171, "epoch": 16.484911159161417, "learning_rate": 3.4375139233967726e-06, "llm_loss": 0.0, "loss": 0.0171, "step": 175350 }, { "action_loss": 0.0215, "epoch": 16.48961173263138, "learning_rate": 3.4325269904286872e-06, "llm_loss": 0.0, "loss": 0.0215, "step": 175400 }, { "action_loss": 0.0356, "epoch": 16.494312306101346, "learning_rate": 3.4275429278787177e-06, "llm_loss": 0.0, "loss": 0.0356, "step": 175450 }, { "action_loss": 0.0287, "epoch": 16.499012879571307, "learning_rate": 3.4225617379252273e-06, "llm_loss": 0.0, "loss": 0.0287, "step": 175500 }, { "action_loss": 0.0177, "epoch": 16.50371345304127, "learning_rate": 3.417583422745324e-06, "llm_loss": 0.0, "loss": 0.0177, "step": 175550 }, { "action_loss": 0.0117, "epoch": 16.508414026511236, "learning_rate": 3.4126079845148587e-06, "llm_loss": 0.0, "loss": 0.0117, "step": 175600 }, { "action_loss": 0.0249, "epoch": 16.513114599981197, "learning_rate": 3.40763542540843e-06, "llm_loss": 0.0, "loss": 0.0249, "step": 175650 }, { "action_loss": 0.0146, "epoch": 16.51781517345116, "learning_rate": 3.402665747599365e-06, "llm_loss": 0.0, "loss": 0.0146, "step": 175700 }, { "action_loss": 0.0143, "epoch": 16.522515746921123, "learning_rate": 3.397698953259746e-06, "llm_loss": 0.0, "loss": 0.0143, "step": 175750 }, { "action_loss": 0.0228, "epoch": 16.527216320391087, "learning_rate": 3.392735044560388e-06, "llm_loss": 0.0, "loss": 0.0228, "step": 175800 }, { "action_loss": 0.0201, "epoch": 16.53191689386105, "learning_rate": 3.3877740236708457e-06, "llm_loss": 0.0, "loss": 0.0201, "step": 175850 }, { "action_loss": 0.0211, "epoch": 16.536617467331013, "learning_rate": 3.3828158927594126e-06, "llm_loss": 0.0, "loss": 0.0211, "step": 175900 }, { "action_loss": 0.0102, "epoch": 16.541318040800977, "learning_rate": 3.377860653993117e-06, "llm_loss": 0.0, "loss": 0.0102, "step": 175950 }, { "action_loss": 0.0245, "epoch": 16.546018614270942, "learning_rate": 3.3729083095377248e-06, "llm_loss": 0.0, "loss": 0.0245, "step": 176000 }, { "action_loss": 0.0077, "epoch": 16.550719187740903, "learning_rate": 3.3679588615577372e-06, "llm_loss": 0.0, "loss": 0.0077, "step": 176050 }, { "action_loss": 0.0078, "epoch": 16.555419761210867, "learning_rate": 3.3630123122163907e-06, "llm_loss": 0.0, "loss": 0.0078, "step": 176100 }, { "action_loss": 0.0147, "epoch": 16.560120334680832, "learning_rate": 3.3580686636756486e-06, "llm_loss": 0.0, "loss": 0.0147, "step": 176150 }, { "action_loss": 0.032, "epoch": 16.564820908150793, "learning_rate": 3.3531279180962183e-06, "llm_loss": 0.0, "loss": 0.032, "step": 176200 }, { "action_loss": 0.0366, "epoch": 16.569521481620757, "learning_rate": 3.348190077637522e-06, "llm_loss": 0.0, "loss": 0.0366, "step": 176250 }, { "action_loss": 0.0202, "epoch": 16.574222055090722, "learning_rate": 3.3432551444577245e-06, "llm_loss": 0.0, "loss": 0.0202, "step": 176300 }, { "action_loss": 0.0112, "epoch": 16.578922628560683, "learning_rate": 3.338323120713718e-06, "llm_loss": 0.0, "loss": 0.0112, "step": 176350 }, { "action_loss": 0.0286, "epoch": 16.583623202030648, "learning_rate": 3.333394008561123e-06, "llm_loss": 0.0, "loss": 0.0286, "step": 176400 }, { "action_loss": 0.0173, "epoch": 16.588323775500612, "learning_rate": 3.328467810154281e-06, "llm_loss": 0.0, "loss": 0.0173, "step": 176450 }, { "action_loss": 0.0117, "epoch": 16.593024348970573, "learning_rate": 3.3235445276462674e-06, "llm_loss": 0.0, "loss": 0.0117, "step": 176500 }, { "action_loss": 0.0134, "epoch": 16.597724922440538, "learning_rate": 3.3186241631888807e-06, "llm_loss": 0.0, "loss": 0.0134, "step": 176550 }, { "action_loss": 0.0365, "epoch": 16.602425495910502, "learning_rate": 3.3137067189326457e-06, "llm_loss": 0.0, "loss": 0.0365, "step": 176600 }, { "action_loss": 0.008, "epoch": 16.607126069380463, "learning_rate": 3.3087921970268077e-06, "llm_loss": 0.0, "loss": 0.008, "step": 176650 }, { "action_loss": 0.0036, "epoch": 16.611826642850428, "learning_rate": 3.303978802893516e-06, "llm_loss": 0.0, "loss": 0.0036, "step": 176700 }, { "action_loss": 0.0206, "epoch": 16.616527216320392, "learning_rate": 3.299070073577173e-06, "llm_loss": 0.0, "loss": 0.0206, "step": 176750 }, { "action_loss": 0.0245, "epoch": 16.621227789790353, "learning_rate": 3.2941642730084056e-06, "llm_loss": 0.0, "loss": 0.0245, "step": 176800 }, { "action_loss": 0.014, "epoch": 16.625928363260318, "learning_rate": 3.289261403331372e-06, "llm_loss": 0.0, "loss": 0.014, "step": 176850 }, { "action_loss": 0.0303, "epoch": 16.630628936730282, "learning_rate": 3.2843614666889466e-06, "llm_loss": 0.0, "loss": 0.0303, "step": 176900 }, { "action_loss": 0.034, "epoch": 16.635329510200243, "learning_rate": 3.279464465222727e-06, "llm_loss": 0.0, "loss": 0.034, "step": 176950 }, { "action_loss": 0.0382, "epoch": 16.640030083670208, "learning_rate": 3.274570401073024e-06, "llm_loss": 0.0, "loss": 0.0382, "step": 177000 }, { "action_loss": 0.0237, "epoch": 16.644730657140173, "learning_rate": 3.26967927637887e-06, "llm_loss": 0.0, "loss": 0.0237, "step": 177050 }, { "action_loss": 0.0166, "epoch": 16.649431230610134, "learning_rate": 3.2647910932780003e-06, "llm_loss": 0.0, "loss": 0.0166, "step": 177100 }, { "action_loss": 0.0283, "epoch": 16.654131804080098, "learning_rate": 3.2599058539068782e-06, "llm_loss": 0.0, "loss": 0.0283, "step": 177150 }, { "action_loss": 0.017, "epoch": 16.658832377550063, "learning_rate": 3.255023560400673e-06, "llm_loss": 0.0, "loss": 0.017, "step": 177200 }, { "action_loss": 0.0118, "epoch": 16.663532951020024, "learning_rate": 3.2501442148932705e-06, "llm_loss": 0.0, "loss": 0.0118, "step": 177250 }, { "action_loss": 0.0281, "epoch": 16.668233524489988, "learning_rate": 3.245267819517264e-06, "llm_loss": 0.0, "loss": 0.0281, "step": 177300 }, { "action_loss": 0.0344, "epoch": 16.672934097959953, "learning_rate": 3.2403943764039605e-06, "llm_loss": 0.0, "loss": 0.0344, "step": 177350 }, { "action_loss": 0.0175, "epoch": 16.677634671429914, "learning_rate": 3.235523887683375e-06, "llm_loss": 0.0, "loss": 0.0175, "step": 177400 }, { "action_loss": 0.0145, "epoch": 16.68233524489988, "learning_rate": 3.230656355484232e-06, "llm_loss": 0.0, "loss": 0.0145, "step": 177450 }, { "action_loss": 0.0205, "epoch": 16.687035818369843, "learning_rate": 3.2257917819339633e-06, "llm_loss": 0.0, "loss": 0.0205, "step": 177500 }, { "action_loss": 0.0239, "epoch": 16.691736391839804, "learning_rate": 3.220930169158708e-06, "llm_loss": 0.0, "loss": 0.0239, "step": 177550 }, { "action_loss": 0.0147, "epoch": 16.69643696530977, "learning_rate": 3.2160715192833136e-06, "llm_loss": 0.0, "loss": 0.0147, "step": 177600 }, { "action_loss": 0.024, "epoch": 16.701137538779733, "learning_rate": 3.2112158344313247e-06, "llm_loss": 0.0, "loss": 0.024, "step": 177650 }, { "action_loss": 0.016, "epoch": 16.705838112249694, "learning_rate": 3.2063631167249987e-06, "llm_loss": 0.0, "loss": 0.016, "step": 177700 }, { "action_loss": 0.0116, "epoch": 16.71053868571966, "learning_rate": 3.2015133682852904e-06, "llm_loss": 0.0, "loss": 0.0116, "step": 177750 }, { "action_loss": 0.0233, "epoch": 16.71523925918962, "learning_rate": 3.1966665912318616e-06, "llm_loss": 0.0, "loss": 0.0233, "step": 177800 }, { "action_loss": 0.0168, "epoch": 16.719939832659584, "learning_rate": 3.1918227876830756e-06, "llm_loss": 0.0, "loss": 0.0168, "step": 177850 }, { "action_loss": 0.0245, "epoch": 16.72464040612955, "learning_rate": 3.186981959755987e-06, "llm_loss": 0.0, "loss": 0.0245, "step": 177900 }, { "action_loss": 0.0188, "epoch": 16.72934097959951, "learning_rate": 3.1821441095663586e-06, "llm_loss": 0.0, "loss": 0.0188, "step": 177950 }, { "action_loss": 0.0146, "epoch": 16.734041553069474, "learning_rate": 3.177309239228651e-06, "llm_loss": 0.0, "loss": 0.0146, "step": 178000 }, { "action_loss": 0.0174, "epoch": 16.73874212653944, "learning_rate": 3.172477350856019e-06, "llm_loss": 0.0, "loss": 0.0174, "step": 178050 }, { "action_loss": 0.034, "epoch": 16.7434427000094, "learning_rate": 3.1676484465603184e-06, "llm_loss": 0.0, "loss": 0.034, "step": 178100 }, { "action_loss": 0.0132, "epoch": 16.748143273479364, "learning_rate": 3.1628225284520962e-06, "llm_loss": 0.0, "loss": 0.0132, "step": 178150 }, { "action_loss": 0.0119, "epoch": 16.75284384694933, "learning_rate": 3.1579995986405976e-06, "llm_loss": 0.0, "loss": 0.0119, "step": 178200 }, { "action_loss": 0.0232, "epoch": 16.75754442041929, "learning_rate": 3.1531796592337592e-06, "llm_loss": 0.0, "loss": 0.0232, "step": 178250 }, { "action_loss": 0.0262, "epoch": 16.762244993889254, "learning_rate": 3.148362712338213e-06, "llm_loss": 0.0, "loss": 0.0262, "step": 178300 }, { "action_loss": 0.0213, "epoch": 16.76694556735922, "learning_rate": 3.1435487600592816e-06, "llm_loss": 0.0, "loss": 0.0213, "step": 178350 }, { "action_loss": 0.0149, "epoch": 16.77164614082918, "learning_rate": 3.13873780450098e-06, "llm_loss": 0.0, "loss": 0.0149, "step": 178400 }, { "action_loss": 0.0185, "epoch": 16.776346714299144, "learning_rate": 3.1339298477660097e-06, "llm_loss": 0.0, "loss": 0.0185, "step": 178450 }, { "action_loss": 0.0147, "epoch": 16.78104728776911, "learning_rate": 3.1291248919557717e-06, "llm_loss": 0.0, "loss": 0.0147, "step": 178500 }, { "action_loss": 0.0187, "epoch": 16.78574786123907, "learning_rate": 3.1243229391703377e-06, "llm_loss": 0.0, "loss": 0.0187, "step": 178550 }, { "action_loss": 0.0186, "epoch": 16.790448434709035, "learning_rate": 3.119523991508483e-06, "llm_loss": 0.0, "loss": 0.0186, "step": 178600 }, { "action_loss": 0.0148, "epoch": 16.795149008179, "learning_rate": 3.114728051067666e-06, "llm_loss": 0.0, "loss": 0.0148, "step": 178650 }, { "action_loss": 0.0146, "epoch": 16.79984958164896, "learning_rate": 3.109935119944022e-06, "llm_loss": 0.0, "loss": 0.0146, "step": 178700 }, { "action_loss": 0.0372, "epoch": 16.804550155118925, "learning_rate": 3.105145200232381e-06, "llm_loss": 0.0, "loss": 0.0372, "step": 178750 }, { "action_loss": 0.0119, "epoch": 16.80925072858889, "learning_rate": 3.1003582940262535e-06, "llm_loss": 0.0, "loss": 0.0119, "step": 178800 }, { "action_loss": 0.0048, "epoch": 16.81395130205885, "learning_rate": 3.0955744034178325e-06, "llm_loss": 0.0, "loss": 0.0048, "step": 178850 }, { "action_loss": 0.0305, "epoch": 16.818651875528815, "learning_rate": 3.090793530497992e-06, "llm_loss": 0.0, "loss": 0.0305, "step": 178900 }, { "action_loss": 0.0113, "epoch": 16.82335244899878, "learning_rate": 3.0860156773562898e-06, "llm_loss": 0.0, "loss": 0.0113, "step": 178950 }, { "action_loss": 0.0275, "epoch": 16.82805302246874, "learning_rate": 3.0812408460809607e-06, "llm_loss": 0.0, "loss": 0.0275, "step": 179000 }, { "action_loss": 0.011, "epoch": 16.832753595938705, "learning_rate": 3.0764690387589223e-06, "llm_loss": 0.0, "loss": 0.011, "step": 179050 }, { "action_loss": 0.0212, "epoch": 16.83745416940867, "learning_rate": 3.071700257475768e-06, "llm_loss": 0.0, "loss": 0.0212, "step": 179100 }, { "action_loss": 0.0171, "epoch": 16.84215474287863, "learning_rate": 3.0669345043157684e-06, "llm_loss": 0.0, "loss": 0.0171, "step": 179150 }, { "action_loss": 0.021, "epoch": 16.846855316348595, "learning_rate": 3.0621717813618712e-06, "llm_loss": 0.0, "loss": 0.021, "step": 179200 }, { "action_loss": 0.0286, "epoch": 16.85155588981856, "learning_rate": 3.057412090695702e-06, "llm_loss": 0.0, "loss": 0.0286, "step": 179250 }, { "action_loss": 0.0281, "epoch": 16.85625646328852, "learning_rate": 3.052655434397559e-06, "llm_loss": 0.0, "loss": 0.0281, "step": 179300 }, { "action_loss": 0.0316, "epoch": 16.860957036758485, "learning_rate": 3.0479018145464113e-06, "llm_loss": 0.0, "loss": 0.0316, "step": 179350 }, { "action_loss": 0.0233, "epoch": 16.865657610228446, "learning_rate": 3.043151233219904e-06, "llm_loss": 0.0, "loss": 0.0233, "step": 179400 }, { "action_loss": 0.0422, "epoch": 16.87035818369841, "learning_rate": 3.0384036924943583e-06, "llm_loss": 0.0, "loss": 0.0422, "step": 179450 }, { "action_loss": 0.0236, "epoch": 16.875058757168375, "learning_rate": 3.033659194444757e-06, "llm_loss": 0.0, "loss": 0.0236, "step": 179500 }, { "action_loss": 0.0306, "epoch": 16.879759330638336, "learning_rate": 3.028917741144759e-06, "llm_loss": 0.0, "loss": 0.0306, "step": 179550 }, { "action_loss": 0.0107, "epoch": 16.8844599041083, "learning_rate": 3.0241793346666924e-06, "llm_loss": 0.0, "loss": 0.0107, "step": 179600 }, { "action_loss": 0.0413, "epoch": 16.889160477578265, "learning_rate": 3.019443977081554e-06, "llm_loss": 0.0, "loss": 0.0413, "step": 179650 }, { "action_loss": 0.0173, "epoch": 16.893861051048226, "learning_rate": 3.0147116704590054e-06, "llm_loss": 0.0, "loss": 0.0173, "step": 179700 }, { "action_loss": 0.0359, "epoch": 16.89856162451819, "learning_rate": 3.0099824168673763e-06, "llm_loss": 0.0, "loss": 0.0359, "step": 179750 }, { "action_loss": 0.0241, "epoch": 16.903262197988155, "learning_rate": 3.005256218373663e-06, "llm_loss": 0.0, "loss": 0.0241, "step": 179800 }, { "action_loss": 0.0185, "epoch": 16.907962771458116, "learning_rate": 3.0005330770435247e-06, "llm_loss": 0.0, "loss": 0.0185, "step": 179850 }, { "action_loss": 0.0138, "epoch": 16.91266334492808, "learning_rate": 2.995812994941284e-06, "llm_loss": 0.0, "loss": 0.0138, "step": 179900 }, { "action_loss": 0.0265, "epoch": 16.917363918398046, "learning_rate": 2.9910959741299294e-06, "llm_loss": 0.0, "loss": 0.0265, "step": 179950 }, { "action_loss": 0.0216, "epoch": 16.922064491868007, "learning_rate": 2.98638201667111e-06, "llm_loss": 0.0, "loss": 0.0216, "step": 180000 } ], "logging_steps": 50, "max_steps": 240000, "num_input_tokens_seen": 0, "num_train_epochs": 23, "save_steps": 20000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.5577567482281984e+19, "train_batch_size": 6, "trial_name": null, "trial_params": null }